├── .cirrus.yml
├── .github
    └── workflows
    │   └── ci.yml
├── .gitignore
├── .hlint.yaml
├── Changelog.md
├── Data
    ├── ByteString.hs
    └── ByteString
    │   ├── Builder.hs
    │   ├── Builder
    │       ├── ASCII.hs
    │       ├── Extra.hs
    │       ├── Internal.hs
    │       ├── Prim.hs
    │       ├── Prim
    │       │   ├── ASCII.hs
    │       │   ├── Binary.hs
    │       │   ├── Internal.hs
    │       │   └── Internal
    │       │   │   ├── Base16.hs
    │       │   │   └── Floating.hs
    │       ├── RealFloat.hs
    │       └── RealFloat
    │       │   ├── D2S.hs
    │       │   ├── F2S.hs
    │       │   ├── Internal.hs
    │       │   └── TableGenerator.hs
    │   ├── Char8.hs
    │   ├── Internal.hs
    │   ├── Internal
    │       ├── Pure.hs
    │       └── Type.hs
    │   ├── Lazy.hs
    │   ├── Lazy
    │       ├── Char8.hs
    │       ├── Internal.hs
    │       ├── ReadInt.hs
    │       └── ReadNat.hs
    │   ├── ReadInt.hs
    │   ├── ReadNat.hs
    │   ├── Short.hs
    │   ├── Short
    │       └── Internal.hs
    │   ├── Unsafe.hs
    │   └── Utils
    │       ├── ByteOrder.hs
    │       └── UnalignedAccess.hs
├── LICENSE
├── README.md
├── Setup.hs
├── bench
    ├── BenchAll.hs
    ├── BenchBoundsCheckFusion.hs
    ├── BenchCSV.hs
    ├── BenchCount.hs
    ├── BenchIndices.hs
    ├── BenchReadInt.hs
    └── BenchShort.hs
├── bytestring.cabal
├── cabal.project.wasi
├── cbits
    ├── aarch64
    │   └── is-valid-utf8.c
    ├── aligned-static-hs-data.c
    ├── fpstring.c
    ├── is-valid-utf8.c
    ├── itoa.c
    └── shortbytestring.c
├── include
    ├── bytestring-cpp-macros.h
    └── fpstring.h
└── tests
    ├── .gitignore
    ├── Builder.hs
    ├── IsValidUtf8.hs
    ├── LazyHClose.hs
    ├── Lift.hs
    ├── Main.hs
    ├── Properties.hs
    ├── Properties
        ├── ByteString.hs
        ├── ByteStringChar8.hs
        ├── ByteStringLazy.hs
        ├── ByteStringLazyChar8.hs
        └── ShortByteString.hs
    ├── QuickCheckUtils.hs
    └── builder
        └── Data
            └── ByteString
                └── Builder
                    ├── Prim
                        ├── TestUtils.hs
                        └── Tests.hs
                    └── Tests.hs


/.cirrus.yml:
--------------------------------------------------------------------------------
 1 | task:
 2 |   name: FreeBSD
 3 |   freebsd_instance:
 4 |     image_family: freebsd-14-0
 5 |   install_script:
 6 |     - pkg install -y git gmake
 7 |     - curl --proto '=https' --tlsv1.2 -sSf https://get-ghcup.haskell.org | BOOTSTRAP_HASKELL_NONINTERACTIVE=1 BOOTSTRAP_HASKELL_INSTALL_NO_STACK=1 sh
 8 |   script:
 9 |     - export PATH="$HOME/.ghcup/bin:$PATH"
10 |     - cabal test --test-show-details=direct
11 | 
12 | task:
13 |   name: OpenBSD
14 |   compute_engine_instance:
15 |     image_project: pg-ci-images
16 |     # See https://github.com/anarazel/pg-vm-images/blob/main/packer/openbsd.pkrvars.hcl
17 |     image: family/pg-ci-openbsd-vanilla
18 |     platform: openbsd
19 |   install_script: pkg_add ghc cabal-install git
20 |   script:
21 |     - export CABAL_DIR=/tmp/.cabal
22 |     - cabal update
23 |     - cabal test --test-show-details=direct
24 | 
25 | task:
26 |   name: NetBSD
27 |   compute_engine_instance:
28 |     image_project: pg-ci-images
29 |     # See https://github.com/anarazel/pg-vm-images/blob/main/packer/netbsd.pkrvars.hcl
30 |     image: family/pg-ci-netbsd-vanilla
31 |     platform: netbsd
32 |   install_script:
33 |     - export PKG_PATH="http://cdn.NetBSD.org/pub/pkgsrc/packages/NetBSD/$(uname -p)/$(uname -r|cut -f '1 2' -d.)/All/"
34 |     - pkg_add ghc cabal-install git
35 |   script:
36 |     - export CABAL_DIR=/tmp/.cabal
37 |     - cabal update
38 |     - cabal test --test-show-details=direct
39 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
  1 | name: ci
  2 | on:
  3 |   push:
  4 |     branches:
  5 |       - master
  6 |       - bytestring-0.11
  7 |       - bytestring-0.12
  8 |   pull_request: {} # Validate all PRs
  9 | 
 10 | defaults:
 11 |   run:
 12 |     shell: bash
 13 | 
 14 | jobs:
 15 |   build:
 16 |     runs-on: ${{ matrix.os }}
 17 |     strategy:
 18 |       fail-fast: true
 19 |       matrix:
 20 |         os: [ubuntu-latest]
 21 |         ghc: ['8.4', '8.6', '8.8', '8.10', '9.0', '9.2', '9.4', '9.6', '9.8', '9.10', '9.12']
 22 |         include:
 23 |         - os: macOS-latest
 24 |           ghc: 'latest'
 25 |     steps:
 26 |     - uses: actions/checkout@v4
 27 |     - uses: haskell-actions/setup@v2
 28 |       id: setup-haskell-cabal
 29 |       with:
 30 |         ghc-version: ${{ matrix.ghc }}
 31 |     - name: Update cabal package database
 32 |       run: cabal update
 33 |     - uses: actions/cache@v3
 34 |       name: Cache cabal stuff
 35 |       with:
 36 |         path: |
 37 |           ${{ steps.setup-haskell-cabal.outputs.cabal-store }}
 38 |           dist-newstyle
 39 |         key: ${{ runner.os }}-${{ matrix.ghc }}
 40 |     - name: Test
 41 |       run: |
 42 |         cabal sdist -z -o .
 43 |         cabal get bytestring-*.tar.gz
 44 |         cd bytestring-*/
 45 |         cabal build bytestring:tests --enable-tests --enable-benchmarks
 46 |         cabal test --enable-tests --enable-benchmarks --test-show-details=direct all
 47 |     - name: Bench
 48 |       run: |
 49 |         cd bytestring-*/
 50 |         cabal bench --enable-tests --enable-benchmarks --benchmark-option=-l all
 51 |     - name: Haddock
 52 |       run: |
 53 |         cd bytestring-*/
 54 |         cabal haddock all
 55 |     - name: Cabal check
 56 |       run: |
 57 |         cd bytestring-*/
 58 |         cabal check
 59 | 
 60 |   windows-build:
 61 |     runs-on: windows-latest
 62 |     needs: build
 63 |     strategy:
 64 |       fail-fast: true
 65 |       matrix:
 66 |         ghc: ['9.2', '9.4', '9.6', '9.8', '9.10']
 67 |     steps:
 68 |     - uses: actions/checkout@v4
 69 |     - uses: haskell-actions/setup@v2
 70 |       id: setup-haskell-cabal
 71 |       with:
 72 |         ghc-version: ${{ matrix.ghc }}
 73 |     - name: Update cabal package database
 74 |       run: cabal update
 75 |     - uses: actions/cache@v3
 76 |       name: Cache cabal stuff
 77 |       with:
 78 |         path: |
 79 |           ${{ steps.setup-haskell-cabal.outputs.cabal-store }}
 80 |           dist-newstyle
 81 |         key: ${{ runner.os }}-${{ matrix.ghc }}
 82 |     # We rebuild tests several times to avoid intermittent failures on Windows
 83 |     # https://github.com/haskell/actions/issues/36
 84 |     # We also use --enable-tests and --enable-benchmarks to avoid
 85 |     # test and bench commands from reconfiguring and thus rebuilding.
 86 |     - name: Build Test
 87 |       run: |
 88 |         cabal sdist -z -o .
 89 |         cabal get bytestring-*.tar.gz
 90 |         cd bytestring-*/
 91 |         bld() { cabal build bytestring:tests --enable-tests --enable-benchmarks; }
 92 |         bld || bld || bld
 93 | 
 94 |     - name: Run Test
 95 |       # test broken linking on windows: https://github.com/haskell/bytestring/issues/497
 96 |       run: |
 97 |         cd bytestring-*/
 98 |         $bin = cabal list-bin bytestring-tests
 99 |         $env:PATH = ''
100 |         & "$bin"
101 |       shell: pwsh
102 | 
103 |     - name: Bench
104 |       run: |
105 |         cd bytestring-*/
106 |         cabal bench --enable-tests --enable-benchmarks --benchmark-option=-l all
107 |     - name: Haddock
108 |       run: |
109 |         cd bytestring-*/
110 |         cabal haddock all
111 |     - name: Cabal check
112 |       run: |
113 |         cd bytestring-*/
114 |         cabal check
115 | 
116 |   # Emulation is incredibly slow and memory demanding. It seems that any
117 |   # executable with GHC RTS takes at least 7-8 Gb of RAM, so we can run
118 |   # `cabal` or `ghc` on their own, but cannot run them both at the same time,
119 |   # striking out `cabal test`. Instead we rely on system packages and invoke
120 |   # `ghc --make` manually, and even so `ghc -O` is prohibitively expensive.
121 |   emulated:
122 |     needs: build
123 |     runs-on: ubuntu-latest
124 |     strategy:
125 |       fail-fast: true
126 |       matrix:
127 |         arch: ['s390x', 'ppc64le', 'riscv64']
128 |     steps:
129 |     - uses: actions/checkout@v4
130 |     - uses: uraimo/run-on-arch-action@v2.8.1
131 |       timeout-minutes: 60
132 |       with:
133 |         arch: ${{ matrix.arch }}
134 |         distro: ubuntu_rolling
135 |         githubToken: ${{ github.token }}
136 |         install: |
137 |           apt-get update -y
138 |           apt-get install -y ghc libghc-tasty-quickcheck-dev libghc-syb-dev
139 |         run: |
140 |           ghc --version
141 |           ghc --make -fPIC -XHaskell2010 -XBangPatterns -XDeriveDataTypeable -XDeriveGeneric -XDeriveLift -XFlexibleContexts -XFlexibleInstances -XLambdaCase -XMagicHash -XMultiWayIf -XNamedFieldPuns -XPatternSynonyms -XRankNTypes -XScopedTypeVariables -XStandaloneDeriving -XTupleSections -XTypeApplications -XTypeOperators -XUnboxedTuples -optP-Wall -optP-Werror=undef -DPURE_HASKELL=0 -Iinclude -itests:tests/builder -o Main cbits/*.c tests/Main.hs +RTS -s
142 |           ./Main +RTS -s
143 | 
144 |   bounds-checking:
145 |     needs: build
146 |     runs-on: ubuntu-latest
147 |     steps:
148 |     - uses: actions/checkout@v4
149 |     - uses: haskell-actions/setup@v2
150 |       id: setup-haskell-cabal
151 |       with:
152 |         ghc-version: 'latest'
153 |     - name: Update cabal package database
154 |       run: cabal update
155 |     - uses: actions/cache@v3
156 |       name: Cache cabal stuff
157 |       with:
158 |         path: |
159 |           ${{ steps.setup-haskell-cabal.outputs.cabal-store }}
160 |           dist-newstyle
161 |         key: ${{ runner.os }}-latest
162 |     - name: Test
163 |       run: cabal test --ghc-options='-fcheck-prim-bounds -fno-ignore-asserts -DHS_BYTESTRING_ASSERTIONS'
164 | 
165 |   pure-haskell:
166 |     needs: build
167 |     runs-on: ubuntu-latest
168 |     steps:
169 |     - uses: actions/checkout@v4
170 |     - uses: haskell-actions/setup@v2
171 |       id: setup-haskell-cabal
172 |       with:
173 |         ghc-version: 'latest'
174 |     - name: Update cabal package database
175 |       run: cabal update
176 |     - uses: actions/cache@v3
177 |       name: Cache cabal stuff
178 |       with:
179 |         path: |
180 |           ${{ steps.setup-haskell-cabal.outputs.cabal-store }}
181 |           dist-newstyle
182 |         key: ${{ runner.os }}-latest-pure-haskell
183 |     - name: Test
184 |       run: cabal test -fpure-haskell --ghc-options=-fno-ignore-asserts --enable-tests --test-show-details=direct all
185 | 
186 |   i386:
187 |     needs: build
188 |     runs-on: ubuntu-latest
189 |     container:
190 |       image: i386/ubuntu:bionic
191 |     steps:
192 |     - name: Install
193 |       run: |
194 |         apt-get update -y
195 |         apt-get install -y autoconf build-essential zlib1g-dev libgmp-dev curl libncurses5 libtinfo5 libncurses5-dev libtinfo-dev
196 |         curl --proto '=https' --tlsv1.2 -sSf https://get-ghcup.haskell.org | BOOTSTRAP_HASKELL_NONINTERACTIVE=1 BOOTSTRAP_HASKELL_INSTALL_NO_STACK=1 sh
197 |     - uses: actions/checkout@v1 #This version must stay old enough to remain compatible with the container image
198 |     - name: Test
199 |       run: |
200 |         source ~/.ghcup/env
201 |         cabal update
202 |         cabal test
203 | 
204 |   # We use github.com/haskell self-hosted runners for ARM testing.
205 |   # If they become unavailable in future, put ['armv7', 'aarch64']
206 |   # back to emulation jobs above.
207 |   arm:
208 |     needs: build
209 |     runs-on: [self-hosted, Linux, ARM64]
210 |     strategy:
211 |       fail-fast: true
212 |       matrix:
213 |         arch: [arm32v7, arm64v8]
214 |     steps:
215 |       - uses: docker://hasufell/arm64v8-ubuntu-haskell:focal
216 |         name: Cleanup
217 |         with:
218 |           args: "find . -mindepth 1 -maxdepth 1 -exec rm -rf -- {} +"
219 | 
220 |       - name: Checkout code
221 |         uses: actions/checkout@v4
222 | 
223 |       - if: matrix.arch == 'arm32v7'
224 |         uses: docker://hasufell/arm32v7-ubuntu-haskell:focal
225 |         name: Run build (arm32v7 linux)
226 |         with:
227 |           args: sh -c "cabal update && cabal test"
228 | 
229 |       - if: matrix.arch == 'arm64v8'
230 |         uses: docker://hasufell/arm64v8-ubuntu-haskell:focal
231 |         name: Run build (arm64v8 linux)
232 |         with:
233 |           args: sh -c "ghcup install ghc 9.10.1 && cabal update && cabal test -w ~/.ghcup/bin/ghc-9.10.1"
234 | 
235 |   wasi:
236 |     runs-on: ubuntu-latest
237 |     needs: build
238 |     env:
239 |       GHC_WASM_META_REV: a04cc1a2206d2030326e1d49be9c6a94ee4283a3
240 |     strategy:
241 |       matrix:
242 |         ghc: ['9.10']
243 |       fail-fast: false
244 |     steps:
245 |     - name: setup-ghc-wasm32-wasi
246 |       run: |
247 |         cd $(mktemp -d)
248 |         curl -L https://gitlab.haskell.org/ghc/ghc-wasm-meta/-/archive/$GHC_WASM_META_REV/ghc-wasm-meta.tar.gz | tar xz --strip-components=1
249 |         ./setup.sh
250 |         ~/.ghc-wasm/add_to_github_path.sh
251 |       env:
252 |         FLAVOUR: ${{ matrix.ghc }}
253 |     - uses: actions/checkout@v4
254 |     - uses: actions/cache@v3
255 |       with:
256 |         path: |
257 |           ~/.ghc-wasm/.cabal/store
258 |         key: wasi-${{ runner.os }}-${{ env.GHC_WASM_META_REV }}-flavour-${{ matrix.ghc }}-${{ github.sha }}
259 |         restore-keys: |
260 |           wasi-${{ runner.os }}-${{ env.GHC_WASM_META_REV }}-flavour-${{ matrix.ghc }}-
261 |     - name: Build
262 |       run: |
263 |         mv cabal.project.wasi cabal.project.local
264 |         wasm32-wasi-cabal build --enable-tests
265 |         wasm32-wasi-cabal list-bin test:bytestring-tests
266 |     - name: Test
267 |       run: |
268 |         wasmtime.sh $(wasm32-wasi-cabal list-bin test:bytestring-tests)
269 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | GNUmakefile
 2 | cabal-dev
 3 | dist
 4 | dist-install
 5 | ghc.mk
 6 | .cabal-sandbox
 7 | cabal.sandbox.config
 8 | .hsenv
 9 | *~
10 | dist-newstyle/
11 | cabal.project.local*
12 | .nvimrc
13 | .ghc.environment*
14 | 


--------------------------------------------------------------------------------
/.hlint.yaml:
--------------------------------------------------------------------------------
 1 | - ignore:
 2 |     name: Use camelCase
 3 |     within:
 4 |       - Data.ByteString.Builder.Internal
 5 |       - Data.ByteString.Short.Internal
 6 | - ignore:
 7 |     name: Use fewer imports
 8 |     within:
 9 |       - Data.ByteString
10 |       - Data.ByteString.Internal
11 |       - Data.ByteString.Short.Internal
12 | - ignore:
13 |     name: Avoid lambda
14 |     within:
15 |       - Data.ByteString.Builder.Internal
16 |       - Data.ByteString.Builder.Prim
17 | - ignore:
18 |     name: Reduce duplication
19 |     within:
20 |       - Data.ByteString  
21 | - ignore:
22 |     name: Redundant lambda
23 |     within:
24 |       - Data.ByteString.Builder.Internal
25 |       - Data.ByteString  
26 | 


--------------------------------------------------------------------------------
/Data/ByteString/Builder/ASCII.hs:
--------------------------------------------------------------------------------
  1 | {-# OPTIONS_HADDOCK not-home #-}
  2 | 
  3 | -- | Copyright : (c) 2010 - 2011 Simon Meier
  4 | -- License     : BSD3-style (see LICENSE)
  5 | --
  6 | -- Maintainer  : Simon Meier <iridcode@gmail.com>
  7 | -- Portability : GHC
  8 | --
  9 | -- Constructing 'Builder's using ASCII-based encodings.
 10 | --
 11 | module Data.ByteString.Builder.ASCII
 12 |     (
 13 |       -- ** Formatting numbers as text
 14 |       -- | Formatting of numbers as ASCII text.
 15 |       --
 16 |       -- Note that you can also use these functions for the ISO/IEC 8859-1 and
 17 |       -- UTF-8 encodings, as the ASCII encoding is equivalent on the
 18 |       -- codepoints 0-127.
 19 | 
 20 |       -- *** Decimal numbers
 21 |       -- | Decimal encoding of numbers using ASCII encoded characters.
 22 |       int8Dec
 23 |     , int16Dec
 24 |     , int32Dec
 25 |     , int64Dec
 26 |     , intDec
 27 |     , integerDec
 28 | 
 29 |     , word8Dec
 30 |     , word16Dec
 31 |     , word32Dec
 32 |     , word64Dec
 33 |     , wordDec
 34 | 
 35 |     , floatDec
 36 |     , doubleDec
 37 | 
 38 |       -- *** Hexadecimal numbers
 39 | 
 40 |       -- | Encoding positive integers as hexadecimal numbers using lower-case
 41 |       -- ASCII characters. The shortest
 42 |       -- possible representation is used. For example,
 43 |       --
 44 |       -- >>> toLazyByteString (word16Hex 0x0a10)
 45 |       -- Chunk "a10" Empty
 46 |       --
 47 |       -- Note that there is no support for using upper-case characters. Please
 48 |       -- contact the maintainer, if your application cannot work without
 49 |       -- hexadecimal encodings that use upper-case characters.
 50 |       --
 51 |     , word8Hex
 52 |     , word16Hex
 53 |     , word32Hex
 54 |     , word64Hex
 55 |     , wordHex
 56 | 
 57 |       -- *** Fixed-width hexadecimal numbers
 58 |       --
 59 |     , int8HexFixed
 60 |     , int16HexFixed
 61 |     , int32HexFixed
 62 |     , int64HexFixed
 63 |     , word8HexFixed
 64 |     , word16HexFixed
 65 |     , word32HexFixed
 66 |     , word64HexFixed
 67 | 
 68 |     , floatHexFixed
 69 |     , doubleHexFixed
 70 | 
 71 |     , byteStringHex
 72 |     , lazyByteStringHex
 73 | 
 74 |     ) where
 75 | 
 76 | import           Data.ByteString                                as S
 77 | import           Data.ByteString.Lazy                           as L
 78 | import           Data.ByteString.Builder.Internal (Builder)
 79 | import qualified Data.ByteString.Builder.Prim                   as P
 80 | import qualified Data.ByteString.Builder.Prim.Internal          as P
 81 | import           Data.ByteString.Builder.RealFloat (floatDec, doubleDec)
 82 | import           Data.ByteString.Internal.Type (c_uint32_dec_padded9, c_uint64_dec_padded18)
 83 | 
 84 | import           Foreign
 85 | import           Data.List.NonEmpty (NonEmpty(..))
 86 | 
 87 | ------------------------------------------------------------------------------
 88 | -- Decimal Encoding
 89 | ------------------------------------------------------------------------------
 90 | 
 91 | -- Signed integers
 92 | ------------------
 93 | 
 94 | -- | Decimal encoding of an 'Int8' using the ASCII digits.
 95 | --
 96 | -- e.g.
 97 | --
 98 | -- > toLazyByteString (int8Dec 42)   = "42"
 99 | -- > toLazyByteString (int8Dec (-1)) = "-1"
100 | --
101 | {-# INLINE int8Dec #-}
102 | int8Dec :: Int8 -> Builder
103 | int8Dec = P.primBounded P.int8Dec
104 | 
105 | -- | Decimal encoding of an 'Int16' using the ASCII digits.
106 | {-# INLINE int16Dec #-}
107 | int16Dec :: Int16 -> Builder
108 | int16Dec = P.primBounded P.int16Dec
109 | 
110 | -- | Decimal encoding of an 'Int32' using the ASCII digits.
111 | {-# INLINE int32Dec #-}
112 | int32Dec :: Int32 -> Builder
113 | int32Dec = P.primBounded P.int32Dec
114 | 
115 | -- | Decimal encoding of an 'Int64' using the ASCII digits.
116 | {-# INLINE int64Dec #-}
117 | int64Dec :: Int64 -> Builder
118 | int64Dec = P.primBounded P.int64Dec
119 | 
120 | -- | Decimal encoding of an 'Int' using the ASCII digits.
121 | {-# INLINE intDec #-}
122 | intDec :: Int -> Builder
123 | intDec = P.primBounded P.intDec
124 | 
125 | 
126 | -- Unsigned integers
127 | --------------------
128 | 
129 | -- | Decimal encoding of a 'Word8' using the ASCII digits.
130 | {-# INLINE word8Dec #-}
131 | word8Dec :: Word8 -> Builder
132 | word8Dec = P.primBounded P.word8Dec
133 | 
134 | -- | Decimal encoding of a 'Word16' using the ASCII digits.
135 | {-# INLINE word16Dec #-}
136 | word16Dec :: Word16 -> Builder
137 | word16Dec = P.primBounded P.word16Dec
138 | 
139 | -- | Decimal encoding of a 'Word32' using the ASCII digits.
140 | {-# INLINE word32Dec #-}
141 | word32Dec :: Word32 -> Builder
142 | word32Dec = P.primBounded P.word32Dec
143 | 
144 | -- | Decimal encoding of a 'Word64' using the ASCII digits.
145 | {-# INLINE word64Dec #-}
146 | word64Dec :: Word64 -> Builder
147 | word64Dec = P.primBounded P.word64Dec
148 | 
149 | -- | Decimal encoding of a 'Word' using the ASCII digits.
150 | {-# INLINE wordDec #-}
151 | wordDec :: Word -> Builder
152 | wordDec = P.primBounded P.wordDec
153 | 
154 | 
155 | ------------------------------------------------------------------------------
156 | -- Hexadecimal Encoding
157 | ------------------------------------------------------------------------------
158 | 
159 | -- without lead
160 | ---------------
161 | 
162 | -- | Shortest hexadecimal encoding of a 'Word8' using lower-case characters.
163 | {-# INLINE word8Hex #-}
164 | word8Hex :: Word8 -> Builder
165 | word8Hex = P.primBounded P.word8Hex
166 | 
167 | -- | Shortest hexadecimal encoding of a 'Word16' using lower-case characters.
168 | {-# INLINE word16Hex #-}
169 | word16Hex :: Word16 -> Builder
170 | word16Hex = P.primBounded P.word16Hex
171 | 
172 | -- | Shortest hexadecimal encoding of a 'Word32' using lower-case characters.
173 | {-# INLINE word32Hex #-}
174 | word32Hex :: Word32 -> Builder
175 | word32Hex = P.primBounded P.word32Hex
176 | 
177 | -- | Shortest hexadecimal encoding of a 'Word64' using lower-case characters.
178 | {-# INLINE word64Hex #-}
179 | word64Hex :: Word64 -> Builder
180 | word64Hex = P.primBounded P.word64Hex
181 | 
182 | -- | Shortest hexadecimal encoding of a 'Word' using lower-case characters.
183 | {-# INLINE wordHex #-}
184 | wordHex :: Word -> Builder
185 | wordHex = P.primBounded P.wordHex
186 | 
187 | 
188 | -- fixed width; leading zeroes
189 | ------------------------------
190 | 
191 | -- | Encode a 'Int8' using 2 nibbles (hexadecimal digits).
192 | {-# INLINE int8HexFixed #-}
193 | int8HexFixed :: Int8 -> Builder
194 | int8HexFixed = P.primFixed P.int8HexFixed
195 | 
196 | -- | Encode a 'Int16' using 4 nibbles.
197 | {-# INLINE int16HexFixed #-}
198 | int16HexFixed :: Int16 -> Builder
199 | int16HexFixed = P.primFixed P.int16HexFixed
200 | 
201 | -- | Encode a 'Int32' using 8 nibbles.
202 | {-# INLINE int32HexFixed #-}
203 | int32HexFixed :: Int32 -> Builder
204 | int32HexFixed = P.primFixed P.int32HexFixed
205 | 
206 | -- | Encode a 'Int64' using 16 nibbles.
207 | {-# INLINE int64HexFixed #-}
208 | int64HexFixed :: Int64 -> Builder
209 | int64HexFixed = P.primFixed P.int64HexFixed
210 | 
211 | -- | Encode a 'Word8' using 2 nibbles (hexadecimal digits).
212 | {-# INLINE word8HexFixed #-}
213 | word8HexFixed :: Word8 -> Builder
214 | word8HexFixed = P.primFixed P.word8HexFixed
215 | 
216 | -- | Encode a 'Word16' using 4 nibbles.
217 | {-# INLINE word16HexFixed #-}
218 | word16HexFixed :: Word16 -> Builder
219 | word16HexFixed = P.primFixed P.word16HexFixed
220 | 
221 | -- | Encode a 'Word32' using 8 nibbles.
222 | {-# INLINE word32HexFixed #-}
223 | word32HexFixed :: Word32 -> Builder
224 | word32HexFixed = P.primFixed P.word32HexFixed
225 | 
226 | -- | Encode a 'Word64' using 16 nibbles.
227 | {-# INLINE word64HexFixed #-}
228 | word64HexFixed :: Word64 -> Builder
229 | word64HexFixed = P.primFixed P.word64HexFixed
230 | 
231 | -- | Encode an IEEE 'Float' using 8 nibbles.
232 | {-# INLINE floatHexFixed #-}
233 | floatHexFixed :: Float -> Builder
234 | floatHexFixed = P.primFixed P.floatHexFixed
235 | 
236 | -- | Encode an IEEE 'Double' using 16 nibbles.
237 | {-# INLINE doubleHexFixed #-}
238 | doubleHexFixed :: Double -> Builder
239 | doubleHexFixed = P.primFixed P.doubleHexFixed
240 | 
241 | -- | Encode each byte of a 'S.StrictByteString' using its fixed-width hex encoding.
242 | {-# NOINLINE byteStringHex #-} -- share code
243 | byteStringHex :: S.StrictByteString -> Builder
244 | byteStringHex = P.primMapByteStringFixed P.word8HexFixed
245 | 
246 | -- | Encode each byte of a 'L.LazyByteString' using its fixed-width hex encoding.
247 | {-# NOINLINE lazyByteStringHex #-} -- share code
248 | lazyByteStringHex :: L.LazyByteString -> Builder
249 | lazyByteStringHex = P.primMapLazyByteStringFixed P.word8HexFixed
250 | 
251 | 
252 | ------------------------------------------------------------------------------
253 | -- Fast decimal 'Integer' encoding.
254 | ------------------------------------------------------------------------------
255 | 
256 | -- An optimized version of the integer serialization code
257 | -- in blaze-textual (c) 2011 MailRank, Inc. Bryan O'Sullivan
258 | -- <bos@mailrank.com>. It is 2.5x faster on Int-sized integers and 4.5x faster
259 | -- on larger integers.
260 | 
261 | -- | Maximal power of 10 fitting into an 'Int' without using the MSB.
262 | --     10 ^ 9  for 32 bit ints  (31 * log 2 / log 10 =  9.33)
263 | --     10 ^ 18 for 64 bit ints  (63 * log 2 / log 10 = 18.96)
264 | --
265 | -- FIXME: Think about also using the MSB. For 64 bit 'Int's this makes a
266 | -- difference.
267 | maxPow10 :: Integer
268 | maxPow10 = toInteger $ (10 :: Int) ^ P.caseWordSize_32_64 (9 :: Int) 18
269 | 
270 | -- | Decimal encoding of an 'Integer' using the ASCII digits.
271 | integerDec :: Integer -> Builder
272 | integerDec i
273 |     | i' <- fromInteger i, toInteger i' == i = intDec i'
274 |     | i < 0     = P.primFixed P.char8 '-' `mappend` go (-i)
275 |     | otherwise =                                   go i
276 |   where
277 |     go :: Integer -> Builder
278 |     go n = case putH (splitf (maxPow10 * maxPow10) n) of
279 |              x:|xs -> intDec x `mappend` P.primMapListBounded intDecPadded xs
280 | 
281 |     splitf :: Integer -> Integer -> NonEmpty Integer
282 |     splitf pow10 n0
283 |       | pow10 > n0  = n0 :| []
284 |       | otherwise   = splith (splitf (pow10 * pow10) n0)
285 |       where
286 |         splith (n:|ns) =
287 |             case n `quotRem` pow10 of
288 |                 (q,r) | q > 0     -> q :| r :  splitb ns
289 |                       | otherwise ->      r :| splitb ns
290 | 
291 |         splitb []     = []
292 |         splitb (n:ns) = case n `quotRem` pow10 of
293 |                             (q,r) -> q : r : splitb ns
294 | 
295 |     putH :: NonEmpty Integer -> NonEmpty Int
296 |     putH (n:|ns) = case n `quotRem` maxPow10 of
297 |                     (x,y)
298 |                         | q > 0     -> q :| r :  putB ns
299 |                         | otherwise ->      r :| putB ns
300 |                         where q = fromInteger x
301 |                               r = fromInteger y
302 | 
303 |     putB :: [Integer] -> [Int]
304 |     putB []     = []
305 |     putB (n:ns) = case n `quotRem` maxPow10 of
306 |                     (q,r) -> fromInteger q : fromInteger r : putB ns
307 | 
308 | 
309 | {-# INLINE intDecPadded #-}
310 | intDecPadded :: P.BoundedPrim Int
311 | intDecPadded = P.liftFixedToBounded $ P.caseWordSize_32_64
312 |     (P.fixedPrim  9 $ c_uint32_dec_padded9  . fromIntegral)
313 |     (P.fixedPrim 18 $ c_uint64_dec_padded18 . fromIntegral)
314 | 


--------------------------------------------------------------------------------
/Data/ByteString/Builder/Extra.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE Trustworthy #-}
  2 | 
  3 | -----------------------------------------------------------------------------
  4 | -- | Copyright : (c) 2010      Jasper Van der Jeugt
  5 | --               (c) 2010-2011 Simon Meier
  6 | -- License     : BSD3-style (see LICENSE)
  7 | --
  8 | -- Maintainer  : Simon Meier <iridcode@gmail.com>
  9 | -- Portability : GHC
 10 | --
 11 | -- Extra functions for creating and executing 'Builder's. They are intended
 12 | -- for application-specific fine-tuning the performance of 'Builder's.
 13 | --
 14 | -----------------------------------------------------------------------------
 15 | module Data.ByteString.Builder.Extra
 16 |     (
 17 |     -- * Execution strategies
 18 |       toLazyByteStringWith
 19 |     , AllocationStrategy
 20 |     , safeStrategy
 21 |     , untrimmedStrategy
 22 |     , smallChunkSize
 23 |     , defaultChunkSize
 24 | 
 25 |     -- * Controlling chunk boundaries
 26 |     , byteStringCopy
 27 |     , byteStringInsert
 28 |     , byteStringThreshold
 29 | 
 30 |     , lazyByteStringCopy
 31 |     , lazyByteStringInsert
 32 |     , lazyByteStringThreshold
 33 | 
 34 |     , flush
 35 | 
 36 |     -- * Low level execution
 37 |     , BufferWriter
 38 |     , Next(..)
 39 |     , runBuilder
 40 | 
 41 |     -- * Host-specific binary encodings
 42 |     , intHost
 43 |     , int16Host
 44 |     , int32Host
 45 |     , int64Host
 46 | 
 47 |     , wordHost
 48 |     , word16Host
 49 |     , word32Host
 50 |     , word64Host
 51 | 
 52 |     , floatHost
 53 |     , doubleHost
 54 | 
 55 |     ) where
 56 | 
 57 | 
 58 | import Data.ByteString.Builder.Internal
 59 |          ( Builder, toLazyByteStringWith
 60 |          , AllocationStrategy, safeStrategy, untrimmedStrategy
 61 |          , smallChunkSize, defaultChunkSize, flush
 62 |          , byteStringCopy, byteStringInsert, byteStringThreshold
 63 |          , lazyByteStringCopy, lazyByteStringInsert, lazyByteStringThreshold )
 64 | 
 65 | import qualified Data.ByteString.Builder.Internal as I
 66 | import qualified Data.ByteString.Builder.Prim  as P
 67 | import qualified Data.ByteString.Internal      as S
 68 | 
 69 | import Foreign
 70 | 
 71 | ------------------------------------------------------------------------------
 72 | -- Builder execution public API
 73 | ------------------------------------------------------------------------------
 74 | 
 75 | -- | A 'BufferWriter' represents the result of running a 'Builder'.
 76 | -- It unfolds as a sequence of chunks of data. These chunks come in two forms:
 77 | --
 78 | --  * an IO action for writing the Builder's data into a user-supplied memory
 79 | --    buffer.
 80 | --
 81 | --  * a pre-existing chunks of data represented by a 'S.StrictByteString'
 82 | --
 83 | -- While this is rather low level, it provides you with full flexibility in
 84 | -- how the data is written out.
 85 | --
 86 | -- The 'BufferWriter' itself is an IO action: you supply it with a buffer
 87 | -- (as a pointer and length) and it will write data into the buffer.
 88 | -- It returns a number indicating how many bytes were actually written
 89 | -- (which can be @0@). It also returns a 'Next' which describes what
 90 | -- comes next.
 91 | --
 92 | type BufferWriter = Ptr Word8 -> Int -> IO (Int, Next)
 93 | 
 94 | -- | After running a 'BufferWriter' action there are three possibilities for
 95 | -- what comes next:
 96 | --
 97 | data Next =
 98 |      -- | This means we're all done. All the builder data has now been written.
 99 |      Done
100 | 
101 |      -- | This indicates that there may be more data to write. It
102 |      -- gives you the next 'BufferWriter' action. You should call that action
103 |      -- with an appropriate buffer. The int indicates the /minimum/ buffer size
104 |      -- required by the next 'BufferWriter' action. That is, if you call the next
105 |      -- action you /must/ supply it with a buffer length of at least this size.
106 |    | More   !Int          BufferWriter
107 | 
108 |      -- | In addition to the data that has just been written into your buffer
109 |      -- by the 'BufferWriter' action, it gives you a pre-existing chunk
110 |      -- of data as a 'S.StrictByteString'. It also gives you the following 'BufferWriter'
111 |      -- action. It is safe to run this following action using a buffer with as
112 |      -- much free space as was left by the previous run action.
113 |    | Chunk  !S.StrictByteString BufferWriter
114 | 
115 | -- | Turn a 'Builder' into its initial 'BufferWriter' action.
116 | --
117 | runBuilder :: Builder -> BufferWriter
118 | runBuilder = run . I.runBuilder
119 |   where
120 |     bytesWritten startPtr endPtr = endPtr `minusPtr` startPtr
121 | 
122 |     run :: I.BuildStep () -> BufferWriter
123 |     run step = \buf len ->
124 |       let doneH endPtr () =
125 |             let !wc  = bytesWritten buf endPtr
126 |                 next = Done
127 |              in return (wc, next)
128 | 
129 |           bufferFullH endPtr minReq step' =
130 |             let !wc  = bytesWritten buf endPtr
131 |                 next = More minReq (run step')
132 |              in return (wc, next)
133 | 
134 |           insertChunkH endPtr bs step' =
135 |             let !wc  = bytesWritten buf endPtr
136 |                 next = Chunk bs (run step')
137 |              in return (wc, next)
138 | 
139 |           br = I.BufferRange buf (buf `plusPtr` len)
140 | 
141 |       in I.fillWithBuildStep step doneH bufferFullH insertChunkH br
142 | 
143 | 
144 | 
145 | ------------------------------------------------------------------------------
146 | -- Host-specific encodings
147 | ------------------------------------------------------------------------------
148 | 
149 | -- | Encode a single native machine 'Int'. The 'Int' is encoded in host order,
150 | -- host endian form, for the machine you're on. On a 64 bit machine the 'Int'
151 | -- is an 8 byte value, on a 32 bit machine, 4 bytes. Values encoded this way
152 | -- are not portable to different endian or int sized machines, without
153 | -- conversion.
154 | --
155 | {-# INLINE intHost #-}
156 | intHost :: Int -> Builder
157 | intHost = P.primFixed P.intHost
158 | 
159 | -- | Encode a 'Int16' in native host order and host endianness.
160 | {-# INLINE int16Host #-}
161 | int16Host :: Int16 -> Builder
162 | int16Host = P.primFixed P.int16Host
163 | 
164 | -- | Encode a 'Int32' in native host order and host endianness.
165 | {-# INLINE int32Host #-}
166 | int32Host :: Int32 -> Builder
167 | int32Host = P.primFixed P.int32Host
168 | 
169 | -- | Encode a 'Int64' in native host order and host endianness.
170 | {-# INLINE int64Host #-}
171 | int64Host :: Int64 -> Builder
172 | int64Host = P.primFixed P.int64Host
173 | 
174 | -- | Encode a single native machine 'Word'. The 'Word' is encoded in host order,
175 | -- host endian form, for the machine you're on. On a 64 bit machine the 'Word'
176 | -- is an 8 byte value, on a 32 bit machine, 4 bytes. Values encoded this way
177 | -- are not portable to different endian or word sized machines, without
178 | -- conversion.
179 | --
180 | {-# INLINE wordHost #-}
181 | wordHost :: Word -> Builder
182 | wordHost = P.primFixed P.wordHost
183 | 
184 | -- | Encode a 'Word16' in native host order and host endianness.
185 | {-# INLINE word16Host #-}
186 | word16Host :: Word16 -> Builder
187 | word16Host = P.primFixed P.word16Host
188 | 
189 | -- | Encode a 'Word32' in native host order and host endianness.
190 | {-# INLINE word32Host #-}
191 | word32Host :: Word32 -> Builder
192 | word32Host = P.primFixed P.word32Host
193 | 
194 | -- | Encode a 'Word64' in native host order and host endianness.
195 | {-# INLINE word64Host #-}
196 | word64Host :: Word64 -> Builder
197 | word64Host = P.primFixed P.word64Host
198 | 
199 | -- | Encode a 'Float' in native host order. Values encoded this way are not
200 | -- portable to different endian machines, without conversion.
201 | {-# INLINE floatHost #-}
202 | floatHost :: Float -> Builder
203 | floatHost = P.primFixed P.floatHost
204 | 
205 | -- | Encode a 'Double' in native host order.
206 | {-# INLINE doubleHost #-}
207 | doubleHost :: Double -> Builder
208 | doubleHost = P.primFixed P.doubleHost
209 | 
210 | 


--------------------------------------------------------------------------------
/Data/ByteString/Builder/Prim/ASCII.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE TypeFamilies #-}
  2 | 
  3 | -- | Copyright   : (c) 2010 Jasper Van der Jeugt
  4 | --                 (c) 2010 - 2011 Simon Meier
  5 | -- License       : BSD3-style (see LICENSE)
  6 | --
  7 | -- Maintainer    : Simon Meier <iridcode@gmail.com>
  8 | -- Portability   : GHC
  9 | --
 10 | -- Encodings using ASCII encoded Unicode characters.
 11 | --
 12 | module Data.ByteString.Builder.Prim.ASCII
 13 |     (
 14 | 
 15 |      -- *** ASCII
 16 |      char7
 17 | 
 18 |       -- **** Decimal numbers
 19 |       -- | Decimal encoding of numbers using ASCII encoded characters.
 20 |     , int8Dec
 21 |     , int16Dec
 22 |     , int32Dec
 23 |     , int64Dec
 24 |     , intDec
 25 | 
 26 |     , word8Dec
 27 |     , word16Dec
 28 |     , word32Dec
 29 |     , word64Dec
 30 |     , wordDec
 31 | 
 32 |     {-
 33 |     -- These are the functions currently provided by Bryan O'Sullivans
 34 |     -- double-conversion library.
 35 |     --
 36 |     -- , float
 37 |     -- , floatWith
 38 |     -- , double
 39 |     -- , doubleWith
 40 |     -}
 41 | 
 42 |       -- **** Hexadecimal numbers
 43 | 
 44 |       -- | Encoding positive integers as hexadecimal numbers using lower-case
 45 |       -- ASCII characters. The shortest possible representation is used. For
 46 |       -- example,
 47 |       --
 48 |       -- > toLazyByteString (primBounded word16Hex 0x0a10) = "a10"
 49 |       --
 50 |       -- Note that there is no support for using upper-case characters. Please
 51 |       -- contact the maintainer if your application cannot work without
 52 |       -- hexadecimal encodings that use upper-case characters.
 53 |       --
 54 |     , word8Hex
 55 |     , word16Hex
 56 |     , word32Hex
 57 |     , word64Hex
 58 |     , wordHex
 59 | 
 60 |       -- **** Fixed-width hexadecimal numbers
 61 |       --
 62 |       -- | Encoding the bytes of fixed-width types as hexadecimal
 63 |       -- numbers using lower-case ASCII characters. For example,
 64 |       --
 65 |       -- > toLazyByteString (primFixed word16HexFixed 0x0a10) = "0a10"
 66 |       --
 67 |     , int8HexFixed
 68 |     , int16HexFixed
 69 |     , int32HexFixed
 70 |     , int64HexFixed
 71 |     , word8HexFixed
 72 |     , word16HexFixed
 73 |     , word32HexFixed
 74 |     , word64HexFixed
 75 |     , floatHexFixed
 76 |     , doubleHexFixed
 77 | 
 78 |     ) where
 79 | 
 80 | import Data.ByteString.Internal.Type
 81 | import Data.ByteString.Builder.Prim.Binary
 82 | import Data.ByteString.Builder.Prim.Internal
 83 | import Data.ByteString.Builder.Prim.Internal.Floating
 84 | import Data.ByteString.Builder.Prim.Internal.Base16
 85 | import Data.ByteString.Utils.UnalignedAccess
 86 | 
 87 | import Data.Char (ord)
 88 | 
 89 | import Foreign
 90 | 
 91 | -- | Encode the least 7-bits of a 'Char' using the ASCII encoding.
 92 | {-# INLINE char7 #-}
 93 | char7 :: FixedPrim Char
 94 | char7 = (\c -> fromIntegral $ ord c .&. 0x7f) >$< word8
 95 | 
 96 | 
 97 | ------------------------------------------------------------------------------
 98 | -- Decimal Encoding
 99 | ------------------------------------------------------------------------------
100 | 
101 | -- Signed integers
102 | ------------------
103 | 
104 | type family CorrespondingUnsigned s where
105 |   CorrespondingUnsigned Int8  = Word8
106 |   CorrespondingUnsigned Int16 = Word16
107 |   CorrespondingUnsigned Int32 = Word32
108 |   CorrespondingUnsigned Int   = Word
109 |   CorrespondingUnsigned Int64 = Word64
110 | 
111 | {-# INLINE encodeSignedViaUnsigned #-}
112 | encodeSignedViaUnsigned ::
113 |   forall s.
114 |   (Integral s, Num (CorrespondingUnsigned s)) =>
115 |   Int -> (BoundedPrim (CorrespondingUnsigned s)) -> BoundedPrim s
116 | encodeSignedViaUnsigned bound writeUnsigned = boundedPrim bound $ \sval ptr ->
117 |   if sval < 0 then do
118 |     poke ptr (c2w '-')
119 |     runB writeUnsigned (makeUnsigned (negate sval)) (ptr `plusPtr` 1)
120 |     -- This call to 'negate' may overflow if `sval == minBound`.
121 |     -- But since we insist that the unsigned type has the same width,
122 |     -- this causes no trouble.
123 |   else do
124 |     runB writeUnsigned (makeUnsigned sval) ptr
125 |   where
126 |     makeUnsigned = fromIntegral @s @(CorrespondingUnsigned s)
127 | 
128 | -- | Decimal encoding of an 'Int8'.
129 | {-# INLINE int8Dec #-}
130 | int8Dec :: BoundedPrim Int8
131 | int8Dec = encodeSignedViaUnsigned 4 word8Dec
132 | 
133 | -- | Decimal encoding of an 'Int16'.
134 | {-# INLINE int16Dec #-}
135 | int16Dec :: BoundedPrim Int16
136 | int16Dec = encodeSignedViaUnsigned 6 word16Dec
137 | 
138 | 
139 | -- | Decimal encoding of an 'Int32'.
140 | {-# INLINE int32Dec #-}
141 | int32Dec :: BoundedPrim Int32
142 | int32Dec = encodeSignedViaUnsigned 11 word32Dec
143 | 
144 | -- | Decimal encoding of an 'Int64'.
145 | {-# INLINE int64Dec #-}
146 | int64Dec :: BoundedPrim Int64
147 | int64Dec = encodeSignedViaUnsigned 20 word64Dec
148 | 
149 | -- | Decimal encoding of an 'Int'.
150 | {-# INLINE intDec #-}
151 | intDec :: BoundedPrim Int
152 | intDec = caseWordSize_32_64
153 |     (fromIntegral >$< int32Dec)
154 |     (fromIntegral >$< int64Dec)
155 | 
156 | 
157 | -- Unsigned integers
158 | --------------------
159 | 
160 | {-# INLINE encodeWord32Decimal #-}
161 | encodeWord32Decimal :: Integral a => Int -> BoundedPrim a
162 | encodeWord32Decimal bound = boundedPrim bound $ c_uint32_dec . fromIntegral
163 | 
164 | -- | Decimal encoding of a 'Word8'.
165 | {-# INLINE word8Dec #-}
166 | word8Dec :: BoundedPrim Word8
167 | word8Dec = encodeWord32Decimal 3
168 | 
169 | -- | Decimal encoding of a 'Word16'.
170 | {-# INLINE word16Dec #-}
171 | word16Dec :: BoundedPrim Word16
172 | word16Dec = encodeWord32Decimal 5
173 | 
174 | -- | Decimal encoding of a 'Word32'.
175 | {-# INLINE word32Dec #-}
176 | word32Dec :: BoundedPrim Word32
177 | word32Dec = encodeWord32Decimal 10
178 | 
179 | -- | Decimal encoding of a 'Word64'.
180 | {-# INLINE word64Dec #-}
181 | word64Dec :: BoundedPrim Word64
182 | word64Dec = boundedPrim 20 c_uint64_dec
183 | 
184 | -- | Decimal encoding of a 'Word'.
185 | {-# INLINE wordDec #-}
186 | wordDec :: BoundedPrim Word
187 | wordDec = caseWordSize_32_64
188 |     (fromIntegral >$< word32Dec)
189 |     (fromIntegral >$< word64Dec)
190 | 
191 | ------------------------------------------------------------------------------
192 | -- Hexadecimal Encoding
193 | ------------------------------------------------------------------------------
194 | 
195 | -- without lead
196 | ---------------
197 | 
198 | {-# INLINE encodeWord32Hex #-}
199 | encodeWord32Hex :: forall a. (Storable a, Integral a) => BoundedPrim a
200 | encodeWord32Hex =
201 |     boundedPrim (2 * sizeOf @a undefined) $ c_uint32_hex . fromIntegral
202 | 
203 | -- | Hexadecimal encoding of a 'Word8'.
204 | {-# INLINE word8Hex #-}
205 | word8Hex :: BoundedPrim Word8
206 | word8Hex = encodeWord32Hex
207 | 
208 | -- | Hexadecimal encoding of a 'Word16'.
209 | {-# INLINE word16Hex #-}
210 | word16Hex :: BoundedPrim Word16
211 | word16Hex = encodeWord32Hex
212 | 
213 | -- | Hexadecimal encoding of a 'Word32'.
214 | {-# INLINE word32Hex #-}
215 | word32Hex :: BoundedPrim Word32
216 | word32Hex = encodeWord32Hex
217 | 
218 | -- | Hexadecimal encoding of a 'Word64'.
219 | {-# INLINE word64Hex #-}
220 | word64Hex :: BoundedPrim Word64
221 | word64Hex = boundedPrim 16 c_uint64_hex
222 | 
223 | -- | Hexadecimal encoding of a 'Word'.
224 | {-# INLINE wordHex #-}
225 | wordHex :: BoundedPrim Word
226 | wordHex = caseWordSize_32_64
227 |     (fromIntegral >$< word32Hex)
228 |     (fromIntegral >$< word64Hex)
229 | 
230 | 
231 | -- fixed width; leading zeroes
232 | ------------------------------
233 | 
234 | -- | Encode a 'Word8' using 2 nibbles (hexadecimal digits).
235 | {-# INLINE word8HexFixed #-}
236 | word8HexFixed :: FixedPrim Word8
237 | word8HexFixed = fixedPrim 2 $ \x op -> do
238 |   enc <- encode8_as_16h lowerTable x
239 |   unalignedWriteU16 enc op
240 | 
241 | -- | Encode a 'Word16' using 4 nibbles.
242 | {-# INLINE word16HexFixed #-}
243 | word16HexFixed :: FixedPrim Word16
244 | word16HexFixed =
245 |     (\x -> (fromIntegral $ x `shiftR` 8, fromIntegral x))
246 |       >$< pairF word8HexFixed word8HexFixed
247 | 
248 | -- | Encode a 'Word32' using 8 nibbles.
249 | {-# INLINE word32HexFixed #-}
250 | word32HexFixed :: FixedPrim Word32
251 | word32HexFixed =
252 |     (\x -> (fromIntegral $ x `shiftR` 16, fromIntegral x))
253 |       >$< pairF word16HexFixed word16HexFixed
254 | 
255 | -- | Encode a 'Word64' using 16 nibbles.
256 | {-# INLINE word64HexFixed #-}
257 | word64HexFixed :: FixedPrim Word64
258 | word64HexFixed =
259 |     (\x -> (fromIntegral $ x `shiftR` 32, fromIntegral x))
260 |       >$< pairF word32HexFixed word32HexFixed
261 | 
262 | -- | Encode a 'Int8' using 2 nibbles (hexadecimal digits).
263 | {-# INLINE int8HexFixed #-}
264 | int8HexFixed :: FixedPrim Int8
265 | int8HexFixed = fromIntegral >$< word8HexFixed
266 | 
267 | -- | Encode a 'Int16' using 4 nibbles.
268 | {-# INLINE int16HexFixed #-}
269 | int16HexFixed :: FixedPrim Int16
270 | int16HexFixed = fromIntegral >$< word16HexFixed
271 | 
272 | -- | Encode a 'Int32' using 8 nibbles.
273 | {-# INLINE int32HexFixed #-}
274 | int32HexFixed :: FixedPrim Int32
275 | int32HexFixed = fromIntegral >$< word32HexFixed
276 | 
277 | -- | Encode a 'Int64' using 16 nibbles.
278 | {-# INLINE int64HexFixed #-}
279 | int64HexFixed :: FixedPrim Int64
280 | int64HexFixed = fromIntegral >$< word64HexFixed
281 | 
282 | -- | Encode an IEEE 'Float' using 8 nibbles.
283 | {-# INLINE floatHexFixed #-}
284 | floatHexFixed :: FixedPrim Float
285 | floatHexFixed = encodeFloatViaWord32F word32HexFixed
286 | 
287 | -- | Encode an IEEE 'Double' using 16 nibbles.
288 | {-# INLINE doubleHexFixed #-}
289 | doubleHexFixed :: FixedPrim Double
290 | doubleHexFixed = encodeDoubleViaWord64F word64HexFixed
291 | 


--------------------------------------------------------------------------------
/Data/ByteString/Builder/Prim/Binary.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE Trustworthy #-}
  2 | 
  3 | -- | Copyright   : (c) 2010-2011 Simon Meier
  4 | -- License       : BSD3-style (see LICENSE)
  5 | --
  6 | -- Maintainer    : Simon Meier <iridcode@gmail.com>
  7 | -- Portability   : GHC
  8 | --
  9 | module Data.ByteString.Builder.Prim.Binary (
 10 | 
 11 |   -- ** Binary encodings
 12 |     int8
 13 |   , word8
 14 | 
 15 |   -- *** Big-endian
 16 |   , int16BE
 17 |   , int32BE
 18 |   , int64BE
 19 | 
 20 |   , word16BE
 21 |   , word32BE
 22 |   , word64BE
 23 | 
 24 |   , floatBE
 25 |   , doubleBE
 26 | 
 27 |   -- *** Little-endian
 28 |   , int16LE
 29 |   , int32LE
 30 |   , int64LE
 31 | 
 32 |   , word16LE
 33 |   , word32LE
 34 |   , word64LE
 35 | 
 36 |   , floatLE
 37 |   , doubleLE
 38 | 
 39 |   -- *** Non-portable, host-dependent
 40 |   , intHost
 41 |   , int16Host
 42 |   , int32Host
 43 |   , int64Host
 44 | 
 45 |   , wordHost
 46 |   , word16Host
 47 |   , word32Host
 48 |   , word64Host
 49 | 
 50 |   , floatHost
 51 |   , doubleHost
 52 | 
 53 |   ) where
 54 | 
 55 | import Data.ByteString.Builder.Prim.Internal
 56 | import Data.ByteString.Builder.Prim.Internal.Floating
 57 | import Data.ByteString.Utils.ByteOrder
 58 | import Data.ByteString.Utils.UnalignedAccess
 59 | 
 60 | import Foreign
 61 | 
 62 | ------------------------------------------------------------------------------
 63 | -- Binary encoding
 64 | ------------------------------------------------------------------------------
 65 | 
 66 | -- Word encodings
 67 | -----------------
 68 | 
 69 | -- | Encoding single unsigned bytes as-is.
 70 | --
 71 | {-# INLINE word8 #-}
 72 | word8 :: FixedPrim Word8
 73 | word8 = fixedPrim 1 (flip poke) -- Word8 is always aligned
 74 | 
 75 | --
 76 | -- We rely on the fromIntegral to do the right masking for us.
 77 | -- The inlining here is critical, and can be worth 4x performance
 78 | --
 79 | 
 80 | -- | Encoding 'Word16's in big endian format.
 81 | {-# INLINE word16BE #-}
 82 | word16BE :: FixedPrim Word16
 83 | word16BE = whenLittleEndian byteSwap16 >$< word16Host
 84 | 
 85 | -- | Encoding 'Word16's in little endian format.
 86 | {-# INLINE word16LE #-}
 87 | word16LE :: FixedPrim Word16
 88 | word16LE = whenBigEndian byteSwap16 >$< word16Host
 89 | 
 90 | -- | Encoding 'Word32's in big endian format.
 91 | {-# INLINE word32BE #-}
 92 | word32BE :: FixedPrim Word32
 93 | word32BE = whenLittleEndian byteSwap32 >$< word32Host
 94 | 
 95 | -- | Encoding 'Word32's in little endian format.
 96 | {-# INLINE word32LE #-}
 97 | word32LE :: FixedPrim Word32
 98 | word32LE = whenBigEndian byteSwap32 >$< word32Host
 99 | 
100 | -- on a little endian machine:
101 | -- word32LE w32 = fixedPrim 4 (\w p -> poke (castPtr p) w32)
102 | 
103 | -- | Encoding 'Word64's in big endian format.
104 | {-# INLINE word64BE #-}
105 | word64BE :: FixedPrim Word64
106 | word64BE = whenLittleEndian byteSwap64 >$< word64Host
107 | 
108 | -- | Encoding 'Word64's in little endian format.
109 | {-# INLINE word64LE #-}
110 | word64LE :: FixedPrim Word64
111 | word64LE = whenBigEndian byteSwap64 >$< word64Host
112 | 
113 | 
114 | -- | Encode a single native machine 'Word'. The 'Word's is encoded in host order,
115 | -- host endian form, for the machine you are on. On a 64 bit machine the 'Word'
116 | -- is an 8 byte value, on a 32 bit machine, 4 bytes. Values encoded this way
117 | -- are not portable to different endian or word sized machines, without
118 | -- conversion.
119 | --
120 | {-# INLINE wordHost #-}
121 | wordHost :: FixedPrim Word
122 | wordHost = case finiteBitSize (0 :: Word) of
123 |   32 -> fromIntegral @Word @Word32 >$< word32Host
124 |   64 -> fromIntegral @Word @Word64 >$< word64Host
125 |   _ -> error "Data.ByteString.Builder.Prim.Binary.wordHost: unexpected word size"
126 | 
127 | -- | Encoding 'Word16's in native host order and host endianness.
128 | {-# INLINE word16Host #-}
129 | word16Host :: FixedPrim Word16
130 | word16Host = fixedPrim 2 unalignedWriteU16
131 | 
132 | -- | Encoding 'Word32's in native host order and host endianness.
133 | {-# INLINE word32Host #-}
134 | word32Host :: FixedPrim Word32
135 | word32Host = fixedPrim 4 unalignedWriteU32
136 | 
137 | -- | Encoding 'Word64's in native host order and host endianness.
138 | {-# INLINE word64Host #-}
139 | word64Host :: FixedPrim Word64
140 | word64Host = fixedPrim 8 unalignedWriteU64
141 | 
142 | ------------------------------------------------------------------------------
143 | -- Int encodings
144 | ------------------------------------------------------------------------------
145 | --
146 | -- We rely on 'fromIntegral' to do a loss-less conversion to the corresponding
147 | -- 'Word' type
148 | --
149 | ------------------------------------------------------------------------------
150 | 
151 | -- | Encoding single signed bytes as-is.
152 | --
153 | {-# INLINE int8 #-}
154 | int8 :: FixedPrim Int8
155 | int8 = fromIntegral >$< word8
156 | 
157 | -- | Encoding 'Int16's in big endian format.
158 | {-# INLINE int16BE #-}
159 | int16BE :: FixedPrim Int16
160 | int16BE = fromIntegral >$< word16BE
161 | 
162 | -- | Encoding 'Int16's in little endian format.
163 | {-# INLINE int16LE #-}
164 | int16LE :: FixedPrim Int16
165 | int16LE = fromIntegral >$< word16LE
166 | 
167 | -- | Encoding 'Int32's in big endian format.
168 | {-# INLINE int32BE #-}
169 | int32BE :: FixedPrim Int32
170 | int32BE = fromIntegral >$< word32BE
171 | 
172 | -- | Encoding 'Int32's in little endian format.
173 | {-# INLINE int32LE #-}
174 | int32LE :: FixedPrim Int32
175 | int32LE = fromIntegral >$< word32LE
176 | 
177 | -- | Encoding 'Int64's in big endian format.
178 | {-# INLINE int64BE #-}
179 | int64BE :: FixedPrim Int64
180 | int64BE = fromIntegral >$< word64BE
181 | 
182 | -- | Encoding 'Int64's in little endian format.
183 | {-# INLINE int64LE #-}
184 | int64LE :: FixedPrim Int64
185 | int64LE = fromIntegral >$< word64LE
186 | 
187 | 
188 | -- | Encode a single native machine 'Int'. The 'Int's is encoded in host order,
189 | -- host endian form, for the machine you are on. On a 64 bit machine the 'Int'
190 | -- is an 8 byte value, on a 32 bit machine, 4 bytes. Values encoded this way
191 | -- are not portable to different endian or integer sized machines, without
192 | -- conversion.
193 | --
194 | {-# INLINE intHost #-}
195 | intHost :: FixedPrim Int
196 | intHost = fromIntegral @Int @Word >$< wordHost
197 | 
198 | -- | Encoding 'Int16's in native host order and host endianness.
199 | {-# INLINE int16Host #-}
200 | int16Host :: FixedPrim Int16
201 | int16Host = fromIntegral @Int16 @Word16 >$< word16Host
202 | 
203 | -- | Encoding 'Int32's in native host order and host endianness.
204 | {-# INLINE int32Host #-}
205 | int32Host :: FixedPrim Int32
206 | int32Host = fromIntegral @Int32 @Word32 >$< word32Host
207 | 
208 | -- | Encoding 'Int64's in native host order and host endianness.
209 | {-# INLINE int64Host #-}
210 | int64Host :: FixedPrim Int64
211 | int64Host = fromIntegral @Int64 @Word64 >$< word64Host
212 | 
213 | -- IEEE Floating Point Numbers
214 | ------------------------------
215 | 
216 | -- | Encode a 'Float' in big endian format.
217 | {-# INLINE floatBE #-}
218 | floatBE :: FixedPrim Float
219 | floatBE = encodeFloatViaWord32F word32BE
220 | 
221 | -- | Encode a 'Float' in little endian format.
222 | {-# INLINE floatLE #-}
223 | floatLE :: FixedPrim Float
224 | floatLE = encodeFloatViaWord32F word32LE
225 | 
226 | -- | Encode a 'Double' in big endian format.
227 | {-# INLINE doubleBE #-}
228 | doubleBE :: FixedPrim Double
229 | doubleBE = encodeDoubleViaWord64F word64BE
230 | 
231 | -- | Encode a 'Double' in little endian format.
232 | {-# INLINE doubleLE #-}
233 | doubleLE :: FixedPrim Double
234 | doubleLE = encodeDoubleViaWord64F word64LE
235 | 
236 | 
237 | -- | Encode a 'Float' in native host order and host endianness. Values written
238 | -- this way are not portable to different endian machines, without conversion.
239 | --
240 | {-# INLINE floatHost #-}
241 | floatHost :: FixedPrim Float
242 | floatHost = fixedPrim (sizeOf @Float 0) unalignedWriteFloat
243 | 
244 | -- | Encode a 'Double' in native host order and host endianness.
245 | {-# INLINE doubleHost #-}
246 | doubleHost :: FixedPrim Double
247 | doubleHost = fixedPrim (sizeOf @Double 0) unalignedWriteDouble
248 | 


--------------------------------------------------------------------------------
/Data/ByteString/Builder/Prim/Internal.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE CPP #-}
  2 | {-# LANGUAGE Unsafe #-}
  3 | 
  4 | {-# OPTIONS_HADDOCK not-home #-}
  5 | 
  6 | -- |
  7 | -- Copyright   : 2010-2011 Simon Meier, 2010 Jasper van der Jeugt
  8 | -- License     : BSD3-style (see LICENSE)
  9 | --
 10 | -- Maintainer  : Simon Meier <iridcode@gmail.com>
 11 | -- Stability   : unstable, private
 12 | -- Portability : GHC
 13 | --
 14 | -- *Warning:* this module is internal. If you find that you need it please
 15 | -- contact the maintainers and explain what you are trying to do and discuss
 16 | -- what you would need in the public API. It is important that you do this as
 17 | -- the module may not be exposed at all in future releases.
 18 | --
 19 | -- The maintainers are glad to accept patches for further
 20 | -- standard encodings of standard Haskell values.
 21 | --
 22 | -- If you need to write your own builder primitives, then be aware that you are
 23 | -- writing code with /all safety belts off/; i.e.,
 24 | -- *this is the code that might make your application vulnerable to buffer-overflow attacks!*
 25 | -- The "Data.ByteString.Builder.Prim.Tests" module provides you with
 26 | -- utilities for testing your encodings thoroughly.
 27 | --
 28 | module Data.ByteString.Builder.Prim.Internal (
 29 |   -- * Fixed-size builder primitives
 30 |     Size
 31 |   , FixedPrim
 32 |   , fixedPrim
 33 |   , size
 34 |   , runF
 35 | 
 36 |   , emptyF
 37 |   , contramapF
 38 |   , pairF
 39 |   -- , liftIOF
 40 | 
 41 |   , storableToF
 42 | 
 43 |   -- * Bounded-size builder primitives
 44 |   , BoundedPrim
 45 |   , boundedPrim
 46 |   , sizeBound
 47 |   , runB
 48 | 
 49 |   , emptyB
 50 |   , contramapB
 51 |   , pairB
 52 |   , eitherB
 53 |   , condB
 54 | 
 55 |   -- , liftIOB
 56 | 
 57 |   , toB
 58 |   , liftFixedToBounded
 59 | 
 60 |   -- , withSizeFB
 61 |   -- , withSizeBB
 62 | 
 63 |   -- * Shared operators
 64 |   , (>$<)
 65 |   , (>*<)
 66 | 
 67 |   -- * Helpers
 68 |   , caseWordSize_32_64
 69 | 
 70 |   -- * Deprecated
 71 |   , boudedPrim
 72 |   ) where
 73 | 
 74 | import Foreign
 75 | import Prelude hiding (maxBound)
 76 | 
 77 | #include "MachDeps.h"
 78 | #include "bytestring-cpp-macros.h"
 79 | 
 80 | ------------------------------------------------------------------------------
 81 | -- Supporting infrastructure
 82 | ------------------------------------------------------------------------------
 83 | 
 84 | -- | Contravariant functors as in the @contravariant@ package.
 85 | class Contravariant f where
 86 |     contramap :: (b -> a) -> f a -> f b
 87 | 
 88 | infixl 4 >$<
 89 | 
 90 | -- | A fmap-like operator for builder primitives, both bounded and fixed size.
 91 | --
 92 | -- Builder primitives are contravariant so it's like the normal fmap, but
 93 | -- backwards (look at the type). (If it helps to remember, the operator symbol
 94 | -- is like (<$>) but backwards.)
 95 | --
 96 | -- We can use it for example to prepend and/or append fixed values to an
 97 | -- primitive.
 98 | --
 99 | -- > import Data.ByteString.Builder.Prim as P
100 | -- >showEncoding ((\x -> ('\'', (x, '\''))) >$< fixed3) 'x' = "'x'"
101 | -- >  where
102 | -- >    fixed3 = P.char7 >*< P.char7 >*< P.char7
103 | --
104 | -- Note that the rather verbose syntax for composition stems from the
105 | -- requirement to be able to compute the size / size bound at compile time.
106 | --
107 | (>$<) :: Contravariant f => (b -> a) -> f a -> f b
108 | (>$<) = contramap
109 | 
110 | 
111 | instance Contravariant FixedPrim where
112 |     contramap = contramapF
113 | 
114 | instance Contravariant BoundedPrim where
115 |     contramap = contramapB
116 | 
117 | 
118 | -- | Type-constructors supporting lifting of type-products.
119 | class Monoidal f where
120 |     pair :: f a -> f b -> f (a, b)
121 | 
122 | instance Monoidal FixedPrim where
123 |     pair = pairF
124 | 
125 | instance Monoidal BoundedPrim where
126 |     pair = pairB
127 | 
128 | infixr 5 >*<
129 | 
130 | -- | A pairing/concatenation operator for builder primitives, both bounded and
131 | -- fixed size.
132 | --
133 | -- For example,
134 | --
135 | -- > toLazyByteString (primFixed (char7 >*< char7) ('x','y')) = "xy"
136 | --
137 | -- We can combine multiple primitives using '>*<' multiple times.
138 | --
139 | -- > toLazyByteString (primFixed (char7 >*< char7 >*< char7) ('x',('y','z'))) = "xyz"
140 | --
141 | (>*<) :: Monoidal f => f a -> f b -> f (a, b)
142 | (>*<) = pair
143 | 
144 | 
145 | -- | The type used for sizes and sizeBounds of sizes.
146 | type Size = Int
147 | 
148 | 
149 | ------------------------------------------------------------------------------
150 | -- Fixed-size builder primitives
151 | ------------------------------------------------------------------------------
152 | 
153 | -- | A builder primitive that always results in a sequence of bytes of a
154 | -- pre-determined, fixed size.
155 | data FixedPrim a = FP {-# UNPACK #-} !Int (a -> Ptr Word8 -> IO ())
156 | 
157 | fixedPrim :: Int -> (a -> Ptr Word8 -> IO ()) -> FixedPrim a
158 | fixedPrim = FP
159 | 
160 | -- | The size of the sequences of bytes generated by this 'FixedPrim'.
161 | {-# INLINE CONLIKE size #-}
162 | size :: FixedPrim a -> Int
163 | size (FP l _) = l
164 | 
165 | {-# INLINE CONLIKE runF #-}
166 | runF :: FixedPrim a -> a -> Ptr Word8 -> IO ()
167 | runF (FP _ io) = io
168 | 
169 | -- | The 'FixedPrim' that always results in the zero-length sequence.
170 | {-# INLINE CONLIKE emptyF #-}
171 | emptyF :: FixedPrim a
172 | emptyF = FP 0 (\_ _ -> return ())
173 | 
174 | -- | Encode a pair by encoding its first component and then its second component.
175 | {-# INLINE CONLIKE pairF #-}
176 | pairF :: FixedPrim a -> FixedPrim b -> FixedPrim (a, b)
177 | pairF (FP l1 io1) (FP l2 io2) =
178 |     FP (l1 + l2) (\(x1,x2) op -> io1 x1 op >> io2 x2 (op `plusPtr` l1))
179 | 
180 | -- | Change a primitives such that it first applies a function to the value
181 | -- to be encoded.
182 | --
183 | -- Note that primitives are 'Contravariant'
184 | -- <http://hackage.haskell.org/package/contravariant>. Hence, the following
185 | -- laws hold.
186 | --
187 | -- >contramapF id = id
188 | -- >contramapF f . contramapF g = contramapF (g . f)
189 | {-# INLINE CONLIKE contramapF #-}
190 | contramapF :: (b -> a) -> FixedPrim a -> FixedPrim b
191 | contramapF f (FP l io) = FP l (io . f)
192 | 
193 | -- | Convert a 'FixedPrim' to a 'BoundedPrim'.
194 | {-# INLINE CONLIKE toB #-}
195 | toB :: FixedPrim a -> BoundedPrim a
196 | toB (FP l io) = BP l (\x op -> io x op >> (return $! op `plusPtr` l))
197 | 
198 | -- | Lift a 'FixedPrim' to a 'BoundedPrim'.
199 | {-# INLINE CONLIKE liftFixedToBounded #-}
200 | liftFixedToBounded :: FixedPrim a -> BoundedPrim a
201 | liftFixedToBounded = toB
202 | 
203 | {-# INLINE CONLIKE storableToF #-}
204 | {-# DEPRECATED storableToF
205 |      "Deprecated since @bytestring-0.12.1.0@.\n\nThis function is dangerous in the presence of internal padding\nand makes naive assumptions about alignment.\n\n * For a primitive Haskell type like 'Int64', use the\n   corresponding primitive like 'Data.ByteString.Builder.Prim.int64Host'.\n * For other types, it is recommended to manually write a small\n   function that performs the necessary unaligned write\n   and zeroes or removes any internal padding bits."
206 |   #-}
207 | storableToF :: forall a. Storable a => FixedPrim a
208 | #if HS_UNALIGNED_POKES_OK
209 | storableToF = FP (sizeOf (undefined :: a)) (\x op -> poke (castPtr op) x)
210 | #else
211 | storableToF = FP (sizeOf (undefined :: a)) $ \x op ->
212 |     if ptrToWordPtr op `mod` fromIntegral (alignment (undefined :: a)) == 0 then poke (castPtr op) x
213 |     else with x $ \tp -> copyBytes op (castPtr tp) (sizeOf (undefined :: a))
214 | #endif
215 | 
216 | {-
217 | {-# INLINE CONLIKE liftIOF #-}
218 | liftIOF :: FixedPrim a -> FixedPrim (IO a)
219 | liftIOF (FP l io) = FP l (\xWrapped op -> do x <- xWrapped; io x op)
220 | -}
221 | 
222 | ------------------------------------------------------------------------------
223 | -- Bounded-size builder primitives
224 | ------------------------------------------------------------------------------
225 | 
226 | -- | A builder primitive that always results in sequence of bytes that is no longer
227 | -- than a pre-determined bound.
228 | data BoundedPrim a = BP {-# UNPACK #-} !Int (a -> Ptr Word8 -> IO (Ptr Word8))
229 | 
230 | -- | The bound on the size of sequences of bytes generated by this 'BoundedPrim'.
231 | {-# INLINE CONLIKE sizeBound #-}
232 | sizeBound :: BoundedPrim a -> Int
233 | sizeBound (BP b _) = b
234 | 
235 | -- | @since 0.10.12.0
236 | boundedPrim :: Int -> (a -> Ptr Word8 -> IO (Ptr Word8)) -> BoundedPrim a
237 | boundedPrim = BP
238 | 
239 | {-# DEPRECATED boudedPrim "Use 'boundedPrim' instead" #-}
240 | boudedPrim :: Int -> (a -> Ptr Word8 -> IO (Ptr Word8)) -> BoundedPrim a
241 | boudedPrim = BP
242 | 
243 | {-# INLINE CONLIKE runB #-}
244 | runB :: BoundedPrim a -> a -> Ptr Word8 -> IO (Ptr Word8)
245 | runB (BP _ io) = io
246 | 
247 | -- | Change a 'BoundedPrim' such that it first applies a function to the
248 | -- value to be encoded.
249 | --
250 | -- Note that 'BoundedPrim's are 'Contravariant'
251 | -- <http://hackage.haskell.org/package/contravariant>. Hence, the following
252 | -- laws hold.
253 | --
254 | -- >contramapB id = id
255 | -- >contramapB f . contramapB g = contramapB (g . f)
256 | {-# INLINE CONLIKE contramapB #-}
257 | contramapB :: (b -> a) -> BoundedPrim a -> BoundedPrim b
258 | contramapB f (BP b io) = BP b (io . f)
259 | 
260 | -- | The 'BoundedPrim' that always results in the zero-length sequence.
261 | {-# INLINE CONLIKE emptyB #-}
262 | emptyB :: BoundedPrim a
263 | emptyB = BP 0 (\_ op -> return op)
264 | 
265 | -- | Encode a pair by encoding its first component and then its second component.
266 | {-# INLINE CONLIKE pairB #-}
267 | pairB :: BoundedPrim a -> BoundedPrim b -> BoundedPrim (a, b)
268 | pairB (BP b1 io1) (BP b2 io2) =
269 |     BP (b1 + b2) (\(x1,x2) op -> io1 x1 op >>= io2 x2)
270 | 
271 | -- | Encode an 'Either' value using the first 'BoundedPrim' for 'Left'
272 | -- values and the second 'BoundedPrim' for 'Right' values.
273 | --
274 | -- Note that the functions 'eitherB', 'pairB', and 'contramapB' (written below
275 | -- using '>$<') suffice to construct 'BoundedPrim's for all non-recursive
276 | -- algebraic datatypes. For example,
277 | --
278 | -- @
279 | --maybeB :: BoundedPrim () -> BoundedPrim a -> BoundedPrim (Maybe a)
280 | --maybeB nothing just = 'maybe' (Left ()) Right '>$<' eitherB nothing just
281 | -- @
282 | {-# INLINE CONLIKE eitherB #-}
283 | eitherB :: BoundedPrim a -> BoundedPrim b -> BoundedPrim (Either a b)
284 | eitherB (BP b1 io1) (BP b2 io2) =
285 |     BP (max b1 b2)
286 |         (\x op -> case x of Left x1 -> io1 x1 op; Right x2 -> io2 x2 op)
287 | 
288 | -- | Conditionally select a 'BoundedPrim'.
289 | -- For example, we can implement the ASCII primitive that drops characters with
290 | -- Unicode codepoints above 127 as follows.
291 | --
292 | -- @
293 | --charASCIIDrop = 'condB' (< \'\\128\') ('liftFixedToBounded' 'Data.ByteString.Builder.Prim.char7') 'emptyB'
294 | -- @
295 | {-# INLINE CONLIKE condB #-}
296 | condB :: (a -> Bool) -> BoundedPrim a -> BoundedPrim a -> BoundedPrim a
297 | condB p be1 be2 =
298 |     contramapB (\x -> if p x then Left x else Right x) (eitherB be1 be2)
299 | 
300 | -- | Select an implementation depending on bitness.
301 | -- Throw a compile time error if bitness is neither 32 nor 64.
302 | {-# INLINE caseWordSize_32_64 #-}
303 | caseWordSize_32_64
304 |   :: a -- Value for 32-bit architecture
305 |   -> a -- Value for 64-bit architecture
306 |   -> a
307 | #if WORD_SIZE_IN_BITS == 32
308 | caseWordSize_32_64 = const
309 | #endif
310 | #if WORD_SIZE_IN_BITS == 64
311 | caseWordSize_32_64 = const id
312 | #endif
313 | 


--------------------------------------------------------------------------------
/Data/ByteString/Builder/Prim/Internal/Base16.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE CPP #-}
 2 | 
 3 | -- |
 4 | -- Copyright   : (c) 2011 Simon Meier
 5 | -- License     : BSD3-style (see LICENSE)
 6 | --
 7 | -- Maintainer  : Simon Meier <iridcode@gmail.com>
 8 | -- Stability   : experimental
 9 | -- Portability : GHC
10 | --
11 | -- Hexadecimal encoding of nibbles (4-bit) and octets (8-bit) as ASCII
12 | -- characters.
13 | --
14 | -- The current implementation is based on a table based encoding inspired by
15 | -- the code in the 'base64-bytestring' library by Bryan O'Sullivan. In our
16 | -- benchmarks on a 32-bit machine it turned out to be the fastest
17 | -- implementation option.
18 | --
19 | module Data.ByteString.Builder.Prim.Internal.Base16 (
20 |     EncodingTable
21 |   , lowerTable
22 |   , encode8_as_16h
23 |   ) where
24 | 
25 | import Foreign
26 | import GHC.Exts (Addr#, Ptr(..))
27 | #if PURE_HASKELL
28 | import qualified Data.ByteString.Internal.Pure as Pure
29 | #else
30 | import Foreign.C.Types
31 | #endif
32 | 
33 | -- Creating the encoding table
34 | ------------------------------
35 | 
36 | -- | An encoding table for Base16 encoding.
37 | data EncodingTable = EncodingTable Addr#
38 | 
39 | -- | The encoding table for hexadecimal values with lower-case characters;
40 | -- e.g., deadbeef.
41 | lowerTable :: EncodingTable
42 | lowerTable =
43 | #if PURE_HASKELL
44 |   case Pure.lower_hex_table of
45 |     Ptr p# -> EncodingTable p#
46 | #else
47 |   case c_lower_hex_table of
48 |     Ptr p# -> EncodingTable p#
49 | 
50 | foreign import ccall "&hs_bytestring_lower_hex_table"
51 |   c_lower_hex_table :: Ptr CChar
52 | #endif
53 | 
54 | -- | Encode an octet as 16bit word comprising both encoded nibbles ordered
55 | -- according to the host endianness. Writing these 16bit to memory will write
56 | -- the nibbles in the correct order (i.e. big-endian).
57 | {-# INLINE encode8_as_16h #-}
58 | encode8_as_16h :: EncodingTable -> Word8 -> IO Word16
59 | encode8_as_16h (EncodingTable table) =
60 |     peekElemOff (Ptr table) . fromIntegral
61 | 


--------------------------------------------------------------------------------
/Data/ByteString/Builder/Prim/Internal/Floating.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE CPP #-}
 2 | 
 3 | #include "MachDeps.h"
 4 | #include "bytestring-cpp-macros.h"
 5 | 
 6 | -- |
 7 | -- Copyright   : (c) 2010 Simon Meier
 8 | --
 9 | -- License     : BSD3-style (see LICENSE)
10 | --
11 | -- Maintainer  : Simon Meier <iridcode@gmail.com>
12 | -- Stability   : experimental
13 | -- Portability : GHC
14 | --
15 | -- Conversion of 'Float's and 'Double's to 'Word32's and 'Word64's.
16 | --
17 | module Data.ByteString.Builder.Prim.Internal.Floating
18 |   ( castFloatToWord32
19 |   , castDoubleToWord64
20 |   , encodeFloatViaWord32F
21 |   , encodeDoubleViaWord64F
22 |   ) where
23 | 
24 | import Data.ByteString.Builder.Prim.Internal
25 | import Data.Word
26 | 
27 | #if HS_CAST_FLOAT_WORD_OPS_AVAILABLE
28 | import GHC.Float (castFloatToWord32, castDoubleToWord64)
29 | #else
30 | import Foreign.Marshal.Utils
31 | import Foreign.Storable
32 | import Foreign.Ptr
33 | 
34 | import Data.ByteString.Internal.Type (unsafeDupablePerformIO)
35 | {-
36 | We work around ticket http://ghc.haskell.org/trac/ghc/ticket/4092 by
37 | storing the Float/Double in a temp buffer and peeking it out again from there.
38 | -}
39 | 
40 | -- | Interpret a 'Float' as a 'Word32' as if through a bit-for-bit copy.
41 | -- (fallback if not available through GHC.Float)
42 | --
43 | -- e.g
44 | --
45 | -- > showHex (castFloatToWord32 1.0) [] = "3f800000"
46 | {-# NOINLINE castFloatToWord32 #-}
47 | castFloatToWord32 :: Float -> Word32
48 | #if (SIZEOF_HSFLOAT != SIZEOF_WORD32) || (ALIGNMENT_HSFLOAT < ALIGNMENT_WORD32)
49 |   #error "don't know how to cast Float to Word32"
50 | #endif
51 | castFloatToWord32 x = unsafeDupablePerformIO (with x (peek . castPtr))
52 | 
53 | -- | Interpret a 'Double' as a 'Word64' as if through a bit-for-bit copy.
54 | -- (fallback if not available through GHC.Float)
55 | --
56 | -- e.g
57 | --
58 | -- > showHex (castDoubleToWord64 1.0) [] = "3ff0000000000000"
59 | {-# NOINLINE castDoubleToWord64 #-}
60 | castDoubleToWord64 :: Double -> Word64
61 | #if (SIZEOF_HSDOUBLE != SIZEOF_WORD64) || (ALIGNMENT_HSDOUBLE < ALIGNMENT_WORD64)
62 |   #error "don't know how to cast Double to Word64"
63 | #endif
64 | castDoubleToWord64 x = unsafeDupablePerformIO (with x (peek . castPtr))
65 | #endif
66 | 
67 | 
68 | -- | Encode a 'Float' using a 'Word32' encoding.
69 | {-# INLINE encodeFloatViaWord32F #-}
70 | encodeFloatViaWord32F :: FixedPrim Word32 -> FixedPrim Float
71 | #if HS_CAST_FLOAT_WORD_OPS_AVAILABLE
72 | encodeFloatViaWord32F = (castFloatToWord32 >$<)
73 | #else
74 | encodeFloatViaWord32F w32fe = fixedPrim (size w32fe) $ \x op -> do
75 |   x' <- with x (peek . castPtr)
76 |   runF w32fe x' op
77 | #endif
78 | 
79 | -- | Encode a 'Double' using a 'Word64' encoding.
80 | {-# INLINE encodeDoubleViaWord64F #-}
81 | encodeDoubleViaWord64F :: FixedPrim Word64 -> FixedPrim Double
82 | #if HS_CAST_FLOAT_WORD_OPS_AVAILABLE
83 | encodeDoubleViaWord64F = (castDoubleToWord64 >$<)
84 | #else
85 | encodeDoubleViaWord64F w64fe = fixedPrim (size w64fe) $ \x op -> do
86 |   x' <- with x (peek . castPtr)
87 |   runF w64fe x' op
88 | #endif
89 | 


--------------------------------------------------------------------------------
/Data/ByteString/Builder/RealFloat.hs:
--------------------------------------------------------------------------------
  1 | -- |
  2 | -- Module      : Data.ByteString.Builder.RealFloat
  3 | -- Copyright   : (c) Lawrence Wu 2021
  4 | -- License     : BSD-style
  5 | -- Maintainer  : lawrencejwu@gmail.com
  6 | --
  7 | -- Floating point formatting for @Bytestring.Builder@
  8 | --
  9 | -- This module primarily exposes `floatDec` and `doubleDec` which do the
 10 | -- equivalent of converting through @'Data.ByteString.Builder.string7' . 'show'@.
 11 | --
 12 | -- It also exposes `formatFloat` and `formatDouble` with a similar API as
 13 | -- `GHC.Float.formatRealFloat`.
 14 | --
 15 | -- NB: The float-to-string conversions exposed by this module match `show`'s
 16 | -- output (specifically with respect to default rounding and length). In
 17 | -- particular, there are boundary cases where the closest and \'shortest\'
 18 | -- string representations are not used.  Mentions of \'shortest\' in the docs
 19 | -- below are with this caveat.
 20 | --
 21 | -- For example, for fidelity, we match `show` on the output below.
 22 | --
 23 | -- >>> show (1.0e23 :: Float)
 24 | -- "1.0e23"
 25 | -- >>> show (1.0e23 :: Double)
 26 | -- "9.999999999999999e22"
 27 | -- >>> floatDec 1.0e23
 28 | -- "1.0e23"
 29 | -- >>> doubleDec 1.0e23
 30 | -- "9.999999999999999e22"
 31 | --
 32 | -- Simplifying, we can build a shorter, lossless representation by just using
 33 | -- @"1.0e23"@ since the floating point values that are 1 ULP away are
 34 | --
 35 | -- >>> showHex (castDoubleToWord64 1.0e23) []
 36 | -- "44b52d02c7e14af6"
 37 | -- >>> castWord64ToDouble 0x44b52d02c7e14af5
 38 | -- 9.999999999999997e22
 39 | -- >>> castWord64ToDouble 0x44b52d02c7e14af6
 40 | -- 9.999999999999999e22
 41 | -- >>> castWord64ToDouble 0x44b52d02c7e14af7
 42 | -- 1.0000000000000001e23
 43 | --
 44 | -- In particular, we could use the exact boundary if it is the shortest
 45 | -- representation and the original floating number is even. To experiment with
 46 | -- the shorter rounding, refer to
 47 | -- `Data.ByteString.Builder.RealFloat.Internal.acceptBounds`. This will give us
 48 | --
 49 | -- >>> floatDec 1.0e23
 50 | -- "1.0e23"
 51 | -- >>> doubleDec 1.0e23
 52 | -- "1.0e23"
 53 | --
 54 | -- For more details, please refer to the
 55 | -- <https://dl.acm.org/doi/10.1145/3192366.3192369 Ryu paper>.
 56 | --
 57 | -- @since 0.11.2.0
 58 | 
 59 | module Data.ByteString.Builder.RealFloat
 60 |   ( floatDec
 61 |   , doubleDec
 62 | 
 63 |   -- * Custom formatting
 64 |   , formatFloat
 65 |   , formatDouble
 66 |   , FloatFormat
 67 |   , standard
 68 |   , standardDefaultPrecision
 69 |   , scientific
 70 |   , generic
 71 |   ) where
 72 | 
 73 | import Data.ByteString.Builder.Internal (Builder)
 74 | import qualified Data.ByteString.Builder.RealFloat.Internal as R
 75 | import qualified Data.ByteString.Builder.RealFloat.F2S as RF
 76 | import qualified Data.ByteString.Builder.RealFloat.D2S as RD
 77 | import qualified Data.ByteString.Builder.Prim as BP
 78 | import GHC.Float (roundTo)
 79 | import GHC.Word (Word64)
 80 | import GHC.Show (intToDigit)
 81 | 
 82 | -- | Returns a rendered Float. Matches `show` in displaying in standard or
 83 | -- scientific notation
 84 | --
 85 | -- @
 86 | -- floatDec = 'formatFloat' 'generic'
 87 | -- @
 88 | {-# INLINABLE floatDec #-}
 89 | floatDec :: Float -> Builder
 90 | floatDec = formatFloat generic
 91 | 
 92 | -- | Returns a rendered Double. Matches `show` in displaying in standard or
 93 | -- scientific notation
 94 | --
 95 | -- @
 96 | -- doubleDec = 'formatDouble' 'generic'
 97 | -- @
 98 | {-# INLINABLE doubleDec #-}
 99 | doubleDec :: Double -> Builder
100 | doubleDec = formatDouble generic
101 | 
102 | -- | Format type for use with `formatFloat` and `formatDouble`.
103 | --
104 | -- @since 0.11.2.0
105 | data FloatFormat = MkFloatFormat FormatMode (Maybe Int)
106 | 
107 | -- | Standard notation with `n` decimal places
108 | --
109 | -- @since 0.11.2.0
110 | standard :: Int -> FloatFormat
111 | standard n = MkFloatFormat FStandard (Just n)
112 | 
113 | -- | Standard notation with the \'default precision\' (decimal places matching `show`)
114 | --
115 | -- @since 0.11.2.0
116 | standardDefaultPrecision :: FloatFormat
117 | standardDefaultPrecision = MkFloatFormat FStandard Nothing
118 | 
119 | -- | Scientific notation with \'default precision\' (decimal places matching `show`)
120 | --
121 | -- @since 0.11.2.0
122 | scientific :: FloatFormat
123 | scientific = MkFloatFormat FScientific Nothing
124 | 
125 | -- | Standard or scientific notation depending on the exponent. Matches `show`
126 | --
127 | -- @since 0.11.2.0
128 | generic :: FloatFormat
129 | generic = MkFloatFormat FGeneric Nothing
130 | 
131 | -- | ByteString float-to-string format
132 | data FormatMode
133 |   = FScientific     -- ^ scientific notation
134 |   | FStandard       -- ^ standard notation with `Maybe Int` digits after the decimal
135 |   | FGeneric        -- ^ dispatches to scientific or standard notation based on the exponent
136 |   deriving Show
137 | 
138 | -- TODO: support precision argument for FGeneric and FScientific
139 | -- | Returns a rendered Float. Returns the \'shortest\' representation in
140 | -- scientific notation and takes an optional precision argument in standard
141 | -- notation. Also see `floatDec`.
142 | --
143 | -- With standard notation, the precision argument is used to truncate (or
144 | -- extend with 0s) the \'shortest\' rendered Float. The \'default precision\' does
145 | -- no such modifications and will return as many decimal places as the
146 | -- representation demands.
147 | --
148 | -- e.g
149 | --
150 | -- >>> formatFloat (standard 1) 1.2345e-2
151 | -- "0.0"
152 | -- >>> formatFloat (standard 10) 1.2345e-2
153 | -- "0.0123450000"
154 | -- >>> formatFloat standardDefaultPrecision 1.2345e-2
155 | -- "0.01234"
156 | -- >>> formatFloat scientific 12.345
157 | -- "1.2345e1"
158 | -- >>> formatFloat generic 12.345
159 | -- "12.345"
160 | --
161 | -- @since 0.11.2.0
162 | {-# INLINABLE formatFloat #-}
163 | formatFloat :: FloatFormat -> Float -> Builder
164 | formatFloat (MkFloatFormat fmt prec) = \f ->
165 |   let (RF.FloatingDecimal m e) = RF.f2Intermediate f
166 |       e' = R.int32ToInt e + R.decimalLength9 m in
167 |   case fmt of
168 |     FGeneric ->
169 |       case specialStr f of
170 |         Just b -> b
171 |         Nothing ->
172 |           if e' >= 0 && e' <= 7
173 |              then sign f `mappend` showStandard (R.word32ToWord64 m) e' prec
174 |              else BP.primBounded (R.toCharsScientific (f < 0) m e) ()
175 |     FScientific -> RF.f2s f
176 |     FStandard ->
177 |       case specialStr f of
178 |         Just b -> b
179 |         Nothing -> sign f `mappend` showStandard (R.word32ToWord64 m) e' prec
180 | 
181 | -- TODO: support precision argument for FGeneric and FScientific
182 | -- | Returns a rendered Double. Returns the \'shortest\' representation in
183 | -- scientific notation and takes an optional precision argument in standard
184 | -- notation. Also see `doubleDec`.
185 | --
186 | -- With standard notation, the precision argument is used to truncate (or
187 | -- extend with 0s) the \'shortest\' rendered Float. The \'default precision\'
188 | -- does no such modifications and will return as many decimal places as the
189 | -- representation demands.
190 | --
191 | -- e.g
192 | --
193 | -- >>> formatDouble (standard 1) 1.2345e-2
194 | -- "0.0"
195 | -- >>> formatDouble (standard 10) 1.2345e-2
196 | -- "0.0123450000"
197 | -- >>> formatDouble standardDefaultPrecision 1.2345e-2
198 | -- "0.01234"
199 | -- >>> formatDouble scientific 12.345
200 | -- "1.2345e1"
201 | -- >>> formatDouble generic 12.345
202 | -- "12.345"
203 | --
204 | -- @since 0.11.2.0
205 | {-# INLINABLE formatDouble #-}
206 | formatDouble :: FloatFormat -> Double -> Builder
207 | formatDouble (MkFloatFormat fmt prec) = \f ->
208 |   let (RD.FloatingDecimal m e) = RD.d2Intermediate f
209 |       e' = R.int32ToInt e + R.decimalLength17 m in
210 |   case fmt of
211 |     FGeneric ->
212 |       case specialStr f of
213 |         Just b -> b
214 |         Nothing ->
215 |           if e' >= 0 && e' <= 7
216 |              then sign f `mappend` showStandard m e' prec
217 |              else BP.primBounded (R.toCharsScientific (f < 0) m e) ()
218 |     FScientific -> RD.d2s f
219 |     FStandard ->
220 |       case specialStr f of
221 |         Just b -> b
222 |         Nothing -> sign f `mappend` showStandard m e' prec
223 | 
224 | -- | Char7 encode a 'Char'.
225 | {-# INLINE char7 #-}
226 | char7 :: Char -> Builder
227 | char7 = BP.primFixed BP.char7
228 | 
229 | -- | Char7 encode a 'String'.
230 | {-# INLINE string7 #-}
231 | string7 :: String -> Builder
232 | string7 = BP.primMapListFixed BP.char7
233 | 
234 | -- | Encodes a `-` if input is negative
235 | sign :: RealFloat a => a -> Builder
236 | sign f = if f < 0 then char7 '-' else mempty
237 | 
238 | -- | Special rendering for Nan, Infinity, and 0. See
239 | -- RealFloat.Internal.NonNumbersAndZero
240 | specialStr :: RealFloat a => a -> Maybe Builder
241 | specialStr f
242 |   | isNaN f          = Just $ string7 "NaN"
243 |   | isInfinite f     = Just $ sign f `mappend` string7 "Infinity"
244 |   | isNegativeZero f = Just $ string7 "-0.0"
245 |   | f == 0           = Just $ string7 "0.0"
246 |   | otherwise        = Nothing
247 | 
248 | -- | Returns a list of decimal digits in a Word64
249 | digits :: Word64 -> [Int]
250 | digits w = go [] w
251 |   where go ds 0 = ds
252 |         go ds c = let (q, r) = R.dquotRem10 c
253 |                    in go ((R.word64ToInt r) : ds) q
254 | 
255 | -- | Show a floating point value in standard notation. Based on GHC.Float.showFloat
256 | showStandard :: Word64 -> Int -> Maybe Int -> Builder
257 | showStandard m e prec =
258 |   case prec of
259 |     Nothing
260 |       | e <= 0 -> char7 '0'
261 |                `mappend` char7 '.'
262 |                `mappend` string7 (replicate (-e) '0')
263 |                `mappend` mconcat (digitsToBuilder ds)
264 |       | otherwise ->
265 |           let f 0 s     rs = mk0 (reverse s) `mappend` char7 '.' `mappend` mk0 rs
266 |               f n s     [] = f (n-1) (char7 '0':s) []
267 |               f n s (r:rs) = f (n-1) (r:s) rs
268 |            in f e [] (digitsToBuilder ds)
269 |     Just p
270 |       | e >= 0 ->
271 |           let (ei, is') = roundTo 10 (p' + e) ds
272 |               (ls, rs) = splitAt (e + ei) (digitsToBuilder is')
273 |            in mk0 ls `mappend` mkDot rs
274 |       | otherwise ->
275 |           let (ei, is') = roundTo 10 p' (replicate (-e) 0 ++ ds)
276 |               -- ds' should always be non-empty but use redundant pattern
277 |               -- matching to silence warning
278 |               ds' = if ei > 0 then is' else 0:is'
279 |               (ls, rs) = splitAt 1 $ digitsToBuilder ds'
280 |            in mk0 ls `mappend` mkDot rs
281 |           where p' = max p 0
282 |   where
283 |     mk0 ls = case ls of [] -> char7 '0'; _ -> mconcat ls
284 |     mkDot rs = if null rs then mempty else char7 '.' `mappend` mconcat rs
285 |     ds = digits m
286 |     digitsToBuilder = fmap (char7 . intToDigit)
287 | 
288 | 


--------------------------------------------------------------------------------
/Data/ByteString/Builder/RealFloat/F2S.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE CPP #-}
  2 | 
  3 | -- |
  4 | -- Module      : Data.ByteString.Builder.RealFloat.F2S
  5 | -- Copyright   : (c) Lawrence Wu 2021
  6 | -- License     : BSD-style
  7 | -- Maintainer  : lawrencejwu@gmail.com
  8 | --
  9 | -- Implementation of float-to-string conversion
 10 | 
 11 | module Data.ByteString.Builder.RealFloat.F2S
 12 |     ( FloatingDecimal(..)
 13 |     , f2s
 14 |     , f2Intermediate
 15 |     ) where
 16 | 
 17 | import Control.Arrow (first)
 18 | import Data.Bits ((.|.), (.&.), unsafeShiftL, unsafeShiftR)
 19 | import Data.ByteString.Builder.Internal (Builder)
 20 | import Data.ByteString.Builder.Prim (primBounded)
 21 | import Data.ByteString.Builder.RealFloat.Internal
 22 | import GHC.Int (Int32(..))
 23 | import GHC.Word (Word32(..), Word64(..))
 24 | 
 25 | #if !PURE_HASKELL
 26 | import GHC.Ptr (Ptr(..))
 27 | #endif
 28 | 
 29 | -- See Data.ByteString.Builder.RealFloat.TableGenerator for a high-level
 30 | -- explanation of the ryu algorithm
 31 | 
 32 | #if !PURE_HASKELL
 33 | -- | Table of 2^k / 5^q + 1
 34 | --
 35 | -- > fmap (finv float_pow5_inv_bitcount) [0..float_max_inv_split]
 36 | foreign import ccall "&hs_bytestring_float_pow5_inv_split"
 37 |   float_pow5_inv_split :: Ptr Word64
 38 | 
 39 | -- | Table of 5^(-e2-q) / 2^k + 1
 40 | --
 41 | -- > fmap (fnorm float_pow5_bitcount) [0..float_max_split]
 42 | foreign import ccall "&hs_bytestring_float_pow5_split"
 43 |   float_pow5_split :: Ptr Word64
 44 | #endif
 45 | 
 46 | -- | Number of mantissa bits of a 32-bit float. The number of significant bits
 47 | -- (floatDigits (undefined :: Float)) is 24 since we have a leading 1 for
 48 | -- normal floats and 0 for subnormal floats
 49 | float_mantissa_bits :: Int
 50 | float_mantissa_bits = 23
 51 | 
 52 | -- | Number of exponent bits of a 32-bit float
 53 | float_exponent_bits :: Int
 54 | float_exponent_bits = 8
 55 | 
 56 | -- | Bias in encoded 32-bit float representation (2^7 - 1)
 57 | float_bias :: Int
 58 | float_bias = 127
 59 | 
 60 | data FloatingDecimal = FloatingDecimal
 61 |   { fmantissa :: !Word32
 62 |   , fexponent :: !Int32
 63 |   } deriving (Show, Eq)
 64 | 
 65 | -- | Multiply a 32-bit number with a 64-bit number while keeping the upper 64
 66 | -- bits. Then shift by specified amount minus 32
 67 | mulShift32 :: Word32 -> Word64 -> Int -> Word32
 68 | mulShift32 m factor shift =
 69 |   let factorLo = factor .&. mask 32
 70 |       factorHi = factor `unsafeShiftR` 32
 71 |       bits0 = word32ToWord64 m * factorLo
 72 |       bits1 = word32ToWord64 m * factorHi
 73 |       total  = (bits0 `unsafeShiftR` 32) + bits1
 74 |    in word64ToWord32 $ total `unsafeShiftR` (shift - 32)
 75 | 
 76 | -- | Index into the 64-bit word lookup table float_pow5_inv_split
 77 | get_float_pow5_inv_split :: Int -> Word64
 78 | #if !PURE_HASKELL
 79 | get_float_pow5_inv_split = getWord64At float_pow5_inv_split
 80 | #else
 81 | -- > putStr $ case64 (finv float_pow5_inv_bitcount) [0..float_max_inv_split]
 82 | get_float_pow5_inv_split i = case i of
 83 |   0  -> 0x800000000000001
 84 |   1  -> 0x666666666666667
 85 |   2  -> 0x51eb851eb851eb9
 86 |   3  -> 0x4189374bc6a7efa
 87 |   4  -> 0x68db8bac710cb2a
 88 |   5  -> 0x53e2d6238da3c22
 89 |   6  -> 0x431bde82d7b634e
 90 |   7  -> 0x6b5fca6af2bd216
 91 |   8  -> 0x55e63b88c230e78
 92 |   9  -> 0x44b82fa09b5a52d
 93 |   10 -> 0x6df37f675ef6eae
 94 |   11 -> 0x57f5ff85e592558
 95 |   12 -> 0x465e6604b7a8447
 96 |   13 -> 0x709709a125da071
 97 |   14 -> 0x5a126e1a84ae6c1
 98 |   15 -> 0x480ebe7b9d58567
 99 |   16 -> 0x734aca5f6226f0b
100 |   17 -> 0x5c3bd5191b525a3
101 |   18 -> 0x49c97747490eae9
102 |   19 -> 0x760f253edb4ab0e
103 |   20 -> 0x5e72843249088d8
104 |   21 -> 0x4b8ed0283a6d3e0
105 |   22 -> 0x78e480405d7b966
106 |   23 -> 0x60b6cd004ac9452
107 |   24 -> 0x4d5f0a66a23a9db
108 |   25 -> 0x7bcb43d769f762b
109 |   26 -> 0x63090312bb2c4ef
110 |   27 -> 0x4f3a68dbc8f03f3
111 |   28 -> 0x7ec3daf94180651
112 |   29 -> 0x65697bfa9acd1da
113 |   _  -> 0x51212ffbaf0a7e2
114 | #endif
115 | 
116 | -- | Index into the 64-bit word lookup table float_pow5_split
117 | get_float_pow5_split :: Int -> Word64
118 | #if !PURE_HASKELL
119 | get_float_pow5_split = getWord64At float_pow5_split
120 | #else
121 | -- > putStr $ case64 (fnorm float_pow5_bitcount) [0..float_max_split]
122 | get_float_pow5_split i = case i of
123 |   0  -> 0x1000000000000000
124 |   1  -> 0x1400000000000000
125 |   2  -> 0x1900000000000000
126 |   3  -> 0x1f40000000000000
127 |   4  -> 0x1388000000000000
128 |   5  -> 0x186a000000000000
129 |   6  -> 0x1e84800000000000
130 |   7  -> 0x1312d00000000000
131 |   8  -> 0x17d7840000000000
132 |   9  -> 0x1dcd650000000000
133 |   10 -> 0x12a05f2000000000
134 |   11 -> 0x174876e800000000
135 |   12 -> 0x1d1a94a200000000
136 |   13 -> 0x12309ce540000000
137 |   14 -> 0x16bcc41e90000000
138 |   15 -> 0x1c6bf52634000000
139 |   16 -> 0x11c37937e0800000
140 |   17 -> 0x16345785d8a00000
141 |   18 -> 0x1bc16d674ec80000
142 |   19 -> 0x1158e460913d0000
143 |   20 -> 0x15af1d78b58c4000
144 |   21 -> 0x1b1ae4d6e2ef5000
145 |   22 -> 0x10f0cf064dd59200
146 |   23 -> 0x152d02c7e14af680
147 |   24 -> 0x1a784379d99db420
148 |   25 -> 0x108b2a2c28029094
149 |   26 -> 0x14adf4b7320334b9
150 |   27 -> 0x19d971e4fe8401e7
151 |   28 -> 0x1027e72f1f128130
152 |   29 -> 0x1431e0fae6d7217c
153 |   30 -> 0x193e5939a08ce9db
154 |   31 -> 0x1f8def8808b02452
155 |   32 -> 0x13b8b5b5056e16b3
156 |   33 -> 0x18a6e32246c99c60
157 |   34 -> 0x1ed09bead87c0378
158 |   35 -> 0x13426172c74d822b
159 |   36 -> 0x1812f9cf7920e2b6
160 |   37 -> 0x1e17b84357691b64
161 |   38 -> 0x12ced32a16a1b11e
162 |   39 -> 0x178287f49c4a1d66
163 |   40 -> 0x1d6329f1c35ca4bf
164 |   41 -> 0x125dfa371a19e6f7
165 |   42 -> 0x16f578c4e0a060b5
166 |   43 -> 0x1cb2d6f618c878e3
167 |   44 -> 0x11efc659cf7d4b8d
168 |   45 -> 0x166bb7f0435c9e71
169 |   _  -> 0x1c06a5ec5433c60d
170 | #endif
171 | 
172 | -- | Take the high bits of m * 2^k / 5^q / 2^-e2+q+k
173 | mulPow5InvDivPow2 :: Word32 -> Int -> Int -> Word32
174 | mulPow5InvDivPow2 m q j = mulShift32 m (get_float_pow5_inv_split q) j
175 | 
176 | -- | Take the high bits of m * 5^-e2-q / 2^k / 2^q-k
177 | mulPow5DivPow2 :: Word32 -> Int -> Int -> Word32
178 | mulPow5DivPow2 m i j = mulShift32 m (get_float_pow5_split i) j
179 | 
180 | -- | Handle case e2 >= 0
181 | f2dGT :: Int32 -> Word32 -> Word32 -> Word32 -> (BoundsState Word32, Int32)
182 | f2dGT e2' u v w =
183 |   let e2 = int32ToInt e2'
184 |       -- q = e10 = log_10(2^e2)
185 |       q = log10pow2 e2
186 |       -- k = B0 + log_2(5^q)
187 |       k = float_pow5_inv_bitcount + pow5bits q - 1
188 |       i = -e2 + q + k
189 |       -- (u, v, w) * 2^k / 5^q / 2^-e2+q+k
190 |       u' = mulPow5InvDivPow2 u q i
191 |       v' = mulPow5InvDivPow2 v q i
192 |       w' = mulPow5InvDivPow2 w q i
193 |       !lastRemoved =
194 |         if q /= 0 && fquot10 (w' - 1) <= fquot10 u'
195 |           -- We need to know one removed digit even if we are not going to loop
196 |           -- below. We could use q = X - 1 above, except that would require 33
197 |           -- bits for the result, and we've found that 32-bit arithmetic is
198 |           -- faster even on 64-bit machines.
199 |           then let l = float_pow5_inv_bitcount + pow5bits (q - 1) - 1
200 |                 in frem10 (mulPow5InvDivPow2 v (q - 1) (-e2 + q - 1 + l))
201 |           else 0
202 |       !(vvTrailing, vuTrailing, vw') =
203 |         case () of
204 |           _ | q < 9 && frem5 v == 0
205 |                 -> (multipleOfPowerOf5 v q, False, w')
206 |             | q < 9 && acceptBounds v
207 |                 -> (False, multipleOfPowerOf5 u q, w')
208 |             | q < 9
209 |                 -> (False, False, w' - boolToWord32 (multipleOfPowerOf5 w q))
210 |             | otherwise
211 |                 -> (False, False, w')
212 |    in (BoundsState u' v' vw' lastRemoved vuTrailing vvTrailing, intToInt32 q)
213 | 
214 | -- | Handle case e2 < 0
215 | f2dLT :: Int32 -> Word32 -> Word32 -> Word32 -> (BoundsState Word32, Int32)
216 | f2dLT e2' u v w =
217 |   let e2 = int32ToInt e2'
218 |       q = log10pow5 (-e2)
219 |       e10 = q + e2
220 |       i = (-e2) - q
221 |       -- k = log_2(5^-e2-q) - B1
222 |       k = pow5bits i - float_pow5_bitcount
223 |       j = q - k
224 |       -- (u, v, w) * 5^-e2-q / 2^k / 2^q-k
225 |       u' = mulPow5DivPow2 u i j
226 |       v' = mulPow5DivPow2 v i j
227 |       w' = mulPow5DivPow2 w i j
228 |       !lastRemoved =
229 |         if q /= 0 && fquot10 (w' - 1) <= fquot10 u'
230 |           then let j' = q - 1 - (pow5bits (i + 1) - float_pow5_bitcount)
231 |                 in frem10 (mulPow5DivPow2 v (i + 1) j')
232 |           else 0
233 |       !(vvTrailing , vuTrailing, vw') =
234 |         case () of
235 |           _ | q <= 1 && acceptBounds v
236 |                 -> (True, v - u == 2, w') -- mmShift == 1
237 |             | q <= 1
238 |                 -> (True, False, w' - 1)
239 |             | q < 31
240 |                 -> (multipleOfPowerOf2 v (q - 1), False, w')
241 |             | otherwise
242 |                 -> (False, False, w')
243 |    in (BoundsState u' v' vw' lastRemoved vuTrailing vvTrailing, intToInt32 e10)
244 | 
245 | -- | Returns the decimal representation of the given mantissa and exponent of a
246 | -- 32-bit Float using the ryu algorithm.
247 | f2d :: Word32 -> Word32 -> FloatingDecimal
248 | f2d m e =
249 |   let !mf = if e == 0
250 |               then m
251 |               else (1 `unsafeShiftL` float_mantissa_bits) .|. m
252 |       !ef = intToInt32 $ if e == 0
253 |               then 1 - (float_bias + float_mantissa_bits)
254 |               else word32ToInt e - (float_bias + float_mantissa_bits)
255 |       !e2 = ef - 2
256 |       -- Step 2. 3-tuple (u, v, w) * 2**e2
257 |       !u = 4 * mf - 1 - boolToWord32 (m /= 0 || e <= 1)
258 |       !v = 4 * mf
259 |       !w = 4 * mf + 2
260 |       -- Step 3. convert to decimal power base
261 |       !(state, e10) =
262 |         if e2 >= 0
263 |            then f2dGT e2 u v w
264 |            else f2dLT e2 u v w
265 |       -- Step 4: Find the shortest decimal representation in the interval of
266 |       -- valid representations.
267 |       !(output, removed) =
268 |         let rounded = closestCorrectlyRounded (acceptBounds v)
269 |          in first rounded $ if vvIsTrailingZeros state || vuIsTrailingZeros state
270 |            then trimTrailing state
271 |            else trimNoTrailing state
272 |       !e' = e10 + removed
273 |    in FloatingDecimal output e'
274 | 
275 | -- | Split a Float into (sign, mantissa, exponent)
276 | breakdown :: Float -> (Bool, Word32, Word32)
277 | breakdown f =
278 |   let bits = castFloatToWord32 f
279 |       sign = ((bits `unsafeShiftR` (float_mantissa_bits + float_exponent_bits)) .&. 1) /= 0
280 |       mantissa = bits .&. mask float_mantissa_bits
281 |       expo = (bits `unsafeShiftR` float_mantissa_bits) .&. mask float_exponent_bits
282 |    in (sign, mantissa, expo)
283 | 
284 | -- | Dispatches to `f2d` and applies the given formatters
285 | {-# INLINE f2s' #-}
286 | f2s' :: (Bool -> Word32 -> Int32 -> a) -> (NonNumbersAndZero -> a) -> Float -> a
287 | f2s' formatter specialFormatter f =
288 |   let (sign, mantissa, expo) = breakdown f
289 |    in if (expo == mask float_exponent_bits) || (expo == 0 && mantissa == 0)
290 |          then specialFormatter NonNumbersAndZero
291 |                   { negative=sign
292 |                   , exponent_all_one=expo > 0
293 |                   , mantissa_non_zero=mantissa > 0 }
294 |          else let FloatingDecimal m e = f2d mantissa expo
295 |                in formatter sign m e
296 | 
297 | -- | Render a Float in scientific notation
298 | f2s :: Float -> Builder
299 | f2s f = primBounded (f2s' toCharsScientific toCharsNonNumbersAndZero f) ()
300 | 
301 | -- | Returns the decimal representation of a Float. NaN and Infinity will
302 | -- return `FloatingDecimal 0 0`
303 | f2Intermediate :: Float -> FloatingDecimal
304 | f2Intermediate = f2s' (const FloatingDecimal) (const $ FloatingDecimal 0 0)
305 | 


--------------------------------------------------------------------------------
/Data/ByteString/Builder/RealFloat/TableGenerator.hs:
--------------------------------------------------------------------------------
  1 | -- |
  2 | -- Module      : Data.ByteString.Builder.RealFloat.TableGenerator
  3 | -- Copyright   : (c) Lawrence Wu 2021
  4 | -- License     : BSD-style
  5 | -- Maintainer  : lawrencejwu@gmail.com
  6 | --
  7 | -- Constants and overview for compile-time table generation for Ryu internals
  8 | --
  9 | -- This module uses Haskell's arbitrary-precision `Integer` types to compute
 10 | -- the necessary multipliers for efficient conversion to a decimal power base.
 11 | --
 12 | -- It also exposes constants relevant to the 32- and 64-bit tables (e.g maximum
 13 | -- number of bits required to store the table values).
 14 | 
 15 | module Data.ByteString.Builder.RealFloat.TableGenerator
 16 |   ( float_pow5_inv_bitcount
 17 |   , float_pow5_bitcount
 18 |   , double_pow5_bitcount
 19 |   , double_pow5_inv_bitcount
 20 |   , float_max_split
 21 |   , float_max_inv_split
 22 |   , double_max_split
 23 |   , double_max_inv_split
 24 | 
 25 |   , finv
 26 |   , fnorm
 27 |   , splitWord128s
 28 |   , case64
 29 |   , case128
 30 |   ) where
 31 | 
 32 | import GHC.Float (int2Double)
 33 | 
 34 | import Data.Bits
 35 | import Data.Word
 36 | import Numeric
 37 | 
 38 | 
 39 | -- The basic floating point conversion algorithm is as such:
 40 | --
 41 | -- Given floating point
 42 | --
 43 | --   f = (-1)^s * m_f * 2^e_f
 44 | --
 45 | -- which is IEEE encoded by `[s] [.. e ..] [.. m ..]`. `s` is the sign bit, `e`
 46 | -- is the biased exponent, and `m` is the mantissa, let
 47 | --
 48 | --       | e /= 0            | e == 0
 49 | --  -----+-------------------+-----------
 50 | --   m_f | 2^len(m) + m      | m
 51 | --   e_f | e - bias - len(m) | 1 - bias - len(m)
 52 | --
 53 | -- we compute the halfway points to the next smaller (`f-`) and larger (`f+`)
 54 | -- floating point numbers as
 55 | --
 56 | --  lower halfway point u * 2^e2, u = 4 * m_f - (if m == 0 then 1 else 2)
 57 | --                      v * 2^e2, v = 4 * m_f
 58 | --  upper halfway point w * 2^e2, u = 4 * m_f + 2
 59 | --  where e2 = ef - 2 (so u, v, w are integers)
 60 | --
 61 | --
 62 | -- Then we compute (a, b, c) * 10^e10 = (u, v, w) * 2^e2 which is split into
 63 | -- the case of
 64 | --
 65 | --   e2 >= 0   ==>    e10 = 0 , (a, b, c) = (u, v, w) * 2^e2
 66 | --   e2 <  0   ==>    e10 = e2, (a, b, c) = (u, v, w) * 5^-e2
 67 | --
 68 | -- And finally we find the shortest representation from integers d0 and e0 such
 69 | -- that
 70 | --
 71 | --  a * 10^e10 < d0 * 10^(e0+e10) < c * 10^e10
 72 | --
 73 | -- such that e0 is maximal (we allow equality to smaller or larger halfway
 74 | -- point depending on rounding mode). This is found through iteratively
 75 | -- dividing by 10 while a/10^j < c/10^j and doing some bookkeeping around
 76 | -- zeros.
 77 | --
 78 | --
 79 | --
 80 | --
 81 | -- The ryu algorithm removes the requirement for arbitrary precision arithmetic
 82 | -- and improves the runtime significantly by skipping most of the iterative
 83 | -- division by carefully selecting a point where certain invariants hold and
 84 | -- precomputing a few tables.
 85 | --
 86 | -- Specifically, define `q` such that the correspondings values of a/10^q <
 87 | -- c/10^q - 1. We can prove (not shown) that
 88 | --
 89 | --    if e2 >= 0, q = e2 * log_10(2)
 90 | --    if e2 <  0, q = -e2 * log_10(5)
 91 | --
 92 | -- Then we can compute (a, b, c) / 10^q. Starting from (u, v, w) we have
 93 | --
 94 | --      (a, b, c) / 10^q                  (a, b, c) / 10^q
 95 | --    = (u, v, w) * 2^e2 / 10^q    OR   = (u, v, w) * 5^-e2 / 10^q
 96 | --
 97 | -- And since q < e2,
 98 | --
 99 | --    = (u, v, w) * 2^e2-q / 5^q   OR   = (u, v, w) * 5^-e2-q / 2^q
100 | --
101 | -- While (u, v, w) are n-bit numbers, 5^q and whatnot are significantly larger,
102 | -- but we only need the top-most n bits of the result so we can choose `k` that
103 | -- reduce the number of bits required to ~2n. We then multiply by either
104 | --
105 | --    2^k / 5^q                    OR   5^-e2-q / 2^k
106 | --
107 | -- The required `k` is roughly linear in the exponent (we need more of the
108 | -- multiplication to be precise) but the number of bits to store the
109 | -- multiplicands above stays fixed.
110 | --
111 | -- Since the number of bits needed is relatively small for IEEE 32- and 64-bit
112 | -- floating types, we can compute appropriate values for `k` for the
113 | -- floating-point-type-specific bounds instead of each e2.
114 | --
115 | -- Finally, we need to do some final manual iterations potentially to do a
116 | -- final fixup of the skipped state
117 | 
118 | 
119 | -- | Bound for bits of @2^k / 5^q@ for floats
120 | float_pow5_inv_bitcount :: Int
121 | float_pow5_inv_bitcount = 59
122 | 
123 | -- | Bound for bits of @5^-e2-q / 2^k@ for floats
124 | float_pow5_bitcount :: Int
125 | float_pow5_bitcount = 61
126 | 
127 | -- | Bound for bits of @5^-e2-q / 2^k@ for doubles
128 | double_pow5_bitcount :: Int
129 | double_pow5_bitcount = 125
130 | 
131 | -- | Bound for bits of @2^k / 5^q@ for doubles
132 | double_pow5_inv_bitcount :: Int
133 | double_pow5_inv_bitcount = 125
134 | 
135 | -- NB: these tables are encoded directly into the
136 | -- source code in cbits/aligned-static-hs-data.c
137 | 
138 | -- | Number of bits in a positive integer
139 | blen :: Integer -> Int
140 | blen 0 = 0
141 | blen 1 = 1
142 | blen n = 1 + blen (n `quot` 2)
143 | 
144 | -- | Used for table generation of 2^k / 5^q + 1
145 | finv :: Int -> Int -> Integer
146 | finv bitcount i =
147 |   let p = 5^i
148 |    in (1 `shiftL` (blen p - 1 + bitcount)) `div` p + 1
149 | 
150 | -- | Used for table generation of 5^-e2-q / 2^k
151 | fnorm :: Int -> Int -> Integer
152 | fnorm bitcount i =
153 |   let p = 5^i
154 |       s = blen p - bitcount
155 |    in if s < 0 then p `shiftL` (-s) else p `shiftR` s
156 | 
157 | -- | Breaks each integer into two Word64s (lowBits, highBits)
158 | splitWord128s :: [Integer] -> [Word64]
159 | splitWord128s li
160 |   = [fromInteger w | x <- li, w <- [x .&. maxWord64, x `shiftR` 64]]
161 |   where  maxWord64 = toInteger (maxBound :: Word64)
162 | 
163 | splitWord128 :: Integer -> (Word64,Word64)
164 | splitWord128 x = (fromInteger (x `shiftR` 64), fromInteger (x .&. maxWord64))
165 |   where  maxWord64 = toInteger (maxBound :: Word64)
166 | 
167 | 
168 | -- Helpers to generate case alternatives returning either one Word64 (case64) or
169 | -- two Word64s (case128) for the PURE_HASKELL variant of the tables.
170 | case64 :: (Int -> Integer) -> [Int] -> String
171 | case64 f range = concat
172 |   [ show i ++ " -> 0x" ++ showHex (f i) "\n"
173 |   | i <- range]
174 | 
175 | case128 :: (Int -> Integer) -> [Int] -> String
176 | case128 f range = concat
177 |   [ show i ++ " -> (0x" ++ showHex hi "" ++ ", 0x" ++ showHex lo ")\n"
178 |   | i <- range
179 |   , let (hi,lo) = splitWord128 (f i)
180 |   ]
181 | 
182 | -- Given a specific floating-point type, determine the range of q for the < 0
183 | -- and >= 0 cases
184 | get_range :: forall ff. (RealFloat ff) => ff -> (Int, Int)
185 | get_range f =
186 |   let (emin, emax) = floatRange f
187 |       mantissaDigits = floatDigits f
188 |       emin' = emin - mantissaDigits - 2
189 |       emax' = emax - mantissaDigits - 2
190 |    in ( (-emin') - floor (int2Double (-emin') * logBase 10 5)
191 |       , floor (int2Double emax' * logBase 10 2))
192 | 
193 | float_max_split :: Int     -- = 46
194 | float_max_inv_split :: Int -- = 30
195 | (float_max_split, float_max_inv_split) = get_range (undefined :: Float)
196 | 
197 | -- we take a slightly different codepath s.t we need one extra entry
198 | double_max_split :: Int     -- = 325
199 | double_max_inv_split :: Int -- = 291
200 | (double_max_split, double_max_inv_split) =
201 |     let (m, mi) = get_range (undefined :: Double)
202 |      in (m + 1, mi)
203 | 
204 | 


--------------------------------------------------------------------------------
/Data/ByteString/Internal.hs:
--------------------------------------------------------------------------------
 1 | {-# OPTIONS_HADDOCK not-home #-}
 2 | 
 3 | -- |
 4 | -- Module      : Data.ByteString.Internal
 5 | -- Copyright   : (c) Don Stewart 2006-2008
 6 | --               (c) Duncan Coutts 2006-2012
 7 | -- License     : BSD-style
 8 | -- Maintainer  : dons00@gmail.com, duncan@community.haskell.org
 9 | -- Stability   : unstable
10 | -- Portability : non-portable
11 | --
12 | -- A module containing semi-public 'ByteString' internals. This exposes the
13 | -- 'ByteString' representation and low level construction functions. As such
14 | -- all the functions in this module are unsafe. The API is also not stable.
15 | --
16 | -- Where possible application should instead use the functions from the normal
17 | -- public interface modules, such as "Data.ByteString.Unsafe". Packages that
18 | -- extend the ByteString system at a low level will need to use this module.
19 | --
20 | module Data.ByteString.Internal (
21 | 
22 |         -- * The @ByteString@ type and representation
23 |         ByteString
24 |         ( BS
25 |         , PS -- backwards compatibility shim
26 |         ),
27 | 
28 |         StrictByteString,
29 | 
30 |         -- * Internal indexing
31 |         findIndexOrLength,
32 | 
33 |         -- * Conversion with lists: packing and unpacking
34 |         packBytes, packUptoLenBytes, unsafePackLenBytes,
35 |         packChars, packUptoLenChars, unsafePackLenChars,
36 |         unpackBytes, unpackAppendBytesLazy, unpackAppendBytesStrict,
37 |         unpackChars, unpackAppendCharsLazy, unpackAppendCharsStrict,
38 |         unsafePackAddress, unsafePackLenAddress,
39 |         unsafePackLiteral, unsafePackLenLiteral,
40 | 
41 |         -- * Low level imperative construction
42 |         empty,
43 |         create,
44 |         createUptoN,
45 |         createUptoN',
46 |         createAndTrim,
47 |         createAndTrim',
48 |         unsafeCreate,
49 |         unsafeCreateUptoN,
50 |         unsafeCreateUptoN',
51 |         mallocByteString,
52 | 
53 |         -- * Conversion to and from ForeignPtrs
54 |         mkDeferredByteString,
55 |         fromForeignPtr,
56 |         toForeignPtr,
57 |         fromForeignPtr0,
58 |         toForeignPtr0,
59 | 
60 |         -- * Utilities
61 |         nullForeignPtr,
62 |         deferForeignPtrAvailability,
63 |         SizeOverflowException,
64 |         overflowError,
65 |         checkedAdd,
66 |         checkedMultiply,
67 | 
68 |         -- * Standard C Functions
69 |         c_strlen,
70 |         c_free_finalizer,
71 | 
72 |         memchr,
73 |         memcmp,
74 |         memcpy,
75 |         memset,
76 | 
77 |         -- * cbits functions
78 |         c_reverse,
79 |         c_intersperse,
80 |         c_maximum,
81 |         c_minimum,
82 |         c_count,
83 |         c_sort,
84 | 
85 |         -- * Chars
86 |         w2c, c2w, isSpaceWord8, isSpaceChar8,
87 | 
88 |         -- * Deprecated and unmentionable
89 |         accursedUnutterablePerformIO,
90 | 
91 |         -- * Exported compatibility shim
92 |         plusForeignPtr,
93 |         unsafeWithForeignPtr
94 |   ) where
95 | 
96 | import Data.ByteString.Internal.Type
97 | 


--------------------------------------------------------------------------------
/Data/ByteString/Lazy/ReadInt.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE CPP #-}
  2 | 
  3 | -- This file is also included by "Data.ByteString.ReadInt", after defining
  4 | -- "BYTESTRING_STRICT".  The two modules share much of their code, but
  5 | -- the lazy version adds an outer loop over the chunks.
  6 | 
  7 | #ifdef BYTESTRING_STRICT
  8 | module Data.ByteString.ReadInt
  9 | #else
 10 | module Data.ByteString.Lazy.ReadInt
 11 | #endif
 12 |     ( readInt
 13 |     , readInt8
 14 |     , readInt16
 15 |     , readInt32
 16 |     , readWord
 17 |     , readWord8
 18 |     , readWord16
 19 |     , readWord32
 20 |     , readInt64
 21 |     , readWord64
 22 |     ) where
 23 | 
 24 | import qualified Data.ByteString.Internal as BI
 25 | #ifdef BYTESTRING_STRICT
 26 | import Data.ByteString
 27 | #else
 28 | import Data.ByteString.Lazy
 29 | import Data.ByteString.Lazy.Internal
 30 | #endif
 31 | import Data.Bits (FiniteBits, isSigned)
 32 | import Data.ByteString.Internal (pattern BS, plusForeignPtr)
 33 | import Data.Int
 34 | import Data.Word
 35 | import Foreign.ForeignPtr (ForeignPtr)
 36 | import Foreign.Ptr (minusPtr, plusPtr)
 37 | import Foreign.Storable (Storable(..))
 38 | 
 39 | ----- Public API
 40 | 
 41 | -- | Try to read a signed 'Int' value from the 'ByteString', returning
 42 | -- @Just (val, str)@ on success, where @val@ is the value read and @str@ is the
 43 | -- rest of the input string.  If the sequence of digits decodes to a value
 44 | -- larger than can be represented by an 'Int', the returned value will be
 45 | -- 'Nothing'.
 46 | --
 47 | -- 'readInt' does not ignore leading whitespace, the value must start
 48 | -- immediately at the beginning of the input string.
 49 | --
 50 | -- ==== __Examples__
 51 | -- >>> readInt "-1729 sum of cubes"
 52 | -- Just (-1729," sum of cubes")
 53 | -- >>> readInt "+1: readInt also accepts a leading '+'"
 54 | -- Just (1, ": readInt also accepts a leading '+'")
 55 | -- >>> readInt "not a decimal number"
 56 | -- Nothing
 57 | -- >>> readInt "12345678901234567890 overflows maxBound"
 58 | -- Nothing
 59 | -- >>> readInt "-12345678901234567890 underflows minBound"
 60 | -- Nothing
 61 | --
 62 | readInt :: ByteString -> Maybe (Int, ByteString)
 63 | readInt = _read
 64 | 
 65 | -- | A variant of 'readInt' specialised to 'Int32'.
 66 | readInt32 :: ByteString -> Maybe (Int32, ByteString)
 67 | readInt32 = _read
 68 | 
 69 | -- | A variant of 'readInt' specialised to 'Int16'.
 70 | readInt16 :: ByteString -> Maybe (Int16, ByteString)
 71 | readInt16 = _read
 72 | 
 73 | -- | A variant of 'readInt' specialised to 'Int8'.
 74 | readInt8 :: ByteString -> Maybe (Int8, ByteString)
 75 | readInt8 = _read
 76 | 
 77 | -- | Try to read a 'Word' value from the 'ByteString', returning
 78 | -- @Just (val, str)@ on success, where @val@ is the value read and @str@ is the
 79 | -- rest of the input string.  If the sequence of digits decodes to a value
 80 | -- larger than can be represented by a 'Word', the returned value will be
 81 | -- 'Nothing'.
 82 | --
 83 | -- 'readWord' does not ignore leading whitespace, the value must start with a
 84 | -- decimal digit immediately at the beginning of the input string.  Leading @+@
 85 | -- signs are not accepted.
 86 | --
 87 | -- ==== __Examples__
 88 | -- >>> readWord "1729 sum of cubes"
 89 | -- Just (1729," sum of cubes")
 90 | -- >>> readWord "+1729 has an explicit sign"
 91 | -- Nothing
 92 | -- >>> readWord "not a decimal number"
 93 | -- Nothing
 94 | -- >>> readWord "98765432109876543210 overflows maxBound"
 95 | -- Nothing
 96 | --
 97 | readWord :: ByteString -> Maybe (Word, ByteString)
 98 | readWord = _read
 99 | 
100 | -- | A variant of 'readWord' specialised to 'Word32'.
101 | readWord32 :: ByteString -> Maybe (Word32, ByteString)
102 | readWord32 = _read
103 | 
104 | -- | A variant of 'readWord' specialised to 'Word16'.
105 | readWord16 :: ByteString -> Maybe (Word16, ByteString)
106 | readWord16 = _read
107 | 
108 | -- | A variant of 'readWord' specialised to 'Word8'.
109 | readWord8 :: ByteString -> Maybe (Word8, ByteString)
110 | readWord8 = _read
111 | 
112 | -- | A variant of 'readInt' specialised to 'Int64'.
113 | readInt64 :: ByteString -> Maybe (Int64, ByteString)
114 | readInt64 = _read
115 | 
116 | -- | A variant of 'readWord' specialised to 'Word64'.
117 | readWord64 :: ByteString -> Maybe (Word64, ByteString)
118 | readWord64 = _read
119 | 
120 | -- | Polymorphic Int*/Word* reader
121 | _read :: forall a. (Integral a, FiniteBits a, Bounded a)
122 |       => ByteString  -> Maybe (a, ByteString)
123 | {-# INLINE _read #-}
124 | _read
125 |     | isSigned @a 0
126 |       = \ bs -> signed bs >>= \ (r, s, d1) -> _readDecimal r s d1
127 |     | otherwise
128 |       -- When the input is @16^n-1@, as is the case with 'maxBound' for
129 |       -- all the Word* types, the last decimal digit of 'maxBound' is 5.
130 |       = \ bs -> unsigned 5 bs >>= \ (r, s, d1) -> _readDecimal r s d1
131 |   where
132 |     -- Returns:
133 |     --  * Mod 10 min/max bound remainder
134 |     --  * 2nd and later digits
135 |     --  * 1st digit
136 |     --
137 |     -- When the input is @8*16^n-1@, as is the case with 'maxBound' for
138 |     -- all the Int* types, the last decimal digit of 'maxBound' is 7.
139 |     --
140 |     signed :: ByteString -> Maybe (Word64, ByteString, Word64)
141 |     signed bs = do
142 |         (w, s) <- uncons bs
143 |         let d1 = fromDigit w
144 |         if | d1 <= 9   -> Just (7, s, d1) -- leading digit
145 |            | w == 0x2d -> unsigned 8 s    -- minus sign
146 |            | w == 0x2b -> unsigned 7 s    -- plus sign
147 |            | otherwise -> Nothing         -- not a number
148 | 
149 |     unsigned :: Word64 -> ByteString -> Maybe (Word64, ByteString, Word64)
150 |     unsigned r bs = do
151 |         (w, s) <- uncons bs
152 |         let d1 = fromDigit w
153 |         if | d1 <= 9   -> Just (r, s, d1) -- leading digit
154 |            | otherwise -> Nothing         -- not a number
155 | 
156 | ----- Fixed-width unsigned reader
157 | 
158 | -- | Intermediate result from scanning a chunk, final output is
159 | -- converted to the requested type once all chunks are processed.
160 | --
161 | data Result = Overflow
162 |             | Result !Int    -- number of bytes (digits) read
163 |                      !Word64 -- accumulator value
164 | 
165 | _readDecimal :: forall a. (Integral a, Bounded a)
166 |              => Word64     -- ^ abs(maxBound/minBound) `mod` 10
167 |              -> ByteString -- ^ Input string
168 |              -> Word64     -- ^ First digit value
169 |              -> Maybe (a, ByteString)
170 | {-# INLINE _readDecimal #-}
171 | _readDecimal !r = consume
172 |   where
173 |     consume :: ByteString -> Word64 -> Maybe (a, ByteString)
174 | #ifdef BYTESTRING_STRICT
175 |     consume (BS fp len) a = case _digits q r fp len a of
176 |         Result used acc
177 |             | used == len
178 |               -> convert acc empty
179 |             | otherwise
180 |               -> convert acc $ BS (fp `plusForeignPtr` used) (len - used)
181 |         _   -> Nothing
182 | #else
183 |     -- All done
184 |     consume Empty acc = convert acc Empty
185 |     -- Process next chunk
186 |     consume (Chunk (BS fp len) cs) acc
187 |         = case _digits q r fp len acc of
188 |             Result used acc'
189 |                 | used == len
190 |                   -- process remaining chunks
191 |                   -> consume cs acc'
192 |                 | otherwise
193 |                   -- ran into a non-digit
194 |                   -> convert acc' $
195 |                      Chunk (BS (fp `plusForeignPtr` used) (len - used)) cs
196 |             _     -> Nothing
197 | #endif
198 |     convert :: Word64 -> ByteString -> Maybe (a, ByteString)
199 |     convert !acc rest =
200 |         let !i = case r of
201 |                 -- minBound @Int* `mod` 10 == 8
202 |                 8 -> negate $ fromIntegral @Word64 @a acc
203 |                 _ -> fromIntegral @Word64 @a acc
204 |          in Just (i, rest)
205 | 
206 |     -- The quotient of 'maxBound' divided by 10 is needed for
207 |     -- overflow checks, once the accumulator exceeds this value
208 |     -- no further digits can be added.  If equal, the last digit
209 |     -- must not exceed the `r` value (max/min bound `mod` 10).
210 |     --
211 |     q = fromIntegral @a @Word64 maxBound `div` 10
212 | 
213 | ----- Per chunk decoder
214 | 
215 | -- | Process as many digits as we can, returning the additional
216 | -- number of digits found and the updated accumulator.  If the
217 | -- accumulator would overflow return 'Overflow'.
218 | --
219 | _digits :: Word64           -- ^ maximum non-overflow value `div` 10
220 |         -> Word64           -- ^ maximum non-overflow vavlue `mod` 10
221 |         -> ForeignPtr Word8 -- ^ Input buffer
222 |         -> Int              -- ^ Input length
223 |         -> Word64           -- ^ Accumulated value of leading digits
224 |         -> Result           -- ^ Bytes read and final accumulator,
225 |                             -- or else overflow indication
226 | {-# INLINE _digits #-}
227 | _digits !q !r fp len a = BI.accursedUnutterablePerformIO $
228 |     BI.unsafeWithForeignPtr fp $ \ ptr -> do
229 |         let end = ptr `plusPtr` len
230 |         go ptr end ptr a
231 |   where
232 |     go !start !end = loop
233 |       where
234 |         loop !ptr !acc = getDigit >>= \ !d ->
235 |             if | d > 9
236 |                  -> return $ Result (ptr `minusPtr` start) acc
237 |                | acc < q || acc == q && d <= r
238 |                  -> loop (ptr `plusPtr` 1) (acc * 10 + d)
239 |                | otherwise
240 |                  -> return Overflow
241 |           where
242 |             getDigit :: IO Word64
243 |             getDigit
244 |                 | ptr /= end = fromDigit <$> peek ptr
245 |                 | otherwise  = pure 10  -- End of input
246 |             {-# NOINLINE getDigit #-}
247 |             -- 'getDigit' makes it possible to implement a single success
248 |             -- exit point from the loop.  If instead we return 'Result'
249 |             -- from multiple places, when '_digits' is inlined we get (at
250 |             -- least GHC 8.10 through 9.2) for each exit path a separate
251 |             -- join point implementing the continuation code.  GHC ticket
252 |             -- <https://gitlab.haskell.org/ghc/ghc/-/issues/20739>.
253 |             --
254 |             -- The NOINLINE pragma is required to avoid inlining branches
255 |             -- that would restore multiple exit points.
256 | 
257 | fromDigit :: Word8 -> Word64
258 | {-# INLINE fromDigit #-}
259 | fromDigit = \ !w -> fromIntegral w - 0x30 -- i.e. w - '0'
260 | 


--------------------------------------------------------------------------------
/Data/ByteString/Lazy/ReadNat.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE CPP #-}
  2 | 
  3 | -- This file is included by "Data.ByteString.ReadInt", after defining
  4 | -- "BYTESTRING_STRICT".  The two modules are largely identical, except for the
  5 | -- choice of ByteString type and the loops in `readNatural`, where the lazy
  6 | -- version needs to nest the inner loop inside a loop over the constituent
  7 | -- chunks.
  8 | 
  9 | #ifdef BYTESTRING_STRICT
 10 | module Data.ByteString.ReadNat
 11 | #else
 12 | module Data.ByteString.Lazy.ReadNat
 13 | #endif
 14 |     ( readInteger
 15 |     , readNatural
 16 |     ) where
 17 | 
 18 | import qualified Data.ByteString.Internal as BI
 19 | #ifdef BYTESTRING_STRICT
 20 | import Data.ByteString
 21 | #else
 22 | import Data.ByteString.Lazy
 23 | import Data.ByteString.Lazy.Internal
 24 | #endif
 25 | import Data.Bits (finiteBitSize)
 26 | import Data.ByteString.Internal (pattern BS, plusForeignPtr)
 27 | import Data.Word
 28 | import Foreign.ForeignPtr (ForeignPtr)
 29 | import Foreign.Ptr (Ptr, minusPtr, plusPtr)
 30 | import Foreign.Storable (Storable(..))
 31 | import Numeric.Natural (Natural)
 32 | 
 33 | ----- Public API
 34 | 
 35 | -- | 'readInteger' reads an 'Integer' from the beginning of the 'ByteString'.
 36 | -- If there is no 'Integer' at the beginning of the string, it returns
 37 | -- 'Nothing', otherwise it just returns the 'Integer' read, and the rest of
 38 | -- the string.
 39 | --
 40 | -- 'readInteger' does not ignore leading whitespace, the value must start
 41 | -- immediately at the beginning of the input string.
 42 | --
 43 | -- ==== __Examples__
 44 | -- >>> readInteger "-000111222333444555666777888999 all done"
 45 | -- Just (-111222333444555666777888999," all done")
 46 | -- >>> readInteger "+1: readInteger also accepts a leading '+'"
 47 | -- Just (1, ": readInteger also accepts a leading '+'")
 48 | -- >>> readInteger "not a decimal number"
 49 | -- Nothing
 50 | --
 51 | readInteger :: ByteString -> Maybe (Integer, ByteString)
 52 | readInteger = \ bs -> do
 53 |     (w, s) <- uncons bs
 54 |     let d = fromDigit w
 55 |     if | d <=    9 -> unsigned d s -- leading digit
 56 |        | w == 0x2d -> negative s   -- minus sign
 57 |        | w == 0x2b -> positive s   -- plus sign
 58 |        | otherwise -> Nothing      -- not a number
 59 |   where
 60 |     unsigned :: Word -> ByteString -> Maybe (Integer, ByteString)
 61 |     unsigned d s =
 62 |          let (!n, rest) = _readDecimal d s
 63 |              !i = toInteger n
 64 |           in Just (i, rest)
 65 | 
 66 |     positive :: ByteString -> Maybe (Integer, ByteString)
 67 |     positive bs = do
 68 |         (w, s) <- uncons bs
 69 |         let d = fromDigit w
 70 |         if | d <=    9 -> unsigned d s
 71 |            | otherwise -> Nothing
 72 | 
 73 |     negative :: ByteString -> Maybe (Integer, ByteString)
 74 |     negative bs = do
 75 |         (w, s) <- uncons bs
 76 |         let d = fromDigit w
 77 |         if | d >     9 -> Nothing
 78 |            | otherwise -> let (n, rest) = _readDecimal d s
 79 |                               !i = negate $ toInteger n
 80 |                            in Just (i, rest)
 81 | 
 82 | -- | 'readNatural' reads a 'Natural' number from the beginning of the
 83 | -- 'ByteString'.  If there is no 'Natural' number at the beginning of the
 84 | -- string, it returns 'Nothing', otherwise it just returns the number read, and
 85 | -- the rest of the string.
 86 | --
 87 | -- 'readNatural' does not ignore leading whitespace, the value must start with
 88 | -- a decimal digit immediately at the beginning of the input string.  Leading
 89 | -- @+@ signs are not accepted.
 90 | --
 91 | -- ==== __Examples__
 92 | -- >>> readNatural "000111222333444555666777888999 all done"
 93 | -- Just (111222333444555666777888999," all done")
 94 | -- >>> readNatural "+000111222333444555666777888999 explicit sign"
 95 | -- Nothing
 96 | -- >>> readNatural "not a decimal number"
 97 | -- Nothing
 98 | --
 99 | readNatural :: ByteString -> Maybe (Natural, ByteString)
100 | readNatural bs = do
101 |     (w, s) <- uncons bs
102 |     let d = fromDigit w
103 |     if | d <=    9 -> Just $! _readDecimal d s
104 |        | otherwise -> Nothing
105 | 
106 | ----- Internal implementation
107 | 
108 | -- | Intermediate result from scanning a chunk, final output is
109 | -- obtained via `convert` after all the chunks are processed.
110 | --
111 | data Result = Result !Int      -- Bytes consumed
112 |                      !Word     -- Value of LSW
113 |                      !Int      -- Digits in LSW
114 |                      [Natural] -- Little endian MSW list
115 | 
116 | _readDecimal :: Word -> ByteString -> (Natural, ByteString)
117 | _readDecimal =
118 |     -- Having read one digit, we're about to read the 2nd So the digit count
119 |     -- up to 'safeLog' starts at 2.
120 |     consume [] 2
121 |   where
122 |     consume :: [Natural] -> Int -> Word -> ByteString
123 |             -> (Natural, ByteString)
124 | #ifdef BYTESTRING_STRICT
125 |     consume ns cnt acc (BS fp len) =
126 |         -- Having read one digit, we're about to read the 2nd
127 |         -- So the digit count up to 'safeLog' starts at 2.
128 |         case natdigits fp len acc cnt ns of
129 |             Result used acc' cnt' ns'
130 |                 | used == len
131 |                   -> convert acc' cnt' ns' $ empty
132 |                 | otherwise
133 |                   -> convert acc' cnt' ns' $
134 |                      BS (fp `plusForeignPtr` used) (len - used)
135 | #else
136 |     -- All done
137 |     consume ns cnt acc Empty = convert acc cnt ns Empty
138 |     -- Process next chunk
139 |     consume ns cnt acc (Chunk (BS fp len) cs)
140 |         = case natdigits fp len acc cnt ns of
141 |             Result used acc' cnt' ns'
142 |                 | used == len -- process more chunks
143 |                   -> consume ns' cnt' acc' cs
144 |                 | otherwise   -- ran into a non-digit
145 |                   -> let c = Chunk (BS (fp `plusForeignPtr` used) (len - used)) cs
146 |                       in convert acc' cnt' ns' c
147 | #endif
148 |     convert !acc !cnt !ns rest =
149 |         let !n = combine acc cnt ns
150 |          in (n, rest)
151 | 
152 |     -- | Merge least-significant word with reduction of of little-endian tail.
153 |     --
154 |     -- The input is:
155 |     --
156 |     -- * Least significant digits as a 'Word' (LSW)
157 |     -- * The number of digits that went into the LSW
158 |     -- * All the remaining digit groups ('safeLog' digits each),
159 |     --   in little-endian order
160 |     --
161 |     -- The result is obtained by pairwise recursive combining of all the
162 |     -- full size digit groups, followed by multiplication by @10^cnt@ and
163 |     -- addition of the LSW.
164 |     combine :: Word      -- ^ value of LSW
165 |             -> Int       -- ^ count of digits in LSW
166 |             -> [Natural] -- ^ tail elements (base @10^'safeLog'@)
167 |             -> Natural
168 |     {-# INLINE combine #-}
169 |     combine !acc !_   [] = wordToNatural acc
170 |     combine !acc !cnt ns =
171 |         wordToNatural (10^cnt) * combine1 safeBase ns + wordToNatural acc
172 | 
173 |     -- | Recursive reduction of little-endian sequence of 'Natural'-valued
174 |     -- /digits/ in base @base@ (a power of 10).  The base is squared after
175 |     -- each round.  This shows better asymptotic performance than one word
176 |     -- at a time multiply-add folds.  See:
177 |     -- <https://gmplib.org/manual/Multiplication-Algorithms>
178 |     --
179 |     combine1 :: Natural -> [Natural] -> Natural
180 |     combine1 _    [n] = n
181 |     combine1 base ns  = combine1 (base * base) (combine2 base ns)
182 | 
183 |     -- | One round pairwise merge of numbers in base @base@.
184 |     combine2 :: Natural -> [Natural] -> [Natural]
185 |     combine2 base (n:m:ns) = let !t = m * base + n in t : combine2 base ns
186 |     combine2 _    ns       = ns
187 | 
188 | -- The intermediate representation is a little-endian sequence in base
189 | -- @10^'safeLog'@, prefixed by an initial element in base @10^cnt@ for some
190 | -- @cnt@ between 1 and 'safeLog'.  The final result is obtained by recursive
191 | -- pairwise merging of the tail followed by a final multiplication by @10^cnt@
192 | -- and addition of the head.
193 | --
194 | natdigits :: ForeignPtr Word8 -- ^ Input chunk
195 |           -> Int              -- ^ Chunk length
196 |           -> Word             -- ^ accumulated element
197 |           -> Int              -- ^ partial digit count
198 |           -> [Natural]        -- ^ accumulated MSB elements
199 |           -> Result
200 | {-# INLINE natdigits #-}
201 | natdigits fp len = \ acc cnt ns ->
202 |     BI.accursedUnutterablePerformIO $
203 |         BI.unsafeWithForeignPtr fp $ \ ptr -> do
204 |             let end = ptr `plusPtr` len
205 |             go ptr end acc cnt ns ptr
206 |   where
207 |     go !start !end = loop
208 |       where
209 |         loop :: Word -> Int -> [Natural] -> Ptr Word8 -> IO Result
210 |         loop !acc !cnt ns !ptr = getDigit >>= \ !d ->
211 |             if | d > 9
212 |                  -> return $ Result (ptr `minusPtr` start) acc cnt ns
213 |                | cnt < safeLog
214 |                  -> loop (10*acc + d) (cnt+1) ns $ ptr `plusPtr` 1
215 |                | otherwise
216 |                  -> let !acc' = wordToNatural acc
217 |                      in loop d 1 (acc' : ns) $ ptr `plusPtr` 1
218 |           where
219 |             getDigit | ptr /= end = fromDigit <$> peek ptr
220 |                      | otherwise  = pure 10  -- End of input
221 |             {-# NOINLINE getDigit #-}
222 |             -- 'getDigit' makes it possible to implement a single success
223 |             -- exit point from the loop.  If instead we return 'Result'
224 |             -- from multiple places, when 'natdigits' is inlined we get (at
225 |             -- least GHC 8.10 through 9.2) for each exit path a separate
226 |             -- join point implementing the continuation code.  GHC ticket
227 |             -- <https://gitlab.haskell.org/ghc/ghc/-/issues/20739>.
228 |             --
229 |             -- The NOINLINE pragma is required to avoid inlining branches
230 |             -- that would restore multiple exit points.
231 | 
232 | ----- Misc functions
233 | 
234 | -- | Largest decimal digit count that never overflows the accumulator
235 | -- The base 10 logarithm of 2 is ~0.30103, therefore 2^n has at least
236 | -- @1 + floor (0.3 n)@ decimal digits.  Therefore @floor (0.3 n)@,
237 | -- digits cannot overflow the upper bound of an @n-bit@ word.
238 | --
239 | safeLog :: Int
240 | safeLog = 3 * finiteBitSize @Word 0 `div` 10
241 | 
242 | -- | 10-power base for little-endian sequence of ~Word-sized "digits"
243 | safeBase :: Natural
244 | safeBase = 10 ^ safeLog
245 | 
246 | fromDigit :: Word8 -> Word
247 | {-# INLINE fromDigit #-}
248 | fromDigit = \ !w -> fromIntegral w - 0x30 -- i.e. w - '0'
249 | 
250 | wordToNatural :: Word -> Natural
251 | {-# INLINE wordToNatural #-}
252 | wordToNatural  = fromIntegral
253 | 


--------------------------------------------------------------------------------
/Data/ByteString/ReadInt.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE CPP #-}
2 | #define BYTESTRING_STRICT
3 | #include "Lazy/ReadInt.hs"
4 | 


--------------------------------------------------------------------------------
/Data/ByteString/ReadNat.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE CPP #-}
2 | #define BYTESTRING_STRICT
3 | #include "Lazy/ReadNat.hs"
4 | 


--------------------------------------------------------------------------------
/Data/ByteString/Short.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE Trustworthy #-}
  2 | 
  3 | -- |
  4 | -- Module      : Data.ByteString.Short
  5 | -- Copyright   : (c) Duncan Coutts 2012-2013, Julian Ospald 2022
  6 | -- License     : BSD-style
  7 | --
  8 | -- Maintainer  : hasufell@posteo.de
  9 | -- Stability   : stable
 10 | -- Portability : ghc only
 11 | --
 12 | -- A compact representation suitable for storing short byte strings in memory.
 13 | --
 14 | -- In typical use cases it can be imported alongside "Data.ByteString", e.g.
 15 | --
 16 | -- > import qualified Data.ByteString       as B
 17 | -- > import qualified Data.ByteString.Short as B
 18 | -- >          (ShortByteString, toShort, fromShort)
 19 | --
 20 | -- Other 'ShortByteString' operations clash with "Data.ByteString" or "Prelude"
 21 | -- functions however, so they should be imported @qualified@ with a different
 22 | -- alias e.g.
 23 | --
 24 | -- > import qualified Data.ByteString.Short as B.Short
 25 | --
 26 | module Data.ByteString.Short (
 27 | 
 28 |     -- * The @ShortByteString@ type
 29 | 
 30 |     ShortByteString(..),
 31 | 
 32 |     -- ** Memory overhead
 33 |     -- | With GHC, the memory overheads are as follows, expressed in words and
 34 |     -- in bytes (words are 4 and 8 bytes on 32 or 64bit machines respectively).
 35 |     --
 36 |     -- * t'Data.ByteString.ByteString' unshared: 8 words; 32 or 64 bytes.
 37 |     --
 38 |     -- * t'Data.ByteString.ByteString' shared substring: 4 words; 16 or 32 bytes.
 39 |     --
 40 |     -- * 'ShortByteString': 4 words; 16 or 32 bytes.
 41 |     --
 42 |     -- For the string data itself, both 'ShortByteString' and t'Data.ByteString.ByteString' use
 43 |     -- one byte per element, rounded up to the nearest word. For example,
 44 |     -- including the overheads, a length 10 'ShortByteString' would take
 45 |     -- @16 + 12 = 28@ bytes on a 32bit platform and @32 + 16 = 48@ bytes on a
 46 |     -- 64bit platform.
 47 |     --
 48 |     -- These overheads can all be reduced by 1 word (4 or 8 bytes) when the
 49 |     -- 'ShortByteString' or t'Data.ByteString.ByteString' is unpacked into another constructor.
 50 |     --
 51 |     -- For example:
 52 |     --
 53 |     -- > data ThingId = ThingId {-# UNPACK #-} !Int
 54 |     -- >                        {-# UNPACK #-} !ShortByteString
 55 |     --
 56 |     -- This will take @1 + 1 + 3@ words (the @ThingId@ constructor +
 57 |     -- unpacked @Int@ + unpacked @ShortByteString@), plus the words for the
 58 |     -- string data.
 59 | 
 60 |     -- ** Heap fragmentation
 61 |     -- | With GHC, the t'Data.ByteString.ByteString' representation uses /pinned/ memory,
 62 |     -- meaning it cannot be moved by the GC. This is usually the right thing to
 63 |     -- do for larger strings, but for small strings using pinned memory can
 64 |     -- lead to heap fragmentation which wastes space. The 'ShortByteString'
 65 |     -- type (and the @Text@ type from the @text@ package) use /unpinned/ memory
 66 |     -- so they do not contribute to heap fragmentation. In addition, with GHC,
 67 |     -- small unpinned strings are allocated in the same way as normal heap
 68 |     -- allocations, rather than in a separate pinned area.
 69 | 
 70 |     -- * Introducing and eliminating 'ShortByteString's
 71 |     empty,
 72 |     singleton,
 73 |     pack,
 74 |     unpack,
 75 |     fromShort,
 76 |     toShort,
 77 | 
 78 |     -- * Basic interface
 79 |     snoc,
 80 |     cons,
 81 |     append,
 82 |     last,
 83 |     tail,
 84 |     uncons,
 85 |     head,
 86 |     init,
 87 |     unsnoc,
 88 |     null,
 89 |     length,
 90 | 
 91 |     -- * Encoding validation
 92 |     isValidUtf8,
 93 | 
 94 |     -- * Transforming ShortByteStrings
 95 |     map,
 96 |     reverse,
 97 |     intercalate,
 98 | 
 99 |     -- * Reducing 'ShortByteString's (folds)
100 |     foldl,
101 |     foldl',
102 |     foldl1,
103 |     foldl1',
104 | 
105 |     foldr,
106 |     foldr',
107 |     foldr1,
108 |     foldr1',
109 | 
110 |     -- ** Special folds
111 |     all,
112 |     any,
113 |     concat,
114 | 
115 |     -- ** Generating and unfolding ByteStrings
116 |     replicate,
117 |     unfoldr,
118 |     unfoldrN,
119 | 
120 |     -- * Substrings
121 | 
122 |     -- ** Breaking strings
123 |     take,
124 |     takeEnd,
125 |     takeWhileEnd,
126 |     takeWhile,
127 |     drop,
128 |     dropEnd,
129 |     dropWhile,
130 |     dropWhileEnd,
131 |     breakEnd,
132 |     break,
133 |     span,
134 |     spanEnd,
135 |     splitAt,
136 |     split,
137 |     splitWith,
138 |     stripSuffix,
139 |     stripPrefix,
140 | 
141 |     -- * Predicates
142 |     isInfixOf,
143 |     isPrefixOf,
144 |     isSuffixOf,
145 | 
146 |     -- ** Search for arbitrary substrings
147 |     breakSubstring,
148 | 
149 |     -- * Searching ShortByteStrings
150 | 
151 |     -- ** Searching by equality
152 |     elem,
153 | 
154 |     -- ** Searching with a predicate
155 |     find,
156 |     filter,
157 |     partition,
158 | 
159 |     -- * Indexing ShortByteStrings
160 |     index,
161 |     indexMaybe,
162 |     (!?),
163 |     elemIndex,
164 |     elemIndices,
165 |     count,
166 |     findIndex,
167 |     findIndices,
168 | 
169 |     -- * Low level conversions
170 |     -- ** Packing 'Foreign.C.String.CString's and pointers
171 |     packCString,
172 |     packCStringLen,
173 | 
174 |     -- ** Using ShortByteStrings as 'Foreign.C.String.CString's
175 |     useAsCString,
176 |     useAsCStringLen,
177 |   ) where
178 | 
179 | import Data.ByteString.Short.Internal
180 | import Prelude ()
181 | 
182 | 


--------------------------------------------------------------------------------
/Data/ByteString/Unsafe.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE Unsafe #-}
  2 | 
  3 | -- |
  4 | -- Module      : Data.ByteString.Unsafe
  5 | -- Copyright   : (c) Don Stewart 2006-2008
  6 | --               (c) Duncan Coutts 2006-2011
  7 | -- License     : BSD-style
  8 | -- Maintainer  : dons00@gmail.com, duncan@community.haskell.org
  9 | -- Stability   : provisional
 10 | -- Portability : non-portable
 11 | --
 12 | -- A module containing unsafe 'ByteString' operations.
 13 | --
 14 | -- While these functions have a stable API and you may use these functions in
 15 | -- applications, do carefully consider the documented pre-conditions;
 16 | -- incorrect use can break referential transparency or worse.
 17 | --
 18 | module Data.ByteString.Unsafe (
 19 | 
 20 |         -- * Unchecked access
 21 |         unsafeHead,
 22 |         unsafeTail,
 23 |         unsafeInit,
 24 |         unsafeLast,
 25 |         unsafeIndex,
 26 |         unsafeTake,
 27 |         unsafeDrop,
 28 | 
 29 |         -- * Low level interaction with CStrings
 30 |         -- ** Using ByteStrings with functions for CStrings
 31 |         unsafeUseAsCString,
 32 |         unsafeUseAsCStringLen,
 33 | 
 34 |         -- ** Converting CStrings to ByteStrings
 35 |         unsafePackCString,
 36 |         unsafePackCStringLen,
 37 |         unsafePackMallocCString,
 38 |         unsafePackMallocCStringLen,
 39 | 
 40 |         unsafePackAddress,
 41 |         unsafePackAddressLen,
 42 |         unsafePackCStringFinalizer,
 43 |         unsafeFinalize,
 44 | 
 45 |   ) where
 46 | 
 47 | import Data.ByteString.Internal
 48 | 
 49 | import Foreign.ForeignPtr       (newForeignPtr_, newForeignPtr, withForeignPtr)
 50 | 
 51 | import Foreign.Storable         (Storable(..))
 52 | import Foreign.C.String         (CString, CStringLen)
 53 | 
 54 | import Control.Exception        (assert)
 55 | 
 56 | import Data.Word                (Word8)
 57 | 
 58 | import qualified Foreign.ForeignPtr as FC (finalizeForeignPtr)
 59 | import qualified Foreign.Concurrent as FC (newForeignPtr)
 60 | 
 61 | import GHC.Exts                 (Addr#)
 62 | import GHC.Ptr                  (Ptr(..), castPtr)
 63 | 
 64 | -- ---------------------------------------------------------------------
 65 | --
 66 | -- Extensions to the basic interface
 67 | --
 68 | 
 69 | -- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' omits the
 70 | -- check for the empty case, so there is an obligation on the programmer
 71 | -- to provide a proof that the ByteString is non-empty.
 72 | unsafeHead :: ByteString -> Word8
 73 | unsafeHead (BS x l) = assert (l > 0) $
 74 |     accursedUnutterablePerformIO $ unsafeWithForeignPtr x $ \p -> peek p
 75 | {-# INLINE unsafeHead #-}
 76 | 
 77 | -- | A variety of 'tail' for non-empty ByteStrings. 'unsafeTail' omits the
 78 | -- check for the empty case. As with 'unsafeHead', the programmer must
 79 | -- provide a separate proof that the ByteString is non-empty.
 80 | unsafeTail :: ByteString -> ByteString
 81 | unsafeTail (BS ps l) = assert (l > 0) $ BS (plusForeignPtr ps 1) (l-1)
 82 | {-# INLINE unsafeTail #-}
 83 | 
 84 | -- | A variety of 'init' for non-empty ByteStrings. 'unsafeInit' omits the
 85 | -- check for the empty case. As with 'unsafeHead', the programmer must
 86 | -- provide a separate proof that the ByteString is non-empty.
 87 | unsafeInit :: ByteString -> ByteString
 88 | unsafeInit (BS ps l) = assert (l > 0) $ BS ps (l-1)
 89 | {-# INLINE unsafeInit #-}
 90 | 
 91 | -- | A variety of 'last' for non-empty ByteStrings. 'unsafeLast' omits the
 92 | -- check for the empty case. As with 'unsafeHead', the programmer must
 93 | -- provide a separate proof that the ByteString is non-empty.
 94 | unsafeLast :: ByteString -> Word8
 95 | unsafeLast (BS x l) = assert (l > 0) $
 96 |     accursedUnutterablePerformIO $ unsafeWithForeignPtr x $ \p -> peekByteOff p (l-1)
 97 | {-# INLINE unsafeLast #-}
 98 | 
 99 | -- | Unsafe 'ByteString' index (subscript) operator, starting from 0, returning a 'Word8'
100 | -- This omits the bounds check, which means there is an accompanying
101 | -- obligation on the programmer to ensure the bounds are checked in some
102 | -- other way.
103 | unsafeIndex :: ByteString -> Int -> Word8
104 | unsafeIndex (BS x l) i = assert (i >= 0 && i < l) $
105 |     accursedUnutterablePerformIO $ unsafeWithForeignPtr x $ \p -> peekByteOff p i
106 | {-# INLINE unsafeIndex #-}
107 | 
108 | -- | A variety of 'take' which omits the checks on @n@ so there is an
109 | -- obligation on the programmer to provide a proof that @0 <= n <= 'length' xs@.
110 | unsafeTake :: Int -> ByteString -> ByteString
111 | unsafeTake n (BS x l) = assert (0 <= n && n <= l) $ BS x n
112 | {-# INLINE unsafeTake #-}
113 | 
114 | -- | A variety of 'drop' which omits the checks on @n@ so there is an
115 | -- obligation on the programmer to provide a proof that @0 <= n <= 'length' xs@.
116 | unsafeDrop  :: Int -> ByteString -> ByteString
117 | unsafeDrop n (BS x l) = assert (0 <= n && n <= l) $ BS (plusForeignPtr x n) (l-n)
118 | {-# INLINE unsafeDrop #-}
119 | 
120 | 
121 | -- | /O(1)/ 'unsafePackAddressLen' provides constant-time construction of
122 | -- 'ByteString's, which is ideal for string literals. It packs a sequence
123 | -- of bytes into a 'ByteString', given a raw 'Addr#' to the string, and
124 | -- the length of the string.
125 | --
126 | -- This function is /unsafe/ in two ways:
127 | --
128 | -- * the length argument is assumed to be correct. If the length
129 | -- argument is incorrect, it is possible to overstep the end of the
130 | -- byte array.
131 | --
132 | -- * if the underlying 'Addr#' is later modified, this change will be
133 | -- reflected in the resulting 'ByteString', breaking referential
134 | -- transparency.
135 | --
136 | -- If in doubt, don't use this function.
137 | --
138 | unsafePackAddressLen :: Int -> Addr# -> IO ByteString
139 | unsafePackAddressLen len addr# = do
140 |     p <- newForeignPtr_ (Ptr addr#)
141 |     return $ BS p len
142 | {-# INLINE unsafePackAddressLen #-}
143 | 
144 | -- | /O(1)/ Construct a 'ByteString' given a Ptr Word8 to a buffer, a
145 | -- length, and an IO action representing a finalizer. This function is
146 | -- not available on Hugs.
147 | --
148 | -- This function is /unsafe/, it is possible to break referential
149 | -- transparency by modifying the underlying buffer pointed to by the
150 | -- first argument. Any changes to the original buffer will be reflected
151 | -- in the resulting 'ByteString'.
152 | --
153 | unsafePackCStringFinalizer :: Ptr Word8 -> Int -> IO () -> IO ByteString
154 | unsafePackCStringFinalizer p l f = do
155 |     fp <- FC.newForeignPtr p f
156 |     return $ BS fp l
157 | 
158 | -- | Explicitly run the finaliser associated with a 'ByteString'.
159 | -- References to this value after finalisation may generate invalid memory
160 | -- references.
161 | --
162 | -- This function is /unsafe/, as there may be other
163 | -- 'ByteString's referring to the same underlying pages. If you use
164 | -- this, you need to have a proof of some kind that all 'ByteString's
165 | -- ever generated from the underlying byte array are no longer live.
166 | --
167 | unsafeFinalize :: ByteString -> IO ()
168 | unsafeFinalize (BS p _) = FC.finalizeForeignPtr p
169 | 
170 | ------------------------------------------------------------------------
171 | -- Packing CStrings into ByteStrings
172 | 
173 | -- | /O(n)/ Build a 'ByteString' from a 'CString'. This value will have /no/
174 | -- finalizer associated to it, and will not be garbage collected by
175 | -- Haskell. The ByteString length is calculated using /strlen(3)/,
176 | -- and thus the complexity is a /O(n)/.
177 | --
178 | -- This function is /unsafe/. If the 'CString' is later modified, this
179 | -- change will be reflected in the resulting 'ByteString', breaking
180 | -- referential transparency.
181 | --
182 | unsafePackCString :: CString -> IO ByteString
183 | unsafePackCString cstr = do
184 |     fp <- newForeignPtr_ (castPtr cstr)
185 |     l <- c_strlen cstr
186 |     return $! BS fp (fromIntegral l)
187 | 
188 | -- | /O(1)/ Build a 'ByteString' from a 'CStringLen'. This value will
189 | -- have /no/ finalizer associated with it, and will not be garbage
190 | -- collected by Haskell. This operation has /O(1)/ complexity as we
191 | -- already know the final size, so no /strlen(3)/ is required.
192 | --
193 | -- This function is /unsafe/. If the original 'CStringLen' is later
194 | -- modified, this change will be reflected in the resulting 'ByteString',
195 | -- breaking referential transparency.
196 | --
197 | unsafePackCStringLen :: CStringLen -> IO ByteString
198 | unsafePackCStringLen (ptr,len) = do
199 |     fp <- newForeignPtr_ (castPtr ptr)
200 |     return $! BS fp (fromIntegral len)
201 | 
202 | -- | /O(n)/ Build a 'ByteString' from a malloced 'CString'. This value will
203 | -- have a @free(3)@ finalizer associated to it.
204 | --
205 | -- This function is /unsafe/. If the original 'CString' is later
206 | -- modified, this change will be reflected in the resulting 'ByteString',
207 | -- breaking referential transparency.
208 | --
209 | -- This function is also unsafe if you call its finalizer twice,
210 | -- which will result in a /double free/ error, or if you pass it
211 | -- a 'CString' not allocated with 'Foreign.Marshal.Alloc.malloc'.
212 | --
213 | unsafePackMallocCString :: CString -> IO ByteString
214 | unsafePackMallocCString cstr = do
215 |     fp <- newForeignPtr c_free_finalizer (castPtr cstr)
216 |     len <- c_strlen cstr
217 |     return $! BS fp (fromIntegral len)
218 | 
219 | -- | /O(1)/ Build a 'ByteString' from a malloced 'CStringLen'. This
220 | -- value will have a @free(3)@ finalizer associated to it.
221 | --
222 | -- This function is /unsafe/. If the original 'CString' is later
223 | -- modified, this change will be reflected in the resulting 'ByteString',
224 | -- breaking referential transparency.
225 | --
226 | -- This function is also unsafe if you call its finalizer twice,
227 | -- which will result in a /double free/ error, or if you pass it
228 | -- a 'CString' not allocated with 'Foreign.Marshal.Alloc.malloc'.
229 | --
230 | unsafePackMallocCStringLen :: CStringLen -> IO ByteString
231 | unsafePackMallocCStringLen (cstr, len) = do
232 |     fp <- newForeignPtr c_free_finalizer (castPtr cstr)
233 |     return $! BS fp len
234 | 
235 | -- ---------------------------------------------------------------------
236 | 
237 | -- | /O(1) construction/ Use a 'ByteString' with a function requiring a
238 | -- 'CString'.
239 | --
240 | -- This function does zero copying, and merely unwraps a 'ByteString' to
241 | -- appear as a 'CString'. It is /unsafe/ in two ways:
242 | --
243 | -- * After calling this function the 'CString' shares the underlying
244 | -- byte buffer with the original 'ByteString'. Thus modifying the
245 | -- 'CString', either in C, or using poke, will cause the contents of the
246 | -- 'ByteString' to change, breaking referential transparency. Other
247 | -- 'ByteString's created by sharing (such as those produced via 'take'
248 | -- or 'drop') will also reflect these changes. Modifying the 'CString'
249 | -- will break referential transparency. To avoid this, use
250 | -- 'Data.ByteString.useAsCString', which makes a copy of the original 'ByteString'.
251 | --
252 | -- * 'CString's are often passed to functions that require them to be
253 | -- null-terminated. If the original 'ByteString' wasn't null terminated,
254 | -- neither will the 'CString' be. It is the programmers responsibility
255 | -- to guarantee that the 'ByteString' is indeed null terminated. If in
256 | -- doubt, use 'Data.ByteString.useAsCString'.
257 | --
258 | -- * The memory may freed at any point after the subcomputation
259 | -- terminates, so the pointer to the storage must *not* be used
260 | -- after this.
261 | --
262 | unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a
263 | unsafeUseAsCString (BS ps _) action = withForeignPtr ps $ \p -> action (castPtr p)
264 | -- Cannot use unsafeWithForeignPtr, because action can diverge
265 | 
266 | -- | /O(1) construction/ Use a 'ByteString' with a function requiring a
267 | -- 'CStringLen'.
268 | --
269 | -- This function does zero copying, and merely unwraps a 'ByteString' to
270 | -- appear as a 'CStringLen'. It is /unsafe/:
271 | --
272 | -- * After calling this function the 'CStringLen' shares the underlying
273 | -- byte buffer with the original 'ByteString'. Thus modifying the
274 | -- 'CStringLen', either in C, or using poke, will cause the contents of the
275 | -- 'ByteString' to change, breaking referential transparency. Other
276 | -- 'ByteString's created by sharing (such as those produced via 'take'
277 | -- or 'drop') will also reflect these changes. Modifying the 'CStringLen'
278 | -- will break referential transparency. To avoid this, use
279 | -- 'Data.ByteString.useAsCStringLen', which makes a copy of the original 'ByteString'.
280 | --
281 | -- If 'Data.ByteString.empty' is given, it will pass @('Foreign.Ptr.nullPtr', 0)@.
282 | unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
283 | unsafeUseAsCStringLen (BS ps l) action = withForeignPtr ps $ \p -> action (castPtr p, l)
284 | -- Cannot use unsafeWithForeignPtr, because action can diverge
285 | 


--------------------------------------------------------------------------------
/Data/ByteString/Utils/ByteOrder.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE CPP #-}
 2 | 
 3 | #include "MachDeps.h"
 4 | 
 5 | -- | Why does this module exist? There is "GHC.ByteOrder" in base.
 6 | -- But that module is /broken/ until base-4.14/ghc-8.10, so we
 7 | -- can't rely on it until we drop support for older ghcs.
 8 | -- See https://gitlab.haskell.org/ghc/ghc/-/issues/20338
 9 | -- and https://gitlab.haskell.org/ghc/ghc/-/issues/18445
10 | 
11 | module Data.ByteString.Utils.ByteOrder
12 |   ( ByteOrder(..)
13 |   , hostByteOrder
14 |   , whenLittleEndian
15 |   , whenBigEndian
16 |   ) where
17 | 
18 | import GHC.ByteOrder (ByteOrder(..))
19 | 
20 | hostByteOrder :: ByteOrder
21 | hostByteOrder =
22 | #ifdef WORDS_BIGENDIAN
23 |   BigEndian
24 | #else
25 |   LittleEndian
26 | #endif
27 | 
28 | -- | If the host is little-endian, applies the given function to the given arg.
29 | -- If the host is big-endian, returns the second argument unchanged.
30 | whenLittleEndian :: (a -> a) -> a -> a
31 | whenLittleEndian fun val = case hostByteOrder of
32 |   LittleEndian -> fun val
33 |   BigEndian    -> val
34 | 
35 | -- | If the host is little-endian, returns the second argument unchanged.
36 | -- If the host is big-endian, applies the given function to the given arg.
37 | whenBigEndian :: (a -> a) -> a -> a
38 | whenBigEndian fun val = case hostByteOrder of
39 |   LittleEndian -> val
40 |   BigEndian    -> fun val
41 | 


--------------------------------------------------------------------------------
/Data/ByteString/Utils/UnalignedAccess.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE CPP #-}
 2 | 
 3 | #include "bytestring-cpp-macros.h"
 4 | 
 5 | -- |
 6 | -- Module      : Data.ByteString.Utils.UnalignedAccess
 7 | -- Copyright   : (c) Matthew Craven 2023-2024
 8 | -- License     : BSD-style
 9 | -- Maintainer  : clyring@gmail.com
10 | -- Stability   : internal
11 | -- Portability : non-portable
12 | --
13 | -- Primitives for reading and writing at potentially-unaligned memory locations
14 | 
15 | module Data.ByteString.Utils.UnalignedAccess
16 |   ( unalignedWriteU16
17 |   , unalignedWriteU32
18 |   , unalignedWriteU64
19 |   , unalignedWriteFloat
20 |   , unalignedWriteDouble
21 |   , unalignedReadU64
22 |   ) where
23 | 
24 | import Foreign.Ptr
25 | import Data.Word
26 | 
27 | 
28 | #if HS_UNALIGNED_ADDR_PRIMOPS_AVAILABLE
29 | import GHC.IO (IO(..))
30 | import GHC.Word (Word16(..), Word32(..), Word64(..))
31 | import GHC.Exts
32 | 
33 | unalignedWriteU16 :: Word16 -> Ptr Word8 -> IO ()
34 | unalignedWriteU16 = coerce $ \(W16# x#) (Ptr p#) s
35 |   -> (# writeWord8OffAddrAsWord16# p# 0# x# s, () #)
36 | 
37 | unalignedWriteU32 :: Word32 -> Ptr Word8 -> IO ()
38 | unalignedWriteU32 = coerce $ \(W32# x#) (Ptr p#) s
39 |   -> (# writeWord8OffAddrAsWord32# p# 0# x# s, () #)
40 | 
41 | unalignedWriteU64 :: Word64 -> Ptr Word8 -> IO ()
42 | unalignedWriteU64 = coerce $ \(W64# x#) (Ptr p#) s
43 |   -> (# writeWord8OffAddrAsWord64# p# 0# x# s, () #)
44 | 
45 | unalignedWriteFloat :: Float -> Ptr Word8 -> IO ()
46 | unalignedWriteFloat = coerce $ \(F# x#) (Ptr p#) s
47 |   -> (# writeWord8OffAddrAsFloat# p# 0# x# s, () #)
48 | 
49 | unalignedWriteDouble :: Double -> Ptr Word8 -> IO ()
50 | unalignedWriteDouble = coerce $ \(D# x#) (Ptr p#) s
51 |   -> (# writeWord8OffAddrAsDouble# p# 0# x# s, () #)
52 | 
53 | unalignedReadU64 :: Ptr Word8 -> IO Word64
54 | unalignedReadU64 = coerce $ \(Ptr p#) s
55 |   -> case readWord8OffAddrAsWord64# p# 0# s of
56 |        (# s', w64# #) -> (# s', W64# w64# #)
57 | 
58 | #elif HS_UNALIGNED_POKES_OK
59 | import Foreign.Storable
60 | 
61 | unalignedWriteU16 :: Word16 -> Ptr Word8 -> IO ()
62 | unalignedWriteU16 x p = poke (castPtr p) x
63 | 
64 | unalignedWriteU32 :: Word32 -> Ptr Word8 -> IO ()
65 | unalignedWriteU32 x p = poke (castPtr p) x
66 | 
67 | unalignedWriteU64 :: Word64 -> Ptr Word8 -> IO ()
68 | unalignedWriteU64 x p = poke (castPtr p) x
69 | 
70 | unalignedWriteFloat :: Float -> Ptr Word8 -> IO ()
71 | unalignedWriteFloat x p = poke (castPtr p) x
72 | 
73 | unalignedWriteDouble :: Double -> Ptr Word8 -> IO ()
74 | unalignedWriteDouble x p = poke (castPtr p) x
75 | 
76 | unalignedReadU64 :: Ptr Word8 -> IO Word64
77 | unalignedReadU64 p = peek (castPtr p)
78 | 
79 | #else
80 | foreign import ccall unsafe "static fpstring.h fps_unaligned_write_u16"
81 |   unalignedWriteU16 :: Word16 -> Ptr Word8 -> IO ()
82 | foreign import ccall unsafe "static fpstring.h fps_unaligned_write_u32"
83 |   unalignedWriteU32 :: Word32 -> Ptr Word8 -> IO ()
84 | foreign import ccall unsafe "static fpstring.h fps_unaligned_write_u64"
85 |   unalignedWriteU64 :: Word64 -> Ptr Word8 -> IO ()
86 | foreign import ccall unsafe "static fpstring.h fps_unaligned_write_HsFloat"
87 |   unalignedWriteFloat :: Float -> Ptr Word8 -> IO ()
88 | foreign import ccall unsafe "static fpstring.h fps_unaligned_write_HsDouble"
89 |   unalignedWriteDouble :: Double -> Ptr Word8 -> IO ()
90 | foreign import ccall unsafe "static fpstring.h fps_unaligned_read_u64"
91 |   unalignedReadU64 :: Ptr Word8 -> IO Word64
92 | #endif
93 | 
94 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) Don Stewart 2005-2009
 2 |           (c) Duncan Coutts 2006-2015
 3 |           (c) David Roundy 2003-2005
 4 |           (c) Simon Meier 2010-2011
 5 |           (c) Koz Ross 2021
 6 | 
 7 | All rights reserved.
 8 | 
 9 | Redistribution and use in source and binary forms, with or without
10 | modification, are permitted provided that the following conditions
11 | are met:
12 | 1. Redistributions of source code must retain the above copyright
13 |    notice, this list of conditions and the following disclaimer.
14 | 2. Redistributions in binary form must reproduce the above copyright
15 |    notice, this list of conditions and the following disclaimer in the
16 |    documentation and/or other materials provided with the distribution.
17 | 3. Neither the name of the author nor the names of his contributors
18 |    may be used to endorse or promote products derived from this software
19 |    without specific prior written permission.
20 | 
21 | THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 | ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
25 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 | OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 | SUCH DAMAGE.
32 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # ByteString: Fast, Packed Strings of Bytes
 2 | 
 3 | [![Build Status](https://github.com/haskell/bytestring/workflows/ci/badge.svg)](https://github.com/haskell/bytestring/actions?query=workflow%3Aci) [![Hackage](http://img.shields.io/hackage/v/bytestring.svg)](https://hackage.haskell.org/package/bytestring) [![Stackage LTS](http://stackage.org/package/bytestring/badge/lts)](http://stackage.org/lts/package/bytestring) [![Stackage Nightly](http://stackage.org/package/bytestring/badge/nightly)](http://stackage.org/nightly/package/bytestring)
 4 | 
 5 | This library provides the `Data.ByteString` module -- strict and lazy
 6 | byte arrays manipulable as strings -- providing very time/space-efficient
 7 | string and IO operations.
 8 | 
 9 | For very large data requirements, or constraints on heap size,
10 | `Data.ByteString.Lazy` is provided, a lazy list of bytestring chunks.
11 | Efficient processing of multi-gigabyte data can be achieved this way.
12 | 
13 | The library also provides `Data.ByteString.Builder` for efficient construction
14 | of `ByteString` values from smaller pieces during binary serialization.
15 | 
16 | Requirements:
17 | 
18 |   * Cabal 2.2 or greater
19 |   * GHC 8.4 or greater
20 | 
21 | ### Authors
22 | 
23 | `ByteString` was derived from the GHC `PackedString` library,
24 | originally written by Bryan O'Sullivan, and then by Simon Marlow.
25 | It was adapted and greatly extended for darcs by David Roundy and
26 | others. Don Stewart and Duncan Coutts cleaned up and further extended
27 | the implementation and added the `.Lazy` code. Simon Meier contributed
28 | the `Builder` feature.
29 | 


--------------------------------------------------------------------------------
/Setup.hs:
--------------------------------------------------------------------------------
1 | import Distribution.Simple
2 | main = defaultMain
3 | 


--------------------------------------------------------------------------------
/bench/BenchBoundsCheckFusion.hs:
--------------------------------------------------------------------------------
  1 | -- |
  2 | -- Copyright   : (c) 2011 Simon Meier
  3 | -- License     : BSD3-style (see LICENSE)
  4 | --
  5 | -- Maintainer  : Simon Meier <iridcode@gmail.com>
  6 | -- Stability   : experimental
  7 | -- Portability : tested on GHC only
  8 | --
  9 | -- Benchmark that the bounds checks fuse.
 10 | 
 11 | module BenchBoundsCheckFusion (benchBoundsCheckFusion) where
 12 | 
 13 | import Prelude hiding (words)
 14 | import Data.Monoid
 15 | import Data.Foldable (foldMap)
 16 | import Test.Tasty.Bench
 17 | 
 18 | import qualified Data.ByteString                  as S
 19 | import qualified Data.ByteString.Lazy             as L
 20 | 
 21 | import           Data.ByteString.Builder
 22 | import           Data.ByteString.Builder.Extra
 23 | import           Data.ByteString.Builder.Prim
 24 |                    ( FixedPrim, BoundedPrim, (>$<), (>*<) )
 25 | import qualified Data.ByteString.Builder.Prim          as P
 26 | import qualified Data.ByteString.Builder.Internal      as I
 27 | import qualified Data.ByteString.Builder.Prim.Internal as I
 28 | 
 29 | import Foreign
 30 | 
 31 | ------------------------------------------------------------------------------
 32 | -- Benchmark support
 33 | ------------------------------------------------------------------------------
 34 | 
 35 | countToZero :: Int -> Maybe (Int, Int)
 36 | countToZero 0 = Nothing
 37 | countToZero n = Just (n, n - 1)
 38 | 
 39 | 
 40 | ------------------------------------------------------------------------------
 41 | -- Benchmark
 42 | ------------------------------------------------------------------------------
 43 | 
 44 | -- input data (NOINLINE to ensure memoization)
 45 | ----------------------------------------------
 46 | 
 47 | -- | Few-enough repetitions to avoid making GC too expensive.
 48 | nRepl :: Int
 49 | nRepl = 10000
 50 | 
 51 | {-# NOINLINE intData #-}
 52 | intData :: [Int]
 53 | intData = [1..nRepl]
 54 | 
 55 | -- benchmark wrappers
 56 | ---------------------
 57 | 
 58 | {-# INLINE benchB #-}
 59 | benchB :: String -> a -> (a -> Builder) -> Benchmark
 60 | benchB name x b =
 61 |     bench (name ++" (" ++ show nRepl ++ ")") $
 62 |         whnf (L.length . toLazyByteString . b) x
 63 | 
 64 | {-# INLINE benchBInts #-}
 65 | benchBInts :: String -> ([Int] -> Builder) -> Benchmark
 66 | benchBInts name = benchB name intData
 67 | 
 68 | 
 69 | -- benchmarks
 70 | -------------
 71 | 
 72 | benchBoundsCheckFusion :: Benchmark
 73 | benchBoundsCheckFusion = bgroup "BoundsCheckFusion"
 74 |     [ bgroup "Data.ByteString.Builder"
 75 |         [ benchBInts "foldMap (left-assoc)" $
 76 |             foldMap (\x -> (stringUtf8 "s" `mappend` intHost x) `mappend` intHost x)
 77 | 
 78 |         , benchBInts "foldMap (right-assoc)" $
 79 |             foldMap (\x -> intHost x `mappend` (intHost x `mappend` stringUtf8 "s"))
 80 | 
 81 |         , benchBInts "foldMap [manually fused, left-assoc]" $
 82 |             foldMap (\x -> stringUtf8 "s" `mappend` P.primBounded (P.liftFixedToBounded $ P.intHost >*< P.intHost) (x, x))
 83 | 
 84 |         , benchBInts "foldMap [manually fused, right-assoc]" $
 85 |             foldMap (\x -> P.primBounded (P.liftFixedToBounded $ P.intHost >*< P.intHost) (x, x) `mappend` stringUtf8 "s")
 86 |         ]
 87 |     ]
 88 | 
 89 | {-# RULES
 90 | 
 91 | "append/encodeWithB" forall w1 w2 x1 x2.
 92 |        I.append (P.primBounded w1 x1) (P.primBounded w2 x2)
 93 |      = P.primBounded (I.pairB w1 w2) (x1, x2)
 94 | 
 95 | "append/encodeWithB/assoc_r" forall w1 w2 x1 x2 b.
 96 |        I.append (P.primBounded w1 x1) (I.append (P.primBounded w2 x2) b)
 97 |      = I.append (P.primBounded (I.pairB w1 w2) (x1, x2)) b
 98 | 
 99 | "append/encodeWithB/assoc_l" forall w1 w2 x1 x2 b.
100 |        I.append (I.append b (P.primBounded w1 x1)) (P.primBounded w2 x2)
101 |      = I.append b (P.primBounded (I.pairB w1 w2) (x1, x2))
102 |   #-}
103 | 
104 | 


--------------------------------------------------------------------------------
/bench/BenchCount.hs:
--------------------------------------------------------------------------------
 1 | -- |
 2 | -- Copyright   : (c) 2021 Georg Rudoy
 3 | -- License     : BSD3-style (see LICENSE)
 4 | --
 5 | -- Maintainer  : Georg Rudoy <0xd34df00d+github@gmail.com>
 6 | --
 7 | -- Benchmark count
 8 | 
 9 | module BenchCount (benchCount) where
10 | 
11 | import           Test.Tasty.Bench
12 | import qualified Data.ByteString.Char8 as B
13 | 
14 | benchCount :: Benchmark
15 | benchCount = bgroup "Count"
16 |   [ bgroup "no matches, same char"       $ mkBenches (1 : commonSizes) (\s -> B.replicate s 'b')
17 |   , bgroup "no matches, different chars" $ mkBenches      commonSizes  (\s -> genCyclic 10 s 'b')
18 |   , bgroup "some matches, alternating"   $ mkBenches      commonSizes  (\s -> genCyclic 2 s 'a')
19 |   , bgroup "some matches, short cycle"   $ mkBenches      commonSizes  (\s -> genCyclic 5 s 'a')
20 |   , bgroup "some matches, long cycle"    $ mkBenches      commonSizes  (\s -> genCyclic 10 s 'a')
21 |   , bgroup "all matches"                 $ mkBenches (1 : commonSizes) (\s -> B.replicate s 'a')
22 |   ]
23 |   where
24 |     aboveSimdSwitchThreshold = 1030 -- something above the threshold of 1024 that's divisible by cycle lengths
25 |     commonSizes = [ 10, 100, 1000, aboveSimdSwitchThreshold, 10000, 100000, 1000000 ]
26 |     mkBenches sizes gen = [ bench (show size ++ " chars long") $ nf (B.count 'a') (gen size)
27 |                           | size <- sizes
28 |                           ]
29 |     genCyclic cycleLen size from = B.concat $ replicate (size `div` cycleLen) $ B.pack (take cycleLen [from..])
30 | 


--------------------------------------------------------------------------------
/bench/BenchIndices.hs:
--------------------------------------------------------------------------------
 1 | -- |
 2 | -- Copyright   : (c) 2020 Peter Duchovni
 3 | -- License     : BSD3-style (see LICENSE)
 4 | --
 5 | -- Maintainer  : Peter Duchovni <caufeminecraft+github@gmail.com>
 6 | --
 7 | -- Benchmark elemIndex, findIndex, elemIndices, and findIndices
 8 | 
 9 | module BenchIndices (benchIndices) where
10 | 
11 | import           Data.Foldable                         (foldMap)
12 | import           Data.Maybe                            (listToMaybe)
13 | import           Data.Monoid
14 | import           Data.String
15 | import           Test.Tasty.Bench
16 | import           Prelude                               hiding (words, head, tail)
17 | import           Data.Word                             (Word8)
18 | 
19 | import qualified Data.ByteString                       as S
20 | import qualified Data.ByteString.Unsafe                as S
21 | 
22 | 
23 | ------------------------------------------------------------------------------
24 | -- Benchmark
25 | ------------------------------------------------------------------------------
26 | 
27 | -- ASCII \n to ensure no typos
28 | nl :: Word8
29 | nl = 0xa
30 | {-# INLINE nl #-}
31 | 
32 | -- non-inlined equality test
33 | nilEq :: Word8 -> Word8 -> Bool
34 | {-# NOINLINE nilEq #-}
35 | nilEq = (==)
36 | 
37 | -- lines of 200 letters from a to e, followed by repeated letter f
38 | absurdlong :: S.ByteString
39 | absurdlong = (S.replicate 200 0x61 <> S.singleton nl
40 |           <> S.replicate 200 0x62 <> S.singleton nl
41 |           <> S.replicate 200 0x63 <> S.singleton nl
42 |           <> S.replicate 200 0x64 <> S.singleton nl
43 |           <> S.replicate 200 0x65 <> S.singleton nl)
44 |           <> S.replicate 999999 0x66
45 | 
46 | benchIndices :: Benchmark
47 | benchIndices = absurdlong `seq` bgroup "Indices"
48 |     [ bgroup "ByteString strict first index" $
49 |         [ bench "FindIndices" $ nf (listToMaybe . S.findIndices (== nl)) absurdlong
50 |         , bench "ElemIndices" $ nf (listToMaybe . S.elemIndices     nl)  absurdlong
51 |         , bench "FindIndex"   $ nf (S.findIndex (== nl)) absurdlong
52 |         , bench "ElemIndex"   $ nf (S.elemIndex     nl)  absurdlong
53 |         ]
54 |     , bgroup "ByteString strict second index" $
55 |         [ bench "FindIndices" $ nf (listToMaybe . drop 1 . S.findIndices (== nl)) absurdlong
56 |         , bench "ElemIndices" $ nf (listToMaybe . drop 1 . S.elemIndices     nl)  absurdlong
57 |         , bench "FindIndex"   $ nf bench_find_index_second absurdlong
58 |         , bench "ElemIndex"   $ nf bench_elem_index_second absurdlong
59 |         ]
60 |     , bgroup "ByteString index equality inlining" $
61 |         [ bench "FindIndices/inlined"     $ nf (S.findIndices    (== nl)) absurdlong
62 |         , bench "FindIndices/non-inlined" $ nf (S.findIndices (nilEq nl)) absurdlong
63 |         , bench "FindIndex/inlined"       $ nf (S.findIndex      (== nl)) absurdlong
64 |         , bench "FindIndex/non-inlined"   $ nf (S.findIndex   (nilEq nl)) absurdlong
65 |         ]
66 |     ]
67 | 
68 | bench_find_index_second :: S.ByteString -> Maybe Int
69 | bench_find_index_second bs =
70 |   let isNl = (== nl)
71 |    in case S.findIndex isNl bs of
72 |         Just !i -> S.findIndex isNl (S.unsafeDrop (i+1) bs)
73 |         Nothing -> Nothing
74 | {-# INLINE bench_find_index_second #-}
75 | 
76 | bench_elem_index_second :: S.ByteString -> Maybe Int
77 | bench_elem_index_second bs =
78 |     case S.elemIndex nl bs of
79 |         Just !i -> S.elemIndex nl (S.unsafeDrop (i+1) bs)
80 |         Nothing -> Nothing
81 | {-# INLINE bench_elem_index_second #-}
82 | 


--------------------------------------------------------------------------------
/bench/BenchReadInt.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE CPP #-}
  2 | 
  3 | -- |
  4 | -- Copyright   : (c) 2021 Viktor Dukhovni
  5 | -- License     : BSD3-style (see LICENSE)
  6 | --
  7 | -- Maintainer  : Viktor Dukhovni <ietf-dane@dukhovni.org>
  8 | --
  9 | -- Benchmark readInt and variants, readWord and variants,
 10 | -- readInteger and readNatural
 11 | 
 12 | module BenchReadInt (benchReadInt) where
 13 | 
 14 | import qualified Data.ByteString.Builder               as B
 15 | import qualified Data.ByteString.Char8                 as S
 16 | import qualified Data.ByteString.Lazy.Char8            as L
 17 | import Test.Tasty.Bench
 18 | import Data.Int
 19 | import Data.Word
 20 | import Numeric.Natural
 21 | #if !(MIN_VERSION_base(4,11,0))
 22 | import Data.Semigroup (Semigroup((<>)))
 23 | #endif
 24 | import Data.Monoid (mconcat)
 25 | 
 26 | ------------------------------------------------------------------------------
 27 | -- Benchmark
 28 | ------------------------------------------------------------------------------
 29 | 
 30 | -- Sum space-separated integers in a ByteString.
 31 | loopS :: Integral a
 32 |       => (S.ByteString -> Maybe (a, S.ByteString)) -> S.ByteString -> a
 33 | loopS rd = go 0
 34 |   where
 35 |     go !acc !bs = case rd bs of
 36 |         Just (i, t) -> case S.uncons t of
 37 |             Just (_, t') -> go (acc + i) t'
 38 |             Nothing      -> acc + i
 39 |         Nothing          -> acc
 40 | 
 41 | -- Sum space-separated integers in a ByteString.
 42 | loopL :: Integral a
 43 |       => (L.ByteString -> Maybe (a, L.ByteString)) -> L.ByteString -> a
 44 | loopL rd = go 0
 45 |   where
 46 |     go !acc !bs = case rd bs of
 47 |         Just (i, t) -> case L.uncons t of
 48 |             Just (_, t') -> go (acc + i) t'
 49 |             Nothing      -> acc + i
 50 |         Nothing          -> acc
 51 | 
 52 | benchReadInt :: Benchmark
 53 | benchReadInt = bgroup "Read Integral"
 54 |     [ bgroup "Strict"
 55 |         [ bench "ReadInt"     $ nf (loopS S.readInt)     intS
 56 |         , bench "ReadInt8"    $ nf (loopS S.readInt8)    int8S
 57 |         , bench "ReadInt16"   $ nf (loopS S.readInt16)   int16S
 58 |         , bench "ReadInt32"   $ nf (loopS S.readInt32)   int32S
 59 |         , bench "ReadInt64"   $ nf (loopS S.readInt64)   int64S
 60 |         , bench "ReadWord"    $ nf (loopS S.readWord)    wordS
 61 |         , bench "ReadWord8"   $ nf (loopS S.readWord8)   word8S
 62 |         , bench "ReadWord16"  $ nf (loopS S.readWord16)  word16S
 63 |         , bench "ReadWord32"  $ nf (loopS S.readWord32)  word32S
 64 |         , bench "ReadWord64"  $ nf (loopS S.readWord64)  word64S
 65 |         , bench "ReadInteger" $ nf (loopS S.readInteger) bignatS
 66 |         , bench "ReadNatural" $ nf (loopS S.readNatural) bignatS
 67 |         , bench "ReadInteger small" $ nf (loopS S.readInteger) intS
 68 |         , bench "ReadNatural small" $ nf (loopS S.readNatural) wordS
 69 |         ]
 70 | 
 71 |     , bgroup "Lazy"
 72 |         [ bench "ReadInt"     $ nf (loopL L.readInt)     intL
 73 |         , bench "ReadInt8"    $ nf (loopL L.readInt8)    int8L
 74 |         , bench "ReadInt16"   $ nf (loopL L.readInt16)   int16L
 75 |         , bench "ReadInt32"   $ nf (loopL L.readInt32)   int32L
 76 |         , bench "ReadInt64"   $ nf (loopL L.readInt64)   int64L
 77 |         , bench "ReadWord"    $ nf (loopL L.readWord)    wordL
 78 |         , bench "ReadWord8"   $ nf (loopL L.readWord8)   word8L
 79 |         , bench "ReadWord16"  $ nf (loopL L.readWord16)  word16L
 80 |         , bench "ReadWord32"  $ nf (loopL L.readWord32)  word32L
 81 |         , bench "ReadWord64"  $ nf (loopL L.readWord64)  word64L
 82 |         , bench "ReadInteger" $ nf (loopL L.readInteger) bignatL
 83 |         , bench "ReadNatural" $ nf (loopL L.readNatural) bignatL
 84 |         , bench "ReadInteger small" $ nf (loopL L.readInteger) intL
 85 |         , bench "ReadNatural small" $ nf (loopL L.readNatural) wordL
 86 |         ]
 87 |     ]
 88 |   where
 89 |     mkWordL :: forall a. (Integral a, Bounded a)
 90 |             => (a -> B.Builder) -> L.ByteString
 91 |     mkWordL f = B.toLazyByteString b
 92 |       where b = mconcat [f i <> B.char8 ' ' | i <- [n-255..n]]
 93 |             n = maxBound @a
 94 |     mkWordS f = S.toStrict $ mkWordL f
 95 | 
 96 |     mkIntL :: forall a. (Integral a, Bounded a)
 97 |            => (a -> B.Builder) -> L.ByteString
 98 |     mkIntL f = B.toLazyByteString b
 99 |       where b = mconcat [f (i + 128) <> B.char8 ' ' | i <- [n-255..n]]
100 |             n = maxBound @a
101 |     mkIntS f = S.toStrict $ mkIntL f
102 | 
103 |     wordS, word8S, word16S, word32S, word64S :: S.ByteString
104 |     !wordS = mkWordS B.wordDec
105 |     !word8S = mkWordS B.word8Dec
106 |     !word16S = mkWordS B.word16Dec
107 |     !word32S = mkWordS B.word32Dec
108 |     !word64S = mkWordS B.word64Dec
109 | 
110 |     intS, int8S, int16S, int32S, int64S :: S.ByteString
111 |     !intS =  mkIntS B.intDec
112 |     !int8S = mkIntS B.int8Dec
113 |     !int16S = mkIntS B.int16Dec
114 |     !int32S = mkIntS B.int32Dec
115 |     !int64S = mkIntS B.int64Dec
116 | 
117 |     word8L, word16L, word32L, word64L :: L.ByteString
118 |     !wordL = mkWordL B.wordDec
119 |     !word8L = mkWordL B.word8Dec
120 |     !word16L = mkWordL B.word16Dec
121 |     !word32L = mkWordL B.word32Dec
122 |     !word64L = mkWordL B.word64Dec
123 | 
124 |     intL, int8L, int16L, int32L, int64L :: L.ByteString
125 |     !intL =  mkIntL B.intDec
126 |     !int8L = mkIntL B.int8Dec
127 |     !int16L = mkIntL B.int16Dec
128 |     !int32L = mkIntL B.int32Dec
129 |     !int64L = mkIntL B.int64Dec
130 | 
131 |     bignatL :: L.ByteString
132 |     !bignatL = B.toLazyByteString b
133 |       where b = mconcat [B.integerDec (powpow i) <> B.char8 ' ' | i <- [0..13]]
134 |             powpow :: Word -> Integer
135 |             powpow n = 2^(2^n :: Word)
136 | 
137 |     bignatS :: S.ByteString
138 |     !bignatS = S.toStrict bignatL
139 | 


--------------------------------------------------------------------------------
/bench/BenchShort.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE OverloadedStrings   #-}
  2 | 
  3 | module BenchShort (benchShort) where
  4 | 
  5 | import           Control.DeepSeq                       (force)
  6 | import           Data.Foldable                         (foldMap)
  7 | import           Data.Maybe                            (listToMaybe)
  8 | import           Data.Monoid
  9 | import           Data.String
 10 | import           Test.Tasty.Bench
 11 | import           Prelude                               hiding (words, head, tail)
 12 | 
 13 | import           Data.ByteString.Short                 (ShortByteString)
 14 | import qualified Data.ByteString.Short                 as S
 15 | 
 16 | import           Data.ByteString.Builder
 17 | import           Data.ByteString.Builder.Extra         (byteStringCopy,
 18 |                                                         byteStringInsert,
 19 |                                                         intHost)
 20 | import           Data.ByteString.Builder.Internal      (ensureFree)
 21 | import           Data.ByteString.Builder.Prim          (BoundedPrim, FixedPrim,
 22 |                                                         (>$<))
 23 | import qualified Data.ByteString.Builder.Prim          as P
 24 | import qualified Data.ByteString.Builder.Prim.Internal as PI
 25 | 
 26 | import           Foreign
 27 | 
 28 | import System.Random
 29 | 
 30 | 
 31 | 
 32 | ------------------------------------------------------------------------------
 33 | -- Benchmark
 34 | ------------------------------------------------------------------------------
 35 | 
 36 | -- input data (NOINLINE to ensure memoization)
 37 | ----------------------------------------------
 38 | 
 39 | -- | Few-enough repetitions to avoid making GC too expensive.
 40 | nRepl :: Int
 41 | nRepl = 10000
 42 | 
 43 | {-# NOINLINE intData #-}
 44 | intData :: [Int]
 45 | intData = [1..nRepl]
 46 | 
 47 | {-# NOINLINE byteStringData #-}
 48 | byteStringData :: S.ShortByteString
 49 | byteStringData = S.pack $ map fromIntegral intData
 50 | 
 51 | {-# NOINLINE loremIpsum #-}
 52 | loremIpsum :: S.ShortByteString
 53 | loremIpsum = mconcat
 54 |   [ "  Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor"
 55 |   , "incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis"
 56 |   , "nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat."
 57 |   , "Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu"
 58 |   , "fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in"
 59 |   , "culpa qui officia deserunt mollit anim id est laborum."
 60 |   ]
 61 | 
 62 | -- benchmark wrappers
 63 | ---------------------
 64 | 
 65 | {-# INLINE benchB' #-}
 66 | benchB' :: String -> a -> (a -> ShortByteString) -> Benchmark
 67 | benchB' name x b = bench name $ whnf (S.length . b) x
 68 | 
 69 | 
 70 | -- We use this construction of just looping through @n,n-1,..,1@ to ensure that
 71 | -- we measure the speed of the encoding and not the speed of generating the
 72 | -- values to be encoded.
 73 | {-# INLINE benchIntEncodingB #-}
 74 | benchIntEncodingB :: Int              -- ^ Maximal 'Int' to write
 75 |                   -> BoundedPrim Int  -- ^ 'BoundedPrim' to execute
 76 |                   -> IO ()            -- ^ 'IO' action to benchmark
 77 | benchIntEncodingB n0 w
 78 |   | n0 <= 0   = return ()
 79 |   | otherwise = do
 80 |       fpbuf <- mallocForeignPtrBytes (n0 * PI.sizeBound w)
 81 |       withForeignPtr fpbuf (loop n0) >> return ()
 82 |   where
 83 |     loop !n !op
 84 |       | n <= 0    = return op
 85 |       | otherwise = PI.runB w n op >>= loop (n - 1)
 86 | 
 87 | 
 88 | -- Helpers
 89 | -------------
 90 | 
 91 | hashInt :: Int -> Int
 92 | hashInt x = iterate step x !! 10
 93 |   where
 94 |     step a = e
 95 |       where b = (a `xor` 61) `xor` (a `shiftR` 16)
 96 |             c = b + (b `shiftL` 3)
 97 |             d = c `xor` (c `shiftR` 4)
 98 |             e = d * 0x27d4eb2d
 99 |             f = e `xor` (e `shiftR` 15)
100 | 
101 | w :: Int -> Word8
102 | w = fromIntegral
103 | 
104 | hashWord8 :: Word8 -> Word8
105 | hashWord8 = fromIntegral . hashInt . fromIntegral
106 | 
107 | foldInputs' :: [[Word8]]
108 | foldInputs' = force (S.unpack <$> foldInputs)
109 | 
110 | foldInputs :: [S.ShortByteString]
111 | foldInputs = map (\k -> S.pack $ if k <= 6 then take (2 ^ k) [32..95] else concat (replicate (2 ^ (k - 6)) [32..95])) [0..16]
112 | 
113 | largeTraversalInput :: S.ShortByteString
114 | largeTraversalInput = S.concat (replicate 10 byteStringData)
115 | 
116 | smallTraversalInput :: S.ShortByteString
117 | smallTraversalInput = "The quick brown fox"
118 | 
119 | zeroes :: S.ShortByteString
120 | zeroes = S.replicate 10000 0
121 | 
122 | partitionStrict p = nf (S.partition p) . randomStrict $ mkStdGen 98423098
123 |   where randomStrict = fst . S.unfoldrN 10000 (Just . random)
124 | 
125 | -- ASCII \n to ensure no typos
126 | nl :: Word8
127 | nl = 0xa
128 | {-# INLINE nl #-}
129 | 
130 | -- non-inlined equality test
131 | nilEq :: Word8 -> Word8 -> Bool
132 | {-# NOINLINE nilEq #-}
133 | nilEq = (==)
134 | 
135 | -- lines of 200 letters from a to e, followed by repeated letter f
136 | absurdlong :: S.ShortByteString
137 | absurdlong = (S.replicate 200 0x61 <> S.singleton nl
138 |           <> S.replicate 200 0x62 <> S.singleton nl
139 |           <> S.replicate 200 0x63 <> S.singleton nl
140 |           <> S.replicate 200 0x64 <> S.singleton nl
141 |           <> S.replicate 200 0x65 <> S.singleton nl)
142 |           <> S.replicate 999999 0x66
143 | 
144 | bench_find_index_second :: ShortByteString -> Maybe Int
145 | bench_find_index_second bs =
146 |   let isNl = (== nl)
147 |    in case S.findIndex isNl bs of
148 |         Just !i -> S.findIndex isNl (S.drop (i+1) bs)
149 |         Nothing -> Nothing
150 | {-# INLINE bench_find_index_second #-}
151 | 
152 | bench_elem_index_second :: ShortByteString -> Maybe Int
153 | bench_elem_index_second bs =
154 |     case S.elemIndex nl bs of
155 |         Just !i -> S.elemIndex nl (S.drop (i+1) bs)
156 |         Nothing -> Nothing
157 | {-# INLINE bench_elem_index_second #-}
158 | 
159 | 
160 | 
161 | -- benchmarks
162 | -------------
163 | 
164 | benchShort :: Benchmark
165 | benchShort = absurdlong `seq` bgroup "ShortByteString"
166 |     [ bgroup "Small payload"
167 |       [ benchB' "mempty"        ()  (const mempty)
168 |       , benchB' "UTF-8 String (naive)" "hello world\0" fromString
169 |       , benchB' "String (naive)" "hello world!" fromString
170 |       ]
171 |     , bgroup "intercalate"
172 |       [ bench "intercalate (large)" $ whnf (S.intercalate $ " and also ") (replicate 300 "expression")
173 |       , bench "intercalate (small)" $ whnf (S.intercalate "&") (replicate 30 "foo")
174 |       , bench "intercalate (tiny)" $ whnf (S.intercalate "&") (["foo", "bar", "baz"])
175 |       ]
176 |     , bgroup "partition"
177 |       [
178 |         bgroup "strict"
179 |         [
180 |           bench "mostlyTrueFast"  $ partitionStrict (< (w 225))
181 |         , bench "mostlyFalseFast" $ partitionStrict (< (w 10))
182 |         , bench "balancedFast"    $ partitionStrict (< (w 128))
183 | 
184 |         , bench "mostlyTrueSlow"  $ partitionStrict (\x -> hashWord8 x < w 225)
185 |         , bench "mostlyFalseSlow" $ partitionStrict (\x -> hashWord8 x < w 10)
186 |         , bench "balancedSlow"    $ partitionStrict (\x -> hashWord8 x < w 128)
187 |         ]
188 |       ]
189 |     , bgroup "folds"
190 |       [ bgroup "strict"
191 |         [ bgroup "foldl" $ map (\s -> bench (show $ S.length s) $
192 |             nf (S.foldl (\acc x -> acc + fromIntegral x) (0 :: Int)) s) foldInputs
193 |         , bgroup "foldl'" $ map (\s -> bench (show $ S.length s) $
194 |             nf (S.foldl' (\acc x -> acc + fromIntegral x) (0 :: Int)) s) foldInputs
195 |         , bgroup "foldr" $ map (\s -> bench (show $ S.length s) $
196 |             nf (S.foldr (\x acc -> fromIntegral x + acc) (0 :: Int)) s) foldInputs
197 |         , bgroup "foldr'" $ map (\s -> bench (show $ S.length s) $
198 |             nf (S.foldr' (\x acc -> fromIntegral x + acc) (0 :: Int)) s) foldInputs
199 |         , bgroup "foldr1'" $ map (\s -> bench (show $ S.length s) $
200 |             nf (S.foldr1' (\x  acc -> fromIntegral x + acc)) s) foldInputs
201 |         , bgroup "unfoldrN" $ map (\s -> bench (show $ S.length s) $
202 |             nf (S.unfoldrN (S.length s) (\a -> Just (a, a + 1))) 0) foldInputs
203 |         , bgroup "filter" $ map (\s -> bench (show $ S.length s) $
204 |             nf (S.filter odd) s) foldInputs
205 |         ]
206 |       ]
207 |     , bgroup "findIndexOrLength"
208 |       [ bench "takeWhile"      $ nf (S.takeWhile even) zeroes
209 |       , bench "dropWhile"      $ nf (S.dropWhile even) zeroes
210 |       , bench "break"          $ nf (S.break odd) zeroes
211 |       ]
212 |     , bgroup "findIndex_"
213 |       [ bench "findIndices"    $ nf (sum . S.findIndices (\x -> x ==  129 || x == 72)) byteStringData
214 |       , bench "find"           $ nf (S.find (>= 198)) byteStringData
215 |       ]
216 |     , bgroup "traversals"
217 |       [ bench "map (+1) large" $ nf (S.map (+ 1)) largeTraversalInput
218 |       , bench "map (+1) small" $ nf (S.map (+ 1)) smallTraversalInput
219 |       ]
220 |     , bgroup "ShortByteString strict first index" $
221 |         [ bench "FindIndices" $ nf (listToMaybe . S.findIndices (== nl)) absurdlong
222 |         , bench "ElemIndices" $ nf (listToMaybe . S.elemIndices     nl)  absurdlong
223 |         , bench "FindIndex"   $ nf (S.findIndex (== nl)) absurdlong
224 |         , bench "ElemIndex"   $ nf (S.elemIndex     nl)  absurdlong
225 |         ]
226 |     , bgroup "ShortByteString strict second index" $
227 |         [ bench "FindIndices" $ nf (listToMaybe . drop 1 . S.findIndices (== nl)) absurdlong
228 |         , bench "ElemIndices" $ nf (listToMaybe . drop 1 . S.elemIndices     nl)  absurdlong
229 |         , bench "FindIndex"   $ nf bench_find_index_second absurdlong
230 |         , bench "ElemIndex"   $ nf bench_elem_index_second absurdlong
231 |         ]
232 |     , bgroup "ShortByteString index equality inlining" $
233 |         [ bench "FindIndices/inlined"     $ nf (S.findIndices    (== nl)) absurdlong
234 |         , bench "FindIndices/non-inlined" $ nf (S.findIndices (nilEq nl)) absurdlong
235 |         , bench "FindIndex/inlined"       $ nf (S.findIndex      (== nl)) absurdlong
236 |         , bench "FindIndex/non-inlined"   $ nf (S.findIndex   (nilEq nl)) absurdlong
237 |         ]
238 |     , bgroup "ShortByteString conversions" $
239 |         [ bgroup "unpack" $ map (\s -> bench (show $ S.length s) $
240 |             nf (\x -> S.unpack x) s) foldInputs
241 |         , bgroup "pack" $ map (\s -> bench (show $ length s) $
242 |             nf S.pack s) foldInputs'
243 |         , bench "unpack and get last element" $ nf (\x -> last . S.unpack $ x) absurdlong
244 |         , bench "unpack and get first 120 elements" $ nf (\x -> take 120 . S.unpack $ x) absurdlong
245 |         ]
246 |     ]
247 | 


--------------------------------------------------------------------------------
/bytestring.cabal:
--------------------------------------------------------------------------------
  1 | Cabal-Version:       2.2
  2 | 
  3 | Name:                bytestring
  4 | Version:             0.13.0.0
  5 | Synopsis:            Fast, compact, strict and lazy byte strings with a list interface
  6 | Description:
  7 |     An efficient compact, immutable byte string type (both strict and lazy)
  8 |     suitable for binary or 8-bit character data.
  9 |     .
 10 |     The 'ByteString' type represents sequences of bytes or 8-bit characters.
 11 |     It is suitable for high performance use, both in terms of large data
 12 |     quantities, or high speed requirements. The 'ByteString' functions follow
 13 |     the same style as Haskell\'s ordinary lists, so it is easy to convert code
 14 |     from using 'String' to 'ByteString'.
 15 |     .
 16 |     Two 'ByteString' variants are provided:
 17 |     .
 18 |       * Strict 'ByteString's keep the string as a single large array. This
 19 |         makes them convenient for passing data between C and Haskell.
 20 |     .
 21 |       * Lazy 'ByteString's use a lazy list of strict chunks which makes it
 22 |         suitable for I\/O streaming tasks.
 23 |     .
 24 |     The @Char8@ modules provide a character-based view of the same
 25 |     underlying 'ByteString' types. This makes it convenient to handle mixed
 26 |     binary and 8-bit character content (which is common in many file formats
 27 |     and network protocols).
 28 |     .
 29 |     The 'Builder' module provides an efficient way to build up 'ByteString's
 30 |     in an ad-hoc way by repeated concatenation. This is ideal for fast
 31 |     serialisation or pretty printing.
 32 |     .
 33 |     There is also a 'ShortByteString' type which has a lower memory overhead
 34 |     and can be converted to or from a 'ByteString'. It is suitable for keeping
 35 |     many short strings in memory, especially long-term, without incurring any
 36 |     possible heap fragmentation costs.
 37 |     .
 38 |     'ByteString's are not designed for Unicode. For Unicode strings you should
 39 |     use the 'Text' type from the @text@ package.
 40 |     .
 41 |     These modules are intended to be imported qualified, to avoid name clashes
 42 |     with "Prelude" functions, e.g.
 43 |     .
 44 |     > import qualified Data.ByteString as BS
 45 | 
 46 | License:             BSD-3-Clause
 47 | License-file:        LICENSE
 48 | Category:            Data
 49 | Copyright:           Copyright (c) Don Stewart          2005-2009,
 50 |                                (c) Duncan Coutts        2006-2015,
 51 |                                (c) David Roundy         2003-2005,
 52 |                                (c) Jasper Van der Jeugt 2010,
 53 |                                (c) Simon Meier          2010-2013.
 54 | 
 55 | Author:              Don Stewart,
 56 |                      Duncan Coutts
 57 | Maintainer:          Haskell Bytestring Team <andrew.lelechenko@gmail.com>, Core Libraries Committee
 58 | Homepage:            https://github.com/haskell/bytestring
 59 | Bug-reports:         https://github.com/haskell/bytestring/issues
 60 | Tested-With:         GHC==9.12.1,
 61 |                      GHC==9.10.1,
 62 |                      GHC==9.8.4,
 63 |                      GHC==9.6.6,
 64 |                      GHC==9.4.8,
 65 |                      GHC==9.2.8,
 66 |                      GHC==9.0.2,
 67 |                      GHC==8.10.7,
 68 |                      GHC==8.8.4,
 69 |                      GHC==8.6.5,
 70 |                      GHC==8.4.4
 71 | Build-Type:          Simple
 72 | extra-source-files:  README.md Changelog.md include/bytestring-cpp-macros.h
 73 | 
 74 | Flag pure-haskell
 75 |   description: Don't use bytestring's standard C routines
 76 | 
 77 |     When this flag is true, bytestring will use pure Haskell variants (no C FFI)
 78 |     of the internal functions. This is not recommended except in use cases that
 79 |     cannot (or do not) depend on C, such as with GHC's JavaScript backend.
 80 | 
 81 |   default: False
 82 |   manual: True
 83 | 
 84 | source-repository head
 85 |   type:     git
 86 |   location: https://github.com/haskell/bytestring
 87 | 
 88 | 
 89 | common language
 90 |   default-language: Haskell2010
 91 |   default-extensions:
 92 |     BangPatterns
 93 |     DeriveDataTypeable
 94 |     DeriveGeneric
 95 |     DeriveLift
 96 |     FlexibleContexts
 97 |     FlexibleInstances
 98 |     LambdaCase
 99 |     MagicHash
100 |     MultiWayIf
101 |     NamedFieldPuns
102 |     PatternSynonyms
103 |     RankNTypes
104 |     ScopedTypeVariables
105 |     StandaloneDeriving
106 |     TupleSections
107 |     TypeApplications
108 |     TypeOperators
109 |     UnboxedTuples
110 | 
111 | library
112 |   import:          language
113 |   build-depends:   base >= 4.11 && < 5, ghc-prim, deepseq, template-haskell
114 | 
115 |   if impl(ghc < 9.4)
116 |     build-depends: data-array-byte >= 0.1 && < 0.2
117 | 
118 |   exposed-modules: Data.ByteString
119 |                    Data.ByteString.Char8
120 |                    Data.ByteString.Unsafe
121 |                    Data.ByteString.Internal
122 |                    Data.ByteString.Lazy
123 |                    Data.ByteString.Lazy.Char8
124 |                    Data.ByteString.Lazy.Internal
125 |                    Data.ByteString.Short
126 |                    Data.ByteString.Short.Internal
127 | 
128 |                    Data.ByteString.Builder
129 |                    Data.ByteString.Builder.Extra
130 |                    Data.ByteString.Builder.Prim
131 |                    Data.ByteString.Builder.RealFloat
132 | 
133 |                    -- perhaps only exposed temporarily
134 |                    Data.ByteString.Builder.Internal
135 |                    Data.ByteString.Builder.Prim.Internal
136 |   other-modules:   Data.ByteString.Builder.ASCII
137 |                    Data.ByteString.Builder.Prim.ASCII
138 |                    Data.ByteString.Builder.Prim.Binary
139 |                    Data.ByteString.Builder.Prim.Internal.Base16
140 |                    Data.ByteString.Builder.Prim.Internal.Floating
141 |                    Data.ByteString.Builder.RealFloat.F2S
142 |                    Data.ByteString.Builder.RealFloat.D2S
143 |                    Data.ByteString.Builder.RealFloat.Internal
144 |                    Data.ByteString.Builder.RealFloat.TableGenerator
145 |                    Data.ByteString.Internal.Type
146 |                    Data.ByteString.Lazy.ReadInt
147 |                    Data.ByteString.Lazy.ReadNat
148 |                    Data.ByteString.ReadInt
149 |                    Data.ByteString.ReadNat
150 |                    Data.ByteString.Utils.ByteOrder
151 |                    Data.ByteString.Utils.UnalignedAccess
152 | 
153 |   ghc-options:     -Wall -fwarn-tabs -Wincomplete-uni-patterns
154 |                    -optP-Wall -optP-Werror=undef
155 |                    -O2
156 |                    -fmax-simplifier-iterations=10
157 |                    -fdicts-cheap
158 |                    -fspec-constr-count=6
159 | 
160 |   if arch(javascript) || flag(pure-haskell)
161 |     cpp-options: -DPURE_HASKELL=1
162 |     other-modules: Data.ByteString.Internal.Pure
163 |     default-extensions: NoForeignFunctionInterface
164 |     -- Pure Haskell implementation only implemented for recent GHCs/base
165 |     build-depends: base >= 4.18 && < 5
166 |   else
167 |     cpp-options: -DPURE_HASKELL=0
168 | 
169 |     c-sources:        cbits/fpstring.c
170 |                       cbits/itoa.c
171 |                       cbits/shortbytestring.c
172 |                       cbits/aligned-static-hs-data.c
173 | 
174 |     if (arch(aarch64))
175 |       c-sources:        cbits/aarch64/is-valid-utf8.c
176 |     else
177 |       c-sources:        cbits/is-valid-utf8.c
178 | 
179 |     -- DNDEBUG disables asserts in cbits/
180 |     cc-options:        -std=c11 -DNDEBUG=1
181 |                        -fno-strict-aliasing
182 |                        -Werror=undef
183 | 
184 |     -- No need to link to libgcc on ghc-9.4 and later which uses a clang-based
185 |     -- toolchain.
186 |     if os(windows) && impl(ghc < 9.3)
187 |       extra-libraries:  gcc
188 | 
189 |   if arch(aarch64)
190 |     -- The libffi in Apple's darwin toolchain doesn't
191 |     -- play nice with -Wundef.  Recent GHCs work around this.
192 |     -- See also https://github.com/haskell/bytestring/issues/665
193 |     -- and https://gitlab.haskell.org/ghc/ghc/-/issues/23568
194 |     build-depends:    base (>= 4.17.2 && < 4.18) || >= 4.18.1
195 | 
196 |   include-dirs:      include
197 |   install-includes:  fpstring.h
198 |                      bytestring-cpp-macros.h
199 | 
200 | test-suite bytestring-tests
201 |   import:           language
202 |   type:             exitcode-stdio-1.0
203 |   main-is:          Main.hs
204 |   other-modules:    Builder
205 |                     Data.ByteString.Builder.Prim.TestUtils
206 |                     Data.ByteString.Builder.Prim.Tests
207 |                     Data.ByteString.Builder.Tests
208 |                     IsValidUtf8
209 |                     LazyHClose
210 |                     Lift
211 |                     Properties
212 |                     Properties.ByteString
213 |                     Properties.ByteStringChar8
214 |                     Properties.ByteStringLazy
215 |                     Properties.ByteStringLazyChar8
216 |                     Properties.ShortByteString
217 |                     QuickCheckUtils
218 |   hs-source-dirs:   tests,
219 |                     tests/builder
220 |   build-depends:    base,
221 |                     bytestring,
222 |                     deepseq,
223 |                     QuickCheck,
224 |                     tasty,
225 |                     tasty-quickcheck >= 0.8.1,
226 |                     template-haskell,
227 |                     transformers >= 0.3,
228 |                     syb
229 | 
230 |   ghc-options:      -fwarn-unused-binds
231 |                     -rtsopts
232 |   if !arch(wasm32)
233 |     ghc-options:    -threaded
234 | 
235 | benchmark bytestring-bench
236 |   import:           language
237 |   main-is:          BenchAll.hs
238 |   other-modules:    BenchBoundsCheckFusion
239 |                     BenchCount
240 |                     BenchCSV
241 |                     BenchIndices
242 |                     BenchReadInt
243 |                     BenchShort
244 |   type:             exitcode-stdio-1.0
245 |   hs-source-dirs:   bench
246 | 
247 |   ghc-options:      -O2 "-with-rtsopts=-A32m"
248 |   if impl(ghc >= 8.6)
249 |     ghc-options:    -fproc-alignment=64
250 |   build-depends:    base,
251 |                     bytestring,
252 |                     deepseq,
253 |                     tasty-bench,
254 |                     random
255 | 


--------------------------------------------------------------------------------
/cabal.project.wasi:
--------------------------------------------------------------------------------
1 | -- https://github.com/haskellari/splitmix/pull/73
2 | source-repository-package
3 |   type: git
4 |   location: https://github.com/amesgen/splitmix
5 |   tag: 5f5b766d97dc735ac228215d240a3bb90bc2ff75
6 | 


--------------------------------------------------------------------------------
/cbits/fpstring.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2003 David Roundy
  3 |  * Copyright (c) 2005-6 Don Stewart
  4 |  *
  5 |  * All rights reserved.
  6 |  *
  7 |  * Redistribution and use in source and binary forms, with or without
  8 |  * modification, are permitted provided that the following conditions
  9 |  * are met:
 10 |  * 1. Redistributions of source code must retain the above copyright
 11 |  *    notice, this list of conditions and the following disclaimer.
 12 |  * 2. Redistributions in binary form must reproduce the above copyright
 13 |  *    notice, this list of conditions and the following disclaimer in the
 14 |  *    documentation and/or other materials provided with the distribution.
 15 |  * 3. Neither the names of the authors or the names of any contributors
 16 |  *    may be used to endorse or promote products derived from this software
 17 |  *    without specific prior written permission.
 18 |  *
 19 |  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 20 |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 21 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 22 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 23 |  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 24 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 25 |  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 26 |  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 27 |  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 28 |  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 29 |  * SUCH DAMAGE.
 30 |  */
 31 | 
 32 | #include "HsFFI.h"
 33 | #include "MachDeps.h"
 34 | 
 35 | #include "fpstring.h"
 36 | #if defined(__x86_64__)
 37 | #include <x86intrin.h>
 38 | #include <cpuid.h>
 39 | #endif
 40 | 
 41 | #include <stdint.h>
 42 | #include <stdbool.h>
 43 | 
 44 | #if defined(__x86_64__) && (__GNUC__ >= 7 || __GNUC__ == 6 && __GNUC_MINOR__ >= 3 || defined(__clang_major__)) && !defined(__STDC_NO_ATOMICS__)
 45 | #include <stdatomic.h>
 46 | #define USE_SIMD_COUNT
 47 | #endif
 48 | 
 49 | /* copy a string in reverse */
 50 | void fps_reverse(unsigned char *q, unsigned char *p, size_t n) {
 51 |     p += n-1;
 52 |     while (n-- != 0)
 53 |         *q++ = *p--;
 54 | }
 55 | 
 56 | /* duplicate a string, interspersing the character through the elements
 57 |    of the duplicated string */
 58 | void fps_intersperse(unsigned char *q,
 59 |                      unsigned char *p,
 60 |                      size_t n,
 61 |                      unsigned char c) {
 62 | #if defined(__x86_64__)
 63 |   {
 64 |     const __m128i separator = _mm_set1_epi8(c);
 65 |     const unsigned char *const p_begin = p;
 66 |     const unsigned char *const p_end = p_begin + n - 9;
 67 |     while (p < p_end) {
 68 |       const __m128i eight_src_bytes = _mm_loadl_epi64((__m128i *)p);
 69 |       const __m128i sixteen_dst_bytes = _mm_unpacklo_epi8(eight_src_bytes, separator);
 70 |       _mm_storeu_si128((__m128i *)q, sixteen_dst_bytes);
 71 |       p += 8;
 72 |       q += 16;
 73 |     }
 74 |     n -= p - p_begin;
 75 |   }
 76 | #endif
 77 |     while (n > 1) {
 78 |         *q++ = *p++;
 79 |         *q++ = c;
 80 |         n--;
 81 |     }
 82 |     if (n == 1)
 83 |         *q = *p;
 84 | }
 85 | 
 86 | /* find maximum char in a packed string */
 87 | unsigned char fps_maximum(unsigned char *p, size_t len) {
 88 |     unsigned char *q, c = *p;
 89 |     for (q = p; q < p + len; q++)
 90 |         if (*q > c)
 91 |             c = *q;
 92 |     return c;
 93 | }
 94 | 
 95 | /* find minimum char in a packed string */
 96 | unsigned char fps_minimum(unsigned char *p, size_t len) {
 97 |     unsigned char *q, c = *p;
 98 |     for (q = p; q < p + len; q++)
 99 |         if (*q < c)
100 |             c = *q;
101 |     return c;
102 | }
103 | 
104 | int fps_compare(const void *a, const void *b) {
105 |     return (int)*(unsigned char*)a - (int)*(unsigned char*)b;
106 | }
107 | 
108 | void fps_sort(unsigned char *p, size_t len) {
109 |     return qsort(p, len, 1, fps_compare);
110 | }
111 | 
112 | // We don't actually always use these unaligned write functions on the
113 | // Haskell side, but the macros we check there aren't visible here...
114 | void fps_unaligned_write_u16(uint16_t x, uint8_t *p) {
115 |   memcpy(p, &x, 2);
116 |   return;
117 | }
118 | 
119 | void fps_unaligned_write_u32(uint32_t x, uint8_t *p) {
120 |   memcpy(p, &x, 4);
121 |   return;
122 | }
123 | 
124 | void fps_unaligned_write_u64(uint64_t x, uint8_t *p) {
125 |   memcpy(p, &x, 8);
126 |   return;
127 | }
128 | 
129 | void fps_unaligned_write_HsFloat(HsFloat x, uint8_t *p) {
130 |   memcpy(p, &x, SIZEOF_HSFLOAT);
131 | }
132 | 
133 | void fps_unaligned_write_HsDouble(HsDouble x, uint8_t *p) {
134 |   memcpy(p, &x, SIZEOF_HSDOUBLE);
135 | }
136 | 
137 | uint64_t fps_unaligned_read_u64(uint8_t *p) {
138 |   uint64_t ans;
139 |   memcpy(&ans, p, 8);
140 |   return ans;
141 | }
142 | 
143 | /* count the number of occurrences of a char in a string */
144 | size_t fps_count_naive(unsigned char *str, size_t len, unsigned char w) {
145 |     size_t c;
146 |     for (c = 0; len-- != 0; ++str)
147 |         if (*str == w)
148 |             ++c;
149 |     return c;
150 | }
151 | 
152 | 
153 | #ifdef USE_SIMD_COUNT
154 | __attribute__((target("sse4.2")))
155 | size_t fps_count_cmpestrm(unsigned char *str, size_t len, unsigned char w) {
156 |     const __m128i pat = _mm_set1_epi8(w);
157 | 
158 |     size_t res = 0;
159 | 
160 |     size_t i = 0;
161 | 
162 |     for (; i < len && (intptr_t)(str + i) % 64; ++i) {
163 |         res += str[i] == w;
164 |     }
165 | 
166 |     for (size_t end = len - 128; i < end; i += 128) {
167 |         __m128i p0 = _mm_load_si128((const __m128i*)(str + i + 16 * 0));
168 |         __m128i p1 = _mm_load_si128((const __m128i*)(str + i + 16 * 1));
169 |         __m128i p2 = _mm_load_si128((const __m128i*)(str + i + 16 * 2));
170 |         __m128i p3 = _mm_load_si128((const __m128i*)(str + i + 16 * 3));
171 |         __m128i p4 = _mm_load_si128((const __m128i*)(str + i + 16 * 4));
172 |         __m128i p5 = _mm_load_si128((const __m128i*)(str + i + 16 * 5));
173 |         __m128i p6 = _mm_load_si128((const __m128i*)(str + i + 16 * 6));
174 |         __m128i p7 = _mm_load_si128((const __m128i*)(str + i + 16 * 7));
175 |         // Here, cmpestrm compares two strings in the following mode:
176 |         // * _SIDD_SBYTE_OPS: interprets the strings as consisting of 8-bit chars,
177 |         // * _SIDD_CMP_EQUAL_EACH: computes the number of `i`s
178 |         //    for which `p[i]`, a part of `str`, is equal to `pat[i]`
179 |         //    (the latter being always equal to `w`).
180 |         //
181 |         // q.v. https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestrm&expand=835
182 | #define MODE _SIDD_SBYTE_OPS | _SIDD_CMP_EQUAL_EACH
183 |         __m128i r0 = _mm_cmpestrm(p0, 16, pat, 16, MODE);
184 |         __m128i r1 = _mm_cmpestrm(p1, 16, pat, 16, MODE);
185 |         __m128i r2 = _mm_cmpestrm(p2, 16, pat, 16, MODE);
186 |         __m128i r3 = _mm_cmpestrm(p3, 16, pat, 16, MODE);
187 |         __m128i r4 = _mm_cmpestrm(p4, 16, pat, 16, MODE);
188 |         __m128i r5 = _mm_cmpestrm(p5, 16, pat, 16, MODE);
189 |         __m128i r6 = _mm_cmpestrm(p6, 16, pat, 16, MODE);
190 |         __m128i r7 = _mm_cmpestrm(p7, 16, pat, 16, MODE);
191 | #undef MODE
192 |         res += _popcnt64(_mm_extract_epi64(r0, 0));
193 |         res += _popcnt64(_mm_extract_epi64(r1, 0));
194 |         res += _popcnt64(_mm_extract_epi64(r2, 0));
195 |         res += _popcnt64(_mm_extract_epi64(r3, 0));
196 |         res += _popcnt64(_mm_extract_epi64(r4, 0));
197 |         res += _popcnt64(_mm_extract_epi64(r5, 0));
198 |         res += _popcnt64(_mm_extract_epi64(r6, 0));
199 |         res += _popcnt64(_mm_extract_epi64(r7, 0));
200 |     }
201 | 
202 |     for (; i < len; ++i) {
203 |         res += str[i] == w;
204 |     }
205 | 
206 |     return res;
207 | }
208 | 
209 | __attribute__((target("avx2")))
210 | size_t fps_count_avx2(unsigned char *str, size_t len, unsigned char w) {
211 |     __m256i pat = _mm256_set1_epi8(w);
212 | 
213 |     size_t prefix = 0, res = 0;
214 | 
215 |     size_t i = 0;
216 | 
217 |     for (; i < len && (intptr_t)(str + i) % 64; ++i) {
218 |         prefix += str[i] == w;
219 |     }
220 | 
221 |     for (size_t end = len - 128; i < end; i += 128) {
222 |         __m256i p0 = _mm256_load_si256((const __m256i*)(str + i + 32 * 0));
223 |         __m256i p1 = _mm256_load_si256((const __m256i*)(str + i + 32 * 1));
224 |         __m256i p2 = _mm256_load_si256((const __m256i*)(str + i + 32 * 2));
225 |         __m256i p3 = _mm256_load_si256((const __m256i*)(str + i + 32 * 3));
226 |         __m256i r0 = _mm256_cmpeq_epi8(p0, pat);
227 |         __m256i r1 = _mm256_cmpeq_epi8(p1, pat);
228 |         __m256i r2 = _mm256_cmpeq_epi8(p2, pat);
229 |         __m256i r3 = _mm256_cmpeq_epi8(p3, pat);
230 |         res += _popcnt64(_mm256_extract_epi64(r0, 0));
231 |         res += _popcnt64(_mm256_extract_epi64(r0, 1));
232 |         res += _popcnt64(_mm256_extract_epi64(r0, 2));
233 |         res += _popcnt64(_mm256_extract_epi64(r0, 3));
234 |         res += _popcnt64(_mm256_extract_epi64(r1, 0));
235 |         res += _popcnt64(_mm256_extract_epi64(r1, 1));
236 |         res += _popcnt64(_mm256_extract_epi64(r1, 2));
237 |         res += _popcnt64(_mm256_extract_epi64(r1, 3));
238 |         res += _popcnt64(_mm256_extract_epi64(r2, 0));
239 |         res += _popcnt64(_mm256_extract_epi64(r2, 1));
240 |         res += _popcnt64(_mm256_extract_epi64(r2, 2));
241 |         res += _popcnt64(_mm256_extract_epi64(r2, 3));
242 |         res += _popcnt64(_mm256_extract_epi64(r3, 0));
243 |         res += _popcnt64(_mm256_extract_epi64(r3, 1));
244 |         res += _popcnt64(_mm256_extract_epi64(r3, 2));
245 |         res += _popcnt64(_mm256_extract_epi64(r3, 3));
246 |     }
247 | 
248 |     // _mm256_cmpeq_epi8(p, pat) returns a SIMD vector
249 |     // with `i`th byte consisting of eight `1`s if `p[i] == pat[i]`,
250 |     // and of eight `0`s otherwise,
251 |     // hence each matching byte is counted 8 times by popcnt.
252 |     // Dividing by 8 corrects for that.
253 |     res /= 8;
254 | 
255 |     res += prefix;
256 | 
257 |     for (; i < len; ++i) {
258 |         res += str[i] == w;
259 |     }
260 | 
261 |     return res;
262 | }
263 | 
264 | typedef size_t (*fps_impl_t) (unsigned char*, size_t, unsigned char);
265 | 
266 | fps_impl_t select_fps_simd_impl() {
267 |     uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
268 | 
269 |     uint32_t ecx1 = 0;
270 |     if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
271 |         ecx1 = ecx;
272 |     }
273 | 
274 |     const bool has_xsave = ecx1 & (1 << 26);
275 |     const bool has_popcnt = ecx1 & (1 << 23);
276 | 
277 |     if (__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx)) {
278 |         const bool has_avx2 = has_xsave && (ebx & (1 << 5));
279 |         if (has_avx2 && has_popcnt) {
280 |             return &fps_count_avx2;
281 |         }
282 |     }
283 | 
284 |     const bool has_sse42 = ecx1 & (1 << 19);
285 |     if (has_sse42 && has_popcnt) {
286 |         return &fps_count_cmpestrm;
287 |     }
288 | 
289 |     return &fps_count_naive;
290 | }
291 | #endif
292 | 
293 | 
294 | 
295 | size_t fps_count(unsigned char *str, size_t len, unsigned char w) {
296 | #ifndef USE_SIMD_COUNT
297 |     return fps_count_naive(str, len, w);
298 | #else
299 |     // 1024 is a rough guesstimate of the string length
300 |     // for which the extra performance of the main SIMD loop
301 |     // starts to compensate the extra work and extra branching outside the SIMD loop.
302 |     // The real optimal number depends on the specific μarch
303 |     // and isn't worth optimizing for in this context,
304 |     // since counting characters in shorter strings is unlikely to be a hot spot.
305 |     if (len <= 1024) {
306 |         return fps_count_naive(str, len, w);
307 |     }
308 | 
309 |     static _Atomic fps_impl_t s_impl = (fps_impl_t)NULL;
310 |     fps_impl_t impl = atomic_load_explicit(&s_impl, memory_order_relaxed);
311 |     if (!impl) {
312 |       impl = select_fps_simd_impl();
313 |       atomic_store_explicit(&s_impl, impl, memory_order_relaxed);
314 |     }
315 | 
316 |     return (*impl)(str, len, w);
317 | #endif
318 | }
319 | 


--------------------------------------------------------------------------------
/cbits/itoa.c:
--------------------------------------------------------------------------------
  1 | ///////////////////////////////////////////////////////////////
  2 | // Encoding numbers using ASCII characters                   //
  3 | //                                                           //
  4 | // inspired by: http://www.jb.man.ac.uk/~slowe/cpp/itoa.html //
  5 | ///////////////////////////////////////////////////////////////
  6 | 
  7 | #include <stdio.h>
  8 | #include <stdint.h>
  9 | 
 10 | // Decimal Encoding
 11 | ///////////////////
 12 | 
 13 | static const char* digits = "0123456789abcdef";
 14 | 
 15 | // unsigned integers
 16 | char* _hs_bytestring_uint32_dec (uint32_t x, char* buf)
 17 | {
 18 |     char c, *ptr = buf, *next_free;
 19 |     uint32_t x_tmp;
 20 | 
 21 |     // encode positive number as little-endian decimal
 22 |     do {
 23 |         x_tmp = x;
 24 |         x /= 10;
 25 |         *ptr++ = digits[x_tmp - x * 10];
 26 |     } while ( x );
 27 | 
 28 |     // reverse written digits
 29 |     next_free = ptr--;
 30 |     while (buf < ptr) {
 31 |         c       = *ptr;
 32 |         *ptr--  = *buf;
 33 |         *buf++  = c;
 34 |     }
 35 |     return next_free;
 36 | }
 37 | 
 38 | // unsigned long ints
 39 | char* _hs_bytestring_uint64_dec (uint64_t x, char* buf)
 40 | {
 41 |     char c, *ptr = buf, *next_free;
 42 |     uint64_t x_tmp;
 43 | 
 44 |     // encode positive number as little-endian decimal
 45 |     do {
 46 |         x_tmp = x;
 47 |         x /= 10;
 48 |         *ptr++ = digits[x_tmp - x * 10];
 49 |     } while ( x );
 50 | 
 51 |     // reverse written digits
 52 |     next_free = ptr--;
 53 |     while (buf < ptr) {
 54 |         c       = *ptr;
 55 |         *ptr--  = *buf;
 56 |         *buf++  = c;
 57 |     }
 58 |     return next_free;
 59 | }
 60 | 
 61 | 
 62 | // Padded, decimal, positive integers for the decimal output of bignums
 63 | ///////////////////////////////////////////////////////////////////////
 64 | 
 65 | // Padded (9 digits), decimal, positive int:
 66 | // We will use it with numbers that fit in 31 bits; i.e., numbers smaller than
 67 | // 10^9, as "31 * log 2 / log 10 = 9.33"
 68 | void _hs_bytestring_uint32_dec_padded9 (uint32_t x, char* buf)
 69 | {
 70 |     const int max_width_int32_dec = 9;
 71 |     char* ptr = buf + max_width_int32_dec;
 72 |     uint32_t x_tmp;
 73 | 
 74 |     // encode positive number as little-endian decimal
 75 |     do {
 76 |         x_tmp = x;
 77 |         x /= 10;
 78 |         *(--ptr) = digits[x_tmp - x * 10];
 79 |     } while ( x );
 80 | 
 81 |     // pad beginning
 82 |     while (buf < ptr) { *(--ptr) = '0'; }
 83 | }
 84 | 
 85 | // Padded (19 digits), decimal, positive long long int:
 86 | // We will use it with numbers that fit in 63 bits; i.e., numbers smaller than
 87 | // 10^18, as "63 * log 2 / log 10 = 18.96"
 88 | void _hs_bytestring_uint64_dec_padded18 (uint64_t x, char* buf)
 89 | {
 90 |     const int max_width_int64_dec = 18;
 91 |     char* ptr = buf + max_width_int64_dec;
 92 |     uint64_t x_tmp;
 93 | 
 94 |     // encode positive number as little-endian decimal
 95 |     do {
 96 |         x_tmp = x;
 97 |         x /= 10;
 98 |         *(--ptr) = digits[x_tmp - x * 10];
 99 |     } while ( x );
100 | 
101 |     // pad beginning
102 |     while (buf < ptr) { *(--ptr) = '0'; }
103 | }
104 | 
105 | 
106 | ///////////////////////
107 | // Hexadecimal encoding
108 | ///////////////////////
109 | 
110 | // unsigned ints (32 bit words)
111 | char* _hs_bytestring_uint32_hex (uint32_t x, char* buf) {
112 |     // write hex representation in reverse order
113 |     char c, *ptr = buf, *next_free;
114 |     do {
115 |         *ptr++ = digits[x & 0xf];
116 |         x >>= 4;
117 |     } while ( x );
118 |     // invert written digits
119 |     next_free = ptr--;
120 |     while(buf < ptr) {
121 |         c      = *ptr;
122 |         *ptr-- = *buf;
123 |         *buf++ = c;
124 |     }
125 |     return next_free;
126 | };
127 | 
128 | // unsigned long ints (64 bit words)
129 | char* _hs_bytestring_uint64_hex (uint64_t x, char* buf) {
130 |     // write hex representation in reverse order
131 |     char c, *ptr = buf, *next_free;
132 |     do {
133 |         *ptr++ = digits[x & 0xf];
134 |         x >>= 4;
135 |     } while ( x );
136 |     // invert written digits
137 |     next_free = ptr--;
138 |     while(buf < ptr) {
139 |         c      = *ptr;
140 |         *ptr-- = *buf;
141 |         *buf++ = c;
142 |     }
143 |     return next_free;
144 | };
145 | 


--------------------------------------------------------------------------------
/cbits/shortbytestring.c:
--------------------------------------------------------------------------------
 1 | #include <assert.h>
 2 | #include <stddef.h>
 3 | #include <stdint.h>
 4 | #include <string.h>
 5 | 
 6 | 
 7 | ptrdiff_t
 8 | sbs_elem_index(const void *s,
 9 |             uint8_t c,
10 |             size_t n)
11 | {
12 |     const void *so = memchr(s, c, n);
13 | 
14 |     if (so) {
15 |         ptrdiff_t diff = so - s;
16 |         assert(diff >= 0);
17 |         return diff;
18 |     } else {
19 |         return -1;
20 |     }
21 | }
22 | 


--------------------------------------------------------------------------------
/include/bytestring-cpp-macros.h:
--------------------------------------------------------------------------------
 1 | #if defined(__STDC__) || defined(__GNUC__) || defined(__clang__)
 2 | #error "bytestring-cpp-macros.h does not work in C code yet"
 3 | #endif
 4 | 
 5 | 
 6 | #if defined(i386_HOST_ARCH) || defined(x86_64_HOST_ARCH)       \
 7 |     || ((defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)) \
 8 |         && defined(__ARM_FEATURE_UNALIGNED)) \
 9 |     || defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH) \
10 |     || defined(powerpc64le_HOST_ARCH) \
11 |     || defined(javascript_HOST_ARCH)
12 | /*
13 | Not all architectures are forgiving of unaligned accesses; whitelist ones
14 | which are known not to trap (either to the kernel for emulation, or crash).
15 | */
16 | #define HS_UNALIGNED_POKES_OK 1
17 | #else
18 | #if PURE_HASKELL
19 | #error "-fpure-haskell isn't supported yet on architectures only supporting aligned accesses."
20 | #endif
21 | #define HS_UNALIGNED_POKES_OK 0
22 | #endif
23 | 
24 | 
25 | #define HS_UNALIGNED_ByteArray_OPS_OK \
26 |   MIN_VERSION_base(4,12,0) \
27 |   && (MIN_VERSION_base(4,16,1) || HS_UNALIGNED_POKES_OK)
28 | /*
29 | The unaligned ByteArray# primops became available with base-4.12.0/ghc-8.6,
30 | but require an unaligned-friendly host architecture to be safe to use
31 | until ghc-9.2.2; see https://gitlab.haskell.org/ghc/ghc/-/issues/21015
32 | */
33 | 
34 | 
35 | #define HS_CAST_FLOAT_WORD_OPS_AVAILABLE MIN_VERSION_base(4,14,0)
36 | /*
37 | These operations were added in base-4.10.0, but due to
38 | https://gitlab.haskell.org/ghc/ghc/-/issues/16617 they
39 | are buggy with negative floats before ghc-8.10.
40 | */
41 | 
42 | #define HS_UNALIGNED_ADDR_PRIMOPS_AVAILABLE MIN_VERSION_base(4,20,0)
43 | 
44 | #define HS_timesInt2_PRIMOP_AVAILABLE MIN_VERSION_base(4,15,0)
45 | 
46 | #define HS_cstringLength_AND_FinalPtr_AVAILABLE MIN_VERSION_base(4,15,0)
47 |   /* These two were added in the same ghc commit and
48 |      both primarily affect how we handle literals */
49 | 
50 | #define HS_unsafeWithForeignPtr_AVAILABLE MIN_VERSION_base(4,15,0)
51 | 


--------------------------------------------------------------------------------
/include/fpstring.h:
--------------------------------------------------------------------------------
 1 | #include <string.h>
 2 | #include <stdlib.h>
 3 | 
 4 | void fps_reverse(unsigned char *dest, unsigned char *from, size_t len);
 5 | void fps_intersperse(unsigned char *dest, unsigned char *from, size_t len, unsigned char c);
 6 | unsigned char fps_maximum(unsigned char *p, size_t len);
 7 | unsigned char fps_minimum(unsigned char *p, size_t len);
 8 | size_t fps_count(unsigned char *p, size_t len, unsigned char w);
 9 | void fps_sort(unsigned char *p, size_t len);
10 | 


--------------------------------------------------------------------------------
/tests/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | *.tix
3 | /.hpc
4 | 


--------------------------------------------------------------------------------
/tests/Builder.hs:
--------------------------------------------------------------------------------
 1 | module Builder (testSuite) where
 2 | 
 3 | import qualified Data.ByteString.Builder.Tests
 4 | import qualified Data.ByteString.Builder.Prim.Tests
 5 | import           Test.Tasty (TestTree, testGroup)
 6 | 
 7 | testSuite :: TestTree
 8 | testSuite = testGroup "Builder"
 9 |   [ testGroup "Data.ByteString.Builder"
10 |        Data.ByteString.Builder.Tests.tests
11 | 
12 |   , testGroup "Data.ByteString.Builder.BasicEncoding"
13 |        Data.ByteString.Builder.Prim.Tests.tests
14 |   ]
15 | 


--------------------------------------------------------------------------------
/tests/LazyHClose.hs:
--------------------------------------------------------------------------------
 1 | module LazyHClose (testSuite) where
 2 | 
 3 | import Control.Monad (void, forM_)
 4 | import Data.ByteString.Internal (toForeignPtr)
 5 | import Foreign.C.String (withCString)
 6 | import Foreign.ForeignPtr (finalizeForeignPtr)
 7 | import System.IO (openFile, openTempFile, hClose, hPutStrLn, IOMode(..))
 8 | import System.Posix.Internals (c_unlink)
 9 | import Test.Tasty (TestTree, testGroup, withResource)
10 | import Test.Tasty.QuickCheck (testProperty, ioProperty)
11 | 
12 | import qualified Data.ByteString            as S
13 | import qualified Data.ByteString.Char8      as S8
14 | import qualified Data.ByteString.Lazy       as L
15 | import qualified Data.ByteString.Lazy.Char8 as L8
16 | 
17 | n :: Int
18 | n = 1000
19 | 
20 | testSuite :: TestTree
21 | testSuite = withResource
22 |   (do (fn, h) <- openTempFile "." "lazy-hclose-test.tmp"; hPutStrLn h "x"; hClose h; pure fn)
23 |   removeFile $ \fn' ->
24 |     testGroup "LazyHClose"
25 |     [ testProperty "Testing resource leaks for Strict.readFile" $ ioProperty $
26 |       forM_ [1..n] $ const $ do
27 |         fn <- fn'
28 |         r <- S.readFile fn
29 |         appendFile fn "" -- will fail, if fn has not been closed yet
30 | 
31 |     , testProperty "Testing resource leaks for Lazy.readFile" $ ioProperty $
32 |       forM_ [1..n] $ const $ do
33 |         fn <- fn'
34 |         r <- L.readFile fn
35 |         L.length r `seq` return ()
36 |         appendFile fn "" -- will fail, if fn has not been closed yet
37 | 
38 |     , testProperty "Testing resource leaks when converting lazy to strict" $ ioProperty $
39 |       forM_ [1..n] $ const $ do
40 |         fn <- fn'
41 |         let release c = finalizeForeignPtr fp where (fp,_,_) = toForeignPtr c
42 |         r <- L.readFile fn
43 |         mapM_ release (L.toChunks r)
44 |         appendFile fn "" -- will fail, if fn has not been closed yet
45 | 
46 |     , testProperty "Testing strict hGetContents" $ ioProperty $
47 |       forM_ [1..n] $ const $ do
48 |         fn <- fn'
49 |         h <- openFile fn ReadMode
50 |         r <- S.hGetContents h
51 |         S.last r `seq` return ()
52 |         appendFile fn "" -- will fail, if fn has not been closed yet
53 | 
54 |     , testProperty "Testing lazy hGetContents" $ ioProperty $
55 |       forM_ [1..n] $ const $ do
56 |         fn <- fn'
57 |         h <- openFile fn ReadMode
58 |         r <- L.hGetContents h
59 |         L.last r `seq` return ()
60 |         appendFile fn "" -- will fail, if fn has not been closed yet
61 |     ]
62 | 
63 | removeFile :: String -> IO ()
64 | removeFile fn = void $ withCString fn c_unlink
65 | 


--------------------------------------------------------------------------------
/tests/Lift.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE CPP #-}
 2 | 
 3 | {-# LANGUAGE OverloadedStrings #-}
 4 | {-# LANGUAGE TemplateHaskell #-}
 5 | 
 6 | module Lift (testSuite) where
 7 | 
 8 | import           Test.Tasty (TestTree, testGroup)
 9 | import           Test.Tasty.QuickCheck (testProperty, (===))
10 | import qualified Data.ByteString as BS
11 | import qualified Data.ByteString.Lazy as LBS
12 | import qualified Data.ByteString.Short as SBS
13 | import qualified Language.Haskell.TH.Syntax as TH
14 | 
15 | testSuite :: TestTree
16 | #ifdef wasm32_HOST_ARCH
17 | testSuite = testGroup "Skipped, requires -fexternal-interpreter" []
18 | #else
19 | testSuite = testGroup "Lift"
20 |   [ testGroup "strict"
21 |     [ testProperty "normal" $
22 |         let bs = "foobar" :: BS.ByteString in
23 |         bs === $(TH.lift $ BS.pack [102,111,111,98,97,114])
24 | 
25 |     , testProperty "binary" $
26 |         let bs = "\0\1\2\3\0\1\2\3" :: BS.ByteString in
27 |         bs === $(TH.lift $ BS.pack [0,1,2,3,0,1,2,3])
28 | 
29 | #if MIN_VERSION_template_haskell(2,16,0)
30 |     , testProperty "typed" $
31 |         let bs = "\0\1\2\3\0\1\2\3" :: BS.ByteString in
32 |         bs === $$(TH.liftTyped $ BS.pack [0,1,2,3,0,1,2,3])
33 | #endif
34 |     ]
35 | 
36 |   , testGroup "lazy"
37 |     [ testProperty "normal" $
38 |         let bs = "foobar" :: LBS.ByteString in
39 |         bs === $(TH.lift $ LBS.pack [102,111,111,98,97,114])
40 | 
41 |     , testProperty "binary" $
42 |         let bs = "\0\1\2\3\0\1\2\3" :: LBS.ByteString in
43 |         bs === $(TH.lift $ LBS.pack [0,1,2,3,0,1,2,3])
44 | 
45 | #if MIN_VERSION_template_haskell(2,16,0)
46 |     , testProperty "typed" $
47 |         let bs = "\0\1\2\3\0\1\2\3" :: LBS.ByteString in
48 |         bs === $$(TH.liftTyped $ LBS.pack [0,1,2,3,0,1,2,3])
49 | #endif
50 |     ]
51 | 
52 |   , testGroup "short"
53 |     [ testProperty "normal" $
54 |         let bs = "foobar" :: SBS.ShortByteString in
55 |         bs === $(TH.lift $ SBS.pack [102,111,111,98,97,114])
56 | 
57 |     , testProperty "binary" $
58 |         let bs = "\0\1\2\3\0\1\2\3" :: SBS.ShortByteString in
59 |         bs === $(TH.lift $ SBS.pack [0,1,2,3,0,1,2,3])
60 | 
61 | #if MIN_VERSION_template_haskell(2,16,0)
62 |     , testProperty "typed" $
63 |         let bs = "\0\1\2\3\0\1\2\3" :: SBS.ShortByteString in
64 |         bs === $$(TH.liftTyped $ SBS.pack [0,1,2,3,0,1,2,3])
65 | #endif
66 |     ]
67 |   ]
68 | #endif
69 | 


--------------------------------------------------------------------------------
/tests/Main.hs:
--------------------------------------------------------------------------------
 1 | module Main (main) where
 2 | 
 3 | import Test.Tasty
 4 | 
 5 | import qualified Builder
 6 | import qualified IsValidUtf8
 7 | import qualified LazyHClose
 8 | import qualified Lift
 9 | import qualified Properties
10 | 
11 | main :: IO ()
12 | main = defaultMain $ testGroup "All"
13 |   [ Builder.testSuite
14 |   , IsValidUtf8.testSuite
15 |   , LazyHClose.testSuite
16 |   , Lift.testSuite
17 |   , Properties.testSuite
18 |   ]
19 | 


--------------------------------------------------------------------------------
/tests/Properties/ByteStringChar8.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE CPP #-}
2 | 
3 | #define BYTESTRING_CHAR8
4 | 
5 | #include "ByteString.hs"
6 | 


--------------------------------------------------------------------------------
/tests/Properties/ByteStringLazy.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE CPP #-}
2 | 
3 | #define BYTESTRING_LAZY
4 | 
5 | #include "ByteString.hs"
6 | 


--------------------------------------------------------------------------------
/tests/Properties/ByteStringLazyChar8.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE CPP #-}
2 | 
3 | #define BYTESTRING_LAZY
4 | #define BYTESTRING_CHAR8
5 | 
6 | #include "ByteString.hs"
7 | 


--------------------------------------------------------------------------------
/tests/Properties/ShortByteString.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE CPP #-}
2 | 
3 | #define BYTESTRING_SHORT
4 | 
5 | #include "ByteString.hs"
6 | 


--------------------------------------------------------------------------------
/tests/QuickCheckUtils.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE DataKinds #-}
  2 | {-# LANGUAGE UndecidableInstances #-}
  3 | 
  4 | module QuickCheckUtils
  5 |   ( Char8(..)
  6 |   , String8(..)
  7 |   , CByteString(..)
  8 |   , Sqrt(..)
  9 |   , int64OK
 10 |   , tooStrictErr
 11 |   ) where
 12 | 
 13 | import Test.Tasty.QuickCheck
 14 | import Text.Show.Functions
 15 | 
 16 | import Control.Monad        ( liftM2 )
 17 | import Data.Char
 18 | import Data.Word
 19 | import Data.Int
 20 | import System.IO
 21 | import Foreign.C (CChar)
 22 | import GHC.TypeLits (TypeError, ErrorMessage(..))
 23 | import GHC.Stack (withFrozenCallStack, HasCallStack)
 24 | 
 25 | import qualified Data.ByteString.Short as SB
 26 | import qualified Data.ByteString      as P
 27 | import qualified Data.ByteString.Lazy as L
 28 | 
 29 | import qualified Data.ByteString.Char8      as PC
 30 | import qualified Data.ByteString.Lazy.Char8 as LC
 31 | 
 32 | ------------------------------------------------------------------------
 33 | 
 34 | sizedByteString n = do m <- choose(0, n)
 35 |                        fmap P.pack $ vectorOf m arbitrary
 36 | 
 37 | instance Arbitrary P.ByteString where
 38 |   arbitrary = do
 39 |     bs <- sized sizedByteString
 40 |     n  <- choose (0, 2)
 41 |     return (P.drop n bs) -- to give us some with non-0 offset
 42 |   shrink = map P.pack . shrink . P.unpack
 43 | 
 44 | instance CoArbitrary P.ByteString where
 45 |   coarbitrary s = coarbitrary (P.unpack s)
 46 | 
 47 | instance Arbitrary L.ByteString where
 48 |   arbitrary = sized $ \n -> do numChunks <- choose (0, n)
 49 |                                if numChunks == 0
 50 |                                    then return L.empty
 51 |                                    else fmap (L.fromChunks .
 52 |                                               filter (not . P.null)) $
 53 |                                             vectorOf numChunks
 54 |                                                      (sizedByteString
 55 |                                                           (n `div` numChunks))
 56 | 
 57 |   shrink = map L.fromChunks . shrink . L.toChunks
 58 | 
 59 | instance CoArbitrary L.ByteString where
 60 |   coarbitrary s = coarbitrary (L.unpack s)
 61 | 
 62 | newtype CByteString = CByteString P.ByteString
 63 |   deriving Show
 64 | 
 65 | instance Arbitrary CByteString where
 66 |   arbitrary = fmap (CByteString . P.pack . map fromCChar)
 67 |                    arbitrary
 68 |     where
 69 |       fromCChar :: NonZero CChar -> Word8
 70 |       fromCChar = fromIntegral . getNonZero
 71 | 
 72 | -- | 'Char', but only representing 8-bit characters.
 73 | --
 74 | newtype Char8 = Char8 Char
 75 |   deriving (Eq, Ord, Show)
 76 | 
 77 | instance Arbitrary Char8 where
 78 |   arbitrary = fmap (Char8 . toChar) arbitrary
 79 |     where
 80 |       toChar :: Word8 -> Char
 81 |       toChar = toEnum . fromIntegral
 82 |   shrink (Char8 c) = fmap Char8 (shrink c)
 83 | 
 84 | instance CoArbitrary Char8 where
 85 |   coarbitrary (Char8 c) = coarbitrary c
 86 | 
 87 | -- | 'Char', but only representing 8-bit characters.
 88 | --
 89 | newtype String8 = String8 String
 90 |   deriving (Eq, Ord, Show)
 91 | 
 92 | instance Arbitrary String8 where
 93 |   arbitrary = fmap (String8 . map toChar) arbitrary
 94 |     where
 95 |       toChar :: Word8 -> Char
 96 |       toChar = toEnum . fromIntegral
 97 |   shrink (String8 xs) = fmap String8 (shrink xs)
 98 | 
 99 | -- | If a test takes O(n^2) time or memory, it's useful to wrap its inputs
100 | -- into 'Sqrt' so that increasing number of tests affects run time linearly.
101 | newtype Sqrt a = Sqrt { unSqrt :: a }
102 |   deriving (Eq, Show)
103 | 
104 | instance Arbitrary a => Arbitrary (Sqrt a) where
105 |   arbitrary = Sqrt <$> sized
106 |     (\n -> resize (round @Double $ sqrt $ fromIntegral @Int n) arbitrary)
107 |   shrink = map Sqrt . shrink . unSqrt
108 | 
109 | 
110 | sizedShortByteString :: Int -> Gen SB.ShortByteString
111 | sizedShortByteString n = do m <- choose(0, n)
112 |                             fmap SB.pack $ vectorOf m arbitrary
113 | 
114 | instance Arbitrary SB.ShortByteString where
115 |   arbitrary = sized sizedShortByteString
116 |   shrink = map SB.pack . shrink . SB.unpack
117 | 
118 | instance CoArbitrary SB.ShortByteString where
119 |   coarbitrary s = coarbitrary (SB.unpack s)
120 | 
121 | -- | This /poison instance/ exists to make accidental mis-use
122 | -- of the @Arbitrary Int64@ instance a bit less likely.
123 | instance {-# OVERLAPPING #-}
124 |   TypeError (Text "Found a test taking a raw Int64 argument."
125 |     :$$: Text "'instance Arbitrary Int64' by default is likely to"
126 |     :$$: Text "produce very large numbers after the first few tests,"
127 |     :$$: Text "which doesn't make great indices into a LazyByteString."
128 |     :$$: Text "For indices, try 'intToIndexTy' in Properties/ByteString.hs."
129 |     :$$: Text ""
130 |     :$$: Text "If very few small-numbers tests is OK, use"
131 |     :$$: Text "'int64OK' to bypass this poison-instance."
132 |   ) => Testable (Int64 -> prop) where
133 |   property = error "poison instance Testable (Int64 -> prop)"
134 | 
135 | -- | Use this to bypass the poison instance for @Testable (Int64 -> prop)@
136 | -- defined in "QuickCheckUtils".
137 | int64OK :: (Arbitrary a, Show a, Testable b) => (a -> b) -> Property
138 | int64OK f = propertyForAllShrinkShow arbitrary shrink (\v -> [show v]) f
139 | 
140 | tooStrictErr :: forall a. HasCallStack => a
141 | tooStrictErr = withFrozenCallStack $
142 |   error "A lazy sub-expression was unexpectedly evaluated"
143 | 


--------------------------------------------------------------------------------
/tests/builder/Data/ByteString/Builder/Prim/Tests.hs:
--------------------------------------------------------------------------------
  1 | -- |
  2 | -- Copyright   : (c) 2011 Simon Meier
  3 | -- License     : BSD3-style (see LICENSE)
  4 | --
  5 | -- Maintainer  : Simon Meier <iridcode@gmail.com>
  6 | -- Stability   : experimental
  7 | -- Portability : tested on GHC only
  8 | --
  9 | -- Testing all encodings provided by this library.
 10 | 
 11 | module Data.ByteString.Builder.Prim.Tests (tests) where
 12 | 
 13 | import           Data.Char  (ord)
 14 | import qualified Data.ByteString.Lazy                  as L
 15 | import qualified Data.ByteString.Lazy.Char8            as LC
 16 | import           Data.ByteString.Builder
 17 | import qualified Data.ByteString.Builder.Prim          as BP
 18 | import           Data.ByteString.Builder.Prim.TestUtils
 19 | 
 20 | import           Test.Tasty
 21 | import           Test.Tasty.QuickCheck
 22 | 
 23 | tests :: [TestTree]
 24 | tests = concat [ testsBinary, testsASCII, testsChar8, testsUtf8
 25 |                , testsCombinatorsB, [testCString, testCStringUtf8] ]
 26 | 
 27 | testCString :: TestTree
 28 | testCString = testProperty "cstring" $
 29 |     toLazyByteString (BP.cstring "hello world!"#) ==
 30 |       LC.pack "hello" `L.append` L.singleton 0x20 `L.append` LC.pack "world!"
 31 | 
 32 | testCStringUtf8 :: TestTree
 33 | testCStringUtf8 = testProperty "cstringUtf8" $
 34 |     toLazyByteString (BP.cstringUtf8 "hello\xc0\x80world!"#) ==
 35 |       LC.pack "hello" `L.append` L.singleton 0x00 `L.append` LC.pack "world!"
 36 | 
 37 | ------------------------------------------------------------------------------
 38 | -- Binary
 39 | ------------------------------------------------------------------------------
 40 | 
 41 | testsBinary :: [TestTree]
 42 | testsBinary =
 43 |   [ testBoundedF "word8"     bigEndian_list    BP.word8
 44 |   , testBoundedF "int8"      bigEndian_list    BP.int8
 45 | 
 46 |   --  big-endian
 47 |   , testBoundedF "int16BE"   bigEndian_list    BP.int16BE
 48 |   , testBoundedF "int32BE"   bigEndian_list    BP.int32BE
 49 |   , testBoundedF "int64BE"   bigEndian_list    BP.int64BE
 50 | 
 51 |   , testBoundedF "word16BE"  bigEndian_list    BP.word16BE
 52 |   , testBoundedF "word32BE"  bigEndian_list    BP.word32BE
 53 |   , testBoundedF "word64BE"  bigEndian_list    BP.word64BE
 54 | 
 55 |   , testF "floatLE"     (float_list  littleEndian_list) BP.floatLE
 56 |   , testF "doubleLE"    (double_list littleEndian_list) BP.doubleLE
 57 | 
 58 |   --  little-endian
 59 |   , testBoundedF "int16LE"   littleEndian_list BP.int16LE
 60 |   , testBoundedF "int32LE"   littleEndian_list BP.int32LE
 61 |   , testBoundedF "int64LE"   littleEndian_list BP.int64LE
 62 | 
 63 |   , testBoundedF "word16LE"  littleEndian_list BP.word16LE
 64 |   , testBoundedF "word32LE"  littleEndian_list BP.word32LE
 65 |   , testBoundedF "word64LE"  littleEndian_list BP.word64LE
 66 | 
 67 |   , testF "floatBE"     (float_list  bigEndian_list)   BP.floatBE
 68 |   , testF "doubleBE"    (double_list bigEndian_list)   BP.doubleBE
 69 | 
 70 |   --  host dependent
 71 |   , testBoundedF "int16Host"   hostEndian_list  BP.int16Host
 72 |   , testBoundedF "int32Host"   hostEndian_list  BP.int32Host
 73 |   , testBoundedF "int64Host"   hostEndian_list  BP.int64Host
 74 |   , testBoundedF "intHost"     hostEndian_list  BP.intHost
 75 | 
 76 |   , testBoundedF "word16Host"  hostEndian_list  BP.word16Host
 77 |   , testBoundedF "word32Host"  hostEndian_list  BP.word32Host
 78 |   , testBoundedF "word64Host"  hostEndian_list  BP.word64Host
 79 |   , testBoundedF "wordHost"    hostEndian_list  BP.wordHost
 80 | 
 81 |   , testF "floatHost"   (float_list  hostEndian_list)   BP.floatHost
 82 |   , testF "doubleHost"  (double_list hostEndian_list)   BP.doubleHost
 83 |   ]
 84 | 
 85 | 
 86 | ------------------------------------------------------------------------------
 87 | -- Latin-1  aka  Char8
 88 | ------------------------------------------------------------------------------
 89 | 
 90 | testsChar8 :: [TestTree]
 91 | testsChar8 =
 92 |   [ testBoundedF "char8"     char8_list        BP.char8  ]
 93 | 
 94 | 
 95 | ------------------------------------------------------------------------------
 96 | -- ASCII
 97 | ------------------------------------------------------------------------------
 98 | 
 99 | testsASCII :: [TestTree]
100 | testsASCII =
101 |   [ testBoundedF "char7" char7_list BP.char7
102 | 
103 |   , testBoundedB "int8Dec"   dec_list BP.int8Dec
104 |   , testBoundedB "int16Dec"  dec_list BP.int16Dec
105 |   , testBoundedB "int32Dec"  dec_list BP.int32Dec
106 |   , testBoundedB "int64Dec"  dec_list BP.int64Dec
107 |   , testBoundedB "intDec"    dec_list BP.intDec
108 | 
109 |   , testBoundedB "word8Dec"  dec_list BP.word8Dec
110 |   , testBoundedB "word16Dec" dec_list BP.word16Dec
111 |   , testBoundedB "word32Dec" dec_list BP.word32Dec
112 |   , testBoundedB "word64Dec" dec_list BP.word64Dec
113 |   , testBoundedB "wordDec"   dec_list BP.wordDec
114 | 
115 |   , testBoundedB "word8Hex"  hex_list BP.word8Hex
116 |   , testBoundedB "word16Hex" hex_list BP.word16Hex
117 |   , testBoundedB "word32Hex" hex_list BP.word32Hex
118 |   , testBoundedB "word64Hex" hex_list BP.word64Hex
119 |   , testBoundedB "wordHex"   hex_list BP.wordHex
120 | 
121 |   , testBoundedF "word8HexFixed"  wordHexFixed_list BP.word8HexFixed
122 |   , testBoundedF "word16HexFixed" wordHexFixed_list BP.word16HexFixed
123 |   , testBoundedF "word32HexFixed" wordHexFixed_list BP.word32HexFixed
124 |   , testBoundedF "word64HexFixed" wordHexFixed_list BP.word64HexFixed
125 | 
126 |   , testBoundedF "int8HexFixed"  int8HexFixed_list  BP.int8HexFixed
127 |   , testBoundedF "int16HexFixed" int16HexFixed_list BP.int16HexFixed
128 |   , testBoundedF "int32HexFixed" int32HexFixed_list BP.int32HexFixed
129 |   , testBoundedF "int64HexFixed" int64HexFixed_list BP.int64HexFixed
130 | 
131 |   , testF "floatHexFixed"  floatHexFixed_list  BP.floatHexFixed
132 |   , testF "doubleHexFixed" doubleHexFixed_list BP.doubleHexFixed
133 |   ]
134 | 
135 | 
136 | ------------------------------------------------------------------------------
137 | -- UTF-8
138 | ------------------------------------------------------------------------------
139 | 
140 | testsUtf8 :: [TestTree]
141 | testsUtf8 =
142 |   [ testBoundedB "charUtf8"  charUtf8_list  BP.charUtf8 ]
143 | 
144 | 
145 | ------------------------------------------------------------------------------
146 | -- BoundedPrim combinators
147 | ------------------------------------------------------------------------------
148 | 
149 | maybeB :: BP.BoundedPrim () -> BP.BoundedPrim a -> BP.BoundedPrim (Maybe a)
150 | maybeB nothing just = maybe (Left ()) Right BP.>$< BP.eitherB nothing just
151 | 
152 | testsCombinatorsB :: [TestTree]
153 | testsCombinatorsB =
154 |   [ compareImpls "mapMaybe (via BoundedPrim)"
155 |         (L.pack . concatMap encChar)
156 |         (toLazyByteString . encViaBuilder)
157 | 
158 |   , compareImpls "filter (via BoundedPrim)"
159 |         (L.pack . filter (< 32))
160 |         (toLazyByteString . BP.primMapListBounded (BP.condB (< 32) (BP.liftFixedToBounded BP.word8) BP.emptyB))
161 | 
162 |   , compareImpls "pairB"
163 |         (L.pack . concatMap (\(c,w) -> charUtf8_list c ++ [w]))
164 |         (toLazyByteString . BP.primMapListBounded
165 |             ((\(c,w) -> (c,(w,undefined))) BP.>$<
166 |                 BP.charUtf8 BP.>*< (BP.liftFixedToBounded BP.word8) BP.>*< (BP.liftFixedToBounded BP.emptyF)))
167 |   ]
168 |   where
169 |     encChar = maybe [112] (hostEndian_list . ord)
170 | 
171 |     encViaBuilder = BP.primMapListBounded $ maybeB (BP.liftFixedToBounded $ (\_ -> 112) BP.>$< BP.word8)
172 |                                                 (ord BP.>$< (BP.liftFixedToBounded $ BP.intHost))
173 | 


--------------------------------------------------------------------------------