├── .cirrus.yml ├── .github └── workflows │ └── ci.yml ├── .gitignore ├── .hlint.yaml ├── Changelog.md ├── Data ├── ByteString.hs └── ByteString │ ├── Builder.hs │ ├── Builder │ ├── ASCII.hs │ ├── Extra.hs │ ├── Internal.hs │ ├── Prim.hs │ ├── Prim │ │ ├── ASCII.hs │ │ ├── Binary.hs │ │ ├── Internal.hs │ │ └── Internal │ │ │ ├── Base16.hs │ │ │ └── Floating.hs │ ├── RealFloat.hs │ └── RealFloat │ │ ├── D2S.hs │ │ ├── F2S.hs │ │ ├── Internal.hs │ │ └── TableGenerator.hs │ ├── Char8.hs │ ├── Internal.hs │ ├── Internal │ ├── Pure.hs │ └── Type.hs │ ├── Lazy.hs │ ├── Lazy │ ├── Char8.hs │ ├── Internal.hs │ ├── ReadInt.hs │ └── ReadNat.hs │ ├── ReadInt.hs │ ├── ReadNat.hs │ ├── Short.hs │ ├── Short │ └── Internal.hs │ ├── Unsafe.hs │ └── Utils │ ├── ByteOrder.hs │ └── UnalignedAccess.hs ├── LICENSE ├── README.md ├── Setup.hs ├── bench ├── BenchAll.hs ├── BenchBoundsCheckFusion.hs ├── BenchCSV.hs ├── BenchCount.hs ├── BenchIndices.hs ├── BenchReadInt.hs └── BenchShort.hs ├── bytestring.cabal ├── cabal.project.wasi ├── cbits ├── aarch64 │ └── is-valid-utf8.c ├── aligned-static-hs-data.c ├── fpstring.c ├── is-valid-utf8.c ├── itoa.c └── shortbytestring.c ├── include ├── bytestring-cpp-macros.h └── fpstring.h └── tests ├── .gitignore ├── Builder.hs ├── IsValidUtf8.hs ├── LazyHClose.hs ├── Lift.hs ├── Main.hs ├── Properties.hs ├── Properties ├── ByteString.hs ├── ByteStringChar8.hs ├── ByteStringLazy.hs ├── ByteStringLazyChar8.hs └── ShortByteString.hs ├── QuickCheckUtils.hs └── builder └── Data └── ByteString └── Builder ├── Prim ├── TestUtils.hs └── Tests.hs └── Tests.hs /.cirrus.yml: -------------------------------------------------------------------------------- 1 | task: 2 | name: FreeBSD 3 | freebsd_instance: 4 | image_family: freebsd-14-0 5 | install_script: 6 | - pkg install -y git gmake 7 | - curl --proto '=https' --tlsv1.2 -sSf https://get-ghcup.haskell.org | BOOTSTRAP_HASKELL_NONINTERACTIVE=1 BOOTSTRAP_HASKELL_INSTALL_NO_STACK=1 sh 8 | script: 9 | - export PATH="$HOME/.ghcup/bin:$PATH" 10 | - cabal test --test-show-details=direct 11 | 12 | task: 13 | name: OpenBSD 14 | compute_engine_instance: 15 | image_project: pg-ci-images 16 | # See https://github.com/anarazel/pg-vm-images/blob/main/packer/openbsd.pkrvars.hcl 17 | image: family/pg-ci-openbsd-vanilla 18 | platform: openbsd 19 | install_script: pkg_add ghc cabal-install git 20 | script: 21 | - export CABAL_DIR=/tmp/.cabal 22 | - cabal update 23 | - cabal test --test-show-details=direct 24 | 25 | task: 26 | name: NetBSD 27 | compute_engine_instance: 28 | image_project: pg-ci-images 29 | # See https://github.com/anarazel/pg-vm-images/blob/main/packer/netbsd.pkrvars.hcl 30 | image: family/pg-ci-netbsd-vanilla 31 | platform: netbsd 32 | install_script: 33 | - export PKG_PATH="http://cdn.NetBSD.org/pub/pkgsrc/packages/NetBSD/$(uname -p)/$(uname -r|cut -f '1 2' -d.)/All/" 34 | - pkg_add ghc cabal-install git 35 | script: 36 | - export CABAL_DIR=/tmp/.cabal 37 | - cabal update 38 | - cabal test --test-show-details=direct 39 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: ci 2 | on: 3 | push: 4 | branches: 5 | - master 6 | - bytestring-0.11 7 | - bytestring-0.12 8 | pull_request: {} # Validate all PRs 9 | 10 | defaults: 11 | run: 12 | shell: bash 13 | 14 | jobs: 15 | build: 16 | runs-on: ${{ matrix.os }} 17 | strategy: 18 | fail-fast: true 19 | matrix: 20 | os: [ubuntu-latest] 21 | ghc: ['8.4', '8.6', '8.8', '8.10', '9.0', '9.2', '9.4', '9.6', '9.8', '9.10', '9.12'] 22 | include: 23 | - os: macOS-latest 24 | ghc: 'latest' 25 | steps: 26 | - uses: actions/checkout@v4 27 | - uses: haskell-actions/setup@v2 28 | id: setup-haskell-cabal 29 | with: 30 | ghc-version: ${{ matrix.ghc }} 31 | - name: Update cabal package database 32 | run: cabal update 33 | - uses: actions/cache@v3 34 | name: Cache cabal stuff 35 | with: 36 | path: | 37 | ${{ steps.setup-haskell-cabal.outputs.cabal-store }} 38 | dist-newstyle 39 | key: ${{ runner.os }}-${{ matrix.ghc }} 40 | - name: Test 41 | run: | 42 | cabal sdist -z -o . 43 | cabal get bytestring-*.tar.gz 44 | cd bytestring-*/ 45 | cabal build bytestring:tests --enable-tests --enable-benchmarks 46 | cabal test --enable-tests --enable-benchmarks --test-show-details=direct all 47 | - name: Bench 48 | run: | 49 | cd bytestring-*/ 50 | cabal bench --enable-tests --enable-benchmarks --benchmark-option=-l all 51 | - name: Haddock 52 | run: | 53 | cd bytestring-*/ 54 | cabal haddock all 55 | - name: Cabal check 56 | run: | 57 | cd bytestring-*/ 58 | cabal check 59 | 60 | windows-build: 61 | runs-on: windows-latest 62 | needs: build 63 | strategy: 64 | fail-fast: true 65 | matrix: 66 | ghc: ['9.2', '9.4', '9.6', '9.8', '9.10'] 67 | steps: 68 | - uses: actions/checkout@v4 69 | - uses: haskell-actions/setup@v2 70 | id: setup-haskell-cabal 71 | with: 72 | ghc-version: ${{ matrix.ghc }} 73 | - name: Update cabal package database 74 | run: cabal update 75 | - uses: actions/cache@v3 76 | name: Cache cabal stuff 77 | with: 78 | path: | 79 | ${{ steps.setup-haskell-cabal.outputs.cabal-store }} 80 | dist-newstyle 81 | key: ${{ runner.os }}-${{ matrix.ghc }} 82 | # We rebuild tests several times to avoid intermittent failures on Windows 83 | # https://github.com/haskell/actions/issues/36 84 | # We also use --enable-tests and --enable-benchmarks to avoid 85 | # test and bench commands from reconfiguring and thus rebuilding. 86 | - name: Build Test 87 | run: | 88 | cabal sdist -z -o . 89 | cabal get bytestring-*.tar.gz 90 | cd bytestring-*/ 91 | bld() { cabal build bytestring:tests --enable-tests --enable-benchmarks; } 92 | bld || bld || bld 93 | 94 | - name: Run Test 95 | # test broken linking on windows: https://github.com/haskell/bytestring/issues/497 96 | run: | 97 | cd bytestring-*/ 98 | $bin = cabal list-bin bytestring-tests 99 | $env:PATH = '' 100 | & "$bin" 101 | shell: pwsh 102 | 103 | - name: Bench 104 | run: | 105 | cd bytestring-*/ 106 | cabal bench --enable-tests --enable-benchmarks --benchmark-option=-l all 107 | - name: Haddock 108 | run: | 109 | cd bytestring-*/ 110 | cabal haddock all 111 | - name: Cabal check 112 | run: | 113 | cd bytestring-*/ 114 | cabal check 115 | 116 | # Emulation is incredibly slow and memory demanding. It seems that any 117 | # executable with GHC RTS takes at least 7-8 Gb of RAM, so we can run 118 | # `cabal` or `ghc` on their own, but cannot run them both at the same time, 119 | # striking out `cabal test`. Instead we rely on system packages and invoke 120 | # `ghc --make` manually, and even so `ghc -O` is prohibitively expensive. 121 | emulated: 122 | needs: build 123 | runs-on: ubuntu-latest 124 | strategy: 125 | fail-fast: true 126 | matrix: 127 | arch: ['s390x', 'ppc64le', 'riscv64'] 128 | steps: 129 | - uses: actions/checkout@v4 130 | - uses: uraimo/run-on-arch-action@v2.8.1 131 | timeout-minutes: 60 132 | with: 133 | arch: ${{ matrix.arch }} 134 | distro: ubuntu_rolling 135 | githubToken: ${{ github.token }} 136 | install: | 137 | apt-get update -y 138 | apt-get install -y ghc libghc-tasty-quickcheck-dev libghc-syb-dev 139 | run: | 140 | ghc --version 141 | ghc --make -fPIC -XHaskell2010 -XBangPatterns -XDeriveDataTypeable -XDeriveGeneric -XDeriveLift -XFlexibleContexts -XFlexibleInstances -XLambdaCase -XMagicHash -XMultiWayIf -XNamedFieldPuns -XPatternSynonyms -XRankNTypes -XScopedTypeVariables -XStandaloneDeriving -XTupleSections -XTypeApplications -XTypeOperators -XUnboxedTuples -optP-Wall -optP-Werror=undef -DPURE_HASKELL=0 -Iinclude -itests:tests/builder -o Main cbits/*.c tests/Main.hs +RTS -s 142 | ./Main +RTS -s 143 | 144 | bounds-checking: 145 | needs: build 146 | runs-on: ubuntu-latest 147 | steps: 148 | - uses: actions/checkout@v4 149 | - uses: haskell-actions/setup@v2 150 | id: setup-haskell-cabal 151 | with: 152 | ghc-version: 'latest' 153 | - name: Update cabal package database 154 | run: cabal update 155 | - uses: actions/cache@v3 156 | name: Cache cabal stuff 157 | with: 158 | path: | 159 | ${{ steps.setup-haskell-cabal.outputs.cabal-store }} 160 | dist-newstyle 161 | key: ${{ runner.os }}-latest 162 | - name: Test 163 | run: cabal test --ghc-options='-fcheck-prim-bounds -fno-ignore-asserts -DHS_BYTESTRING_ASSERTIONS' 164 | 165 | pure-haskell: 166 | needs: build 167 | runs-on: ubuntu-latest 168 | steps: 169 | - uses: actions/checkout@v4 170 | - uses: haskell-actions/setup@v2 171 | id: setup-haskell-cabal 172 | with: 173 | ghc-version: 'latest' 174 | - name: Update cabal package database 175 | run: cabal update 176 | - uses: actions/cache@v3 177 | name: Cache cabal stuff 178 | with: 179 | path: | 180 | ${{ steps.setup-haskell-cabal.outputs.cabal-store }} 181 | dist-newstyle 182 | key: ${{ runner.os }}-latest-pure-haskell 183 | - name: Test 184 | run: cabal test -fpure-haskell --ghc-options=-fno-ignore-asserts --enable-tests --test-show-details=direct all 185 | 186 | i386: 187 | needs: build 188 | runs-on: ubuntu-latest 189 | container: 190 | image: i386/ubuntu:bionic 191 | steps: 192 | - name: Install 193 | run: | 194 | apt-get update -y 195 | apt-get install -y autoconf build-essential zlib1g-dev libgmp-dev curl libncurses5 libtinfo5 libncurses5-dev libtinfo-dev 196 | curl --proto '=https' --tlsv1.2 -sSf https://get-ghcup.haskell.org | BOOTSTRAP_HASKELL_NONINTERACTIVE=1 BOOTSTRAP_HASKELL_INSTALL_NO_STACK=1 sh 197 | - uses: actions/checkout@v1 #This version must stay old enough to remain compatible with the container image 198 | - name: Test 199 | run: | 200 | source ~/.ghcup/env 201 | cabal update 202 | cabal test 203 | 204 | # We use github.com/haskell self-hosted runners for ARM testing. 205 | # If they become unavailable in future, put ['armv7', 'aarch64'] 206 | # back to emulation jobs above. 207 | arm: 208 | needs: build 209 | runs-on: [self-hosted, Linux, ARM64] 210 | strategy: 211 | fail-fast: true 212 | matrix: 213 | arch: [arm32v7, arm64v8] 214 | steps: 215 | - uses: docker://hasufell/arm64v8-ubuntu-haskell:focal 216 | name: Cleanup 217 | with: 218 | args: "find . -mindepth 1 -maxdepth 1 -exec rm -rf -- {} +" 219 | 220 | - name: Checkout code 221 | uses: actions/checkout@v4 222 | 223 | - if: matrix.arch == 'arm32v7' 224 | uses: docker://hasufell/arm32v7-ubuntu-haskell:focal 225 | name: Run build (arm32v7 linux) 226 | with: 227 | args: sh -c "cabal update && cabal test" 228 | 229 | - if: matrix.arch == 'arm64v8' 230 | uses: docker://hasufell/arm64v8-ubuntu-haskell:focal 231 | name: Run build (arm64v8 linux) 232 | with: 233 | args: sh -c "ghcup install ghc 9.10.1 && cabal update && cabal test -w ~/.ghcup/bin/ghc-9.10.1" 234 | 235 | wasi: 236 | runs-on: ubuntu-latest 237 | needs: build 238 | env: 239 | GHC_WASM_META_REV: a04cc1a2206d2030326e1d49be9c6a94ee4283a3 240 | strategy: 241 | matrix: 242 | ghc: ['9.10'] 243 | fail-fast: false 244 | steps: 245 | - name: setup-ghc-wasm32-wasi 246 | run: | 247 | cd $(mktemp -d) 248 | curl -L https://gitlab.haskell.org/ghc/ghc-wasm-meta/-/archive/$GHC_WASM_META_REV/ghc-wasm-meta.tar.gz | tar xz --strip-components=1 249 | ./setup.sh 250 | ~/.ghc-wasm/add_to_github_path.sh 251 | env: 252 | FLAVOUR: ${{ matrix.ghc }} 253 | - uses: actions/checkout@v4 254 | - uses: actions/cache@v3 255 | with: 256 | path: | 257 | ~/.ghc-wasm/.cabal/store 258 | key: wasi-${{ runner.os }}-${{ env.GHC_WASM_META_REV }}-flavour-${{ matrix.ghc }}-${{ github.sha }} 259 | restore-keys: | 260 | wasi-${{ runner.os }}-${{ env.GHC_WASM_META_REV }}-flavour-${{ matrix.ghc }}- 261 | - name: Build 262 | run: | 263 | mv cabal.project.wasi cabal.project.local 264 | wasm32-wasi-cabal build --enable-tests 265 | wasm32-wasi-cabal list-bin test:bytestring-tests 266 | - name: Test 267 | run: | 268 | wasmtime.sh $(wasm32-wasi-cabal list-bin test:bytestring-tests) 269 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | GNUmakefile 2 | cabal-dev 3 | dist 4 | dist-install 5 | ghc.mk 6 | .cabal-sandbox 7 | cabal.sandbox.config 8 | .hsenv 9 | *~ 10 | dist-newstyle/ 11 | cabal.project.local* 12 | .nvimrc 13 | .ghc.environment* 14 | -------------------------------------------------------------------------------- /.hlint.yaml: -------------------------------------------------------------------------------- 1 | - ignore: 2 | name: Use camelCase 3 | within: 4 | - Data.ByteString.Builder.Internal 5 | - Data.ByteString.Short.Internal 6 | - ignore: 7 | name: Use fewer imports 8 | within: 9 | - Data.ByteString 10 | - Data.ByteString.Internal 11 | - Data.ByteString.Short.Internal 12 | - ignore: 13 | name: Avoid lambda 14 | within: 15 | - Data.ByteString.Builder.Internal 16 | - Data.ByteString.Builder.Prim 17 | - ignore: 18 | name: Reduce duplication 19 | within: 20 | - Data.ByteString 21 | - ignore: 22 | name: Redundant lambda 23 | within: 24 | - Data.ByteString.Builder.Internal 25 | - Data.ByteString 26 | -------------------------------------------------------------------------------- /Data/ByteString/Builder/ASCII.hs: -------------------------------------------------------------------------------- 1 | {-# OPTIONS_HADDOCK not-home #-} 2 | 3 | -- | Copyright : (c) 2010 - 2011 Simon Meier 4 | -- License : BSD3-style (see LICENSE) 5 | -- 6 | -- Maintainer : Simon Meier 7 | -- Portability : GHC 8 | -- 9 | -- Constructing 'Builder's using ASCII-based encodings. 10 | -- 11 | module Data.ByteString.Builder.ASCII 12 | ( 13 | -- ** Formatting numbers as text 14 | -- | Formatting of numbers as ASCII text. 15 | -- 16 | -- Note that you can also use these functions for the ISO/IEC 8859-1 and 17 | -- UTF-8 encodings, as the ASCII encoding is equivalent on the 18 | -- codepoints 0-127. 19 | 20 | -- *** Decimal numbers 21 | -- | Decimal encoding of numbers using ASCII encoded characters. 22 | int8Dec 23 | , int16Dec 24 | , int32Dec 25 | , int64Dec 26 | , intDec 27 | , integerDec 28 | 29 | , word8Dec 30 | , word16Dec 31 | , word32Dec 32 | , word64Dec 33 | , wordDec 34 | 35 | , floatDec 36 | , doubleDec 37 | 38 | -- *** Hexadecimal numbers 39 | 40 | -- | Encoding positive integers as hexadecimal numbers using lower-case 41 | -- ASCII characters. The shortest 42 | -- possible representation is used. For example, 43 | -- 44 | -- >>> toLazyByteString (word16Hex 0x0a10) 45 | -- Chunk "a10" Empty 46 | -- 47 | -- Note that there is no support for using upper-case characters. Please 48 | -- contact the maintainer, if your application cannot work without 49 | -- hexadecimal encodings that use upper-case characters. 50 | -- 51 | , word8Hex 52 | , word16Hex 53 | , word32Hex 54 | , word64Hex 55 | , wordHex 56 | 57 | -- *** Fixed-width hexadecimal numbers 58 | -- 59 | , int8HexFixed 60 | , int16HexFixed 61 | , int32HexFixed 62 | , int64HexFixed 63 | , word8HexFixed 64 | , word16HexFixed 65 | , word32HexFixed 66 | , word64HexFixed 67 | 68 | , floatHexFixed 69 | , doubleHexFixed 70 | 71 | , byteStringHex 72 | , lazyByteStringHex 73 | 74 | ) where 75 | 76 | import Data.ByteString as S 77 | import Data.ByteString.Lazy as L 78 | import Data.ByteString.Builder.Internal (Builder) 79 | import qualified Data.ByteString.Builder.Prim as P 80 | import qualified Data.ByteString.Builder.Prim.Internal as P 81 | import Data.ByteString.Builder.RealFloat (floatDec, doubleDec) 82 | import Data.ByteString.Internal.Type (c_uint32_dec_padded9, c_uint64_dec_padded18) 83 | 84 | import Foreign 85 | import Data.List.NonEmpty (NonEmpty(..)) 86 | 87 | ------------------------------------------------------------------------------ 88 | -- Decimal Encoding 89 | ------------------------------------------------------------------------------ 90 | 91 | -- Signed integers 92 | ------------------ 93 | 94 | -- | Decimal encoding of an 'Int8' using the ASCII digits. 95 | -- 96 | -- e.g. 97 | -- 98 | -- > toLazyByteString (int8Dec 42) = "42" 99 | -- > toLazyByteString (int8Dec (-1)) = "-1" 100 | -- 101 | {-# INLINE int8Dec #-} 102 | int8Dec :: Int8 -> Builder 103 | int8Dec = P.primBounded P.int8Dec 104 | 105 | -- | Decimal encoding of an 'Int16' using the ASCII digits. 106 | {-# INLINE int16Dec #-} 107 | int16Dec :: Int16 -> Builder 108 | int16Dec = P.primBounded P.int16Dec 109 | 110 | -- | Decimal encoding of an 'Int32' using the ASCII digits. 111 | {-# INLINE int32Dec #-} 112 | int32Dec :: Int32 -> Builder 113 | int32Dec = P.primBounded P.int32Dec 114 | 115 | -- | Decimal encoding of an 'Int64' using the ASCII digits. 116 | {-# INLINE int64Dec #-} 117 | int64Dec :: Int64 -> Builder 118 | int64Dec = P.primBounded P.int64Dec 119 | 120 | -- | Decimal encoding of an 'Int' using the ASCII digits. 121 | {-# INLINE intDec #-} 122 | intDec :: Int -> Builder 123 | intDec = P.primBounded P.intDec 124 | 125 | 126 | -- Unsigned integers 127 | -------------------- 128 | 129 | -- | Decimal encoding of a 'Word8' using the ASCII digits. 130 | {-# INLINE word8Dec #-} 131 | word8Dec :: Word8 -> Builder 132 | word8Dec = P.primBounded P.word8Dec 133 | 134 | -- | Decimal encoding of a 'Word16' using the ASCII digits. 135 | {-# INLINE word16Dec #-} 136 | word16Dec :: Word16 -> Builder 137 | word16Dec = P.primBounded P.word16Dec 138 | 139 | -- | Decimal encoding of a 'Word32' using the ASCII digits. 140 | {-# INLINE word32Dec #-} 141 | word32Dec :: Word32 -> Builder 142 | word32Dec = P.primBounded P.word32Dec 143 | 144 | -- | Decimal encoding of a 'Word64' using the ASCII digits. 145 | {-# INLINE word64Dec #-} 146 | word64Dec :: Word64 -> Builder 147 | word64Dec = P.primBounded P.word64Dec 148 | 149 | -- | Decimal encoding of a 'Word' using the ASCII digits. 150 | {-# INLINE wordDec #-} 151 | wordDec :: Word -> Builder 152 | wordDec = P.primBounded P.wordDec 153 | 154 | 155 | ------------------------------------------------------------------------------ 156 | -- Hexadecimal Encoding 157 | ------------------------------------------------------------------------------ 158 | 159 | -- without lead 160 | --------------- 161 | 162 | -- | Shortest hexadecimal encoding of a 'Word8' using lower-case characters. 163 | {-# INLINE word8Hex #-} 164 | word8Hex :: Word8 -> Builder 165 | word8Hex = P.primBounded P.word8Hex 166 | 167 | -- | Shortest hexadecimal encoding of a 'Word16' using lower-case characters. 168 | {-# INLINE word16Hex #-} 169 | word16Hex :: Word16 -> Builder 170 | word16Hex = P.primBounded P.word16Hex 171 | 172 | -- | Shortest hexadecimal encoding of a 'Word32' using lower-case characters. 173 | {-# INLINE word32Hex #-} 174 | word32Hex :: Word32 -> Builder 175 | word32Hex = P.primBounded P.word32Hex 176 | 177 | -- | Shortest hexadecimal encoding of a 'Word64' using lower-case characters. 178 | {-# INLINE word64Hex #-} 179 | word64Hex :: Word64 -> Builder 180 | word64Hex = P.primBounded P.word64Hex 181 | 182 | -- | Shortest hexadecimal encoding of a 'Word' using lower-case characters. 183 | {-# INLINE wordHex #-} 184 | wordHex :: Word -> Builder 185 | wordHex = P.primBounded P.wordHex 186 | 187 | 188 | -- fixed width; leading zeroes 189 | ------------------------------ 190 | 191 | -- | Encode a 'Int8' using 2 nibbles (hexadecimal digits). 192 | {-# INLINE int8HexFixed #-} 193 | int8HexFixed :: Int8 -> Builder 194 | int8HexFixed = P.primFixed P.int8HexFixed 195 | 196 | -- | Encode a 'Int16' using 4 nibbles. 197 | {-# INLINE int16HexFixed #-} 198 | int16HexFixed :: Int16 -> Builder 199 | int16HexFixed = P.primFixed P.int16HexFixed 200 | 201 | -- | Encode a 'Int32' using 8 nibbles. 202 | {-# INLINE int32HexFixed #-} 203 | int32HexFixed :: Int32 -> Builder 204 | int32HexFixed = P.primFixed P.int32HexFixed 205 | 206 | -- | Encode a 'Int64' using 16 nibbles. 207 | {-# INLINE int64HexFixed #-} 208 | int64HexFixed :: Int64 -> Builder 209 | int64HexFixed = P.primFixed P.int64HexFixed 210 | 211 | -- | Encode a 'Word8' using 2 nibbles (hexadecimal digits). 212 | {-# INLINE word8HexFixed #-} 213 | word8HexFixed :: Word8 -> Builder 214 | word8HexFixed = P.primFixed P.word8HexFixed 215 | 216 | -- | Encode a 'Word16' using 4 nibbles. 217 | {-# INLINE word16HexFixed #-} 218 | word16HexFixed :: Word16 -> Builder 219 | word16HexFixed = P.primFixed P.word16HexFixed 220 | 221 | -- | Encode a 'Word32' using 8 nibbles. 222 | {-# INLINE word32HexFixed #-} 223 | word32HexFixed :: Word32 -> Builder 224 | word32HexFixed = P.primFixed P.word32HexFixed 225 | 226 | -- | Encode a 'Word64' using 16 nibbles. 227 | {-# INLINE word64HexFixed #-} 228 | word64HexFixed :: Word64 -> Builder 229 | word64HexFixed = P.primFixed P.word64HexFixed 230 | 231 | -- | Encode an IEEE 'Float' using 8 nibbles. 232 | {-# INLINE floatHexFixed #-} 233 | floatHexFixed :: Float -> Builder 234 | floatHexFixed = P.primFixed P.floatHexFixed 235 | 236 | -- | Encode an IEEE 'Double' using 16 nibbles. 237 | {-# INLINE doubleHexFixed #-} 238 | doubleHexFixed :: Double -> Builder 239 | doubleHexFixed = P.primFixed P.doubleHexFixed 240 | 241 | -- | Encode each byte of a 'S.StrictByteString' using its fixed-width hex encoding. 242 | {-# NOINLINE byteStringHex #-} -- share code 243 | byteStringHex :: S.StrictByteString -> Builder 244 | byteStringHex = P.primMapByteStringFixed P.word8HexFixed 245 | 246 | -- | Encode each byte of a 'L.LazyByteString' using its fixed-width hex encoding. 247 | {-# NOINLINE lazyByteStringHex #-} -- share code 248 | lazyByteStringHex :: L.LazyByteString -> Builder 249 | lazyByteStringHex = P.primMapLazyByteStringFixed P.word8HexFixed 250 | 251 | 252 | ------------------------------------------------------------------------------ 253 | -- Fast decimal 'Integer' encoding. 254 | ------------------------------------------------------------------------------ 255 | 256 | -- An optimized version of the integer serialization code 257 | -- in blaze-textual (c) 2011 MailRank, Inc. Bryan O'Sullivan 258 | -- . It is 2.5x faster on Int-sized integers and 4.5x faster 259 | -- on larger integers. 260 | 261 | -- | Maximal power of 10 fitting into an 'Int' without using the MSB. 262 | -- 10 ^ 9 for 32 bit ints (31 * log 2 / log 10 = 9.33) 263 | -- 10 ^ 18 for 64 bit ints (63 * log 2 / log 10 = 18.96) 264 | -- 265 | -- FIXME: Think about also using the MSB. For 64 bit 'Int's this makes a 266 | -- difference. 267 | maxPow10 :: Integer 268 | maxPow10 = toInteger $ (10 :: Int) ^ P.caseWordSize_32_64 (9 :: Int) 18 269 | 270 | -- | Decimal encoding of an 'Integer' using the ASCII digits. 271 | integerDec :: Integer -> Builder 272 | integerDec i 273 | | i' <- fromInteger i, toInteger i' == i = intDec i' 274 | | i < 0 = P.primFixed P.char8 '-' `mappend` go (-i) 275 | | otherwise = go i 276 | where 277 | go :: Integer -> Builder 278 | go n = case putH (splitf (maxPow10 * maxPow10) n) of 279 | x:|xs -> intDec x `mappend` P.primMapListBounded intDecPadded xs 280 | 281 | splitf :: Integer -> Integer -> NonEmpty Integer 282 | splitf pow10 n0 283 | | pow10 > n0 = n0 :| [] 284 | | otherwise = splith (splitf (pow10 * pow10) n0) 285 | where 286 | splith (n:|ns) = 287 | case n `quotRem` pow10 of 288 | (q,r) | q > 0 -> q :| r : splitb ns 289 | | otherwise -> r :| splitb ns 290 | 291 | splitb [] = [] 292 | splitb (n:ns) = case n `quotRem` pow10 of 293 | (q,r) -> q : r : splitb ns 294 | 295 | putH :: NonEmpty Integer -> NonEmpty Int 296 | putH (n:|ns) = case n `quotRem` maxPow10 of 297 | (x,y) 298 | | q > 0 -> q :| r : putB ns 299 | | otherwise -> r :| putB ns 300 | where q = fromInteger x 301 | r = fromInteger y 302 | 303 | putB :: [Integer] -> [Int] 304 | putB [] = [] 305 | putB (n:ns) = case n `quotRem` maxPow10 of 306 | (q,r) -> fromInteger q : fromInteger r : putB ns 307 | 308 | 309 | {-# INLINE intDecPadded #-} 310 | intDecPadded :: P.BoundedPrim Int 311 | intDecPadded = P.liftFixedToBounded $ P.caseWordSize_32_64 312 | (P.fixedPrim 9 $ c_uint32_dec_padded9 . fromIntegral) 313 | (P.fixedPrim 18 $ c_uint64_dec_padded18 . fromIntegral) 314 | -------------------------------------------------------------------------------- /Data/ByteString/Builder/Extra.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE Trustworthy #-} 2 | 3 | ----------------------------------------------------------------------------- 4 | -- | Copyright : (c) 2010 Jasper Van der Jeugt 5 | -- (c) 2010-2011 Simon Meier 6 | -- License : BSD3-style (see LICENSE) 7 | -- 8 | -- Maintainer : Simon Meier 9 | -- Portability : GHC 10 | -- 11 | -- Extra functions for creating and executing 'Builder's. They are intended 12 | -- for application-specific fine-tuning the performance of 'Builder's. 13 | -- 14 | ----------------------------------------------------------------------------- 15 | module Data.ByteString.Builder.Extra 16 | ( 17 | -- * Execution strategies 18 | toLazyByteStringWith 19 | , AllocationStrategy 20 | , safeStrategy 21 | , untrimmedStrategy 22 | , smallChunkSize 23 | , defaultChunkSize 24 | 25 | -- * Controlling chunk boundaries 26 | , byteStringCopy 27 | , byteStringInsert 28 | , byteStringThreshold 29 | 30 | , lazyByteStringCopy 31 | , lazyByteStringInsert 32 | , lazyByteStringThreshold 33 | 34 | , flush 35 | 36 | -- * Low level execution 37 | , BufferWriter 38 | , Next(..) 39 | , runBuilder 40 | 41 | -- * Host-specific binary encodings 42 | , intHost 43 | , int16Host 44 | , int32Host 45 | , int64Host 46 | 47 | , wordHost 48 | , word16Host 49 | , word32Host 50 | , word64Host 51 | 52 | , floatHost 53 | , doubleHost 54 | 55 | ) where 56 | 57 | 58 | import Data.ByteString.Builder.Internal 59 | ( Builder, toLazyByteStringWith 60 | , AllocationStrategy, safeStrategy, untrimmedStrategy 61 | , smallChunkSize, defaultChunkSize, flush 62 | , byteStringCopy, byteStringInsert, byteStringThreshold 63 | , lazyByteStringCopy, lazyByteStringInsert, lazyByteStringThreshold ) 64 | 65 | import qualified Data.ByteString.Builder.Internal as I 66 | import qualified Data.ByteString.Builder.Prim as P 67 | import qualified Data.ByteString.Internal as S 68 | 69 | import Foreign 70 | 71 | ------------------------------------------------------------------------------ 72 | -- Builder execution public API 73 | ------------------------------------------------------------------------------ 74 | 75 | -- | A 'BufferWriter' represents the result of running a 'Builder'. 76 | -- It unfolds as a sequence of chunks of data. These chunks come in two forms: 77 | -- 78 | -- * an IO action for writing the Builder's data into a user-supplied memory 79 | -- buffer. 80 | -- 81 | -- * a pre-existing chunks of data represented by a 'S.StrictByteString' 82 | -- 83 | -- While this is rather low level, it provides you with full flexibility in 84 | -- how the data is written out. 85 | -- 86 | -- The 'BufferWriter' itself is an IO action: you supply it with a buffer 87 | -- (as a pointer and length) and it will write data into the buffer. 88 | -- It returns a number indicating how many bytes were actually written 89 | -- (which can be @0@). It also returns a 'Next' which describes what 90 | -- comes next. 91 | -- 92 | type BufferWriter = Ptr Word8 -> Int -> IO (Int, Next) 93 | 94 | -- | After running a 'BufferWriter' action there are three possibilities for 95 | -- what comes next: 96 | -- 97 | data Next = 98 | -- | This means we're all done. All the builder data has now been written. 99 | Done 100 | 101 | -- | This indicates that there may be more data to write. It 102 | -- gives you the next 'BufferWriter' action. You should call that action 103 | -- with an appropriate buffer. The int indicates the /minimum/ buffer size 104 | -- required by the next 'BufferWriter' action. That is, if you call the next 105 | -- action you /must/ supply it with a buffer length of at least this size. 106 | | More !Int BufferWriter 107 | 108 | -- | In addition to the data that has just been written into your buffer 109 | -- by the 'BufferWriter' action, it gives you a pre-existing chunk 110 | -- of data as a 'S.StrictByteString'. It also gives you the following 'BufferWriter' 111 | -- action. It is safe to run this following action using a buffer with as 112 | -- much free space as was left by the previous run action. 113 | | Chunk !S.StrictByteString BufferWriter 114 | 115 | -- | Turn a 'Builder' into its initial 'BufferWriter' action. 116 | -- 117 | runBuilder :: Builder -> BufferWriter 118 | runBuilder = run . I.runBuilder 119 | where 120 | bytesWritten startPtr endPtr = endPtr `minusPtr` startPtr 121 | 122 | run :: I.BuildStep () -> BufferWriter 123 | run step = \buf len -> 124 | let doneH endPtr () = 125 | let !wc = bytesWritten buf endPtr 126 | next = Done 127 | in return (wc, next) 128 | 129 | bufferFullH endPtr minReq step' = 130 | let !wc = bytesWritten buf endPtr 131 | next = More minReq (run step') 132 | in return (wc, next) 133 | 134 | insertChunkH endPtr bs step' = 135 | let !wc = bytesWritten buf endPtr 136 | next = Chunk bs (run step') 137 | in return (wc, next) 138 | 139 | br = I.BufferRange buf (buf `plusPtr` len) 140 | 141 | in I.fillWithBuildStep step doneH bufferFullH insertChunkH br 142 | 143 | 144 | 145 | ------------------------------------------------------------------------------ 146 | -- Host-specific encodings 147 | ------------------------------------------------------------------------------ 148 | 149 | -- | Encode a single native machine 'Int'. The 'Int' is encoded in host order, 150 | -- host endian form, for the machine you're on. On a 64 bit machine the 'Int' 151 | -- is an 8 byte value, on a 32 bit machine, 4 bytes. Values encoded this way 152 | -- are not portable to different endian or int sized machines, without 153 | -- conversion. 154 | -- 155 | {-# INLINE intHost #-} 156 | intHost :: Int -> Builder 157 | intHost = P.primFixed P.intHost 158 | 159 | -- | Encode a 'Int16' in native host order and host endianness. 160 | {-# INLINE int16Host #-} 161 | int16Host :: Int16 -> Builder 162 | int16Host = P.primFixed P.int16Host 163 | 164 | -- | Encode a 'Int32' in native host order and host endianness. 165 | {-# INLINE int32Host #-} 166 | int32Host :: Int32 -> Builder 167 | int32Host = P.primFixed P.int32Host 168 | 169 | -- | Encode a 'Int64' in native host order and host endianness. 170 | {-# INLINE int64Host #-} 171 | int64Host :: Int64 -> Builder 172 | int64Host = P.primFixed P.int64Host 173 | 174 | -- | Encode a single native machine 'Word'. The 'Word' is encoded in host order, 175 | -- host endian form, for the machine you're on. On a 64 bit machine the 'Word' 176 | -- is an 8 byte value, on a 32 bit machine, 4 bytes. Values encoded this way 177 | -- are not portable to different endian or word sized machines, without 178 | -- conversion. 179 | -- 180 | {-# INLINE wordHost #-} 181 | wordHost :: Word -> Builder 182 | wordHost = P.primFixed P.wordHost 183 | 184 | -- | Encode a 'Word16' in native host order and host endianness. 185 | {-# INLINE word16Host #-} 186 | word16Host :: Word16 -> Builder 187 | word16Host = P.primFixed P.word16Host 188 | 189 | -- | Encode a 'Word32' in native host order and host endianness. 190 | {-# INLINE word32Host #-} 191 | word32Host :: Word32 -> Builder 192 | word32Host = P.primFixed P.word32Host 193 | 194 | -- | Encode a 'Word64' in native host order and host endianness. 195 | {-# INLINE word64Host #-} 196 | word64Host :: Word64 -> Builder 197 | word64Host = P.primFixed P.word64Host 198 | 199 | -- | Encode a 'Float' in native host order. Values encoded this way are not 200 | -- portable to different endian machines, without conversion. 201 | {-# INLINE floatHost #-} 202 | floatHost :: Float -> Builder 203 | floatHost = P.primFixed P.floatHost 204 | 205 | -- | Encode a 'Double' in native host order. 206 | {-# INLINE doubleHost #-} 207 | doubleHost :: Double -> Builder 208 | doubleHost = P.primFixed P.doubleHost 209 | 210 | -------------------------------------------------------------------------------- /Data/ByteString/Builder/Prim/ASCII.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE TypeFamilies #-} 2 | 3 | -- | Copyright : (c) 2010 Jasper Van der Jeugt 4 | -- (c) 2010 - 2011 Simon Meier 5 | -- License : BSD3-style (see LICENSE) 6 | -- 7 | -- Maintainer : Simon Meier 8 | -- Portability : GHC 9 | -- 10 | -- Encodings using ASCII encoded Unicode characters. 11 | -- 12 | module Data.ByteString.Builder.Prim.ASCII 13 | ( 14 | 15 | -- *** ASCII 16 | char7 17 | 18 | -- **** Decimal numbers 19 | -- | Decimal encoding of numbers using ASCII encoded characters. 20 | , int8Dec 21 | , int16Dec 22 | , int32Dec 23 | , int64Dec 24 | , intDec 25 | 26 | , word8Dec 27 | , word16Dec 28 | , word32Dec 29 | , word64Dec 30 | , wordDec 31 | 32 | {- 33 | -- These are the functions currently provided by Bryan O'Sullivans 34 | -- double-conversion library. 35 | -- 36 | -- , float 37 | -- , floatWith 38 | -- , double 39 | -- , doubleWith 40 | -} 41 | 42 | -- **** Hexadecimal numbers 43 | 44 | -- | Encoding positive integers as hexadecimal numbers using lower-case 45 | -- ASCII characters. The shortest possible representation is used. For 46 | -- example, 47 | -- 48 | -- > toLazyByteString (primBounded word16Hex 0x0a10) = "a10" 49 | -- 50 | -- Note that there is no support for using upper-case characters. Please 51 | -- contact the maintainer if your application cannot work without 52 | -- hexadecimal encodings that use upper-case characters. 53 | -- 54 | , word8Hex 55 | , word16Hex 56 | , word32Hex 57 | , word64Hex 58 | , wordHex 59 | 60 | -- **** Fixed-width hexadecimal numbers 61 | -- 62 | -- | Encoding the bytes of fixed-width types as hexadecimal 63 | -- numbers using lower-case ASCII characters. For example, 64 | -- 65 | -- > toLazyByteString (primFixed word16HexFixed 0x0a10) = "0a10" 66 | -- 67 | , int8HexFixed 68 | , int16HexFixed 69 | , int32HexFixed 70 | , int64HexFixed 71 | , word8HexFixed 72 | , word16HexFixed 73 | , word32HexFixed 74 | , word64HexFixed 75 | , floatHexFixed 76 | , doubleHexFixed 77 | 78 | ) where 79 | 80 | import Data.ByteString.Internal.Type 81 | import Data.ByteString.Builder.Prim.Binary 82 | import Data.ByteString.Builder.Prim.Internal 83 | import Data.ByteString.Builder.Prim.Internal.Floating 84 | import Data.ByteString.Builder.Prim.Internal.Base16 85 | import Data.ByteString.Utils.UnalignedAccess 86 | 87 | import Data.Char (ord) 88 | 89 | import Foreign 90 | 91 | -- | Encode the least 7-bits of a 'Char' using the ASCII encoding. 92 | {-# INLINE char7 #-} 93 | char7 :: FixedPrim Char 94 | char7 = (\c -> fromIntegral $ ord c .&. 0x7f) >$< word8 95 | 96 | 97 | ------------------------------------------------------------------------------ 98 | -- Decimal Encoding 99 | ------------------------------------------------------------------------------ 100 | 101 | -- Signed integers 102 | ------------------ 103 | 104 | type family CorrespondingUnsigned s where 105 | CorrespondingUnsigned Int8 = Word8 106 | CorrespondingUnsigned Int16 = Word16 107 | CorrespondingUnsigned Int32 = Word32 108 | CorrespondingUnsigned Int = Word 109 | CorrespondingUnsigned Int64 = Word64 110 | 111 | {-# INLINE encodeSignedViaUnsigned #-} 112 | encodeSignedViaUnsigned :: 113 | forall s. 114 | (Integral s, Num (CorrespondingUnsigned s)) => 115 | Int -> (BoundedPrim (CorrespondingUnsigned s)) -> BoundedPrim s 116 | encodeSignedViaUnsigned bound writeUnsigned = boundedPrim bound $ \sval ptr -> 117 | if sval < 0 then do 118 | poke ptr (c2w '-') 119 | runB writeUnsigned (makeUnsigned (negate sval)) (ptr `plusPtr` 1) 120 | -- This call to 'negate' may overflow if `sval == minBound`. 121 | -- But since we insist that the unsigned type has the same width, 122 | -- this causes no trouble. 123 | else do 124 | runB writeUnsigned (makeUnsigned sval) ptr 125 | where 126 | makeUnsigned = fromIntegral @s @(CorrespondingUnsigned s) 127 | 128 | -- | Decimal encoding of an 'Int8'. 129 | {-# INLINE int8Dec #-} 130 | int8Dec :: BoundedPrim Int8 131 | int8Dec = encodeSignedViaUnsigned 4 word8Dec 132 | 133 | -- | Decimal encoding of an 'Int16'. 134 | {-# INLINE int16Dec #-} 135 | int16Dec :: BoundedPrim Int16 136 | int16Dec = encodeSignedViaUnsigned 6 word16Dec 137 | 138 | 139 | -- | Decimal encoding of an 'Int32'. 140 | {-# INLINE int32Dec #-} 141 | int32Dec :: BoundedPrim Int32 142 | int32Dec = encodeSignedViaUnsigned 11 word32Dec 143 | 144 | -- | Decimal encoding of an 'Int64'. 145 | {-# INLINE int64Dec #-} 146 | int64Dec :: BoundedPrim Int64 147 | int64Dec = encodeSignedViaUnsigned 20 word64Dec 148 | 149 | -- | Decimal encoding of an 'Int'. 150 | {-# INLINE intDec #-} 151 | intDec :: BoundedPrim Int 152 | intDec = caseWordSize_32_64 153 | (fromIntegral >$< int32Dec) 154 | (fromIntegral >$< int64Dec) 155 | 156 | 157 | -- Unsigned integers 158 | -------------------- 159 | 160 | {-# INLINE encodeWord32Decimal #-} 161 | encodeWord32Decimal :: Integral a => Int -> BoundedPrim a 162 | encodeWord32Decimal bound = boundedPrim bound $ c_uint32_dec . fromIntegral 163 | 164 | -- | Decimal encoding of a 'Word8'. 165 | {-# INLINE word8Dec #-} 166 | word8Dec :: BoundedPrim Word8 167 | word8Dec = encodeWord32Decimal 3 168 | 169 | -- | Decimal encoding of a 'Word16'. 170 | {-# INLINE word16Dec #-} 171 | word16Dec :: BoundedPrim Word16 172 | word16Dec = encodeWord32Decimal 5 173 | 174 | -- | Decimal encoding of a 'Word32'. 175 | {-# INLINE word32Dec #-} 176 | word32Dec :: BoundedPrim Word32 177 | word32Dec = encodeWord32Decimal 10 178 | 179 | -- | Decimal encoding of a 'Word64'. 180 | {-# INLINE word64Dec #-} 181 | word64Dec :: BoundedPrim Word64 182 | word64Dec = boundedPrim 20 c_uint64_dec 183 | 184 | -- | Decimal encoding of a 'Word'. 185 | {-# INLINE wordDec #-} 186 | wordDec :: BoundedPrim Word 187 | wordDec = caseWordSize_32_64 188 | (fromIntegral >$< word32Dec) 189 | (fromIntegral >$< word64Dec) 190 | 191 | ------------------------------------------------------------------------------ 192 | -- Hexadecimal Encoding 193 | ------------------------------------------------------------------------------ 194 | 195 | -- without lead 196 | --------------- 197 | 198 | {-# INLINE encodeWord32Hex #-} 199 | encodeWord32Hex :: forall a. (Storable a, Integral a) => BoundedPrim a 200 | encodeWord32Hex = 201 | boundedPrim (2 * sizeOf @a undefined) $ c_uint32_hex . fromIntegral 202 | 203 | -- | Hexadecimal encoding of a 'Word8'. 204 | {-# INLINE word8Hex #-} 205 | word8Hex :: BoundedPrim Word8 206 | word8Hex = encodeWord32Hex 207 | 208 | -- | Hexadecimal encoding of a 'Word16'. 209 | {-# INLINE word16Hex #-} 210 | word16Hex :: BoundedPrim Word16 211 | word16Hex = encodeWord32Hex 212 | 213 | -- | Hexadecimal encoding of a 'Word32'. 214 | {-# INLINE word32Hex #-} 215 | word32Hex :: BoundedPrim Word32 216 | word32Hex = encodeWord32Hex 217 | 218 | -- | Hexadecimal encoding of a 'Word64'. 219 | {-# INLINE word64Hex #-} 220 | word64Hex :: BoundedPrim Word64 221 | word64Hex = boundedPrim 16 c_uint64_hex 222 | 223 | -- | Hexadecimal encoding of a 'Word'. 224 | {-# INLINE wordHex #-} 225 | wordHex :: BoundedPrim Word 226 | wordHex = caseWordSize_32_64 227 | (fromIntegral >$< word32Hex) 228 | (fromIntegral >$< word64Hex) 229 | 230 | 231 | -- fixed width; leading zeroes 232 | ------------------------------ 233 | 234 | -- | Encode a 'Word8' using 2 nibbles (hexadecimal digits). 235 | {-# INLINE word8HexFixed #-} 236 | word8HexFixed :: FixedPrim Word8 237 | word8HexFixed = fixedPrim 2 $ \x op -> do 238 | enc <- encode8_as_16h lowerTable x 239 | unalignedWriteU16 enc op 240 | 241 | -- | Encode a 'Word16' using 4 nibbles. 242 | {-# INLINE word16HexFixed #-} 243 | word16HexFixed :: FixedPrim Word16 244 | word16HexFixed = 245 | (\x -> (fromIntegral $ x `shiftR` 8, fromIntegral x)) 246 | >$< pairF word8HexFixed word8HexFixed 247 | 248 | -- | Encode a 'Word32' using 8 nibbles. 249 | {-# INLINE word32HexFixed #-} 250 | word32HexFixed :: FixedPrim Word32 251 | word32HexFixed = 252 | (\x -> (fromIntegral $ x `shiftR` 16, fromIntegral x)) 253 | >$< pairF word16HexFixed word16HexFixed 254 | 255 | -- | Encode a 'Word64' using 16 nibbles. 256 | {-# INLINE word64HexFixed #-} 257 | word64HexFixed :: FixedPrim Word64 258 | word64HexFixed = 259 | (\x -> (fromIntegral $ x `shiftR` 32, fromIntegral x)) 260 | >$< pairF word32HexFixed word32HexFixed 261 | 262 | -- | Encode a 'Int8' using 2 nibbles (hexadecimal digits). 263 | {-# INLINE int8HexFixed #-} 264 | int8HexFixed :: FixedPrim Int8 265 | int8HexFixed = fromIntegral >$< word8HexFixed 266 | 267 | -- | Encode a 'Int16' using 4 nibbles. 268 | {-# INLINE int16HexFixed #-} 269 | int16HexFixed :: FixedPrim Int16 270 | int16HexFixed = fromIntegral >$< word16HexFixed 271 | 272 | -- | Encode a 'Int32' using 8 nibbles. 273 | {-# INLINE int32HexFixed #-} 274 | int32HexFixed :: FixedPrim Int32 275 | int32HexFixed = fromIntegral >$< word32HexFixed 276 | 277 | -- | Encode a 'Int64' using 16 nibbles. 278 | {-# INLINE int64HexFixed #-} 279 | int64HexFixed :: FixedPrim Int64 280 | int64HexFixed = fromIntegral >$< word64HexFixed 281 | 282 | -- | Encode an IEEE 'Float' using 8 nibbles. 283 | {-# INLINE floatHexFixed #-} 284 | floatHexFixed :: FixedPrim Float 285 | floatHexFixed = encodeFloatViaWord32F word32HexFixed 286 | 287 | -- | Encode an IEEE 'Double' using 16 nibbles. 288 | {-# INLINE doubleHexFixed #-} 289 | doubleHexFixed :: FixedPrim Double 290 | doubleHexFixed = encodeDoubleViaWord64F word64HexFixed 291 | -------------------------------------------------------------------------------- /Data/ByteString/Builder/Prim/Binary.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE Trustworthy #-} 2 | 3 | -- | Copyright : (c) 2010-2011 Simon Meier 4 | -- License : BSD3-style (see LICENSE) 5 | -- 6 | -- Maintainer : Simon Meier 7 | -- Portability : GHC 8 | -- 9 | module Data.ByteString.Builder.Prim.Binary ( 10 | 11 | -- ** Binary encodings 12 | int8 13 | , word8 14 | 15 | -- *** Big-endian 16 | , int16BE 17 | , int32BE 18 | , int64BE 19 | 20 | , word16BE 21 | , word32BE 22 | , word64BE 23 | 24 | , floatBE 25 | , doubleBE 26 | 27 | -- *** Little-endian 28 | , int16LE 29 | , int32LE 30 | , int64LE 31 | 32 | , word16LE 33 | , word32LE 34 | , word64LE 35 | 36 | , floatLE 37 | , doubleLE 38 | 39 | -- *** Non-portable, host-dependent 40 | , intHost 41 | , int16Host 42 | , int32Host 43 | , int64Host 44 | 45 | , wordHost 46 | , word16Host 47 | , word32Host 48 | , word64Host 49 | 50 | , floatHost 51 | , doubleHost 52 | 53 | ) where 54 | 55 | import Data.ByteString.Builder.Prim.Internal 56 | import Data.ByteString.Builder.Prim.Internal.Floating 57 | import Data.ByteString.Utils.ByteOrder 58 | import Data.ByteString.Utils.UnalignedAccess 59 | 60 | import Foreign 61 | 62 | ------------------------------------------------------------------------------ 63 | -- Binary encoding 64 | ------------------------------------------------------------------------------ 65 | 66 | -- Word encodings 67 | ----------------- 68 | 69 | -- | Encoding single unsigned bytes as-is. 70 | -- 71 | {-# INLINE word8 #-} 72 | word8 :: FixedPrim Word8 73 | word8 = fixedPrim 1 (flip poke) -- Word8 is always aligned 74 | 75 | -- 76 | -- We rely on the fromIntegral to do the right masking for us. 77 | -- The inlining here is critical, and can be worth 4x performance 78 | -- 79 | 80 | -- | Encoding 'Word16's in big endian format. 81 | {-# INLINE word16BE #-} 82 | word16BE :: FixedPrim Word16 83 | word16BE = whenLittleEndian byteSwap16 >$< word16Host 84 | 85 | -- | Encoding 'Word16's in little endian format. 86 | {-# INLINE word16LE #-} 87 | word16LE :: FixedPrim Word16 88 | word16LE = whenBigEndian byteSwap16 >$< word16Host 89 | 90 | -- | Encoding 'Word32's in big endian format. 91 | {-# INLINE word32BE #-} 92 | word32BE :: FixedPrim Word32 93 | word32BE = whenLittleEndian byteSwap32 >$< word32Host 94 | 95 | -- | Encoding 'Word32's in little endian format. 96 | {-# INLINE word32LE #-} 97 | word32LE :: FixedPrim Word32 98 | word32LE = whenBigEndian byteSwap32 >$< word32Host 99 | 100 | -- on a little endian machine: 101 | -- word32LE w32 = fixedPrim 4 (\w p -> poke (castPtr p) w32) 102 | 103 | -- | Encoding 'Word64's in big endian format. 104 | {-# INLINE word64BE #-} 105 | word64BE :: FixedPrim Word64 106 | word64BE = whenLittleEndian byteSwap64 >$< word64Host 107 | 108 | -- | Encoding 'Word64's in little endian format. 109 | {-# INLINE word64LE #-} 110 | word64LE :: FixedPrim Word64 111 | word64LE = whenBigEndian byteSwap64 >$< word64Host 112 | 113 | 114 | -- | Encode a single native machine 'Word'. The 'Word's is encoded in host order, 115 | -- host endian form, for the machine you are on. On a 64 bit machine the 'Word' 116 | -- is an 8 byte value, on a 32 bit machine, 4 bytes. Values encoded this way 117 | -- are not portable to different endian or word sized machines, without 118 | -- conversion. 119 | -- 120 | {-# INLINE wordHost #-} 121 | wordHost :: FixedPrim Word 122 | wordHost = case finiteBitSize (0 :: Word) of 123 | 32 -> fromIntegral @Word @Word32 >$< word32Host 124 | 64 -> fromIntegral @Word @Word64 >$< word64Host 125 | _ -> error "Data.ByteString.Builder.Prim.Binary.wordHost: unexpected word size" 126 | 127 | -- | Encoding 'Word16's in native host order and host endianness. 128 | {-# INLINE word16Host #-} 129 | word16Host :: FixedPrim Word16 130 | word16Host = fixedPrim 2 unalignedWriteU16 131 | 132 | -- | Encoding 'Word32's in native host order and host endianness. 133 | {-# INLINE word32Host #-} 134 | word32Host :: FixedPrim Word32 135 | word32Host = fixedPrim 4 unalignedWriteU32 136 | 137 | -- | Encoding 'Word64's in native host order and host endianness. 138 | {-# INLINE word64Host #-} 139 | word64Host :: FixedPrim Word64 140 | word64Host = fixedPrim 8 unalignedWriteU64 141 | 142 | ------------------------------------------------------------------------------ 143 | -- Int encodings 144 | ------------------------------------------------------------------------------ 145 | -- 146 | -- We rely on 'fromIntegral' to do a loss-less conversion to the corresponding 147 | -- 'Word' type 148 | -- 149 | ------------------------------------------------------------------------------ 150 | 151 | -- | Encoding single signed bytes as-is. 152 | -- 153 | {-# INLINE int8 #-} 154 | int8 :: FixedPrim Int8 155 | int8 = fromIntegral >$< word8 156 | 157 | -- | Encoding 'Int16's in big endian format. 158 | {-# INLINE int16BE #-} 159 | int16BE :: FixedPrim Int16 160 | int16BE = fromIntegral >$< word16BE 161 | 162 | -- | Encoding 'Int16's in little endian format. 163 | {-# INLINE int16LE #-} 164 | int16LE :: FixedPrim Int16 165 | int16LE = fromIntegral >$< word16LE 166 | 167 | -- | Encoding 'Int32's in big endian format. 168 | {-# INLINE int32BE #-} 169 | int32BE :: FixedPrim Int32 170 | int32BE = fromIntegral >$< word32BE 171 | 172 | -- | Encoding 'Int32's in little endian format. 173 | {-# INLINE int32LE #-} 174 | int32LE :: FixedPrim Int32 175 | int32LE = fromIntegral >$< word32LE 176 | 177 | -- | Encoding 'Int64's in big endian format. 178 | {-# INLINE int64BE #-} 179 | int64BE :: FixedPrim Int64 180 | int64BE = fromIntegral >$< word64BE 181 | 182 | -- | Encoding 'Int64's in little endian format. 183 | {-# INLINE int64LE #-} 184 | int64LE :: FixedPrim Int64 185 | int64LE = fromIntegral >$< word64LE 186 | 187 | 188 | -- | Encode a single native machine 'Int'. The 'Int's is encoded in host order, 189 | -- host endian form, for the machine you are on. On a 64 bit machine the 'Int' 190 | -- is an 8 byte value, on a 32 bit machine, 4 bytes. Values encoded this way 191 | -- are not portable to different endian or integer sized machines, without 192 | -- conversion. 193 | -- 194 | {-# INLINE intHost #-} 195 | intHost :: FixedPrim Int 196 | intHost = fromIntegral @Int @Word >$< wordHost 197 | 198 | -- | Encoding 'Int16's in native host order and host endianness. 199 | {-# INLINE int16Host #-} 200 | int16Host :: FixedPrim Int16 201 | int16Host = fromIntegral @Int16 @Word16 >$< word16Host 202 | 203 | -- | Encoding 'Int32's in native host order and host endianness. 204 | {-# INLINE int32Host #-} 205 | int32Host :: FixedPrim Int32 206 | int32Host = fromIntegral @Int32 @Word32 >$< word32Host 207 | 208 | -- | Encoding 'Int64's in native host order and host endianness. 209 | {-# INLINE int64Host #-} 210 | int64Host :: FixedPrim Int64 211 | int64Host = fromIntegral @Int64 @Word64 >$< word64Host 212 | 213 | -- IEEE Floating Point Numbers 214 | ------------------------------ 215 | 216 | -- | Encode a 'Float' in big endian format. 217 | {-# INLINE floatBE #-} 218 | floatBE :: FixedPrim Float 219 | floatBE = encodeFloatViaWord32F word32BE 220 | 221 | -- | Encode a 'Float' in little endian format. 222 | {-# INLINE floatLE #-} 223 | floatLE :: FixedPrim Float 224 | floatLE = encodeFloatViaWord32F word32LE 225 | 226 | -- | Encode a 'Double' in big endian format. 227 | {-# INLINE doubleBE #-} 228 | doubleBE :: FixedPrim Double 229 | doubleBE = encodeDoubleViaWord64F word64BE 230 | 231 | -- | Encode a 'Double' in little endian format. 232 | {-# INLINE doubleLE #-} 233 | doubleLE :: FixedPrim Double 234 | doubleLE = encodeDoubleViaWord64F word64LE 235 | 236 | 237 | -- | Encode a 'Float' in native host order and host endianness. Values written 238 | -- this way are not portable to different endian machines, without conversion. 239 | -- 240 | {-# INLINE floatHost #-} 241 | floatHost :: FixedPrim Float 242 | floatHost = fixedPrim (sizeOf @Float 0) unalignedWriteFloat 243 | 244 | -- | Encode a 'Double' in native host order and host endianness. 245 | {-# INLINE doubleHost #-} 246 | doubleHost :: FixedPrim Double 247 | doubleHost = fixedPrim (sizeOf @Double 0) unalignedWriteDouble 248 | -------------------------------------------------------------------------------- /Data/ByteString/Builder/Prim/Internal.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | {-# LANGUAGE Unsafe #-} 3 | 4 | {-# OPTIONS_HADDOCK not-home #-} 5 | 6 | -- | 7 | -- Copyright : 2010-2011 Simon Meier, 2010 Jasper van der Jeugt 8 | -- License : BSD3-style (see LICENSE) 9 | -- 10 | -- Maintainer : Simon Meier 11 | -- Stability : unstable, private 12 | -- Portability : GHC 13 | -- 14 | -- *Warning:* this module is internal. If you find that you need it please 15 | -- contact the maintainers and explain what you are trying to do and discuss 16 | -- what you would need in the public API. It is important that you do this as 17 | -- the module may not be exposed at all in future releases. 18 | -- 19 | -- The maintainers are glad to accept patches for further 20 | -- standard encodings of standard Haskell values. 21 | -- 22 | -- If you need to write your own builder primitives, then be aware that you are 23 | -- writing code with /all safety belts off/; i.e., 24 | -- *this is the code that might make your application vulnerable to buffer-overflow attacks!* 25 | -- The "Data.ByteString.Builder.Prim.Tests" module provides you with 26 | -- utilities for testing your encodings thoroughly. 27 | -- 28 | module Data.ByteString.Builder.Prim.Internal ( 29 | -- * Fixed-size builder primitives 30 | Size 31 | , FixedPrim 32 | , fixedPrim 33 | , size 34 | , runF 35 | 36 | , emptyF 37 | , contramapF 38 | , pairF 39 | -- , liftIOF 40 | 41 | , storableToF 42 | 43 | -- * Bounded-size builder primitives 44 | , BoundedPrim 45 | , boundedPrim 46 | , sizeBound 47 | , runB 48 | 49 | , emptyB 50 | , contramapB 51 | , pairB 52 | , eitherB 53 | , condB 54 | 55 | -- , liftIOB 56 | 57 | , toB 58 | , liftFixedToBounded 59 | 60 | -- , withSizeFB 61 | -- , withSizeBB 62 | 63 | -- * Shared operators 64 | , (>$<) 65 | , (>*<) 66 | 67 | -- * Helpers 68 | , caseWordSize_32_64 69 | 70 | -- * Deprecated 71 | , boudedPrim 72 | ) where 73 | 74 | import Foreign 75 | import Prelude hiding (maxBound) 76 | 77 | #include "MachDeps.h" 78 | #include "bytestring-cpp-macros.h" 79 | 80 | ------------------------------------------------------------------------------ 81 | -- Supporting infrastructure 82 | ------------------------------------------------------------------------------ 83 | 84 | -- | Contravariant functors as in the @contravariant@ package. 85 | class Contravariant f where 86 | contramap :: (b -> a) -> f a -> f b 87 | 88 | infixl 4 >$< 89 | 90 | -- | A fmap-like operator for builder primitives, both bounded and fixed size. 91 | -- 92 | -- Builder primitives are contravariant so it's like the normal fmap, but 93 | -- backwards (look at the type). (If it helps to remember, the operator symbol 94 | -- is like (<$>) but backwards.) 95 | -- 96 | -- We can use it for example to prepend and/or append fixed values to an 97 | -- primitive. 98 | -- 99 | -- > import Data.ByteString.Builder.Prim as P 100 | -- >showEncoding ((\x -> ('\'', (x, '\''))) >$< fixed3) 'x' = "'x'" 101 | -- > where 102 | -- > fixed3 = P.char7 >*< P.char7 >*< P.char7 103 | -- 104 | -- Note that the rather verbose syntax for composition stems from the 105 | -- requirement to be able to compute the size / size bound at compile time. 106 | -- 107 | (>$<) :: Contravariant f => (b -> a) -> f a -> f b 108 | (>$<) = contramap 109 | 110 | 111 | instance Contravariant FixedPrim where 112 | contramap = contramapF 113 | 114 | instance Contravariant BoundedPrim where 115 | contramap = contramapB 116 | 117 | 118 | -- | Type-constructors supporting lifting of type-products. 119 | class Monoidal f where 120 | pair :: f a -> f b -> f (a, b) 121 | 122 | instance Monoidal FixedPrim where 123 | pair = pairF 124 | 125 | instance Monoidal BoundedPrim where 126 | pair = pairB 127 | 128 | infixr 5 >*< 129 | 130 | -- | A pairing/concatenation operator for builder primitives, both bounded and 131 | -- fixed size. 132 | -- 133 | -- For example, 134 | -- 135 | -- > toLazyByteString (primFixed (char7 >*< char7) ('x','y')) = "xy" 136 | -- 137 | -- We can combine multiple primitives using '>*<' multiple times. 138 | -- 139 | -- > toLazyByteString (primFixed (char7 >*< char7 >*< char7) ('x',('y','z'))) = "xyz" 140 | -- 141 | (>*<) :: Monoidal f => f a -> f b -> f (a, b) 142 | (>*<) = pair 143 | 144 | 145 | -- | The type used for sizes and sizeBounds of sizes. 146 | type Size = Int 147 | 148 | 149 | ------------------------------------------------------------------------------ 150 | -- Fixed-size builder primitives 151 | ------------------------------------------------------------------------------ 152 | 153 | -- | A builder primitive that always results in a sequence of bytes of a 154 | -- pre-determined, fixed size. 155 | data FixedPrim a = FP {-# UNPACK #-} !Int (a -> Ptr Word8 -> IO ()) 156 | 157 | fixedPrim :: Int -> (a -> Ptr Word8 -> IO ()) -> FixedPrim a 158 | fixedPrim = FP 159 | 160 | -- | The size of the sequences of bytes generated by this 'FixedPrim'. 161 | {-# INLINE CONLIKE size #-} 162 | size :: FixedPrim a -> Int 163 | size (FP l _) = l 164 | 165 | {-# INLINE CONLIKE runF #-} 166 | runF :: FixedPrim a -> a -> Ptr Word8 -> IO () 167 | runF (FP _ io) = io 168 | 169 | -- | The 'FixedPrim' that always results in the zero-length sequence. 170 | {-# INLINE CONLIKE emptyF #-} 171 | emptyF :: FixedPrim a 172 | emptyF = FP 0 (\_ _ -> return ()) 173 | 174 | -- | Encode a pair by encoding its first component and then its second component. 175 | {-# INLINE CONLIKE pairF #-} 176 | pairF :: FixedPrim a -> FixedPrim b -> FixedPrim (a, b) 177 | pairF (FP l1 io1) (FP l2 io2) = 178 | FP (l1 + l2) (\(x1,x2) op -> io1 x1 op >> io2 x2 (op `plusPtr` l1)) 179 | 180 | -- | Change a primitives such that it first applies a function to the value 181 | -- to be encoded. 182 | -- 183 | -- Note that primitives are 'Contravariant' 184 | -- . Hence, the following 185 | -- laws hold. 186 | -- 187 | -- >contramapF id = id 188 | -- >contramapF f . contramapF g = contramapF (g . f) 189 | {-# INLINE CONLIKE contramapF #-} 190 | contramapF :: (b -> a) -> FixedPrim a -> FixedPrim b 191 | contramapF f (FP l io) = FP l (io . f) 192 | 193 | -- | Convert a 'FixedPrim' to a 'BoundedPrim'. 194 | {-# INLINE CONLIKE toB #-} 195 | toB :: FixedPrim a -> BoundedPrim a 196 | toB (FP l io) = BP l (\x op -> io x op >> (return $! op `plusPtr` l)) 197 | 198 | -- | Lift a 'FixedPrim' to a 'BoundedPrim'. 199 | {-# INLINE CONLIKE liftFixedToBounded #-} 200 | liftFixedToBounded :: FixedPrim a -> BoundedPrim a 201 | liftFixedToBounded = toB 202 | 203 | {-# INLINE CONLIKE storableToF #-} 204 | {-# DEPRECATED storableToF 205 | "Deprecated since @bytestring-0.12.1.0@.\n\nThis function is dangerous in the presence of internal padding\nand makes naive assumptions about alignment.\n\n * For a primitive Haskell type like 'Int64', use the\n corresponding primitive like 'Data.ByteString.Builder.Prim.int64Host'.\n * For other types, it is recommended to manually write a small\n function that performs the necessary unaligned write\n and zeroes or removes any internal padding bits." 206 | #-} 207 | storableToF :: forall a. Storable a => FixedPrim a 208 | #if HS_UNALIGNED_POKES_OK 209 | storableToF = FP (sizeOf (undefined :: a)) (\x op -> poke (castPtr op) x) 210 | #else 211 | storableToF = FP (sizeOf (undefined :: a)) $ \x op -> 212 | if ptrToWordPtr op `mod` fromIntegral (alignment (undefined :: a)) == 0 then poke (castPtr op) x 213 | else with x $ \tp -> copyBytes op (castPtr tp) (sizeOf (undefined :: a)) 214 | #endif 215 | 216 | {- 217 | {-# INLINE CONLIKE liftIOF #-} 218 | liftIOF :: FixedPrim a -> FixedPrim (IO a) 219 | liftIOF (FP l io) = FP l (\xWrapped op -> do x <- xWrapped; io x op) 220 | -} 221 | 222 | ------------------------------------------------------------------------------ 223 | -- Bounded-size builder primitives 224 | ------------------------------------------------------------------------------ 225 | 226 | -- | A builder primitive that always results in sequence of bytes that is no longer 227 | -- than a pre-determined bound. 228 | data BoundedPrim a = BP {-# UNPACK #-} !Int (a -> Ptr Word8 -> IO (Ptr Word8)) 229 | 230 | -- | The bound on the size of sequences of bytes generated by this 'BoundedPrim'. 231 | {-# INLINE CONLIKE sizeBound #-} 232 | sizeBound :: BoundedPrim a -> Int 233 | sizeBound (BP b _) = b 234 | 235 | -- | @since 0.10.12.0 236 | boundedPrim :: Int -> (a -> Ptr Word8 -> IO (Ptr Word8)) -> BoundedPrim a 237 | boundedPrim = BP 238 | 239 | {-# DEPRECATED boudedPrim "Use 'boundedPrim' instead" #-} 240 | boudedPrim :: Int -> (a -> Ptr Word8 -> IO (Ptr Word8)) -> BoundedPrim a 241 | boudedPrim = BP 242 | 243 | {-# INLINE CONLIKE runB #-} 244 | runB :: BoundedPrim a -> a -> Ptr Word8 -> IO (Ptr Word8) 245 | runB (BP _ io) = io 246 | 247 | -- | Change a 'BoundedPrim' such that it first applies a function to the 248 | -- value to be encoded. 249 | -- 250 | -- Note that 'BoundedPrim's are 'Contravariant' 251 | -- . Hence, the following 252 | -- laws hold. 253 | -- 254 | -- >contramapB id = id 255 | -- >contramapB f . contramapB g = contramapB (g . f) 256 | {-# INLINE CONLIKE contramapB #-} 257 | contramapB :: (b -> a) -> BoundedPrim a -> BoundedPrim b 258 | contramapB f (BP b io) = BP b (io . f) 259 | 260 | -- | The 'BoundedPrim' that always results in the zero-length sequence. 261 | {-# INLINE CONLIKE emptyB #-} 262 | emptyB :: BoundedPrim a 263 | emptyB = BP 0 (\_ op -> return op) 264 | 265 | -- | Encode a pair by encoding its first component and then its second component. 266 | {-# INLINE CONLIKE pairB #-} 267 | pairB :: BoundedPrim a -> BoundedPrim b -> BoundedPrim (a, b) 268 | pairB (BP b1 io1) (BP b2 io2) = 269 | BP (b1 + b2) (\(x1,x2) op -> io1 x1 op >>= io2 x2) 270 | 271 | -- | Encode an 'Either' value using the first 'BoundedPrim' for 'Left' 272 | -- values and the second 'BoundedPrim' for 'Right' values. 273 | -- 274 | -- Note that the functions 'eitherB', 'pairB', and 'contramapB' (written below 275 | -- using '>$<') suffice to construct 'BoundedPrim's for all non-recursive 276 | -- algebraic datatypes. For example, 277 | -- 278 | -- @ 279 | --maybeB :: BoundedPrim () -> BoundedPrim a -> BoundedPrim (Maybe a) 280 | --maybeB nothing just = 'maybe' (Left ()) Right '>$<' eitherB nothing just 281 | -- @ 282 | {-# INLINE CONLIKE eitherB #-} 283 | eitherB :: BoundedPrim a -> BoundedPrim b -> BoundedPrim (Either a b) 284 | eitherB (BP b1 io1) (BP b2 io2) = 285 | BP (max b1 b2) 286 | (\x op -> case x of Left x1 -> io1 x1 op; Right x2 -> io2 x2 op) 287 | 288 | -- | Conditionally select a 'BoundedPrim'. 289 | -- For example, we can implement the ASCII primitive that drops characters with 290 | -- Unicode codepoints above 127 as follows. 291 | -- 292 | -- @ 293 | --charASCIIDrop = 'condB' (< \'\\128\') ('liftFixedToBounded' 'Data.ByteString.Builder.Prim.char7') 'emptyB' 294 | -- @ 295 | {-# INLINE CONLIKE condB #-} 296 | condB :: (a -> Bool) -> BoundedPrim a -> BoundedPrim a -> BoundedPrim a 297 | condB p be1 be2 = 298 | contramapB (\x -> if p x then Left x else Right x) (eitherB be1 be2) 299 | 300 | -- | Select an implementation depending on bitness. 301 | -- Throw a compile time error if bitness is neither 32 nor 64. 302 | {-# INLINE caseWordSize_32_64 #-} 303 | caseWordSize_32_64 304 | :: a -- Value for 32-bit architecture 305 | -> a -- Value for 64-bit architecture 306 | -> a 307 | #if WORD_SIZE_IN_BITS == 32 308 | caseWordSize_32_64 = const 309 | #endif 310 | #if WORD_SIZE_IN_BITS == 64 311 | caseWordSize_32_64 = const id 312 | #endif 313 | -------------------------------------------------------------------------------- /Data/ByteString/Builder/Prim/Internal/Base16.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | 3 | -- | 4 | -- Copyright : (c) 2011 Simon Meier 5 | -- License : BSD3-style (see LICENSE) 6 | -- 7 | -- Maintainer : Simon Meier 8 | -- Stability : experimental 9 | -- Portability : GHC 10 | -- 11 | -- Hexadecimal encoding of nibbles (4-bit) and octets (8-bit) as ASCII 12 | -- characters. 13 | -- 14 | -- The current implementation is based on a table based encoding inspired by 15 | -- the code in the 'base64-bytestring' library by Bryan O'Sullivan. In our 16 | -- benchmarks on a 32-bit machine it turned out to be the fastest 17 | -- implementation option. 18 | -- 19 | module Data.ByteString.Builder.Prim.Internal.Base16 ( 20 | EncodingTable 21 | , lowerTable 22 | , encode8_as_16h 23 | ) where 24 | 25 | import Foreign 26 | import GHC.Exts (Addr#, Ptr(..)) 27 | #if PURE_HASKELL 28 | import qualified Data.ByteString.Internal.Pure as Pure 29 | #else 30 | import Foreign.C.Types 31 | #endif 32 | 33 | -- Creating the encoding table 34 | ------------------------------ 35 | 36 | -- | An encoding table for Base16 encoding. 37 | data EncodingTable = EncodingTable Addr# 38 | 39 | -- | The encoding table for hexadecimal values with lower-case characters; 40 | -- e.g., deadbeef. 41 | lowerTable :: EncodingTable 42 | lowerTable = 43 | #if PURE_HASKELL 44 | case Pure.lower_hex_table of 45 | Ptr p# -> EncodingTable p# 46 | #else 47 | case c_lower_hex_table of 48 | Ptr p# -> EncodingTable p# 49 | 50 | foreign import ccall "&hs_bytestring_lower_hex_table" 51 | c_lower_hex_table :: Ptr CChar 52 | #endif 53 | 54 | -- | Encode an octet as 16bit word comprising both encoded nibbles ordered 55 | -- according to the host endianness. Writing these 16bit to memory will write 56 | -- the nibbles in the correct order (i.e. big-endian). 57 | {-# INLINE encode8_as_16h #-} 58 | encode8_as_16h :: EncodingTable -> Word8 -> IO Word16 59 | encode8_as_16h (EncodingTable table) = 60 | peekElemOff (Ptr table) . fromIntegral 61 | -------------------------------------------------------------------------------- /Data/ByteString/Builder/Prim/Internal/Floating.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | 3 | #include "MachDeps.h" 4 | #include "bytestring-cpp-macros.h" 5 | 6 | -- | 7 | -- Copyright : (c) 2010 Simon Meier 8 | -- 9 | -- License : BSD3-style (see LICENSE) 10 | -- 11 | -- Maintainer : Simon Meier 12 | -- Stability : experimental 13 | -- Portability : GHC 14 | -- 15 | -- Conversion of 'Float's and 'Double's to 'Word32's and 'Word64's. 16 | -- 17 | module Data.ByteString.Builder.Prim.Internal.Floating 18 | ( castFloatToWord32 19 | , castDoubleToWord64 20 | , encodeFloatViaWord32F 21 | , encodeDoubleViaWord64F 22 | ) where 23 | 24 | import Data.ByteString.Builder.Prim.Internal 25 | import Data.Word 26 | 27 | #if HS_CAST_FLOAT_WORD_OPS_AVAILABLE 28 | import GHC.Float (castFloatToWord32, castDoubleToWord64) 29 | #else 30 | import Foreign.Marshal.Utils 31 | import Foreign.Storable 32 | import Foreign.Ptr 33 | 34 | import Data.ByteString.Internal.Type (unsafeDupablePerformIO) 35 | {- 36 | We work around ticket http://ghc.haskell.org/trac/ghc/ticket/4092 by 37 | storing the Float/Double in a temp buffer and peeking it out again from there. 38 | -} 39 | 40 | -- | Interpret a 'Float' as a 'Word32' as if through a bit-for-bit copy. 41 | -- (fallback if not available through GHC.Float) 42 | -- 43 | -- e.g 44 | -- 45 | -- > showHex (castFloatToWord32 1.0) [] = "3f800000" 46 | {-# NOINLINE castFloatToWord32 #-} 47 | castFloatToWord32 :: Float -> Word32 48 | #if (SIZEOF_HSFLOAT != SIZEOF_WORD32) || (ALIGNMENT_HSFLOAT < ALIGNMENT_WORD32) 49 | #error "don't know how to cast Float to Word32" 50 | #endif 51 | castFloatToWord32 x = unsafeDupablePerformIO (with x (peek . castPtr)) 52 | 53 | -- | Interpret a 'Double' as a 'Word64' as if through a bit-for-bit copy. 54 | -- (fallback if not available through GHC.Float) 55 | -- 56 | -- e.g 57 | -- 58 | -- > showHex (castDoubleToWord64 1.0) [] = "3ff0000000000000" 59 | {-# NOINLINE castDoubleToWord64 #-} 60 | castDoubleToWord64 :: Double -> Word64 61 | #if (SIZEOF_HSDOUBLE != SIZEOF_WORD64) || (ALIGNMENT_HSDOUBLE < ALIGNMENT_WORD64) 62 | #error "don't know how to cast Double to Word64" 63 | #endif 64 | castDoubleToWord64 x = unsafeDupablePerformIO (with x (peek . castPtr)) 65 | #endif 66 | 67 | 68 | -- | Encode a 'Float' using a 'Word32' encoding. 69 | {-# INLINE encodeFloatViaWord32F #-} 70 | encodeFloatViaWord32F :: FixedPrim Word32 -> FixedPrim Float 71 | #if HS_CAST_FLOAT_WORD_OPS_AVAILABLE 72 | encodeFloatViaWord32F = (castFloatToWord32 >$<) 73 | #else 74 | encodeFloatViaWord32F w32fe = fixedPrim (size w32fe) $ \x op -> do 75 | x' <- with x (peek . castPtr) 76 | runF w32fe x' op 77 | #endif 78 | 79 | -- | Encode a 'Double' using a 'Word64' encoding. 80 | {-# INLINE encodeDoubleViaWord64F #-} 81 | encodeDoubleViaWord64F :: FixedPrim Word64 -> FixedPrim Double 82 | #if HS_CAST_FLOAT_WORD_OPS_AVAILABLE 83 | encodeDoubleViaWord64F = (castDoubleToWord64 >$<) 84 | #else 85 | encodeDoubleViaWord64F w64fe = fixedPrim (size w64fe) $ \x op -> do 86 | x' <- with x (peek . castPtr) 87 | runF w64fe x' op 88 | #endif 89 | -------------------------------------------------------------------------------- /Data/ByteString/Builder/RealFloat.hs: -------------------------------------------------------------------------------- 1 | -- | 2 | -- Module : Data.ByteString.Builder.RealFloat 3 | -- Copyright : (c) Lawrence Wu 2021 4 | -- License : BSD-style 5 | -- Maintainer : lawrencejwu@gmail.com 6 | -- 7 | -- Floating point formatting for @Bytestring.Builder@ 8 | -- 9 | -- This module primarily exposes `floatDec` and `doubleDec` which do the 10 | -- equivalent of converting through @'Data.ByteString.Builder.string7' . 'show'@. 11 | -- 12 | -- It also exposes `formatFloat` and `formatDouble` with a similar API as 13 | -- `GHC.Float.formatRealFloat`. 14 | -- 15 | -- NB: The float-to-string conversions exposed by this module match `show`'s 16 | -- output (specifically with respect to default rounding and length). In 17 | -- particular, there are boundary cases where the closest and \'shortest\' 18 | -- string representations are not used. Mentions of \'shortest\' in the docs 19 | -- below are with this caveat. 20 | -- 21 | -- For example, for fidelity, we match `show` on the output below. 22 | -- 23 | -- >>> show (1.0e23 :: Float) 24 | -- "1.0e23" 25 | -- >>> show (1.0e23 :: Double) 26 | -- "9.999999999999999e22" 27 | -- >>> floatDec 1.0e23 28 | -- "1.0e23" 29 | -- >>> doubleDec 1.0e23 30 | -- "9.999999999999999e22" 31 | -- 32 | -- Simplifying, we can build a shorter, lossless representation by just using 33 | -- @"1.0e23"@ since the floating point values that are 1 ULP away are 34 | -- 35 | -- >>> showHex (castDoubleToWord64 1.0e23) [] 36 | -- "44b52d02c7e14af6" 37 | -- >>> castWord64ToDouble 0x44b52d02c7e14af5 38 | -- 9.999999999999997e22 39 | -- >>> castWord64ToDouble 0x44b52d02c7e14af6 40 | -- 9.999999999999999e22 41 | -- >>> castWord64ToDouble 0x44b52d02c7e14af7 42 | -- 1.0000000000000001e23 43 | -- 44 | -- In particular, we could use the exact boundary if it is the shortest 45 | -- representation and the original floating number is even. To experiment with 46 | -- the shorter rounding, refer to 47 | -- `Data.ByteString.Builder.RealFloat.Internal.acceptBounds`. This will give us 48 | -- 49 | -- >>> floatDec 1.0e23 50 | -- "1.0e23" 51 | -- >>> doubleDec 1.0e23 52 | -- "1.0e23" 53 | -- 54 | -- For more details, please refer to the 55 | -- . 56 | -- 57 | -- @since 0.11.2.0 58 | 59 | module Data.ByteString.Builder.RealFloat 60 | ( floatDec 61 | , doubleDec 62 | 63 | -- * Custom formatting 64 | , formatFloat 65 | , formatDouble 66 | , FloatFormat 67 | , standard 68 | , standardDefaultPrecision 69 | , scientific 70 | , generic 71 | ) where 72 | 73 | import Data.ByteString.Builder.Internal (Builder) 74 | import qualified Data.ByteString.Builder.RealFloat.Internal as R 75 | import qualified Data.ByteString.Builder.RealFloat.F2S as RF 76 | import qualified Data.ByteString.Builder.RealFloat.D2S as RD 77 | import qualified Data.ByteString.Builder.Prim as BP 78 | import GHC.Float (roundTo) 79 | import GHC.Word (Word64) 80 | import GHC.Show (intToDigit) 81 | 82 | -- | Returns a rendered Float. Matches `show` in displaying in standard or 83 | -- scientific notation 84 | -- 85 | -- @ 86 | -- floatDec = 'formatFloat' 'generic' 87 | -- @ 88 | {-# INLINABLE floatDec #-} 89 | floatDec :: Float -> Builder 90 | floatDec = formatFloat generic 91 | 92 | -- | Returns a rendered Double. Matches `show` in displaying in standard or 93 | -- scientific notation 94 | -- 95 | -- @ 96 | -- doubleDec = 'formatDouble' 'generic' 97 | -- @ 98 | {-# INLINABLE doubleDec #-} 99 | doubleDec :: Double -> Builder 100 | doubleDec = formatDouble generic 101 | 102 | -- | Format type for use with `formatFloat` and `formatDouble`. 103 | -- 104 | -- @since 0.11.2.0 105 | data FloatFormat = MkFloatFormat FormatMode (Maybe Int) 106 | 107 | -- | Standard notation with `n` decimal places 108 | -- 109 | -- @since 0.11.2.0 110 | standard :: Int -> FloatFormat 111 | standard n = MkFloatFormat FStandard (Just n) 112 | 113 | -- | Standard notation with the \'default precision\' (decimal places matching `show`) 114 | -- 115 | -- @since 0.11.2.0 116 | standardDefaultPrecision :: FloatFormat 117 | standardDefaultPrecision = MkFloatFormat FStandard Nothing 118 | 119 | -- | Scientific notation with \'default precision\' (decimal places matching `show`) 120 | -- 121 | -- @since 0.11.2.0 122 | scientific :: FloatFormat 123 | scientific = MkFloatFormat FScientific Nothing 124 | 125 | -- | Standard or scientific notation depending on the exponent. Matches `show` 126 | -- 127 | -- @since 0.11.2.0 128 | generic :: FloatFormat 129 | generic = MkFloatFormat FGeneric Nothing 130 | 131 | -- | ByteString float-to-string format 132 | data FormatMode 133 | = FScientific -- ^ scientific notation 134 | | FStandard -- ^ standard notation with `Maybe Int` digits after the decimal 135 | | FGeneric -- ^ dispatches to scientific or standard notation based on the exponent 136 | deriving Show 137 | 138 | -- TODO: support precision argument for FGeneric and FScientific 139 | -- | Returns a rendered Float. Returns the \'shortest\' representation in 140 | -- scientific notation and takes an optional precision argument in standard 141 | -- notation. Also see `floatDec`. 142 | -- 143 | -- With standard notation, the precision argument is used to truncate (or 144 | -- extend with 0s) the \'shortest\' rendered Float. The \'default precision\' does 145 | -- no such modifications and will return as many decimal places as the 146 | -- representation demands. 147 | -- 148 | -- e.g 149 | -- 150 | -- >>> formatFloat (standard 1) 1.2345e-2 151 | -- "0.0" 152 | -- >>> formatFloat (standard 10) 1.2345e-2 153 | -- "0.0123450000" 154 | -- >>> formatFloat standardDefaultPrecision 1.2345e-2 155 | -- "0.01234" 156 | -- >>> formatFloat scientific 12.345 157 | -- "1.2345e1" 158 | -- >>> formatFloat generic 12.345 159 | -- "12.345" 160 | -- 161 | -- @since 0.11.2.0 162 | {-# INLINABLE formatFloat #-} 163 | formatFloat :: FloatFormat -> Float -> Builder 164 | formatFloat (MkFloatFormat fmt prec) = \f -> 165 | let (RF.FloatingDecimal m e) = RF.f2Intermediate f 166 | e' = R.int32ToInt e + R.decimalLength9 m in 167 | case fmt of 168 | FGeneric -> 169 | case specialStr f of 170 | Just b -> b 171 | Nothing -> 172 | if e' >= 0 && e' <= 7 173 | then sign f `mappend` showStandard (R.word32ToWord64 m) e' prec 174 | else BP.primBounded (R.toCharsScientific (f < 0) m e) () 175 | FScientific -> RF.f2s f 176 | FStandard -> 177 | case specialStr f of 178 | Just b -> b 179 | Nothing -> sign f `mappend` showStandard (R.word32ToWord64 m) e' prec 180 | 181 | -- TODO: support precision argument for FGeneric and FScientific 182 | -- | Returns a rendered Double. Returns the \'shortest\' representation in 183 | -- scientific notation and takes an optional precision argument in standard 184 | -- notation. Also see `doubleDec`. 185 | -- 186 | -- With standard notation, the precision argument is used to truncate (or 187 | -- extend with 0s) the \'shortest\' rendered Float. The \'default precision\' 188 | -- does no such modifications and will return as many decimal places as the 189 | -- representation demands. 190 | -- 191 | -- e.g 192 | -- 193 | -- >>> formatDouble (standard 1) 1.2345e-2 194 | -- "0.0" 195 | -- >>> formatDouble (standard 10) 1.2345e-2 196 | -- "0.0123450000" 197 | -- >>> formatDouble standardDefaultPrecision 1.2345e-2 198 | -- "0.01234" 199 | -- >>> formatDouble scientific 12.345 200 | -- "1.2345e1" 201 | -- >>> formatDouble generic 12.345 202 | -- "12.345" 203 | -- 204 | -- @since 0.11.2.0 205 | {-# INLINABLE formatDouble #-} 206 | formatDouble :: FloatFormat -> Double -> Builder 207 | formatDouble (MkFloatFormat fmt prec) = \f -> 208 | let (RD.FloatingDecimal m e) = RD.d2Intermediate f 209 | e' = R.int32ToInt e + R.decimalLength17 m in 210 | case fmt of 211 | FGeneric -> 212 | case specialStr f of 213 | Just b -> b 214 | Nothing -> 215 | if e' >= 0 && e' <= 7 216 | then sign f `mappend` showStandard m e' prec 217 | else BP.primBounded (R.toCharsScientific (f < 0) m e) () 218 | FScientific -> RD.d2s f 219 | FStandard -> 220 | case specialStr f of 221 | Just b -> b 222 | Nothing -> sign f `mappend` showStandard m e' prec 223 | 224 | -- | Char7 encode a 'Char'. 225 | {-# INLINE char7 #-} 226 | char7 :: Char -> Builder 227 | char7 = BP.primFixed BP.char7 228 | 229 | -- | Char7 encode a 'String'. 230 | {-# INLINE string7 #-} 231 | string7 :: String -> Builder 232 | string7 = BP.primMapListFixed BP.char7 233 | 234 | -- | Encodes a `-` if input is negative 235 | sign :: RealFloat a => a -> Builder 236 | sign f = if f < 0 then char7 '-' else mempty 237 | 238 | -- | Special rendering for Nan, Infinity, and 0. See 239 | -- RealFloat.Internal.NonNumbersAndZero 240 | specialStr :: RealFloat a => a -> Maybe Builder 241 | specialStr f 242 | | isNaN f = Just $ string7 "NaN" 243 | | isInfinite f = Just $ sign f `mappend` string7 "Infinity" 244 | | isNegativeZero f = Just $ string7 "-0.0" 245 | | f == 0 = Just $ string7 "0.0" 246 | | otherwise = Nothing 247 | 248 | -- | Returns a list of decimal digits in a Word64 249 | digits :: Word64 -> [Int] 250 | digits w = go [] w 251 | where go ds 0 = ds 252 | go ds c = let (q, r) = R.dquotRem10 c 253 | in go ((R.word64ToInt r) : ds) q 254 | 255 | -- | Show a floating point value in standard notation. Based on GHC.Float.showFloat 256 | showStandard :: Word64 -> Int -> Maybe Int -> Builder 257 | showStandard m e prec = 258 | case prec of 259 | Nothing 260 | | e <= 0 -> char7 '0' 261 | `mappend` char7 '.' 262 | `mappend` string7 (replicate (-e) '0') 263 | `mappend` mconcat (digitsToBuilder ds) 264 | | otherwise -> 265 | let f 0 s rs = mk0 (reverse s) `mappend` char7 '.' `mappend` mk0 rs 266 | f n s [] = f (n-1) (char7 '0':s) [] 267 | f n s (r:rs) = f (n-1) (r:s) rs 268 | in f e [] (digitsToBuilder ds) 269 | Just p 270 | | e >= 0 -> 271 | let (ei, is') = roundTo 10 (p' + e) ds 272 | (ls, rs) = splitAt (e + ei) (digitsToBuilder is') 273 | in mk0 ls `mappend` mkDot rs 274 | | otherwise -> 275 | let (ei, is') = roundTo 10 p' (replicate (-e) 0 ++ ds) 276 | -- ds' should always be non-empty but use redundant pattern 277 | -- matching to silence warning 278 | ds' = if ei > 0 then is' else 0:is' 279 | (ls, rs) = splitAt 1 $ digitsToBuilder ds' 280 | in mk0 ls `mappend` mkDot rs 281 | where p' = max p 0 282 | where 283 | mk0 ls = case ls of [] -> char7 '0'; _ -> mconcat ls 284 | mkDot rs = if null rs then mempty else char7 '.' `mappend` mconcat rs 285 | ds = digits m 286 | digitsToBuilder = fmap (char7 . intToDigit) 287 | 288 | -------------------------------------------------------------------------------- /Data/ByteString/Builder/RealFloat/F2S.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | 3 | -- | 4 | -- Module : Data.ByteString.Builder.RealFloat.F2S 5 | -- Copyright : (c) Lawrence Wu 2021 6 | -- License : BSD-style 7 | -- Maintainer : lawrencejwu@gmail.com 8 | -- 9 | -- Implementation of float-to-string conversion 10 | 11 | module Data.ByteString.Builder.RealFloat.F2S 12 | ( FloatingDecimal(..) 13 | , f2s 14 | , f2Intermediate 15 | ) where 16 | 17 | import Control.Arrow (first) 18 | import Data.Bits ((.|.), (.&.), unsafeShiftL, unsafeShiftR) 19 | import Data.ByteString.Builder.Internal (Builder) 20 | import Data.ByteString.Builder.Prim (primBounded) 21 | import Data.ByteString.Builder.RealFloat.Internal 22 | import GHC.Int (Int32(..)) 23 | import GHC.Word (Word32(..), Word64(..)) 24 | 25 | #if !PURE_HASKELL 26 | import GHC.Ptr (Ptr(..)) 27 | #endif 28 | 29 | -- See Data.ByteString.Builder.RealFloat.TableGenerator for a high-level 30 | -- explanation of the ryu algorithm 31 | 32 | #if !PURE_HASKELL 33 | -- | Table of 2^k / 5^q + 1 34 | -- 35 | -- > fmap (finv float_pow5_inv_bitcount) [0..float_max_inv_split] 36 | foreign import ccall "&hs_bytestring_float_pow5_inv_split" 37 | float_pow5_inv_split :: Ptr Word64 38 | 39 | -- | Table of 5^(-e2-q) / 2^k + 1 40 | -- 41 | -- > fmap (fnorm float_pow5_bitcount) [0..float_max_split] 42 | foreign import ccall "&hs_bytestring_float_pow5_split" 43 | float_pow5_split :: Ptr Word64 44 | #endif 45 | 46 | -- | Number of mantissa bits of a 32-bit float. The number of significant bits 47 | -- (floatDigits (undefined :: Float)) is 24 since we have a leading 1 for 48 | -- normal floats and 0 for subnormal floats 49 | float_mantissa_bits :: Int 50 | float_mantissa_bits = 23 51 | 52 | -- | Number of exponent bits of a 32-bit float 53 | float_exponent_bits :: Int 54 | float_exponent_bits = 8 55 | 56 | -- | Bias in encoded 32-bit float representation (2^7 - 1) 57 | float_bias :: Int 58 | float_bias = 127 59 | 60 | data FloatingDecimal = FloatingDecimal 61 | { fmantissa :: !Word32 62 | , fexponent :: !Int32 63 | } deriving (Show, Eq) 64 | 65 | -- | Multiply a 32-bit number with a 64-bit number while keeping the upper 64 66 | -- bits. Then shift by specified amount minus 32 67 | mulShift32 :: Word32 -> Word64 -> Int -> Word32 68 | mulShift32 m factor shift = 69 | let factorLo = factor .&. mask 32 70 | factorHi = factor `unsafeShiftR` 32 71 | bits0 = word32ToWord64 m * factorLo 72 | bits1 = word32ToWord64 m * factorHi 73 | total = (bits0 `unsafeShiftR` 32) + bits1 74 | in word64ToWord32 $ total `unsafeShiftR` (shift - 32) 75 | 76 | -- | Index into the 64-bit word lookup table float_pow5_inv_split 77 | get_float_pow5_inv_split :: Int -> Word64 78 | #if !PURE_HASKELL 79 | get_float_pow5_inv_split = getWord64At float_pow5_inv_split 80 | #else 81 | -- > putStr $ case64 (finv float_pow5_inv_bitcount) [0..float_max_inv_split] 82 | get_float_pow5_inv_split i = case i of 83 | 0 -> 0x800000000000001 84 | 1 -> 0x666666666666667 85 | 2 -> 0x51eb851eb851eb9 86 | 3 -> 0x4189374bc6a7efa 87 | 4 -> 0x68db8bac710cb2a 88 | 5 -> 0x53e2d6238da3c22 89 | 6 -> 0x431bde82d7b634e 90 | 7 -> 0x6b5fca6af2bd216 91 | 8 -> 0x55e63b88c230e78 92 | 9 -> 0x44b82fa09b5a52d 93 | 10 -> 0x6df37f675ef6eae 94 | 11 -> 0x57f5ff85e592558 95 | 12 -> 0x465e6604b7a8447 96 | 13 -> 0x709709a125da071 97 | 14 -> 0x5a126e1a84ae6c1 98 | 15 -> 0x480ebe7b9d58567 99 | 16 -> 0x734aca5f6226f0b 100 | 17 -> 0x5c3bd5191b525a3 101 | 18 -> 0x49c97747490eae9 102 | 19 -> 0x760f253edb4ab0e 103 | 20 -> 0x5e72843249088d8 104 | 21 -> 0x4b8ed0283a6d3e0 105 | 22 -> 0x78e480405d7b966 106 | 23 -> 0x60b6cd004ac9452 107 | 24 -> 0x4d5f0a66a23a9db 108 | 25 -> 0x7bcb43d769f762b 109 | 26 -> 0x63090312bb2c4ef 110 | 27 -> 0x4f3a68dbc8f03f3 111 | 28 -> 0x7ec3daf94180651 112 | 29 -> 0x65697bfa9acd1da 113 | _ -> 0x51212ffbaf0a7e2 114 | #endif 115 | 116 | -- | Index into the 64-bit word lookup table float_pow5_split 117 | get_float_pow5_split :: Int -> Word64 118 | #if !PURE_HASKELL 119 | get_float_pow5_split = getWord64At float_pow5_split 120 | #else 121 | -- > putStr $ case64 (fnorm float_pow5_bitcount) [0..float_max_split] 122 | get_float_pow5_split i = case i of 123 | 0 -> 0x1000000000000000 124 | 1 -> 0x1400000000000000 125 | 2 -> 0x1900000000000000 126 | 3 -> 0x1f40000000000000 127 | 4 -> 0x1388000000000000 128 | 5 -> 0x186a000000000000 129 | 6 -> 0x1e84800000000000 130 | 7 -> 0x1312d00000000000 131 | 8 -> 0x17d7840000000000 132 | 9 -> 0x1dcd650000000000 133 | 10 -> 0x12a05f2000000000 134 | 11 -> 0x174876e800000000 135 | 12 -> 0x1d1a94a200000000 136 | 13 -> 0x12309ce540000000 137 | 14 -> 0x16bcc41e90000000 138 | 15 -> 0x1c6bf52634000000 139 | 16 -> 0x11c37937e0800000 140 | 17 -> 0x16345785d8a00000 141 | 18 -> 0x1bc16d674ec80000 142 | 19 -> 0x1158e460913d0000 143 | 20 -> 0x15af1d78b58c4000 144 | 21 -> 0x1b1ae4d6e2ef5000 145 | 22 -> 0x10f0cf064dd59200 146 | 23 -> 0x152d02c7e14af680 147 | 24 -> 0x1a784379d99db420 148 | 25 -> 0x108b2a2c28029094 149 | 26 -> 0x14adf4b7320334b9 150 | 27 -> 0x19d971e4fe8401e7 151 | 28 -> 0x1027e72f1f128130 152 | 29 -> 0x1431e0fae6d7217c 153 | 30 -> 0x193e5939a08ce9db 154 | 31 -> 0x1f8def8808b02452 155 | 32 -> 0x13b8b5b5056e16b3 156 | 33 -> 0x18a6e32246c99c60 157 | 34 -> 0x1ed09bead87c0378 158 | 35 -> 0x13426172c74d822b 159 | 36 -> 0x1812f9cf7920e2b6 160 | 37 -> 0x1e17b84357691b64 161 | 38 -> 0x12ced32a16a1b11e 162 | 39 -> 0x178287f49c4a1d66 163 | 40 -> 0x1d6329f1c35ca4bf 164 | 41 -> 0x125dfa371a19e6f7 165 | 42 -> 0x16f578c4e0a060b5 166 | 43 -> 0x1cb2d6f618c878e3 167 | 44 -> 0x11efc659cf7d4b8d 168 | 45 -> 0x166bb7f0435c9e71 169 | _ -> 0x1c06a5ec5433c60d 170 | #endif 171 | 172 | -- | Take the high bits of m * 2^k / 5^q / 2^-e2+q+k 173 | mulPow5InvDivPow2 :: Word32 -> Int -> Int -> Word32 174 | mulPow5InvDivPow2 m q j = mulShift32 m (get_float_pow5_inv_split q) j 175 | 176 | -- | Take the high bits of m * 5^-e2-q / 2^k / 2^q-k 177 | mulPow5DivPow2 :: Word32 -> Int -> Int -> Word32 178 | mulPow5DivPow2 m i j = mulShift32 m (get_float_pow5_split i) j 179 | 180 | -- | Handle case e2 >= 0 181 | f2dGT :: Int32 -> Word32 -> Word32 -> Word32 -> (BoundsState Word32, Int32) 182 | f2dGT e2' u v w = 183 | let e2 = int32ToInt e2' 184 | -- q = e10 = log_10(2^e2) 185 | q = log10pow2 e2 186 | -- k = B0 + log_2(5^q) 187 | k = float_pow5_inv_bitcount + pow5bits q - 1 188 | i = -e2 + q + k 189 | -- (u, v, w) * 2^k / 5^q / 2^-e2+q+k 190 | u' = mulPow5InvDivPow2 u q i 191 | v' = mulPow5InvDivPow2 v q i 192 | w' = mulPow5InvDivPow2 w q i 193 | !lastRemoved = 194 | if q /= 0 && fquot10 (w' - 1) <= fquot10 u' 195 | -- We need to know one removed digit even if we are not going to loop 196 | -- below. We could use q = X - 1 above, except that would require 33 197 | -- bits for the result, and we've found that 32-bit arithmetic is 198 | -- faster even on 64-bit machines. 199 | then let l = float_pow5_inv_bitcount + pow5bits (q - 1) - 1 200 | in frem10 (mulPow5InvDivPow2 v (q - 1) (-e2 + q - 1 + l)) 201 | else 0 202 | !(vvTrailing, vuTrailing, vw') = 203 | case () of 204 | _ | q < 9 && frem5 v == 0 205 | -> (multipleOfPowerOf5 v q, False, w') 206 | | q < 9 && acceptBounds v 207 | -> (False, multipleOfPowerOf5 u q, w') 208 | | q < 9 209 | -> (False, False, w' - boolToWord32 (multipleOfPowerOf5 w q)) 210 | | otherwise 211 | -> (False, False, w') 212 | in (BoundsState u' v' vw' lastRemoved vuTrailing vvTrailing, intToInt32 q) 213 | 214 | -- | Handle case e2 < 0 215 | f2dLT :: Int32 -> Word32 -> Word32 -> Word32 -> (BoundsState Word32, Int32) 216 | f2dLT e2' u v w = 217 | let e2 = int32ToInt e2' 218 | q = log10pow5 (-e2) 219 | e10 = q + e2 220 | i = (-e2) - q 221 | -- k = log_2(5^-e2-q) - B1 222 | k = pow5bits i - float_pow5_bitcount 223 | j = q - k 224 | -- (u, v, w) * 5^-e2-q / 2^k / 2^q-k 225 | u' = mulPow5DivPow2 u i j 226 | v' = mulPow5DivPow2 v i j 227 | w' = mulPow5DivPow2 w i j 228 | !lastRemoved = 229 | if q /= 0 && fquot10 (w' - 1) <= fquot10 u' 230 | then let j' = q - 1 - (pow5bits (i + 1) - float_pow5_bitcount) 231 | in frem10 (mulPow5DivPow2 v (i + 1) j') 232 | else 0 233 | !(vvTrailing , vuTrailing, vw') = 234 | case () of 235 | _ | q <= 1 && acceptBounds v 236 | -> (True, v - u == 2, w') -- mmShift == 1 237 | | q <= 1 238 | -> (True, False, w' - 1) 239 | | q < 31 240 | -> (multipleOfPowerOf2 v (q - 1), False, w') 241 | | otherwise 242 | -> (False, False, w') 243 | in (BoundsState u' v' vw' lastRemoved vuTrailing vvTrailing, intToInt32 e10) 244 | 245 | -- | Returns the decimal representation of the given mantissa and exponent of a 246 | -- 32-bit Float using the ryu algorithm. 247 | f2d :: Word32 -> Word32 -> FloatingDecimal 248 | f2d m e = 249 | let !mf = if e == 0 250 | then m 251 | else (1 `unsafeShiftL` float_mantissa_bits) .|. m 252 | !ef = intToInt32 $ if e == 0 253 | then 1 - (float_bias + float_mantissa_bits) 254 | else word32ToInt e - (float_bias + float_mantissa_bits) 255 | !e2 = ef - 2 256 | -- Step 2. 3-tuple (u, v, w) * 2**e2 257 | !u = 4 * mf - 1 - boolToWord32 (m /= 0 || e <= 1) 258 | !v = 4 * mf 259 | !w = 4 * mf + 2 260 | -- Step 3. convert to decimal power base 261 | !(state, e10) = 262 | if e2 >= 0 263 | then f2dGT e2 u v w 264 | else f2dLT e2 u v w 265 | -- Step 4: Find the shortest decimal representation in the interval of 266 | -- valid representations. 267 | !(output, removed) = 268 | let rounded = closestCorrectlyRounded (acceptBounds v) 269 | in first rounded $ if vvIsTrailingZeros state || vuIsTrailingZeros state 270 | then trimTrailing state 271 | else trimNoTrailing state 272 | !e' = e10 + removed 273 | in FloatingDecimal output e' 274 | 275 | -- | Split a Float into (sign, mantissa, exponent) 276 | breakdown :: Float -> (Bool, Word32, Word32) 277 | breakdown f = 278 | let bits = castFloatToWord32 f 279 | sign = ((bits `unsafeShiftR` (float_mantissa_bits + float_exponent_bits)) .&. 1) /= 0 280 | mantissa = bits .&. mask float_mantissa_bits 281 | expo = (bits `unsafeShiftR` float_mantissa_bits) .&. mask float_exponent_bits 282 | in (sign, mantissa, expo) 283 | 284 | -- | Dispatches to `f2d` and applies the given formatters 285 | {-# INLINE f2s' #-} 286 | f2s' :: (Bool -> Word32 -> Int32 -> a) -> (NonNumbersAndZero -> a) -> Float -> a 287 | f2s' formatter specialFormatter f = 288 | let (sign, mantissa, expo) = breakdown f 289 | in if (expo == mask float_exponent_bits) || (expo == 0 && mantissa == 0) 290 | then specialFormatter NonNumbersAndZero 291 | { negative=sign 292 | , exponent_all_one=expo > 0 293 | , mantissa_non_zero=mantissa > 0 } 294 | else let FloatingDecimal m e = f2d mantissa expo 295 | in formatter sign m e 296 | 297 | -- | Render a Float in scientific notation 298 | f2s :: Float -> Builder 299 | f2s f = primBounded (f2s' toCharsScientific toCharsNonNumbersAndZero f) () 300 | 301 | -- | Returns the decimal representation of a Float. NaN and Infinity will 302 | -- return `FloatingDecimal 0 0` 303 | f2Intermediate :: Float -> FloatingDecimal 304 | f2Intermediate = f2s' (const FloatingDecimal) (const $ FloatingDecimal 0 0) 305 | -------------------------------------------------------------------------------- /Data/ByteString/Builder/RealFloat/TableGenerator.hs: -------------------------------------------------------------------------------- 1 | -- | 2 | -- Module : Data.ByteString.Builder.RealFloat.TableGenerator 3 | -- Copyright : (c) Lawrence Wu 2021 4 | -- License : BSD-style 5 | -- Maintainer : lawrencejwu@gmail.com 6 | -- 7 | -- Constants and overview for compile-time table generation for Ryu internals 8 | -- 9 | -- This module uses Haskell's arbitrary-precision `Integer` types to compute 10 | -- the necessary multipliers for efficient conversion to a decimal power base. 11 | -- 12 | -- It also exposes constants relevant to the 32- and 64-bit tables (e.g maximum 13 | -- number of bits required to store the table values). 14 | 15 | module Data.ByteString.Builder.RealFloat.TableGenerator 16 | ( float_pow5_inv_bitcount 17 | , float_pow5_bitcount 18 | , double_pow5_bitcount 19 | , double_pow5_inv_bitcount 20 | , float_max_split 21 | , float_max_inv_split 22 | , double_max_split 23 | , double_max_inv_split 24 | 25 | , finv 26 | , fnorm 27 | , splitWord128s 28 | , case64 29 | , case128 30 | ) where 31 | 32 | import GHC.Float (int2Double) 33 | 34 | import Data.Bits 35 | import Data.Word 36 | import Numeric 37 | 38 | 39 | -- The basic floating point conversion algorithm is as such: 40 | -- 41 | -- Given floating point 42 | -- 43 | -- f = (-1)^s * m_f * 2^e_f 44 | -- 45 | -- which is IEEE encoded by `[s] [.. e ..] [.. m ..]`. `s` is the sign bit, `e` 46 | -- is the biased exponent, and `m` is the mantissa, let 47 | -- 48 | -- | e /= 0 | e == 0 49 | -- -----+-------------------+----------- 50 | -- m_f | 2^len(m) + m | m 51 | -- e_f | e - bias - len(m) | 1 - bias - len(m) 52 | -- 53 | -- we compute the halfway points to the next smaller (`f-`) and larger (`f+`) 54 | -- floating point numbers as 55 | -- 56 | -- lower halfway point u * 2^e2, u = 4 * m_f - (if m == 0 then 1 else 2) 57 | -- v * 2^e2, v = 4 * m_f 58 | -- upper halfway point w * 2^e2, u = 4 * m_f + 2 59 | -- where e2 = ef - 2 (so u, v, w are integers) 60 | -- 61 | -- 62 | -- Then we compute (a, b, c) * 10^e10 = (u, v, w) * 2^e2 which is split into 63 | -- the case of 64 | -- 65 | -- e2 >= 0 ==> e10 = 0 , (a, b, c) = (u, v, w) * 2^e2 66 | -- e2 < 0 ==> e10 = e2, (a, b, c) = (u, v, w) * 5^-e2 67 | -- 68 | -- And finally we find the shortest representation from integers d0 and e0 such 69 | -- that 70 | -- 71 | -- a * 10^e10 < d0 * 10^(e0+e10) < c * 10^e10 72 | -- 73 | -- such that e0 is maximal (we allow equality to smaller or larger halfway 74 | -- point depending on rounding mode). This is found through iteratively 75 | -- dividing by 10 while a/10^j < c/10^j and doing some bookkeeping around 76 | -- zeros. 77 | -- 78 | -- 79 | -- 80 | -- 81 | -- The ryu algorithm removes the requirement for arbitrary precision arithmetic 82 | -- and improves the runtime significantly by skipping most of the iterative 83 | -- division by carefully selecting a point where certain invariants hold and 84 | -- precomputing a few tables. 85 | -- 86 | -- Specifically, define `q` such that the correspondings values of a/10^q < 87 | -- c/10^q - 1. We can prove (not shown) that 88 | -- 89 | -- if e2 >= 0, q = e2 * log_10(2) 90 | -- if e2 < 0, q = -e2 * log_10(5) 91 | -- 92 | -- Then we can compute (a, b, c) / 10^q. Starting from (u, v, w) we have 93 | -- 94 | -- (a, b, c) / 10^q (a, b, c) / 10^q 95 | -- = (u, v, w) * 2^e2 / 10^q OR = (u, v, w) * 5^-e2 / 10^q 96 | -- 97 | -- And since q < e2, 98 | -- 99 | -- = (u, v, w) * 2^e2-q / 5^q OR = (u, v, w) * 5^-e2-q / 2^q 100 | -- 101 | -- While (u, v, w) are n-bit numbers, 5^q and whatnot are significantly larger, 102 | -- but we only need the top-most n bits of the result so we can choose `k` that 103 | -- reduce the number of bits required to ~2n. We then multiply by either 104 | -- 105 | -- 2^k / 5^q OR 5^-e2-q / 2^k 106 | -- 107 | -- The required `k` is roughly linear in the exponent (we need more of the 108 | -- multiplication to be precise) but the number of bits to store the 109 | -- multiplicands above stays fixed. 110 | -- 111 | -- Since the number of bits needed is relatively small for IEEE 32- and 64-bit 112 | -- floating types, we can compute appropriate values for `k` for the 113 | -- floating-point-type-specific bounds instead of each e2. 114 | -- 115 | -- Finally, we need to do some final manual iterations potentially to do a 116 | -- final fixup of the skipped state 117 | 118 | 119 | -- | Bound for bits of @2^k / 5^q@ for floats 120 | float_pow5_inv_bitcount :: Int 121 | float_pow5_inv_bitcount = 59 122 | 123 | -- | Bound for bits of @5^-e2-q / 2^k@ for floats 124 | float_pow5_bitcount :: Int 125 | float_pow5_bitcount = 61 126 | 127 | -- | Bound for bits of @5^-e2-q / 2^k@ for doubles 128 | double_pow5_bitcount :: Int 129 | double_pow5_bitcount = 125 130 | 131 | -- | Bound for bits of @2^k / 5^q@ for doubles 132 | double_pow5_inv_bitcount :: Int 133 | double_pow5_inv_bitcount = 125 134 | 135 | -- NB: these tables are encoded directly into the 136 | -- source code in cbits/aligned-static-hs-data.c 137 | 138 | -- | Number of bits in a positive integer 139 | blen :: Integer -> Int 140 | blen 0 = 0 141 | blen 1 = 1 142 | blen n = 1 + blen (n `quot` 2) 143 | 144 | -- | Used for table generation of 2^k / 5^q + 1 145 | finv :: Int -> Int -> Integer 146 | finv bitcount i = 147 | let p = 5^i 148 | in (1 `shiftL` (blen p - 1 + bitcount)) `div` p + 1 149 | 150 | -- | Used for table generation of 5^-e2-q / 2^k 151 | fnorm :: Int -> Int -> Integer 152 | fnorm bitcount i = 153 | let p = 5^i 154 | s = blen p - bitcount 155 | in if s < 0 then p `shiftL` (-s) else p `shiftR` s 156 | 157 | -- | Breaks each integer into two Word64s (lowBits, highBits) 158 | splitWord128s :: [Integer] -> [Word64] 159 | splitWord128s li 160 | = [fromInteger w | x <- li, w <- [x .&. maxWord64, x `shiftR` 64]] 161 | where maxWord64 = toInteger (maxBound :: Word64) 162 | 163 | splitWord128 :: Integer -> (Word64,Word64) 164 | splitWord128 x = (fromInteger (x `shiftR` 64), fromInteger (x .&. maxWord64)) 165 | where maxWord64 = toInteger (maxBound :: Word64) 166 | 167 | 168 | -- Helpers to generate case alternatives returning either one Word64 (case64) or 169 | -- two Word64s (case128) for the PURE_HASKELL variant of the tables. 170 | case64 :: (Int -> Integer) -> [Int] -> String 171 | case64 f range = concat 172 | [ show i ++ " -> 0x" ++ showHex (f i) "\n" 173 | | i <- range] 174 | 175 | case128 :: (Int -> Integer) -> [Int] -> String 176 | case128 f range = concat 177 | [ show i ++ " -> (0x" ++ showHex hi "" ++ ", 0x" ++ showHex lo ")\n" 178 | | i <- range 179 | , let (hi,lo) = splitWord128 (f i) 180 | ] 181 | 182 | -- Given a specific floating-point type, determine the range of q for the < 0 183 | -- and >= 0 cases 184 | get_range :: forall ff. (RealFloat ff) => ff -> (Int, Int) 185 | get_range f = 186 | let (emin, emax) = floatRange f 187 | mantissaDigits = floatDigits f 188 | emin' = emin - mantissaDigits - 2 189 | emax' = emax - mantissaDigits - 2 190 | in ( (-emin') - floor (int2Double (-emin') * logBase 10 5) 191 | , floor (int2Double emax' * logBase 10 2)) 192 | 193 | float_max_split :: Int -- = 46 194 | float_max_inv_split :: Int -- = 30 195 | (float_max_split, float_max_inv_split) = get_range (undefined :: Float) 196 | 197 | -- we take a slightly different codepath s.t we need one extra entry 198 | double_max_split :: Int -- = 325 199 | double_max_inv_split :: Int -- = 291 200 | (double_max_split, double_max_inv_split) = 201 | let (m, mi) = get_range (undefined :: Double) 202 | in (m + 1, mi) 203 | 204 | -------------------------------------------------------------------------------- /Data/ByteString/Internal.hs: -------------------------------------------------------------------------------- 1 | {-# OPTIONS_HADDOCK not-home #-} 2 | 3 | -- | 4 | -- Module : Data.ByteString.Internal 5 | -- Copyright : (c) Don Stewart 2006-2008 6 | -- (c) Duncan Coutts 2006-2012 7 | -- License : BSD-style 8 | -- Maintainer : dons00@gmail.com, duncan@community.haskell.org 9 | -- Stability : unstable 10 | -- Portability : non-portable 11 | -- 12 | -- A module containing semi-public 'ByteString' internals. This exposes the 13 | -- 'ByteString' representation and low level construction functions. As such 14 | -- all the functions in this module are unsafe. The API is also not stable. 15 | -- 16 | -- Where possible application should instead use the functions from the normal 17 | -- public interface modules, such as "Data.ByteString.Unsafe". Packages that 18 | -- extend the ByteString system at a low level will need to use this module. 19 | -- 20 | module Data.ByteString.Internal ( 21 | 22 | -- * The @ByteString@ type and representation 23 | ByteString 24 | ( BS 25 | , PS -- backwards compatibility shim 26 | ), 27 | 28 | StrictByteString, 29 | 30 | -- * Internal indexing 31 | findIndexOrLength, 32 | 33 | -- * Conversion with lists: packing and unpacking 34 | packBytes, packUptoLenBytes, unsafePackLenBytes, 35 | packChars, packUptoLenChars, unsafePackLenChars, 36 | unpackBytes, unpackAppendBytesLazy, unpackAppendBytesStrict, 37 | unpackChars, unpackAppendCharsLazy, unpackAppendCharsStrict, 38 | unsafePackAddress, unsafePackLenAddress, 39 | unsafePackLiteral, unsafePackLenLiteral, 40 | 41 | -- * Low level imperative construction 42 | empty, 43 | create, 44 | createUptoN, 45 | createUptoN', 46 | createAndTrim, 47 | createAndTrim', 48 | unsafeCreate, 49 | unsafeCreateUptoN, 50 | unsafeCreateUptoN', 51 | mallocByteString, 52 | 53 | -- * Conversion to and from ForeignPtrs 54 | mkDeferredByteString, 55 | fromForeignPtr, 56 | toForeignPtr, 57 | fromForeignPtr0, 58 | toForeignPtr0, 59 | 60 | -- * Utilities 61 | nullForeignPtr, 62 | deferForeignPtrAvailability, 63 | SizeOverflowException, 64 | overflowError, 65 | checkedAdd, 66 | checkedMultiply, 67 | 68 | -- * Standard C Functions 69 | c_strlen, 70 | c_free_finalizer, 71 | 72 | memchr, 73 | memcmp, 74 | memcpy, 75 | memset, 76 | 77 | -- * cbits functions 78 | c_reverse, 79 | c_intersperse, 80 | c_maximum, 81 | c_minimum, 82 | c_count, 83 | c_sort, 84 | 85 | -- * Chars 86 | w2c, c2w, isSpaceWord8, isSpaceChar8, 87 | 88 | -- * Deprecated and unmentionable 89 | accursedUnutterablePerformIO, 90 | 91 | -- * Exported compatibility shim 92 | plusForeignPtr, 93 | unsafeWithForeignPtr 94 | ) where 95 | 96 | import Data.ByteString.Internal.Type 97 | -------------------------------------------------------------------------------- /Data/ByteString/Lazy/ReadInt.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | 3 | -- This file is also included by "Data.ByteString.ReadInt", after defining 4 | -- "BYTESTRING_STRICT". The two modules share much of their code, but 5 | -- the lazy version adds an outer loop over the chunks. 6 | 7 | #ifdef BYTESTRING_STRICT 8 | module Data.ByteString.ReadInt 9 | #else 10 | module Data.ByteString.Lazy.ReadInt 11 | #endif 12 | ( readInt 13 | , readInt8 14 | , readInt16 15 | , readInt32 16 | , readWord 17 | , readWord8 18 | , readWord16 19 | , readWord32 20 | , readInt64 21 | , readWord64 22 | ) where 23 | 24 | import qualified Data.ByteString.Internal as BI 25 | #ifdef BYTESTRING_STRICT 26 | import Data.ByteString 27 | #else 28 | import Data.ByteString.Lazy 29 | import Data.ByteString.Lazy.Internal 30 | #endif 31 | import Data.Bits (FiniteBits, isSigned) 32 | import Data.ByteString.Internal (pattern BS, plusForeignPtr) 33 | import Data.Int 34 | import Data.Word 35 | import Foreign.ForeignPtr (ForeignPtr) 36 | import Foreign.Ptr (minusPtr, plusPtr) 37 | import Foreign.Storable (Storable(..)) 38 | 39 | ----- Public API 40 | 41 | -- | Try to read a signed 'Int' value from the 'ByteString', returning 42 | -- @Just (val, str)@ on success, where @val@ is the value read and @str@ is the 43 | -- rest of the input string. If the sequence of digits decodes to a value 44 | -- larger than can be represented by an 'Int', the returned value will be 45 | -- 'Nothing'. 46 | -- 47 | -- 'readInt' does not ignore leading whitespace, the value must start 48 | -- immediately at the beginning of the input string. 49 | -- 50 | -- ==== __Examples__ 51 | -- >>> readInt "-1729 sum of cubes" 52 | -- Just (-1729," sum of cubes") 53 | -- >>> readInt "+1: readInt also accepts a leading '+'" 54 | -- Just (1, ": readInt also accepts a leading '+'") 55 | -- >>> readInt "not a decimal number" 56 | -- Nothing 57 | -- >>> readInt "12345678901234567890 overflows maxBound" 58 | -- Nothing 59 | -- >>> readInt "-12345678901234567890 underflows minBound" 60 | -- Nothing 61 | -- 62 | readInt :: ByteString -> Maybe (Int, ByteString) 63 | readInt = _read 64 | 65 | -- | A variant of 'readInt' specialised to 'Int32'. 66 | readInt32 :: ByteString -> Maybe (Int32, ByteString) 67 | readInt32 = _read 68 | 69 | -- | A variant of 'readInt' specialised to 'Int16'. 70 | readInt16 :: ByteString -> Maybe (Int16, ByteString) 71 | readInt16 = _read 72 | 73 | -- | A variant of 'readInt' specialised to 'Int8'. 74 | readInt8 :: ByteString -> Maybe (Int8, ByteString) 75 | readInt8 = _read 76 | 77 | -- | Try to read a 'Word' value from the 'ByteString', returning 78 | -- @Just (val, str)@ on success, where @val@ is the value read and @str@ is the 79 | -- rest of the input string. If the sequence of digits decodes to a value 80 | -- larger than can be represented by a 'Word', the returned value will be 81 | -- 'Nothing'. 82 | -- 83 | -- 'readWord' does not ignore leading whitespace, the value must start with a 84 | -- decimal digit immediately at the beginning of the input string. Leading @+@ 85 | -- signs are not accepted. 86 | -- 87 | -- ==== __Examples__ 88 | -- >>> readWord "1729 sum of cubes" 89 | -- Just (1729," sum of cubes") 90 | -- >>> readWord "+1729 has an explicit sign" 91 | -- Nothing 92 | -- >>> readWord "not a decimal number" 93 | -- Nothing 94 | -- >>> readWord "98765432109876543210 overflows maxBound" 95 | -- Nothing 96 | -- 97 | readWord :: ByteString -> Maybe (Word, ByteString) 98 | readWord = _read 99 | 100 | -- | A variant of 'readWord' specialised to 'Word32'. 101 | readWord32 :: ByteString -> Maybe (Word32, ByteString) 102 | readWord32 = _read 103 | 104 | -- | A variant of 'readWord' specialised to 'Word16'. 105 | readWord16 :: ByteString -> Maybe (Word16, ByteString) 106 | readWord16 = _read 107 | 108 | -- | A variant of 'readWord' specialised to 'Word8'. 109 | readWord8 :: ByteString -> Maybe (Word8, ByteString) 110 | readWord8 = _read 111 | 112 | -- | A variant of 'readInt' specialised to 'Int64'. 113 | readInt64 :: ByteString -> Maybe (Int64, ByteString) 114 | readInt64 = _read 115 | 116 | -- | A variant of 'readWord' specialised to 'Word64'. 117 | readWord64 :: ByteString -> Maybe (Word64, ByteString) 118 | readWord64 = _read 119 | 120 | -- | Polymorphic Int*/Word* reader 121 | _read :: forall a. (Integral a, FiniteBits a, Bounded a) 122 | => ByteString -> Maybe (a, ByteString) 123 | {-# INLINE _read #-} 124 | _read 125 | | isSigned @a 0 126 | = \ bs -> signed bs >>= \ (r, s, d1) -> _readDecimal r s d1 127 | | otherwise 128 | -- When the input is @16^n-1@, as is the case with 'maxBound' for 129 | -- all the Word* types, the last decimal digit of 'maxBound' is 5. 130 | = \ bs -> unsigned 5 bs >>= \ (r, s, d1) -> _readDecimal r s d1 131 | where 132 | -- Returns: 133 | -- * Mod 10 min/max bound remainder 134 | -- * 2nd and later digits 135 | -- * 1st digit 136 | -- 137 | -- When the input is @8*16^n-1@, as is the case with 'maxBound' for 138 | -- all the Int* types, the last decimal digit of 'maxBound' is 7. 139 | -- 140 | signed :: ByteString -> Maybe (Word64, ByteString, Word64) 141 | signed bs = do 142 | (w, s) <- uncons bs 143 | let d1 = fromDigit w 144 | if | d1 <= 9 -> Just (7, s, d1) -- leading digit 145 | | w == 0x2d -> unsigned 8 s -- minus sign 146 | | w == 0x2b -> unsigned 7 s -- plus sign 147 | | otherwise -> Nothing -- not a number 148 | 149 | unsigned :: Word64 -> ByteString -> Maybe (Word64, ByteString, Word64) 150 | unsigned r bs = do 151 | (w, s) <- uncons bs 152 | let d1 = fromDigit w 153 | if | d1 <= 9 -> Just (r, s, d1) -- leading digit 154 | | otherwise -> Nothing -- not a number 155 | 156 | ----- Fixed-width unsigned reader 157 | 158 | -- | Intermediate result from scanning a chunk, final output is 159 | -- converted to the requested type once all chunks are processed. 160 | -- 161 | data Result = Overflow 162 | | Result !Int -- number of bytes (digits) read 163 | !Word64 -- accumulator value 164 | 165 | _readDecimal :: forall a. (Integral a, Bounded a) 166 | => Word64 -- ^ abs(maxBound/minBound) `mod` 10 167 | -> ByteString -- ^ Input string 168 | -> Word64 -- ^ First digit value 169 | -> Maybe (a, ByteString) 170 | {-# INLINE _readDecimal #-} 171 | _readDecimal !r = consume 172 | where 173 | consume :: ByteString -> Word64 -> Maybe (a, ByteString) 174 | #ifdef BYTESTRING_STRICT 175 | consume (BS fp len) a = case _digits q r fp len a of 176 | Result used acc 177 | | used == len 178 | -> convert acc empty 179 | | otherwise 180 | -> convert acc $ BS (fp `plusForeignPtr` used) (len - used) 181 | _ -> Nothing 182 | #else 183 | -- All done 184 | consume Empty acc = convert acc Empty 185 | -- Process next chunk 186 | consume (Chunk (BS fp len) cs) acc 187 | = case _digits q r fp len acc of 188 | Result used acc' 189 | | used == len 190 | -- process remaining chunks 191 | -> consume cs acc' 192 | | otherwise 193 | -- ran into a non-digit 194 | -> convert acc' $ 195 | Chunk (BS (fp `plusForeignPtr` used) (len - used)) cs 196 | _ -> Nothing 197 | #endif 198 | convert :: Word64 -> ByteString -> Maybe (a, ByteString) 199 | convert !acc rest = 200 | let !i = case r of 201 | -- minBound @Int* `mod` 10 == 8 202 | 8 -> negate $ fromIntegral @Word64 @a acc 203 | _ -> fromIntegral @Word64 @a acc 204 | in Just (i, rest) 205 | 206 | -- The quotient of 'maxBound' divided by 10 is needed for 207 | -- overflow checks, once the accumulator exceeds this value 208 | -- no further digits can be added. If equal, the last digit 209 | -- must not exceed the `r` value (max/min bound `mod` 10). 210 | -- 211 | q = fromIntegral @a @Word64 maxBound `div` 10 212 | 213 | ----- Per chunk decoder 214 | 215 | -- | Process as many digits as we can, returning the additional 216 | -- number of digits found and the updated accumulator. If the 217 | -- accumulator would overflow return 'Overflow'. 218 | -- 219 | _digits :: Word64 -- ^ maximum non-overflow value `div` 10 220 | -> Word64 -- ^ maximum non-overflow vavlue `mod` 10 221 | -> ForeignPtr Word8 -- ^ Input buffer 222 | -> Int -- ^ Input length 223 | -> Word64 -- ^ Accumulated value of leading digits 224 | -> Result -- ^ Bytes read and final accumulator, 225 | -- or else overflow indication 226 | {-# INLINE _digits #-} 227 | _digits !q !r fp len a = BI.accursedUnutterablePerformIO $ 228 | BI.unsafeWithForeignPtr fp $ \ ptr -> do 229 | let end = ptr `plusPtr` len 230 | go ptr end ptr a 231 | where 232 | go !start !end = loop 233 | where 234 | loop !ptr !acc = getDigit >>= \ !d -> 235 | if | d > 9 236 | -> return $ Result (ptr `minusPtr` start) acc 237 | | acc < q || acc == q && d <= r 238 | -> loop (ptr `plusPtr` 1) (acc * 10 + d) 239 | | otherwise 240 | -> return Overflow 241 | where 242 | getDigit :: IO Word64 243 | getDigit 244 | | ptr /= end = fromDigit <$> peek ptr 245 | | otherwise = pure 10 -- End of input 246 | {-# NOINLINE getDigit #-} 247 | -- 'getDigit' makes it possible to implement a single success 248 | -- exit point from the loop. If instead we return 'Result' 249 | -- from multiple places, when '_digits' is inlined we get (at 250 | -- least GHC 8.10 through 9.2) for each exit path a separate 251 | -- join point implementing the continuation code. GHC ticket 252 | -- . 253 | -- 254 | -- The NOINLINE pragma is required to avoid inlining branches 255 | -- that would restore multiple exit points. 256 | 257 | fromDigit :: Word8 -> Word64 258 | {-# INLINE fromDigit #-} 259 | fromDigit = \ !w -> fromIntegral w - 0x30 -- i.e. w - '0' 260 | -------------------------------------------------------------------------------- /Data/ByteString/Lazy/ReadNat.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | 3 | -- This file is included by "Data.ByteString.ReadInt", after defining 4 | -- "BYTESTRING_STRICT". The two modules are largely identical, except for the 5 | -- choice of ByteString type and the loops in `readNatural`, where the lazy 6 | -- version needs to nest the inner loop inside a loop over the constituent 7 | -- chunks. 8 | 9 | #ifdef BYTESTRING_STRICT 10 | module Data.ByteString.ReadNat 11 | #else 12 | module Data.ByteString.Lazy.ReadNat 13 | #endif 14 | ( readInteger 15 | , readNatural 16 | ) where 17 | 18 | import qualified Data.ByteString.Internal as BI 19 | #ifdef BYTESTRING_STRICT 20 | import Data.ByteString 21 | #else 22 | import Data.ByteString.Lazy 23 | import Data.ByteString.Lazy.Internal 24 | #endif 25 | import Data.Bits (finiteBitSize) 26 | import Data.ByteString.Internal (pattern BS, plusForeignPtr) 27 | import Data.Word 28 | import Foreign.ForeignPtr (ForeignPtr) 29 | import Foreign.Ptr (Ptr, minusPtr, plusPtr) 30 | import Foreign.Storable (Storable(..)) 31 | import Numeric.Natural (Natural) 32 | 33 | ----- Public API 34 | 35 | -- | 'readInteger' reads an 'Integer' from the beginning of the 'ByteString'. 36 | -- If there is no 'Integer' at the beginning of the string, it returns 37 | -- 'Nothing', otherwise it just returns the 'Integer' read, and the rest of 38 | -- the string. 39 | -- 40 | -- 'readInteger' does not ignore leading whitespace, the value must start 41 | -- immediately at the beginning of the input string. 42 | -- 43 | -- ==== __Examples__ 44 | -- >>> readInteger "-000111222333444555666777888999 all done" 45 | -- Just (-111222333444555666777888999," all done") 46 | -- >>> readInteger "+1: readInteger also accepts a leading '+'" 47 | -- Just (1, ": readInteger also accepts a leading '+'") 48 | -- >>> readInteger "not a decimal number" 49 | -- Nothing 50 | -- 51 | readInteger :: ByteString -> Maybe (Integer, ByteString) 52 | readInteger = \ bs -> do 53 | (w, s) <- uncons bs 54 | let d = fromDigit w 55 | if | d <= 9 -> unsigned d s -- leading digit 56 | | w == 0x2d -> negative s -- minus sign 57 | | w == 0x2b -> positive s -- plus sign 58 | | otherwise -> Nothing -- not a number 59 | where 60 | unsigned :: Word -> ByteString -> Maybe (Integer, ByteString) 61 | unsigned d s = 62 | let (!n, rest) = _readDecimal d s 63 | !i = toInteger n 64 | in Just (i, rest) 65 | 66 | positive :: ByteString -> Maybe (Integer, ByteString) 67 | positive bs = do 68 | (w, s) <- uncons bs 69 | let d = fromDigit w 70 | if | d <= 9 -> unsigned d s 71 | | otherwise -> Nothing 72 | 73 | negative :: ByteString -> Maybe (Integer, ByteString) 74 | negative bs = do 75 | (w, s) <- uncons bs 76 | let d = fromDigit w 77 | if | d > 9 -> Nothing 78 | | otherwise -> let (n, rest) = _readDecimal d s 79 | !i = negate $ toInteger n 80 | in Just (i, rest) 81 | 82 | -- | 'readNatural' reads a 'Natural' number from the beginning of the 83 | -- 'ByteString'. If there is no 'Natural' number at the beginning of the 84 | -- string, it returns 'Nothing', otherwise it just returns the number read, and 85 | -- the rest of the string. 86 | -- 87 | -- 'readNatural' does not ignore leading whitespace, the value must start with 88 | -- a decimal digit immediately at the beginning of the input string. Leading 89 | -- @+@ signs are not accepted. 90 | -- 91 | -- ==== __Examples__ 92 | -- >>> readNatural "000111222333444555666777888999 all done" 93 | -- Just (111222333444555666777888999," all done") 94 | -- >>> readNatural "+000111222333444555666777888999 explicit sign" 95 | -- Nothing 96 | -- >>> readNatural "not a decimal number" 97 | -- Nothing 98 | -- 99 | readNatural :: ByteString -> Maybe (Natural, ByteString) 100 | readNatural bs = do 101 | (w, s) <- uncons bs 102 | let d = fromDigit w 103 | if | d <= 9 -> Just $! _readDecimal d s 104 | | otherwise -> Nothing 105 | 106 | ----- Internal implementation 107 | 108 | -- | Intermediate result from scanning a chunk, final output is 109 | -- obtained via `convert` after all the chunks are processed. 110 | -- 111 | data Result = Result !Int -- Bytes consumed 112 | !Word -- Value of LSW 113 | !Int -- Digits in LSW 114 | [Natural] -- Little endian MSW list 115 | 116 | _readDecimal :: Word -> ByteString -> (Natural, ByteString) 117 | _readDecimal = 118 | -- Having read one digit, we're about to read the 2nd So the digit count 119 | -- up to 'safeLog' starts at 2. 120 | consume [] 2 121 | where 122 | consume :: [Natural] -> Int -> Word -> ByteString 123 | -> (Natural, ByteString) 124 | #ifdef BYTESTRING_STRICT 125 | consume ns cnt acc (BS fp len) = 126 | -- Having read one digit, we're about to read the 2nd 127 | -- So the digit count up to 'safeLog' starts at 2. 128 | case natdigits fp len acc cnt ns of 129 | Result used acc' cnt' ns' 130 | | used == len 131 | -> convert acc' cnt' ns' $ empty 132 | | otherwise 133 | -> convert acc' cnt' ns' $ 134 | BS (fp `plusForeignPtr` used) (len - used) 135 | #else 136 | -- All done 137 | consume ns cnt acc Empty = convert acc cnt ns Empty 138 | -- Process next chunk 139 | consume ns cnt acc (Chunk (BS fp len) cs) 140 | = case natdigits fp len acc cnt ns of 141 | Result used acc' cnt' ns' 142 | | used == len -- process more chunks 143 | -> consume ns' cnt' acc' cs 144 | | otherwise -- ran into a non-digit 145 | -> let c = Chunk (BS (fp `plusForeignPtr` used) (len - used)) cs 146 | in convert acc' cnt' ns' c 147 | #endif 148 | convert !acc !cnt !ns rest = 149 | let !n = combine acc cnt ns 150 | in (n, rest) 151 | 152 | -- | Merge least-significant word with reduction of of little-endian tail. 153 | -- 154 | -- The input is: 155 | -- 156 | -- * Least significant digits as a 'Word' (LSW) 157 | -- * The number of digits that went into the LSW 158 | -- * All the remaining digit groups ('safeLog' digits each), 159 | -- in little-endian order 160 | -- 161 | -- The result is obtained by pairwise recursive combining of all the 162 | -- full size digit groups, followed by multiplication by @10^cnt@ and 163 | -- addition of the LSW. 164 | combine :: Word -- ^ value of LSW 165 | -> Int -- ^ count of digits in LSW 166 | -> [Natural] -- ^ tail elements (base @10^'safeLog'@) 167 | -> Natural 168 | {-# INLINE combine #-} 169 | combine !acc !_ [] = wordToNatural acc 170 | combine !acc !cnt ns = 171 | wordToNatural (10^cnt) * combine1 safeBase ns + wordToNatural acc 172 | 173 | -- | Recursive reduction of little-endian sequence of 'Natural'-valued 174 | -- /digits/ in base @base@ (a power of 10). The base is squared after 175 | -- each round. This shows better asymptotic performance than one word 176 | -- at a time multiply-add folds. See: 177 | -- 178 | -- 179 | combine1 :: Natural -> [Natural] -> Natural 180 | combine1 _ [n] = n 181 | combine1 base ns = combine1 (base * base) (combine2 base ns) 182 | 183 | -- | One round pairwise merge of numbers in base @base@. 184 | combine2 :: Natural -> [Natural] -> [Natural] 185 | combine2 base (n:m:ns) = let !t = m * base + n in t : combine2 base ns 186 | combine2 _ ns = ns 187 | 188 | -- The intermediate representation is a little-endian sequence in base 189 | -- @10^'safeLog'@, prefixed by an initial element in base @10^cnt@ for some 190 | -- @cnt@ between 1 and 'safeLog'. The final result is obtained by recursive 191 | -- pairwise merging of the tail followed by a final multiplication by @10^cnt@ 192 | -- and addition of the head. 193 | -- 194 | natdigits :: ForeignPtr Word8 -- ^ Input chunk 195 | -> Int -- ^ Chunk length 196 | -> Word -- ^ accumulated element 197 | -> Int -- ^ partial digit count 198 | -> [Natural] -- ^ accumulated MSB elements 199 | -> Result 200 | {-# INLINE natdigits #-} 201 | natdigits fp len = \ acc cnt ns -> 202 | BI.accursedUnutterablePerformIO $ 203 | BI.unsafeWithForeignPtr fp $ \ ptr -> do 204 | let end = ptr `plusPtr` len 205 | go ptr end acc cnt ns ptr 206 | where 207 | go !start !end = loop 208 | where 209 | loop :: Word -> Int -> [Natural] -> Ptr Word8 -> IO Result 210 | loop !acc !cnt ns !ptr = getDigit >>= \ !d -> 211 | if | d > 9 212 | -> return $ Result (ptr `minusPtr` start) acc cnt ns 213 | | cnt < safeLog 214 | -> loop (10*acc + d) (cnt+1) ns $ ptr `plusPtr` 1 215 | | otherwise 216 | -> let !acc' = wordToNatural acc 217 | in loop d 1 (acc' : ns) $ ptr `plusPtr` 1 218 | where 219 | getDigit | ptr /= end = fromDigit <$> peek ptr 220 | | otherwise = pure 10 -- End of input 221 | {-# NOINLINE getDigit #-} 222 | -- 'getDigit' makes it possible to implement a single success 223 | -- exit point from the loop. If instead we return 'Result' 224 | -- from multiple places, when 'natdigits' is inlined we get (at 225 | -- least GHC 8.10 through 9.2) for each exit path a separate 226 | -- join point implementing the continuation code. GHC ticket 227 | -- . 228 | -- 229 | -- The NOINLINE pragma is required to avoid inlining branches 230 | -- that would restore multiple exit points. 231 | 232 | ----- Misc functions 233 | 234 | -- | Largest decimal digit count that never overflows the accumulator 235 | -- The base 10 logarithm of 2 is ~0.30103, therefore 2^n has at least 236 | -- @1 + floor (0.3 n)@ decimal digits. Therefore @floor (0.3 n)@, 237 | -- digits cannot overflow the upper bound of an @n-bit@ word. 238 | -- 239 | safeLog :: Int 240 | safeLog = 3 * finiteBitSize @Word 0 `div` 10 241 | 242 | -- | 10-power base for little-endian sequence of ~Word-sized "digits" 243 | safeBase :: Natural 244 | safeBase = 10 ^ safeLog 245 | 246 | fromDigit :: Word8 -> Word 247 | {-# INLINE fromDigit #-} 248 | fromDigit = \ !w -> fromIntegral w - 0x30 -- i.e. w - '0' 249 | 250 | wordToNatural :: Word -> Natural 251 | {-# INLINE wordToNatural #-} 252 | wordToNatural = fromIntegral 253 | -------------------------------------------------------------------------------- /Data/ByteString/ReadInt.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | #define BYTESTRING_STRICT 3 | #include "Lazy/ReadInt.hs" 4 | -------------------------------------------------------------------------------- /Data/ByteString/ReadNat.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | #define BYTESTRING_STRICT 3 | #include "Lazy/ReadNat.hs" 4 | -------------------------------------------------------------------------------- /Data/ByteString/Short.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE Trustworthy #-} 2 | 3 | -- | 4 | -- Module : Data.ByteString.Short 5 | -- Copyright : (c) Duncan Coutts 2012-2013, Julian Ospald 2022 6 | -- License : BSD-style 7 | -- 8 | -- Maintainer : hasufell@posteo.de 9 | -- Stability : stable 10 | -- Portability : ghc only 11 | -- 12 | -- A compact representation suitable for storing short byte strings in memory. 13 | -- 14 | -- In typical use cases it can be imported alongside "Data.ByteString", e.g. 15 | -- 16 | -- > import qualified Data.ByteString as B 17 | -- > import qualified Data.ByteString.Short as B 18 | -- > (ShortByteString, toShort, fromShort) 19 | -- 20 | -- Other 'ShortByteString' operations clash with "Data.ByteString" or "Prelude" 21 | -- functions however, so they should be imported @qualified@ with a different 22 | -- alias e.g. 23 | -- 24 | -- > import qualified Data.ByteString.Short as B.Short 25 | -- 26 | module Data.ByteString.Short ( 27 | 28 | -- * The @ShortByteString@ type 29 | 30 | ShortByteString(..), 31 | 32 | -- ** Memory overhead 33 | -- | With GHC, the memory overheads are as follows, expressed in words and 34 | -- in bytes (words are 4 and 8 bytes on 32 or 64bit machines respectively). 35 | -- 36 | -- * t'Data.ByteString.ByteString' unshared: 8 words; 32 or 64 bytes. 37 | -- 38 | -- * t'Data.ByteString.ByteString' shared substring: 4 words; 16 or 32 bytes. 39 | -- 40 | -- * 'ShortByteString': 4 words; 16 or 32 bytes. 41 | -- 42 | -- For the string data itself, both 'ShortByteString' and t'Data.ByteString.ByteString' use 43 | -- one byte per element, rounded up to the nearest word. For example, 44 | -- including the overheads, a length 10 'ShortByteString' would take 45 | -- @16 + 12 = 28@ bytes on a 32bit platform and @32 + 16 = 48@ bytes on a 46 | -- 64bit platform. 47 | -- 48 | -- These overheads can all be reduced by 1 word (4 or 8 bytes) when the 49 | -- 'ShortByteString' or t'Data.ByteString.ByteString' is unpacked into another constructor. 50 | -- 51 | -- For example: 52 | -- 53 | -- > data ThingId = ThingId {-# UNPACK #-} !Int 54 | -- > {-# UNPACK #-} !ShortByteString 55 | -- 56 | -- This will take @1 + 1 + 3@ words (the @ThingId@ constructor + 57 | -- unpacked @Int@ + unpacked @ShortByteString@), plus the words for the 58 | -- string data. 59 | 60 | -- ** Heap fragmentation 61 | -- | With GHC, the t'Data.ByteString.ByteString' representation uses /pinned/ memory, 62 | -- meaning it cannot be moved by the GC. This is usually the right thing to 63 | -- do for larger strings, but for small strings using pinned memory can 64 | -- lead to heap fragmentation which wastes space. The 'ShortByteString' 65 | -- type (and the @Text@ type from the @text@ package) use /unpinned/ memory 66 | -- so they do not contribute to heap fragmentation. In addition, with GHC, 67 | -- small unpinned strings are allocated in the same way as normal heap 68 | -- allocations, rather than in a separate pinned area. 69 | 70 | -- * Introducing and eliminating 'ShortByteString's 71 | empty, 72 | singleton, 73 | pack, 74 | unpack, 75 | fromShort, 76 | toShort, 77 | 78 | -- * Basic interface 79 | snoc, 80 | cons, 81 | append, 82 | last, 83 | tail, 84 | uncons, 85 | head, 86 | init, 87 | unsnoc, 88 | null, 89 | length, 90 | 91 | -- * Encoding validation 92 | isValidUtf8, 93 | 94 | -- * Transforming ShortByteStrings 95 | map, 96 | reverse, 97 | intercalate, 98 | 99 | -- * Reducing 'ShortByteString's (folds) 100 | foldl, 101 | foldl', 102 | foldl1, 103 | foldl1', 104 | 105 | foldr, 106 | foldr', 107 | foldr1, 108 | foldr1', 109 | 110 | -- ** Special folds 111 | all, 112 | any, 113 | concat, 114 | 115 | -- ** Generating and unfolding ByteStrings 116 | replicate, 117 | unfoldr, 118 | unfoldrN, 119 | 120 | -- * Substrings 121 | 122 | -- ** Breaking strings 123 | take, 124 | takeEnd, 125 | takeWhileEnd, 126 | takeWhile, 127 | drop, 128 | dropEnd, 129 | dropWhile, 130 | dropWhileEnd, 131 | breakEnd, 132 | break, 133 | span, 134 | spanEnd, 135 | splitAt, 136 | split, 137 | splitWith, 138 | stripSuffix, 139 | stripPrefix, 140 | 141 | -- * Predicates 142 | isInfixOf, 143 | isPrefixOf, 144 | isSuffixOf, 145 | 146 | -- ** Search for arbitrary substrings 147 | breakSubstring, 148 | 149 | -- * Searching ShortByteStrings 150 | 151 | -- ** Searching by equality 152 | elem, 153 | 154 | -- ** Searching with a predicate 155 | find, 156 | filter, 157 | partition, 158 | 159 | -- * Indexing ShortByteStrings 160 | index, 161 | indexMaybe, 162 | (!?), 163 | elemIndex, 164 | elemIndices, 165 | count, 166 | findIndex, 167 | findIndices, 168 | 169 | -- * Low level conversions 170 | -- ** Packing 'Foreign.C.String.CString's and pointers 171 | packCString, 172 | packCStringLen, 173 | 174 | -- ** Using ShortByteStrings as 'Foreign.C.String.CString's 175 | useAsCString, 176 | useAsCStringLen, 177 | ) where 178 | 179 | import Data.ByteString.Short.Internal 180 | import Prelude () 181 | 182 | -------------------------------------------------------------------------------- /Data/ByteString/Unsafe.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE Unsafe #-} 2 | 3 | -- | 4 | -- Module : Data.ByteString.Unsafe 5 | -- Copyright : (c) Don Stewart 2006-2008 6 | -- (c) Duncan Coutts 2006-2011 7 | -- License : BSD-style 8 | -- Maintainer : dons00@gmail.com, duncan@community.haskell.org 9 | -- Stability : provisional 10 | -- Portability : non-portable 11 | -- 12 | -- A module containing unsafe 'ByteString' operations. 13 | -- 14 | -- While these functions have a stable API and you may use these functions in 15 | -- applications, do carefully consider the documented pre-conditions; 16 | -- incorrect use can break referential transparency or worse. 17 | -- 18 | module Data.ByteString.Unsafe ( 19 | 20 | -- * Unchecked access 21 | unsafeHead, 22 | unsafeTail, 23 | unsafeInit, 24 | unsafeLast, 25 | unsafeIndex, 26 | unsafeTake, 27 | unsafeDrop, 28 | 29 | -- * Low level interaction with CStrings 30 | -- ** Using ByteStrings with functions for CStrings 31 | unsafeUseAsCString, 32 | unsafeUseAsCStringLen, 33 | 34 | -- ** Converting CStrings to ByteStrings 35 | unsafePackCString, 36 | unsafePackCStringLen, 37 | unsafePackMallocCString, 38 | unsafePackMallocCStringLen, 39 | 40 | unsafePackAddress, 41 | unsafePackAddressLen, 42 | unsafePackCStringFinalizer, 43 | unsafeFinalize, 44 | 45 | ) where 46 | 47 | import Data.ByteString.Internal 48 | 49 | import Foreign.ForeignPtr (newForeignPtr_, newForeignPtr, withForeignPtr) 50 | 51 | import Foreign.Storable (Storable(..)) 52 | import Foreign.C.String (CString, CStringLen) 53 | 54 | import Control.Exception (assert) 55 | 56 | import Data.Word (Word8) 57 | 58 | import qualified Foreign.ForeignPtr as FC (finalizeForeignPtr) 59 | import qualified Foreign.Concurrent as FC (newForeignPtr) 60 | 61 | import GHC.Exts (Addr#) 62 | import GHC.Ptr (Ptr(..), castPtr) 63 | 64 | -- --------------------------------------------------------------------- 65 | -- 66 | -- Extensions to the basic interface 67 | -- 68 | 69 | -- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' omits the 70 | -- check for the empty case, so there is an obligation on the programmer 71 | -- to provide a proof that the ByteString is non-empty. 72 | unsafeHead :: ByteString -> Word8 73 | unsafeHead (BS x l) = assert (l > 0) $ 74 | accursedUnutterablePerformIO $ unsafeWithForeignPtr x $ \p -> peek p 75 | {-# INLINE unsafeHead #-} 76 | 77 | -- | A variety of 'tail' for non-empty ByteStrings. 'unsafeTail' omits the 78 | -- check for the empty case. As with 'unsafeHead', the programmer must 79 | -- provide a separate proof that the ByteString is non-empty. 80 | unsafeTail :: ByteString -> ByteString 81 | unsafeTail (BS ps l) = assert (l > 0) $ BS (plusForeignPtr ps 1) (l-1) 82 | {-# INLINE unsafeTail #-} 83 | 84 | -- | A variety of 'init' for non-empty ByteStrings. 'unsafeInit' omits the 85 | -- check for the empty case. As with 'unsafeHead', the programmer must 86 | -- provide a separate proof that the ByteString is non-empty. 87 | unsafeInit :: ByteString -> ByteString 88 | unsafeInit (BS ps l) = assert (l > 0) $ BS ps (l-1) 89 | {-# INLINE unsafeInit #-} 90 | 91 | -- | A variety of 'last' for non-empty ByteStrings. 'unsafeLast' omits the 92 | -- check for the empty case. As with 'unsafeHead', the programmer must 93 | -- provide a separate proof that the ByteString is non-empty. 94 | unsafeLast :: ByteString -> Word8 95 | unsafeLast (BS x l) = assert (l > 0) $ 96 | accursedUnutterablePerformIO $ unsafeWithForeignPtr x $ \p -> peekByteOff p (l-1) 97 | {-# INLINE unsafeLast #-} 98 | 99 | -- | Unsafe 'ByteString' index (subscript) operator, starting from 0, returning a 'Word8' 100 | -- This omits the bounds check, which means there is an accompanying 101 | -- obligation on the programmer to ensure the bounds are checked in some 102 | -- other way. 103 | unsafeIndex :: ByteString -> Int -> Word8 104 | unsafeIndex (BS x l) i = assert (i >= 0 && i < l) $ 105 | accursedUnutterablePerformIO $ unsafeWithForeignPtr x $ \p -> peekByteOff p i 106 | {-# INLINE unsafeIndex #-} 107 | 108 | -- | A variety of 'take' which omits the checks on @n@ so there is an 109 | -- obligation on the programmer to provide a proof that @0 <= n <= 'length' xs@. 110 | unsafeTake :: Int -> ByteString -> ByteString 111 | unsafeTake n (BS x l) = assert (0 <= n && n <= l) $ BS x n 112 | {-# INLINE unsafeTake #-} 113 | 114 | -- | A variety of 'drop' which omits the checks on @n@ so there is an 115 | -- obligation on the programmer to provide a proof that @0 <= n <= 'length' xs@. 116 | unsafeDrop :: Int -> ByteString -> ByteString 117 | unsafeDrop n (BS x l) = assert (0 <= n && n <= l) $ BS (plusForeignPtr x n) (l-n) 118 | {-# INLINE unsafeDrop #-} 119 | 120 | 121 | -- | /O(1)/ 'unsafePackAddressLen' provides constant-time construction of 122 | -- 'ByteString's, which is ideal for string literals. It packs a sequence 123 | -- of bytes into a 'ByteString', given a raw 'Addr#' to the string, and 124 | -- the length of the string. 125 | -- 126 | -- This function is /unsafe/ in two ways: 127 | -- 128 | -- * the length argument is assumed to be correct. If the length 129 | -- argument is incorrect, it is possible to overstep the end of the 130 | -- byte array. 131 | -- 132 | -- * if the underlying 'Addr#' is later modified, this change will be 133 | -- reflected in the resulting 'ByteString', breaking referential 134 | -- transparency. 135 | -- 136 | -- If in doubt, don't use this function. 137 | -- 138 | unsafePackAddressLen :: Int -> Addr# -> IO ByteString 139 | unsafePackAddressLen len addr# = do 140 | p <- newForeignPtr_ (Ptr addr#) 141 | return $ BS p len 142 | {-# INLINE unsafePackAddressLen #-} 143 | 144 | -- | /O(1)/ Construct a 'ByteString' given a Ptr Word8 to a buffer, a 145 | -- length, and an IO action representing a finalizer. This function is 146 | -- not available on Hugs. 147 | -- 148 | -- This function is /unsafe/, it is possible to break referential 149 | -- transparency by modifying the underlying buffer pointed to by the 150 | -- first argument. Any changes to the original buffer will be reflected 151 | -- in the resulting 'ByteString'. 152 | -- 153 | unsafePackCStringFinalizer :: Ptr Word8 -> Int -> IO () -> IO ByteString 154 | unsafePackCStringFinalizer p l f = do 155 | fp <- FC.newForeignPtr p f 156 | return $ BS fp l 157 | 158 | -- | Explicitly run the finaliser associated with a 'ByteString'. 159 | -- References to this value after finalisation may generate invalid memory 160 | -- references. 161 | -- 162 | -- This function is /unsafe/, as there may be other 163 | -- 'ByteString's referring to the same underlying pages. If you use 164 | -- this, you need to have a proof of some kind that all 'ByteString's 165 | -- ever generated from the underlying byte array are no longer live. 166 | -- 167 | unsafeFinalize :: ByteString -> IO () 168 | unsafeFinalize (BS p _) = FC.finalizeForeignPtr p 169 | 170 | ------------------------------------------------------------------------ 171 | -- Packing CStrings into ByteStrings 172 | 173 | -- | /O(n)/ Build a 'ByteString' from a 'CString'. This value will have /no/ 174 | -- finalizer associated to it, and will not be garbage collected by 175 | -- Haskell. The ByteString length is calculated using /strlen(3)/, 176 | -- and thus the complexity is a /O(n)/. 177 | -- 178 | -- This function is /unsafe/. If the 'CString' is later modified, this 179 | -- change will be reflected in the resulting 'ByteString', breaking 180 | -- referential transparency. 181 | -- 182 | unsafePackCString :: CString -> IO ByteString 183 | unsafePackCString cstr = do 184 | fp <- newForeignPtr_ (castPtr cstr) 185 | l <- c_strlen cstr 186 | return $! BS fp (fromIntegral l) 187 | 188 | -- | /O(1)/ Build a 'ByteString' from a 'CStringLen'. This value will 189 | -- have /no/ finalizer associated with it, and will not be garbage 190 | -- collected by Haskell. This operation has /O(1)/ complexity as we 191 | -- already know the final size, so no /strlen(3)/ is required. 192 | -- 193 | -- This function is /unsafe/. If the original 'CStringLen' is later 194 | -- modified, this change will be reflected in the resulting 'ByteString', 195 | -- breaking referential transparency. 196 | -- 197 | unsafePackCStringLen :: CStringLen -> IO ByteString 198 | unsafePackCStringLen (ptr,len) = do 199 | fp <- newForeignPtr_ (castPtr ptr) 200 | return $! BS fp (fromIntegral len) 201 | 202 | -- | /O(n)/ Build a 'ByteString' from a malloced 'CString'. This value will 203 | -- have a @free(3)@ finalizer associated to it. 204 | -- 205 | -- This function is /unsafe/. If the original 'CString' is later 206 | -- modified, this change will be reflected in the resulting 'ByteString', 207 | -- breaking referential transparency. 208 | -- 209 | -- This function is also unsafe if you call its finalizer twice, 210 | -- which will result in a /double free/ error, or if you pass it 211 | -- a 'CString' not allocated with 'Foreign.Marshal.Alloc.malloc'. 212 | -- 213 | unsafePackMallocCString :: CString -> IO ByteString 214 | unsafePackMallocCString cstr = do 215 | fp <- newForeignPtr c_free_finalizer (castPtr cstr) 216 | len <- c_strlen cstr 217 | return $! BS fp (fromIntegral len) 218 | 219 | -- | /O(1)/ Build a 'ByteString' from a malloced 'CStringLen'. This 220 | -- value will have a @free(3)@ finalizer associated to it. 221 | -- 222 | -- This function is /unsafe/. If the original 'CString' is later 223 | -- modified, this change will be reflected in the resulting 'ByteString', 224 | -- breaking referential transparency. 225 | -- 226 | -- This function is also unsafe if you call its finalizer twice, 227 | -- which will result in a /double free/ error, or if you pass it 228 | -- a 'CString' not allocated with 'Foreign.Marshal.Alloc.malloc'. 229 | -- 230 | unsafePackMallocCStringLen :: CStringLen -> IO ByteString 231 | unsafePackMallocCStringLen (cstr, len) = do 232 | fp <- newForeignPtr c_free_finalizer (castPtr cstr) 233 | return $! BS fp len 234 | 235 | -- --------------------------------------------------------------------- 236 | 237 | -- | /O(1) construction/ Use a 'ByteString' with a function requiring a 238 | -- 'CString'. 239 | -- 240 | -- This function does zero copying, and merely unwraps a 'ByteString' to 241 | -- appear as a 'CString'. It is /unsafe/ in two ways: 242 | -- 243 | -- * After calling this function the 'CString' shares the underlying 244 | -- byte buffer with the original 'ByteString'. Thus modifying the 245 | -- 'CString', either in C, or using poke, will cause the contents of the 246 | -- 'ByteString' to change, breaking referential transparency. Other 247 | -- 'ByteString's created by sharing (such as those produced via 'take' 248 | -- or 'drop') will also reflect these changes. Modifying the 'CString' 249 | -- will break referential transparency. To avoid this, use 250 | -- 'Data.ByteString.useAsCString', which makes a copy of the original 'ByteString'. 251 | -- 252 | -- * 'CString's are often passed to functions that require them to be 253 | -- null-terminated. If the original 'ByteString' wasn't null terminated, 254 | -- neither will the 'CString' be. It is the programmers responsibility 255 | -- to guarantee that the 'ByteString' is indeed null terminated. If in 256 | -- doubt, use 'Data.ByteString.useAsCString'. 257 | -- 258 | -- * The memory may freed at any point after the subcomputation 259 | -- terminates, so the pointer to the storage must *not* be used 260 | -- after this. 261 | -- 262 | unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a 263 | unsafeUseAsCString (BS ps _) action = withForeignPtr ps $ \p -> action (castPtr p) 264 | -- Cannot use unsafeWithForeignPtr, because action can diverge 265 | 266 | -- | /O(1) construction/ Use a 'ByteString' with a function requiring a 267 | -- 'CStringLen'. 268 | -- 269 | -- This function does zero copying, and merely unwraps a 'ByteString' to 270 | -- appear as a 'CStringLen'. It is /unsafe/: 271 | -- 272 | -- * After calling this function the 'CStringLen' shares the underlying 273 | -- byte buffer with the original 'ByteString'. Thus modifying the 274 | -- 'CStringLen', either in C, or using poke, will cause the contents of the 275 | -- 'ByteString' to change, breaking referential transparency. Other 276 | -- 'ByteString's created by sharing (such as those produced via 'take' 277 | -- or 'drop') will also reflect these changes. Modifying the 'CStringLen' 278 | -- will break referential transparency. To avoid this, use 279 | -- 'Data.ByteString.useAsCStringLen', which makes a copy of the original 'ByteString'. 280 | -- 281 | -- If 'Data.ByteString.empty' is given, it will pass @('Foreign.Ptr.nullPtr', 0)@. 282 | unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a 283 | unsafeUseAsCStringLen (BS ps l) action = withForeignPtr ps $ \p -> action (castPtr p, l) 284 | -- Cannot use unsafeWithForeignPtr, because action can diverge 285 | -------------------------------------------------------------------------------- /Data/ByteString/Utils/ByteOrder.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | 3 | #include "MachDeps.h" 4 | 5 | -- | Why does this module exist? There is "GHC.ByteOrder" in base. 6 | -- But that module is /broken/ until base-4.14/ghc-8.10, so we 7 | -- can't rely on it until we drop support for older ghcs. 8 | -- See https://gitlab.haskell.org/ghc/ghc/-/issues/20338 9 | -- and https://gitlab.haskell.org/ghc/ghc/-/issues/18445 10 | 11 | module Data.ByteString.Utils.ByteOrder 12 | ( ByteOrder(..) 13 | , hostByteOrder 14 | , whenLittleEndian 15 | , whenBigEndian 16 | ) where 17 | 18 | import GHC.ByteOrder (ByteOrder(..)) 19 | 20 | hostByteOrder :: ByteOrder 21 | hostByteOrder = 22 | #ifdef WORDS_BIGENDIAN 23 | BigEndian 24 | #else 25 | LittleEndian 26 | #endif 27 | 28 | -- | If the host is little-endian, applies the given function to the given arg. 29 | -- If the host is big-endian, returns the second argument unchanged. 30 | whenLittleEndian :: (a -> a) -> a -> a 31 | whenLittleEndian fun val = case hostByteOrder of 32 | LittleEndian -> fun val 33 | BigEndian -> val 34 | 35 | -- | If the host is little-endian, returns the second argument unchanged. 36 | -- If the host is big-endian, applies the given function to the given arg. 37 | whenBigEndian :: (a -> a) -> a -> a 38 | whenBigEndian fun val = case hostByteOrder of 39 | LittleEndian -> val 40 | BigEndian -> fun val 41 | -------------------------------------------------------------------------------- /Data/ByteString/Utils/UnalignedAccess.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | 3 | #include "bytestring-cpp-macros.h" 4 | 5 | -- | 6 | -- Module : Data.ByteString.Utils.UnalignedAccess 7 | -- Copyright : (c) Matthew Craven 2023-2024 8 | -- License : BSD-style 9 | -- Maintainer : clyring@gmail.com 10 | -- Stability : internal 11 | -- Portability : non-portable 12 | -- 13 | -- Primitives for reading and writing at potentially-unaligned memory locations 14 | 15 | module Data.ByteString.Utils.UnalignedAccess 16 | ( unalignedWriteU16 17 | , unalignedWriteU32 18 | , unalignedWriteU64 19 | , unalignedWriteFloat 20 | , unalignedWriteDouble 21 | , unalignedReadU64 22 | ) where 23 | 24 | import Foreign.Ptr 25 | import Data.Word 26 | 27 | 28 | #if HS_UNALIGNED_ADDR_PRIMOPS_AVAILABLE 29 | import GHC.IO (IO(..)) 30 | import GHC.Word (Word16(..), Word32(..), Word64(..)) 31 | import GHC.Exts 32 | 33 | unalignedWriteU16 :: Word16 -> Ptr Word8 -> IO () 34 | unalignedWriteU16 = coerce $ \(W16# x#) (Ptr p#) s 35 | -> (# writeWord8OffAddrAsWord16# p# 0# x# s, () #) 36 | 37 | unalignedWriteU32 :: Word32 -> Ptr Word8 -> IO () 38 | unalignedWriteU32 = coerce $ \(W32# x#) (Ptr p#) s 39 | -> (# writeWord8OffAddrAsWord32# p# 0# x# s, () #) 40 | 41 | unalignedWriteU64 :: Word64 -> Ptr Word8 -> IO () 42 | unalignedWriteU64 = coerce $ \(W64# x#) (Ptr p#) s 43 | -> (# writeWord8OffAddrAsWord64# p# 0# x# s, () #) 44 | 45 | unalignedWriteFloat :: Float -> Ptr Word8 -> IO () 46 | unalignedWriteFloat = coerce $ \(F# x#) (Ptr p#) s 47 | -> (# writeWord8OffAddrAsFloat# p# 0# x# s, () #) 48 | 49 | unalignedWriteDouble :: Double -> Ptr Word8 -> IO () 50 | unalignedWriteDouble = coerce $ \(D# x#) (Ptr p#) s 51 | -> (# writeWord8OffAddrAsDouble# p# 0# x# s, () #) 52 | 53 | unalignedReadU64 :: Ptr Word8 -> IO Word64 54 | unalignedReadU64 = coerce $ \(Ptr p#) s 55 | -> case readWord8OffAddrAsWord64# p# 0# s of 56 | (# s', w64# #) -> (# s', W64# w64# #) 57 | 58 | #elif HS_UNALIGNED_POKES_OK 59 | import Foreign.Storable 60 | 61 | unalignedWriteU16 :: Word16 -> Ptr Word8 -> IO () 62 | unalignedWriteU16 x p = poke (castPtr p) x 63 | 64 | unalignedWriteU32 :: Word32 -> Ptr Word8 -> IO () 65 | unalignedWriteU32 x p = poke (castPtr p) x 66 | 67 | unalignedWriteU64 :: Word64 -> Ptr Word8 -> IO () 68 | unalignedWriteU64 x p = poke (castPtr p) x 69 | 70 | unalignedWriteFloat :: Float -> Ptr Word8 -> IO () 71 | unalignedWriteFloat x p = poke (castPtr p) x 72 | 73 | unalignedWriteDouble :: Double -> Ptr Word8 -> IO () 74 | unalignedWriteDouble x p = poke (castPtr p) x 75 | 76 | unalignedReadU64 :: Ptr Word8 -> IO Word64 77 | unalignedReadU64 p = peek (castPtr p) 78 | 79 | #else 80 | foreign import ccall unsafe "static fpstring.h fps_unaligned_write_u16" 81 | unalignedWriteU16 :: Word16 -> Ptr Word8 -> IO () 82 | foreign import ccall unsafe "static fpstring.h fps_unaligned_write_u32" 83 | unalignedWriteU32 :: Word32 -> Ptr Word8 -> IO () 84 | foreign import ccall unsafe "static fpstring.h fps_unaligned_write_u64" 85 | unalignedWriteU64 :: Word64 -> Ptr Word8 -> IO () 86 | foreign import ccall unsafe "static fpstring.h fps_unaligned_write_HsFloat" 87 | unalignedWriteFloat :: Float -> Ptr Word8 -> IO () 88 | foreign import ccall unsafe "static fpstring.h fps_unaligned_write_HsDouble" 89 | unalignedWriteDouble :: Double -> Ptr Word8 -> IO () 90 | foreign import ccall unsafe "static fpstring.h fps_unaligned_read_u64" 91 | unalignedReadU64 :: Ptr Word8 -> IO Word64 92 | #endif 93 | 94 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) Don Stewart 2005-2009 2 | (c) Duncan Coutts 2006-2015 3 | (c) David Roundy 2003-2005 4 | (c) Simon Meier 2010-2011 5 | (c) Koz Ross 2021 6 | 7 | All rights reserved. 8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions 11 | are met: 12 | 1. Redistributions of source code must retain the above copyright 13 | notice, this list of conditions and the following disclaimer. 14 | 2. Redistributions in binary form must reproduce the above copyright 15 | notice, this list of conditions and the following disclaimer in the 16 | documentation and/or other materials provided with the distribution. 17 | 3. Neither the name of the author nor the names of his contributors 18 | may be used to endorse or promote products derived from this software 19 | without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 | ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 25 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 | OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 | SUCH DAMAGE. 32 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ByteString: Fast, Packed Strings of Bytes 2 | 3 | [![Build Status](https://github.com/haskell/bytestring/workflows/ci/badge.svg)](https://github.com/haskell/bytestring/actions?query=workflow%3Aci) [![Hackage](http://img.shields.io/hackage/v/bytestring.svg)](https://hackage.haskell.org/package/bytestring) [![Stackage LTS](http://stackage.org/package/bytestring/badge/lts)](http://stackage.org/lts/package/bytestring) [![Stackage Nightly](http://stackage.org/package/bytestring/badge/nightly)](http://stackage.org/nightly/package/bytestring) 4 | 5 | This library provides the `Data.ByteString` module -- strict and lazy 6 | byte arrays manipulable as strings -- providing very time/space-efficient 7 | string and IO operations. 8 | 9 | For very large data requirements, or constraints on heap size, 10 | `Data.ByteString.Lazy` is provided, a lazy list of bytestring chunks. 11 | Efficient processing of multi-gigabyte data can be achieved this way. 12 | 13 | The library also provides `Data.ByteString.Builder` for efficient construction 14 | of `ByteString` values from smaller pieces during binary serialization. 15 | 16 | Requirements: 17 | 18 | * Cabal 2.2 or greater 19 | * GHC 8.4 or greater 20 | 21 | ### Authors 22 | 23 | `ByteString` was derived from the GHC `PackedString` library, 24 | originally written by Bryan O'Sullivan, and then by Simon Marlow. 25 | It was adapted and greatly extended for darcs by David Roundy and 26 | others. Don Stewart and Duncan Coutts cleaned up and further extended 27 | the implementation and added the `.Lazy` code. Simon Meier contributed 28 | the `Builder` feature. 29 | -------------------------------------------------------------------------------- /Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple 2 | main = defaultMain 3 | -------------------------------------------------------------------------------- /bench/BenchBoundsCheckFusion.hs: -------------------------------------------------------------------------------- 1 | -- | 2 | -- Copyright : (c) 2011 Simon Meier 3 | -- License : BSD3-style (see LICENSE) 4 | -- 5 | -- Maintainer : Simon Meier 6 | -- Stability : experimental 7 | -- Portability : tested on GHC only 8 | -- 9 | -- Benchmark that the bounds checks fuse. 10 | 11 | module BenchBoundsCheckFusion (benchBoundsCheckFusion) where 12 | 13 | import Prelude hiding (words) 14 | import Data.Monoid 15 | import Data.Foldable (foldMap) 16 | import Test.Tasty.Bench 17 | 18 | import qualified Data.ByteString as S 19 | import qualified Data.ByteString.Lazy as L 20 | 21 | import Data.ByteString.Builder 22 | import Data.ByteString.Builder.Extra 23 | import Data.ByteString.Builder.Prim 24 | ( FixedPrim, BoundedPrim, (>$<), (>*<) ) 25 | import qualified Data.ByteString.Builder.Prim as P 26 | import qualified Data.ByteString.Builder.Internal as I 27 | import qualified Data.ByteString.Builder.Prim.Internal as I 28 | 29 | import Foreign 30 | 31 | ------------------------------------------------------------------------------ 32 | -- Benchmark support 33 | ------------------------------------------------------------------------------ 34 | 35 | countToZero :: Int -> Maybe (Int, Int) 36 | countToZero 0 = Nothing 37 | countToZero n = Just (n, n - 1) 38 | 39 | 40 | ------------------------------------------------------------------------------ 41 | -- Benchmark 42 | ------------------------------------------------------------------------------ 43 | 44 | -- input data (NOINLINE to ensure memoization) 45 | ---------------------------------------------- 46 | 47 | -- | Few-enough repetitions to avoid making GC too expensive. 48 | nRepl :: Int 49 | nRepl = 10000 50 | 51 | {-# NOINLINE intData #-} 52 | intData :: [Int] 53 | intData = [1..nRepl] 54 | 55 | -- benchmark wrappers 56 | --------------------- 57 | 58 | {-# INLINE benchB #-} 59 | benchB :: String -> a -> (a -> Builder) -> Benchmark 60 | benchB name x b = 61 | bench (name ++" (" ++ show nRepl ++ ")") $ 62 | whnf (L.length . toLazyByteString . b) x 63 | 64 | {-# INLINE benchBInts #-} 65 | benchBInts :: String -> ([Int] -> Builder) -> Benchmark 66 | benchBInts name = benchB name intData 67 | 68 | 69 | -- benchmarks 70 | ------------- 71 | 72 | benchBoundsCheckFusion :: Benchmark 73 | benchBoundsCheckFusion = bgroup "BoundsCheckFusion" 74 | [ bgroup "Data.ByteString.Builder" 75 | [ benchBInts "foldMap (left-assoc)" $ 76 | foldMap (\x -> (stringUtf8 "s" `mappend` intHost x) `mappend` intHost x) 77 | 78 | , benchBInts "foldMap (right-assoc)" $ 79 | foldMap (\x -> intHost x `mappend` (intHost x `mappend` stringUtf8 "s")) 80 | 81 | , benchBInts "foldMap [manually fused, left-assoc]" $ 82 | foldMap (\x -> stringUtf8 "s" `mappend` P.primBounded (P.liftFixedToBounded $ P.intHost >*< P.intHost) (x, x)) 83 | 84 | , benchBInts "foldMap [manually fused, right-assoc]" $ 85 | foldMap (\x -> P.primBounded (P.liftFixedToBounded $ P.intHost >*< P.intHost) (x, x) `mappend` stringUtf8 "s") 86 | ] 87 | ] 88 | 89 | {-# RULES 90 | 91 | "append/encodeWithB" forall w1 w2 x1 x2. 92 | I.append (P.primBounded w1 x1) (P.primBounded w2 x2) 93 | = P.primBounded (I.pairB w1 w2) (x1, x2) 94 | 95 | "append/encodeWithB/assoc_r" forall w1 w2 x1 x2 b. 96 | I.append (P.primBounded w1 x1) (I.append (P.primBounded w2 x2) b) 97 | = I.append (P.primBounded (I.pairB w1 w2) (x1, x2)) b 98 | 99 | "append/encodeWithB/assoc_l" forall w1 w2 x1 x2 b. 100 | I.append (I.append b (P.primBounded w1 x1)) (P.primBounded w2 x2) 101 | = I.append b (P.primBounded (I.pairB w1 w2) (x1, x2)) 102 | #-} 103 | 104 | -------------------------------------------------------------------------------- /bench/BenchCount.hs: -------------------------------------------------------------------------------- 1 | -- | 2 | -- Copyright : (c) 2021 Georg Rudoy 3 | -- License : BSD3-style (see LICENSE) 4 | -- 5 | -- Maintainer : Georg Rudoy <0xd34df00d+github@gmail.com> 6 | -- 7 | -- Benchmark count 8 | 9 | module BenchCount (benchCount) where 10 | 11 | import Test.Tasty.Bench 12 | import qualified Data.ByteString.Char8 as B 13 | 14 | benchCount :: Benchmark 15 | benchCount = bgroup "Count" 16 | [ bgroup "no matches, same char" $ mkBenches (1 : commonSizes) (\s -> B.replicate s 'b') 17 | , bgroup "no matches, different chars" $ mkBenches commonSizes (\s -> genCyclic 10 s 'b') 18 | , bgroup "some matches, alternating" $ mkBenches commonSizes (\s -> genCyclic 2 s 'a') 19 | , bgroup "some matches, short cycle" $ mkBenches commonSizes (\s -> genCyclic 5 s 'a') 20 | , bgroup "some matches, long cycle" $ mkBenches commonSizes (\s -> genCyclic 10 s 'a') 21 | , bgroup "all matches" $ mkBenches (1 : commonSizes) (\s -> B.replicate s 'a') 22 | ] 23 | where 24 | aboveSimdSwitchThreshold = 1030 -- something above the threshold of 1024 that's divisible by cycle lengths 25 | commonSizes = [ 10, 100, 1000, aboveSimdSwitchThreshold, 10000, 100000, 1000000 ] 26 | mkBenches sizes gen = [ bench (show size ++ " chars long") $ nf (B.count 'a') (gen size) 27 | | size <- sizes 28 | ] 29 | genCyclic cycleLen size from = B.concat $ replicate (size `div` cycleLen) $ B.pack (take cycleLen [from..]) 30 | -------------------------------------------------------------------------------- /bench/BenchIndices.hs: -------------------------------------------------------------------------------- 1 | -- | 2 | -- Copyright : (c) 2020 Peter Duchovni 3 | -- License : BSD3-style (see LICENSE) 4 | -- 5 | -- Maintainer : Peter Duchovni 6 | -- 7 | -- Benchmark elemIndex, findIndex, elemIndices, and findIndices 8 | 9 | module BenchIndices (benchIndices) where 10 | 11 | import Data.Foldable (foldMap) 12 | import Data.Maybe (listToMaybe) 13 | import Data.Monoid 14 | import Data.String 15 | import Test.Tasty.Bench 16 | import Prelude hiding (words, head, tail) 17 | import Data.Word (Word8) 18 | 19 | import qualified Data.ByteString as S 20 | import qualified Data.ByteString.Unsafe as S 21 | 22 | 23 | ------------------------------------------------------------------------------ 24 | -- Benchmark 25 | ------------------------------------------------------------------------------ 26 | 27 | -- ASCII \n to ensure no typos 28 | nl :: Word8 29 | nl = 0xa 30 | {-# INLINE nl #-} 31 | 32 | -- non-inlined equality test 33 | nilEq :: Word8 -> Word8 -> Bool 34 | {-# NOINLINE nilEq #-} 35 | nilEq = (==) 36 | 37 | -- lines of 200 letters from a to e, followed by repeated letter f 38 | absurdlong :: S.ByteString 39 | absurdlong = (S.replicate 200 0x61 <> S.singleton nl 40 | <> S.replicate 200 0x62 <> S.singleton nl 41 | <> S.replicate 200 0x63 <> S.singleton nl 42 | <> S.replicate 200 0x64 <> S.singleton nl 43 | <> S.replicate 200 0x65 <> S.singleton nl) 44 | <> S.replicate 999999 0x66 45 | 46 | benchIndices :: Benchmark 47 | benchIndices = absurdlong `seq` bgroup "Indices" 48 | [ bgroup "ByteString strict first index" $ 49 | [ bench "FindIndices" $ nf (listToMaybe . S.findIndices (== nl)) absurdlong 50 | , bench "ElemIndices" $ nf (listToMaybe . S.elemIndices nl) absurdlong 51 | , bench "FindIndex" $ nf (S.findIndex (== nl)) absurdlong 52 | , bench "ElemIndex" $ nf (S.elemIndex nl) absurdlong 53 | ] 54 | , bgroup "ByteString strict second index" $ 55 | [ bench "FindIndices" $ nf (listToMaybe . drop 1 . S.findIndices (== nl)) absurdlong 56 | , bench "ElemIndices" $ nf (listToMaybe . drop 1 . S.elemIndices nl) absurdlong 57 | , bench "FindIndex" $ nf bench_find_index_second absurdlong 58 | , bench "ElemIndex" $ nf bench_elem_index_second absurdlong 59 | ] 60 | , bgroup "ByteString index equality inlining" $ 61 | [ bench "FindIndices/inlined" $ nf (S.findIndices (== nl)) absurdlong 62 | , bench "FindIndices/non-inlined" $ nf (S.findIndices (nilEq nl)) absurdlong 63 | , bench "FindIndex/inlined" $ nf (S.findIndex (== nl)) absurdlong 64 | , bench "FindIndex/non-inlined" $ nf (S.findIndex (nilEq nl)) absurdlong 65 | ] 66 | ] 67 | 68 | bench_find_index_second :: S.ByteString -> Maybe Int 69 | bench_find_index_second bs = 70 | let isNl = (== nl) 71 | in case S.findIndex isNl bs of 72 | Just !i -> S.findIndex isNl (S.unsafeDrop (i+1) bs) 73 | Nothing -> Nothing 74 | {-# INLINE bench_find_index_second #-} 75 | 76 | bench_elem_index_second :: S.ByteString -> Maybe Int 77 | bench_elem_index_second bs = 78 | case S.elemIndex nl bs of 79 | Just !i -> S.elemIndex nl (S.unsafeDrop (i+1) bs) 80 | Nothing -> Nothing 81 | {-# INLINE bench_elem_index_second #-} 82 | -------------------------------------------------------------------------------- /bench/BenchReadInt.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | 3 | -- | 4 | -- Copyright : (c) 2021 Viktor Dukhovni 5 | -- License : BSD3-style (see LICENSE) 6 | -- 7 | -- Maintainer : Viktor Dukhovni 8 | -- 9 | -- Benchmark readInt and variants, readWord and variants, 10 | -- readInteger and readNatural 11 | 12 | module BenchReadInt (benchReadInt) where 13 | 14 | import qualified Data.ByteString.Builder as B 15 | import qualified Data.ByteString.Char8 as S 16 | import qualified Data.ByteString.Lazy.Char8 as L 17 | import Test.Tasty.Bench 18 | import Data.Int 19 | import Data.Word 20 | import Numeric.Natural 21 | #if !(MIN_VERSION_base(4,11,0)) 22 | import Data.Semigroup (Semigroup((<>))) 23 | #endif 24 | import Data.Monoid (mconcat) 25 | 26 | ------------------------------------------------------------------------------ 27 | -- Benchmark 28 | ------------------------------------------------------------------------------ 29 | 30 | -- Sum space-separated integers in a ByteString. 31 | loopS :: Integral a 32 | => (S.ByteString -> Maybe (a, S.ByteString)) -> S.ByteString -> a 33 | loopS rd = go 0 34 | where 35 | go !acc !bs = case rd bs of 36 | Just (i, t) -> case S.uncons t of 37 | Just (_, t') -> go (acc + i) t' 38 | Nothing -> acc + i 39 | Nothing -> acc 40 | 41 | -- Sum space-separated integers in a ByteString. 42 | loopL :: Integral a 43 | => (L.ByteString -> Maybe (a, L.ByteString)) -> L.ByteString -> a 44 | loopL rd = go 0 45 | where 46 | go !acc !bs = case rd bs of 47 | Just (i, t) -> case L.uncons t of 48 | Just (_, t') -> go (acc + i) t' 49 | Nothing -> acc + i 50 | Nothing -> acc 51 | 52 | benchReadInt :: Benchmark 53 | benchReadInt = bgroup "Read Integral" 54 | [ bgroup "Strict" 55 | [ bench "ReadInt" $ nf (loopS S.readInt) intS 56 | , bench "ReadInt8" $ nf (loopS S.readInt8) int8S 57 | , bench "ReadInt16" $ nf (loopS S.readInt16) int16S 58 | , bench "ReadInt32" $ nf (loopS S.readInt32) int32S 59 | , bench "ReadInt64" $ nf (loopS S.readInt64) int64S 60 | , bench "ReadWord" $ nf (loopS S.readWord) wordS 61 | , bench "ReadWord8" $ nf (loopS S.readWord8) word8S 62 | , bench "ReadWord16" $ nf (loopS S.readWord16) word16S 63 | , bench "ReadWord32" $ nf (loopS S.readWord32) word32S 64 | , bench "ReadWord64" $ nf (loopS S.readWord64) word64S 65 | , bench "ReadInteger" $ nf (loopS S.readInteger) bignatS 66 | , bench "ReadNatural" $ nf (loopS S.readNatural) bignatS 67 | , bench "ReadInteger small" $ nf (loopS S.readInteger) intS 68 | , bench "ReadNatural small" $ nf (loopS S.readNatural) wordS 69 | ] 70 | 71 | , bgroup "Lazy" 72 | [ bench "ReadInt" $ nf (loopL L.readInt) intL 73 | , bench "ReadInt8" $ nf (loopL L.readInt8) int8L 74 | , bench "ReadInt16" $ nf (loopL L.readInt16) int16L 75 | , bench "ReadInt32" $ nf (loopL L.readInt32) int32L 76 | , bench "ReadInt64" $ nf (loopL L.readInt64) int64L 77 | , bench "ReadWord" $ nf (loopL L.readWord) wordL 78 | , bench "ReadWord8" $ nf (loopL L.readWord8) word8L 79 | , bench "ReadWord16" $ nf (loopL L.readWord16) word16L 80 | , bench "ReadWord32" $ nf (loopL L.readWord32) word32L 81 | , bench "ReadWord64" $ nf (loopL L.readWord64) word64L 82 | , bench "ReadInteger" $ nf (loopL L.readInteger) bignatL 83 | , bench "ReadNatural" $ nf (loopL L.readNatural) bignatL 84 | , bench "ReadInteger small" $ nf (loopL L.readInteger) intL 85 | , bench "ReadNatural small" $ nf (loopL L.readNatural) wordL 86 | ] 87 | ] 88 | where 89 | mkWordL :: forall a. (Integral a, Bounded a) 90 | => (a -> B.Builder) -> L.ByteString 91 | mkWordL f = B.toLazyByteString b 92 | where b = mconcat [f i <> B.char8 ' ' | i <- [n-255..n]] 93 | n = maxBound @a 94 | mkWordS f = S.toStrict $ mkWordL f 95 | 96 | mkIntL :: forall a. (Integral a, Bounded a) 97 | => (a -> B.Builder) -> L.ByteString 98 | mkIntL f = B.toLazyByteString b 99 | where b = mconcat [f (i + 128) <> B.char8 ' ' | i <- [n-255..n]] 100 | n = maxBound @a 101 | mkIntS f = S.toStrict $ mkIntL f 102 | 103 | wordS, word8S, word16S, word32S, word64S :: S.ByteString 104 | !wordS = mkWordS B.wordDec 105 | !word8S = mkWordS B.word8Dec 106 | !word16S = mkWordS B.word16Dec 107 | !word32S = mkWordS B.word32Dec 108 | !word64S = mkWordS B.word64Dec 109 | 110 | intS, int8S, int16S, int32S, int64S :: S.ByteString 111 | !intS = mkIntS B.intDec 112 | !int8S = mkIntS B.int8Dec 113 | !int16S = mkIntS B.int16Dec 114 | !int32S = mkIntS B.int32Dec 115 | !int64S = mkIntS B.int64Dec 116 | 117 | word8L, word16L, word32L, word64L :: L.ByteString 118 | !wordL = mkWordL B.wordDec 119 | !word8L = mkWordL B.word8Dec 120 | !word16L = mkWordL B.word16Dec 121 | !word32L = mkWordL B.word32Dec 122 | !word64L = mkWordL B.word64Dec 123 | 124 | intL, int8L, int16L, int32L, int64L :: L.ByteString 125 | !intL = mkIntL B.intDec 126 | !int8L = mkIntL B.int8Dec 127 | !int16L = mkIntL B.int16Dec 128 | !int32L = mkIntL B.int32Dec 129 | !int64L = mkIntL B.int64Dec 130 | 131 | bignatL :: L.ByteString 132 | !bignatL = B.toLazyByteString b 133 | where b = mconcat [B.integerDec (powpow i) <> B.char8 ' ' | i <- [0..13]] 134 | powpow :: Word -> Integer 135 | powpow n = 2^(2^n :: Word) 136 | 137 | bignatS :: S.ByteString 138 | !bignatS = S.toStrict bignatL 139 | -------------------------------------------------------------------------------- /bench/BenchShort.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | module BenchShort (benchShort) where 4 | 5 | import Control.DeepSeq (force) 6 | import Data.Foldable (foldMap) 7 | import Data.Maybe (listToMaybe) 8 | import Data.Monoid 9 | import Data.String 10 | import Test.Tasty.Bench 11 | import Prelude hiding (words, head, tail) 12 | 13 | import Data.ByteString.Short (ShortByteString) 14 | import qualified Data.ByteString.Short as S 15 | 16 | import Data.ByteString.Builder 17 | import Data.ByteString.Builder.Extra (byteStringCopy, 18 | byteStringInsert, 19 | intHost) 20 | import Data.ByteString.Builder.Internal (ensureFree) 21 | import Data.ByteString.Builder.Prim (BoundedPrim, FixedPrim, 22 | (>$<)) 23 | import qualified Data.ByteString.Builder.Prim as P 24 | import qualified Data.ByteString.Builder.Prim.Internal as PI 25 | 26 | import Foreign 27 | 28 | import System.Random 29 | 30 | 31 | 32 | ------------------------------------------------------------------------------ 33 | -- Benchmark 34 | ------------------------------------------------------------------------------ 35 | 36 | -- input data (NOINLINE to ensure memoization) 37 | ---------------------------------------------- 38 | 39 | -- | Few-enough repetitions to avoid making GC too expensive. 40 | nRepl :: Int 41 | nRepl = 10000 42 | 43 | {-# NOINLINE intData #-} 44 | intData :: [Int] 45 | intData = [1..nRepl] 46 | 47 | {-# NOINLINE byteStringData #-} 48 | byteStringData :: S.ShortByteString 49 | byteStringData = S.pack $ map fromIntegral intData 50 | 51 | {-# NOINLINE loremIpsum #-} 52 | loremIpsum :: S.ShortByteString 53 | loremIpsum = mconcat 54 | [ " Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor" 55 | , "incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis" 56 | , "nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat." 57 | , "Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu" 58 | , "fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in" 59 | , "culpa qui officia deserunt mollit anim id est laborum." 60 | ] 61 | 62 | -- benchmark wrappers 63 | --------------------- 64 | 65 | {-# INLINE benchB' #-} 66 | benchB' :: String -> a -> (a -> ShortByteString) -> Benchmark 67 | benchB' name x b = bench name $ whnf (S.length . b) x 68 | 69 | 70 | -- We use this construction of just looping through @n,n-1,..,1@ to ensure that 71 | -- we measure the speed of the encoding and not the speed of generating the 72 | -- values to be encoded. 73 | {-# INLINE benchIntEncodingB #-} 74 | benchIntEncodingB :: Int -- ^ Maximal 'Int' to write 75 | -> BoundedPrim Int -- ^ 'BoundedPrim' to execute 76 | -> IO () -- ^ 'IO' action to benchmark 77 | benchIntEncodingB n0 w 78 | | n0 <= 0 = return () 79 | | otherwise = do 80 | fpbuf <- mallocForeignPtrBytes (n0 * PI.sizeBound w) 81 | withForeignPtr fpbuf (loop n0) >> return () 82 | where 83 | loop !n !op 84 | | n <= 0 = return op 85 | | otherwise = PI.runB w n op >>= loop (n - 1) 86 | 87 | 88 | -- Helpers 89 | ------------- 90 | 91 | hashInt :: Int -> Int 92 | hashInt x = iterate step x !! 10 93 | where 94 | step a = e 95 | where b = (a `xor` 61) `xor` (a `shiftR` 16) 96 | c = b + (b `shiftL` 3) 97 | d = c `xor` (c `shiftR` 4) 98 | e = d * 0x27d4eb2d 99 | f = e `xor` (e `shiftR` 15) 100 | 101 | w :: Int -> Word8 102 | w = fromIntegral 103 | 104 | hashWord8 :: Word8 -> Word8 105 | hashWord8 = fromIntegral . hashInt . fromIntegral 106 | 107 | foldInputs' :: [[Word8]] 108 | foldInputs' = force (S.unpack <$> foldInputs) 109 | 110 | foldInputs :: [S.ShortByteString] 111 | foldInputs = map (\k -> S.pack $ if k <= 6 then take (2 ^ k) [32..95] else concat (replicate (2 ^ (k - 6)) [32..95])) [0..16] 112 | 113 | largeTraversalInput :: S.ShortByteString 114 | largeTraversalInput = S.concat (replicate 10 byteStringData) 115 | 116 | smallTraversalInput :: S.ShortByteString 117 | smallTraversalInput = "The quick brown fox" 118 | 119 | zeroes :: S.ShortByteString 120 | zeroes = S.replicate 10000 0 121 | 122 | partitionStrict p = nf (S.partition p) . randomStrict $ mkStdGen 98423098 123 | where randomStrict = fst . S.unfoldrN 10000 (Just . random) 124 | 125 | -- ASCII \n to ensure no typos 126 | nl :: Word8 127 | nl = 0xa 128 | {-# INLINE nl #-} 129 | 130 | -- non-inlined equality test 131 | nilEq :: Word8 -> Word8 -> Bool 132 | {-# NOINLINE nilEq #-} 133 | nilEq = (==) 134 | 135 | -- lines of 200 letters from a to e, followed by repeated letter f 136 | absurdlong :: S.ShortByteString 137 | absurdlong = (S.replicate 200 0x61 <> S.singleton nl 138 | <> S.replicate 200 0x62 <> S.singleton nl 139 | <> S.replicate 200 0x63 <> S.singleton nl 140 | <> S.replicate 200 0x64 <> S.singleton nl 141 | <> S.replicate 200 0x65 <> S.singleton nl) 142 | <> S.replicate 999999 0x66 143 | 144 | bench_find_index_second :: ShortByteString -> Maybe Int 145 | bench_find_index_second bs = 146 | let isNl = (== nl) 147 | in case S.findIndex isNl bs of 148 | Just !i -> S.findIndex isNl (S.drop (i+1) bs) 149 | Nothing -> Nothing 150 | {-# INLINE bench_find_index_second #-} 151 | 152 | bench_elem_index_second :: ShortByteString -> Maybe Int 153 | bench_elem_index_second bs = 154 | case S.elemIndex nl bs of 155 | Just !i -> S.elemIndex nl (S.drop (i+1) bs) 156 | Nothing -> Nothing 157 | {-# INLINE bench_elem_index_second #-} 158 | 159 | 160 | 161 | -- benchmarks 162 | ------------- 163 | 164 | benchShort :: Benchmark 165 | benchShort = absurdlong `seq` bgroup "ShortByteString" 166 | [ bgroup "Small payload" 167 | [ benchB' "mempty" () (const mempty) 168 | , benchB' "UTF-8 String (naive)" "hello world\0" fromString 169 | , benchB' "String (naive)" "hello world!" fromString 170 | ] 171 | , bgroup "intercalate" 172 | [ bench "intercalate (large)" $ whnf (S.intercalate $ " and also ") (replicate 300 "expression") 173 | , bench "intercalate (small)" $ whnf (S.intercalate "&") (replicate 30 "foo") 174 | , bench "intercalate (tiny)" $ whnf (S.intercalate "&") (["foo", "bar", "baz"]) 175 | ] 176 | , bgroup "partition" 177 | [ 178 | bgroup "strict" 179 | [ 180 | bench "mostlyTrueFast" $ partitionStrict (< (w 225)) 181 | , bench "mostlyFalseFast" $ partitionStrict (< (w 10)) 182 | , bench "balancedFast" $ partitionStrict (< (w 128)) 183 | 184 | , bench "mostlyTrueSlow" $ partitionStrict (\x -> hashWord8 x < w 225) 185 | , bench "mostlyFalseSlow" $ partitionStrict (\x -> hashWord8 x < w 10) 186 | , bench "balancedSlow" $ partitionStrict (\x -> hashWord8 x < w 128) 187 | ] 188 | ] 189 | , bgroup "folds" 190 | [ bgroup "strict" 191 | [ bgroup "foldl" $ map (\s -> bench (show $ S.length s) $ 192 | nf (S.foldl (\acc x -> acc + fromIntegral x) (0 :: Int)) s) foldInputs 193 | , bgroup "foldl'" $ map (\s -> bench (show $ S.length s) $ 194 | nf (S.foldl' (\acc x -> acc + fromIntegral x) (0 :: Int)) s) foldInputs 195 | , bgroup "foldr" $ map (\s -> bench (show $ S.length s) $ 196 | nf (S.foldr (\x acc -> fromIntegral x + acc) (0 :: Int)) s) foldInputs 197 | , bgroup "foldr'" $ map (\s -> bench (show $ S.length s) $ 198 | nf (S.foldr' (\x acc -> fromIntegral x + acc) (0 :: Int)) s) foldInputs 199 | , bgroup "foldr1'" $ map (\s -> bench (show $ S.length s) $ 200 | nf (S.foldr1' (\x acc -> fromIntegral x + acc)) s) foldInputs 201 | , bgroup "unfoldrN" $ map (\s -> bench (show $ S.length s) $ 202 | nf (S.unfoldrN (S.length s) (\a -> Just (a, a + 1))) 0) foldInputs 203 | , bgroup "filter" $ map (\s -> bench (show $ S.length s) $ 204 | nf (S.filter odd) s) foldInputs 205 | ] 206 | ] 207 | , bgroup "findIndexOrLength" 208 | [ bench "takeWhile" $ nf (S.takeWhile even) zeroes 209 | , bench "dropWhile" $ nf (S.dropWhile even) zeroes 210 | , bench "break" $ nf (S.break odd) zeroes 211 | ] 212 | , bgroup "findIndex_" 213 | [ bench "findIndices" $ nf (sum . S.findIndices (\x -> x == 129 || x == 72)) byteStringData 214 | , bench "find" $ nf (S.find (>= 198)) byteStringData 215 | ] 216 | , bgroup "traversals" 217 | [ bench "map (+1) large" $ nf (S.map (+ 1)) largeTraversalInput 218 | , bench "map (+1) small" $ nf (S.map (+ 1)) smallTraversalInput 219 | ] 220 | , bgroup "ShortByteString strict first index" $ 221 | [ bench "FindIndices" $ nf (listToMaybe . S.findIndices (== nl)) absurdlong 222 | , bench "ElemIndices" $ nf (listToMaybe . S.elemIndices nl) absurdlong 223 | , bench "FindIndex" $ nf (S.findIndex (== nl)) absurdlong 224 | , bench "ElemIndex" $ nf (S.elemIndex nl) absurdlong 225 | ] 226 | , bgroup "ShortByteString strict second index" $ 227 | [ bench "FindIndices" $ nf (listToMaybe . drop 1 . S.findIndices (== nl)) absurdlong 228 | , bench "ElemIndices" $ nf (listToMaybe . drop 1 . S.elemIndices nl) absurdlong 229 | , bench "FindIndex" $ nf bench_find_index_second absurdlong 230 | , bench "ElemIndex" $ nf bench_elem_index_second absurdlong 231 | ] 232 | , bgroup "ShortByteString index equality inlining" $ 233 | [ bench "FindIndices/inlined" $ nf (S.findIndices (== nl)) absurdlong 234 | , bench "FindIndices/non-inlined" $ nf (S.findIndices (nilEq nl)) absurdlong 235 | , bench "FindIndex/inlined" $ nf (S.findIndex (== nl)) absurdlong 236 | , bench "FindIndex/non-inlined" $ nf (S.findIndex (nilEq nl)) absurdlong 237 | ] 238 | , bgroup "ShortByteString conversions" $ 239 | [ bgroup "unpack" $ map (\s -> bench (show $ S.length s) $ 240 | nf (\x -> S.unpack x) s) foldInputs 241 | , bgroup "pack" $ map (\s -> bench (show $ length s) $ 242 | nf S.pack s) foldInputs' 243 | , bench "unpack and get last element" $ nf (\x -> last . S.unpack $ x) absurdlong 244 | , bench "unpack and get first 120 elements" $ nf (\x -> take 120 . S.unpack $ x) absurdlong 245 | ] 246 | ] 247 | -------------------------------------------------------------------------------- /bytestring.cabal: -------------------------------------------------------------------------------- 1 | Cabal-Version: 2.2 2 | 3 | Name: bytestring 4 | Version: 0.13.0.0 5 | Synopsis: Fast, compact, strict and lazy byte strings with a list interface 6 | Description: 7 | An efficient compact, immutable byte string type (both strict and lazy) 8 | suitable for binary or 8-bit character data. 9 | . 10 | The 'ByteString' type represents sequences of bytes or 8-bit characters. 11 | It is suitable for high performance use, both in terms of large data 12 | quantities, or high speed requirements. The 'ByteString' functions follow 13 | the same style as Haskell\'s ordinary lists, so it is easy to convert code 14 | from using 'String' to 'ByteString'. 15 | . 16 | Two 'ByteString' variants are provided: 17 | . 18 | * Strict 'ByteString's keep the string as a single large array. This 19 | makes them convenient for passing data between C and Haskell. 20 | . 21 | * Lazy 'ByteString's use a lazy list of strict chunks which makes it 22 | suitable for I\/O streaming tasks. 23 | . 24 | The @Char8@ modules provide a character-based view of the same 25 | underlying 'ByteString' types. This makes it convenient to handle mixed 26 | binary and 8-bit character content (which is common in many file formats 27 | and network protocols). 28 | . 29 | The 'Builder' module provides an efficient way to build up 'ByteString's 30 | in an ad-hoc way by repeated concatenation. This is ideal for fast 31 | serialisation or pretty printing. 32 | . 33 | There is also a 'ShortByteString' type which has a lower memory overhead 34 | and can be converted to or from a 'ByteString'. It is suitable for keeping 35 | many short strings in memory, especially long-term, without incurring any 36 | possible heap fragmentation costs. 37 | . 38 | 'ByteString's are not designed for Unicode. For Unicode strings you should 39 | use the 'Text' type from the @text@ package. 40 | . 41 | These modules are intended to be imported qualified, to avoid name clashes 42 | with "Prelude" functions, e.g. 43 | . 44 | > import qualified Data.ByteString as BS 45 | 46 | License: BSD-3-Clause 47 | License-file: LICENSE 48 | Category: Data 49 | Copyright: Copyright (c) Don Stewart 2005-2009, 50 | (c) Duncan Coutts 2006-2015, 51 | (c) David Roundy 2003-2005, 52 | (c) Jasper Van der Jeugt 2010, 53 | (c) Simon Meier 2010-2013. 54 | 55 | Author: Don Stewart, 56 | Duncan Coutts 57 | Maintainer: Haskell Bytestring Team , Core Libraries Committee 58 | Homepage: https://github.com/haskell/bytestring 59 | Bug-reports: https://github.com/haskell/bytestring/issues 60 | Tested-With: GHC==9.12.1, 61 | GHC==9.10.1, 62 | GHC==9.8.4, 63 | GHC==9.6.6, 64 | GHC==9.4.8, 65 | GHC==9.2.8, 66 | GHC==9.0.2, 67 | GHC==8.10.7, 68 | GHC==8.8.4, 69 | GHC==8.6.5, 70 | GHC==8.4.4 71 | Build-Type: Simple 72 | extra-source-files: README.md Changelog.md include/bytestring-cpp-macros.h 73 | 74 | Flag pure-haskell 75 | description: Don't use bytestring's standard C routines 76 | 77 | When this flag is true, bytestring will use pure Haskell variants (no C FFI) 78 | of the internal functions. This is not recommended except in use cases that 79 | cannot (or do not) depend on C, such as with GHC's JavaScript backend. 80 | 81 | default: False 82 | manual: True 83 | 84 | source-repository head 85 | type: git 86 | location: https://github.com/haskell/bytestring 87 | 88 | 89 | common language 90 | default-language: Haskell2010 91 | default-extensions: 92 | BangPatterns 93 | DeriveDataTypeable 94 | DeriveGeneric 95 | DeriveLift 96 | FlexibleContexts 97 | FlexibleInstances 98 | LambdaCase 99 | MagicHash 100 | MultiWayIf 101 | NamedFieldPuns 102 | PatternSynonyms 103 | RankNTypes 104 | ScopedTypeVariables 105 | StandaloneDeriving 106 | TupleSections 107 | TypeApplications 108 | TypeOperators 109 | UnboxedTuples 110 | 111 | library 112 | import: language 113 | build-depends: base >= 4.11 && < 5, ghc-prim, deepseq, template-haskell 114 | 115 | if impl(ghc < 9.4) 116 | build-depends: data-array-byte >= 0.1 && < 0.2 117 | 118 | exposed-modules: Data.ByteString 119 | Data.ByteString.Char8 120 | Data.ByteString.Unsafe 121 | Data.ByteString.Internal 122 | Data.ByteString.Lazy 123 | Data.ByteString.Lazy.Char8 124 | Data.ByteString.Lazy.Internal 125 | Data.ByteString.Short 126 | Data.ByteString.Short.Internal 127 | 128 | Data.ByteString.Builder 129 | Data.ByteString.Builder.Extra 130 | Data.ByteString.Builder.Prim 131 | Data.ByteString.Builder.RealFloat 132 | 133 | -- perhaps only exposed temporarily 134 | Data.ByteString.Builder.Internal 135 | Data.ByteString.Builder.Prim.Internal 136 | other-modules: Data.ByteString.Builder.ASCII 137 | Data.ByteString.Builder.Prim.ASCII 138 | Data.ByteString.Builder.Prim.Binary 139 | Data.ByteString.Builder.Prim.Internal.Base16 140 | Data.ByteString.Builder.Prim.Internal.Floating 141 | Data.ByteString.Builder.RealFloat.F2S 142 | Data.ByteString.Builder.RealFloat.D2S 143 | Data.ByteString.Builder.RealFloat.Internal 144 | Data.ByteString.Builder.RealFloat.TableGenerator 145 | Data.ByteString.Internal.Type 146 | Data.ByteString.Lazy.ReadInt 147 | Data.ByteString.Lazy.ReadNat 148 | Data.ByteString.ReadInt 149 | Data.ByteString.ReadNat 150 | Data.ByteString.Utils.ByteOrder 151 | Data.ByteString.Utils.UnalignedAccess 152 | 153 | ghc-options: -Wall -fwarn-tabs -Wincomplete-uni-patterns 154 | -optP-Wall -optP-Werror=undef 155 | -O2 156 | -fmax-simplifier-iterations=10 157 | -fdicts-cheap 158 | -fspec-constr-count=6 159 | 160 | if arch(javascript) || flag(pure-haskell) 161 | cpp-options: -DPURE_HASKELL=1 162 | other-modules: Data.ByteString.Internal.Pure 163 | default-extensions: NoForeignFunctionInterface 164 | -- Pure Haskell implementation only implemented for recent GHCs/base 165 | build-depends: base >= 4.18 && < 5 166 | else 167 | cpp-options: -DPURE_HASKELL=0 168 | 169 | c-sources: cbits/fpstring.c 170 | cbits/itoa.c 171 | cbits/shortbytestring.c 172 | cbits/aligned-static-hs-data.c 173 | 174 | if (arch(aarch64)) 175 | c-sources: cbits/aarch64/is-valid-utf8.c 176 | else 177 | c-sources: cbits/is-valid-utf8.c 178 | 179 | -- DNDEBUG disables asserts in cbits/ 180 | cc-options: -std=c11 -DNDEBUG=1 181 | -fno-strict-aliasing 182 | -Werror=undef 183 | 184 | -- No need to link to libgcc on ghc-9.4 and later which uses a clang-based 185 | -- toolchain. 186 | if os(windows) && impl(ghc < 9.3) 187 | extra-libraries: gcc 188 | 189 | if arch(aarch64) 190 | -- The libffi in Apple's darwin toolchain doesn't 191 | -- play nice with -Wundef. Recent GHCs work around this. 192 | -- See also https://github.com/haskell/bytestring/issues/665 193 | -- and https://gitlab.haskell.org/ghc/ghc/-/issues/23568 194 | build-depends: base (>= 4.17.2 && < 4.18) || >= 4.18.1 195 | 196 | include-dirs: include 197 | install-includes: fpstring.h 198 | bytestring-cpp-macros.h 199 | 200 | test-suite bytestring-tests 201 | import: language 202 | type: exitcode-stdio-1.0 203 | main-is: Main.hs 204 | other-modules: Builder 205 | Data.ByteString.Builder.Prim.TestUtils 206 | Data.ByteString.Builder.Prim.Tests 207 | Data.ByteString.Builder.Tests 208 | IsValidUtf8 209 | LazyHClose 210 | Lift 211 | Properties 212 | Properties.ByteString 213 | Properties.ByteStringChar8 214 | Properties.ByteStringLazy 215 | Properties.ByteStringLazyChar8 216 | Properties.ShortByteString 217 | QuickCheckUtils 218 | hs-source-dirs: tests, 219 | tests/builder 220 | build-depends: base, 221 | bytestring, 222 | deepseq, 223 | QuickCheck, 224 | tasty, 225 | tasty-quickcheck >= 0.8.1, 226 | template-haskell, 227 | transformers >= 0.3, 228 | syb 229 | 230 | ghc-options: -fwarn-unused-binds 231 | -rtsopts 232 | if !arch(wasm32) 233 | ghc-options: -threaded 234 | 235 | benchmark bytestring-bench 236 | import: language 237 | main-is: BenchAll.hs 238 | other-modules: BenchBoundsCheckFusion 239 | BenchCount 240 | BenchCSV 241 | BenchIndices 242 | BenchReadInt 243 | BenchShort 244 | type: exitcode-stdio-1.0 245 | hs-source-dirs: bench 246 | 247 | ghc-options: -O2 "-with-rtsopts=-A32m" 248 | if impl(ghc >= 8.6) 249 | ghc-options: -fproc-alignment=64 250 | build-depends: base, 251 | bytestring, 252 | deepseq, 253 | tasty-bench, 254 | random 255 | -------------------------------------------------------------------------------- /cabal.project.wasi: -------------------------------------------------------------------------------- 1 | -- https://github.com/haskellari/splitmix/pull/73 2 | source-repository-package 3 | type: git 4 | location: https://github.com/amesgen/splitmix 5 | tag: 5f5b766d97dc735ac228215d240a3bb90bc2ff75 6 | -------------------------------------------------------------------------------- /cbits/fpstring.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2003 David Roundy 3 | * Copyright (c) 2005-6 Don Stewart 4 | * 5 | * All rights reserved. 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions 9 | * are met: 10 | * 1. Redistributions of source code must retain the above copyright 11 | * notice, this list of conditions and the following disclaimer. 12 | * 2. Redistributions in binary form must reproduce the above copyright 13 | * notice, this list of conditions and the following disclaimer in the 14 | * documentation and/or other materials provided with the distribution. 15 | * 3. Neither the names of the authors or the names of any contributors 16 | * may be used to endorse or promote products derived from this software 17 | * without specific prior written permission. 18 | * 19 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 23 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 | * SUCH DAMAGE. 30 | */ 31 | 32 | #include "HsFFI.h" 33 | #include "MachDeps.h" 34 | 35 | #include "fpstring.h" 36 | #if defined(__x86_64__) 37 | #include 38 | #include 39 | #endif 40 | 41 | #include 42 | #include 43 | 44 | #if defined(__x86_64__) && (__GNUC__ >= 7 || __GNUC__ == 6 && __GNUC_MINOR__ >= 3 || defined(__clang_major__)) && !defined(__STDC_NO_ATOMICS__) 45 | #include 46 | #define USE_SIMD_COUNT 47 | #endif 48 | 49 | /* copy a string in reverse */ 50 | void fps_reverse(unsigned char *q, unsigned char *p, size_t n) { 51 | p += n-1; 52 | while (n-- != 0) 53 | *q++ = *p--; 54 | } 55 | 56 | /* duplicate a string, interspersing the character through the elements 57 | of the duplicated string */ 58 | void fps_intersperse(unsigned char *q, 59 | unsigned char *p, 60 | size_t n, 61 | unsigned char c) { 62 | #if defined(__x86_64__) 63 | { 64 | const __m128i separator = _mm_set1_epi8(c); 65 | const unsigned char *const p_begin = p; 66 | const unsigned char *const p_end = p_begin + n - 9; 67 | while (p < p_end) { 68 | const __m128i eight_src_bytes = _mm_loadl_epi64((__m128i *)p); 69 | const __m128i sixteen_dst_bytes = _mm_unpacklo_epi8(eight_src_bytes, separator); 70 | _mm_storeu_si128((__m128i *)q, sixteen_dst_bytes); 71 | p += 8; 72 | q += 16; 73 | } 74 | n -= p - p_begin; 75 | } 76 | #endif 77 | while (n > 1) { 78 | *q++ = *p++; 79 | *q++ = c; 80 | n--; 81 | } 82 | if (n == 1) 83 | *q = *p; 84 | } 85 | 86 | /* find maximum char in a packed string */ 87 | unsigned char fps_maximum(unsigned char *p, size_t len) { 88 | unsigned char *q, c = *p; 89 | for (q = p; q < p + len; q++) 90 | if (*q > c) 91 | c = *q; 92 | return c; 93 | } 94 | 95 | /* find minimum char in a packed string */ 96 | unsigned char fps_minimum(unsigned char *p, size_t len) { 97 | unsigned char *q, c = *p; 98 | for (q = p; q < p + len; q++) 99 | if (*q < c) 100 | c = *q; 101 | return c; 102 | } 103 | 104 | int fps_compare(const void *a, const void *b) { 105 | return (int)*(unsigned char*)a - (int)*(unsigned char*)b; 106 | } 107 | 108 | void fps_sort(unsigned char *p, size_t len) { 109 | return qsort(p, len, 1, fps_compare); 110 | } 111 | 112 | // We don't actually always use these unaligned write functions on the 113 | // Haskell side, but the macros we check there aren't visible here... 114 | void fps_unaligned_write_u16(uint16_t x, uint8_t *p) { 115 | memcpy(p, &x, 2); 116 | return; 117 | } 118 | 119 | void fps_unaligned_write_u32(uint32_t x, uint8_t *p) { 120 | memcpy(p, &x, 4); 121 | return; 122 | } 123 | 124 | void fps_unaligned_write_u64(uint64_t x, uint8_t *p) { 125 | memcpy(p, &x, 8); 126 | return; 127 | } 128 | 129 | void fps_unaligned_write_HsFloat(HsFloat x, uint8_t *p) { 130 | memcpy(p, &x, SIZEOF_HSFLOAT); 131 | } 132 | 133 | void fps_unaligned_write_HsDouble(HsDouble x, uint8_t *p) { 134 | memcpy(p, &x, SIZEOF_HSDOUBLE); 135 | } 136 | 137 | uint64_t fps_unaligned_read_u64(uint8_t *p) { 138 | uint64_t ans; 139 | memcpy(&ans, p, 8); 140 | return ans; 141 | } 142 | 143 | /* count the number of occurrences of a char in a string */ 144 | size_t fps_count_naive(unsigned char *str, size_t len, unsigned char w) { 145 | size_t c; 146 | for (c = 0; len-- != 0; ++str) 147 | if (*str == w) 148 | ++c; 149 | return c; 150 | } 151 | 152 | 153 | #ifdef USE_SIMD_COUNT 154 | __attribute__((target("sse4.2"))) 155 | size_t fps_count_cmpestrm(unsigned char *str, size_t len, unsigned char w) { 156 | const __m128i pat = _mm_set1_epi8(w); 157 | 158 | size_t res = 0; 159 | 160 | size_t i = 0; 161 | 162 | for (; i < len && (intptr_t)(str + i) % 64; ++i) { 163 | res += str[i] == w; 164 | } 165 | 166 | for (size_t end = len - 128; i < end; i += 128) { 167 | __m128i p0 = _mm_load_si128((const __m128i*)(str + i + 16 * 0)); 168 | __m128i p1 = _mm_load_si128((const __m128i*)(str + i + 16 * 1)); 169 | __m128i p2 = _mm_load_si128((const __m128i*)(str + i + 16 * 2)); 170 | __m128i p3 = _mm_load_si128((const __m128i*)(str + i + 16 * 3)); 171 | __m128i p4 = _mm_load_si128((const __m128i*)(str + i + 16 * 4)); 172 | __m128i p5 = _mm_load_si128((const __m128i*)(str + i + 16 * 5)); 173 | __m128i p6 = _mm_load_si128((const __m128i*)(str + i + 16 * 6)); 174 | __m128i p7 = _mm_load_si128((const __m128i*)(str + i + 16 * 7)); 175 | // Here, cmpestrm compares two strings in the following mode: 176 | // * _SIDD_SBYTE_OPS: interprets the strings as consisting of 8-bit chars, 177 | // * _SIDD_CMP_EQUAL_EACH: computes the number of `i`s 178 | // for which `p[i]`, a part of `str`, is equal to `pat[i]` 179 | // (the latter being always equal to `w`). 180 | // 181 | // q.v. https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestrm&expand=835 182 | #define MODE _SIDD_SBYTE_OPS | _SIDD_CMP_EQUAL_EACH 183 | __m128i r0 = _mm_cmpestrm(p0, 16, pat, 16, MODE); 184 | __m128i r1 = _mm_cmpestrm(p1, 16, pat, 16, MODE); 185 | __m128i r2 = _mm_cmpestrm(p2, 16, pat, 16, MODE); 186 | __m128i r3 = _mm_cmpestrm(p3, 16, pat, 16, MODE); 187 | __m128i r4 = _mm_cmpestrm(p4, 16, pat, 16, MODE); 188 | __m128i r5 = _mm_cmpestrm(p5, 16, pat, 16, MODE); 189 | __m128i r6 = _mm_cmpestrm(p6, 16, pat, 16, MODE); 190 | __m128i r7 = _mm_cmpestrm(p7, 16, pat, 16, MODE); 191 | #undef MODE 192 | res += _popcnt64(_mm_extract_epi64(r0, 0)); 193 | res += _popcnt64(_mm_extract_epi64(r1, 0)); 194 | res += _popcnt64(_mm_extract_epi64(r2, 0)); 195 | res += _popcnt64(_mm_extract_epi64(r3, 0)); 196 | res += _popcnt64(_mm_extract_epi64(r4, 0)); 197 | res += _popcnt64(_mm_extract_epi64(r5, 0)); 198 | res += _popcnt64(_mm_extract_epi64(r6, 0)); 199 | res += _popcnt64(_mm_extract_epi64(r7, 0)); 200 | } 201 | 202 | for (; i < len; ++i) { 203 | res += str[i] == w; 204 | } 205 | 206 | return res; 207 | } 208 | 209 | __attribute__((target("avx2"))) 210 | size_t fps_count_avx2(unsigned char *str, size_t len, unsigned char w) { 211 | __m256i pat = _mm256_set1_epi8(w); 212 | 213 | size_t prefix = 0, res = 0; 214 | 215 | size_t i = 0; 216 | 217 | for (; i < len && (intptr_t)(str + i) % 64; ++i) { 218 | prefix += str[i] == w; 219 | } 220 | 221 | for (size_t end = len - 128; i < end; i += 128) { 222 | __m256i p0 = _mm256_load_si256((const __m256i*)(str + i + 32 * 0)); 223 | __m256i p1 = _mm256_load_si256((const __m256i*)(str + i + 32 * 1)); 224 | __m256i p2 = _mm256_load_si256((const __m256i*)(str + i + 32 * 2)); 225 | __m256i p3 = _mm256_load_si256((const __m256i*)(str + i + 32 * 3)); 226 | __m256i r0 = _mm256_cmpeq_epi8(p0, pat); 227 | __m256i r1 = _mm256_cmpeq_epi8(p1, pat); 228 | __m256i r2 = _mm256_cmpeq_epi8(p2, pat); 229 | __m256i r3 = _mm256_cmpeq_epi8(p3, pat); 230 | res += _popcnt64(_mm256_extract_epi64(r0, 0)); 231 | res += _popcnt64(_mm256_extract_epi64(r0, 1)); 232 | res += _popcnt64(_mm256_extract_epi64(r0, 2)); 233 | res += _popcnt64(_mm256_extract_epi64(r0, 3)); 234 | res += _popcnt64(_mm256_extract_epi64(r1, 0)); 235 | res += _popcnt64(_mm256_extract_epi64(r1, 1)); 236 | res += _popcnt64(_mm256_extract_epi64(r1, 2)); 237 | res += _popcnt64(_mm256_extract_epi64(r1, 3)); 238 | res += _popcnt64(_mm256_extract_epi64(r2, 0)); 239 | res += _popcnt64(_mm256_extract_epi64(r2, 1)); 240 | res += _popcnt64(_mm256_extract_epi64(r2, 2)); 241 | res += _popcnt64(_mm256_extract_epi64(r2, 3)); 242 | res += _popcnt64(_mm256_extract_epi64(r3, 0)); 243 | res += _popcnt64(_mm256_extract_epi64(r3, 1)); 244 | res += _popcnt64(_mm256_extract_epi64(r3, 2)); 245 | res += _popcnt64(_mm256_extract_epi64(r3, 3)); 246 | } 247 | 248 | // _mm256_cmpeq_epi8(p, pat) returns a SIMD vector 249 | // with `i`th byte consisting of eight `1`s if `p[i] == pat[i]`, 250 | // and of eight `0`s otherwise, 251 | // hence each matching byte is counted 8 times by popcnt. 252 | // Dividing by 8 corrects for that. 253 | res /= 8; 254 | 255 | res += prefix; 256 | 257 | for (; i < len; ++i) { 258 | res += str[i] == w; 259 | } 260 | 261 | return res; 262 | } 263 | 264 | typedef size_t (*fps_impl_t) (unsigned char*, size_t, unsigned char); 265 | 266 | fps_impl_t select_fps_simd_impl() { 267 | uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; 268 | 269 | uint32_t ecx1 = 0; 270 | if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) { 271 | ecx1 = ecx; 272 | } 273 | 274 | const bool has_xsave = ecx1 & (1 << 26); 275 | const bool has_popcnt = ecx1 & (1 << 23); 276 | 277 | if (__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx)) { 278 | const bool has_avx2 = has_xsave && (ebx & (1 << 5)); 279 | if (has_avx2 && has_popcnt) { 280 | return &fps_count_avx2; 281 | } 282 | } 283 | 284 | const bool has_sse42 = ecx1 & (1 << 19); 285 | if (has_sse42 && has_popcnt) { 286 | return &fps_count_cmpestrm; 287 | } 288 | 289 | return &fps_count_naive; 290 | } 291 | #endif 292 | 293 | 294 | 295 | size_t fps_count(unsigned char *str, size_t len, unsigned char w) { 296 | #ifndef USE_SIMD_COUNT 297 | return fps_count_naive(str, len, w); 298 | #else 299 | // 1024 is a rough guesstimate of the string length 300 | // for which the extra performance of the main SIMD loop 301 | // starts to compensate the extra work and extra branching outside the SIMD loop. 302 | // The real optimal number depends on the specific μarch 303 | // and isn't worth optimizing for in this context, 304 | // since counting characters in shorter strings is unlikely to be a hot spot. 305 | if (len <= 1024) { 306 | return fps_count_naive(str, len, w); 307 | } 308 | 309 | static _Atomic fps_impl_t s_impl = (fps_impl_t)NULL; 310 | fps_impl_t impl = atomic_load_explicit(&s_impl, memory_order_relaxed); 311 | if (!impl) { 312 | impl = select_fps_simd_impl(); 313 | atomic_store_explicit(&s_impl, impl, memory_order_relaxed); 314 | } 315 | 316 | return (*impl)(str, len, w); 317 | #endif 318 | } 319 | -------------------------------------------------------------------------------- /cbits/itoa.c: -------------------------------------------------------------------------------- 1 | /////////////////////////////////////////////////////////////// 2 | // Encoding numbers using ASCII characters // 3 | // // 4 | // inspired by: http://www.jb.man.ac.uk/~slowe/cpp/itoa.html // 5 | /////////////////////////////////////////////////////////////// 6 | 7 | #include 8 | #include 9 | 10 | // Decimal Encoding 11 | /////////////////// 12 | 13 | static const char* digits = "0123456789abcdef"; 14 | 15 | // unsigned integers 16 | char* _hs_bytestring_uint32_dec (uint32_t x, char* buf) 17 | { 18 | char c, *ptr = buf, *next_free; 19 | uint32_t x_tmp; 20 | 21 | // encode positive number as little-endian decimal 22 | do { 23 | x_tmp = x; 24 | x /= 10; 25 | *ptr++ = digits[x_tmp - x * 10]; 26 | } while ( x ); 27 | 28 | // reverse written digits 29 | next_free = ptr--; 30 | while (buf < ptr) { 31 | c = *ptr; 32 | *ptr-- = *buf; 33 | *buf++ = c; 34 | } 35 | return next_free; 36 | } 37 | 38 | // unsigned long ints 39 | char* _hs_bytestring_uint64_dec (uint64_t x, char* buf) 40 | { 41 | char c, *ptr = buf, *next_free; 42 | uint64_t x_tmp; 43 | 44 | // encode positive number as little-endian decimal 45 | do { 46 | x_tmp = x; 47 | x /= 10; 48 | *ptr++ = digits[x_tmp - x * 10]; 49 | } while ( x ); 50 | 51 | // reverse written digits 52 | next_free = ptr--; 53 | while (buf < ptr) { 54 | c = *ptr; 55 | *ptr-- = *buf; 56 | *buf++ = c; 57 | } 58 | return next_free; 59 | } 60 | 61 | 62 | // Padded, decimal, positive integers for the decimal output of bignums 63 | /////////////////////////////////////////////////////////////////////// 64 | 65 | // Padded (9 digits), decimal, positive int: 66 | // We will use it with numbers that fit in 31 bits; i.e., numbers smaller than 67 | // 10^9, as "31 * log 2 / log 10 = 9.33" 68 | void _hs_bytestring_uint32_dec_padded9 (uint32_t x, char* buf) 69 | { 70 | const int max_width_int32_dec = 9; 71 | char* ptr = buf + max_width_int32_dec; 72 | uint32_t x_tmp; 73 | 74 | // encode positive number as little-endian decimal 75 | do { 76 | x_tmp = x; 77 | x /= 10; 78 | *(--ptr) = digits[x_tmp - x * 10]; 79 | } while ( x ); 80 | 81 | // pad beginning 82 | while (buf < ptr) { *(--ptr) = '0'; } 83 | } 84 | 85 | // Padded (19 digits), decimal, positive long long int: 86 | // We will use it with numbers that fit in 63 bits; i.e., numbers smaller than 87 | // 10^18, as "63 * log 2 / log 10 = 18.96" 88 | void _hs_bytestring_uint64_dec_padded18 (uint64_t x, char* buf) 89 | { 90 | const int max_width_int64_dec = 18; 91 | char* ptr = buf + max_width_int64_dec; 92 | uint64_t x_tmp; 93 | 94 | // encode positive number as little-endian decimal 95 | do { 96 | x_tmp = x; 97 | x /= 10; 98 | *(--ptr) = digits[x_tmp - x * 10]; 99 | } while ( x ); 100 | 101 | // pad beginning 102 | while (buf < ptr) { *(--ptr) = '0'; } 103 | } 104 | 105 | 106 | /////////////////////// 107 | // Hexadecimal encoding 108 | /////////////////////// 109 | 110 | // unsigned ints (32 bit words) 111 | char* _hs_bytestring_uint32_hex (uint32_t x, char* buf) { 112 | // write hex representation in reverse order 113 | char c, *ptr = buf, *next_free; 114 | do { 115 | *ptr++ = digits[x & 0xf]; 116 | x >>= 4; 117 | } while ( x ); 118 | // invert written digits 119 | next_free = ptr--; 120 | while(buf < ptr) { 121 | c = *ptr; 122 | *ptr-- = *buf; 123 | *buf++ = c; 124 | } 125 | return next_free; 126 | }; 127 | 128 | // unsigned long ints (64 bit words) 129 | char* _hs_bytestring_uint64_hex (uint64_t x, char* buf) { 130 | // write hex representation in reverse order 131 | char c, *ptr = buf, *next_free; 132 | do { 133 | *ptr++ = digits[x & 0xf]; 134 | x >>= 4; 135 | } while ( x ); 136 | // invert written digits 137 | next_free = ptr--; 138 | while(buf < ptr) { 139 | c = *ptr; 140 | *ptr-- = *buf; 141 | *buf++ = c; 142 | } 143 | return next_free; 144 | }; 145 | -------------------------------------------------------------------------------- /cbits/shortbytestring.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | 7 | ptrdiff_t 8 | sbs_elem_index(const void *s, 9 | uint8_t c, 10 | size_t n) 11 | { 12 | const void *so = memchr(s, c, n); 13 | 14 | if (so) { 15 | ptrdiff_t diff = so - s; 16 | assert(diff >= 0); 17 | return diff; 18 | } else { 19 | return -1; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /include/bytestring-cpp-macros.h: -------------------------------------------------------------------------------- 1 | #if defined(__STDC__) || defined(__GNUC__) || defined(__clang__) 2 | #error "bytestring-cpp-macros.h does not work in C code yet" 3 | #endif 4 | 5 | 6 | #if defined(i386_HOST_ARCH) || defined(x86_64_HOST_ARCH) \ 7 | || ((defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)) \ 8 | && defined(__ARM_FEATURE_UNALIGNED)) \ 9 | || defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH) \ 10 | || defined(powerpc64le_HOST_ARCH) \ 11 | || defined(javascript_HOST_ARCH) 12 | /* 13 | Not all architectures are forgiving of unaligned accesses; whitelist ones 14 | which are known not to trap (either to the kernel for emulation, or crash). 15 | */ 16 | #define HS_UNALIGNED_POKES_OK 1 17 | #else 18 | #if PURE_HASKELL 19 | #error "-fpure-haskell isn't supported yet on architectures only supporting aligned accesses." 20 | #endif 21 | #define HS_UNALIGNED_POKES_OK 0 22 | #endif 23 | 24 | 25 | #define HS_UNALIGNED_ByteArray_OPS_OK \ 26 | MIN_VERSION_base(4,12,0) \ 27 | && (MIN_VERSION_base(4,16,1) || HS_UNALIGNED_POKES_OK) 28 | /* 29 | The unaligned ByteArray# primops became available with base-4.12.0/ghc-8.6, 30 | but require an unaligned-friendly host architecture to be safe to use 31 | until ghc-9.2.2; see https://gitlab.haskell.org/ghc/ghc/-/issues/21015 32 | */ 33 | 34 | 35 | #define HS_CAST_FLOAT_WORD_OPS_AVAILABLE MIN_VERSION_base(4,14,0) 36 | /* 37 | These operations were added in base-4.10.0, but due to 38 | https://gitlab.haskell.org/ghc/ghc/-/issues/16617 they 39 | are buggy with negative floats before ghc-8.10. 40 | */ 41 | 42 | #define HS_UNALIGNED_ADDR_PRIMOPS_AVAILABLE MIN_VERSION_base(4,20,0) 43 | 44 | #define HS_timesInt2_PRIMOP_AVAILABLE MIN_VERSION_base(4,15,0) 45 | 46 | #define HS_cstringLength_AND_FinalPtr_AVAILABLE MIN_VERSION_base(4,15,0) 47 | /* These two were added in the same ghc commit and 48 | both primarily affect how we handle literals */ 49 | 50 | #define HS_unsafeWithForeignPtr_AVAILABLE MIN_VERSION_base(4,15,0) 51 | -------------------------------------------------------------------------------- /include/fpstring.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | void fps_reverse(unsigned char *dest, unsigned char *from, size_t len); 5 | void fps_intersperse(unsigned char *dest, unsigned char *from, size_t len, unsigned char c); 6 | unsigned char fps_maximum(unsigned char *p, size_t len); 7 | unsigned char fps_minimum(unsigned char *p, size_t len); 8 | size_t fps_count(unsigned char *p, size_t len, unsigned char w); 9 | void fps_sort(unsigned char *p, size_t len); 10 | -------------------------------------------------------------------------------- /tests/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.tix 3 | /.hpc 4 | -------------------------------------------------------------------------------- /tests/Builder.hs: -------------------------------------------------------------------------------- 1 | module Builder (testSuite) where 2 | 3 | import qualified Data.ByteString.Builder.Tests 4 | import qualified Data.ByteString.Builder.Prim.Tests 5 | import Test.Tasty (TestTree, testGroup) 6 | 7 | testSuite :: TestTree 8 | testSuite = testGroup "Builder" 9 | [ testGroup "Data.ByteString.Builder" 10 | Data.ByteString.Builder.Tests.tests 11 | 12 | , testGroup "Data.ByteString.Builder.BasicEncoding" 13 | Data.ByteString.Builder.Prim.Tests.tests 14 | ] 15 | -------------------------------------------------------------------------------- /tests/LazyHClose.hs: -------------------------------------------------------------------------------- 1 | module LazyHClose (testSuite) where 2 | 3 | import Control.Monad (void, forM_) 4 | import Data.ByteString.Internal (toForeignPtr) 5 | import Foreign.C.String (withCString) 6 | import Foreign.ForeignPtr (finalizeForeignPtr) 7 | import System.IO (openFile, openTempFile, hClose, hPutStrLn, IOMode(..)) 8 | import System.Posix.Internals (c_unlink) 9 | import Test.Tasty (TestTree, testGroup, withResource) 10 | import Test.Tasty.QuickCheck (testProperty, ioProperty) 11 | 12 | import qualified Data.ByteString as S 13 | import qualified Data.ByteString.Char8 as S8 14 | import qualified Data.ByteString.Lazy as L 15 | import qualified Data.ByteString.Lazy.Char8 as L8 16 | 17 | n :: Int 18 | n = 1000 19 | 20 | testSuite :: TestTree 21 | testSuite = withResource 22 | (do (fn, h) <- openTempFile "." "lazy-hclose-test.tmp"; hPutStrLn h "x"; hClose h; pure fn) 23 | removeFile $ \fn' -> 24 | testGroup "LazyHClose" 25 | [ testProperty "Testing resource leaks for Strict.readFile" $ ioProperty $ 26 | forM_ [1..n] $ const $ do 27 | fn <- fn' 28 | r <- S.readFile fn 29 | appendFile fn "" -- will fail, if fn has not been closed yet 30 | 31 | , testProperty "Testing resource leaks for Lazy.readFile" $ ioProperty $ 32 | forM_ [1..n] $ const $ do 33 | fn <- fn' 34 | r <- L.readFile fn 35 | L.length r `seq` return () 36 | appendFile fn "" -- will fail, if fn has not been closed yet 37 | 38 | , testProperty "Testing resource leaks when converting lazy to strict" $ ioProperty $ 39 | forM_ [1..n] $ const $ do 40 | fn <- fn' 41 | let release c = finalizeForeignPtr fp where (fp,_,_) = toForeignPtr c 42 | r <- L.readFile fn 43 | mapM_ release (L.toChunks r) 44 | appendFile fn "" -- will fail, if fn has not been closed yet 45 | 46 | , testProperty "Testing strict hGetContents" $ ioProperty $ 47 | forM_ [1..n] $ const $ do 48 | fn <- fn' 49 | h <- openFile fn ReadMode 50 | r <- S.hGetContents h 51 | S.last r `seq` return () 52 | appendFile fn "" -- will fail, if fn has not been closed yet 53 | 54 | , testProperty "Testing lazy hGetContents" $ ioProperty $ 55 | forM_ [1..n] $ const $ do 56 | fn <- fn' 57 | h <- openFile fn ReadMode 58 | r <- L.hGetContents h 59 | L.last r `seq` return () 60 | appendFile fn "" -- will fail, if fn has not been closed yet 61 | ] 62 | 63 | removeFile :: String -> IO () 64 | removeFile fn = void $ withCString fn c_unlink 65 | -------------------------------------------------------------------------------- /tests/Lift.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | 3 | {-# LANGUAGE OverloadedStrings #-} 4 | {-# LANGUAGE TemplateHaskell #-} 5 | 6 | module Lift (testSuite) where 7 | 8 | import Test.Tasty (TestTree, testGroup) 9 | import Test.Tasty.QuickCheck (testProperty, (===)) 10 | import qualified Data.ByteString as BS 11 | import qualified Data.ByteString.Lazy as LBS 12 | import qualified Data.ByteString.Short as SBS 13 | import qualified Language.Haskell.TH.Syntax as TH 14 | 15 | testSuite :: TestTree 16 | #ifdef wasm32_HOST_ARCH 17 | testSuite = testGroup "Skipped, requires -fexternal-interpreter" [] 18 | #else 19 | testSuite = testGroup "Lift" 20 | [ testGroup "strict" 21 | [ testProperty "normal" $ 22 | let bs = "foobar" :: BS.ByteString in 23 | bs === $(TH.lift $ BS.pack [102,111,111,98,97,114]) 24 | 25 | , testProperty "binary" $ 26 | let bs = "\0\1\2\3\0\1\2\3" :: BS.ByteString in 27 | bs === $(TH.lift $ BS.pack [0,1,2,3,0,1,2,3]) 28 | 29 | #if MIN_VERSION_template_haskell(2,16,0) 30 | , testProperty "typed" $ 31 | let bs = "\0\1\2\3\0\1\2\3" :: BS.ByteString in 32 | bs === $$(TH.liftTyped $ BS.pack [0,1,2,3,0,1,2,3]) 33 | #endif 34 | ] 35 | 36 | , testGroup "lazy" 37 | [ testProperty "normal" $ 38 | let bs = "foobar" :: LBS.ByteString in 39 | bs === $(TH.lift $ LBS.pack [102,111,111,98,97,114]) 40 | 41 | , testProperty "binary" $ 42 | let bs = "\0\1\2\3\0\1\2\3" :: LBS.ByteString in 43 | bs === $(TH.lift $ LBS.pack [0,1,2,3,0,1,2,3]) 44 | 45 | #if MIN_VERSION_template_haskell(2,16,0) 46 | , testProperty "typed" $ 47 | let bs = "\0\1\2\3\0\1\2\3" :: LBS.ByteString in 48 | bs === $$(TH.liftTyped $ LBS.pack [0,1,2,3,0,1,2,3]) 49 | #endif 50 | ] 51 | 52 | , testGroup "short" 53 | [ testProperty "normal" $ 54 | let bs = "foobar" :: SBS.ShortByteString in 55 | bs === $(TH.lift $ SBS.pack [102,111,111,98,97,114]) 56 | 57 | , testProperty "binary" $ 58 | let bs = "\0\1\2\3\0\1\2\3" :: SBS.ShortByteString in 59 | bs === $(TH.lift $ SBS.pack [0,1,2,3,0,1,2,3]) 60 | 61 | #if MIN_VERSION_template_haskell(2,16,0) 62 | , testProperty "typed" $ 63 | let bs = "\0\1\2\3\0\1\2\3" :: SBS.ShortByteString in 64 | bs === $$(TH.liftTyped $ SBS.pack [0,1,2,3,0,1,2,3]) 65 | #endif 66 | ] 67 | ] 68 | #endif 69 | -------------------------------------------------------------------------------- /tests/Main.hs: -------------------------------------------------------------------------------- 1 | module Main (main) where 2 | 3 | import Test.Tasty 4 | 5 | import qualified Builder 6 | import qualified IsValidUtf8 7 | import qualified LazyHClose 8 | import qualified Lift 9 | import qualified Properties 10 | 11 | main :: IO () 12 | main = defaultMain $ testGroup "All" 13 | [ Builder.testSuite 14 | , IsValidUtf8.testSuite 15 | , LazyHClose.testSuite 16 | , Lift.testSuite 17 | , Properties.testSuite 18 | ] 19 | -------------------------------------------------------------------------------- /tests/Properties/ByteStringChar8.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | 3 | #define BYTESTRING_CHAR8 4 | 5 | #include "ByteString.hs" 6 | -------------------------------------------------------------------------------- /tests/Properties/ByteStringLazy.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | 3 | #define BYTESTRING_LAZY 4 | 5 | #include "ByteString.hs" 6 | -------------------------------------------------------------------------------- /tests/Properties/ByteStringLazyChar8.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | 3 | #define BYTESTRING_LAZY 4 | #define BYTESTRING_CHAR8 5 | 6 | #include "ByteString.hs" 7 | -------------------------------------------------------------------------------- /tests/Properties/ShortByteString.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | 3 | #define BYTESTRING_SHORT 4 | 5 | #include "ByteString.hs" 6 | -------------------------------------------------------------------------------- /tests/QuickCheckUtils.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DataKinds #-} 2 | {-# LANGUAGE UndecidableInstances #-} 3 | 4 | module QuickCheckUtils 5 | ( Char8(..) 6 | , String8(..) 7 | , CByteString(..) 8 | , Sqrt(..) 9 | , int64OK 10 | , tooStrictErr 11 | ) where 12 | 13 | import Test.Tasty.QuickCheck 14 | import Text.Show.Functions 15 | 16 | import Control.Monad ( liftM2 ) 17 | import Data.Char 18 | import Data.Word 19 | import Data.Int 20 | import System.IO 21 | import Foreign.C (CChar) 22 | import GHC.TypeLits (TypeError, ErrorMessage(..)) 23 | import GHC.Stack (withFrozenCallStack, HasCallStack) 24 | 25 | import qualified Data.ByteString.Short as SB 26 | import qualified Data.ByteString as P 27 | import qualified Data.ByteString.Lazy as L 28 | 29 | import qualified Data.ByteString.Char8 as PC 30 | import qualified Data.ByteString.Lazy.Char8 as LC 31 | 32 | ------------------------------------------------------------------------ 33 | 34 | sizedByteString n = do m <- choose(0, n) 35 | fmap P.pack $ vectorOf m arbitrary 36 | 37 | instance Arbitrary P.ByteString where 38 | arbitrary = do 39 | bs <- sized sizedByteString 40 | n <- choose (0, 2) 41 | return (P.drop n bs) -- to give us some with non-0 offset 42 | shrink = map P.pack . shrink . P.unpack 43 | 44 | instance CoArbitrary P.ByteString where 45 | coarbitrary s = coarbitrary (P.unpack s) 46 | 47 | instance Arbitrary L.ByteString where 48 | arbitrary = sized $ \n -> do numChunks <- choose (0, n) 49 | if numChunks == 0 50 | then return L.empty 51 | else fmap (L.fromChunks . 52 | filter (not . P.null)) $ 53 | vectorOf numChunks 54 | (sizedByteString 55 | (n `div` numChunks)) 56 | 57 | shrink = map L.fromChunks . shrink . L.toChunks 58 | 59 | instance CoArbitrary L.ByteString where 60 | coarbitrary s = coarbitrary (L.unpack s) 61 | 62 | newtype CByteString = CByteString P.ByteString 63 | deriving Show 64 | 65 | instance Arbitrary CByteString where 66 | arbitrary = fmap (CByteString . P.pack . map fromCChar) 67 | arbitrary 68 | where 69 | fromCChar :: NonZero CChar -> Word8 70 | fromCChar = fromIntegral . getNonZero 71 | 72 | -- | 'Char', but only representing 8-bit characters. 73 | -- 74 | newtype Char8 = Char8 Char 75 | deriving (Eq, Ord, Show) 76 | 77 | instance Arbitrary Char8 where 78 | arbitrary = fmap (Char8 . toChar) arbitrary 79 | where 80 | toChar :: Word8 -> Char 81 | toChar = toEnum . fromIntegral 82 | shrink (Char8 c) = fmap Char8 (shrink c) 83 | 84 | instance CoArbitrary Char8 where 85 | coarbitrary (Char8 c) = coarbitrary c 86 | 87 | -- | 'Char', but only representing 8-bit characters. 88 | -- 89 | newtype String8 = String8 String 90 | deriving (Eq, Ord, Show) 91 | 92 | instance Arbitrary String8 where 93 | arbitrary = fmap (String8 . map toChar) arbitrary 94 | where 95 | toChar :: Word8 -> Char 96 | toChar = toEnum . fromIntegral 97 | shrink (String8 xs) = fmap String8 (shrink xs) 98 | 99 | -- | If a test takes O(n^2) time or memory, it's useful to wrap its inputs 100 | -- into 'Sqrt' so that increasing number of tests affects run time linearly. 101 | newtype Sqrt a = Sqrt { unSqrt :: a } 102 | deriving (Eq, Show) 103 | 104 | instance Arbitrary a => Arbitrary (Sqrt a) where 105 | arbitrary = Sqrt <$> sized 106 | (\n -> resize (round @Double $ sqrt $ fromIntegral @Int n) arbitrary) 107 | shrink = map Sqrt . shrink . unSqrt 108 | 109 | 110 | sizedShortByteString :: Int -> Gen SB.ShortByteString 111 | sizedShortByteString n = do m <- choose(0, n) 112 | fmap SB.pack $ vectorOf m arbitrary 113 | 114 | instance Arbitrary SB.ShortByteString where 115 | arbitrary = sized sizedShortByteString 116 | shrink = map SB.pack . shrink . SB.unpack 117 | 118 | instance CoArbitrary SB.ShortByteString where 119 | coarbitrary s = coarbitrary (SB.unpack s) 120 | 121 | -- | This /poison instance/ exists to make accidental mis-use 122 | -- of the @Arbitrary Int64@ instance a bit less likely. 123 | instance {-# OVERLAPPING #-} 124 | TypeError (Text "Found a test taking a raw Int64 argument." 125 | :$$: Text "'instance Arbitrary Int64' by default is likely to" 126 | :$$: Text "produce very large numbers after the first few tests," 127 | :$$: Text "which doesn't make great indices into a LazyByteString." 128 | :$$: Text "For indices, try 'intToIndexTy' in Properties/ByteString.hs." 129 | :$$: Text "" 130 | :$$: Text "If very few small-numbers tests is OK, use" 131 | :$$: Text "'int64OK' to bypass this poison-instance." 132 | ) => Testable (Int64 -> prop) where 133 | property = error "poison instance Testable (Int64 -> prop)" 134 | 135 | -- | Use this to bypass the poison instance for @Testable (Int64 -> prop)@ 136 | -- defined in "QuickCheckUtils". 137 | int64OK :: (Arbitrary a, Show a, Testable b) => (a -> b) -> Property 138 | int64OK f = propertyForAllShrinkShow arbitrary shrink (\v -> [show v]) f 139 | 140 | tooStrictErr :: forall a. HasCallStack => a 141 | tooStrictErr = withFrozenCallStack $ 142 | error "A lazy sub-expression was unexpectedly evaluated" 143 | -------------------------------------------------------------------------------- /tests/builder/Data/ByteString/Builder/Prim/Tests.hs: -------------------------------------------------------------------------------- 1 | -- | 2 | -- Copyright : (c) 2011 Simon Meier 3 | -- License : BSD3-style (see LICENSE) 4 | -- 5 | -- Maintainer : Simon Meier 6 | -- Stability : experimental 7 | -- Portability : tested on GHC only 8 | -- 9 | -- Testing all encodings provided by this library. 10 | 11 | module Data.ByteString.Builder.Prim.Tests (tests) where 12 | 13 | import Data.Char (ord) 14 | import qualified Data.ByteString.Lazy as L 15 | import qualified Data.ByteString.Lazy.Char8 as LC 16 | import Data.ByteString.Builder 17 | import qualified Data.ByteString.Builder.Prim as BP 18 | import Data.ByteString.Builder.Prim.TestUtils 19 | 20 | import Test.Tasty 21 | import Test.Tasty.QuickCheck 22 | 23 | tests :: [TestTree] 24 | tests = concat [ testsBinary, testsASCII, testsChar8, testsUtf8 25 | , testsCombinatorsB, [testCString, testCStringUtf8] ] 26 | 27 | testCString :: TestTree 28 | testCString = testProperty "cstring" $ 29 | toLazyByteString (BP.cstring "hello world!"#) == 30 | LC.pack "hello" `L.append` L.singleton 0x20 `L.append` LC.pack "world!" 31 | 32 | testCStringUtf8 :: TestTree 33 | testCStringUtf8 = testProperty "cstringUtf8" $ 34 | toLazyByteString (BP.cstringUtf8 "hello\xc0\x80world!"#) == 35 | LC.pack "hello" `L.append` L.singleton 0x00 `L.append` LC.pack "world!" 36 | 37 | ------------------------------------------------------------------------------ 38 | -- Binary 39 | ------------------------------------------------------------------------------ 40 | 41 | testsBinary :: [TestTree] 42 | testsBinary = 43 | [ testBoundedF "word8" bigEndian_list BP.word8 44 | , testBoundedF "int8" bigEndian_list BP.int8 45 | 46 | -- big-endian 47 | , testBoundedF "int16BE" bigEndian_list BP.int16BE 48 | , testBoundedF "int32BE" bigEndian_list BP.int32BE 49 | , testBoundedF "int64BE" bigEndian_list BP.int64BE 50 | 51 | , testBoundedF "word16BE" bigEndian_list BP.word16BE 52 | , testBoundedF "word32BE" bigEndian_list BP.word32BE 53 | , testBoundedF "word64BE" bigEndian_list BP.word64BE 54 | 55 | , testF "floatLE" (float_list littleEndian_list) BP.floatLE 56 | , testF "doubleLE" (double_list littleEndian_list) BP.doubleLE 57 | 58 | -- little-endian 59 | , testBoundedF "int16LE" littleEndian_list BP.int16LE 60 | , testBoundedF "int32LE" littleEndian_list BP.int32LE 61 | , testBoundedF "int64LE" littleEndian_list BP.int64LE 62 | 63 | , testBoundedF "word16LE" littleEndian_list BP.word16LE 64 | , testBoundedF "word32LE" littleEndian_list BP.word32LE 65 | , testBoundedF "word64LE" littleEndian_list BP.word64LE 66 | 67 | , testF "floatBE" (float_list bigEndian_list) BP.floatBE 68 | , testF "doubleBE" (double_list bigEndian_list) BP.doubleBE 69 | 70 | -- host dependent 71 | , testBoundedF "int16Host" hostEndian_list BP.int16Host 72 | , testBoundedF "int32Host" hostEndian_list BP.int32Host 73 | , testBoundedF "int64Host" hostEndian_list BP.int64Host 74 | , testBoundedF "intHost" hostEndian_list BP.intHost 75 | 76 | , testBoundedF "word16Host" hostEndian_list BP.word16Host 77 | , testBoundedF "word32Host" hostEndian_list BP.word32Host 78 | , testBoundedF "word64Host" hostEndian_list BP.word64Host 79 | , testBoundedF "wordHost" hostEndian_list BP.wordHost 80 | 81 | , testF "floatHost" (float_list hostEndian_list) BP.floatHost 82 | , testF "doubleHost" (double_list hostEndian_list) BP.doubleHost 83 | ] 84 | 85 | 86 | ------------------------------------------------------------------------------ 87 | -- Latin-1 aka Char8 88 | ------------------------------------------------------------------------------ 89 | 90 | testsChar8 :: [TestTree] 91 | testsChar8 = 92 | [ testBoundedF "char8" char8_list BP.char8 ] 93 | 94 | 95 | ------------------------------------------------------------------------------ 96 | -- ASCII 97 | ------------------------------------------------------------------------------ 98 | 99 | testsASCII :: [TestTree] 100 | testsASCII = 101 | [ testBoundedF "char7" char7_list BP.char7 102 | 103 | , testBoundedB "int8Dec" dec_list BP.int8Dec 104 | , testBoundedB "int16Dec" dec_list BP.int16Dec 105 | , testBoundedB "int32Dec" dec_list BP.int32Dec 106 | , testBoundedB "int64Dec" dec_list BP.int64Dec 107 | , testBoundedB "intDec" dec_list BP.intDec 108 | 109 | , testBoundedB "word8Dec" dec_list BP.word8Dec 110 | , testBoundedB "word16Dec" dec_list BP.word16Dec 111 | , testBoundedB "word32Dec" dec_list BP.word32Dec 112 | , testBoundedB "word64Dec" dec_list BP.word64Dec 113 | , testBoundedB "wordDec" dec_list BP.wordDec 114 | 115 | , testBoundedB "word8Hex" hex_list BP.word8Hex 116 | , testBoundedB "word16Hex" hex_list BP.word16Hex 117 | , testBoundedB "word32Hex" hex_list BP.word32Hex 118 | , testBoundedB "word64Hex" hex_list BP.word64Hex 119 | , testBoundedB "wordHex" hex_list BP.wordHex 120 | 121 | , testBoundedF "word8HexFixed" wordHexFixed_list BP.word8HexFixed 122 | , testBoundedF "word16HexFixed" wordHexFixed_list BP.word16HexFixed 123 | , testBoundedF "word32HexFixed" wordHexFixed_list BP.word32HexFixed 124 | , testBoundedF "word64HexFixed" wordHexFixed_list BP.word64HexFixed 125 | 126 | , testBoundedF "int8HexFixed" int8HexFixed_list BP.int8HexFixed 127 | , testBoundedF "int16HexFixed" int16HexFixed_list BP.int16HexFixed 128 | , testBoundedF "int32HexFixed" int32HexFixed_list BP.int32HexFixed 129 | , testBoundedF "int64HexFixed" int64HexFixed_list BP.int64HexFixed 130 | 131 | , testF "floatHexFixed" floatHexFixed_list BP.floatHexFixed 132 | , testF "doubleHexFixed" doubleHexFixed_list BP.doubleHexFixed 133 | ] 134 | 135 | 136 | ------------------------------------------------------------------------------ 137 | -- UTF-8 138 | ------------------------------------------------------------------------------ 139 | 140 | testsUtf8 :: [TestTree] 141 | testsUtf8 = 142 | [ testBoundedB "charUtf8" charUtf8_list BP.charUtf8 ] 143 | 144 | 145 | ------------------------------------------------------------------------------ 146 | -- BoundedPrim combinators 147 | ------------------------------------------------------------------------------ 148 | 149 | maybeB :: BP.BoundedPrim () -> BP.BoundedPrim a -> BP.BoundedPrim (Maybe a) 150 | maybeB nothing just = maybe (Left ()) Right BP.>$< BP.eitherB nothing just 151 | 152 | testsCombinatorsB :: [TestTree] 153 | testsCombinatorsB = 154 | [ compareImpls "mapMaybe (via BoundedPrim)" 155 | (L.pack . concatMap encChar) 156 | (toLazyByteString . encViaBuilder) 157 | 158 | , compareImpls "filter (via BoundedPrim)" 159 | (L.pack . filter (< 32)) 160 | (toLazyByteString . BP.primMapListBounded (BP.condB (< 32) (BP.liftFixedToBounded BP.word8) BP.emptyB)) 161 | 162 | , compareImpls "pairB" 163 | (L.pack . concatMap (\(c,w) -> charUtf8_list c ++ [w])) 164 | (toLazyByteString . BP.primMapListBounded 165 | ((\(c,w) -> (c,(w,undefined))) BP.>$< 166 | BP.charUtf8 BP.>*< (BP.liftFixedToBounded BP.word8) BP.>*< (BP.liftFixedToBounded BP.emptyF))) 167 | ] 168 | where 169 | encChar = maybe [112] (hostEndian_list . ord) 170 | 171 | encViaBuilder = BP.primMapListBounded $ maybeB (BP.liftFixedToBounded $ (\_ -> 112) BP.>$< BP.word8) 172 | (ord BP.>$< (BP.liftFixedToBounded $ BP.intHost)) 173 | --------------------------------------------------------------------------------