├── test ├── filesystem │ ├── bar.txt │ ├── baz.txt │ ├── foo.txt │ └── bin │ │ └── bin.txt ├── Spec.hs ├── LICENSE.gz └── Data │ └── Streaming │ ├── FileReadSpec.hs │ ├── NetworkSpec.hs │ ├── ProcessSpec.hs │ ├── FilesystemSpec.hs │ ├── ByteString │ └── BuilderSpec.hs │ ├── TextSpec.hs │ └── ZlibSpec.hs ├── stack.yaml ├── Setup.hs ├── .gitignore ├── include └── text_cbits.h ├── bench ├── decode-memory-usage.hs ├── builder-to-bytestring-io.hs └── count-chars.hs ├── Data └── Streaming │ ├── FileRead.hs │ ├── Process │ └── Internal.hs │ ├── Filesystem.hs │ ├── Network │ └── Internal.hs │ ├── Zlib │ └── Lowlevel.hs │ ├── ByteString │ ├── Builder │ │ └── Buffer.hs │ └── Builder.hs │ ├── Process.hs │ ├── Zlib.hs │ ├── Text.hs │ └── Network.hs ├── LICENSE ├── README.md ├── cbits ├── zlib-helper.c └── text-helper.c ├── .github └── workflows │ └── ci.yml ├── System └── Win32File.hsc ├── ChangeLog.md └── streaming-commons.cabal /test/filesystem/bar.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/filesystem/baz.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/filesystem/foo.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/filesystem/bin/bin.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /stack.yaml: -------------------------------------------------------------------------------- 1 | resolver: lts-23.5 2 | -------------------------------------------------------------------------------- /Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple 2 | main = defaultMain 3 | -------------------------------------------------------------------------------- /test/Spec.hs: -------------------------------------------------------------------------------- 1 | {-# OPTIONS_GHC -F -pgmF hspec-discover #-} 2 | -------------------------------------------------------------------------------- /test/LICENSE.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fpco/streaming-commons/HEAD/test/LICENSE.gz -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | dist 2 | cabal-dev 3 | *.o 4 | *.hi 5 | *.chi 6 | *.chs.h 7 | .virthualenv 8 | *.swp 9 | .stack-work/ 10 | tarballs/ 11 | dist-newstyle/ 12 | -------------------------------------------------------------------------------- /include/text_cbits.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2013 Bryan O'Sullivan . 3 | */ 4 | 5 | #ifndef _text_cbits_h 6 | #define _text_cbits_h 7 | 8 | #define UTF8_ACCEPT 0 9 | #define UTF8_REJECT 12 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /test/Data/Streaming/FileReadSpec.hs: -------------------------------------------------------------------------------- 1 | module Data.Streaming.FileReadSpec (spec) where 2 | 3 | import Test.Hspec 4 | import qualified Data.ByteString as S 5 | import qualified Data.Streaming.FileRead as F 6 | import Control.Exception (bracket) 7 | 8 | spec :: Spec 9 | spec = describe "Data.Streaming.FileRead" $ do 10 | it "works" $ do 11 | let fp = "LICENSE" 12 | expected <- S.readFile fp 13 | actual <- bracket (F.openFile fp) F.closeFile $ \fh -> do 14 | let loop front = do 15 | bs <- F.readChunk fh 16 | if S.null bs 17 | then return $ S.concat $ front [] 18 | else loop (front . (bs:)) 19 | loop id 20 | actual `shouldBe` expected 21 | -------------------------------------------------------------------------------- /bench/decode-memory-usage.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | import Data.ByteString (ByteString) 3 | import Data.Streaming.Text 4 | import System.Environment (getArgs) 5 | 6 | input :: [ByteString] 7 | input = replicate 1000000 "Hello World!\n" 8 | 9 | main :: IO () 10 | main = do 11 | args <- getArgs 12 | let dec = 13 | case args of 14 | ["16le"] -> decodeUtf16LE 15 | ["16be"] -> decodeUtf16BE 16 | ["32le"] -> decodeUtf32LE 17 | ["32be"] -> decodeUtf32BE 18 | ["8pure"] -> decodeUtf8Pure 19 | _ -> decodeUtf8 20 | 21 | loop dec input 22 | 23 | loop :: (ByteString -> DecodeResult) -> [ByteString] -> IO () 24 | loop dec [] = 25 | case dec "" of 26 | DecodeResultSuccess _ _ -> return () 27 | DecodeResultFailure _ _ -> error "failure1" 28 | loop dec (bs:bss) = 29 | case dec bs of 30 | DecodeResultSuccess _ dec' -> loop dec' bss 31 | DecodeResultFailure _ _ -> error "failure2" 32 | -------------------------------------------------------------------------------- /Data/Streaming/FileRead.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | -- | The standard @openFile@ call on Windows causing problematic file locking 3 | -- in some cases. This module provides a cross-platform file reading API 4 | -- without the file locking problems on Windows. 5 | -- 6 | -- This module /always/ opens files in binary mode. 7 | -- 8 | -- @readChunk@ will return an empty @ByteString@ on EOF. 9 | module Data.Streaming.FileRead 10 | ( ReadHandle 11 | , openFile 12 | , closeFile 13 | , readChunk 14 | ) where 15 | 16 | #if WINDOWS 17 | 18 | import System.Win32File 19 | 20 | #else 21 | 22 | import qualified System.IO as IO 23 | import qualified Data.ByteString as S 24 | import Data.ByteString.Lazy.Internal (defaultChunkSize) 25 | 26 | newtype ReadHandle = ReadHandle IO.Handle 27 | 28 | openFile :: FilePath -> IO ReadHandle 29 | openFile fp = ReadHandle `fmap` IO.openBinaryFile fp IO.ReadMode 30 | 31 | closeFile :: ReadHandle -> IO () 32 | closeFile (ReadHandle h) = IO.hClose h 33 | 34 | readChunk :: ReadHandle -> IO S.ByteString 35 | readChunk (ReadHandle h) = S.hGetSome h defaultChunkSize 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 FP Complete 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | streaming-commons 2 | ================= 3 | 4 | Common lower-level functions needed by various streaming data libraries. 5 | Intended to be shared by libraries like conduit and pipes. 6 | 7 | [![Build status](https://github.com/fpco/streaming-commons/actions/workflows/ci.yml/badge.svg)](https://github.com/fpco/streaming-commons/actions/workflows/ci.yml) 8 | 9 | Dependencies 10 | ------------ 11 | 12 | One of the requirements of this package is to restrict ourselves to "core" 13 | dependencies. The definition of core is still to be decided, but here's a 14 | working start: 15 | 16 | * *No* dependency on system libraries, beyond that which is required by other 17 | dependencies. 18 | * Anything which ships with GHC. *However*, we must retain compatibility with 19 | versions of those packages going back to at least GHC 7.4, and preferably 20 | earlier. 21 | * text, once again with backwards compatibility for versions included with 22 | legacy Haskell Platform. In other words, 0.11.2 support is required. 23 | * network, support back to 2.3. We do *not* need to support the 24 | network/network-bytestring split. 25 | * stm, preferably all the way back to 2.1. 26 | * transformers 27 | 28 | For debate: 29 | 30 | * Other Haskell Platform packages, especially vector and attoparsec. 31 | -------------------------------------------------------------------------------- /Data/Streaming/Process/Internal.hs: -------------------------------------------------------------------------------- 1 | module Data.Streaming.Process.Internal 2 | ( StreamingProcessHandle (..) 3 | , InputSource (..) 4 | , OutputSink (..) 5 | ) where 6 | 7 | import Control.Concurrent.STM (TMVar) 8 | import System.Exit (ExitCode) 9 | import System.IO (Handle) 10 | import System.Process (ProcessHandle, StdStream (CreatePipe)) 11 | 12 | -- | Class for all things which can be used to provide standard input. 13 | -- 14 | -- Since 0.1.4 15 | class InputSource a where 16 | isStdStream :: (Maybe Handle -> IO a, Maybe StdStream) 17 | instance InputSource Handle where 18 | isStdStream = (\(Just h) -> return h, Just CreatePipe) 19 | 20 | -- | Class for all things which can be used to consume standard output or 21 | -- error. 22 | -- 23 | -- Since 0.1.4 24 | class OutputSink a where 25 | osStdStream :: (Maybe Handle -> IO a, Maybe StdStream) 26 | instance OutputSink Handle where 27 | osStdStream = (\(Just h) -> return h, Just CreatePipe) 28 | 29 | -- | Wraps up the standard @ProcessHandle@ to avoid the @waitForProcess@ 30 | -- deadlock. See the linked documentation from the module header for more 31 | -- information. 32 | -- 33 | -- Since 0.1.4 34 | data StreamingProcessHandle = StreamingProcessHandle 35 | ProcessHandle 36 | (TMVar ExitCode) 37 | (IO ()) -- cleanup resources 38 | -------------------------------------------------------------------------------- /bench/builder-to-bytestring-io.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ScopedTypeVariables #-} 2 | 3 | import Gauge.Main 4 | import qualified Data.ByteString.Char8 as S 5 | import qualified Data.ByteString.Builder as BB 6 | import Data.Monoid 7 | import qualified Data.Streaming.ByteString.Builder as BB 8 | 9 | main :: IO () 10 | main = defaultMain [ bgroup "Data.Streaming.ByteString.Builder.toByteStringIO" 11 | (benchmarks bIO b100_10000 b10000_100 b10000_10000) 12 | , bgroup "Data.ByteString.Builder.toLazyByteString" 13 | (benchmarks bLazy b100_10000 b10000_100 b10000_10000) 14 | ] 15 | where 16 | bIO = whnfIO . BB.toByteStringIO (const (return ())) 17 | bLazy = nf BB.toLazyByteString 18 | benchmarks run bld100_10000 bld10000_100 bld10000_10000 = 19 | [ bench' run bld100_10000 100 10000 20 | , bench' run bld10000_100 10000 100 21 | , bench' run bld10000_10000 10000 10000 22 | ] 23 | bench' :: (b -> Benchmarkable) -> b -> Int -> Int -> Benchmark 24 | bench' run bld' len reps = bench (show len ++ "/" ++ show reps) (run bld') 25 | b100_10000 = bld BB.byteString 100 10000 26 | b10000_100 = bld BB.byteString 10000 100 27 | b10000_10000 = bld BB.byteString 10000 10000 28 | bld :: Data.Monoid.Monoid a => (S.ByteString -> a) -> Int -> Int -> a 29 | bld f len reps = mconcat (replicate reps (f (S.replicate len 'x'))) 30 | -------------------------------------------------------------------------------- /bench/count-chars.hs: -------------------------------------------------------------------------------- 1 | import Gauge.Main 2 | import qualified Data.Text as T 3 | import qualified Data.Text.Lazy as TL 4 | import qualified Data.Text.Lazy.Encoding as TLE 5 | import qualified Data.ByteString as S 6 | import qualified Data.ByteString.Lazy as L 7 | import Data.ByteString.Lazy.Internal (ByteString (..)) 8 | import Data.Streaming.Text 9 | 10 | calcLen :: (S.ByteString -> DecodeResult) 11 | -> L.ByteString 12 | -> Int 13 | calcLen = 14 | loop 0 15 | where 16 | loop total _ Empty = total 17 | loop total dec (Chunk bs bss) = 18 | total' `seq` loop total' dec' bss 19 | where 20 | DecodeResultSuccess t dec' = dec bs 21 | total' = total + T.length t 22 | 23 | handleEncoding :: ( String 24 | , TL.Text -> L.ByteString 25 | , L.ByteString -> TL.Text 26 | , S.ByteString -> DecodeResult 27 | ) 28 | -> Benchmark 29 | handleEncoding (name, encodeLazy, decodeLazy, decodeStream) = bgroup name 30 | [ bench "lazy" $ whnf (TL.length . decodeLazy) lbs 31 | , bench "stream" $ whnf (calcLen decodeStream) lbs 32 | ] 33 | where 34 | text = TL.pack $ concat $ replicate 10 ['\27'..'\2003'] 35 | lbs = encodeLazy text 36 | 37 | main :: IO () 38 | main = defaultMain $ map handleEncoding 39 | [ ("UTF-8", TLE.encodeUtf8, TLE.decodeUtf8, decodeUtf8) 40 | , ("UTF-8 pure", TLE.encodeUtf8, TLE.decodeUtf8, decodeUtf8Pure) 41 | , ("UTF-16LE", TLE.encodeUtf16LE, TLE.decodeUtf16LE, decodeUtf16LE) 42 | , ("UTF-16BE", TLE.encodeUtf16BE, TLE.decodeUtf16BE, decodeUtf16BE) 43 | , ("UTF-32LE", TLE.encodeUtf32LE, TLE.decodeUtf32LE, decodeUtf32LE) 44 | , ("UTF-32BE", TLE.encodeUtf32BE, TLE.decodeUtf32BE, decodeUtf32BE) 45 | ] 46 | -------------------------------------------------------------------------------- /test/Data/Streaming/NetworkSpec.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | module Data.Streaming.NetworkSpec where 3 | 4 | import Control.Concurrent.Async (withAsync) 5 | import Control.Exception (bracket) 6 | import Control.Monad (forever, replicateM_) 7 | import Data.Array.Unboxed (elems) 8 | import qualified Data.ByteString.Char8 as S8 9 | import Data.Char (toUpper) 10 | import Data.Streaming.Network 11 | import Network.Socket (close) 12 | import Test.Hspec 13 | import Test.Hspec.QuickCheck 14 | 15 | spec :: Spec 16 | spec = do 17 | describe "getDefaultReadBufferSize" $ do 18 | it "sanity" $ do 19 | getReadBufferSize (clientSettingsTCP 8080 "localhost") >= 4096 `shouldBe` True 20 | 21 | describe "getUnassignedPort" $ do 22 | it "sanity" $ replicateM_ 100000 $ do 23 | port <- getUnassignedPort 24 | (port `elem` elems unassignedPorts) `shouldBe` True 25 | describe "bindRandomPortTCP" $ do 26 | modifyMaxSuccess (const 5) $ prop "sanity" $ \content -> bracket 27 | (bindRandomPortTCP "*4") 28 | (close . snd) 29 | $ \(port, socket) -> do 30 | let server ad = forever $ appRead ad >>= appWrite ad . S8.map toUpper 31 | client ad = do 32 | appWrite ad bs 33 | appRead ad >>= (`shouldBe` S8.map toUpper bs) 34 | bs 35 | | null content = "hello" 36 | | otherwise = S8.pack $ take 1000 content 37 | withAsync (runTCPServer (serverSettingsTCPSocket socket) server) $ \_ -> do 38 | runTCPClient (clientSettingsTCP port "localhost") client 39 | -------------------------------------------------------------------------------- /test/Data/Streaming/ProcessSpec.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | module Data.Streaming.ProcessSpec (spec, main) where 3 | 4 | import Test.Hspec 5 | import Test.Hspec.QuickCheck (prop) 6 | import Control.Concurrent.Async (concurrently) 7 | import qualified Data.ByteString.Lazy as L 8 | import qualified Data.ByteString as S 9 | import System.Exit 10 | import Control.Concurrent (threadDelay) 11 | import Data.Streaming.Process 12 | import System.IO (hClose) 13 | 14 | main :: IO () 15 | main = hspec spec 16 | 17 | spec :: Spec 18 | spec = do 19 | #ifndef WINDOWS 20 | prop "cat" $ \wss -> do 21 | let lbs = L.fromChunks $ map S.pack wss 22 | (sink, source, Inherited, cph) <- streamingProcess (shell "cat") 23 | ((), bs) <- concurrently 24 | (do 25 | L.hPut sink lbs 26 | hClose sink) 27 | (S.hGetContents source) 28 | L.fromChunks [bs] `shouldBe` lbs 29 | ec <- waitForStreamingProcess cph 30 | ec `shouldBe` ExitSuccess 31 | 32 | it "closed stream" $ do 33 | (ClosedStream, source, Inherited, cph) <- streamingProcess (shell "cat") 34 | bss <- S.hGetContents source 35 | bss `shouldBe` S.empty 36 | 37 | ec <- waitForStreamingProcess cph 38 | ec `shouldBe` ExitSuccess 39 | 40 | it "checked process" $ do 41 | let isRightException ProcessExitedUnsuccessfully {} = True 42 | withCheckedProcess (proc "false" []) 43 | (\Inherited Inherited Inherited -> return ()) 44 | `shouldThrow` isRightException 45 | 46 | #endif 47 | it "blocking vs non-blocking" $ do 48 | (ClosedStream, ClosedStream, ClosedStream, cph) <- streamingProcess (shell "sleep 1") 49 | 50 | mec1 <- getStreamingProcessExitCode cph 51 | mec1 `shouldBe` Nothing 52 | 53 | threadDelay 1500000 54 | 55 | mec2 <- getStreamingProcessExitCode cph 56 | mec2 `shouldBe` Just ExitSuccess 57 | 58 | ec <- waitForStreamingProcess cph 59 | ec `shouldBe` ExitSuccess 60 | -------------------------------------------------------------------------------- /test/Data/Streaming/FilesystemSpec.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | module Data.Streaming.FilesystemSpec (spec) where 3 | 4 | import Test.Hspec 5 | import Data.Streaming.Filesystem 6 | import Control.Exception (bracket) 7 | import Data.List (sort) 8 | #if !WINDOWS 9 | import System.Posix.Files (removeLink, createSymbolicLink, createNamedPipe) 10 | import Control.Exception (try, IOException) 11 | #endif 12 | 13 | spec :: Spec 14 | spec = describe "Data.Streaming.Filesystem" $ do 15 | it "dirstream" $ do 16 | res <- bracket (openDirStream "test/filesystem") closeDirStream 17 | $ \ds -> do 18 | Just w <- readDirStream ds 19 | Just x <- readDirStream ds 20 | Just y <- readDirStream ds 21 | Just z <- readDirStream ds 22 | return $ sort [w, x, y, z] 23 | res `shouldBe` ["bar.txt", "baz.txt", "bin", "foo.txt"] 24 | describe "getFileType" $ do 25 | it "file" $ getFileType "streaming-commons.cabal" >>= (`shouldBe` FTFile) 26 | it "dir" $ getFileType "Data" >>= (`shouldBe` FTDirectory) 27 | #if !WINDOWS 28 | it "file sym" $ do 29 | _ <- tryIO $ removeLink "tmp" 30 | createSymbolicLink "streaming-commons.cabal" "tmp" 31 | ft <- getFileType "tmp" 32 | _ <- tryIO $ removeLink "tmp" 33 | ft `shouldBe` FTFileSym 34 | it "file sym" $ do 35 | _ <- tryIO $ removeLink "tmp" 36 | createSymbolicLink "Data" "tmp" 37 | ft <- getFileType "tmp" 38 | _ <- tryIO $ removeLink "tmp" 39 | ft `shouldBe` FTDirectorySym 40 | it "other" $ do 41 | _ <- tryIO $ removeLink "tmp" 42 | e <- tryIO $ createNamedPipe "tmp" 0 43 | case e of 44 | -- Creating named pipe might fail on some filesystems 45 | Left _ -> return () 46 | Right _ -> do 47 | ft <- getFileType "tmp" 48 | _ <- tryIO $ removeLink "tmp" 49 | ft `shouldBe` FTOther 50 | it "recursive symlink is other" $ do 51 | _ <- tryIO $ removeLink "tmp" 52 | createSymbolicLink "tmp" "tmp" 53 | ft <- getFileType "tmp" 54 | _ <- tryIO $ removeLink "tmp" 55 | ft `shouldBe` FTOther 56 | it "dangling symlink is other" $ do 57 | _ <- tryIO $ removeLink "tmp" 58 | createSymbolicLink "doesnotexist" "tmp" 59 | ft <- getFileType "tmp" 60 | _ <- tryIO $ removeLink "tmp" 61 | ft `shouldBe` FTOther 62 | 63 | tryIO :: IO a -> IO (Either IOException a) 64 | tryIO = try 65 | #endif 66 | -------------------------------------------------------------------------------- /cbits/zlib-helper.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | z_stream * streaming_commons_create_z_stream(void) 5 | { 6 | z_stream *ret = malloc(sizeof(z_stream)); 7 | if (ret) { 8 | ret->zalloc = Z_NULL; 9 | ret->zfree = Z_NULL; 10 | ret->opaque = Z_NULL; 11 | ret->next_in = NULL; 12 | ret->avail_in = 0; 13 | ret->next_out = NULL; 14 | ret->avail_out = 0; 15 | } 16 | return ret; 17 | } 18 | 19 | int streaming_commons_inflate_init2(z_stream *stream, int window_bits) 20 | { 21 | return inflateInit2(stream, window_bits); 22 | } 23 | 24 | int streaming_commons_deflate_init2(z_stream *stream, int level, int methodBits, 25 | int memlevel, int strategy) 26 | { 27 | return deflateInit2(stream, level, Z_DEFLATED, methodBits, memlevel, strategy); 28 | } 29 | 30 | int streaming_commons_inflate_set_dictionary(z_stream *stream, const char* dictionary, 31 | unsigned int dictLength) { 32 | return inflateSetDictionary(stream, (const Bytef *)dictionary, dictLength); 33 | } 34 | 35 | int streaming_commons_deflate_set_dictionary(z_stream *stream, const char* dictionary, 36 | unsigned int dictLength) { 37 | return deflateSetDictionary(stream, (const Bytef *)dictionary, dictLength); 38 | } 39 | 40 | void streaming_commons_free_z_stream_inflate (z_stream *stream) 41 | { 42 | inflateEnd(stream); 43 | free(stream); 44 | } 45 | 46 | void streaming_commons_set_avail_in (z_stream *stream, char *buff, unsigned int avail) 47 | { 48 | stream->next_in = (Bytef *)buff; 49 | stream->avail_in = avail; 50 | } 51 | 52 | void streaming_commons_set_avail_out (z_stream *stream, char *buff, unsigned int avail) 53 | { 54 | stream->next_out = (Bytef *)buff; 55 | stream->avail_out = avail; 56 | } 57 | 58 | int streaming_commons_call_inflate_noflush (z_stream *stream) 59 | { 60 | return inflate(stream, Z_NO_FLUSH); 61 | } 62 | 63 | unsigned int streaming_commons_get_avail_in (z_stream *stream) 64 | { 65 | return stream->avail_in; 66 | } 67 | 68 | unsigned int streaming_commons_get_avail_out (z_stream *stream) 69 | { 70 | return stream->avail_out; 71 | } 72 | 73 | char* streaming_commons_get_next_in (z_stream *stream) 74 | { 75 | return (char *)stream->next_in; 76 | } 77 | 78 | void streaming_commons_free_z_stream_deflate (z_stream *stream) 79 | { 80 | deflateEnd(stream); 81 | free(stream); 82 | } 83 | 84 | int streaming_commons_call_deflate_noflush (z_stream *stream) 85 | { 86 | return deflate(stream, Z_NO_FLUSH); 87 | } 88 | 89 | int streaming_commons_call_deflate_flush (z_stream *stream) 90 | { 91 | return deflate(stream, Z_SYNC_FLUSH); 92 | } 93 | 94 | int streaming_commons_call_deflate_full_flush (z_stream *stream) 95 | { 96 | return deflate(stream, Z_FULL_FLUSH); 97 | } 98 | 99 | int streaming_commons_call_deflate_finish (z_stream *stream) 100 | { 101 | return deflate(stream, Z_FINISH); 102 | } 103 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Build and Test 2 | 3 | on: 4 | push: 5 | branches: [ "master" ] 6 | pull_request: 7 | branches: [ "master" ] 8 | 9 | jobs: 10 | cabal: 11 | strategy: 12 | matrix: 13 | ghc: ['8.10.7','9.0.2','9.2.8','9.4.8','9.6.7','9.8.4','9.10.2','9.12.2'] 14 | runs-on: [windows-latest, macos-latest, ubuntu-latest] 15 | exclude: 16 | - runs-on: macos-latest # requires llvm@13 but it's disabled in homebrew 17 | ghc: '8.10.7' 18 | - runs-on: macos-latest # requires llvm@13 but it's disabled in homebrew 19 | ghc: '9.0.2' 20 | fail-fast: false 21 | name: Cabal - ${{ matrix.runs-on }} GHC ${{ matrix.ghc }} 22 | runs-on: ${{ matrix.runs-on }} 23 | steps: 24 | - uses: actions/checkout@v5 25 | - uses: haskell-actions/setup@v2 26 | with: 27 | ghc-version: ${{ matrix.ghc }} 28 | 29 | - name: Cache dependencies 30 | uses: actions/cache@v4 31 | with: 32 | path: | 33 | ~/.cabal 34 | dist-newstyle 35 | key: ${{ matrix.runs-on }}-cabal-${{ matrix.ghc }}-${{ hashFiles('streaming-commons.cabal') }} 36 | restore-keys: | 37 | ${{ matrix.runs-on }}-cabal-${{ matrix.ghc }} 38 | ${{ matrix.runs-on }}-cabal- 39 | 40 | - name: Update Cabal package list 41 | run: cabal v2-update --with-compiler=ghc-${{ matrix.ghc }} 42 | 43 | - name: Build project 44 | run: cabal v2-build --with-compiler=ghc-${{ matrix.ghc }} 45 | 46 | - name: Run tests 47 | run: cabal v2-test --with-compiler=ghc-${{ matrix.ghc }} 48 | 49 | stack: 50 | strategy: 51 | matrix: 52 | resolver: ['lts-18','lts-19','lts-20','lts-21','lts-22', 'lts-23', 'lts-24','nightly'] 53 | runs-on: [windows-latest, macos-latest, ubuntu-latest] 54 | exclude: 55 | - runs-on: macos-latest # requires llvm@13 but it's disabled in homebrew 56 | resolver: 'lts-18' 57 | - runs-on: macos-latest # requires llvm@13 but it's disabled in homebrew 58 | resolver: 'lts-19' 59 | fail-fast: false 60 | name: Stack - ${{ matrix.runs-on }} Resolver ${{ matrix.resolver }} 61 | runs-on: ${{ matrix.runs-on }} 62 | steps: 63 | - uses: actions/checkout@v5 64 | - uses: haskell-actions/setup@v2 65 | with: 66 | enable-stack: true 67 | 68 | - name: Cache dependencies 69 | uses: actions/cache@v4 70 | with: 71 | path: ~/.stack 72 | key: ${{ matrix.runs-on }}-stack-${{ matrix.resolver }}-${{ hashFiles('stack.yaml') }} 73 | restore-keys: | 74 | ${{ matrix.runs-on }}-stack-${{ matrix.resolver }}- 75 | ${{ matrix.runs-on }}-stack- 76 | 77 | - name: Install dependencies for resolver ${{ matrix.resolver }} 78 | run: stack setup --resolver ${{ matrix.resolver }} 79 | 80 | - name: Build the project with resolver ${{ matrix.resolver }} 81 | run: stack build --resolver ${{ matrix.resolver }} 82 | 83 | - name: Run spec tests with resolver ${{ matrix.resolver }} 84 | run: stack test --resolver ${{ matrix.resolver }} 85 | -------------------------------------------------------------------------------- /System/Win32File.hsc: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ForeignFunctionInterface #-} 2 | {-# LANGUAGE GeneralizedNewtypeDeriving #-} 3 | module System.Win32File 4 | ( openFile 5 | , readChunk 6 | , closeFile 7 | , ReadHandle 8 | ) where 9 | 10 | import Foreign.C.String (CString) 11 | import Foreign.Ptr (castPtr) 12 | import Foreign.Marshal.Alloc (mallocBytes, free) 13 | import Foreign.ForeignPtr (ForeignPtr, withForeignPtr) 14 | #if __GLASGOW_HASKELL__ >= 704 15 | import Foreign.C.Types (CInt (..)) 16 | #else 17 | import Foreign.C.Types (CInt) 18 | #endif 19 | import Foreign.C.Error (throwErrnoIfMinus1Retry) 20 | import Foreign.Ptr (Ptr) 21 | import Data.Bits (Bits, (.|.)) 22 | import qualified Data.ByteString as S 23 | import qualified Data.ByteString.Unsafe as BU 24 | import qualified Data.ByteString.Internal as BI 25 | import Data.Text (pack) 26 | import Data.Text.Encoding (encodeUtf16LE) 27 | import Data.Word (Word8) 28 | import Prelude hiding (read) 29 | import GHC.ForeignPtr (mallocPlainForeignPtrBytes) 30 | import Data.ByteString.Lazy.Internal (defaultChunkSize) 31 | 32 | 33 | #include 34 | #include 35 | #include 36 | #include 37 | 38 | newtype OFlag = OFlag CInt 39 | deriving (Num, Bits, Show, Eq) 40 | 41 | #{enum OFlag, OFlag 42 | , oBinary = _O_BINARY 43 | , oRdonly = _O_RDONLY 44 | , oWronly = _O_WRONLY 45 | , oCreat = _O_CREAT 46 | } 47 | 48 | newtype SHFlag = SHFlag CInt 49 | deriving (Num, Bits, Show, Eq) 50 | 51 | #{enum SHFlag, SHFlag 52 | , shDenyno = _SH_DENYNO 53 | } 54 | 55 | newtype PMode = PMode CInt 56 | deriving (Num, Bits, Show, Eq) 57 | 58 | #{enum PMode, PMode 59 | , pIread = _S_IREAD 60 | , pIwrite = _S_IWRITE 61 | } 62 | 63 | foreign import ccall "_wsopen" 64 | c_wsopen :: CString -> OFlag -> SHFlag -> PMode -> IO CInt 65 | 66 | foreign import ccall "_read" 67 | c_read :: ReadHandle -> Ptr Word8 -> CInt -> IO CInt 68 | 69 | foreign import ccall "_write" 70 | c_write :: ReadHandle -> Ptr Word8 -> CInt -> IO CInt 71 | 72 | foreign import ccall "_close" 73 | closeFile :: ReadHandle -> IO () 74 | 75 | newtype ReadHandle = ReadHandle CInt 76 | 77 | openFile :: FilePath -> IO ReadHandle 78 | openFile fp = do 79 | -- need to append a null char 80 | -- note that useAsCString is not sufficient, as we need to have two 81 | -- null octets to account for UTF16 encoding 82 | let bs = encodeUtf16LE $ pack $ fp ++ "\0" 83 | h <- BU.unsafeUseAsCString bs $ \str -> 84 | throwErrnoIfMinus1Retry "Data.Streaming.FileRead.openFile" $ 85 | c_wsopen 86 | str 87 | (oBinary .|. oRdonly) 88 | shDenyno 89 | pIread 90 | return $ ReadHandle h 91 | 92 | readChunk :: ReadHandle -> IO S.ByteString 93 | readChunk fd = do 94 | fp <- mallocPlainForeignPtrBytes defaultChunkSize 95 | withForeignPtr fp $ \p -> do 96 | len <- throwErrnoIfMinus1Retry "System.Win32File.read" $ c_read fd p 97 | (fromIntegral defaultChunkSize) 98 | if len == 0 99 | then return $! S.empty 100 | else return $! BI.PS fp 0 (fromIntegral len) 101 | -------------------------------------------------------------------------------- /Data/Streaming/Filesystem.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | {-# LANGUAGE DeriveDataTypeable #-} 3 | {-# LANGUAGE ScopedTypeVariables #-} 4 | -- | Streaming functions for interacting with the filesystem. 5 | module Data.Streaming.Filesystem 6 | ( DirStream 7 | , openDirStream 8 | , readDirStream 9 | , closeDirStream 10 | , FileType (..) 11 | , getFileType 12 | ) where 13 | 14 | import Data.Typeable (Typeable) 15 | 16 | #if WINDOWS 17 | 18 | import qualified System.Win32 as Win32 19 | import System.FilePath (()) 20 | import Data.IORef (IORef, newIORef, readIORef, writeIORef) 21 | import System.Directory (doesFileExist, doesDirectoryExist) 22 | 23 | data DirStream = DirStream !Win32.HANDLE !Win32.FindData !(IORef Bool) 24 | deriving Typeable 25 | 26 | openDirStream :: FilePath -> IO DirStream 27 | openDirStream fp = do 28 | (h, fdat) <- Win32.findFirstFile $ fp "*" 29 | imore <- newIORef True -- always at least two records, "." and ".." 30 | return $! DirStream h fdat imore 31 | 32 | closeDirStream :: DirStream -> IO () 33 | closeDirStream (DirStream h _ _) = Win32.findClose h 34 | 35 | readDirStream :: DirStream -> IO (Maybe FilePath) 36 | readDirStream ds@(DirStream h fdat imore) = do 37 | more <- readIORef imore 38 | if more 39 | then do 40 | filename <- Win32.getFindDataFileName fdat 41 | Win32.findNextFile h fdat >>= writeIORef imore 42 | if filename == "." || filename == ".." 43 | then readDirStream ds 44 | else return $ Just filename 45 | else return Nothing 46 | 47 | isSymlink :: FilePath -> IO Bool 48 | isSymlink _ = return False 49 | 50 | getFileType :: FilePath -> IO FileType 51 | getFileType fp = do 52 | isFile <- doesFileExist fp 53 | if isFile 54 | then return FTFile 55 | else do 56 | isDir <- doesDirectoryExist fp 57 | return $ if isDir then FTDirectory else FTOther 58 | 59 | #else 60 | 61 | import System.Posix.Directory (DirStream, openDirStream, closeDirStream) 62 | import qualified System.Posix.Directory as Posix 63 | import qualified System.Posix.Files as PosixF 64 | import Control.Exception (try, IOException) 65 | 66 | readDirStream :: DirStream -> IO (Maybe FilePath) 67 | readDirStream ds = do 68 | fp <- Posix.readDirStream ds 69 | case fp of 70 | "" -> return Nothing 71 | "." -> readDirStream ds 72 | ".." -> readDirStream ds 73 | _ -> return $ Just fp 74 | 75 | getFileType :: FilePath -> IO FileType 76 | getFileType fp = do 77 | s <- PosixF.getSymbolicLinkStatus fp 78 | case () of 79 | () 80 | | PosixF.isRegularFile s -> return FTFile 81 | | PosixF.isDirectory s -> return FTDirectory 82 | | PosixF.isSymbolicLink s -> do 83 | es' <- try $ PosixF.getFileStatus fp 84 | case es' of 85 | Left (_ :: IOException) -> return FTOther 86 | Right s' 87 | | PosixF.isRegularFile s' -> return FTFileSym 88 | | PosixF.isDirectory s' -> return FTDirectorySym 89 | | otherwise -> return FTOther 90 | | otherwise -> return FTOther 91 | 92 | #endif 93 | 94 | data FileType 95 | = FTFile 96 | | FTFileSym -- ^ symlink to file 97 | | FTDirectory 98 | | FTDirectorySym -- ^ symlink to a directory 99 | | FTOther 100 | deriving (Show, Read, Eq, Ord, Typeable) 101 | -------------------------------------------------------------------------------- /Data/Streaming/Network/Internal.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | module Data.Streaming.Network.Internal 3 | ( ServerSettings (..) 4 | , ClientSettings (..) 5 | , HostPreference (..) 6 | , Message (..) 7 | , AppData (..) 8 | , ServerSettingsUnix (..) 9 | , ClientSettingsUnix (..) 10 | , AppDataUnix (..) 11 | ) where 12 | 13 | import Data.String (IsString (..)) 14 | import Data.ByteString (ByteString) 15 | import Network.Socket (Socket, SockAddr, Family) 16 | 17 | -- | Settings for a TCP server. It takes a port to listen on, and an optional 18 | -- hostname to bind to. 19 | data ServerSettings = ServerSettings 20 | { serverPort :: !Int 21 | , serverHost :: !HostPreference 22 | , serverSocket :: !(Maybe Socket) -- ^ listening socket 23 | , serverAfterBind :: !(Socket -> IO ()) 24 | , serverNeedLocalAddr :: !Bool 25 | , serverReadBufferSize :: !Int 26 | } 27 | 28 | -- | Settings for a TCP client, specifying how to connect to the server. 29 | data ClientSettings = ClientSettings 30 | { clientPort :: !Int 31 | , clientHost :: !ByteString 32 | , clientAddrFamily :: !Family 33 | , clientReadBufferSize :: !Int 34 | } 35 | 36 | -- | Which host to bind. 37 | -- 38 | -- Note: The @IsString@ instance recognizes the following special values: 39 | -- 40 | -- * @*@ means @HostAny@ - "any IPv4 or IPv6 hostname" 41 | -- 42 | -- * @*4@ means @HostIPv4@ - "any IPv4 or IPv6 hostname, IPv4 preferred" 43 | -- 44 | -- * @!4@ means @HostIPv4Only@ - "any IPv4 hostname" 45 | -- 46 | -- * @*6@ means @HostIPv6@@ - "any IPv4 or IPv6 hostname, IPv6 preferred" 47 | -- 48 | -- * @!6@ means @HostIPv6Only@ - "any IPv6 hostname" 49 | -- 50 | -- Note that the permissive @*@ values allow binding to an IPv4 or an 51 | -- IPv6 hostname, which means you might be able to successfully bind 52 | -- to a port more times than you expect (eg once on the IPv4 localhost 53 | -- 127.0.0.1 and again on the IPv6 localhost 0:0:0:0:0:0:0:1). 54 | -- 55 | -- Any other value is treated as a hostname. As an example, to bind to the 56 | -- IPv4 local host only, use \"127.0.0.1\". 57 | data HostPreference = 58 | HostAny 59 | | HostIPv4 60 | | HostIPv4Only 61 | | HostIPv6 62 | | HostIPv6Only 63 | | Host String 64 | deriving (Eq, Ord, Show, Read) 65 | 66 | instance IsString HostPreference where 67 | fromString "*" = HostAny 68 | fromString "*4" = HostIPv4 69 | fromString "!4" = HostIPv4Only 70 | fromString "*6" = HostIPv6 71 | fromString "!6" = HostIPv6Only 72 | fromString s = Host s 73 | 74 | -- | Settings for a Unix domain sockets server. 75 | data ServerSettingsUnix = ServerSettingsUnix 76 | { serverPath :: !FilePath 77 | , serverAfterBindUnix :: !(Socket -> IO ()) 78 | , serverReadBufferSizeUnix :: !Int 79 | } 80 | 81 | -- | Settings for a Unix domain sockets client. 82 | data ClientSettingsUnix = ClientSettingsUnix 83 | { clientPath :: !FilePath 84 | , clientReadBufferSizeUnix :: !Int 85 | } 86 | 87 | -- | The data passed to a Unix domain sockets @Application@. 88 | data AppDataUnix = AppDataUnix 89 | { appReadUnix :: !(IO ByteString) 90 | , appWriteUnix :: !(ByteString -> IO ()) 91 | } 92 | 93 | -- | Representation of a single UDP message 94 | data Message = Message { msgData :: {-# UNPACK #-} !ByteString 95 | , msgSender :: !SockAddr 96 | } 97 | 98 | -- | The data passed to an @Application@. 99 | data AppData = AppData 100 | { appRead' :: !(IO ByteString) 101 | , appWrite' :: !(ByteString -> IO ()) 102 | , appSockAddr' :: !SockAddr 103 | , appLocalAddr' :: !(Maybe SockAddr) 104 | , appCloseConnection' :: !(IO ()) 105 | , appRawSocket' :: Maybe Socket 106 | } 107 | -------------------------------------------------------------------------------- /ChangeLog.md: -------------------------------------------------------------------------------- 1 | # ChangeLog for streaming-commons 2 | 3 | ## 0.2.3.1 4 | 5 | * Fix `!6` host preference incorrectly allows IPv4 connections (https://github.com/fpco/streaming-commons/pull/83) 6 | * Fix case of windows header files to allow cross-compilation from linux to windows (https://github.com/fpco/streaming-commons/pull/84) 7 | 8 | ## 0.2.3.0 9 | 10 | * Allow Unix sockets on Windows (https://github.com/fpco/streaming-commons/pull/80) 11 | 12 | ## 0.2.2.6 13 | 14 | * Remove the zlib headers [#72](https://github.com/fpco/streaming-commons/issues/72) 15 | 16 | ## 0.2.2.4 17 | 18 | * Fix docstrings for text 2.0 19 | 20 | ## 0.2.2.3 21 | 22 | * Support text 2.0 [#65](https://github.com/fpco/streaming-commons/pull/65) 23 | 24 | ## 0.2.2.2 25 | 26 | * Support GHC 9.2 [#62](https://github.com/fpco/streaming-commons/pull/62) 27 | 28 | ## 0.2.2.1 29 | 30 | * Fix test suite compilation issue [stackage#5528](https://github.com/commercialhaskell/stackage/issues/5528) 31 | 32 | ## 0.2.2.0 33 | 34 | * Remove `AI_ADDRCONFIG` [#58](https://github.com/fpco/streaming-commons/issues/58) 35 | 36 | ## 0.2.1.2 37 | 38 | * Update `defaultReadBufferSize` to use system default instead of hardcoded value [#54](https://github.com/fpco/streaming-commons/issues/54) 39 | 40 | ## 0.2.1.1 41 | 42 | * Fix a failing test case (invalid `ByteString` copying), does not affect library itself 43 | 44 | ## 0.2.1.0 45 | 46 | * Change `bindRandomPortGen` to use binding to port 0 47 | 48 | ## 0.2.0 49 | 50 | * Drop `blaze-builder` dependency 51 | 52 | ## 0.1.19 53 | 54 | * Update `getAddrInfo` hints to allow hostnames and portnames [#46](https://github.com/fpco/streaming-commons/issues/46) 55 | 56 | ## 0.1.18 57 | 58 | * Add `isCompleteInflate` 59 | 60 | ## 0.1.17 61 | 62 | * Add `bindPortGenEx` 63 | 64 | ## 0.1.16 65 | 66 | * Add `closeStreamingProcessHandle` 67 | 68 | ## 0.1.15.5 69 | 70 | * Make getSocket{Family}TCP try all addr candidates [#32](https://github.com/fpco/streaming-commons/pull/32) 71 | 72 | ## 0.1.15.3 73 | 74 | * Fix benchmarks 75 | 76 | ## 0.1.15.2 77 | 78 | * Document child process behavior in `waitForProcess` 79 | 80 | ## 0.1.15.1 81 | 82 | * Catch exceptions thrown by `waitForProcess` 83 | 84 | ## 0.1.15 85 | 86 | * Use `NO_DELAY1 for TCP client connections [#27](https://github.com/fpco/streaming-commons/issues/27) 87 | 88 | ## 0.1.14.2 89 | 90 | * Fix bug in process exception display of args with spaces/quotes 91 | 92 | ## 0.1.14 93 | 94 | * Exporting HasReadBufferSize; instance for ClientSettingsUnix [#24](https://github.com/fpco/streaming-commons/pull/24) 95 | 96 | ## 0.1.13 97 | 98 | * Make size of read buffer configurable, change default size to 32 kiB [#23](https://github.com/fpco/streaming-commons/pull/23) 99 | 100 | ## 0.1.12.1 101 | 102 | * Fix up `acceptSafe` as [mentioned by Kazu](https://github.com/yesodweb/wai/issues/361#issuecomment-102204803) 103 | 104 | ## 0.1.12 105 | 106 | * `appRawSocket` 107 | 108 | ## 0.1.11 109 | 110 | * `getUnusedInflated`: Return uncompressed data following compressed data [#20](https://github.com/fpco/streaming-commons/issues/20) 111 | 112 | ## 0.1.10 113 | 114 | Support blaze-builder >= 0.4. Add `newByteStringBuilderRecv` to Data.Streaming.ByteString.Builder; add modules Data.Streaming.ByteString.Builder.Buffer and Data.Streaming.ByteString.Builder.Class. 115 | 116 | ## 0.1.9 117 | 118 | Add Data.Streaming.ByteString.Builder 119 | 120 | ## 0.1.8 121 | 122 | Generalise types of run\*Server which never cleanly return [#13](https://github.com/fpco/streaming-commons/pull/13) 123 | 124 | ## 0.1.7.1 125 | 126 | Fix `streamingProcess` so that it doesn't close `Handle`s passed in with 127 | `UseProvidedHandle`. 128 | 129 | ## 0.1.7 130 | 131 | `withCheckedProcess` added. 132 | 133 | ## 0.1.6 134 | 135 | Provide `appCloseConnection` to get the underlying connection from an `AppData`. 136 | -------------------------------------------------------------------------------- /Data/Streaming/Zlib/Lowlevel.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ForeignFunctionInterface #-} 2 | {-# LANGUAGE EmptyDataDecls #-} 3 | module Data.Streaming.Zlib.Lowlevel 4 | ( ZStreamStruct 5 | , ZStream' 6 | , zstreamNew 7 | , Strategy(..) 8 | , deflateInit2 9 | , inflateInit2 10 | , c_free_z_stream_inflate 11 | , c_free_z_stream_deflate 12 | , c_set_avail_in 13 | , c_set_avail_out 14 | , c_get_avail_out 15 | , c_get_avail_in 16 | , c_get_next_in 17 | , c_call_inflate_noflush 18 | , c_call_deflate_noflush 19 | , c_call_deflate_finish 20 | , c_call_deflate_flush 21 | , c_call_deflate_full_flush 22 | , c_call_deflate_set_dictionary 23 | , c_call_inflate_set_dictionary 24 | ) where 25 | 26 | import Data.Functor (void) 27 | import Foreign.C 28 | import Foreign.Ptr 29 | import Codec.Compression.Zlib (WindowBits (WindowBits)) 30 | 31 | data ZStreamStruct 32 | type ZStream' = Ptr ZStreamStruct 33 | 34 | data Strategy = 35 | StrategyDefault 36 | | StrategyFiltered 37 | | StrategyHuffman 38 | | StrategyRLE 39 | | StrategyFixed 40 | deriving (Show,Eq,Ord,Enum) 41 | 42 | foreign import ccall unsafe "streaming_commons_create_z_stream" 43 | zstreamNew :: IO ZStream' 44 | 45 | foreign import ccall unsafe "streaming_commons_deflate_init2" 46 | c_deflateInit2 :: ZStream' -> CInt -> CInt -> CInt -> CInt 47 | -> IO CInt 48 | 49 | deflateInit2 :: ZStream' -> Int -> WindowBits -> Int -> Strategy -> IO () 50 | deflateInit2 zstream level windowBits memlevel strategy = void $ 51 | c_deflateInit2 zstream (fromIntegral level) (wbToInt windowBits) 52 | (fromIntegral memlevel) 53 | (fromIntegral $ fromEnum strategy) 54 | 55 | foreign import ccall unsafe "streaming_commons_inflate_init2" 56 | c_inflateInit2 :: ZStream' -> CInt -> IO CInt 57 | 58 | inflateInit2 :: ZStream' -> WindowBits -> IO () 59 | inflateInit2 zstream wb = void $ c_inflateInit2 zstream (wbToInt wb) 60 | 61 | foreign import ccall unsafe "&streaming_commons_free_z_stream_inflate" 62 | c_free_z_stream_inflate :: FunPtr (ZStream' -> IO ()) 63 | 64 | foreign import ccall unsafe "&streaming_commons_free_z_stream_deflate" 65 | c_free_z_stream_deflate :: FunPtr (ZStream' -> IO ()) 66 | 67 | foreign import ccall unsafe "streaming_commons_set_avail_in" 68 | c_set_avail_in :: ZStream' -> Ptr CChar -> CUInt -> IO () 69 | 70 | foreign import ccall unsafe "streaming_commons_set_avail_out" 71 | c_set_avail_out :: ZStream' -> Ptr CChar -> CUInt -> IO () 72 | 73 | foreign import ccall unsafe "streaming_commons_get_avail_out" 74 | c_get_avail_out :: ZStream' -> IO CUInt 75 | 76 | foreign import ccall unsafe "streaming_commons_get_avail_in" 77 | c_get_avail_in :: ZStream' -> IO CUInt 78 | 79 | foreign import ccall unsafe "streaming_commons_get_next_in" 80 | c_get_next_in :: ZStream' -> IO (Ptr CChar) 81 | 82 | foreign import ccall unsafe "streaming_commons_call_inflate_noflush" 83 | c_call_inflate_noflush :: ZStream' -> IO CInt 84 | 85 | foreign import ccall unsafe "streaming_commons_call_deflate_noflush" 86 | c_call_deflate_noflush :: ZStream' -> IO CInt 87 | 88 | foreign import ccall unsafe "streaming_commons_call_deflate_finish" 89 | c_call_deflate_finish :: ZStream' -> IO CInt 90 | 91 | foreign import ccall unsafe "streaming_commons_call_deflate_flush" 92 | c_call_deflate_flush :: ZStream' -> IO CInt 93 | 94 | foreign import ccall unsafe "streaming_commons_call_deflate_full_flush" 95 | c_call_deflate_full_flush :: ZStream' -> IO CInt 96 | 97 | foreign import ccall unsafe "streaming_commons_deflate_set_dictionary" 98 | c_call_deflate_set_dictionary :: ZStream' -> Ptr CChar -> CUInt -> IO CInt 99 | 100 | foreign import ccall unsafe "streaming_commons_inflate_set_dictionary" 101 | c_call_inflate_set_dictionary :: ZStream' -> Ptr CChar -> CUInt -> IO CInt 102 | 103 | wbToInt :: WindowBits -> CInt 104 | wbToInt (WindowBits i) = fromIntegral i 105 | wbToInt _ = 15 106 | 107 | -------------------------------------------------------------------------------- /test/Data/Streaming/ByteString/BuilderSpec.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# LANGUAGE RecordWildCards #-} 3 | {-# LANGUAGE ScopedTypeVariables #-} 4 | module Data.Streaming.ByteString.BuilderSpec 5 | ( spec 6 | ) where 7 | 8 | import qualified Data.ByteString as S 9 | import Data.ByteString.Char8 () 10 | import qualified Data.ByteString.Builder as B 11 | import Data.ByteString.Builder (Builder) 12 | import qualified Data.ByteString.Builder.Internal as B 13 | import qualified Data.ByteString.Lazy as L 14 | import Data.ByteString.Lazy.Char8 () 15 | import Data.IORef 16 | import Data.Maybe 17 | import Data.Monoid 18 | import Test.Hspec 19 | import Test.Hspec.QuickCheck (prop) 20 | 21 | import Data.Streaming.ByteString.Builder 22 | 23 | tester :: BufferAllocStrategy -> [Builder] -> IO [S.ByteString] 24 | tester strat builders0 = do 25 | (recv, finish) <- newBuilderRecv strat 26 | let loop front [] = do 27 | mbs <- finish 28 | return $ front $ maybe [] return mbs 29 | loop front0 (bu:bus) = do 30 | popper <- recv bu 31 | let go front = do 32 | bs <- popper 33 | if S.null bs 34 | then loop front bus 35 | else go (front . (bs:)) 36 | go front0 37 | loop id builders0 38 | 39 | testerFlush :: BufferAllocStrategy -> [Maybe Builder] -> IO [Maybe S.ByteString] 40 | testerFlush strat builders0 = do 41 | (recv, finish) <- newBuilderRecv strat 42 | let loop front [] = do 43 | mbs <- finish 44 | return $ front $ maybe [] (return . Just) mbs 45 | loop front0 (mbu:bus) = do 46 | popper <- recv $ fromMaybe B.flush mbu 47 | let go front = do 48 | bs <- popper 49 | if S.null bs 50 | then 51 | case mbu of 52 | Nothing -> loop (front . (Nothing:)) bus 53 | Just _ -> loop front bus 54 | else go (front . (Just bs:)) 55 | go front0 56 | loop id builders0 57 | 58 | builderSpec :: Spec 59 | builderSpec = do 60 | prop "idempotent to toLazyByteString" $ \bss' -> do 61 | let bss = map S.pack bss' 62 | let builders = map B.byteString bss 63 | let lbs = B.toLazyByteString $ mconcat builders 64 | outBss <- tester defaultStrategy builders 65 | L.fromChunks outBss `shouldBe` lbs 66 | 67 | it "works for large input" $ do 68 | let builders = replicate 10000 (B.byteString "hello world!") 69 | let lbs = B.toLazyByteString $ mconcat builders 70 | outBss <- tester defaultStrategy builders 71 | L.fromChunks outBss `shouldBe` lbs 72 | 73 | it "works for lazy bytestring insertion" $ do 74 | let builders = replicate 10000 (B.lazyByteStringInsert "hello world!") 75 | let lbs = B.toLazyByteString $ mconcat builders 76 | outBss <- tester defaultStrategy builders 77 | L.fromChunks outBss `shouldBe` lbs 78 | 79 | prop "works for strict bytestring insertion" $ \bs' -> do 80 | let bs = S.pack bs' 81 | let builders = replicate 10000 (B.byteStringCopy bs `Data.Monoid.mappend` B.byteStringInsert bs) 82 | let lbs = B.toLazyByteString $ mconcat builders 83 | outBss <- tester defaultStrategy builders 84 | L.fromChunks outBss `shouldBe` lbs 85 | 86 | it "flush shouldn't bring in empty strings." $ do 87 | let dat = ["hello", "world"] 88 | builders = map ((`mappend` B.flush) . B.byteString) dat 89 | out <- tester defaultStrategy builders 90 | dat `shouldBe` out 91 | 92 | prop "flushing" $ \bss' -> do 93 | let bss = concatMap (\bs -> [Just $ S.pack bs, Nothing]) $ filter (not . null) bss' 94 | let builders = map (fmap B.byteString) bss 95 | outBss <- testerFlush defaultStrategy builders 96 | outBss `shouldBe` bss 97 | it "large flush input" $ do 98 | let lbs = L.pack $ concat $ replicate 100000 [0..255] 99 | chunks = map (Just . B.byteString) (L.toChunks lbs) 100 | bss <- testerFlush defaultStrategy chunks 101 | L.fromChunks (catMaybes bss) `shouldBe` lbs 102 | 103 | spec :: Spec 104 | spec = 105 | describe "Data.Streaming.ByteString.Builder" $ do 106 | 107 | builderSpec 108 | 109 | let prop_idempotent i bss' = do 110 | let bss = mconcat (map (B.byteString . S.pack) bss') 111 | ior <- newIORef [] 112 | toByteStringIOWith 16 113 | (\s -> do let s' = S.copy s 114 | s' `seq` modifyIORef ior (s' :)) 115 | bss 116 | chunks <- readIORef ior 117 | let have = L.unpack (L.fromChunks (reverse chunks)) 118 | want = L.unpack (B.toLazyByteString bss) 119 | (i, have) `shouldBe` (i, want) 120 | 121 | prop "toByteStringIO idempotent to toLazyByteString" (prop_idempotent (0::Int)) 122 | 123 | it "toByteStringIO idempotent to toLazyBytestring, specific case" $ do 124 | let bss' = replicate 10 [0..255] 125 | mapM_ (\i -> prop_idempotent i bss') [(1::Int)..100] 126 | -------------------------------------------------------------------------------- /streaming-commons.cabal: -------------------------------------------------------------------------------- 1 | name: streaming-commons 2 | version: 0.2.3.1 3 | synopsis: Common lower-level functions needed by various streaming data libraries 4 | description: Provides low-dependency functionality commonly needed by various streaming data libraries, such as conduit and pipes. 5 | homepage: https://github.com/fpco/streaming-commons 6 | license: MIT 7 | license-file: LICENSE 8 | author: Michael Snoyman, Emanuel Borsboom 9 | maintainer: michael@snoyman.com 10 | -- copyright: 11 | category: Data 12 | build-type: Simple 13 | cabal-version: >=1.10 14 | extra-source-files: 15 | test/filesystem/*.txt 16 | test/filesystem/bin/*.txt 17 | include/*.h 18 | cbits/*.c 19 | test/LICENSE.gz 20 | ChangeLog.md 21 | README.md 22 | 23 | flag use-bytestring-builder 24 | description: Use bytestring-builder package 25 | default: False 26 | 27 | library 28 | default-language: Haskell2010 29 | exposed-modules: Data.Streaming.ByteString.Builder 30 | Data.Streaming.ByteString.Builder.Buffer 31 | Data.Streaming.FileRead 32 | Data.Streaming.Filesystem 33 | Data.Streaming.Network 34 | Data.Streaming.Network.Internal 35 | Data.Streaming.Process 36 | Data.Streaming.Process.Internal 37 | Data.Streaming.Text 38 | Data.Streaming.Zlib 39 | Data.Streaming.Zlib.Lowlevel 40 | 41 | build-depends: base >= 4.12 && < 5 42 | , array 43 | , async 44 | , bytestring 45 | , directory 46 | , network >= 2.4.0.0 47 | , random 48 | , process 49 | , stm 50 | , text >= 1.2 && < 1.3 || >= 2.0 && < 2.2 51 | , transformers 52 | , zlib 53 | 54 | c-sources: cbits/zlib-helper.c 55 | cbits/text-helper.c 56 | include-dirs: include 57 | 58 | if os(windows) 59 | build-depends: Win32 60 | , filepath 61 | cpp-options: -DWINDOWS 62 | other-modules: System.Win32File 63 | else 64 | build-depends: unix 65 | 66 | if flag(use-bytestring-builder) 67 | build-depends: bytestring < 0.10.2.0 68 | , bytestring-builder 69 | else 70 | build-depends: bytestring >= 0.10.2.0 71 | 72 | test-suite test 73 | default-language: Haskell2010 74 | hs-source-dirs: test 75 | main-is: Spec.hs 76 | type: exitcode-stdio-1.0 77 | ghc-options: -Wall -threaded 78 | other-modules: Data.Streaming.ByteString.BuilderSpec 79 | Data.Streaming.FileReadSpec 80 | Data.Streaming.FilesystemSpec 81 | Data.Streaming.NetworkSpec 82 | Data.Streaming.ProcessSpec 83 | Data.Streaming.TextSpec 84 | Data.Streaming.ZlibSpec 85 | build-depends: base 86 | , streaming-commons 87 | , hspec >= 1.8 88 | 89 | , QuickCheck 90 | , array 91 | , async 92 | , bytestring 93 | , deepseq 94 | , network >= 2.4.0.0 95 | , text 96 | , zlib 97 | build-tool-depends: 98 | hspec-discover:hspec-discover 99 | 100 | if flag(use-bytestring-builder) 101 | build-depends: bytestring < 0.10.2.0 102 | , bytestring-builder 103 | else 104 | build-depends: bytestring >= 0.10.2.0 105 | 106 | if os(windows) 107 | cpp-options: -DWINDOWS 108 | else 109 | build-depends: unix 110 | 111 | benchmark count-chars 112 | default-language: Haskell2010 113 | type: exitcode-stdio-1.0 114 | hs-source-dirs: bench 115 | build-depends: base 116 | , gauge 117 | , bytestring 118 | , text 119 | , streaming-commons 120 | main-is: count-chars.hs 121 | ghc-options: -Wall -O2 122 | 123 | benchmark decode-memory-usage 124 | default-language: Haskell2010 125 | type: exitcode-stdio-1.0 126 | hs-source-dirs: bench 127 | build-depends: base 128 | , bytestring 129 | , text 130 | , streaming-commons 131 | main-is: decode-memory-usage.hs 132 | ghc-options: -Wall -O2 -with-rtsopts=-s 133 | 134 | benchmark builder-to-bytestring-io 135 | default-language: Haskell2010 136 | type: exitcode-stdio-1.0 137 | hs-source-dirs: bench 138 | main-is: builder-to-bytestring-io.hs 139 | ghc-options: -Wall -O2 140 | build-depends: base 141 | , bytestring >= 0.10.2 142 | , gauge 143 | , deepseq 144 | , streaming-commons 145 | 146 | if flag(use-bytestring-builder) 147 | build-depends: bytestring < 0.10.2.0 148 | , bytestring-builder 149 | else 150 | build-depends: bytestring >= 0.10.2.0 151 | 152 | source-repository head 153 | type: git 154 | location: git://github.com/fpco/streaming-commons.git 155 | -------------------------------------------------------------------------------- /test/Data/Streaming/TextSpec.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | module Data.Streaming.TextSpec (spec) where 3 | 4 | import Test.Hspec 5 | import Test.Hspec.QuickCheck 6 | import qualified Data.Streaming.Text as SD 7 | import qualified Data.Text.Lazy.Encoding as TLE 8 | import qualified Data.ByteString as S 9 | import qualified Data.ByteString.Lazy as L 10 | import Control.Exception (evaluate, try, SomeException) 11 | import Control.DeepSeq (deepseq, NFData) 12 | import qualified Data.Text as T 13 | import qualified Data.Text.Encoding as TE 14 | import qualified Data.Text.Lazy as TL 15 | import Control.Monad (forM_) 16 | import Data.ByteString.Char8 () 17 | 18 | try' :: NFData a => a -> IO (Either SomeException a) 19 | try' a = try $ evaluate (a `deepseq` a) 20 | 21 | spec :: Spec 22 | spec = describe "Data.Streaming.TextSpec" $ {-modifyMaxSuccess (const 10000) $ -}do 23 | let test name lazy stream encodeLazy encodeStrict = describe name $ do 24 | prop "bytes" $ check lazy stream 25 | prop "chars" $ \css -> do 26 | let ts = map T.pack css 27 | lt = TL.fromChunks ts 28 | lbs = encodeLazy lt 29 | bss = L.toChunks lbs 30 | wss = map S.unpack bss 31 | in check lazy stream wss 32 | it "high code points" $ forM_ [100, 200..50000] $ \cnt -> do 33 | let t = T.replicate cnt "\x10000" 34 | bs = encodeStrict t 35 | case stream bs of 36 | SD.DecodeResultSuccess t' dec -> do 37 | t' `shouldBe` t 38 | case dec S.empty of 39 | SD.DecodeResultSuccess _ _ -> return () 40 | SD.DecodeResultFailure _ _ -> error "unexpected failure 1" 41 | SD.DecodeResultFailure _ _ -> error "unexpected failure 2" 42 | 43 | check lazy stream wss = do 44 | let bss = map S.pack wss 45 | lbs = L.fromChunks bss 46 | x <- try' $ feedLazy stream lbs 47 | y <- try' $ lazy lbs 48 | case (x, y) of 49 | (Right x', Right y') -> x' `shouldBe` y' 50 | (Left _, Left _) -> return () 51 | _ -> error $ show (x, y) 52 | test "UTF8" TLE.decodeUtf8 SD.decodeUtf8 TLE.encodeUtf8 TE.encodeUtf8 53 | test "UTF8 pure" TLE.decodeUtf8 SD.decodeUtf8Pure TLE.encodeUtf8 TE.encodeUtf8 54 | test "UTF16LE" TLE.decodeUtf16LE SD.decodeUtf16LE TLE.encodeUtf16LE TE.encodeUtf16LE 55 | test "UTF16BE" TLE.decodeUtf16BE SD.decodeUtf16BE TLE.encodeUtf16BE TE.encodeUtf16BE 56 | test "UTF32LE" TLE.decodeUtf32LE SD.decodeUtf32LE TLE.encodeUtf32LE TE.encodeUtf32LE 57 | test "UTF32BE" TLE.decodeUtf32BE SD.decodeUtf32BE TLE.encodeUtf32BE TE.encodeUtf32BE 58 | 59 | describe "UTF8 leftovers" $ do 60 | describe "C" $ do 61 | it "single chunk" $ do 62 | let bs = "good\128\128bad" 63 | case SD.decodeUtf8 bs of 64 | SD.DecodeResultSuccess _ _ -> error "Shouldn't have succeeded" 65 | SD.DecodeResultFailure t bs' -> do 66 | t `shouldBe` "good" 67 | bs' `shouldBe` "\128\128bad" 68 | 69 | it "multi chunk, no good" $ do 70 | let bs1 = "\226" 71 | bs2 = "\130" 72 | bs3 = "ABC" 73 | case SD.decodeUtf8 bs1 of 74 | SD.DecodeResultSuccess "" dec2 -> 75 | case dec2 bs2 of 76 | SD.DecodeResultSuccess "" dec3 -> 77 | case dec3 bs3 of 78 | SD.DecodeResultFailure "" bs -> bs `shouldBe` "\226\130ABC" 79 | _ -> error "fail on dec3" 80 | _ -> error "fail on dec2" 81 | _ -> error "fail on dec1" 82 | 83 | it "multi chunk, good in the middle" $ do 84 | let bs1 = "\226" 85 | bs2 = "\130\172\226" 86 | bs3 = "\130ABC" 87 | case SD.decodeUtf8 bs1 of 88 | SD.DecodeResultSuccess "" dec2 -> 89 | case dec2 bs2 of 90 | SD.DecodeResultSuccess "\x20AC" dec3 -> 91 | case dec3 bs3 of 92 | SD.DecodeResultFailure "" bs -> bs `shouldBe` "\226\130ABC" 93 | _ -> error "fail on dec3" 94 | _ -> error "fail on dec2" 95 | _ -> error "fail on dec1" 96 | describe "pure" $ do 97 | it "multi chunk, no good" $ do 98 | let bs1 = "\226" 99 | bs2 = "\130" 100 | bs3 = "ABC" 101 | case SD.decodeUtf8Pure bs1 of 102 | SD.DecodeResultSuccess "" dec2 -> 103 | case dec2 bs2 of 104 | SD.DecodeResultSuccess "" dec3 -> 105 | case dec3 bs3 of 106 | SD.DecodeResultFailure "" bs -> bs `shouldBe` "\226\130ABC" 107 | _ -> error "fail on dec3" 108 | _ -> error "fail on dec2" 109 | _ -> error "fail on dec1" 110 | 111 | describe "UTF16LE spot checks" $ do 112 | it "[[0,216,0],[220,0,0,0,0,0,0]]" $ do 113 | let bss = map S.pack [[0,216,0],[220,0,0,0,0,0,0]] 114 | lbs = L.fromChunks bss 115 | x <- try' $ feedLazy SD.decodeUtf16LE lbs 116 | y <- try' $ TLE.decodeUtf16LE lbs 117 | case (x, y) of 118 | (Right x', Right y') -> x' `shouldBe` y' 119 | (Left _, Left _) -> return () 120 | _ -> error $ show (x, y) 121 | 122 | feedLazy :: (S.ByteString -> SD.DecodeResult) 123 | -> L.ByteString 124 | -> TL.Text 125 | feedLazy start = 126 | TL.fromChunks . loop start . L.toChunks 127 | where 128 | loop dec [] = 129 | case dec S.empty of 130 | SD.DecodeResultSuccess t _ -> [t] 131 | SD.DecodeResultFailure _ _ -> [error "invalid sequence 1"] 132 | loop dec (bs:bss) = 133 | case dec bs of 134 | SD.DecodeResultSuccess t dec' -> t : loop dec' bss 135 | SD.DecodeResultFailure _ _ -> [error "invalid sequence 2"] 136 | -------------------------------------------------------------------------------- /Data/Streaming/ByteString/Builder/Buffer.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns #-} 2 | {-# LANGUAGE CPP #-} 3 | {-# LANGUAGE Rank2Types #-} 4 | 5 | -- | Buffers for 'Builder's. This is a partial copy of blaze-builder-0.3.3.4's 6 | -- "Blaze.ByteString.Builder.Internal.Buffer" module, which was removed in 7 | -- blaze-builder-0.4. 8 | -- 9 | -- If you are using blaze-builder 0.3.*, this module just re-exports from 10 | -- "Blaze.ByteString.Builder.Internal.Buffer". 11 | -- 12 | -- Since 0.1.10.0 13 | -- 14 | module Data.Streaming.ByteString.Builder.Buffer 15 | ( 16 | -- * Buffers 17 | Buffer (..) 18 | 19 | -- ** Status information 20 | , freeSize 21 | , sliceSize 22 | , bufferSize 23 | 24 | -- ** Creation and modification 25 | , allocBuffer 26 | , reuseBuffer 27 | , nextSlice 28 | , updateEndOfSlice 29 | 30 | -- ** Conversion to bytestings 31 | , unsafeFreezeBuffer 32 | , unsafeFreezeNonEmptyBuffer 33 | 34 | -- * Buffer allocation strategies 35 | , BufferAllocStrategy 36 | , allNewBuffersStrategy 37 | , reuseBufferStrategy 38 | , defaultStrategy 39 | ) where 40 | 41 | import Data.ByteString.Lazy.Internal (defaultChunkSize) 42 | 43 | import qualified Data.ByteString as S 44 | import qualified Data.ByteString.Internal as S 45 | import Foreign (Word8, ForeignPtr, Ptr, plusPtr, minusPtr) 46 | import Foreign.ForeignPtr.Unsafe (unsafeForeignPtrToPtr) 47 | 48 | ------------------------------------------------------------------------------ 49 | -- Buffers 50 | ------------------------------------------------------------------------------ 51 | 52 | -- | A buffer @Buffer fpbuf p0 op ope@ describes a buffer with the underlying 53 | -- byte array @fpbuf..ope@, the currently written slice @p0..op@ and the free 54 | -- space @op..ope@. 55 | -- 56 | -- Since 0.1.10.0 57 | -- 58 | data Buffer = Buffer {-# UNPACK #-} !(ForeignPtr Word8) -- underlying pinned array 59 | {-# UNPACK #-} !(Ptr Word8) -- beginning of slice 60 | {-# UNPACK #-} !(Ptr Word8) -- next free byte 61 | {-# UNPACK #-} !(Ptr Word8) -- first byte after buffer 62 | 63 | -- | The size of the free space of the buffer. 64 | -- 65 | -- Since 0.1.10.0 66 | -- 67 | freeSize :: Buffer -> Int 68 | freeSize (Buffer _ _ op ope) = ope `minusPtr` op 69 | 70 | -- | The size of the written slice in the buffer. 71 | -- 72 | -- Since 0.1.10.0 73 | -- 74 | sliceSize :: Buffer -> Int 75 | sliceSize (Buffer _ p0 op _) = op `minusPtr` p0 76 | 77 | -- | The size of the whole byte array underlying the buffer. 78 | -- 79 | -- Since 0.1.10.0 80 | -- 81 | bufferSize :: Buffer -> Int 82 | bufferSize (Buffer fpbuf _ _ ope) = 83 | ope `minusPtr` unsafeForeignPtrToPtr fpbuf 84 | 85 | -- | @allocBuffer size@ allocates a new buffer of size @size@. 86 | -- 87 | -- Since 0.1.10.0 88 | -- 89 | {-# INLINE allocBuffer #-} 90 | allocBuffer :: Int -> IO Buffer 91 | allocBuffer size = do 92 | fpbuf <- S.mallocByteString size 93 | let !pbuf = unsafeForeignPtrToPtr fpbuf 94 | return $! Buffer fpbuf pbuf pbuf (pbuf `plusPtr` size) 95 | 96 | -- | Resets the beginning of the next slice and the next free byte such that 97 | -- the whole buffer can be filled again. 98 | -- 99 | -- Since 0.1.10.0 100 | -- 101 | {-# INLINE reuseBuffer #-} 102 | reuseBuffer :: Buffer -> Buffer 103 | reuseBuffer (Buffer fpbuf _ _ ope) = Buffer fpbuf p0 p0 ope 104 | where 105 | p0 = unsafeForeignPtrToPtr fpbuf 106 | 107 | -- | Convert the buffer to a bytestring. This operation is unsafe in the sense 108 | -- that created bytestring shares the underlying byte array with the buffer. 109 | -- Hence, depending on the later use of this buffer (e.g., if it gets reset and 110 | -- filled again) referential transparency may be lost. 111 | -- 112 | -- Since 0.1.10.0 113 | -- 114 | {-# INLINE unsafeFreezeBuffer #-} 115 | unsafeFreezeBuffer :: Buffer -> S.ByteString 116 | unsafeFreezeBuffer (Buffer fpbuf p0 op _) = 117 | S.PS fpbuf (p0 `minusPtr` unsafeForeignPtrToPtr fpbuf) (op `minusPtr` p0) 118 | 119 | -- | Convert a buffer to a non-empty bytestring. See 'unsafeFreezeBuffer' for 120 | -- the explanation of why this operation may be unsafe. 121 | -- 122 | -- Since 0.1.10.0 123 | -- 124 | {-# INLINE unsafeFreezeNonEmptyBuffer #-} 125 | unsafeFreezeNonEmptyBuffer :: Buffer -> Maybe S.ByteString 126 | unsafeFreezeNonEmptyBuffer buf 127 | | sliceSize buf <= 0 = Nothing 128 | | otherwise = Just $ unsafeFreezeBuffer buf 129 | 130 | -- | Update the end of slice pointer. 131 | -- 132 | -- Since 0.1.10.0 133 | -- 134 | {-# INLINE updateEndOfSlice #-} 135 | updateEndOfSlice :: Buffer -- Old buffer 136 | -> Ptr Word8 -- New end of slice 137 | -> Buffer -- Updated buffer 138 | updateEndOfSlice (Buffer fpbuf p0 _ ope) op' = Buffer fpbuf p0 op' ope 139 | 140 | -- | Move the beginning of the slice to the next free byte such that the 141 | -- remaining free space of the buffer can be filled further. This operation 142 | -- is safe and can be used to fill the remaining part of the buffer after a 143 | -- direct insertion of a bytestring or a flush. 144 | -- 145 | -- Since 0.1.10.0 146 | -- 147 | {-# INLINE nextSlice #-} 148 | nextSlice :: Int -> Buffer -> Maybe Buffer 149 | nextSlice minSize (Buffer fpbuf _ op ope) 150 | | ope `minusPtr` op <= minSize = Nothing 151 | | otherwise = Just (Buffer fpbuf op op ope) 152 | 153 | ------------------------------------------------------------------------------ 154 | -- Buffer allocation strategies 155 | ------------------------------------------------------------------------------ 156 | 157 | -- | A buffer allocation strategy @(buf0, nextBuf)@ specifies the initial 158 | -- buffer to use and how to compute a new buffer @nextBuf minSize buf@ with at 159 | -- least size @minSize@ from a filled buffer @buf@. The double nesting of the 160 | -- @IO@ monad helps to ensure that the reference to the filled buffer @buf@ is 161 | -- lost as soon as possible, but the new buffer doesn't have to be allocated 162 | -- too early. 163 | -- 164 | -- Since 0.1.10.0 165 | -- 166 | type BufferAllocStrategy = (IO Buffer, Int -> Buffer -> IO (IO Buffer)) 167 | 168 | -- | The simplest buffer allocation strategy: whenever a buffer is requested, 169 | -- allocate a new one that is big enough for the next build step to execute. 170 | -- 171 | -- NOTE that this allocation strategy may spill quite some memory upon direct 172 | -- insertion of a bytestring by the builder. Thats no problem for garbage 173 | -- collection, but it may lead to unreasonably high memory consumption in 174 | -- special circumstances. 175 | -- 176 | -- Since 0.1.10.0 177 | -- 178 | allNewBuffersStrategy :: Int -- Minimal buffer size. 179 | -> BufferAllocStrategy 180 | allNewBuffersStrategy bufSize = 181 | ( allocBuffer bufSize 182 | , \reqSize _ -> return (allocBuffer (max reqSize bufSize)) ) 183 | 184 | -- | An unsafe, but possibly more efficient buffer allocation strategy: 185 | -- reuse the buffer, if it is big enough for the next build step to execute. 186 | -- 187 | -- Since 0.1.10.0 188 | -- 189 | reuseBufferStrategy :: IO Buffer 190 | -> BufferAllocStrategy 191 | reuseBufferStrategy buf0 = 192 | (buf0, tryReuseBuffer) 193 | where 194 | tryReuseBuffer reqSize buf 195 | | bufferSize buf >= reqSize = return $ return (reuseBuffer buf) 196 | | otherwise = return $ allocBuffer reqSize 197 | 198 | defaultStrategy :: BufferAllocStrategy 199 | defaultStrategy = allNewBuffersStrategy defaultChunkSize 200 | -------------------------------------------------------------------------------- /Data/Streaming/ByteString/Builder.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | {-# LANGUAGE FlexibleContexts #-} 3 | {-# LANGUAGE BangPatterns #-} 4 | {-# LANGUAGE RankNTypes #-} 5 | 6 | -- | Convert a stream of bytestring @Builder@s into a stream of @ByteString@s. 7 | -- 8 | -- Adapted from blaze-builder-enumerator, written by Michael Snoyman and Simon Meier. 9 | -- 10 | -- Note that the functions here can work in any monad built on top of @IO@ or 11 | -- @ST@. 12 | -- 13 | -- Also provides @toByteStringIO*@ like "Blaze.ByteString.Builder"s, for 14 | -- "Data.ByteString.Builder". 15 | -- 16 | -- Since 0.1.9 17 | -- 18 | module Data.Streaming.ByteString.Builder 19 | ( BuilderRecv 20 | , BuilderPopper 21 | , BuilderFinish 22 | , newBuilderRecv 23 | , newByteStringBuilderRecv 24 | 25 | -- * toByteStringIO 26 | , toByteStringIO 27 | , toByteStringIOWith 28 | , toByteStringIOWithBuffer 29 | 30 | -- * Buffers 31 | , Buffer 32 | 33 | -- ** Status information 34 | , freeSize 35 | , sliceSize 36 | , bufferSize 37 | 38 | -- ** Creation and modification 39 | , allocBuffer 40 | , reuseBuffer 41 | , nextSlice 42 | 43 | -- ** Conversion to bytestings 44 | , unsafeFreezeBuffer 45 | , unsafeFreezeNonEmptyBuffer 46 | 47 | -- * Buffer allocation strategies 48 | , BufferAllocStrategy 49 | , allNewBuffersStrategy 50 | , reuseBufferStrategy 51 | , defaultStrategy 52 | ) 53 | where 54 | 55 | import Control.Monad (when,unless) 56 | import qualified Data.ByteString as S 57 | import Data.ByteString.Builder (Builder) 58 | import Data.ByteString.Builder.Extra (runBuilder, BufferWriter, Next(Done, More, Chunk)) 59 | import Data.ByteString.Internal (mallocByteString, ByteString(PS)) 60 | import Data.ByteString.Lazy.Internal (defaultChunkSize) 61 | import Data.IORef (newIORef, writeIORef, readIORef) 62 | import Data.Word (Word8) 63 | import Foreign.ForeignPtr (ForeignPtr, withForeignPtr) 64 | import Foreign.Ptr (plusPtr, minusPtr) 65 | 66 | import Data.Streaming.ByteString.Builder.Buffer 67 | 68 | -- | Provides a series of @ByteString@s until empty, at which point it provides 69 | -- an empty @ByteString@. 70 | -- 71 | -- Since 0.1.10.0 72 | -- 73 | type BuilderPopper = IO S.ByteString 74 | 75 | type BuilderRecv = Builder -> IO BuilderPopper 76 | 77 | type BuilderFinish = IO (Maybe S.ByteString) 78 | 79 | newBuilderRecv :: BufferAllocStrategy -> IO (BuilderRecv, BuilderFinish) 80 | newBuilderRecv = newByteStringBuilderRecv 81 | {-# INLINE newBuilderRecv #-} 82 | 83 | newByteStringBuilderRecv :: BufferAllocStrategy -> IO (BuilderRecv, BuilderFinish) 84 | newByteStringBuilderRecv (ioBufInit, nextBuf) = do 85 | refBuf <- newIORef ioBufInit 86 | return (push refBuf, finish refBuf) 87 | where 88 | finish refBuf = do 89 | ioBuf <- readIORef refBuf 90 | buf <- ioBuf 91 | return $ unsafeFreezeNonEmptyBuffer buf 92 | 93 | push refBuf builder = do 94 | refWri <- newIORef $ Left $ runBuilder builder 95 | return $ popper refBuf refWri 96 | 97 | popper refBuf refWri = do 98 | ioBuf <- readIORef refBuf 99 | ebWri <- readIORef refWri 100 | case ebWri of 101 | Left bWri -> do 102 | !buf@(Buffer _ _ op ope) <- ioBuf 103 | (bytes, next) <- bWri op (ope `minusPtr` op) 104 | let op' = op `plusPtr` bytes 105 | case next of 106 | Done -> do 107 | writeIORef refBuf $ return $ updateEndOfSlice buf op' 108 | return S.empty 109 | More minSize bWri' -> do 110 | let buf' = updateEndOfSlice buf op' 111 | {-# INLINE cont #-} 112 | cont mbs = do 113 | -- sequencing the computation of the next buffer 114 | -- construction here ensures that the reference to the 115 | -- foreign pointer `fp` is lost as soon as possible. 116 | ioBuf' <- nextBuf minSize buf' 117 | writeIORef refBuf ioBuf' 118 | writeIORef refWri $ Left bWri' 119 | case mbs of 120 | Just bs | not $ S.null bs -> return bs 121 | _ -> popper refBuf refWri 122 | cont $ unsafeFreezeNonEmptyBuffer buf' 123 | Chunk bs bWri' -> do 124 | let buf' = updateEndOfSlice buf op' 125 | let yieldBS = do 126 | nextBuf 1 buf' >>= writeIORef refBuf 127 | writeIORef refWri $ Left bWri' 128 | if S.null bs 129 | then popper refBuf refWri 130 | else return bs 131 | case unsafeFreezeNonEmptyBuffer buf' of 132 | Nothing -> yieldBS 133 | Just bs' -> do 134 | writeIORef refWri $ Right yieldBS 135 | return bs' 136 | Right action -> action 137 | 138 | -- | Use a pre-existing buffer to 'toByteStringIOWith'. 139 | -- 140 | -- Since 0.1.9 141 | -- 142 | toByteStringIOWithBuffer :: Int 143 | -> (ByteString -> IO ()) 144 | -> Builder 145 | -> ForeignPtr Word8 146 | -> IO () 147 | toByteStringIOWithBuffer initBufSize io b initBuf = do 148 | go initBufSize initBuf (runBuilder b) 149 | where 150 | go bufSize buf = loop 151 | where 152 | loop :: BufferWriter -> IO () 153 | loop wr = do 154 | (len, next) <- withForeignPtr buf (flip wr bufSize) 155 | when (len > 0) (io $! PS buf 0 len) 156 | case next of 157 | Done -> return () 158 | More newBufSize nextWr 159 | | newBufSize > bufSize -> do 160 | newBuf <- mallocByteString newBufSize 161 | go newBufSize newBuf nextWr 162 | | otherwise -> loop nextWr 163 | Chunk s nextWr -> do 164 | unless (S.null s) (io s) 165 | loop nextWr 166 | 167 | -- | @toByteStringIOWith bufSize io b@ runs the builder @b@ with a buffer of 168 | -- at least the size @bufSize@ and executes the 'IO' action @io@ whenever the 169 | -- buffer is full. 170 | -- 171 | -- Compared to 'toLazyByteStringWith' this function requires less allocation, 172 | -- as the output buffer is only allocated once at the start of the 173 | -- serialization and whenever something bigger than the current buffer size has 174 | -- to be copied into the buffer, which should happen very seldomly for the 175 | -- default buffer size of 32kb. Hence, the pressure on the garbage collector is 176 | -- reduced, which can be an advantage when building long sequences of bytes. 177 | -- 178 | -- Since 0.1.9 179 | -- 180 | toByteStringIOWith :: Int -- ^ Buffer size (upper bounds 181 | -- the number of bytes forced 182 | -- per call to the 'IO' action). 183 | -> (ByteString -> IO ()) -- ^ 'IO' action to execute per 184 | -- full buffer, which is 185 | -- referenced by a strict 186 | -- 'S.ByteString'. 187 | -> Builder -- ^ 'Builder' to run. 188 | -> IO () 189 | toByteStringIOWith bufSize io b = 190 | toByteStringIOWithBuffer bufSize io b =<< mallocByteString bufSize 191 | {-# INLINE toByteStringIOWith #-} 192 | 193 | -- | Run the builder with a 'defaultChunkSize'd buffer and execute the given 194 | -- 'IO' action whenever the buffer is full or gets flushed. 195 | -- 196 | -- @ 'toByteStringIO' = 'toByteStringIOWith' 'defaultChunkSize'@ 197 | -- 198 | -- Since 0.1.9 199 | -- 200 | toByteStringIO :: (ByteString -> IO ()) 201 | -> Builder 202 | -> IO () 203 | toByteStringIO = toByteStringIOWith defaultChunkSize 204 | {-# INLINE toByteStringIO #-} 205 | -------------------------------------------------------------------------------- /Data/Streaming/Process.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | {-# LANGUAGE DeriveDataTypeable #-} 3 | -- | A full tutorial for this module is available at: 4 | -- . 5 | -- 6 | -- Note that, while the tutorial covers @Data.Conduit.Process@, that module closely 7 | -- follows the present one, and almost all concepts in the tutorial apply here. 8 | module Data.Streaming.Process 9 | ( -- * Functions 10 | streamingProcess 11 | , closeStreamingProcessHandle 12 | -- * Specialized streaming types 13 | , Inherited (..) 14 | , ClosedStream (..) 15 | , UseProvidedHandle (..) 16 | -- * Process handle 17 | , StreamingProcessHandle 18 | , waitForStreamingProcess 19 | , waitForStreamingProcessSTM 20 | , getStreamingProcessExitCode 21 | , getStreamingProcessExitCodeSTM 22 | , streamingProcessHandleRaw 23 | , streamingProcessHandleTMVar 24 | -- * Type classes 25 | , InputSource 26 | , OutputSink 27 | -- * Checked processes 28 | , withCheckedProcess 29 | , ProcessExitedUnsuccessfully (..) 30 | -- * Reexport 31 | , module System.Process 32 | ) where 33 | 34 | import Control.Applicative as A ((<$>), (<*>)) 35 | import Control.Concurrent (forkIOWithUnmask) 36 | import Control.Concurrent.STM (STM, TMVar, atomically, 37 | newEmptyTMVar, putTMVar, 38 | readTMVar) 39 | import Control.Exception (Exception, throwIO, try, throw, 40 | SomeException, finally) 41 | import Control.Monad.IO.Class (MonadIO, liftIO) 42 | import Data.Maybe (fromMaybe) 43 | import Data.Streaming.Process.Internal 44 | import Data.Typeable (Typeable) 45 | import System.Exit (ExitCode (ExitSuccess)) 46 | import System.IO (hClose) 47 | import System.Process 48 | 49 | #if MIN_VERSION_process(1,2,0) 50 | import qualified System.Process.Internals as PI 51 | #endif 52 | 53 | #if MIN_VERSION_stm(2,3,0) 54 | import Control.Concurrent.STM (tryReadTMVar) 55 | #else 56 | import Control.Concurrent.STM (tryTakeTMVar, putTMVar) 57 | 58 | tryReadTMVar :: TMVar a -> STM (Maybe a) 59 | tryReadTMVar var = do 60 | mx <- tryTakeTMVar var 61 | case mx of 62 | Nothing -> return () 63 | Just x -> putTMVar var x 64 | return mx 65 | #endif 66 | 67 | -- | Use the @Handle@ provided by the @CreateProcess@ value. This would allow 68 | -- you, for example, to open up a @Handle@ to a file, set it as @std_out@, and 69 | -- avoid any additional overhead of dealing with providing that data to your 70 | -- process. 71 | -- 72 | -- Since 0.1.4 73 | data UseProvidedHandle = UseProvidedHandle 74 | 75 | -- | Inherit the stream from the current process. 76 | -- 77 | -- Since 0.1.4 78 | data Inherited = Inherited 79 | 80 | -- | Close the stream with the child process. 81 | -- 82 | -- You usually do not want to use this, as it will leave the corresponding file 83 | -- descriptor unassigned and hence available for re-use in the child process. 84 | -- 85 | -- Since 0.1.4 86 | data ClosedStream = ClosedStream 87 | 88 | instance InputSource ClosedStream where 89 | isStdStream = (\(Just h) -> hClose h >> return ClosedStream, Just CreatePipe) 90 | instance InputSource Inherited where 91 | isStdStream = (\Nothing -> return Inherited, Just Inherit) 92 | instance InputSource UseProvidedHandle where 93 | isStdStream = (\Nothing -> return UseProvidedHandle, Nothing) 94 | 95 | instance OutputSink ClosedStream where 96 | osStdStream = (\(Just h) -> hClose h >> return ClosedStream, Just CreatePipe) 97 | instance OutputSink Inherited where 98 | osStdStream = (\Nothing -> return Inherited, Just Inherit) 99 | instance OutputSink UseProvidedHandle where 100 | osStdStream = (\Nothing -> return UseProvidedHandle, Nothing) 101 | 102 | -- | Blocking call to wait for a process to exit. 103 | -- 104 | -- Since 0.1.4 105 | waitForStreamingProcess :: MonadIO m => StreamingProcessHandle -> m ExitCode 106 | waitForStreamingProcess = liftIO . atomically . waitForStreamingProcessSTM 107 | 108 | -- | STM version of @waitForStreamingProcess@. 109 | -- 110 | -- Since 0.1.4 111 | waitForStreamingProcessSTM :: StreamingProcessHandle -> STM ExitCode 112 | waitForStreamingProcessSTM = readTMVar . streamingProcessHandleTMVar 113 | 114 | -- | Non-blocking call to check for a process exit code. 115 | -- 116 | -- Since 0.1.4 117 | getStreamingProcessExitCode :: MonadIO m => StreamingProcessHandle -> m (Maybe ExitCode) 118 | getStreamingProcessExitCode = liftIO . atomically . getStreamingProcessExitCodeSTM 119 | 120 | -- | STM version of @getStreamingProcessExitCode@. 121 | -- 122 | -- Since 0.1.4 123 | getStreamingProcessExitCodeSTM :: StreamingProcessHandle -> STM (Maybe ExitCode) 124 | getStreamingProcessExitCodeSTM = tryReadTMVar . streamingProcessHandleTMVar 125 | 126 | -- | Get the raw @ProcessHandle@ from a @StreamingProcessHandle@. Note that 127 | -- you should avoid using this to get the process exit code, and instead 128 | -- use the provided functions. 129 | -- 130 | -- Since 0.1.4 131 | streamingProcessHandleRaw :: StreamingProcessHandle -> ProcessHandle 132 | streamingProcessHandleRaw (StreamingProcessHandle ph _ _) = ph 133 | 134 | -- | Get the @TMVar@ storing the process exit code. In general, one of the 135 | -- above functions should be used instead to avoid accidentally corrupting the variable\'s state.. 136 | -- 137 | -- Since 0.1.4 138 | streamingProcessHandleTMVar :: StreamingProcessHandle -> TMVar ExitCode 139 | streamingProcessHandleTMVar (StreamingProcessHandle _ var _) = var 140 | 141 | -- | The primary function for running a process. Note that, with the 142 | -- exception of 'UseProvidedHandle', the values for @std_in@, @std_out@ 143 | -- and @std_err@ will be ignored by this function. 144 | -- 145 | -- Since 0.1.4 146 | streamingProcess :: (MonadIO m, InputSource stdin, OutputSink stdout, OutputSink stderr) 147 | => CreateProcess 148 | -> m (stdin, stdout, stderr, StreamingProcessHandle) 149 | streamingProcess cp = liftIO $ do 150 | let (getStdin, stdinStream) = isStdStream 151 | (getStdout, stdoutStream) = osStdStream 152 | (getStderr, stderrStream) = osStdStream 153 | 154 | #if MIN_VERSION_process(1,2,0) 155 | (stdinH, stdoutH, stderrH, ph) <- PI.createProcess_ "streamingProcess" cp 156 | #else 157 | (stdinH, stdoutH, stderrH, ph) <- createProcess cp 158 | #endif 159 | { std_in = fromMaybe (std_in cp) stdinStream 160 | , std_out = fromMaybe (std_out cp) stdoutStream 161 | , std_err = fromMaybe (std_err cp) stderrStream 162 | } 163 | 164 | ec <- atomically newEmptyTMVar 165 | -- Apparently waitForProcess can throw an exception itself when 166 | -- delegate_ctlc is True, so to avoid this TMVar from being left empty, we 167 | -- capture any exceptions and store them as an impure exception in the 168 | -- TMVar 169 | _ <- forkIOWithUnmask $ \_unmask -> try (waitForProcess ph) 170 | >>= atomically 171 | . putTMVar ec 172 | . either 173 | (throw :: SomeException -> a) 174 | id 175 | 176 | let close = 177 | mclose stdinH `finally` mclose stdoutH `finally` mclose stderrH 178 | where 179 | mclose = maybe (return ()) hClose 180 | 181 | (,,,) 182 | A.<$> getStdin stdinH 183 | A.<*> getStdout stdoutH 184 | <*> getStderr stderrH 185 | <*> return (StreamingProcessHandle ph ec close) 186 | 187 | -- | Free any resources (e.g. @Handle@s) acquired by a call to 'streamingProcess'. 188 | -- 189 | -- @since 0.1.16 190 | closeStreamingProcessHandle :: MonadIO m => StreamingProcessHandle -> m () 191 | closeStreamingProcessHandle (StreamingProcessHandle _ _ f) = liftIO f 192 | 193 | -- | Indicates that a process exited with an non-success exit code. 194 | -- 195 | -- Since 0.1.7 196 | data ProcessExitedUnsuccessfully = ProcessExitedUnsuccessfully CreateProcess ExitCode 197 | deriving Typeable 198 | instance Show ProcessExitedUnsuccessfully where 199 | show (ProcessExitedUnsuccessfully cp ec) = concat 200 | [ "Process exited with " 201 | , show ec 202 | , ": " 203 | , showCmdSpec (cmdspec cp) 204 | ] 205 | where 206 | showCmdSpec (ShellCommand str) = str 207 | showCmdSpec (RawCommand x xs) = unwords (x:map showArg xs) 208 | 209 | -- Ensure that strings that need to be escaped are 210 | showArg x 211 | | any (\c -> c == '"' || c == ' ') x = show x 212 | | otherwise = x 213 | instance Exception ProcessExitedUnsuccessfully 214 | 215 | -- | Run a process and supply its streams to the given callback function. After 216 | -- the callback completes, wait for the process to complete and check its exit 217 | -- code. If the exit code is not a success, throw a 218 | -- 'ProcessExitedUnsuccessfully'. 219 | -- 220 | -- NOTE: This function does not kill the child process or ensure 221 | -- resources are cleaned up in the event of an exception from the 222 | -- provided function. For that, please use @withCheckedProcessCleanup@ 223 | -- from the @conduit-extra@ package. 224 | -- 225 | -- Since 0.1.7 226 | withCheckedProcess :: ( InputSource stdin 227 | , OutputSink stderr 228 | , OutputSink stdout 229 | , MonadIO m 230 | ) 231 | => CreateProcess 232 | -> (stdin -> stdout -> stderr -> m b) 233 | -> m b 234 | withCheckedProcess cp f = do 235 | (x, y, z, sph) <- streamingProcess cp 236 | res <- f x y z 237 | liftIO $ do 238 | ec <- waitForStreamingProcess sph `finally` closeStreamingProcessHandle sph 239 | if ec == ExitSuccess 240 | then return res 241 | else throwIO $ ProcessExitedUnsuccessfully cp ec 242 | -------------------------------------------------------------------------------- /cbits/text-helper.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 Bryan O'Sullivan . 3 | * 4 | * Portions copyright (c) 2008-2010 Björn Höhrmann . 5 | * 6 | * See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. 7 | */ 8 | 9 | #include 10 | #include 11 | #include 12 | #include "text_cbits.h" 13 | 14 | void _hs_streaming_commons_memcpy(void *dest, size_t doff, const void *src, size_t soff, 15 | size_t n) 16 | { 17 | char *cdest = dest; 18 | const char *csrc = src; 19 | memcpy(cdest + (doff<<1), csrc + (soff<<1), n<<1); 20 | } 21 | 22 | int _hs_streaming_commons_memcmp(const void *a, size_t aoff, const void *b, size_t boff, 23 | size_t n) 24 | { 25 | const char *ca = a; 26 | const char *cb = b; 27 | return memcmp(ca + (aoff<<1), cb + (boff<<1), n<<1); 28 | } 29 | 30 | #define UTF8_ACCEPT 0 31 | #define UTF8_REJECT 12 32 | 33 | static const uint8_t utf8d[] = { 34 | /* 35 | * The first part of the table maps bytes to character classes that 36 | * to reduce the size of the transition table and create bitmasks. 37 | */ 38 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 39 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 40 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 41 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 42 | 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 43 | 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 44 | 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 45 | 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, 46 | 47 | /* 48 | * The second part is a transition table that maps a combination of 49 | * a state of the automaton and a character class to a state. 50 | */ 51 | 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12, 52 | 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12, 53 | 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12, 54 | 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12, 55 | 12,36,12,12,12,12,12,12,12,12,12,12, 56 | }; 57 | 58 | static inline uint32_t 59 | decode(uint32_t *state, uint32_t* codep, uint32_t byte) { 60 | uint32_t type = utf8d[byte]; 61 | 62 | *codep = (*state != UTF8_ACCEPT) ? 63 | (byte & 0x3fu) | (*codep << 6) : 64 | (0xff >> type) & (byte); 65 | 66 | return *state = utf8d[256 + *state + type]; 67 | } 68 | 69 | /* 70 | * The ISO 8859-1 (aka latin-1) code points correspond exactly to the first 256 unicode 71 | * code-points, therefore we can trivially convert from a latin-1 encoded bytestring to 72 | * an UTF16 array 73 | */ 74 | void 75 | _hs_streaming_commons_decode_latin1(uint16_t *dest, const uint8_t *src, 76 | const uint8_t *srcend) 77 | { 78 | const uint8_t *p = src; 79 | 80 | #if defined(__i386__) || defined(__x86_64__) 81 | /* This optimization works on a little-endian systems by using 82 | (aligned) 32-bit loads instead of 8-bit loads 83 | */ 84 | 85 | /* consume unaligned prefix */ 86 | while (p != srcend && (uintptr_t)p & 0x3) 87 | *dest++ = *p++; 88 | 89 | /* iterate over 32-bit aligned loads */ 90 | while (p < srcend - 3) { 91 | const uint32_t w = *((const uint32_t *)p); 92 | 93 | *dest++ = w & 0xff; 94 | *dest++ = (w >> 8) & 0xff; 95 | *dest++ = (w >> 16) & 0xff; 96 | *dest++ = (w >> 24) & 0xff; 97 | 98 | p += 4; 99 | } 100 | #endif 101 | 102 | /* handle unaligned suffix */ 103 | while (p != srcend) 104 | *dest++ = *p++; 105 | } 106 | 107 | /* 108 | * A best-effort decoder. Runs until it hits either end of input or 109 | * the start of an invalid byte sequence. 110 | * 111 | * At exit, we update *destoff with the next offset to write to, *src 112 | * with the next source location past the last one successfully 113 | * decoded, and return the next source location to read from. 114 | * 115 | * Moreover, we expose the internal decoder state (state0 and 116 | * codepoint0), allowing one to restart the decoder after it 117 | * terminates (say, due to a partial codepoint). 118 | * 119 | * In particular, there are a few possible outcomes, 120 | * 121 | * 1) We decoded the buffer entirely: 122 | * In this case we return srcend 123 | * state0 == UTF8_ACCEPT 124 | * 125 | * 2) We met an invalid encoding 126 | * In this case we return the address of the first invalid byte 127 | * state0 == UTF8_REJECT 128 | * 129 | * 3) We reached the end of the buffer while decoding a codepoint 130 | * In this case we return a pointer to the first byte of the partial codepoint 131 | * state0 != UTF8_ACCEPT, UTF8_REJECT 132 | * 133 | */ 134 | #if defined(__GNUC__) || defined(__clang__) 135 | static inline uint8_t const * 136 | _hs_streaming_commons_decode_utf8_int(uint16_t *const dest, size_t *destoff, 137 | const uint8_t **src, const uint8_t *srcend, 138 | uint32_t *codepoint0, uint32_t *state0) 139 | __attribute((always_inline)); 140 | #endif 141 | 142 | static inline uint8_t const * 143 | _hs_streaming_commons_decode_utf8_int(uint16_t *const dest, size_t *destoff, 144 | const uint8_t **src, const uint8_t *srcend, 145 | uint32_t *codepoint0, uint32_t *state0) 146 | { 147 | uint16_t *d = dest + *destoff; 148 | const uint8_t *s = *src, *last = *src; 149 | uint32_t state = *state0; 150 | uint32_t codepoint = *codepoint0; 151 | 152 | while (s < srcend) { 153 | #if defined(__i386__) || defined(__x86_64__) 154 | /* 155 | * This code will only work on a little-endian system that 156 | * supports unaligned loads. 157 | * 158 | * It gives a substantial speed win on data that is purely or 159 | * partly ASCII (e.g. HTML), at only a slight cost on purely 160 | * non-ASCII text. 161 | */ 162 | 163 | if (state == UTF8_ACCEPT) { 164 | while (s < srcend - 4) { 165 | codepoint = *((const uint32_t *) s); 166 | if ((codepoint & 0x80808080) != 0) 167 | break; 168 | s += 4; 169 | 170 | /* 171 | * Tried 32-bit stores here, but the extra bit-twiddling 172 | * slowed the code down. 173 | */ 174 | 175 | *d++ = (uint16_t) (codepoint & 0xff); 176 | *d++ = (uint16_t) ((codepoint >> 8) & 0xff); 177 | *d++ = (uint16_t) ((codepoint >> 16) & 0xff); 178 | *d++ = (uint16_t) ((codepoint >> 24) & 0xff); 179 | } 180 | last = s; 181 | } 182 | #endif 183 | 184 | if (decode(&state, &codepoint, *s++) != UTF8_ACCEPT) { 185 | if (state != UTF8_REJECT) 186 | continue; 187 | break; 188 | } 189 | 190 | if (codepoint <= 0xffff) 191 | *d++ = (uint16_t) codepoint; 192 | else { 193 | *d++ = (uint16_t) (0xD7C0 + (codepoint >> 10)); 194 | *d++ = (uint16_t) (0xDC00 + (codepoint & 0x3FF)); 195 | } 196 | last = s; 197 | } 198 | 199 | *destoff = d - dest; 200 | *codepoint0 = codepoint; 201 | *state0 = state; 202 | *src = last; 203 | 204 | return s; 205 | } 206 | 207 | uint8_t const * 208 | _hs_streaming_commons_decode_utf8_state(uint16_t *const dest, size_t *destoff, 209 | const uint8_t **src, 210 | const uint8_t *srcend, 211 | uint32_t *codepoint0, uint32_t *state0) 212 | { 213 | uint8_t const *ret = _hs_streaming_commons_decode_utf8_int(dest, destoff, src, srcend, 214 | codepoint0, state0); 215 | if (*state0 == UTF8_REJECT) 216 | ret -=1; 217 | return ret; 218 | } 219 | 220 | /* 221 | * Helper to decode buffer and discard final decoder state 222 | */ 223 | const uint8_t * 224 | _hs_streaming_commons_decode_utf8(uint16_t *const dest, size_t *destoff, 225 | const uint8_t *src, const uint8_t *const srcend) 226 | { 227 | uint32_t codepoint; 228 | uint32_t state = UTF8_ACCEPT; 229 | uint8_t const *ret = _hs_streaming_commons_decode_utf8_int(dest, destoff, &src, srcend, 230 | &codepoint, &state); 231 | /* Back up if we have an incomplete or invalid encoding */ 232 | if (state != UTF8_ACCEPT) 233 | ret -= 1; 234 | return ret; 235 | } 236 | 237 | void 238 | _hs_streaming_commons_encode_utf8(uint8_t **destp, const uint16_t *src, size_t srcoff, 239 | size_t srclen) 240 | { 241 | const uint16_t *srcend; 242 | uint8_t *dest = *destp; 243 | 244 | src += srcoff; 245 | srcend = src + srclen; 246 | 247 | ascii: 248 | #if defined(__x86_64__) 249 | while (srcend - src >= 4) { 250 | uint64_t w = *((const uint64_t *) src); 251 | 252 | if (w & 0xFF80FF80FF80FF80ULL) { 253 | if (!(w & 0x000000000000FF80ULL)) { 254 | *dest++ = w & 0xFFFF; 255 | src++; 256 | if (!(w & 0x00000000FF800000ULL)) { 257 | *dest++ = (w >> 16) & 0xFFFF; 258 | src++; 259 | if (!(w & 0x0000FF8000000000ULL)) { 260 | *dest++ = (w >> 32) & 0xFFFF; 261 | src++; 262 | } 263 | } 264 | } 265 | break; 266 | } 267 | *dest++ = w & 0xFFFF; 268 | *dest++ = (w >> 16) & 0xFFFF; 269 | *dest++ = (w >> 32) & 0xFFFF; 270 | *dest++ = w >> 48; 271 | src += 4; 272 | } 273 | #endif 274 | 275 | #if defined(__i386__) 276 | while (srcend - src >= 2) { 277 | uint32_t w = *((uint32_t *) src); 278 | 279 | if (w & 0xFF80FF80) 280 | break; 281 | *dest++ = w & 0xFFFF; 282 | *dest++ = w >> 16; 283 | src += 2; 284 | } 285 | #endif 286 | 287 | while (src < srcend) { 288 | uint16_t w = *src++; 289 | 290 | if (w <= 0x7F) { 291 | *dest++ = w; 292 | /* An ASCII byte is likely to begin a run of ASCII bytes. 293 | Falling back into the fast path really helps performance. */ 294 | goto ascii; 295 | } 296 | else if (w <= 0x7FF) { 297 | *dest++ = (w >> 6) | 0xC0; 298 | *dest++ = (w & 0x3f) | 0x80; 299 | } 300 | else if (w < 0xD800 || w > 0xDBFF) { 301 | *dest++ = (w >> 12) | 0xE0; 302 | *dest++ = ((w >> 6) & 0x3F) | 0x80; 303 | *dest++ = (w & 0x3F) | 0x80; 304 | } else { 305 | uint32_t c = ((((uint32_t) w) - 0xD800) << 10) + 306 | (((uint32_t) *src++) - 0xDC00) + 0x10000; 307 | *dest++ = (c >> 18) | 0xF0; 308 | *dest++ = ((c >> 12) & 0x3F) | 0x80; 309 | *dest++ = ((c >> 6) & 0x3F) | 0x80; 310 | *dest++ = (c & 0x3F) | 0x80; 311 | } 312 | } 313 | 314 | *destp = dest; 315 | } 316 | -------------------------------------------------------------------------------- /test/Data/Streaming/ZlibSpec.hs: -------------------------------------------------------------------------------- 1 | {-# OPTIONS_GHC -fno-warn-orphans #-} 2 | {-# LANGUAGE OverloadedStrings #-} 3 | module Data.Streaming.ZlibSpec (spec) where 4 | 5 | import Test.Hspec 6 | import Test.Hspec.QuickCheck (prop) 7 | import Test.QuickCheck (Arbitrary (..)) 8 | 9 | import Control.Exception (throwIO) 10 | import Data.Streaming.Zlib 11 | import Codec.Compression.Zlib 12 | import qualified Codec.Compression.GZip as Gzip 13 | import qualified Data.ByteString as S 14 | import qualified Data.ByteString.Char8 as S8 15 | import qualified Data.ByteString.Lazy as L 16 | import qualified Data.ByteString.Lazy.Internal as LI 17 | import Control.Monad (foldM, forM_, forM) 18 | import System.IO.Unsafe (unsafePerformIO) 19 | import qualified Codec.Compression.Zlib.Raw as Raw 20 | 21 | decompress' :: L.ByteString -> L.ByteString 22 | decompress' gziped = unsafePerformIO $ do 23 | inf <- initInflate defaultWindowBits 24 | ungziped <- foldM (go' inf) id $ L.toChunks gziped 25 | final <- finishInflate inf 26 | return $ L.fromChunks $ ungziped [final] 27 | where 28 | go' inf front bs = feedInflate inf bs >>= go front 29 | go front x = do 30 | y <- x 31 | case y of 32 | PRDone -> return front 33 | PRNext z -> go (front . (:) z) x 34 | PRError e -> throwIO e 35 | 36 | instance Arbitrary L.ByteString where 37 | arbitrary = L.fromChunks `fmap` arbitrary 38 | instance Arbitrary S.ByteString where 39 | arbitrary = S.pack `fmap` arbitrary 40 | 41 | compress' :: L.ByteString -> L.ByteString 42 | compress' raw = unsafePerformIO $ do 43 | def <- initDeflate 7 defaultWindowBits 44 | gziped <- foldM (go' def) id $ L.toChunks raw 45 | gziped' <- go gziped $ finishDeflate def 46 | return $ L.fromChunks $ gziped' [] 47 | where 48 | go' def front bs = feedDeflate def bs >>= go front 49 | go front x = do 50 | y <- x 51 | case y of 52 | PRDone -> return front 53 | PRNext z -> go (front . (:) z) x 54 | PRError e -> throwIO e 55 | 56 | license :: S.ByteString 57 | license = S8.filter (/= '\r') $ unsafePerformIO $ S.readFile "LICENSE" 58 | 59 | exampleDict :: S.ByteString 60 | exampleDict = "INITIALDICTIONARY" 61 | 62 | deflateWithDict :: S.ByteString -> L.ByteString -> L.ByteString 63 | deflateWithDict dict raw = unsafePerformIO $ do 64 | def <- initDeflateWithDictionary 7 dict $ WindowBits 15 65 | compressed <- foldM (go' def) id $ L.toChunks raw 66 | compressed' <- go compressed $ finishDeflate def 67 | return $ L.fromChunks $ compressed' [] 68 | where 69 | go' def front bs = feedDeflate def bs >>= go front 70 | go front x = do 71 | y <- x 72 | case y of 73 | PRDone -> return front 74 | PRNext z -> go (front . (:) z) x 75 | PRError e -> throwIO e 76 | 77 | inflateWithDict :: S.ByteString -> L.ByteString -> L.ByteString 78 | inflateWithDict dict compressed = unsafePerformIO $ do 79 | inf <- initInflateWithDictionary (WindowBits 15) dict 80 | decompressed <- foldM (go' inf) id $ L.toChunks compressed 81 | final <- finishInflate inf 82 | return $ L.fromChunks $ decompressed [final] 83 | where 84 | go' inf front bs = feedInflate inf bs >>= go front 85 | go front x = do 86 | y <- x 87 | case y of 88 | PRDone -> return front 89 | PRNext z -> go (front . (:) z) x 90 | PRError e -> throwIO e 91 | 92 | spec :: Spec 93 | spec = describe "Data.Streaming.Zlib" $ do 94 | describe "inflate/deflate" $ do 95 | prop "decompress'" $ \lbs -> lbs == decompress' (compress lbs) 96 | prop "compress'" $ \lbs -> lbs == decompress (compress' lbs) 97 | 98 | prop "with dictionary" $ \bs -> 99 | bs == 100 | (inflateWithDict exampleDict . deflateWithDict exampleDict) bs 101 | it "different dict" $ do 102 | raw <- L.readFile "LICENSE" 103 | deflated <- return $ deflateWithDict exampleDict raw 104 | inflated <- return $ inflateWithDict (S.drop 1 exampleDict) deflated 105 | inflated `shouldSatisfy` L.null 106 | 107 | describe "license" $ do 108 | it "single deflate" $ do 109 | let go front x = do 110 | y <- x 111 | case y of 112 | PRDone -> return front 113 | PRNext z -> go (front . (:) z) x 114 | PRError e -> throwIO e 115 | def <- initDeflate 8 $ WindowBits 31 116 | gziped <- feedDeflate def license >>= go id 117 | gziped' <- go gziped $ finishDeflate def 118 | let raw' = L.fromChunks [license] 119 | raw' `shouldBe` Gzip.decompress (L.fromChunks $ gziped' []) 120 | 121 | it "single inflate" $ do 122 | let go front x = do 123 | y <- x 124 | case y of 125 | PRDone -> return front 126 | PRNext z -> go (front . (:) z) x 127 | PRError e -> throwIO e 128 | gziped <- S.readFile "test/LICENSE.gz" 129 | inf <- initInflate $ WindowBits 31 130 | popper <- feedInflate inf gziped 131 | ungziped <- go id popper 132 | final <- finishInflate inf 133 | license `shouldBe` (S.concat $ ungziped [final]) 134 | 135 | it "multi deflate" $ do 136 | let go' inf front bs = feedDeflate inf bs >>= go front 137 | go front x = do 138 | y <- x 139 | case y of 140 | PRDone -> return front 141 | PRNext z -> go (front . (:) z) x 142 | PRError e -> throwIO e 143 | def <- initDeflate 5 $ WindowBits 31 144 | gziped <- foldM (go' def) id $ map S.singleton $ S.unpack license 145 | gziped' <- go gziped $ finishDeflate def 146 | let raw' = L.fromChunks [license] 147 | raw' `shouldBe` (Gzip.decompress $ L.fromChunks $ gziped' []) 148 | 149 | it "multi inflate" $ do 150 | let go' inf front bs = feedInflate inf bs >>= go front 151 | go front x = do 152 | y <- x 153 | case y of 154 | PRDone -> return front 155 | PRNext z -> go (front . (:) z) x 156 | PRError e -> throwIO e 157 | gziped <- S.readFile "test/LICENSE.gz" 158 | let gziped' = map S.singleton $ S.unpack gziped 159 | inf <- initInflate $ WindowBits 31 160 | ungziped' <- foldM (go' inf) id gziped' 161 | final <- finishInflate inf 162 | license `shouldBe` (S.concat $ ungziped' [final]) 163 | 164 | describe "lbs zlib" $ do 165 | prop "inflate" $ \lbs -> unsafePerformIO $ do 166 | let glbs = compress lbs 167 | go' inf front bs = feedInflate inf bs >>= go front 168 | go front x = do 169 | y <- x 170 | case y of 171 | PRDone -> return front 172 | PRNext z -> go (front . (:) z) x 173 | PRError e -> throwIO e 174 | inf <- initInflate defaultWindowBits 175 | inflated <- foldM (go' inf) id $ L.toChunks glbs 176 | final <- finishInflate inf 177 | return $ lbs == L.fromChunks (inflated [final]) 178 | prop "deflate" $ \lbs -> unsafePerformIO $ do 179 | let go' inf front bs = feedDeflate inf bs >>= go front 180 | go front x = do 181 | y <- x 182 | case y of 183 | PRDone -> return front 184 | PRNext z -> go (front . (:) z) x 185 | PRError e -> throwIO e 186 | def <- initDeflate 7 defaultWindowBits 187 | deflated <- foldM (go' def) id $ L.toChunks lbs 188 | deflated' <- go deflated $ finishDeflate def 189 | return $ lbs == decompress (L.fromChunks (deflated' [])) 190 | 191 | describe "flushing" $ do 192 | let helper wb = do 193 | let bss0 = replicate 5000 "abc" 194 | def <- initDeflate 9 wb 195 | inf <- initInflate wb 196 | 197 | let popList pop = do 198 | mx <- pop 199 | case mx of 200 | PRDone -> return [] 201 | PRNext x -> do 202 | xs <- popList pop 203 | return $ x : xs 204 | PRError e -> throwIO e 205 | 206 | let callback name expected pop = do 207 | bssDeflated <- popList pop 208 | bsInflated <- fmap (S.concat . concat) $ forM bssDeflated $ \bs -> do 209 | x <- feedInflate inf bs >>= popList 210 | y <- flushInflate inf 211 | return $ x ++ [y] 212 | if bsInflated == expected 213 | then return () 214 | else error $ "callback " ++ name ++ ", got: " ++ show bsInflated ++ ", expected: " ++ show expected 215 | 216 | forM_ (zip [1..] bss0) $ \(i, bs) -> do 217 | feedDeflate def bs >>= callback ("loop" ++ show (i :: Int)) "" 218 | callback ("loop" ++ show (i :: Int)) bs $ flushDeflate def 219 | callback "finish" "" $ finishDeflate def 220 | it "zlib" $ helper defaultWindowBits 221 | it "gzip" $ helper $ WindowBits 31 222 | describe "large raw #9" $ do 223 | let size = fromIntegral $ LI.defaultChunkSize * 4 + 1 224 | input = L.replicate size 10 225 | it "compressing" $ do 226 | output <- fmap Raw.decompress $ compressRaw input 227 | L.all (== 10) output `shouldBe` True 228 | L.length output `shouldBe` L.length input 229 | it "decompressing" $ do 230 | output <- decompressRaw $ Raw.compress input 231 | L.all (== 10) output `shouldBe` True 232 | L.length output `shouldBe` L.length input 233 | 234 | it "getUnusedInflate" $ do 235 | let c = "This data is stored compressed." 236 | u = "This data isn't." 237 | def <- initDeflate 5 defaultWindowBits 238 | let loop front popper = do 239 | res <- popper 240 | case res of 241 | PRDone -> return front 242 | PRNext bs -> loop (S.append front bs) popper 243 | PRError e -> throwIO e 244 | 245 | c' <- feedDeflate def c >>= loop S.empty >>= flip loop (finishDeflate def) 246 | 247 | inf <- initInflate defaultWindowBits 248 | x <- feedInflate inf (S.append c' u) >>= loop S.empty 249 | y <- finishInflate inf 250 | S.append x y `shouldBe` c 251 | z <- getUnusedInflate inf 252 | z `shouldBe` u 253 | 254 | rawWindowBits :: WindowBits 255 | rawWindowBits = WindowBits (-15) 256 | 257 | decompressRaw :: L.ByteString -> IO L.ByteString 258 | decompressRaw gziped = do 259 | inf <- initInflate rawWindowBits 260 | ungziped <- foldM (go' inf) id $ L.toChunks gziped 261 | final <- finishInflate inf 262 | return $ L.fromChunks $ ungziped [final] 263 | where 264 | go' inf front bs = feedInflate inf bs >>= go front 265 | go front x = do 266 | y <- x 267 | case y of 268 | PRDone -> return front 269 | PRNext z -> go (front . (:) z) x 270 | PRError e -> throwIO e 271 | 272 | compressRaw :: L.ByteString -> IO L.ByteString 273 | compressRaw raw = do 274 | def <- initDeflate 1 rawWindowBits 275 | gziped <- foldM (go' def) id $ L.toChunks raw 276 | gziped' <- go gziped $ finishDeflate def 277 | return $ L.fromChunks $ gziped' [] 278 | where 279 | go' def front bs = feedDeflate def bs >>= go front 280 | go front x = do 281 | y <- x 282 | case y of 283 | PRDone -> return front 284 | PRNext z -> go (front . (:) z) x 285 | PRError e -> throwIO e 286 | -------------------------------------------------------------------------------- /Data/Streaming/Zlib.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveDataTypeable #-} 2 | -- | This is a middle-level wrapper around the zlib C API. It allows you to 3 | -- work fully with bytestrings and not touch the FFI at all, but is still 4 | -- low-level enough to allow you to implement high-level abstractions such as 5 | -- enumerators. Significantly, it does not use lazy IO. 6 | -- 7 | -- You'll probably need to reference the docs a bit to understand the 8 | -- WindowBits parameters below, but a basic rule of thumb is 15 is for zlib 9 | -- compression, and 31 for gzip compression. 10 | -- 11 | -- A simple streaming compressor in pseudo-code would look like: 12 | -- 13 | -- > def <- initDeflate ... 14 | -- > popper <- feedDeflate def rawContent 15 | -- > pullPopper popper 16 | -- > ... 17 | -- > finishDeflate def sendCompressedData 18 | -- 19 | -- You can see a more complete example is available in the included 20 | -- file-test.hs. 21 | module Data.Streaming.Zlib 22 | ( -- * Inflate 23 | Inflate 24 | , initInflate 25 | , initInflateWithDictionary 26 | , feedInflate 27 | , finishInflate 28 | , flushInflate 29 | , getUnusedInflate 30 | , isCompleteInflate 31 | -- * Deflate 32 | , Deflate 33 | , initDeflate 34 | , initDeflateWithDictionary 35 | , feedDeflate 36 | , finishDeflate 37 | , flushDeflate 38 | , fullFlushDeflate 39 | -- * Data types 40 | , WindowBits (..) 41 | , defaultWindowBits 42 | , ZlibException (..) 43 | , Popper 44 | , PopperRes (..) 45 | ) where 46 | 47 | import Data.Streaming.Zlib.Lowlevel 48 | import Foreign.ForeignPtr 49 | import Foreign.C.Types 50 | import Data.ByteString.Unsafe 51 | import Codec.Compression.Zlib (WindowBits(WindowBits), defaultWindowBits) 52 | import qualified Data.ByteString as S 53 | import Data.ByteString.Lazy.Internal (defaultChunkSize) 54 | import Data.Typeable (Typeable) 55 | import Control.Exception (Exception) 56 | import Control.Monad (when) 57 | import Data.IORef 58 | 59 | type ZStreamPair = (ForeignPtr ZStreamStruct, ForeignPtr CChar) 60 | 61 | -- | The state of an inflation (eg, decompression) process. All allocated 62 | -- memory is automatically reclaimed by the garbage collector. 63 | -- Also can contain the inflation dictionary that is used for decompression. 64 | data Inflate = Inflate 65 | ZStreamPair 66 | (IORef S.ByteString) -- last ByteString fed in, needed for getUnusedInflate 67 | (IORef Bool) -- set True when zlib indicates that inflation is complete 68 | (Maybe S.ByteString) -- dictionary 69 | 70 | -- | The state of a deflation (eg, compression) process. All allocated memory 71 | -- is automatically reclaimed by the garbage collector. 72 | newtype Deflate = Deflate ZStreamPair 73 | 74 | -- | Exception that can be thrown from the FFI code. The parameter is the 75 | -- numerical error code from the zlib library. Quoting the zlib.h file 76 | -- directly: 77 | -- 78 | -- * #define Z_OK 0 79 | -- 80 | -- * #define Z_STREAM_END 1 81 | -- 82 | -- * #define Z_NEED_DICT 2 83 | -- 84 | -- * #define Z_ERRNO (-1) 85 | -- 86 | -- * #define Z_STREAM_ERROR (-2) 87 | -- 88 | -- * #define Z_DATA_ERROR (-3) 89 | -- 90 | -- * #define Z_MEM_ERROR (-4) 91 | -- 92 | -- * #define Z_BUF_ERROR (-5) 93 | -- 94 | -- * #define Z_VERSION_ERROR (-6) 95 | 96 | data ZlibException = ZlibException Int 97 | deriving (Show, Typeable) 98 | instance Exception ZlibException 99 | 100 | -- | Some constants for the error codes, used internally 101 | zStreamEnd :: CInt 102 | zStreamEnd = 1 103 | 104 | zNeedDict :: CInt 105 | zNeedDict = 2 106 | 107 | zBufError :: CInt 108 | zBufError = -5 109 | 110 | -- | Initialize an inflation process with the given 'WindowBits'. You will need 111 | -- to call 'feedInflate' to feed compressed data to this and 112 | -- 'finishInflate' to extract the final chunk of decompressed data. 113 | initInflate :: WindowBits -> IO Inflate 114 | initInflate w = do 115 | zstr <- zstreamNew 116 | inflateInit2 zstr w 117 | fzstr <- newForeignPtr c_free_z_stream_inflate zstr 118 | fbuff <- mallocForeignPtrBytes defaultChunkSize 119 | withForeignPtr fbuff $ \buff -> 120 | c_set_avail_out zstr buff $ fromIntegral defaultChunkSize 121 | lastBS <- newIORef S.empty 122 | complete <- newIORef False 123 | return $ Inflate (fzstr, fbuff) lastBS complete Nothing 124 | 125 | -- | Initialize an inflation process with the given 'WindowBits'. 126 | -- Unlike initInflate a dictionary for inflation is set which must 127 | -- match the one set during compression. 128 | initInflateWithDictionary :: WindowBits -> S.ByteString -> IO Inflate 129 | initInflateWithDictionary w bs = do 130 | zstr <- zstreamNew 131 | inflateInit2 zstr w 132 | fzstr <- newForeignPtr c_free_z_stream_inflate zstr 133 | fbuff <- mallocForeignPtrBytes defaultChunkSize 134 | 135 | withForeignPtr fbuff $ \buff -> 136 | c_set_avail_out zstr buff $ fromIntegral defaultChunkSize 137 | lastBS <- newIORef S.empty 138 | complete <- newIORef False 139 | return $ Inflate (fzstr, fbuff) lastBS complete (Just bs) 140 | 141 | -- | Initialize a deflation process with the given compression level and 142 | -- 'WindowBits'. You will need to call 'feedDeflate' to feed uncompressed 143 | -- data to this and 'finishDeflate' to extract the final chunks of compressed 144 | -- data. 145 | initDeflate :: Int -- ^ Compression level 146 | -> WindowBits -> IO Deflate 147 | initDeflate level w = do 148 | zstr <- zstreamNew 149 | deflateInit2 zstr level w 8 StrategyDefault 150 | fzstr <- newForeignPtr c_free_z_stream_deflate zstr 151 | fbuff <- mallocForeignPtrBytes defaultChunkSize 152 | withForeignPtr fbuff $ \buff -> 153 | c_set_avail_out zstr buff $ fromIntegral defaultChunkSize 154 | return $ Deflate (fzstr, fbuff) 155 | 156 | -- | Initialize an deflation process with the given compression level and 157 | -- 'WindowBits'. 158 | -- Unlike initDeflate a dictionary for deflation is set. 159 | initDeflateWithDictionary :: Int -- ^ Compression level 160 | -> S.ByteString -- ^ Deflate dictionary 161 | -> WindowBits -> IO Deflate 162 | initDeflateWithDictionary level bs w = do 163 | zstr <- zstreamNew 164 | deflateInit2 zstr level w 8 StrategyDefault 165 | fzstr <- newForeignPtr c_free_z_stream_deflate zstr 166 | fbuff <- mallocForeignPtrBytes defaultChunkSize 167 | 168 | unsafeUseAsCStringLen bs $ \(cstr, len) -> do 169 | c_call_deflate_set_dictionary zstr cstr $ fromIntegral len 170 | 171 | withForeignPtr fbuff $ \buff -> 172 | c_set_avail_out zstr buff $ fromIntegral defaultChunkSize 173 | return $ Deflate (fzstr, fbuff) 174 | 175 | -- | Feed the given 'S.ByteString' to the inflater. Return a 'Popper', 176 | -- an IO action that returns the decompressed data a chunk at a time. 177 | -- The 'Popper' must be called to exhaustion before using the 'Inflate' 178 | -- object again. 179 | -- 180 | -- Note that this function automatically buffers the output to 181 | -- 'defaultChunkSize', and therefore you won't get any data from the popper 182 | -- until that much decompressed data is available. After you have fed all of 183 | -- the compressed data to this function, you can extract your final chunk of 184 | -- decompressed data using 'finishInflate'. 185 | feedInflate 186 | :: Inflate 187 | -> S.ByteString 188 | -> IO Popper 189 | feedInflate (Inflate (fzstr, fbuff) lastBS complete inflateDictionary) bs = do 190 | -- Write the BS to lastBS for use by getUnusedInflate. This is 191 | -- theoretically unnecessary, since we could just grab the pointer from the 192 | -- fzstr when needed. However, in that case, we wouldn't be holding onto a 193 | -- reference to the ForeignPtr, so the GC may decide to collect the 194 | -- ByteString in the interim. 195 | writeIORef lastBS bs 196 | 197 | withForeignPtr fzstr $ \zstr -> 198 | unsafeUseAsCStringLen bs $ \(cstr, len) -> 199 | c_set_avail_in zstr cstr $ fromIntegral len 200 | return $ drain fbuff fzstr (Just bs) inflate False 201 | where 202 | inflate zstr = do 203 | res <- c_call_inflate_noflush zstr 204 | res2 <- if (res == zNeedDict) 205 | then maybe (return zNeedDict) 206 | (\dict -> (unsafeUseAsCStringLen dict $ \(cstr, len) -> do 207 | c_call_inflate_set_dictionary zstr cstr $ fromIntegral len 208 | c_call_inflate_noflush zstr)) 209 | inflateDictionary 210 | else return res 211 | when (res2 == zStreamEnd) (writeIORef complete True) 212 | return res2 213 | 214 | -- | An IO action that returns the next chunk of data, returning 'PRDone' when 215 | -- there is no more data to be popped. 216 | type Popper = IO PopperRes 217 | 218 | data PopperRes = PRDone 219 | | PRNext !S.ByteString 220 | | PRError !ZlibException 221 | deriving (Show, Typeable) 222 | 223 | -- | Ensure that the given @ByteString@ is not deallocated. 224 | keepAlive :: Maybe S.ByteString -> IO a -> IO a 225 | keepAlive Nothing = id 226 | keepAlive (Just bs) = unsafeUseAsCStringLen bs . const 227 | 228 | drain :: ForeignPtr CChar 229 | -> ForeignPtr ZStreamStruct 230 | -> Maybe S.ByteString 231 | -> (ZStream' -> IO CInt) 232 | -> Bool 233 | -> Popper 234 | drain fbuff fzstr mbs func isFinish = withForeignPtr fzstr $ \zstr -> keepAlive mbs $ do 235 | res <- func zstr 236 | if res < 0 && res /= zBufError 237 | then return $ PRError $ ZlibException $ fromIntegral res 238 | else do 239 | avail <- c_get_avail_out zstr 240 | let size = defaultChunkSize - fromIntegral avail 241 | toOutput = avail == 0 || (isFinish && size /= 0) 242 | if toOutput 243 | then withForeignPtr fbuff $ \buff -> do 244 | bs <- S.packCStringLen (buff, size) 245 | c_set_avail_out zstr buff 246 | $ fromIntegral defaultChunkSize 247 | return $ PRNext bs 248 | else return PRDone 249 | 250 | 251 | -- | As explained in 'feedInflate', inflation buffers your decompressed 252 | -- data. After you call 'feedInflate' with your last chunk of compressed 253 | -- data, you will likely have some data still sitting in the buffer. This 254 | -- function will return it to you. 255 | finishInflate :: Inflate -> IO S.ByteString 256 | finishInflate (Inflate (fzstr, fbuff) _ _ _) = 257 | withForeignPtr fzstr $ \zstr -> 258 | withForeignPtr fbuff $ \buff -> do 259 | avail <- c_get_avail_out zstr 260 | let size = defaultChunkSize - fromIntegral avail 261 | bs <- S.packCStringLen (buff, size) 262 | c_set_avail_out zstr buff $ fromIntegral defaultChunkSize 263 | return bs 264 | 265 | -- | Flush the inflation buffer. Useful for interactive application. 266 | -- 267 | -- This is actually a synonym for 'finishInflate'. It is provided for its more 268 | -- semantic name. 269 | -- 270 | -- Since 0.0.3 271 | flushInflate :: Inflate -> IO S.ByteString 272 | flushInflate = finishInflate 273 | 274 | -- | Retrieve any data remaining after inflating. For more information on motivation, see: 275 | -- 276 | -- 277 | -- 278 | -- Since 0.1.11 279 | getUnusedInflate :: Inflate -> IO S.ByteString 280 | getUnusedInflate (Inflate (fzstr, _) ref _ _) = do 281 | bs <- readIORef ref 282 | len <- withForeignPtr fzstr c_get_avail_in 283 | return $ S.drop (S.length bs - fromIntegral len) bs 284 | 285 | -- | Returns True if the inflater has reached end-of-stream, or False if 286 | -- it is still expecting more data. 287 | -- 288 | -- Since 0.1.18 289 | isCompleteInflate :: Inflate -> IO Bool 290 | isCompleteInflate (Inflate _ _ complete _) = readIORef complete 291 | 292 | -- | Feed the given 'S.ByteString' to the deflater. Return a 'Popper', 293 | -- an IO action that returns the compressed data a chunk at a time. 294 | -- The 'Popper' must be called to exhaustion before using the 'Deflate' 295 | -- object again. 296 | -- 297 | -- Note that this function automatically buffers the output to 298 | -- 'defaultChunkSize', and therefore you won't get any data from the popper 299 | -- until that much compressed data is available. After you have fed all of the 300 | -- decompressed data to this function, you can extract your final chunks of 301 | -- compressed data using 'finishDeflate'. 302 | feedDeflate :: Deflate -> S.ByteString -> IO Popper 303 | feedDeflate (Deflate (fzstr, fbuff)) bs = do 304 | withForeignPtr fzstr $ \zstr -> 305 | unsafeUseAsCStringLen bs $ \(cstr, len) -> do 306 | c_set_avail_in zstr cstr $ fromIntegral len 307 | return $ drain fbuff fzstr (Just bs) c_call_deflate_noflush False 308 | 309 | -- | As explained in 'feedDeflate', deflation buffers your compressed 310 | -- data. After you call 'feedDeflate' with your last chunk of uncompressed 311 | -- data, use this to flush the rest of the data and signal end of input. 312 | finishDeflate :: Deflate -> Popper 313 | finishDeflate (Deflate (fzstr, fbuff)) = 314 | drain fbuff fzstr Nothing c_call_deflate_finish True 315 | 316 | -- | Flush the deflation buffer. Useful for interactive application. 317 | -- Internally this passes Z_SYNC_FLUSH to the zlib library. 318 | -- 319 | -- Unlike 'finishDeflate', 'flushDeflate' does not signal end of input, 320 | -- meaning you can feed more uncompressed data afterward. 321 | -- 322 | -- Since 0.0.3 323 | flushDeflate :: Deflate -> Popper 324 | flushDeflate (Deflate (fzstr, fbuff)) = 325 | drain fbuff fzstr Nothing c_call_deflate_flush True 326 | 327 | -- | Full flush the deflation buffer. Useful for interactive 328 | -- applications where previously streamed data may not be 329 | -- available. Using `fullFlushDeflate` too often can seriously degrade 330 | -- compression. Internally this passes Z_FULL_FLUSH to the zlib 331 | -- library. 332 | -- 333 | -- Like 'flushDeflate', 'fullFlushDeflate' does not signal end of input, 334 | -- meaning you can feed more uncompressed data afterward. 335 | -- 336 | -- Since 0.1.5 337 | fullFlushDeflate :: Deflate -> Popper 338 | fullFlushDeflate (Deflate (fzstr, fbuff)) = 339 | drain fbuff fzstr Nothing c_call_deflate_full_flush True 340 | -------------------------------------------------------------------------------- /Data/Streaming/Text.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns #-} 2 | {-# LANGUAGE CPP #-} 3 | {-# LANGUAGE ForeignFunctionInterface #-} 4 | {-# LANGUAGE GeneralizedNewtypeDeriving #-} 5 | {-# LANGUAGE MagicHash #-} 6 | {-# LANGUAGE Rank2Types #-} 7 | {-# LANGUAGE ScopedTypeVariables #-} 8 | {-# LANGUAGE UnliftedFFITypes #-} 9 | 10 | -- 11 | -- Module : Data.Text.Lazy.Encoding.Fusion 12 | -- Copyright : (c) 2009, 2010 Bryan O'Sullivan 13 | -- 14 | -- License : BSD-style 15 | -- Maintainer : bos@serpentine.com 16 | -- Stability : experimental 17 | -- Portability : portable 18 | -- 19 | -- /Warning/: this is an internal module, and does not have a stable 20 | -- API or name. Functions in this module may not check or enforce 21 | -- preconditions expected by public modules. Use at your own risk! 22 | -- 23 | -- Fusible 'Stream'-oriented functions for converting between lazy 24 | -- 'Text' and several common encodings. 25 | 26 | -- | Provides a stream-based approach to decoding Unicode data. Each function 27 | -- below works the same way: you give it a chunk of data, and it gives back a 28 | -- @DecodeResult@. If the parse was a success, then you get a chunk of @Text@ 29 | -- (possibly empty) and a continuation parsing function. If the parse was a 30 | -- failure, you get a chunk of successfully decoded @Text@ (possibly empty) and 31 | -- the unconsumed bytes. 32 | -- 33 | -- In order to indicate end of stream, you pass an empty @ByteString@ to the 34 | -- decode function. This call may result in a failure, if there were unused 35 | -- bytes left over from a previous step which formed part of a code sequence. 36 | module Data.Streaming.Text 37 | ( 38 | -- * Streaming 39 | decodeUtf8 40 | , decodeUtf8Pure 41 | , decodeUtf16LE 42 | , decodeUtf16BE 43 | , decodeUtf32LE 44 | , decodeUtf32BE 45 | 46 | -- * Type 47 | , DecodeResult (..) 48 | ) where 49 | 50 | import Control.Monad.ST (ST, runST) 51 | import Control.Monad.ST.Unsafe (unsafeIOToST, unsafeSTToIO) 52 | import Data.Bits ((.|.), shiftL) 53 | import qualified Data.ByteString as B 54 | import Data.ByteString.Internal (ByteString (PS)) 55 | import qualified Data.ByteString.Unsafe as B 56 | import Data.Text (Text) 57 | import qualified Data.Text as T 58 | import qualified Data.Text.Array as A 59 | import Data.Text.Internal (text) 60 | import qualified Data.Text.Internal.Encoding.Utf16 as U16 61 | import qualified Data.Text.Internal.Encoding.Utf32 as U32 62 | import qualified Data.Text.Internal.Encoding.Utf8 as U8 63 | import Data.Text.Internal.Unsafe.Char (unsafeWrite, unsafeChr32, 64 | unsafeChr8) 65 | import Data.Word (Word32, Word8) 66 | import Foreign.C.Types (CSize (..)) 67 | import Foreign.ForeignPtr (withForeignPtr) 68 | import Foreign.Marshal.Utils (with) 69 | import Foreign.Ptr (Ptr, minusPtr, nullPtr, 70 | plusPtr) 71 | import Foreign.Storable (Storable, peek, poke) 72 | import GHC.Base (MutableByteArray#) 73 | 74 | #if MIN_VERSION_text(2,0,0) 75 | import Control.Exception (try, evaluate) 76 | import qualified Data.Text.Encoding as TE 77 | import qualified Data.Text.Encoding.Error as TE 78 | import Data.Text.Internal.Unsafe.Char (unsafeChr16) 79 | import System.IO.Unsafe (unsafePerformIO) 80 | #else 81 | import Data.Text.Internal.Unsafe.Char (unsafeChr) 82 | unsafeChr16 = unsafeChr 83 | #endif 84 | 85 | data S = S0 86 | | S1 {-# UNPACK #-} !Word8 87 | | S2 {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8 88 | | S3 {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8 89 | deriving Show 90 | 91 | data DecodeResult 92 | = DecodeResultSuccess !Text !(B.ByteString -> DecodeResult) 93 | | DecodeResultFailure !Text !B.ByteString 94 | 95 | toBS :: S -> B.ByteString 96 | toBS S0 = B.empty 97 | toBS (S1 a) = B.pack [a] 98 | toBS (S2 a b) = B.pack [a, b] 99 | toBS (S3 a b c) = B.pack [a, b, c] 100 | {-# INLINE toBS #-} 101 | 102 | getText :: Int -> A.MArray s -> ST s Text 103 | getText j marr = do 104 | arr <- A.unsafeFreeze marr 105 | return $! text arr 0 j 106 | {-# INLINE getText #-} 107 | 108 | #include "text_cbits.h" 109 | 110 | foreign import ccall unsafe "_hs_streaming_commons_decode_utf8_state" c_decode_utf8_with_state 111 | :: MutableByteArray# s -> Ptr CSize 112 | -> Ptr (Ptr Word8) -> Ptr Word8 113 | -> Ptr CodePoint -> Ptr DecoderState -> IO (Ptr Word8) 114 | 115 | newtype CodePoint = CodePoint Word32 deriving (Eq, Show, Num, Storable) 116 | newtype DecoderState = DecoderState Word32 deriving (Eq, Show, Num, Storable) 117 | 118 | -- | /O(n)/ Convert a 'ByteString' into a 'Stream Char', using 119 | -- UTF-8 encoding. 120 | decodeUtf8 :: B.ByteString -> DecodeResult 121 | #if MIN_VERSION_text(2,0,0) 122 | decodeUtf8 = go mempty TE.streamDecodeUtf8 123 | where 124 | go :: B.ByteString -> (B.ByteString -> TE.Decoding) -> B.ByteString -> DecodeResult 125 | go prev decoder curr = case unsafePerformIO (try (evaluate (decoder curr))) of 126 | -- Caught exception does not allow to reconstruct 'DecodeResultFailure', 127 | -- so delegating this to 'decodeUtf8Pure' 128 | Left (_ :: TE.UnicodeException) -> decodeUtf8Pure (prev <> curr) 129 | Right (TE.Some decoded undecoded cont) 130 | -- An empty bytestring indicates end-of-input, if we still have undecoded bytes that 131 | -- becomes a failure. 132 | | B.null curr && not (B.null undecoded) -> DecodeResultFailure decoded undecoded 133 | | otherwise -> DecodeResultSuccess decoded (go undecoded cont) 134 | #else 135 | decodeUtf8 = decodeChunk B.empty 0 0 136 | where 137 | decodeChunkCheck :: B.ByteString -> CodePoint -> DecoderState -> B.ByteString -> DecodeResult 138 | decodeChunkCheck bsOld codepoint state bs 139 | | B.null bs = 140 | if B.null bsOld 141 | then DecodeResultSuccess T.empty decodeUtf8 142 | else DecodeResultFailure T.empty bsOld 143 | | otherwise = decodeChunk bsOld codepoint state bs 144 | -- We create a slightly larger than necessary buffer to accommodate a 145 | -- potential surrogate pair started in the last buffer 146 | decodeChunk :: B.ByteString -> CodePoint -> DecoderState -> B.ByteString -> DecodeResult 147 | decodeChunk bsOld codepoint0 state0 bs@(PS fp off len) = 148 | runST $ (unsafeIOToST . decodeChunkToBuffer) =<< A.new (len+1) 149 | where 150 | decodeChunkToBuffer :: A.MArray s -> IO DecodeResult 151 | decodeChunkToBuffer dest = withForeignPtr fp $ \ptr -> 152 | with (0::CSize) $ \destOffPtr -> 153 | with codepoint0 $ \codepointPtr -> 154 | with state0 $ \statePtr -> 155 | with nullPtr $ \curPtrPtr -> 156 | let end = ptr `plusPtr` (off + len) 157 | loop curPtr = do 158 | poke curPtrPtr curPtr 159 | _ <- c_decode_utf8_with_state (A.maBA dest) destOffPtr 160 | curPtrPtr end codepointPtr statePtr 161 | state <- peek statePtr 162 | n <- peek destOffPtr 163 | chunkText <- unsafeSTToIO $ do 164 | arr <- A.unsafeFreeze dest 165 | return $! text arr 0 (fromIntegral n) 166 | lastPtr <- peek curPtrPtr 167 | let left = lastPtr `minusPtr` curPtr 168 | -- The logic here is: if any text was generated, then the 169 | -- previous leftovers were completely consumed already. 170 | -- If no text was generated, then any leftovers from the 171 | -- previous step are still leftovers now. 172 | unused 173 | | not $ T.null chunkText = B.unsafeDrop left bs 174 | | B.null bsOld = bs 175 | | otherwise = B.append bsOld bs 176 | case unused `seq` state of 177 | UTF8_REJECT -> 178 | -- We encountered an encoding error 179 | return $! DecodeResultFailure chunkText unused 180 | _ -> do 181 | codepoint <- peek codepointPtr 182 | return $! DecodeResultSuccess chunkText 183 | $! decodeChunkCheck unused codepoint state 184 | in loop (ptr `plusPtr` off) 185 | #endif 186 | 187 | -- | /O(n)/ Convert a 'ByteString' into a 'Stream Char', using 188 | -- UTF-8 encoding. 189 | decodeUtf8Pure :: B.ByteString -> DecodeResult 190 | decodeUtf8Pure = 191 | beginChunk S0 192 | where 193 | beginChunk :: S -> B.ByteString -> DecodeResult 194 | beginChunk s bs | B.null bs = 195 | case s of 196 | S0 -> DecodeResultSuccess T.empty (beginChunk S0) 197 | _ -> DecodeResultFailure T.empty $ toBS s 198 | beginChunk s0 ps = runST $ do 199 | let initLen = B.length ps 200 | #if MIN_VERSION_text(2,0,0) 201 | -- Worst-case scenario: the very first byte finishes a 4-byte sequence, 202 | -- so decoding results in 4 + (initLen - 1) bytes. 203 | marr <- A.new (initLen + 3) 204 | #else 205 | marr <- A.new (initLen + 1) 206 | #endif 207 | let start !i !j 208 | | i >= len = do 209 | t <- getText j marr 210 | return $! DecodeResultSuccess t (beginChunk S0) 211 | | U8.validate1 a = addChar' 1 (unsafeChr8 a) 212 | | i + 1 < len && U8.validate2 a b = addChar' 2 (U8.chr2 a b) 213 | | i + 2 < len && U8.validate3 a b c = addChar' 3 (U8.chr3 a b c) 214 | | i + 3 < len && U8.validate4 a b c d = addChar' 4 (U8.chr4 a b c d) 215 | | i + 3 < len = do 216 | t <- getText j marr 217 | return $! DecodeResultFailure t (B.unsafeDrop i ps) 218 | | i + 2 < len = continue (S3 a b c) 219 | | i + 1 < len = continue (S2 a b) 220 | | otherwise = continue (S1 a) 221 | where 222 | a = B.unsafeIndex ps i 223 | b = B.unsafeIndex ps (i+1) 224 | c = B.unsafeIndex ps (i+2) 225 | d = B.unsafeIndex ps (i+3) 226 | addChar' deltai char = do 227 | deltaj <- unsafeWrite marr j char 228 | start (i + deltai) (j + deltaj) 229 | continue s = do 230 | t <- getText j marr 231 | return $! DecodeResultSuccess t (beginChunk s) 232 | 233 | checkCont s !i | i >= len = return $! DecodeResultSuccess T.empty (beginChunk s) 234 | checkCont s !i = 235 | case s of 236 | S0 -> start i 0 237 | S1 a 238 | | U8.validate2 a x -> addChar' (U8.chr2 a x) 239 | | otherwise -> checkCont (S2 a x) (i + 1) 240 | S2 a b 241 | | U8.validate3 a b x -> addChar' (U8.chr3 a b x) 242 | | otherwise -> checkCont (S3 a b x) (i + 1) 243 | S3 a b c 244 | | U8.validate4 a b c x -> addChar' (U8.chr4 a b c x) 245 | _ -> return $! DecodeResultFailure T.empty 246 | $! B.append (toBS s) (B.unsafeDrop i ps) 247 | where 248 | x = B.unsafeIndex ps i 249 | addChar' c = do 250 | d <- unsafeWrite marr 0 c 251 | start (i + 1) d 252 | 253 | checkCont s0 0 254 | where 255 | len = B.length ps 256 | {-# INLINE beginChunk #-} 257 | 258 | -- | /O(n)/ Convert a 'ByteString' into a 'Stream Char', using little 259 | -- endian UTF-16 encoding. 260 | decodeUtf16LE :: B.ByteString -> DecodeResult 261 | decodeUtf16LE = 262 | beginChunk S0 263 | where 264 | beginChunk :: S -> B.ByteString -> DecodeResult 265 | beginChunk s bs | B.null bs = 266 | case s of 267 | S0 -> DecodeResultSuccess T.empty (beginChunk S0) 268 | _ -> DecodeResultFailure T.empty $ toBS s 269 | beginChunk s0 ps = runST $ do 270 | let initLen = B.length ps 271 | #if MIN_VERSION_text(2,0,0) 272 | -- Worst-case scenario: each Word16 in UTF16 gives three Word8 in UTF8 273 | -- and left-over from a previous chunk gives four Word8 in UTF8 274 | marr <- A.new ((initLen `div` 2) * 3 + 4) -- of Word8 275 | #else 276 | marr <- A.new (initLen + 1) -- of Word16 277 | #endif 278 | let start !i !j 279 | | i >= len = do 280 | t <- getText j marr 281 | return $! DecodeResultSuccess t (beginChunk S0) 282 | | i + 1 < len && U16.validate1 x1 = addChar' 2 (unsafeChr16 x1) 283 | | i + 3 < len && U16.validate2 x1 x2 = addChar' 4 (U16.chr2 x1 x2) 284 | | i + 3 < len = do 285 | t <- getText j marr 286 | return $! DecodeResultFailure t (B.unsafeDrop i ps) 287 | | i + 2 < len = continue (S3 a b c) 288 | | i + 1 < len = continue (S2 a b) 289 | | otherwise = continue (S1 a) 290 | where 291 | a = B.unsafeIndex ps i 292 | b = B.unsafeIndex ps (i+1) 293 | c = B.unsafeIndex ps (i+2) 294 | d = B.unsafeIndex ps (i+3) 295 | x1 = combine a b 296 | x2 = combine c d 297 | addChar' deltai char = do 298 | deltaj <- unsafeWrite marr j char 299 | start (i + deltai) (j + deltaj) 300 | continue s = do 301 | t <- getText j marr 302 | return $! DecodeResultSuccess t (beginChunk s) 303 | 304 | checkCont s !i | i >= len = return $! DecodeResultSuccess T.empty (beginChunk s) 305 | checkCont s !i = 306 | case s of 307 | S0 -> start i 0 308 | S1 a -> 309 | let x1 = combine a x 310 | in if U16.validate1 x1 311 | then addChar' (unsafeChr16 x1) 312 | else checkCont (S2 a x) (i + 1) 313 | S2 a b -> checkCont (S3 a b x) (i + 1) 314 | S3 a b c -> 315 | let x1 = combine a b 316 | x2 = combine c x 317 | in if U16.validate2 x1 x2 318 | then addChar' (U16.chr2 x1 x2) 319 | else return $! DecodeResultFailure T.empty 320 | $! B.append (toBS s) (B.unsafeDrop i ps) 321 | where 322 | x = B.unsafeIndex ps i 323 | addChar' c = do 324 | d <- unsafeWrite marr 0 c 325 | start (i + 1) d 326 | 327 | checkCont s0 0 328 | where 329 | len = B.length ps 330 | combine w1 w2 = fromIntegral w1 .|. (fromIntegral w2 `shiftL` 8) 331 | {-# INLINE beginChunk #-} 332 | 333 | -- | /O(n)/ Convert a 'ByteString' into a 'Stream Char', using big 334 | -- endian UTF-16 encoding. 335 | decodeUtf16BE :: B.ByteString -> DecodeResult 336 | decodeUtf16BE = 337 | beginChunk S0 338 | where 339 | beginChunk :: S -> B.ByteString -> DecodeResult 340 | beginChunk s bs | B.null bs = 341 | case s of 342 | S0 -> DecodeResultSuccess T.empty (beginChunk S0) 343 | _ -> DecodeResultFailure T.empty $ toBS s 344 | beginChunk s0 ps = runST $ do 345 | let initLen = B.length ps 346 | #if MIN_VERSION_text(2,0,0) 347 | -- Worst-case scenario: each Word16 in UTF16 gives three Word8 in UTF8 348 | -- and left-over from a previous chunk gives four Word8 in UTF8 349 | marr <- A.new ((initLen `div` 2) * 3 + 4) -- of Word8 350 | #else 351 | marr <- A.new (initLen + 1) -- of Word16 352 | #endif 353 | let start !i !j 354 | | i >= len = do 355 | t <- getText j marr 356 | return $! DecodeResultSuccess t (beginChunk S0) 357 | | i + 1 < len && U16.validate1 x1 = addChar' 2 (unsafeChr16 x1) 358 | | i + 3 < len && U16.validate2 x1 x2 = addChar' 4 (U16.chr2 x1 x2) 359 | | i + 3 < len = do 360 | t <- getText j marr 361 | return $! DecodeResultFailure t (B.unsafeDrop i ps) 362 | | i + 2 < len = continue (S3 a b c) 363 | | i + 1 < len = continue (S2 a b) 364 | | otherwise = continue (S1 a) 365 | where 366 | a = B.unsafeIndex ps i 367 | b = B.unsafeIndex ps (i+1) 368 | c = B.unsafeIndex ps (i+2) 369 | d = B.unsafeIndex ps (i+3) 370 | x1 = combine a b 371 | x2 = combine c d 372 | addChar' deltai char = do 373 | deltaj <- unsafeWrite marr j char 374 | start (i + deltai) (j + deltaj) 375 | continue s = do 376 | t <- getText j marr 377 | return $! DecodeResultSuccess t (beginChunk s) 378 | 379 | checkCont s !i | i >= len = return $! DecodeResultSuccess T.empty (beginChunk s) 380 | checkCont s !i = 381 | case s of 382 | S0 -> start i 0 383 | S1 a -> 384 | let x1 = combine a x 385 | in if U16.validate1 x1 386 | then addChar' (unsafeChr16 x1) 387 | else checkCont (S2 a x) (i + 1) 388 | S2 a b -> checkCont (S3 a b x) (i + 1) 389 | S3 a b c -> 390 | let x1 = combine a b 391 | x2 = combine c x 392 | in if U16.validate2 x1 x2 393 | then addChar' (U16.chr2 x1 x2) 394 | else return $! DecodeResultFailure T.empty 395 | $! B.append (toBS s) (B.unsafeDrop i ps) 396 | where 397 | x = B.unsafeIndex ps i 398 | addChar' c = do 399 | d <- unsafeWrite marr 0 c 400 | start (i + 1) d 401 | 402 | checkCont s0 0 403 | where 404 | len = B.length ps 405 | combine w1 w2 = (fromIntegral w1 `shiftL` 8) .|. fromIntegral w2 406 | {-# INLINE beginChunk #-} 407 | 408 | -- | /O(n)/ Convert a 'ByteString' into a 'Stream Char', using little 409 | -- endian UTF-32 encoding. 410 | decodeUtf32LE :: B.ByteString -> DecodeResult 411 | decodeUtf32LE = 412 | beginChunk S0 413 | where 414 | beginChunk :: S -> B.ByteString -> DecodeResult 415 | beginChunk s bs | B.null bs = 416 | case s of 417 | S0 -> DecodeResultSuccess T.empty (beginChunk S0) 418 | _ -> DecodeResultFailure T.empty $ toBS s 419 | beginChunk s0 ps = runST $ do 420 | let initLen = B.length ps `div` 2 421 | #if MIN_VERSION_text(2,0,0) 422 | -- Worst-case scenario: the very first byte finishes a 4-byte UTF8 sequence, 423 | -- and other codepoints have 4-byte UTF8 representation as well. 424 | -- This gives 4 + (B.length ps - 1), or (for odd B.length) initLen * 2 + 4. 425 | marr <- A.new (initLen * 2 + 4) -- of Word8 426 | #else 427 | marr <- A.new (initLen + 1) -- of Word16 428 | #endif 429 | let start !i !j 430 | | i >= len = do 431 | t <- getText j marr 432 | return $! DecodeResultSuccess t (beginChunk S0) 433 | | i + 3 < len && U32.validate x1 = addChar' 4 (unsafeChr32 x1) 434 | | i + 3 < len = do 435 | t <- getText j marr 436 | return $! DecodeResultFailure t (B.unsafeDrop i ps) 437 | | i + 2 < len = continue (S3 a b c) 438 | | i + 1 < len = continue (S2 a b) 439 | | otherwise = continue (S1 a) 440 | where 441 | a = B.unsafeIndex ps i 442 | b = B.unsafeIndex ps (i+1) 443 | c = B.unsafeIndex ps (i+2) 444 | d = B.unsafeIndex ps (i+3) 445 | x1 = combine a b c d 446 | addChar' deltai char = do 447 | deltaj <- unsafeWrite marr j char 448 | start (i + deltai) (j + deltaj) 449 | continue s = do 450 | t <- getText j marr 451 | return $! DecodeResultSuccess t (beginChunk s) 452 | 453 | checkCont s !i | i >= len = return $! DecodeResultSuccess T.empty (beginChunk s) 454 | checkCont s !i = 455 | case s of 456 | S0 -> start i 0 457 | S1 a -> checkCont (S2 a x) (i + 1) 458 | S2 a b -> checkCont (S3 a b x) (i + 1) 459 | S3 a b c -> 460 | let x1 = combine a b c x 461 | in if U32.validate x1 462 | then addChar' (unsafeChr32 x1) 463 | else return $! DecodeResultFailure T.empty 464 | $! B.append (toBS s) (B.unsafeDrop i ps) 465 | where 466 | x = B.unsafeIndex ps i 467 | addChar' c = do 468 | d <- unsafeWrite marr 0 c 469 | start (i + 1) d 470 | 471 | checkCont s0 0 472 | where 473 | len = B.length ps 474 | combine w1 w2 w3 w4 = 475 | shiftL (fromIntegral w4) 24 476 | .|. shiftL (fromIntegral w3) 16 477 | .|. shiftL (fromIntegral w2) 8 478 | .|. (fromIntegral w1) 479 | {-# INLINE beginChunk #-} 480 | 481 | -- | /O(n)/ Convert a 'ByteString' into a 'Stream Char', using big 482 | -- endian UTF-32 encoding. 483 | decodeUtf32BE :: B.ByteString -> DecodeResult 484 | decodeUtf32BE = 485 | beginChunk S0 486 | where 487 | beginChunk :: S -> B.ByteString -> DecodeResult 488 | beginChunk s bs | B.null bs = 489 | case s of 490 | S0 -> DecodeResultSuccess T.empty (beginChunk S0) 491 | _ -> DecodeResultFailure T.empty $ toBS s 492 | beginChunk s0 ps = runST $ do 493 | let initLen = B.length ps `div` 2 494 | #if MIN_VERSION_text(2,0,0) 495 | -- Worst-case scenario: the very first byte finishes a 4-byte UTF8 sequence, 496 | -- and other codepoints have 4-byte UTF8 representation as well. 497 | -- This gives 4 + (B.length ps - 1), or (for odd B.length) initLen * 2 + 4. 498 | marr <- A.new (initLen * 2 + 4) -- of Word8 499 | #else 500 | marr <- A.new (initLen + 1) -- of Word16 501 | #endif 502 | let start !i !j 503 | | i >= len = do 504 | t <- getText j marr 505 | return $! DecodeResultSuccess t (beginChunk S0) 506 | | i + 3 < len && U32.validate x1 = addChar' 4 (unsafeChr32 x1) 507 | | i + 3 < len = do 508 | t <- getText j marr 509 | return $! DecodeResultFailure t (B.unsafeDrop i ps) 510 | | i + 2 < len = continue (S3 a b c) 511 | | i + 1 < len = continue (S2 a b) 512 | | otherwise = continue (S1 a) 513 | where 514 | a = B.unsafeIndex ps i 515 | b = B.unsafeIndex ps (i+1) 516 | c = B.unsafeIndex ps (i+2) 517 | d = B.unsafeIndex ps (i+3) 518 | x1 = combine a b c d 519 | addChar' deltai char = do 520 | deltaj <- unsafeWrite marr j char 521 | start (i + deltai) (j + deltaj) 522 | continue s = do 523 | t <- getText j marr 524 | return $! DecodeResultSuccess t (beginChunk s) 525 | 526 | checkCont s !i | i >= len = return $! DecodeResultSuccess T.empty (beginChunk s) 527 | checkCont s !i = 528 | case s of 529 | S0 -> start i 0 530 | S1 a -> checkCont (S2 a x) (i + 1) 531 | S2 a b -> checkCont (S3 a b x) (i + 1) 532 | S3 a b c -> 533 | let x1 = combine a b c x 534 | in if U32.validate x1 535 | then addChar' (unsafeChr32 x1) 536 | else return $! DecodeResultFailure T.empty 537 | $! B.append (toBS s) (B.unsafeDrop i ps) 538 | where 539 | x = B.unsafeIndex ps i 540 | addChar' c = do 541 | d <- unsafeWrite marr 0 c 542 | start (i + 1) d 543 | 544 | checkCont s0 0 545 | where 546 | len = B.length ps 547 | combine w1 w2 w3 w4 = 548 | shiftL (fromIntegral w1) 24 549 | .|. shiftL (fromIntegral w2) 16 550 | .|. shiftL (fromIntegral w3) 8 551 | .|. (fromIntegral w4) 552 | {-# INLINE beginChunk #-} 553 | -------------------------------------------------------------------------------- /Data/Streaming/Network.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | {-# LANGUAGE ScopedTypeVariables #-} 3 | {-# LANGUAGE BangPatterns #-} 4 | module Data.Streaming.Network 5 | ( -- * Types 6 | ServerSettings 7 | , ClientSettings 8 | , HostPreference 9 | , Message (..) 10 | , AppData 11 | , ServerSettingsUnix 12 | , ClientSettingsUnix 13 | , AppDataUnix 14 | -- ** Smart constructors 15 | , serverSettingsTCP 16 | , serverSettingsTCPSocket 17 | , clientSettingsTCP 18 | , serverSettingsUDP 19 | , clientSettingsUDP 20 | , serverSettingsUnix 21 | , clientSettingsUnix 22 | , message 23 | -- ** Classes 24 | , HasPort (..) 25 | , HasAfterBind (..) 26 | , HasReadWrite (..) 27 | , HasReadBufferSize (..) 28 | , HasPath (..) 29 | -- ** Setters 30 | , setPort 31 | , setHost 32 | , setAddrFamily 33 | , setAfterBind 34 | , setNeedLocalAddr 35 | , setReadBufferSize 36 | , setPath 37 | -- ** Getters 38 | , getPort 39 | , getHost 40 | , getAddrFamily 41 | , getAfterBind 42 | , getNeedLocalAddr 43 | , getReadBufferSize 44 | , getPath 45 | , appRead 46 | , appWrite 47 | , appSockAddr 48 | , appLocalAddr 49 | , appCloseConnection 50 | , appRawSocket 51 | -- * Functions 52 | -- ** General 53 | , bindPortGen 54 | , bindPortGenEx 55 | , bindRandomPortGen 56 | , getSocketGen 57 | , getSocketFamilyGen 58 | , acceptSafe 59 | , unassignedPorts 60 | , getUnassignedPort 61 | -- ** TCP 62 | , bindPortTCP 63 | , bindRandomPortTCP 64 | , getSocketTCP 65 | , getSocketFamilyTCP 66 | , safeRecv 67 | , runTCPServer 68 | , runTCPClient 69 | , ConnectionHandle() 70 | , runTCPServerWithHandle 71 | -- ** UDP 72 | , bindPortUDP 73 | , bindRandomPortUDP 74 | , getSocketUDP 75 | -- ** Unix 76 | , bindPath 77 | , getSocketUnix 78 | , runUnixServer 79 | , runUnixClient 80 | ) where 81 | 82 | import qualified Network.Socket as NS 83 | import Data.Streaming.Network.Internal 84 | import Control.Concurrent (threadDelay) 85 | import Control.Exception (IOException, try, SomeException, throwIO, bracketOnError, bracket) 86 | import Network.Socket (Socket, AddrInfo, SocketType) 87 | import Network.Socket.ByteString (recv, sendAll) 88 | import System.IO.Error (isDoesNotExistError) 89 | import qualified Data.ByteString.Char8 as S8 90 | import qualified Control.Exception as E 91 | import Data.ByteString (ByteString) 92 | import System.Directory (removeFile) 93 | import Data.Functor.Constant (Constant (Constant), getConstant) 94 | import Data.Functor.Identity (Identity (Identity), runIdentity) 95 | import Control.Concurrent (forkIO) 96 | import Control.Monad (forever) 97 | import Data.IORef (IORef, newIORef, atomicModifyIORef) 98 | import Data.Array.Unboxed ((!), UArray, listArray) 99 | import System.IO.Unsafe (unsafePerformIO, unsafeDupablePerformIO) 100 | import System.Random (randomRIO) 101 | import System.IO.Error (isFullErrorType, ioeGetErrorType) 102 | #if WINDOWS 103 | import Control.Concurrent.MVar (putMVar, takeMVar, newEmptyMVar) 104 | #endif 105 | 106 | getPossibleAddrs :: SocketType -> String -> Int -> NS.Family -> IO [AddrInfo] 107 | getPossibleAddrs sockettype host' port' af = 108 | NS.getAddrInfo (Just hints) (Just host') (Just $ show port') 109 | where 110 | hints = NS.defaultHints { 111 | NS.addrSocketType = sockettype 112 | , NS.addrFamily = af 113 | } 114 | 115 | -- | Attempt to connect to the given host/port/address family using given @SocketType@. 116 | -- 117 | -- Since 0.1.3 118 | getSocketFamilyGen :: SocketType -> String -> Int -> NS.Family -> IO (Socket, AddrInfo) 119 | getSocketFamilyGen sockettype host' port' af = do 120 | (addr:_) <- getPossibleAddrs sockettype host' port' af 121 | sock <- NS.socket (NS.addrFamily addr) (NS.addrSocketType addr) 122 | (NS.addrProtocol addr) 123 | return (sock, addr) 124 | 125 | -- | Attempt to connect to the given host/port using given @SocketType@. 126 | getSocketGen :: SocketType -> String -> Int -> IO (Socket, AddrInfo) 127 | getSocketGen sockettype host port = getSocketFamilyGen sockettype host port NS.AF_UNSPEC 128 | 129 | defaultSocketOptions :: SocketType -> [(NS.SocketOption, Int)] 130 | defaultSocketOptions sockettype = 131 | case sockettype of 132 | NS.Datagram -> [(NS.ReuseAddr,1)] 133 | _ -> [(NS.NoDelay,1), (NS.ReuseAddr,1)] 134 | 135 | -- | Attempt to bind a listening @Socket@ on the given host/port using given 136 | -- @SocketType@. If no host is given, will use the first address available. 137 | bindPortGen :: SocketType -> Int -> HostPreference -> IO Socket 138 | bindPortGen sockettype = bindPortGenEx (defaultSocketOptions sockettype) sockettype 139 | 140 | -- | Attempt to bind a listening @Socket@ on the given host/port using given 141 | -- socket options and @SocketType@. If no host is given, will use the first address available. 142 | -- 143 | -- Since 0.1.17 144 | bindPortGenEx :: [(NS.SocketOption, Int)] -> SocketType -> Int -> HostPreference -> IO Socket 145 | bindPortGenEx sockOpts sockettype p s = do 146 | let hints = NS.defaultHints 147 | { NS.addrFlags = [NS.AI_PASSIVE] 148 | , NS.addrSocketType = sockettype 149 | } 150 | host = 151 | case s of 152 | Host s' -> Just s' 153 | _ -> Nothing 154 | port = Just . show $ p 155 | addrs <- NS.getAddrInfo (Just hints) host port 156 | -- Choose an IPv6 socket if exists. This ensures the socket can 157 | -- handle both IPv4 and IPv6 if v6only is false. 158 | let addrs4 = filter (\x -> NS.addrFamily x /= NS.AF_INET6) addrs 159 | addrs6 = filter (\x -> NS.addrFamily x == NS.AF_INET6) addrs 160 | addrs' = 161 | case s of 162 | HostIPv4 -> addrs4 ++ addrs6 163 | HostIPv4Only -> addrs4 164 | HostIPv6 -> addrs6 ++ addrs4 165 | HostIPv6Only -> addrs6 -- this isn't enough, IPv6Only socket option must also be set 166 | _ -> addrs 167 | 168 | sockOpts' = if s == HostIPv6Only then ((NS.IPv6Only,1):sockOpts) else sockOpts 169 | 170 | tryAddrs (addr1:rest@(_:_)) = 171 | E.catch 172 | (theBody addr1) 173 | (\(_ :: IOException) -> tryAddrs rest) 174 | tryAddrs (addr1:[]) = theBody addr1 175 | tryAddrs _ = error "bindPort: addrs is empty" 176 | 177 | theBody addr = 178 | bracketOnError 179 | (NS.socket (NS.addrFamily addr) (NS.addrSocketType addr) (NS.addrProtocol addr)) 180 | NS.close 181 | (\sock -> do 182 | mapM_ (\(opt,v) -> NS.setSocketOption sock opt v) sockOpts' 183 | NS.bind sock (NS.addrAddress addr) 184 | return sock 185 | ) 186 | tryAddrs addrs' 187 | 188 | -- | Bind to a random port number. Especially useful for writing network tests. 189 | -- 190 | -- Since 0.1.1 191 | bindRandomPortGen :: SocketType -> HostPreference -> IO (Int, Socket) 192 | bindRandomPortGen sockettype s = do 193 | socket <- bindPortGen sockettype 0 s 194 | port <- NS.socketPort socket 195 | return (fromIntegral port, socket) 196 | 197 | -- | Top 10 Largest IANA unassigned port ranges with no unauthorized uses known 198 | unassignedPortsList :: [Int] 199 | unassignedPortsList = concat 200 | [ [43124..44320] 201 | , [28120..29166] 202 | , [45967..46997] 203 | , [28241..29117] 204 | , [40001..40840] 205 | , [29170..29998] 206 | , [38866..39680] 207 | , [43442..44122] 208 | , [41122..41793] 209 | , [35358..36000] 210 | ] 211 | 212 | unassignedPorts :: UArray Int Int 213 | unassignedPorts = listArray (unassignedPortsMin, unassignedPortsMax) unassignedPortsList 214 | 215 | unassignedPortsMin, unassignedPortsMax :: Int 216 | unassignedPortsMin = 0 217 | unassignedPortsMax = length unassignedPortsList - 1 218 | 219 | nextUnusedPort :: IORef Int 220 | nextUnusedPort = unsafePerformIO 221 | $ randomRIO (unassignedPortsMin, unassignedPortsMax) >>= newIORef 222 | {-# NOINLINE nextUnusedPort #-} 223 | 224 | -- | Get a port from the IANA list of unassigned ports. 225 | -- 226 | -- Internally, this function uses an @IORef@ to cycle through the list of ports 227 | getUnassignedPort :: IO Int 228 | getUnassignedPort = do 229 | port <- atomicModifyIORef nextUnusedPort go 230 | return $! port 231 | where 232 | go i 233 | | i > unassignedPortsMax = (succ unassignedPortsMin, unassignedPorts ! unassignedPortsMin) 234 | | otherwise = (succ i, unassignedPorts ! i) 235 | 236 | -- | Attempt to connect to the given host/port. 237 | getSocketUDP :: String -> Int -> IO (Socket, AddrInfo) 238 | getSocketUDP = getSocketGen NS.Datagram 239 | 240 | -- | Attempt to bind a listening @Socket@ on the given host/port. If no host is 241 | -- given, will use the first address available. 242 | bindPortUDP :: Int -> HostPreference -> IO Socket 243 | bindPortUDP = bindPortGen NS.Datagram 244 | 245 | -- | Bind a random UDP port. 246 | -- 247 | -- See 'bindRandomPortGen' 248 | -- 249 | -- Since 0.1.1 250 | bindRandomPortUDP :: HostPreference -> IO (Int, Socket) 251 | bindRandomPortUDP = bindRandomPortGen NS.Datagram 252 | 253 | {-# NOINLINE defaultReadBufferSize #-} 254 | defaultReadBufferSize :: Int 255 | defaultReadBufferSize = unsafeDupablePerformIO $ 256 | bracket (NS.socket NS.AF_INET NS.Stream 0) NS.close (\sock -> NS.getSocketOption sock NS.RecvBuffer) 257 | 258 | -- | Attempt to connect to the given Unix domain socket path. 259 | getSocketUnix :: FilePath -> IO Socket 260 | getSocketUnix path = do 261 | sock <- NS.socket NS.AF_UNIX NS.Stream 0 262 | ee <- try' $ NS.connect sock (NS.SockAddrUnix path) 263 | case ee of 264 | Left e -> NS.close sock >> throwIO e 265 | Right () -> return sock 266 | where 267 | try' :: IO a -> IO (Either SomeException a) 268 | try' = try 269 | 270 | -- | Attempt to bind a listening Unix domain socket at the given path. 271 | bindPath :: FilePath -> IO Socket 272 | bindPath path = do 273 | sock <- bracketOnError 274 | (NS.socket NS.AF_UNIX NS.Stream 0) 275 | NS.close 276 | (\sock -> do 277 | removeFileSafe path -- Cannot bind if the socket file exists. 278 | NS.bind sock (NS.SockAddrUnix path) 279 | return sock) 280 | NS.listen sock (max 2048 NS.maxListenQueue) 281 | return sock 282 | 283 | removeFileSafe :: FilePath -> IO () 284 | removeFileSafe path = 285 | removeFile path `E.catch` handleExists 286 | where 287 | handleExists e 288 | | isDoesNotExistError e = return () 289 | | otherwise = throwIO e 290 | 291 | -- | Smart constructor. 292 | serverSettingsUnix 293 | :: FilePath -- ^ path to bind to 294 | -> ServerSettingsUnix 295 | serverSettingsUnix path = ServerSettingsUnix 296 | { serverPath = path 297 | , serverAfterBindUnix = const $ return () 298 | , serverReadBufferSizeUnix = defaultReadBufferSize 299 | } 300 | 301 | -- | Smart constructor. 302 | clientSettingsUnix 303 | :: FilePath -- ^ path to connect to 304 | -> ClientSettingsUnix 305 | clientSettingsUnix path = ClientSettingsUnix 306 | { clientPath = path 307 | , clientReadBufferSizeUnix = defaultReadBufferSize 308 | } 309 | 310 | #if defined(__GLASGOW_HASKELL__) && WINDOWS 311 | -- Socket recv and accept calls on Windows platform cannot be interrupted when compiled with -threaded. 312 | -- See https://ghc.haskell.org/trac/ghc/ticket/5797 for details. 313 | -- The following enables simple workaround 314 | #define SOCKET_ACCEPT_RECV_WORKAROUND 315 | #endif 316 | 317 | safeRecv :: Socket -> Int -> IO ByteString 318 | #ifndef SOCKET_ACCEPT_RECV_WORKAROUND 319 | safeRecv = recv 320 | #else 321 | safeRecv s buf = do 322 | var <- newEmptyMVar 323 | forkIO $ recv s buf `E.catch` (\(_::IOException) -> return S8.empty) >>= putMVar var 324 | takeMVar var 325 | #endif 326 | 327 | -- | Smart constructor. 328 | serverSettingsUDP 329 | :: Int -- ^ port to bind to 330 | -> HostPreference -- ^ host binding preferences 331 | -> ServerSettings 332 | serverSettingsUDP = serverSettingsTCP 333 | 334 | -- | Smart constructor. 335 | serverSettingsTCP 336 | :: Int -- ^ port to bind to 337 | -> HostPreference -- ^ host binding preferences 338 | -> ServerSettings 339 | serverSettingsTCP port host = ServerSettings 340 | { serverPort = port 341 | , serverHost = host 342 | , serverSocket = Nothing 343 | , serverAfterBind = const $ return () 344 | , serverNeedLocalAddr = False 345 | , serverReadBufferSize = defaultReadBufferSize 346 | } 347 | 348 | -- | Create a server settings that uses an already available listening socket. 349 | -- Any port and host modifications made to this value will be ignored. 350 | -- 351 | -- Since 0.1.1 352 | serverSettingsTCPSocket :: Socket -> ServerSettings 353 | serverSettingsTCPSocket lsocket = ServerSettings 354 | { serverPort = 0 355 | , serverHost = HostAny 356 | , serverSocket = Just lsocket 357 | , serverAfterBind = const $ return () 358 | , serverNeedLocalAddr = False 359 | , serverReadBufferSize = defaultReadBufferSize 360 | } 361 | 362 | -- | Smart constructor. 363 | clientSettingsUDP 364 | :: Int -- ^ port to connect to 365 | -> ByteString -- ^ host to connect to 366 | -> ClientSettings 367 | clientSettingsUDP = clientSettingsTCP 368 | 369 | -- | Smart constructor. 370 | clientSettingsTCP 371 | :: Int -- ^ port to connect to 372 | -> ByteString -- ^ host to connect to 373 | -> ClientSettings 374 | clientSettingsTCP port host = ClientSettings 375 | { clientPort = port 376 | , clientHost = host 377 | , clientAddrFamily = NS.AF_UNSPEC 378 | , clientReadBufferSize = defaultReadBufferSize 379 | } 380 | 381 | -- | Attempt to connect to the given host/port/address family. 382 | -- 383 | -- Since 0.1.3 384 | getSocketFamilyTCP :: ByteString -> Int -> NS.Family -> IO (NS.Socket, NS.SockAddr) 385 | getSocketFamilyTCP host' port' addrFamily = do 386 | addrsInfo <- getPossibleAddrs NS.Stream (S8.unpack host') port' addrFamily 387 | firstSuccess addrsInfo 388 | where 389 | firstSuccess [ai] = connect ai 390 | firstSuccess (ai:ais) = connect ai `E.catch` \(_ :: IOException) -> firstSuccess ais 391 | firstSuccess _ = error "getSocketFamilyTCP: can't happen" 392 | 393 | createSocket addrInfo = do 394 | sock <- NS.socket (NS.addrFamily addrInfo) (NS.addrSocketType addrInfo) 395 | (NS.addrProtocol addrInfo) 396 | NS.setSocketOption sock NS.NoDelay 1 397 | return sock 398 | 399 | connect addrInfo = E.bracketOnError (createSocket addrInfo) NS.close $ \sock -> do 400 | NS.connect sock (NS.addrAddress addrInfo) 401 | return (sock, NS.addrAddress addrInfo) 402 | 403 | -- | Attempt to connect to the given host/port. 404 | getSocketTCP :: ByteString -> Int -> IO (NS.Socket, NS.SockAddr) 405 | getSocketTCP host port = getSocketFamilyTCP host port NS.AF_UNSPEC 406 | 407 | -- | Attempt to bind a listening @Socket@ on the given host/port. If no host is 408 | -- given, will use the first address available. 409 | -- 'maxListenQueue' is topically 128 which is too short for 410 | -- high performance servers. So, we specify 'max 2048 maxListenQueue' to 411 | -- the listen queue. 412 | bindPortTCP :: Int -> HostPreference -> IO Socket 413 | bindPortTCP p s = do 414 | sock <- bindPortGen NS.Stream p s 415 | NS.listen sock (max 2048 NS.maxListenQueue) 416 | return sock 417 | 418 | -- | Bind a random TCP port. 419 | -- 420 | -- See 'bindRandomPortGen'. 421 | -- 422 | -- Since 0.1.1 423 | bindRandomPortTCP :: HostPreference -> IO (Int, Socket) 424 | bindRandomPortTCP s = do 425 | (port, sock) <- bindRandomPortGen NS.Stream s 426 | NS.listen sock (max 2048 NS.maxListenQueue) 427 | return (port, sock) 428 | 429 | -- | Try to accept a connection, recovering automatically from exceptions. 430 | -- 431 | -- As reported by Kazu against Warp, "resource exhausted (Too many open files)" 432 | -- may be thrown by accept(). This function will catch that exception, wait a 433 | -- second, and then try again. 434 | acceptSafe :: Socket -> IO (Socket, NS.SockAddr) 435 | acceptSafe socket = 436 | #ifndef SOCKET_ACCEPT_RECV_WORKAROUND 437 | loop 438 | #else 439 | do var <- newEmptyMVar 440 | forkIO $ loop >>= putMVar var 441 | takeMVar var 442 | #endif 443 | where 444 | loop = 445 | NS.accept socket `E.catch` \e -> 446 | if isFullErrorType (ioeGetErrorType e) 447 | then do 448 | threadDelay 1000000 449 | loop 450 | else E.throwIO e 451 | 452 | message :: ByteString -> NS.SockAddr -> Message 453 | message = Message 454 | 455 | class HasPort a where 456 | portLens :: Functor f => (Int -> f Int) -> a -> f a 457 | instance HasPort ServerSettings where 458 | portLens f ss = fmap (\p -> ss { serverPort = p }) (f (serverPort ss)) 459 | instance HasPort ClientSettings where 460 | portLens f ss = fmap (\p -> ss { clientPort = p }) (f (clientPort ss)) 461 | 462 | getPort :: HasPort a => a -> Int 463 | getPort = getConstant . portLens Constant 464 | 465 | setPort :: HasPort a => Int -> a -> a 466 | setPort p = runIdentity . portLens (const (Identity p)) 467 | 468 | setHost :: ByteString -> ClientSettings -> ClientSettings 469 | setHost hp ss = ss { clientHost = hp } 470 | 471 | getHost :: ClientSettings -> ByteString 472 | getHost = clientHost 473 | 474 | -- | Set the address family for the given settings. 475 | -- 476 | -- Since 0.1.3 477 | setAddrFamily :: NS.Family -> ClientSettings -> ClientSettings 478 | setAddrFamily af cs = cs { clientAddrFamily = af } 479 | 480 | -- | Get the address family for the given settings. 481 | -- 482 | -- Since 0.1.3 483 | getAddrFamily :: ClientSettings -> NS.Family 484 | getAddrFamily = clientAddrFamily 485 | 486 | class HasPath a where 487 | pathLens :: Functor f => (FilePath -> f FilePath) -> a -> f a 488 | instance HasPath ServerSettingsUnix where 489 | pathLens f ss = fmap (\p -> ss { serverPath = p }) (f (serverPath ss)) 490 | instance HasPath ClientSettingsUnix where 491 | pathLens f ss = fmap (\p -> ss { clientPath = p }) (f (clientPath ss)) 492 | 493 | getPath :: HasPath a => a -> FilePath 494 | getPath = getConstant . pathLens Constant 495 | 496 | setPath :: HasPath a => FilePath -> a -> a 497 | setPath p = runIdentity . pathLens (const (Identity p)) 498 | 499 | setNeedLocalAddr :: Bool -> ServerSettings -> ServerSettings 500 | setNeedLocalAddr x y = y { serverNeedLocalAddr = x } 501 | 502 | getNeedLocalAddr :: ServerSettings -> Bool 503 | getNeedLocalAddr = serverNeedLocalAddr 504 | 505 | class HasAfterBind a where 506 | afterBindLens :: Functor f => ((Socket -> IO ()) -> f (Socket -> IO ())) -> a -> f a 507 | instance HasAfterBind ServerSettings where 508 | afterBindLens f ss = fmap (\p -> ss { serverAfterBind = p }) (f (serverAfterBind ss)) 509 | instance HasAfterBind ServerSettingsUnix where 510 | afterBindLens f ss = fmap (\p -> ss { serverAfterBindUnix = p }) (f (serverAfterBindUnix ss)) 511 | 512 | getAfterBind :: HasAfterBind a => a -> (Socket -> IO ()) 513 | getAfterBind = getConstant . afterBindLens Constant 514 | 515 | setAfterBind :: HasAfterBind a => (Socket -> IO ()) -> a -> a 516 | setAfterBind p = runIdentity . afterBindLens (const (Identity p)) 517 | 518 | -- | Since 0.1.13 519 | class HasReadBufferSize a where 520 | readBufferSizeLens :: Functor f => (Int -> f Int) -> a -> f a 521 | -- | Since 0.1.13 522 | instance HasReadBufferSize ServerSettings where 523 | readBufferSizeLens f ss = fmap (\p -> ss { serverReadBufferSize = p }) (f (serverReadBufferSize ss)) 524 | -- | Since 0.1.13 525 | instance HasReadBufferSize ClientSettings where 526 | readBufferSizeLens f cs = fmap (\p -> cs { clientReadBufferSize = p }) (f (clientReadBufferSize cs)) 527 | -- | Since 0.1.13 528 | instance HasReadBufferSize ServerSettingsUnix where 529 | readBufferSizeLens f ss = fmap (\p -> ss { serverReadBufferSizeUnix = p }) (f (serverReadBufferSizeUnix ss)) 530 | -- | Since 0.1.14 531 | instance HasReadBufferSize ClientSettingsUnix where 532 | readBufferSizeLens f ss = fmap (\p -> ss { clientReadBufferSizeUnix = p }) (f (clientReadBufferSizeUnix ss)) 533 | 534 | -- | Get buffer size used when reading from socket. 535 | -- 536 | -- Since 0.1.13 537 | getReadBufferSize :: HasReadBufferSize a => a -> Int 538 | getReadBufferSize = getConstant . readBufferSizeLens Constant 539 | 540 | -- | Set buffer size used when reading from socket. 541 | -- 542 | -- Since 0.1.13 543 | setReadBufferSize :: HasReadBufferSize a => Int -> a -> a 544 | setReadBufferSize p = runIdentity . readBufferSizeLens (const (Identity p)) 545 | 546 | type ConnectionHandle = Socket -> NS.SockAddr -> Maybe NS.SockAddr -> IO () 547 | 548 | runTCPServerWithHandle :: ServerSettings -> ConnectionHandle -> IO a 549 | runTCPServerWithHandle (ServerSettings port host msocket afterBind needLocalAddr _) handle = 550 | case msocket of 551 | Nothing -> E.bracket (bindPortTCP port host) NS.close inner 552 | Just lsocket -> inner lsocket 553 | where 554 | inner lsocket = afterBind lsocket >> forever (serve lsocket) 555 | serve lsocket = E.bracketOnError 556 | (acceptSafe lsocket) 557 | (\(socket, _) -> NS.close socket) 558 | $ \(socket, addr) -> do 559 | mlocal <- if needLocalAddr 560 | then fmap Just $ NS.getSocketName socket 561 | else return Nothing 562 | _ <- E.mask $ \restore -> forkIO 563 | $ restore (handle socket addr mlocal) 564 | `E.finally` NS.close socket 565 | return () 566 | 567 | 568 | 569 | -- | Run an @Application@ with the given settings. This function will create a 570 | -- new listening socket, accept connections on it, and spawn a new thread for 571 | -- each connection. 572 | runTCPServer :: ServerSettings -> (AppData -> IO ()) -> IO a 573 | runTCPServer settings app = runTCPServerWithHandle settings app' 574 | where app' socket addr mlocal = 575 | let ad = AppData 576 | { appRead' = safeRecv socket $ getReadBufferSize settings 577 | , appWrite' = sendAll socket 578 | , appSockAddr' = addr 579 | , appLocalAddr' = mlocal 580 | , appCloseConnection' = NS.close socket 581 | , appRawSocket' = Just socket 582 | } 583 | in 584 | app ad 585 | 586 | -- | Run an @Application@ by connecting to the specified server. 587 | runTCPClient :: ClientSettings -> (AppData -> IO a) -> IO a 588 | runTCPClient (ClientSettings port host addrFamily readBufferSize) app = E.bracket 589 | (getSocketFamilyTCP host port addrFamily) 590 | (NS.close . fst) 591 | (\(s, address) -> app AppData 592 | { appRead' = safeRecv s readBufferSize 593 | , appWrite' = sendAll s 594 | , appSockAddr' = address 595 | , appLocalAddr' = Nothing 596 | , appCloseConnection' = NS.close s 597 | , appRawSocket' = Just s 598 | }) 599 | 600 | appLocalAddr :: AppData -> Maybe NS.SockAddr 601 | appLocalAddr = appLocalAddr' 602 | 603 | appSockAddr :: AppData -> NS.SockAddr 604 | appSockAddr = appSockAddr' 605 | 606 | -- | Close the underlying connection. One possible use case is simulating 607 | -- connection failures in a test suite. 608 | -- 609 | -- Since 0.1.6 610 | appCloseConnection :: AppData -> IO () 611 | appCloseConnection = appCloseConnection' 612 | 613 | -- | Get the raw socket for this @AppData@, if available. 614 | -- 615 | -- Since 0.1.12 616 | appRawSocket :: AppData -> Maybe NS.Socket 617 | appRawSocket = appRawSocket' 618 | 619 | class HasReadWrite a where 620 | readLens :: Functor f => (IO ByteString -> f (IO ByteString)) -> a -> f a 621 | writeLens :: Functor f => ((ByteString -> IO ()) -> f (ByteString -> IO ())) -> a -> f a 622 | instance HasReadWrite AppData where 623 | readLens f a = fmap (\x -> a { appRead' = x }) (f (appRead' a)) 624 | writeLens f a = fmap (\x -> a { appWrite' = x }) (f (appWrite' a)) 625 | instance HasReadWrite AppDataUnix where 626 | readLens f a = fmap (\x -> a { appReadUnix = x }) (f (appReadUnix a)) 627 | writeLens f a = fmap (\x -> a { appWriteUnix = x }) (f (appWriteUnix a)) 628 | 629 | appRead :: HasReadWrite a => a -> IO ByteString 630 | appRead = getConstant . readLens Constant 631 | 632 | appWrite :: HasReadWrite a => a -> ByteString -> IO () 633 | appWrite = getConstant . writeLens Constant 634 | 635 | -- | Run an @Application@ with the given settings. This function will create a 636 | -- new listening socket, accept connections on it, and spawn a new thread for 637 | -- each connection. 638 | runUnixServer :: ServerSettingsUnix -> (AppDataUnix -> IO ()) -> IO a 639 | runUnixServer (ServerSettingsUnix path afterBind readBufferSize) app = E.bracket 640 | (bindPath path) 641 | NS.close 642 | (\socket -> do 643 | afterBind socket 644 | forever $ serve socket) 645 | where 646 | serve lsocket = E.bracketOnError 647 | (acceptSafe lsocket) 648 | (\(socket, _) -> NS.close socket) 649 | $ \(socket, _) -> do 650 | let ad = AppDataUnix 651 | { appReadUnix = safeRecv socket readBufferSize 652 | , appWriteUnix = sendAll socket 653 | } 654 | _ <- E.mask $ \restore -> forkIO 655 | $ restore (app ad) 656 | `E.finally` NS.close socket 657 | return () 658 | 659 | -- | Run an @Application@ by connecting to the specified server. 660 | runUnixClient :: ClientSettingsUnix -> (AppDataUnix -> IO a) -> IO a 661 | runUnixClient (ClientSettingsUnix path readBufferSize) app = E.bracket 662 | (getSocketUnix path) 663 | NS.close 664 | (\sock -> app AppDataUnix 665 | { appReadUnix = safeRecv sock readBufferSize 666 | , appWriteUnix = sendAll sock 667 | }) 668 | --------------------------------------------------------------------------------