├── .github └── workflows │ └── jekyll.yml ├── .gitignore ├── 404.html ├── CNAME ├── Gemfile ├── Gemfile.lock ├── LICENSE ├── Z-Data ├── Array.md ├── FFI.md ├── JSON.md ├── Parser-and-Builder.md ├── Vector-Bytes-Text.md └── index.md ├── Z-IO ├── BIO-Streaming.md ├── Filesystem.md ├── Logger.md ├── Network.md └── index.md ├── _config.yml ├── _data └── version.yml ├── _layouts └── post.html ├── _posts ├── 2021-02-01-High-performance-JSON-codec.md └── 2021-04-20-introduce-BIO-a-simple-streaming-abstraction.md ├── _sass └── custom │ └── custom.scss ├── benchmarks.md ├── blog.md ├── guide.md ├── haddock.inject.utterances.via.mathjax.js └── index.html /.github/workflows/jekyll.yml: -------------------------------------------------------------------------------- 1 | # This workflow uses actions that are not certified by GitHub. 2 | # They are provided by a third-party and are governed by 3 | # separate terms of service, privacy policy, and support 4 | # documentation. 5 | 6 | # Sample workflow for building and deploying a Jekyll site to GitHub Pages 7 | name: Deploy Jekyll site to Pages 8 | 9 | on: 10 | # Runs on pushes targeting the default branch 11 | push: 12 | branches: ["master"] 13 | 14 | # Allows you to run this workflow manually from the Actions tab 15 | workflow_dispatch: 16 | 17 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages 18 | permissions: 19 | contents: read 20 | pages: write 21 | id-token: write 22 | 23 | # Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. 24 | # However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. 
25 | concurrency: 26 | group: "pages" 27 | cancel-in-progress: false 28 | 29 | jobs: 30 | # Build job 31 | build: 32 | runs-on: ubuntu-latest 33 | steps: 34 | - name: Checkout 35 | uses: actions/checkout@v3 36 | - name: Setup Ruby 37 | uses: ruby/setup-ruby@55283cc23133118229fd3f97f9336ee23a179fcf # v1.146.0 38 | with: 39 | ruby-version: '3.1' # Not needed with a .ruby-version file 40 | bundler-cache: true # runs 'bundle install' and caches installed gems automatically 41 | cache-version: 0 # Increment this number if you need to re-download cached gems 42 | - name: Setup Pages 43 | id: pages 44 | uses: actions/configure-pages@v3 45 | - name: Build with Jekyll 46 | # Outputs to the './_site' directory by default 47 | run: bundle exec jekyll build --baseurl "${{ steps.pages.outputs.base_path }}" 48 | env: 49 | JEKYLL_ENV: production 50 | - name: Upload artifact 51 | # Automatically uploads an artifact from the './_site' directory by default 52 | uses: actions/upload-pages-artifact@v1 53 | 54 | # Deployment job 55 | deploy: 56 | environment: 57 | name: github-pages 58 | url: ${{ steps.deployment.outputs.page_url }} 59 | runs-on: ubuntu-latest 60 | needs: build 61 | steps: 62 | - name: Deploy to GitHub Pages 63 | id: deployment 64 | uses: actions/deploy-pages@v2 65 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | dist 2 | dist-* 3 | cabal-dev 4 | *.o 5 | *.hi 6 | *.hie 7 | *.chi 8 | *.chs.h 9 | *.dyn_o 10 | *.dyn_hi 11 | .hpc 12 | .hsenv 13 | .cabal-sandbox/ 14 | cabal.sandbox.config 15 | *.prof 16 | *.aux 17 | *.hp 18 | *.eventlog 19 | .stack-work/ 20 | cabal.project.local 21 | cabal.project.local~ 22 | .HTF/ 23 | .ghc.environment.* 24 | .vscode/ 25 | _site 26 | .sass-cache 27 | -------------------------------------------------------------------------------- /404.html: 
-------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | --- 4 | 5 | 18 | 19 |
20 |

404

21 | 22 |

Page not found :(

23 |

The requested page could not be found.

24 |
25 | -------------------------------------------------------------------------------- /CNAME: -------------------------------------------------------------------------------- 1 | z.haskell.world 2 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source "https://rubygems.org" 2 | 3 | # Hello! This is where you manage which Jekyll version is used to run. 4 | # When you want to use a different version, change it below, save the 5 | # file and run `bundle install`. Run Jekyll with `bundle exec`, like so: 6 | # 7 | # bundle exec jekyll serve 8 | # 9 | # This will help ensure the proper Jekyll version is running. 10 | # Happy Jekylling! 11 | gem "jekyll", "~> 3.9.0" 12 | 13 | # This is the default theme for new Jekyll sites. You may change this to anything you like. 14 | gem "minima", "~> 2.0" 15 | 16 | gem "kramdown", "~> 2.3.1" 17 | gem "kramdown-parser-gfm", "~> 1.1.0" 18 | 19 | # If you want to use GitHub Pages, remove the "gem "jekyll"" above and 20 | # uncomment the line below. To upgrade, run `bundle update github-pages`. 21 | # gem "github-pages", group: :jekyll_plugins 22 | 23 | # If you have any plugins, put them here! 24 | group :jekyll_plugins do 25 | gem "jekyll-feed", "~> 0.6" 26 | end 27 | 28 | # Windows does not include zoneinfo files, so bundle the tzinfo-data gem 29 | # and associated library. 30 | install_if -> { RUBY_PLATFORM =~ %r!mingw|mswin|java! } do 31 | gem "tzinfo", "~> 1.2" 32 | gem "tzinfo-data" 33 | end 34 | 35 | # Performance-booster for watching directories on Windows 36 | gem "wdm", "~> 0.1.0", :install_if => Gem.win_platform? 
37 | 38 | 39 | gem "just-the-docs", "~> 0.3.3" 40 | -------------------------------------------------------------------------------- /Gemfile.lock: -------------------------------------------------------------------------------- 1 | GEM 2 | remote: https://rubygems.org/ 3 | specs: 4 | addressable (2.8.0) 5 | public_suffix (>= 2.0.2, < 5.0) 6 | colorator (1.1.0) 7 | concurrent-ruby (1.1.8) 8 | em-websocket (0.5.2) 9 | eventmachine (>= 0.12.9) 10 | http_parser.rb (~> 0.6.0) 11 | eventmachine (1.2.7) 12 | ffi (1.14.2) 13 | forwardable-extended (2.6.0) 14 | http_parser.rb (0.6.0) 15 | i18n (0.9.5) 16 | concurrent-ruby (~> 1.0) 17 | jekyll (3.9.0) 18 | addressable (~> 2.4) 19 | colorator (~> 1.0) 20 | em-websocket (~> 0.5) 21 | i18n (~> 0.7) 22 | jekyll-sass-converter (~> 1.0) 23 | jekyll-watch (~> 2.0) 24 | kramdown (>= 1.17, < 3) 25 | liquid (~> 4.0) 26 | mercenary (~> 0.3.3) 27 | pathutil (~> 0.9) 28 | rouge (>= 1.7, < 4) 29 | safe_yaml (~> 1.0) 30 | jekyll-feed (0.15.1) 31 | jekyll (>= 3.7, < 5.0) 32 | jekyll-sass-converter (1.5.2) 33 | sass (~> 3.4) 34 | jekyll-seo-tag (2.7.1) 35 | jekyll (>= 3.8, < 5.0) 36 | jekyll-watch (2.2.1) 37 | listen (~> 3.0) 38 | just-the-docs (0.3.3) 39 | jekyll (>= 3.8.5) 40 | jekyll-seo-tag (~> 2.0) 41 | rake (>= 12.3.1, < 13.1.0) 42 | kramdown (2.3.1) 43 | rexml 44 | kramdown-parser-gfm (1.1.0) 45 | kramdown (~> 2.0) 46 | liquid (4.0.3) 47 | listen (3.4.1) 48 | rb-fsevent (~> 0.10, >= 0.10.3) 49 | rb-inotify (~> 0.9, >= 0.9.10) 50 | mercenary (0.3.6) 51 | minima (2.5.1) 52 | jekyll (>= 3.5, < 5.0) 53 | jekyll-feed (~> 0.9) 54 | jekyll-seo-tag (~> 2.1) 55 | pathutil (0.16.2) 56 | forwardable-extended (~> 2.6) 57 | public_suffix (4.0.6) 58 | rake (13.0.3) 59 | rb-fsevent (0.10.4) 60 | rb-inotify (0.10.1) 61 | ffi (~> 1.0) 62 | rexml (3.2.8) 63 | strscan (>= 3.0.9) 64 | rouge (3.26.0) 65 | safe_yaml (1.0.5) 66 | sass (3.7.4) 67 | sass-listen (~> 4.0.0) 68 | sass-listen (4.0.0) 69 | rb-fsevent (~> 0.9, >= 0.9.4) 70 | rb-inotify (~> 0.9, >= 
0.9.7) 71 | strscan (3.1.0) 72 | thread_safe (0.3.6) 73 | tzinfo (1.2.9) 74 | thread_safe (~> 0.1) 75 | tzinfo-data (1.2021.1) 76 | tzinfo (>= 1.0.0) 77 | wdm (0.1.1) 78 | 79 | PLATFORMS 80 | ruby 81 | 82 | DEPENDENCIES 83 | jekyll (~> 3.9.0) 84 | jekyll-feed (~> 0.6) 85 | just-the-docs (~> 0.3.3) 86 | kramdown (~> 2.3.1) 87 | kramdown-parser-gfm (~> 1.1.0) 88 | minima (~> 2.0) 89 | tzinfo (~> 1.2) 90 | tzinfo-data 91 | wdm (~> 0.1.0) 92 | 93 | BUNDLED WITH 94 | 2.1.4 95 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Z-Data LICENSE 2 | 3 | Copyright (c) Z.Haskell Contributors, 2017-2022 4 | 5 | All rights reserved. 6 | 7 | Redistribution and use in source and binary forms, with or without 8 | modification, are permitted provided that the following conditions are met: 9 | 10 | * Redistributions of source code must retain the above copyright 11 | notice, this list of conditions and the following disclaimer. 12 | 13 | * Redistributions in binary form must reproduce the above 14 | copyright notice, this list of conditions and the following 15 | disclaimer in the documentation and/or other materials provided 16 | with the distribution. 17 | 18 | * Neither the name of winter nor the names of other 19 | contributors may be used to endorse or promote products derived 20 | from this software without specific prior written permission. 21 | 22 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 25 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 26 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 27 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 28 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 | 34 | -------------------------------------------------------------------------------- 35 | utf8rewind LICENSE 36 | 37 | Copyright (C) 2014-2016 Quinten Lansu 38 | Copyright (C) 2019-2020 Dong Han 39 | 40 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 41 | 42 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 43 | 44 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
45 | 46 | -------------------------------------------------------------------------------- 47 | fastvalidate-utf-8 LICENSE 48 | 49 | Daniel Lemire 50 | Kendall Willets 51 | Zach Bjornson 52 | 53 | Permission is hereby granted, free of charge, to any 54 | person obtaining a copy of this software and associated 55 | documentation files (the "Software"), to deal in the 56 | Software without restriction, including without 57 | limitation the rights to use, copy, modify, merge, 58 | publish, distribute, sublicense, and/or sell copies of 59 | the Software, and to permit persons to whom the Software 60 | is furnished to do so, subject to the following 61 | conditions: 62 | 63 | The above copyright notice and this permission notice 64 | shall be included in all copies or substantial portions 65 | of the Software. 66 | 67 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 68 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 69 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 70 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 71 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 72 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 73 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 74 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 75 | DEALINGS IN THE SOFTWARE. 76 | 77 | -------------------------------------------------------------------------------- 78 | Copyright (c) 2016-2019, Powturbo 79 | All rights reserved. 80 | 81 | Redistribution and use in source and binary forms, with or without 82 | modification, are permitted provided that the following conditions are 83 | met: 84 | 85 | 1. Redistributions of source code must retain the above copyright 86 | notice, this list of conditions and the following disclaimer. 87 | 88 | 2. 
Redistributions in binary form must reproduce the above copyright 89 | notice, this list of conditions and the following disclaimer in the 90 | documentation and/or other materials provided with the distribution. 91 | 92 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 93 | IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 94 | TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 95 | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 96 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 97 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 98 | TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 99 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 100 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 101 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 102 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 103 | 104 | - homepage : https://sites.google.com/site/powturbo/ 105 | - github : https://github.com/powturbo 106 | - twitter : https://twitter.com/powturbo 107 | - email : powturbo [_AT_] gmail [_DOT_] com 108 | -------------------------------------------------------------------------------- /Z-Data/Array.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | parent: Z-Data 4 | title: Array 5 | nav_order: 1 6 | --- 7 | 8 | ## Table of contents 9 | {: .no_toc .text-delta } 10 | 11 | 1. TOC 12 | {:toc} 13 | 14 | # Array in Haskell 15 | 16 | Unlike the ubiquitous linked list type `[a]`. 
In Haskell, arrays don't have any built-in syntax support, or any other special compiler support except some built-in primitive functions, which can be found in [ghc-prim](http://hackage.haskell.org/package/ghc-prim/docs/GHC-Prim.html): 17 | 18 | ```haskell 19 | newArray# :: Int# -> a -> State# s -> (# State# s, MutableArray# s a #) 20 | readArray# :: MutableArray# s a -> Int# -> State# s -> (# State# s, a #) 21 | writeArray# :: MutableArray# s a -> Int# -> a -> State# s -> State# s 22 | newByteArray# :: Int# -> State# s -> (# State# s, MutableByteArray# s #) 23 | indexInt8Array# :: ByteArray# -> Int# -> Int# 24 | indexInt16Array# :: ByteArray# -> Int# -> Int# 25 | ... 26 | ``` 27 | 28 | It's hard to use those functions directly because they directly manipulate the `State#` token, and they distinguish different array types: boxed `Array#`, `ByteArray#`, etc. The `#` after those types implies they are special primitive types, which will be discussed later. 29 | 30 | In [Z-Data](https://hackage.haskell.org/package/Z-Data), we provide type wrappers and a typeclass to unify array operations: 31 | 32 | ```haskell 33 | class Arr (arr :: * -> * ) a where 34 | -- | Mutable version of this array type. 35 | type MArr arr = (mar :: * -> * -> *) | mar -> arr 36 | -- | Make a new array with given size. 37 | newArr :: (PrimMonad m, PrimState m ~ s) => Int -> m (marr s a) 38 | -- | Make a new array and fill it with an initial value. 39 | newArrWith :: (PrimMonad m, PrimState m ~ s) => Int -> a -> m (marr s a) 40 | -- | Index mutable array in a primitive monad. 41 | readArr :: (PrimMonad m, PrimState m ~ s) => marr s a -> Int -> m a 42 | -- | Write mutable array in a primitive monad. 43 | writeArr :: (PrimMonad m, PrimState m ~ s) => marr s a -> Int -> a -> m () 44 | -- | Fill mutable array with a given value. 
45 | setArr :: (PrimMonad m, PrimState m ~ s) => marr s a -> Int -> Int -> a -> m () 46 | -- | Index immutable array, which is a pure operation, 47 | indexArr :: arr a -> Int -> a 48 | -- | Index immutable array in a primitive monad, this helps in situations that 49 | -- you want your indexing result is not a thunk referencing whole array. 50 | indexArrM :: (Monad m) => arr a -> Int -> m a 51 | -- | Safely freeze mutable array by make a immutable copy of its slice. 52 | freezeArr :: (PrimMonad m, PrimState m ~ s) => marr s a -> Int -> Int -> m (arr a) 53 | -- | Safely thaw immutable array by make a mutable copy of its slice. 54 | thawArr :: (PrimMonad m, PrimState m ~ s) => arr a -> Int -> Int -> m (marr s a) 55 | -- | In place freeze a mutable array, the original mutable array can not be used 56 | -- anymore. 57 | unsafeFreezeArr :: (PrimMonad m, PrimState m ~ s) => marr s a -> m (arr a) 58 | -- | In place thaw a immutable array, the original immutable array can not be used 59 | -- anymore. 60 | unsafeThawArr :: (PrimMonad m, PrimState m ~ s) => arr a -> m (marr s a) 61 | -- | Copy a slice of immutable array to mutable array at given offset. 62 | copyArr :: (PrimMonad m, PrimState m ~ s) => marr s a -> Int -> arr a -> Int -> Int -> m () 63 | -- | Copy a slice of mutable array to mutable array at given offset. 64 | -- The two mutable arrays shall no be the same one. 65 | copyMutableArr :: (PrimMonad m, PrimState m ~ s) => marr s a -> Int -> marr s a -> Int -> Int -> m () 66 | -- | Copy a slice of mutable array to mutable array at given offset. 67 | -- The two mutable arrays may be the same one. 68 | moveArr :: (PrimMonad m, PrimState m ~ s) => marr s a -> Int -> marr s a -> Int -> Int -> m () 69 | -- | Create immutable copy. 70 | cloneArr :: arr a -> Int -> Int -> arr a 71 | -- | Create mutable copy. 72 | cloneMutableArr :: (PrimMonad m, PrimState m ~ s) => marr s a -> Int -> Int -> m (marr s a) 73 | -- | Resize mutable array to given size. 
74 | resizeMutableArr :: (PrimMonad m, PrimState m ~ s) => marr s a -> Int -> m (marr s a) 75 | -- | Shrink mutable array to given size. This operation only works on primitive arrays. 76 | -- For boxed array, this is a no-op, e.g. 'sizeOfMutableArr' will not change. 77 | shrinkMutableArr :: (PrimMonad m, PrimState m ~ s) => marr s a -> Int -> m () 78 | -- | Is two mutable array are reference equal. 79 | sameMutableArr :: marr s a -> marr s a -> Bool 80 | -- | Size of immutable array. 81 | sizeofArr :: arr a -> Int 82 | -- | Size of mutable array. 83 | sizeofMutableArr :: (PrimMonad m, PrimState m ~ s) => marr s a -> m Int 84 | -- | Is two immutable array are referencing the same one. 85 | sameArr :: arr a -> arr a -> Bool 86 | ``` 87 | 88 | And we have following instances: 89 | 90 | ```haskell 91 | -- | Boxed array type, for holding Haskell ADTs. 92 | instance Arr Array a where 93 | type MArr Array = MutableArray 94 | ... 95 | -- | Boxed array type, for holding Haskell ADTs, but doesn't carry a card table. 96 | instance Arr SmallArray a where 97 | type MArr SmallArray = SmallMutableArray 98 | ... 99 | -- | Unboxed array type, for holding primitive types like Int, Word8, etc. 100 | instance Prim a => Arr PrimArray a where 101 | type MArr PrimArray = MutablePrimArray 102 | ... 103 | -- | Boxed array type, for holding boxed unlifted types, see following section. 104 | instance PrimUnlifted a => Arr UnliftedArray a where 105 | type MArr UnliftedArray = MutableUnliftedArray 106 | ... 107 | ``` 108 | 109 | If you know how `IO` works in Haskell, `PrimMonad` simply means `ST` or `IO`. But if you get confused by the `PrimMonad` constraint, please get [more details here](https://wiki.haskell.org/IO_inside). 110 | 111 | # Boxed, Unboxed 112 | 113 | For many Haskellers, using arrays may be the first time one wants to know what's the difference between boxed, unboxed types. It's important to spend some time explaining these buzzwords. 
114 | 115 | In other languages, you often have to distinguish *reference* and *value*. For example, in C pointers are references to other objects. A pointer is a memory location in the hardware sense: you can use machine code to follow a reference to the memory it points to. Values of the other, non-pointer types are not memory locations; their 1-0 arrangements stand for a certain value of that type. 116 | 117 | In Haskell almost every value you see is a pointer from C's perspective, i.e. a memory location pointing to a heap object, for example a data type like: 118 | 119 | ```haskell 120 | data Foo = Foo Int Char 121 | foo = Foo 3 'a' 122 | ``` 123 | 124 | is represented as: 125 | 126 | ``` 127 | foo(from registers or other boxes) 128 | | 129 | V 130 | +----+--------+---+---+ +-------------+------+ 131 | | info-table* | * | * +--->+ info-table* | 'a'# | 132 | +-------------+-+-+---+ +-------------+------+ 133 | Foo | C# (Char's constructor) 134 | V 135 | +---+---------+----+ 136 | | info-table* | 3# | 137 | +-------------+----+ 138 | I# (Int's constructor) 139 | ``` 140 | 141 | During runtime the value `foo` is a reference, and all the operations, e.g. pattern match, go through dereferencing. Values like this are called *boxed* because each one is a reference to a box, i.e. a heap object with an [info-table](https://gitlab.haskell.org/ghc/ghc/-/wikis/commentary/rts/storage/heap-objects#info-tables). The info-table contains much useful information about the box, such as how many words the box occupies, which constructor the box stands for, etc. 142 | 143 | The `3#` and `'a'#` above are Haskell's non-pointer values; we call values like this *unboxed* values. Unboxed values don't have info-tables, so we really can't have them directly on the heap: otherwise the GC would get confused when it scans them, because without information from an info-table it can't decide how many bytes to copy. These values usually belong to registers or other boxes: we generate machine code to manipulate them directly. 
144 | 145 | 146 | ## Boxed array 147 | 148 | Now let's consider GHC arrays, they're special heap objects provided by RTS. We have boxed arrays `MutableArray#` and `Array#` that store references to boxes: 149 | 150 | ``` 151 | +-------------+--------------+---------------------------+---+-...-+---+---+------------+ 152 | | info-table* | payload size | payload + card-table size | * | ... | * | * | card table | 153 | +-------------+--------------+---------------------------+-+-+-...-+---+---+------------+ 154 | MutableArray# | 155 | Array# V 156 | +------+------+-----+ 157 | | info-table* | ... | 158 | +-------------+-----+ 159 | Boxes, maybe a thunk 160 | Most of the operations on boxed array 161 | are lazy on its element 162 | ``` 163 | 164 | It looks quite complicated, especially the card-table part, which is used to [optimize the GC for arrays](https://gitlab.haskell.org/ghc/ghc/-/wikis/commentary/rts/storage/gc/remembered-sets). `MutableArray#`s are always kept in a generation's mutable list once they are promoted to that generation, so this optimization is important if you keep a large mutable array on the heap for a long time. For small arrays, it's unnecessary to use a card-table, and GHC provides `SmallMutableArray#/SmallArray#` for that purpose. 165 | 166 | ``` 167 | +-------------+--------------+---+-...-+---+---+ 168 | | info-table* | payload size | * | ... 
| * | * | 169 | +-------------+--------------+---+-...-+---+---+ 170 | SmallMutableArray# 171 | SmallArray# 172 | ``` 173 | 174 | There are ADT wrappers for these types to make them easier to work with: 175 | 176 | ```haskell 177 | data MutableArray s a = MutableArray (MutableArray# s a) 178 | data Array a = Array (Array# a) 179 | 180 | data SmallMutableArray s a = SmallMutableArray (SmallMutableArray# s a) 181 | data SmallArray a = SmallArray (SmallArray# a) 182 | ``` 183 | 184 | A common pattern in Haskell is to turn a `MutableArray` into an `Array` with freeze operations after creation completes, but the card-table's space is still there in case we thaw the array in place again. Generally speaking, under the creation-freeze pattern, `SmallMutableArray` and `SmallArray` are recommended, since you won't keep the mutable array on the heap for too long. 185 | 186 | ## Unboxed array 187 | 188 | `MutableByteArray#`, `ByteArray#` are GHC's unboxed arrays. They don't contain pointers, and their payload does not need to be traced during GC: 189 | 190 | ``` 191 | +-------------+--------------+-------------+---+-...-+---+---+ 192 | | info-table* | payload size | 0xXXXXXXXX# | # | ... | # | # | 193 | +-------------+--------------+-------------+---+-...-+---+---+ 194 | MutableByteArray# 195 | ByteArray# 196 | ``` 197 | 198 | `ByteArray#`s can be used to encode non-pointer data of different sizes, such as `Int` and `Word8`; `ghc-prim` provides separate functions to work with different data types: `indexIntArray#`, `indexWord8Array#`, etc. So there are the `Prim` class and the `PrimArray` type to make working with different types easier: 199 | 200 | ```haskell 201 | -- types which can be stored in ByteArray# 202 | class Prim a where 203 | indexByteArray# :: ByteArray# -> Int# -> a 204 | ... 205 | 206 | -- | type indexed ByteArray# 207 | data PrimArray a = PrimArray ByteArray# 208 | 209 | indexPrimArray :: Prim a => PrimArray a -> Int -> a 210 | ... 
211 | ``` 212 | 213 | # Lifted, Unlifted 214 | 215 | Another distinction between types, unlifted versus lifted, exists because Haskell has a non-strict evaluation mechanism, e.g. a value `1 + 2` may have a representation like: 216 | 217 | ``` 218 | +-------------+----------+---+ +-------------+----+ 219 | | info-table* | reserved | * +--->+ info-table* | 2# | 220 | +------+------+----------+---+ +-------------+----+ 221 | | This is I# 222 | V 223 | The info-table points to (+1) code. 224 | ``` 225 | 226 | In Haskell `1 + 2` and `3` are both references, and they can be used interchangeably: a function expecting an `Int` argument can accept both pointers. This is done by *entering* the heap object, i.e. executing the entry code following the info-table. The entry code for constructors simply returns. For thunks the code will do the evaluation, and the `reserved` word above is reserved exactly for the evaluation result, by writing a forward pointer and changing the thunk box into an indirection box. 227 | 228 | The evaluation may fail (diverging recursion, stack overflow, etc.), so the pointer could potentially point to an undefined value; this kind of thing is called *bottom* in Haskell, written as `_|_`. The intuition for this name is that all the other evaluated values have a certain meaning, but bottom doesn't: it sits lower in the spectrum of determinism, concreteness, usefulness ... whatever suits your mind. Hence comes the concept of a `lifted` type, i.e. a type which contains `bottom` values, or more formally, is inhabited by `_|_`. 229 | 230 | As you would expect, most boxed types can be inhabited by `_|_`: the thunk may explode and terminate your program, or call `error` or `undefined` in base. And most of the unboxed types are unlifted types. e.g. 
It's impossible that an `Int#` would stand for an undefined value, because all 1-0 arrangements would represent an `Int#`, or to put it another way: there's no way we get a bottom from `Int#`, because it doesn't have an info-table, and we can't enter it. 231 | 232 | But some boxed unlifted types do exist, e.g. `MutableArray#/Array#` are such types: their representation on the heap has an info-table pointer, but they are never entered. All the primitive operations manipulating them won't enter them, and the only way to create them is via `newArray#`, `cloneArray#`, etc. 233 | 234 | To efficiently store boxed unlifted types, the `PrimUnlifted` class and the `UnliftedArray` type are introduced, similar to `Prim` and `PrimArray`; `UnliftedArray` stores unlifted references instead of normal Haskell ADTs. Compared with `Array Array`, `UnliftedArray Array` could remove a level of indirection, i.e. remove the item's `Array` box and store `Array#` directly. 235 | 236 | # More on arrays 237 | 238 | There are more details on Haskell arrays, such as pinned vs unpinned `ByteArray`s, etc. Interested readers can find all these details on the [GHC wiki](https://gitlab.haskell.org/ghc/ghc/-/wikis/home), especially in the RTS section. 239 | To use arrays properly, all you need to do is choose the proper storage type and import `Z.Data.Array`. In the next section we will introduce vectors, which are simply slices of arrays. 240 | -------------------------------------------------------------------------------- /Z-Data/FFI.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | parent: Z-Data 4 | title: FFI 5 | nav_order: 5 6 | --- 7 | 8 | ## Table of contents 9 | {: .no_toc .text-delta } 10 | 11 | 1. TOC 12 | {:toc} 13 | 14 | # FFI: Foreign Function Interface 15 | 16 | The Haskell [foreign function interface](https://wiki.haskell.org/Foreign_Function_Interface) is a specification for calling foreign functions (mainly C functions) from Haskell. 
It looks like this: 17 | 18 | + In `Foo.hs`: 19 | 20 | ```haskell 21 | foreign import ccall unsafe "foo" c_foo :: CInt -> CInt -> IO CInt 22 | ``` 23 | 24 | + In `foo.c`: 25 | 26 | ```c 27 | int foo(int x, int y){ 28 | ... 29 | } 30 | ``` 31 | 32 | + In cabal file: 33 | 34 | ```yaml 35 | ... 36 | c-sources: foo.c 37 | ... 38 | ``` 39 | 40 | With proper setup, cabal can orchestrate the compilation and give you a statically linked binary. The FFI specification specifies the concrete syntax on the Haskell side; to ensure a successful FFI call, you have to pay attention to several aspects: 41 | 42 | + Whether the types in Haskell and C match. 43 | + How to allocate memory for the C side, and when to free it. 44 | + The difference between unsafe FFI calls and [safe ones](https://simonmar.github.io/bib/papers/conc-ffi.pdf). 45 | 46 | Besides the above points, you'll have to use the correct calling convention (which would be ccall most of the time), write C wrappers if you want to call C++, etc. 47 | 48 | # FFI Types 49 | 50 | Here's a table of common FFI types that can be passed between C and Haskell, and where you can find them: 51 | 52 | | C type, header | Haskell type, module | Haskell type(with `UnliftedFFITypes` enabled), module | 53 | |--------------------|------------------------|------------------------------------------------------| 54 | | bool, built-in | CBool, Foreign.C.Types | - | 55 | | int, built-in | CInt, Foreign.C.Types | - | 56 | | uint, built-in | CUInt, Foreign.C.Types | - | 57 | | long, built-in | CLong, Foreign.C.Types | - | 58 | | ulong, built-in | CULong, Foreign.C.Types| - | 59 | | uchar, built-in | Word8, Data.Word | - | 60 | | char, built-in | Int8, Data.Int | - | 61 | | uint8_t, stdint.h | Word8, Data.Word | - | 62 | | uint16_t, stdint.h | Word16, Data.Word | - | 63 | | uint32_t, stdint.h | Word32, Data.Word | - | 64 | | uint64_t, stdint.h | Word64, Data.Word | - | 65 | | int8_t, stdint.h | Int8, Data.Int | - | 66 | | int16_t, stdint.h | Int16, Data.Int | - | 67 
| | int32_t, stdint.h | Int32, Data.Int | - | 68 | | int64_t, stdint.h | Int64, Data.Int | - | 69 | | type \*, built-in | Ptr type, Foreign.Ptr | Addr#, GHC.Prim | 70 | | HsInt, HsFFI.h | Int, Prelude | Int#, GHC.Prim | 71 | | HsWord, HsFFI.h | Word, Prelude | Word#, GHC.Prim | 72 | | HsBool, HsFFI.h | Bool, Prelude | - | 73 | | double, built-in | Double, Prelude | Double#, GHC.Prim | 74 | | float, built-in | Float, Prelude | Float#, GHC.Prim | 75 | | size_t, stddef.h | CSize, Foreign.C.Types | Word#, GHC.Prim | 76 | 77 | 78 | Some types' sizes depend on the platform (32-bit, 64-bit), e.g. the size of `HsInt/Int` is 32 bits on a 32-bit machine, or 64 bits on a 64-bit one. GHC also supports passing some array types to C but not vice versa: 79 | 80 | | C type, header | Haskell type, module | Haskell type(with `UnliftedFFITypes` enabled), module | 81 | | type \*, built-in | - | MutableByteArray#, GHC.Prim | 82 | | const type \*, built-in | - | ByteArray#, GHC.Prim | 83 | | StgMutArrPtrs \*(ghc<8.10), StgArrBytes \*\*, Rts.h | - | ArrayArray#, GHC.Prim | 84 | 85 | The Haskell FFI specification also supports function addresses, which are useful when used as a weak pointer's finalizers. 86 | 87 | ```haskell 88 | foreign import ccall "&free" free :: FunPtr (Ptr Word8 -> IO ()) 89 | ``` 90 | 91 | # Allocate and free 92 | 93 | It's common to have a C function that needs dynamically allocated arrays; there are two solutions in general: 94 | 95 | + Allocate from the C side, pass the pointer back to Haskell, then use `ForeignPtr` from `Foreign.ForeignPtr` or `CPtr` from `Z.Foreign.CPtr` to wrap it, and ensure the memory will be freed when no longer needed. 96 | + Allocate from the Haskell side as a GC managed heap object, then pass it to C for manipulation. 97 | 98 | Usually it's recommended to use the second method, since the memory is still under the GHC GC's management, so you don't have to worry about freeing it. 
99 | 100 | ## Allocate memory and pass to C 101 | 102 | There're some helpers in `Z.Foreign` to help you with allocating and passing, it's important to have some knowledge about GHC runtime system to get things right. GHC runtime is garbage collected, and there're two types of primitive array in GHC, with the objective to minimize overall memory management cost: 103 | 104 | + Small primitive arrays created with `newPrimArray` are directly allocated on GHC heap, which can be moved by GHC garbage collector, we call these arrays *unpinned*. Allocating these arrays is cheap, we only need to check heap limit and bump heap pointer just like any other haskell heap objects. But we will pay GC cost, which is OK for small arrays. 105 | 106 | + Large primitive arrays and those created with `newPinnedPrimArray` are allocated on GHC managed memory blocks, which are also traced by garbage collector, but will never be moved before being freed, thus are called *pinned*. Allocating these arrays is a bit more expensive since it's more like how malloc works, but we don't have to pay for GC cost. 107 | 108 | Beside the pinned/unpinned difference, we have two types of FFI calls in GHC: 109 | 110 | + Safe FFI call annotated with `safe` keyword. These calls are executed on a separate OS thread, which can be running concurrently with GHC garbage collector, thus we want to make sure only pinned arrays are passed. The main use case for safe FFIs is long running functions, for example, doing IO polling. Since these calls are running on a separate OS thread, haskell thread on original OS thread will not be affected. 111 | 112 | + Unsafe FFI call annotated with `unsafe` keyword. These calls are executed on the same OS thread which is running the haskell side FFI code, which will in turn stop GHC from doing a garbage collection. We can pass both pinned and unpinned arrays in this case. 
The use case for unsafe FFIs is short/small functions, which can be treated like fat primitive operations, such as memcpy, memcmp. Using unsafe FFI with long running functions will effectively block GHC runtime thread from running any other haskell threads, which is dangerous. Even if you use the threaded runtime and expect your haskell thread can be stolen by other OS threads, this will not work since GHC garbage collector will refuse to run if one of the OS threads is blocked by FFI calls. 113 | 114 | Based on the above analysis, we have the following FFI strategy table: 115 | 116 | | FFI \ Array | pinned | unpinned | 117 | |--------------|---------------|---------------| 118 | | unsafe | directly pass | directly pass | 119 | | safe | directly pass | make a copy | 120 | 121 | Helpers in `Z.Foreign` are also divided into two categories: those with unsafe suffix to be used with `unsafe` FFI, and those with safe suffix to be used with `safe` FFI. Following is an example that accommodates a small C function: 122 | 123 | ```c 124 | include 125 | 126 | void c_add_and_time(HsInt x, HsInt y, HsInt* add_result, HsInt* time_result){ 127 | *add_result = x + y; 128 | *time_result = x * y; 129 | } 130 | ``` 131 | 132 | ```haskell 133 | {-# LANGUAGE TypeApplications #-} 134 | {-# LANGUAGE UnliftedFFITypes #-} 135 | 136 | import Z.Foreign 137 | 138 | foreign import ccall unsafe c_add_and_time :: Int -> Int -> MBA# Int -> MBA# Int -> IO () 139 | 140 | cAddTime :: Int -> Int -> (Int, Int) 141 | cAddTime x y = do 142 | fst <$> allocPrimUnsafe @Int (\ add_result -> 143 | fst <$> allocPrimUnsafe @Int (\ time_result -> 144 | c_add_and_time x y add_result time_result)) 145 | ``` 146 | 147 | Now when you call `cAddTime` in haskell: 148 | 149 | 1. `allocPrimUnsafe` function will allocate a single element `MutablePrimArray Int` to be used as `Int` pointer, here we use two `allocPrimUnsafe` to allocate memory for saving the add and time results. 150 | 2. 
The `x` and `y` parameters are passed as `Int`, and received as `HsInt` in C. The `add_result` and `time_result` are passed as `MBA# Int`, which is a type alias for `MutableByteArray#`, and received as `HsInt*` in C. 151 | 3. `allocPrimUnsafe` will auto peek result from the single element array, and return together with FFI's return value, which is ignored by `fst`. 152 | 153 | The memory allocated by `allocPrimUnsafe`, `allocPrimArrayUnsafe` and `allocPrimVectorUnsafe` is not pinned, so you can't get the address first, then pass it as `Ptr a`. The only way to pass them is to use `MutableByteArray#` and `ByteArray#` primitive types. In `Z.Foreign` module `BA# a` and `MBA# a` type aliases are defined for writing convenience: 154 | 155 | ```haskell 156 | -- for const pointers 157 | type BA# a = ByteArray# 158 | -- for writable pointers 159 | type MBA# a = MutableByteArray# RealWorld 160 | ``` 161 | 162 | Since they are type aliases, the type tag is only for documentation. You should use proper pointer types on C side to receive them just like a `Ptr a`. Another common problem with `BA#` and `MBA#` is that they can only pass the array's first element's address, thus you have to manually pass a separate offset parameter if you want to work with a certain range of the array. 
This can be illustrated by the following code: 163 | 164 | ```c 165 | include 166 | 167 | // here we write a wrapper to receive a slice of bytearray 168 | HsInt hs_memchr(const uint8_t *a, HsInt aoff, uint8_t b, HsInt n) { 169 | a += aoff; 170 | uint8_t *p = memchr(a, b, (size_t)n); 171 | if (p == NULL) return -1; 172 | else return (p - a); 173 | } 174 | ``` 175 | 176 | ```haskell 177 | import Z.Foreign 178 | import Data.Word 179 | import qualified Z.Data.Vector as V 180 | 181 | foreign import ccall unsafe hs_memchr :: BA# Word8 -> Int -> Word8 -> Int -> IO Int 182 | 183 | memchrBytes :: V.Bytes -> Word8 -> Int 184 | memchrBytes bs x = withPrimVector bs $ \ mba off len -> hs_memchr mba off x len 185 | ``` 186 | 187 | The safe FFI variation `withPrimVectorSafe` is simpler, the offset is directly added to the address of pinned memory, so there's only a pointer and a length parameter. It's highly recommended to use unpinned allocation if possible, because pinned allocation often leads to memory fragmentation due to its garbage collection strategy, especially under a lot of small repetitive allocations. 188 | 189 | ## Null terminated strings 190 | 191 | C uses a lot of null terminated strings, i.e. `char*` where no length info is needed because it's assumed that the string always ends with a NULL terminator. In Haskell we provide a special type for this, that is the `CBytes` type from `Z.Data.CBytes` module. Similar to `withPrimVectorUnsafe` and `withPrimVectorSafe`, use `withCBytesUnsafe` and `withCBytes` to pass a `CBytes` to C FFI. 192 | 193 | ```haskell 194 | > :m + Z.Data.CBytes Z.Foreign Data.Word 195 | > foreign import ccall unsafe strlen :: BA# Word8 -> IO CSize 196 | > withCBytesUnsafe "hello, world!" strlen 197 | 13 198 | > foreign import ccall safe "strlen" strlen_safe :: Ptr Word8 -> IO CSize 199 | > withCBytes "hello, world!" 
strlen_safe 200 | 13 201 | ``` 202 | 203 | Use `allocCBytesUnsafe`, `allocCBytes` to allocate memory to be passed to C, return `CBytes` back. 204 | 205 | ```haskell 206 | > foreign import ccall unsafe sprint :: MBA# Word8 -> BA# Word8 -> Int -> IO () 207 | > allocCBytesUnsafe 32 $ \ dest -> withCBytesUnsafe "result is %d" $ \ fmt -> sprintf dest fmt 3 208 | ("result is 3",()) 209 | ``` 210 | 211 | To get `CBytes` from null terminated `char*`, use `fromCString` or `peekMBACBytes`. If the memory is allocated from C, it's recommend to use `bracket` to ensure memory get freed. 212 | 213 | ## Unaligned Class 214 | 215 | Sometime the memory passed to C are written with some struct fields, you could use `Storable` machinery from `Foreign.Storable` to peek/poke data from/to the memory, but `Storable` use `Ptr a`, so it requires pinned memory whose address is fixed. In [Z-Data](https://hackage.haskell.org/package/Z-Data) an alternative way to do this is to use `Unaligned` class from `Z.Data.Array.Unaligned` module. Here's a code sample from [Z-IO](https://hackage.haskell.org/package/Z-IO): 216 | 217 | ```c 218 | // definitions from libuv 219 | typedef struct uv_passwd_s { 220 | char* username; 221 | long uid; 222 | long gid; 223 | char* shell; 224 | char* homedir; 225 | } uv_passwd_t; 226 | 227 | int uv_os_get_passwd(uv_passwd_t* pwd); 228 | void uv_os_free_passwd(uv_passwd_t* pwd); 229 | ``` 230 | 231 | ```haskell 232 | import Z.Foreign 233 | import Z.Data.Array.Unaligned 234 | import Z.IO.Exception 235 | import Z.Data.CBytes 236 | 237 | -- | Data type for password file information. 
238 | data PassWD = PassWD 239 | { passwd_username :: CBytes 240 | , passwd_uid :: UID 241 | , passwd_gid :: GID 242 | , passwd_shell :: CBytes 243 | , passwd_homedir :: CBytes 244 | } deriving (Eq, Ord, Show, Read) 245 | 246 | foreign import ccall unsafe uv_os_get_passwd :: MBA## PassWD -> IO CInt 247 | foreign import ccall unsafe uv_os_free_passwd :: MBA## PassWD -> IO () 248 | 249 | -- | Gets a subset of the password file entry for the current effective uid (not the real uid). 250 | -- 251 | -- The populated data includes the username, euid, gid, shell, and home directory. 252 | -- On non-Windows systems, all data comes from getpwuid_r(3). 253 | -- On Windows, uid and gid are set to -1 and have no meaning, and shell is empty. 254 | getPassWD :: HasCallStack => IO PassWD 255 | getPassWD = bracket 256 | (do mpa@(MutableByteArray mba##) <- newByteArray (#size uv_passwd_t) 257 | throwUVIfMinus_ (uv_os_get_passwd mba##) 258 | return mpa) 259 | (\ (MutableByteArray mba##) -> uv_os_free_passwd mba##) 260 | (\ (MutableByteArray mba##) -> do 261 | username <- fromCString =<< peekMBA mba## (#offset uv_passwd_t, username) 262 | uid <- fromIntegral <$> (peekMBA mba## (#offset uv_passwd_t, uid) :: IO CLong) 263 | gid <- fromIntegral <$> (peekMBA mba## (#offset uv_passwd_t, gid) :: IO CLong) 264 | shell <- fromCString =<< peekMBA mba## (#offset uv_passwd_t, shell) 265 | homedir <- fromCString =<< peekMBA mba## (#offset uv_passwd_t, homedir) 266 | return (PassWD username uid gid shell homedir)) 267 | ``` 268 | 269 | Note above Haskell code use [hsc2hs](https://hackage.haskell.org/package/hsc2hs) to get constants(struct size, field offset, etc.) from C code, `##` is `#` escaped in `.hsc` file. `uv_os_get_passwd` asks for a `uv_passwd_t*` struct pointer which must a valid writable memory location, so in Haskell we manually allocate memory with `newByteArray` and pass the `MutableByteArray#` as a pointer. 
After FFI is complete, we use `peekMBA` from `Unaligned` class to read the `char*` pointer, then use 270 | `fromCString` from `Z.Data.CBytes` to copy the result. After copy completes, `uv_os_free_passwd` is called to free any memory allocated in C code. 271 | 272 | ## CPtr 273 | 274 | For some cases, allocation from C is mandatory, e.g. you can't get the size to allocate(hidden from C). We will use `CPtr` as an example to illustrate how we keep a reference to some opaque C struct. 275 | 276 | First you have to prepare a pair of allocation and free functions: 277 | 278 | ```c 279 | struct foo_s{ 280 | ... 281 | }; 282 | 283 | typedef struct foo_s foo_t; 284 | 285 | // the allocation function 286 | foo_t *new_foo(int x); 287 | 288 | // the free function 289 | void destroy_foo(foo_t* foo); 290 | 291 | // some function need foo_t 292 | void bar(foo_t* foo); 293 | ``` 294 | 295 | Now we import these functions in Haskell: 296 | 297 | ```haskell 298 | import Z.Foreign 299 | import Z.Foreign.CPtr 300 | 301 | data Foo 302 | 303 | foreign import ccall unsafe new_foo :: CInt -> IO (Ptr Foo) 304 | foreign import ccall unsafe "&destroy_foo" destroy_foo :: FunPtr (Ptr Foo -> IO ()) 305 | 306 | newFoo :: Int -> IO (CPtr Foo) 307 | newFoo x = newCPtr' (new_foo (fromIntegral x)) destroy_foo 308 | 309 | -- use `withCPtr` if you want to get foo_t pointer. 310 | foreign import ccall unsafe bar :: Ptr Foo -> IO () 311 | ... 312 | foo <- newFoo ... 313 | withCPtr foo bar 314 | ... 315 | 316 | ``` 317 | 318 | We encapsulate the C struct `foo_t` in a Haskell heap object `CPtr Foo` with the following steps: 319 | 320 | + Define a type tag `Foo`. 321 | + Import allocation and free functions, the free function should be imported as a `FunPtr` with its address. 322 | + Use `newCPtr'` from `Z.Foreign.CPtr` to attach the free function as finalizer, which will be called once the `CPtr Foo` is collected. 
323 | + `withCPtr` will get the pointer back and ensure it will not get collected during the FFI computation. 324 | 325 | # Exception handling 326 | 327 | C libraries usually have some conventions on error handling, e.g. return a minus error code to indicate exception case. It's recommend to define an exception type then provide helpers. Following is an example in [Z-Botan](https://github.com/ZHaskell/z-botan): 328 | 329 | * Import Error code in hsc file: 330 | 331 | ```haskell 332 | pattern BOTAN_FFI_ERROR_UNKNOWN_ERROR :: CInt 333 | pattern BOTAN_FFI_SUCCESS = (#const BOTAN_FFI_SUCCESS) 334 | pattern BOTAN_FFI_INVALID_VERIFIER = (#const BOTAN_FFI_INVALID_VERIFIER) 335 | pattern BOTAN_FFI_ERROR_INVALID_INPUT = (#const BOTAN_FFI_ERROR_INVALID_INPUT) 336 | ... 337 | ``` 338 | 339 | * Define an extensible exception type. 340 | 341 | ```haskell 342 | data SomeBotanException = forall e . Exception e => SomeBotanException e 343 | 344 | instance Show SomeBotanException where 345 | show (SomeBotanException e) = show e 346 | 347 | instance Exception SomeBotanException 348 | 349 | botanExceptionToException :: Exception e => e -> SomeException 350 | botanExceptionToException = toException . SomeBotanException 351 | 352 | botanExceptionFromException :: Exception e => SomeException -> Maybe e 353 | botanExceptionFromException x = do 354 | SomeBotanException a <- fromException x 355 | cast a 356 | 357 | #define BotanE(e) data e = e CInt CallStack deriving Show; \ 358 | instance Exception e where \ 359 | { toException = botanExceptionToException \ 360 | ; fromException = botanExceptionFromException \ 361 | } 362 | 363 | BotanE(InvalidVerifier) 364 | BotanE(InvalidInput) 365 | BotanE(BadMac) 366 | ... 
367 | ``` 368 | 369 | * And provide helpers for FFI code: 370 | 371 | ```haskell 372 | throwBotanIfMinus :: (HasCallStack, Integral a) => IO a -> IO a 373 | throwBotanIfMinus f = do 374 | r <- f 375 | when (r < 0) (throwBotanError_ (fromIntegral r) callStack) 376 | return r 377 | 378 | throwBotanIfMinus_ :: (HasCallStack, Integral a) => IO a -> IO () 379 | throwBotanIfMinus_ f = do 380 | r <- f 381 | when (r < 0) (throwBotanError_ (fromIntegral r) callStack) 382 | 383 | throwBotanError :: HasCallStack => CInt -> IO () 384 | throwBotanError r = throwBotanError_ r callStack 385 | 386 | throwBotanError_ :: CInt -> CallStack -> IO () 387 | throwBotanError_ r cs = case r of 388 | BOTAN_FFI_ERROR_INVALID_INPUT -> throw (InvalidInput r cs) 389 | BOTAN_FFI_ERROR_BAD_MAC -> throw (BadMac r cs) 390 | BOTAN_FFI_ERROR_INSUFFICIENT_BUFFER_SPACE -> throw (InsufficientBufferSpace r cs) 391 | ... 392 | ``` 393 | 394 | * In FFI code, use helper to throw exception when needed: 395 | 396 | ```haskell 397 | foreign import ccall unsafe hs_botan_mac_update :: BotanStructT -> BA## Word8 -> Int -> Int-> IO CInt 398 | 399 | updateMAC :: HasCallStack => MAC -> V.Bytes -> IO () 400 | updateMAC (MAC bts _ _) bs = 401 | withBotanStruct bts $ \ pbts -> 402 | withPrimVectorUnsafe bs $ \ pbs off len -> 403 | throwBotanIfMinus_ (hs_botan_mac_update pbts pbs off len) 404 | ``` 405 | -------------------------------------------------------------------------------- /Z-Data/JSON.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | parent: Z-Data 4 | title: JSON 5 | nav_order: 4 6 | --- 7 | 8 | ## Table of contents 9 | {: .no_toc .text-delta } 10 | 11 | 1. TOC 12 | {:toc} 13 | 14 | Using `Z.Data.JSON` module to get human-readable serialization/deserialization. 
The easiest way to use the library is to define a target data type, deriving 15 | `Generic` and `JSON` instances, which provide: 16 | 17 | * `fromValue` to convert `Value` to Haskell values. 18 | * `toValue` to convert Haskell values to `Value`. 19 | * `encodeJSON` to directly write Haskell value into JSON bytes. 20 | 21 | ```haskell 22 | class JSON a where 23 | ... 24 | toValue :: a -> Value 25 | fromValue :: Value -> Converter a 26 | encodeJSON :: a -> B.Builder () -- `Z.Data.Builder` as `B` 27 | ... 28 | ``` 29 | 30 | For example, 31 | 32 | ```haskell 33 | {-# LANGUAGE DeriveGeneric, DeriveAnyClass, DerivingStrategies #-} 34 | 35 | import GHC.Generics (Generic) 36 | import qualified Z.Data.Builder as Builder 37 | import qualified Z.Data.JSON as JSON 38 | import qualified Z.Data.Text as T 39 | 40 | data Person = Person {name :: T.Text, age :: Int} 41 | deriving (Show, Generic) 42 | deriving anyclass (JSON.JSON) 43 | ``` 44 | 45 | We can now encode & decode JSON like this: 46 | 47 | ```haskell 48 | > JSON.toValue (Person{ name="Alice", age=16 }) 49 | Object [("name",String "Alice"),("age",Number 16.0)] 50 | > JSON.encode (Person{ name="Alice", age=16 }) 51 | [123,34,110,97,109,101,34,58,34,65,108,105,99,101,34,44,34,97,103,101,34,58,49,54,125] 52 | > JSON.encodeText (Person{ name="Alice", age=16 }) 53 | "{\"age\":16,\"name\":\"Alice\"}" 54 | > JSON.decodeText' "{\"age\":16,\"name\":\"Alice\"}" :: Either JSON.DecodeError Person 55 | Right (Person {age = 16, name = "Alice"}) 56 | ``` 57 | 58 | The `Generic` based instances convert Haskell data with the following rules: 59 | 60 | * Constructors without payloads are encoded as JSON String, `data T = A | B` are encoded as `"A"` or `"B"`. 61 | * A single constructor is ignored if there're payloads, `data T = T ...`, `T` is ignored: 62 | * Records are encoded as JSON object. `data T = T{k1 :: .., k2 :: ..}` are encoded as `{"k1":...,"k2":...}`. 63 | * Plain products are encoded as JSON array. 
`data T = T t1 t2` are encoded as "[x1,x2]". 64 | * Single field plain product are encoded as it is, i.e. `data T = T t` are encoded as "t" just like its payload. 65 | * Multiple constructors are convert to single key JSON object if there're payloads: 66 | * Records are encoded as JSON object like above. `data T = A | B {k1 :: .., k2 :: ..}` are encoded as 67 | `{"B":{"k1":...,"k2":...}}` in `B .. ..` case, or `"A"` in `A` case. 68 | * Products inside a sum type are similar to above, wrapped by an outer single-key object layer marking which constructor used during data construction. 69 | 70 | These rules apply to user defined ADTs, but some built-in instances have different behaviours, namely: 71 | 72 | * `Maybe a` are encoded as JSON `null` in `Nothing` case, or directly encoded to its payload in `Just` case. 73 | * `[a]` are encoded to JSON array, `[Char]` are encoded into JSON string. 74 | * `NonEmpty`, `Vector`, `PrimVector`, `HashSet`, `FlatSet`, `FlatIntSet` are also encoded to JSON array. 75 | * `Bytes` are encoded into JSON text using base64 encoding. 76 | * `HashMap`, `FlatMap`, `FlatIntMap` are encoded to JSON object. 77 | 78 | ## Custom Settings 79 | 80 | There're some modifying options if you providing a custom `Settings`, which 81 | allow you to modify field name or constructor name, but please *DO NOT* 82 | produce control characters during your modification, since we assume field 83 | labels and constructor name won't contain them, thus we can save an extra 84 | escaping pass. To use custom `Settings` just write: 85 | 86 | ```haskell 87 | data T = T {fooT :: Int, barT :: [Int]} deriving Generic 88 | instance JSON.JSON T where 89 | -- You can omit following definitions if you don't need to change settings 90 | toValue = JSON.gToValue JSON.defaultSettings{ JSON.fieldFmt = JSON.snakeCase } . from 91 | encodeJSON = JSON.gEncodeJSON JSON.defaultSettings{ JSON.fieldFmt = JSON.snakeCase } . 
from 92 | ``` 93 | 94 | ```haskell 95 | > JSON.toValue (T 0 [1,2,3]) 96 | Object [("foo_t",Number 0.0),("bar_t",Array [Number 1.0,Number 2.0,Number 3.0])] 97 | ``` 98 | 99 | ## Manually Writing Instances 100 | 101 | You can write `JSON` instances by hand if the `Generic` based one doesn't suit you. 102 | Here is an example similar to aeson's. 103 | 104 | ```haskell 105 | import qualified Z.Data.Text as T 106 | import qualified Z.Data.Vector as V 107 | import qualified Z.Data.Builder as B 108 | import qualified Z.Data.JSON as JSON 109 | import Z.Data.JSON ((.:), (.=), (.!), JSON(..)) 110 | 111 | data Person = Person { name :: T.Text , age :: Int } deriving Show 112 | 113 | instance JSON Person where 114 | fromValue = JSON.withFlatMapR "Person" $ \ v -> Person 115 | <$> v .: "name" 116 | <*> v .: "age" 117 | 118 | toValue (Person n a) = JSON.object ["name" .= n, "age" .= a] 119 | 120 | encodeJSON (Person n a) = JSON.object' $ ("name" .! n <> "age" .! a) 121 | ``` 122 | 123 | ```haskell 124 | > toValue (Person "Joe" 12) 125 | Object [("name",String "Joe"),("age",Number 12.0)] 126 | > JSON.convert' `Person . JSON.Object $ V.pack [("name",JSON.String "Joe"),("age",JSON.Number 12.0)] 127 | Right (Person {name = "Joe", age = 12}) 128 | > JSON.encodeText (Person "Joe" 12) 129 | "{"name":"Joe","age":12}" 130 | ``` 131 | 132 | The `Value` type is different from aeson's one in that we use `Vector (Text, Value)` to represent JSON objects, thus 133 | we can choose different strategies on key duplication, the lookup map type, etc. so instead of a single `withObject`, 134 | we provide `withHashMap`, `withHashMapR`, `withFlatMap` and `withFlatMapR` which use different lookup map type, and different key order priority. Most of the time `FlatMap` is faster than `HashMap` since we only use the lookup map once, the cost of constructing a `HashMap` is higher. If you want to directly work on key-values, `withKeyValues` provide key-values vector access. 
135 | 136 | There're some useful tools to help write encoding code in `Z.Data.JSON.Builder` module, such as a JSON string escaping tool, etc. If you don't particularly care for fast encoding, you can also use `toValue` together with value builder, the overhead is usually very small. 137 | -------------------------------------------------------------------------------- /Z-Data/Parser-and-Builder.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | parent: Z-Data 4 | title: Parser and Builder 5 | nav_order: 3 6 | --- 7 | 8 | ## Table of contents 9 | {: .no_toc .text-delta } 10 | 11 | 1. TOC 12 | {:toc} 13 | 14 | # Parser Monad 15 | 16 | The `Parser` from `Z.Data.Parser` is designed for high performance resumable binary parsing and simple textual parsing, such as network protocols, JSON, etc. Write a parser by using basic parsers from `Z.Data.Parser` such as `takeWhile`, `int`, etc. 17 | 18 | ```haskell 19 | import qualified Z.Data.Parser as P 20 | import Z.Data.ASCII 21 | 22 | data Date = Date { year :: Int, month :: Int, day :: Int } deriving Show 23 | 24 | dateParser :: P.Parser Date 25 | dateParser = do 26 | y <- P.int 27 | P.word8 HYPHEN 28 | m <- P.int 29 | P.word8 HYPHEN 30 | d <- P.int 31 | return $ Date y m d 32 | ``` 33 | 34 | `Parser` in Z works directly on `Bytes`: 35 | 36 | ```haskell 37 | > P.parse' dateParser "2020-12-12" 38 | Date 2020 12 12 39 | > P.parse' dateParser "2020-JAN-12" 40 | Left ["Z.Data.Parser.Numeric.int","Z.Data.Parser.Base.takeWhile1: no satisfied byte at [74,65,78,45,49,50]"] 41 | > P.parse dateParser "2020-12-12, 08:00" 42 | ([44,32,48,56,58,48,48], Right (Date {year = 2020, month = 12, day = 12})) 43 | > P.parseChunk dateParser "2020-" 44 | Partial _ 45 | > let (P.Partial f) = P.parseChunk dateParser "2020-" 46 | > let (P.Partial f') = f "05-05" -- incrementally provide input 47 | > f' "" -- push empty chunk to signal EOF 48 | Success Date {year = 2020, month = 5, day = 
5} 49 | ``` 50 | 51 | Binary protocol can use `decodePrim/decodePrimLE/decodePrimBE` with `TypeApplications` extension, let's say you want to implement a [MessagePack str format](https://github.com/msgpack/msgpack/blob/master/spec.md#str-format-family) parser: 52 | 53 | ```haskell 54 | import Data.Bits 55 | import Data.Word 56 | import qualified Z.Data.Parser as P 57 | import qualified Z.Data.Text as T 58 | 59 | msgStr :: P.Parser T.Text 60 | msgStr = do 61 | tag <- P.anyWord8 62 | case tag of 63 | t | t .&. 0xE0 == 0xA0 -> str (t .&. 0x1F) 64 | 0xD9 -> str =<< P.anyWord8 65 | 0xDA -> str =<< P.decodePrimBE @Word16 66 | 0xDB -> str =<< P.decodePrimBE @Word32 67 | _ -> P.fail' "unknown tag" 68 | where 69 | str !l = do 70 | bs <- P.take (fromIntegral l) 71 | case T.validateMaybe bs of 72 | Just t -> return (Str t) 73 | _ -> P.fail' "illegal UTF8 Bytes" 74 | ``` 75 | 76 | Comparing to `parsec` or `megaparsec`, `Parser` in Z provides limited error reporting, and do not support using as a monad transformer. But provides an instance of `PrimMonad`, which allows some limited effects, such as mutable variables and array operations. 77 | 78 | ## Auto Backtracked Alternative 79 | 80 | Similar to `attoparsec`, `Parser` in Z always backtrack when used with `<|>` (`Alternative` instance), that means the failed branch will not consume any input without doing anything special: 81 | 82 | ```haskell 83 | import Control.Applicative 84 | ... 85 | p = fooParser <|> barParser <|> quxParser 86 | ``` 87 | 88 | In above code, if any parser failed, the next parser is retried from the beginning of the input. Backtracking is not always needed though, it recommended to use `peek` 89 | or `peekMaybe` if the syntax or protocol can be parsed as LL(1) grammer since it's faster than backtracking. 90 | 91 | # Builder Monad 92 | 93 | The `Builder` from `Z.Data.Builder` is the reverse process of parsing, i.e. writing Haskell data types to `Bytes`, aka *Writer* monad. 
The usage is very similar to `Parser`: 94 | 95 | ```haskell 96 | import qualified Z.Data.Builder as B 97 | import Z.Data.ASCII 98 | 99 | data Date = Date { year :: Int, month :: Int, day :: Int } deriving Show 100 | 101 | dataBuilder :: Date -> B.Builder () 102 | dataBuilder (Date y m d) = do 103 | int' y 104 | B.word8 HYPHEN 105 | int' m 106 | B.word8 HYPHEN 107 | int' d 108 | where 109 | int' x | x > 10 = B.int x 110 | | otherwise = B.word8 DIGIT_0 >> B.int x 111 | ``` 112 | 113 | Under the hood, a `Builder` records a buffer writing function, thus can be composed quickly. Use `build/buildText` to run a `Builder`, which produces `Bytes` and `Text` respectively: 114 | 115 | ```haskell 116 | > B.build (dataBuilder $ Date 2020 11 1) 117 | [50,48,50,48,45,49,49,45,48,49] 118 | > B.buildText (dataBuilder $ Date 2020 11 1) 119 | "2020-11-01" 120 | ``` 121 | 122 | Binary `Builder` can be constructed with `encodePrim/encodePrimLE/encodePrimBE`, let's still take [MessagePack str format](https://github.com/msgpack/msgpack/blob/master/spec.md#str-format-family) as an example: 123 | 124 | ```haskell 125 | import Data.Bits 126 | import Data.Word 127 | import qualified Z.Data.Builder as B 128 | import qualified Z.Data.Text as T 129 | import qualified Z.Data.Vector as V 130 | 131 | msgStr :: T.Text -> B.Builder () 132 | msgStr t = do 133 | let bs = T.getUTF8Bytes t 134 | case V.length bs of 135 | len | len <= 31 -> B.word8 (0xA0 .|. fromIntegral len) 136 | | len < 0x100 -> B.encodePrim (0xD9 :: Word8, fromIntegral len :: Word8) 137 | | len < 0x10000 -> B.encodePrim (0xDA :: Word8, BE (fromIntegral len :: Word16)) 138 | | otherwise -> B.encodePrim (0xDB :: Word8, BE (fromIntegral len :: Word32)) 139 | B.bytes bs 140 | ``` 141 | 142 | Note that we directly use the `Unaligned a, Unaligned b => Unaligned (a, b)` instance to write several primitive types in a row. The `Unaligned` class provides basic reading and writing facilities to read primitive types from and to raw bytes(with unaligned offset). 
143 | 144 | ## Text formatting with `Builder` 145 | 146 | Different from other standard libraries which usually provide `printf` or similar, in Z directly using `Builder` to format text is recommended: 147 | 148 | ```haskell 149 | -- Similar to print("The result are %d, %d", x, y) 150 | -- If you can ensure all Builders will write UTF-8 encoded bytes, 151 | -- you can use unsafeBuildText to save a validation 152 | 153 | B.unsafeBuildText $ do 154 | "The result are " >> B.double x >> ", " >> B.double y 155 | 156 | -- Or use do syntax 157 | 158 | B.unsafeBuildText $ do 159 | "The result are " 160 | B.double x 161 | ", " 162 | B.double y 163 | ... 164 | 165 | ``` 166 | 167 | The strength of monadic `Builder` is that you can reuse all control structures from `Control.Monad`, such as conditions, loops, etc. `Builder ()` has an `IsString` instance which can wrap writing literals in UTF-8 encoding, with some modifications: 168 | 169 | + `\NUL` will be written as `\xC0\x80`. 170 | + `\xD800` ~ `\xDFFF` will be encoded in three bytes as normal UTF-8 codepoints. 171 | 172 | It's safe to put a string literal inside an `unsafeBuildText` as long as you don't write `\0` or `\55296` ~ `\57343`. 
23 | ``` 24 | 25 | These types can support efficiently slicing operations(`take`, `drop`, `break`, etc.), To abstract these types, The `Vec` class is introduced: 26 | 27 | ```haskell 28 | class (Arr (IArray v) a) => Vec v a where 29 | -- | Vector's immutable array type 30 | type IArray v :: Type -> Type 31 | -- | Get underline array and slice range(offset and length). 32 | toArr :: v a -> (IArray v a, Int, Int) 33 | -- | Create a vector by slicing an array(with offset and length). 34 | fromArr :: IArray v a -> Int -> Int -> v a 35 | ``` 36 | 37 | `Vector` and `PrimVector` are obvious instances, but plain array types are also `Vec`'s instances with `O(n)` `fromArr`, for example: 38 | 39 | ```haskell 40 | instance Prim a => Vec PrimArray a where 41 | type IArray PrimArray = PrimArray 42 | toArr arr = (arr, 0, sizeofArr arr) 43 | fromArr = fromArray 44 | 45 | -- | Construct a slice from an array by copying(if neccessary). 46 | fromArray :: Arr arr a => arr a -> Int -> Int -> arr a 47 | fromArray arr offset len | offset == 0 && sizeofArr arr == len = arr 48 | | otherwise = cloneArr arr offset len 49 | ``` 50 | 51 | These instances give `Vec` great flexiblity: if your combinators are implemented with `Vec`, it will work on various slicing types, and plain array types, for example, the `map'` combinator from `Z.Data.Vector`: 52 | 53 | ```haskell 54 | map' :: forall u v a b. (Vec u a, Vec v b) => (a -> b) -> u a -> v b 55 | ``` 56 | 57 | Note the input and output `Vec` type is not required to be the same, which means applications like the following are possible: 58 | 59 | ```haskell 60 | data User = User { ..., age :: Int, ...} 61 | 62 | -- | Take all user's age and pack them into a `PrimArray`. 63 | takeAllAges :: Vector User -> PrimArray Int 64 | takeAllAges = map' age 65 | ``` 66 | 67 | The above functions will work efficiently as expected, `User`'s age will be directly written into a new `PrimArray` with no extra copies. 
68 | 69 | All functions in `Z.Data.Vector` are implemented using `Vec` constraint, sometimes this will lead to type inference failures, so it's recommended to enable `TypeApplications` extension and add necessary type annotations: 70 | 71 | ```haskell 72 | {-# LANUAGE TypeApplications #-} 73 | 74 | import qualified Z.Data.Vector as V 75 | ... 76 | -- if you don't write annotations, GHC may get confused 77 | -- which type of vectors you want to pack. 78 | let v = V.pack @PrimVector @Word [1..1024] 79 | ... 80 | ``` 81 | 82 | # Bytes: Word8 vector 83 | 84 | One of the most commonly used vector types is `type Bytes = PrimVector Word8`, which is used to represent binary data. To make writing `Bytes` literals more convenient, `Bytes` is an instance to `IsString`: 85 | 86 | ```haskell 87 | > import qualified Z.Data.Vector as V 88 | > :set -XOverloadedStrings 89 | > "hello, world" :: V.Bytes 90 | "hello, world" 91 | > "你好世界" :: V.Bytes -- unicode literals will be get choped! 92 | [96,125,22,76] 93 | ``` 94 | 95 | In the above example, unicode literals "你好世界" do not produce UTF-8 encoded byte vector as one might expect, you have to use `Text` to get that behaviour: 96 | 97 | ```haskell 98 | > import qualified Z.Data.Text as T 99 | > T.getUTF8Bytes "你好世界" 100 | [228,189,160,229,165,189,228,184,150,231,149,140] 101 | ``` 102 | 103 | Note that `Bytes`'s `Show` instance is not specialized to show ASCII characters. 
You can use functions from `Z.Data.Vector.Hex` and `Z.Data.Vector.Base64` to manually encode binary `Bytes` into ASCII strings: 104 | 105 | ```haskell 106 | > import Z.Data.Vector.Hex 107 | > hexEncode True "hello world" 108 | "68656C6C6F20776F726C64" 109 | > import Z.Data.Vector.Base64 110 | > base64Encode "hello wolrd" 111 | "aGVsbG8gd29scmQ=" 112 | ``` 113 | 114 | In `Z-Data` we use incoherent instances to handle `Bytes`'s JSON instance(using base64 encoding): 115 | 116 | ```haskell 117 | > V.pack [0..127] :: V.Bytes 118 | [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127] 119 | > import qualified Z.Data.JSON as JSON 120 | > JSON.encode (V.pack [0..127] :: V.Bytes) 121 | "\"AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMDEyMzQ1Njc4OTo7PD0+P0BBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWltcXV5fYGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6e3x9fn8=\"" 122 | > JSON.encode (V.pack [0..127] :: V.PrimVector Int) 123 | "[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127]" 124 | ``` 125 | 126 | Besides special instances, many functions in `Z.Data.Vector` will leverage rewrite rules to use more efficient instructions when used with `Bytes`, such as `break`, `takeWhile`, etc. But these optimizations should have no visible difference for users. 
127 | 128 | # Text: UTF-8 encoded Bytes 129 | 130 | The `Text` type from `Z.Data.Text` is a `newtype` wrapper around `Bytes` which provides UTF-8 encoding guarantee, you should construct a `Text` using `validate` or `validateMaybe` or string literals only: 131 | 132 | ```haskell 133 | > import qualified Z.Data.Text as T 134 | > T.validate "hello world" 135 | "hello world" 136 | > T.validate "hello world, \128" 137 | *** Exception: InvalidUTF8Exception [("validate",SrcLoc {srcLocPackage = "interactive", srcLocModule = "Ghci12", srcLocFile = "", srcLocStartLine = 52, srcLocStartCol = 1, srcLocEndLine = 52, srcLocEndCol = 31})] 138 | > "你好世界" :: T.Text 139 | "你好世界" 140 | ``` 141 | 142 | In Haskell, `String`s are allowed to have illegal UTF-8 code points so that any UNIX file path can be encoded in `String`, but in Z.Haskell we have a special type for file path. `Text` will convert illegal code points in case of string literals: 143 | 144 | ```haskell 145 | > "hello world, \55296" :: T.Text 146 | "hello world, �" 147 | > T.getUTF8Bytes "hello world, \55296" -- surrogates 148 | [104,101,108,108,111,32,119,111,114,108,100,44,32,239,191,189] 149 | ``` 150 | 151 | The `239, 191, 189` bytes sequence is the replacement char `\U+FFFD`'s UTF-8 encoding form. By providing limited ways of creating `Text`, combinators in `Z.Data.Text` can safely assume `Text` only contain UTF-8 encoded code points. 
152 | 153 | `Z.Data.Text` also provides some unicode processing capabilities, such as normalization, case-mapping, etc: 154 | 155 | ```haskell 156 | > T.validate "re\204\129sume\204\129" 157 | "résumé" 158 | > T.normalize (T.validate "re\204\129sume\204\129") 159 | "résumé" 160 | > T.getUTF8Bytes $ (T.validate "re\204\129sume\204\129") 161 | [114,101,204,129,115,117,109,101,204,129] 162 | > T.getUTF8Bytes $ T.normalize (T.validate "re\204\129sume\204\129") 163 | [114,195,169,115,117,109,195,169] 164 | > T.toUpper "διακριτικός" 165 | "ΔΙΑΚΡΙΤΙΚΌΣ" 166 | ``` 167 | 168 | Regular expressions based on the [re2](https://github.com/google/re2) regex engine are also provided: 169 | 170 | ```haskell 171 | > import qualified Z.Data.Text.Regex as RE 172 | > let emailRegex = RE.regex "([a-z0-9_\\.-]+)@([\\da-z\\.-]+)\\.([a-z\\.]{2,6})" 173 | > RE.match emailRegex "hello@world.com" 174 | ("hello@world.com",[Just "hello",Just "world",Just "com"],"") 175 | > RE.match emailRegex "foobar" 176 | ("",[],"foobar") 177 | > RE.replace emailRegex True "hello@world.com, foo@bar.com" "x@y.z" 178 | "x@y.z, x@y.z" 179 | > RE.extract emailRegex "hello@world.com" "http://\\2.\\3" 180 | "http://world.com" 181 | ``` 182 | 183 | # Print to Text 184 | 185 | The `Z.Data.Text` module provides `toText` to quickly convert a data type to `Text` based on the `Print` class. It's similar to `Show`, with `Generic` support: 186 | 187 | ``` 188 | > import GHC.Generics 189 | > import qualified Z.Data.Text as T 190 | > data Date = Date { year :: Int, month :: Int, day :: Int } deriving (Generic, T.Print) 191 | > T.toText $ Date 2020 1 12 192 | "Date {year = 2020, month = 1, day = 12}" 193 | ``` 194 | 195 | It's recommended to derive `Print` for your data types to get fast text conversion, though current GHC compiles `Generic` instances fairly slowly.
196 | 197 | 198 | # List fusion 199 | 200 | `Vec` instances and `Text` support the [build-foldr](https://wiki.haskell.org/Correctness_of_short_cut_fusion#foldr.2Fbuild) fusion by providing fusion rules enabled `pack/unpack`, the following code should iterate the input vector and produce the output vector in a single pass rather than producing an intermediate list: 201 | 202 | ```haskell 203 | f :: V.Vector a -> V.Vector b 204 | f = V.pack . filter h . map g . V.unpack 205 | ``` 206 | 207 | This is different from the following code, which will produce an intermediate vector (may not be slower though): 208 | 209 | ```haskell 210 | f :: V.Vector a -> V.Vector b 211 | f = V.filter h . V.map' g 212 | ``` 213 | 214 | When working with sequential data, it's recommended to choose vectors as the final representation of data, since it's more compact and GC friendly. 215 | 216 | # Type cheatsheet 217 | 218 | [Z-Data](https://hackage.haskell.org/package/Z-Data) simplified a lot of types already, but in case of getting confused, here's a type cheat sheet: 219 | 220 | ``` 221 | +---------------------------------------------------------+ 222 | | Vec class | + Use Array to save ADTs. 223 | | | + Use SmallArray if you don't 224 | | +----------------------+ +-----------------------+ | often mutate. 225 | | | Arr class | | Slice types | | + Use PrimArray to save 226 | | | | | support O(1) slicing | | primitive types like 227 | | | +---------+ | | with offset/length | | Int or Word8. 228 | | | | Array a | | | | | + Use UnliftedArray to save 229 | | | +---------+ | | | | unlifted types like 230 | | | | | | | IORef or Array. 231 | | | +---------------+ | | | | 232 | | | |UnliftedArray a| | | | | + Use slice types to get O(1) 233 | | | +---------------+ | | | | slicing operations. 234 | | | | | | | + Use Bytes to represent 235 | | | +--------------+ | | +----------+ | | binary data. 
236 | | | | SmallArray a +->arrVec->+ Vector a | | | 237 | | | +--------------+ | | +----------+ | | + Use Text to represent 238 | | | | | | | UTF-8 encoded bytes. 239 | | | +-------------+ | | +--------------+ | | 240 | | | | PrimArray a +->arrVec->-+ PrimVector a | | | 241 | | | +-------------+ | | +--------------+---+ | | 242 | | | | | | Bytes | | | 243 | | | | | | PrimVector Word8 | | | 244 | | | | | +-------+----------+ | | 245 | | +----------------------+ +----------V------------+ | 246 | +----------------------------------------|----------------+ 247 | validate 248 | | 249 | V 250 | +--------+------------+ 251 | | Text | 252 | | UTF-8 encoded Bytes | 253 | +---------------------+ 254 | ``` 255 | -------------------------------------------------------------------------------- /Z-Data/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: Z-Data 4 | nav_order: 3 5 | has_children: true 6 | --- 7 | 8 | [![Hackage](https://img.shields.io/hackage/v/Z-Data.svg?style=flat)](https://hackage.haskell.org/package/Z-Data) 9 | [![Linux Build Status](https://github.com/ZHaskell/z-data/workflows/ubuntu-ci/badge.svg)](https://github.com/ZHaskell/z-data/actions) 10 | [![MacOS Build Status](https://github.com/ZHaskell/z-data/workflows/osx-ci/badge.svg)](https://github.com/ZHaskell/z-data/actions) 11 | [![Windows Build Status](https://github.com/ZHaskell/z-data/workflows/win-ci/badge.svg)](https://github.com/ZHaskell/z-data/actions) 12 | 13 | [Z-Data](https://github.com/haskell-Z/z-data) provides basic data structures and functions: 14 | 15 | * Array, vector(array slice), Bytes(Word8 vectors) 16 | * UTF-8 based Text, basic unicode manipulation 17 | * FFI utilities 18 | * Parsing and building monad 19 | * JSON encoding and decoding 20 | -------------------------------------------------------------------------------- /Z-IO/BIO-Streaming.md:
-------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | parent: Z-IO 4 | title: BIO Streaming 5 | nav_order: 3 6 | --- 7 | 8 | ## Table of contents 9 | {: .no_toc .text-delta } 10 | 11 | 1. TOC 12 | {:toc} 13 | 14 | # BIO: composable callbacks 15 | 16 | In previous sections, we have introduced the `Z.IO.Buffered` module. And it provides APIs for buffered reading and writing. When combined with [Builder and Parser]() facility, it is easy to handle some simple streaming tasks, for example, read/write packets from TCP wire. But sometimes, things could get complicated. Let's say you want to use the [zlib](https://zlib.net) library to decompress a bytes stream from some file. The interface provided by zlib is like this: 17 | 18 | ```c 19 | int inflateInit (z_streamp strm, int level); 20 | int inflate (z_streamp strm, int flush); 21 | int inflateEnd (z_streamp strm); 22 | ``` 23 | 24 | It's OK to draw a chunk from `BufferedInput`, feed it to `z_streamp`, check the status and do some computation if a decompressed chunk is produced. But how to read a line from decompressed streams? We can't reuse `readLine` from `Z.IO.Buffered` since decompressed chunks are not drawn directly from `BufferedInput`. 25 | 26 | Ideally, we should have a composable `BufferedInput` type, which can accept some transformations and yield another `BufferedInput`. But `BufferedInput` is all about managing reading from buffer so that raw byte chunks can be drawn from the device. In Z-IO the `BIO` type is introduced to solve the composable streaming problem: 27 | 28 | ```haskell 29 | type BIO inp out = (Maybe out -> IO ()) -> Maybe inp -> IO () 30 | ``` 31 | 32 | Conceptually a `BIO` is a box doing transformation on data callbacks: 33 | 34 | ```haskell 35 | -- A pattern synonym for more meaningful pattern match 36 | pattern EOF :: Maybe a 37 | pattern EOF = Nothing 38 | 39 | fooBIO :: BIO foo bar 40 | fooBIO callback maybeFoo = do 41 | ... 
use callback to pass output data 42 | case maybeFoo of 43 | Just foo -> 44 | ... you can send result to downstream by passing Just values 45 | ... to callback, and you can call callback multiple times. 46 | callback (Just ...) 47 | ... 48 | callback (Just ...) 49 | ... 50 | EOF -> 51 | ... you should pass EOF to callback to indicate current 52 | ... node also reaches its EOF 53 | callback EOF 54 | ``` 55 | 56 | The `BIO` type has two parameters: 57 | 58 | + A `callback :: Maybe out -> IO ()` (often written as `k`) which gets called to write downstream: 59 | + A `Just out` value is an item passed to downstream. 60 | + An `EOF` notifies downstream of EOF. 61 | + A `Maybe inp` value which comes from upstream: 62 | + A `Just inp` value is an item from upstream. 63 | + An `EOF` signals that upstream has reached EOF. 64 | 65 | Let's take zlib's `z_streamp` as an example to implement a compressing BIO node: 66 | 67 | ```haskell 68 | compressBIO :: ZStream -> BIO V.Bytes V.Bytes 69 | compressBIO zs = \ callback mbs -> 70 | case mbs of 71 | Just bs -> do 72 | -- feed input chunk to ZStream 73 | set_avail_in zs bs (V.length bs) 74 | let loop = do 75 | oavail :: CUInt <- withCPtr zs $ \ ps -> do 76 | -- perform deflate and peek output buffer remaining 77 | throwZlibIfMinus_ (deflate ps (#const Z_NO_FLUSH)) 78 | (#peek struct z_stream_s, avail_out) ps 79 | when (oavail == 0) $ do 80 | -- when output buffer is full, 81 | -- freeze chunk and call the callback 82 | oarr <- A.unsafeFreezeArr =<< readIORef bufRef 83 | callback (Just (V.PrimVector oarr 0 bufSiz)) 84 | newOutBuffer 85 | loop 86 | loop 87 | _ -> ... similar to above, with no input chunk and Z_FINISH flag 88 | ``` 89 | 90 | # Source and Sink types 91 | 92 | Now let's consider the following devices: 93 | 94 | + A data source which doesn't take any input but can be read until EOF. 95 | + A data sink which only performs writing without producing any meaningful result.
96 | 97 | We can have the definitions for data `Source` and `Sink` by using `Void` from `Data.Void`: 98 | 99 | ```haskell 100 | -- Source type doesn't need input 101 | type Source a = BIO Void a 102 | -- Sink type doesn't produce output 103 | type Sink a = BIO a Void 104 | ``` 105 | 106 | Because `Void` type doesn't have constructors, one should ignore the `Maybe Void` param when defining a `Source`. For example, a `BIO` node sourcing chunks from `BufferedInput` can be implemented like this: 107 | 108 | ```haskell 109 | sourceFromBuffered :: BufferedInput -> Source V.Bytes 110 | sourceFromBuffered i = \ k _ -> 111 | let loop = readBuffer i >>= \ x -> 112 | if V.null x then k EOF else k (Just x) >> loop 113 | in loop 114 | ``` 115 | 116 | For `type Sink a = BIO a Void`, the callback type is `Maybe Void -> IO ()`, which means you can only pass `EOF` to the callback, the convention here is to only call callback when EOF: 117 | 118 | ```haskell 119 | -- | The `BufferedOutput` device will get flushed only on EOF. 120 | sinkToBuffered :: BufferedOutput -> Sink V.Bytes 121 | sinkToBuffered bo = \ k mbs -> 122 | case mbs of 123 | Just bs -> writeBuffer bo bs 124 | _ -> flushBuffer bo >> k EOF 125 | ``` 126 | 127 | # Composing BIO 128 | 129 | The `BIO` type could be composed via `(.)`, i.e. the function composition. The composition's result has some interesting facts: 130 | 131 | + If you compose a `Source a` to `BIO a b`, you will get a `Source b`. 132 | + If you compose a `BIO a b` to `Sink b`, you will get a `Sink a`. 133 | 134 | So let's say you want to count the line number of a file, you could use `BIO`: 135 | 136 | ```haskell 137 | import Z.IO 138 | import Z.Data.PrimRef 139 | 140 | main :: IO () 141 | main = do 142 | _:path:_ <- getArgs 143 | withResource (initSourceFromFile path) $ \ fileSource -> do 144 | counterRef <- newCounter 0 145 | let counter = counterNode counterRef 146 | splitter <- newLineSplitter 147 | runBIO_ $ fileSource . splitter . 
counter 148 | printStd =<< readPrimIORef counterRef 149 | ``` 150 | 151 | `runBIO_ :: Source a -> IO ()` simply supplies an `EOF` to the BIO chain, and `fileSource` will drive the whole chain until EOF. It's defined as: 152 | 153 | ```haskell 154 | discard :: a -> IO () 155 | {-# INLINABLE discard #-} 156 | discard _ = return () 157 | 158 | runBIO_ :: BIO inp out -> IO () 159 | {-# INLINABLE runBIO_ #-} 160 | runBIO_ bio = bio discard EOF 161 | ``` 162 | 163 | Another example from the [introduce BIO blog post](https://z.haskell.world/design/2021/04/20/introduce-BIO-a-simple-streaming-abstraction.html): 164 | 165 | ```haskell 166 | import Z.Data.CBytes (CBytes) 167 | import Z.IO 168 | import Z.IO.BIO 169 | import Z.IO.BIO.Zlib 170 | 171 | base64AndCompressFile :: HasCallStack => CBytes -> CBytes -> IO () 172 | base64AndCompressFile origin target = do 173 | base64Enc <- newBase64Encoder 174 | (_, zlibCompressor) <- newCompress defaultCompressConfig{compressWindowBits = 31} 175 | 176 | withResource (initSourceFromFile origin) $ \ src -> 177 | withResource (initSinkToFile target) $ \ sink -> 178 | runBIO_ $ src . base64Enc . zlibCompressor . sink 179 | ``` 180 | 181 | The above code is similar to the command line `cat origin | base64 | gzip > target`. 182 | -------------------------------------------------------------------------------- /Z-IO/Filesystem.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | parent: Z-IO 4 | title: Filesystem 5 | nav_order: 1 6 | --- 7 | 8 | ## Table of contents 9 | {: .no_toc .text-delta } 10 | 11 | 1.
TOC 12 | {:toc} 13 | 14 | # Hello File 15 | 16 | It's easy to use the Z-IO package's filesystem module. First, import `Z.IO.FileSystem`: 17 | 18 | ```haskell 19 | import qualified Z.IO.FileSystem as FS 20 | ``` 21 | 22 | If it's OK to load a file into memory at once, you can use the following: 23 | 24 | ```haskell 25 | readFile :: HasCallStack => CBytes -> IO Bytes 26 | readTextFile :: HasCallStack => CBytes -> IO Text 27 | writeFile :: HasCallStack => CBytes -> Bytes -> IO () 28 | writeTextFile :: HasCallStack => CBytes -> Text -> IO () 29 | ``` 30 | 31 | `CBytes` is Z's file path type. `Bytes` and `Text` are types for binary and textual content, respectively. These types are documented in the [Z-Data section](https://z.haskell.world/Z-Data/). `readTextFile` and `writeTextFile` assume UTF-8 encoding: 32 | 33 | 34 | ```haskell 35 | > FS.writeTextFile "./test_file" "hello world!" 36 | > FS.readFile "./test_file" 37 | [104,101,108,108,111,32,119,111,114,108,100,33] 38 | > FS.readTextFile "./test_file" 39 | "hello world!" 40 | ``` 41 | 42 | # Resource Handling 43 | 44 | Now let's see a more complicated function: 45 | 46 | ```haskell 47 | initFile :: CBytes 48 | -> FileFlag -- ^ Opening flags, e.g. 'O_CREAT' @.|.@ 'O_RDWR' 49 | -> FileMode -- ^ Sets the file mode (permission and sticky bits), 50 | -- but only if the file was created, see 'DEFAULT_FILE_MODE'. 51 | -> Resource File 52 | ``` 53 | 54 | `FileFlag` and `FileMode` are bit constants controlling the file opening behavior, such as whether we have read or write access or whether a new file will be created when there's none. You can find more constants on [hackage docs](https://hackage.haskell.org/package/Z-IO-0.7.1.0/docs/Z-IO-FileSystem-Base.html#g:5). The interesting thing here is that the `initFile` function returns a `Resource File` type instead of `IO File`.
`Resource` is defined in `Z.IO.Resource` module, with following functions to use it: 55 | 56 | ```haskell 57 | withResource :: HasCallStack 58 | => Resource a -- ^ resource management record 59 | -> (a -> IO b) -- ^ function working on a resource 60 | -> IO b 61 | 62 | withResource' :: HasCallStack 63 | => Resource a -- ^ resource management record 64 | -> (a -> IO () -> IO b) 65 | -- ^ second param is the close function for early closing 66 | -> IO b 67 | ``` 68 | 69 | We simplified those two functions' type a little bit, and here is the idea: `withResource` will take care of resource opening and cleanup automatically, after you finish using it, or when exceptions happen. You only need to pass a function working on that resource. Now let's read the file created above again: 70 | 71 | ```haskell 72 | import Z.IO -- this module re-export Z.IO.Resource and other common stuff 73 | import qualified Z.IO.FileSystem as FS 74 | 75 | withResource (FS.initFile "./test_file" FS.O_RDWR FS.DEFAULT_FILE_MODE) $ \ file -> do 76 | bi <- newBufferedInput file 77 | printStd =<< readLine bi 78 | ``` 79 | 80 | `initFile` function doesn't open the file, and it just records how to open and close the file. Every time you want to do something with the file, use `withResource` to open(and close) it, that's all about resource handling in Z. 81 | 82 | `Resource` has a `Monad` instance, which is useful for safely combining resources, e.g. instead of writing following code: 83 | 84 | ```haskell 85 | withResource initRes1 $ \ res1 -> 86 | withResource initRes2 $ \ res2 -> 87 | withResource initRes3 $ \ res3 -> 88 | ... res1 ... res2 ... 
res3 89 | ``` 90 | 91 | You could define a combined `Resource`: 92 | 93 | ```haskell 94 | initRes123 :: Resource (Res1, Res2, Res3) 95 | initRes123 = do 96 | res1 <- initRes1 97 | res2 <- initRes2 98 | res3 <- initRes3 99 | return (res1, res2, res3) 100 | ``` 101 | 102 | Now `withResource initRes123 $ \ (res1, res2, res3) -> ...` will first open `res1`, `res2`, `res3` in order, then close them in reverse order. You could even interleave `IO` action within `Resource` using its `MonadIO` instance: 103 | 104 | ```haskell 105 | initRes123 :: Resource (Res1, Res2) 106 | initRes123 = do 107 | res1 <- initRes1 108 | res2Param <- liftIO $ ... res1 ... 109 | res2 <- initRes2 res2Param 110 | return (res1, res2) 111 | ``` 112 | 113 | The lifted `IO` action will become a part of the resource opening process. 114 | 115 | # Buffered I/O 116 | 117 | `newBufferedInput` and `readLine` functions in the code above are from `Z.IO.Buffered` module(also re-exported from `Z.IO`). In Z-IO, many IO devices(including `File` above) are instances of `Input/Output` class: 118 | 119 | ```haskell 120 | class Input i where 121 | readInput :: HasCallStack => i -> Ptr Word8 -> Int -> IO Int 122 | class Output o where 123 | writeOutput :: HasCallStack => o -> Ptr Word8 -> Int -> IO () 124 | ``` 125 | 126 | `readInput` and `writeOutput` work on pointers, which is not very convenient for direct usage. 
Open a `BufferedInput` or `BufferedOutput` to get auto-managed buffered I/O: 127 | 128 | ```haskell 129 | newBufferedInput :: Input i => i -> IO BufferedInput 130 | newBufferedOutput :: Output o => o -> IO BufferedOutput 131 | ``` 132 | 133 | There's a set of functions working on `BufferedInput/BufferedOutput` in `Z.IO.Buffered`, for example, to implement a word counter for files: 134 | 135 | ```haskell 136 | import Z.IO 137 | import qualified Z.IO.FileSystem as FS 138 | import qualified Z.Data.Vector as V 139 | 140 | main :: IO () 141 | main = do 142 | -- get file path from command line 143 | (_:path:_) <- getArgs 144 | withResource (FS.initFile path FS.O_RDWR FS.DEFAULT_FILE_MODE) $ \ file -> do 145 | bi <- newBufferedInput file 146 | printStd =<< loop bi 0 147 | where 148 | loop :: BufferedInput -> Int -> IO Int 149 | loop input !wc = do 150 | -- read a single line with linefeed dropped 151 | line <- readLine input 152 | case line of 153 | Just line' -> 154 | loop input (wc + length (V.words line')) 155 | _ -> return wc 156 | ``` 157 | 158 | Here's a quick cheatsheet on buffered IO, `BufferedInput` first: 159 | 160 | ```haskell 161 | -- | Request a chunk from the input device. 162 | readBuffer :: HasCallStack => BufferedInput -> IO Bytes 163 | 164 | -- | Push back an unconsumed chunk 165 | unReadBuffer :: HasCallStack => Bytes -> BufferedInput -> IO () 166 | 167 | -- | Read exactly N bytes, throw exception if EOF reached before N bytes. 168 | readExactly :: HasCallStack => Int -> BufferedInput -> IO Bytes 169 | 170 | -- /----- readToMagic ----- \ /----- readToMagic -----\ ... 171 | -- +------------------+-------+-----------------+-------+ 172 | -- | ... | magic | ... | magic | ... 173 | -- +------------------+-------+-----------------+-------+ 174 | readToMagic :: HasCallStack => Word8 -> BufferedInput -> IO Bytes 175 | 176 | -- /--- readLine ---\ discarded /--- readLine ---\ discarded / ... 
177 | -- +------------------+---------+------------------+---------+ 178 | -- | ... | \r\n/\n | ... | \r\n/\n | ... 179 | -- +------------------+---------+------------------+---------+ 180 | readLine :: HasCallStack => BufferedInput -> IO (Maybe Bytes) 181 | 182 | -- | Read all chunks from input. 183 | readAll :: HasCallStack => BufferedInput -> IO [Bytes] 184 | readAll' :: HasCallStack => BufferedInput -> IO Bytes 185 | 186 | -- | See Parser & Builder under Z-Data section for the following functions. 187 | -- | Request input using Parser 188 | readParser :: HasCallStack => Parser a -> BufferedInput -> IO a 189 | 190 | -- | Request input using ParseChunks, see Parser & Builder under Z-Data section. 191 | readParseChunks :: (Print e, HasCallStack) => ParseChunks IO Bytes e a -> BufferedInput -> IO a 192 | ``` 193 | 194 | `BufferedOutput` is relatively simple: 195 | 196 | ```haskell 197 | -- | Write a chunk into buffer. 198 | writeBuffer :: HasCallStack => BufferedOutput -> Bytes -> IO () 199 | -- | Directly write Builder into output device. 200 | writeBuilder :: HasCallStack => BufferedOutput -> Builder a -> IO () 201 | -- | Flush the buffer into output device. 202 | flushBuffer :: HasCallStack => BufferedOutput -> IO () 203 | ``` 204 | 205 | # A note on filepath 206 | 207 | Other operations from `Z.IO.FileSystem` module, e.g., `seek`, `mkdtemp`, `rmdir`, etc., are basically mirroring the Unix system call, which should be familiar to people who come from C/C++. The type for file path in Z is `CBytes`, which is a `\NUL` terminated byte array managed on GHC heap. 208 | 209 | We assumed that `CBytes`'s content is UTF-8 encoded though it may not always be the case, and there're some platform differences on file path handling, e.g., the separator on windows is different from Unix. 
To properly handle file paths, use `Z.IO.FileSystem.FilePath` (which is re-exported from `Z.IO.FileSystem`). For example, instead of manually concatenating file paths like: 210 | 211 | ```haskell 212 | let p = "foo" <> "/" <> "bar" 213 | ``` 214 | You should always use functions from the library: 215 | 216 | ```haskell 217 | import qualified Z.IO.FileSystem as FS 218 | 219 | let p = "foo" `FS.join` "bar" 220 | -- "foo" `FS.join` "../bar" will yield "bar" instead of "foo/../bar" 221 | ``` 222 | -------------------------------------------------------------------------------- /Z-IO/Logger.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | parent: Z-IO 4 | title: Logger 5 | nav_order: 4 6 | --- 7 | 8 | ## Table of contents 9 | {: .no_toc .text-delta } 10 | 11 | 1. TOC 12 | {:toc} 13 | 14 | # Logging functions 15 | 16 | High-performance logging is important to all kinds of applications. In Z-IO, all you have to do is to import `Z.IO` and use the following functions: 17 | 18 | ```haskell 19 | {-# LANGUAGE OverloadedStrings #-} 20 | import qualified Z.Data.Builder as B 21 | import Z.IO 22 | 23 | -- logging functions all work directly in IO monad 24 | debug, info , warning, fatal, critical :: B.Builder () -> IO () 25 | 26 | -- you can use B.Builder's IsString instance 27 | debug "..." 28 | -- together with B.Builder's Monad instance 29 | info $ "..." >> B.int 666 >> "..." 30 | warning $ do 31 | "..." 32 | B.int 666 33 | "..." 34 | fatal "this is an important message" 35 | critical "OMG, system is on fire" 36 | ``` 37 | 38 | Note that `debug/info/warning` do not trigger a log flush, while `fatal/critical` always trigger one. If `debug/info/warning` logs matter to you, use `withDefaultLogger` like this: 39 | 40 | ``` 41 | main :: IO () 42 | main = withDefaultLogger $ do 43 | ... 44 | ``` 45 | 46 | It will add a flush after the application finishes to ensure that `debug/info/warning` logs are flushed.
47 | 48 | # Setup Logger 49 | 50 | Z-IO's `Logger` has the following concurrent characteristics: 51 | 52 | * Logging functions are lock-free and can be used across threads. 53 | * Logs are atomic, and the order is preserved. 54 | * Flushing is protected by the lock, and there'll be no concurrent writing to the buffered device. 55 | 56 | So there is no need to worry about anything since most of the things are taken care of, just import and start to log. Functions like `debugTo/infoTo/warningTo...` that explicitly write logs to a given `Logger` are provided. However, most of the time, use the default `Logger`. And, use `setDefaultLogger` to change it when the application starts. Z-IO supports writing logs to different devices with different formats: 57 | 58 | ```haskell 59 | -- logs can be written to any `BufferedOutput`s with `MVar` protected from concurrent access 60 | newLogger :: LoggerConfig -> MVar BufferedOutput -> IO Logger 61 | -- create a logger connected to stderr 62 | newStdLogger :: LoggerConfig -> IO Logger 63 | -- create a file based logger 64 | newFileLogger :: LoggerConfig -> CBytes -> IO Logger 65 | 66 | -- Change LoggerConfig's loggerFormatter field to change logging format: 67 | -- [FATAL][2021-02-01T15:03:30+0800][interactive:31:1][thread#669]...\n 68 | defaultFmt :: LogFormatter 69 | -- Same as defaultFmt, but level is colored: cyan DEBUG, yellow WARNING, red FATAL and CRITICAL 70 | defaultColoredFmt :: LogFormatter 71 | -- {"level":"FATAL","time":"2021-02-01T15:02:19+0800","loc":":27:1","theadId":606,"content":"..."}\n 72 | defaultJSONFmt :: LogFormatter 73 | ``` 74 | 75 | Initial default loggers are connected to stderr. Use `defaultColoredFmt` if stderr is connected to a TTY device, and use `defaultFmt` otherwise. An example of setting up a logger: 76 | 77 | ```haskell 78 | main :: IO () 79 | main = do 80 | -- setup filter level to WARNING, info/debug logs will be ignored.
81 | -- use file based logger, and write to "app.log" 82 | setDefaultLogger =<< newFileLogger defaultJSONLoggerConfig 83 | { loggerConfigLevel = WARNING } "app.log" 84 | withDefaultLogger $ do 85 | ... 86 | ``` 87 | -------------------------------------------------------------------------------- /Z-IO/Network.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | parent: Z-IO 4 | title: Network 5 | nav_order: 2 6 | --- 7 | 8 | ## Table of contents 9 | {: .no_toc .text-delta } 10 | 11 | 1. TOC 12 | {:toc} 13 | 14 | # Client and server 15 | 16 | The Network is all about sending and receiving data. Using Z-IO's Network is straightforward: 17 | 18 | ```haskell 19 | {-# LANGUAGE OverloadedStrings #-} 20 | import Z.IO 21 | import Z.IO.Network 22 | import Z.Data.Text as T 23 | 24 | main :: IO () 25 | main = do 26 | -- use getAddrInfo to perform DNS resolution 27 | addr:_ <- getAddrInfo Nothing "www.bing.com" "http" 28 | -- use initTCPClient to initialize a TCP client 29 | withResource (initTCPClient defaultTCPClientConfig{ 30 | tcpRemoteAddr = addrAddress addr}) $ \ tcp -> do 31 | -- use BufferedInput/BufferedOutput facility to read from/write to tcp socket 32 | i <- newBufferedInput tcp 33 | o <- newBufferedOutput tcp 34 | writeBuffer o "GET http://www.bing.com HTTP/1.1\r\nHost: www.bing.com\r\n\r\n" 35 | flushBuffer o 36 | readBuffer i >>= pure . T.validate 37 | 38 | -- use startTCPServer to start serving in TCP protocol 39 | startTCPServer defaultTCPServerConfig{ 40 | tcpListenAddr = SocketAddrIPv4 ipv4Loopback 8080} $ \ tcp -> do 41 | o <- newBufferedOutput tcp 42 | writeBuffer o "hello world" >> flushBuffer o 43 | ``` 44 | 45 | Z.Haskell provide several network capabilities: 46 | 47 | + `Z.IO.Network.IPC` provides the stream channel for inter-process communication based on domain socket(Unix) or named pipe(Windows). 
48 | + `Z.IO.Network.TCP` provides the stream channel for remote communication based on TCP socket. 49 | + `Z.IO.Network.UDP` provides the message channel on top of the UDP socket. 50 | + A TLS implementation based on [botan](https://botan.randombit.net/) is under development. 51 | 52 | Let's take the TCP module as an example. Lots of low-level socket details (`bind`, `listen`, `accept`, etc.) are hidden, with two high-level operations left: 53 | 54 | ```haskell 55 | -- | Connect to a TCP target 56 | initTCPClient :: HasCallStack => TCPClientConfig -> Resource UVStream 57 | -- | Start a TCP server 58 | startTCPServer :: HasCallStack 59 | => TCPServerConfig 60 | -> (UVStream -> IO ()) 61 | -- ^ worker which will get an accepted TCP stream 62 | -- and run in a separate haskell thread 63 | -> IO () 64 | ``` 65 | 66 | # Send/receive packet 67 | 68 | The `UVStream` type implements the `Input/Output` class from `Z.IO.Buffered`, so that you can reuse all the buffered read/write API. For example, let's say you have designed a simple framed message protocol: 69 | 70 | ```haskell 71 | import Data.Word 72 | import qualified Z.Data.Vector as V 73 | 74 | -- uint8 message type uint16 payload length message payload 75 | -- +------------------+----------------------+------------------ 76 | -- | 0xXX | 0xXXXX(big endian) | ... 77 | -- +------------------+----------------------+------------------ 78 | 79 | data Message = Message { msgTyp :: Word8, msgPayload :: V.Bytes } 80 | ``` 81 | 82 | You can manually decode message frames like this: 83 | 84 | ```haskell 85 | -- import bit operations 86 | import Data.Bits (unsafeShiftL, (.|.)) 87 | import Z.IO 88 | 89 | readMessage :: HasCallStack => BufferedInput -> IO Message 90 | readMessage bi = do 91 | msg_typ <- readExactly 1 bi 92 | payload_len_h <- readExactly 1 bi 93 | payload_len_l <- readExactly 1 bi 94 | let payload_len = 95 | (fromIntegral payload_len_h) `unsafeShiftL` 8 96 | .|.
(fromIntegral payload_len_l) 97 | payload <- readExactly payload_len 98 | return (Message msg_typ payload) 99 | ``` 100 | 101 | Or you can use `Parser` from the `Z.Data.Parser` module: 102 | 103 | 104 | ```haskell 105 | import qualified Z.Data.Parser as P 106 | import Data.Word 107 | import Z.IO 108 | 109 | parseMessage :: P.Parser Message 110 | parseMessage = do 111 | msg_typ <- P.decodePrim @Word8 112 | payload_len <- P.decodePrimBE @Word16 113 | payload <- P.take (fromIntegral payload_len) 114 | return (Message msg_typ payload) 115 | 116 | readMessage :: HasCallStack => BufferedInput -> IO Message 117 | readMessage = readParser parseMessage 118 | ``` 119 | 120 | `readParser` runs the `Parser` once per call: it parses one `Message` out of the buffer, automatically waiting for more input when needed. Writing a `Message` to the TCP socket is similar: 121 | 122 | ```haskell 123 | import qualified Z.Data.Builder as B 124 | import qualified Z.Data.Vector as V 125 | import Z.IO 126 | 127 | writeMessage :: HasCallStack => BufferedOutput -> Message -> IO () 128 | writeMessage bo (Message msg_typ payload) = do 129 | -- use Builder monad to compose buffer writing functions 130 | writeBuilder bo $ do 131 | B.encodePrim msg_typ 132 | B.encodePrimBE (V.length payload) 133 | B.bytes payload 134 | -- you may want to add a flush after each message has been written 135 | -- or leave flush to the caller 136 | -- flushBuffer bo 137 | ``` 138 | 139 | Z.Haskell provides many tools to deal with the streaming nature of the TCP protocol (and many other streaming devices such as IPC and Files). In the next section, we will introduce the `BIO`, a higher-level streaming API. 140 | 141 | # UDP 142 | 143 | UDP is different from IPC or TCP in that it's a message protocol rather than a streaming one. There are no `Input/Output` instances for the `UDP` type. Instead, Z-IO provides message reading & writing functions for UDP directly: 144 | 145 | ```haskell 146 | -- | Initialize a UDP socket. 
147 | initUDP :: UDPConfig -> Resource UDP 148 | -- | Send a UDP message to target address. 149 | sendUDP :: HasCallStack => UDP -> SocketAddr -> V.Bytes -> IO () 150 | -- | Receive messages from UDP socket, return source address if available, and a `Bool` 151 | -- to indicate if the message is partial (larger than receive buffer size). 152 | recvUDP :: HasCallStack => UDPRecvConfig -> UDP -> IO [(Maybe SocketAddr, Bool, V.Bytes)] 153 | -- | Receive UDP messages within a loop 154 | recvUDPLoop :: HasCallStack 155 | => UDPRecvConfig 156 | -> UDP 157 | -> ((Maybe SocketAddr, Bool, V.Bytes) -> IO a) 158 | -> IO () 159 | ``` 160 | 161 | Loop receiving(`recvUDPLoop`) can be faster since it can reuse the receiving buffer internally. Unlike the TCP server above, the UDP worker function is called on the current Haskell thread instead of a forked one. If you have heavy computations within the worker function, consider using `forkBa` from `Z.IO.UV.Manager` (a function similar to `forkIO`, but with active thread balancing). 162 | -------------------------------------------------------------------------------- /Z-IO/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: Z-IO 4 | nav_order: 2 5 | has_children: true 6 | --- 7 | 8 | [![Hackage](https://img.shields.io/hackage/v/Z-IO.svg?style=flat)](https://hackage.haskell.org/package/Z-IO) 9 | [![Linux Build Status](https://github.com/ZHaskell/z-io/workflows/ubuntu-ci/badge.svg)](https://github.com/ZHaskell/z-io/actions) 10 | [![macOS Build Status](https://github.com/ZHaskell/z-io/workflows/osx-ci/badge.svg)](https://github.com/ZHaskell/z-io/actions) 11 | [![Windows Build Status](https://github.com/ZHaskell/z-io/workflows/win-ci/badge.svg)](https://github.com/ZHaskell/z-io/actions) 12 | 13 | # Z-IO 14 | 15 | Z-IO package provides high-performance I/O operations based on libuv's event loop and GHC lightweight threads. 
16 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | # Welcome to Jekyll! 2 | # 3 | # This config file is meant for settings that affect your whole blog, values 4 | # which you are expected to set up once and rarely edit after that. If you find 5 | # yourself editing this file very often, consider using Jekyll's data files 6 | # feature for the data you need to update frequently. 7 | # 8 | # For technical reasons, this file is *NOT* reloaded automatically when you use 9 | # 'bundle exec jekyll serve'. If you change this file, please restart the server process. 10 | 11 | # Site settings 12 | # These are used to personalize your new site. If you look in the HTML files, 13 | # you will see them accessed via {{ site.title }}, {{ site.email }}, and so on. 14 | # You can create any custom variable you would like, and they will be accessible 15 | # in the templates via {{ site.myvariable }}. 16 | title: Z.Haskell 17 | email: winterland1989@gmail.com 18 | description: >- # this means to ignore newlines until "baseurl:" 19 | The document site for Z.Haskell. 20 | baseurl: "/" # the subpath of your site, e.g. /blog 21 | url: "" # the base hostname & protocol for your site, e.g. http://example.com 22 | github_username: winterland1989 23 | 24 | # Build settings 25 | markdown: kramdown 26 | theme: "just-the-docs" 27 | remote_theme: "pmarsceill/just-the-docs" 28 | plugins: 29 | - jekyll-feed 30 | 31 | aux_links: 32 | "GitHub": 33 | - "//github.com/ZHaskell" 34 | 35 | # Footer content 36 | # appears at the bottom of every page's main content 37 | # Note: The footer_content option is deprecated and will be removed in a future major release. Please use `_includes/footer_custom.html` for more robust markup / liquid-based content. 38 | footer_content: '

Back to top

Copyright © 2017-2022 Z.Haskell contributors. Distributed by a BSD license.' 39 | 40 | # Footer last edited timestamp 41 | last_edit_timestamp: true # show or hide edit time - page must have `last_modified_date` defined in the frontmatter 42 | last_edit_time_format: "%b %e %Y at %I:%M %p" # uses ruby's time format: https://ruby-doc.org/stdlib-2.7.0/libdoc/time/rdoc/Time.html 43 | 44 | # Footer "Edit this page on GitHub" link text 45 | gh_edit_link: true # show or hide edit this page link 46 | gh_edit_link_text: "Edit this page on GitHub." 47 | gh_edit_repository: "https://github.com/Zhaskell/docs" # the github URL for your repo 48 | gh_edit_branch: "master" # the branch that your docs is served from 49 | gh_edit_source: "" # the source that your files originate from 50 | gh_edit_view_mode: "tree" # "tree" or "edit" if you want the user to jump into the editor immediately 51 | 52 | # Exclude from processing. 53 | # The following items will not be processed, by default. Create a custom list 54 | # to override the default setting. 
55 | # exclude: 56 | # - Gemfile 57 | # - Gemfile.lock 58 | # - node_modules 59 | # - vendor/bundle/ 60 | # - vendor/cache/ 61 | # - vendor/gems/ 62 | # - vendor/ruby/ 63 | -------------------------------------------------------------------------------- /_data/version.yml: -------------------------------------------------------------------------------- 1 | z_version: 1.0 2 | ghc_version: 8.6 3 | cabal_version: 2.4 4 | -------------------------------------------------------------------------------- /_layouts/post.html: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | --- 4 | 5 | 22 | -------------------------------------------------------------------------------- /_posts/2021-02-01-High-performance-JSON-codec.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: post 3 | title: "High-performance JSON codec" 4 | date: 2021-02-01 16:52:44 CST 5 | author: Dong 6 | categories: performance 7 | --- 8 | 9 | JSON processing is a fundamental building block in modern network applications. It's also a large module in [Z-Data](//hackage.haskell.org/package/Z-Data) package. With careful optimization, we managed to get a 1.5X - 3X encoding and 3X decoding performance boost comparing to [aeson](//hackage.haskell.org/package/aeson), a widely used JSON package on hackage. 10 | 11 | 12 | 13 | ## Benchmark Result 14 | 15 | ![bench-result](https://github.com/ZHaskell/benchmarks/blob/master/json-benchmark/json-benchmark-result.png?raw=true) 16 | 17 | The [above benchmarks](//github.com/ZHaskell/z-benchmarks) running on an MBP13 2020(2 GHz Quad-Core Intel Core i5), Each benchmark runs a certain JSON task with fixed iterations, using [sample data](//github.com/ZHaskell/benchmarks/tree/master/asset/json-data). Some notes on benchmarks code: 18 | 19 | * Benchmarks labeled with `encode` and `decode` bench the conversion between JSON documents and JSON intermedia representation. 
20 | * Benchmarks labeled with `typed encode` and `typed decode` bench the conversion between JSON documents and Haskell ADT. 21 | * All ADTs' instances are deriving using GHC generic mechanism, no manual conversion code is required. 22 | 23 | ## Fast escaping handling 24 | 25 | Surprisingly, when processing JSON, one can't directly copy strings because they may be [escaped](https://tools.ietf.org/html/rfc8259#page-8), which brings a quite big performance challenge. In [Z-Data](//hackage.haskell.org/package/Z-Data) we carefully arranged the code path to avoid performance hit: 26 | 27 | * When encoding text value 28 | 29 | 1. Run a prescan loop to find if we need escaping, and how much space we need to write the escaped string if escaping is needed. 30 | 2. If there's no escaping needed, vectorized `copyByteArray#` is used to directly write text into the output buffer. 31 | 3. Otherwise, go through the escaping loop. 32 | 33 | * When decoding JSON string 34 | 35 | 1. Run a prescan to find the end of the string, record if unescaping is needed at the same time. 36 | 2. If no unescaping is needed, a vectorized UTF8 validation is used. 37 | 3. Otherwise, go through a UTF8 validation loop extended with JSON unescaping logic. 38 | 39 | These optimizations are possible because [Z-Data](//hackage.haskell.org/package/Z-Data) uses UTF8 encoding `Text` type, which could save considerable time on the non-escaping path. 40 | 41 | ## IR(intermedia represantation) 42 | 43 | Another optimization opportunity comes from the new JSON document IR design. 
In [Z-Data](//hackage.haskell.org/package/Z-Data) the IR type uses a vector of key-value pairs to represent JSON objects: 44 | 45 | ```haskell 46 | data Value = Object (Vector (Text, Value)) 47 | | Array (Vector Value) 48 | | String T.Text 49 | | Number Scientific 50 | | Bool Bool 51 | | Null 52 | deriving (Eq, Ord, Show, Typeable, Generic) 53 | deriving anyclass Print 54 | ``` 55 | 56 | This representation has many benefits: 57 | 58 | * It preserves the original key-value order, so that round-trip processing is possible. 59 | * Users can choose different de-duplication strategies when converting IR to ADT. 60 | * It's faster to construct an IR value or convert ADT to IR. 61 | 62 | By default [Z-Data](//hackage.haskell.org/package/Z-Data) uses [FlatMap](//hackage.haskell.org/package/Z-Data/docs/Z-Data-Vector-FlatMap.html) when converting IR to ADT, which is simply a sorted vector of key-value pairs. It can be constructed by sorting the original key-value pairs in O(N\*logN) and looked up using binary-search in O(logN). 63 | 64 | ## Parser and Builder facility 65 | 66 | [Z-Data](//hackage.haskell.org/package/Z-Data) uses [Bytes](https://hackage.haskell.org/package/Z-Data/docs/Z-Data-Vector.html#t:Bytes), a vector type based on `ByteArray#`, to represent binary data; it's different from traditional bytestring ones that use `Addr#`(pointer). It's necessary to provide a different set of `Builder`s and `Parser`s to work on that representation. In both cases, a simple CPSed monad is chosen to make compiled code fast. 67 | 68 | ``` 69 | -- Z.Data.Builder.Base 70 | newtype Builder a = Builder { 71 | runBuilder :: (a -> BuildStep) -- next write continuation 72 | -> BuildStep 73 | } 74 | 75 | -- Z.Data.Parser.Base 76 | newtype Parser a = Parser { 77 | runParser :: forall r . 
(ParseError -> ParseStep r) -- fail continuation 78 | -> (a -> ParseStep r) -- success continuation 79 | -> ParseStep r 80 | } 81 | ``` 82 | 83 | These types are almost the simplest CPS monads one can write, and GHC is particularly good at optimizing the composition of these monads. 84 | 85 | ## Conclusion 86 | 87 | This benchmark compared [Z-Data](//hackage.haskell.org/package/Z-Data) to the widely used Haskell package [aeson](//hackage.haskell.org/package/aeson). The result shows that the new `Builder` and `Parser` facility works as expected, and our optimizing techniques can bring a huge performance improvement. 88 | 89 | 90 | -------------------------------------------------------------------------------- /_posts/2021-04-20-introduce-BIO-a-simple-streaming-abstraction.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: post 3 | title: "Introduce BIO: A Simple Streaming Abstraction" 4 | date: 2021-04-20 14:43:14 CST 5 | author: Dong 6 | categories: design 7 | --- 8 | 9 | Streaming IO is an old idea: the data is read in chunks, each chunk gets processed and written to output so that the whole memory a program used is kept under a relatively low level. e.g. 10 | 11 | ```bash 12 | cat foo.txt | gzip | base64 | tee foo.gz 13 | ``` 14 | 15 | The above UNIX commands read a file `foo.txt` in chunks, perform gzip and base64 transformation, and get piped to both `foo.gz` and stdout. We'd like to get similar syntax when using Haskell to work with chunked data, and that's the starting point of streaming abstraction. 
16 | 17 | 18 | 19 | ## A Stream ADT 20 | 21 | ### Partial closure 22 | 23 | In [Z-Data's parser section](https://z.haskell.world/Z-Data/Parser-and-Builder.html), we described a resumable parser, which can consume input in chunks: 24 | 25 | ```haskell 26 | > P.parse' dateParser "2020-12-12" 27 | Date 2020 12 12 28 | > P.parseChunk dateParser "2020-" 29 | Partial _ 30 | > let (P.Partial f) = P.parseChunk dateParser "2020-" 31 | > let (P.Partial f') = f "05-05" -- incrementally provide input 32 | > f' "" -- push empty chunk to signal EOF 33 | Success Date {year = 2020, month = 5, day = 5} 34 | ``` 35 | 36 | The core type to achieve resumable parsing is `Result`: 37 | 38 | ```haskell 39 | data Result e r 40 | = Success r !Bytes 41 | | Failure e !Bytes 42 | | Partial (V.Bytes -> Result e r) 43 | ``` 44 | 45 | The `Partial` constructor contains a closure capturing the last chunk's parsing state, which could be applied to the next chunk to produce a new `Result`. Now let's consider if we could apply this construction to IO(or an arbitrary monad), following definition is from the [streaming](https://hackage.haskell.org/package/streaming) package: 46 | 47 | ```haskell 48 | data Stream f m r = Step !(f (Stream f m r)) 49 | | Effect (m (Stream f m r)) 50 | | Return r 51 | 52 | data Of a b = !a :> b 53 | ``` 54 | 55 | ### Stream Monad 56 | 57 | In streaming, `Stream (Of a) IO ()` are used to represent `IO` streams, with some monad primitives you can construct an `IO` stream like this: 58 | 59 | ```haskell 60 | -- Stream monad will provide some primitives to create monadic value, e.g. 61 | -- yield :: Monad m => a -> Stream (Of a) m () 62 | -- yield a = Step (a :> Return ()) 63 | -- instance (MonadIO m, Functor f) => MonadIO (Stream f m) where 64 | -- liftIO = Effect . fmap Return . 
liftIO 65 | 66 | foo :: Stream (Of a) IO () 67 | foo = do 68 | yield 1 69 | yield 2 70 | lift readLn >>= yield 71 | ``` 72 | 73 | With the `Stream`'s `Monad` instance, the value of foo now becomes a chain of Stream ADTs: 74 | 75 | ```haskell 76 | Step (1 :> Step (2 :> Effect (\ x -> Step x :> Return ()) <$> readLn)) 77 | ``` 78 | 79 | Now if we provide a function to iterate through this ADT, the stream could be processed. Such a function is often called an interpreter, a term from [the free monad design pattern](https://softwareengineering.stackexchange.com/questions/242795/what-is-the-free-monad-interpreter-pattern). For example streaming provides its own `foldrM` interpreter to fold over a `Stream` structure: 80 | 81 | ```haskell 82 | foldrM :: Monad m => (a -> m r -> m r) -> Stream (Of a) m r -> m r 83 | foldrM step = loop where 84 | loop stream = case stream of 85 | Return r -> return r 86 | Effect m -> m >>= loop -- This is where IO effects happened! 87 | Step (a :> as) -> step a (loop as) 88 | ``` 89 | 90 | ### The Magic Pipes 91 | 92 | There're some packages on hackage pushing the free monad technique to its limit, e.g. the [pipes](http://hackage.haskell.org/package/pipes) provide a rather incomprehensible core ADT type: 93 | 94 | ```haskell 95 | data Proxy a' a b' b m r 96 | = Request a' (a -> Proxy a' a b' b m r ) 97 | | Respond b (b' -> Proxy a' a b' b m r ) 98 | | M (m (Proxy a' a b' b m r)) 99 | | Pure r 100 | ``` 101 | 102 | With this beast at hand, pipes could provide more interesting primitives like `await`, or `>->`. 
e.g. `do x <- await; y <- await; return (x, y)` becomes: 103 | 104 | ```haskell 105 | Request () (\ x -> Request () (\ y -> Pure (x, y))) 106 | ``` 107 | 108 | One technique pipes uses is to use the type `Void` to eliminate some constructors under certain types while still keeping composability: 109 | 110 | ```haskell 111 | -- | type with no constructors 112 | type X = Void 113 | 114 | -- | 'Effect's neither 'Pipes.await' nor 'Pipes.yield' 115 | type Effect = Proxy X () () X 116 | -- | 'Producer's can only 'Pipes.yield' 117 | type Producer b = Proxy X () () b 118 | -- | 'Pipe's can both 'Pipes.await' and 'Pipes.yield' 119 | type Pipe a b = Proxy () a () b 120 | -- | 'Consumer's can only 'Pipes.await' 121 | type Consumer a = Proxy () a () X 122 | ``` 123 | 124 | ## A Retrospective 125 | 126 | ### Free monad is powerful, but hard to use 127 | 128 | The free monad approach could give you as many primitives as you want, and you could choose different interpreters to run, but it's hard to use in several ways: 129 | 130 | + It's hard to comprehend: you have to read the monad instance very carefully to understand how those primitives work. 131 | + It has the same problem as monad transformers, i.e. now every base monad operation needs to be lifted. 132 | + It's hard for the compiler to optimize, because now every operation becomes an ADT constructor, which often leads to higher allocations. 133 | 134 | A free monad construction for streaming may also need to provide a different set of combinators, such as `mapM` or `foldM`, which are incompatible with `Control.Monad`. 135 | 136 | ### How other languages do streaming 137 | 138 | It's interesting to find out that most of the OO languages solve this problem in a much simpler way, for example in JavaScript. 
139 | 140 | ```javascript 141 | // from node.js example 142 | const fs = require('fs'); 143 | const zlib = require('zlib'); 144 | const r = fs.createReadStream('file.txt'); 145 | 146 | const z = zlib.createGzip(); 147 | const w = fs.createWriteStream('file.txt.gz'); 148 | r.pipe(z).pipe(w); 149 | 150 | // or you can manually connect streams like this: 151 | r.on('data', (chunk) => { z.write(chunk); }); 152 | z.on('data', (chunk) => { w.write(chunk); }); 153 | ``` 154 | 155 | From the OO viewpoint, a stream node is an object with a method that receives chunks and writes downstream inside callbacks, and that's it. This pattern has some drawbacks: 156 | 157 | + A stream node somehow loses control, e.g. you can't stop the stream processing in a middle node without touching the source. This is the *Inversion of Control* problem of all callback-based APIs. 158 | + A stream node now becomes a mutable stateful object, which is unnatural in Haskell. 159 | 160 | ## Introduce the BIO 161 | 162 | In [Z-IO](https://hackage.haskell.org/package/Z-IO) v0.8, we introduce a new `BIO` type to simplify stream processing with three design goals: 163 | 164 | + Simple composable types. 165 | + No transformer, no lift. 166 | + Easier to be used for writing both processors and applications. 167 | 168 | The result is a type focusing on *callback transformation*: 169 | 170 | ```haskell 171 | -- A bio node receives a callback, returns a new callback to be called from upstream. 172 | type BIO inp out = (Maybe out -> IO ()) -> Maybe inp -> IO () 173 | 174 | -- A Source doesn't consume any meaningful input 175 | type Source a = BIO Void a 176 | -- A Sink doesn't produce any meaningful output 177 | type Sink a = BIO a Void 178 | 179 | -- | A pattern for more meaningful matching. 
180 | pattern EOF :: Maybe a 181 | pattern EOF = Nothing 182 | ``` 183 | 184 | For example, to implement a [zlib](https://zlib.net/) node with BIO: 185 | 186 | ```haskell 187 | compressBIO :: ZStream -> BIO V.Bytes V.Bytes 188 | compressBIO zs = \ callback mbs -> 189 | case mbs of 190 | Just bs -> do 191 | -- feed input chunk to ZStream 192 | set_avail_in zs bs (V.length bs) 193 | let loop = do 194 | oavail :: CUInt <- withCPtr zs $ \ ps -> do 195 | -- perform deflate and peek output buffer remaining 196 | throwZlibIfMinus_ (deflate ps (#const Z_NO_FLUSH)) 197 | (#peek struct z_stream_s, avail_out) ps 198 | when (oavail == 0) $ do 199 | -- when output buffer is full, 200 | -- freeze chunk and call the callback 201 | oarr <- A.unsafeFreezeArr =<< readIORef bufRef 202 | callback (Just (V.PrimVector oarr 0 bufSiz)) 203 | newOutBuffer 204 | loop 205 | loop 206 | _ -> ... similar to above, with no input chunk and Z_FINISH flag 207 | ``` 208 | 209 | When implementing a `Source`, you just ignore the `EOF` param, and call the callback once a new chunk is ready. 210 | 211 | ```haskell 212 | -- | Turn a `IO` action into 'Source' 213 | sourceFromIO :: HasCallStack => IO (Maybe a) -> Source a 214 | sourceFromIO io = \ k _ -> 215 | let loop = io >>= \ x -> 216 | case x of 217 | Just _ -> k x >> loop -- you should loop inside a Source 218 | _ -> k EOF 219 | in loop 220 | ``` 221 | 222 | You should assume the `EOF` param is only given once, so a loop is often needed. 
Similar to `Source`, a `Sink` doesn't need to write any output until the final `EOF`: 223 | 224 | ```haskell 225 | sinkToIO :: HasCallStack => (a -> IO ()) -> Sink a 226 | sinkToIO f = \ k ma -> 227 | case ma of 228 | Just a -> f a 229 | _ -> k EOF 230 | ``` 231 | 232 | ### Composing BIO and running 233 | 234 | Composing BIO is simple: you can use `(.)`, the function composition operator, to connect BIOs, since it's just a callback transformation: 235 | 236 | ```haskell 237 | import Z.Data.CBytes (CBytes) 238 | import Z.IO 239 | import Z.IO.BIO 240 | import Z.IO.BIO.Zlib 241 | 242 | base64AndCompressFile :: HasCallStack => CBytes -> CBytes -> IO () 243 | base64AndCompressFile origin target = do 244 | base64Enc <- newBase64Encoder 245 | (_, zlibCompressor) <- newCompress defaultCompressConfig{compressWindowBits = 31} 246 | 247 | withResource (initSourceFromFile origin) $ \ src -> 248 | withResource (initSinkToFile target) $ \ sink -> 249 | runBIO_ $ src . base64Enc . zlibCompressor . sink 250 | ``` 251 | 252 | The above code is similar to the command line `cat origin | base64 | gzip > target`, and `runBIO_` is defined simply as: 253 | 254 | ```haskell 255 | -- | Discards a value, used as the callback to `Sink`. 256 | discard :: a -> IO () 257 | discard _ = return () 258 | 259 | runBIO_ :: HasCallStack => BIO inp out -> IO () 260 | runBIO_ bio = bio discard EOF 261 | ``` 262 | 263 | ### Conclusion 264 | 265 | There are many streaming libraries on hackage, and most of them are designed around the free monad pattern. In `Z-IO` we introduced a new simpler design around callback transformation, which is much easier to use for writing both stream processors and applications. Of course, there is no silver bullet. The `BIO` type in `Z-IO` also has limitations: for example, the source cannot be paused by a downstream processor without using some IO state, and the whole state management now relies on IO, rather than user-supplied state monads. 
266 | -------------------------------------------------------------------------------- /_sass/custom/custom.scss: -------------------------------------------------------------------------------- 1 | .site-header { 2 | position: relative; 3 | padding-left: 60px; 4 | } 5 | .site-header:before { 6 | content: ""; 7 | width: 60px; 8 | height: 60px; 9 | position: absolute; 10 | left: 0; 11 | top: 0; 12 | background-image: url("https://avatars.githubusercontent.com/u/38765559?s=200&v=4"); 13 | background-size: contain; 14 | } 15 | div.highlight { 16 | line-height: 1.4em; 17 | } 18 | .main-content h1, 19 | .main-content h2, 20 | .main-content h3, 21 | .main-content h4, 22 | .main-content h5, 23 | .main-content h6 { 24 | margin-top: 1em; 25 | } 26 | -------------------------------------------------------------------------------- /benchmarks.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: Benchmarks 4 | nav_order: 4 5 | --- 6 | 7 | ## Table of contents 8 | {: .no_toc .text-delta } 9 | 10 | 1. TOC 11 | {:toc} 12 | 13 | The benchmark code is available on [GitHub](https://github.com/ZHaskell/benchmarks). 14 | 15 | Note that benchmarks only record certain aspects of the code and the setup environment, so they may not reflect real-world use cases. Any patches that make the simulation more realistic are welcome. 16 | 17 | # JSON performance 18 | 19 | This benchmark compared the [JSON module in Z-Data](https://hackage.haskell.org/package/Z-Data/docs/Z-Data-JSON.html) with [aeson](https://hackage.haskell.org/package/aeson), a widely used JSON package on hackage. See our analysis in [this blog post](/performance/2021/02/01/High-performance-JSON-codec.html). 
20 | 21 | ![bench-result](https://github.com/ZHaskell/benchmarks/blob/master/json-benchmark/json-benchmark-result.png?raw=true) 22 | 23 | # TCP performance 24 | 25 | This benchmark compared different redis PING-PONG server implementations, using `redis-benchmark` tool from redis package running `redis-benchmark -p 8888 -t ping -n 100000 -q -c 100`. 26 | 27 | ``` 28 | # Haskell's network package 29 | # cabal run redis-benchmark-base -- +RTS -N4 -H2G 30 | PING_INLINE: 88105.73 requests per second 31 | PING_BULK: 87873.46 requests per second 32 | 33 | # Z-IO from Z.Haskell 34 | # cabal run redis-benchmark-z -- +RTS -N4 -H2G 35 | PING_INLINE: 99800.40 requests per second 36 | PING_BULK: 102459.02 requests per second 37 | 38 | # Golang standard lib 39 | PING_INLINE: 98716.68 requests per second 40 | PING_BULK: 101522.84 requests per second 41 | 42 | # Rust mio 43 | PING_INLINE: 111731.84 requests per second 44 | PING_BULK: 112612.61 requests per second 45 | 46 | # C libuv 47 | PING_INLINE: 109170.30 requests per second 48 | PING_BULK: 105374.08 requests per second 49 | ``` 50 | 51 | Note both mio and libuv using a single thread event loop and a shared buffer to receive `redis-benchmark`'s messages between different connection, which is quite different from other lightweight thread based implementations. 
52 | 53 | GHC also provides memory statistics: 54 | 55 | ``` 56 | # Haskell's network package 57 | # cabal run redis-benchmark-base -- +RTS -N4 -s -H2G 58 | 3,751,313,096 bytes allocated in the heap 59 | 302,793,568 bytes copied during GC 60 | 1,869,864 bytes maximum residency (1044 sample(s)) 61 | 490,016 bytes maximum slop 62 | 2085 MiB total memory in use (0 MB lost due to fragmentation) 63 | 64 | Tot time (elapsed) Avg pause Max pause 65 | Gen 0 2085 colls, 2085 par 0.455s 0.131s 0.0001s 0.0085s 66 | Gen 1 1044 colls, 1043 par 0.419s 0.149s 0.0001s 0.0226s 67 | 68 | Parallel GC work balance: 82.10% (serial 0%, perfect 100%) 69 | 70 | TASKS: 10 (1 bound, 9 peak workers (9 total), using -N4) 71 | 72 | SPARKS: 0 (0 converted, 0 overflowed, 0 dud, 0 GC'd, 0 fizzled) 73 | 74 | INIT time 0.001s ( 0.001s elapsed) 75 | MUT time 5.360s ( 5.842s elapsed) 76 | GC time 0.874s ( 0.280s elapsed) 77 | EXIT time 0.001s ( 0.008s elapsed) 78 | Total time 6.236s ( 6.130s elapsed) 79 | 80 | Alloc rate 699,915,737 bytes per MUT second 81 | 82 | Productivity 85.9% of total user, 95.3% of total elapsed 83 | 84 | # Z-IO from Z.Haskell 85 | # cabal run redis-benchmark-z -- +RTS -N4 -s -H2G 86 | 280,828,448 bytes allocated in the heap 87 | 835,688 bytes copied during GC 88 | 3,375,112 bytes maximum residency (4 sample(s)) 89 | 839,672 bytes maximum slop 90 | 2084 MiB total memory in use (0 MB lost due to fragmentation) 91 | 92 | Tot time (elapsed) Avg pause Max pause 93 | Gen 0 5 colls, 5 par 0.009s 0.008s 0.0015s 0.0073s 94 | Gen 1 4 colls, 3 par 0.023s 0.021s 0.0052s 0.0194s 95 | 96 | Parallel GC work balance: 82.81% (serial 0%, perfect 100%) 97 | 98 | TASKS: 14 (1 bound, 13 peak workers (13 total), using -N4) 99 | 100 | SPARKS: 0 (0 converted, 0 overflowed, 0 dud, 0 GC'd, 0 fizzled) 101 | 102 | INIT time 0.001s ( 0.001s elapsed) 103 | MUT time 2.811s ( 6.757s elapsed) 104 | GC time 0.032s ( 0.028s elapsed) 105 | EXIT time 0.002s ( 0.004s elapsed) 106 | Total time 2.846s ( 6.790s 
elapsed) 107 | 108 | Alloc rate 99,903,441 bytes per MUT second 109 | 110 | Productivity 98.8% of total user, 99.5% of total elapsed 111 | ``` 112 | -------------------------------------------------------------------------------- /blog.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: Blog 4 | --- 5 | 6 |
    7 | {% for post in site.posts %} 8 |
  • 9 |

    {{ post.title }}

    10 |

    {{ post.date }} by {{ post.author }}

    11 | {{ post.excerpt }} 12 |
  • 13 | {% endfor %} 14 |
15 | -------------------------------------------------------------------------------- /guide.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: page 3 | title: Guide 4 | nav_order: 1 5 | permalink: /guide 6 | --- 7 | 8 | ## Table of contents 9 | {: .no_toc .text-delta } 10 | 11 | 1. TOC 12 | {:toc} 13 | 14 | ## Requirements 15 | 16 | You need a working Haskell compiler system: GHC(>={{site.data.version.ghc_version}}), cabal-install(>={{site.data.version.cabal_version}}). There are several choices: 17 | 18 | + Use the package manager on your operating system if available: 19 | 20 | * Mac users can get them via [homebew](//brew.sh/): `brew install ghc cabal-install`. 21 | * Windows users can get them via [chocolatey](//chocolatey.org): `choco install ghc cabal`. 22 | * Ubuntu users are recommended to use this [ppa](//launchpad.net/~hvr/+archive/ubuntu/ghc). 23 | 24 | + Setup via [ghcup](https://www.haskell.org/ghcup/). 25 | 26 | + Download pre-built binaries([GHC](https://www.haskell.org/ghc/download.html), [cabal-install](https://www.haskell.org/cabal/download.html)) and install manually. 27 | 28 | ## Installation 29 | 30 | To use [Z-Data](https://hackage.haskell.org/package/Z-Data) package as an example. Add the following lines to your project's cabal file: 31 | 32 | ``` 33 | ... 34 | build-depends: Z-Data == {{site.data.version.z_version}}.* 35 | ``` 36 | 37 | Now run `cabal build` within your project directory, cabal should be able to download [Z-Data](https://hackage.haskell.org/package/Z-Data) dependency automatically. Let's write a simple TCP echo server just for teaching purpose: 38 | 39 | 1. Initialize a project with `cabal`. 40 | 41 | ``` 42 | mkdir tcp-echo 43 | cd tcp-echo 44 | cabal init -i 45 | ``` 46 | 47 | `cabal` will ask you some questions about your project and create a `tcp-echo.cabal` file. 48 | 49 | 2. Add dependencies. 
50 | 51 | Now open the `tcp-echo.cabal` file with a text editor, and add the following lines under the `executable` section: 52 | 53 | ``` 54 | ... 55 | build-depends: Z-IO == {{site.data.version.z_version}}.* 56 | ``` 57 | 58 | 3. Edit code. 59 | 60 | Open `src/Main.hs` and add a simple echo TCP server: 61 | 62 | ```haskell 63 | import Control.Monad 64 | import Z.IO 65 | import Z.IO.Network 66 | 67 | main :: IO () 68 | main = do 69 | let addr = SocketAddrIPv4 ipv4Loopback 8080 70 | startTCPServer defaultTCPServerConfig{ tcpListenAddr = addr } $ \ tcp -> do 71 | i <- newBufferedInput tcp 72 | o <- newBufferedOutput tcp 73 | forever $ readBuffer i >>= writeBuffer o >> flushBuffer o 74 | ``` 75 | 76 | 4. Build! 77 | 78 | Ensure that you have run `cabal update` to get the latest package list. `cabal build` will start to download dependencies and build your project. You may see output like this: 79 | 80 | ``` 81 | Resolving dependencies... 82 | Build profile: -w ghc-{{site.data.version.ghc_version}} -O1 83 | In order, the following will be built (use -v for more details): 84 | - Z-IO-{{site.data.version.z_version}}.0.0 (lib) (requires download & build) 85 | - tcp-echo-0.1.0.0 (exe:tcp-echo) (first run) 86 | Downloaded Z-IO-{{site.data.version.z_version}}.0.0 87 | Starting Z-IO-{{site.data.version.z_version}}.0.0 (lib) 88 | Building Z-IO-{{site.data.version.z_version}}.0.0 (lib) 89 | ... 90 | ``` 91 | 92 | It may take a while to build for the first time because cabal needs to download and build all the dependencies. Subsequent builds will be faster since dependencies are cached. For reference, on a 4th-generation Intel Core CPU, it takes around 10 minutes to compile Z-Data and Z-IO. So sit back and relax, or go for a coffee. 93 | 94 | After the build completes, you can use `cabal run` to run your echo server and `nc 0.0.0.0 8080` to test it. That's it, happy hacking! 
95 | -------------------------------------------------------------------------------- /haddock.inject.utterances.via.mathjax.js: -------------------------------------------------------------------------------- 1 | /* 2 | * /MathJax.js 3 | * 4 | * Copyright (c) 2009-2016 The MathJax Consortium 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | if(document.getElementById&&document.childNodes&&document.createElement){if(!(window.MathJax&&MathJax.Hub)){if(window.MathJax){window.MathJax={AuthorConfig:window.MathJax}}else{window.MathJax={}}MathJax.isPacked=true;MathJax.version="2.7.0";MathJax.fileversion="2.7.0";MathJax.cdnVersion="2.7.0";MathJax.cdnFileVersions={};(function(d){var b=window[d];if(!b){b=window[d]={}}var e=[];var c=function(f){var g=f.constructor;if(!g){g=function(){}}for(var h in f){if(h!=="constructor"&&f.hasOwnProperty(h)){g[h]=f[h]}}return g};var a=function(){return function(){return arguments.callee.Init.call(this,arguments)}};b.Object=c({constructor:a(),Subclass:function(f,h){var g=a();g.SUPER=this;g.Init=this.Init;g.Subclass=this.Subclass;g.Augment=this.Augment;g.protoFunction=this.protoFunction;g.can=this.can;g.has=this.has;g.isa=this.isa;g.prototype=new this(e);g.prototype.constructor=g;g.Augment(f,h);return g},Init:function(f){var g=this;if(f.length===1&&f[0]===e){return g}if(!(g instanceof f.callee)){g=new f.callee(e)}return g.Init.apply(g,f)||g},Augment:function(f,g){var h;if(f!=null){for(h 
in f){if(f.hasOwnProperty(h)){this.protoFunction(h,f[h])}}if(f.toString!==this.prototype.toString&&f.toString!=={}.toString){this.protoFunction("toString",f.toString)}}if(g!=null){for(h in g){if(g.hasOwnProperty(h)){this[h]=g[h]}}}return this},protoFunction:function(g,f){this.prototype[g]=f;if(typeof f==="function"){f.SUPER=this.SUPER.prototype}},prototype:{Init:function(){},SUPER:function(f){return f.callee.SUPER},can:function(f){return typeof(this[f])==="function"},has:function(f){return typeof(this[f])!=="undefined"},isa:function(f){return(f instanceof Object)&&(this instanceof f)}},can:function(f){return this.prototype.can.call(this,f)},has:function(f){return this.prototype.has.call(this,f)},isa:function(g){var f=this;while(f){if(f===g){return true}else{f=f.SUPER}}return false},SimpleSUPER:c({constructor:function(f){return this.SimpleSUPER.define(f)},define:function(f){var h={};if(f!=null){for(var g in f){if(f.hasOwnProperty(g)){h[g]=this.wrap(g,f[g])}}if(f.toString!==this.prototype.toString&&f.toString!=={}.toString){h.toString=this.wrap("toString",f.toString)}}return h},wrap:function(i,h){if(typeof(h)!=="function"||!h.toString().match(/\.\s*SUPER\s*\(/)){return h}var g=function(){this.SUPER=g.SUPER[i];try{var f=h.apply(this,arguments)}catch(j){delete this.SUPER;throw j}delete this.SUPER;return f};g.toString=function(){return h.toString.apply(h,arguments)};return g}})});b.Object.isArray=Array.isArray||function(f){return Object.prototype.toString.call(f)==="[object Array]"};b.Object.Array=Array})("MathJax");(function(BASENAME){var BASE=window[BASENAME];if(!BASE){BASE=window[BASENAME]={}}var isArray=BASE.Object.isArray;var CALLBACK=function(data){var cb=function(){return arguments.callee.execute.apply(arguments.callee,arguments)};for(var id in CALLBACK.prototype){if(CALLBACK.prototype.hasOwnProperty(id)){if(typeof(data[id])!=="undefined"){cb[id]=data[id]}else{cb[id]=CALLBACK.prototype[id]}}}cb.toString=CALLBACK.prototype.toString;return 
cb};CALLBACK.prototype={isCallback:true,hook:function(){},data:[],object:window,execute:function(){if(!this.called||this.autoReset){this.called=!this.autoReset;return this.hook.apply(this.object,this.data.concat([].slice.call(arguments,0)))}},reset:function(){delete this.called},toString:function(){return this.hook.toString.apply(this.hook,arguments)}};var ISCALLBACK=function(f){return(typeof(f)==="function"&&f.isCallback)};var EVAL=function(code){return eval.call(window,code)};var TESTEVAL=function(){EVAL("var __TeSt_VaR__ = 1");if(window.__TeSt_VaR__){try{delete window.__TeSt_VaR__}catch(error){window.__TeSt_VaR__=null}}else{if(window.execScript){EVAL=function(code){BASE.__code=code;code="try {"+BASENAME+".__result = eval("+BASENAME+".__code)} catch(err) {"+BASENAME+".__result = err}";window.execScript(code);var result=BASE.__result;delete BASE.__result;delete BASE.__code;if(result instanceof Error){throw result}return result}}else{EVAL=function(code){BASE.__code=code;code="try {"+BASENAME+".__result = eval("+BASENAME+".__code)} catch(err) {"+BASENAME+".__result = err}";var head=(document.getElementsByTagName("head"))[0];if(!head){head=document.body}var script=document.createElement("script");script.appendChild(document.createTextNode(code));head.appendChild(script);head.removeChild(script);var result=BASE.__result;delete BASE.__result;delete BASE.__code;if(result instanceof Error){throw result}return result}}}TESTEVAL=null};var USING=function(args,i){if(arguments.length>1){if(arguments.length===2&&!(typeof arguments[0]==="function")&&arguments[0] instanceof Object&&typeof arguments[1]==="number"){args=[].slice.call(args,i)}else{args=[].slice.call(arguments,0)}}if(isArray(args)&&args.length===1){args=args[0]}if(typeof args==="function"){if(args.execute===CALLBACK.prototype.execute){return args}return CALLBACK({hook:args})}else{if(isArray(args)){if(typeof(args[0])==="string"&&args[1] instanceof Object&&typeof args[1][args[0]]==="function"){return 
CALLBACK({hook:args[1][args[0]],object:args[1],data:args.slice(2)})}else{if(typeof args[0]==="function"){return CALLBACK({hook:args[0],data:args.slice(1)})}else{if(typeof args[1]==="function"){return CALLBACK({hook:args[1],object:args[0],data:args.slice(2)})}}}}else{if(typeof(args)==="string"){if(TESTEVAL){TESTEVAL()}return CALLBACK({hook:EVAL,data:[args]})}else{if(args instanceof Object){return CALLBACK(args)}else{if(typeof(args)==="undefined"){return CALLBACK({})}}}}}throw Error("Can't make callback from given data")};var DELAY=function(time,callback){callback=USING(callback);callback.timeout=setTimeout(callback,time);return callback};var WAITFOR=function(callback,signal){callback=USING(callback);if(!callback.called){WAITSIGNAL(callback,signal);signal.pending++}};var WAITEXECUTE=function(){var signals=this.signal;delete this.signal;this.execute=this.oldExecute;delete this.oldExecute;var result=this.execute.apply(this,arguments);if(ISCALLBACK(result)&&!result.called){WAITSIGNAL(result,signals)}else{for(var i=0,m=signals.length;i0&&priority=0;i--){this.hooks.splice(i,1)}this.remove=[]}});var EXECUTEHOOKS=function(hooks,data,reset){if(!hooks){return null}if(!isArray(hooks)){hooks=[hooks]}if(!isArray(data)){data=(data==null?[]:[data])}var handler=HOOKS(reset);for(var i=0,m=hooks.length;ig){g=document.styleSheets.length}if(!i){i=document.head||((document.getElementsByTagName("head"))[0]);if(!i){i=document.body}}return i};var f=[];var c=function(){for(var k=0,j=f.length;k=this.timeout){i(this.STATUS.ERROR);return 1}return 0},file:function(j,i){if(i<0){a.Ajax.loadTimeout(j)}else{a.Ajax.loadComplete(j)}},execute:function(){this.hook.call(this.object,this,this.data[0],this.data[1])},checkSafari2:function(i,j,k){if(i.time(k)){return}if(document.styleSheets.length>j&&document.styleSheets[j].cssRules&&document.styleSheets[j].cssRules.length){k(i.STATUS.OK)}else{setTimeout(i,i.delay)}},checkLength:function(i,l,n){if(i.time(n)){return}var m=0;var 
j=(l.sheet||l.styleSheet);try{if((j.cssRules||j.rules||[]).length>0){m=1}}catch(k){if(k.message.match(/protected variable|restricted URI/)){m=1}else{if(k.message.match(/Security error/)){m=1}}}if(m){setTimeout(a.Callback([n,i.STATUS.OK]),0)}else{setTimeout(i,i.delay)}}},loadComplete:function(i){i=this.fileURL(i);var j=this.loading[i];if(j&&!j.preloaded){a.Message.Clear(j.message);clearTimeout(j.timeout);if(j.script){if(f.length===0){setTimeout(c,0)}f.push(j.script)}this.loaded[i]=j.status;delete this.loading[i];this.addHook(i,j.callback)}else{if(j){delete this.loading[i]}this.loaded[i]=this.STATUS.OK;j={status:this.STATUS.OK}}if(!this.loadHooks[i]){return null}return this.loadHooks[i].Execute(j.status)},loadTimeout:function(i){if(this.loading[i].timeout){clearTimeout(this.loading[i].timeout)}this.loading[i].status=this.STATUS.ERROR;this.loadError(i);this.loadComplete(i)},loadError:function(i){a.Message.Set(["LoadFailed","File failed to load: %1",i],null,2000);a.Hub.signal.Post(["file load error",i])},Styles:function(k,l){var i=this.StyleString(k);if(i===""){l=a.Callback(l);l()}else{var j=document.createElement("style");j.type="text/css";this.head=h(this.head);this.head.appendChild(j);if(j.styleSheet&&typeof(j.styleSheet.cssText)!=="undefined"){j.styleSheet.cssText=i}else{j.appendChild(document.createTextNode(i))}l=this.timer.create.call(this,l,j)}return l},StyleString:function(n){if(typeof(n)==="string"){return n}var k="",o,m;for(o in n){if(n.hasOwnProperty(o)){if(typeof n[o]==="string"){k+=o+" {"+n[o]+"}\n"}else{if(a.Object.isArray(n[o])){for(var l=0;l="0"&&q<="9"){f[j]=p[f[j]-1];if(typeof f[j]==="number"){f[j]=this.number(f[j])}}else{if(q==="{"){q=f[j].substr(1);if(q>="0"&&q<="9"){f[j]=p[f[j].substr(1,f[j].length-2)-1];if(typeof f[j]==="number"){f[j]=this.number(f[j])}}else{var k=f[j].match(/^\{([a-z]+):%(\d+)\|(.*)\}$/);if(k){if(k[1]==="plural"){var d=p[k[2]-1];if(typeof d==="undefined"){f[j]="???"}else{d=this.plural(d)-1;var 
h=k[3].replace(/(^|[^%])(%%)*%\|/g,"$1$2%\uEFEF").split(/\|/);if(d>=0&&d=3){c.push([f[0],f[1],this.processSnippet(g,f[2])])}else{c.push(e[d])}}}}else{c.push(e[d])}}return c},markdownPattern:/(%.)|(\*{1,3})((?:%.|.)+?)\2|(`+)((?:%.|.)+?)\4|\[((?:%.|.)+?)\]\(([^\s\)]+)\)/,processMarkdown:function(b,h,d){var j=[],e;var c=b.split(this.markdownPattern);var g=c[0];for(var f=1,a=c.length;f1?d[1]:""));f=null}if(e&&(!b.preJax||d)){c.nodeValue=c.nodeValue.replace(b.postJax,(e.length>1?e[1]:""))}if(f&&!f.nodeValue.match(/\S/)){f=f.previousSibling}}if(b.preRemoveClass&&f&&f.className===b.preRemoveClass){a.MathJax.preview=f}a.MathJax.checked=1},processInput:function(a){var b,i=MathJax.ElementJax.STATE;var h,e,d=a.scripts.length;try{while(a.ithis.processUpdateTime&&a.i1){d.jax[a.outputJax].push(b)}b.MathJax.state=c.OUTPUT},prepareOutput:function(c,f){while(c.jthis.processUpdateTime&&h.i=0;q--){if((b[q].src||"").match(f)){s.script=b[q].innerHTML;if(RegExp.$2){var t=RegExp.$2.substr(1).split(/\&/);for(var p=0,l=t.length;p=parseInt(y[z])}}return true},Select:function(j){var i=j[d.Browser];if(i){return i(d.Browser)}return null}};var e=k.replace(/^Mozilla\/(\d+\.)+\d+ /,"").replace(/[a-z][-a-z0-9._: ]+\/\d+[^ ]*-[^ ]*\.([a-z][a-z])?\d+ /i,"").replace(/Gentoo |Ubuntu\/(\d+\.)*\d+ (\([^)]*\) )?/,"");d.Browser=d.Insert(d.Insert(new String("Unknown"),{version:"0.0"}),a);for(var v in a){if(a.hasOwnProperty(v)){if(a[v]&&v.substr(0,2)==="is"){v=v.slice(2);if(v==="Mac"||v==="PC"){continue}d.Browser=d.Insert(new String(v),a);var r=new RegExp(".*(Version/| Trident/.*; rv:)((?:\\d+\\.)+\\d+)|.*("+v+")"+(v=="MSIE"?" 
":"/")+"((?:\\d+\\.)*\\d+)|(?:^|\\(| )([a-z][-a-z0-9._: ]+|(?:Apple)?WebKit)/((?:\\d+\\.)+\\d+)");var u=r.exec(e)||["","","","unknown","0.0"];d.Browser.name=(u[1]!=""?v:(u[3]||u[5]));d.Browser.version=u[2]||u[4]||u[6];break}}}try{d.Browser.Select({Safari:function(j){var i=parseInt((String(j.version).split("."))[0]);if(i>85){j.webkit=j.version}if(i>=538){j.version="8.0"}else{if(i>=537){j.version="7.0"}else{if(i>=536){j.version="6.0"}else{if(i>=534){j.version="5.1"}else{if(i>=533){j.version="5.0"}else{if(i>=526){j.version="4.0"}else{if(i>=525){j.version="3.1"}else{if(i>500){j.version="3.0"}else{if(i>400){j.version="2.0"}else{if(i>85){j.version="1.0"}}}}}}}}}}j.webkit=(navigator.appVersion.match(/WebKit\/(\d+)\./))[1];j.isMobile=(navigator.appVersion.match(/Mobile/i)!=null);j.noContextMenu=j.isMobile},Firefox:function(j){if((j.version==="0.0"||k.match(/Firefox/)==null)&&navigator.product==="Gecko"){var m=k.match(/[\/ ]rv:(\d+\.\d.*?)[\) ]/);if(m){j.version=m[1]}else{var i=(navigator.buildID||navigator.productSub||"0").substr(0,8);if(i>="20111220"){j.version="9.0"}else{if(i>="20111120"){j.version="8.0"}else{if(i>="20110927"){j.version="7.0"}else{if(i>="20110816"){j.version="6.0"}else{if(i>="20110621"){j.version="5.0"}else{if(i>="20110320"){j.version="4.0"}else{if(i>="20100121"){j.version="3.6"}else{if(i>="20090630"){j.version="3.5"}else{if(i>="20080617"){j.version="3.0"}else{if(i>="20061024"){j.version="2.0"}}}}}}}}}}}}j.isMobile=(navigator.appVersion.match(/Android/i)!=null||k.match(/ Fennec\//)!=null||k.match(/Mobile/)!=null)},Chrome:function(i){i.noContextMenu=i.isMobile=!!navigator.userAgent.match(/ Mobile[ \/]/)},Opera:function(i){i.version=opera.version()},Edge:function(i){i.isMobile=!!navigator.userAgent.match(/ Phone/)},MSIE:function(j){j.isMobile=!!navigator.userAgent.match(/ 
/*
 * Utterances comment-widget injector.
 *
 * This snippet is appended after the bundled MathJax source in this file.
 * It reads its configuration from the query string of the URL this file was
 * loaded from, e.g.
 *
 *     <script src=".../this-file.js?repo=owner/name&theme=github-dark"></script>
 *
 * Recognised query parameters:
 *   repo  (required)  GitHub "owner/name" repository that stores the comments.
 *   url   (optional)  value for the widget's `issue-term` attribute;
 *                     defaults to "url".
 *   theme (optional)  widget theme; defaults to "github-light".
 *
 * Once the page has finished loading, a <script> element pointing at
 * https://utteranc.es/client.js is appended to <body>.
 */
!function j() {
    // Resolve the <script> element that loaded this file NOW, while it is
    // still executing. Deferring the lookup to the `load` event (as was done
    // previously) is fragile: by then "the last <script> in the document" can
    // be any other script that was added in the meantime.
    var ownScript = document.currentScript || (function () {
        var scripts = document.getElementsByTagName('script');
        return scripts[scripts.length - 1];
    }());
    var scriptSource = (ownScript && ownScript.src) || '';

    // Parse the query string of `src` into a plain { key: value } object.
    // Returns an empty object when there is no query string at all.
    function parseQuery(src) {
        var params = {};
        var queryStart = src.indexOf('?');
        if (queryStart < 0) {
            return params;
        }
        // Drop a trailing "#fragment" so it does not leak into the last value.
        var query = src.slice(queryStart + 1).split('#')[0];
        query.split('&').forEach(function (part) {
            if (!part) {
                return;
            }
            // Split on the FIRST "=" only, so values may themselves contain "=".
            var eq = part.indexOf('=');
            var key = eq < 0 ? part : part.slice(0, eq);
            var raw = eq < 0 ? '' : part.slice(eq + 1);
            try {
                params[key] = decodeURIComponent(raw);
            } catch (e) {
                // Malformed percent-encoding: keep the raw text instead of
                // aborting the whole injection.
                params[key] = raw;
            }
        });
        return params;
    }

    // Build the Utterances client <script> and attach it to the page.
    function inject() {
        var params = parseQuery(scriptSource);
        if (!params.repo) {
            // Without a repository the widget cannot work; do not inject a
            // client configured with repo="undefined".
            if (window.console && console.warn) {
                console.warn('utterances: missing "repo" query parameter on ' + scriptSource);
            }
            return;
        }
        var commentScript = document.createElement('script');
        commentScript.src = "https://utteranc.es/client.js";
        commentScript.async = true;
        commentScript.setAttribute('repo', params.repo);
        commentScript.setAttribute('issue-term', params.url || "url");
        commentScript.setAttribute('theme', params.theme || "github-light");
        commentScript.setAttribute('crossorigin', "anonymous");
        window.document.body.appendChild(commentScript);
    }

    if (document.readyState === 'complete') {
        // `load` has already fired (e.g. this file was loaded late or async);
        // a listener registered now would never be called.
        inject();
    } else {
        window.addEventListener('load', inject);
    }
}();
61 | 62 |

Fast/Concise/Modern Haskell Engineering Toolkits

63 |

Z.Haskell is a set of high-performance and easy-to-use libraries written in Haskell

64 |

Providing rich APIs for building fast and reliable applications

65 | Start! 66 |
67 | 68 |
69 |
70 |

Blazing Fast

71 |

M GHC lightweight threads scale on N event loops

72 |

Serve applications at faster speed with less CPU/memory

73 |
74 |
75 |

Concise, Expressive

76 |

Written in Haskell, a purely functional language

77 |

Declarative coding with concise and composable types

78 |
79 |
80 |

Modern Codebase

81 |

Cross-platform IO, UTF8 based text processing

82 |

Best practices from years of engineering design

83 |
84 |
85 | 86 |
87 |
88 | 111 | 114 |
115 |
116 |
117 |

Z-IO

GitHub 118 |

Haddock

119 |
120 |
121 |

Z-Data

GitHub 122 |

Haddock

123 |
124 |
125 |

Z-Botan

GitHub 126 |

Haddock

127 |
128 |
129 |

Z-MessagePack

GitHub 130 |

Haddock

131 |
132 |
133 |

Z-YAML

GitHub 134 |

Haddock

135 |
136 |
137 |
138 | 139 | 140 | --------------------------------------------------------------------------------