├── .github
└── workflows
│ └── jekyll.yml
├── .gitignore
├── 404.html
├── CNAME
├── Gemfile
├── Gemfile.lock
├── LICENSE
├── Z-Data
├── Array.md
├── FFI.md
├── JSON.md
├── Parser-and-Builder.md
├── Vector-Bytes-Text.md
└── index.md
├── Z-IO
├── BIO-Streaming.md
├── Filesystem.md
├── Logger.md
├── Network.md
└── index.md
├── _config.yml
├── _data
└── version.yml
├── _layouts
└── post.html
├── _posts
├── 2021-02-01-High-performance-JSON-codec.md
└── 2021-04-20-introduce-BIO-a-simple-streaming-abstraction.md
├── _sass
└── custom
│ └── custom.scss
├── benchmarks.md
├── blog.md
├── guide.md
├── haddock.inject.utterances.via.mathjax.js
└── index.html
/.github/workflows/jekyll.yml:
--------------------------------------------------------------------------------
1 | # This workflow uses actions that are not certified by GitHub.
2 | # They are provided by a third-party and are governed by
3 | # separate terms of service, privacy policy, and support
4 | # documentation.
5 |
6 | # Sample workflow for building and deploying a Jekyll site to GitHub Pages
7 | name: Deploy Jekyll site to Pages
8 |
9 | on:
10 | # Runs on pushes targeting the default branch
11 | push:
12 | branches: ["master"]
13 |
14 | # Allows you to run this workflow manually from the Actions tab
15 | workflow_dispatch:
16 |
17 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
18 | permissions:
19 | contents: read
20 | pages: write
21 | id-token: write
22 |
23 | # Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued.
24 | # However, do NOT cancel in-progress runs as we want to allow these production deployments to complete.
25 | concurrency:
26 | group: "pages"
27 | cancel-in-progress: false
28 |
29 | jobs:
30 | # Build job
31 | build:
32 | runs-on: ubuntu-latest
33 | steps:
34 | - name: Checkout
35 | uses: actions/checkout@v3
36 | - name: Setup Ruby
37 | uses: ruby/setup-ruby@55283cc23133118229fd3f97f9336ee23a179fcf # v1.146.0
38 | with:
39 | ruby-version: '3.1' # Not needed with a .ruby-version file
40 | bundler-cache: true # runs 'bundle install' and caches installed gems automatically
41 | cache-version: 0 # Increment this number if you need to re-download cached gems
42 | - name: Setup Pages
43 | id: pages
44 | uses: actions/configure-pages@v3
45 | - name: Build with Jekyll
46 | # Outputs to the './_site' directory by default
47 | run: bundle exec jekyll build --baseurl "${{ steps.pages.outputs.base_path }}"
48 | env:
49 | JEKYLL_ENV: production
50 | - name: Upload artifact
51 | # Automatically uploads an artifact from the './_site' directory by default
52 | uses: actions/upload-pages-artifact@v1
53 |
54 | # Deployment job
55 | deploy:
56 | environment:
57 | name: github-pages
58 | url: ${{ steps.deployment.outputs.page_url }}
59 | runs-on: ubuntu-latest
60 | needs: build
61 | steps:
62 | - name: Deploy to GitHub Pages
63 | id: deployment
64 | uses: actions/deploy-pages@v2
65 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | dist
2 | dist-*
3 | cabal-dev
4 | *.o
5 | *.hi
6 | *.hie
7 | *.chi
8 | *.chs.h
9 | *.dyn_o
10 | *.dyn_hi
11 | .hpc
12 | .hsenv
13 | .cabal-sandbox/
14 | cabal.sandbox.config
15 | *.prof
16 | *.aux
17 | *.hp
18 | *.eventlog
19 | .stack-work/
20 | cabal.project.local
21 | cabal.project.local~
22 | .HTF/
23 | .ghc.environment.*
24 | .vscode/
25 | _site
26 | .sass-cache
27 |
--------------------------------------------------------------------------------
/404.html:
--------------------------------------------------------------------------------
1 | ---
2 | layout: default
3 | ---
4 |
5 |
18 |
19 |
20 |
404
21 |
22 |
Page not found :(
23 |
The requested page could not be found.
24 |
25 |
--------------------------------------------------------------------------------
/CNAME:
--------------------------------------------------------------------------------
1 | z.haskell.world
2 |
--------------------------------------------------------------------------------
/Gemfile:
--------------------------------------------------------------------------------
1 | source "https://rubygems.org"
2 |
3 | # Hello! This is where you manage which Jekyll version is used to run.
4 | # When you want to use a different version, change it below, save the
5 | # file and run `bundle install`. Run Jekyll with `bundle exec`, like so:
6 | #
7 | # bundle exec jekyll serve
8 | #
9 | # This will help ensure the proper Jekyll version is running.
10 | # Happy Jekylling!
11 | gem "jekyll", "~> 3.9.0"
12 |
13 | # This is the default theme for new Jekyll sites. You may change this to anything you like.
14 | gem "minima", "~> 2.0"
15 |
16 | gem "kramdown", "~> 2.3.1"
17 | gem "kramdown-parser-gfm", "~> 1.1.0"
18 |
19 | # If you want to use GitHub Pages, remove the "gem "jekyll"" above and
20 | # uncomment the line below. To upgrade, run `bundle update github-pages`.
21 | # gem "github-pages", group: :jekyll_plugins
22 |
23 | # If you have any plugins, put them here!
24 | group :jekyll_plugins do
25 | gem "jekyll-feed", "~> 0.6"
26 | end
27 |
28 | # Windows does not include zoneinfo files, so bundle the tzinfo-data gem
29 | # and associated library.
30 | install_if -> { RUBY_PLATFORM =~ %r!mingw|mswin|java! } do
31 | gem "tzinfo", "~> 1.2"
32 | gem "tzinfo-data"
33 | end
34 |
35 | # Performance-booster for watching directories on Windows
36 | gem "wdm", "~> 0.1.0", :install_if => Gem.win_platform?
37 |
38 |
39 | gem "just-the-docs", "~> 0.3.3"
40 |
--------------------------------------------------------------------------------
/Gemfile.lock:
--------------------------------------------------------------------------------
1 | GEM
2 | remote: https://rubygems.org/
3 | specs:
4 | addressable (2.8.0)
5 | public_suffix (>= 2.0.2, < 5.0)
6 | colorator (1.1.0)
7 | concurrent-ruby (1.1.8)
8 | em-websocket (0.5.2)
9 | eventmachine (>= 0.12.9)
10 | http_parser.rb (~> 0.6.0)
11 | eventmachine (1.2.7)
12 | ffi (1.14.2)
13 | forwardable-extended (2.6.0)
14 | http_parser.rb (0.6.0)
15 | i18n (0.9.5)
16 | concurrent-ruby (~> 1.0)
17 | jekyll (3.9.0)
18 | addressable (~> 2.4)
19 | colorator (~> 1.0)
20 | em-websocket (~> 0.5)
21 | i18n (~> 0.7)
22 | jekyll-sass-converter (~> 1.0)
23 | jekyll-watch (~> 2.0)
24 | kramdown (>= 1.17, < 3)
25 | liquid (~> 4.0)
26 | mercenary (~> 0.3.3)
27 | pathutil (~> 0.9)
28 | rouge (>= 1.7, < 4)
29 | safe_yaml (~> 1.0)
30 | jekyll-feed (0.15.1)
31 | jekyll (>= 3.7, < 5.0)
32 | jekyll-sass-converter (1.5.2)
33 | sass (~> 3.4)
34 | jekyll-seo-tag (2.7.1)
35 | jekyll (>= 3.8, < 5.0)
36 | jekyll-watch (2.2.1)
37 | listen (~> 3.0)
38 | just-the-docs (0.3.3)
39 | jekyll (>= 3.8.5)
40 | jekyll-seo-tag (~> 2.0)
41 | rake (>= 12.3.1, < 13.1.0)
42 | kramdown (2.3.1)
43 | rexml
44 | kramdown-parser-gfm (1.1.0)
45 | kramdown (~> 2.0)
46 | liquid (4.0.3)
47 | listen (3.4.1)
48 | rb-fsevent (~> 0.10, >= 0.10.3)
49 | rb-inotify (~> 0.9, >= 0.9.10)
50 | mercenary (0.3.6)
51 | minima (2.5.1)
52 | jekyll (>= 3.5, < 5.0)
53 | jekyll-feed (~> 0.9)
54 | jekyll-seo-tag (~> 2.1)
55 | pathutil (0.16.2)
56 | forwardable-extended (~> 2.6)
57 | public_suffix (4.0.6)
58 | rake (13.0.3)
59 | rb-fsevent (0.10.4)
60 | rb-inotify (0.10.1)
61 | ffi (~> 1.0)
62 | rexml (3.2.8)
63 | strscan (>= 3.0.9)
64 | rouge (3.26.0)
65 | safe_yaml (1.0.5)
66 | sass (3.7.4)
67 | sass-listen (~> 4.0.0)
68 | sass-listen (4.0.0)
69 | rb-fsevent (~> 0.9, >= 0.9.4)
70 | rb-inotify (~> 0.9, >= 0.9.7)
71 | strscan (3.1.0)
72 | thread_safe (0.3.6)
73 | tzinfo (1.2.9)
74 | thread_safe (~> 0.1)
75 | tzinfo-data (1.2021.1)
76 | tzinfo (>= 1.0.0)
77 | wdm (0.1.1)
78 |
79 | PLATFORMS
80 | ruby
81 |
82 | DEPENDENCIES
83 | jekyll (~> 3.9.0)
84 | jekyll-feed (~> 0.6)
85 | just-the-docs (~> 0.3.3)
86 | kramdown (~> 2.3.1)
87 | kramdown-parser-gfm (~> 1.1.0)
88 | minima (~> 2.0)
89 | tzinfo (~> 1.2)
90 | tzinfo-data
91 | wdm (~> 0.1.0)
92 |
93 | BUNDLED WITH
94 | 2.1.4
95 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Z-Data LICENSE
2 |
3 | Copyright (c) Z.Haskell Contributors, 2017-2022
4 |
5 | All rights reserved.
6 |
7 | Redistribution and use in source and binary forms, with or without
8 | modification, are permitted provided that the following conditions are met:
9 |
10 | * Redistributions of source code must retain the above copyright
11 | notice, this list of conditions and the following disclaimer.
12 |
13 | * Redistributions in binary form must reproduce the above
14 | copyright notice, this list of conditions and the following
15 | disclaimer in the documentation and/or other materials provided
16 | with the distribution.
17 |
18 | * Neither the name of winter nor the names of other
19 | contributors may be used to endorse or promote products derived
20 | from this software without specific prior written permission.
21 |
22 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 |
34 | --------------------------------------------------------------------------------
35 | utf8rewind LICENSE
36 |
37 | Copyright (C) 2014-2016 Quinten Lansu
38 | Copyright (C) 2019-2020 Dong Han
39 |
40 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
41 |
42 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
43 |
44 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
45 |
46 | --------------------------------------------------------------------------------
47 | fastvalidate-utf-8 LICENSE
48 |
49 | Daniel Lemire
50 | Kendall Willets
51 | Zach Bjornson
52 |
53 | Permission is hereby granted, free of charge, to any
54 | person obtaining a copy of this software and associated
55 | documentation files (the "Software"), to deal in the
56 | Software without restriction, including without
57 | limitation the rights to use, copy, modify, merge,
58 | publish, distribute, sublicense, and/or sell copies of
59 | the Software, and to permit persons to whom the Software
60 | is furnished to do so, subject to the following
61 | conditions:
62 |
63 | The above copyright notice and this permission notice
64 | shall be included in all copies or substantial portions
65 | of the Software.
66 |
67 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
68 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
69 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
70 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
71 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
72 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
73 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
74 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
75 | DEALINGS IN THE SOFTWARE.
76 |
77 | --------------------------------------------------------------------------------
78 | Copyright (c) 2016-2019, Powturbo
79 | All rights reserved.
80 |
81 | Redistribution and use in source and binary forms, with or without
82 | modification, are permitted provided that the following conditions are
83 | met:
84 |
85 | 1. Redistributions of source code must retain the above copyright
86 | notice, this list of conditions and the following disclaimer.
87 |
88 | 2. Redistributions in binary form must reproduce the above copyright
89 | notice, this list of conditions and the following disclaimer in the
90 | documentation and/or other materials provided with the distribution.
91 |
92 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
93 | IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
94 | TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
95 | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
96 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
98 | TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
99 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
100 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
101 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
102 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
103 |
104 | - homepage : https://sites.google.com/site/powturbo/
105 | - github : https://github.com/powturbo
106 | - twitter : https://twitter.com/powturbo
107 | - email : powturbo [_AT_] gmail [_DOT_] com
108 |
--------------------------------------------------------------------------------
/Z-Data/Array.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: default
3 | parent: Z-Data
4 | title: Array
5 | nav_order: 1
6 | ---
7 |
8 | ## Table of contents
9 | {: .no_toc .text-delta }
10 |
11 | 1. TOC
12 | {:toc}
13 |
14 | # Array in Haskell
15 |
16 | Unlike the ubiquitous linked list type `[a]`, arrays in Haskell don't have any built-in syntax support, or any other special compiler support except for some built-in primitive functions, which can be found in [ghc-prim](http://hackage.haskell.org/package/ghc-prim/docs/GHC-Prim.html):
17 |
18 | ```haskell
19 | newArray# :: Int# -> a -> State# s -> (# State# s, MutableArray# s a #)
20 | readArray# :: MutableArray# s a -> Int# -> State# s -> (# State# s, a #)
21 | writeArray# :: MutableArray# s a -> Int# -> a -> State# s -> State# s
22 | newByteArray# :: Int# -> State# s -> (# State# s, MutableByteArray# s #)
23 | indexInt8Array# :: ByteArray# -> Int# -> Int#
24 | indexInt16Array# :: ByteArray# -> Int# -> Int#
25 | ...
26 | ```
27 |
28 | It's hard to use those functions directly because they explicitly manipulate the `State#` token, and they distinguish different array types: boxed `Array#`, `ByteArray#`, etc. The `#` after those types implies they are special primitive types, which will be discussed later.
29 |
30 | In [Z-Data](https://hackage.haskell.org/package/Z-Data), we provide type wrappers and a typeclass to unify array operations:
31 |
32 | ```haskell
33 | class Arr (arr :: * -> * ) a where
34 | -- | Mutable version of this array type.
35 | type MArr arr = (mar :: * -> * -> *) | mar -> arr
36 | -- | Make a new array with given size.
37 | newArr :: (PrimMonad m, PrimState m ~ s) => Int -> m (marr s a)
38 | -- | Make a new array and fill it with an initial value.
39 | newArrWith :: (PrimMonad m, PrimState m ~ s) => Int -> a -> m (marr s a)
40 | -- | Index mutable array in a primitive monad.
41 | readArr :: (PrimMonad m, PrimState m ~ s) => marr s a -> Int -> m a
42 | -- | Write mutable array in a primitive monad.
43 | writeArr :: (PrimMonad m, PrimState m ~ s) => marr s a -> Int -> a -> m ()
44 | -- | Fill mutable array with a given value.
45 | setArr :: (PrimMonad m, PrimState m ~ s) => marr s a -> Int -> Int -> a -> m ()
46 | -- | Index immutable array, which is a pure operation,
47 | indexArr :: arr a -> Int -> a
48 | -- | Index immutable array in a primitive monad, this helps in situations that
49 | -- you want your indexing result is not a thunk referencing whole array.
50 | indexArrM :: (Monad m) => arr a -> Int -> m a
51 | -- | Safely freeze mutable array by make a immutable copy of its slice.
52 | freezeArr :: (PrimMonad m, PrimState m ~ s) => marr s a -> Int -> Int -> m (arr a)
53 | -- | Safely thaw immutable array by make a mutable copy of its slice.
54 | thawArr :: (PrimMonad m, PrimState m ~ s) => arr a -> Int -> Int -> m (marr s a)
55 | -- | In place freeze a mutable array, the original mutable array can not be used
56 | -- anymore.
57 | unsafeFreezeArr :: (PrimMonad m, PrimState m ~ s) => marr s a -> m (arr a)
58 | -- | In place thaw a immutable array, the original immutable array can not be used
59 | -- anymore.
60 | unsafeThawArr :: (PrimMonad m, PrimState m ~ s) => arr a -> m (marr s a)
61 | -- | Copy a slice of immutable array to mutable array at given offset.
62 | copyArr :: (PrimMonad m, PrimState m ~ s) => marr s a -> Int -> arr a -> Int -> Int -> m ()
63 | -- | Copy a slice of mutable array to mutable array at given offset.
64 | -- The two mutable arrays shall no be the same one.
65 | copyMutableArr :: (PrimMonad m, PrimState m ~ s) => marr s a -> Int -> marr s a -> Int -> Int -> m ()
66 | -- | Copy a slice of mutable array to mutable array at given offset.
67 | -- The two mutable arrays may be the same one.
68 | moveArr :: (PrimMonad m, PrimState m ~ s) => marr s a -> Int -> marr s a -> Int -> Int -> m ()
69 | -- | Create immutable copy.
70 | cloneArr :: arr a -> Int -> Int -> arr a
71 | -- | Create mutable copy.
72 | cloneMutableArr :: (PrimMonad m, PrimState m ~ s) => marr s a -> Int -> Int -> m (marr s a)
73 | -- | Resize mutable array to given size.
74 | resizeMutableArr :: (PrimMonad m, PrimState m ~ s) => marr s a -> Int -> m (marr s a)
75 | -- | Shrink mutable array to given size. This operation only works on primitive arrays.
76 | -- For boxed array, this is a no-op, e.g. 'sizeOfMutableArr' will not change.
77 | shrinkMutableArr :: (PrimMonad m, PrimState m ~ s) => marr s a -> Int -> m ()
78 | -- | Is two mutable array are reference equal.
79 | sameMutableArr :: marr s a -> marr s a -> Bool
80 | -- | Size of immutable array.
81 | sizeofArr :: arr a -> Int
82 | -- | Size of mutable array.
83 | sizeofMutableArr :: (PrimMonad m, PrimState m ~ s) => marr s a -> m Int
84 | -- | Is two immutable array are referencing the same one.
85 | sameArr :: arr a -> arr a -> Bool
86 | ```
87 |
88 | And we have following instances:
89 |
90 | ```haskell
91 | -- | Boxed array type, for holding Haskell ADTs.
92 | instance Arr Array a where
93 | type MArr Array = MutableArray
94 | ...
95 | -- | Boxed array type, for holding Haskell ADTs, but doesn't carry a card table.
96 | instance Arr SmallArray a where
97 | type MArr SmallArray = SmallMutableArray
98 | ...
99 | -- | Unboxed array type, for holding primitive types like Int, Word8, etc.
100 | instance Prim a => Arr PrimArray a where
101 | type MArr PrimArray = MutablePrimArray
102 | ...
103 | -- | Boxed array type, for holding boxed unlifted types, see following section.
104 | instance PrimUnlifted a => Arr UnliftedArray a where
105 | type MArr UnliftedArray = MutableUnliftedArray
106 | ...
107 | ```
108 |
109 | If you know how `IO` works in Haskell, `PrimMonad` simply means `ST` or `IO`. But if you get confused by the `PrimMonad` constraint, please get [more details here](https://wiki.haskell.org/IO_inside).
110 |
111 | # Boxed, Unboxed
112 |
113 | For many Haskellers, using arrays may be the first time one wants to know what's the difference between boxed, unboxed types. It's important to spend some time explaining these buzzwords.
114 |
115 | In other languages, you often have to distinguish between *references* and *values*. For example, in C, pointers are references to other objects. A pointer is a memory location in the hardware sense: you can use machine code to follow a reference to the memory it points to. Values of the other, non-pointer types are not memory locations; their 1-0 arrangements stand for a certain value of that type.
116 |
117 | In Haskell almost every value you see is a pointer from C's perspective, i.e. a memory location pointing to a heap object, for example a data type like:
118 |
119 | ```haskell
120 | data Foo = Foo Int Char
121 | foo = Foo 3 'a'
122 | ```
123 |
124 | is represented as:
125 |
126 | ```
127 | foo(from registers or other boxes)
128 | |
129 | V
130 | +----+--------+---+---+ +-------------+------+
131 | | info-table* | * | * +--->+ info-table* | 'a'# |
132 | +-------------+-+-+---+ +-------------+------+
133 | Foo | C# (Char's constructor)
134 | V
135 | +---+---------+----+
136 | | info-table* | 3# |
137 | +-------------+----+
138 | I# (Int's constructor)
139 | ```
140 |
141 | During runtime the value `foo` is a reference, and all operations on it, e.g. pattern matching, go through dereferencing. Values like this are called *boxed* because they are references to boxes, i.e. heap objects with an [info-table](https://gitlab.haskell.org/ghc/ghc/-/wikis/commentary/rts/storage/heap-objects#info-tables). The info-table contains a lot of useful information about the box, such as how many words the box occupies, which constructor the box stands for, etc.
142 |
143 | The `3#` and `'a'#` above are Haskell's non-pointer values; we call values like this *unboxed* values. Unboxed values don't have info-tables, so we really can't have them directly on the heap; otherwise the GC would get confused when it scans them: without information from an info-table, it can't decide how many bytes to copy. These values usually belong to registers or other boxes: we generate machine code to manipulate them directly.
144 |
145 |
146 | ## Boxed array
147 |
148 | Now let's consider GHC arrays, they're special heap objects provided by RTS. We have boxed arrays `MutableArray#` and `Array#` that store references to boxes:
149 |
150 | ```
151 | +-------------+--------------+---------------------------+---+-...-+---+---+------------+
152 | | info-table* | payload size | payload + card-table size | * | ... | * | * | card table |
153 | +-------------+--------------+---------------------------+-+-+-...-+---+---+------------+
154 | MutableArray# |
155 | Array# V
156 | +------+------+-----+
157 | | info-table* | ... |
158 | +-------------+-----+
159 | Boxes, maybe a thunk
160 | Most of the operations on boxed array
161 | are lazy on its element
162 | ```
163 |
164 | It looks quite complicated, especially the card-table part, which is used to [optimize the GC for arrays](https://gitlab.haskell.org/ghc/ghc/-/wikis/commentary/rts/storage/gc/remembered-sets). `MutableArray#`s are always kept in a generation's mutable list once they're promoted to that generation, so this optimization is important if you keep a large mutable array on the heap for a long time. For small arrays, it's unnecessary to use a card-table, and GHC provides `SmallMutableArray#/SmallArray#` for that purpose.
165 |
166 | ```
167 | +-------------+--------------+---+-...-+---+---+
168 | | info-table* | payload size | * | ... | * | * |
169 | +-------------+--------------+---+-...-+---+---+
170 | SmallMutableArray#
171 | SmallArray#
172 | ```
173 |
174 | There're ADT wrappers for these types to make it easier to work with:
175 |
176 | ```haskell
177 | data MutableArray s a = MutableArray (MutableArray# s a)
178 | data Array a = Array (Array# a)
179 |
180 | data SmallMutableArray s a = SmallMutableArray (SmallMutableArray# s a)
181 | data SmallArray a = SmallArray (SmallArray# a)
182 | ```
183 |
184 | A common pattern in Haskell is to turn a `MutableArray` into an `Array` with a freeze operation after creation completes, but the card-table's space is still there in case we thaw the array in place again. Generally speaking, under the creation-freeze pattern, `SmallMutableArray` and `SmallArray` are recommended, since you won't keep the mutable array on the heap for too long.
185 |
186 | ## Unboxed array
187 |
188 | `MutableByteArray#` and `ByteArray#` are GHC's unboxed arrays. They don't contain pointers, and their payloads do not need to be traced during GC:
189 |
190 | ```
191 | +-------------+--------------+-------------+---+-...-+---+---+
192 | | info-table* | payload size | 0xXXXXXXXX# | # | ... | # | # |
193 | +-------------+--------------+-------------+---+-...-+---+---+
194 | MutableByteArray#
195 | ByteArray#
196 | ```
197 |
198 | `ByteArray#`s can be used to encode non-pointer data of different sizes, such as `Int` and `Word8`. `ghc-prim` provides separate functions to work with different data types: `indexIntArray#`, `indexWord8Array#`, etc. So there are the `Prim` class and the `PrimArray` type to make working with different types easier:
199 |
200 | ```haskell
201 | -- types which can be stored in ByteArray#
202 | class Prim a where
203 | indexByteArray# :: ByteArray# -> Int# -> a
204 | ...
205 |
206 | -- | type indexed ByteArray#
207 | data PrimArray a = PrimArray ByteArray#
208 |
209 | indexPrimArray :: Prim a => PrimArray a -> Int -> a
210 | ...
211 | ```
212 |
213 | # Lifted, Unlifted
214 |
215 | Another distinction between types, unlifted versus lifted, exists because Haskell has a non-strict evaluation mechanism, e.g. a value `1 + 2` may have a representation like:
216 |
217 | ```
218 | +-------------+----------+---+ +-------------+----+
219 | | info-table* | reserved | * +--->+ info-table* | 2# |
220 | +------+------+----------+---+ +-------------+----+
221 | | This is I#
222 | V
223 | The info-table points to (+1) code.
224 | ```
225 |
226 | In Haskell `1 + 2` and `3` are both references, and they can be used interchangeably: a function expecting an `Int` argument can accept both pointers. This is done by *entering* the heap objects, i.e. executing the entry code following the info-table. The entry code for a constructor simply returns. For thunks, the code will do the evaluation, and the `reserved` word above is reserved exactly for the evaluation result: a forwarding pointer is written there and the thunk box is changed into an indirection box.
227 |
228 | The evaluation may fail (diverging recursion, stack overflow, etc.), so the pointer could potentially point to an undefined value. This kind of value is called *bottom* in Haskell, written as `_|_`. The intuition for this name is that all the other evaluated values have a certain meaning, but bottom doesn't: it sits lower in the spectrum of determinism, concreteness, usefulness ... whatever suits your mind. Hence comes the concept of `lifted` types, i.e. types which contain `bottom` values, or more formally, are inhabited by `_|_`.
229 |
230 | As you might expect, most boxed types can be inhabited by `_|_`: the thunk may explode and terminate your program, or call `error` or `undefined` from base. And most unboxed types are unlifted types, e.g. it's impossible for an `Int#` to stand for an undefined value, because all 1-0 arrangements represent an `Int#`. Or to put it another way: there's no way we can get a bottom from an `Int#`, because it doesn't have an info-table, and we can't enter it.
231 |
232 | But some boxed unlifted types do exist, e.g. `MutableArray#/Array#` are such types: their representations on the heap have an info-table pointer, but they are never entered. All the primitive operations manipulating them won't enter them, and the only way to create them is via `newArray#`, `cloneArray#`, etc.
233 |
234 | To efficiently store boxed unlifted types, the `PrimUnlifted` class and the `UnliftedArray` type are introduced, similar to `Prim` and `PrimArray`. `UnliftedArray` stores unlifted references instead of normal Haskell ADTs. Compared to `Array Array`, `UnliftedArray Array` can remove a level of indirection, i.e. remove each item's `Array` box and store the `Array#` directly.
235 |
236 | # More on arrays
237 |
238 | There are more details on Haskell arrays, such as pinned vs unpinned `ByteArray`s, etc. Interested readers can find all these details on the [GHC wiki](https://gitlab.haskell.org/ghc/ghc/-/wikis/home), especially in the RTS section.
239 | To use arrays properly, all you need to do is choose the proper storage type and import `Z.Data.Array`. In the next section we will introduce vectors, which are simply slices of arrays.
240 |
--------------------------------------------------------------------------------
/Z-Data/FFI.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: default
3 | parent: Z-Data
4 | title: FFI
5 | nav_order: 5
6 | ---
7 |
8 | ## Table of contents
9 | {: .no_toc .text-delta }
10 |
11 | 1. TOC
12 | {:toc}
13 |
14 | # FFI: Foreign Function Interface
15 |
16 | The Haskell [foreign function interface](https://wiki.haskell.org/Foreign_Function_Interface) is a specification for calling foreign functions (mainly C functions) from Haskell. It looks like this:
17 |
18 | + In `Foo.hs`:
19 |
20 | ```haskell
21 | foreign import ccall unsafe "foo" c_foo :: CInt -> CInt -> IO CInt
22 | ```
23 |
24 | + In `foo.c`:
25 |
26 | ```c
27 | int foo(int x, int y){
28 | ...
29 | }
30 | ```
31 |
32 | + In cabal file:
33 |
34 | ```yaml
35 | ...
36 | c-sources: foo.c
37 | ...
38 | ```
39 |
40 | With proper setup, cabal can orchestrate the compilation and give you a statically linked binary. The FFI specification specifies the concrete syntax on the Haskell side; to ensure a successful FFI call, you have to pay attention to several aspects:
41 |
42 | + The types in Haskell and C are matched.
43 | + How to allocate memory for C side, and when to free.
44 | + The difference between unsafe FFI calls, and [safe ones](https://simonmar.github.io/bib/papers/conc-ffi.pdf).
45 |
46 | Besides the above points, you'll have to use the correct calling convention (which would be ccall most of the time), write C wrappers if you want to call C++, etc.
47 |
48 | # FFI Types
49 |
50 | Here's a table of common FFI types that can be passed between C and Haskell, and where you can find them:
51 |
52 | | C type, header | Haskell type, module | Haskell type(with `UnliftedFFITypes` enable), module |
53 | |--------------------|------------------------|------------------------------------------------------|
54 | | bool, built-in | CBool, Foreign.C.Types | - |
55 | | int, built-in | CInt, Foreign.C.Types | - |
56 | | uint, built-in | CUInt, Foreign.C.Types | - |
57 | | long, built-in | CLong, Foreign.C.Types | - |
58 | | ulong, built-in | CULong, Foreign.C.Types| - |
59 | | uchar, built-in | Word8, Data.Word | - |
60 | | char, built-in | Int8, Data.Int | - |
61 | | uint8_t, stdint.h | Word8, Data.Word | - |
62 | | uint16_t, stdint.h | Word16, Data.Word | - |
63 | | uint32_t, stdint.h | Word32, Data.Word | - |
64 | | uint64_t, stdint.h | Word64, Data.Word | - |
65 | | int8_t, stdint.h | Int8, Data.Int | - |
66 | | int16_t, stdint.h | Int16, Data.Int | - |
67 | | int32_t, stdint.h | Int32, Data.Int | - |
68 | | int64_t, stdint.h | Int64, Data.Int | - |
69 | | type \*, built-in | Ptr type, Foreign.Ptr | Addr#, GHC.Prim |
70 | | HsInt, HsFFI.h | Int, Prelude | Int#, GHC.Prim |
71 | | HsWord, HsFFI.h | Word, Prelude | Word#, GHC.Prim |
72 | | HsBool, HsFFI.h | Bool, Prelude | - |
73 | | double, built-in | Double, Prelude | Double#, GHC.Prim |
74 | | float, built-in | Float, Prelude | Float#, GHC.Prim |
75 | | size_t, stddef.h | CSize, Foreign.C.Types | Word#, GHC.Prim |
76 |
77 |
78 | The size of some types depends on the platform (32-bit or 64-bit), e.g. the size of `HsInt/Int` is 32 bits on 32-bit machines and 64 bits on 64-bit ones. GHC also supports passing some array types to C, but not vice versa:
79 |
80 | | C type, header | Haskell type, module | Haskell type(with `UnliftedFFITypes` enable), module |
81 | | type \*, built-in | - | MutableByteArray#, GHC.Prim |
82 | | const type \*, built-in | - | ByteArray#, GHC.Prim |
83 | | StgMutArrPtrs \*(ghc<8.10), StgArrBytes \*\*, Rts.h | - | ArrayArray#, GHC.Prim |
84 |
85 | The Haskell FFI specification also supports importing function addresses, which is useful when they are used as finalizers for weak pointers.
86 |
87 | ```haskell
88 | foreign import ccall "&free" free :: FunPtr (Ptr Word8 -> IO ())
89 | ```
90 |
91 | # Allocate and free
92 |
93 | It's common for a C function to need dynamically allocated arrays. There are two solutions in general:
94 |
95 | + Allocate from C side, pass pointer back to Haskell, then use `ForeignPtr` from `Foreign.ForeignPtr` or `CPtr` from `Z.Foreign.CPtr` to wrap it, and ensure the memory will be freed when no longer needed.
96 | + Allocate from Haskell side as a GC managed heap object, then pass to C for manipulation.
97 |
98 | Usually it's recommended to use the second method, since the memory is still under the management of GHC's GC, so you don't have to worry about freeing it.
99 |
100 | ## Allocate memory and pass to C
101 |
102 | There are some helpers in `Z.Foreign` to help you with allocating and passing, but it's important to have some knowledge of the GHC runtime system to get things right. The GHC runtime is garbage collected, and there are two types of primitive arrays in GHC, with the objective of minimizing the overall memory management cost:
103 |
104 | + Small primitive arrays created with `newPrimArray` are directly allocated on the GHC heap and can be moved by the GHC garbage collector; we call these arrays *unpinned*. Allocating these arrays is cheap: we only need to check the heap limit and bump the heap pointer, just like for any other Haskell heap object. But we will pay the GC cost, which is OK for small arrays.
105 |
106 | + Large primitive arrays and those created with `newPinnedPrimArray` are allocated on GHC-managed memory blocks, which are also traced by the garbage collector but will never be moved before being freed; thus they are called *pinned*. Allocating these arrays is a bit more expensive since it's more like how malloc works, but we don't have to pay the GC cost.
107 |
108 | Besides the pinned/unpinned difference, we have two types of FFI calls in GHC:
109 |
110 | + Safe FFI calls are annotated with the `safe` keyword. These calls are executed on a separate OS thread, which can run concurrently with the GHC garbage collector, so we have to make sure only pinned arrays are passed. The main use case for safe FFI calls is long-running functions, for example, doing IO polling. Since these calls run on a separate OS thread, the Haskell threads on the original OS thread will not be affected.
111 |
112 | + Unsafe FFI calls are annotated with the `unsafe` keyword. These calls are executed on the same OS thread that is running the Haskell-side FFI code, which will in turn stop GHC from doing a garbage collection. We can pass both pinned and unpinned arrays in this case. The use case for unsafe FFI calls is short/small functions, which can be treated like fat primitive operations, such as memcpy or memcmp. Using unsafe FFI with long-running functions will effectively block the GHC runtime thread from running any other Haskell threads, which is dangerous. Even if you use the threaded runtime and expect your Haskell threads to be stolen by other OS threads, this will not work, since the GHC garbage collector will refuse to run if one of the OS threads is blocked by an FFI call.
113 |
114 | Based on the above analysis, we have the following FFI strategy table:
115 |
116 | | FFI \ Array | pinned | unpinned |
117 | |--------------|---------------|---------------|
118 | | unsafe | directly pass | directly pass |
119 | | safe | directly pass | make a copy |
120 |
121 | Helpers in `Z.Foreign` are also divided into two categories: those with the `Unsafe` suffix, to be used with `unsafe` FFI, and those with the `Safe` suffix, to be used with `safe` FFI. The following is an example that accommodates a small C function:
122 |
123 | ```c
124 | #include <HsFFI.h>
125 |
126 | void c_add_and_time(HsInt x, HsInt y, HsInt* add_result, HsInt* time_result){
127 | *add_result = x + y;
128 | *time_result = x * y;
129 | }
130 | ```
131 |
132 | ```haskell
133 | {-# LANGUAGE TypeApplications #-}
134 | {-# LANGUAGE UnliftedFFITypes #-}
135 |
136 | import Z.Foreign
137 |
138 | foreign import ccall unsafe c_add_and_time :: Int -> Int -> MBA# Int -> MBA# Int -> IO ()
139 |
140 | cAddTime :: Int -> Int -> (Int, Int)
141 | cAddTime x y = do
142 | fst <$> allocPrimUnsafe @Int (\ add_result ->
143 | fst <$> allocPrimUnsafe @Int (\ time_result ->
144 | c_add_and_time x y add_result time_result))
145 | ```
146 |
147 | Now when you call `cAddTime` in Haskell:
148 |
149 | 1. The `allocPrimUnsafe` function will allocate a single-element `MutablePrimArray Int` to be used as an `Int` pointer; here we use two `allocPrimUnsafe` calls to allocate memory for saving the add and time results.
150 | 2. The `x` and `y` parameters are passed as `Int`, and received as `HsInt` in C. The `add_result` and `time_result` are passed as `MBA# Int`, which is a type alias for `MutableByteArray#`, and received as `HsInt*` in C.
151 | 3. `allocPrimUnsafe` will automatically peek the result from the single-element array and return it together with the FFI's return value, which is ignored by `fst`.
152 |
153 | The memory allocated by `allocPrimUnsafe`, `allocPrimArrayUnsafe` and `allocPrimVectorUnsafe` is not pinned, so you can't get the address first and then pass it as `Ptr a`. The only way to pass them is to use the `MutableByteArray#` and `ByteArray#` primitive types. In the `Z.Foreign` module, the `BA# a` and `MBA# a` type aliases are defined for convenience:
154 |
155 | ```haskell
156 | -- for const pointers
157 | type BA# a = ByteArray#
158 | -- for writable pointers
159 | type MBA# a = MutableByteArray# RealWorld
160 | ```
161 |
162 | Since they are type aliases, the type tag is only for documentation. You should use proper pointer types on the C side to receive them, just like a `Ptr a`. Another common problem with `BA#` and `MBA#` is that they can only pass the address of the array's first element, thus you have to manually pass a separate offset parameter if you want to work with a certain range of the array. This can be illustrated by the following code:
163 |
164 | ```c
165 | #include <HsFFI.h>
166 |
167 | // here we write a wrapper to receive a slice of bytearray
168 | HsInt hs_memchr(const uint8_t *a, HsInt aoff, uint8_t b, HsInt n) {
169 | a += aoff;
170 | uint8_t *p = memchr(a, b, (size_t)n);
171 | if (p == NULL) return -1;
172 | else return (p - a);
173 | }
174 | ```
175 |
176 | ```haskell
177 | import Z.Foreign
178 | import Data.Word
179 | import qualified Z.Data.Vector as V
180 |
181 | foreign import ccall unsafe hs_memchr :: BA# Word8 -> Int -> Word8 -> Int -> IO Int
182 |
183 | memchrBytes :: V.Bytes -> Word8 -> Int
184 | memchrBytes bs x = withPrimVector bs $ \ mba off len -> hs_memchr mba off x len
185 | ```
186 |
187 | The safe FFI variation `withPrimVectorSafe` is simpler: the offset is directly added to the address of the pinned memory, so there's only a pointer and a length parameter. It's highly recommended to use unpinned allocation if possible, because pinned allocation often leads to memory fragmentation due to its garbage collection strategy, especially under a lot of small repetitive allocations.
188 |
189 | ## Null terminated strings
190 |
191 | C uses a lot of null-terminated strings, i.e. `char*` where no length info is needed because it's assumed that the string always ends with a NULL terminator. In Haskell we provide a special type for this, namely the `CBytes` type from the `Z.Data.CBytes` module. Similar to `withPrimVectorUnsafe` and `withPrimVectorSafe`, use `withCBytesUnsafe` and `withCBytes` to pass a `CBytes` to C FFI.
192 |
193 | ```haskell
194 | > :m + Z.Data.CBytes Z.Foreign Data.Word
195 | > foreign import ccall unsafe strlen :: BA# Word8 -> IO CSize
196 | > withCBytesUnsafe "hello, world!" strlen
197 | 13
198 | > foreign import ccall safe "strlen" strlen_safe :: Ptr Word8 -> IO CSize
199 | > withCBytes "hello, world!" strlen_safe
200 | 13
201 | ```
202 |
203 | Use `allocCBytesUnsafe`, `allocCBytes` to allocate memory to be passed to C, return `CBytes` back.
204 |
205 | ```haskell
206 | > foreign import ccall unsafe sprintf :: MBA# Word8 -> BA# Word8 -> Int -> IO ()
207 | > allocCBytesUnsafe 32 $ \ dest -> withCBytesUnsafe "result is %d" $ \ fmt -> sprintf dest fmt 3
208 | ("result is 3",())
209 | ```
210 |
211 | To get a `CBytes` from a null-terminated `char*`, use `fromCString` or `peekMBACBytes`. If the memory is allocated from C, it's recommended to use `bracket` to ensure the memory gets freed.
212 |
213 | ## Unaligned Class
214 |
215 | Sometimes the memory passed to C is filled with struct fields. You could use the `Storable` machinery from `Foreign.Storable` to peek/poke data from/to the memory, but `Storable` uses `Ptr a`, so it requires pinned memory whose address is fixed. In [Z-Data](https://hackage.haskell.org/package/Z-Data) an alternative way to do this is to use the `Unaligned` class from the `Z.Data.Array.Unaligned` module. Here's a code sample from [Z-IO](https://hackage.haskell.org/package/Z-IO):
216 |
217 | ```c
218 | // definitions from libuv
219 | typedef struct uv_passwd_s {
220 | char* username;
221 | long uid;
222 | long gid;
223 | char* shell;
224 | char* homedir;
225 | } uv_passwd_t;
226 |
227 | int uv_os_get_passwd(uv_passwd_t* pwd);
228 | void uv_os_free_passwd(uv_passwd_t* pwd);
229 | ```
230 |
231 | ```haskell
232 | import Z.Foreign
233 | import Z.Data.Array.Unaligned
234 | import Z.IO.Exception
235 | import Z.Data.CBytes
236 |
237 | -- | Data type for password file information.
238 | data PassWD = PassWD
239 | { passwd_username :: CBytes
240 | , passwd_uid :: UID
241 | , passwd_gid :: GID
242 | , passwd_shell :: CBytes
243 | , passwd_homedir :: CBytes
244 | } deriving (Eq, Ord, Show, Read)
245 |
246 | foreign import ccall unsafe uv_os_get_passwd :: MBA## PassWD -> IO CInt
247 | foreign import ccall unsafe uv_os_free_passwd :: MBA## PassWD -> IO ()
248 |
249 | -- | Gets a subset of the password file entry for the current effective uid (not the real uid).
250 | --
251 | -- The populated data includes the username, euid, gid, shell, and home directory.
252 | -- On non-Windows systems, all data comes from getpwuid_r(3).
253 | -- On Windows, uid and gid are set to -1 and have no meaning, and shell is empty.
254 | getPassWD :: HasCallStack => IO PassWD
255 | getPassWD = bracket
256 | (do mpa@(MutableByteArray mba##) <- newByteArray (#size uv_passwd_t)
257 | throwUVIfMinus_ (uv_os_get_passwd mba##)
258 | return mpa)
259 | (\ (MutableByteArray mba##) -> uv_os_free_passwd mba##)
260 | (\ (MutableByteArray mba##) -> do
261 | username <- fromCString =<< peekMBA mba## (#offset uv_passwd_t, username)
262 | uid <- fromIntegral <$> (peekMBA mba## (#offset uv_passwd_t, uid) :: IO CLong)
263 | gid <- fromIntegral <$> (peekMBA mba## (#offset uv_passwd_t, gid) :: IO CLong)
264 | shell <- fromCString =<< peekMBA mba## (#offset uv_passwd_t, shell)
265 | homedir <- fromCString =<< peekMBA mba## (#offset uv_passwd_t, homedir)
266 | return (PassWD username uid gid shell homedir))
267 | ```
268 |
269 | Note that the above Haskell code uses [hsc2hs](https://hackage.haskell.org/package/hsc2hs) to get constants (struct size, field offsets, etc.) from C code; `##` is `#` escaped in a `.hsc` file. `uv_os_get_passwd` asks for a `uv_passwd_t*` struct pointer which must be a valid writable memory location, so in Haskell we manually allocate memory with `newByteArray` and pass the `MutableByteArray#` as a pointer. After the FFI call is complete, we use `peekMBA` from the `Unaligned` class to read the `char*` pointer, then use
270 | `fromCString` from `Z.Data.CBytes` to copy the result. After copy completes, `uv_os_free_passwd` is called to free any memory allocated in C code.
271 |
272 | ## CPtr
273 |
274 | In some cases, allocation from C is mandatory, e.g. when you can't get the size to allocate (it's hidden by the C side). We will use `CPtr` as an example to illustrate how to keep a reference to some opaque C struct.
275 |
276 | First you have to prepare a pair of allocation and free functions:
277 |
278 | ```c
279 | struct foo_s{
280 | ...
281 | };
282 |
283 | typedef struct foo_s foo_t;
284 |
285 | // the allocation function
286 | foo_t *new_foo(int x);
287 |
288 | // the free function
289 | void destroy_foo(foo_t* foo);
290 |
291 | // some function need foo_t
292 | void bar(foo_t* foo);
293 | ```
294 |
295 | Now we import these functions in Haskell:
296 |
297 | ```haskell
298 | import Z.Foreign
299 | import Z.Foreign.CPtr
300 |
301 | data Foo
302 |
303 | foreign import ccall unsafe new_foo :: CInt -> IO (Ptr Foo)
304 | foreign import ccall unsafe "&destroy_foo" destroy_foo :: FunPtr (Ptr Foo -> IO ())
305 |
306 | newFoo :: Int -> IO (CPtr Foo)
307 | newFoo x = newCPtr' (new_foo (fromIntegral x)) destroy_foo
308 |
309 | -- use `withCPtr` if you want to get foo_t pointer.
310 | foreign import ccall unsafe bar :: Ptr Foo -> IO ()
311 | ...
312 | foo <- newFoo ...
313 | withCPtr foo bar
314 | ...
315 |
316 | ```
317 |
318 | We encapsulate the C struct `foo_t` in a Haskell heap object `CPtr Foo` with the following steps:
319 |
320 | + Define a type tag `Foo`.
321 | + Import allocation and free functions, the free function should be imported as a `FunPtr` with its address.
322 | + Use `newCPtr'` from `Z.Foreign.CPtr` to attach the free function as a finalizer, which will be called once the `CPtr Foo` is collected.
323 | + `withCPtr` will get the pointer back and ensure it will not get collected during the FFI computation.
324 |
325 | # Exception handling
326 |
327 | C libraries usually have some conventions for error handling, e.g. returning a negative error code to indicate an exceptional case. It's recommended to define an exception type and then provide helpers. The following is an example from [Z-Botan](https://github.com/ZHaskell/z-botan):
328 |
329 | * Import Error code in hsc file:
330 |
331 | ```haskell
332 | pattern BOTAN_FFI_ERROR_UNKNOWN_ERROR :: CInt
333 | pattern BOTAN_FFI_SUCCESS = (#const BOTAN_FFI_SUCCESS)
334 | pattern BOTAN_FFI_INVALID_VERIFIER = (#const BOTAN_FFI_INVALID_VERIFIER)
335 | pattern BOTAN_FFI_ERROR_INVALID_INPUT = (#const BOTAN_FFI_ERROR_INVALID_INPUT)
336 | ...
337 | ```
338 |
339 | * Define an extensible exception type.
340 |
341 | ```haskell
342 | data SomeBotanException = forall e . Exception e => SomeBotanException e
343 |
344 | instance Show SomeBotanException where
345 | show (SomeBotanException e) = show e
346 |
347 | instance Exception SomeBotanException
348 |
349 | botanExceptionToException :: Exception e => e -> SomeException
350 | botanExceptionToException = toException . SomeBotanException
351 |
352 | botanExceptionFromException :: Exception e => SomeException -> Maybe e
353 | botanExceptionFromException x = do
354 | SomeBotanException a <- fromException x
355 | cast a
356 |
357 | #define BotanE(e) data e = e CInt CallStack deriving Show; \
358 | instance Exception e where \
359 | { toException = botanExceptionToException \
360 | ; fromException = botanExceptionFromException \
361 | }
362 |
363 | BotanE(InvalidVerifier)
364 | BotanE(InvalidInput)
365 | BotanE(BadMac)
366 | ...
367 | ```
368 |
369 | * And provide helpers for FFI code:
370 |
371 | ```haskell
372 | throwBotanIfMinus :: (HasCallStack, Integral a) => IO a -> IO a
373 | throwBotanIfMinus f = do
374 | r <- f
375 | when (r < 0) (throwBotanError_ (fromIntegral r) callStack)
376 | return r
377 |
378 | throwBotanIfMinus_ :: (HasCallStack, Integral a) => IO a -> IO ()
379 | throwBotanIfMinus_ f = do
380 | r <- f
381 | when (r < 0) (throwBotanError_ (fromIntegral r) callStack)
382 |
383 | throwBotanError :: HasCallStack => CInt -> IO ()
384 | throwBotanError r = throwBotanError_ r callStack
385 |
386 | throwBotanError_ :: CInt -> CallStack -> IO ()
387 | throwBotanError_ r cs = case r of
388 | BOTAN_FFI_ERROR_INVALID_INPUT -> throw (InvalidInput r cs)
389 | BOTAN_FFI_ERROR_BAD_MAC -> throw (BadMac r cs)
390 | BOTAN_FFI_ERROR_INSUFFICIENT_BUFFER_SPACE -> throw (InsufficientBufferSpace r cs)
391 | ...
392 | ```
393 |
394 | * In FFI code, use helper to throw exception when needed:
395 |
396 | ```haskell
397 | foreign import ccall unsafe hs_botan_mac_update :: BotanStructT -> BA## Word8 -> Int -> Int-> IO CInt
398 |
399 | updateMAC :: HasCallStack => MAC -> V.Bytes -> IO ()
400 | updateMAC (MAC bts _ _) bs =
401 | withBotanStruct bts $ \ pbts ->
402 | withPrimVectorUnsafe bs $ \ pbs off len ->
403 | throwBotanIfMinus_ (hs_botan_mac_update pbts pbs off len)
404 | ```
405 |
--------------------------------------------------------------------------------
/Z-Data/JSON.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: default
3 | parent: Z-Data
4 | title: JSON
5 | nav_order: 4
6 | ---
7 |
8 | ## Table of contents
9 | {: .no_toc .text-delta }
10 |
11 | 1. TOC
12 | {:toc}
13 |
14 | Use the `Z.Data.JSON` module to get human-readable serialization/deserialization. The easiest way to use the library is to define a target data type, deriving
15 | `Generic` and `JSON` instances, which provide:
16 |
17 | * `fromValue` to convert `Value` to Haskell values.
18 | * `toValue` to convert Haskell values to `Value`.
19 | * `encodeJSON` to directly write Haskell value into JSON bytes.
20 |
21 | ```haskell
22 | class JSON a where
23 | ...
24 | toValue :: a -> Value
25 | fromValue :: Value -> Converter a
26 | encodeJSON :: a -> B.Builder () -- `Z.Data.Builder` as `B`
27 | ...
28 | ```
29 |
30 | For example,
31 |
32 | ```haskell
33 | {-# LANGUAGE DeriveGeneric, DeriveAnyClass, DerivingStrategies #-}
34 |
35 | import GHC.Generics (Generic)
36 | import qualified Z.Data.Builder as Builder
37 | import qualified Z.Data.JSON as JSON
38 | import qualified Z.Data.Text as T
39 |
40 | data Person = Person {name :: T.Text, age :: Int}
41 | deriving (Show, Generic)
42 | deriving anyclass (JSON.JSON)
43 | ```
44 |
45 | We can now encode & decode JSON like this:
46 |
47 | ```haskell
48 | > JSON.toValue (Person{ name="Alice", age=16 })
49 | Object [("name",String "Alice"),("age",Number 16.0)]
50 | > JSON.encode (Person{ name="Alice", age=16 })
51 | [123,34,110,97,109,101,34,58,34,65,108,105,99,101,34,44,34,97,103,101,34,58,49,54,125]
52 | > JSON.encodeText (Person{ name="Alice", age=16 })
53 | "{\"age\":16,\"name\":\"Alice\"}"
54 | > JSON.decodeText' "{\"age\":16,\"name\":\"Alice\"}" :: Either JSON.DecodeError Person
55 | Right (Person {age = 16, name = "Alice"})
56 | ```
57 |
58 | The `Generic` based instances convert Haskell data with following rules:
59 |
60 | * Constructors without payloads are encoded as JSON String, `data T = A | B` are encoded as `"A"` or `"B"`.
61 | * A single constructor is ignored if there are payloads: in `data T = T ...`, `T` is ignored:
62 | * Records are encoded as JSON object. `data T = T{k1 :: .., k2 :: ..}` are encoded as `{"k1":...,"k2":...}`.
63 | * Plain product are encoded as JSON array. `data T = T t1 t2` are encoded as "[x1,x2]".
64 | * Single field plain product are encoded as it is, i.e. `data T = T t` are encoded as "t" just like its payload.
65 | * Multiple constructors are converted to a single-key JSON object if there are payloads:
66 | * Records are encoded as JSON object like above. `data T = A | B {k1 :: .., k2 :: ..}` are encoded as
67 | `{"B":{"k1":...,"k2":...}}` in `B .. ..` case, or `"A"` in `A` case.
68 | * Products inside a sum type are similar to above, wrapped by an outer single-key object layer marking which constructor used during data construction.
69 |
70 | These rules apply to user defined ADTs, but some built-in instances have different behaviours, namely:
71 |
72 | * `Maybe a` are encoded as JSON `null` in `Nothing` case, or directly encoded to its payload in `Just` case.
73 | * `[a]` are encoded to JSON array, `[Char]` are encoded into JSON string.
74 | * `NonEmpty`, `Vector`, `PrimVector`, `HashSet`, `FlatSet`, `FlatIntSet` are also encoded to JSON array.
75 | * `Bytes` are encoded into JSON text using base64 encoding.
76 | * `HashMap`, `FlatMap`, `FlatIntMap` are encoded to JSON object.
77 |
78 | ## Custom Settings
79 |
80 | There are some modifying options if you provide a custom `Settings`, which
81 | allow you to modify field name or constructor name, but please *DO NOT*
82 | produce control characters during your modification, since we assume field
83 | labels and constructor name won't contain them, thus we can save an extra
84 | escaping pass. To use custom `Settings` just write:
85 |
86 | ```haskell
87 | data T = T {fooT :: Int, barT :: [Int]} deriving Generic
88 | instance JSON.JSON T where
89 | -- You can omit following definitions if you don't need to change settings
90 | toValue = JSON.gToValue JSON.defaultSettings{ JSON.fieldFmt = JSON.snakeCase } . from
91 | encodeJSON = JSON.gEncodeJSON JSON.defaultSettings{ JSON.fieldFmt = JSON.snakeCase } . from
92 | ```
93 |
94 | ```haskell
95 | > JSON.toValue (T 0 [1,2,3])
96 | Object [("foo_t",Number 0.0),("bar_t",Array [Number 1.0,Number 2.0,Number 3.0])]
97 | ```
98 |
99 | ## Manually Writing Instances
100 |
101 | You can write `JSON` instances by hand if the `Generic` based one doesn't suit you.
102 | Here is an example similar to aeson's.
103 |
104 | ```haskell
105 | import qualified Z.Data.Text as T
106 | import qualified Z.Data.Vector as V
107 | import qualified Z.Data.Builder as B
108 | import qualified Z.Data.JSON as JSON
109 | import Z.Data.JSON ((.:), (.=), (.!), JSON(..))
110 |
111 | data Person = Person { name :: T.Text , age :: Int } deriving Show
112 |
113 | instance JSON Person where
114 | fromValue = JSON.withFlatMapR "Person" $ \ v -> Person
115 | <$> v .: "name"
116 | <*> v .: "age"
117 |
118 | toValue (Person n a) = JSON.object ["name" .= n, "age" .= a]
119 |
120 | encodeJSON (Person n a) = JSON.object' $ ("name" .! n <> "age" .! a)
121 | ```
122 |
123 | ```haskell
124 | > toValue (Person "Joe" 12)
125 | Object [("name",String "Joe"),("age",Number 12.0)]
126 | > JSON.convert' @Person . JSON.Object $ V.pack [("name",JSON.String "Joe"),("age",JSON.Number 12.0)]
127 | Right (Person {name = "Joe", age = 12})
128 | > JSON.encodeText (Person "Joe" 12)
129 | "{\"name\":\"Joe\",\"age\":12}"
130 | ```
131 |
132 | The `Value` type is different from aeson's one in that we use `Vector (Text, Value)` to represent JSON objects, thus
133 | we can choose different strategies on key duplication, the lookup map type, etc. so instead of a single `withObject`,
134 | we provide `withHashMap`, `withHashMapR`, `withFlatMap` and `withFlatMapR` which use different lookup map type, and different key order priority. Most of the time `FlatMap` is faster than `HashMap` since we only use the lookup map once, the cost of constructing a `HashMap` is higher. If you want to directly work on key-values, `withKeyValues` provide key-values vector access.
135 |
136 | There're some useful tools to help write encoding code in `Z.Data.JSON.Builder` module, such as a JSON string escaping tool, etc. If you don't particularly care for fast encoding, you can also use `toValue` together with value builder, the overhead is usually very small.
137 |
--------------------------------------------------------------------------------
/Z-Data/Parser-and-Builder.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: default
3 | parent: Z-Data
4 | title: Parser and Builder
5 | nav_order: 3
6 | ---
7 |
8 | ## Table of contents
9 | {: .no_toc .text-delta }
10 |
11 | 1. TOC
12 | {:toc}
13 |
14 | # Parser Monad
15 |
16 | The `Parser` from `Z.Data.Parser` is designed for high performance resumable binary parsing and simple textual parsing, such as network protocols, JSON, etc. Write a parser by using basic parsers from `Z.Data.Parser` such as `takeWhile`, `int`, etc.
17 |
18 | ```haskell
19 | import qualified Z.Data.Parser as P
20 | import Z.Data.ASCII
21 |
22 | data Date = Date { year :: Int, month :: Int, day :: Int } deriving Show
23 |
24 | dateParser :: P.Parser Date
25 | dateParser = do
26 | y <- P.int
27 | P.word8 HYPHEN
28 | m <- P.int
29 | P.word8 HYPHEN
30 | d <- P.int
31 | return $ Date y m d
32 | ```
33 |
34 | `Parser` in Z works directly on `Bytes`:
35 |
36 | ```haskell
37 | > P.parse' dateParser "2020-12-12"
38 | Date 2020 12 12
39 | > P.parse' dateParser "2020-JAN-12"
40 | Left ["Z.Data.Parser.Numeric.int","Z.Data.Parser.Base.takeWhile1: no satisfied byte at [74,65,78,45,49,50]"]
41 | > P.parse dateParser "2020-12-12, 08:00"
42 | ([44,32,48,56,58,48,48], Right (Date {year = 2020, month = 12, day = 12}))
43 | > P.parseChunk dateParser "2020-"
44 | Partial _
45 | > let (P.Partial f) = P.parseChunk dateParser "2020-"
46 | > let (P.Partial f') = f "05-05" -- incrementally provide input
47 | > f' "" -- push empty chunk to signal EOF
48 | Success Date {year = 2020, month = 5, day = 5}
49 | ```
50 |
51 | Binary protocol can use `decodePrim/decodePrimLE/decodePrimBE` with `TypeApplications` extension, let's say you want to implement a [MessagePack str format](https://github.com/msgpack/msgpack/blob/master/spec.md#str-format-family) parser:
52 |
53 | ```haskell
54 | import Data.Bits
55 | import Data.Word
56 | import qualified Z.Data.Parser as P
57 | import qualified Z.Data.Text as T
58 |
59 | msgStr :: P.Parser T.Text
60 | msgStr = do
61 | tag <- P.anyWord8
62 | case tag of
63 | t | t .&. 0xE0 == 0xA0 -> str (t .&. 0x1F)
64 | 0xD9 -> str =<< P.anyWord8
65 | 0xDA -> str =<< P.decodePrimBE @Word16
66 | 0xDB -> str =<< P.decodePrimBE @Word32
67 | _ -> P.fail' "unknown tag"
68 | where
69 | str !l = do
70 | bs <- P.take (fromIntegral l)
71 | case T.validateMaybe bs of
72 | Just t -> return (Str t)
73 | _ -> P.fail' "illegal UTF8 Bytes"
74 | ```
75 |
76 | Compared to `parsec` or `megaparsec`, `Parser` in Z provides limited error reporting and does not support being used as a monad transformer. But it provides an instance of `PrimMonad`, which allows some limited effects, such as mutable variables and array operations.
77 |
78 | ## Auto Backtracked Alternative
79 |
80 | Similar to `attoparsec`, `Parser` in Z always backtracks when used with `<|>` (`Alternative` instance), which means a failed branch will not consume any input, without you having to do anything special:
81 |
82 | ```haskell
83 | import Control.Applicative
84 | ...
85 | p = fooParser <|> barParser <|> quxParser
86 | ```
87 |
88 | In the above code, if any parser fails, the next parser is retried from the beginning of the input. Backtracking is not always needed though; it is recommended to use `peek`
89 | or `peekMaybe` if the syntax or protocol can be parsed as an LL(1) grammar, since it's faster than backtracking.
90 |
91 | # Builder Monad
92 |
93 | The `Builder` from `Z.Data.Builder` is the reverse process of parsing, i.e. writing Haskell data types to `Bytes`, aka the *Writer* monad. The usage is very similar to `Parser`:
94 |
95 | ```haskell
96 | import qualified Z.Data.Builder as B
97 | import Z.Data.ASCII
98 |
99 | data Date = Date { year :: Int, month :: Int, day :: Int } deriving Show
100 |
101 | dataBuilder :: Date -> B.Builder ()
102 | dataBuilder (Date y m d) = do
103 | int' y
104 | B.word8 HYPHEN
105 | int' m
106 | B.word8 HYPHEN
107 | int' d
108 | where
109 | int' x | x >= 10 = B.int x
110 | | otherwise = B.word8 DIGIT_0 >> B.int x
111 | ```
112 |
113 | Under the hood a `Builder` records a buffer writing function, and thus can be composed quickly. Use `build/buildText` to run a `Builder`, which produce `Bytes` and `Text` respectively:
114 |
115 | ```haskell
116 | > B.build (dataBuilder $ Date 2020 11 1)
117 | [50,48,50,48,45,49,49,45,48,49]
118 | > B.buildText (dataBuilder $ Date 2020 11 1)
119 | "2020-11-01"
120 | ```
121 |
122 | Binary `Builder` can be constructed with `encodePrim/encodePrimLE/encodePrimBE`, let's still take [MessagePack str format](https://github.com/msgpack/msgpack/blob/master/spec.md#str-format-family) as an example:
123 |
124 | ```haskell
125 | import Data.Bits
126 | import Data.Word
127 | import qualified Z.Data.Builder as B
128 | import qualified Z.Data.Text as T
129 | import qualified Z.Data.Vector as V
130 |
131 | msgStr :: T.Text -> B.Builder ()
132 | msgStr t = do
133 | let bs = T.getUTF8Bytes t
134 | case V.length bs of
135 | len | len <= 31 -> B.word8 (0xA0 .|. fromIntegral len)
136 | | len < 0x100 -> B.encodePrim (0xD9 :: Word8, fromIntegral len :: Word8)
137 | | len < 0x10000 -> B.encodePrim (0xDA :: Word8, BE (fromIntegral len :: Word16))
138 | | otherwise -> B.encodePrim (0xDB :: Word8, BE (fromIntegral len :: Word32))
139 | B.bytes bs
140 | ```
141 |
142 | Note that we directly use the `(Unaligned a, Unaligned b) => Unaligned (a, b)` instance to write several primitive types in a row. The `Unaligned` class provides basic facilities to read and write primitive types from and to raw bytes (with unaligned offsets).
143 |
144 | ## Text formatting with `Builder`
145 |
146 | Different from other standard libraries which usually provide `printf` or similar, in Z directly using `Builder` to format text is recommended:
147 |
148 | ```haskell
149 | -- Similar to print("The result are %d, %d", x, y)
150 | -- If you can ensure all Builders will write UTF-8 encoded bytes,
151 | -- you can use unsafeBuildText to save a validation
152 |
153 | B.unsafeBuildText $ do
154 | "The result are " >> B.double x >> ", " >> B.double y
155 |
156 | -- Or use do syntax
157 |
158 | B.unsafeBuildText $ do
159 | "The result are "
160 | B.double x
161 | ", "
162 | B.double y
163 | ...
164 |
165 | ```
166 |
167 | The strength of monadic `Builder` is that you can reuse all control structure from `Control.Monad`, such as conditions, loops, etc. `Builder ()` has an `IsString` instance which can wrap writing literals in UTF-8 encoding, with some modifications:
168 |
169 | + `\NUL` will be written as `\xC0\x80`.
170 | + `\xD800` ~ `\xDFFF` will be encoded in three bytes as normal UTF-8 codepoints.
171 |
172 | It's safe to put a string literal inside an `unsafeBuildText` as long as you don't write `\0` or `\55296` ~ `\57343`.
173 |
--------------------------------------------------------------------------------
/Z-Data/Vector-Bytes-Text.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: default
3 | parent: Z-Data
4 | title: Vector and Text
5 | nav_order: 2
6 | ---
7 |
8 | ## Table of contents
9 | {: .no_toc .text-delta }
10 |
11 | 1. TOC
12 | {:toc}
13 |
14 | # Vector: array slices
15 |
16 | In Z.Haskell, we use immutable arrays a lot. And we have two main array slice types in `Z.Data.Vector`:
17 |
18 | ```haskell
19 | -- The payloads are array offset and length
20 | data Vector a = Vector (SmallArray a) Int Int
21 | data PrimVector a = PrimVector (PrimArray a) Int Int
22 | ...
23 | ```
24 |
25 | These types support efficient slicing operations (`take`, `drop`, `break`, etc.). To abstract over these types, the `Vec` class is introduced:
26 |
27 | ```haskell
28 | class (Arr (IArray v) a) => Vec v a where
29 | -- | Vector's immutable array type
30 | type IArray v :: Type -> Type
31 | -- | Get underline array and slice range(offset and length).
32 | toArr :: v a -> (IArray v a, Int, Int)
33 | -- | Create a vector by slicing an array(with offset and length).
34 | fromArr :: IArray v a -> Int -> Int -> v a
35 | ```
36 |
37 | `Vector` and `PrimVector` are obvious instances, but plain array types are also `Vec`'s instances with `O(n)` `fromArr`, for example:
38 |
39 | ```haskell
40 | instance Prim a => Vec PrimArray a where
41 | type IArray PrimArray = PrimArray
42 | toArr arr = (arr, 0, sizeofArr arr)
43 | fromArr = fromArray
44 |
45 | -- | Construct a slice from an array by copying(if neccessary).
46 | fromArray :: Arr arr a => arr a -> Int -> Int -> arr a
47 | fromArray arr offset len | offset == 0 && sizeofArr arr == len = arr
48 | | otherwise = cloneArr arr offset len
49 | ```
50 |
51 | These instances give `Vec` great flexibility: if your combinators are implemented with `Vec`, they will work on various slicing types and plain array types. For example, the `map'` combinator from `Z.Data.Vector`:
52 |
53 | ```haskell
54 | map' :: forall u v a b. (Vec u a, Vec v b) => (a -> b) -> u a -> v b
55 | ```
56 |
57 | Note the input and output `Vec` type is not required to be the same, which means applications like the following are possible:
58 |
59 | ```haskell
60 | data User = User { ..., age :: Int, ...}
61 |
62 | -- | Take all user's age and pack them into a `PrimArray`.
63 | takeAllAges :: Vector User -> PrimArray Int
64 | takeAllAges = map' age
65 | ```
66 |
67 | The above functions will work efficiently as expected, `User`'s age will be directly written into a new `PrimArray` with no extra copies.
68 |
69 | All functions in `Z.Data.Vector` are implemented using `Vec` constraint, sometimes this will lead to type inference failures, so it's recommended to enable `TypeApplications` extension and add necessary type annotations:
70 |
71 | ```haskell
72 | {-# LANGUAGE TypeApplications #-}
73 |
74 | import qualified Z.Data.Vector as V
75 | ...
76 | -- if you don't write annotations, GHC may get confused
77 | -- which type of vectors you want to pack.
78 | let v = V.pack @PrimVector @Word [1..1024]
79 | ...
80 | ```
81 |
82 | # Bytes: Word8 vector
83 |
84 | One of the most commonly used vector types is `type Bytes = PrimVector Word8`, which is used to represent binary data. To make writing `Bytes` literals more convenient, `Bytes` is an instance of `IsString`:
85 |
86 | ```haskell
87 | > import qualified Z.Data.Vector as V
88 | > :set -XOverloadedStrings
89 | > "hello, world" :: V.Bytes
90 | "hello, world"
91 | > "你好世界" :: V.Bytes -- unicode literals will get chopped!
92 | [96,125,22,76]
93 | ```
94 |
95 | In the above example, unicode literals "你好世界" do not produce UTF-8 encoded byte vector as one might expect, you have to use `Text` to get that behaviour:
96 |
97 | ```haskell
98 | > import qualified Z.Data.Text as T
99 | > T.getUTF8Bytes "你好世界"
100 | [228,189,160,229,165,189,228,184,150,231,149,140]
101 | ```
102 |
103 | Note that `Bytes`'s `Show` instance is not specialized to show ASCII characters. You can use functions from `Z.Data.Vector.Hex` and `Z.Data.Vector.Base64` to manually encode binary `Bytes` into ASCII strings:
104 |
105 | ```haskell
106 | > import Z.Data.Vector.Hex
107 | > hexEncode True "hello world"
108 | "68656C6C6F20776F726C64"
109 | > import Z.Data.Vector.Base64
110 | > base64Encode "hello world"
111 | "aGVsbG8gd29ybGQ="
112 | ```
113 |
114 | In `Z-Data` we use incoherent instances to handle `Bytes`'s JSON instance(using base64 encoding):
115 |
116 | ```haskell
117 | > V.pack [0..127] :: V.Bytes
118 | [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127]
119 | > import qualified Z.Data.JSON as JSON
120 | > JSON.encode (V.pack [0..127] :: V.Bytes)
121 | "\"AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMDEyMzQ1Njc4OTo7PD0+P0BBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWltcXV5fYGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6e3x9fn8=\""
122 | > JSON.encode (V.pack [0..127] :: V.PrimVector Int)
123 | "[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127]"
124 | ```
125 |
126 | Besides special instances, many functions in `Z.Data.Vector` will leverage rewrite rules to use more efficient instructions when used with `Bytes`, such as `break`, `takeWhile`, etc. But these optimizations should have no visible difference for users.
127 |
128 | # Text: UTF-8 encoded Bytes
129 |
130 | The `Text` type from `Z.Data.Text` is a `newtype` wrapper around `Bytes` which provides UTF-8 encoding guarantee, you should construct a `Text` using `validate` or `validateMaybe` or string literals only:
131 |
132 | ```haskell
133 | > import qualified Z.Data.Text as T
134 | > T.validate "hello world"
135 | "hello world"
136 | > T.validate "hello world, \128"
137 | *** Exception: InvalidUTF8Exception [("validate",SrcLoc {srcLocPackage = "interactive", srcLocModule = "Ghci12", srcLocFile = "", srcLocStartLine = 52, srcLocStartCol = 1, srcLocEndLine = 52, srcLocEndCol = 31})]
138 | > "你好世界" :: T.Text
139 | "你好世界"
140 | ```
141 |
142 | In Haskell, `String`s are allowed to have illegal UTF-8 code points so that any UNIX file path can be encoded in `String`, but in Z.Haskell we have a special type for file path. `Text` will convert illegal code points in case of string literals:
143 |
144 | ```haskell
145 | > "hello world, \55296" :: T.Text
146 | "hello world, �"
147 | > T.getUTF8Bytes "hello world, \55296" -- surrogates
148 | [104,101,108,108,111,32,119,111,114,108,100,44,32,239,191,189]
149 | ```
150 |
151 | The `239, 191, 189` byte sequence is the UTF-8 encoding of the replacement character `U+FFFD`. By providing limited ways of creating `Text`, combinators in `Z.Data.Text` can safely assume that a `Text` only contains UTF-8 encoded code points.
152 |
153 | `Z.Data.Text` also provides some unicode processing capabilities, such as normalization, case-mapping, etc:
154 |
155 | ```haskell
156 | > T.validate "re\204\129sume\204\129"
157 | "résumé"
158 | > T.normalize (T.validate "re\204\129sume\204\129")
159 | "résumé"
160 | > T.getUTF8Bytes $ (T.validate "re\204\129sume\204\129")
161 | [114,101,204,129,115,117,109,101,204,129]
162 | > T.getUTF8Bytes $ T.normalize (T.validate "re\204\129sume\204\129")
163 | [114,195,169,115,117,109,195,169]
164 | > T.toUpper "διακριτικός"
165 | "ΔΙΑΚΡΙΤΙΚΌΣ"
166 | ```
167 |
168 | Regular expressions based on the [re2](https://github.com/google/re2) regex engine are also provided:
169 |
170 | ```haskell
171 | > import qualified Z.Data.Text.Regex as RE
172 | > let emailRegex = RE.regex "([a-z0-9_\\.-]+)@([\\da-z\\.-]+)\\.([a-z\\.]{2,6})"
173 | > RE.match emailRegex "hello@world.com"
174 | ("hello@world.com",[Just "hello",Just "world",Just "com"],"")
175 | > RE.match emailRegex "foobar"
176 | ("",[],"foobar")
177 | > RE.replace emailRegex True "hello@world.com, foo@bar.com" "x@y.z"
178 | "x@y.z, x@y.z"
179 | > RE.extract emailRegex "hello@world.com" "http://\\2.\\3"
180 | "http://world.com"
181 | ```
182 |
183 | # Print to Text
184 |
185 | The `Z.Data.Text` module provides `toText` to quickly convert a data type to `Text` based on the `Print` class; it's similar to `Show`, but uses `Generic` support:
186 |
187 | ```
188 | > import GHC.Generics
189 | > import qualified Z.Data.Text as T
190 | > data Date = Date { year :: Int, month :: Int, day :: Int } deriving (Generic, T.Print)
191 | > T.toText $ Date 2020 1 12
192 | "Date {year = 2020, month = 1, day = 12}"
193 | ```
194 |
195 | It's recommended to derive `Print` for your data types to get fast text conversion, though current GHC compiles `Generic` code fairly slowly.
196 |
197 |
198 | # List fusion
199 |
200 | `Vec` instances and `Text` support the [build-foldr](https://wiki.haskell.org/Correctness_of_short_cut_fusion#foldr.2Fbuild) fusion by providing fusion rules enabled `pack/unpack`, the following code should iterate the input vector and produce the output vector in a single pass rather than producing an intermediate list:
201 |
202 | ```haskell
203 | f :: V.Vector a -> V.Vector b
204 | f = V.pack . filter h . map g . V.unpack
205 | ```
206 |
207 | This is different from the following code, which will produce an intermediate vector (may not be slower though):
208 |
209 | ```haskell
210 | f :: V.Vector a -> V.Vector b
211 | f = V.filter h . V.map' g
212 | ```
213 |
214 | When working with sequential data, it's recommended to choose vectors as the final representation of data, since it's more compact and GC friendly.
215 |
216 | # Type cheatsheet
217 |
218 | [Z-Data](https://hackage.haskell.org/package/Z-Data) simplified a lot of types already, but in case of getting confused, here's a type cheat sheet:
219 |
220 | ```
221 | +---------------------------------------------------------+
222 | | Vec class | + Use Array to save ADTs.
223 | | | + Use SmallArray if you don't
224 | | +----------------------+ +-----------------------+ | often mutate.
225 | | | Arr class | | Slice types | | + Use PrimArray to save
226 | | | | | support O(1) slicing | | primitive types like
227 | | | +---------+ | | with offset/length | | Int or Word8.
228 | | | | Array a | | | | | + Use UnliftedArray to save
229 | | | +---------+ | | | | unlifted types like
230 | | | | | | | IORef or Array.
231 | | | +---------------+ | | | |
232 | | | |UnliftedArray a| | | | | + Use slice types to get O(1)
233 | | | +---------------+ | | | | slicing operations.
234 | | | | | | | + Use Bytes to represent
235 | | | +--------------+ | | +----------+ | | binary data.
236 | | | | SmallArray a +->arrVec->+ Vector a | | |
237 | | | +--------------+ | | +----------+ | | + Use Text to represent
238 | | | | | | | UTF-8 encoded bytes.
239 | | | +-------------+ | | +--------------+ | |
240 | | | | PrimArray a +->arrVec->-+ PrimVector a | | |
241 | | | +-------------+ | | +--------------+---+ | |
242 | | | | | | Bytes | | |
243 | | | | | | PrimVector Word8 | | |
244 | | | | | +-------+----------+ | |
245 | | +----------------------+ +----------V------------+ |
246 | +----------------------------------------|----------------+
247 | validate
248 | |
249 | V
250 | +--------+------------+
251 | | Text |
252 | | UTF-8 encoded Bytes |
253 | +---------------------+
254 | ```
255 |
--------------------------------------------------------------------------------
/Z-Data/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: default
3 | title: Z-Data
4 | nav_order: 3
5 | has_children: true
6 | ---
7 |
8 | [](https://hackage.haskell.org/package/Z-Data)
9 | [](https://github.com/ZHaskell/z-data/actions)
10 | [](https://github.com/ZHaskell/z-data/actions)
11 | [](https://github.com/ZHaskell/z-data/actions)
12 |
13 | [Z-Data](https://github.com/haskell-Z/z-data) provides basic data structures and functions:
14 |
15 | * Array, vector(array slice), Bytes(Word8 vectors)
16 | * UTF-8 based Text, basic unicode manipulation
17 | * FFI utilities
18 | * Parsing and building monad
19 | * JSON encoding and decoding
20 |
--------------------------------------------------------------------------------
/Z-IO/BIO-Streaming.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: default
3 | parent: Z-IO
4 | title: BIO Streaming
5 | nav_order: 3
6 | ---
7 |
8 | ## Table of contents
9 | {: .no_toc .text-delta }
10 |
11 | 1. TOC
12 | {:toc}
13 |
14 | # BIO: composable callbacks
15 |
16 | In previous sections, we introduced the `Z.IO.Buffered` module, which provides APIs for buffered reading and writing. When combined with the [Builder and Parser](https://z.haskell.world/Z-Data/Parser-and-Builder.html) facility, it is easy to handle some simple streaming tasks, for example, reading/writing packets from a TCP wire. But sometimes, things could get complicated. Let's say you want to use the [zlib](https://zlib.net) library to decompress a bytes stream from some file. The interface provided by zlib is like this:
17 |
18 | ```c
19 | int inflateInit (z_streamp strm);
20 | int inflate (z_streamp strm, int flush);
21 | int inflateEnd (z_streamp strm);
22 | ```
23 |
24 | It's OK to draw a chunk from `BufferedInput`, feed it to `z_streamp`, check the status and do some computation if a decompressed chunk is produced. But how to read a line from decompressed streams? We can't reuse `readLine` from `Z.IO.Buffered` since decompressed chunks are not drawn directly from `BufferedInput`.
25 |
26 | Ideally, we should have a composable `BufferedInput` type, which can accept some transformations and yield another `BufferedInput`. But `BufferedInput` is all about managing reading from buffer so that raw byte chunks can be drawn from the device. In Z-IO the `BIO` type is introduced to solve the composable streaming problem:
27 |
28 | ```haskell
29 | type BIO inp out = (Maybe out -> IO ()) -> Maybe inp -> IO ()
30 | ```
31 |
32 | Conceptually a `BIO` is a box doing transformation on data callbacks:
33 |
34 | ```haskell
35 | -- A pattern synonym for more meaningful pattern match
36 | pattern EOF :: Maybe a
37 | pattern EOF = Nothing
38 |
39 | fooBIO :: BIO foo bar
40 | fooBIO callback maybeFoo = do
41 | ... use callback to pass output data
42 | case maybeFoo of
43 | Just foo ->
44 | ... you can send result to downstream by pass Just values
45 | ... to callback, and you can call callback multiple times.
46 | callback (Just ...)
47 | ...
48 | callback (Just ...)
49 | ...
50 | EOF ->
51 | ... you should pass EOF to callback to indicate current
52 | ... node also reaches its EOF
53 | callback EOF
54 | ```
55 |
56 | The `BIO` type has two params:
57 |
58 | + A `callback :: Maybe out -> IO ()`(often written as `k`) which gets called to write downstream:
59 | + A `Just out` value is an item passed to downstream.
60 | + An `EOF` notifies downstream of EOF.
61 | + A `Maybe inp` value which comes from upstream:
62 | + A `Just inp` value is an item from upstream.
63 | + An `EOF` signals that upstream has reached EOF.
64 |
65 | Let's take zlib's `z_streamp` as an example to implement a compressing BIO node:
66 |
67 | ```haskell
68 | compressBIO :: ZStream -> BIO V.Bytes V.Bytes
69 | compressBIO zs = \ callback mbs ->
70 | case mbs of
71 | Just bs -> do
72 | -- feed input chunk to ZStream
73 | set_avail_in zs bs (V.length bs)
74 | let loop = do
75 | oavail :: CUInt <- withCPtr zs $ \ ps -> do
76 | -- perform deflate and peek output buffer remaining
77 | throwZlibIfMinus_ (deflate ps (#const Z_NO_FLUSH))
78 | (#peek struct z_stream_s, avail_out) ps
79 | when (oavail == 0) $ do
80 | -- when output buffer is full,
81 | -- freeze chunk and call the callback
82 | oarr <- A.unsafeFreezeArr =<< readIORef bufRef
83 | callback (Just (V.PrimVector oarr 0 bufSiz))
84 | newOutBuffer
85 | loop
86 | loop
87 | _ -> ... similar to above, with no input chunk and Z_FINISH flag
88 | ```
89 |
90 | # Source and Sink types
91 |
92 | Now let's consider the following devices:
93 |
94 | + A data source which doesn't take any input but can be read until EOF.
95 | + A data sink which only performs writing without producing any meaningful result.
96 |
97 | We can have the definitions for data `Source` and `Sink` by using `Void` from `Data.Void`:
98 |
99 | ```haskell
100 | -- Source type doesn't need input
101 | type Source a = BIO Void a
102 | -- Sink type doesn't produce output
103 | type Sink a = BIO a Void
104 | ```
105 |
106 | Because `Void` type doesn't have constructors, one should ignore the `Maybe Void` param when defining a `Source`. For example, a `BIO` node sourcing chunks from `BufferedInput` can be implemented like this:
107 |
108 | ```haskell
109 | sourceFromBuffered :: BufferedInput -> Source V.Bytes
110 | sourceFromBuffered i = \ k _ ->
111 | let loop = readBuffer i >>= \ x ->
112 | if V.null x then k EOF else k (Just x) >> loop
113 | in loop
114 | ```
115 |
116 | For `type Sink a = BIO a Void`, the callback type is `Maybe Void -> IO ()`, which means you can only pass `EOF` to the callback, the convention here is to only call callback when EOF:
117 |
118 | ```haskell
119 | -- | The `BufferedOutput` device will get flushed only on EOF.
120 | sinkToBuffered :: BufferedOutput -> Sink V.Bytes
121 | sinkToBuffered bo = \ k mbs ->
122 | case mbs of
123 | Just bs -> writeBuffer bo bs
124 | _ -> flushBuffer bo >> k EOF
125 | ```
126 |
127 | # Composing BIO
128 |
129 | The `BIO` type could be composed via `(.)`, i.e. the function composition. The composition's result has some interesting facts:
130 |
131 | + If you compose a `Source a` to `BIO a b`, you will get a `Source b`.
132 | + If you compose a `BIO a b` to `Sink b`, you will get a `Sink a`.
133 |
134 | So let's say you want to count the number of lines in a file, you could use `BIO`:
135 |
136 | ```haskell
137 | import Z.IO
138 | import Z.Data.PrimRef
139 |
140 | main :: IO ()
141 | main = do
142 | _:path:_ <- getArgs
143 | withResource (initSourceFromFile path) $ \ fileSource -> do
144 | counterRef <- newCounter 0
145 | let counter = counterNode counterRef
146 | splitter <- newLineSplitter
147 | runBIO_ $ fileSource . splitter . counter
148 | printStd =<< readPrimIORef counterRef
149 | ```
150 |
151 | `runBIO_ :: BIO inp out -> IO ()` simply supplies an `EOF` to the BIO chain, and `fileSource` will drive the whole chain until EOF. It's defined as:
152 |
153 | ```haskell
154 | discard :: a -> IO ()
155 | {-# INLINABLE discard #-}
156 | discard _ = return ()
157 |
158 | runBIO_ :: BIO inp out -> IO ()
159 | {-# INLINABLE runBIO_ #-}
160 | runBIO_ bio = bio discard EOF
161 | ```
162 |
163 | Another example from the [introduce BIO blog post](https://z.haskell.world/design/2021/04/20/introduce-BIO-a-simple-streaming-abstraction.html):
164 |
165 | ```haskell
166 | import Z.Data.CBytes (CBytes)
167 | import Z.IO
168 | import Z.IO.BIO
169 | import Z.IO.BIO.Zlib
170 |
171 | base64AndCompressFile :: HasCallStack => CBytes -> CBytes -> IO ()
172 | base64AndCompressFile origin target = do
173 | base64Enc <- newBase64Encoder
174 | (_, zlibCompressor) <- newCompress defaultCompressConfig{compressWindowBits = 31}
175 |
176 | withResource (initSourceFromFile origin) $ \ src ->
177 | withResource (initSinkToFile target) $ \ sink ->
178 | runBIO_ $ src . base64Enc . zlibCompressor . sink
179 | ```
180 |
181 | The above code is similar to the command line `cat origin | base64 | gzip > target`.
182 |
--------------------------------------------------------------------------------
/Z-IO/Filesystem.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: default
3 | parent: Z-IO
4 | title: Filesystem
5 | nav_order: 1
6 | ---
7 |
8 | ## Table of contents
9 | {: .no_toc .text-delta }
10 |
11 | 1. TOC
12 | {:toc}
13 |
14 | # Hello File
15 |
16 | It's easy to use Z-IO package's filesystem module, first please import `Z.IO.FileSystem`:
17 |
18 | ```haskell
19 | import qualified Z.IO.FileSystem as FS
20 | ```
21 |
22 | If it's OK to load a file into memory at once, you can use the following functions:
23 |
24 | ```haskell
25 | readFile :: HasCallStack => CBytes -> IO Bytes
26 | readTextFile :: HasCallStack => CBytes -> IO Text
27 | writeFile :: HasCallStack => CBytes -> Bytes -> IO ()
28 | writeTextFile :: HasCallStack => CBytes -> Text -> IO ()
29 | ```
30 |
31 | `CBytes` is Z's file path type. `Bytes` and `Text` are types for binary and textual content, respectively. These types are documented in [Z-Data section](https://z.haskell.world/Z-Data/). `readTextFile` and `writeTextFile` assume UTF-8 encoding:
32 |
33 |
34 | ```haskell
35 | > FS.writeTextFile "./test_file" "hello world!"
36 | > FS.readFile "./test_file"
37 | [104,101,108,108,111,32,119,111,114,108,100,33]
38 | > FS.readTextFile "./test_file"
39 | "hello world!"
40 | ```
41 |
42 | # Resource Handling
43 |
44 | Now let's see a more complicated function:
45 |
46 | ```haskell
47 | initFile :: CBytes
48 | -> FileFlag -- ^ Opening flags, e.g. 'O_CREAT' @.|.@ 'O_RDWR'
49 | -> FileMode -- ^ Sets the file mode (permission and sticky bits),
50 | -- but only if the file was created, see 'DEFAULT_FILE_MODE'.
51 | -> Resource File
52 | ```
53 |
54 | `FileFlag` and `FileMode` are bit constants controlling the file opening behavior, such as if we have read or write access or if a new file will be created when there's none. You can find more constants on [hackage docs](https://hackage.haskell.org/package/Z-IO-0.7.1.0/docs/Z-IO-FileSystem-Base.html#g:5). The interesting thing here is that `initFile` function returns a `Resource File` type instead of `IO File`. `Resource` is defined in `Z.IO.Resource` module, with following functions to use it:
55 |
56 | ```haskell
57 | withResource :: HasCallStack
58 | => Resource a -- ^ resource management record
59 | -> (a -> IO b) -- ^ function working on a resource
60 | -> IO b
61 |
62 | withResource' :: HasCallStack
63 | => Resource a -- ^ resource management record
64 | -> (a -> IO () -> IO b)
65 | -- ^ second param is the close function for early closing
66 | -> IO b
67 | ```
68 |
69 | We simplified those two functions' type a little bit, and here is the idea: `withResource` will take care of resource opening and cleanup automatically, after you finish using it, or when exceptions happen. You only need to pass a function working on that resource. Now let's read the file created above again:
70 |
71 | ```haskell
72 | import Z.IO -- this module re-export Z.IO.Resource and other common stuff
73 | import qualified Z.IO.FileSystem as FS
74 |
75 | withResource (FS.initFile "./test_file" FS.O_RDWR FS.DEFAULT_FILE_MODE) $ \ file -> do
76 | bi <- newBufferedInput file
77 | printStd =<< readLine bi
78 | ```
79 |
80 | `initFile` function doesn't open the file, and it just records how to open and close the file. Every time you want to do something with the file, use `withResource` to open(and close) it, that's all about resource handling in Z.
81 |
82 | `Resource` has a `Monad` instance, which is useful for safely combining resources, e.g. instead of writing the following code:
83 |
84 | ```haskell
85 | withResource initRes1 $ \ res1 ->
86 | withResource initRes2 $ \ res2 ->
87 | withResource initRes3 $ \ res3 ->
88 | ... res1 ... res2 ... res3
89 | ```
90 |
91 | You could define a combined `Resource`:
92 |
93 | ```haskell
94 | initRes123 :: Resource (Res1, Res2, Res3)
95 | initRes123 = do
96 | res1 <- initRes1
97 | res2 <- initRes2
98 | res3 <- initRes3
99 | return (res1, res2, res3)
100 | ```
101 |
102 | Now `withResource initRes123 $ \ (res1, res2, res3) -> ...` will first open `res1`, `res2`, `res3` in order, then close them in reverse order. You could even interleave `IO` action within `Resource` using its `MonadIO` instance:
103 |
104 | ```haskell
105 | initRes123 :: Resource (Res1, Res2)
106 | initRes123 = do
107 | res1 <- initRes1
108 | res2Param <- liftIO $ ... res1 ...
109 | res2 <- initRes2 res2Param
110 | return (res1, res2)
111 | ```
112 |
113 | The lifted `IO` action will become a part of the resource opening process.
114 |
115 | # Buffered I/O
116 |
117 | `newBufferedInput` and `readLine` functions in the code above are from `Z.IO.Buffered` module(also re-exported from `Z.IO`). In Z-IO, many IO devices(including `File` above) are instances of `Input/Output` class:
118 |
119 | ```haskell
120 | class Input i where
121 | readInput :: HasCallStack => i -> Ptr Word8 -> Int -> IO Int
122 | class Output o where
123 | writeOutput :: HasCallStack => o -> Ptr Word8 -> Int -> IO ()
124 | ```
125 |
126 | `readInput` and `writeOutput` work on pointers, which is not very convenient for direct usage. Open a `BufferedInput` or `BufferedOutput` to get auto-managed buffered I/O:
127 |
128 | ```haskell
129 | newBufferedInput :: Input i => i -> IO BufferedInput
130 | newBufferedOutput :: Output o => o -> IO BufferedOutput
131 | ```
132 |
133 | There's a set of functions working on `BufferedInput/BufferedOutput` in `Z.IO.Buffered`, for example, to implement a word counter for files:
134 |
135 | ```haskell
136 | import Z.IO
137 | import qualified Z.IO.FileSystem as FS
138 | import qualified Z.Data.Vector as V
139 |
140 | main :: IO ()
141 | main = do
142 | -- get file path from command line
143 | (_:path:_) <- getArgs
144 | withResource (FS.initFile path FS.O_RDWR FS.DEFAULT_FILE_MODE) $ \ file -> do
145 | bi <- newBufferedInput file
146 | printStd =<< loop bi 0
147 | where
148 | loop :: BufferedInput -> Int -> IO Int
149 | loop input !wc = do
150 | -- read a single line with linefeed dropped
151 | line <- readLine input
152 | case line of
153 | Just line' ->
154 | loop input (wc + length (V.words line'))
155 | _ -> return wc
156 | ```
157 |
158 | Here's a quick cheatsheet on buffered IO, `BufferedInput` first:
159 |
160 | ```haskell
161 | -- | Request a chunk from the input device.
162 | readBuffer :: HasCallStack => BufferedInput -> IO Bytes
163 |
164 | -- | Push back an unconsumed chunk
165 | unReadBuffer :: HasCallStack => Bytes -> BufferedInput -> IO ()
166 |
167 | -- | Read exactly N bytes, throw exception if EOF reached before N bytes.
168 | readExactly :: HasCallStack => Int -> BufferedInput -> IO Bytes
169 |
170 | -- /----- readToMagic ----- \ /----- readToMagic -----\ ...
171 | -- +------------------+-------+-----------------+-------+
172 | -- | ... | magic | ... | magic | ...
173 | -- +------------------+-------+-----------------+-------+
174 | readToMagic :: HasCallStack => Word8 -> BufferedInput -> IO Bytes
175 |
176 | -- /--- readLine ---\ discarded /--- readLine ---\ discarded / ...
177 | -- +------------------+---------+------------------+---------+
178 | -- | ... | \r\n/\n | ... | \r\n/\n | ...
179 | -- +------------------+---------+------------------+---------+
180 | readLine :: HasCallStack => BufferedInput -> IO (Maybe Bytes)
181 |
182 | -- | Read all chunks from input.
183 | readAll :: HasCallStack => BufferedInput -> IO [Bytes]
184 | readAll' :: HasCallStack => BufferedInput -> IO Bytes
185 |
186 | -- | See Parser & Builder under Z-Data section for the following functions.
187 | -- | Request input using Parser
188 | readParser :: HasCallStack => Parser a -> BufferedInput -> IO a
189 |
190 | -- | Request input using ParseChunks, see Parser & Builder under Z-Data section.
191 | readParseChunks :: (Print e, HasCallStack) => ParseChunks IO Bytes e a -> BufferedInput -> IO a
192 | ```
193 |
194 | `BufferedOutput` is relatively simple:
195 |
196 | ```haskell
197 | -- | Write a chunk into buffer.
198 | writeBuffer :: HasCallStack => BufferedOutput -> Bytes -> IO ()
199 | -- | Directly write Builder into output device.
200 | writeBuilder :: HasCallStack => BufferedOutput -> Builder a -> IO ()
201 | -- | Flush the buffer into output device.
202 | flushBuffer :: HasCallStack => BufferedOutput -> IO ()
203 | ```
204 |
205 | # A note on filepath
206 |
207 | Other operations from `Z.IO.FileSystem` module, e.g., `seek`, `mkdtemp`, `rmdir`, etc., are basically mirroring the Unix system call, which should be familiar to people who come from C/C++. The type for file path in Z is `CBytes`, which is a `\NUL` terminated byte array managed on GHC heap.
208 |
209 | We assume that `CBytes`'s content is UTF-8 encoded, though that may not always be the case, and there are some platform differences in file path handling, e.g., the separator on Windows is different from Unix. To properly handle file paths, use `Z.IO.FileSystem.FilePath` (which is re-exported from `Z.IO.FileSystem`). For example, instead of manually concatenating file paths like:
210 |
211 | ```haskell
212 | let p = "foo" <> "/" <> "bar"
213 | ```
214 | You should always use functions from the library:
215 |
216 | ```haskell
217 | import qualified Z.IO.FileSystem as FS
218 |
219 | let p = "foo" `FS.join` "bar"
220 | -- "foo" `FS.join` "../bar" will yield "bar" instead of "foo/../bar"
221 | ```
222 |
--------------------------------------------------------------------------------
/Z-IO/Logger.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: default
3 | parent: Z-IO
4 | title: Logger
5 | nav_order: 4
6 | ---
7 |
8 | ## Table of contents
9 | {: .no_toc .text-delta }
10 |
11 | 1. TOC
12 | {:toc}
13 |
14 | # Logging functions
15 |
16 | High-performance logging is important to all kinds of applications. In Z-IO, all you have to do is to import `Z.IO` and use the following functions:
17 |
18 | ```haskell
19 | {-# LANGUAGE OverloadedStrings #-}
20 | import qualified Z.Data.Builder as B
21 | import Z.IO
22 |
23 | -- logging functions all work directly in IO monad
24 | debug, info , warning, fatal, critical :: B.Builder () -> IO ()
25 |
26 | -- you can use B.Builder's IsString instance
27 | debug "..."
28 | -- together with B.Builder's Monad instance
29 | info $ "..." >> B.int 666 >> "..."
30 | warning $ do
31 | "..."
32 | B.int 666
33 | "..."
34 | fatal "this is an important message"
35 | critical "OMG, system is on fire"
36 | ```
37 |
38 | Note that `debug/info/warning` do not trigger a log flush, while `fatal/critical` always trigger one. If `debug/info/warning` logs matter to you, use `withDefaultLogger` like this:
39 |
40 | ```
41 | main :: IO ()
42 | main = withDefaultLogger $ do
43 | ...
44 | ```
45 |
46 | It will add a flush after the application finishes to ensure that `debug/info/warning` logs are flushed.
47 |
48 | # Setup Logger
49 |
50 | Z-IO's `Logger` has the following concurrency characteristics:
51 |
52 | * Logging functions are lock-free and can be used across threads.
53 | * Logs are atomic, and the order is preserved.
54 | * Flushing is protected by the lock, and there'll be no concurrent writing to the buffered device.
55 |
56 | So there is no need to worry about anything since most of the things are taken care of, just import and start to log. Functions like `debugTo/infoTo/warningTo...` that explicitly write logs to given `Logger` are provided. However, most of the time, use the default `Logger`. And, use `setDefaultLogger` to change it when the application starts. Z-IO supports writing logs to different devices with different formats:
57 |
58 | ```haskell
59 | -- logs can be written to any `BufferedOutput`s with `MVar` protected from concurrent access
60 | newLogger :: LoggerConfig -> MVar BufferedOutput -> IO Logger
61 | -- create a logger connected to stderr
62 | newStdLogger :: LoggerConfig -> IO Logger
63 | -- create a file based logger
64 | newFileLogger :: LoggerConfig -> CBytes -> IO Logger
65 |
66 | -- Change LoggerConfig's loggerFormatter field to change logging format:
67 | -- [FATAL][2021-02-01T15:03:30+0800][interactive:31:1][thread#669]...\n
68 | defaultFmt :: LogFormatter
69 | -- Same with defaultFmt, but level is colored: cyan DEBUG, yellow WARNING, red FATAL and CRITICAL
70 | defaultColoredFmt :: LogFormatter
71 | -- {"level":"FATAL","time":"2021-02-01T15:02:19+0800","loc":":27:1","theadId":606,"content":"..."}\n
72 | defaultJSONFmt :: LogFormatter
73 | ```
74 |
75 | Initial default loggers are connected to stderr. Use `defaultColoredFmt` if stderr is connected to a TTY device, and use `defaultFmt` otherwise. An example of setting up a logger:
76 |
77 | ```haskell
78 | main :: IO ()
79 | main = do
80 | -- setup filter level to WARNING, info/debug logs will be ignored.
81 | -- use file based logger, and write to "app.log"
82 | setDefaultLogger =<< newFileLogger defaultJSONLoggerConfig
83 | { loggerConfigLevel = WARNING } "app.log"
84 | withDefaultLogger $ do
85 | ...
86 | ```
87 |
--------------------------------------------------------------------------------
/Z-IO/Network.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: default
3 | parent: Z-IO
4 | title: Network
5 | nav_order: 2
6 | ---
7 |
8 | ## Table of contents
9 | {: .no_toc .text-delta }
10 |
11 | 1. TOC
12 | {:toc}
13 |
14 | # Client and server
15 |
16 | The Network is all about sending and receiving data. Using Z-IO's Network is straightforward:
17 |
18 | ```haskell
19 | {-# LANGUAGE OverloadedStrings #-}
20 | import Z.IO
21 | import Z.IO.Network
22 | import Z.Data.Text as T
23 |
24 | main :: IO ()
25 | main = do
26 | -- use getAddrInfo to perform DNS resolution
27 | addr:_ <- getAddrInfo Nothing "www.bing.com" "http"
28 | -- use initTCPClient to initialize a TCP client
29 | withResource (initTCPClient defaultTCPClientConfig{
30 | tcpRemoteAddr = addrAddress addr}) $ \ tcp -> do
31 | -- use BufferedInput/BufferedOutput facility to read from/write to tcp socket
32 | i <- newBufferedInput tcp
33 | o <- newBufferedOutput tcp
34 | writeBuffer o "GET http://www.bing.com HTTP/1.1\r\nHost: www.bing.com\r\n\r\n"
35 | flushBuffer o
36 | readBuffer i >>= pure . T.validate
37 |
38 | -- use startTCPServer to start serving in TCP protocol
39 | startTCPServer defaultTCPServerConfig{
40 | tcpListenAddr = SocketAddrIPv4 ipv4Loopback 8080} $ \ tcp -> do
41 | o <- newBufferedOutput tcp
42 | writeBuffer o "hello world" >> flushBuffer o
43 | ```
44 |
45 | Z.Haskell provides several network capabilities:
46 |
47 | + `Z.IO.Network.IPC` provides the stream channel for inter-process communication based on domain socket(Unix) or named pipe(Windows).
48 | + `Z.IO.Network.TCP` provides the stream channel for remote communication based on TCP socket.
49 | + `Z.IO.Network.UDP` provides the message channel on top of the UDP socket.
50 | + A TLS implementation based on [botan](https://botan.randombit.net/) is under development.
51 |
52 | Let's take the TCP module as an example. Lots of low-level socket details (`bind`, `listen`, `accept`, etc.) are hidden, with two high-level operations left:
53 |
54 | ```haskell
55 | -- | Connect to a TCP target
56 | initTCPClient :: HasCallStack => TCPClientConfig -> Resource UVStream
57 | -- | Start a TCP server
58 | startTCPServer :: HasCallStack
59 | => TCPServerConfig
60 | -> (UVStream -> IO ())
61 | -- ^ worker which will get an accepted TCP stream
62 | -- and run in a separate Haskell thread
63 | -> IO ()
64 | ```
65 |
66 | # Send/receive packet
67 |
68 | The `UVStream` type implements the `Input/Output` class from `Z.IO.Buffered`, so that you can reuse all the buffered read/write API. For example, let's say you have designed a simple framed message protocol:
69 |
70 | ```haskell
71 | import Data.Word
72 | import qualified Z.Data.Vector as V
73 |
74 | -- uint8 message type uint16 payload length message payload
75 | -- +------------------+----------------------+------------------
76 | -- | 0xXX | 0xXXXX(big endian) | ...
77 | -- +------------------+----------------------+------------------
78 |
79 | data Message = Message { msgTyp :: Word8, msgPayload :: V.Bytes }
80 | ```
81 |
82 | You can manually decode message frames like this:
83 |
84 | ```haskell
85 | -- import bit operations
86 | import Data.Bits (unsafeShiftL, (.|.))
87 | import Z.IO
88 |
89 | readMessage :: HasCallStack => BufferedInput -> IO Message
90 | readMessage buffered_i = do
91 | msg_typ <- readExactly buffered_i 1
92 | payload_len_h <- readExactly buffered_i 1
93 | payload_len_l <- readExactly buffered_i 1
94 | let payload_len =
95 | (fromIntegral payload_len_h) `unsafeShiftL` 8
96 | .|. (fromIntegral payload_len_l)
97 | payload <- readExactly buffered_i payload_len
98 | return (Message msg_typ payload)
99 | ```
100 |
101 | Or you can use `Parser` from `Z.Data.Parser` module:
102 |
103 |
104 | ```haskell
105 | import qualified Z.Data.Parser as P
106 | import Data.Word
107 | import Z.IO
108 |
109 | parseMessage :: P.Parser Message
110 | parseMessage = do
111 | msg_typ <- P.decodePrim @Word8
112 | payload_len <- P.decodePrimBE @Word16
113 | payload <- P.take (fromIntegral payload_len)
114 | return (Message msg_typ payload)
115 |
116 | readMessage :: HasCallStack => BufferedInput -> IO Message
117 | readMessage = readParser parseMessage
118 | ```
119 |
120 | `readParser` runs the `Parser` once at a time, parses a `Message` out of the buffer, and waits for more input automatically. Writing a `Message` to the TCP socket is similar:
121 |
122 | ```haskell
123 | import qualified Z.Data.Builder as B
124 | import qualified Z.Data.Vector as V
125 | import Z.IO
126 |
127 | writeMessage :: HasCallStack => BufferedOutput -> Message -> IO ()
128 | writeMessage bo (Message msg_typ payload) = do
129 | -- use Builder monad to compose buffer writing functions
130 | writeBuilder bo $ do
131 | B.encodePrim msg_typ
132 | B.encodePrimBE (fromIntegral (V.length payload) :: Word16)
133 | B.bytes payload
134 | -- you may want to add a flush after each message has been written
135 | -- or leave flush to the caller
136 | -- flushBuffer bo
137 | ```
138 |
139 | Z.Haskell provides many tools to deal with the streaming nature of TCP protocol (and many other streaming devices such as IPC and Files). In the next section, we will introduce the `BIO`, a more high-level streaming API.
140 |
141 | # UDP
142 |
143 | UDP is different from IPC or TCP in that it's a message protocol rather than a streaming one. There are no `Input/Output` instances for the `UDP` type. Instead, Z-IO provides message reading & writing functions for UDP directly:
144 |
145 | ```haskell
146 | -- | Initialize a UDP socket.
147 | initUDP :: UDPConfig -> Resource UDP
148 | -- | Send a UDP message to target address.
149 | sendUDP :: HasCallStack => UDP -> SocketAddr -> V.Bytes -> IO ()
150 | -- | Receive messages from UDP socket, return source address if available, and a `Bool`
151 | -- to indicate if the message is partial (larger than receive buffer size).
152 | recvUDP :: HasCallStack => UDPRecvConfig -> UDP -> IO [(Maybe SocketAddr, Bool, V.Bytes)]
153 | -- | Receive UDP messages within a loop
154 | recvUDPLoop :: HasCallStack
155 | => UDPRecvConfig
156 | -> UDP
157 | -> ((Maybe SocketAddr, Bool, V.Bytes) -> IO a)
158 | -> IO ()
159 | ```
160 |
161 | Loop receiving(`recvUDPLoop`) can be faster since it can reuse the receiving buffer internally. Unlike the TCP server above, the UDP worker function is called on the current Haskell thread instead of a forked one. If you have heavy computations within the worker function, consider using `forkBa` from `Z.IO.UV.Manager` (a function similar to `forkIO`, but with active thread balancing).
162 |
--------------------------------------------------------------------------------
/Z-IO/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: default
3 | title: Z-IO
4 | nav_order: 2
5 | has_children: true
6 | ---
7 |
8 | [](https://hackage.haskell.org/package/Z-IO)
9 | [](https://github.com/ZHaskell/z-io/actions)
10 | [](https://github.com/ZHaskell/z-io/actions)
11 | [](https://github.com/ZHaskell/z-io/actions)
12 |
13 | # Z-IO
14 |
15 | Z-IO package provides high-performance I/O operations based on libuv's event loop and GHC lightweight threads.
16 |
--------------------------------------------------------------------------------
/_config.yml:
--------------------------------------------------------------------------------
1 | # Welcome to Jekyll!
2 | #
3 | # This config file is meant for settings that affect your whole blog, values
4 | # which you are expected to set up once and rarely edit after that. If you find
5 | # yourself editing this file very often, consider using Jekyll's data files
6 | # feature for the data you need to update frequently.
7 | #
8 | # For technical reasons, this file is *NOT* reloaded automatically when you use
9 | # 'bundle exec jekyll serve'. If you change this file, please restart the server process.
10 |
11 | # Site settings
12 | # These are used to personalize your new site. If you look in the HTML files,
13 | # you will see them accessed via {{ site.title }}, {{ site.email }}, and so on.
14 | # You can create any custom variable you would like, and they will be accessible
15 | # in the templates via {{ site.myvariable }}.
16 | title: Z.Haskell
17 | email: winterland1989@gmail.com
18 | description: >- # this means to ignore newlines until "baseurl:"
19 | The document site for Z.Haskell.
20 | baseurl: "/" # the subpath of your site, e.g. /blog
21 | url: "" # the base hostname & protocol for your site, e.g. http://example.com
22 | github_username: winterland1989
23 |
24 | # Build settings
25 | markdown: kramdown
26 | theme: "just-the-docs"
27 | remote_theme: "pmarsceill/just-the-docs"
28 | plugins:
29 | - jekyll-feed
30 |
31 | aux_links:
32 | "GitHub":
33 | - "//github.com/ZHaskell"
34 |
35 | # Footer content
36 | # appears at the bottom of every page's main content
37 | # Note: The footer_content option is deprecated and will be removed in a future major release. Please use `_includes/footer_custom.html` for more robust markup / liquid-based content.
38 | footer_content: '
12 |
13 |
14 | {{ content }}
15 |
16 |
21 |
22 |
--------------------------------------------------------------------------------
/_posts/2021-02-01-High-performance-JSON-codec.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: post
3 | title: "High-performance JSON codec"
4 | date: 2021-02-01 16:52:44 CST
5 | author: Dong
6 | categories: performance
7 | ---
8 |
9 | JSON processing is a fundamental building block in modern network applications. It's also a large module in the [Z-Data](//hackage.haskell.org/package/Z-Data) package. With careful optimization, we managed to get a 1.5X - 3X encoding and 3X decoding performance boost compared to [aeson](//hackage.haskell.org/package/aeson), a widely used JSON package on hackage.
10 |
11 |
12 |
13 | ## Benchmark Result
14 |
15 | 
16 |
17 | The [above benchmarks](//github.com/ZHaskell/z-benchmarks) were run on an MBP13 2020 (2 GHz Quad-Core Intel Core i5). Each benchmark runs a certain JSON task with fixed iterations, using [sample data](//github.com/ZHaskell/benchmarks/tree/master/asset/json-data). Some notes on the benchmark code:
18 |
19 | * Benchmarks labeled with `encode` and `decode` bench the conversion between JSON documents and the JSON intermediate representation.
20 | * Benchmarks labeled with `typed encode` and `typed decode` bench the conversion between JSON documents and Haskell ADTs.
21 | * All ADT instances are derived using GHC's generic mechanism; no manual conversion code is required.
22 |
23 | ## Fast escaping handling
24 |
25 | Surprisingly, when processing JSON, one can't directly copy strings because they may be [escaped](https://tools.ietf.org/html/rfc8259#page-8), which brings a quite big performance challenge. In [Z-Data](//hackage.haskell.org/package/Z-Data) we carefully arranged the code path to avoid performance hit:
26 |
27 | * When encoding text value
28 |
29 | 1. Run a prescan loop to find if we need escaping, and how much space we need to write the escaped string if escaping is needed.
30 | 2. If there's no escaping needed, vectorized `copyByteArray#` is used to directly write text into the output buffer.
31 | 3. Otherwise, go through the escaping loop.
32 |
33 | * When decoding JSON string
34 |
35 | 1. Run a prescan to find the end of the string, record if unescaping is needed at the same time.
36 | 2. If no unescaping is needed, a vectorized UTF8 validation is used.
37 | 3. Otherwise, go through a UTF8 validation loop extended with JSON unescaping logic.
38 |
39 | These optimizations are possible because [Z-Data](//hackage.haskell.org/package/Z-Data) uses UTF8 encoding `Text` type, which could save considerable time on the non-escaping path.
40 |
41 | ## IR (intermediate representation)
42 |
43 | Another optimization opportunity comes from the new JSON document IR design. In [Z-Data](//hackage.haskell.org/package/Z-Data) the IR type uses a vector of key-value pairs to represent JSON objects:
44 |
45 | ```haskell
46 | data Value = Object (Vector (Text, Value))
47 | | Array (Vector Value)
48 | | String T.Text
49 | | Number Scientific
50 | | Bool Bool
51 | | Null
52 | deriving (Eq, Ord, Show, Typeable, Generic)
53 | deriving anyclass Print
54 | ```
55 |
56 | This representation has many benefits:
57 |
58 | * Preserve original key-value order, so that round-trip processing is possible.
59 | * Users can choose different de-duplication strategies when converting IR to ADT.
60 | * It's faster to construct an IR value or convert ADT to IR.
61 |
62 | By default [Z-Data](//hackage.haskell.org/package/Z-Data) uses [FlatMap](//hackage.haskell.org/package/Z-Data/docs/Z-Data-Vector-FlatMap.html) when converting IR to ADT, which is simply a sorted vector of key-value pairs. It can be constructed by sorting the original key-value pairs in O(N\*logN) and looked up using binary search in O(logN).
63 |
64 | ## Parser and Builder facility
65 |
66 | [Z-Data](//hackage.haskell.org/package/Z-Data) uses [Bytes](https://hackage.haskell.org/package/Z-Data/docs/Z-Data-Vector.html#t:Bytes), a vector type based on `ByteArray#`, to represent binary data. It's different from traditional bytestring types that use `Addr#` (pointer), so it's necessary to provide a different set of `Builder`s and `Parser`s to work on that representation. In both cases, a simple CPSed monad is chosen to make compiled code fast.
67 |
68 | ```
69 | -- Z.Data.Builder.Base
70 | newtype Builder a = Builder {
71 | runBuilder :: (a -> BuildStep) -- next write continuation
72 | -> BuildStep
73 | }
74 |
75 | -- Z.Data.Parser.Base
76 | newtype Parser a = Parser {
77 | runParser :: forall r . (ParseError -> ParseStep r) -- fail continuation
78 | -> (a -> ParseStep r) -- success continuation
79 | -> ParseStep r
80 | }
81 | ```
82 |
83 | These types are almost the simplest CPS monads one can write, and GHC is particularly good at optimizing the composition of these monads.
84 |
85 | ## Conclusion
86 |
87 | This benchmark compared [Z-Data](//hackage.haskell.org/package/Z-Data) to widely used Haskell package [aeson](//hackage.haskell.org/package/aeson). The result shows that the new `Builder` and `Parser` facility works as expected, and our optimizing techniques can bring a huge performance improvement.
88 |
89 |
90 |
--------------------------------------------------------------------------------
/_posts/2021-04-20-introduce-BIO-a-simple-streaming-abstraction.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: post
3 | title: "Introduce BIO: A Simple Streaming Abstraction"
4 | date: 2021-04-20 14:43:14 CST
5 | author: Dong
6 | categories: design
7 | ---
8 |
9 | Streaming IO is an old idea: the data is read in chunks, each chunk gets processed and written to output so that the whole memory a program used is kept under a relatively low level. e.g.
10 |
11 | ```bash
12 | cat foo.txt | gzip | base64 | tee foo.gz
13 | ```
14 |
15 | Above UNIX commands read a file `foo.txt` in chunks, perform gzip and base64 transformation, and get piped to both `foo.gz` and stdout. We'd like to get similar syntax when using Haskell to work with chunked data, and that's the starting point of streaming abstraction.
16 |
17 |
18 |
19 | ## A Stream ADT
20 |
21 | ### Partial closure
22 |
23 | In [Z-Data's parser section](https://z.haskell.world/Z-Data/Parser-and-Builder.html), we described a resumable parser, which can consume input in chunks:
24 |
25 | ```haskell
26 | > P.parse' dateParser "2020-12-12"
27 | Date 2020 12 12
28 | > P.parseChunk dateParser "2020-"
29 | Partial _
30 | > let (P.Partial f) = P.parseChunk dateParser "2020-"
31 | > let (P.Partial f') = f "05-05" -- incrementally provide input
32 | > f' "" -- push empty chunk to signal EOF
33 | Success Date {year = 2020, month = 5, day = 5}
34 | ```
35 |
36 | The core type to achieve resumable parsing is `Result`:
37 |
38 | ```haskell
39 | data Result e r
40 | = Success r !Bytes
41 | | Failure e !Bytes
42 | | Partial (V.Bytes -> Result e r)
43 | ```
44 |
45 | The `Partial` constructor contains a closure capturing the last chunk's parsing state, which could be applied to the next chunk to produce a new `Result`. Now let's consider if we could apply this construction to IO(or an arbitrary monad), following definition is from the [streaming](https://hackage.haskell.org/package/streaming) package:
46 |
47 | ```haskell
48 | data Stream f m r = Step !(f (Stream f m r))
49 | | Effect (m (Stream f m r))
50 | | Return r
51 |
52 | data Of a b = !a :> b
53 | ```
54 |
55 | ### Stream Monad
56 |
57 | In streaming, `Stream (Of a) IO ()` is used to represent `IO` streams. With some monad primitives you can construct an `IO` stream like this:
58 |
59 | ```haskell
60 | -- Stream monad will provide some primitives to create monadic value, e.g.
61 | -- yield :: Monad m => a -> Stream (Of a) m ()
62 | -- yield a = Step (a :> Return ())
63 | -- instance (MonadIO m, Functor f) => MonadIO (Stream f m) where
64 | -- liftIO = Effect . fmap Return . liftIO
65 |
66 | foo :: Stream (Of Int) IO ()
67 | foo = do
68 | yield 1
69 | yield 2
70 | lift readLn >>= yield
71 | ```
72 |
73 | With the `Stream`'s `Monad` instance, the value of foo now becomes a chain of Stream ADTs:
74 |
75 | ```haskell
76 | Step (1 :> Step (2 :> Effect ((\ x -> Step (x :> Return ())) <$> readLn)))
77 | ```
78 |
79 | Now if we provide a function to iterate through this ADT, the stream could be processed. Such a function is often called an interpreter, a term from [the free monad design pattern](https://softwareengineering.stackexchange.com/questions/242795/what-is-the-free-monad-interpreter-pattern). For example streaming provides its own `foldrM` interpreter to fold over a `Stream` structure:
80 |
81 | ```haskell
82 | foldrM :: Monad m => (a -> m r -> m r) -> Stream (Of a) m r -> m r
83 | foldrM step = loop where
84 | loop stream = case stream of
85 | Return r -> return r
86 | Effect m -> m >>= loop -- This is where IO effects happened!
87 | Step (a :> as) -> step a (loop as)
88 | ```
89 |
90 | ### The Magic Pipes
91 |
92 | There're some packages on hackage pushing the free monad technique to its limit, e.g. the [pipes](http://hackage.haskell.org/package/pipes) provide a rather incomprehensible core ADT type:
93 |
94 | ```haskell
95 | data Proxy a' a b' b m r
96 | = Request a' (a -> Proxy a' a b' b m r )
97 | | Respond b (b' -> Proxy a' a b' b m r )
98 | | M (m (Proxy a' a b' b m r))
99 | | Pure r
100 | ```
101 |
102 | With this beast at hand, pipes could provide more interesting primitives like `await`, or `>->`. e.g `do x <- await; y <- await; return (x, y)` becomes:
103 |
104 | ```haskell
105 | Request () (\ x -> Request () (\ y -> Pure (x, y)))
106 | ```
107 |
108 | One technique pipes uses is to use the type `Void` to eliminate some constructors under certain types while still keeping composability:
109 |
110 | ```haskell
111 | -- | type with no constructors
112 | type X = Void
113 |
114 | -- | 'Effect's neither 'Pipes.await' nor 'Pipes.yield'
115 | type Effect = Proxy X () () X
116 | -- | 'Producer's can only 'Pipes.yield'
117 | type Producer b = Proxy X () () b
118 | -- | 'Pipe's can both 'Pipes.await' and 'Pipes.yield'
119 | type Pipe a b = Proxy () a () b
120 | -- | 'Consumer's can only 'Pipes.await'
121 | type Consumer a = Proxy () a () X
122 | ```
123 |
124 | ## A Retrospective
125 |
126 | ### Free monad is powerful, but hard to use
127 |
128 | The free monad approach could give you as many primitives as you want, and you could choose different interpreters to run, but it's hard to use in several ways:
129 |
130 | + It's hard to comprehend, you have to read the monad instance very carefully, to understand how those primitives work.
131 | + It has the same problem as monad transformers, i.e. now every base monad operation needs to be lifted.
132 | + It's hard to be optimized by the compiler, because now every operation becomes an ADT constructor, and often leads to higher allocations.
133 |
134 | A free monad construction for streaming may also need to provide a different set of combinators, such as `mapM` or `foldM`, which is incompatible with `Control.Monad`.
135 |
136 | ### How other languages do streaming
137 |
138 | It's interesting to find out that most of the OO languages solve this problem in a much simpler way, for example in javascript.
139 |
140 | ```javascript
141 | // from node.js example
142 | const fs = require('fs');
143 | const zlib = require('zlib');
144 | const r = fs.createReadStream('file.txt');
145 |
146 | const z = zlib.createGzip();
147 | const w = fs.createWriteStream('file.txt.gz');
148 | r.pipe(z).pipe(w);
149 |
150 | // or you can manually connect streams like this:
151 | r.on('data', (chunk) => { z.write(chunk); });
152 | z.on('data', (chunk) => { w.write(chunk); });
153 | ```
154 |
155 | In OO's viewpoint, a stream node is an object, with a method to receive chunks, and write to downstream inside callbacks, and that's it. This pattern has some drawbacks:
156 |
157 | + Stream node somehow lost its control, e.g. you can't stop the stream processing in a middle node without touching the source. This is the *Inversion of Control* problem of all callback-based APIs.
158 | + Stream node now became a mutable stateful object, which is unnatural in Haskell.
159 |
160 | ## Introduce the BIO
161 |
162 | In [Z-IO](https://hackage.haskell.org/package/Z-IO) v0.8, we introduce a new `BIO` type to simplify stream processing with three design goals:
163 |
164 | + Simple composable types.
165 | + No transformer, no lift.
166 | + Easier to be used for writing both processors and applications.
167 |
168 | The result is a type focusing on *callback transformation*:
169 |
170 | ```haskell
171 | -- A bio node receives a callback, returns a new callback to be called from upstream.
172 | type BIO inp out = (Maybe out -> IO ()) -> Maybe inp -> IO ()
173 |
174 | -- A Source doesn't consume any meaningful input
175 | type Source a = BIO Void a
176 | -- A Sink doesn't produce any meaningful output
177 | type Sink a = BIO a Void
178 |
179 | -- | A pattern for more meaningful matching.
180 | pattern EOF :: Maybe a
181 | pattern EOF = Nothing
182 | ```
183 |
184 | For example, to implement a [zlib](https://zlib.net/) node with BIO:
185 |
186 | ```haskell
187 | compressBIO :: ZStream -> BIO V.Bytes V.Bytes
188 | compressBIO zs = \ callback mbs ->
189 | case mbs of
190 | Just bs -> do
191 | -- feed input chunk to ZStream
192 | set_avail_in zs bs (V.length bs)
193 | let loop = do
194 | oavail :: CUInt <- withCPtr zs $ \ ps -> do
195 | -- perform deflate and peek output buffer remaining
196 | throwZlibIfMinus_ (deflate ps (#const Z_NO_FLUSH))
197 | (#peek struct z_stream_s, avail_out) ps
198 | when (oavail == 0) $ do
199 | -- when output buffer is full,
200 | -- freeze chunk and call the callback
201 | oarr <- A.unsafeFreezeArr =<< readIORef bufRef
202 | callback (Just (V.PrimVector oarr 0 bufSiz))
203 | newOutBuffer
204 | loop
205 | loop
206 | _ -> ... similar to above, with no input chunk and Z_FINISH flag
207 | ```
208 |
209 | When implementing a `Source`, you just ignore the `EOF` param, and call the callback once a new chunk is ready.
210 |
211 | ```haskell
212 | -- | Turn a `IO` action into 'Source'
213 | sourceFromIO :: HasCallStack => IO (Maybe a) -> Source a
214 | sourceFromIO io = \ k _ ->
215 | let loop = io >>= \ x ->
216 | case x of
217 | Just _ -> k x >> loop -- you should loop inside a Source
218 | _ -> k EOF
219 | in loop
220 | ```
221 |
222 | You should assume the `EOF` param is only given once, so a loop is often needed. Similar to `Source`, a `Sink` doesn't need to write any output until the final `EOF`:
223 |
224 | ```haskell
225 | sinkToIO :: HasCallStack => (a -> IO ()) -> Sink a
226 | sinkToIO f = \ k ma ->
227 | case ma of
228 | Just a -> f a
229 | _ -> k EOF
230 | ```
231 |
232 | ### Composing BIO and running
233 |
234 | Composing BIO is simple: you can use `(.)` the function composition operator to connect BIOs, since it's just a callback transformation:
235 |
236 | ```haskell
237 | import Z.Data.CBytes (CBytes)
238 | import Z.IO
239 | import Z.IO.BIO
240 | import Z.IO.BIO.Zlib
241 |
242 | base64AndCompressFile :: HasCallStack => CBytes -> CBytes -> IO ()
243 | base64AndCompressFile origin target = do
244 | base64Enc <- newBase64Encoder
245 | (_, zlibCompressor) <- newCompress defaultCompressConfig{compressWindowBits = 31}
246 |
247 | withResource (initSourceFromFile origin) $ \ src ->
248 | withResource (initSinkToFile target) $ \ sink ->
249 | runBIO_ $ src . base64Enc . zlibCompressor . sink
250 | ```
251 |
252 | The above code is similar to the command line `cat origin | base64 | gzip > target`, and `runBIO_` is defined simply as:
253 |
254 | ```haskell
255 | -- | Discards a value, used as the callback to `Sink`.
256 | discard :: a -> IO ()
257 | discard _ = return ()
258 |
259 | runBIO_ :: HasCallStack => BIO inp out -> IO ()
260 | runBIO_ bio = bio discard EOF
261 | ```
262 |
263 | ### Conclusion
264 |
265 | There are many streaming libraries on hackage, and most of them are designed around the free monad pattern. In `Z-IO` we introduced a new, simpler design around callback transformation, which is much easier to use for writing both stream processors and applications. Of course, there is no silver bullet. The `BIO` type in `Z-IO` also has limitations: for example, the source cannot be paused by a downstream processor without using some IO state, and the whole state management now relies on IO, rather than user-supplied state monads.
266 |
--------------------------------------------------------------------------------
/_sass/custom/custom.scss:
--------------------------------------------------------------------------------
1 | .site-header {
2 | position: relative;
3 | padding-left: 60px;
4 | }
5 | .site-header:before {
6 | content: "";
7 | width: 60px;
8 | height: 60px;
9 | position: absolute;
10 | left: 0;
11 | top: 0;
12 | background-image: url("https://avatars.githubusercontent.com/u/38765559?s=200&v=4");
13 | background-size: contain;
14 | }
15 | div.highlight {
16 | line-height: 1.4em;
17 | }
18 | .main-content h1,
19 | .main-content h2,
20 | .main-content h3,
21 | .main-content h4,
22 | .main-content h5,
23 | .main-content h6 {
24 | margin-top: 1em;
25 | }
26 |
--------------------------------------------------------------------------------
/benchmarks.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: default
3 | title: Benchmarks
4 | nav_order: 4
5 | ---
6 |
7 | ## Table of contents
8 | {: .no_toc .text-delta }
9 |
10 | 1. TOC
11 | {:toc}
12 |
13 | The benchmark code is available on [GitHub](https://github.com/ZHaskell/benchmarks).
14 |
15 | Note that benchmarks only capture certain aspects of the code and the setup environment; they may not reflect real-world use cases. Any patches that make the simulation more realistic are welcome.
16 |
17 | # JSON performance
18 |
19 | This benchmark compared [JSON module in Z-Data](https://hackage.haskell.org/package/Z-Data/docs/Z-Data-JSON.html) with [aeson](https://hackage.haskell.org/package/aeson), a widely used JSON package on hackage. See our analysis in [this blog post](/performance/2021/02/01/High-performance-JSON-codec.html).
20 |
21 | 
22 |
23 | # TCP performance
24 |
25 | This benchmark compared different redis PING-PONG server implementations, using `redis-benchmark` tool from redis package running `redis-benchmark -p 8888 -t ping -n 100000 -q -c 100`.
26 |
27 | ```
28 | # Haskell's network package
29 | # cabal run redis-benchmark-base -- +RTS -N4 -H2G
30 | PING_INLINE: 88105.73 requests per second
31 | PING_BULK: 87873.46 requests per second
32 |
33 | # Z-IO from Z.Haskell
34 | # cabal run redis-benchmark-z -- +RTS -N4 -H2G
35 | PING_INLINE: 99800.40 requests per second
36 | PING_BULK: 102459.02 requests per second
37 |
38 | # Golang standard lib
39 | PING_INLINE: 98716.68 requests per second
40 | PING_BULK: 101522.84 requests per second
41 |
42 | # Rust mio
43 | PING_INLINE: 111731.84 requests per second
44 | PING_BULK: 112612.61 requests per second
45 |
46 | # C libuv
47 | PING_INLINE: 109170.30 requests per second
48 | PING_BULK: 105374.08 requests per second
49 | ```
50 |
51 | Note that both mio and libuv use a single-threaded event loop and a shared buffer to receive `redis-benchmark`'s messages across different connections, which is quite different from the other lightweight-thread-based implementations.
52 |
53 | GHC also provides memory statistics:
54 |
55 | ```
56 | # Haskell's network package
57 | # cabal run redis-benchmark-base -- +RTS -N4 -s -H2G
58 | 3,751,313,096 bytes allocated in the heap
59 | 302,793,568 bytes copied during GC
60 | 1,869,864 bytes maximum residency (1044 sample(s))
61 | 490,016 bytes maximum slop
62 | 2085 MiB total memory in use (0 MB lost due to fragmentation)
63 |
64 | Tot time (elapsed) Avg pause Max pause
65 | Gen 0 2085 colls, 2085 par 0.455s 0.131s 0.0001s 0.0085s
66 | Gen 1 1044 colls, 1043 par 0.419s 0.149s 0.0001s 0.0226s
67 |
68 | Parallel GC work balance: 82.10% (serial 0%, perfect 100%)
69 |
70 | TASKS: 10 (1 bound, 9 peak workers (9 total), using -N4)
71 |
72 | SPARKS: 0 (0 converted, 0 overflowed, 0 dud, 0 GC'd, 0 fizzled)
73 |
74 | INIT time 0.001s ( 0.001s elapsed)
75 | MUT time 5.360s ( 5.842s elapsed)
76 | GC time 0.874s ( 0.280s elapsed)
77 | EXIT time 0.001s ( 0.008s elapsed)
78 | Total time 6.236s ( 6.130s elapsed)
79 |
80 | Alloc rate 699,915,737 bytes per MUT second
81 |
82 | Productivity 85.9% of total user, 95.3% of total elapsed
83 |
84 | # Z-IO from Z.Haskell
85 | # cabal run redis-benchmark-z -- +RTS -N4 -s -H2G
86 | 280,828,448 bytes allocated in the heap
87 | 835,688 bytes copied during GC
88 | 3,375,112 bytes maximum residency (4 sample(s))
89 | 839,672 bytes maximum slop
90 | 2084 MiB total memory in use (0 MB lost due to fragmentation)
91 |
92 | Tot time (elapsed) Avg pause Max pause
93 | Gen 0 5 colls, 5 par 0.009s 0.008s 0.0015s 0.0073s
94 | Gen 1 4 colls, 3 par 0.023s 0.021s 0.0052s 0.0194s
95 |
96 | Parallel GC work balance: 82.81% (serial 0%, perfect 100%)
97 |
98 | TASKS: 14 (1 bound, 13 peak workers (13 total), using -N4)
99 |
100 | SPARKS: 0 (0 converted, 0 overflowed, 0 dud, 0 GC'd, 0 fizzled)
101 |
102 | INIT time 0.001s ( 0.001s elapsed)
103 | MUT time 2.811s ( 6.757s elapsed)
104 | GC time 0.032s ( 0.028s elapsed)
105 | EXIT time 0.002s ( 0.004s elapsed)
106 | Total time 2.846s ( 6.790s elapsed)
107 |
108 | Alloc rate 99,903,441 bytes per MUT second
109 |
110 | Productivity 98.8% of total user, 99.5% of total elapsed
111 | ```
112 |
--------------------------------------------------------------------------------
/blog.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: default
3 | title: Blog
4 | ---
5 |
6 |