├── .gitignore
├── .gitmodules
├── LICENSE
├── README.md
├── boris-git.toml
├── boris.toml
├── cabal.project
├── doc
    ├── bit-packing.png
    ├── frame-of-reference.png
    ├── layout1-data.png
    ├── layout1-types.png
    ├── layout2-data.png
    ├── layout2-types.png
    ├── layout3-data.png
    ├── layout3-types.png
    ├── layout4-data.png
    ├── layout4-types.png
    ├── row-vs-column.png
    ├── zebra-v2-plan.md
    ├── zebra.jpg
    └── zig-zag-encoding.png
├── framework
    ├── ghci
    └── mafia
├── zebra-cli
    ├── .ghci
    ├── ambiata-zebra-cli.cabal
    ├── ambiata-zebra-cli.lock-7.10.2
    ├── ambiata-zebra-cli.lock-8.0.1
    ├── mafia
    ├── main
    │   └── zebra.hs
    ├── master.toml
    ├── src
    │   └── Zebra
    │   │   ├── Command.hs
    │   │   └── Command
    │   │       ├── Adapt.hs
    │   │       ├── Consistency.hs
    │   │       ├── Export.hs
    │   │       ├── Import.hs
    │   │       ├── Merge.hs
    │   │       ├── Summary.hs
    │   │       └── Util.hs
    └── test
    │   ├── cli
    │       ├── core
    │       │   └── .keep
    │       ├── import
    │       │   ├── run
    │       │   ├── t01-array
    │       │   │   ├── expected
    │       │   │   ├── input.zschema
    │       │   │   ├── input.ztxt
    │       │   │   └── script
    │       │   ├── t02-map
    │       │   │   ├── expected
    │       │   │   ├── input.zschema
    │       │   │   ├── input.ztxt
    │       │   │   └── script
    │       │   ├── t03-binary
    │       │   │   ├── expected
    │       │   │   ├── input.zschema
    │       │   │   ├── input.ztxt
    │       │   │   └── script
    │       │   └── t04-time
    │       │   │   ├── expected
    │       │   │   ├── input.zschema
    │       │   │   ├── input.ztxt
    │       │   │   └── script
    │       └── merge
    │       │   ├── run
    │       │   ├── t01-same
    │       │       ├── expected
    │       │       ├── input.zschema
    │       │       ├── input0.ztxt
    │       │       ├── input1.ztxt
    │       │       └── script
    │       │   └── t02-different
    │       │       ├── expected
    │       │       ├── input0.zschema
    │       │       ├── input0.ztxt
    │       │       ├── input1.zschema
    │       │       ├── input1.ztxt
    │       │       └── script
    │   ├── test-cli.hs
    │   ├── test-io.hs
    │   └── test.hs
└── zebra-core
    ├── .ghci
    ├── ambiata-zebra-core.cabal
    ├── ambiata-zebra-core.lock-7.10.2
    ├── ambiata-zebra-core.lock-8.0.1
    ├── ambiata-zebra-core.mk
    ├── csrc
        ├── zebra_append.c
        ├── zebra_append.h
        ├── zebra_bindings.h
        ├── zebra_block_split.c
        ├── zebra_block_split.h
        ├── zebra_clone.c
        ├── zebra_clone.h
        ├── zebra_data.h
        ├── zebra_debug.h
        ├── zebra_grow.c
        ├── zebra_grow.h
        ├── zebra_hash.h
        ├── zebra_merge.c
        ├── zebra_merge.h
        ├── zebra_merge_many.c
        ├── zebra_merge_many.h
        ├── zebra_unpack.c
        └── zebra_unpack.h
    ├── mafia
    ├── master.toml
    ├── src
        └── Zebra
        │   ├── Factset
        │       ├── Block.hs
        │       ├── Block
        │       │   ├── Block.hs
        │       │   ├── Entity.hs
        │       │   └── Index.hs
        │       ├── Data.hs
        │       ├── Entity.hs
        │       ├── Fact.hs
        │       └── Table.hs
        │   ├── Foreign
        │       ├── Bindings.hsc
        │       ├── Block.hs
        │       ├── Entity.hs
        │       ├── Merge.hs
        │       ├── Serial.hs
        │       ├── Table.hs
        │       └── Util.hs
        │   ├── Merge
        │       ├── Base.hs
        │       ├── Block.hs
        │       ├── BlockC.hs
        │       ├── Entity.hs
        │       ├── Puller
        │       │   ├── File.hs
        │       │   └── List.hs
        │       └── Table.hs
        │   ├── Serial
        │       ├── Binary.hs
        │       ├── Binary
        │       │   ├── Array.hs
        │       │   ├── Block.hs
        │       │   ├── Data.hs
        │       │   ├── File.hs
        │       │   ├── Header.hs
        │       │   ├── Logical.hs
        │       │   ├── Striped.hs
        │       │   └── Table.hs
        │       ├── Json.hs
        │       ├── Json
        │       │   ├── Logical.hs
        │       │   ├── Schema.hs
        │       │   ├── Striped.hs
        │       │   └── Util.hs
        │       ├── Text.hs
        │       └── Text
        │       │   ├── Logical.hs
        │       │   ├── Schema.hs
        │       │   └── Striped.hs
        │   ├── Table
        │       ├── Data.hs
        │       ├── Encoding.hs
        │       ├── Logical.hs
        │       ├── Schema.hs
        │       └── Striped.hs
        │   ├── Time.hs
        │   └── X
        │       ├── Either.hs
        │       └── Vector
        │           ├── Generic.hs
        │           ├── Segment.hs
        │           └── Storable.hs
    └── test
        ├── Test
            └── Zebra
            │   ├── Factset
            │       ├── Block.hs
            │       └── Data.hs
            │   ├── Foreign
            │       ├── Block.hs
            │       ├── Entity.hs
            │       ├── Merge.hs
            │       └── Table.hs
            │   ├── Jack.hs
            │   ├── Merge
            │       ├── Entity.hs
            │       └── Table.hs
            │   ├── Serial
            │       ├── Binary
            │       │   ├── Array.hs
            │       │   ├── Block.hs
            │       │   ├── File.hs
            │       │   ├── Header.hs
            │       │   ├── Logical.hs
            │       │   ├── Striped.hs
            │       │   └── Table.hs
            │       ├── Json
            │       │   ├── Logical.hs
            │       │   ├── Schema.hs
            │       │   ├── Striped.hs
            │       │   └── Util.hs
            │       └── Text
            │       │   ├── Logical.hs
            │       │   ├── Schema.hs
            │       │   └── Striped.hs
            │   ├── Table
            │       ├── Logical.hs
            │       ├── Schema.hs
            │       └── Striped.hs
            │   ├── Time.hs
            │   └── Util.hs
        ├── ambiata-zebra-core-test.cabal
        └── test.hs


/.gitignore:
--------------------------------------------------------------------------------
 1 | TAGS
 2 | dist
 3 | cabal-dev
 4 | *.o
 5 | *.hi
 6 | *.chi
 7 | *.chs.h
 8 | *.gch
 9 | .virthualenv
10 | .cabal-sandbox
11 | cabal.sandbox.config
12 | tmp
13 | gen
14 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
 1 | [submodule "lib/p"]
 2 | 	path = lib/p
 3 | 	url = git@github.com:HuwCampbell/p
 4 | [submodule "lib/disorder"]
 5 | 	path = lib/disorder
 6 | 	url = git@github.com:HuwCampbell/disorder.hs
 7 | [submodule "lib/x"]
 8 | 	path = lib/x
 9 | 	url = git@github.com:HuwCampbell/x
10 | [submodule "lib/anemone"]
11 | 	path = lib/anemone
12 | 	url = git@github.com:HuwCampbell/anemone
13 | [submodule "lib/snapper"]
14 | 	path = lib/snapper
15 | 	url = git@github.com:HuwCampbell/snapper
16 | [submodule "lib/viking"]
17 | 	path = lib/viking
18 | 	url = git@github.com:HuwCampbell/viking
19 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright 2017, Ambiata, All Rights Reserved.
 2 | 
 3 | Redistribution and use in source and binary forms, with or without
 4 | modification, are permitted provided that the following conditions are
 5 | met:
 6 | 
 7 |   1. Redistributions of source code must retain the above copyright
 8 |      notice, this list of conditions and the following disclaimer.
 9 | 
10 |   2. Redistributions in binary form must reproduce the above copyright
11 |      notice, this list of conditions and the following disclaimer in the
12 |      documentation and/or other materials provided with the distribution.
13 | 
14 |   3. Neither the name of the copyright holder nor the names of
15 |      its contributors may be used to endorse or promote products derived
16 |      from this software without specific prior written permission.
17 | 
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 | 


--------------------------------------------------------------------------------
/boris-git.toml:
--------------------------------------------------------------------------------
 1 | [boris]
 2 |   version = 1
 3 | 
 4 | [build.dist-*]
 5 |   git = "refs/heads/master"
 6 | 
 7 | [build.branches-*]
 8 |   git = "refs/heads/topic/*"
 9 | 
10 | [build.all-*]
11 |   git = "refs/heads/**"
12 | 


--------------------------------------------------------------------------------
/boris.toml:
--------------------------------------------------------------------------------
 1 | [boris]
 2 |   version = 1
 3 | 
 4 | [build.dist-7-10-core]
 5 |   command = [["master", "build", "dist-7-10", "-C", "zebra-core"]]
 6 | [build.dist-7-10-cli]
 7 |   command = [["master", "build", "dist-7-10", "-C", "zebra-cli"]]
 8 | 
 9 | [build.dist-8-0-core]
10 |   command = [["master", "build", "dist-8-0", "-C", "zebra-core"]]
11 | [build.dist-8-0-cli]
12 |   command = [["master", "build", "dist-8-0", "-C", "zebra-cli"]]
13 | 
14 | [build.dist-validate]
15 |   command = [["validate-respect"]]
16 | 
17 | [build.branches-7-10-core]
18 |   command = [["master", "build", "branches-7-10", "-C", "zebra-core"]]
19 | [build.branches-7-10-cli]
20 |   command = [["master", "build", "branches-7-10", "-C", "zebra-cli"]]
21 | 
22 | [build.branches-8-0-core]
23 |   command = [["master", "build", "branches-8-0", "-C", "zebra-core"]]
24 | [build.branches-8-0-cli]
25 |   command = [["master", "build", "branches-8-0", "-C", "zebra-cli"]]
26 | 
27 | [build.all-rebased]
28 |   command = [["rebased"]]
29 | 
30 | [build.all-submodules]
31 |   command = [["dangling-refs"]]
32 | 


--------------------------------------------------------------------------------
/cabal.project:
--------------------------------------------------------------------------------
 1 | packages:
 2 |           zebra-cli/
 3 |           zebra-core/
 4 |           zebra-core/test/
 5 |           lib/anemone/
 6 |           lib/disorder/disorder-core
 7 |           lib/disorder/disorder-corpus
 8 |           lib/disorder/disorder-jack
 9 |           lib/viking/
10 |           lib/snapper/
11 |           lib/x/x-show/
12 |           lib/x/x-bytestring/
13 |           lib/x/x-optparse/
14 |           lib/x/x-vector/
15 |           lib/p/
16 | 
17 | constraints:
18 |   cabal > 2
19 | 
20 | 


--------------------------------------------------------------------------------
/doc/bit-packing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HuwCampbell/zebra/afe41591499934b00263abf816f5ef3509d6672c/doc/bit-packing.png


--------------------------------------------------------------------------------
/doc/frame-of-reference.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HuwCampbell/zebra/afe41591499934b00263abf816f5ef3509d6672c/doc/frame-of-reference.png


--------------------------------------------------------------------------------
/doc/layout1-data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HuwCampbell/zebra/afe41591499934b00263abf816f5ef3509d6672c/doc/layout1-data.png


--------------------------------------------------------------------------------
/doc/layout1-types.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HuwCampbell/zebra/afe41591499934b00263abf816f5ef3509d6672c/doc/layout1-types.png


--------------------------------------------------------------------------------
/doc/layout2-data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HuwCampbell/zebra/afe41591499934b00263abf816f5ef3509d6672c/doc/layout2-data.png


--------------------------------------------------------------------------------
/doc/layout2-types.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HuwCampbell/zebra/afe41591499934b00263abf816f5ef3509d6672c/doc/layout2-types.png


--------------------------------------------------------------------------------
/doc/layout3-data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HuwCampbell/zebra/afe41591499934b00263abf816f5ef3509d6672c/doc/layout3-data.png


--------------------------------------------------------------------------------
/doc/layout3-types.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HuwCampbell/zebra/afe41591499934b00263abf816f5ef3509d6672c/doc/layout3-types.png


--------------------------------------------------------------------------------
/doc/layout4-data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HuwCampbell/zebra/afe41591499934b00263abf816f5ef3509d6672c/doc/layout4-data.png


--------------------------------------------------------------------------------
/doc/layout4-types.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HuwCampbell/zebra/afe41591499934b00263abf816f5ef3509d6672c/doc/layout4-types.png


--------------------------------------------------------------------------------
/doc/row-vs-column.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HuwCampbell/zebra/afe41591499934b00263abf816f5ef3509d6672c/doc/row-vs-column.png


--------------------------------------------------------------------------------
/doc/zebra-v2-plan.md:
--------------------------------------------------------------------------------
 1 | Zebra v2 plan
 2 | -------------
 3 | 
 4 | Zebra is currently specialised to icicle/blizzard but is pretty complete when
 5 | it comes to a compressed binary data store for the sort of data that Ambiata
 6 | wants to store.
 7 | 
 8 | Ambiata also has a need for an encrypted, compressed binary data store and
 9 | rather than build that from scratch it has been decided that we should at least
10 | explore the possibility of adapting as much code as possible from what is now
11 | Zebra.
12 | 
13 | The current Zebra has some features and capabilities that are Icicle/Blizzard
14 | specific and not needed in the new format. Current Zebra also has some
15 | limitations/omissions with regards to what is needed for the new format.
16 | However, there is a large amount of code currently in Zebra that would be useful
17 | for the new format.
18 | 
19 | The plan is to convert the current zebra into a multiple repo project which
20 | will contain code common to new and old formats, another with the existing
21 | Zebra format and a third with the new format. The aim of doing it this way
22 | is:
23 | 
24 | * Minimal disruption to current work on Zebra/Icicle/Blizzard.
25 | * Reuse of as much existing code as possible.
26 | * Reduction in development time for the new format.
27 | * Possibility of switching Icicle to the new format at some time.
28 | 
29 | 
30 | The work-in-progress spec for the new format is as follows:
31 | 
32 | Properties on disk:
33 | 
34 | * Encrypted (optional?, maybe AES CTR)
35 | * Random access.
36 | * Compressed.
37 | * Stored column major to exploit redundancy.
38 | * Types (primitives and lists of primitives).
39 | * Correctness
40 |   * Sort keys (validated) stored with the data.
41 |   * Integrity checks
42 | * No need for modify in place.
43 | 
44 | 
45 | Operational properties:
46 | 
47 | * Ability to introspect on file (baked in schema).
48 | * Fast stats like count.
49 | 
50 | Run time properties:
51 | 
52 | * Performance
53 | * Throughput.
54 | * Target pulling out bigger chunks rather than single row.
55 | 
56 | Required tooling:
57 | 
58 | * Library
59 | * Cli
60 | * Tool to convert to/from Walrus.
61 | 
62 | 
63 | Comments on Zebra and how it measures up in comparison to desired storage format:
64 | 
65 | * Zebra doesn't do encyption.
66 | * Zebra does do compression (uses Snappy to compress arrays of bytes (strings),
67 |   and a combination of delta encoding, zig-zag enconding and BP64 to compress
68 |   integers (as described in Lemire12)).
69 | * Zebra uses column major storage to exploit redundancy.
70 | * Zebra supports primitives, arbitrarily nested product types and the
71 |   maybe/option type. There is a plan for how to encode full sum types rather
72 |   than making option a special case.
73 | * Zebra is currently specialised to be sorted by entity/attribute/time/priority
74 |   which are baked in concepts at the moment, but there are plans to generalise
75 |   this to arbitrary schemas which can be sorted by any set of fields.
76 | * Zebra files are immutable.
77 | * Currently, the Zebra schema/meta-data isn't fully self-describing of the
78 |   original input.
79 | * Zebra facts allows for dumping the file as human readable facts, but the
80 |   schema in the header is not enough to be able to do this in a roundtrippable
81 |   way.
82 | * Zebra provides a number of human readable summaries not limited to
83 |       ```
84 |       zebra cat --summary
85 |       zebra cat --entity-details
86 |       zebra facts
87 |       ```
88 | 
89 | 
90 | Things to research and possibly steal ideas from:
91 | * Google's [column oriented data store](https://research.google.com/pubs/pub36632.html).
92 | * [HDF5](https://support.hdfgroup.org/HDF5/) ([in Haskell](https://github.com/mokus0/hs-hdf5)).
93 | * [NetCDF](]http://www.unidata.ucar.edu/software/netcdf/).
94 | 


--------------------------------------------------------------------------------
/doc/zebra.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HuwCampbell/zebra/afe41591499934b00263abf816f5ef3509d6672c/doc/zebra.jpg


--------------------------------------------------------------------------------
/doc/zig-zag-encoding.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HuwCampbell/zebra/afe41591499934b00263abf816f5ef3509d6672c/doc/zig-zag-encoding.png


--------------------------------------------------------------------------------
/framework/ghci:
--------------------------------------------------------------------------------
1 | :set prompt "λ> "
2 | :set -Wall
3 | :set -XOverloadedStrings
4 | :set -XScopedTypeVariables
5 | 


--------------------------------------------------------------------------------
/framework/mafia:
--------------------------------------------------------------------------------
 1 | #!/bin/sh -eu
 2 | 
 3 | : ${MAFIA_HOME:=$HOME/.mafia}
 4 | : ${MAFIA_VERSIONS:=$MAFIA_HOME/versions}
 5 | 
 6 | latest_version () {
 7 |     git ls-remote https://github.com/ambiata/mafia | grep refs/heads/master | cut -f 1
 8 | }
 9 | 
10 | build_version() {
11 |     MAFIA_VERSION="$1"
12 |     MAFIA_TEMP=$(mktemp -d 2>/dev/null || mktemp -d -t 'exec_mafia')
13 |     MAFIA_FILE=mafia-$MAFIA_VERSION
14 |     MAFIA_PATH=$MAFIA_VERSIONS/$MAFIA_FILE
15 |     mkdir -p $MAFIA_VERSIONS
16 |     echo "Building $MAFIA_FILE in $MAFIA_TEMP"
17 |     git clone https://github.com/ambiata/mafia $MAFIA_TEMP
18 |     git --git-dir="$MAFIA_TEMP/.git" --work-tree="$MAFIA_TEMP" reset --hard $MAFIA_VERSION || {
19 |         echo "mafia version ($MAFIA_VERSION) could not be found." >&2
20 |         exit 1
21 |     }
22 |     (cd "$MAFIA_TEMP" && ./bin/bootstrap) || {
23 |         got=$?
24 |         echo "mafia version ($MAFIA_VERSION) could not be built." >&2
25 |         exit "$got"
26 |     }
27 |     chmod +x "$MAFIA_TEMP/.cabal-sandbox/bin/mafia"
28 |     # Ensure executable is on same file-system so final mv is atomic.
29 |     mv -f "$MAFIA_TEMP/.cabal-sandbox/bin/mafia" "$MAFIA_PATH.$$"
30 |     mv "$MAFIA_PATH.$$" "$MAFIA_PATH" || {
31 |         rm -f "$MAFIA_PATH.$$"
32 |         echo "INFO: mafia version ($MAFIA_VERSION) already exists not overiding," >&2
33 |         echo "INFO: this is expected if parallel builds of the same version of" >&2
34 |         echo "INFO: mafia occur, we are playing by first in, wins." >&2
35 |         exit 0
36 |     }
37 | }
38 | 
39 | enable_version() {
40 |     if [ $# -eq 0 ]; then
41 |         MAFIA_VERSION="$(latest_version)"
42 |         echo "INFO: No explicit mafia version requested installing latest ($MAFIA_VERSION)." >&2
43 |     else
44 |         MAFIA_VERSION="$1"
45 |     fi
46 |     [ -x "$MAFIA_HOME/versions/mafia-$MAFIA_VERSION" ] || build_version "$MAFIA_VERSION"
47 |     ln -sf "$MAFIA_HOME/versions/mafia-$MAFIA_VERSION" "$MAFIA_HOME/versions/mafia"
48 | }
49 | 
50 | exec_mafia () {
51 |     [ -x "$MAFIA_HOME/versions/mafia" ] || enable_version
52 |     "$MAFIA_HOME/versions/mafia" "$@"
53 | }
54 | 
55 | #
56 | # The actual start of the script.....
57 | #
58 | 
59 | case "${1:-}" in
60 | upgrade) shift; enable_version "$@" ;;
61 | *) exec_mafia "$@"
62 | esac
63 | 


--------------------------------------------------------------------------------
/zebra-cli/.ghci:
--------------------------------------------------------------------------------
1 | ../framework/ghci


--------------------------------------------------------------------------------
/zebra-cli/ambiata-zebra-cli.cabal:
--------------------------------------------------------------------------------
  1 | name:                  ambiata-zebra-cli
  2 | version:               0.0.1
  3 | license:               BSD3
  4 | author:                Ambiata <info@ambiata.com>
  5 | maintainer:            Ambiata <info@ambiata.com>
  6 | copyright:             (c) 2015 Ambiata.
  7 | synopsis:              zebra
  8 | category:              System
  9 | cabal-version:         >= 1.8
 10 | build-type:            Simple
 11 | description:           zebra
 12 | 
 13 | library
 14 |   build-depends:
 15 |                       base                            >= 3          && < 5
 16 |                     , ambiata-anemone
 17 |                     , ambiata-p
 18 |                     , ambiata-viking
 19 |                     , ambiata-x-vector
 20 |                     , ambiata-zebra-core
 21 |                     , binary                          >= 0.7.2      && < 0.9
 22 |                     , bytestring                      >= 0.10
 23 |                     , containers                      == 0.5.*
 24 |                     , exceptions                      >= 0.8
 25 |                     , mmorph                          >= 1.0
 26 |                     , pretty-show                     == 1.6.*
 27 |                     , resourcet                       >= 1.1
 28 |                     , semigroups                      >= 0.18
 29 |                     , text                            == 1.2.*
 30 |                     , transformers                    == 0.5.*
 31 |                     , transformers-either
 32 |                     , concurrent-output               >= 1.6        && < 1.11
 33 |                     , vector                          >= 0.11       && < 0.13
 34 | 
 35 |   ghc-options:
 36 |                     -Wall
 37 | 
 38 |   hs-source-dirs:
 39 |                     src
 40 | 
 41 |   exposed-modules:
 42 |                     Zebra.Command
 43 |                     Zebra.Command.Adapt
 44 |                     Zebra.Command.Consistency
 45 |                     Zebra.Command.Export
 46 |                     Zebra.Command.Import
 47 |                     Zebra.Command.Merge
 48 |                     Zebra.Command.Summary
 49 |                     Zebra.Command.Util
 50 | 
 51 | 
 52 | executable zebra
 53 |   if impl(ghc >= 8.0)
 54 |     ghc-options:
 55 |                     -Wall -threaded -O2 -rtsopts "-with-rtsopts=-A128m -n4m"
 56 |   else
 57 |     ghc-options:
 58 |                     -Wall -threaded -O2 -rtsopts "-with-rtsopts=-A128m -n4m -qg"
 59 | 
 60 |   hs-source-dirs:
 61 |                     main
 62 | 
 63 |   main-is:
 64 |                     zebra.hs
 65 | 
 66 |   build-depends:
 67 |                       base
 68 |                     , ambiata-anemone
 69 |                     , ambiata-p
 70 |                     , ambiata-x-optparse
 71 |                     , ambiata-x-vector
 72 |                     , ambiata-zebra-cli
 73 |                     , ambiata-zebra-core
 74 |                     , binary
 75 |                     , bytestring
 76 |                     , containers
 77 |                     , mmorph
 78 |                     , optparse-applicative
 79 |                     , pretty-show
 80 |                     , resourcet
 81 |                     , semigroups
 82 |                     , text
 83 |                     , transformers
 84 |                     , transformers-either
 85 | 
 86 |   other-modules:
 87 |                     Paths_ambiata_zebra_cli
 88 | 
 89 | test-suite test
 90 |   type:
 91 |                     exitcode-stdio-1.0
 92 | 
 93 |   main-is:
 94 |                     test.hs
 95 | 
 96 |   ghc-options:
 97 |                     -Wall -threaded -O2
 98 | 
 99 |   hs-source-dirs:
100 |                     test
101 | 
102 |   build-depends:
103 |                       base
104 |                     , ambiata-disorder-core
105 |                     , ambiata-disorder-corpus
106 |                     , ambiata-disorder-jack
107 |                     , ambiata-p
108 | 
109 | test-suite test-io
110 |   type:
111 |                     exitcode-stdio-1.0
112 | 
113 |   main-is:
114 |                     test-io.hs
115 | 
116 |   ghc-options:
117 |                     -Wall -threaded -O2
118 | 
119 |   hs-source-dirs:
120 |                     test
121 | 
122 |   build-depends:
123 |                       base
124 |                     , ambiata-disorder-core
125 |                     , ambiata-disorder-corpus
126 |                     , ambiata-disorder-jack
127 |                     , ambiata-p
128 | 
129 | test-suite test-cli
130 |   type:
131 |                     exitcode-stdio-1.0
132 | 
133 |   main-is:
134 |                     test-cli.hs
135 | 
136 |   ghc-options:
137 |                     -Wall -threaded -O2
138 | 
139 |   hs-source-dirs:
140 |                     test
141 | 
142 |   build-depends:
143 |                       base
144 |                     , ambiata-disorder-core
145 | 


--------------------------------------------------------------------------------
/zebra-cli/ambiata-zebra-cli.lock-7.10.2:
--------------------------------------------------------------------------------
 1 | # mafia-lock-file-version: 0
 2 | adjunctions == 4.3
 3 | aeson == 1.1.1.0
 4 | aeson-pretty == 0.8.2
 5 | ansi-terminal == 0.6.2.3
 6 | ansi-wl-pprint == 0.6.7.3
 7 | attoparsec == 0.13.1.0
 8 | base-compat == 0.9.2
 9 | base-orphans == 0.5.4
10 | base64-bytestring == 1.0.0.1
11 | bifunctors == 5.3
12 | bindings-DSL == 1.0.23
13 | Boolean == 0.2.4
14 | cabal-doctest == 1
15 | cmdargs == 0.10.15
16 | comonad == 5
17 | contravariant == 1.4
18 | distributive == 0.5.2
19 | dlist == 0.8.0.2
20 | exceptions == 0.8.3
21 | fail == 4.9.0.0
22 | file-embed == 0.0.9.1
23 | free == 4.12.4
24 | hashable == 1.2.6.0
25 | haskell-lexer == 1.0.1
26 | ieee754 == 0.7.9
27 | integer-logarithms == 1.0.1
28 | kan-extensions == 5.0.1
29 | lens == 4.14
30 | lifted-base == 0.2.3.11
31 | MemoTrie == 0.6.7
32 | mmorph == 1.0.9
33 | monad-control == 1.0.2.0
34 | mtl == 2.2.1
35 | newtype-generics == 0.5
36 | NumInstances == 1.4
37 | old-locale == 1.0.0.7
38 | optparse-applicative == 0.12.1.0
39 | parallel == 3.2.1.0
40 | prelude-extras == 0.4.0.3
41 | pretty-show == 1.6.12
42 | primitive == 0.6.1.0
43 | profunctors == 5.2
44 | QuickCheck == 2.8.2
45 | quickcheck-text == 0.1.2.1
46 | random == 1.1
47 | reflection == 2.1.2
48 | resourcet == 1.1.9
49 | scientific == 0.3.4.10
50 | semigroupoids == 5.1
51 | semigroups == 0.18.2
52 | semigroups -bytestring-builder
53 | StateVar == 1.1.0.4
54 | stm == 2.4.4.1
55 | streaming == 0.1.4.5
56 | streaming-bytestring == 0.1.4.6
57 | tagged == 0.8.5
58 | text == 1.2.2.1
59 | tf-random == 0.5
60 | thyme == 0.3.5.5
61 | time-locale-compat == 0.1.1.3
62 | time-locale-compat -old-locale
63 | transformers == 0.5.4.0
64 | transformers-base == 0.4.4
65 | transformers-compat == 0.5.1.4
66 | unordered-containers == 0.2.8.0
67 | uuid-types == 1.0.3
68 | vector == 0.11.0.0
69 | vector-space == 0.10.4
70 | vector-th-unbox == 0.2.1.6
71 | void == 0.7.1
72 | 


--------------------------------------------------------------------------------
/zebra-cli/ambiata-zebra-cli.lock-8.0.1:
--------------------------------------------------------------------------------
 1 | # mafia-lock-file-version: 0
 2 | adjunctions == 4.3
 3 | aeson == 1.1.1.0
 4 | aeson-pretty == 0.8.2
 5 | ansi-terminal == 0.6.2.3
 6 | ansi-wl-pprint == 0.6.7.3
 7 | attoparsec == 0.13.1.0
 8 | base-compat == 0.9.2
 9 | base-orphans == 0.5.4
10 | base64-bytestring == 1.0.0.1
11 | bifunctors == 5.3
12 | bindings-DSL == 1.0.23
13 | Boolean == 0.2.4
14 | cabal-doctest == 1
15 | cmdargs == 0.10.15
16 | comonad == 5
17 | contravariant == 1.4
18 | distributive == 0.5.2
19 | dlist == 0.8.0.2
20 | exceptions == 0.8.3
21 | file-embed == 0.0.9.1
22 | free == 4.12.4
23 | hashable == 1.2.6.0
24 | haskell-lexer == 1.0.1
25 | ieee754 == 0.7.9
26 | integer-logarithms == 1.0.1
27 | kan-extensions == 5.0.1
28 | lens == 4.14
29 | lifted-base == 0.2.3.11
30 | MemoTrie == 0.6.7
31 | mmorph == 1.0.9
32 | monad-control == 1.0.2.0
33 | mtl == 2.2.1
34 | newtype-generics == 0.5
35 | NumInstances == 1.4
36 | old-locale == 1.0.0.7
37 | optparse-applicative == 0.12.1.0
38 | parallel == 3.2.1.0
39 | prelude-extras == 0.4.0.3
40 | pretty-show == 1.6.12
41 | primitive == 0.6.1.0
42 | profunctors == 5.2
43 | QuickCheck == 2.8.2
44 | quickcheck-text == 0.1.2.1
45 | random == 1.1
46 | reflection == 2.1.2
47 | resourcet == 1.1.9
48 | scientific == 0.3.4.10
49 | semigroupoids == 5.1
50 | semigroups == 0.18.2
51 | StateVar == 1.1.0.4
52 | stm == 2.4.4.1
53 | streaming == 0.1.4.5
54 | streaming-bytestring == 0.1.4.6
55 | tagged == 0.8.5
56 | text == 1.2.2.1
57 | tf-random == 0.5
58 | thyme == 0.3.5.5
59 | time-locale-compat == 0.1.1.3
60 | time-locale-compat -old-locale
61 | transformers-base == 0.4.4
62 | transformers-compat == 0.5.1.4
63 | unordered-containers == 0.2.8.0
64 | uuid-types == 1.0.3
65 | vector == 0.11.0.0
66 | vector-space == 0.10.4
67 | vector-th-unbox == 0.2.1.6
68 | void == 0.7.1
69 | 


--------------------------------------------------------------------------------
/zebra-cli/mafia:
--------------------------------------------------------------------------------
1 | ../framework/mafia


--------------------------------------------------------------------------------
/zebra-cli/master.toml:
--------------------------------------------------------------------------------
 1 | [master]
 2 |   runner = "s3://ambiata-dispensary-v2/dist/master/master-haskell/linux/x86_64/20170613033625-fddfe03/master-haskell-20170613033625-fddfe03"
 3 |   version = 1
 4 |   sha1 = "0a9e91a8c3373a402c5f2c4fc8d9aabb210c03b3"
 5 | 
 6 | [build.dist-7-10]
 7 |   GHC_VERSION = "7.10.2"
 8 |   CABAL_VERSION = "1.24.0.0"
 9 |   CACHE = "true"
10 |   PUBLISH = "true"
11 |   PUBLISH_S3 = "$AMBIATA_ARTEFACTS_MASTER"
12 |   PUBLISH_EXECUTABLES = "zebra"
13 | 
14 | [build.dist-8-0]
15 |   GHC_VERSION = "8.0.1"
16 |   CABAL_VERSION = "1.24.0.0"
17 |   CACHE = "true"
18 | 
19 | [build.branches-7-10]
20 |   GHC_VERSION = "7.10.2"
21 |   CABAL_VERSION = "1.24.0.0"
22 |   CACHE = "true"
23 |   PUBLISH = "true"
24 |   PUBLISH_S3 = "$AMBIATA_ARTEFACTS_BRANCHES"
25 |   PUBLISH_EXECUTABLES = "zebra"
26 | 
27 | [build.branches-8-0]
28 |   GHC_VERSION = "8.0.1"
29 |   CABAL_VERSION = "1.24.0.0"
30 |   CACHE = "true"
31 | 


--------------------------------------------------------------------------------
/zebra-cli/src/Zebra/Command/Adapt.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE BangPatterns #-}
 2 | {-# LANGUAGE LambdaCase #-}
 3 | {-# LANGUAGE NoImplicitPrelude #-}
 4 | {-# LANGUAGE OverloadedStrings #-}
 5 | {-# LANGUAGE ScopedTypeVariables #-}
 6 | module Zebra.Command.Adapt (
 7 |     Adapt(..)
 8 |   , zebraAdapt
 9 | 
10 |   , AdaptError(..)
11 |   , renderAdaptError
12 |   ) where
13 | 
14 | import           Control.Monad.Catch (MonadMask(..))
15 | import           Control.Monad.IO.Class (MonadIO(..))
16 | import           Control.Monad.Morph (hoist, squash, lift)
17 | import           Control.Monad.Trans.Resource (MonadResource, ResourceT)
18 | import           Control.Monad.Trans.Either (EitherT, hoistEither)
19 | 
20 | import qualified Data.ByteString as ByteString
21 | import qualified Data.Text as Text
22 | 
23 | import           P
24 | 
25 | import           System.IO (IO, FilePath)
26 | import           System.IO.Error (IOError)
27 | 
28 | import           Viking (Stream, Of(..))
29 | import qualified Viking.ByteStream as ByteStream
30 | import qualified Viking.Stream as Stream
31 | 
32 | 
33 | import           Zebra.Command.Util
34 | import           Zebra.Serial.Binary (BinaryStripedEncodeError, BinaryStripedDecodeError)
35 | import qualified Zebra.Serial.Binary as Binary
36 | import           Zebra.Serial.Text (TextSchemaDecodeError)
37 | import qualified Zebra.Serial.Text as Text
38 | import qualified Zebra.Table.Schema as Schema
39 | import           Zebra.Table.Striped (StripedError)
40 | import qualified Zebra.Table.Striped as Striped
41 | import           Zebra.X.Either
42 | 
43 | data Adapt =
44 |   Adapt {
45 |       adaptInput :: !FilePath
46 |     , adaptSchema :: !FilePath
47 |     , adaptOutput :: !(Maybe FilePath)
48 |     } deriving (Eq, Ord, Show)
49 | 
50 | data AdaptError =
51 |     AdaptIOError !IOError
52 |   | AdaptTextSchemaDecodeError !TextSchemaDecodeError
53 |   | AdaptBinaryStripedEncodeError !BinaryStripedEncodeError
54 |   | AdaptBinaryStripedDecodeError !BinaryStripedDecodeError
55 |   | AdaptStripedError !StripedError
56 |     deriving (Eq, Show)
57 | 
58 | renderAdaptError :: AdaptError -> Text
59 | renderAdaptError = \case
60 |   AdaptIOError err ->
61 |     Text.pack (show err)
62 |   AdaptTextSchemaDecodeError err ->
63 |     Text.renderTextSchemaDecodeError err
64 |   AdaptBinaryStripedEncodeError err ->
65 |     Binary.renderBinaryStripedEncodeError err
66 |   AdaptBinaryStripedDecodeError err ->
67 |     Binary.renderBinaryStripedDecodeError err
68 |   AdaptStripedError err ->
69 |     Striped.renderStripedError err
70 | 
71 | transmute :: Monad m => Schema.Table -> Stream (Of Striped.Table) m r -> Stream (Of Striped.Table) (EitherT StripedError m) r
72 | transmute schema =
73 |   Stream.mapM (hoistEither . Striped.transmute schema) .
74 |   hoist lift
75 | {-# INLINE transmute #-}
76 | 
77 | zebraAdapt :: forall m. (MonadResource m, MonadMask m) => Adapt -> EitherT AdaptError m ()
78 | zebraAdapt x = do
79 |   schema0 <- liftIO . ByteString.readFile $ adaptSchema x
80 |   schema <- firstT AdaptTextSchemaDecodeError . hoistEither $ Text.decodeSchema schema0
81 | 
82 |   squash . firstJoin AdaptIOError .
83 |       writeFileOrStdout (adaptOutput x) .
84 |     hoist (firstJoin AdaptBinaryStripedEncodeError) .
85 |       Binary.encodeStriped .
86 |     hoist (firstJoin AdaptStripedError) .
87 |       transmute schema .
88 |     hoist (firstJoin AdaptBinaryStripedDecodeError) .
89 |       Binary.decodeStriped .
90 |     hoist (firstT AdaptIOError) $
91 |       ByteStream.readFile (adaptInput x)
92 | {-# SPECIALIZE zebraAdapt :: Adapt -> EitherT AdaptError (ResourceT IO) () #-}
93 | 


--------------------------------------------------------------------------------
/zebra-cli/src/Zebra/Command/Consistency.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE BangPatterns #-}
  2 | {-# LANGUAGE DoAndIfThenElse #-}
  3 | {-# LANGUAGE LambdaCase #-}
  4 | {-# LANGUAGE NoImplicitPrelude #-}
  5 | {-# LANGUAGE OverloadedStrings #-}
  6 | {-# LANGUAGE PatternSynonyms #-}
  7 | {-# LANGUAGE ScopedTypeVariables #-}
  8 | module Zebra.Command.Consistency (
  9 |     Consistency(..)
 10 | 
 11 |   , zebraConsistency
 12 |   , renderConsistencyError
 13 |   ) where
 14 | 
 15 | import           Control.Monad.Catch (MonadMask)
 16 | import           Control.Monad.IO.Class (MonadIO(..))
 17 | import           Control.Monad.Morph (hoist)
 18 | import           Control.Monad.Trans.Resource (MonadResource, ResourceT)
 19 | import           Control.Monad.Trans.Either (EitherT, pattern EitherT, runEitherT, hoistEither)
 20 | 
 21 | import qualified Data.Text as Text
 22 | import qualified Data.Map as Map
 23 | 
 24 | import           P
 25 | 
 26 | import qualified System.Console.Regions as Concurrent
 27 | import           System.IO (IO, FilePath)
 28 | import           System.IO.Error (IOError)
 29 | 
 30 | import qualified Viking.ByteStream as ByteStream
 31 | import qualified Viking.Stream as Stream
 32 | 
 33 | 
 34 | import           Zebra.Command.Summary (zebraDisplay)
 35 | import           Zebra.Serial.Binary (BinaryStripedDecodeError)
 36 | import qualified Zebra.Serial.Binary as Binary
 37 | import qualified Zebra.Table.Logical as Logical
 38 | import           Zebra.Table.Striped (StripedError)
 39 | import qualified Zebra.Table.Striped as Striped
 40 | import           Zebra.X.Either
 41 | 
 42 | data Consistency =
 43 |   Consistency {
 44 |       consistencyInput :: !FilePath
 45 |     } deriving (Eq, Ord, Show)
 46 | 
 47 | data ConsistencyError =
 48 |     ConsistencyIOError !IOError
 49 |   | ConsistencyStripedDecodeError !BinaryStripedDecodeError
 50 |   | ConsistencyStripedError !StripedError
 51 |   | ConsistencyInterBlock Int Logical.Value Logical.Value
 52 |     deriving (Eq, Show)
 53 | 
 54 | data BlockRange =
 55 |   BlockRange {
 56 |       _blockMinKey :: Maybe (Logical.Value)
 57 |     , _blockMaxKey :: Maybe (Logical.Value)
 58 |     } deriving (Eq, Ord, Show)
 59 | 
 60 | renderConsistencyError :: ConsistencyError -> Text
 61 | renderConsistencyError = \case
 62 |   ConsistencyIOError err ->
 63 |     Text.pack (show err)
 64 |   ConsistencyStripedDecodeError err ->
 65 |     "Error decoding: " <> Binary.renderBinaryStripedDecodeError err
 66 |   ConsistencyStripedError err ->
 67 |     Striped.renderStripedError err
 68 |   ConsistencyInterBlock chunk keya keyb ->
 69 |     Text.unlines [
 70 |       "Consistency check failure:"
 71 |     , "Chunk " <> Text.pack (show chunk) <> " has max key:"
 72 |     , "  " <> Text.pack (show keya)
 73 |     , "while the following chunk starts with"
 74 |     , "  " <> Text.pack (show keyb)
 75 |     ]
 76 | 
 77 | summariseBlock :: Logical.Table -> BlockRange
 78 | summariseBlock = \case
 79 |   Logical.Binary _ -> BlockRange Nothing Nothing
 80 |   Logical.Array _ -> BlockRange Nothing Nothing
 81 |   Logical.Map m ->
 82 |     if Map.null m then
 83 |       BlockRange Nothing Nothing
 84 |     else
 85 |       let
 86 |         bmin = fst $ Map.findMin m
 87 |         bmax = fst $ Map.findMax m
 88 |       in
 89 |         BlockRange (Just bmin) (Just bmax)
 90 | 
 91 | zebraConsistency ::
 92 |      (MonadResource m, MonadMask m)
 93 |   => Consistency
 94 |   -> EitherT ConsistencyError m ()
 95 | zebraConsistency x =
 96 |   EitherT . Concurrent.displayConsoleRegions . runEitherT $ do
 97 |     region <- liftIO $ Concurrent.openConsoleRegion Concurrent.Linear
 98 | 
 99 |     let
100 |       tables =
101 |         Stream.store (zebraDisplay region) .
102 |         hoist (firstJoin ConsistencyStripedDecodeError) .
103 |           Binary.decodeStriped .
104 |         hoist (firstT ConsistencyIOError) $
105 |           ByteStream.readFile (consistencyInput x)
106 | 
107 |       fromStriped =
108 |         Stream.mapM (hoistEither . first ConsistencyStripedError . Striped.toLogical)
109 | 
110 |       blockRanges =
111 |         Stream.map summariseBlock $ fromStriped tables
112 | 
113 |       loop mseen (BlockRange mbmin mbmax) =
114 |         case (mseen, mbmin) of
115 |           ((chunk, Just seen), Just bmin) ->
116 |             case compare seen bmin of
117 |               LT -> pure (chunk + 1, mbmax)
118 |               _  -> hoistEither (Left $ ConsistencyInterBlock chunk seen bmin)
119 |           ((chunk, _), _) ->
120 |             pure (chunk + 1, mbmax)
121 | 
122 |     _ <- Stream.foldM_ loop (pure (0, Nothing)) pure blockRanges
123 |     pure ()
124 | 
125 | {-# SPECIALIZE zebraConsistency :: Consistency -> EitherT ConsistencyError (ResourceT IO) () #-}
126 | 


--------------------------------------------------------------------------------
/zebra-cli/src/Zebra/Command/Export.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE LambdaCase #-}
  2 | {-# LANGUAGE NoImplicitPrelude #-}
  3 | {-# LANGUAGE OverloadedStrings #-}
  4 | {-# LANGUAGE PatternSynonyms #-}
  5 | {-# LANGUAGE ScopedTypeVariables #-}
  6 | module Zebra.Command.Export (
  7 |     Export(..)
  8 |   , ExportOutput(..)
  9 |   , zebraExport
 10 | 
 11 |   , ExportError(..)
 12 |   , renderExportError
 13 |   ) where
 14 | 
 15 | import           Control.Monad.Catch (MonadCatch(..))
 16 | import           Control.Monad.IO.Class (MonadIO(..))
 17 | import           Control.Monad.Morph (hoist)
 18 | import           Control.Monad.Trans.Resource (MonadResource, ResourceT)
 19 | import           Control.Monad.Trans.Either (EitherT)
 20 | 
 21 | import qualified Data.ByteString as ByteString
 22 | import           Data.List.NonEmpty (NonEmpty)
 23 | import qualified Data.Text as Text
 24 | 
 25 | import           P
 26 | 
 27 | import           System.IO (IO, FilePath, stdout)
 28 | import           System.IO.Error (IOError)
 29 | 
 30 | import           Viking (ByteStream)
 31 | import qualified Viking.ByteStream as ByteStream
 32 | 
 33 | 
 34 | import           Zebra.Serial.Binary (BinaryLogicalDecodeError)
 35 | import qualified Zebra.Serial.Binary as Binary
 36 | import           Zebra.Serial.Text (TextLogicalEncodeError)
 37 | import qualified Zebra.Serial.Text as Text
 38 | import qualified Zebra.Table.Schema as Schema
 39 | import           Zebra.X.Either
 40 | 
 41 | 
 42 | data Export =
 43 |   Export {
 44 |       exportInput :: !FilePath
 45 |     , exportOutputs :: !(NonEmpty ExportOutput)
 46 |     } deriving (Eq, Ord, Show)
 47 | 
 48 | data ExportOutput =
 49 |     ExportTextStdout
 50 |   | ExportText !FilePath
 51 |   | ExportSchemaStdout
 52 |   | ExportSchema !FilePath
 53 |     deriving (Eq, Ord, Show)
 54 | 
 55 | data ExportError =
 56 |     ExportIOError !IOError
 57 |   | ExportBinaryLogicalDecodeError !BinaryLogicalDecodeError
 58 |   | ExportTextLogicalEncodeError !TextLogicalEncodeError
 59 |     deriving (Eq, Show)
 60 | 
 61 | renderExportError :: ExportError -> Text
 62 | renderExportError = \case
 63 |   ExportIOError err ->
 64 |     Text.pack (show err)
 65 |   ExportBinaryLogicalDecodeError err ->
 66 |     Binary.renderBinaryLogicalDecodeError err
 67 |   ExportTextLogicalEncodeError err ->
 68 |     Text.renderTextLogicalEncodeError err
 69 | 
 70 | takeSchemaOutputs :: [ExportOutput] -> [Maybe FilePath]
 71 | takeSchemaOutputs =
 72 |   mapMaybe $ \case
 73 |     ExportSchemaStdout ->
 74 |       Just Nothing
 75 |     ExportSchema x ->
 76 |       Just (Just x)
 77 |     _ ->
 78 |       Nothing
 79 | 
 80 | takeTextOutputs :: [ExportOutput] -> [Maybe FilePath]
 81 | takeTextOutputs =
 82 |   mapMaybe $ \case
 83 |     ExportTextStdout ->
 84 |       Just Nothing
 85 |     ExportText x ->
 86 |       Just (Just x)
 87 |     _ ->
 88 |       Nothing
 89 | 
 90 | writeSchema :: (MonadIO m, MonadCatch m) => Schema.Table -> Maybe FilePath -> EitherT ExportError m ()
 91 | writeSchema schema = \case
 92 |   Nothing ->
 93 |     tryEitherT ExportIOError . liftIO $
 94 |       ByteString.hPut stdout (Text.encodeSchema schema)
 95 | 
 96 |   Just path ->
 97 |     tryEitherT ExportIOError . liftIO $
 98 |       ByteString.writeFile path (Text.encodeSchema schema)
 99 | 
100 | writeText :: (MonadCatch m, MonadResource m) => [Maybe FilePath] -> ByteStream (EitherT ExportError m) r -> ByteStream (EitherT ExportError m) r
101 | writeText xs0 tables =
102 |   case xs0 of
103 |     [] ->
104 |       tables
105 | 
106 |     Nothing : xs ->
107 |       writeText xs . hoist (firstJoin ExportIOError) . ByteStream.injectEitherT $
108 |         ByteStream.hPut stdout (ByteStream.copy tables)
109 | 
110 |     Just path : xs -> do
111 |       writeText xs . hoist (firstJoin ExportIOError) . ByteStream.injectEitherT $
112 |         ByteStream.writeFile path (ByteStream.copy tables)
113 | 
114 | zebraExport :: forall m. (MonadResource m, MonadCatch m) => Export -> EitherT ExportError m ()
115 | zebraExport export = do
116 |   (schema, tables0) <-
117 |     firstJoin ExportBinaryLogicalDecodeError .
118 |       Binary.decodeLogical .
119 |     hoist (firstT ExportIOError) $
120 |       ByteStream.readFile (exportInput export)
121 | 
122 |   let
123 |     outputs =
124 |       toList $ exportOutputs export
125 | 
126 |   traverse_ (writeSchema schema) $ takeSchemaOutputs outputs
127 | 
128 |   case takeTextOutputs outputs of
129 |     [] ->
130 |       pure ()
131 | 
132 |     xs ->
133 |       let
134 |         tables1 :: ByteStream (EitherT ExportError m) ()
135 |         tables1 =
136 |           hoist (firstJoin ExportTextLogicalEncodeError) .
137 |             Text.encodeLogical schema .
138 |           hoist (firstJoin ExportBinaryLogicalDecodeError) $
139 |             tables0
140 |       in
141 |         ByteStream.effects $
142 |           writeText xs tables1
143 | {-# SPECIALIZE zebraExport :: Export -> EitherT ExportError (ResourceT IO) () #-}
144 | 


--------------------------------------------------------------------------------
/zebra-cli/src/Zebra/Command/Import.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE DoAndIfThenElse #-}
 2 | {-# LANGUAGE LambdaCase #-}
 3 | {-# LANGUAGE NoImplicitPrelude #-}
 4 | {-# LANGUAGE OverloadedStrings #-}
 5 | {-# LANGUAGE PatternSynonyms #-}
 6 | module Zebra.Command.Import (
 7 |     Import(..)
 8 |   , zebraImport
 9 | 
10 |   , ImportError(..)
11 |   , renderImportError
12 |   ) where
13 | 
14 | import           Control.Monad.Catch (MonadCatch)
15 | import           Control.Monad.IO.Class (MonadIO(..))
16 | import           Control.Monad.Morph (hoist, squash)
17 | import           Control.Monad.Trans.Resource (MonadResource, ResourceT)
18 | import           Control.Monad.Trans.Either (EitherT, hoistEither)
19 | 
20 | import qualified Data.ByteString as ByteString
21 | import qualified Data.Text as Text
22 | 
23 | import           P
24 | 
25 | import           System.IO (IO, FilePath)
26 | import           System.IO.Error (IOError)
27 | 
28 | import qualified Viking.ByteStream as ByteStream
29 | 
30 | 
31 | import           Zebra.Command.Util
32 | import           Zebra.Serial.Binary (BinaryStripedEncodeError)
33 | import qualified Zebra.Serial.Binary as Binary
34 | import           Zebra.Serial.Text (TextSchemaDecodeError, TextStripedDecodeError)
35 | import qualified Zebra.Serial.Text as Text
36 | import           Zebra.X.Either
37 | 
38 | data Import =
39 |   Import {
40 |       importInput :: !FilePath
41 |     , importSchema :: !FilePath
42 |     , importOutput :: !(Maybe FilePath)
43 |     } deriving (Eq, Ord, Show)
44 | 
45 | data ImportError =
46 |     ImportIOError !IOError
47 |   | ImportTextSchemaDecodeError !TextSchemaDecodeError
48 |   | ImportTextStripedDecodeError !TextStripedDecodeError
49 |   | ImportBinaryStripedEncodeError !BinaryStripedEncodeError
50 |     deriving (Eq, Show)
51 | 
52 | renderImportError :: ImportError -> Text
53 | renderImportError = \case
54 |   ImportIOError err ->
55 |     Text.pack (show err)
56 |   ImportTextSchemaDecodeError err ->
57 |     Text.renderTextSchemaDecodeError err
58 |   ImportTextStripedDecodeError err ->
59 |     Text.renderTextStripedDecodeError err
60 |   ImportBinaryStripedEncodeError err ->
61 |     Binary.renderBinaryStripedEncodeError err
62 | 
63 | zebraImport :: (MonadResource m, MonadCatch m) => Import -> EitherT ImportError m ()
64 | zebraImport x = do
65 |   schema0 <- liftIO . ByteString.readFile $ importSchema x
66 |   schema <- firstT ImportTextSchemaDecodeError . hoistEither $ Text.decodeSchema schema0
67 | 
68 |   squash . firstJoin ImportIOError .
69 |       writeFileOrStdout (importOutput x) .
70 |     hoist (firstJoin ImportBinaryStripedEncodeError) .
71 |       Binary.encodeStriped .
72 |     hoist (firstJoin ImportTextStripedDecodeError) .
73 |       Text.decodeStriped schema .
74 |     hoist (firstT ImportIOError) $
75 |       ByteStream.readFile (importInput x)
76 | {-# SPECIALIZE zebraImport :: Import -> EitherT ImportError (ResourceT IO) () #-}
77 | 


--------------------------------------------------------------------------------
/zebra-cli/src/Zebra/Command/Util.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE DoAndIfThenElse #-}
 2 | {-# LANGUAGE NoImplicitPrelude #-}
 3 | module Zebra.Command.Util (
 4 |     getBinaryStdout
 5 |   , writeFileOrStdout
 6 |   ) where
 7 | 
 8 | import           Control.Monad.Catch (MonadCatch)
 9 | import           Control.Monad.IO.Class (MonadIO(..))
10 | import           Control.Monad.Trans.Resource (MonadResource)
11 | import           Control.Monad.Trans.Either (EitherT)
12 | 
13 | import qualified Data.Char as Char
14 | 
15 | import           P
16 | 
17 | import           System.IO (FilePath, Handle, getLine, putStr)
18 | import           System.IO (stdout, hIsTerminalDevice, hFlush)
19 | import           System.IO.Error (IOError)
20 | 
21 | import           Viking (ByteStream)
22 | import qualified Viking.ByteStream as ByteStream
23 | 
24 | 
25 | 
26 | getBinaryStdout :: MonadIO m => m (Maybe Handle)
27 | getBinaryStdout =
28 |   liftIO $ do
29 |     tty <- hIsTerminalDevice stdout
30 |     if tty then do
31 |       putStr "About to write a binary file to stdout. Are you sure? [y/N] "
32 |       hFlush stdout
33 |       line <- fmap Char.toLower <$> getLine
34 |       if line == "y" then
35 |         pure $ Just stdout
36 |       else
37 |         pure Nothing
38 |     else
39 |       pure $ Just stdout
40 | 
41 | writeFileOrStdout ::
42 |      MonadResource m
43 |   => MonadCatch m
44 |   => Maybe FilePath
45 |   -> ByteStream m ()
46 |   -> EitherT IOError m ()
47 | writeFileOrStdout mpath bss = do
48 |   case mpath of
49 |     Nothing -> do
50 |       mhandle <- getBinaryStdout
51 |       case mhandle of
52 |         Nothing ->
53 |           pure ()
54 |         Just handle ->
55 |           ByteStream.hPut handle bss
56 | 
57 |     Just path ->
58 |       ByteStream.writeFile path bss
59 | {-# INLINABLE writeFileOrStdout #-}
60 | 


--------------------------------------------------------------------------------
/zebra-cli/test/cli/core/.keep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HuwCampbell/zebra/afe41591499934b00263abf816f5ef3509d6672c/zebra-cli/test/cli/core/.keep


--------------------------------------------------------------------------------
/zebra-cli/test/cli/import/run:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | : ${UPDATE:=0}
 3 | : ${ZEBRA:=$PWD/dist/build/zebra/zebra}
 4 | 
 5 | if hash colordiff 2>/dev/null; then
 6 |   diff=colordiff
 7 | else
 8 |   diff=diff
 9 | fi
10 | 
11 | FAILURES=0
12 | TESTS=0
13 | 
14 | export ZEBRA
15 | ZEBRA_TEMP_ROOT=$(mktemp -d 2>/dev/null || mktemp -d -t 'zebra-cli-test')
16 | 
17 | clean_up () {
18 |     rm -rf "$ZEBRA_TEMP_ROOT"
19 | }
20 | 
21 | trap clean_up EXIT
22 | 
23 | for REL_DIR in test/cli/import/t*; do
24 |     echo "─── $REL_DIR ───"
25 |     ABS_DIR=$PWD/$REL_DIR
26 | 
27 |     export ZEBRA_TEMP=$ZEBRA_TEMP_ROOT/$REL_DIR
28 |     mkdir -p $ZEBRA_TEMP
29 | 
30 |     if [ $UPDATE -eq 0 ]; then
31 |         (cd $REL_DIR; $ABS_DIR/script 2>&1 | $diff -u $ABS_DIR/expected -)
32 |     else
33 |         echo " * Updating expected"
34 |         (cd $REL_DIR; $ABS_DIR/script 2>&1 | tee $ABS_DIR/expected)
35 |     fi
36 | 
37 |     if [ $? -ne 0 ]; then
38 |         FAILURES=$(expr $FAILURES + 1)
39 |         echo " * FAILED: $REL_DIR"
40 |     fi
41 | 
42 |     TESTS=$(expr $TESTS + 1)
43 | done
44 | 
45 | if [ $FAILURES -ne 0 ]; then
46 |     echo " * Some failures: $FAILURES"
47 |     exit $FAILURES
48 | else
49 |     echo " * Passed $TESTS tests!"
50 | fi
51 | 


--------------------------------------------------------------------------------
/zebra-cli/test/cli/import/t01-array/expected:
--------------------------------------------------------------------------------
 1 | === Schema ===
 2 | {
 3 |   "version": "v0",
 4 |   "schema": {
 5 |     "array": {
 6 |       "element": {
 7 |         "struct": {
 8 |           "fields": [
 9 |             {
10 |               "name": "schleem",
11 |               "schema": {
12 |                 "enum": {
13 |                   "variants": [
14 |                     {
15 |                       "name": "none",
16 |                       "schema": {
17 |                         "unit": {}
18 |                       }
19 |                     },
20 |                     {
21 |                       "name": "some",
22 |                       "schema": {
23 |                         "int": {}
24 |                       }
25 |                     }
26 |                   ]
27 |                 }
28 |               }
29 |             },
30 |             {
31 |               "name": "entity_id",
32 |               "schema": {
33 |                 "binary": {
34 |                   "encoding": {
35 |                     "utf8": {}
36 |                   }
37 |                 }
38 |               }
39 |             }
40 |           ]
41 |         }
42 |       }
43 |     }
44 |   }
45 | }
46 | 
47 | === Data ===
48 | {"entity_id":"abc","schleem":{"none":{}}}
49 | {"entity_id":"def","schleem":{"some":456}}
50 | {"entity_id":"ghi","schleem":{"some":789}}
51 | {"entity_id":"jkl","schleem":{"none":{}}}
52 | 
53 | === Summary ===
54 | block_count        = 1
55 | row_count          = 4
56 | max_rows_per_block = 4
57 | first_key          = <file is not a map>
58 | last_key           = <file is not a map>
59 | 


--------------------------------------------------------------------------------
/zebra-cli/test/cli/import/t01-array/input.zschema:
--------------------------------------------------------------------------------
 1 | {
 2 |   "version": "v0",
 3 |   "schema": {
 4 |     "array": {
 5 |       "element": {
 6 |         "struct": {
 7 |           "fields": [
 8 |             {
 9 |               "name": "schleem",
10 |               "schema": {
11 |                 "enum": {
12 |                   "variants": [
13 |                     {
14 |                       "name": "none",
15 |                       "schema": { "unit": {} }
16 |                     },
17 |                     {
18 |                       "name": "some",
19 |                       "schema": { "int": {} }
20 |                     }
21 |                   ]
22 |                 }
23 |               }
24 |             },
25 |             {
26 |               "name": "entity_id",
27 |               "schema": {
28 |                  "binary": {
29 |                    "encoding": { "utf8": {} }
30 |                  }
31 |               }
32 |             }
33 |           ]
34 |         }
35 |       }
36 |     }
37 |   }
38 | }
39 | 


--------------------------------------------------------------------------------
/zebra-cli/test/cli/import/t01-array/input.ztxt:
--------------------------------------------------------------------------------
1 | {"schleem":{"none":{}}, "entity_id": "abc"}
2 | {"schleem":{"some":456}, "entity_id": "def"}
3 | {"schleem":{"some":789}, "entity_id": "ghi"}
4 | {"schleem":{"none":{}}, "entity_id": "jkl"}
5 | 


--------------------------------------------------------------------------------
/zebra-cli/test/cli/import/t01-array/script:
--------------------------------------------------------------------------------
 1 | #!/bin/sh -eu
 2 | 
 3 | ZBIN=$ZEBRA_TEMP/intermediate.zbin
 4 | 
 5 | $ZEBRA import input.ztxt --schema input.zschema --output $ZBIN
 6 | 
 7 | echo "=== Schema ==="
 8 | $ZEBRA export $ZBIN --schema-stdout
 9 | 
10 | echo
11 | echo "=== Data ==="
12 | $ZEBRA export $ZBIN --output-stdout
13 | 
14 | echo
15 | echo "=== Summary ==="
16 | $ZEBRA summary $ZBIN
17 | 


--------------------------------------------------------------------------------
/zebra-cli/test/cli/import/t02-map/expected:
--------------------------------------------------------------------------------
 1 | === Schema ===
 2 | {
 3 |   "version": "v0",
 4 |   "schema": {
 5 |     "map": {
 6 |       "key": {
 7 |         "struct": {
 8 |           "fields": [
 9 |             {
10 |               "name": "entity_hash",
11 |               "schema": {
12 |                 "int": {}
13 |               }
14 |             },
15 |             {
16 |               "name": "entity_id",
17 |               "schema": {
18 |                 "binary": {
19 |                   "encoding": {
20 |                     "utf8": {}
21 |                   }
22 |                 }
23 |               }
24 |             }
25 |           ]
26 |         }
27 |       },
28 |       "value": {
29 |         "enum": {
30 |           "variants": [
31 |             {
32 |               "name": "cash",
33 |               "schema": {
34 |                 "double": {}
35 |               }
36 |             },
37 |             {
38 |               "name": "item",
39 |               "schema": {
40 |                 "binary": {}
41 |               }
42 |             }
43 |           ]
44 |         }
45 |       }
46 |     }
47 |   }
48 | }
49 | 
50 | === Data ===
51 | {"key":{"entity_hash":10,"entity_id":"barney"},"value":{"cash":27.6}}
52 | {"key":{"entity_hash":10,"entity_id":"homer"},"value":{"item":"ZHVmZg=="}}
53 | {"key":{"entity_hash":20,"entity_id":"marge"},"value":{"cash":45.1}}
54 | {"key":{"entity_hash":30,"entity_id":"lisa"},"value":{"item":"c2F4b3Bob25l"}}
55 | 
56 | === Summary ===
57 | block_count        = 1
58 | row_count          = 4
59 | max_rows_per_block = 4
60 | first_key          = {"entity_hash":10,"entity_id":"barney"}
61 | last_key           = {"entity_hash":30,"entity_id":"lisa"}
62 | 


--------------------------------------------------------------------------------
/zebra-cli/test/cli/import/t02-map/input.zschema:
--------------------------------------------------------------------------------
 1 | {
 2 |   "version": "v0",
 3 |   "schema": {
 4 |     "map": {
 5 |       "key": {
 6 |         "struct": {
 7 |           "fields": [
 8 |             {
 9 |               "name": "entity_hash",
10 |               "schema": {
11 |                 "int": {}
12 |               }
13 |             },
14 |             {
15 |               "name": "entity_id",
16 |               "schema": {
17 |                 "binary": {
18 |                   "encoding": { "utf8": {} }
19 |                 }
20 |               }
21 |             }
22 |           ]
23 |         }
24 |       },
25 |       "value": {
26 |         "enum": {
27 |           "variants": [
28 |             {
29 |               "name": "cash",
30 |               "schema": {
31 |                 "double": {}
32 |               }
33 |             },
34 |             {
35 |               "name": "item",
36 |               "schema": {
37 |                 "binary": {}
38 |               }
39 |             }
40 |           ]
41 |         }
42 |       }
43 |     }
44 |   }
45 | }
46 | 


--------------------------------------------------------------------------------
/zebra-cli/test/cli/import/t02-map/input.ztxt:
--------------------------------------------------------------------------------
1 | { "key": { "entity_hash": 10, "entity_id": "barney" }, "value": { "cash": 27.6 } }
2 | { "key": { "entity_hash": 10, "entity_id": "homer" }, "value": { "item": "ZHVmZg==" } }
3 | { "key": { "entity_hash": 20, "entity_id": "marge" }, "value": { "cash": 45.1 } }
4 | { "key": { "entity_hash": 30, "entity_id": "lisa" }, "value": { "item": "c2F4b3Bob25l" } }
5 | 


--------------------------------------------------------------------------------
/zebra-cli/test/cli/import/t02-map/script:
--------------------------------------------------------------------------------
 1 | #!/bin/sh -eu
 2 | 
 3 | ZBIN=$ZEBRA_TEMP/intermediate.zbin
 4 | 
 5 | $ZEBRA import input.ztxt --schema input.zschema --output $ZBIN
 6 | 
 7 | echo "=== Schema ==="
 8 | $ZEBRA export $ZBIN --schema-stdout
 9 | 
10 | echo
11 | echo "=== Data ==="
12 | $ZEBRA export $ZBIN --output-stdout
13 | 
14 | echo
15 | echo "=== Summary ==="
16 | $ZEBRA summary $ZBIN
17 | 


--------------------------------------------------------------------------------
/zebra-cli/test/cli/import/t03-binary/expected:
--------------------------------------------------------------------------------
 1 | === Schema ===
 2 | {
 3 |   "version": "v0",
 4 |   "schema": {
 5 |     "binary": {
 6 |       "encoding": {
 7 |         "utf8": {}
 8 |       }
 9 |     }
10 |   }
11 | }
12 | 
13 | === Data ===
14 | zebra is a typed data format for storing arbitrary combinations of sums,
15 | products and arrays in compressed form. It achieves high compression by
16 | decomposing this data in to a "struct of arrays" representation on disk. zebra
17 | stores data in blocks so that files can be streamed without loading the entire
18 | file in memory.
19 | 
20 | === Summary ===
21 | block_count        = 1
22 | row_count          = 319
23 | max_rows_per_block = 319
24 | first_key          = <file is not a map>
25 | last_key           = <file is not a map>
26 | 


--------------------------------------------------------------------------------
/zebra-cli/test/cli/import/t03-binary/input.zschema:
--------------------------------------------------------------------------------
1 | {
2 |   "version": "v0",
3 |   "schema": {
4 |     "binary": {
5 |       "encoding": { "utf8": {} }
6 |     }
7 |   }
8 | }
9 | 


--------------------------------------------------------------------------------
/zebra-cli/test/cli/import/t03-binary/input.ztxt:
--------------------------------------------------------------------------------
1 | zebra is a typed data format for storing arbitrary combinations of sums,
2 | products and arrays in compressed form. It achieves high compression by
3 | decomposing this data in to a "struct of arrays" representation on disk. zebra
4 | stores data in blocks so that files can be streamed without loading the entire
5 | file in memory.
6 | 


--------------------------------------------------------------------------------
/zebra-cli/test/cli/import/t03-binary/script:
--------------------------------------------------------------------------------
 1 | #!/bin/sh -eu
 2 | 
 3 | ZBIN=$ZEBRA_TEMP/intermediate.zbin
 4 | 
 5 | $ZEBRA import input.ztxt --schema input.zschema --output $ZBIN
 6 | 
 7 | echo "=== Schema ==="
 8 | $ZEBRA export $ZBIN --schema-stdout
 9 | 
10 | echo
11 | echo "=== Data ==="
12 | $ZEBRA export $ZBIN --output-stdout
13 | 
14 | echo
15 | echo "=== Summary ==="
16 | $ZEBRA summary $ZBIN
17 | 


--------------------------------------------------------------------------------
/zebra-cli/test/cli/import/t04-time/expected:
--------------------------------------------------------------------------------
  1 | === Schema ===
  2 | {
  3 |   "version": "v0",
  4 |   "schema": {
  5 |     "array": {
  6 |       "element": {
  7 |         "struct": {
  8 |           "fields": [
  9 |             {
 10 |               "name": "entity_id",
 11 |               "schema": {
 12 |                 "binary": {
 13 |                   "encoding": {
 14 |                     "utf8": {}
 15 |                   }
 16 |                 }
 17 |               }
 18 |             },
 19 |             {
 20 |               "name": "attribute_id",
 21 |               "schema": {
 22 |                 "binary": {
 23 |                   "encoding": {
 24 |                     "utf8": {}
 25 |                   }
 26 |                 }
 27 |               }
 28 |             },
 29 |             {
 30 |               "name": "value",
 31 |               "schema": {
 32 |                 "binary": {
 33 |                   "encoding": {
 34 |                     "utf8": {}
 35 |                   }
 36 |                 }
 37 |               }
 38 |             },
 39 |             {
 40 |               "name": "date",
 41 |               "schema": {
 42 |                 "int": {
 43 |                   "encoding": {
 44 |                     "date": {}
 45 |                   }
 46 |                 }
 47 |               }
 48 |             },
 49 |             {
 50 |               "name": "time_s",
 51 |               "schema": {
 52 |                 "int": {
 53 |                   "encoding": {
 54 |                     "time": {
 55 |                       "interval": {
 56 |                         "seconds": {}
 57 |                       }
 58 |                     }
 59 |                   }
 60 |                 }
 61 |               }
 62 |             },
 63 |             {
 64 |               "name": "time_ms",
 65 |               "schema": {
 66 |                 "int": {
 67 |                   "encoding": {
 68 |                     "time": {
 69 |                       "interval": {
 70 |                         "milliseconds": {}
 71 |                       }
 72 |                     }
 73 |                   }
 74 |                 }
 75 |               }
 76 |             },
 77 |             {
 78 |               "name": "time_us",
 79 |               "schema": {
 80 |                 "int": {
 81 |                   "encoding": {
 82 |                     "time": {
 83 |                       "interval": {
 84 |                         "microseconds": {}
 85 |                       }
 86 |                     }
 87 |                   }
 88 |                 }
 89 |               }
 90 |             }
 91 |           ]
 92 |         }
 93 |       }
 94 |     }
 95 |   }
 96 | }
 97 | 
 98 | === Data ===
 99 | {"time_ms":"2017-01-01 12:34:56.789","value":"szechuan","entity_id":"abc","date":"2017-01-01","time_us":"2017-01-01 12:34:56.789012","attribute_id":"delicious","time_s":"2017-01-01 12:34:56"}
100 | {"time_ms":"2017-02-03 13:35:57.79","value":"bacon","entity_id":"def","date":"2017-02-03","time_us":"2017-02-03 13:35:57.790013","attribute_id":"delicious","time_s":"2017-02-03 13:35:57"}
101 | {"time_ms":"2017-03-04 14:36:37.123","value":"avocado","entity_id":"ghi","date":"2017-03-04","time_us":"2017-03-04 14:36:37.123456","attribute_id":"delicious","time_s":"2017-03-04 14:36:37"}
102 | {"time_ms":"1600-03-01 00:00:00","value":"wagyu","entity_id":"jkl","date":"1600-03-01","time_us":"1600-03-01 00:00:00","attribute_id":"delicious","time_s":"1600-03-01 00:00:00"}
103 | {"time_ms":"2999-12-31 23:59:59.999","value":"praline","entity_id":"mno","date":"2999-12-31","time_us":"2999-12-31 23:59:59.999999","attribute_id":"delicious","time_s":"2999-12-31 23:59:59"}
104 | 
105 | === Summary ===
106 | block_count        = 1
107 | row_count          = 5
108 | max_rows_per_block = 5
109 | first_key          = <file is not a map>
110 | last_key           = <file is not a map>
111 | 


--------------------------------------------------------------------------------
/zebra-cli/test/cli/import/t04-time/input.zschema:
--------------------------------------------------------------------------------
 1 | {
 2 |   "version": "v0",
 3 |   "schema": {
 4 |     "array": {
 5 |       "element": {
 6 |         "struct": {
 7 |           "fields": [
 8 |             {
 9 |               "name": "entity_id",
10 |               "schema": {
11 |                  "binary": {
12 |                    "encoding": { "utf8": {} }
13 |                  }
14 |               }
15 |             },
16 |             {
17 |               "name": "attribute_id",
18 |               "schema": {
19 |                  "binary": {
20 |                    "encoding": { "utf8": {} }
21 |                  }
22 |               }
23 |             },
24 |             {
25 |               "name": "value",
26 |               "schema": {
27 |                  "binary": {
28 |                    "encoding": { "utf8": {} }
29 |                  }
30 |               }
31 |             },
32 |             {
33 |               "name": "date",
34 |               "schema": {
35 |                  "int": {
36 |                    "encoding": { "date": {} }
37 |                  }
38 |               }
39 |             },
40 |             {
41 |               "name": "time_s",
42 |               "schema": {
43 |                  "int": {
44 |                    "encoding": { "time": { "interval": { "seconds": {} } } }
45 |                  }
46 |               }
47 |             },
48 |             {
49 |               "name": "time_ms",
50 |               "schema": {
51 |                  "int": {
52 |                    "encoding": { "time": { "interval": { "milliseconds": {} } } }
53 |                  }
54 |               }
55 |             },
56 |             {
57 |               "name": "time_us",
58 |               "schema": {
59 |                  "int": {
60 |                    "encoding": { "time": { "interval": { "microseconds": {} } } }
61 |                  }
62 |               }
63 |             }
64 |           ]
65 |         }
66 |       }
67 |     }
68 |   }
69 | }
70 | 


--------------------------------------------------------------------------------
/zebra-cli/test/cli/import/t04-time/input.ztxt:
--------------------------------------------------------------------------------
1 | {"entity_id": "abc", "attribute_id": "delicious", "value": "szechuan", "date": "2017-01-01", "time_s": "2017-01-01 12:34:56", "time_ms": "2017-01-01 12:34:56.789", "time_us": "2017-01-01 12:34:56.789012"}
2 | {"entity_id": "def", "attribute_id": "delicious", "value": "bacon", "date": "2017-02-03", "time_s": "2017-02-03 13:35:57", "time_ms": "2017-02-03 13:35:57.790", "time_us": "2017-02-03 13:35:57.790013"}
3 | {"entity_id": "ghi", "attribute_id": "delicious", "value": "avocado", "date": "2017-03-04", "time_s": "2017-03-04 14:36:37", "time_ms": "2017-03-04 14:36:37.123", "time_us": "2017-03-04 14:36:37.123456"}
4 | {"entity_id": "jkl", "attribute_id": "delicious", "value": "wagyu", "date": "1600-03-01", "time_s": "1600-03-01 00:00:00", "time_ms": "1600-03-01 00:00:00.000", "time_us": "1600-03-01 00:00:00.000000"}
5 | {"entity_id": "mno", "attribute_id": "delicious", "value": "praline", "date": "2999-12-31", "time_s": "2999-12-31 23:59:59", "time_ms": "2999-12-31 23:59:59.999", "time_us": "2999-12-31 23:59:59.999999"}
6 | 


--------------------------------------------------------------------------------
/zebra-cli/test/cli/import/t04-time/script:
--------------------------------------------------------------------------------
 1 | #!/bin/sh -eu
 2 | 
 3 | ZBIN=$ZEBRA_TEMP/intermediate.zbin
 4 | 
 5 | $ZEBRA import input.ztxt --schema input.zschema --output $ZBIN
 6 | 
 7 | echo "=== Schema ==="
 8 | $ZEBRA export $ZBIN --schema-stdout
 9 | 
10 | echo
11 | echo "=== Data ==="
12 | $ZEBRA export $ZBIN --output-stdout
13 | 
14 | echo
15 | echo "=== Summary ==="
16 | $ZEBRA summary $ZBIN
17 | 


--------------------------------------------------------------------------------
/zebra-cli/test/cli/merge/run:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | : ${UPDATE:=0}
 3 | : ${ZEBRA:=$PWD/dist/build/zebra/zebra}
 4 | 
 5 | if hash colordiff 2>/dev/null; then
 6 |   diff=colordiff
 7 | else
 8 |   diff=diff
 9 | fi
10 | 
11 | FAILURES=0
12 | TESTS=0
13 | 
14 | export ZEBRA
15 | ZEBRA_TEMP_ROOT=$(mktemp -d 2>/dev/null || mktemp -d -t 'zebra-cli-test')
16 | 
17 | clean_up () {
18 |     rm -rf "$ZEBRA_TEMP_ROOT"
19 | }
20 | 
21 | trap clean_up EXIT
22 | 
23 | for REL_DIR in test/cli/merge/t*; do
24 |     echo "─── $REL_DIR ───"
25 |     ABS_DIR=$PWD/$REL_DIR
26 | 
27 |     export ZEBRA_TEMP=$ZEBRA_TEMP_ROOT/$REL_DIR
28 |     mkdir -p $ZEBRA_TEMP
29 | 
30 |     if [ $UPDATE -eq 0 ]; then
31 |         (cd $REL_DIR; $ABS_DIR/script 2>&1 | $diff -u $ABS_DIR/expected -)
32 |     else
33 |         echo " * Updating expected"
34 |         (cd $REL_DIR; $ABS_DIR/script 2>&1 | tee $ABS_DIR/expected)
35 |     fi
36 | 
37 |     if [ $? -ne 0 ]; then
38 |         FAILURES=$(expr $FAILURES + 1)
39 |         echo " * FAILED: $REL_DIR"
40 |     fi
41 | 
42 |     TESTS=$(expr $TESTS + 1)
43 | done
44 | 
45 | if [ $FAILURES -ne 0 ]; then
46 |     echo " * Some failures: $FAILURES"
47 |     exit $FAILURES
48 | else
49 |     echo " * Passed $TESTS tests!"
50 | fi
51 | 


--------------------------------------------------------------------------------
/zebra-cli/test/cli/merge/t01-same/expected:
--------------------------------------------------------------------------------
 1 | === Schema ===
 2 | {
 3 |   "version": "v0",
 4 |   "schema": {
 5 |     "map": {
 6 |       "key": {
 7 |         "struct": {
 8 |           "fields": [
 9 |             {
10 |               "name": "entity_hash",
11 |               "schema": {
12 |                 "int": {}
13 |               }
14 |             },
15 |             {
16 |               "name": "entity_id",
17 |               "schema": {
18 |                 "binary": {
19 |                   "encoding": {
20 |                     "utf8": {}
21 |                   }
22 |                 }
23 |               }
24 |             }
25 |           ]
26 |         }
27 |       },
28 |       "value": {
29 |         "struct": {
30 |           "fields": [
31 |             {
32 |               "name": "cash",
33 |               "schema": {
34 |                 "double": {
35 |                   "default": {
36 |                     "allow": {}
37 |                   }
38 |                 }
39 |               }
40 |             },
41 |             {
42 |               "name": "item",
43 |               "schema": {
44 |                 "enum": {
45 |                   "variants": [
46 |                     {
47 |                       "name": "none",
48 |                       "schema": {
49 |                         "unit": {}
50 |                       }
51 |                     },
52 |                     {
53 |                       "name": "some",
54 |                       "schema": {
55 |                         "binary": {
56 |                           "encoding": {
57 |                             "utf8": {}
58 |                           }
59 |                         }
60 |                       }
61 |                     }
62 |                   ],
63 |                   "default": {
64 |                     "allow": {}
65 |                   }
66 |                 }
67 |               }
68 |             }
69 |           ]
70 |         }
71 |       }
72 |     }
73 |   }
74 | }
75 | 
76 | === Data ===
77 | {"key":{"entity_hash":10,"entity_id":"barney"},"value":{"item":{"none":{}},"cash":27.6}}
78 | {"key":{"entity_hash":10,"entity_id":"homer"},"value":{"item":{"none":{}},"cash":6.1}}
79 | {"key":{"entity_hash":20,"entity_id":"marge"},"value":{"item":{"none":{}},"cash":45.1}}
80 | {"key":{"entity_hash":30,"entity_id":"lisa"},"value":{"item":{"some":"saxophone"},"cash":5}}
81 | {"key":{"entity_hash":50,"entity_id":"bart"},"value":{"item":{"some":"skateboard"},"cash":27}}
82 | {"key":{"entity_hash":50,"entity_id":"millhouse"},"value":{"item":{"none":{}},"cash":19}}
83 | 


--------------------------------------------------------------------------------
/zebra-cli/test/cli/merge/t01-same/input.zschema:
--------------------------------------------------------------------------------
 1 | {
 2 |   "version": "v0",
 3 |   "schema": {
 4 |     "map": {
 5 |       "key": {
 6 |         "struct": {
 7 |           "fields": [
 8 |             {
 9 |               "name": "entity_hash",
10 |               "schema": {
11 |                 "int": {}
12 |               }
13 |             },
14 |             {
15 |               "name": "entity_id",
16 |               "schema": {
17 |                 "binary": {
18 |                   "encoding": { "utf8": {} }
19 |                 }
20 |               }
21 |             }
22 |           ]
23 |         }
24 |       },
25 |       "value": {
26 |         "struct": {
27 |           "fields": [
28 |             {
29 |               "name": "cash",
30 |               "schema": {
31 |                 "double": {
32 |                   "default": {
33 |                     "allow": {}
34 |                   }
35 |                 }
36 |               }
37 |             },
38 |             {
39 |               "name": "item",
40 |               "schema": {
41 |                 "enum": {
42 |                   "variants": [
43 |                     {
44 |                       "name": "none",
45 |                       "schema": {
46 |                         "unit": {}
47 |                       }
48 |                     },
49 |                     {
50 |                       "name": "some",
51 |                       "schema": {
52 |                         "binary": {
53 |                           "encoding": { "utf8": {} }
54 |                         }
55 |                       }
56 |                     }
57 |                   ],
58 |                   "default": {
59 |                     "allow": {}
60 |                   }
61 |                 }
62 |               }
63 |             }
64 |           ]
65 |         }
66 |       }
67 |     }
68 |   }
69 | }
70 | 


--------------------------------------------------------------------------------
/zebra-cli/test/cli/merge/t01-same/input0.ztxt:
--------------------------------------------------------------------------------
1 | { "key": { "entity_hash": 10, "entity_id": "barney" }, "value": { "cash": 27.6, "item": { "none": {} } } }
2 | { "key": { "entity_hash": 10, "entity_id": "homer" }, "value": { "cash": 6.1, "item": { "none": {} } } }
3 | { "key": { "entity_hash": 20, "entity_id": "marge" }, "value": { "cash": 45.1, "item": { "none": {} } } }
4 | 


--------------------------------------------------------------------------------
/zebra-cli/test/cli/merge/t01-same/input1.ztxt:
--------------------------------------------------------------------------------
1 | { "key": { "entity_hash": 30, "entity_id": "lisa" }, "value": { "cash": 5, "item": { "some": "saxophone" } } }
2 | { "key": { "entity_hash": 50, "entity_id": "bart" }, "value": { "cash": 27, "item": { "some": "skateboard" } } }
3 | { "key": { "entity_hash": 50, "entity_id": "millhouse" }, "value": { "cash": 19, "item": { "none": {} } } }
4 | 


--------------------------------------------------------------------------------
/zebra-cli/test/cli/merge/t01-same/script:
--------------------------------------------------------------------------------
 1 | #!/bin/sh -eu
 2 | 
 3 | ZBIN0=$ZEBRA_TEMP/intermediate0.zbin
 4 | ZBIN1=$ZEBRA_TEMP/intermediate1.zbin
 5 | MERGED=$ZEBRA_TEMP/merged.zbin
 6 | 
 7 | $ZEBRA import input0.ztxt --schema input.zschema --output $ZBIN0
 8 | $ZEBRA import input1.ztxt --schema input.zschema --output $ZBIN1
 9 | $ZEBRA merge $ZBIN0 $ZBIN1 -o $MERGED --output-v3
10 | 
11 | echo "=== Schema ==="
12 | $ZEBRA export $MERGED --schema-stdout
13 | 
14 | echo
15 | echo "=== Data ==="
16 | $ZEBRA export $MERGED --output-stdout
17 | 


--------------------------------------------------------------------------------
/zebra-cli/test/cli/merge/t02-different/expected:
--------------------------------------------------------------------------------
 1 | === Schema ===
 2 | {
 3 |   "version": "v0",
 4 |   "schema": {
 5 |     "map": {
 6 |       "key": {
 7 |         "struct": {
 8 |           "fields": [
 9 |             {
10 |               "name": "entity_hash",
11 |               "schema": {
12 |                 "int": {}
13 |               }
14 |             },
15 |             {
16 |               "name": "entity_id",
17 |               "schema": {
18 |                 "binary": {
19 |                   "encoding": {
20 |                     "utf8": {}
21 |                   }
22 |                 }
23 |               }
24 |             }
25 |           ]
26 |         }
27 |       },
28 |       "value": {
29 |         "struct": {
30 |           "fields": [
31 |             {
32 |               "name": "cash",
33 |               "schema": {
34 |                 "double": {
35 |                   "default": {
36 |                     "allow": {}
37 |                   }
38 |                 }
39 |               }
40 |             },
41 |             {
42 |               "name": "item",
43 |               "schema": {
44 |                 "enum": {
45 |                   "variants": [
46 |                     {
47 |                       "name": "none",
48 |                       "schema": {
49 |                         "unit": {}
50 |                       }
51 |                     },
52 |                     {
53 |                       "name": "some",
54 |                       "schema": {
55 |                         "binary": {
56 |                           "encoding": {
57 |                             "utf8": {}
58 |                           }
59 |                         }
60 |                       }
61 |                     }
62 |                   ],
63 |                   "default": {
64 |                     "allow": {}
65 |                   }
66 |                 }
67 |               }
68 |             }
69 |           ]
70 |         }
71 |       }
72 |     }
73 |   }
74 | }
75 | 
76 | === Data ===
77 | {"key":{"entity_hash":10,"entity_id":"barney"},"value":{"item":{"none":{}},"cash":27.6}}
78 | {"key":{"entity_hash":10,"entity_id":"homer"},"value":{"item":{"none":{}},"cash":6.1}}
79 | {"key":{"entity_hash":20,"entity_id":"marge"},"value":{"item":{"none":{}},"cash":45.1}}
80 | {"key":{"entity_hash":30,"entity_id":"lisa"},"value":{"item":{"some":"saxophone"},"cash":0}}
81 | {"key":{"entity_hash":50,"entity_id":"bart"},"value":{"item":{"some":"skateboard"},"cash":0}}
82 | {"key":{"entity_hash":50,"entity_id":"millhouse"},"value":{"item":{"none":{}},"cash":0}}
83 | 


--------------------------------------------------------------------------------
/zebra-cli/test/cli/merge/t02-different/input0.zschema:
--------------------------------------------------------------------------------
 1 | {
 2 |   "version": "v0",
 3 |   "schema": {
 4 |     "map": {
 5 |       "key": {
 6 |         "struct": {
 7 |           "fields": [
 8 |             {
 9 |               "name": "entity_hash",
10 |               "schema": {
11 |                 "int": {}
12 |               }
13 |             },
14 |             {
15 |               "name": "entity_id",
16 |               "schema": {
17 |                 "binary": {
18 |                   "encoding": { "utf8": {} }
19 |                 }
20 |               }
21 |             }
22 |           ]
23 |         }
24 |       },
25 |       "value": {
26 |         "struct": {
27 |           "fields": [
28 |             {
29 |               "name": "cash",
30 |               "schema": {
31 |                 "double": {
32 |                   "default": {
33 |                     "allow": {}
34 |                   }
35 |                 }
36 |               }
37 |             }
38 |           ]
39 |         }
40 |       }
41 |     }
42 |   }
43 | }
44 | 


--------------------------------------------------------------------------------
/zebra-cli/test/cli/merge/t02-different/input0.ztxt:
--------------------------------------------------------------------------------
1 | { "key": { "entity_hash": 10, "entity_id": "barney" }, "value": { "cash": 27.6 } }
2 | { "key": { "entity_hash": 10, "entity_id": "homer" }, "value": { "cash": 6.1 } }
3 | { "key": { "entity_hash": 20, "entity_id": "marge" }, "value": { "cash": 45.1 } }
4 | 


--------------------------------------------------------------------------------
/zebra-cli/test/cli/merge/t02-different/input1.zschema:
--------------------------------------------------------------------------------
 1 | {
 2 |   "version": "v0",
 3 |   "schema": {
 4 |     "map": {
 5 |       "key": {
 6 |         "struct": {
 7 |           "fields": [
 8 |             {
 9 |               "name": "entity_hash",
10 |               "schema": {
11 |                 "int": {}
12 |               }
13 |             },
14 |             {
15 |               "name": "entity_id",
16 |               "schema": {
17 |                 "binary": {
18 |                   "encoding": { "utf8": {} }
19 |                 }
20 |               }
21 |             }
22 |           ]
23 |         }
24 |       },
25 |       "value": {
26 |         "struct": {
27 |           "fields": [
28 |             {
29 |               "name": "item",
30 |               "schema": {
31 |                 "enum": {
32 |                   "variants": [
33 |                     {
34 |                       "name": "none",
35 |                       "schema": {
36 |                         "unit": {}
37 |                       }
38 |                     },
39 |                     {
40 |                       "name": "some",
41 |                       "schema": {
42 |                         "binary": {
43 |                           "encoding": { "utf8": {} }
44 |                         }
45 |                       }
46 |                     }
47 |                   ],
48 |                   "default": {
49 |                     "allow": {}
50 |                   }
51 |                 }
52 |               }
53 |             }
54 |           ]
55 |         }
56 |       }
57 |     }
58 |   }
59 | }
60 | 


--------------------------------------------------------------------------------
/zebra-cli/test/cli/merge/t02-different/input1.ztxt:
--------------------------------------------------------------------------------
1 | { "key": { "entity_hash": 30, "entity_id": "lisa" }, "value": { "item": { "some": "saxophone" } } }
2 | { "key": { "entity_hash": 50, "entity_id": "bart" }, "value": { "item": { "some": "skateboard" } } }
3 | { "key": { "entity_hash": 50, "entity_id": "millhouse" }, "value": { "item": { "none": {} } } }
4 | 


--------------------------------------------------------------------------------
/zebra-cli/test/cli/merge/t02-different/script:
--------------------------------------------------------------------------------
 1 | #!/bin/sh -eu
 2 | 
 3 | ZBIN0=$ZEBRA_TEMP/intermediate0.zbin
 4 | ZBIN1=$ZEBRA_TEMP/intermediate1.zbin
 5 | MERGED=$ZEBRA_TEMP/merged.zbin
 6 | 
 7 | $ZEBRA import input0.ztxt --schema input0.zschema --output $ZBIN0
 8 | $ZEBRA import input1.ztxt --schema input1.zschema --output $ZBIN1
 9 | $ZEBRA merge $ZBIN0 $ZBIN1 -o $MERGED --output-v3
10 | 
11 | echo "=== Schema ==="
12 | $ZEBRA export $MERGED --schema-stdout
13 | 
14 | echo
15 | echo "=== Data ==="
16 | $ZEBRA export $MERGED --output-stdout
17 | 


--------------------------------------------------------------------------------
/zebra-cli/test/test-cli.hs:
--------------------------------------------------------------------------------
1 | import           Disorder.Core.Main
2 | 
3 | main :: IO ()
4 | main =
5 |   disorderCliMain ["./dist/build/zebra/zebra"]
6 | 


--------------------------------------------------------------------------------
/zebra-cli/test/test-io.hs:
--------------------------------------------------------------------------------
1 | import           Disorder.Core.Main
2 | 
3 | main :: IO ()
4 | main =
5 |   disorderMain [
6 |     ]
7 | 


--------------------------------------------------------------------------------
/zebra-cli/test/test.hs:
--------------------------------------------------------------------------------
1 | import           Disorder.Core.Main
2 | 
3 | main :: IO ()
4 | main =
5 |   disorderMain [
6 |     ]
7 | 


--------------------------------------------------------------------------------
/zebra-core/.ghci:
--------------------------------------------------------------------------------
1 | ../framework/ghci


--------------------------------------------------------------------------------
/zebra-core/ambiata-zebra-core.lock-7.10.2:
--------------------------------------------------------------------------------
 1 | # mafia-lock-file-version: 0
 2 | adjunctions == 4.3
 3 | aeson == 1.1.1.0
 4 | aeson-pretty == 0.8.5
 5 | attoparsec == 0.13.1.0
 6 | base-compat == 0.9.2
 7 | base-orphans == 0.5.4
 8 | base64-bytestring == 1.0.0.1
 9 | bifunctors == 5.3
10 | bindings-DSL == 1.0.23
11 | Boolean == 0.2.4
12 | cabal-doctest == 1
13 | case-insensitive == 1.2.0.10
14 | cmdargs == 0.10.15
15 | comonad == 5
16 | contravariant == 1.4
17 | distributive == 0.5.2
18 | dlist == 0.8.0.2
19 | exceptions == 0.8.3
20 | fail == 4.9.0.0
21 | file-embed == 0.0.9.1
22 | free == 4.12.4
23 | hashable == 1.2.6.0
24 | haskell-lexer == 1.0.1
25 | ieee754 == 0.7.9
26 | integer-logarithms == 1.0.1
27 | kan-extensions == 5.0.1
28 | lens == 4.14
29 | lifted-base == 0.2.3.11
30 | MemoTrie == 0.6.7
31 | mmorph == 1.0.9
32 | monad-control == 1.0.2.0
33 | mtl == 2.2.1
34 | newtype-generics == 0.5
35 | NumInstances == 1.4
36 | old-locale == 1.0.0.7
37 | old-time == 1.1.0.3
38 | parallel == 3.2.1.0
39 | prelude-extras == 0.4.0.3
40 | pretty-show == 1.6.12
41 | primitive == 0.6.1.0
42 | profunctors == 5.2
43 | QuickCheck == 2.8.2
44 | quickcheck-instances == 0.3.13
45 | quickcheck-text == 0.1.2.1
46 | random == 1.1
47 | reflection == 2.1.2
48 | resourcet == 1.1.9
49 | scientific == 0.3.4.10
50 | semigroupoids == 5.1
51 | semigroups == 0.18.2
52 | semigroups -bytestring-builder
53 | StateVar == 1.1.0.4
54 | stm == 2.4.4.1
55 | streaming == 0.1.4.5
56 | streaming-bytestring == 0.1.4.6
57 | tagged == 0.8.5
58 | text == 1.2.2.1
59 | tf-random == 0.5
60 | thyme == 0.3.5.5
61 | time-locale-compat == 0.1.1.3
62 | time-locale-compat -old-locale
63 | transformers == 0.5.4.0
64 | transformers-base == 0.4.4
65 | transformers-compat == 0.5.1.4
66 | unordered-containers == 0.2.8.0
67 | uuid-types == 1.0.3
68 | vector == 0.11.0.0
69 | vector-space == 0.10.4
70 | vector-th-unbox == 0.2.1.6
71 | void == 0.7.1
72 | 


--------------------------------------------------------------------------------
/zebra-core/ambiata-zebra-core.lock-8.0.1:
--------------------------------------------------------------------------------
 1 | # mafia-lock-file-version: 0
 2 | adjunctions == 4.3
 3 | aeson == 1.1.1.0
 4 | aeson-pretty == 0.8.5
 5 | attoparsec == 0.13.1.0
 6 | base-compat == 0.9.2
 7 | base-orphans == 0.5.4
 8 | base64-bytestring == 1.0.0.1
 9 | bifunctors == 5.3
10 | bindings-DSL == 1.0.23
11 | Boolean == 0.2.4
12 | cabal-doctest == 1
13 | case-insensitive == 1.2.0.10
14 | cmdargs == 0.10.15
15 | comonad == 5
16 | contravariant == 1.4
17 | distributive == 0.5.2
18 | dlist == 0.8.0.2
19 | exceptions == 0.8.3
20 | file-embed == 0.0.9.1
21 | free == 4.12.4
22 | hashable == 1.2.6.0
23 | haskell-lexer == 1.0.1
24 | ieee754 == 0.7.9
25 | integer-logarithms == 1.0.1
26 | kan-extensions == 5.0.1
27 | lens == 4.14
28 | lifted-base == 0.2.3.11
29 | MemoTrie == 0.6.7
30 | mmorph == 1.0.9
31 | monad-control == 1.0.2.0
32 | mtl == 2.2.1
33 | newtype-generics == 0.5
34 | NumInstances == 1.4
35 | old-locale == 1.0.0.7
36 | old-time == 1.1.0.3
37 | parallel == 3.2.1.0
38 | prelude-extras == 0.4.0.3
39 | pretty-show == 1.6.12
40 | primitive == 0.6.1.0
41 | profunctors == 5.2
42 | QuickCheck == 2.8.2
43 | quickcheck-instances == 0.3.13
44 | quickcheck-text == 0.1.2.1
45 | random == 1.1
46 | reflection == 2.1.2
47 | resourcet == 1.1.9
48 | scientific == 0.3.4.10
49 | semigroupoids == 5.1
50 | semigroups == 0.18.2
51 | StateVar == 1.1.0.4
52 | stm == 2.4.4.1
53 | streaming == 0.1.4.5
54 | streaming-bytestring == 0.1.4.6
55 | tagged == 0.8.5
56 | text == 1.2.2.1
57 | tf-random == 0.5
58 | thyme == 0.3.5.5
59 | time-locale-compat == 0.1.1.3
60 | time-locale-compat -old-locale
61 | transformers-base == 0.4.4
62 | transformers-compat == 0.5.1.4
63 | unordered-containers == 0.2.8.0
64 | uuid-types == 1.0.3
65 | vector == 0.11.0.0
66 | vector-space == 0.10.4
67 | vector-th-unbox == 0.2.1.6
68 | void == 0.7.1
69 | 


--------------------------------------------------------------------------------
/zebra-core/ambiata-zebra-core.mk:
--------------------------------------------------------------------------------
  1 | # Zebra makefile
  2 | #
  3 | # Cabal is very good at compiling C source, but doesn't do a very good job of
  4 | # knowing *when* is should compile particular files.
  5 | # It doesn't perform any sort of dependency tracking, so if you modify a header
  6 | # then the source files won't be recompiled.
  7 | # We really want to fix this, but we still want to use Cabal because it has a
  8 | # few other nice things.
  9 | #
 10 | # The idea is to trick Cabal into recompiling source files by deleting the objects.
 11 | # We trick Make into doing this by having forget-me files for each source file:
 12 | # these are empty files which do nothing but have the right dependencies.
 13 | # Each forget-me file depends on the C file as well as the headers that the C file
 14 | # depends on.
 15 | # Then, when Make sees that one of these dependencies has changed, we remove the
 16 | # object file and touch the forget-me file to mark it as up to date.
 17 | #
 18 | 
 19 | # Which mafia to use. Caller can set a specific one as environment variable.
 20 | MAFIA ?= ./mafia
 21 | 
 22 | # Where to look for code.
 23 | # We should be able to reuse this for other projects just by modifying these two directories.
 24 | #
 25 | # C code goes here. We can't just look in '.' because that would include lib submodules.
 26 | # This should work with multiple entries (space separated).
 27 | DIR_C 	= csrc
 28 | # Haskell code.
 29 | # One annoying thing is that the Haskell object files are in dist/build/Module.dyn_o
 30 | # rather than dist/build/src/Module.dyn_o,
 31 | # whereas C code are under dist/build/csrc/module.dyn_o.
 32 | # This only works with a single entry.
 33 | DIR_HS 	= src
 34 | 
 35 | 
 36 | # Directory to store make dependencies and forget-me files
 37 | MAKE_DIR 	= dist/build/make
 38 | # Where cabal stores the build stuff that we have to clean
 39 | CABAL_OUT = dist/build
 40 | 
 41 | # Slurp all the .c and .hsc files
 42 | SRC_C 	= $(shell find $(DIR_C) -name "*.c")
 43 | SRC_HSC = $(shell find $(DIR_HS) -name "*.hsc")
 44 | 
 45 | # Convert each file to .dep, so that
 46 | # 	csrc/module.c => dist/build/make/csrc/module.dep
 47 | # and
 48 | # 	src/Module.hs => dist/build/make/Module.dep
 49 | DEP_C 	= $(patsubst %.c,$(MAKE_DIR)/%.dep,$(SRC_C))
 50 | DEP_HSC = $(patsubst $(DIR_HS)/%.hsc,$(MAKE_DIR)/%.dep,$(SRC_HSC))
 51 | 
 52 | # All the things we care about: dependency files and the forget-me files.
 53 | TARGETS 	= 																												\
 54 | 	$(DEP_C) 																													\
 55 | 	$(DEP_HSC) 																												\
 56 | 	$(patsubst $(DIR_HS)/%.hsc,$(MAKE_DIR)/%.hs.forget,$(SRC_HSC)) 		\
 57 | 	$(patsubst %.c,$(MAKE_DIR)/%.dyn_o.forget,$(SRC_C)) 							\
 58 | 
 59 | 
 60 | # Phony means these targets do not correspond to actual files
 61 | .PHONY: main clean-deps
 62 | 
 63 | # Do nothing except make sure our dependencies are in order
 64 | main: $(TARGETS)
 65 | 	@# And be quiet about it
 66 | 	@:
 67 | 
 68 | # Blow away the dependency and forget-me files.
 69 | # This doesn't clean the actual code.
 70 | clean-deps:
 71 | 	@echo "Cleaning directory $(MAKE_DIR)"
 72 | 	@rm -r $(MAKE_DIR)
 73 | 
 74 | # Now include all the generated dependency files.
 75 | # This has to go after main because the first target is the default.
 76 | -include $(DEP_HSC)
 77 | -include $(DEP_C)
 78 | 
 79 | # If you want to build a dependency file from a C file, you can:
 80 | # The dependency should have the forget-me file as the target, so
 81 | # the file csrc/module.dep contains:
 82 | #
 83 | # > dist/build/make/csrc/module.dyn_o.forget: csrc/module.c csrc/module.h
 84 | #
 85 | $(MAKE_DIR)/%.dep : %.c
 86 | 	@echo "(C) Dependencies -------- $<"
 87 | 	@# Ensure the directory exists before we try to put things in it.
 88 | 	@mkdir -p $(@D)
 89 | 	@gcc -MM -MT $(patsubst %.c,$(MAKE_DIR)/%.dyn_o.forget,$<) $< `$(MAFIA) cflags` > $@
 90 | 
 91 | # Convert the .hsc into C code so we can compute its dependencies.
 92 | $(MAKE_DIR)/%_hsc_make.c: $(DIR_HS)/%.hsc
 93 | 	@echo "(HSC) Converting -------- $<"
 94 | 	@mkdir -p $(@D)
 95 | 	@hsc2hs $< -o $(patsubst %_hsc_make.c,%,$@) --no-compile
 96 | 
 97 | DIR_C_INCLUDES = $(patsubst %,-I%,$(DIR_C))
 98 | 
 99 | # This is the same as the rule for C files, except we also need to give it the C source directory
100 | # on the include path.
101 | $(MAKE_DIR)/%.dep: $(MAKE_DIR)/%_hsc_make.c
102 | 	@echo "(HSC) Dependencies ------ $<"
103 | 	@mkdir -p $(@D)
104 | 	@gcc -MM -MT$(patsubst %.dep,%.hs.forget,$@) $< `$(MAFIA) cflags` $(DIR_C_INCLUDES) > $@
105 | 
106 | # Now, we can update the forget-me file.
107 | # This has no dependencies listed here: they are all specified in the generated dependency files.
108 | %.forget :
109 | 	@echo "(Forget) ---------------- $(patsubst $(MAKE_DIR)/%.forget,$(CABAL_OUT)/%,$@)"
110 | 	@rm -f $(patsubst $(MAKE_DIR)/%.forget,$(CABAL_OUT)/%,$@)
111 | 	@mkdir -p $(@D)
112 | 	@touch $@
113 | 
114 | 


--------------------------------------------------------------------------------
/zebra-core/csrc/zebra_append.h:
--------------------------------------------------------------------------------
 1 | #ifndef __ZEBRA_APPEND_H
 2 | #define __ZEBRA_APPEND_H
 3 | 
 4 | #include "zebra_data.h"
 5 | 
 6 | error_t zebra_append_attribute (
 7 |     anemone_mempool_t *pool
 8 |   , const zebra_attribute_t *in
 9 |   , int64_t ix
10 |   , zebra_attribute_t *out_into
11 |   , int64_t out_count
12 |   );
13 | 
14 | error_t zebra_append_table (
15 |     anemone_mempool_t *pool
16 |   , const zebra_table_t *in
17 |   , int64_t ix
18 |   , zebra_table_t *out_into
19 |   , int64_t out_count
20 |   );
21 | 
22 | 
23 | // Append an entity to a block.
24 | // If *block is null, a new block will be allocated.
25 | // The block must have been allocated using this function, as it ensures the arrays are allocated with extra capacity on the end.
26 | // Do not call this with blocks allocated elsewhere, or you will write over someone else's memory.
27 | error_t zebra_append_block_entity (
28 |     anemone_mempool_t *pool
29 |   , zebra_entity_t *entity
30 |   , zebra_block_t **inout_block
31 |   );
32 | 
33 | #endif//__ZEBRA_APPEND_H
34 | 
35 | 
36 | 


--------------------------------------------------------------------------------
/zebra-core/csrc/zebra_bindings.h:
--------------------------------------------------------------------------------
 1 | #if __clang__
 2 |     #pragma clang diagnostic ignored "-Wformat"
 3 |     #pragma clang diagnostic ignored "-Wunused-variable"
 4 | #elif __GNUC__
 5 |     #pragma GCC diagnostic ignored "-Wformat"
 6 | #endif
 7 | 
 8 | #if __GLASGOW_HASKELL__ >= 800
 9 |     #define bc_zpatsig(name,constr) \
10 |         printf("pattern ");bc_conid(name); \
11 |         printf(" :: (Eq a, %s a) => a",constr);
12 | #else
13 |     #define bc_zpatsig(name,constr)
14 | #endif
15 | 
16 | #define hsc_znum(name) \
17 |     bc_zpatsig(# name,"Num");printf("\n"); \
18 |     printf("pattern ");bc_conid(# name);printf(" <- ((== ("); \
19 |     bc_decimal(name);printf(")) -> True) where\n    "); \
20 |     bc_conid(# name);printf(" = ");bc_decimal(name);
21 | 


--------------------------------------------------------------------------------
/zebra-core/csrc/zebra_block_split.h:
--------------------------------------------------------------------------------
 1 | #ifndef __ZEBRA_BLOCK_SPLIT_H
 2 | #define __ZEBRA_BLOCK_SPLIT_H
 3 | 
 4 | #include "zebra_data.h"
 5 | 
 6 | 
 7 | error_t zebra_entities_of_block (
 8 |     anemone_mempool_t *pool
 9 |   , zebra_block_t *block
10 |   , int64_t *out_entity_count
11 |   , zebra_entity_t **out_entities
12 |   );
13 | 
14 | 
15 | #endif//__ZEBRA_BLOCK_SPLIT_H
16 | 
17 | 


--------------------------------------------------------------------------------
/zebra-core/csrc/zebra_clone.h:
--------------------------------------------------------------------------------
  1 | #ifndef __ZEBRA_CLONE_H
  2 | #define __ZEBRA_CLONE_H
  3 | 
  4 | #include "zebra_data.h"
  5 | 
  6 | 
  7 | // ------------------------
  8 | // Agile clone: copy the structure, but throw away and clear the content.
  9 | // ------------------------
 10 | error_t zebra_agile_clone_attribute (
 11 |     anemone_mempool_t *pool
 12 |   , const zebra_attribute_t *attribute
 13 |   , zebra_attribute_t *into
 14 |   );
 15 | 
 16 | error_t zebra_agile_clone_table (
 17 |     anemone_mempool_t *pool
 18 |   , const zebra_table_t *table
 19 |   , zebra_table_t *into
 20 |   );
 21 | 
 22 | error_t zebra_agile_clone_column (
 23 |     anemone_mempool_t *pool
 24 |   , const zebra_column_t *column
 25 |   , zebra_column_t *into
 26 |   );
 27 | 
 28 | 
 29 | // ------------------------
 30 | // Neritic clone: deep copy the structure, shallow copy the content
 31 | // ------------------------
 32 | error_t zebra_neritic_clone_table (
 33 |     anemone_mempool_t *pool
 34 |   , zebra_table_t *in_table
 35 |   , zebra_table_t *out_table
 36 |   );
 37 | 
 38 | error_t zebra_neritic_clone_tables (
 39 |     anemone_mempool_t *pool
 40 |   , int64_t table_count
 41 |   , zebra_table_t *in_tables
 42 |   , zebra_table_t **out_tables
 43 |   );
 44 | 
 45 | error_t zebra_neritic_clone_column (
 46 |     anemone_mempool_t *pool
 47 |   , zebra_column_t *in_column
 48 |   , zebra_column_t *out_column
 49 |   );
 50 | 
 51 | 
 52 | // ------------------------
 53 | // Deep clone: deep copy structure and content
 54 | // ------------------------
 55 | error_t zebra_deep_clone_table (
 56 |     anemone_mempool_t *pool
 57 |   , const zebra_table_t *table
 58 |   , zebra_table_t *into
 59 |   );
 60 | 
 61 | error_t zebra_deep_clone_column (
 62 |     anemone_mempool_t *pool
 63 |   , int64_t row_count
 64 |   , const zebra_column_t *in_column
 65 |   , zebra_column_t *out_column
 66 |   );
 67 | 
 68 | 
 69 | error_t zebra_deep_clone_attribute (
 70 |     anemone_mempool_t *pool
 71 |   , const zebra_attribute_t *attribute
 72 |   , zebra_attribute_t *into
 73 |   );
 74 | 
 75 | error_t zebra_deep_clone_entity (
 76 |     anemone_mempool_t *pool
 77 |   , const zebra_entity_t *entity
 78 |   , zebra_entity_t *into
 79 |   );
 80 | 
 81 | 
 82 | ANEMONE_STATIC
 83 | ANEMONE_INLINE
 84 | void *zebra_clone_array (
 85 |     anemone_mempool_t *pool
 86 |   , const void *in
 87 |   , int64_t num_elements
 88 |   , int64_t element_size
 89 |   )
 90 | {
 91 |     int64_t bytes = num_elements * element_size;
 92 |     void *out = anemone_mempool_alloc (pool, bytes);
 93 |     if (in) memcpy (out, in, bytes);
 94 |     return out;
 95 | }
 96 | 
 97 | #define ZEBRA_CLONE_ARRAY(pool, in, num_elements) zebra_clone_array (pool, in, num_elements, sizeof (*in) )
 98 | 
 99 | #endif//__ZEBRA_CLONE_H
100 | 


--------------------------------------------------------------------------------
/zebra-core/csrc/zebra_debug.h:
--------------------------------------------------------------------------------
  1 | #ifndef __ZEBRA_DEBUG_H
  2 | #define __ZEBRA_DEBUG_H
  3 | 
  4 | #include "zebra_data.h"
  5 | #include <stdio.h>
  6 | 
  7 | #if __clang__
  8 |     #pragma clang diagnostic ignored "-Wno-unused-function"
  9 | #elif __GNUC__
 10 |     #pragma GCC diagnostic ignored "-Wno-unused-function"
 11 | #endif
 12 | 
 13 | ANEMONE_STATIC
 14 | void zebra_debug_print_indent (int64_t indent)
 15 | {
 16 |     for (int64_t i = 0; i != indent; ++i) printf(" ");
 17 | }
 18 | #define INDENTF zebra_debug_print_indent(indent); printf
 19 | 
 20 | 
 21 | ANEMONE_STATIC
 22 | void zebra_debug_print_table (int64_t indent, zebra_table_t *table) ;
 23 | 
 24 | ANEMONE_STATIC
 25 | void zebra_debug_print_column (int64_t indent, zebra_column_t *column, int64_t row_count)
 26 | {
 27 |     if (row_count < 0)
 28 |     {
 29 |         printf("Negative row_count of %lld! Error or too big to print\n", row_count);
 30 |         row_count = 0;
 31 |     }
 32 | 
 33 |     switch (column->type) {
 34 |         case ZEBRA_BYTE:
 35 |             INDENTF("BYTE:\n");
 36 |             INDENTF("");
 37 |             for (int64_t i = 0; i != row_count; ++i) {
 38 |                 unsigned char c = column->data.b[i];
 39 |                 if (c > 32 && c < 127)
 40 |                     printf("'%c' ", c);
 41 |                 else
 42 |                     printf("(%d) ", c);
 43 |             }
 44 |             printf("\n");
 45 |             return;
 46 | 
 47 |         case ZEBRA_INT:
 48 |             INDENTF("INT:\n");
 49 |             INDENTF("");
 50 |             for (int64_t i = 0; i != row_count; ++i) {
 51 |                 printf("%lld ", column->data.i[i]);
 52 |             }
 53 |             printf("\n");
 54 |             return;
 55 | 
 56 |         case ZEBRA_DOUBLE:
 57 |             INDENTF("DOUBLE:\n");
 58 |             INDENTF("");
 59 |             for (int64_t i = 0; i != row_count; ++i) {
 60 |                 printf("%f ", column->data.d[i]);
 61 |             }
 62 |             printf("\n");
 63 |             return;
 64 | 
 65 |         case ZEBRA_ARRAY:
 66 |             INDENTF("ARRAY:\n");
 67 |             INDENTF(" LENS:\n");
 68 |             INDENTF("  ");
 69 |             for (int64_t i = 0; i != row_count; ++i) {
 70 |                 printf("%lld ", column->data.a.n[i+1] - column->data.a.n[i]);
 71 |             }
 72 |             printf("\n");
 73 |             INDENTF(" SCAN:\n");
 74 |             INDENTF("  ");
 75 |             for (int64_t i = 0; i != row_count+1; ++i) {
 76 |                 if (column->data.a.n) printf("%lld ", column->data.a.n[i]);
 77 |             }
 78 | 
 79 |             printf("\n");
 80 |             INDENTF(" NESTED:\n");
 81 |             zebra_debug_print_table(indent + 2, &column->data.a.table);
 82 | 
 83 |             if (column->data.a.n) {
 84 |                 int64_t compute_count = column->data.a.n[row_count] - column->data.a.n[0];
 85 |                 if (compute_count != column->data.a.table.row_count) {
 86 |                     printf("!!!\n\nComputed row count was %lld but child table has row count %lld!\n", compute_count, column->data.a.table.row_count);
 87 |                     exit(1);
 88 |                 }
 89 |             }
 90 | 
 91 |             return;
 92 | 
 93 |         default:
 94 |             INDENTF("INVALID COLUMN\n");
 95 |             return;
 96 |     }
 97 | }
 98 | 
 99 | ANEMONE_STATIC
100 | void zebra_debug_print_table (int64_t indent, zebra_table_t *table)
101 | {
102 |     for (int64_t i = 0; i != table->column_count; ++i) {
103 |         INDENTF("Column %lld\n", i);
104 |         zebra_debug_print_column(indent + 1, table->columns + i, table->row_count);
105 |     }
106 | }
107 | 
108 | ANEMONE_STATIC
109 | void zebra_debug_print_block (zebra_block_t *block)
110 | {
111 |     printf("Block: %lld tables\n", block->table_count);
112 |     for (int64_t i = 0; i != block->table_count; ++i) {
113 |         if (block->tables[i].row_count == 0) continue;
114 |         printf(" Table %lld\n", i);
115 |         zebra_debug_print_table (2, block->tables + i);
116 |     }
117 | }
118 | 
119 | ANEMONE_STATIC
120 | void zebra_debug_print_entity (const zebra_entity_t *entity)
121 | {
122 |     printf("Entity: %lld attributes\n", entity->attribute_count);
123 |     for (int64_t i = 0; i != entity->attribute_count; ++i) {
124 |         if (entity->attributes[i].table.row_count == 0) continue;
125 |         printf(" Table %lld\n", i);
126 |         zebra_debug_print_table (2, &entity->attributes[i].table);
127 |     }
128 | }
129 | 
130 | #endif//__ZEBRA_DEBUG_H
131 | 


--------------------------------------------------------------------------------
/zebra-core/csrc/zebra_grow.h:
--------------------------------------------------------------------------------
 1 | #ifndef __ZEBRA_GROW_H
 2 | #define __ZEBRA_GROW_H
 3 | 
 4 | #include "zebra_data.h"
 5 | 
 6 | error_t zebra_grow_column (
 7 |     anemone_mempool_t *pool
 8 |   , zebra_column_t *column
 9 |   , int64_t old_count
10 |   , int64_t new_capacity
11 |   );
12 | 
13 | error_t zebra_grow_table (
14 |     anemone_mempool_t *pool
15 |   , zebra_table_t *table
16 |   , int64_t grow_by
17 |   );
18 | 
19 | error_t zebra_grow_attribute (
20 |     anemone_mempool_t *pool
21 |   , zebra_attribute_t *attribute
22 |   , int64_t grow_by
23 |   );
24 | 
25 | //
26 | // Array capacity: compute array capacity for given count.
27 | // Gets next highest power of two after count, or a minimum of 4.
28 | // This was stolen from Icicle. Maybe it should go in Anemone.
29 | //
30 | ANEMONE_STATIC
31 | ANEMONE_INLINE
32 | int64_t zebra_grow_array_capacity(int64_t count)
33 | {
34 |     if (count < 4) return 4;
35 | 
36 |     int64_t bits = 64 - __builtin_clzll (count - 1);
37 |     int64_t next = 1L << bits;
38 | 
39 |     return next;
40 | }
41 | 
42 | ANEMONE_STATIC
43 | ANEMONE_INLINE
44 | void* zebra_grow_array (anemone_mempool_t *pool, void *old, size_t size, int64_t old_count, int64_t new_capacity)
45 | {
46 |     void *new = anemone_mempool_alloc (pool, new_capacity * size);
47 | 
48 |     //
49 |     // Allow grow_array to do the initial allocation when there is no previous
50 |     // array. This guard probably isn't necessary but calling memcpy with a
51 |     // null pointer is technically undefined.
52 |     //
53 |     if (old) {
54 |         memcpy (new, old, old_count * size);
55 |     }
56 | 
57 |     return new;
58 | }
59 | 
60 | #define ZEBRA_GROW_ARRAY(pool, in, oldcap, newcap) zebra_grow_array (pool, in, sizeof (in[0]), oldcap, newcap )
61 | 
62 | 
63 | #endif//__ZEBRA_GROW_H
64 | 


--------------------------------------------------------------------------------
/zebra-core/csrc/zebra_hash.h:
--------------------------------------------------------------------------------
 1 | #ifndef __ZEBRA_HASH_H
 2 | #define __ZEBRA_HASH_H
 3 | 
 4 | #if CABAL
 5 | #include "anemone_base.h"
 6 | #include "anemone_hash.h"
 7 | #else
 8 | #include "../../lib/anemone/csrc/anemone_base.h"
 9 | #include "../../lib/anemone/csrc/anemone_hash.h"
10 | #endif
11 | 
12 | #define ZEBRA_HASH_SEED ((uint64_t) 0xf7a646480e5a3c0f)
13 | 
14 | ANEMONE_STATIC
15 | ANEMONE_INLINE
16 | uint32_t zebra_hash (const uint8_t *buf, size_t len) {
17 |     return anemone_fasthash32 (ZEBRA_HASH_SEED, buf, len);
18 | }
19 | 
20 | #endif//__ZEBRA_HASH_H
21 | 


--------------------------------------------------------------------------------
/zebra-core/csrc/zebra_merge.c:
--------------------------------------------------------------------------------
  1 | #include "zebra_append.h"
  2 | #include "zebra_clone.h"
  3 | #include "zebra_data.h"
  4 | #include "zebra_grow.h"
  5 | #include "zebra_merge.h"
  6 | 
  7 | 
  8 | ANEMONE_STATIC
  9 | error_t zebra_merge_attributes (anemone_mempool_t *pool, zebra_attribute_t **ins, int64_t ins_count, zebra_attribute_t *out_into)
 10 | {
 11 |     error_t err;
 12 |     // XXX: would it be better to allocate this in zebra_merge_entities and zero out on every call?
 13 |     int64_t *in_ixs = calloc(ins_count, sizeof(int64_t));
 14 | 
 15 |     err = zebra_agile_clone_attribute (pool, ins[0], out_into);
 16 |     if (err) goto clean;
 17 | 
 18 |     // zebra_append_attribute can be expensive, since it has to traverse down to find the values.
 19 |     // we can get some gains by saving up calls to it, so that if we have inputs something like
 20 |     // > [ 1, 2, 3 ]
 21 |     // and
 22 |     // > [ 4, 5 ]
 23 |     // we don't have to call zebra_append_attribute separately for each of 1, 2 and 3.
 24 |     // instead we call it to copy all three elements at once.
 25 |     int64_t pending_copy_count = 0;
 26 |     int64_t pending_copy_from_ix = 0;
 27 | 
 28 |     while (1) {
 29 |         int64_t alive = 0;
 30 |         int64_t min_ix = 0;
 31 |         int64_t min_time = 0;
 32 |         int64_t min_fsid = 0;
 33 | 
 34 |         for (int64_t at_ix = 0; at_ix != ins_count; ++at_ix) {
 35 |             zebra_attribute_t *in = ins[at_ix];
 36 |             int64_t count = in->table.row_count;
 37 |             int64_t in_ix = in_ixs[at_ix];
 38 | 
 39 |             if (in_ix < count) {
 40 |                 int64_t in_time = in->times[in_ix];
 41 |                 int64_t in_fsid = in->factset_ids[in_ix];
 42 | 
 43 |                 bool64_t take_this = (alive == 0)
 44 |                     || (in_time < min_time)
 45 |                     || (in_time == min_time && in_fsid > min_fsid);
 46 |                 if (take_this) {
 47 |                     min_time = in_time;
 48 |                     min_fsid = in_fsid;
 49 |                     min_ix   = at_ix;
 50 |                 }
 51 | 
 52 |                 alive++;
 53 |             }
 54 |         }
 55 | 
 56 |         if (pending_copy_count > 0 && (pending_copy_from_ix != min_ix || alive == 0)) {
 57 |             err = zebra_append_attribute (pool, ins[pending_copy_from_ix], in_ixs[pending_copy_from_ix] - pending_copy_count, out_into, pending_copy_count);
 58 |             if (err) return err;
 59 | 
 60 |             pending_copy_count = 0;
 61 |         }
 62 | 
 63 |         if (alive == 0) break;
 64 | 
 65 |         pending_copy_from_ix = min_ix;
 66 |         pending_copy_count++;
 67 |         in_ixs[min_ix]++;
 68 | 
 69 |     }
 70 | 
 71 |     err = ZEBRA_SUCCESS;
 72 | clean:
 73 |     free (in_ixs);
 74 | 
 75 |     return err;
 76 | }
 77 | 
 78 | error_t zebra_merge_entities (anemone_mempool_t *pool, zebra_entity_t **ins, int64_t ins_count, zebra_entity_t *out_into)
 79 | {
 80 |     error_t err;
 81 | 
 82 |     if (ins_count == 0) return ZEBRA_MERGE_NO_ENTITIES;
 83 |     const zebra_entity_t *in1 = ins[0];
 84 | 
 85 |     out_into->hash            = in1->hash;
 86 |     out_into->id_length       = in1->id_length;
 87 |     out_into->id_bytes        = in1->id_bytes;
 88 |     out_into->attribute_count = in1->attribute_count;
 89 | 
 90 |     out_into->attributes = anemone_mempool_alloc (pool, sizeof (zebra_attribute_t) * out_into->attribute_count );
 91 | 
 92 |     zebra_attribute_t **in_ats = malloc(ins_count * sizeof(zebra_attribute_t*));
 93 | 
 94 |     for (int64_t attr_ix = 0; attr_ix != out_into->attribute_count; ++attr_ix) {
 95 |         // This might be a waste of time, but it simplifies and might give better locality.
 96 |         // Might give even better locality if we copied values rather than pointers?
 97 |         for (int64_t ent_ix = 0; ent_ix != ins_count; ++ent_ix)
 98 |             in_ats[ent_ix] = ins[ent_ix]->attributes + attr_ix;
 99 | 
100 |         err = zebra_merge_attributes (pool, in_ats, ins_count, out_into->attributes + attr_ix);
101 |         if (err) goto clean;
102 |     }
103 | 
104 |     err = ZEBRA_SUCCESS;
105 | clean:
106 |     free (in_ats);
107 | 
108 |     return err;
109 | }
110 | 
111 | error_t zebra_merge_entity_pair (
112 |     anemone_mempool_t *pool
113 |   , zebra_entity_t *in1
114 |   , zebra_entity_t *in2
115 |   , zebra_entity_t *out_into
116 |   )
117 | {
118 |     zebra_entity_t *ins[2] = {in1, in2};
119 |     return zebra_merge_entities (pool, ins, 2, out_into);
120 | }
121 | 
122 | 


--------------------------------------------------------------------------------
/zebra-core/csrc/zebra_merge.h:
--------------------------------------------------------------------------------
 1 | #ifndef __ZEBRA_MERGE_H
 2 | #define __ZEBRA_MERGE_H
 3 | 
 4 | #include "zebra_data.h"
 5 | 
 6 | error_t zebra_merge_entities (
 7 |     anemone_mempool_t *pool
 8 |   , zebra_entity_t **ins
 9 |   , int64_t ins_count
10 |   , zebra_entity_t *out_into
11 |   );
12 | 
13 | // This is just for testing. Calls zebra_merge_entities with a pair as input.
14 | error_t zebra_merge_entity_pair (
15 |     anemone_mempool_t *pool
16 |   , zebra_entity_t *in1
17 |   , zebra_entity_t *in2
18 |   , zebra_entity_t *out_into
19 |   );
20 | 
21 | 
22 | #endif//__ZEBRA_MERGE_H
23 | 


--------------------------------------------------------------------------------
/zebra-core/csrc/zebra_merge_many.h:
--------------------------------------------------------------------------------
 1 | #ifndef __ZEBRA_MERGE_MANY_H
 2 | #define __ZEBRA_MERGE_MANY_H
 3 | 
 4 | #include "zebra_data.h"
 5 | 
 6 | typedef struct zebra_merge_many {
 7 |     int64_t count;
 8 |     zebra_entity_t **entities;
 9 | } zebra_merge_many_t;
10 | 
11 | 
12 | error_t zebra_mm_init (
13 |     anemone_mempool_t *pool
14 |   , zebra_merge_many_t **out
15 |   );
16 | 
17 | error_t zebra_mm_push (
18 |     anemone_mempool_t *pool
19 |   , zebra_merge_many_t *merger
20 |   , int64_t entity_count
21 |   , zebra_entity_t **entities
22 |   );
23 | 
24 | error_t zebra_mm_pop (
25 |     anemone_mempool_t *pool
26 |   , zebra_merge_many_t *merger
27 |   , zebra_entity_t **out
28 |   );
29 | 
30 | error_t zebra_mm_clone (
31 |     anemone_mempool_t *pool
32 |   , zebra_merge_many_t **merger
33 |   );
34 | 
35 | 
36 | #endif//__ZEBRA_MERGE_MANY_H
37 | 
38 | 


--------------------------------------------------------------------------------
/zebra-core/csrc/zebra_unpack.c:
--------------------------------------------------------------------------------
  1 | #include "zebra_unpack.h"
  2 | 
  3 | #if CABAL
  4 | #include "anemone_pack.h"
  5 | #else
  6 | #include "../../lib/anemone/csrc/anemone_pack.h"
  7 | #endif
  8 | 
  9 | ANEMONE_STATIC
 10 | ANEMONE_INLINE
 11 | int64_t zebra_unzigzag64(uint64_t n)
 12 | {
 13 |     return (n >> 1) ^ (-(n & 1));
 14 | }
 15 | 
 16 | ANEMONE_STATIC
 17 | ANEMONE_INLINE
 18 | uint64_t zebra_zigzag64(int64_t n)
 19 | {
 20 |     return (n << 1) ^ (n >> 63);
 21 | }
 22 | 
 23 | ANEMONE_STATIC
 24 | ANEMONE_INLINE
 25 | int64_t zebra_midpoint (
 26 |     int64_t *elems
 27 |   , int64_t n_elems )
 28 | {
 29 |     if (n_elems == 0) return 0;
 30 | 
 31 |     int64_t min = elems[0];
 32 |     int64_t max = elems[0];
 33 |     for (int64_t c = 1; c != n_elems; ++c) {
 34 |         int64_t elem = elems[c];
 35 |         min = elem < min ? elem : min;
 36 |         max = elem > max ? elem : max;
 37 |     }
 38 | 
 39 |     // Commutative, overflow proof integer average/midpoint:
 40 |     //   Source: http://stackoverflow.com/a/4844672
 41 |     return (min & max) + ((min ^ max) >> 1);
 42 | }
 43 | 
 44 | error_t zebra_unpack_array (
 45 |     uint8_t *buf
 46 |   , int64_t bufsize0
 47 |   , int64_t n_elems
 48 |   , int64_t offset
 49 |   , int64_t *fill_start
 50 |   )
 51 | {
 52 |     error_t error;
 53 | 
 54 |     const int64_t int_part_size = 64;
 55 | 
 56 |     int64_t n_parts   = n_elems / int_part_size;
 57 |     int64_t n_remains = n_elems % int_part_size;
 58 | 
 59 |     int64_t bufsize = bufsize0 - n_parts;
 60 |     if (bufsize < 0) return ZEBRA_UNPACK_BUFFER_TOO_SMALL;
 61 | 
 62 |     int64_t *fill  = fill_start;
 63 |     uint8_t *nbits = buf;
 64 |     uint8_t *parts = buf + n_parts;
 65 | 
 66 |     for (int64_t ix = 0; ix != n_parts; ++ix) {
 67 |         bufsize -= nbits[ix] * 8;
 68 |     }
 69 | 
 70 |     if (bufsize < 0) return ZEBRA_UNPACK_BUFFER_TOO_SMALL;
 71 | 
 72 |     for (int64_t ix = 0; ix != n_parts; ++ix) {
 73 |         uint8_t nbit = *nbits;
 74 | 
 75 |         error = anemone_unpack64_64 (1, nbit, parts, (uint64_t*)fill);
 76 |         if (error) return error;
 77 | 
 78 |         nbits += 1;
 79 |         // we have read 64 ints
 80 |         fill  += 64;
 81 |         // but it took how many bytes...
 82 |         parts += nbit * 8;
 83 |     }
 84 | 
 85 |     int64_t remains_size = n_remains * sizeof (int64_t);
 86 |     bufsize -= remains_size;
 87 |     if (bufsize < 0) return ZEBRA_UNPACK_BUFFER_TOO_SMALL;
 88 |     if (bufsize > 0) return ZEBRA_UNPACK_BUFFER_TOO_LARGE;
 89 | 
 90 |     memcpy(fill, parts, remains_size);
 91 | 
 92 |     for (int64_t ix = 0; ix != n_elems; ++ix) {
 93 |         fill_start[ix] = zebra_unzigzag64(fill_start[ix]) + offset;
 94 |     }
 95 | 
 96 |     return 0;
 97 | }
 98 | 
 99 | error_t zebra_pack_array (
100 |     uint8_t **buf_inout
101 |   , int64_t *elems
102 |   , int64_t n_elems
103 |   )
104 | {
105 |     error_t error;
106 | 
107 |     const int64_t int_part_size = 64;
108 | 
109 |     int64_t n_parts   = n_elems / int_part_size;
110 |     int64_t n_remains = n_elems % int_part_size;
111 | 
112 | 
113 |     int64_t offset = zebra_midpoint (elems, n_elems);
114 | 
115 |     uint8_t *buf_start = *buf_inout;
116 |     uint32_t *header_size = (uint32_t*)buf_start;
117 |     uint64_t *header_offset = (uint64_t*)(header_size + 1);
118 |     uint8_t *nbits_start = (uint8_t*)(header_offset + 1);
119 |     uint8_t *nbits = nbits_start;
120 |     uint8_t *parts = nbits + n_parts;
121 |     *header_offset = offset;
122 | 
123 |     uint64_t deltas[int_part_size];
124 | 
125 |     for (int64_t part_ix = 0; part_ix != n_parts; ++part_ix) {
126 | 
127 |         uint64_t max_delta = 0;
128 |         for (int64_t delta_ix = 0; delta_ix != int_part_size; ++delta_ix) {
129 |             uint64_t delta = zebra_zigzag64 (elems[delta_ix] - offset);
130 |             deltas[delta_ix] = delta;
131 |             max_delta = delta > max_delta ? delta : max_delta;
132 |         }
133 | 
134 |         uint64_t bitsof = max_delta ? 64 - __builtin_clzll (max_delta) : 0;
135 |         uint8_t  nbit = (uint8_t)bitsof;
136 | 
137 |         *nbits = nbit;
138 | 
139 |         error = anemone_pack64_64 (1, nbit, deltas, parts);
140 |         if (error) return error;
141 | 
142 |         nbits += 1;
143 |         // we have read 64 ints
144 |         elems += 64;
145 |         // but it took how many bytes...
146 |         parts += nbit * 8;
147 |     }
148 | 
149 |     int64_t *remains = (int64_t*)parts;
150 |     for (int64_t ix = 0; ix != n_remains; ++ix) {
151 |         *remains = zebra_zigzag64 (*elems - offset);
152 |         ++elems;
153 |         ++remains;
154 |     }
155 | 
156 |     uint8_t* buf_end = (uint8_t*)remains;
157 |     *header_size = buf_end - nbits_start;
158 | 
159 |     *buf_inout = buf_end;
160 | 
161 |     return 0;
162 | }
163 | 
164 | 


--------------------------------------------------------------------------------
/zebra-core/csrc/zebra_unpack.h:
--------------------------------------------------------------------------------
 1 | #ifndef __ZEBRA_UNPACK_H
 2 | #define __ZEBRA_UNPACK_H
 3 | 
 4 | #include "zebra_data.h"
 5 | 
 6 | error_t zebra_unpack_array (
 7 |     uint8_t *bytes
 8 |   , int64_t bufsize
 9 |   , int64_t elems
10 |   , int64_t offset
11 |   , int64_t *fill
12 |   );
13 | 
14 | error_t zebra_pack_array (
15 |     uint8_t **buf_inout
16 |   , int64_t *elems
17 |   , int64_t n_elems
18 |   );
19 | 
20 | #endif//__ZEBRA_UNPACK_H
21 | 
22 | 


--------------------------------------------------------------------------------
/zebra-core/mafia:
--------------------------------------------------------------------------------
1 | ../framework/mafia


--------------------------------------------------------------------------------
/zebra-core/master.toml:
--------------------------------------------------------------------------------
 1 | [master]
 2 |   runner = "s3://ambiata-dispensary-v2/dist/master/master-haskell/linux/x86_64/20170613033625-fddfe03/master-haskell-20170613033625-fddfe03"
 3 |   version = 1
 4 |   sha1 = "0a9e91a8c3373a402c5f2c4fc8d9aabb210c03b3"
 5 | 
 6 | [build.dist-7-10]
 7 |   GHC_VERSION = "7.10.2"
 8 |   CABAL_VERSION = "1.24.0.0"
 9 |   CACHE = "true"
10 |   HADDOCK = "true"
11 |   HADDOCK_S3 = "$AMBIATA_HADDOCK_MASTER"
12 |   HADDOCK_URL = "$AMBIATA_HADDOCK_MASTER_URL"
13 | 
14 | [build.dist-8-0]
15 |   GHC_VERSION = "8.0.1"
16 |   CABAL_VERSION = "1.24.0.0"
17 |   CACHE = "true"
18 | 
19 | [build.branches-7-10]
20 |   GHC_VERSION = "7.10.2"
21 |   CABAL_VERSION = "1.24.0.0"
22 |   CACHE = "true"
23 |   HADDOCK = "true"
24 |   HADDOCK_S3 = "$AMBIATA_HADDOCK_BRANCHES"
25 |   HADDOCK_URL = "$AMBIATA_HADDOCK_BRANCHES_URL"
26 | 
27 | [build.branches-8-0]
28 |   GHC_VERSION = "8.0.1"
29 |   CABAL_VERSION = "1.24.0.0"
30 |   CACHE = "true"
31 | 


--------------------------------------------------------------------------------
/zebra-core/src/Zebra/Factset/Block.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE NoImplicitPrelude #-}
2 | module Zebra.Factset.Block (
3 |     module X
4 |   ) where
5 | 
6 | import           Zebra.Factset.Block.Block as X
7 | import           Zebra.Factset.Block.Entity as X
8 | import           Zebra.Factset.Block.Index as X
9 | 


--------------------------------------------------------------------------------
/zebra-core/src/Zebra/Factset/Block/Entity.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE DeriveGeneric #-}
  2 | {-# LANGUAGE LambdaCase #-}
  3 | {-# LANGUAGE MultiParamTypeClasses #-}
  4 | {-# LANGUAGE NoImplicitPrelude #-}
  5 | {-# LANGUAGE TemplateHaskell #-}
  6 | {-# LANGUAGE TypeFamilies #-}
  7 | {-# OPTIONS_GHC -funbox-strict-fields #-}
  8 | module Zebra.Factset.Block.Entity (
  9 |     BlockEntity(..)
 10 |   , BlockAttribute(..)
 11 |   , entitiesOfFacts
 12 |   ) where
 13 | 
 14 | import           Data.Vector.Unboxed.Deriving (derivingUnbox)
 15 | 
 16 | import           GHC.Generics (Generic)
 17 | 
 18 | import           P
 19 | 
 20 | import qualified X.Data.Vector as Boxed
 21 | import qualified X.Data.Vector.Unboxed as Unboxed
 22 | 
 23 | import           Zebra.Factset.Data
 24 | import           Zebra.Factset.Fact
 25 | 
 26 | 
 27 | data BlockAttribute =
 28 |   BlockAttribute {
 29 |       attributeId :: !AttributeId
 30 |     , attributeRows :: !Int64
 31 |     } deriving (Eq, Ord, Show, Generic)
 32 | 
 33 | -- This deriving Unbox needs to appear before using it in BlockEntity below
 34 | derivingUnbox "BlockAttribute"
 35 |   [t| BlockAttribute -> (AttributeId, Int64) |]
 36 |   [| \(BlockAttribute x y) -> (x, y) |]
 37 |   [| \(x, y) -> BlockAttribute x y |]
 38 | 
 39 | data BlockEntity =
 40 |   BlockEntity {
 41 |       entityHash :: !EntityHash
 42 |     , entityId :: !EntityId
 43 |     , entityAttributes :: !(Unboxed.Vector BlockAttribute)
 44 |     } deriving (Eq, Ord, Show, Generic)
 45 | 
 46 | data EntityAcc =
 47 |   EntityAcc !EntityHash !EntityId !AttributeAcc !(Boxed.Vector BlockEntity)
 48 | 
 49 | data AttributeAcc =
 50 |   AttributeAcc !AttributeId !Int64 !(Unboxed.Vector BlockAttribute)
 51 | 
 52 | -- | Convert facts to hierarchical BlockEntity representation.
 53 | --
 54 | --   The input fact vector must be sorted by:
 55 | --
 56 | --     1. BlockEntity Hash
 57 | --     2. BlockEntity Id
 58 | --     3. BlockAttribute
 59 | --     4. Time
 60 | --     5. FactsetId
 61 | --
 62 | entitiesOfFacts :: Boxed.Vector Fact -> Boxed.Vector BlockEntity
 63 | entitiesOfFacts =
 64 |   let
 65 |     loop macc fact =
 66 |       case macc of
 67 |         Nothing' ->
 68 |           Just' $! mkEntityAcc fact
 69 |         Just' acc ->
 70 |           Just' $! appendEntity acc fact
 71 |   in
 72 |     maybe' Boxed.empty takeEntities . Boxed.foldl' loop Nothing'
 73 | 
 74 | mkAttributeAcc :: AttributeId -> AttributeAcc
 75 | mkAttributeAcc aid =
 76 |   AttributeAcc aid 1 Unboxed.empty
 77 | 
 78 | mkEntityAcc :: Fact -> EntityAcc
 79 | mkEntityAcc (Fact ehash eid aid _ _ _) =
 80 |   EntityAcc ehash eid (mkAttributeAcc aid) Boxed.empty
 81 | 
 82 | takeEntities :: EntityAcc -> Boxed.Vector BlockEntity
 83 | takeEntities (EntityAcc ehash eid attrs ents) =
 84 |   ents `Boxed.snoc` BlockEntity ehash eid (takeAttributes attrs)
 85 | 
 86 | takeAttributes :: AttributeAcc -> Unboxed.Vector BlockAttribute
 87 | takeAttributes (AttributeAcc aid nrecs attrs) =
 88 |   attrs `Unboxed.snoc` BlockAttribute aid nrecs
 89 | 
 90 | appendEntity :: EntityAcc -> Fact -> EntityAcc
 91 | appendEntity acc0@(EntityAcc ehash0 eid0 attrs0 ents0) (Fact ehash1 eid1 aid1 _ _ _) =
 92 |   if ehash0 == ehash1 && eid0 == eid1 then
 93 |     EntityAcc ehash0 eid0 (appendAttribute attrs0 aid1) ents0
 94 |   else
 95 |     EntityAcc ehash1 eid1 (mkAttributeAcc aid1) (takeEntities acc0)
 96 | 
 97 | appendAttribute :: AttributeAcc -> AttributeId -> AttributeAcc
 98 | appendAttribute acc0@(AttributeAcc aid0 recs0 attrs0) aid1 =
 99 |   if aid0 == aid1 then
100 |     AttributeAcc aid0 (recs0 + 1) attrs0
101 |   else
102 |     AttributeAcc aid1 1 (takeAttributes acc0)
103 | 


--------------------------------------------------------------------------------
/zebra-core/src/Zebra/Factset/Block/Index.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE DeriveGeneric #-}
 2 | {-# LANGUAGE LambdaCase #-}
 3 | {-# LANGUAGE MultiParamTypeClasses #-}
 4 | {-# LANGUAGE NoImplicitPrelude #-}
 5 | {-# LANGUAGE TemplateHaskell #-}
 6 | {-# LANGUAGE TypeFamilies #-}
 7 | {-# OPTIONS_GHC -funbox-strict-fields #-}
 8 | module Zebra.Factset.Block.Index (
 9 |     BlockIndex(..)
10 |   , Tombstone(..)
11 |   , indicesOfFacts
12 |   ) where
13 | 
14 | import           Data.Vector.Unboxed.Deriving (derivingUnbox)
15 | 
16 | import           GHC.Generics (Generic)
17 | 
18 | import           P
19 | 
20 | import qualified X.Data.Vector as Boxed
21 | import qualified X.Data.Vector.Unboxed as Unboxed
22 | 
23 | import           Zebra.Factset.Data
24 | import           Zebra.Factset.Fact
25 | 
26 | 
27 | -- FIXME Might be good if this were using 3x Storable.Vector instead of a
28 | -- FIXME single Unboxed.Vector, as it would make translation to C smoother.
29 | data BlockIndex =
30 |   BlockIndex {
31 |       indexTime :: !Time
32 |     , indexFactsetId :: !FactsetId
33 |     , indexTombstone :: !Tombstone
34 |     } deriving (Eq, Ord, Show, Generic)
35 | 
36 | indicesOfFacts :: Boxed.Vector Fact -> Unboxed.Vector BlockIndex
37 | indicesOfFacts =
38 |   let
39 |     fromFact fact =
40 |       BlockIndex
41 |         (factTime fact)
42 |         (factFactsetId fact)
43 |         (maybe' Tombstone (const NotTombstone) $ factValue fact)
44 |   in
45 |     Unboxed.convert . fmap fromFact
46 | 
47 | derivingUnbox "BlockIndex"
48 |   [t| BlockIndex -> (Time, FactsetId, Tombstone) |]
49 |   [| \(BlockIndex x y z) -> (x, y, z) |]
50 |   [| \(x, y, z) -> BlockIndex x y z |]
51 | 


--------------------------------------------------------------------------------
/zebra-core/src/Zebra/Factset/Entity.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE DeriveFoldable #-}
 2 | {-# LANGUAGE DeriveFunctor #-}
 3 | {-# LANGUAGE DeriveGeneric #-}
 4 | {-# LANGUAGE DeriveTraversable #-}
 5 | {-# LANGUAGE NoImplicitPrelude #-}
 6 | module Zebra.Factset.Entity where
 7 | 
 8 | import qualified Data.Vector as Boxed
 9 | import qualified Data.Vector.Storable as Storable
10 | 
11 | import           GHC.Generics (Generic)
12 | 
13 | import           P
14 | 
15 | import           X.Text.Show (gshowsPrec)
16 | 
17 | import           Zebra.Factset.Data
18 | import qualified Zebra.Table.Striped as Striped
19 | 
20 | 
21 | data Attribute =
22 |   Attribute {
23 |       attributeTime :: !(Storable.Vector Time)
24 |     , attributeFactsetId :: !(Storable.Vector FactsetId)
25 |     , attributeTombstone :: !(Storable.Vector Tombstone)
26 |     , attributeTable :: !Striped.Table
27 |     } deriving (Eq, Ord, Generic)
28 | 
29 | data Entity =
30 |   Entity {
31 |       entityHash :: !EntityHash
32 |     , entityId :: !EntityId
33 |     , entityAttributes :: !(Boxed.Vector Attribute)
34 |     } deriving (Eq, Ord, Generic)
35 | 
36 | instance Show Attribute where
37 |   showsPrec =
38 |     gshowsPrec
39 | 
40 | instance Show Entity where
41 |   showsPrec =
42 |     gshowsPrec
43 | 


--------------------------------------------------------------------------------
/zebra-core/src/Zebra/Factset/Fact.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE DeriveGeneric #-}
  2 | {-# LANGUAGE LambdaCase #-}
  3 | {-# LANGUAGE NoImplicitPrelude #-}
  4 | {-# LANGUAGE OverloadedStrings #-}
  5 | {-# OPTIONS_GHC -funbox-strict-fields #-}
  6 | module Zebra.Factset.Fact (
  7 |     Fact(..)
  8 |   , toValueTable
  9 |   , render
 10 | 
 11 |   , FactConversionError(..)
 12 |   , renderFactConversionError
 13 | 
 14 |   , FactRenderError(..)
 15 |   , renderFactRenderError
 16 |   ) where
 17 | 
 18 | import           Data.ByteString (ByteString)
 19 | import qualified Data.ByteString.Char8 as Char8
 20 | import qualified Data.Text as Text
 21 | import           Data.Thyme.Format (formatTime)
 22 | import qualified Data.Vector as Boxed
 23 | 
 24 | import           GHC.Generics (Generic)
 25 | 
 26 | import           P hiding (some)
 27 | 
 28 | import           System.Locale (defaultTimeLocale)
 29 | 
 30 | import           Text.Printf (printf)
 31 | import           Text.Show.Pretty (ppShow)
 32 | 
 33 | import           Zebra.Factset.Data
 34 | import           Zebra.Serial.Json
 35 | import           Zebra.Table.Data
 36 | import qualified Zebra.Table.Logical as Logical
 37 | import qualified Zebra.Table.Schema as Schema
 38 | import           Zebra.Table.Striped (StripedError)
 39 | import qualified Zebra.Table.Striped as Striped
 40 | 
 41 | 
 42 | data Fact =
 43 |   Fact {
 44 |       factEntityHash :: !EntityHash
 45 |     , factEntityId :: !EntityId
 46 |     , factAttributeId :: !AttributeId
 47 |     , factTime :: !Time
 48 |     , factFactsetId :: !FactsetId
 49 |     , factValue :: !(Maybe' Logical.Value)
 50 |     } deriving (Eq, Ord, Show, Generic)
 51 | 
 52 | data FactConversionError =
 53 |     FactStripedError !StripedError
 54 |   | FactConversionSchemaError !FactSchemaError
 55 |     deriving (Eq, Show)
 56 | 
 57 | data FactRenderError =
 58 |     FactJsonEncodeError !JsonLogicalEncodeError
 59 |   | FactSchemaNotFoundForAttribute !AttributeId
 60 |   | FactRenderSchemaError !FactSchemaError
 61 |     deriving (Eq, Show)
 62 | 
 63 | data FactSchemaError =
 64 |     FactExpectedArrayTable !Schema.Table
 65 |     deriving (Eq, Show)
 66 | 
 67 | renderFactConversionError :: FactConversionError -> Text
 68 | renderFactConversionError = \case
 69 |   FactStripedError err ->
 70 |     Striped.renderStripedError err
 71 |   FactConversionSchemaError err ->
 72 |     renderFactSchemaError err
 73 | 
 74 | renderFactRenderError :: FactRenderError -> Text
 75 | renderFactRenderError = \case
 76 |   FactJsonEncodeError err ->
 77 |     renderJsonLogicalEncodeError err
 78 |   FactSchemaNotFoundForAttribute (AttributeId aid) ->
 79 |     "Could not render fact, no schema found for attribute-id: " <> Text.pack (show aid)
 80 |   FactRenderSchemaError err ->
 81 |     renderFactSchemaError err
 82 | 
 83 | renderFactSchemaError :: FactSchemaError -> Text
 84 | renderFactSchemaError = \case
 85 |   FactExpectedArrayTable schema ->
 86 |     "Fact tables must be arrays, found: " <> Text.pack (ppShow schema)
 87 | 
 88 | toValueTable :: Boxed.Vector Schema.Column -> Boxed.Vector Fact -> Either FactConversionError (Boxed.Vector Striped.Table)
 89 | toValueTable schemas facts =
 90 |   flip Boxed.imapM schemas $ \ix schema -> do
 91 |     let
 92 |       defaultValue =
 93 |         Logical.defaultValue schema
 94 | 
 95 |       matchId fact =
 96 |         AttributeId (fromIntegral ix) == factAttributeId fact
 97 | 
 98 |       values =
 99 |         Boxed.map (fromMaybe' defaultValue . factValue) $
100 |         Boxed.filter matchId facts
101 | 
102 |     first FactStripedError . Striped.fromLogical (Schema.Array DenyDefault schema) $ Logical.Array values
103 | 
104 | render :: Boxed.Vector Schema.Column -> Fact -> Either FactRenderError ByteString
105 | render schemas fact = do
106 |   let
107 |     aid =
108 |       factAttributeId fact
109 | 
110 |     ix =
111 |       fromIntegral $ unAttributeId aid
112 | 
113 |   cschema <- maybeToRight (FactSchemaNotFoundForAttribute aid) (schemas Boxed.!? ix)
114 |   rvalue <- renderMaybeValue cschema $ factValue fact
115 | 
116 |   pure $ Char8.intercalate "|" [
117 |       renderEntityHash $ factEntityHash fact
118 |     , unEntityId $ factEntityId fact
119 |     , renderAttributeId $ factAttributeId fact
120 |     , renderTime $ factTime fact
121 |     , renderFactsetId $ factFactsetId fact
122 |     , rvalue
123 |     ]
124 | 
125 | renderEntityHash :: EntityHash -> ByteString
126 | renderEntityHash (EntityHash hash) =
127 |   Char8.pack $ printf "0x%08X" hash
128 | 
129 | renderAttributeId :: AttributeId -> ByteString
130 | renderAttributeId (AttributeId aid) =
131 |   Char8.pack $ printf "attribute=%05d" aid
132 | 
133 | renderTime :: Time -> ByteString
134 | renderTime =
135 |   Char8.pack . formatTime defaultTimeLocale "%0Y-%m-%d %H:%M:%S" . toUTCTime
136 | 
137 | renderFactsetId :: FactsetId -> ByteString
138 | renderFactsetId (FactsetId factsetId) =
139 |   Char8.pack $ printf "factset=%08x" factsetId
140 | 
141 | renderMaybeValue :: Schema.Column -> Maybe' Logical.Value -> Either FactRenderError ByteString
142 | renderMaybeValue schema =
143 |   maybe' (pure "NA") (first FactJsonEncodeError . encodeLogicalValue schema)
144 | 


--------------------------------------------------------------------------------
/zebra-core/src/Zebra/Foreign/Entity.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE GeneralizedNewtypeDeriving #-}
  2 | {-# LANGUAGE LambdaCase #-}
  3 | {-# LANGUAGE NoImplicitPrelude #-}
  4 | {-# LANGUAGE ScopedTypeVariables #-}
  5 | module Zebra.Foreign.Entity (
  6 |     CEntity(..)
  7 |   , entityOfForeign
  8 |   , foreignOfEntity
  9 | 
 10 |   , peekEntity
 11 |   , pokeEntity
 12 |   , peekEntityHash
 13 |   , peekEntityId
 14 |   , peekAttribute
 15 |   , pokeAttribute
 16 |   ) where
 17 | 
 18 | import           Anemone.Foreign.Mempool (Mempool, alloc, calloc)
 19 | 
 20 | import           Control.Monad.IO.Class (MonadIO(..))
 21 | import           Control.Monad.Trans.Either (EitherT)
 22 | 
 23 | import qualified Data.ByteString as B
 24 | import qualified Data.Vector as Boxed
 25 | 
 26 | import           Foreign.Ptr (Ptr)
 27 | import           Foreign.Storable (Storable(..))
 28 | 
 29 | import           P
 30 | 
 31 | 
 32 | import           Zebra.Factset.Data
 33 | import           Zebra.Factset.Entity
 34 | import           Zebra.Foreign.Bindings
 35 | import           Zebra.Foreign.Table
 36 | import           Zebra.Foreign.Util
 37 | import qualified Zebra.Table.Striped as Striped
 38 | 
 39 | 
 40 | newtype CEntity =
 41 |   CEntity {
 42 |       unCEntity :: Ptr C'zebra_entity
 43 |     }
 44 |   deriving Storable
 45 | 
 46 | entityOfForeign :: MonadIO m => CEntity -> EitherT ForeignError m Entity
 47 | entityOfForeign (CEntity c_entity) =
 48 |   peekEntity c_entity
 49 | 
 50 | foreignOfEntity :: MonadIO m => Mempool -> Entity -> m CEntity
 51 | foreignOfEntity pool entity = do
 52 |   c_entity <- liftIO $ alloc pool
 53 |   pokeEntity pool c_entity entity
 54 |   pure $ CEntity c_entity
 55 | 
 56 | peekEntity :: MonadIO m => Ptr C'zebra_entity -> EitherT ForeignError m Entity
 57 | peekEntity c_entity = do
 58 |   hash <- peekEntityHash c_entity
 59 |   eid <- peekEntityId c_entity
 60 | 
 61 |   n_attrs <- fromIntegral <$> peekIO (p'zebra_entity'attribute_count c_entity)
 62 |   c_attributes <- peekIO (p'zebra_entity'attributes c_entity)
 63 | 
 64 |   fmap (Entity hash eid) $ peekMany c_attributes n_attrs peekAttribute
 65 | 
 66 | pokeEntity :: MonadIO m => Mempool -> Ptr C'zebra_entity -> Entity -> m ()
 67 | pokeEntity pool c_entity (Entity hash eid attributes) = do
 68 |   let
 69 |     eid_len =
 70 |       B.length $ unEntityId eid
 71 | 
 72 |   pokeIO (p'zebra_entity'hash c_entity) $ unEntityHash hash
 73 |   pokeIO (p'zebra_entity'id_length c_entity) $ fromIntegral eid_len
 74 |   pokeByteString pool (p'zebra_entity'id_bytes c_entity) $ unEntityId eid
 75 | 
 76 |   let
 77 |     n_attrs =
 78 |       Boxed.length attributes
 79 | 
 80 |   c_attributes <- liftIO . calloc pool $ fromIntegral n_attrs
 81 | 
 82 |   pokeIO (p'zebra_entity'attribute_count c_entity) $ fromIntegral n_attrs
 83 |   pokeIO (p'zebra_entity'attributes c_entity) c_attributes
 84 |   pokeMany c_attributes attributes $ pokeAttribute pool
 85 | 
 86 | 
 87 | peekEntityHash :: MonadIO m => Ptr C'zebra_entity -> EitherT ForeignError m EntityHash
 88 | peekEntityHash c_entity = do
 89 |   EntityHash <$> peekIO (p'zebra_entity'hash c_entity)
 90 | 
 91 | peekEntityId :: MonadIO m => Ptr C'zebra_entity -> EitherT ForeignError m EntityId
 92 | peekEntityId c_entity = do
 93 |   eid_len <- fromIntegral <$> peekIO (p'zebra_entity'id_length c_entity)
 94 |   EntityId <$> peekByteString eid_len (p'zebra_entity'id_bytes c_entity)
 95 | 
 96 | 
 97 | 
 98 | 
 99 | peekAttribute :: MonadIO m => Ptr C'zebra_attribute -> EitherT ForeignError m Attribute
100 | peekAttribute c_attribute = do
101 |   table <- peekTable (p'zebra_attribute'table c_attribute)
102 |   let n_rows = Striped.length table
103 |   times <- fmap timesOfForeign . peekVector n_rows $ p'zebra_attribute'times c_attribute
104 |   factset_ids <- fmap factsetIdsOfForeign . peekVector n_rows $ p'zebra_attribute'factset_ids c_attribute
105 |   tombstones <- fmap tombstonesOfForeign . peekVector n_rows $ p'zebra_attribute'tombstones c_attribute
106 | 
107 |   pure $ Attribute times factset_ids tombstones table
108 | 
109 | pokeAttribute :: MonadIO m => Mempool -> Ptr C'zebra_attribute -> Attribute -> m ()
110 | pokeAttribute pool c_attribute (Attribute times factset_ids tombstones table) = do
111 |   pokeVector pool (p'zebra_attribute'times c_attribute) $ foreignOfTimes times
112 |   pokeVector pool (p'zebra_attribute'factset_ids c_attribute) $ foreignOfFactsetIds factset_ids
113 |   pokeVector pool (p'zebra_attribute'tombstones c_attribute) $ foreignOfTombstones tombstones
114 |   pokeTable pool (p'zebra_attribute'table c_attribute) table
115 | 


--------------------------------------------------------------------------------
/zebra-core/src/Zebra/Foreign/Merge.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE GeneralizedNewtypeDeriving #-}
 2 | {-# LANGUAGE LambdaCase #-}
 3 | {-# LANGUAGE NoImplicitPrelude #-}
 4 | {-# LANGUAGE ScopedTypeVariables #-}
 5 | module Zebra.Foreign.Merge (
 6 |     CMergeMany(..)
 7 |   , mergeEntityPair
 8 |   , mergeManyInit
 9 |   , mergeManyPush
10 |   , mergeManyPop
11 |   , mergeManyClone
12 |   ) where
13 | 
14 | import           Anemone.Foreign.Mempool (Mempool)
15 | import qualified Anemone.Foreign.Mempool as Mempool
16 | 
17 | import           Control.Monad.IO.Class (MonadIO(..))
18 | import           Control.Monad.Trans.Either (EitherT)
19 | 
20 | import           Data.Coerce (coerce)
21 | import qualified Data.Vector.Storable as Storable
22 | 
23 | import           Foreign.Ptr (Ptr, nullPtr)
24 | import           Foreign.Storable (Storable(..))
25 | import           Foreign.ForeignPtr (ForeignPtr, withForeignPtr)
26 | 
27 | import           P
28 | 
29 | import           Zebra.Foreign.Bindings
30 | import           Zebra.Foreign.Entity
31 | import           Zebra.Foreign.Util
32 | 
33 | 
34 | mergeEntityPair :: MonadIO m => Mempool -> CEntity -> CEntity -> EitherT ForeignError m CEntity
35 | mergeEntityPair pool (CEntity c_entity1) (CEntity c_entity2) = do
36 |   merge_into <- liftIO $ Mempool.alloc pool
37 |   liftCError $ unsafe'c'zebra_merge_entity_pair pool c_entity1 c_entity2 merge_into
38 |   return $ CEntity merge_into
39 | 
40 | 
41 | 
42 | newtype CMergeMany =
43 |   CMergeMany {
44 |       unCMergeMany :: Ptr C'zebra_merge_many
45 |     }
46 |   deriving Storable
47 | 
48 | 
49 | mergeManyInit :: MonadIO m => Mempool -> EitherT ForeignError m CMergeMany
50 | mergeManyInit pool = allocStack $ \pmerge -> do
51 |   liftCError $ unsafe'c'zebra_mm_init pool pmerge
52 |   CMergeMany <$> liftIO (peek pmerge)
53 | 
54 | mergeManyPush :: MonadIO m => Mempool -> CMergeMany -> Storable.Vector CEntity -> EitherT ForeignError m ()
55 | mergeManyPush pool (CMergeMany merger) entities = do
56 |   let (ptr, len) = Storable.unsafeToForeignPtr0 entities
57 |   let ptr' :: ForeignPtr (Ptr C'zebra_entity) = coerce ptr
58 |   let len' :: Int64 = fromIntegral $ len
59 |   liftCError $ withForeignPtr ptr' $ unsafe'c'zebra_mm_push pool merger len'
60 | 
61 | mergeManyPop :: MonadIO m => Mempool -> CMergeMany -> EitherT ForeignError m (Maybe CEntity)
62 | mergeManyPop pool (CMergeMany merger) = allocStack $ \pentity -> do
63 |   liftCError $ unsafe'c'zebra_mm_pop pool merger pentity
64 |   entity <- liftIO $ peek pentity
65 |   if entity == nullPtr
66 |     then return Nothing
67 |     else return $ Just $ CEntity entity
68 | 
69 | mergeManyClone :: MonadIO m => Mempool -> CMergeMany -> EitherT ForeignError m CMergeMany
70 | mergeManyClone pool (CMergeMany merger) = allocStack $ \pmerge -> do
71 |   liftIO $ poke pmerge merger
72 |   liftCError $ unsafe'c'zebra_mm_clone pool pmerge
73 |   CMergeMany <$> liftIO (peek pmerge)
74 | 
75 | 


--------------------------------------------------------------------------------
/zebra-core/src/Zebra/Foreign/Serial.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE NoImplicitPrelude #-}
 2 | module Zebra.Foreign.Serial (
 3 |     unpackArray
 4 |   , packArray
 5 |   ) where
 6 | 
 7 | import           Data.ByteString (ByteString)
 8 | import qualified Data.ByteString as ByteString
 9 | import qualified Data.ByteString.Unsafe as ByteString
10 | import qualified Data.Vector.Storable.Mutable as Mutable
11 | import           Data.Word (Word8)
12 | 
13 | import           Foreign.Marshal.Alloc (alloca)
14 | import           Foreign.Storable (Storable(..))
15 | import           Foreign.Ptr (Ptr)
16 | 
17 | import           P
18 | 
19 | import qualified Prelude as Savage
20 | 
21 | import           System.IO (IO)
22 | import           System.IO.Unsafe (unsafePerformIO)
23 | 
24 | import qualified X.Data.Vector.Storable as Storable
25 | 
26 | import           Zebra.Foreign.Bindings
27 | import           Zebra.Foreign.Util
28 | 
29 | 
30 | unpackArray :: ByteString -> Int -> Int -> Either ForeignError (Storable.Vector Int64)
31 | unpackArray bytes elems offset =
32 |   unsafePerformIO $ do
33 |     fill <- Mutable.new elems
34 |     err <- ByteString.unsafeUseAsCString bytes $ \buf ->
35 |            Mutable.unsafeWith fill $ \fill' ->
36 |            unsafe'c'zebra_unpack_array buf (i64 $ ByteString.length bytes) (i64 elems) (i64 offset) fill'
37 | 
38 |     case fromCError err of
39 |      Left err' -> return $ Left err'
40 |      Right ()  -> Right <$> Storable.unsafeFreeze fill
41 | 
42 | i64 :: Int -> Int64
43 | i64 = fromIntegral
44 | 
45 | -- | Pack an array into a buffer. Assumes there is enough space for the whole output.
46 | -- This type signature is designed to be used with Builder
47 | packArray :: Storable.Vector Int64 -> Ptr Word8 -> IO (Ptr Word8)
48 | packArray elems buf
49 |  = alloca $ \bufp -> do
50 |     poke bufp buf
51 |     let len = i64 $ Storable.length elems
52 |     -- XXX: throwing away the error
53 |     err <- Storable.unsafeWith elems $ \elemsp -> unsafe'c'zebra_pack_array bufp elemsp len
54 |     case fromCError err of
55 |      Left err' -> Savage.error ("Zebra.Foreign.Serial.packArray: C code returned error " <> show err')
56 |      Right ()  -> peek bufp
57 | 


--------------------------------------------------------------------------------
/zebra-core/src/Zebra/Merge/Block.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE DeriveGeneric #-}
 2 | {-# LANGUAGE LambdaCase #-}
 3 | {-# LANGUAGE MultiParamTypeClasses #-}
 4 | {-# LANGUAGE NoImplicitPrelude #-}
 5 | {-# LANGUAGE OverloadedStrings #-}
 6 | {-# LANGUAGE PatternGuards #-}
 7 | {-# LANGUAGE ScopedTypeVariables #-}
 8 | {-# LANGUAGE TupleSections #-}
 9 | {-# LANGUAGE TypeFamilies #-}
10 | module Zebra.Merge.Block
11 |   ( mergeFiles
12 |   ) where
13 | 
14 | import Zebra.Factset.Block
15 | import Zebra.Merge.Base
16 | import Zebra.Merge.Entity
17 | 
18 | import qualified X.Data.Vector as Boxed
19 | import qualified X.Data.Vector.Stream as Stream
20 | 
21 | import P
22 | 
23 | -- | Merge a whole bunch of files together.
24 | -- All files must have the same header.
25 | --
26 | -- For fusion to work properly, the type parameter 'c' should not be a stream,
27 | -- so that the f_map argument is the stream producer which can be inlined.
28 | -- We need to be very careful that streams are not stored inside vectors,
29 | -- because then they would be stored as a thunk and good codegen / fusion is impossible.
30 | mergeFiles :: Monad m
31 |   => (c -> Stream.Stream m Block)
32 |   -> Boxed.Vector c
33 |   -> Stream.Stream m (Either MergeError EntityMerged)
34 | mergeFiles f_map fs
35 |  = Stream.map entityMergedOfEntityValues
36 |  $ treeFold mergeEntityValues Stream.empty getEntities
37 |  $ Boxed.indexed fs
38 |  where
39 |   getEntities (ix,blocks)
40 |    = Stream.concatMap (entityValuesOfBlock $ BlockDataId ix) (f_map blocks)
41 | 
42 | 


--------------------------------------------------------------------------------
/zebra-core/src/Zebra/Merge/BlockC.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE DeriveGeneric #-}
  2 | {-# LANGUAGE LambdaCase #-}
  3 | {-# LANGUAGE MultiParamTypeClasses #-}
  4 | {-# LANGUAGE NoImplicitPrelude #-}
  5 | {-# LANGUAGE OverloadedStrings #-}
  6 | {-# LANGUAGE PatternGuards #-}
  7 | {-# LANGUAGE PatternSynonyms #-}
  8 | {-# LANGUAGE ScopedTypeVariables #-}
  9 | {-# LANGUAGE TupleSections #-}
 10 | {-# LANGUAGE TypeFamilies #-}
 11 | module Zebra.Merge.BlockC
 12 |   ( mergeBlocks
 13 |   , MergeOptions(..)
 14 |   , MergeError(..)
 15 |   ) where
 16 | 
 17 | import qualified Anemone.Foreign.Mempool as Mempool
 18 | import           Anemone.Foreign.Mempool (Mempool)
 19 | 
 20 | import           Control.Monad.IO.Class (MonadIO(..))
 21 | import           Control.Monad.Trans.Class (lift)
 22 | import           Control.Monad.Trans.Either (EitherT, left)
 23 | 
 24 | import qualified Data.Map.Strict as Map
 25 | 
 26 | import           P
 27 | 
 28 | import qualified X.Data.Vector as Boxed
 29 | 
 30 | import           Zebra.Factset.Block
 31 | import           Zebra.Factset.Data
 32 | import           Zebra.Foreign.Block
 33 | import           Zebra.Foreign.Entity
 34 | import           Zebra.Foreign.Merge
 35 | import           Zebra.Foreign.Util
 36 | 
 37 | 
 38 | data MergeError =
 39 |     MergeInputEntitiesOutOfOrder (EntityHash,EntityId) (EntityHash,EntityId)
 40 |   | MergeForeign ForeignError
 41 |   deriving (Eq, Show)
 42 | 
 43 | data MergeOptions c m =
 44 |   MergeOptions
 45 |   { optionPullBlock  :: c -> m (Maybe Block)
 46 |   , optionPushEntity :: CEntity -> m ()
 47 |   , optionGCAfterBytes :: !Int64
 48 |   }
 49 | 
 50 | data MergeState c =
 51 |   MergeState
 52 |   { stateEntityRefills :: !(Map.Map EntityId [c])
 53 |   , stateMempool       :: !Mempool
 54 |   , stateMergeMany     :: !CMergeMany
 55 |   -- Last seen entity id, to check
 56 |   , stateLastEntityId  :: !(Maybe (EntityHash, EntityId))
 57 |   }
 58 | 
 59 | -- | Merge a whole bunch of files together.
 60 | -- All files must have the same header.
 61 | mergeBlocks :: MonadIO m
 62 |   => MergeOptions c m
 63 |   -> Boxed.Vector c
 64 |   -> EitherT MergeError m ()
 65 | 
 66 | mergeBlocks options files = do
 67 |   pool <- liftIO Mempool.create
 68 |   merger <- foreignT $ mergeManyInit pool
 69 |   let state0 = MergeState Map.empty pool merger Nothing
 70 |   -- FIXME bracket/catch and clean up last memory pool on error
 71 |   -- need to convert state into an IORef for this
 72 |   state <- foldM fill state0 files
 73 |   state' <- go state
 74 |   liftIO $ Mempool.free $ stateMempool state'
 75 |   return ()
 76 |  where
 77 | 
 78 |   go state0 = do
 79 |     state <- gcCheck state0
 80 |     pop <- foreignT $ mergeManyPop (stateMempool state) (stateMergeMany state)
 81 |     case pop of
 82 |       Nothing ->
 83 |         return state
 84 |       Just centity -> do
 85 |         lift $ optionPushEntity options centity
 86 |         eid <- centityId centity
 87 |         ehash <- centityHash centity
 88 | 
 89 |         case stateLastEntityId state of
 90 |          Nothing -> return ()
 91 |          Just last
 92 |           -> when ((ehash,eid) <= last) $
 93 |               left $ MergeInputEntitiesOutOfOrder last (ehash,eid)
 94 | 
 95 |         state' <- refill state eid
 96 |         go state' { stateLastEntityId = Just (ehash,eid) }
 97 | 
 98 |   refill state eid =
 99 |     case Map.lookup eid $ stateEntityRefills state of
100 |       Nothing -> return state
101 |       Just refills -> do
102 |         let state' = state { stateEntityRefills = Map.delete eid $ stateEntityRefills state }
103 |         foldM fill state' refills
104 | 
105 |   fill state fileId = do
106 |     pblock <- lift $ optionPullBlock options fileId
107 |     case pblock of
108 |       Nothing ->
109 |         return state
110 |       Just block -> do
111 |         cblock <- lift $ foreignOfBlock (stateMempool state) block
112 |         entities <- foreignT $ foreignEntitiesOfBlock (stateMempool state) cblock
113 |         foreignT $ mergeManyPush (stateMempool state) (stateMergeMany state) (Boxed.convert entities)
114 | 
115 |         -- FIXME do this better
116 |         case Boxed.uncons $ Boxed.reverse entities of
117 |           Nothing ->
118 |             return state
119 |           Just (centity,_) -> do
120 |             eid <- centityId centity
121 |             let refills  = Map.insertWith (<>) eid [fileId]
122 |                          $ stateEntityRefills state
123 |             let state'   = state
124 |                          { stateEntityRefills = refills }
125 |             gcCheck state'
126 | 
127 | 
128 |   gcCheck state = do
129 |     used <- liftIO $ Mempool.totalAllocSize $ stateMempool state
130 |     if used > optionGCAfterBytes options then gcRun state else return state
131 | 
132 |   gcRun state = do
133 |     pool' <- liftIO Mempool.create
134 |     merger' <- foreignT $ mergeManyClone pool' $ stateMergeMany state
135 |     liftIO $ Mempool.free $ stateMempool state
136 |     return state { stateMempool = pool', stateMergeMany = merger' }
137 | 
138 |   centityId = foreignT . peekEntityId . unCEntity
139 |   centityHash = foreignT . peekEntityHash . unCEntity
140 | 
141 |   foreignT = firstT MergeForeign
142 | 
143 | 


--------------------------------------------------------------------------------
/zebra-core/src/Zebra/Merge/Puller/File.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE DeriveGeneric #-}
 2 | {-# LANGUAGE LambdaCase #-}
 3 | {-# LANGUAGE MultiParamTypeClasses #-}
 4 | {-# LANGUAGE NoImplicitPrelude #-}
 5 | {-# LANGUAGE OverloadedStrings #-}
 6 | {-# LANGUAGE PatternGuards #-}
 7 | {-# LANGUAGE PatternSynonyms #-}
 8 | {-# LANGUAGE ScopedTypeVariables #-}
 9 | {-# LANGUAGE TupleSections #-}
10 | {-# LANGUAGE TypeFamilies #-}
11 | module Zebra.Merge.Puller.File
12 |   ( blockChainPuller
13 |   , pullerOfStream
14 |   , PullerError (..)
15 |   , PullId (..)
16 |   ) where
17 | 
18 | import           Control.Monad.IO.Class (MonadIO(..))
19 | import           Control.Monad.Trans.Either (EitherT, left)
20 | import           Control.Monad.Trans.Class (lift)
21 | import           Control.Monad.Trans.Resource (MonadResource(..))
22 | 
23 | import qualified Data.IORef as IORef
24 | 
25 | import           P
26 | 
27 | import           System.IO (FilePath)
28 | 
29 | import qualified X.Data.Vector as Boxed
30 | import qualified X.Data.Vector.Stream as Stream
31 | 
32 | import           Zebra.Factset.Block
33 | import           Zebra.Foreign.Util
34 | import           Zebra.Serial.Binary.File
35 | 
36 | 
37 | data PullerError =
38 |    PullerForeign ForeignError
39 |  | PullerDifferentHeaders FilePath FilePath
40 |  | PullerDecode FileError
41 |    deriving (Eq, Show)
42 | 
43 | newtype PullId =
44 |    PullId { unPullId :: Int }
45 |    deriving Show
46 | 
47 | 
48 | blockChainPuller :: MonadResource m
49 |   => Boxed.Vector FilePath
50 |   -> EitherT PullerError m (PullId -> EitherT FileError m (Maybe Block), Boxed.Vector PullId)
51 | blockChainPuller files
52 |  | Just (file0, _) <- Boxed.uncons files
53 |  = do
54 |     (header0,_) <- firstT PullerDecode $ readBlocks file0
55 |     pullers <- mapM (makePuller file0 header0) files
56 |     let puller pid = pullers Boxed.! unPullId pid
57 |     let pullids = Boxed.map (PullId . fst) $ Boxed.indexed files
58 |     return (puller, pullids)
59 | 
60 |  -- No input files, don't even bother
61 |  | otherwise
62 |  = return (\_id -> return Nothing, Boxed.empty)
63 | 
64 |  where
65 |   makePuller file0 header0 fileN = do
66 |     blocks <- readBlocksCheckHeader file0 header0 fileN
67 |     lift $ pullerOfStream blocks
68 | 
69 |   readBlocksCheckHeader file0 header0 fileN = do
70 |     (h,bs) <- firstT PullerDecode $ readBlocks fileN
71 |     -- It is tempting to put the header in entirety
72 |     -- but I think it will be too big to print with Show so worry about it later
73 |     when (h /= header0) $
74 |       left $ PullerDifferentHeaders file0 fileN
75 |     return bs
76 | 
77 | 
78 | 
79 | pullerOfStream :: MonadIO m => Stream.Stream (EitherT e m) b -> m (EitherT e m (Maybe b))
80 | pullerOfStream (Stream.Stream loop state0) = do
81 |   stateRef <- liftIO $ IORef.newIORef state0
82 |   return $ go stateRef
83 |  where
84 |   go stateRef = do
85 |     state <- liftIO $ IORef.readIORef stateRef
86 |     step  <- loop state
87 |     case step of
88 |       Stream.Yield v state' -> do
89 |         liftIO $ IORef.writeIORef stateRef state'
90 |         return (Just v)
91 |       Stream.Skip state' -> do
92 |         liftIO $ IORef.writeIORef stateRef state'
93 |         go stateRef
94 |       Stream.Done -> do
95 |         return Nothing
96 | 
97 | 


--------------------------------------------------------------------------------
/zebra-core/src/Zebra/Merge/Puller/List.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE NoImplicitPrelude #-}
 2 | {-# LANGUAGE TemplateHaskell #-}
 3 | {-# LANGUAGE FlexibleContexts #-}
 4 | {-# LANGUAGE PatternGuards #-}
 5 | {-# LANGUAGE ScopedTypeVariables #-}
 6 | module Zebra.Merge.Puller.List (
 7 |     mergeLists
 8 |   ) where
 9 | 
10 | import           Control.Monad.IO.Class (MonadIO(..))
11 | import           Control.Monad.Trans.Either (EitherT)
12 | 
13 | import qualified Data.IORef as Ref
14 | 
15 | import           P
16 | 
17 | import           System.IO (IO)
18 | 
19 | import qualified X.Data.Vector as Boxed
20 | 
21 | import           Zebra.Factset.Block
22 | import           Zebra.Factset.Entity
23 | import           Zebra.Foreign.Entity
24 | import           Zebra.Merge.BlockC
25 | import           Zebra.X.Either
26 | 
27 | 
28 | mergeLists :: Int64 -> [[Block]] -> EitherT MergeError IO [Entity]
29 | mergeLists gcEvery blocks0 = do
30 |   blockRef <- liftIO $ Ref.newIORef blocks0
31 |   entityRef <- liftIO $ Ref.newIORef []
32 |   let pull ix = do
33 |       blocks <- liftIO $ Ref.readIORef blockRef
34 |       case headIx blocks ix of
35 |         Nothing -> return Nothing
36 |         Just (b,blocks') -> do
37 |           liftIO $ Ref.writeIORef blockRef blocks'
38 |           return (Just b)
39 |   let push e = do
40 |       e' <- entityOfForeign e
41 |       entities <- liftIO $ Ref.readIORef entityRef
42 |       liftIO $ Ref.writeIORef entityRef (e' : entities)
43 |   let opts = MergeOptions pull push gcEvery
44 | 
45 |   let ixes = Boxed.enumFromN (0 :: Int) (length blocks0)
46 | 
47 |   secondJoin MergeForeign $ mergeBlocks opts ixes
48 |   reverse <$> liftIO (Ref.readIORef entityRef)
49 | 
50 |  where
51 | 
52 |   headIx ((b:as):bs) 0 = Just (b, as:bs)
53 |   headIx _           0 = Nothing
54 |   headIx (as:bs) n
55 |    | Just (b,bs') <- headIx bs (n-1)
56 |    = Just (b, as:bs')
57 |   headIx _ _
58 |    = Nothing
59 | 


--------------------------------------------------------------------------------
/zebra-core/src/Zebra/Serial/Binary.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE NoImplicitPrelude #-}
 2 | module Zebra.Serial.Binary (
 3 |     module X
 4 |   ) where
 5 | 
 6 | import           Zebra.Serial.Binary.Data as X (Header(..), BinaryVersion(..))
 7 | import           Zebra.Serial.Binary.Data as X (schemaOfHeader, headerOfSchema)
 8 | import           Zebra.Serial.Binary.Logical as X
 9 | import           Zebra.Serial.Binary.Striped as X
10 | 


--------------------------------------------------------------------------------
/zebra-core/src/Zebra/Serial/Binary/Data.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE LambdaCase #-}
 2 | {-# LANGUAGE NoImplicitPrelude #-}
 3 | {-# LANGUAGE OverloadedStrings #-}
 4 | module Zebra.Serial.Binary.Data (
 5 |     Header(..)
 6 |   , BinaryVersion(..)
 7 | 
 8 |   , headerOfSchema
 9 |   , schemaOfHeader
10 | 
11 |   , headerOfAttributes
12 |   , attributesOfHeader
13 | 
14 |   , BinaryEncodeError(..)
15 |   , renderBinaryEncodeError
16 | 
17 |   , BinaryDecodeError(..)
18 |   , renderBinaryDecodeError
19 |   ) where
20 | 
21 | import           Data.Map (Map)
22 | 
23 | import           P
24 | 
25 | import           Zebra.Factset.Data
26 | import           Zebra.Factset.Table
27 | import           Zebra.Table.Encoding (Utf8Error)
28 | import qualified Zebra.Table.Encoding as Encoding
29 | import qualified Zebra.Table.Schema as Schema
30 | 
31 | 
32 | data Header =
33 |     HeaderV2 !(Map AttributeName Schema.Column)
34 |   | HeaderV3 !Schema.Table
35 |     deriving (Eq, Ord, Show)
36 | 
37 | data BinaryVersion =
38 | --  BinaryV0 -- x Initial version.
39 | --  BinaryV1 -- x Store factset-id instead of priority, this flips sort order.
40 |     BinaryV2 -- ^ Schema is stored in header, instead of encoding.
41 |   | BinaryV3 -- ^ Data is stored as tables instead of entity blocks.
42 |     deriving (Eq, Ord, Show)
43 | 
44 | data BinaryEncodeError =
45 |     BinaryEncodeUtf8 !Utf8Error
46 |   | BinaryEncodeBlockTableError !BlockTableError
47 |     deriving (Eq, Show)
48 | 
49 | data BinaryDecodeError =
50 |     BinaryDecodeUtf8 !Utf8Error
51 |     deriving (Eq, Show)
52 | 
53 | renderBinaryEncodeError :: BinaryEncodeError -> Text
54 | renderBinaryEncodeError = \case
55 |   BinaryEncodeUtf8 err ->
56 |     "Failed encoding UTF-8 binary: " <>
57 |     Encoding.renderUtf8Error err
58 |   BinaryEncodeBlockTableError err ->
59 |     renderBlockTableError err
60 | 
61 | renderBinaryDecodeError :: BinaryDecodeError -> Text
62 | renderBinaryDecodeError = \case
63 |   BinaryDecodeUtf8 err ->
64 |     "Failed decoding UTF-8 binary: " <>
65 |     Encoding.renderUtf8Error err
66 | 
67 | headerOfSchema :: BinaryVersion -> Schema.Table -> Either BlockTableError Header
68 | headerOfSchema version schema =
69 |   case version of
70 |     BinaryV2 ->
71 |       HeaderV2 <$> attributesOfTableSchema schema
72 |     BinaryV3 ->
73 |       pure $ HeaderV3 schema
74 | 
75 | schemaOfHeader :: Header -> Schema.Table
76 | schemaOfHeader = \case
77 |   HeaderV2 attributes ->
78 |     tableSchemaOfAttributes attributes
79 |   HeaderV3 table ->
80 |     table
81 | 
82 | headerOfAttributes :: BinaryVersion -> Map AttributeName Schema.Column -> Header
83 | headerOfAttributes version attributes =
84 |   case version of
85 |     BinaryV2 ->
86 |       HeaderV2 attributes
87 |     BinaryV3 ->
88 |       HeaderV3 (tableSchemaOfAttributes attributes)
89 | 
90 | attributesOfHeader :: Header -> Either BlockTableError (Map AttributeName Schema.Column)
91 | attributesOfHeader = \case
92 |   HeaderV2 attributes ->
93 |     pure attributes
94 |   HeaderV3 table ->
95 |     attributesOfTableSchema table
96 | 


--------------------------------------------------------------------------------
/zebra-core/src/Zebra/Serial/Binary/Logical.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE LambdaCase #-}
  2 | {-# LANGUAGE NoImplicitPrelude #-}
  3 | {-# LANGUAGE OverloadedStrings #-}
  4 | {-# LANGUAGE TupleSections #-}
  5 | module Zebra.Serial.Binary.Logical (
  6 |     encodeLogical
  7 |   , encodeLogicalWith
  8 |   , decodeLogical
  9 | 
 10 |   , BinaryLogicalEncodeError(..)
 11 |   , renderBinaryLogicalEncodeError
 12 | 
 13 |   , BinaryLogicalDecodeError(..)
 14 |   , renderBinaryLogicalDecodeError
 15 |   ) where
 16 | 
 17 | import           Control.Monad.Trans.Either (EitherT, left, hoistEither)
 18 | import           Control.Monad.Morph (hoist)
 19 | import           Control.Monad.Trans.Class (lift)
 20 | 
 21 | import           P
 22 | 
 23 | import           Viking (ByteStream, Stream, Of(..))
 24 | import qualified Viking.Stream as Stream
 25 | 
 26 | 
 27 | import           Zebra.Serial.Binary.Data
 28 | import           Zebra.Serial.Binary.Striped
 29 | import qualified Zebra.Table.Logical as Logical
 30 | import qualified Zebra.Table.Schema as Schema
 31 | import           Zebra.Table.Striped (StripedError)
 32 | import qualified Zebra.Table.Striped as Striped
 33 | import           Zebra.X.Either
 34 | 
 35 | 
 36 | data BinaryLogicalEncodeError =
 37 |     BinaryLogicalEncodeStripedError !StripedError
 38 |   | BinaryLogicalStripedEncodeError !BinaryStripedEncodeError
 39 |     deriving (Eq, Show)
 40 | 
 41 | data BinaryLogicalDecodeError =
 42 |     BinaryLogicalDecodeEmpty
 43 |   | BinaryLogicalDecodeStripedError !StripedError
 44 |   | BinaryLogicalStripedDecodeError !BinaryStripedDecodeError
 45 |     deriving (Eq, Show)
 46 | 
 47 | renderBinaryLogicalEncodeError :: BinaryLogicalEncodeError -> Text
 48 | renderBinaryLogicalEncodeError = \case
 49 |   BinaryLogicalEncodeStripedError err ->
 50 |     Striped.renderStripedError err
 51 |   BinaryLogicalStripedEncodeError err ->
 52 |     renderBinaryStripedEncodeError err
 53 | 
 54 | renderBinaryLogicalDecodeError :: BinaryLogicalDecodeError -> Text
 55 | renderBinaryLogicalDecodeError = \case
 56 |   BinaryLogicalDecodeEmpty ->
 57 |     "Cannot resolve schema of an empty zebra file"
 58 |   BinaryLogicalDecodeStripedError err ->
 59 |     Striped.renderStripedError err
 60 |   BinaryLogicalStripedDecodeError err ->
 61 |     renderBinaryStripedDecodeError err
 62 | 
 63 | encodeLogical ::
 64 |      Monad m
 65 |   => Schema.Table
 66 |   -> Stream (Of Logical.Table) m r
 67 |   -> ByteStream (EitherT BinaryLogicalEncodeError m) r
 68 | encodeLogical =
 69 |   encodeLogicalWith BinaryV3
 70 | {-# INLINABLE encodeLogical #-}
 71 | 
 72 | encodeLogicalWith ::
 73 |      Monad m
 74 |   => BinaryVersion
 75 |   -> Schema.Table
 76 |   -> Stream (Of Logical.Table) m r
 77 |   -> ByteStream (EitherT BinaryLogicalEncodeError m) r
 78 | encodeLogicalWith version schema input =
 79 |   hoist (firstJoin BinaryLogicalStripedEncodeError) .
 80 |   encodeStripedWith version $
 81 |   Stream.mapM
 82 |     (hoistEither . first BinaryLogicalEncodeStripedError . Striped.fromLogical schema)
 83 |     (hoist lift input)
 84 | {-# INLINABLE encodeLogicalWith #-}
 85 | 
 86 | decodeLogical ::
 87 |      Monad m
 88 |   => ByteStream m r
 89 |   -> EitherT BinaryLogicalDecodeError m
 90 |        (Schema.Table, Stream (Of Logical.Table) (EitherT BinaryLogicalDecodeError m) r)
 91 | decodeLogical bss0 = do
 92 |   e <- firstT BinaryLogicalStripedDecodeError . Stream.next $ decodeStriped bss0
 93 |   case e of
 94 |     Left _r ->
 95 |       left $ BinaryLogicalDecodeEmpty
 96 | 
 97 |     Right (hd, tl) ->
 98 |       pure . (Striped.schema hd,) $
 99 |         Stream.mapM (hoistEither . first BinaryLogicalDecodeStripedError . Striped.toLogical) $
100 |         hoist (firstT BinaryLogicalStripedDecodeError) (Stream.cons hd tl)
101 | {-# INLINABLE decodeLogical #-}
102 | 


--------------------------------------------------------------------------------
/zebra-core/src/Zebra/Serial/Binary/Striped.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE LambdaCase #-}
  2 | {-# LANGUAGE NoImplicitPrelude #-}
  3 | {-# LANGUAGE OverloadedStrings #-}
  4 | module Zebra.Serial.Binary.Striped (
  5 |     encodeStriped
  6 |   , encodeStripedWith
  7 |   , decodeStriped
  8 | 
  9 |   , BinaryStripedEncodeError(..)
 10 |   , renderBinaryStripedEncodeError
 11 | 
 12 |   , BinaryStripedDecodeError(..)
 13 |   , renderBinaryStripedDecodeError
 14 |   ) where
 15 | 
 16 | import           Control.Monad.Trans.Either (EitherT, left, hoistEither)
 17 | import           Control.Monad.Morph (hoist)
 18 | import           Control.Monad.Trans.Class (lift)
 19 | 
 20 | import           P
 21 | 
 22 | import           Viking (ByteStream, Stream, Of(..))
 23 | import qualified Viking.ByteStream as ByteStream
 24 | import qualified Viking.Stream as Stream
 25 | import           Viking.Stream.Binary (BinaryError, renderBinaryError)
 26 | import qualified Viking.Stream.Binary as Stream
 27 | 
 28 | 
 29 | import           Zebra.Factset.Table
 30 | import           Zebra.Serial.Binary.Block
 31 | import           Zebra.Serial.Binary.Data
 32 | import           Zebra.Serial.Binary.Header
 33 | import qualified Zebra.Table.Striped as Striped
 34 | 
 35 | 
 36 | data BinaryStripedEncodeError =
 37 |     BinaryStripedEncodeEmpty
 38 |   | BinaryStripedEncodeBlockTableError !BlockTableError
 39 |   | BinaryStripedEncodeError !BinaryEncodeError
 40 |     deriving (Eq, Show)
 41 | 
 42 | data BinaryStripedDecodeError =
 43 |     BinaryStripedDecodeHeaderError !BinaryError
 44 |   | BinaryStripedDecodeBlockError !BinaryError
 45 |     deriving (Eq, Show)
 46 | 
 47 | renderBinaryStripedEncodeError :: BinaryStripedEncodeError -> Text
 48 | renderBinaryStripedEncodeError = \case
 49 |   BinaryStripedEncodeEmpty ->
 50 |     "Cannot encode a zebra file with no schema"
 51 |   BinaryStripedEncodeBlockTableError err ->
 52 |     renderBlockTableError err
 53 |   BinaryStripedEncodeError err ->
 54 |     renderBinaryEncodeError err
 55 | 
 56 | renderBinaryStripedDecodeError :: BinaryStripedDecodeError -> Text
 57 | renderBinaryStripedDecodeError = \case
 58 |   BinaryStripedDecodeHeaderError err ->
 59 |     "Error decoding header: " <> renderBinaryError err
 60 |   BinaryStripedDecodeBlockError err ->
 61 |     "Error decoding block: " <> renderBinaryError err
 62 | 
 63 | encodeStriped ::
 64 |      Monad m
 65 |   => Stream (Of Striped.Table) m r
 66 |   -> ByteStream (EitherT BinaryStripedEncodeError m) r
 67 | encodeStriped =
 68 |   encodeStripedWith BinaryV3
 69 | {-# INLINABLE encodeStriped #-}
 70 | 
 71 | encodeStripedWith ::
 72 |      Monad m
 73 |   => BinaryVersion
 74 |   -> Stream (Of Striped.Table) m r
 75 |   -> ByteStream (EitherT BinaryStripedEncodeError m) r
 76 | encodeStripedWith version input = do
 77 |   e <- lift . lift $ Stream.next input
 78 |   case e of
 79 |     Left _r ->
 80 |       lift $ left BinaryStripedEncodeEmpty
 81 | 
 82 |     Right (hd, tl) -> do
 83 |       header <- lift . hoistEither . first BinaryStripedEncodeBlockTableError $
 84 |         headerOfSchema version (Striped.schema hd)
 85 | 
 86 |       ByteStream.fromBuilders . Stream.cons (bHeader header) .
 87 |         Stream.mapM (hoistEither . first BinaryStripedEncodeError . bBlockTable header) $
 88 |         hoist lift (Stream.cons hd tl)
 89 | {-# INLINABLE encodeStripedWith #-}
 90 | 
 91 | decodeStriped ::
 92 |      Monad m
 93 |   => ByteStream m r
 94 |   -> Stream (Of Striped.Table) (EitherT BinaryStripedDecodeError m) r
 95 | decodeStriped bss0 = do
 96 |   (header, bss1) <- lift . firstT BinaryStripedDecodeHeaderError $
 97 |     Stream.runGet getHeader bss0
 98 | 
 99 |   hoist (firstT BinaryStripedDecodeBlockError) $
100 |     Stream.runGetSome (getBlockTable header) bss1
101 | {-# INLINABLE decodeStriped #-}
102 | 


--------------------------------------------------------------------------------
/zebra-core/src/Zebra/Serial/Json.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE NoImplicitPrelude #-}
 2 | module Zebra.Serial.Json (
 3 |     module X
 4 |   ) where
 5 | 
 6 | import           Zebra.Serial.Json.Util as X
 7 | import           Zebra.Serial.Json.Schema as X
 8 | import           Zebra.Serial.Json.Striped as X
 9 | import           Zebra.Serial.Json.Logical as X
10 | 


--------------------------------------------------------------------------------
/zebra-core/src/Zebra/Serial/Json/Striped.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE LambdaCase #-}
 2 | {-# LANGUAGE NoImplicitPrelude #-}
 3 | {-# LANGUAGE OverloadedStrings #-}
 4 | module Zebra.Serial.Json.Striped (
 5 |     encodeStriped
 6 |   , decodeStriped
 7 | 
 8 |   , JsonStripedEncodeError(..)
 9 |   , renderJsonStripedEncodeError
10 | 
11 |   , JsonStripedDecodeError(..)
12 |   , renderJsonStripedDecodeError
13 |   ) where
14 | 
15 | import           Data.ByteString (ByteString)
16 | 
17 | import           P
18 | 
19 | import           Zebra.Serial.Json.Logical
20 | import qualified Zebra.Table.Schema as Schema
21 | import           Zebra.Table.Striped (StripedError)
22 | import qualified Zebra.Table.Striped as Striped
23 | 
24 | data JsonStripedEncodeError =
25 |     JsonStripedEncodeError !StripedError
26 |   | JsonStripedLogicalEncodeError !JsonLogicalEncodeError
27 |     deriving (Eq, Show)
28 | 
29 | data JsonStripedDecodeError =
30 |     JsonStripedDecodeError !StripedError
31 |   | JsonStripedLogicalDecodeError !JsonLogicalDecodeError
32 |     deriving (Eq, Show)
33 | 
34 | renderJsonStripedEncodeError :: JsonStripedEncodeError -> Text
35 | renderJsonStripedEncodeError = \case
36 |   JsonStripedEncodeError err ->
37 |     Striped.renderStripedError err
38 |   JsonStripedLogicalEncodeError err ->
39 |     renderJsonLogicalEncodeError err
40 | 
41 | renderJsonStripedDecodeError :: JsonStripedDecodeError -> Text
42 | renderJsonStripedDecodeError = \case
43 |   JsonStripedDecodeError err ->
44 |     Striped.renderStripedError err
45 |   JsonStripedLogicalDecodeError err ->
46 |     renderJsonLogicalDecodeError err
47 | 
48 | encodeStriped :: Striped.Table -> Either JsonStripedEncodeError ByteString
49 | encodeStriped striped = do
50 |   logical <- first JsonStripedEncodeError $ Striped.toLogical striped
51 |   first JsonStripedLogicalEncodeError $ encodeLogical (Striped.schema striped) logical
52 | {-# INLINABLE encodeStriped #-}
53 | 
54 | decodeStriped :: Schema.Table -> ByteString -> Either JsonStripedDecodeError Striped.Table
55 | decodeStriped schema bs = do
56 |   logical <- first JsonStripedLogicalDecodeError $ decodeLogical schema bs
57 |   first JsonStripedDecodeError $ Striped.fromLogical schema logical
58 | {-# INLINABLE decodeStriped #-}
59 | 


--------------------------------------------------------------------------------
/zebra-core/src/Zebra/Serial/Text.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE NoImplicitPrelude #-}
2 | module Zebra.Serial.Text (
3 |     module X
4 |   ) where
5 | 
6 | import           Zebra.Serial.Text.Schema as X
7 | import           Zebra.Serial.Text.Striped as X
8 | import           Zebra.Serial.Text.Logical as X
9 | 


--------------------------------------------------------------------------------
/zebra-core/src/Zebra/Serial/Text/Schema.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE LambdaCase #-}
 2 | {-# LANGUAGE NoImplicitPrelude #-}
 3 | {-# LANGUAGE OverloadedStrings #-}
 4 | module Zebra.Serial.Text.Schema (
 5 |     TextVersion(..)
 6 |   , encodeSchema
 7 |   , encodeSchemaWith
 8 |   , decodeSchema
 9 | 
10 |   , TextSchemaDecodeError(..)
11 |   , renderTextSchemaDecodeError
12 |   ) where
13 | 
14 | import qualified Data.Aeson as Aeson
15 | import qualified Data.Aeson.Types as Aeson
16 | import           Data.ByteString (ByteString)
17 | import qualified Data.Text as Text
18 | 
19 | import           P
20 | 
21 | import           Zebra.Serial.Json.Schema (SchemaVersion(..), pTableSchemaV1, ppTableSchema)
22 | import           Zebra.Serial.Json.Util
23 | import           Zebra.Table.Data
24 | import qualified Zebra.Table.Schema as Schema
25 | 
26 | 
27 | data TextVersion =
28 |     TextV0
29 |     deriving (Eq, Show, Enum, Bounded)
30 | 
31 | data TextSchemaDecodeError =
32 |     TextSchemaDecodeError !JsonDecodeError
33 |     deriving (Eq, Show)
34 | 
35 | renderTextSchemaDecodeError :: TextSchemaDecodeError -> Text
36 | renderTextSchemaDecodeError = \case
37 |   TextSchemaDecodeError err ->
38 |     renderJsonDecodeError err
39 | 
40 | encodeSchema :: Schema.Table -> ByteString
41 | encodeSchema =
42 |   encodeSchemaWith TextV0
43 | 
44 | encodeSchemaWith :: TextVersion -> Schema.Table -> ByteString
45 | encodeSchemaWith version schema =
46 |   encodeJsonIndented ["version", "key", "name"] (ppVersionedSchema version schema) <> "\n"
47 | 
48 | decodeSchema :: ByteString -> Either TextSchemaDecodeError Schema.Table
49 | decodeSchema =
50 |   first TextSchemaDecodeError . decodeJson pVersionedSchema
51 | 
52 | pVersionedSchema :: Aeson.Value -> Aeson.Parser Schema.Table
53 | pVersionedSchema =
54 |   Aeson.withObject "object containing versioned schema" $ \o -> do
55 |     version <- withStructField "version" o pVersion
56 |     case version of
57 |       TextV0 ->
58 |         withStructField "schema" o pTableSchemaV1
59 | 
60 | ppVersionedSchema :: TextVersion -> Schema.Table -> Aeson.Value
61 | ppVersionedSchema version schema =
62 |   ppStruct [
63 |       Field "version" $
64 |         ppVersion version
65 |     , Field "schema" $
66 |         ppTableSchema (schemaVersion version) schema
67 |     ]
68 | 
69 | schemaVersion :: TextVersion -> SchemaVersion
70 | schemaVersion = \case
71 |   TextV0 ->
72 |     SchemaV1
73 | 
74 | pVersion :: Aeson.Value -> Aeson.Parser TextVersion
75 | pVersion =
76 |   Aeson.withText "string containing version number" $ \case
77 |     "v0" ->
78 |       pure TextV0
79 |     v ->
80 |       fail $ "unknown/unsupported version: " <> Text.unpack v
81 | 
82 | ppVersion :: TextVersion -> Aeson.Value
83 | ppVersion = \case
84 |   TextV0 ->
85 |     Aeson.String "v0"
86 | 


--------------------------------------------------------------------------------
/zebra-core/src/Zebra/Serial/Text/Striped.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE LambdaCase #-}
 2 | {-# LANGUAGE NoImplicitPrelude #-}
 3 | {-# LANGUAGE OverloadedStrings #-}
 4 | module Zebra.Serial.Text.Striped (
 5 |     encodeStriped
 6 |   , encodeStripedBlock
 7 | 
 8 |   , decodeStriped
 9 |   , decodeStripedBlock
10 | 
11 |   , TextStripedEncodeError(..)
12 |   , renderTextStripedEncodeError
13 | 
14 |   , TextStripedDecodeError(..)
15 |   , renderTextStripedDecodeError
16 |   ) where
17 | 
18 | import           Control.Monad.Trans.Either (EitherT, hoistEither)
19 | import           Control.Monad.Morph (hoist)
20 | import           Control.Monad.Trans.Class (lift)
21 | 
22 | import           Data.ByteString (ByteString)
23 | 
24 | import           P
25 | 
26 | import           Viking (ByteStream, Stream, Of(..))
27 | import qualified Viking.Stream as Stream
28 | 
29 | import           Zebra.Serial.Text.Logical
30 | import qualified Zebra.Table.Schema as Schema
31 | import           Zebra.Table.Striped (StripedError)
32 | import qualified Zebra.Table.Striped as Striped
33 | import           Zebra.X.Either
34 | 
35 | 
36 | data TextStripedEncodeError =
37 |     TextStripedEncodeError !StripedError
38 |   | TextStripedLogicalEncodeError !TextLogicalEncodeError
39 |     deriving (Eq, Show)
40 | 
41 | data TextStripedDecodeError =
42 |     TextStripedDecodeError !StripedError
43 |   | TextStripedLogicalDecodeError !TextLogicalDecodeError
44 |     deriving (Eq, Show)
45 | 
46 | renderTextStripedEncodeError :: TextStripedEncodeError -> Text
47 | renderTextStripedEncodeError = \case
48 |   TextStripedEncodeError err ->
49 |     Striped.renderStripedError err
50 |   TextStripedLogicalEncodeError err ->
51 |     renderTextLogicalEncodeError err
52 | 
53 | renderTextStripedDecodeError :: TextStripedDecodeError -> Text
54 | renderTextStripedDecodeError = \case
55 |   TextStripedDecodeError err ->
56 |     Striped.renderStripedError err
57 |   TextStripedLogicalDecodeError err ->
58 |     renderTextLogicalDecodeError err
59 | 
60 | encodeStriped ::
61 |      Monad m
62 |   => Stream (Of Striped.Table) m r
63 |   -> ByteStream (EitherT TextStripedEncodeError m) r
64 | encodeStriped input = do
65 |   e <- lift . lift $ Stream.next input
66 |   case e of
67 |     Left r ->
68 |       pure r
69 | 
70 |     Right (hd, tl) ->
71 |       hoist (firstJoin TextStripedLogicalEncodeError) .
72 |       encodeLogical (Striped.schema hd) .
73 |       Stream.mapM (hoistEither . first TextStripedEncodeError . Striped.toLogical) $
74 |       hoist lift (Stream.cons hd tl)
75 | {-# INLINABLE encodeStriped #-}
76 | 
77 | encodeStripedBlock :: Striped.Table -> Either TextStripedEncodeError ByteString
78 | encodeStripedBlock striped = do
79 |   logical <- first TextStripedEncodeError $ Striped.toLogical striped
80 |   first TextStripedLogicalEncodeError $ encodeLogicalBlock (Striped.schema striped) logical
81 | {-# INLINABLE encodeStripedBlock #-}
82 | 
83 | decodeStriped ::
84 |      Monad m
85 |   => Schema.Table
86 |   -> ByteStream m r
87 |   -> Stream (Of Striped.Table) (EitherT TextStripedDecodeError m) r
88 | decodeStriped schema =
89 |   Stream.mapM (hoistEither . first TextStripedDecodeError . Striped.fromLogical schema) .
90 |   hoist (firstT TextStripedLogicalDecodeError) .
91 |   decodeLogical schema
92 | {-# INLINABLE decodeStriped #-}
93 | 
94 | decodeStripedBlock :: Schema.Table -> ByteString -> Either TextStripedDecodeError Striped.Table
95 | decodeStripedBlock schema bs = do
96 |   logical <- first TextStripedLogicalDecodeError $ decodeLogicalBlock schema bs
97 |   first TextStripedDecodeError $ Striped.fromLogical schema logical
98 | {-# INLINABLE decodeStripedBlock #-}
99 | 


--------------------------------------------------------------------------------
/zebra-core/src/Zebra/Table/Encoding.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE DeriveGeneric #-}
  2 | {-# LANGUAGE LambdaCase #-}
  3 | {-# LANGUAGE NoImplicitPrelude #-}
  4 | {-# LANGUAGE OverloadedStrings #-}
  5 | {-# OPTIONS_GHC -fno-warn-deprecations #-} -- EncodeError
  6 | module Zebra.Table.Encoding (
  7 |     Binary(..)
  8 |   , Int(..)
  9 | 
 10 |   , Utf8Error(..)
 11 |   , renderUtf8Error
 12 | 
 13 |   , validateBinary
 14 |   , validateUtf8
 15 |   , decodeUtf8
 16 | 
 17 |   , validateInt
 18 |   , decodeDate
 19 |   , encodeDate
 20 |   , decodeTimeSeconds
 21 |   , encodeTimeSeconds
 22 |   , decodeTimeMilliseconds
 23 |   , encodeTimeMilliseconds
 24 |   , decodeTimeMicroseconds
 25 |   , encodeTimeMicroseconds
 26 |   ) where
 27 | 
 28 | import           Data.ByteString (ByteString)
 29 | import           Data.String (String)
 30 | import qualified Data.Text as Text
 31 | import qualified Data.Text.Encoding as Text
 32 | import           Data.Text.Encoding.Error (UnicodeException(..))
 33 | import           Data.Word (Word8)
 34 | 
 35 | import           GHC.Generics (Generic)
 36 | 
 37 | import           P hiding (Int)
 38 | 
 39 | import           Text.Printf (printf)
 40 | 
 41 | import           Zebra.Time (Time, Date, TimeError)
 42 | import qualified Zebra.Time as Time
 43 | 
 44 | 
 45 | data Binary =
 46 |     Binary
 47 |   | Utf8
 48 |     deriving (Eq, Ord, Show, Generic)
 49 | 
 50 | instance NFData Binary
 51 | 
 52 | --
 53 | -- Potentially we want to support epochs other than 1600-03-01, and possibly
 54 | -- even time zones, but we keep it super simple for now.
 55 | --
 56 | 
 57 | data Int =
 58 |     Int
 59 |   | Date -- ^ days since 1600-03-01
 60 |   | TimeSeconds -- ^ seconds since 1600-03-01
 61 |   | TimeMilliseconds -- ^ milliseconds since 1600-03-01
 62 |   | TimeMicroseconds -- ^ microseconds since 1600-03-01
 63 |     deriving (Eq, Ord, Show, Generic)
 64 | 
 65 | instance NFData Int
 66 | 
 67 | data Utf8Error =
 68 |     Utf8Error !String !(Maybe Word8)
 69 |     deriving (Eq, Ord, Show, Generic)
 70 | 
 71 | renderUtf8Error :: Utf8Error -> Text
 72 | renderUtf8Error = \case
 73 |   Utf8Error msg Nothing ->
 74 |     "Not valid UTF-8: " <> Text.pack msg
 75 |   Utf8Error msg (Just byte) ->
 76 |     Text.pack $
 77 |       printf "Not valid UTF-8, cannot decode byte 0x%02x: %s" byte msg
 78 | 
 79 | validateBinary :: Binary -> ByteString -> Either Utf8Error ()
 80 | validateBinary = \case
 81 |   Binary ->
 82 |     const $ pure ()
 83 |   Utf8 ->
 84 |     validateUtf8
 85 | {-# INLINABLE validateBinary #-}
 86 | 
 87 | -- FIXME replace with something that doesn't allocate
 88 | validateUtf8 :: ByteString -> Either Utf8Error ()
 89 | validateUtf8 bs =
 90 |   () <$ decodeUtf8 bs
 91 | {-# INLINABLE validateUtf8 #-}
 92 | 
 93 | decodeUtf8 :: ByteString -> Either Utf8Error Text
 94 | decodeUtf8 bs =
 95 |   case Text.decodeUtf8' bs of
 96 |     Left (DecodeError msg byte) ->
 97 |       Left (Utf8Error msg byte)
 98 | 
 99 |     Left (EncodeError msg _) ->
100 |       Left (Utf8Error msg Nothing) -- good.
101 | 
102 |     Right txt ->
103 |       pure txt
104 | {-# INLINABLE decodeUtf8 #-}
105 | 
106 | validateInt :: Int -> Int64 -> Either TimeError ()
107 | validateInt = \case
108 |   Int ->
109 |     const $ pure ()
110 |   Date ->
111 |     fmap (const ()) . decodeDate
112 |   TimeSeconds ->
113 |     fmap (const ()) . decodeTimeSeconds
114 |   TimeMilliseconds ->
115 |     fmap (const ()) . decodeTimeMilliseconds
116 |   TimeMicroseconds ->
117 |     fmap (const ()) . decodeTimeMicroseconds
118 | {-# INLINABLE validateInt #-}
119 | 
120 | decodeDate :: Int64 -> Either TimeError Date
121 | decodeDate =
122 |   Time.fromDays . Time.Days
123 | {-# INLINE decodeDate #-}
124 | 
125 | encodeDate :: Date -> Int64
126 | encodeDate =
127 |   Time.unDays . Time.toDays
128 | {-# INLINE encodeDate #-}
129 | 
130 | decodeTimeSeconds :: Int64 -> Either TimeError Time
131 | decodeTimeSeconds =
132 |   Time.fromSeconds . Time.Seconds
133 | {-# INLINE decodeTimeSeconds #-}
134 | 
135 | encodeTimeSeconds :: Time -> Int64
136 | encodeTimeSeconds =
137 |   Time.unSeconds . Time.toSeconds
138 | {-# INLINE encodeTimeSeconds #-}
139 | 
140 | decodeTimeMilliseconds :: Int64 -> Either TimeError Time
141 | decodeTimeMilliseconds =
142 |   Time.fromMilliseconds . Time.Milliseconds
143 | {-# INLINE decodeTimeMilliseconds #-}
144 | 
145 | encodeTimeMilliseconds :: Time -> Int64
146 | encodeTimeMilliseconds =
147 |   Time.unMilliseconds . Time.toMilliseconds
148 | {-# INLINE encodeTimeMilliseconds #-}
149 | 
150 | decodeTimeMicroseconds :: Int64 -> Either TimeError Time
151 | decodeTimeMicroseconds =
152 |   Time.fromMicroseconds . Time.Microseconds
153 | {-# INLINE decodeTimeMicroseconds #-}
154 | 
155 | encodeTimeMicroseconds :: Time -> Int64
156 | encodeTimeMicroseconds =
157 |   Time.unMicroseconds . Time.toMicroseconds
158 | {-# INLINE encodeTimeMicroseconds #-}
159 | 


--------------------------------------------------------------------------------
/zebra-core/src/Zebra/X/Either.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE BangPatterns #-}
 2 | {-# LANGUAGE FlexibleContexts #-}
 3 | {-# LANGUAGE LambdaCase #-}
 4 | {-# LANGUAGE NoImplicitPrelude #-}
 5 | {-# LANGUAGE OverloadedStrings #-}
 6 | module Zebra.X.Either (
 7 |     firstJoin
 8 |   , secondJoin
 9 |   , tryEitherT
10 |   ) where
11 | 
12 | import           P
13 | import           Control.Exception.Base (Exception)
14 | import           Control.Monad.Catch (MonadCatch, try)
15 | import           Control.Monad.Trans.Either (EitherT, runEitherT, newEitherT, firstEitherT)
16 | 
17 | firstJoin :: (Functor m, Monad m) => (x -> y) -> EitherT x (EitherT y m) a -> EitherT y m a
18 | firstJoin f e =
19 |   newEitherT $ do
20 |     b <- runEitherT (runEitherT e)
21 |     case b of
22 |       Left a ->
23 |         pure (Left a)
24 |       Right x ->
25 |         pure (first f x)
26 | {-# INLINE firstJoin #-}
27 | 
28 | secondJoin :: (Functor m, Monad m) => (y -> x) -> EitherT x (EitherT y m) a -> EitherT x m a
29 | secondJoin f e =
30 |   newEitherT $ do
31 |     b <- runEitherT (runEitherT e)
32 |     case b of
33 |       Left a ->
34 |         pure (Left (f a))
35 |       Right x ->
36 |         pure x
37 | {-# INLINE secondJoin #-}
38 | 
39 | tryEitherT :: (Functor m, MonadCatch m, Exception e) => (e -> x) -> m a -> EitherT x m a
40 | tryEitherT handler = firstEitherT handler . newEitherT . try
41 | {-# INLINE tryEitherT #-}


--------------------------------------------------------------------------------
/zebra-core/src/Zebra/X/Vector/Generic.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE BangPatterns #-}
  2 | {-# LANGUAGE DoAndIfThenElse #-}
  3 | {-# LANGUAGE FlexibleContexts #-}
  4 | {-# LANGUAGE LambdaCase #-}
  5 | {-# LANGUAGE NoImplicitPrelude #-}
  6 | module Zebra.X.Vector.Generic (
  7 |     group
  8 |   , segmentedGroup
  9 |   , segmentedGroupOn
 10 | 
 11 |   , module Generic
 12 |   ) where
 13 | 
 14 | import           X.Data.Vector.Stream (Stream(..), Step(..))
 15 | import qualified X.Data.Vector.Stream as Stream
 16 | import           X.Data.Vector.Generic as Generic
 17 | 
 18 | import           P hiding (concatMap, length, sum)
 19 | 
 20 | -- | Run-length encode a segmented vector.
 21 | segmentedGroup ::
 22 |      Eq a
 23 |   => Vector v a
 24 |   => Vector v Int
 25 |   => Vector v (Int, a)
 26 |   => Vector v (Int, (Int, a))
 27 |   => Vector v (Int, Int)
 28 |   => v Int
 29 |   -> v a
 30 |   -> (v Int, v (Int, a))
 31 | segmentedGroup = segmentedGroupOn id
 32 | {-# INLINE segmentedGroup #-}
 33 | 
 34 | -- | Run-length encode a segmented vector.
 35 | segmentedGroupOn ::
 36 |      Eq b
 37 |   => Vector v a
 38 |   => Vector v Int
 39 |   => Vector v (Int, a)
 40 |   => Vector v (Int, (Int, a))
 41 |   => Vector v (Int, Int)
 42 |   => (a -> b)
 43 |   -> v Int
 44 |   -> v a
 45 |   -> (v Int, v (Int, a))
 46 | segmentedGroupOn f ns xs =
 47 |   let
 48 |     indices =
 49 |       concatMap (uncurry $ flip replicate) (indexed ns)
 50 | 
 51 |     rotate (n, (s, x)) =
 52 |       (s, (n, x))
 53 |   in
 54 |     first (rezero ns . map fst . group) .
 55 |     unzip .
 56 |     map rotate $
 57 |     groupOn (second f) (zip indices xs)
 58 | {-# INLINE segmentedGroupOn #-}
 59 | 
 60 | -- | Run-length encode a vector.
 61 | group :: (Eq a, Vector v a, Vector v (Int, a)) => v a -> v (Int, a)
 62 | group =
 63 |   groupOn id
 64 | {-# INLINE group #-}
 65 | 
 66 | -- | Run-length encode a vector.
 67 | groupOn :: (Eq b, Vector v a, Vector v (Int, a)) => (a -> b) -> v a -> v (Int, a)
 68 | groupOn f =
 69 |   Stream.vectorOfStream . groupStreamOn f . Stream.streamOfVector
 70 | {-# INLINE groupOn #-}
 71 | 
 72 | data Run a =
 73 |     None
 74 |   | Run !Int !a
 75 | 
 76 | groupStreamOn :: (Monad m, Eq b) => (a -> b) -> Stream m a -> Stream m (Int, a)
 77 | groupStreamOn f (Stream step sinit) =
 78 |   let
 79 |     loop = \case
 80 |       Nothing ->
 81 |         pure Done
 82 | 
 83 |       Just (s0, run) ->
 84 |         step s0 >>= \case
 85 |           Done ->
 86 |             case run of
 87 |               None ->
 88 |                 pure $ Done
 89 |               Run n y ->
 90 |                 pure $ Yield (n, y) Nothing
 91 | 
 92 |           Skip s ->
 93 |             pure . Skip $
 94 |               Just (s, run)
 95 | 
 96 |           Yield x s ->
 97 |             case run of
 98 |               None ->
 99 |                 pure . Skip $
100 |                   Just (s, Run 1 x)
101 |               Run n y ->
102 |                 if f x == f y then
103 |                   pure . Skip $
104 |                     Just (s, Run (n + 1) x)
105 |                 else
106 |                   pure . Yield (n, y) $
107 |                     Just (s, Run 1 x)
108 |     {-# INLINE [0] loop #-}
109 |   in
110 |     Stream loop (Just (sinit, None))
111 | {-# INLINE [1] groupStreamOn #-}
112 | 
113 | -- | Re-insert zeros that have been lost from a segement descriptor:
114 | --
115 | --   rezero [1,2,0,3] [4,5,6] == [4,5,0,6]
116 | --
117 | rezero :: Vector v Int => v Int -> v Int -> v Int
118 | rezero xs ys =
119 |   Stream.vectorOfStream $ rezeroStream
120 |     (Stream.streamOfVector xs)
121 |     (Stream.streamOfVector ys)
122 | {-# INLINE rezero #-}
123 | 
124 | rezeroStream :: Monad m => Stream m Int -> Stream m Int -> Stream m Int
125 | rezeroStream (Stream o_step o_sinit) (Stream n_step n_sinit) =
126 |   let
127 |     loop (o_s0, n_s0) =
128 |       o_step o_s0 >>= \case
129 |         Done ->
130 |           pure Done
131 | 
132 |         Skip o_s ->
133 |           pure $ Skip (o_s, n_s0)
134 | 
135 |         Yield 0 o_s ->
136 |           pure $ Yield 0 (o_s, n_s0)
137 | 
138 |         Yield _ o_s ->
139 |           n_step n_s0 >>= \case
140 |             Done ->
141 |               pure Done
142 | 
143 |             Skip n_s ->
144 |               pure $ Skip (o_s, n_s)
145 | 
146 |             Yield x n_s ->
147 |               pure $ Yield x (o_s, n_s)
148 |     {-# INLINE [0] loop #-}
149 |   in
150 |     Stream loop (o_sinit, n_sinit)
151 | {-# INLINE [1] rezeroStream #-}
152 | 


--------------------------------------------------------------------------------
/zebra-core/src/Zebra/X/Vector/Segment.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE BangPatterns #-}
  2 | {-# LANGUAGE FlexibleContexts #-}
  3 | {-# LANGUAGE LambdaCase #-}
  4 | {-# LANGUAGE NoImplicitPrelude #-}
  5 | {-# LANGUAGE OverloadedStrings #-}
  6 | module Zebra.X.Vector.Segment (
  7 |     Segment(..)
  8 |   , SegmentError(..)
  9 |   , renderSegmentError
 10 | 
 11 |   , length
 12 |   , slice
 13 |   , unsafeSlice
 14 |   , reify
 15 |   , unsafeReify
 16 |   ) where
 17 | 
 18 | import           Data.ByteString (ByteString)
 19 | import qualified Data.ByteString as ByteString
 20 | import qualified Data.Text as Text
 21 | import qualified Data.Vector as Boxed
 22 | import qualified Data.Vector.Storable as Storable
 23 | import           Data.Vector.Unboxed (Unbox)
 24 | import qualified Data.Vector.Unboxed as Unboxed
 25 | 
 26 | import           Foreign.Storable (Storable)
 27 | 
 28 | import           P hiding (length, concat, empty)
 29 | 
 30 | import qualified X.Data.ByteString.Unsafe as ByteString
 31 | import qualified X.Data.Vector.Generic as Generic
 32 | 
 33 | 
 34 | -- | Class of things that can contain segmented data.
 35 | class Segment a where
 36 |   segmentLength :: a -> Int
 37 |   segmentUnsafeSlice :: Int -> Int -> a -> a
 38 | 
 39 | instance Segment (Boxed.Vector a) where
 40 |   segmentLength =
 41 |     Boxed.length
 42 |   {-# INLINE segmentLength #-}
 43 | 
 44 |   segmentUnsafeSlice =
 45 |     Generic.unsafeSlice
 46 |   {-# INLINE segmentUnsafeSlice #-}
 47 | 
 48 | instance Unbox a => Segment (Unboxed.Vector a) where
 49 |   segmentLength =
 50 |     Unboxed.length
 51 |   {-# INLINE segmentLength #-}
 52 | 
 53 |   segmentUnsafeSlice =
 54 |     Generic.unsafeSlice
 55 |   {-# INLINE segmentUnsafeSlice #-}
 56 | 
 57 | instance Storable a => Segment (Storable.Vector a) where
 58 |   segmentLength =
 59 |     Storable.length
 60 |   {-# INLINE segmentLength #-}
 61 | 
 62 |   segmentUnsafeSlice =
 63 |     Generic.unsafeSlice
 64 |   {-# INLINE segmentUnsafeSlice #-}
 65 | 
 66 | instance Segment ByteString where
 67 |   segmentLength =
 68 |     ByteString.length
 69 |   {-# INLINE segmentLength #-}
 70 | 
 71 |   segmentUnsafeSlice =
 72 |     ByteString.unsafeSlice
 73 |   {-# INLINE segmentUnsafeSlice #-}
 74 | 
 75 | data SegmentError =
 76 |   SegmentLengthMismatch !Int !Int
 77 |   deriving (Eq, Show)
 78 | 
 79 | renderSegmentError :: SegmentError -> Text
 80 | renderSegmentError = \case
 81 |   SegmentLengthMismatch n_sum n_xs ->
 82 |     "Sum of segment lengths <" <>
 83 |     Text.pack (show n_sum) <>
 84 |     "> did not match total length <" <>
 85 |     Text.pack (show n_xs) <>
 86 |     "> of nested segments"
 87 | 
 88 | -- | Yield the length of a segment.
 89 | length :: Segment a => a -> Int
 90 | length =
 91 |   segmentLength
 92 | {-# INLINE length #-}
 93 | 
 94 | slice :: Segment a => Int -> Int -> a -> Maybe a
 95 | slice off len xs =
 96 |   if off < 0 || off + len > length xs then
 97 |     Nothing
 98 |   else
 99 |     Just (unsafeSlice off len xs)
100 | {-# INLINE slice #-}
101 | 
102 | -- | Yield a slice of the segment, the segment must contain at least
103 | --   @off + len@ elements, but this is not checked.
104 | unsafeSlice :: Segment a => Int -> Int -> a -> a
105 | unsafeSlice =
106 |   segmentUnsafeSlice
107 | {-# INLINE unsafeSlice #-}
108 | 
109 | -- | Reify nested segments in to a vector of segments.
110 | reify :: (Segment a, Generic.Vector v Int64) => v Int64 -> a -> Either SegmentError (Boxed.Vector a)
111 | reify ns xs =
112 |   let
113 |     !n_sum =
114 |       fromIntegral (Generic.sum ns)
115 | 
116 |     !n_xs =
117 |       length xs
118 |   in
119 |     if n_sum /= n_xs then
120 |       Left $ SegmentLengthMismatch n_sum n_xs
121 |     else
122 |       pure $ unsafeReify ns xs
123 | {-# INLINE reify #-}
124 | 
125 | data IdxOff =
126 |   IdxOff !Int !Int
127 | 
128 | unsafeReify :: (Segment a, Generic.Vector v Int64) => v Int64 -> a -> Boxed.Vector a
129 | unsafeReify ns xs =
130 |   let
131 |     loop (IdxOff idx off) =
132 |       let
133 |         !len =
134 |           fromIntegral (Generic.unsafeIndex ns idx)
135 |       in
136 |         Just (unsafeSlice off len xs, IdxOff (idx + 1) (off + len))
137 |   in
138 |     Generic.unfoldrN (Generic.length ns) loop (IdxOff 0 0)
139 | {-# INLINE unsafeReify #-}
140 | 


--------------------------------------------------------------------------------
/zebra-core/src/Zebra/X/Vector/Storable.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE NoImplicitPrelude #-}
 2 | module Zebra.X.Vector.Storable (
 3 |     unsafeFromByteString
 4 |   , unsafeToByteString
 5 |   , module Storable
 6 |   ) where
 7 | 
 8 | import           Data.ByteString.Internal (ByteString(..))
 9 | import           Data.Word (Word8)
10 | 
11 | import           X.Data.Vector.Storable as Storable
12 | 
13 | 
14 | unsafeFromByteString :: ByteString -> Storable.Vector Word8
15 | unsafeFromByteString (PS fp off len) =
16 |   Storable.unsafeFromForeignPtr fp off len
17 | 
18 | unsafeToByteString :: Storable.Vector Word8 -> ByteString
19 | unsafeToByteString xs =
20 |   let
21 |     (fp, off, len) =
22 |       Storable.unsafeToForeignPtr xs
23 |   in
24 |     PS fp off len
25 | 


--------------------------------------------------------------------------------
/zebra-core/test/Test/Zebra/Factset/Block.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE NoImplicitPrelude #-}
  2 | {-# LANGUAGE ScopedTypeVariables #-}
  3 | {-# LANGUAGE TemplateHaskell #-}
  4 | module Test.Zebra.Factset.Block where
  5 | 
  6 | import qualified Data.List as List
  7 | import qualified Data.Map as Map
  8 | import qualified Data.Text as Text
  9 | import qualified Data.Vector as Boxed
 10 | 
 11 | import           Disorder.Core.Run (ExpectedTestSpeed(..), disorderCheckEnvAll)
 12 | import           Disorder.Jack (Property, counterexample)
 13 | import           Disorder.Jack ((===), gamble, listOf)
 14 | 
 15 | import           P
 16 | 
 17 | import           System.IO (IO)
 18 | 
 19 | import           Test.Zebra.Jack
 20 | 
 21 | import           Text.Show.Pretty (ppShow)
 22 | 
 23 | import           Zebra.Factset.Block
 24 | import           Zebra.Factset.Data
 25 | import           Zebra.Factset.Table
 26 | import qualified Zebra.Table.Logical as Logical
 27 | import qualified Zebra.Table.Striped as Striped
 28 | 
 29 | 
 30 | prop_roundtrip_facts :: Property
 31 | prop_roundtrip_facts =
 32 |   gamble jColumnSchema $ \schema ->
 33 |   gamble (listOf $ jFact schema (AttributeId 0)) $ \facts ->
 34 |     let
 35 |       schemas =
 36 |         Boxed.singleton schema
 37 | 
 38 |       input =
 39 |         Boxed.fromList facts
 40 |     in
 41 |       trippingBoth (blockOfFacts schemas) factsOfBlock input
 42 | 
 43 | prop_roundtrip_tables :: Property
 44 | prop_roundtrip_tables =
 45 |   gamble jBlock $ \block ->
 46 |   let
 47 |     names =
 48 |       fmap (AttributeName . Text.pack . ("attribute_" <>) . show)
 49 |         [0..Boxed.length (blockTables block) - 1]
 50 |   in
 51 |     trippingBoth (tableOfBlock $ Boxed.fromList names) blockOfTable block
 52 | 
 53 | prop_roundtrip_attribute_schemas :: Property
 54 | prop_roundtrip_attribute_schemas =
 55 |   gamble (listOf jColumnSchema) $ \attrs0 ->
 56 |   let
 57 |     mkAttr (ix :: Int) attr0 =
 58 |       (AttributeName . Text.pack $ "attribute_" <> show ix, attr0)
 59 | 
 60 |     attrs =
 61 |       Map.fromList $
 62 |       List.zipWith mkAttr [0..] attrs0
 63 |   in
 64 |     trippingBoth (pure . tableSchemaOfAttributes) attributesOfTableSchema attrs
 65 | 
 66 | prop_logical_from_block_is_valid :: Property
 67 | prop_logical_from_block_is_valid  =
 68 |   gamble jBlock $ \block ->
 69 |   let
 70 |     names =
 71 |       fmap (AttributeName . Text.pack . ("attribute_" <>) . show)
 72 |         [0..Boxed.length (blockTables block) - 1]
 73 |   in
 74 |     either (flip counterexample False) id $ do
 75 |       striped0 <- first ppShow $ tableOfBlock (Boxed.fromList names) block
 76 | 
 77 |       logical <- first (\x -> ppShow striped0 <> "\n" <> ppShow x) $
 78 |         Striped.toLogical striped0
 79 | 
 80 |       pure . counterexample (ppShow logical) $
 81 |         Logical.valid logical
 82 | 
 83 | prop_logical_from_block :: Property
 84 | prop_logical_from_block =
 85 |   gamble jBlock $ \block ->
 86 |   let
 87 |     names =
 88 |       fmap (AttributeName . Text.pack . ("attribute_" <>) . show)
 89 |         [0..Boxed.length (blockTables block) - 1]
 90 |   in
 91 |     either (flip counterexample False) id $ do
 92 |       striped0 <- first ppShow $ tableOfBlock (Boxed.fromList names) block
 93 | 
 94 |       logical <- first (\x -> ppShow striped0 <> "\n" <> ppShow x) $
 95 |         Striped.toLogical striped0
 96 | 
 97 |       striped <- first ppShow $ Striped.fromLogical (Striped.schema striped0) logical
 98 | 
 99 |       pure . counterexample (ppShow logical) $
100 |         striped0
101 |         ===
102 |         striped
103 | 
104 | return []
105 | tests :: IO Bool
106 | tests =
107 |   $disorderCheckEnvAll TestRunMore
108 | 


--------------------------------------------------------------------------------
/zebra-core/test/Test/Zebra/Factset/Data.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE NoImplicitPrelude #-}
 2 | {-# LANGUAGE PatternSynonyms #-}
 3 | {-# LANGUAGE TemplateHaskell #-}
 4 | module Test.Zebra.Factset.Data where
 5 | 
 6 | import           Disorder.Jack (Property, quickCheckAll)
 7 | import           Disorder.Jack ((===), gamble, tripping, once)
 8 | 
 9 | import           P
10 | 
11 | import           System.IO (IO)
12 | 
13 | import           Test.Zebra.Jack
14 | 
15 | import           Zebra.Factset.Data
16 | import           Zebra.Foreign.Bindings (pattern C'ZEBRA_HASH_SEED)
17 | 
18 | 
19 | prop_roundtrip_day :: Property
20 | prop_roundtrip_day =
21 |   gamble jFactsetDay $
22 |     tripping fromDay (Just . toDay)
23 | 
24 | prop_hash_seed :: Property
25 | prop_hash_seed =
26 |   once $ hashSeed === C'ZEBRA_HASH_SEED
27 | 
28 | return []
29 | tests :: IO Bool
30 | tests =
31 |   $quickCheckAll
32 | 


--------------------------------------------------------------------------------
/zebra-core/test/Test/Zebra/Foreign/Entity.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE NoImplicitPrelude #-}
 2 | {-# LANGUAGE TemplateHaskell #-}
 3 | module Test.Zebra.Foreign.Entity where
 4 | 
 5 | import qualified Anemone.Foreign.Mempool as Mempool
 6 | 
 7 | import           Control.Monad.Catch (bracket)
 8 | 
 9 | import           Disorder.Core.IO (testIO)
10 | import           Disorder.Jack (Property)
11 | import           Disorder.Jack (quickCheckAll, gamble)
12 | 
13 | import           P
14 | 
15 | import           System.IO (IO)
16 | 
17 | import           Test.Zebra.Jack
18 | import           Test.Zebra.Util
19 | 
20 | import           Zebra.Foreign.Entity
21 | 
22 | 
23 | prop_roundtrip_entity :: Property
24 | prop_roundtrip_entity =
25 |   gamble jEntity $ \entity ->
26 |   testIO . bracket Mempool.create Mempool.free $ \pool ->
27 |     trippingIO (liftE . foreignOfEntity pool) entityOfForeign entity
28 | 
29 | return []
30 | tests :: IO Bool
31 | tests =
32 |   $quickCheckAll
33 | 


--------------------------------------------------------------------------------
/zebra-core/test/Test/Zebra/Foreign/Table.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE NoImplicitPrelude #-}
 2 | {-# LANGUAGE TemplateHaskell #-}
 3 | module Test.Zebra.Foreign.Table where
 4 | 
 5 | import           Anemone.Foreign.Mempool (Mempool)
 6 | import qualified Anemone.Foreign.Mempool as Mempool
 7 | 
 8 | import           Control.Monad.Catch (bracket)
 9 | 
10 | import           Disorder.Core.IO (testIO)
11 | import           Disorder.Core.Run (ExpectedTestSpeed(..), disorderCheckEnvAll)
12 | import           Disorder.Jack (Property)
13 | import           Disorder.Jack (gamble, chooseInt)
14 | 
15 | import           P
16 | 
17 | import           System.IO (IO)
18 | 
19 | import           Test.Zebra.Jack
20 | import           Test.Zebra.Util
21 | 
22 | import           Control.Monad.Trans.Either (EitherT)
23 | 
24 | import           Zebra.Foreign.Table
25 | import           Zebra.Table.Striped (Table)
26 | import qualified Zebra.Table.Striped as Striped
27 | 
28 | prop_roundtrip_table :: Property
29 | prop_roundtrip_table =
30 |   gamble jSizedStriped $ \table ->
31 |   testIO . bracket Mempool.create Mempool.free $ \pool ->
32 |     trippingIO (liftE . foreignOfTable pool) tableOfForeign table
33 | 
34 | prop_deep_clone_table :: Property
35 | prop_deep_clone_table =
36 |   testClone id deepCloneTable
37 | 
38 | prop_neritic_clone_table :: Property
39 | prop_neritic_clone_table =
40 |   testClone id neriticCloneTable
41 | 
42 | prop_agile_clone_table :: Property
43 | prop_agile_clone_table =
44 |   testClone Striped.schema agileCloneTable
45 | 
46 | prop_grow_table :: Property
47 | prop_grow_table =
48 |   gamble (chooseInt (0, 100)) $ \n ->
49 |     testClone Striped.schema (\pool table -> growTable pool table n >> pure table)
50 | 
51 | testClone :: (Show a, Show x, Eq a, Eq x) => (Table -> a) -> (Mempool -> CTable -> EitherT x IO CTable) -> Property
52 | testClone select clone =
53 |   gamble jSizedStriped $ \table ->
54 |   testIO . bracket Mempool.create Mempool.free $ \pool ->
55 |     trippingByIO select (bind (clone pool) . foreignOfTable pool) tableOfForeign table
56 | 
57 | return []
58 | tests :: IO Bool
59 | tests =
60 |   $disorderCheckEnvAll TestRunMore
61 | 


--------------------------------------------------------------------------------
/zebra-core/test/Test/Zebra/Merge/Entity.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE NoImplicitPrelude #-}
  2 | {-# LANGUAGE TemplateHaskell #-}
  3 | {-# LANGUAGE FlexibleContexts #-}
  4 | {-# LANGUAGE ScopedTypeVariables #-}
  5 | module Test.Zebra.Merge.Entity where
  6 | 
  7 | import qualified Data.List as List
  8 | import qualified Data.Map as Map
  9 | 
 10 | import           Disorder.Jack
 11 | import           Disorder.Core.Run
 12 | 
 13 | import           P
 14 | 
 15 | import qualified Prelude as Savage
 16 | 
 17 | import           System.IO (IO)
 18 | 
 19 | import           Test.Zebra.Jack
 20 | 
 21 | import           Text.Show.Pretty (ppShow)
 22 | 
 23 | import qualified X.Data.Vector as Boxed
 24 | import qualified X.Data.Vector.Stream as Stream
 25 | 
 26 | import           Zebra.Factset.Block
 27 | import           Zebra.Factset.Fact
 28 | import           Zebra.Merge.Base
 29 | import           Zebra.Merge.Entity
 30 | import qualified Zebra.Table.Schema as Schema
 31 | import qualified Zebra.Table.Striped as Striped
 32 | 
 33 | 
 34 | fakeBlockId :: BlockDataId
 35 | fakeBlockId = BlockDataId 0
 36 | 
 37 | entityValuesOfBlock' :: BlockDataId -> Block -> Boxed.Vector EntityValues
 38 | entityValuesOfBlock' blockId block = Stream.vectorOfStream $ entityValuesOfBlock blockId block
 39 | 
 40 | ppCounter :: (Show a, Testable p) => Savage.String -> a -> p -> Property
 41 | ppCounter heading thing prop
 42 |  = counterexample ("=== " <> heading <> " ===")
 43 |  $ counterexample (ppShow thing) prop
 44 | 
 45 | 
 46 | jColumnSchemas :: Jack [Schema.Column]
 47 | jColumnSchemas = listOfN 0 5 jColumnSchema
 48 | 
 49 | blockOfFacts' :: [Schema.Column] -> [Fact] -> Block
 50 | blockOfFacts' schemas facts =
 51 |   case blockOfFacts (Boxed.fromList schemas) (Boxed.fromList facts) of
 52 |    Left e -> Savage.error
 53 |               ("jBlockFromFacts: invariant failed\n"
 54 |               <> "\tgenerated facts cannot be converted to block\n"
 55 |               <> "\t" <> show e)
 56 |    Right b -> b
 57 | 
 58 | prop_entitiesOfBlock_entities :: Property
 59 | prop_entitiesOfBlock_entities =
 60 |   gamble jYoloBlock $ \block ->
 61 |     fmap evEntity (entityValuesOfBlock' fakeBlockId block) === blockEntities block
 62 | 
 63 | prop_entitiesOfBlock_indices :: Property
 64 | prop_entitiesOfBlock_indices =
 65 |   gamble jBlock $ \block ->
 66 |     catIndices (entityValuesOfBlock' fakeBlockId block) === takeIndices block
 67 |  where
 68 |   catIndices evs
 69 |    = Boxed.map fst
 70 |    $ Boxed.concatMap Boxed.convert
 71 |    $ Boxed.concatMap evIndices evs
 72 | 
 73 |   takeIndices block
 74 |    = Boxed.convert
 75 |    $ blockIndices block
 76 | 
 77 | prop_entitiesOfBlock_tables_1_entity :: Property
 78 | prop_entitiesOfBlock_tables_1_entity =
 79 |   gamble jColumnSchemas $ \schemas ->
 80 |   gamble (jFacts schemas) $ \facts ->
 81 |   gamble jEntityHashId $ \(ehash,eid) ->
 82 |   let fixFact f = f { factEntityHash = ehash, factEntityId = eid }
 83 |       facts'    = List.sort $ fmap fixFact facts
 84 |       block     = blockOfFacts' schemas facts'
 85 |       es        = entityValuesOfBlock' fakeBlockId block
 86 |   in  ppCounter "Block" block
 87 |     $ ppCounter "Entities" es
 88 |     ( length facts > 0
 89 |     ==> Boxed.concatMap id (getFakeTableValues es) === blockTables block )
 90 | 
 91 | getFakeTableValues :: Boxed.Vector EntityValues -> Boxed.Vector (Boxed.Vector Striped.Table)
 92 | getFakeTableValues = fmap (fmap (Map.! fakeBlockId) . evTables)
 93 | 
 94 | prop_mergeEntityTables_1_block :: Property
 95 | prop_mergeEntityTables_1_block =
 96 |   gamble jBlock $ \block ->
 97 |   let es = entityValuesOfBlock' fakeBlockId block
 98 |       recs_l = mapM mergeEntityTables es
 99 | 
100 |       recs_r = getFakeTableValues es
101 |   in  ppCounter "Entities" es (recs_l === Right recs_r)
102 | 
103 | 
104 | prop_treeFold_sum :: Property
105 | prop_treeFold_sum =
106 |   gamble arbitrary $ \(bs :: [Int]) ->
107 |   List.sum bs === treeFold (+) 0 id (Boxed.fromList bs)
108 | 
109 | prop_treeFold_with_map :: Property
110 | prop_treeFold_with_map =
111 |   gamble arbitrary $ \(bs :: [Int]) ->
112 |   List.sum (fmap (+1) bs) === treeFold (+) 0 (+1) (Boxed.fromList bs)
113 | 
114 | 
115 | return []
116 | tests :: IO Bool
117 | tests = $disorderCheckEnvAll TestRunNormal
118 | 


--------------------------------------------------------------------------------
/zebra-core/test/Test/Zebra/Serial/Binary/Array.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE NoImplicitPrelude #-}
 2 | {-# LANGUAGE TemplateHaskell #-}
 3 | module Test.Zebra.Serial.Binary.Array where
 4 | 
 5 | import qualified Data.ByteString as ByteString
 6 | import qualified Data.List as List
 7 | import qualified Data.Vector as Boxed
 8 | import qualified Data.Vector.Storable as Storable
 9 | 
10 | import           Disorder.Core.Run
11 | import           Disorder.Jack (Property)
12 | import           Disorder.Jack (gamble, listOf, arbitrary, sizedBounded, choose)
13 | 
14 | import           P
15 | 
16 | import           System.IO (IO)
17 | 
18 | import           Test.QuickCheck.Instances ()
19 | import           Test.Zebra.Util
20 | 
21 | import           Zebra.Serial.Binary.Array
22 | 
23 | 
24 | prop_roundtrip_strings :: Property
25 | prop_roundtrip_strings =
26 |   gamble (Boxed.fromList <$> listOf arbitrary) $ \xs ->
27 |     trippingSerial bStrings (getStrings $ Boxed.length xs) xs
28 | 
29 | prop_roundtrip_bytes :: Property
30 | prop_roundtrip_bytes =
31 |   gamble arbitrary $ \bs ->
32 |     trippingSerial bByteArray (getByteArray $ ByteString.length bs) bs
33 | 
34 | prop_roundtrip_sized_bytes :: Property
35 | prop_roundtrip_sized_bytes =
36 |   gamble arbitrary $
37 |     trippingSerial bSizedByteArray getSizedByteArray
38 | 
39 | prop_roundtrip_ints :: Property
40 | prop_roundtrip_ints =
41 |   gamble (Storable.fromList <$> listOf sizedBounded) $ \xs ->
42 |     trippingSerial bIntArray (getIntArray $ Storable.length xs) xs
43 | 
44 | -- Worst case for packing is to make sure each pack block contains an int64 min and max.
45 | prop_roundtrip_ints_minmax_64blocks :: Property
46 | prop_roundtrip_ints_minmax_64blocks =
47 |   gamble (choose (0,1000)) $ \x ->
48 |     let x' = x * 64
49 |         xs = Storable.fromList $ List.take x' $ List.cycle [minBound, maxBound]
50 |     in trippingSerial bIntArray (getIntArray $ Storable.length xs) xs
51 | 
52 | 
53 | prop_roundtrip_zigzag :: Property
54 | prop_roundtrip_zigzag =
55 |   gamble sizedBounded $ \x ->
56 |     x == unZigZag64 (zigZag64 x)
57 | 
58 | prop_mid64 :: Property
59 | prop_mid64 =
60 |   gamble sizedBounded $ \x ->
61 |   gamble sizedBounded $ \y ->
62 |     fromIntegral (mid64 x y) == midBig (fromIntegral x) (fromIntegral y)
63 | 
64 | midBig :: Integer -> Integer -> Integer
65 | midBig x y =
66 |   x + (y - x) `div` 2
67 | 
68 | return []
69 | tests :: IO Bool
70 | tests = $disorderCheckEnvAll TestRunMore
71 | 


--------------------------------------------------------------------------------
/zebra-core/test/Test/Zebra/Serial/Binary/Block.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE NoImplicitPrelude #-}
  2 | {-# LANGUAGE OverloadedStrings #-}
  3 | {-# LANGUAGE ScopedTypeVariables #-}
  4 | {-# LANGUAGE TemplateHaskell #-}
  5 | module Test.Zebra.Serial.Binary.Block where
  6 | 
  7 | import qualified Data.List as List
  8 | import qualified Data.Map as Map
  9 | import qualified Data.Text as Text
 10 | import qualified Data.Vector as Boxed
 11 | import qualified Data.Vector.Unboxed as Unboxed
 12 | 
 13 | import           Disorder.Jack (Property)
 14 | import           Disorder.Jack (quickCheckAll, gamble, listOf, counterexample)
 15 | 
 16 | import           P
 17 | 
 18 | import qualified Prelude as Savage
 19 | 
 20 | import           System.IO (IO)
 21 | 
 22 | import           Test.Zebra.Jack
 23 | import           Test.Zebra.Util
 24 | 
 25 | import           Text.Printf (printf)
 26 | import           Text.Show.Pretty (ppShow)
 27 | 
 28 | import           Zebra.Factset.Block
 29 | import           Zebra.Factset.Data
 30 | import           Zebra.Serial.Binary.Block
 31 | import           Zebra.Serial.Binary.Data
 32 | import           Zebra.Table.Data
 33 | import qualified Zebra.Table.Schema as Schema
 34 | import qualified Zebra.Table.Striped as Striped
 35 | 
 36 | 
 37 | prop_roundtrip_from_facts :: Property
 38 | prop_roundtrip_from_facts =
 39 |   gamble jBinaryVersion $ \version ->
 40 |   gamble jColumnSchema $ \schema ->
 41 |   gamble (listOf $ jFact schema (AttributeId 0)) $ \facts ->
 42 |     let
 43 |       schemas =
 44 |         Boxed.singleton schema
 45 | 
 46 |       header =
 47 |         headerOfAttributes version $ Map.singleton (AttributeName "attribute_0") schema
 48 | 
 49 |       block =
 50 |         either (Savage.error . show) id .
 51 |         blockOfFacts schemas $
 52 |         Boxed.fromList facts
 53 |     in
 54 |       counterexample (ppShow schema) $
 55 |       trippingSerialE (bBlock header) (getBlock header) block
 56 | 
 57 | prop_roundtrip_block :: Property
 58 | prop_roundtrip_block =
 59 |   gamble jYoloBlock $ \block ->
 60 |     let
 61 |       mkAttr (ix :: Int) attr0 =
 62 |         (AttributeName . Text.pack $ printf "attribute_%05d" ix, attr0)
 63 | 
 64 |       header =
 65 |         headerOfAttributes BinaryV2 .
 66 |         Map.fromList $
 67 |         List.zipWith mkAttr [0..] .
 68 |         fmap (unsafeTakeArray . Striped.schema) .
 69 |         Boxed.toList $
 70 |         blockTables block
 71 |     in
 72 |       trippingSerialE (bBlock header) (getBlock header) block
 73 | 
 74 | prop_roundtrip_entities :: Property
 75 | prop_roundtrip_entities =
 76 |   gamble (Boxed.fromList <$> listOf jBlockEntity) $
 77 |     trippingSerial bEntities getEntities
 78 | 
 79 | prop_roundtrip_attributes :: Property
 80 | prop_roundtrip_attributes =
 81 |   gamble (Unboxed.fromList <$> listOf jBlockAttribute) $
 82 |     trippingSerial bAttributes getAttributes
 83 | 
 84 | prop_roundtrip_indices :: Property
 85 | prop_roundtrip_indices =
 86 |   gamble (Unboxed.fromList <$> listOf jBlockIndex) $
 87 |     trippingSerial bIndices getIndices
 88 | 
 89 | prop_roundtrip_tables :: Property
 90 | prop_roundtrip_tables =
 91 |   gamble (Boxed.fromList <$> listOf (jStripedColumn 1)) $ \xs ->
 92 |     trippingSerialE bTables (getTables $ fmap Striped.schemaColumn xs) (fmap (Striped.Array DenyDefault) xs)
 93 | 
 94 | unsafeTakeArray :: Schema.Table -> Schema.Column
 95 | unsafeTakeArray =
 96 |   either (Savage.error . ppShow) snd . Schema.takeArray
 97 | 
 98 | return []
 99 | tests :: IO Bool
100 | tests =
101 |   $quickCheckAll
102 | 


--------------------------------------------------------------------------------
/zebra-core/test/Test/Zebra/Serial/Binary/Header.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE NoImplicitPrelude #-}
 2 | {-# LANGUAGE TemplateHaskell #-}
 3 | module Test.Zebra.Serial.Binary.Header where
 4 | 
 5 | import           Data.Map (Map)
 6 | import qualified Data.Map as Map
 7 | 
 8 | import           Disorder.Jack (Property, Jack)
 9 | import           Disorder.Jack (quickCheckAll, gamble, listOf, oneOf)
10 | 
11 | import           P
12 | 
13 | import           System.IO (IO)
14 | 
15 | import           Test.Zebra.Jack
16 | import           Test.Zebra.Util
17 | 
18 | import           Zebra.Serial.Binary.Data
19 | import           Zebra.Serial.Binary.Header
20 | 
21 | 
22 | prop_roundtrip_header_v2 :: Property
23 | prop_roundtrip_header_v2 =
24 |   gamble (mapOf jAttributeName $ fmap columnSchemaV0 jColumnSchema) $
25 |     trippingSerial bHeaderV2 getHeaderV2
26 | 
27 | prop_roundtrip_header_v3 :: Property
28 | prop_roundtrip_header_v3 =
29 |   gamble jTableSchema $
30 |     trippingSerial bHeaderV3 getHeaderV3
31 | 
32 | prop_roundtrip_header :: Property
33 | prop_roundtrip_header =
34 |   gamble jHeader $
35 |     trippingSerial bHeader getHeader
36 | 
37 | jHeader :: Jack Header
38 | jHeader =
39 |   oneOf [
40 |       HeaderV3 <$> jTableSchema
41 |     , HeaderV2 <$> mapOf jAttributeName (fmap columnSchemaV0 jColumnSchema)
42 |     ]
43 | 
44 | mapOf :: Ord k => Jack k -> Jack v -> Jack (Map k v)
45 | mapOf k v =
46 |   Map.fromList <$> listOf ((,) <$> k <*> v)
47 | 
48 | return []
49 | tests :: IO Bool
50 | tests =
51 |   $quickCheckAll
52 | 


--------------------------------------------------------------------------------
/zebra-core/test/Test/Zebra/Serial/Binary/Logical.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE NoImplicitPrelude #-}
 2 | {-# LANGUAGE TemplateHaskell #-}
 3 | module Test.Zebra.Serial.Binary.Logical where
 4 | 
 5 | import           Disorder.Jack (Property, forAllProperties, quickCheckWithResult, maxSuccess, stdArgs)
 6 | import           Disorder.Jack (gamble, listOfN)
 7 | 
 8 | import           P
 9 | 
10 | import           System.IO (IO)
11 | 
12 | import           Test.Zebra.Jack
13 | 
14 | import qualified Viking.ByteStream as ByteStream
15 | import qualified Viking.Stream as Stream
16 | 
17 | import           Zebra.Serial.Binary.Logical
18 | 
19 | 
20 | data BinaryError =
21 |     BinaryEncode !BinaryLogicalEncodeError
22 |   | BinaryDecode !BinaryLogicalDecodeError
23 |     deriving (Eq, Show)
24 | 
25 | prop_roundtrip_file :: Property
26 | prop_roundtrip_file =
27 |   gamble jTableSchema $ \schema ->
28 |   gamble (listOfN 1 10 $ jSizedLogical1 schema) $ \logical ->
29 |     trippingBoth
30 |       (first BinaryEncode . withList (ByteStream.toChunks . encodeLogical schema))
31 |       (first BinaryDecode . withList (Stream.effect . fmap snd . decodeLogical . ByteStream.fromChunks))
32 |       logical
33 | 
34 | return []
35 | tests :: IO Bool
36 | tests =
37 |   $forAllProperties $ quickCheckWithResult (stdArgs {maxSuccess = 1000})
38 | 


--------------------------------------------------------------------------------
/zebra-core/test/Test/Zebra/Serial/Binary/Striped.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE NoImplicitPrelude #-}
 2 | {-# LANGUAGE TemplateHaskell #-}
 3 | module Test.Zebra.Serial.Binary.Striped where
 4 | 
 5 | import           Disorder.Jack (Property, forAllProperties, quickCheckWithResult, maxSuccess, stdArgs)
 6 | import           Disorder.Jack (gamble, listOfN)
 7 | 
 8 | import           P
 9 | 
10 | import           System.IO (IO)
11 | 
12 | import           Test.Zebra.Jack
13 | 
14 | import qualified Viking.ByteStream as ByteStream
15 | 
16 | import           Zebra.Serial.Binary.Striped
17 | import qualified Zebra.Table.Striped as Striped
18 | 
19 | 
20 | data BinaryError =
21 |     BinaryEncode !BinaryStripedEncodeError
22 |   | BinaryDecode !BinaryStripedDecodeError
23 |     deriving (Eq, Show)
24 | 
25 | prop_roundtrip_file :: Property
26 | prop_roundtrip_file =
27 |   gamble jTableSchema $ \schema ->
28 |   gamble (listOfN 1 10 $ jSizedLogical1 schema) $ \logical ->
29 |     let
30 |       takeStriped x =
31 |         let
32 |           Right striped =
33 |             Striped.fromLogical schema x
34 |         in
35 |           striped
36 |     in
37 |       trippingBoth
38 |         (first BinaryEncode . withList (ByteStream.toChunks . encodeStriped))
39 |         (first BinaryDecode . withList (decodeStriped . ByteStream.fromChunks))
40 |         (fmap takeStriped logical)
41 | 
42 | return []
43 | tests :: IO Bool
44 | tests =
45 |   $forAllProperties $ quickCheckWithResult (stdArgs {maxSuccess = 1000})
46 | 


--------------------------------------------------------------------------------
/zebra-core/test/Test/Zebra/Serial/Binary/Table.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE NoImplicitPrelude #-}
 2 | {-# LANGUAGE TemplateHaskell #-}
 3 | module Test.Zebra.Serial.Binary.Table where
 4 | 
 5 | import           Disorder.Jack (Property)
 6 | import           Disorder.Jack (quickCheckAll, gamble)
 7 | 
 8 | import           P
 9 | 
10 | import           System.IO (IO)
11 | 
12 | import           Test.Zebra.Jack
13 | import           Test.Zebra.Util
14 | 
15 | import           Zebra.Serial.Binary.Table
16 | import qualified Zebra.Table.Striped as Striped
17 | 
18 | 
19 | prop_roundtrip_table :: Property
20 | prop_roundtrip_table =
21 |   gamble jBinaryVersion $ \version ->
22 |   gamble (jStriped 1) $ \table ->
23 |     trippingSerialE (bTable version) (getTable version 1 $ Striped.schema table) table
24 | 
25 | prop_roundtrip_column :: Property
26 | prop_roundtrip_column =
27 |   gamble jBinaryVersion $ \version ->
28 |   gamble (jStripedColumn 1) $ \column ->
29 |     trippingSerialE (bColumn version) (getColumn version 1 $ Striped.schemaColumn column) column
30 | 
31 | return []
32 | tests :: IO Bool
33 | tests =
34 |   $quickCheckAll
35 | 


--------------------------------------------------------------------------------
/zebra-core/test/Test/Zebra/Serial/Json/Logical.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE NoImplicitPrelude #-}
 2 | {-# LANGUAGE TemplateHaskell #-}
 3 | module Test.Zebra.Serial.Json.Logical where
 4 | 
 5 | import           Disorder.Jack (Property, forAllProperties, quickCheckWithResult, maxSuccess, stdArgs)
 6 | import           Disorder.Jack (gamble)
 7 | 
 8 | import           P
 9 | 
10 | import           System.IO (IO)
11 | 
12 | import           Test.Zebra.Jack
13 | 
14 | import           Zebra.Serial.Json.Logical
15 | 
16 | 
17 | data JsonError =
18 |     JsonEncode !JsonLogicalEncodeError
19 |   | JsonDecode !JsonLogicalDecodeError
20 |     deriving (Eq, Show)
21 | 
22 | prop_roundtrip_table :: Property
23 | prop_roundtrip_table =
24 |   gamble jTableSchema $ \schema ->
25 |   gamble (jSizedLogical schema) $
26 |     trippingBoth
27 |       (first JsonEncode . encodeLogical schema)
28 |       (first JsonDecode . decodeLogical schema)
29 | 
30 | prop_roundtrip_value :: Property
31 | prop_roundtrip_value =
32 |   gamble jColumnSchema $ \schema ->
33 |   gamble (jLogicalValue schema) $
34 |     trippingBoth
35 |       (first JsonEncode . encodeLogicalValue schema)
36 |       (first JsonDecode . decodeLogicalValue schema)
37 | 
38 | return []
39 | tests :: IO Bool
40 | tests =
41 |   $forAllProperties $ quickCheckWithResult (stdArgs {maxSuccess = 1000})
42 | 


--------------------------------------------------------------------------------
/zebra-core/test/Test/Zebra/Serial/Json/Schema.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE NoImplicitPrelude #-}
 2 | {-# LANGUAGE TemplateHaskell #-}
 3 | module Test.Zebra.Serial.Json.Schema where
 4 | 
 5 | import           Disorder.Jack (Property, quickCheckAll)
 6 | import           Disorder.Jack (gamble)
 7 | 
 8 | import           P
 9 | 
10 | import           System.IO (IO)
11 | 
12 | import           Test.Zebra.Jack
13 | 
14 | import           Zebra.Serial.Json.Schema
15 | 
16 | 
17 | prop_roundtrip_schema_v0 :: Property
18 | prop_roundtrip_schema_v0 =
19 |   gamble (tableSchemaV0 <$> jTableSchema) $
20 |     trippingBoth (pure . encodeSchema SchemaV0) (decodeSchema SchemaV0)
21 | 
22 | prop_roundtrip_schema_v1 :: Property
23 | prop_roundtrip_schema_v1 =
24 |   gamble jTableSchema $
25 |     trippingBoth (pure . encodeSchema SchemaV1) (decodeSchema SchemaV1)
26 | 
27 | return []
28 | tests :: IO Bool
29 | tests =
30 |   $quickCheckAll
31 | 


--------------------------------------------------------------------------------
/zebra-core/test/Test/Zebra/Serial/Json/Striped.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE NoImplicitPrelude #-}
 2 | {-# LANGUAGE TemplateHaskell #-}
 3 | module Test.Zebra.Serial.Json.Striped where
 4 | 
 5 | import           Disorder.Jack (Property, forAllProperties, quickCheckWithResult, maxSuccess, stdArgs)
 6 | import           Disorder.Jack (gamble)
 7 | 
 8 | import           P
 9 | 
10 | import           System.IO (IO)
11 | 
12 | import           Test.Zebra.Jack
13 | 
14 | import           Zebra.Serial.Json.Striped
15 | import qualified Zebra.Table.Striped as Striped
16 | 
17 | 
18 | data JsonError =
19 |     JsonEncode !JsonStripedEncodeError
20 |   | JsonDecode !JsonStripedDecodeError
21 |     deriving (Eq, Show)
22 | 
23 | prop_roundtrip_table :: Property
24 | prop_roundtrip_table =
25 |   gamble jTableSchema $ \schema ->
26 |   gamble (jSizedLogical schema) $ \logical ->
27 |     let
28 |       Right striped =
29 |         Striped.fromLogical schema logical
30 |     in
31 |       trippingBoth
32 |         (first JsonEncode . encodeStriped)
33 |         (first JsonDecode . decodeStriped schema)
34 |         striped
35 | 
36 | return []
37 | tests :: IO Bool
38 | tests =
39 |   $forAllProperties $ quickCheckWithResult (stdArgs {maxSuccess = 1000})
40 | 


--------------------------------------------------------------------------------
/zebra-core/test/Test/Zebra/Serial/Json/Util.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE NoImplicitPrelude #-}
 2 | {-# LANGUAGE TemplateHaskell #-}
 3 | module Test.Zebra.Serial.Json.Util where
 4 | 
 5 | import           Data.ByteString (ByteString)
 6 | import qualified Data.ByteString as ByteString
 7 | import qualified Data.Text as Text
 8 | import qualified Data.Text.Encoding as Text
 9 | 
10 | import           Disorder.Jack (Property, Jack, forAllProperties, quickCheckWithResult, maxSuccess, stdArgs)
11 | import           Disorder.Jack (gamble, tripping, once, arbitrary, sizedBounded)
12 | import           Disorder.Jack (listOf, choose, chooseChar, suchThat)
13 | 
14 | import           P
15 | 
16 | import           System.IO (IO)
17 | 
18 | import           Test.Zebra.Jack
19 | 
20 | import           Zebra.Serial.Json.Util
21 | 
22 | 
23 | jText :: Jack Text
24 | jText =
25 |   fmap Text.pack . listOf $
26 |     chooseChar (minBound, maxBound)
27 | 
28 | jBinary :: Jack ByteString
29 | jBinary =
30 |   fmap ByteString.pack . listOf $
31 |     choose (minBound, maxBound)
32 | 
33 | prop_roundtrip_unit :: Property
34 | prop_roundtrip_unit =
35 |   once $
36 |     tripping (\_ -> encodeJson [] ppUnit) (decodeJson pUnit) ()
37 | 
38 | prop_roundtrip_int :: Property
39 | prop_roundtrip_int =
40 |   gamble sizedBounded $
41 |     tripping (encodeJson [] . ppInt) (decodeJson pInt)
42 | 
43 | prop_roundtrip_date :: Property
44 | prop_roundtrip_date =
45 |   gamble jDate $
46 |     tripping (encodeJson [] . ppDate) (decodeJson pDate)
47 | 
48 | prop_roundtrip_time :: Property
49 | prop_roundtrip_time =
50 |   gamble jTime $
51 |     tripping (encodeJson [] . ppTime) (decodeJson pTime)
52 | 
53 | prop_roundtrip_double :: Property
54 | prop_roundtrip_double =
55 |   gamble arbitrary $
56 |     tripping (encodeJson [] . ppDouble) (decodeJson pDouble)
57 | 
58 | prop_roundtrip_text :: Property
59 | prop_roundtrip_text =
60 |   gamble jText $
61 |     tripping (encodeJson [] . ppText) (decodeJson pText)
62 | 
63 | prop_roundtrip_binary :: Property
64 | prop_roundtrip_binary =
65 |   gamble jBinary $
66 |     tripping (encodeJson [] . ppBinary) (decodeJson pBinary)
67 | 
68 | -- This can be considered documentation that for all valid UTF-8 byte
69 | -- sequences, translating them to UTF-16 (i.e. Data.Text / Aeson) and back
70 | -- again results in the original sequence of bytes.
71 | prop_roundtrip_utf8 :: Property
72 | prop_roundtrip_utf8 =
73 |   gamble (suchThat jBinary $ isRight . Text.decodeUtf8') $
74 |     tripping Text.decodeUtf8 (Just . Text.encodeUtf8)
75 | 
76 | return []
77 | tests :: IO Bool
78 | tests =
79 |   $forAllProperties $ quickCheckWithResult (stdArgs {maxSuccess = 1000})
80 | 


--------------------------------------------------------------------------------
/zebra-core/test/Test/Zebra/Serial/Text/Logical.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE NoImplicitPrelude #-}
 2 | {-# LANGUAGE TemplateHaskell #-}
 3 | module Test.Zebra.Serial.Text.Logical where
 4 | 
 5 | import           Disorder.Jack (Property, forAllProperties, quickCheckWithResult, maxSuccess, stdArgs)
 6 | import           Disorder.Jack (gamble, listOfN)
 7 | 
 8 | import           P
 9 | 
10 | import           System.IO (IO)
11 | 
12 | import           Test.Zebra.Jack
13 | 
14 | import qualified Viking.ByteStream as ByteStream
15 | 
16 | import           Zebra.Serial.Text.Logical
17 | 
18 | 
19 | data TextError =
20 |     TextEncode !TextLogicalEncodeError
21 |   | TextDecode !TextLogicalDecodeError
22 |     deriving (Eq, Show)
23 | 
24 | prop_roundtrip_table :: Property
25 | prop_roundtrip_table =
26 |   gamble jTableSchema $ \schema ->
27 |   gamble (jSizedLogical schema) $
28 |     trippingBoth
29 |       (first TextEncode . encodeLogicalBlock schema)
30 |       (first TextDecode . decodeLogicalBlock schema)
31 | 
32 | prop_roundtrip_file :: Property
33 | prop_roundtrip_file =
34 |   gamble jTableSchema $ \schema ->
35 |   gamble (listOfN 1 10 $ jSizedLogical1 schema) $ \logical ->
36 |     trippingBoth
37 |       (first TextEncode . withList (ByteStream.toChunks . encodeLogical schema))
38 |       (first TextDecode . withList (decodeLogical schema . ByteStream.fromChunks))
39 |       logical
40 | 
41 | return []
42 | tests :: IO Bool
43 | tests =
44 |   $forAllProperties $ quickCheckWithResult (stdArgs {maxSuccess = 1000})
45 | 


--------------------------------------------------------------------------------
/zebra-core/test/Test/Zebra/Serial/Text/Schema.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE NoImplicitPrelude #-}
 2 | {-# LANGUAGE TemplateHaskell #-}
 3 | module Test.Zebra.Serial.Text.Schema where
 4 | 
 5 | import           Disorder.Jack (Property, quickCheckAll)
 6 | import           Disorder.Jack (gamble)
 7 | 
 8 | import           P
 9 | 
10 | import           System.IO (IO)
11 | 
12 | import           Test.Zebra.Jack
13 | 
14 | import           Zebra.Serial.Text.Schema
15 | 
16 | 
17 | prop_roundtrip_schema :: Property
18 | prop_roundtrip_schema =
19 |   gamble jTableSchema $
20 |     trippingBoth (pure . encodeSchemaWith TextV0) (decodeSchema)
21 | 
22 | return []
23 | tests :: IO Bool
24 | tests =
25 |   $quickCheckAll
26 | 


--------------------------------------------------------------------------------
/zebra-core/test/Test/Zebra/Serial/Text/Striped.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE NoImplicitPrelude #-}
 2 | {-# LANGUAGE TemplateHaskell #-}
 3 | module Test.Zebra.Serial.Text.Striped where
 4 | 
 5 | import           Disorder.Jack (Property, forAllProperties, quickCheckWithResult, maxSuccess, stdArgs)
 6 | import           Disorder.Jack (gamble, listOfN)
 7 | 
 8 | import           P
 9 | 
10 | import           System.IO (IO)
11 | 
12 | import           Test.Zebra.Jack
13 | 
14 | import qualified Viking.ByteStream as ByteStream
15 | 
16 | import           Zebra.Serial.Text.Striped
17 | import qualified Zebra.Table.Striped as Striped
18 | 
19 | 
20 | data TextError =
21 |     TextEncode !TextStripedEncodeError
22 |   | TextDecode !TextStripedDecodeError
23 |     deriving (Eq, Show)
24 | 
25 | prop_roundtrip_table :: Property
26 | prop_roundtrip_table =
27 |   gamble jTableSchema $ \schema ->
28 |   gamble (jSizedLogical schema) $ \logical ->
29 |     let
30 |       Right striped =
31 |         Striped.fromLogical schema logical
32 |     in
33 |       trippingBoth
34 |         (first TextEncode . encodeStripedBlock)
35 |         (first TextDecode . decodeStripedBlock schema)
36 |         striped
37 | 
38 | prop_roundtrip_file :: Property
39 | prop_roundtrip_file =
40 |   gamble jTableSchema $ \schema ->
41 |   gamble (listOfN 1 10 $ jSizedLogical1 schema) $ \logical ->
42 |     let
43 |       takeStriped x =
44 |         let
45 |           Right striped =
46 |             Striped.fromLogical schema x
47 |         in
48 |           striped
49 |     in
50 |       trippingBoth
51 |         (first TextEncode . withList (ByteStream.toChunks . encodeStriped))
52 |         (first TextDecode . withList (decodeStriped schema . ByteStream.fromChunks))
53 |         (fmap takeStriped logical)
54 | 
55 | return []
56 | tests :: IO Bool
57 | tests =
58 |   $forAllProperties $ quickCheckWithResult (stdArgs {maxSuccess = 1000})
59 | 


--------------------------------------------------------------------------------
/zebra-core/test/Test/Zebra/Table/Logical.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE NoImplicitPrelude #-}
 2 | {-# LANGUAGE TemplateHaskell #-}
 3 | module Test.Zebra.Table.Logical where
 4 | 
 5 | import           Disorder.Jack (Property)
 6 | import           Disorder.Jack ((===), (==>), quickCheckAll, gamble)
 7 | 
 8 | import qualified Data.Vector as Boxed
 9 | 
10 | import           P
11 | 
12 | import           System.IO (IO)
13 | 
14 | import           Test.Zebra.Jack
15 | 
16 | import           Zebra.Table.Logical
17 | 
18 | 
19 | prop_reversed :: Property
20 | prop_reversed =
21 |   gamble jColumnSchema $ \schema ->
22 |   gamble (jLogicalValue schema) $ \x ->
23 |   gamble (jLogicalValue schema) $ \y ->
24 |     compare x y
25 |     ===
26 |     compare (Reversed y) (Reversed x)
27 | 
28 | prop_nested :: Property
29 | prop_nested =
30 |   gamble jColumnSchema $ \schema ->
31 |   gamble (jLogicalValue schema) $ \x ->
32 |   gamble (jLogicalValue schema) $ \y ->
33 |     compare x y
34 |     ===
35 |     compare (Nested (Array (Boxed.singleton x))) (Nested (Array (Boxed.singleton y)))
36 | 
37 | prop_ord_0 :: Property
38 | prop_ord_0 =
39 |   gamble jColumnSchema $ \schema ->
40 |   gamble (jLogicalValue schema) $ \x ->
41 |   gamble (jLogicalValue schema) $ \y ->
42 |   gamble (jLogicalValue schema) $ \z ->
43 |     x > y && y > z
44 |     ==>
45 |     x > z
46 | 
47 | prop_ord_1 :: Property
48 | prop_ord_1 =
49 |   gamble jColumnSchema $ \schema ->
50 |   gamble (jLogicalValue schema) $ \x ->
51 |   gamble (jLogicalValue schema) $ \y ->
52 |   gamble (jLogicalValue schema) $ \z ->
53 |     x < y && y < z
54 |     ==>
55 |     x < z
56 | 
57 | return []
58 | tests :: IO Bool
59 | tests =
60 |   $quickCheckAll
61 | 


--------------------------------------------------------------------------------
/zebra-core/test/Test/Zebra/Table/Schema.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE NoImplicitPrelude #-}
 2 | {-# LANGUAGE TemplateHaskell #-}
 3 | module Test.Zebra.Table.Schema where
 4 | 
 5 | import           Disorder.Core.Run (ExpectedTestSpeed(..), disorderCheckEnvAll)
 6 | import           Disorder.Jack (Property, (===), gamble, shuffle)
 7 | 
 8 | import           P
 9 | 
10 | import           System.IO (IO)
11 | 
12 | import           Test.QuickCheck (cover)
13 | import           Test.Zebra.Jack
14 | 
15 | import qualified Zebra.Table.Schema as Schema
16 | 
17 | 
18 | prop_union_associative :: Property
19 | prop_union_associative =
20 |   gamble jTableSchema $ \table0 ->
21 |   gamble (jExpandedTableSchema table0) $ \table1 ->
22 |   gamble (jContractedTableSchema table0) $ \table2 ->
23 |   gamble (shuffle [table0, table1, table2]) $ \[x, y, z] ->
24 |     let
25 |       x_yz =
26 |         first (const ())
27 |           (Schema.union x =<< Schema.union y z)
28 | 
29 |       xy_z =
30 |         first (const ())
31 |           (Schema.union x y >>= \xy -> Schema.union xy z)
32 | 
33 |       compatible =
34 |         isRight x_yz && isRight xy_z
35 |     in
36 |       -- Check that >90% of test cases are unions on compatible schemas
37 |       cover compatible 90 "compatible schemas" $
38 |         x_yz === xy_z
39 | 
40 | return []
41 | tests :: IO Bool
42 | tests =
43 |   $disorderCheckEnvAll TestRunMore
44 | 


--------------------------------------------------------------------------------
/zebra-core/test/Test/Zebra/Time.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE NoImplicitPrelude #-}
  2 | {-# LANGUAGE TemplateHaskell #-}
  3 | module Test.Zebra.Time where
  4 | 
  5 | import qualified Data.ByteString.Char8 as Char8
  6 | import qualified Data.Time as Time
  7 | 
  8 | import           Disorder.Jack
  9 | 
 10 | import           P
 11 | 
 12 | import           System.IO (IO)
 13 | 
 14 | import           Test.Zebra.Jack
 15 | 
 16 | import           Zebra.Time
 17 | 
 18 | 
 19 | prop_roundtrip_date_render :: Property
 20 | prop_roundtrip_date_render =
 21 |   gamble jDate $
 22 |     tripping renderDate parseDate
 23 | 
 24 | prop_roundtrip_date_days :: Property
 25 | prop_roundtrip_date_days =
 26 |   gamble jDate $
 27 |     tripping toDays fromDays
 28 | 
 29 | prop_roundtrip_date_calendar :: Property
 30 | prop_roundtrip_date_calendar =
 31 |   gamble jDate $
 32 |     tripping toCalendarDate fromCalendarDate
 33 | 
 34 | prop_roundtrip_time_render :: Property
 35 | prop_roundtrip_time_render =
 36 |   gamble jTime $
 37 |     tripping renderTime parseTime
 38 | 
 39 | prop_roundtrip_time_seconds :: Property
 40 | prop_roundtrip_time_seconds =
 41 |   gamble jTime $ \time0 ->
 42 |   let
 43 |     Right time =
 44 |       fromSeconds (toSeconds time0)
 45 |   in
 46 |     tripping toSeconds fromSeconds time
 47 | 
 48 | prop_roundtrip_time_milliseconds :: Property
 49 | prop_roundtrip_time_milliseconds =
 50 |   gamble jTime $ \time0 ->
 51 |   let
 52 |     Right time =
 53 |       fromMilliseconds (toMilliseconds time0)
 54 |   in
 55 |     tripping toMilliseconds fromMilliseconds time
 56 | 
 57 | prop_roundtrip_time_microseconds :: Property
 58 | prop_roundtrip_time_microseconds =
 59 |   gamble jTime $
 60 |     tripping toMicroseconds fromMicroseconds
 61 | 
 62 | prop_roundtrip_time_calendar :: Property
 63 | prop_roundtrip_time_calendar =
 64 |   gamble jTime $
 65 |     tripping toCalendarTime fromCalendarTime
 66 | 
 67 | prop_roundtrip_time_of_day_microsecond :: Property
 68 | prop_roundtrip_time_of_day_microsecond =
 69 |   gamble jTimeOfDay $
 70 |     tripping fromTimeOfDay (Just . toTimeOfDay)
 71 | 
 72 | epoch :: Time.UTCTime
 73 | epoch =
 74 |   Time.UTCTime (Time.fromGregorian 1600 3 1) 0
 75 | 
 76 | prop_compare_date_parsing :: Property
 77 | prop_compare_date_parsing =
 78 |   gamble jDate $ \date ->
 79 |     let
 80 |       str =
 81 |         renderDate date
 82 | 
 83 |       theirs :: Maybe Time.Day
 84 |       theirs =
 85 |         Time.parseTimeM False Time.defaultTimeLocale "%Y-%m-%d" $
 86 |         Char8.unpack str
 87 | 
 88 |       theirs_days :: Maybe Days
 89 |       theirs_days =
 90 |         fmap (fromIntegral . Time.toModifiedJulianDay) theirs
 91 | 
 92 |       ours :: Maybe Days
 93 |       ours =
 94 |         fmap toModifiedJulianDay .
 95 |         rightToMaybe $
 96 |         parseDate str
 97 |    in
 98 |      counterexample ("render =  " <> Char8.unpack str) $
 99 |      counterexample ("theirs =  " <> show theirs) $
100 |      counterexample ("ours   =  " <> show ours) $
101 |        theirs_days === ours
102 | 
103 | prop_compare_time_parsing :: Property
104 | prop_compare_time_parsing =
105 |   gamble jTime $ \time ->
106 |     let
107 |       str =
108 |         renderTime time
109 | 
110 |       theirs :: Maybe Time.UTCTime
111 |       theirs =
112 |         Time.parseTimeM False Time.defaultTimeLocale "%Y-%m-%d %H:%M:%S%Q" $
113 |         Char8.unpack str
114 | 
115 |       theirs_us :: Maybe Microseconds
116 |       theirs_us =
117 |         fmap round .
118 |         fmap (* 1000000) $
119 |         fmap (`Time.diffUTCTime` epoch) theirs
120 | 
121 |       ours :: Maybe Microseconds
122 |       ours =
123 |         fmap toMicroseconds .
124 |         rightToMaybe $
125 |         parseTime str
126 |    in
127 |      counterexample ("render =  " <> Char8.unpack str) $
128 |      counterexample ("theirs =  " <> show theirs) $
129 |      counterexample ("ours   =  " <> show ours) $
130 |        theirs_us === ours
131 | 
132 | return []
133 | tests :: IO Bool
134 | tests =
135 |   $quickCheckAll
136 | 


--------------------------------------------------------------------------------
/zebra-core/test/Test/Zebra/Util.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE NoImplicitPrelude #-}
  2 | {-# LANGUAGE ScopedTypeVariables #-}
  3 | module Test.Zebra.Util (
  4 |     liftE
  5 |   , trippingIO
  6 |   , trippingByIO
  7 |   , trippingSerial
  8 |   , trippingSerialE
  9 |   , runGetEither
 10 |   , runGetEitherConsumeAll
 11 |   ) where
 12 | 
 13 | import           Control.Monad.Trans.Class (lift)
 14 | 
 15 | import           Data.Binary.Get (Get, ByteOffset)
 16 | import qualified Data.Binary.Get as Get
 17 | import           Data.ByteString.Builder (Builder)
 18 | import qualified Data.ByteString.Builder as Builder
 19 | import qualified Data.ByteString.Lazy as Lazy
 20 | import           Data.String (String)
 21 | import           Data.Void (Void)
 22 | 
 23 | import           Disorder.Jack (Property, property, counterexample)
 24 | import           Disorder.Jack.Property.Diff (renderDiffs)
 25 | 
 26 | import           Text.Show.Pretty (ppShow)
 27 | import qualified Text.Show.Pretty as Pretty
 28 | 
 29 | import           P
 30 | 
 31 | import           System.IO (IO)
 32 | 
 33 | import           Control.Monad.Trans.Either (EitherT, runEitherT)
 34 | 
 35 | 
 36 | data TrippingError x y =
 37 |     EncodeError x
 38 |   | DecodeError y
 39 |     deriving (Eq, Show)
 40 | 
 41 | liftE :: IO a -> EitherT Void IO a
 42 | liftE =
 43 |   lift
 44 | 
 45 | trippingIO ::
 46 |      Eq a
 47 |   => Eq x
 48 |   => Eq y
 49 |   => Show a
 50 |   => Show x
 51 |   => Show y
 52 |   => (a -> EitherT x IO b)
 53 |   -> (b -> EitherT y IO a)
 54 |   -> a
 55 |   -> IO Property
 56 | trippingIO =
 57 |   trippingByIO id
 58 | 
 59 | trippingByIO ::
 60 |      Eq c
 61 |   => Eq x
 62 |   => Eq y
 63 |   => Show c
 64 |   => Show x
 65 |   => Show y
 66 |   => (a -> c)
 67 |   -> (a -> EitherT x IO b)
 68 |   -> (b -> EitherT y IO a)
 69 |   -> a
 70 |   -> IO Property
 71 | trippingByIO select to from a = do
 72 |   roundtrip <-
 73 |     runEitherT $ do
 74 |       b <- firstT EncodeError $ to a
 75 |       firstT DecodeError $ from b
 76 |   pure $ diff (pure $ select a) (fmap select roundtrip)
 77 | 
 78 | 
 79 | trippingSerial :: forall a. (Eq a, Show a) => (a -> Builder) -> Get a -> a -> Property
 80 | trippingSerial build0 get a =
 81 |   let
 82 |     build :: a -> Either () Builder
 83 |     build =
 84 |       pure . build0
 85 |   in
 86 |     trippingSerialE build get a
 87 | 
 88 | trippingSerialE :: (Eq a, Show a, Show x) => (a -> Either x Builder) -> Get a -> a -> Property
 89 | trippingSerialE build get a =
 90 |   let
 91 |     roundtrip = do
 92 |       b <- bimap ppShow Builder.toLazyByteString $ build a
 93 |       first ppShow $ runGetEither get b
 94 |   in
 95 |     diff (pure a) roundtrip
 96 | 
 97 | diff :: (Eq x, Eq a, Show x, Show a) => Either x a -> Either x a -> Property
 98 | diff original roundtrip =
 99 |   let
100 |     comparison =
101 |       "=== Original ===" <>
102 |       "\n" <> ppShow original <>
103 |       "\n" <>
104 |       "\n=== Roundtrip ===" <>
105 |       "\n" <> ppShow roundtrip
106 | 
107 |     pdiff = do
108 |       o <- Pretty.reify original
109 |       r <- Pretty.reify roundtrip
110 |       pure $
111 |         "=== - Original / + Roundtrip ===" <>
112 |         "\n" <> renderDiffs o r
113 |   in
114 |     counterexample "" .
115 |     counterexample "Roundtrip failed." .
116 |     counterexample "" .
117 |     counterexample (fromMaybe comparison pdiff) $
118 |       property (roundtrip == original)
119 | 
120 | runGetEither :: Get a -> Lazy.ByteString -> Either (Lazy.ByteString, ByteOffset, String) a
121 | runGetEither g =
122 |   let
123 |     third (_, _, x) = x
124 |   in
125 |     second third . Get.runGetOrFail g
126 | 
127 | runGetEitherConsumeAll :: Get a -> Lazy.ByteString -> Either (Lazy.ByteString, ByteOffset, String) a
128 | runGetEitherConsumeAll g bs =
129 |   case Get.runGetOrFail g bs of
130 |     Left err -> Left err
131 |     Right (leftovers,off,v)
132 |      | Lazy.null leftovers
133 |      -> Right v
134 |      | otherwise
135 |      -> Left (leftovers, off, "Not all input consumed")
136 | 


--------------------------------------------------------------------------------
/zebra-core/test/ambiata-zebra-core-test.cabal:
--------------------------------------------------------------------------------
 1 | name:                  ambiata-zebra-core-test
 2 | version:               0.0.1
 3 | cabal-version:         >= 1.8
 4 | build-type:            Simple
 5 | 
 6 | library
 7 |   build-depends:
 8 |                       base
 9 |                     , ambiata-anemone
10 |                     , ambiata-disorder-core
11 |                     , ambiata-disorder-corpus
12 |                     , ambiata-disorder-jack
13 |                     , ambiata-p
14 |                     , ambiata-viking
15 |                     , ambiata-x-vector
16 |                     , ambiata-zebra-core
17 |                     , binary
18 |                     , bytestring
19 |                     , containers
20 |                     , exceptions
21 |                     , pretty-show
22 |                     , QuickCheck
23 |                     , quickcheck-instances
24 |                     , text
25 |                     , thyme
26 |                     , transformers
27 |                     , transformers-either
28 |                     , vector
29 | 
30 |   exposed-modules:
31 |                     Test.Zebra.Jack
32 | 


--------------------------------------------------------------------------------
/zebra-core/test/test.hs:
--------------------------------------------------------------------------------
 1 | import           Disorder.Core.Main
 2 | 
 3 | import qualified Test.Zebra.Factset.Block
 4 | import qualified Test.Zebra.Factset.Data
 5 | import qualified Test.Zebra.Foreign.Block
 6 | import qualified Test.Zebra.Foreign.Entity
 7 | import qualified Test.Zebra.Foreign.Merge
 8 | import qualified Test.Zebra.Foreign.Table
 9 | import qualified Test.Zebra.Merge.Entity
10 | import qualified Test.Zebra.Merge.Table
11 | import qualified Test.Zebra.Serial.Binary.Array
12 | import qualified Test.Zebra.Serial.Binary.Block
13 | import qualified Test.Zebra.Serial.Binary.File
14 | import qualified Test.Zebra.Serial.Binary.Header
15 | import qualified Test.Zebra.Serial.Binary.Logical
16 | import qualified Test.Zebra.Serial.Binary.Striped
17 | import qualified Test.Zebra.Serial.Binary.Table
18 | import qualified Test.Zebra.Serial.Json.Logical
19 | import qualified Test.Zebra.Serial.Json.Schema
20 | import qualified Test.Zebra.Serial.Json.Striped
21 | import qualified Test.Zebra.Serial.Json.Util
22 | import qualified Test.Zebra.Serial.Text.Logical
23 | import qualified Test.Zebra.Serial.Text.Schema
24 | import qualified Test.Zebra.Serial.Text.Striped
25 | import qualified Test.Zebra.Table.Logical
26 | import qualified Test.Zebra.Table.Striped
27 | import qualified Test.Zebra.Time
28 | 
29 | main :: IO ()
30 | main =
31 |   disorderMain [
32 |       Test.Zebra.Factset.Block.tests
33 |     , Test.Zebra.Factset.Data.tests
34 |     , Test.Zebra.Foreign.Block.tests
35 |     , Test.Zebra.Foreign.Entity.tests
36 |     , Test.Zebra.Foreign.Merge.tests
37 |     , Test.Zebra.Foreign.Table.tests
38 |     , Test.Zebra.Merge.Table.tests
39 |     , Test.Zebra.Merge.Entity.tests
40 |     , Test.Zebra.Serial.Binary.Array.tests
41 |     , Test.Zebra.Serial.Binary.Block.tests
42 |     , Test.Zebra.Serial.Binary.File.tests
43 |     , Test.Zebra.Serial.Binary.Header.tests
44 |     , Test.Zebra.Serial.Binary.Logical.tests
45 |     , Test.Zebra.Serial.Binary.Striped.tests
46 |     , Test.Zebra.Serial.Binary.Table.tests
47 |     , Test.Zebra.Serial.Json.Logical.tests
48 |     , Test.Zebra.Serial.Json.Schema.tests
49 |     , Test.Zebra.Serial.Json.Striped.tests
50 |     , Test.Zebra.Serial.Json.Util.tests
51 |     , Test.Zebra.Serial.Text.Logical.tests
52 |     , Test.Zebra.Serial.Text.Schema.tests
53 |     , Test.Zebra.Serial.Text.Striped.tests
54 |     , Test.Zebra.Table.Logical.tests
55 |     , Test.Zebra.Table.Striped.tests
56 |     , Test.Zebra.Time.tests
57 |     ]
58 | 


--------------------------------------------------------------------------------