├── .github
    ├── dependabot.yml
    └── workflows
    │   ├── ci.yml
    │   └── golangci-lint.yml
├── .gitignore
├── .golangci.yml
├── CHANGELOG.md
├── LICENSE
├── Makefile
├── README.md
├── XXXexamples_test.go
├── arrowtests
    ├── arrow.go
    └── arrow_test.go
├── benchmarks
    ├── bow1-10-rows.parquet
    ├── bow1-100-rows.parquet
    ├── bow1-1000-rows.parquet
    ├── bow1-10000-rows.parquet
    ├── bow1-100000-rows.parquet
    ├── bow2-10-rows.parquet
    ├── bow2-100-rows.parquet
    ├── bow2-1000-rows.parquet
    ├── bow2-10000-rows.parquet
    ├── bow2-100000-rows.parquet
    └── generator_test.go
├── bow.go
├── bow_test.go
├── bowappend.go
├── bowappend_test.go
├── bowassertion.go
├── bowassertion_test.go
├── bowbuffer.go
├── bowbuffer_test.go
├── bowconvert.go
├── bowconvert_test.go
├── bowdiff.go
├── bowdiff_test.go
├── bowfill.go
├── bowfill_test.go
├── bowfind.go
├── bowfind_test.go
├── bowgenerator.go
├── bowgenerator_test.go
├── bowgetters.go
├── bowgetters_test.go
├── bowjoin.go
├── bowjoin_test.go
├── bowjson.go
├── bowjson_test.go
├── bowmetadata.go
├── bowmetadata_test.go
├── bowparquet.go
├── bowparquet_test.go
├── bowparquet_test_input.parquet
├── bowrecord.go
├── bowseries.go
├── bowseries_test.go
├── bowsetters.go
├── bowsetters_test.go
├── bowsort.go
├── bowsort_test.go
├── bowstring.go
├── bowtypes.go
├── bowtypes_test.go
├── bowvalues.go
├── go.mod
├── go.sum
├── rolling
    ├── aggregation.go
    ├── aggregation
    │   ├── XXXbenchmarks_test.go
    │   ├── arithmeticmean.go
    │   ├── arithmeticmean_test.go
    │   ├── core_test.go
    │   ├── count.go
    │   ├── count_test.go
    │   ├── firstlast.go
    │   ├── firstlast_test.go
    │   ├── integral.go
    │   ├── integral_test.go
    │   ├── minmax.go
    │   ├── minmax_test.go
    │   ├── mode.go
    │   ├── mode_test.go
    │   ├── sum.go
    │   ├── sum_test.go
    │   ├── weightedmean.go
    │   ├── weightedmean_test.go
    │   ├── whole.go
    │   ├── whole_test.go
    │   └── windowstart.go
    ├── aggregation_test.go
    ├── interpolation.go
    ├── interpolation
    │   ├── linear.go
    │   ├── linear_test.go
    │   ├── none.go
    │   ├── none_test.go
    │   ├── stepprevious.go
    │   ├── stepprevious_test.go
    │   ├── windowstart.go
    │   └── windowstart_test.go
    ├── interpolation_test.go
    ├── rolling.go
    ├── rolling_test.go
    ├── transformation
    │   ├── factor.go
    │   └── factor_test.go
    └── window.go
└── scripts
    ├── benchmark.sh
    ├── benchstat.sh
    └── test.sh


/.github/dependabot.yml:
--------------------------------------------------------------------------------
 1 | # To get started with Dependabot version updates, you'll need to specify which
 2 | # package ecosystems to update and where the package manifests are located.
 3 | # Please see the documentation for all configuration options:
 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
 5 | 
 6 | version: 2
 7 | updates:
 8 |   - package-ecosystem: "github-actions" # See documentation for possible values
 9 |     directory: "/" # Location of package manifests
10 |     schedule:
11 |       interval: "weekly"
12 |   
13 |   - package-ecosystem: "gomod" # See documentation for possible values
14 |     directory: "/" # Location of package manifests
15 |     schedule:
16 |       interval: "weekly"
17 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | 
 3 | on: [push, pull_request]
 4 | 
 5 | jobs:
 6 |   ci:
 7 |     runs-on: ubuntu-latest
 8 |     env:
 9 |       BENCH_RESULTS_DIR_PATH: benchmarks
10 |       BENCH_COMPARISON_FILE_PATH: ${{ format('benchmarks/comparison-{0}-vs-{1}.txt', github.base_ref, github.sha) }}
11 |     steps:
12 |       - uses: actions/checkout@v3
13 |       - uses: actions/setup-go@v4
14 |         with:
15 |           go-version-file: 'go.mod'
16 |           cache: true
17 | 
18 |       - name: Run tests
19 |         run: |
20 |           go install github.com/jstemmer/go-junit-report@latest
21 |           bash -c ./scripts/test.sh
22 | 
23 |       - name: ${{ format('Run benchmarks on sha {0}', github.sha) }}
24 |         run: |
25 |           go install golang.org/x/perf/cmd/benchstat@latest
26 |           bash ./scripts/benchmark.sh
27 |         env:
28 |           BENCH_RESULTS_FILE_PATH: ${{ format('benchmarks/{0}.txt', github.sha) }}
29 | 
30 |       - name: ${{ format('Uploading artifact of sha {0} benchmark results', github.sha) }}
31 |         uses: actions/upload-artifact@v4  # v3 is retired; keep the same major version as download-artifact below
32 |         with:
33 |           name: ${{ format('{0}-sha-benchmark-results', github.sha) }}
34 |           path: ${{ format('benchmarks/{0}.txt', github.sha) }}
35 | 
36 |       - uses: actions/checkout@v3
37 |         if: ${{ github.event_name == 'pull_request' }}
38 |         with:
39 |           ref: ${{ github.base_ref }}
40 | 
41 |       - uses: actions/download-artifact@v4  # must match the major version of the upload-artifact step above
42 |         if: ${{ github.event_name == 'pull_request' }}
43 |         with:
44 |           name: ${{ format('{0}-sha-benchmark-results', github.sha) }}
45 |           path: benchmarks
46 | 
47 |       - name: ${{ format('Run benchmarks on base ref {0}', github.base_ref) }}
48 |         if: ${{ github.event_name == 'pull_request' }}
49 |         run: |
50 |           bash ./scripts/benchmark.sh
51 |         env:
52 |           BENCH_RESULTS_FILE_PATH: ${{ format('benchmarks/{0}.txt', github.base_ref) }}
53 | 
54 |       - name: ${{ format('Uploading artifact of base ref {0} benchmark results', github.base_ref) }}
55 |         if: ${{ github.event_name == 'pull_request' }}
56 |         uses: actions/upload-artifact@v4  # v3 of the artifact actions is retired
57 |         with:
58 |           name: ${{ format('{0}-base-ref-benchmark-results', github.base_ref) }}
59 |           path: ${{ format('benchmarks/{0}.txt', github.base_ref) }}
60 | 
61 |       - name: ${{ format('Compare benchmarks of base ref {0} with sha {1}', github.base_ref, github.sha) }}
62 |         if: ${{ github.event_name == 'pull_request' }}
63 |         run: |
64 |           bash ./scripts/benchstat.sh "${{ format('benchmarks/{0}.txt', github.base_ref) }}" "${{ format('benchmarks/{0}.txt', github.sha) }}" 
65 | 
66 |       - name: Upload artifact of benchmark comparison results
67 |         if: ${{ github.event_name == 'pull_request' }}
68 |         uses: actions/upload-artifact@v4  # v3 of the artifact actions is retired
69 |         with:
70 |           name: benchmark-comparison-results
71 |           path: ${{ format('benchmarks/comparison-{0}-vs-{1}.txt', github.base_ref, github.sha) }}
72 | 


--------------------------------------------------------------------------------
/.github/workflows/golangci-lint.yml:
--------------------------------------------------------------------------------
 1 | name: golangci-lint
 2 | 
 3 | on: [push]
 4 | 
 5 | jobs:
 6 |   lint:
 7 |     runs-on: ubuntu-latest
 8 |     steps:
 9 |       - uses: actions/checkout@v3
10 |       - uses: actions/setup-go@v4
11 |         with:
12 |           go-version-file: 'go.mod'
13 |           cache: true
14 |       - name: golangci-lint
15 |         uses: golangci/golangci-lint-action@v3
16 |         with:
17 |           version: latest
18 |           args: --verbose
19 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Created by .ignore support plugin (hsz.mobi)
 2 | ### Go template
 3 | # Binaries for programs and plugins
 4 | *.exe
 5 | *.dll
 6 | *.so
 7 | *.dylib
 8 | 
 9 | # Test binary, build with `go test -c`
10 | *.test
11 | mocks
12 | 
13 | # Output of the go coverage tool, specifically when used with LiteIDE
14 | *.out
15 | 
16 | # Contains docker temp files
17 | .tmp
18 | .ssh
19 | 
20 | # ide
21 | .idea/*
22 | .vscode
23 | 
24 | # Apple Desktop Services Store
25 | .DS_Store
26 | 
27 | # Ansible
28 | *.retry
29 | 
30 | # GENERAL PURPOSE
31 | vendor


--------------------------------------------------------------------------------
/.golangci.yml:
--------------------------------------------------------------------------------
1 | linters:
2 |   enable:
3 |     - gofmt
4 |     - gci
5 |     - goimports
6 | 
7 | run:
8 |   timeout: 5m
9 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
  1 | UNRELEASED [XXXX-XX-XX]
  2 | -------------------
  3 | 
  4 | v1.0.0 [2023-04-07]
  5 | -------------------
  6 | 
  7 | - General
  8 |   - bump to go 1.18
  9 |   - bump to arrow/go/v8
 10 |   - remove useless count script
 11 |   - add gci linter
 12 |   - improve documentation
 13 |   - improve error handling
 14 |   - improve code readability
 15 |   - remove code gen to prepare for Timestamp support
 16 |   - add Bow data type in Buffer to decouple Go native types from Arrow types
 17 |   - expose arrow record
 18 | 
 19 | v0.18.0 [2022-02-16]
 20 | -------------------
 21 | 
 22 | - General
 23 |   - bump to arrow/go/v7
 24 |   - improve CI to run benchmarks comparison for PR with the same runner
 25 | - Parquet
 26 |   - add new GetParquetMetaColTimeUnit method to extract column time unit from the metadata of a bow read from a parquet file 
 27 |   - remove deprecated ConvertedType from the metadata
 28 | 
 29 | 
 30 | v0.17.0 [2021-10-27]
 31 | -------------------
 32 | 
 33 | - new Manipulation features
 34 | - bump Go to version 1.17
 35 | - bow interface: switch from colNames to colIndices arguments
 36 | - support special characters in Parquet read/write
 37 | 
 38 | 
 39 | v0.16.0 [2021-08-25]
 40 | -------------------
 41 | 
 42 | - general code refactoring
 43 | - improved performance, mostly with better memory usage and buffers rework
 44 | - introduced code generation with Metronlab/genius framework
 45 | - changed Find method and add Contains and FindNext
 46 | - improved bow generator by simplification and made it extensible by user for value creation strategy
 47 | 
 48 | 
 49 | v0.15.0 [2021-08-04]
 50 | -------------------
 51 | 
 52 | - Benchmarks and Profiling:
 53 |   - Moved benchmarks closer to functions
 54 |   - Simplified and faster benchmarks
 55 |   - Added AppendBows and NewBufferFromInterfaces benchmarks
 56 |   - Added Makefile rules for tests and benchmarks profiling
 57 | 
 58 | 
 59 | v0.14.0 [2021-07-20]
 60 | -------------------
 61 | 
 62 | - Adding `SetMetadata` method to `Bow`'s interface
 63 | 
 64 | 
 65 | v0.13.0 [2021-06-17]
 66 | -------------------
 67 | 
 68 | - Adding `AddCols` method to `Bow`'s interface
 69 | 
 70 | 
 71 | v0.12.1 [2021-06-16]
 72 | -------------------
 73 | 
 74 | - Apache Parquet: new tests and UX improvements
 75 | 
 76 | 
 77 | v0.12.0 [2021-06-10]
 78 | -------------------
 79 | 
 80 | - Apache Parquet file read/write support
 81 | - Add Schema Metadata support
 82 | - Add golangci-lint usage
 83 | 
 84 | 
 85 | v0.11.0 [2021-05-17]
 86 | -------------------
 87 | 
 88 | - Add new bow.Diff function
 89 | - Deprecate Difference aggregation
 90 | 
 91 | 
 92 | v0.10.0 [2021-05-11]
 93 | -------------------
 94 | 
 95 | - Rolling:
 96 |     - improved code readability
 97 |     - aggregation/fill: it is now possible to pass a previous row option to the rolling to enable the correct interpolation of the first row of its first window, in the case of missing window start row
 98 | 
 99 | 
100 | v0.9.0 [2021-03-24]
101 | -------------------
102 | 
103 | - General:
104 |     - Fix typos
105 |     - Improve robustness and code clarity of functions IsColEmpty, IsColSorted and FillLinear with better error management
106 |     - Remove unused variables
107 |     - Remove bow.marshalJSONRowBased
108 |     
109 | - Bow Generator:
110 |     - Improve randomness of values
111 |     - Added support for String and Bool data types
112 |     - New ColNames and DataTypes options for more flexibility
113 |     - Improve user experience with better error management
114 | 
115 | - Benchmarks improvements:
116 |     - Added new test cases
117 |     - Added usage of benchstat on the CircleCI pipeline to compare benchmark results with master branch
118 | 
119 | - New Functions:
120 |     - NewValuesFromJSON
121 | 
122 | - Bug fix:
123 |   - Rolling inclusive window with duplicated indexes now correctly iterates while keeping windowing integrity
124 | 
125 | 
126 | v0.8.0 [2021-02-12]
127 | -------------------
128 | 
129 | - New functions:
130 |     - IsEmpty
131 |     - FindFirst
132 |     - IsSupported
133 |     - GetReturnType
134 | - Adding strong typing support
135 | - Refactoring Bow's logic to return a valid schema instead of nil when no data is found
136 | - Fixing tests
137 | 
138 | 
139 | v0.7.3 [2021-01-12]
140 | -------------------
141 | 
142 | - New functions:
143 |     - NewBowEmpty
144 |     - NewBowFromColNames
145 |     - EncodeBowToJSONBody
146 |     - DecodeJSONRespToBow
147 | - New aggregation tests
148 | - Minor code refactoring
149 | 
150 | 
151 | v0.7.2 [2020-09-14]
152 | -------------------
153 | 
154 | ### Bugfixes
155 | - OuterJoin: support of bow without rows returning correct schema
156 | 
157 | 
158 | v0.7.1 [2020-08-03]
159 | -------------------
160 | 
161 | ### Features
162 | - Add SortByCol method to sort a bow by a column name
163 | 
164 | 
165 | v0.6.2 [2020-06-02]
166 | -------------------
167 | 
168 | #### Bugfixes
169 | - InnerJoin
170 | 
171 | 
172 | v0.6.1 [2020-04-22]
173 | -------------------
174 | 
175 | #### Bugfixes
176 | - bump arrow to apache-arrow-0.17.0
177 | 
178 | #### Known issues
179 | Arrow now allows several columns with the same name, which introduces new panics in bow when this case happens.
180 | [corresponding issue](https://github.com/Metronlab/bow/issues/12)
181 | 
182 | 
183 | v0.6.0 [2020-04-22]
184 | -------------------
185 | 
186 | #### Features
187 | - Add Fill functions for missing data interpolation
188 | - Add OuterJoin method
189 | - Refactor InnerJoin method
190 | - Add new CI with CircleCI
191 | - Refactor the sub package bow to have the main functionalities available in the root module
192 | 
193 | #### How to migrate to v0.6.0
194 | It is necessary to replace the library import path from github.com/Metronlab/bow/bow to github.com/Metronlab/bow
195 | 
196 | 
197 | 0.2.0 [2019-02-19]
198 | -------------------
199 | 
200 | #### Features
201 | 
202 | - Deprecate the print method in favor of a Stringer interface
203 | - InnerJoin is based on column names for now; more flexibility over the join will have to be provided later on
204 | - Map-based indexes for join optimisation (divides time by 5 on a simple short sample)
205 | 
206 | #### Bugfixes
207 | 
208 | - Fix empty series that made the code segfault in arrow; it is now possible to have an empty dataframe with schema/record set.
209 | 
210 | 
211 | 0.1.0 [2019-02-01]
212 | -------------------
213 | 
214 | #### Features
215 | 
216 | - Row based json encoding and decoding
217 | - New Bow from row and column based [][]interface{}
218 | - Method to print
219 | 
220 | 
221 | 0.0.0 [2019-01-11]
222 | -------------------
223 | 
224 | #### Features
225 | 
226 | - Initial Release
227 | - Simple dataframe with type and series based on apache arrow
228 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | #user overridable variables
 2 | all: lint test
 3 | 
 4 | install:
 5 | 	@go install golang.org/x/perf/cmd/benchstat@latest
 6 | 	@go install github.com/jstemmer/go-junit-report@latest
 7 | 	@go install github.com/Metronlab/genius@latest
 8 | 	curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin latest
 9 | 
10 | lint:
11 | 	golangci-lint run --fix -v $(PKG)
12 | 
13 | test:
14 | 	@RUN=$(RUN) PKG=$(PKG) TIMEOUT=$(TIMEOUT) bash -c $(PWD)/scripts/test.sh
15 | 
16 | bench:
17 | 	@RUN=$(RUN) PKG=$(PKG) TIMEOUT=$(TIMEOUT) bash -c $(PWD)/scripts/benchmark.sh
18 | 
19 | CPUPROFILE=/tmp/$(shell basename $(PWD))$(shell echo $(PKG) | sed 's/[^[:alnum:]\t]//g').cpu.prof
20 | MEMPROFILE=/tmp/$(shell basename $(PWD))$(shell echo $(PKG) | sed 's/[^[:alnum:]\t]//g').mem.prof
21 | 
22 | test-profile:
23 | 	go test $(PKG) -v -run $(RUN) -cpuprofile $(CPUPROFILE) -memprofile $(MEMPROFILE)
24 | 	-lsof -ti tcp:8888 | xargs kill -9 2> /dev/null
25 | 	-lsof -ti tcp:8989 | xargs kill -9 2> /dev/null
26 | 	go tool pprof -http=:8888 $(CPUPROFILE) &
27 | 	go tool pprof -http=:8989 $(MEMPROFILE) &
28 | 
29 | bench-profile:
30 | 	go test $(PKG) -run XXX -bench $(RUN) -cpuprofile $(CPUPROFILE) -memprofile $(MEMPROFILE)
31 | 	-lsof -ti tcp:9090 | xargs kill -9 2> /dev/null
32 | 	-lsof -ti tcp:9191 | xargs kill -9 2> /dev/null
33 | 	go tool pprof -http=:9090 $(CPUPROFILE) &
34 | 	go tool pprof -http=:9191 $(MEMPROFILE) &
35 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Bow
 2 | 
 3 | ![lint](https://github.com/Metronlab/bow/actions/workflows/golangci-lint.yml/badge.svg)
 4 | ![ci](https://github.com/Metronlab/bow/actions/workflows/ci.yml/badge.svg)
 5 | 
 6 | Bow is meant to be an efficient data manipulation framework based on [Apache Arrow](https://arrow.apache.org/) for the Go programming language.
 7 | Inspired by [Pandas](https://pandas.pydata.org/), Bow aims to bring the last missing block required to make Go a data science ready language.
 8 | 
 9 | The `Bow` interface is stable and frozen: you can use it at will, and all further changes will be planned for a v2.
10 | 
11 | This project has been used for years in production at [Metron](https://www.metron.energy/),
12 | however it's still an incomplete pet project compared to [Pandas](https://pandas.pydata.org/).
13 | Bow is currently developed internally at Metronlab with primary concerns about timeseries.
14 | Recently [empowill](https://www.empowill.com/) decided to contribute in order to confront this library with more general-purpose usage.
15 | 
16 | We are looking for a foundation / group of people that could help take this library to the next level!
17 | 
18 | ## CONTRIBUTE
19 | Don't hesitate to send issues and contribute to the library design.
20 | 
21 | This library is in pure Go: to contribute, you just need a recent Go version installed, and you can directly use `make` to validate your contribution.
22 | 
23 | - Create an issue
24 | - Create a branch from main
25 | - Implement and comply with the Github Actions CI
26 | - Submit a PR
27 | 
28 | 


--------------------------------------------------------------------------------
/XXXexamples_test.go:
--------------------------------------------------------------------------------
  1 | package bow
  2 | 
  3 | import (
  4 | 	"bytes"
  5 | 	"encoding/json"
  6 | 	"fmt"
  7 | )
  8 | 
  9 | func ExampleNewBow() {
 10 | 	b, err := NewBow(
 11 | 		NewSeries("col1", Int64, []int64{1, 2, 3, 4}, nil),
 12 | 		NewSeries("col2", Float64, []float64{1.1, 2.2, 3.3, 4}, []bool{true, false, true, true}),
 13 | 		NewSeries("col3", Boolean, []bool{true, false, true, false}, []bool{true, false, true, true}),
 14 | 	)
 15 | 	if err != nil {
 16 | 		panic(err)
 17 | 	}
 18 | 
 19 | 	fmt.Println(b)
 20 | 	// Output:
 21 | 	// col1:int64  col2:float64  col3:bool
 22 | 	// 1           1.1           true
 23 | 	// 2           <nil>         <nil>
 24 | 	// 3           3.3           true
 25 | 	// 4           4             false
 26 | }
 27 | 
 28 | func ExampleNewBowFromColBasedInterfaces() {
 29 | 	colNames := []string{"time", "value", "valueFromJSON"}
 30 | 	colTypes := make([]Type, len(colNames))
 31 | 	colTypes[0] = Int64
 32 | 	colBasedData := [][]interface{}{
 33 | 		{1, 1.2, json.Number("3")},
 34 | 		{1, json.Number("1.2"), 3},
 35 | 		{json.Number("1.1"), 2, 1.3},
 36 | 	}
 37 | 
 38 | 	b, err := NewBowFromColBasedInterfaces(colNames, colTypes, colBasedData)
 39 | 	if err != nil {
 40 | 		panic(err)
 41 | 	}
 42 | 
 43 | 	fmt.Println(b)
 44 | 	// Output:
 45 | 	// time:int64  value:int64  valueFromJSON:float64
 46 | 	// 1           1            1.1
 47 | 	// 1           <nil>        2
 48 | 	// 3           3            1.3
 49 | }
 50 | 
 51 | func ExampleNewBowFromRowBasedInterfaces() {
 52 | 	colNames := []string{"time", "value", "valueFromJSON"}
 53 | 	colTypes := []Type{Int64, Int64, Float64}
 54 | 	rowBasedData := [][]interface{}{
 55 | 		{1, 1, json.Number("1.1")},
 56 | 		{1.2, json.Number("1.2"), 2},
 57 | 		{json.Number("3"), 3, 1.3},
 58 | 	}
 59 | 
 60 | 	b, err := NewBowFromRowBasedInterfaces(colNames, colTypes, rowBasedData)
 61 | 	if err != nil {
 62 | 		panic(err)
 63 | 	}
 64 | 
 65 | 	fmt.Println(b)
 66 | 	// Output:
 67 | 	// time:int64  value:int64  valueFromJSON:float64
 68 | 	// 1           1            1.1
 69 | 	// 1           <nil>        2
 70 | 	// 3           3            1.3
 71 | }
 72 | 
 73 | func ExampleBow_MarshalJSON() {
 74 | 	colNames := []string{"time", "value", "valueFromJSON"}
 75 | 	colTypes := make([]Type, len(colNames))
 76 | 	colTypes[0] = Int64
 77 | 	colBasedData := [][]interface{}{
 78 | 		{1, 1.2, json.Number("3")},
 79 | 		{1, json.Number("1.2"), 3},
 80 | 		{json.Number("1.1"), 2, 1.3},
 81 | 	}
 82 | 
 83 | 	b, err := NewBowFromColBasedInterfaces(colNames, colTypes, colBasedData)
 84 | 	if err != nil {
 85 | 		panic(err)
 86 | 	}
 87 | 
 88 | 	js, err := b.MarshalJSON()
 89 | 	if err != nil {
 90 | 		panic(err)
 91 | 	}
 92 | 
 93 | 	// pretty print json
 94 | 	var out bytes.Buffer
 95 | 	if err = json.Indent(&out, js, "", "\t"); err != nil {
 96 | 		panic(err)
 97 | 	}
 98 | 
 99 | 	fmt.Println(out.String())
100 | 	// Output:
101 | 	// {
102 | 	// 	"schema": {
103 | 	// 		"fields": [
104 | 	// 			{
105 | 	// 				"name": "time",
106 | 	// 				"type": "int64"
107 | 	// 			},
108 | 	// 			{
109 | 	// 				"name": "value",
110 | 	// 				"type": "int64"
111 | 	// 			},
112 | 	// 			{
113 | 	// 				"name": "valueFromJSON",
114 | 	// 				"type": "float64"
115 | 	// 			}
116 | 	// 		]
117 | 	// 	},
118 | 	// 	"data": [
119 | 	// 		{
120 | 	// 			"time": 1,
121 | 	// 			"value": 1,
122 | 	// 			"valueFromJSON": 1.1
123 | 	// 		},
124 | 	// 		{
125 | 	// 			"time": 1,
126 | 	// 			"valueFromJSON": 2
127 | 	// 		},
128 | 	// 		{
129 | 	// 			"time": 3,
130 | 	// 			"value": 3,
131 | 	// 			"valueFromJSON": 1.3
132 | 	// 		}
133 | 	// 	]
134 | 	// }
135 | }
136 | 


--------------------------------------------------------------------------------
/arrowtests/arrow.go:
--------------------------------------------------------------------------------
 1 | package arrowtests
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 
 6 | 	"github.com/apache/arrow/go/v8/arrow"
 7 | 	"github.com/apache/arrow/go/v8/arrow/array"
 8 | 	"github.com/apache/arrow/go/v8/arrow/memory"
 9 | )
10 | 
11 | var (
12 | 	EventSchema = arrow.NewSchema(
13 | 		[]arrow.Field{
14 | 			{Name: "time", Type: arrow.FixedWidthTypes.Time32ms},
15 | 			{Name: "value", Type: arrow.PrimitiveTypes.Float64},
16 | 			{Name: "quality", Type: arrow.PrimitiveTypes.Int64},
17 | 		}, nil,
18 | 	)
19 | )
20 | 
21 | type Event struct {
22 | 	Time    arrow.Time32
23 | 	Value   interface{}
24 | 	quality int64
25 | }
26 | 
27 | // NewTSRecord creates a new sample record based on EventSchema and returns it together with that schema.
28 | func NewTSRecord() (*arrow.Schema, arrow.Record) {
29 | 	pool := memory.NewGoAllocator()
30 | 	b := array.NewRecordBuilder(pool, EventSchema)
31 | 	defer b.Release()
32 | 
33 | 	b.Field(0).(*array.Time32Builder).AppendValues([]arrow.Time32{1, 2, 3, 4}, nil)
34 | 	b.Field(1).(*array.Float64Builder).AppendValues([]float64{7, 8, 9, 10}, []bool{true, true, false, true})
35 | 	b.Field(2).(*array.Int64Builder).AppendValues([]int64{42, 42, 41, 42}, nil)
36 | 
37 | 	return EventSchema, b.NewRecord()
38 | }
39 | 
40 | // PrintRecordColumns prints a column-based output: one line per column with its index, name and values.
41 | func PrintRecordColumns(rec arrow.Record) {
42 | 	for i, col := range rec.Columns() {
43 | 		fmt.Printf("column[%d] %q: %v\n", i, rec.ColumnName(i), col)
44 | 	}
45 | }
46 | 
47 | // PrintRecordRows prints a row-based output: one line per row with its time, value and quality.
48 | func PrintRecordRows(schema *arrow.Schema, recs []arrow.Record) {
49 | 	// Make a read-only table based on many records
50 | 	table := array.NewTableFromRecords(schema, recs)
51 | 	defer table.Release()
52 | 
53 | 	// Make an events series with one entry per table row
54 | 	events := make([]Event, table.NumRows())
55 | 
56 | 	// Look up the schema index of each event field
57 | 	timeIndex := table.Schema().FieldIndices("time")[0]
58 | 	valueIndex := table.Schema().FieldIndices("value")[0]
59 | 	qualityIndex := table.Schema().FieldIndices("quality")[0]
60 | 
61 | 	// TableReader is able to iterate on a table grouping by indexes,
62 | 	// which is marvelous to do calculations in parallel.
63 | 	// It is underutilized in this case: for a naive implementation, iteration is done 1 by 1
64 | 	tr := array.NewTableReader(table, 1)
65 | 	defer tr.Release()
66 | 
67 | 	// Fill the series with the TableReader iteration
68 | 	index := 0
69 | 	for tr.Next() {
70 | 		rec := tr.Record()
71 | 
72 | 		td := array.NewTime32Data(rec.Column(timeIndex).Data())
73 | 		if td.IsValid(0) {
74 | 			events[index].Time = td.Time32Values()[0]
75 | 		}
76 | 		vd := array.NewFloat64Data(rec.Column(valueIndex).Data())
77 | 		if vd.IsValid(0) {
78 | 			events[index].Value = vd.Float64Values()[0]
79 | 		}
80 | 		qd := array.NewInt64Data(rec.Column(qualityIndex).Data())
81 | 		if qd.IsValid(0) {
82 | 			events[index].quality = qd.Int64Values()[0]
83 | 		}
84 | 
85 | 		index++
86 | 	}
87 | 
88 | 	// Print the series
89 | 	for _, e := range events {
90 | 		fmt.Println("time:", e.Time, ", value:", e.Value, ", quality:", e.quality)
91 | 	}
92 | }
93 | 


--------------------------------------------------------------------------------
/arrowtests/arrow_test.go:
--------------------------------------------------------------------------------
 1 | package arrowtests
 2 | 
 3 | import (
 4 | 	"github.com/apache/arrow/go/v8/arrow"
 5 | )
 6 | 
 7 | func ExamplePrintRecordColumns() {
 8 | 	_, rec := NewTSRecord()
 9 | 	defer rec.Release()
10 | 
11 | 	PrintRecordColumns(rec)
12 | 
13 | 	// Output:
14 | 	//column[0] "time": [1 2 3 4]
15 | 	//column[1] "value": [7 8 (null) 10]
16 | 	//column[2] "quality": [42 42 41 42]
17 | }
18 | 
19 | func ExamplePrintRecordRows() {
20 | 	s, rec := NewTSRecord()
21 | 	defer rec.Release()
22 | 
23 | 	PrintRecordRows(s, []arrow.Record{rec})
24 | 
25 | 	// Output:
26 | 	//time: 1 , value: 7 , quality: 42
27 | 	//time: 2 , value: 8 , quality: 42
28 | 	//time: 3 , value: <nil> , quality: 41
29 | 	//time: 4 , value: 10 , quality: 42
30 | }
31 | 


--------------------------------------------------------------------------------
/benchmarks/bow1-10-rows.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Metronlab/bow/e1e1bd5c179e1541de46061796bb13736caadac2/benchmarks/bow1-10-rows.parquet


--------------------------------------------------------------------------------
/benchmarks/bow1-100-rows.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Metronlab/bow/e1e1bd5c179e1541de46061796bb13736caadac2/benchmarks/bow1-100-rows.parquet


--------------------------------------------------------------------------------
/benchmarks/bow1-1000-rows.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Metronlab/bow/e1e1bd5c179e1541de46061796bb13736caadac2/benchmarks/bow1-1000-rows.parquet


--------------------------------------------------------------------------------
/benchmarks/bow1-10000-rows.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Metronlab/bow/e1e1bd5c179e1541de46061796bb13736caadac2/benchmarks/bow1-10000-rows.parquet


--------------------------------------------------------------------------------
/benchmarks/bow1-100000-rows.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Metronlab/bow/e1e1bd5c179e1541de46061796bb13736caadac2/benchmarks/bow1-100000-rows.parquet


--------------------------------------------------------------------------------
/benchmarks/bow2-10-rows.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Metronlab/bow/e1e1bd5c179e1541de46061796bb13736caadac2/benchmarks/bow2-10-rows.parquet


--------------------------------------------------------------------------------
/benchmarks/bow2-100-rows.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Metronlab/bow/e1e1bd5c179e1541de46061796bb13736caadac2/benchmarks/bow2-100-rows.parquet


--------------------------------------------------------------------------------
/benchmarks/bow2-1000-rows.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Metronlab/bow/e1e1bd5c179e1541de46061796bb13736caadac2/benchmarks/bow2-1000-rows.parquet


--------------------------------------------------------------------------------
/benchmarks/bow2-10000-rows.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Metronlab/bow/e1e1bd5c179e1541de46061796bb13736caadac2/benchmarks/bow2-10000-rows.parquet


--------------------------------------------------------------------------------
/benchmarks/bow2-100000-rows.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Metronlab/bow/e1e1bd5c179e1541de46061796bb13736caadac2/benchmarks/bow2-100000-rows.parquet


--------------------------------------------------------------------------------
/benchmarks/generator_test.go:
--------------------------------------------------------------------------------
 1 | package benchmarks
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"testing"
 6 | 
 7 | 	"github.com/metronlab/bow"
 8 | 	"github.com/stretchr/testify/assert"
 9 | 	"github.com/stretchr/testify/require"
10 | )
11 | 
12 | func TestGeneratorForBenchmarks(t *testing.T) {
13 | 	t.Skip("comment this skip to generate new bows for benchmarks")
14 | 
15 | 	for rows := 10; rows <= 100000; rows *= 10 {
16 | 		b1, err := bow.NewGenBow(rows,
17 | 			bow.GenSeriesOptions{
18 | 				Name:        "Int64_ref",
19 | 				GenStrategy: bow.GenStrategyRandomIncremental,
20 | 			},
21 | 			bow.GenSeriesOptions{
22 | 				Name:        "Int64_no_nils_bow1",
23 | 				GenStrategy: bow.GenStrategyRandom,
24 | 			},
25 | 			bow.GenSeriesOptions{
26 | 				Name:        "Int64_bow1",
27 | 				GenStrategy: bow.GenStrategyRandom,
28 | 				MissingData: true,
29 | 			},
30 | 			bow.GenSeriesOptions{
31 | 				Name:        "Float64_bow1",
32 | 				GenStrategy: bow.GenStrategyRandom,
33 | 				MissingData: true,
34 | 				Type:        bow.Float64,
35 | 			},
36 | 			bow.GenSeriesOptions{
37 | 				Name:        "Boolean_bow1",
38 | 				GenStrategy: bow.GenStrategyRandom,
39 | 				MissingData: true,
40 | 				Type:        bow.Boolean,
41 | 			},
42 | 			bow.GenSeriesOptions{
43 | 				Name:        "String_bow1",
44 | 				GenStrategy: bow.GenStrategyRandom,
45 | 				MissingData: true,
46 | 				Type:        bow.String,
47 | 			},
48 | 		)
49 | 		require.NoError(t, err)
50 | 
51 | 		b2, err := bow.NewGenBow(rows,
52 | 			bow.GenSeriesOptions{
53 | 				Name:        "Int64_ref",
54 | 				GenStrategy: bow.GenStrategyRandomIncremental,
55 | 			},
56 | 			bow.GenSeriesOptions{
57 | 				Name:        "Int64_bow2",
58 | 				GenStrategy: bow.GenStrategyRandom,
59 | 				MissingData: true,
60 | 			},
61 | 			bow.GenSeriesOptions{
62 | 				Name:        "Float64_bow2",
63 | 				GenStrategy: bow.GenStrategyRandom,
64 | 				MissingData: true,
65 | 				Type:        bow.Float64,
66 | 			},
67 | 			bow.GenSeriesOptions{
68 | 				Name:        "Boolean_bow2",
69 | 				GenStrategy: bow.GenStrategyRandom,
70 | 				MissingData: true,
71 | 				Type:        bow.Boolean,
72 | 			},
73 | 			bow.GenSeriesOptions{
74 | 				Name:        "String_bow2",
75 | 				GenStrategy: bow.GenStrategyRandom,
76 | 				MissingData: true,
77 | 				Type:        bow.String,
78 | 			},
79 | 		)
80 | 		require.NoError(t, err)
81 | 
82 | 		assert.NoError(t, b1.WriteParquet(fmt.Sprintf("./bow1-%d-rows", rows), false))
83 | 		_, err = bow.NewBowFromParquet(fmt.Sprintf("./bow1-%d-rows.parquet", rows), false)
84 | 		assert.NoError(t, err)
85 | 
86 | 		assert.NoError(t, b2.WriteParquet(fmt.Sprintf("./bow2-%d-rows", rows), false))
87 | 		_, err = bow.NewBowFromParquet(fmt.Sprintf("./bow2-%d-rows.parquet", rows), false)
88 | 		assert.NoError(t, err)
89 | 	}
90 | }
91 | 


--------------------------------------------------------------------------------
/bowappend.go:
--------------------------------------------------------------------------------
  1 | package bow
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 
  6 | 	"github.com/apache/arrow/go/v8/arrow"
  7 | 	"github.com/apache/arrow/go/v8/arrow/array"
  8 | 	"github.com/apache/arrow/go/v8/arrow/memory"
  9 | )
 10 | 
 11 | // AppendBows attempts to append bows with equal schemas.
 12 | // Different schemas will lead to undefined behavior.
 13 | // Resulting metadata is copied from the first bow.
 14 | func AppendBows(bows ...Bow) (Bow, error) {
 15 | 	if len(bows) == 0 {
 16 | 		return nil, nil
 17 | 	}
 18 | 
 19 | 	if len(bows) == 1 {
 20 | 		return bows[0], nil
 21 | 	}
 22 | 
 23 | 	numRows := 0
 24 | 	for _, b := range bows {
 25 | 		numRows += b.NumRows()
 26 | 	}
 27 | 
 28 | 	refBow := bows[0]
 29 | 	series := make([]Series, refBow.NumCols())
 30 | 
 31 | 	mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
 32 | 	for colIndex := 0; colIndex < refBow.NumCols(); colIndex++ {
 33 | 		var newArray arrow.Array
 34 | 		refType := refBow.ColumnType(colIndex)
 35 | 		switch refType {
 36 | 		case Int64:
 37 | 			builder := array.NewInt64Builder(mem)
 38 | 			builder.Resize(numRows)
 39 | 			for _, b := range bows {
 40 | 				if colType := b.ColumnType(colIndex); colType != refType {
 41 | 					return nil, fmt.Errorf("incompatible types '%s' and '%s'", refType, colType)
 42 | 				}
 43 | 				data := b.(*bow).Column(colIndex).Data()
 44 | 				arr := array.NewInt64Data(data)
 45 | 				v := int64Values(arr)
 46 | 				valid := getValiditySlice(arr)
 47 | 				builder.AppendValues(v, valid)
 48 | 			}
 49 | 			newArray = builder.NewArray()
 50 | 		case Float64:
 51 | 			builder := array.NewFloat64Builder(mem)
 52 | 			builder.Resize(numRows)
 53 | 			for _, b := range bows {
 54 | 				if colType := b.ColumnType(colIndex); colType != refType {
 55 | 					return nil, fmt.Errorf("incompatible types '%s' and '%s'", refType, colType)
 56 | 				}
 57 | 				data := b.(*bow).Column(colIndex).Data()
 58 | 				arr := array.NewFloat64Data(data)
 59 | 				v := float64Values(arr)
 60 | 				valid := getValiditySlice(arr)
 61 | 				builder.AppendValues(v, valid)
 62 | 			}
 63 | 			newArray = builder.NewArray()
 64 | 		case Boolean:
 65 | 			builder := array.NewBooleanBuilder(mem)
 66 | 			builder.Resize(numRows)
 67 | 			for _, b := range bows {
 68 | 				if colType := b.ColumnType(colIndex); colType != refType {
 69 | 					return nil, fmt.Errorf("incompatible types '%s' and '%s'", refType, colType)
 70 | 				}
 71 | 				data := b.(*bow).Column(colIndex).Data()
 72 | 				arr := array.NewBooleanData(data)
 73 | 				v := booleanValues(arr)
 74 | 				valid := getValiditySlice(arr)
 75 | 				builder.AppendValues(v, valid)
 76 | 			}
 77 | 			newArray = builder.NewArray()
 78 | 		case String:
 79 | 			builder := array.NewStringBuilder(mem)
 80 | 			builder.Resize(numRows)
 81 | 			for _, b := range bows {
 82 | 				if colType := b.ColumnType(colIndex); colType != refType {
 83 | 					return nil, fmt.Errorf("incompatible types '%s' and '%s'", refType, colType)
 84 | 				}
 85 | 				data := b.(*bow).Column(colIndex).Data()
 86 | 				arr := array.NewStringData(data)
 87 | 				v := stringValues(arr)
 88 | 				valid := getValiditySlice(arr)
 89 | 				builder.AppendValues(v, valid)
 90 | 			}
 91 | 			newArray = builder.NewArray()
 92 | 		default:
 93 | 			return nil, fmt.Errorf("unsupported type '%s'", refType)
 94 | 		}
 95 | 
 96 | 		series[colIndex] = Series{
 97 | 			Name:  refBow.ColumnName(colIndex),
 98 | 			Array: newArray,
 99 | 		}
100 | 	}
101 | 
102 | 	return NewBowWithMetadata(refBow.Metadata(), series...)
103 | }
104 | 


--------------------------------------------------------------------------------
/bowappend_test.go:
--------------------------------------------------------------------------------
  1 | package bow
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"testing"
  6 | 
  7 | 	"github.com/stretchr/testify/assert"
  8 | 	"github.com/stretchr/testify/require"
  9 | )
 10 | 
 11 | func TestAppendBows(t *testing.T) {
 12 | 	t.Run("no bows", func(t *testing.T) {
 13 | 		appended, err := AppendBows()
 14 | 		assert.NoError(t, err)
 15 | 		assert.Nil(t, appended)
 16 | 	})
 17 | 
 18 | 	t.Run("one empty bow", func(t *testing.T) {
 19 | 		b, _ := NewBowFromColBasedInterfaces(
 20 | 			[]string{"a"},
 21 | 			[]Type{Int64},
 22 | 			[][]interface{}{{}})
 23 | 		appended, err := AppendBows(b)
 24 | 		assert.NoError(t, err)
 25 | 		assert.True(t, appended.Equal(b), fmt.Sprintf(
 26 | 			"want:\n%v\nhave:\n%v", b, appended))
 27 | 	})
 28 | 
 29 | 	t.Run("first empty bow", func(t *testing.T) {
 30 | 		b1, _ := NewBowFromColBasedInterfaces(
 31 | 			[]string{"a"},
 32 | 			[]Type{Int64},
 33 | 			[][]interface{}{{}})
 34 | 		b2, _ := NewBowFromColBasedInterfaces(
 35 | 			[]string{"a"},
 36 | 			[]Type{Int64},
 37 | 			[][]interface{}{
 38 | 				{1},
 39 | 			})
 40 | 		appended, err := AppendBows(b1, b2)
 41 | 		assert.NoError(t, err)
 42 | 		assert.True(t, appended.Equal(b2), fmt.Sprintf(
 43 | 			"want:\n%v\nhave:\n%v", b2, appended))
 44 | 	})
 45 | 
 46 | 	t.Run("several empty bows", func(t *testing.T) {
 47 | 		b, _ := NewBowFromColBasedInterfaces(
 48 | 			[]string{"a"},
 49 | 			[]Type{Int64},
 50 | 			[][]interface{}{{}})
 51 | 		appended, err := AppendBows(b, b)
 52 | 		assert.NoError(t, err)
 53 | 		assert.True(t, appended.Equal(b), fmt.Sprintf(
 54 | 			"want:\n%v\nhave:\n%v", b, appended))
 55 | 	})
 56 | 
 57 | 	t.Run("schema mismatch", func(t *testing.T) {
 58 | 		b1, _ := NewBowFromColBasedInterfaces(
 59 | 			[]string{"i", "s"},
 60 | 			[]Type{Int64, String},
 61 | 			[][]interface{}{
 62 | 				{"hey"},
 63 | 				{1},
 64 | 			})
 65 | 		b2, _ := NewBowFromColBasedInterfaces(
 66 | 			[]string{"a"},
 67 | 			[]Type{Int64},
 68 | 			[][]interface{}{
 69 | 				{1},
 70 | 			})
 71 | 
 72 | 		assert.Panics(t, func() { _, _ = AppendBows(b1, b2) })
 73 | 	})
 74 | 
 75 | 	t.Run("type mismatch", func(t *testing.T) {
 76 | 		b1, _ := NewBowFromColBasedInterfaces(
 77 | 			[]string{"i", "s"},
 78 | 			[]Type{Int64, Int64},
 79 | 			[][]interface{}{
 80 | 				{1},
 81 | 				{1},
 82 | 			})
 83 | 		b2, _ := NewBowFromColBasedInterfaces(
 84 | 			[]string{"a"},
 85 | 			[]Type{Int64, Float64},
 86 | 			[][]interface{}{
 87 | 				{1},
 88 | 				{1.},
 89 | 			})
 90 | 
 91 | 		assert.Panics(t, func() { _, _ = AppendBows(b1, b2) })
 92 | 	})
 93 | 
 94 | 	t.Run("3 bows of 2 cols", func(t *testing.T) {
 95 | 		b1, _ := NewBowFromColBasedInterfaces(
 96 | 			[]string{"a", "b"},
 97 | 			[]Type{Int64, Float64},
 98 | 			[][]interface{}{
 99 | 				{1, 2, 3},
100 | 				{.1, .2, .3},
101 | 			})
102 | 		b2, _ := NewBowFromColBasedInterfaces(
103 | 			[]string{"a", "b"},
104 | 			[]Type{Int64, Float64},
105 | 			[][]interface{}{
106 | 				{4, 5},
107 | 				{.4, .5},
108 | 			})
109 | 		b3, _ := NewBowFromColBasedInterfaces(
110 | 			[]string{"a", "b"},
111 | 			[]Type{Int64, Float64},
112 | 			[][]interface{}{
113 | 				{6},
114 | 				{.6},
115 | 			})
116 | 
117 | 		appended, err := AppendBows(b1, b2, b3)
118 | 		expected, _ := NewBowFromColBasedInterfaces(
119 | 			[]string{"a", "b"},
120 | 			[]Type{Int64, Float64},
121 | 			[][]interface{}{
122 | 				{1, 2, 3, 4, 5, 6},
123 | 				{.1, .2, .3, .4, .5, .6},
124 | 			})
125 | 		assert.NoError(t, err)
126 | 		assert.True(t, appended.Equal(expected), fmt.Sprintf(
127 | 			"want:\n%v\nhave:\n%v", expected, appended))
128 | 	})
129 | 
130 | 	t.Run("2 bows with the same metadata", func(t *testing.T) {
131 | 		b1, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}),
132 | 			NewSeries("time", Int64, []int64{1, 2}, nil),
133 | 			NewSeries("value", Float64, []float64{.1, .2}, nil),
134 | 		)
135 | 		require.NoError(t, err)
136 | 
137 | 		b2, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}),
138 | 			NewSeries("time", Int64, []int64{3, 4}, nil),
139 | 			NewSeries("value", Float64, []float64{.3, .4}, nil),
140 | 		)
141 | 		require.NoError(t, err)
142 | 
143 | 		expected, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}),
144 | 			NewSeries("time", Int64, []int64{1, 2, 3, 4}, nil),
145 | 			NewSeries("value", Float64, []float64{.1, .2, .3, .4}, nil),
146 | 		)
147 | 		require.NoError(t, err)
148 | 
149 | 		appended, err := AppendBows(b1, b2)
150 | 		assert.NoError(t, err)
151 | 
152 | 		assert.Equal(t, expected.String(), appended.String())
153 | 	})
154 | 
155 | 	t.Run("same column names but different types", func(t *testing.T) {
156 | 		b1, err := NewBowFromColBasedInterfaces(
157 | 			[]string{"a", "b"},
158 | 			[]Type{Int64, Float64},
159 | 			[][]interface{}{
160 | 				{1, 2},
161 | 				{.1, .2},
162 | 			})
163 | 		require.NoError(t, err)
164 | 		b2, err := NewBowFromColBasedInterfaces(
165 | 			[]string{"a", "b"},
166 | 			[]Type{Int64, Int64},
167 | 			[][]interface{}{
168 | 				{3},
169 | 				{3},
170 | 			})
171 | 		require.NoError(t, err)
172 | 
173 | 		_, err = AppendBows(b1, b2)
174 | 		assert.Error(t, err)
175 | 	})
176 | }
177 | 
178 | func BenchmarkAppendBows(b *testing.B) {
179 | 	for rows := 10; rows <= 100000; rows *= 10 {
180 | 		b1, err := NewBow(
181 | 			NewSeries("time", Int64, make([]int64, rows), nil),
182 | 			NewSeries("value", Float64, make([]float64, rows), nil))
183 | 		require.NoError(b, err)
184 | 
185 | 		b2, err := NewBow(
186 | 			NewSeries("time", Int64, make([]int64, rows), nil),
187 | 			NewSeries("value", Float64, make([]float64, rows), nil))
188 | 		require.NoError(b, err)
189 | 
190 | 		b.Run(fmt.Sprintf("%d_rows", rows), func(b *testing.B) {
191 | 			for n := 0; n < b.N; n++ {
192 | 				_, err := AppendBows(b1, b2)
193 | 				require.NoError(b, err)
194 | 			}
195 | 		})
196 | 	}
197 | }
198 | 


--------------------------------------------------------------------------------
/bowassertion.go:
--------------------------------------------------------------------------------
 1 | package bow
 2 | 
 3 | import (
 4 | 	"github.com/apache/arrow/go/v8/arrow/array"
 5 | )
 6 | 
 7 | const (
 8 | 	orderUndefined = iota
 9 | 	orderASC
10 | 	orderDESC
11 | )
12 | 
13 | // IsColSorted returns a boolean whether the column colIndex is sorted or not, skipping nil values.
14 | // An empty column or an unsupported data type returns false.
15 | func (b *bow) IsColSorted(colIndex int) bool {
16 | 	if b.IsColEmpty(colIndex) {
17 | 		return false
18 | 	}
19 | 	var rowIndex int
20 | 	var order = orderUndefined
21 | 
22 | 	switch b.ColumnType(colIndex) {
23 | 	case Int64:
24 | 		arr := array.NewInt64Data(b.Column(colIndex).Data())
25 | 		values := arr.Int64Values()
26 | 		for arr.IsNull(rowIndex) {
27 | 			rowIndex++
28 | 		}
29 | 		curr := values[rowIndex]
30 | 		var next int64
31 | 		rowIndex++
32 | 		for ; rowIndex < len(values); rowIndex++ {
33 | 			if !arr.IsValid(rowIndex) {
34 | 				continue
35 | 			}
36 | 			next = values[rowIndex]
37 | 			if order == orderUndefined {
38 | 				if curr < next {
39 | 					order = orderASC
40 | 				} else if curr > next {
41 | 					order = orderDESC
42 | 				}
43 | 			}
44 | 			if order == orderASC && next < curr ||
45 | 				order == orderDESC && next > curr {
46 | 				return false
47 | 			}
48 | 			curr = next
49 | 		}
50 | 	case Float64:
51 | 		arr := array.NewFloat64Data(b.Column(colIndex).Data())
52 | 		values := arr.Float64Values()
53 | 		for arr.IsNull(rowIndex) {
54 | 			rowIndex++
55 | 		}
56 | 		curr := values[rowIndex]
57 | 		var next float64
58 | 		rowIndex++
59 | 		for ; rowIndex < len(values); rowIndex++ {
60 | 			if !arr.IsValid(rowIndex) {
61 | 				continue
62 | 			}
63 | 			next = values[rowIndex]
64 | 			if order == orderUndefined {
65 | 				if curr < next {
66 | 					order = orderASC
67 | 				} else if curr > next {
68 | 					order = orderDESC
69 | 				}
70 | 			}
71 | 			if order == orderASC && next < curr ||
72 | 				order == orderDESC && next > curr {
73 | 				return false
74 | 			}
75 | 			curr = next
76 | 		}
77 | 	default:
78 | 		return false
79 | 	}
80 | 	return true
81 | }
82 | 
83 | // IsColEmpty returns false if the column has at least one non-nil value, and true otherwise.
84 | func (b *bow) IsColEmpty(colIndex int) bool {
85 | 	return b.Column(colIndex).NullN() == b.Column(colIndex).Len()
86 | }
87 | 


--------------------------------------------------------------------------------
/bowassertion_test.go:
--------------------------------------------------------------------------------
  1 | package bow
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"testing"
  6 | 
  7 | 	"github.com/stretchr/testify/assert"
  8 | 	"github.com/stretchr/testify/require"
  9 | )
 10 | 
 11 | func TestBow_IsColSorted(t *testing.T) {
 12 | 	t.Run("int64", func(t *testing.T) {
 13 | 		b, _ := NewBowFromRowBasedInterfaces(
 14 | 			[]string{"a", "b", "c", "d", "e"},
 15 | 			[]Type{Int64, Int64, Int64, Int64, Int64},
 16 | 			[][]interface{}{
 17 | 				{-2, 1, nil, nil, -8},
 18 | 				{0, nil, 3, 4, 0},
 19 | 				{1, nil, nil, 120, nil},
 20 | 				{10, 4, 10, 10, -5},
 21 | 				{13, nil, nil, nil, nil},
 22 | 				{20, 6, 30, 400, -10},
 23 | 			})
 24 | 		sorted := b.IsColSorted(0)
 25 | 		assert.True(t, sorted)
 26 | 		sorted = b.IsColSorted(1)
 27 | 		assert.True(t, sorted)
 28 | 		sorted = b.IsColSorted(2)
 29 | 		assert.True(t, sorted)
 30 | 		sorted = b.IsColSorted(3)
 31 | 		assert.False(t, sorted)
 32 | 		sorted = b.IsColSorted(4)
 33 | 		assert.False(t, sorted)
 34 | 	})
 35 | 
 36 | 	t.Run("float64", func(t *testing.T) {
 37 | 		b, _ := NewBowFromRowBasedInterfaces([]string{"a", "b", "c", "d", "e"}, []Type{Float64, Float64, Float64, Float64, Float64}, [][]interface{}{
 38 | 			{-2.0, 1.0, nil, nil, -8.0},
 39 | 			{0.0, nil, 3.0, 4.0, 0.0},
 40 | 			{1.0, nil, nil, 120.0, nil},
 41 | 			{10.0, 4.0, 10.0, 10.0, -5.0},
 42 | 			{13.0, nil, nil, nil, nil},
 43 | 			{20.0, 6.0, 30.0, 400.0, -10.0},
 44 | 		})
 45 | 		sorted := b.IsColSorted(0)
 46 | 		assert.True(t, sorted)
 47 | 		sorted = b.IsColSorted(1)
 48 | 		assert.True(t, sorted)
 49 | 		sorted = b.IsColSorted(2)
 50 | 		assert.True(t, sorted)
 51 | 		sorted = b.IsColSorted(3)
 52 | 		assert.False(t, sorted)
 53 | 		sorted = b.IsColSorted(4)
 54 | 		assert.False(t, sorted)
 55 | 	})
 56 | 
 57 | 	t.Run("string (unsupported type)", func(t *testing.T) {
 58 | 		b, _ := NewBowFromRowBasedInterfaces([]string{"a", "b"}, []Type{String, String}, [][]interface{}{
 59 | 			{"egr", "rgr"},
 60 | 			{"zrr", nil},
 61 | 			{"zrfr", nil},
 62 | 			{"rgrg", "zefe"},
 63 | 			{"zfer", nil},
 64 | 			{"sffe", "srre"},
 65 | 		})
 66 | 		sorted := b.IsColSorted(0)
 67 | 		assert.False(t, sorted)
 68 | 		sorted = b.IsColSorted(1)
 69 | 		assert.False(t, sorted)
 70 | 	})
 71 | }
 72 | 
 73 | func TestBow_IsColEmpty(t *testing.T) {
 74 | 	b, err := NewBowFromRowBasedInterfaces(
 75 | 		[]string{"a", "b", "c"},
 76 | 		[]Type{Int64, Int64, Int64},
 77 | 		[][]interface{}{
 78 | 			{-2, 1, nil},
 79 | 			{0, nil, nil},
 80 | 			{1, nil, nil},
 81 | 		})
 82 | 	require.NoError(t, err)
 83 | 
 84 | 	empty := b.IsColEmpty(0)
 85 | 	assert.False(t, empty)
 86 | 	empty = b.IsColEmpty(1)
 87 | 	assert.False(t, empty)
 88 | 	empty = b.IsColEmpty(2)
 89 | 	assert.True(t, empty)
 90 | }
 91 | 
 92 | func BenchmarkBow_IsColSorted(b *testing.B) {
 93 | 	for rows := 10; rows <= 100000; rows *= 10 {
 94 | 		data, err := NewBowFromParquet(fmt.Sprintf(
 95 | 			"%sbow1-%d-rows.parquet", benchmarkBowsDirPath, rows), false)
 96 | 		require.NoError(b, err)
 97 | 
 98 | 		b.Run(fmt.Sprintf("sorted_%d_rows", rows), func(b *testing.B) {
 99 | 			for n := 0; n < b.N; n++ {
100 | 				data.IsColSorted(0)
101 | 			}
102 | 		})
103 | 
104 | 		b.Run(fmt.Sprintf("not_sorted_%d_rows", rows), func(b *testing.B) {
105 | 			for n := 0; n < b.N; n++ {
106 | 				data.IsColSorted(1)
107 | 			}
108 | 		})
109 | 	}
110 | }
111 | 


--------------------------------------------------------------------------------
/bowbuffer.go:
--------------------------------------------------------------------------------
  1 | package bow
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"sort"
  6 | 
  7 | 	"github.com/apache/arrow/go/v8/arrow/array"
  8 | 	"github.com/apache/arrow/go/v8/arrow/bitutil"
  9 | )
 10 | 
 11 | // Buffer is a mutable data structure with the purpose of easily building data Series with:
 12 | // - Data: slice of data.
 13 | // - DataType: type of the data.
 14 | // - nullBitmapBytes: slice of bytes representing valid or null values.
 15 | type Buffer struct {
 16 | 	Data            interface{}
 17 | 	DataType        Type
 18 | 	nullBitmapBytes []byte
 19 | }
 20 | 
 21 | // NewBuffer returns a new Buffer of size `size` and Type `typ`.
 22 | func NewBuffer(size int, typ Type) Buffer {
 23 | 	buf := Buffer{
 24 | 		DataType:        typ,
 25 | 		nullBitmapBytes: make([]byte, bitutil.CeilByte(size)/8),
 26 | 	}
 27 | 	switch typ {
 28 | 	case Int64:
 29 | 		buf.Data = make([]int64, size)
 30 | 	case Float64:
 31 | 		buf.Data = make([]float64, size)
 32 | 	case Boolean:
 33 | 		buf.Data = make([]bool, size)
 34 | 	case String:
 35 | 		buf.Data = make([]string, size)
 36 | 	default:
 37 | 		panic(fmt.Errorf("unsupported type '%s'", typ))
 38 | 	}
 39 | 	return buf
 40 | }
 41 | 
 42 | // Len returns the size of the underlying slice of data in the Buffer.
 43 | func (b Buffer) Len() int {
 44 | 	switch b.DataType {
 45 | 	case Int64:
 46 | 		return len(b.Data.([]int64))
 47 | 	case Float64:
 48 | 		return len(b.Data.([]float64))
 49 | 	case Boolean:
 50 | 		return len(b.Data.([]bool))
 51 | 	case String:
 52 | 		return len(b.Data.([]string))
 53 | 	default:
 54 | 		panic(fmt.Errorf("unsupported type '%s'", b.DataType))
 55 | 	}
 56 | }
 57 | 
 58 | // SetOrDrop sets the Buffer data at index `i` by attempting to convert `value` to its DataType.
 59 | // Sets the value to nil if the conversion failed or if `value` is nil.
 60 | func (b *Buffer) SetOrDrop(i int, value interface{}) {
 61 | 	var valid bool
 62 | 	switch b.DataType {
 63 | 	case Int64:
 64 | 		b.Data.([]int64)[i], valid = Int64.Convert(value).(int64)
 65 | 	case Float64:
 66 | 		b.Data.([]float64)[i], valid = Float64.Convert(value).(float64)
 67 | 	case Boolean:
 68 | 		b.Data.([]bool)[i], valid = Boolean.Convert(value).(bool)
 69 | 	case String:
 70 | 		b.Data.([]string)[i], valid = String.Convert(value).(string)
 71 | 	default:
 72 | 		panic(fmt.Errorf("unsupported type '%s'", b.DataType))
 73 | 	}
 74 | 
 75 | 	if valid {
 76 | 		bitutil.SetBit(b.nullBitmapBytes, i)
 77 | 	} else {
 78 | 		bitutil.ClearBit(b.nullBitmapBytes, i)
 79 | 	}
 80 | }
 81 | 
 82 | // SetOrDropStrict sets the Buffer data at index `i` by attempting a type assertion of `value` to its DataType.
 83 | // Sets the value to nil if the assertion failed or if `value` is nil.
 84 | func (b *Buffer) SetOrDropStrict(i int, value interface{}) {
 85 | 	var valid bool
 86 | 	switch b.DataType {
 87 | 	case Int64:
 88 | 		b.Data.([]int64)[i], valid = value.(int64)
 89 | 	case Float64:
 90 | 		b.Data.([]float64)[i], valid = value.(float64)
 91 | 	case Boolean:
 92 | 		b.Data.([]bool)[i], valid = value.(bool)
 93 | 	case String:
 94 | 		b.Data.([]string)[i], valid = value.(string)
 95 | 	default:
 96 | 		panic(fmt.Errorf("unsupported type '%s'", b.DataType))
 97 | 	}
 98 | 
 99 | 	if valid {
100 | 		bitutil.SetBit(b.nullBitmapBytes, i)
101 | 	} else {
102 | 		bitutil.ClearBit(b.nullBitmapBytes, i)
103 | 	}
104 | }
105 | 
106 | // GetValue gets the value at index `i` from the Buffer
107 | func (b *Buffer) GetValue(i int) interface{} {
108 | 	if bitutil.BitIsNotSet(b.nullBitmapBytes, i) {
109 | 		return nil
110 | 	}
111 | 
112 | 	switch b.DataType {
113 | 	case Int64:
114 | 		return b.Data.([]int64)[i]
115 | 	case Float64:
116 | 		return b.Data.([]float64)[i]
117 | 	case Boolean:
118 | 		return b.Data.([]bool)[i]
119 | 	case String:
120 | 		return b.Data.([]string)[i]
121 | 	default:
122 | 		panic(fmt.Errorf("unsupported type '%s'", b.DataType))
123 | 	}
124 | }
125 | 
126 | func (b Buffer) Less(i, j int) bool {
127 | 	switch b.DataType {
128 | 	case Int64:
129 | 		return b.Data.([]int64)[i] < b.Data.([]int64)[j]
130 | 	case Float64:
131 | 		return b.Data.([]float64)[i] < b.Data.([]float64)[j]
132 | 	case String:
133 | 		return b.Data.([]string)[i] < b.Data.([]string)[j]
134 | 	case Boolean:
135 | 		return !b.Data.([]bool)[i] && b.Data.([]bool)[j]
136 | 	default:
137 | 		panic(fmt.Errorf("unsupported type '%s'", b.DataType))
138 | 	}
139 | }
140 | 
141 | func (b *bow) NewBufferFromCol(colIndex int) Buffer {
142 | 	data := b.Column(colIndex).Data()
143 | 	res := Buffer{DataType: b.ColumnType(colIndex)}
144 | 	switch b.ColumnType(colIndex) {
145 | 	case Int64:
146 | 		arr := array.NewInt64Data(data)
147 | 		nullBitmapBytes := arr.NullBitmapBytes()[:bitutil.CeilByte(arr.Data().Len())/8]
148 | 		nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes))
149 | 		copy(nullBitmapBytesCopy, nullBitmapBytes)
150 | 		res.Data = int64Values(arr)
151 | 		res.nullBitmapBytes = nullBitmapBytesCopy
152 | 	case Float64:
153 | 		arr := array.NewFloat64Data(data)
154 | 		nullBitmapBytes := arr.NullBitmapBytes()[:bitutil.CeilByte(arr.Data().Len())/8]
155 | 		nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes))
156 | 		copy(nullBitmapBytesCopy, nullBitmapBytes)
157 | 		res.Data = float64Values(arr)
158 | 		res.nullBitmapBytes = nullBitmapBytesCopy
159 | 	case Boolean:
160 | 		arr := array.NewBooleanData(data)
161 | 		nullBitmapBytes := arr.NullBitmapBytes()[:bitutil.CeilByte(arr.Data().Len())/8]
162 | 		nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes))
163 | 		copy(nullBitmapBytesCopy, nullBitmapBytes)
164 | 		res.Data = booleanValues(arr)
165 | 		res.nullBitmapBytes = nullBitmapBytesCopy
166 | 	case String:
167 | 		arr := array.NewStringData(data)
168 | 		nullBitmapBytes := arr.NullBitmapBytes()[:bitutil.CeilByte(arr.Data().Len())/8]
169 | 		nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes))
170 | 		copy(nullBitmapBytesCopy, nullBitmapBytes)
171 | 		res.Data = stringValues(arr)
172 | 		res.nullBitmapBytes = nullBitmapBytesCopy
173 | 	default:
174 | 		panic(fmt.Errorf("unsupported type '%s'", b.ColumnType(colIndex)))
175 | 	}
176 | 	return res
177 | }
178 | 
179 | // NewBufferFromInterfaces returns a new typed Buffer with the data represented as a slice of interface{}, with eventual nil values.
180 | func NewBufferFromInterfaces(typ Type, data []interface{}) (Buffer, error) {
181 | 	buf := NewBuffer(len(data), typ)
182 | 	for i, c := range data {
183 | 		buf.SetOrDrop(i, c)
184 | 	}
185 | 	return buf, nil
186 | }
187 | 
188 | // IsValid return true if the value at row `rowIndex` is valid.
189 | func (b Buffer) IsValid(rowIndex int) bool {
190 | 	return bitutil.BitIsSet(b.nullBitmapBytes, rowIndex)
191 | }
192 | 
193 | // IsNull return true if the value at row `rowIndex` is nil.
194 | func (b Buffer) IsNull(rowIndex int) bool {
195 | 	return bitutil.BitIsNotSet(b.nullBitmapBytes, rowIndex)
196 | }
197 | 
198 | // IsSorted returns true if the values of the Buffer are sorted in ascending order.
199 | func (b Buffer) IsSorted() bool { return sort.IsSorted(b) }
200 | 
201 | // Swap swaps the values of the Buffer at indices i and j.
202 | func (b Buffer) Swap(i, j int) {
203 | 	v1, v2 := b.GetValue(i), b.GetValue(j)
204 | 	b.SetOrDropStrict(i, v2)
205 | 	b.SetOrDropStrict(j, v1)
206 | }
207 | 


--------------------------------------------------------------------------------
/bowbuffer_test.go:
--------------------------------------------------------------------------------
 1 | package bow
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"testing"
 6 | 
 7 | 	"github.com/stretchr/testify/require"
 8 | )
 9 | 
10 | func BenchmarkNewBufferFromInterfaces(b *testing.B) {
11 | 	for rows := 10; rows <= 100000; rows *= 10 {
12 | 		cells := make([]interface{}, rows)
13 | 		for i := range cells {
14 | 			cells[i] = int64(i)
15 | 		}
16 | 
17 | 		b.Run(fmt.Sprintf("%d_rows", rows), func(b *testing.B) {
18 | 			for n := 0; n < b.N; n++ {
19 | 				_, err := NewBufferFromInterfaces(Int64, cells)
20 | 				require.NoError(b, err)
21 | 			}
22 | 		})
23 | 	}
24 | }
25 | 
26 | func BenchmarkBuffer_SetOrDrop(b *testing.B) {
27 | 	buf := NewBuffer(10, Int64)
28 | 	b.ResetTimer()
29 | 	for n := 0; n < b.N; n++ {
30 | 		buf.SetOrDrop(9, int64(3))
31 | 		buf.SetOrDrop(9, nil)
32 | 	}
33 | }
34 | 
35 | func BenchmarkBuffer_SetOrStrict(b *testing.B) {
36 | 	buf := NewBuffer(10, Int64)
37 | 	b.ResetTimer()
38 | 	for n := 0; n < b.N; n++ {
39 | 		buf.SetOrDropStrict(9, int64(3))
40 | 		buf.SetOrDropStrict(9, nil)
41 | 	}
42 | }
43 | 
44 | func BenchmarkBuffer_GetValue(b *testing.B) {
45 | 	buf := NewBuffer(10, Int64)
46 | 	b.ResetTimer()
47 | 	for n := 0; n < b.N; n++ {
48 | 		_ = buf.GetValue(9)
49 | 	}
50 | }
51 | 


--------------------------------------------------------------------------------
/bowconvert.go:
--------------------------------------------------------------------------------
  1 | package bow
  2 | 
  3 | import (
  4 | 	"encoding/json"
  5 | 	"fmt"
  6 | 	"strconv"
  7 | )
  8 | 
  9 | // ToInt64 attempts to convert `input` to int64.
 10 | // Return also a false boolean if the conversion failed.
 11 | func ToInt64(input interface{}) (output int64, ok bool) {
 12 | 	switch input := input.(type) {
 13 | 	case json.Number:
 14 | 		output, err := input.Int64()
 15 | 		return output, err == nil
 16 | 	case int:
 17 | 		return int64(input), true
 18 | 	case int8:
 19 | 		return int64(input), true
 20 | 	case int16:
 21 | 		return int64(input), true
 22 | 	case int32:
 23 | 		return int64(input), true
 24 | 	case int64:
 25 | 		return input, true
 26 | 	case float32:
 27 | 		return int64(input), true
 28 | 	case float64:
 29 | 		return int64(input), true
 30 | 	case bool:
 31 | 		if input {
 32 | 			return 1, true
 33 | 		}
 34 | 		return 0, true
 35 | 	case string:
 36 | 		output, err := strconv.ParseInt(input, 10, 64)
 37 | 		return output, err == nil
 38 | 	}
 39 | 	return
 40 | }
 41 | 
 42 | // ToFloat64 attempts to convert `input` to float64.
 43 | // Return also a false boolean if the conversion failed.
 44 | func ToFloat64(input interface{}) (output float64, ok bool) {
 45 | 	switch input := input.(type) {
 46 | 	case float64:
 47 | 		return input, true
 48 | 	case json.Number:
 49 | 		output, err := input.Float64()
 50 | 		return output, err == nil
 51 | 	case int:
 52 | 		return float64(input), true
 53 | 	case int8:
 54 | 		return float64(input), true
 55 | 	case int16:
 56 | 		return float64(input), true
 57 | 	case int32:
 58 | 		return float64(input), true
 59 | 	case int64:
 60 | 		return float64(input), true
 61 | 	case float32:
 62 | 		return float64(input), true
 63 | 	case bool:
 64 | 		if input {
 65 | 			return 1., true
 66 | 		}
 67 | 		return 0., true
 68 | 	case string:
 69 | 		output, err := strconv.ParseFloat(input, 64)
 70 | 		return output, err == nil
 71 | 	}
 72 | 	return
 73 | }
 74 | 
 75 | // ToBoolean attempts to convert `input` to bool.
 76 | // Return also a false boolean if the conversion failed.
 77 | // In case of numeric type, returns true if the value is non-zero.
 78 | func ToBoolean(input interface{}) (output bool, ok bool) {
 79 | 	switch input := input.(type) {
 80 | 	case bool:
 81 | 		return input, true
 82 | 	case string:
 83 | 		output, err := strconv.ParseBool(input)
 84 | 		return output, err == nil
 85 | 	case json.Number:
 86 | 		output, err := input.Float64()
 87 | 		return output != 0., err != nil
 88 | 	case int:
 89 | 		return input != 0, true
 90 | 	case int8:
 91 | 		return input != 0, true
 92 | 	case int16:
 93 | 		return input != 0, true
 94 | 	case int32:
 95 | 		return input != 0, true
 96 | 	case int64:
 97 | 		return input != 0, true
 98 | 	case float32:
 99 | 		return input != 0., true
100 | 	case float64:
101 | 		return input != 0., true
102 | 	}
103 | 	return
104 | }
105 | 
// ToString attempts to convert `input` to string.
// Supported inputs are booleans ("true" / "false"), strings, json.Number,
// signed integers (base 10) and floats (formatted with the "%f" verb).
// The returned boolean is false if the type of `input` is not supported.
func ToString(input interface{}) (output string, ok bool) {
	switch input := input.(type) {
	case bool:
		if input {
			return "true", true
		}
		return "false", true
	case string:
		return input, true
	case json.Number:
		return input.String(), true
	case int:
		return strconv.Itoa(input), true
	case int8:
		return strconv.Itoa(int(input)), true
	case int16:
		return strconv.Itoa(int(input)), true
	case int32:
		return strconv.Itoa(int(input)), true
	case int64:
		// FormatInt avoids going through int, which is only 32 bits wide on
		// some platforms and would truncate large values.
		return strconv.FormatInt(input, 10), true
	case float32:
		return fmt.Sprintf("%f", input), true
	case float64:
		return fmt.Sprintf("%f", input), true
	}
	return
}
136 | 


--------------------------------------------------------------------------------
/bowconvert_test.go:
--------------------------------------------------------------------------------
  1 | package bow
  2 | 
  3 | import (
  4 | 	"testing"
  5 | 
  6 | 	"github.com/stretchr/testify/assert"
  7 | 	"github.com/stretchr/testify/require"
  8 | )
  9 | 
 10 | func TestToBool(t *testing.T) {
 11 | 	var v bool
 12 | 	var ok bool
 13 | 
 14 | 	v, ok = ToBoolean(true)
 15 | 	require.True(t, ok)
 16 | 	assert.Equal(t, true, v)
 17 | 
 18 | 	v, ok = ToBoolean(false)
 19 | 	require.True(t, ok)
 20 | 	assert.Equal(t, false, v)
 21 | 
 22 | 	v, ok = ToBoolean("true")
 23 | 	require.True(t, ok)
 24 | 	assert.Equal(t, true, v)
 25 | 
 26 | 	v, ok = ToBoolean("True")
 27 | 	require.True(t, ok)
 28 | 	assert.Equal(t, true, v)
 29 | 
 30 | 	v, ok = ToBoolean("false")
 31 | 	require.True(t, ok)
 32 | 	assert.Equal(t, false, v)
 33 | 
 34 | 	v, ok = ToBoolean("False")
 35 | 	require.True(t, ok)
 36 | 	assert.Equal(t, false, v)
 37 | 
 38 | 	v, ok = ToBoolean(1)
 39 | 	require.True(t, v)
 40 | 	require.True(t, ok)
 41 | 	v, ok = ToBoolean(0)
 42 | 	require.False(t, v)
 43 | 	require.True(t, ok)
 44 | 
 45 | 	v, ok = ToBoolean(1.)
 46 | 	require.True(t, v)
 47 | 	require.True(t, ok)
 48 | 	v, ok = ToBoolean(0.)
 49 | 	require.False(t, v)
 50 | 	require.True(t, ok)
 51 | }
 52 | 
 53 | func TestToFloat64(t *testing.T) {
 54 | 	var v float64
 55 | 	var ok bool
 56 | 
 57 | 	v, ok = ToFloat64(true)
 58 | 	require.True(t, ok)
 59 | 	assert.Equal(t, 1., v)
 60 | 
 61 | 	v, ok = ToFloat64(false)
 62 | 	require.True(t, ok)
 63 | 	assert.Equal(t, 0., v)
 64 | 
 65 | 	v, ok = ToFloat64(0.)
 66 | 	require.True(t, ok)
 67 | 	assert.Equal(t, 0., v)
 68 | 
 69 | 	v, ok = ToFloat64(0)
 70 | 	require.True(t, ok)
 71 | 	assert.Equal(t, 0., v)
 72 | 
 73 | 	v, ok = ToFloat64("0")
 74 | 	require.True(t, ok)
 75 | 	assert.Equal(t, 0., v)
 76 | }
 77 | 
 78 | func TestToInt64(t *testing.T) {
 79 | 	var v int64
 80 | 	var ok bool
 81 | 
 82 | 	v, ok = ToInt64(true)
 83 | 	require.True(t, ok)
 84 | 	assert.Equal(t, int64(1), v)
 85 | 
 86 | 	v, ok = ToInt64(false)
 87 | 	require.True(t, ok)
 88 | 	assert.Equal(t, int64(0), v)
 89 | 
 90 | 	v, ok = ToInt64(0.)
 91 | 	require.True(t, ok)
 92 | 	assert.Equal(t, int64(0), v)
 93 | 
 94 | 	v, ok = ToInt64(0)
 95 | 	require.True(t, ok)
 96 | 	assert.Equal(t, int64(0), v)
 97 | 
 98 | 	v, ok = ToInt64("0")
 99 | 	require.True(t, ok)
100 | 	assert.Equal(t, int64(0), v)
101 | }
102 | 
103 | func TestToString(t *testing.T) {
104 | 	var v string
105 | 	var ok bool
106 | 
107 | 	v, ok = ToString(true)
108 | 	require.True(t, ok)
109 | 	assert.Equal(t, "true", v)
110 | 
111 | 	v, ok = ToString(false)
112 | 	require.True(t, ok)
113 | 	assert.Equal(t, "false", v)
114 | 
115 | 	v, ok = ToString(0.)
116 | 	require.True(t, ok)
117 | 	assert.Equal(t, "0.000000", v)
118 | 
119 | 	v, ok = ToString(0)
120 | 	require.True(t, ok)
121 | 	assert.Equal(t, "0", v)
122 | 
123 | 	v, ok = ToString("0")
124 | 	require.True(t, ok)
125 | 	assert.Equal(t, "0", v)
126 | }
127 | 


--------------------------------------------------------------------------------
/bowdiff.go:
--------------------------------------------------------------------------------
 1 | package bow
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"sync"
 6 | )
 7 | 
 8 | // Diff calculates the first discrete difference of each row compared with the previous row.
 9 | // If any of the current or the previous row is nil, the result will be nil.
10 | // For boolean columns, XOR operation is used.
11 | // TODO: directly mutate bow && only read currVal at each iteration for performance improvement
12 | func (b *bow) Diff(colIndices ...int) (Bow, error) {
13 | 	selectedCols, err := selectCols(b, colIndices)
14 | 	if err != nil {
15 | 		return nil, err
16 | 	}
17 | 
18 | 	for colIndex, col := range b.Schema().Fields() {
19 | 		switch b.ColumnType(colIndex) {
20 | 		case Int64:
21 | 		case Float64:
22 | 		case Boolean:
23 | 		default:
24 | 			return nil, fmt.Errorf(
25 | 				"column '%s' is of unsupported type '%s'",
26 | 				col.Name, b.ColumnType(colIndex))
27 | 		}
28 | 	}
29 | 
30 | 	var wg sync.WaitGroup
31 | 	calcSeries := make([]Series, b.NumCols())
32 | 	for colIndex, col := range b.Schema().Fields() {
33 | 		if !selectedCols[colIndex] {
34 | 			calcSeries[colIndex] = b.NewSeriesFromCol(colIndex)
35 | 			continue
36 | 		}
37 | 
38 | 		wg.Add(1)
39 | 		go func(colIndex int, colName string) {
40 | 			defer wg.Done()
41 | 			colType := b.ColumnType(colIndex)
42 | 			colBuf := b.NewBufferFromCol(colIndex)
43 | 			calcBuf := NewBuffer(b.NumRows(), colType)
44 | 			for rowIndex := 1; rowIndex < b.NumRows(); rowIndex++ {
45 | 				valid := b.Column(colIndex).IsValid(rowIndex) &&
46 | 					b.Column(colIndex).IsValid(rowIndex-1)
47 | 				if !valid {
48 | 					continue
49 | 				}
50 | 				switch colType {
51 | 				case Int64:
52 | 					currVal := colBuf.GetValue(rowIndex).(int64)
53 | 					prevVal := colBuf.GetValue(rowIndex - 1).(int64)
54 | 					calcBuf.SetOrDrop(rowIndex, currVal-prevVal)
55 | 				case Float64:
56 | 					currVal := colBuf.GetValue(rowIndex).(float64)
57 | 					prevVal := colBuf.GetValue(rowIndex - 1).(float64)
58 | 					calcBuf.SetOrDrop(rowIndex, currVal-prevVal)
59 | 				case Boolean:
60 | 					currVal := colBuf.GetValue(rowIndex).(bool)
61 | 					prevVal := colBuf.GetValue(rowIndex - 1).(bool)
62 | 					calcBuf.SetOrDrop(rowIndex, currVal != prevVal)
63 | 				}
64 | 			}
65 | 
66 | 			calcSeries[colIndex] = NewSeriesFromBuffer(colName, calcBuf)
67 | 
68 | 		}(colIndex, col.Name)
69 | 	}
70 | 	wg.Wait()
71 | 
72 | 	return NewBowWithMetadata(b.Metadata(), calcSeries...)
73 | }
74 | 


--------------------------------------------------------------------------------
/bowdiff_test.go:
--------------------------------------------------------------------------------
 1 | package bow
 2 | 
 3 | import (
 4 | 	"testing"
 5 | 
 6 | 	"github.com/stretchr/testify/assert"
 7 | 	"github.com/stretchr/testify/require"
 8 | )
 9 | 
10 | func TestDiff(t *testing.T) {
11 | 	t.Run("all columns all supported types with nils and metadata", func(t *testing.T) {
12 | 		b, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}),
13 | 			NewSeries("a", Int64,
14 | 				[]int64{1, 2, 3, 4, 0, 5},
15 | 				[]bool{true, true, true, true, false, true}),
16 | 			NewSeries("b", Float64,
17 | 				[]float64{1., 2., 3., 4., 0., 5.},
18 | 				[]bool{true, true, true, true, false, true}),
19 | 			NewSeries("c", Boolean,
20 | 				[]bool{false, false, true, true, false, false},
21 | 				[]bool{true, true, true, true, false, true}),
22 | 		)
23 | 		require.NoError(t, err)
24 | 
25 | 		expected, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}),
26 | 			NewSeries("a", Int64,
27 | 				[]int64{0, 1, 1, 1, 0, 0},
28 | 				[]bool{false, true, true, true, false, false}),
29 | 			NewSeries("b", Float64,
30 | 				[]float64{0., 1., 1., 1., 0., 0.},
31 | 				[]bool{false, true, true, true, false, false}),
32 | 			NewSeries("c", Boolean,
33 | 				[]bool{false, false, true, false, false, false},
34 | 				[]bool{false, true, true, true, false, false}),
35 | 		)
36 | 		require.NoError(t, err)
37 | 
38 | 		calc, err := b.Diff()
39 | 		assert.NoError(t, err)
40 | 		assert.EqualValues(t, expected.String(), calc.String())
41 | 	})
42 | 
43 | 	t.Run("one column all supported types", func(t *testing.T) {
44 | 		b, err := NewBowFromRowBasedInterfaces(
45 | 			[]string{"a", "b", "c"},
46 | 			[]Type{Int64, Float64, Boolean},
47 | 			[][]interface{}{
48 | 				{1, 1., false},
49 | 				{2, 2., false},
50 | 				{3, 3., true},
51 | 			})
52 | 		require.NoError(t, err)
53 | 
54 | 		expected, err := NewBowFromRowBasedInterfaces(
55 | 			[]string{"a", "b", "c"},
56 | 			[]Type{Int64, Float64, Boolean},
57 | 			[][]interface{}{
58 | 				{1, nil, false},
59 | 				{2, 1., false},
60 | 				{3, 1., true},
61 | 			})
62 | 		require.NoError(t, err)
63 | 		calc, err := b.Diff(1)
64 | 		assert.NoError(t, err)
65 | 		assert.EqualValues(t, expected.String(), calc.String())
66 | 	})
67 | 
68 | 	t.Run("unsupported type string", func(t *testing.T) {
69 | 		b, err := NewBowFromRowBasedInterfaces([]string{"a"}, []Type{String}, [][]interface{}{})
70 | 		require.NoError(t, err)
71 | 
72 | 		calc, err := b.Diff()
73 | 		assert.Error(t, err)
74 | 		assert.Nil(t, calc)
75 | 	})
76 | 
77 | 	t.Run("empty", func(t *testing.T) {
78 | 		b, err := NewBowFromRowBasedInterfaces([]string{"a"}, []Type{Int64}, [][]interface{}{})
79 | 		require.NoError(t, err)
80 | 
81 | 		calc, err := b.Diff()
82 | 		assert.NoError(t, err)
83 | 		assert.EqualValues(t, b.String(), calc.String())
84 | 	})
85 | 
86 | 	t.Run("missing column", func(t *testing.T) {
87 | 		b, err := NewBowFromRowBasedInterfaces([]string{"a"}, []Type{Int64}, [][]interface{}{})
88 | 		require.NoError(t, err)
89 | 
90 | 		calc, err := b.Diff(1)
91 | 		assert.Error(t, err)
92 | 		assert.Nil(t, calc)
93 | 	})
94 | }
95 | 


--------------------------------------------------------------------------------
/bowfind.go:
--------------------------------------------------------------------------------
 1 | package bow
 2 | 
 3 | // Find returns the index of the row where `value` is found in the `colIndex` column.
 4 | // Returns -1 if the value is not found.
 5 | func (b *bow) Find(colIndex int, value interface{}) int {
 6 | 	return b.FindNext(colIndex, 0, value)
 7 | }
 8 | 
 9 | // FindNext returns the index of the row where `value` is found in the `colIndex` column, starting from the `rowIndex` row.
10 | // Returns -1 if the value is not found.
11 | func (b *bow) FindNext(colIndex, rowIndex int, value interface{}) int {
12 | 	if value == nil {
13 | 		for i := 0; i < b.NumRows(); i++ {
14 | 			if !b.Column(colIndex).IsValid(i) {
15 | 				return i
16 | 			}
17 | 		}
18 | 		return -1
19 | 	}
20 | 
21 | 	for i := rowIndex; i < b.NumRows(); i++ {
22 | 		if value == b.GetValue(colIndex, i) {
23 | 			return i
24 | 		}
25 | 	}
26 | 	return -1
27 | }
28 | 
29 | // Contains returns whether `value` is found in `colIndex` columns.
30 | func (b *bow) Contains(colIndex int, value interface{}) bool {
31 | 	return b.Find(colIndex, value) != -1
32 | }
33 | 


--------------------------------------------------------------------------------
/bowfind_test.go:
--------------------------------------------------------------------------------
 1 | package bow
 2 | 
 3 | import (
 4 | 	"testing"
 5 | 
 6 | 	"github.com/stretchr/testify/assert"
 7 | )
 8 | 
// sortedTestBow is the fixture used by TestBow_Find: four rows and one column per
// type (Int64, Float64, String, Boolean), each column being named after its type.
// In every column rows 0 and 3 hold the same value and row 2 is nil (invalid).
// The error returned by NewBow is discarded.
var sortedTestBow, _ = NewBow(
	NewSeries(Int64.String(), Int64,
		[]int64{0, 1, 0, 0},
		[]bool{true, true, false, true}),
	NewSeries(Float64.String(), Float64,
		[]float64{0., 1., 0., 0.},
		[]bool{true, true, false, true}),
	NewSeries(String.String(), String,
		[]string{"0", "1", "0", "0"},
		[]bool{true, true, false, true}),
	NewSeries(Boolean.String(), Boolean,
		[]bool{false, true, false, false},
		[]bool{true, true, false, true}),
)
23 | 
24 | func TestBow_Find(t *testing.T) {
25 | 	type toto int
26 | 	for i := 0; i < sortedTestBow.NumCols(); i++ {
27 | 		t.Run(sortedTestBow.ColumnName(i), func(t *testing.T) {
28 | 			v := sortedTestBow.GetValue(i, 0)
29 | 			assert.Equal(t, 0, sortedTestBow.Find(i, v))
30 | 			assert.Equal(t, 2, sortedTestBow.Find(i, nil))
31 | 			assert.Equal(t, -1, sortedTestBow.Find(i, toto(0)))
32 | 			assert.False(t, sortedTestBow.Contains(i, toto(0)))
33 | 			assert.True(t, sortedTestBow.Contains(i, v))
34 | 			assert.Equal(t, 3, sortedTestBow.FindNext(i, 1, v))
35 | 
36 | 			empty := sortedTestBow.NewEmptySlice()
37 | 			assert.Equal(t, -1, empty.Find(i, v))
38 | 			assert.Equal(t, -1, empty.Find(i, nil))
39 | 			assert.Equal(t, -1, empty.Find(i, toto(0)))
40 | 			assert.False(t, empty.Contains(i, v))
41 | 			assert.Equal(t, -1, empty.FindNext(i, 1, v))
42 | 		})
43 | 	}
44 | }
45 | 


--------------------------------------------------------------------------------
/bowgenerator.go:
--------------------------------------------------------------------------------
  1 | package bow
  2 | 
  3 | import (
  4 | 	crand "crypto/rand"
  5 | 	"fmt"
  6 | 	"math/big"
  7 | 
  8 | 	"github.com/google/uuid"
  9 | )
 10 | 
// genDefaultNumRows is the number of rows GenSeriesOptions.validate falls back to
// when NumRows is zero or negative.
const genDefaultNumRows = 3
 12 | 
// GenSeriesOptions are options to generate random Series:
// - NumRows: number of rows of the resulting Series
// - Name: name of the Series
// - Type: data type of the Series
// - GenStrategy: strategy of data generation
// - MissingData: sets whether the Series includes random nil values
//
// Unset fields are replaced by defaults in validate before the generation.
type GenSeriesOptions struct {
	NumRows     int         // genDefaultNumRows when <= 0; overwritten by NewGenBow with its numRows argument
	Name        string      // "default" when empty
	Type        Type        // Int64 when Unknown
	GenStrategy GenStrategy // GenStrategyIncremental when nil
	MissingData bool        // when true, genSeries randomly leaves rows unset
}
 26 | 
 27 | // NewGenBow generates a new random Bow with `numRows` rows and eventual GenSeriesOptions.
 28 | func NewGenBow(numRows int, options ...GenSeriesOptions) (Bow, error) {
 29 | 	series := make([]Series, len(options))
 30 | 	nameMap := make(map[string]struct{})
 31 | 	for i, o := range options {
 32 | 		o.NumRows = numRows
 33 | 		o.validate()
 34 | 		if _, ok := nameMap[o.Name]; ok {
 35 | 			o.Name = fmt.Sprintf("%s_%d", o.Name, i)
 36 | 		}
 37 | 		nameMap[o.Name] = struct{}{}
 38 | 		series[i] = o.genSeries()
 39 | 	}
 40 | 
 41 | 	return NewBow(series...)
 42 | }
 43 | 
 44 | // NewGenSeries returns a new randomly generated Series.
 45 | func NewGenSeries(o GenSeriesOptions) Series {
 46 | 	o.validate()
 47 | 	return o.genSeries()
 48 | }
 49 | 
 50 | func (o *GenSeriesOptions) validate() {
 51 | 	if o.NumRows <= 0 {
 52 | 		o.NumRows = genDefaultNumRows
 53 | 	}
 54 | 	if o.Name == "" {
 55 | 		o.Name = "default"
 56 | 	}
 57 | 	if o.Type == Unknown {
 58 | 		o.Type = Int64
 59 | 	}
 60 | 	if o.GenStrategy == nil {
 61 | 		o.GenStrategy = GenStrategyIncremental
 62 | 	}
 63 | }
 64 | 
 65 | func (o *GenSeriesOptions) genSeries() Series {
 66 | 	buf := NewBuffer(o.NumRows, o.Type)
 67 | 	for rowIndex := 0; rowIndex < o.NumRows; rowIndex++ {
 68 | 		if !o.MissingData ||
 69 | 			// 20% of nils values
 70 | 			(newRandomNumber(Int64).(int64) > 2) {
 71 | 			buf.SetOrDrop(rowIndex, o.GenStrategy(o.Type, rowIndex))
 72 | 		}
 73 | 	}
 74 | 
 75 | 	return NewSeriesFromBuffer(o.Name, buf)
 76 | }
 77 | 
// GenStrategy defines how random values are generated.
// It returns the value to store for a column of type `typ`; genSeries calls it once
// per generated row with the row index as `seed`.
type GenStrategy func(typ Type, seed int) interface{}
 80 | 
 81 | // GenStrategyRandom generates a random number of type `typ`.
 82 | func GenStrategyRandom(typ Type, seed int) interface{} {
 83 | 	return newRandomNumber(typ)
 84 | }
 85 | 
 86 | // GenStrategyIncremental generates a number of type `typ` equal to the converted `seed` value.
 87 | func GenStrategyIncremental(typ Type, seed int) interface{} {
 88 | 	return typ.Convert(seed)
 89 | }
 90 | 
 91 | // GenStrategyDecremental generates a number of type `typ` equal to the opposite of the converted `seed` value.
 92 | func GenStrategyDecremental(typ Type, seed int) interface{} {
 93 | 	return typ.Convert(-seed)
 94 | }
 95 | 
 96 | // GenStrategyRandomIncremental generates a random number of type `typ` by using the `seed` value.
 97 | func GenStrategyRandomIncremental(typ Type, seed int) interface{} {
 98 | 	i := int64(seed) * 10
 99 | 	switch typ {
100 | 	case Float64:
101 | 		add, _ := ToFloat64(newRandomNumber(Float64))
102 | 		return float64(i) + add
103 | 	default:
104 | 		add, _ := ToInt64(newRandomNumber(Int64))
105 | 		return typ.Convert(i + add)
106 | 	}
107 | }
108 | 
109 | // GenStrategyRandomDecremental generates a random number of type `typ` by using the `seed` value.
110 | func GenStrategyRandomDecremental(typ Type, seed int) interface{} {
111 | 	i := -int64(seed) * 10
112 | 	switch typ {
113 | 	default:
114 | 		add, _ := ToInt64(newRandomNumber(Int64))
115 | 		return typ.Convert(i - add)
116 | 	}
117 | }
118 | 
119 | func newRandomNumber(typ Type) interface{} {
120 | 	n, err := crand.Int(crand.Reader, big.NewInt(10))
121 | 	if err != nil {
122 | 		panic(err)
123 | 	}
124 | 	switch typ {
125 | 	case Int64:
126 | 		return n.Int64()
127 | 	case Float64:
128 | 		return float64(n.Int64()) + 0.5
129 | 	case Boolean:
130 | 		return n.Int64() > 5
131 | 	case String:
132 | 		return uuid.New().String()[:8]
133 | 	default:
134 | 		panic("unsupported data type")
135 | 	}
136 | }
137 | 


--------------------------------------------------------------------------------
/bowgenerator_test.go:
--------------------------------------------------------------------------------
 1 | package bow
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"testing"
 6 | 
 7 | 	"github.com/stretchr/testify/assert"
 8 | )
 9 | 
10 | func TestGenerator(t *testing.T) {
11 | 	t.Run("default", func(t *testing.T) {
12 | 		b, err := NewGenBow(0, GenSeriesOptions{})
13 | 		assert.NoError(t, err)
14 | 		assert.Equal(t, genDefaultNumRows, b.NumRows())
15 | 		assert.Equal(t, 1, b.NumCols())
16 | 		assert.Equal(t, Int64, b.ColumnType(0))
17 | 
18 | 		b2, err := b.DropNils()
19 | 		assert.NoError(t, err)
20 | 		assert.Equal(t, b, b2)
21 | 		assert.True(t, b2.Equal(b), fmt.Sprintf("want %v\ngot %v", b, b2))
22 | 	})
23 | 
24 | 	t.Run("with missing data", func(t *testing.T) {
25 | 		b, err := NewGenBow(100, GenSeriesOptions{MissingData: true})
26 | 		assert.NoError(t, err)
27 | 
28 | 		b2, err := b.DropNils()
29 | 		assert.NoError(t, err)
30 | 		assert.Less(t, b2.NumRows(), b.NumRows())
31 | 	})
32 | 
33 | 	t.Run("float64 with all columns sorted", func(t *testing.T) {
34 | 		b, err := NewGenBow(8,
35 | 			GenSeriesOptions{},
36 | 			GenSeriesOptions{Type: Float64},
37 | 		)
38 | 		assert.NoError(t, err)
39 | 
40 | 		assert.Equal(t, 8, b.NumRows())
41 | 		assert.Equal(t, 2, b.NumCols())
42 | 		assert.Equal(t, Int64, b.ColumnType(0))
43 | 		assert.Equal(t, Float64, b.ColumnType(1))
44 | 		assert.True(t, b.IsColSorted(0))
45 | 	})
46 | 
47 | 	t.Run("descending sort on last column", func(t *testing.T) {
48 | 		b, err := NewGenBow(3,
49 | 			GenSeriesOptions{GenStrategy: GenStrategyIncremental},
50 | 			GenSeriesOptions{GenStrategy: GenStrategyDecremental},
51 | 		)
52 | 		assert.NoError(t, err)
53 | 		assert.True(t, b.IsColSorted(0))
54 | 		assert.True(t, b.IsColSorted(1))
55 | 	})
56 | 
57 | 	t.Run("custom names and types", func(t *testing.T) {
58 | 		b, err := NewGenBow(4,
59 | 			GenSeriesOptions{Name: "A", Type: Int64},
60 | 			GenSeriesOptions{Name: "B", Type: Float64},
61 | 			GenSeriesOptions{Name: "C", Type: String},
62 | 			GenSeriesOptions{Name: "D", Type: Boolean},
63 | 		)
64 | 		assert.NoError(t, err)
65 | 
66 | 		assert.Equal(t, "A", b.ColumnName(0))
67 | 		assert.Equal(t, "B", b.ColumnName(1))
68 | 		assert.Equal(t, "C", b.ColumnName(2))
69 | 		assert.Equal(t, "D", b.ColumnName(3))
70 | 
71 | 		assert.Equal(t, Int64, b.ColumnType(0))
72 | 		assert.Equal(t, Float64, b.ColumnType(1))
73 | 		assert.Equal(t, String, b.ColumnType(2))
74 | 		assert.Equal(t, Boolean, b.ColumnType(3))
75 | 	})
76 | }
77 | 


--------------------------------------------------------------------------------
/bowgetters_test.go:
--------------------------------------------------------------------------------
 1 | package bow
 2 | 
 3 | import (
 4 | 	"testing"
 5 | 
 6 | 	"github.com/stretchr/testify/assert"
 7 | 	"github.com/stretchr/testify/require"
 8 | )
 9 | 
10 | func TestBow_GetValue(t *testing.T) {
11 | 	colNames := []string{"time", "value", "meta"}
12 | 	colTypes := []Type{Int64, Float64, String}
13 | 	colData := [][]interface{}{
14 | 		{1, 2, 3},
15 | 		{1.1, 2.2, 3.3},
16 | 		{"", "test", "3.3"},
17 | 	}
18 | 
19 | 	b, err := NewBowFromColBasedInterfaces(colNames, colTypes, colData)
20 | 	require.NoError(t, err)
21 | 
22 | 	assert.Equal(t, 3.3, b.GetValue(1, 2))
23 | 	assert.Equal(t, map[string]interface{}{
24 | 		"time":  int64(2),
25 | 		"value": 2.2,
26 | 		"meta":  "test",
27 | 	}, b.GetRow(1))
28 | 
29 | 	res, ok := b.GetFloat64(2, 2)
30 | 	assert.True(t, ok)
31 | 	assert.Equal(t, 3.3, res)
32 | }
33 | 
34 | func TestBow_Distinct(t *testing.T) {
35 | 	colNames := []string{"time", "value", "meta"}
36 | 	colTypes := []Type{Int64, Float64, String}
37 | 	colData := [][]interface{}{
38 | 		{1, 1, 2, nil, 3},
39 | 		{1.1, 1.1, 2.2, nil, 3.3},
40 | 		{"", "test", "test", nil, "3.3"},
41 | 	}
42 | 
43 | 	b, err := NewBowFromColBasedInterfaces(colNames, colTypes, colData)
44 | 	require.NoError(t, err)
45 | 
46 | 	t.Run(Int64.String(), func(t *testing.T) {
47 | 		res := b.Distinct(0)
48 | 		expect, err := NewBow(NewSeries("time", Int64, []int64{1, 2, 3}, nil))
49 | 		require.NoError(t, err)
50 | 
51 | 		ExpectEqual(t, expect, res)
52 | 	})
53 | 
54 | 	t.Run(Float64.String(), func(t *testing.T) {
55 | 		res := b.Distinct(1)
56 | 		expect, err := NewBow(NewSeries("value", Float64, []float64{1.1, 2.2, 3.3}, nil))
57 | 		require.NoError(t, err)
58 | 
59 | 		ExpectEqual(t, expect, res)
60 | 	})
61 | 
62 | 	t.Run(String.String(), func(t *testing.T) {
63 | 		res := b.Distinct(2)
64 | 		expect, err := NewBow(NewSeries("meta", String, []string{"", "3.3", "test"}, nil))
65 | 		require.NoError(t, err)
66 | 
67 | 		ExpectEqual(t, expect, res)
68 | 	})
69 | }
70 | 


--------------------------------------------------------------------------------
/bowjson.go:
--------------------------------------------------------------------------------
  1 | package bow
  2 | 
  3 | import (
  4 | 	"encoding/json"
  5 | 	"fmt"
  6 | )
  7 | 
// jsonField is the JSON representation of one column of a Bow schema.
type jsonField struct {
	Name string `json:"name"` // column name
	Type string `json:"type"` // type name, e.g. "int64", "float64" or "bool"
}
 12 | 
// JSONSchema is the JSON representation of a Bow schema: the list of its columns.
type JSONSchema struct {
	Fields []jsonField `json:"fields"`
}
 16 | 
// JSONBow is a structure representing a Bow for JSON marshaling purpose.
// It is encoded as an object with a "schema" key and a "data" key.
type JSONBow struct {
	Schema       JSONSchema               `json:"schema"`
	RowBasedData []map[string]interface{} `json:"data"` // one map per row, keyed by column name
}
 22 | 
 23 | // MarshalJSON returns the marshal encoding of the bow.
 24 | func (b bow) MarshalJSON() ([]byte, error) {
 25 | 	return json.Marshal(NewJSONBow(&b))
 26 | }
 27 | 
 28 | // NewJSONBow returns a new JSONBow structure from a Bow.
 29 | func NewJSONBow(b Bow) JSONBow {
 30 | 	if b == nil {
 31 | 		return JSONBow{}
 32 | 	}
 33 | 
 34 | 	res := JSONBow{
 35 | 		RowBasedData: make([]map[string]interface{}, 0, b.NumRows()),
 36 | 	}
 37 | 
 38 | 	for _, col := range b.Schema().Fields() {
 39 | 		res.Schema.Fields = append(
 40 | 			res.Schema.Fields,
 41 | 			jsonField{
 42 | 				Name: col.Name,
 43 | 				Type: col.Type.Name(),
 44 | 			})
 45 | 	}
 46 | 
 47 | 	for row := range b.GetRowsChan() {
 48 | 		if len(row) == 0 {
 49 | 			continue
 50 | 		}
 51 | 		res.RowBasedData = append(res.RowBasedData, row)
 52 | 	}
 53 | 
 54 | 	return res
 55 | }
 56 | 
 57 | // UnmarshalJSON parses the JSON-encoded data and stores the result in the bow.
 58 | func (b *bow) UnmarshalJSON(data []byte) error {
 59 | 	jsonB := JSONBow{}
 60 | 	if err := json.Unmarshal(data, &jsonB); err != nil {
 61 | 		return fmt.Errorf("json.Unmarshal: %w", err)
 62 | 	}
 63 | 
 64 | 	if err := b.NewValuesFromJSON(jsonB); err != nil {
 65 | 		return fmt.Errorf("bow.NewValuesFromJSON: %w", err)
 66 | 	}
 67 | 
 68 | 	return nil
 69 | 
 70 | }
 71 | 
 72 | // NewValuesFromJSON replaces the bow arrow.Record by a new one represented by the JSONBow structure.
 73 | func (b *bow) NewValuesFromJSON(jsonB JSONBow) error {
 74 | 	if len(jsonB.Schema.Fields) == 0 {
 75 | 		b.Record = NewBowEmpty().(*bow).Record
 76 | 		return nil
 77 | 	}
 78 | 
 79 | 	/*
 80 | 			Convert back json_table data types to bow data types
 81 | 			From pandas / io / json / _table_schema.py / as_json_table_type(x: DtypeObj) -> str:
 82 | 		    This table shows the relationship between NumPy / pandas dtypes,
 83 | 		    and Table Schema dtypes.
 84 | 		    ==============  =================
 85 | 		    Pandas type     Table Schema type
 86 | 		    ==============  =================
 87 | 		    int64           integer
 88 | 		    float64         number
 89 | 		    bool            boolean
 90 | 		    datetime64[ns]  datetime
 91 | 		    timedelta64[ns] duration
 92 | 		    object          str
 93 | 		    categorical     any
 94 | 		    =============== =================
 95 | 	*/
 96 | 
 97 | 	for fieldIndex, field := range jsonB.Schema.Fields {
 98 | 		if _, ok := mapArrowNameToBowTypes[field.Type]; ok {
 99 | 			continue
100 | 		}
101 | 		switch field.Type {
102 | 		case "integer":
103 | 			jsonB.Schema.Fields[fieldIndex].Type = "int64"
104 | 		case "number":
105 | 			jsonB.Schema.Fields[fieldIndex].Type = "float64"
106 | 		case "boolean":
107 | 			jsonB.Schema.Fields[fieldIndex].Type = "bool"
108 | 		}
109 | 	}
110 | 
111 | 	series := make([]Series, len(jsonB.Schema.Fields))
112 | 
113 | 	if jsonB.RowBasedData == nil {
114 | 		for fieldIndex, field := range jsonB.Schema.Fields {
115 | 			typ := getBowTypeFromArrowName(field.Type)
116 | 			buf := NewBuffer(0, typ)
117 | 			series[fieldIndex] = NewSeriesFromBuffer(field.Name, buf)
118 | 		}
119 | 
120 | 		tmpBow, err := NewBow(series...)
121 | 		if err != nil {
122 | 			return err
123 | 		}
124 | 
125 | 		b.Record = tmpBow.(*bow).Record
126 | 		return nil
127 | 	}
128 | 
129 | 	for fieldIndex, field := range jsonB.Schema.Fields {
130 | 		typ := getBowTypeFromArrowName(field.Type)
131 | 		buf := NewBuffer(len(jsonB.RowBasedData), typ)
132 | 		for rowIndex, row := range jsonB.RowBasedData {
133 | 			buf.SetOrDrop(rowIndex, row[field.Name])
134 | 		}
135 | 
136 | 		series[fieldIndex] = NewSeriesFromBuffer(field.Name, buf)
137 | 	}
138 | 
139 | 	tmpBow, err := NewBow(series...)
140 | 	if err != nil {
141 | 		return err
142 | 	}
143 | 
144 | 	b.Record = tmpBow.(*bow).Record
145 | 	return nil
146 | }
147 | 


--------------------------------------------------------------------------------
/bowjson_test.go:
--------------------------------------------------------------------------------
  1 | package bow
  2 | 
  3 | import (
  4 | 	"encoding/json"
  5 | 	"fmt"
  6 | 	"testing"
  7 | 
  8 | 	"github.com/stretchr/testify/assert"
  9 | 	"github.com/stretchr/testify/require"
 10 | )
 11 | 
 12 | func TestJSON(t *testing.T) {
 13 | 	t.Run("MarshalJSON", func(t *testing.T) {
 14 | 		t.Run("empty", func(t *testing.T) {
 15 | 			b := NewBowEmpty()
 16 | 
 17 | 			byteB, err := json.Marshal(b)
 18 | 			require.NoError(t, err)
 19 | 
 20 | 			jsonB := JSONBow{}
 21 | 			err = json.Unmarshal(byteB, &jsonB)
 22 | 			require.NoError(t, err)
 23 | 
 24 | 			expected := JSONBow{
 25 | 				Schema:       JSONSchema{},
 26 | 				RowBasedData: []map[string]interface{}{},
 27 | 			}
 28 | 			assert.Equal(t, expected, jsonB)
 29 | 		})
 30 | 
 31 | 		t.Run("simple", func(t *testing.T) {
 32 | 			b, err := NewBowFromRowBasedInterfaces(
 33 | 				[]string{"a", "b", "c"},
 34 | 				[]Type{Int64, Float64, Boolean},
 35 | 				[][]interface{}{
 36 | 					{100, 200., false},
 37 | 					{110, 220., true},
 38 | 					{111, 222., false},
 39 | 				})
 40 | 			require.NoError(t, err)
 41 | 
 42 | 			byteB, err := json.Marshal(b)
 43 | 			require.NoError(t, err)
 44 | 
 45 | 			jsonB := JSONBow{}
 46 | 			err = json.Unmarshal(byteB, &jsonB)
 47 | 			require.NoError(t, err)
 48 | 
 49 | 			expected := JSONBow{
 50 | 				Schema: JSONSchema{
 51 | 					Fields: []jsonField{
 52 | 						{Name: "a", Type: "int64"},
 53 | 						{Name: "b", Type: "float64"},
 54 | 						{Name: "c", Type: "bool"},
 55 | 					},
 56 | 				},
 57 | 				RowBasedData: []map[string]interface{}{
 58 | 					{"a": 100., "b": 200., "c": false},
 59 | 					{"a": 110., "b": 220., "c": true},
 60 | 					{"a": 111., "b": 222., "c": false},
 61 | 				},
 62 | 			}
 63 | 			assert.Equal(t, expected, jsonB)
 64 | 		})
 65 | 	})
 66 | 
 67 | 	t.Run("UnmarshalJSON", func(t *testing.T) {
 68 | 		t.Run("empty", func(t *testing.T) {
 69 | 			b := NewBowEmpty()
 70 | 
 71 | 			byteB, err := json.Marshal(b)
 72 | 			require.NoError(t, err)
 73 | 
 74 | 			bCopy := b
 75 | 			err = json.Unmarshal(byteB, bCopy)
 76 | 			require.NoError(t, err)
 77 | 
 78 | 			assert.True(t, b.Equal(bCopy),
 79 | 				fmt.Sprintf("have:\n%vexpect:\n%v", bCopy, b))
 80 | 		})
 81 | 
 82 | 		t.Run("simple", func(t *testing.T) {
 83 | 			b, err := NewBowFromRowBasedInterfaces(
 84 | 				[]string{"a", "b", "c"},
 85 | 				[]Type{Int64, Float64, Boolean},
 86 | 				[][]interface{}{
 87 | 					{100, 200., false},
 88 | 					{110, 220., true},
 89 | 					{111, 222., false},
 90 | 				})
 91 | 			require.NoError(t, err)
 92 | 
 93 | 			byteB, err := json.Marshal(b)
 94 | 			require.NoError(t, err)
 95 | 
 96 | 			bCopy := b
 97 | 			err = json.Unmarshal(byteB, bCopy)
 98 | 			require.NoError(t, err)
 99 | 
100 | 			assert.True(t, b.Equal(bCopy),
101 | 				fmt.Sprintf("have:\n%vexpect:\n%v", bCopy, b))
102 | 		})
103 | 
104 | 		t.Run("simple no data", func(t *testing.T) {
105 | 			b, err := NewBowFromRowBasedInterfaces(
106 | 				[]string{"a", "b", "c"},
107 | 				[]Type{Int64, Float64, Boolean},
108 | 				[][]interface{}{})
109 | 			require.NoError(t, err)
110 | 
111 | 			byteB, err := json.Marshal(b)
112 | 			require.NoError(t, err)
113 | 
114 | 			bCopy := b
115 | 			err = json.Unmarshal(byteB, bCopy)
116 | 			require.NoError(t, err)
117 | 
118 | 			assert.True(t, b.Equal(bCopy),
119 | 				fmt.Sprintf("have:\n%vexpect:\n%v", bCopy, b))
120 | 		})
121 | 	})
122 | }
123 | 
124 | func BenchmarkBow_JSON(b *testing.B) {
125 | 	for rows := 10; rows <= 100000; rows *= 10 {
126 | 		data, err := NewBowFromParquet(fmt.Sprintf(
127 | 			"%sbow1-%d-rows.parquet", benchmarkBowsDirPath, rows), false)
128 | 		require.NoError(b, err)
129 | 
130 | 		var j []byte
131 | 		b.Run(fmt.Sprintf("Marshal/%d_rows", rows), func(b *testing.B) {
132 | 			for n := 0; n < b.N; n++ {
133 | 				j, err = data.MarshalJSON()
134 | 				require.NoError(b, err)
135 | 			}
136 | 		})
137 | 
138 | 		b.Run(fmt.Sprintf("Unmarshal/%d_rows", rows), func(b *testing.B) {
139 | 			for n := 0; n < b.N; n++ {
140 | 				require.NoError(b, NewBowEmpty().UnmarshalJSON(j))
141 | 			}
142 | 		})
143 | 	}
144 | }
145 | 


--------------------------------------------------------------------------------
/bowmetadata.go:
--------------------------------------------------------------------------------
 1 | package bow
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 
 6 | 	"github.com/apache/arrow/go/v8/arrow"
 7 | 	"github.com/apache/arrow/go/v8/arrow/array"
 8 | )
 9 | 
// Metadata is wrapping arrow.Metadata.
// The arrow.Metadata is embedded: its methods are available on Metadata, which
// adds the Set and SetMany methods.
type Metadata struct {
	arrow.Metadata
}
14 | 
15 | // NewMetadata returns a new Metadata.
16 | func NewMetadata(keys, values []string) Metadata {
17 | 	return Metadata{arrow.NewMetadata(keys, values)}
18 | }
19 | 
20 | // NewBowWithMetadata returns a new Bow from Metadata and Series.
21 | func NewBowWithMetadata(metadata Metadata, series ...Series) (Bow, error) {
22 | 	rec, err := newRecord(metadata, series...)
23 | 	if err != nil {
24 | 		return nil, fmt.Errorf("newRecord: %w", err)
25 | 	}
26 | 
27 | 	return &bow{Record: rec}, nil
28 | }
29 | 
30 | // Metadata return a copy of the bow Schema Metadata.
31 | func (b *bow) Metadata() Metadata {
32 | 	return NewMetadata(
33 | 		b.Schema().Metadata().Keys(),
34 | 		b.Schema().Metadata().Values())
35 | }
36 | 
37 | // SetMetadata sets a value for a given key and return a Bow with freshly created Metadata.
38 | func (b *bow) SetMetadata(key, value string) Bow {
39 | 	m := b.Metadata()
40 | 	m = m.Set(key, value)
41 | 	return &bow{Record: array.NewRecord(
42 | 		arrow.NewSchema(b.Schema().Fields(), &m.Metadata),
43 | 		b.Columns(),
44 | 		b.Record.NumRows())}
45 | }
46 | 
47 | // WithMetadata replaces the bow original Metadata.
48 | func (b *bow) WithMetadata(metadata Metadata) Bow {
49 | 	m := arrow.NewMetadata(metadata.Keys(), metadata.Values())
50 | 	return &bow{Record: array.NewRecord(
51 | 		arrow.NewSchema(b.Schema().Fields(), &m),
52 | 		b.Columns(),
53 | 		b.Record.NumRows())}
54 | }
55 | 
56 | // Set returns a new Metadata with the key/value pair set.
57 | // If the key already exists, it replaces its value.
58 | func (m *Metadata) Set(newKey, newValue string) Metadata {
59 | 	keys := m.Keys()
60 | 	values := m.Values()
61 | 	keyIndex := m.FindKey(newKey)
62 | 
63 | 	if keyIndex == -1 {
64 | 		keys = append(keys, newKey)
65 | 		values = append(values, newValue)
66 | 	} else {
67 | 		values[keyIndex] = newValue
68 | 	}
69 | 
70 | 	return Metadata{arrow.NewMetadata(keys, values)}
71 | }
72 | 
73 | // SetMany returns a new Metadata with the key/value pairs set.
74 | // If a key already exists, it replaces its value.
75 | func (m *Metadata) SetMany(newKeys, newValues []string) Metadata {
76 | 	if len(newKeys) != len(newValues) {
77 | 		panic("metadata len mismatch")
78 | 	}
79 | 	if len(newKeys) == 0 {
80 | 		return *m
81 | 	}
82 | 
83 | 	keys := m.Keys()
84 | 	values := m.Values()
85 | 
86 | 	for i, newKey := range newKeys {
87 | 		newKeyIndex := m.FindKey(newKey)
88 | 		if newKeyIndex == -1 {
89 | 			keys = append(keys, newKey)
90 | 			values = append(values, newValues[i])
91 | 		} else {
92 | 			values[newKeyIndex] = newValues[i]
93 | 		}
94 | 	}
95 | 
96 | 	return Metadata{arrow.NewMetadata(keys, values)}
97 | }
98 | 


--------------------------------------------------------------------------------
/bowmetadata_test.go:
--------------------------------------------------------------------------------
 1 | package bow
 2 | 
 3 | import (
 4 | 	"testing"
 5 | 
 6 | 	"github.com/stretchr/testify/assert"
 7 | )
 8 | 
 9 | func TestBow_WithMetadata(t *testing.T) {
10 | 	t.Run("adding meta should not modify bow, but correctly change schema", func(t *testing.T) {
11 | 		metadata := NewMetadata([]string{"testKey"}, []string{"testValue"})
12 | 		b, _ := NewBow(NewSeries("test", Int64, []int64{1, 2}, nil))
13 | 		// WithMetadata must return an updated copy (res) and leave b itself untouched.
14 | 		res := b.WithMetadata(metadata)
15 | 		assert.True(t, res.Metadata().Equal(metadata.Metadata),
16 | 			"expected %q have %q", metadata.String(), res.Metadata().String())
17 | 		assert.Equal(t, 0, b.Metadata().Len())
18 | 		assert.Equal(t, 1, res.Metadata().Len())
19 | 	})
20 | }
21 | 
22 | func TestMetadataSetMany(t *testing.T) {
23 | 	t.Run("single set on existing key", func(t *testing.T) {
24 | 		metadata := NewMetadata([]string{"testKey"}, []string{"testValue"})
25 | 		expected := NewMetadata([]string{"testKey"}, []string{"updatedValue"})
26 | 		// Setting an existing key must overwrite its value without adding an entry.
27 | 		res := metadata.SetMany([]string{"testKey"}, []string{"updatedValue"})
28 | 		assert.Equal(t, expected, res, "expected %q have %q", expected.String(), res.String())
29 | 	})
30 | 
31 | 	t.Run("single set on new key", func(t *testing.T) {
32 | 		metadata := NewMetadata([]string{"testKey1"}, []string{"testValue1"})
33 | 		expected := NewMetadata([]string{"testKey1", "testKey2"}, []string{"testValue1", "testValue2"})
34 | 		// Setting an unknown key must append it after the existing entries.
35 | 		res := metadata.SetMany([]string{"testKey2"}, []string{"testValue2"})
36 | 		assert.Equal(t, expected, res, "expected %q have %q", expected.String(), res.String())
37 | 	})
38 | 
39 | 	t.Run("set many", func(t *testing.T) {
40 | 		metadata := NewMetadata(
41 | 			[]string{"testKey1", "testKey2", "testKey3"},
42 | 			[]string{"testValue1", "testValue2", "testValue3"})
43 | 
44 | 		expectedKeys := []string{"testKey1", "testKey2", "testKey3", "testKey4", "testKey5", "testKey6"}
45 | 		expectedValues := []string{"testValue1", "updatedValue2", "testValue3", "testValue4", "testValue5", "testValue6"}
46 | 		expected := NewMetadata(expectedKeys, expectedValues)
47 | 		// testKey2 is overwritten in place; testKey4 to testKey6 are appended in the order given.
48 | 		res := metadata.SetMany(
49 | 			[]string{"testKey2", "testKey4", "testKey5", "testKey6"},
50 | 			[]string{"updatedValue2", "testValue4", "testValue5", "testValue6"})
51 | 		assert.Equal(t, expected, res, "expected %q have %q", expected.String(), res.String())
52 | 	})
53 | }
54 | 
55 | func TestMetadataSet(t *testing.T) {
56 | 	t.Run("single set on existing key", func(t *testing.T) {
57 | 		metadata := NewMetadata([]string{"testKey"}, []string{"testValue"})
58 | 		expected := NewMetadata([]string{"testKey"}, []string{"updatedValue"})
59 | 		// Setting an existing key must overwrite its value without adding an entry.
60 | 		res := metadata.Set("testKey", "updatedValue")
61 | 		assert.Equal(t, expected, res, "expected %q have %q", expected.String(), res.String())
62 | 	})
63 | 
64 | 	t.Run("single set on new key", func(t *testing.T) {
65 | 		metadata := NewMetadata([]string{"testKey1"}, []string{"testValue1"})
66 | 		expected := NewMetadata([]string{"testKey1", "testKey2"}, []string{"testValue1", "testValue2"})
67 | 		// Setting an unknown key must append it after the existing entry.
68 | 		res := metadata.Set("testKey2", "testValue2")
69 | 		assert.Equal(t, expected, res, "expected %q have %q", expected.String(), res.String())
70 | 	})
71 | }
72 | 


--------------------------------------------------------------------------------
/bowparquet_test.go:
--------------------------------------------------------------------------------
  1 | package bow
  2 | 
  3 | import (
  4 | 	"encoding/json"
  5 | 	"fmt"
  6 | 	"os"
  7 | 	"testing"
  8 | 	"time"
  9 | 
 10 | 	"github.com/apache/arrow/go/v8/arrow"
 11 | 	"github.com/stretchr/testify/assert"
 12 | 	"github.com/stretchr/testify/require"
 13 | 	"github.com/xitongsys/parquet-go/parquet"
 14 | )
 15 | 
 16 | const (
 17 | 	testInputFileName  = "bowparquet_test_input.parquet"
 18 | 	testOutputFileName = "/tmp/bowparquet_test_output"
 19 | )
 20 | 
 21 | func TestParquet(t *testing.T) {
 22 | 	t.Run("read/write input file", func(t *testing.T) {
 23 | 		bBefore, err := NewBowFromParquet(testInputFileName, false)
 24 | 		assert.NoError(t, err)
 25 | 
 26 | 		assert.NoError(t, bBefore.WriteParquet(testOutputFileName, false))
 27 | 		// The file is written without extension and read back from the same path plus ".parquet".
 28 | 		bAfter, err := NewBowFromParquet(testOutputFileName+".parquet", false)
 29 | 		assert.NoError(t, err)
 30 | 
 31 | 		assert.Equal(t, bBefore.String(), bAfter.String())
 32 | 
 33 | 		require.NoError(t, os.Remove(testOutputFileName+".parquet"))
 34 | 	})
 35 | 
 36 | 	t.Run("all supported types with rows and nil values", func(t *testing.T) {
 37 | 		bBefore, err := NewBowFromRowBasedInterfaces(
 38 | 			[]string{"int", "float", "bool", "string"},
 39 | 			[]Type{Int64, Float64, Boolean, String},
 40 | 			[][]interface{}{
 41 | 				{1, 1., true, "hi"},
 42 | 				{2, 2., false, "ho"},
 43 | 				{nil, nil, nil, nil},
 44 | 				{3, 3., true, "hu"},
 45 | 			})
 46 | 		require.NoError(t, err)
 47 | 
 48 | 		assert.NoError(t, bBefore.WriteParquet(testOutputFileName+"_withrows", false))
 49 | 
 50 | 		bAfter, err := NewBowFromParquet(testOutputFileName+"_withrows.parquet", false)
 51 | 		assert.NoError(t, err)
 52 | 
 53 | 		assert.Equal(t, bBefore.String(), bAfter.String())
 54 | 
 55 | 		require.NoError(t, os.Remove(testOutputFileName+"_withrows.parquet"))
 56 | 	})
 57 | 
 58 | 	t.Run("bow supported types without rows", func(t *testing.T) {
 59 | 		bBefore, err := NewBowFromRowBasedInterfaces(
 60 | 			[]string{"int", "float", "bool", "string"},
 61 | 			[]Type{Int64, Float64, Boolean, String},
 62 | 			[][]interface{}{})
 63 | 		require.NoError(t, err)
 64 | 
 65 | 		assert.NoError(t, bBefore.WriteParquet(testOutputFileName+"_norows", false))
 66 | 
 67 | 		bAfter, err := NewBowFromParquet(testOutputFileName+"_norows.parquet", false)
 68 | 		assert.NoError(t, err)
 69 | 
 70 | 		assert.Equal(t, bBefore.String(), bAfter.String())
 71 | 
 72 | 		require.NoError(t, os.Remove(testOutputFileName+"_norows.parquet"))
 73 | 	})
 74 | 
 75 | 	t.Run("write empty bow", func(t *testing.T) {
 76 | 		bBefore := NewBowEmpty()
 77 | 		// assert.Errorf only requires a non-nil error: the string is the failure message, not an expected error text.
 78 | 		assert.Errorf(t,
 79 | 			bBefore.WriteParquet(testOutputFileName+"_empty", false),
 80 | 			"bow.WriteParquet: no columns",
 81 | 		)
 82 | 	})
 83 | 
 84 | 	t.Run("bow with context and col_types metadata", func(t *testing.T) {
 85 | 		var series = make([]Series, 2)
 86 | 		series[0] = NewSeries("time", Int64, []int64{0}, []bool{true})
 87 | 		series[1] = NewSeries("  va\"lue  ", Float64, []float64{0.}, []bool{true})
 88 | 		// Note the second column name above: surrounding spaces and an embedded double quote.
 89 | 		var keys, values []string
 90 | 		type Unit struct {
 91 | 			Symbol string `json:"symbol"`
 92 | 		}
 93 | 		type Meta struct {
 94 | 			Unit Unit `json:"unit"`
 95 | 		}
 96 | 		type Context map[string]Meta
 97 | 
 98 | 		var ctx = Context{
 99 | 			"time":        Meta{Unit{Symbol: "microseconds"}},
100 | 			"  va\"lue  ": Meta{Unit{Symbol: "kWh"}},
101 | 		}
102 | 
103 | 		contextJSON, err := json.Marshal(ctx)
104 | 		require.NoError(t, err)
105 | 
106 | 		keys = append(keys, "context")
107 | 		values = append(values, string(contextJSON))
108 | 
109 | 		bBefore, err := NewBowWithMetadata(
110 | 			newMetaWithParquetTimestampCol(keys, values, "time", time.Microsecond),
111 | 			series...)
112 | 		require.NoError(t, err)
113 | 
114 | 		err = bBefore.WriteParquet(testOutputFileName+"_meta", false)
115 | 		assert.NoError(t, err)
116 | 
117 | 		bAfter, err := NewBowFromParquet(testOutputFileName+"_meta.parquet", false)
118 | 		assert.NoError(t, err)
119 | 
120 | 		assert.Equal(t, bBefore.String(), bAfter.String())
121 | 
122 | 		require.NoError(t, os.Remove(testOutputFileName+"_meta.parquet"))
123 | 	})
124 | 
125 | 	t.Run("bow with wrong col_types metadata", func(t *testing.T) {
126 | 		var series = make([]Series, 2)
127 | 
128 | 		series[0] = NewSeries("time", Int64, []int64{0}, []bool{true})
129 | 		series[1] = NewSeries("value", Float64, []float64{0.}, []bool{true})
130 | 
131 | 		var keys, values []string
132 | 		// The timestamp metadata refers to "unknown", which is not a column of the bow.
133 | 		bBefore, err := NewBowWithMetadata(
134 | 			newMetaWithParquetTimestampCol(keys, values, "unknown", time.Microsecond),
135 | 			series...)
136 | 		assert.NoError(t, err)
137 | 
138 | 		assert.Error(t, bBefore.WriteParquet(testOutputFileName+"_wrong", false))
139 | 	})
140 | }
141 | 
142 | func TestBowGetParquetMetaColTimeUnit(t *testing.T) {
143 | 	timeCol := "time"
144 | 	var series = make([]Series, 2)
145 | 	series[0] = NewSeries(timeCol, Int64, []int64{0}, nil)
146 | 	series[1] = NewSeries("value", Float64, []float64{0.}, nil)
147 | 	// Each subtest attaches timestamp metadata for timeCol (column 0) and queries one column index.
148 | 	t.Run("time.Millisecond", func(t *testing.T) {
149 | 		b, err := NewBowWithMetadata(
150 | 			newMetaWithParquetTimestampCol([]string{}, []string{}, timeCol, time.Millisecond),
151 | 			series...)
152 | 		require.NoError(t, err)
153 | 
154 | 		got, err := b.GetParquetMetaColTimeUnit(0)
155 | 		require.NoError(t, err)
156 | 		assert.Equal(t, time.Millisecond, got)
157 | 	})
158 | 
159 | 	t.Run("time.Microsecond", func(t *testing.T) {
160 | 		b, err := NewBowWithMetadata(
161 | 			newMetaWithParquetTimestampCol([]string{}, []string{}, timeCol, time.Microsecond),
162 | 			series...)
163 | 		require.NoError(t, err)
164 | 
165 | 		got, err := b.GetParquetMetaColTimeUnit(0)
166 | 		require.NoError(t, err)
167 | 		assert.Equal(t, time.Microsecond, got)
168 | 	})
169 | 
170 | 	t.Run("time.Nanosecond", func(t *testing.T) {
171 | 		b, err := NewBowWithMetadata(
172 | 			newMetaWithParquetTimestampCol([]string{}, []string{}, timeCol, time.Nanosecond),
173 | 			series...)
174 | 		require.NoError(t, err)
175 | 
176 | 		got, err := b.GetParquetMetaColTimeUnit(0)
177 | 		require.NoError(t, err)
178 | 		assert.Equal(t, time.Nanosecond, got)
179 | 	})
180 | 
181 | 	t.Run("column without timestamp metadata", func(t *testing.T) {
182 | 		b, err := NewBowWithMetadata(
183 | 			newMetaWithParquetTimestampCol([]string{}, []string{}, timeCol, time.Nanosecond),
184 | 			series...)
185 | 		require.NoError(t, err)
186 | 		// Column 1 ("value") has no entry in the col_types metadata built above.
187 | 		got, err := b.GetParquetMetaColTimeUnit(1)
188 | 		require.ErrorIs(t, err, ErrColTimeUnitNotFound)
189 | 		require.Equal(t, time.Duration(0), got)
190 | 	})
191 | 
192 | 	t.Run("column out of range", func(t *testing.T) {
193 | 		b, err := NewBowWithMetadata(
194 | 			newMetaWithParquetTimestampCol([]string{}, []string{}, timeCol, time.Nanosecond),
195 | 			series...)
196 | 		require.NoError(t, err)
197 | 		// Index 42 is past the two columns of the bow: a panic is expected rather than an error.
198 | 		assert.Panics(t, func() {
199 | 			_, _ = b.GetParquetMetaColTimeUnit(42)
200 | 		})
201 | 	})
202 | }
203 | 
204 | // newMetaWithParquetTimestampCol returns a Metadata made of keys/values plus one
205 | // keyParquetColTypesMeta entry: the JSON description of colName as a UTC-adjusted
206 | // TIMESTAMP logical type expressed in timeUnit. It panics on any unit other than
207 | // time.Millisecond, time.Microsecond or time.Nanosecond, or if the JSON encoding fails.
208 | func newMetaWithParquetTimestampCol(keys, values []string, colName string, timeUnit time.Duration) Metadata {
209 | 	var unit parquet.TimeUnit
210 | 	switch timeUnit {
211 | 	case time.Millisecond:
212 | 		unit.MILLIS = &parquet.MilliSeconds{}
213 | 	case time.Microsecond:
214 | 		unit.MICROS = &parquet.MicroSeconds{}
215 | 	case time.Nanosecond:
216 | 		unit.NANOS = &parquet.NanoSeconds{}
217 | 	default:
218 | 		panic(fmt.Errorf("unsupported time unit '%s'", timeUnit))
219 | 	}
220 | 	// The single column description is serialized as a one-element JSON array.
221 | 	colTypesJSON, err := json.Marshal([]parquetColTypesMeta{{
222 | 		Name: colName,
223 | 		LogicalType: &parquet.LogicalType{
224 | 			TIMESTAMP: &parquet.TimestampType{
225 | 				IsAdjustedToUTC: true,
226 | 				Unit:            &unit,
227 | 			},
228 | 		},
229 | 	}})
230 | 	if err != nil {
231 | 		panic(err)
232 | 	}
233 | 
234 | 	return Metadata{arrow.NewMetadata(
235 | 		append(keys, keyParquetColTypesMeta),
236 | 		append(values, string(colTypesJSON)),
237 | 	)}
238 | }
239 | 


--------------------------------------------------------------------------------
/bowparquet_test_input.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Metronlab/bow/e1e1bd5c179e1541de46061796bb13736caadac2/bowparquet_test_input.parquet


--------------------------------------------------------------------------------
/bowrecord.go:
--------------------------------------------------------------------------------
 1 | package bow
 2 | 
 3 | import (
 4 | 	"errors"
 5 | 	"fmt"
 6 | 
 7 | 	"github.com/apache/arrow/go/v8/arrow"
 8 | 	"github.com/apache/arrow/go/v8/arrow/array"
 9 | )
10 | 
11 | // newRecord assembles an arrow.Record from series, attaching metadata to its schema.
12 | // Every series needs a non-nil Array of a supported type, a non-empty name and the same
13 | // length as the first one; each resulting field is declared nullable.
14 | func newRecord(metadata Metadata, series ...Series) (arrow.Record, error) {
15 | 	// The first series sets the expected row count (0 when there is none or its Array is nil).
16 | 	var nRows int64
17 | 	if len(series) > 0 && series[0].Array != nil {
18 | 		nRows = int64(series[0].Array.Len())
19 | 	}
20 | 
21 | 	var (
22 | 		fields []arrow.Field
23 | 		arrays []arrow.Array
24 | 	)
25 | 	for _, s := range series {
26 | 		// Checks run in this order; the first failing one aborts the whole record.
27 | 		switch {
28 | 		case s.Array == nil:
29 | 			return nil, errors.New("empty Series")
30 | 		case s.Name == "":
31 | 			return nil, errors.New("empty Series name")
32 | 		case getBowTypeFromArrowFingerprint(s.Array.DataType().Fingerprint()) == Unknown:
33 | 			return nil, fmt.Errorf("unsupported type '%s'", s.Array.DataType())
34 | 		case int64(s.Array.Len()) != nRows:
35 | 			return nil, fmt.Errorf(
36 | 				"bow.Series '%s' has a length of %d, which is different from the previous ones",
37 | 				s.Name, s.Array.Len())
38 | 		}
39 | 
40 | 		fields = append(fields, arrow.Field{Name: s.Name, Type: s.Array.DataType(), Nullable: true})
41 | 		arrays = append(arrays, s.Array)
42 | 	}
43 | 
44 | 	// fields and arrays are index-aligned: one entry per series, in the given order.
45 | 	schema := arrow.NewSchema(fields, &metadata.Metadata)
46 | 	return array.NewRecord(schema, arrays, nRows), nil
47 | }
48 | 


--------------------------------------------------------------------------------
/bowseries_test.go:
--------------------------------------------------------------------------------
 1 | package bow
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"testing"
 6 | 
 7 | 	"github.com/stretchr/testify/assert"
 8 | 	"github.com/stretchr/testify/require"
 9 | )
10 | 
11 | func TestNewSeriesFromInterfaces(t *testing.T) {
12 | 	for _, typ := range allType {
13 | 		t.Run(typ.String(), func(t *testing.T) {
14 | 			// Two cells per type: the type's conversion of 0, then a nil cell.
15 | 			bow, err := NewBow(NewSeriesFromInterfaces(typ.String(), typ, []interface{}{typ.Convert(0), nil}))
16 | 			require.NoError(t, err)
17 | 			assert.Equal(t, typ.Convert(0), bow.GetValue(0, 0))
18 | 			assert.Equal(t, nil, bow.GetValue(0, 1))
19 | 		})
20 | 	}
21 | }
22 | 
23 | func BenchmarkNewSeries(b *testing.B) {
24 | 	for rows := 10; rows <= 100000; rows *= 10 {
25 | 		// Input columns are built once per size, outside the measured loop.
26 | 		values, valid := make([]int64, rows), make([]bool, rows)
27 | 		for i := 0; i < rows; i++ {
28 | 			values[i] = int64(i)
29 | 			valid[i] = i%2 == 0
30 | 		}
31 | 
32 | 		b.Run(fmt.Sprintf("%d_rows", rows), func(b *testing.B) {
33 | 			for n := 0; n < b.N; n++ {
34 | 				NewSeries("test", Int64, values, valid)
35 | 			}
36 | 		})
37 | 	}
38 | }
39 | 
40 | func BenchmarkNewSeriesFromInterfaces(b *testing.B) {
41 | 	for rows := 10; rows <= 100000; rows *= 10 {
42 | 		cells := make([]interface{}, 0, rows)
43 | 		for i := 0; i < rows; i++ {
44 | 			cells = append(cells, int64(i))
45 | 		}
46 | 		// Only the construction of the series is inside the measured loop.
47 | 		b.Run(fmt.Sprintf("%d_rows", rows), func(b *testing.B) {
48 | 			for n := 0; n < b.N; n++ {
49 | 				NewSeriesFromInterfaces("test", Int64, cells)
50 | 			}
51 | 		})
52 | 	}
53 | }
54 | 


--------------------------------------------------------------------------------
/bowsetters.go:
--------------------------------------------------------------------------------
  1 | package bow
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | )
  6 | 
  7 | // RenameCol returns a new Bow with the column `colIndex` renamed to `newName`.
  8 | // It returns an error if colIndex is negative or not below NumCols, or if newName is empty.
  9 | func (b *bow) RenameCol(colIndex int, newName string) (Bow, error) {
 10 | 	if colIndex < 0 || colIndex >= b.NumCols() {
 11 | 		return nil, fmt.Errorf("column index out of bound")
 12 | 	}
 13 | 	if newName == "" {
 14 | 		return nil, fmt.Errorf("newName cannot be empty")
 15 | 	}
 16 | 
 17 | 	series := make([]Series, b.NumCols())
 18 | 	for i, col := range b.Columns() {
 19 | 		if i == colIndex {
 20 | 			series[i] = Series{
 21 | 				Name:  newName,
 22 | 				Array: col,
 23 | 			}
 24 | 		} else {
 25 | 			series[i] = b.NewSeriesFromCol(i)
 26 | 		}
 27 | 	}
 28 | 
 29 | 	return NewBowWithMetadata(b.Metadata(), series...)
 30 | }
 31 | 
 32 | // Apply returns a new Bow in which column `colIndex` holds the results of fn, stored as `returnType`.
 33 | // Results which `returnType` does not support are stored as nil values; the metadata is kept.
 34 | func (b *bow) Apply(colIndex int, returnType Type, fn func(interface{}) interface{}) (Bow, error) {
 35 | 	// Build the replacement column first: fn is called once per row, in row order.
 36 | 	applied := NewBuffer(b.NumRows(), returnType)
 37 | 	for rowIndex := 0; rowIndex < b.NumRows(); rowIndex++ {
 38 | 		applied.SetOrDropStrict(rowIndex, fn(b.GetValue(colIndex, rowIndex)))
 39 | 	}
 40 | 	series := make([]Series, b.NumCols())
 41 | 	for i := range b.Columns() {
 42 | 		if i != colIndex {
 43 | 			series[i] = b.NewSeriesFromCol(i)
 44 | 			continue
 45 | 		}
 46 | 		series[i] = NewSeriesFromBuffer(b.ColumnName(colIndex), applied)
 47 | 	}
 48 | 
 49 | 	return NewBowWithMetadata(b.Metadata(), series...)
 50 | }
 51 | 
 52 | // Convert returns a new Bow with column `colIndex` converted to type `t` by applying t.Convert.
 53 | // If this default conversion is not the one expected, use Apply with a custom function instead.
 54 | func (b *bow) Convert(colIndex int, t Type) (Bow, error) {
 55 | 	return b.Apply(colIndex, t, t.Convert)
 56 | }
 57 | 
 58 | // RowCmp is the row predicate used by Filter. It receives the whole Bow `b`, so that an
 59 | // implementation may compare across columns, and `i`, the index of the row under test.
 60 | // It returns true when the row has to be kept.
 61 | type RowCmp func(b Bow, i int) bool
 62 | 
 63 | // Filter returns a Bow made of the rows for which every given comparator returns true.
 64 | // Without comparator every row matches. When the matching rows are consecutive the result is
 65 | // obtained by slicing the receiver, which is cheaper than copying; otherwise rows are copied.
 66 | func (b *bow) Filter(fns ...RowCmp) Bow {
 67 | 	var kept []int
 68 | 	for rowIndex := 0; rowIndex < b.NumRows(); rowIndex++ {
 69 | 		if matchRowCmps(b, rowIndex, fns...) {
 70 | 			kept = append(kept, rowIndex)
 71 | 		}
 72 | 	}
 73 | 	if len(kept) == 0 {
 74 | 		return b.NewEmptySlice()
 75 | 	}
 76 | 
 77 | 	// kept is strictly increasing, so the rows are consecutive exactly when the range
 78 | 	// [first, end) holds as many indices as kept does: slicing is then enough.
 79 | 	first, end := kept[0], kept[len(kept)-1]+1
 80 | 	if end-first == len(kept) {
 81 | 		return b.NewSlice(first, end)
 82 | 	}
 83 | 
 84 | 	filteredSeries := make([]Series, b.NumCols())
 85 | 	for colIndex := range filteredSeries {
 86 | 		buf := NewBuffer(len(kept), b.ColumnType(colIndex))
 87 | 		for dst, src := range kept {
 88 | 			buf.SetOrDropStrict(dst, b.GetValue(colIndex, src))
 89 | 		}
 90 | 		filteredSeries[colIndex] = NewSeriesFromBuffer(b.ColumnName(colIndex), buf)
 91 | 	}
 92 | 
 93 | 	res, err := NewBowWithMetadata(b.Metadata(), filteredSeries...)
 94 | 	if err != nil {
 95 | 		panic(err)
 96 | 	}
 97 | 
 98 | 	return res
 99 | }
100 | 
101 | // matchRowCmps reports whether row `i` of `b` satisfies every comparator of fns (true if none).
102 | func matchRowCmps(b Bow, i int, fns ...RowCmp) bool {
103 | 	for _, cmp := range fns {
104 | 		if matched := cmp(b, i); !matched {
105 | 			return false
106 | 		}
107 | 	}
108 | 	return true
109 | }
110 | 
111 | // MakeFilterValues returns a RowCmp for Filter keeping the rows whose value in column `colIndex`
112 | // equals one of `values`, each first converted to the column type (0.1 gives "0.100000" for a String
113 | // column; a non-convertible value is compared as a nil value). The caller's `values` is not modified.
114 | func (b *bow) MakeFilterValues(colIndex int, values ...interface{}) RowCmp {
115 | 	converted := make([]interface{}, len(values))
116 | 	for i, value := range values {
117 | 		converted[i] = b.ColumnType(colIndex).Convert(value)
118 | 	}
119 | 	return func(b Bow, i int) bool {
120 | 		return contains(converted, b.GetValue(colIndex, i))
121 | 	}
122 | }
123 | 
124 | // contains reports whether value is equal (==) to at least one element of values.
125 | func contains(values []interface{}, value interface{}) bool {
126 | 	for i := 0; i < len(values); i++ {
127 | 		if values[i] == value {
128 | 			return true
129 | 		}
130 | 	}
131 | 	return false
132 | }
133 | 


--------------------------------------------------------------------------------
/bowsetters_test.go:
--------------------------------------------------------------------------------
  1 | package bow
  2 | 
  3 | import (
  4 | 	"testing"
  5 | 
  6 | 	"github.com/stretchr/testify/assert"
  7 | 	"github.com/stretchr/testify/require"
  8 | )
  9 | 
 10 | func TestBow_SetColName(t *testing.T) {
 11 | 	b, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}),
 12 | 		NewSeries("oldName", Float64, []float64{0.1, 0.2}, nil),
 13 | 	)
 14 | 	require.NoError(t, err)
 15 | 	// expected only differs from b by the name of its single column.
 16 | 	expected, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}),
 17 | 		NewSeries("newName", Float64, []float64{0.1, 0.2}, nil),
 18 | 	)
 19 | 	require.NoError(t, err)
 20 | 
 21 | 	t.Run("valid", func(t *testing.T) {
 22 | 		res, err := b.RenameCol(0, "newName")
 23 | 		require.NoError(t, err)
 24 | 		assert.EqualValues(t, expected.String(), res.String())
 25 | 	})
 26 | 	// b has a single column, so index 1 is out of bounds.
 27 | 	t.Run("invalid colIndex", func(t *testing.T) {
 28 | 		_, err = b.RenameCol(1, "newName")
 29 | 		require.Error(t, err)
 30 | 	})
 31 | 
 32 | 	t.Run("invalid newName", func(t *testing.T) {
 33 | 		_, err = b.RenameCol(0, "")
 34 | 		require.Error(t, err)
 35 | 	})
 36 | }
 37 | 
 38 | func TestBow_Apply(t *testing.T) {
 39 | 	b, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}),
 40 | 		NewSeries("unchanged", Float64, []float64{0.1, 0.2}, nil),
 41 | 		NewSeries("apply", Float64, []float64{0.1, 0.2}, nil),
 42 | 	)
 43 | 	require.NoError(t, err)
 44 | 	// Only column 1 is expected to change: its Float64 values rendered as six-decimal strings.
 45 | 	expect, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}),
 46 | 		NewSeries("unchanged", Float64, []float64{0.1, 0.2}, nil),
 47 | 		NewSeries("apply", String, []string{"0.100000", "0.200000"}, nil),
 48 | 	)
 49 | 	require.NoError(t, err)
 50 | 
 51 | 	res, err := b.Apply(1, String, String.Convert)
 52 | 	require.NoError(t, err)
 53 | 	ExpectEqual(t, expect, res)
 54 | }
 55 | 
 56 | func TestBow_Filter(t *testing.T) {
 57 | 	b, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}),
 58 | 		NewSeries("string", String, []string{"0.1", "0.2"}, nil),
 59 | 		NewSeries("float", Float64, []float64{0.1, 0.2}, nil),
 60 | 	)
 61 | 	require.NoError(t, err)
 62 | 
 63 | 	t.Run("empty filter", func(t *testing.T) {
 64 | 		res := b.Filter()
 65 | 		ExpectEqual(t, b, res)
 66 | 	})
 67 | 
 68 | 	t.Run("empty result", func(t *testing.T) {
 69 | 		res := b.Filter(b.MakeFilterValues(0, "not found"))
 70 | 		ExpectEqual(t, b.NewEmptySlice(), res)
 71 | 	})
 72 | 
 73 | 	t.Run("match one comparator", func(t *testing.T) {
 74 | 		res := b.Filter(b.MakeFilterValues(0, "0.1"))
 75 | 		ExpectEqual(t, b.NewSlice(0, 1), res)
 76 | 	})
 77 | 
 78 | 	t.Run("match two", func(t *testing.T) {
 79 | 		res := b.Filter(
 80 | 			b.MakeFilterValues(0, "0.1"),
 81 | 			b.MakeFilterValues(1, 0.1),
 82 | 		)
 83 | 		ExpectEqual(t, b.NewSlice(0, 1), res)
 84 | 	})
 85 | 	// No single row satisfies both comparators (row 0 only the first, row 1 only the second).
 86 | 	t.Run("match half", func(t *testing.T) {
 87 | 		res := b.Filter(
 88 | 			b.MakeFilterValues(0, "0.1"),
 89 | 			b.MakeFilterValues(1, 0.2),
 90 | 		)
 91 | 		ExpectEqual(t, b.NewEmptySlice(), res)
 92 | 	})
 93 | 
 94 | 	t.Run("match all", func(t *testing.T) {
 95 | 		res := b.Filter(
 96 | 			b.MakeFilterValues(0, "0.1", "0.2"),
 97 | 			b.MakeFilterValues(1, 0.1, 0.2),
 98 | 		)
 99 | 		ExpectEqual(t, b, res)
100 | 	})
101 | 	// Values are converted to the column type, so strings can be matched against the float column.
102 | 	t.Run("match all lazy", func(t *testing.T) {
103 | 		res := b.Filter(
104 | 			b.MakeFilterValues(0, "0.1", "0.2"),
105 | 			b.MakeFilterValues(1, "0.1", "0.2"),
106 | 		)
107 | 		ExpectEqual(t, b, res)
108 | 	})
109 | 	// "not a number" is passed for the Float64 column: no row is expected to match.
110 | 	t.Run("not convertible", func(t *testing.T) {
111 | 		res := b.Filter(
112 | 			b.MakeFilterValues(1, "not a number"),
113 | 		)
114 | 		ExpectEqual(t, b.NewEmptySlice(), res)
115 | 	})
116 | 	// Rows 0 and 2 match but are not consecutive, so the result cannot be a plain slice of b.
117 | 	t.Run("match non concomitant", func(t *testing.T) {
118 | 		b, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}),
119 | 			NewSeries("string", String, []string{"0.1", "0.2", "0.3"}, nil),
120 | 			NewSeries("float", Float64, []float64{0.1, 0.2, 0.3}, nil),
121 | 		)
122 | 		require.NoError(t, err)
123 | 		expect, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}),
124 | 			NewSeries("string", String, []string{"0.1", "0.3"}, nil),
125 | 			NewSeries("float", Float64, []float64{0.1, 0.3}, nil),
126 | 		)
127 | 		require.NoError(t, err)
128 | 
129 | 		res := b.Filter(
130 | 			b.MakeFilterValues(0, "0.1", "0.3"),
131 | 		)
132 | 		ExpectEqual(t, expect, res)
133 | 	})
134 | 
135 | }
136 | 


--------------------------------------------------------------------------------
/bowsort.go:
--------------------------------------------------------------------------------
 1 | package bow
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"sort"
 6 | )
 7 | 
 8 | // SortByCol returns a new Bow with the rows sorted by column `colIndex` in ascending order;
 9 | // rows with equal values keep their relative order. Returns the same Bow if the column is
10 | // already sorted, and an error if the column contains nil values.
11 | func (b *bow) SortByCol(colIndex int) (Bow, error) {
12 | 	if nullN := b.Column(colIndex).NullN(); nullN != 0 {
13 | 		return nil, fmt.Errorf("column to sort by has %d nil values", nullN)
14 | 	}
15 | 
16 | 	sortableBuf := newBufferWithIndices(b.NewBufferFromCol(colIndex))
17 | 	// Stop if sort by column is already sorted
18 | 	if sortableBuf.IsSorted() {
19 | 		return b, nil
20 | 	}
21 | 
22 | 	// Stable sort: rows sharing the same value keep their original relative order, which
23 | 	// makes the result deterministic. The row indices are permuted alongside the values.
24 | 	sort.Stable(sortableBuf)
25 | 
26 | 	// Fill the sort by column with sorted values
27 | 	sortedSeries := make([]Series, b.NumCols())
28 | 	for i := 0; i < b.NumCols(); i++ {
29 | 		if i == colIndex {
30 | 			sortedSeries[i] = NewSeriesFromBuffer(b.ColumnName(i), sortableBuf.Buffer)
31 | 			continue
32 | 		}
33 | 		buf := NewBuffer(b.NumRows(), b.ColumnType(i))
34 | 		for j, index := range sortableBuf.indices {
35 | 			buf.SetOrDropStrict(j, b.GetValue(i, index))
36 | 		}
37 | 		sortedSeries[i] = NewSeriesFromBuffer(b.ColumnName(i), buf)
38 | 	}
39 | 
40 | 	return NewBowWithMetadata(b.Metadata(), sortedSeries...)
41 | }
42 | 
43 | // bufferWithIndices pairs a Buffer with row indices permuted by Swap, so that a sort can be replayed on other columns.
44 | type bufferWithIndices struct {
45 | 	Buffer
46 | 	indices []int // indices[i] is the original row index of the value now at position i
47 | }
48 | 
49 | func newBufferWithIndices(buf Buffer) bufferWithIndices {
50 | 	order := make([]int, buf.Len()) // identity permutation: order[i] == i
51 | 	for i := range order {
52 | 		order[i] = i
53 | 	}
54 | 	return bufferWithIndices{Buffer: buf, indices: order}
55 | }
56 | 
57 | func (p bufferWithIndices) Swap(i, j int) {
58 | 	p.Buffer.Swap(i, j) // swap the values held by the embedded Buffer
59 | 	p.indices[i], p.indices[j] = p.indices[j], p.indices[i] // and keep the row indices aligned with them
60 | }
61 | 


--------------------------------------------------------------------------------
/bowsort_test.go:
--------------------------------------------------------------------------------
  1 | package bow
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"testing"
  6 | 
  7 | 	"github.com/stretchr/testify/assert"
  8 | 	"github.com/stretchr/testify/require"
  9 | )
 10 | 
 11 | func TestBow_SortByCol(t *testing.T) {
 12 | 	t.Run("sorted", func(t *testing.T) {
 13 | 		b, err := NewBowFromRowBasedInterfaces(
 14 | 			[]string{"time", "a", "b"},
 15 | 			[]Type{Int64, Float64, Float64},
 16 | 			[][]interface{}{
 17 | 				{10, 2.4, 3.1},
 18 | 				{11, 2.8, 5.9},
 19 | 				{12, 2.9, 7.5},
 20 | 				{13, 3.9, 13.4},
 21 | 			})
 22 | 		require.NoError(t, err)
 23 | 
 24 | 		sorted, err := b.SortByCol(0)
 25 | 		assert.Nil(t, err)
 26 | 		assert.EqualValues(t, b.String(), sorted.String())
 27 | 	})
 28 | 
 29 | 	t.Run("unsorted with all types", func(t *testing.T) {
 30 | 		b, err := NewBowFromRowBasedInterfaces(
 31 | 			[]string{"time", "i", "f", "b", "s"},
 32 | 			[]Type{Int64, Int64, Float64, Boolean, String},
 33 | 			[][]interface{}{
 34 | 				{10, 2, 3.1, true, "ho"},
 35 | 				{11, 2, 5.9, false, "la"},
 36 | 				{13, 3, 13.4, true, "tal"},
 37 | 				{12, 2, 7.5, false, "que"},
 38 | 			})
 39 | 		require.NoError(t, err)
 40 | 		expected, err := NewBowFromRowBasedInterfaces(
 41 | 			[]string{"time", "i", "f", "b", "s"},
 42 | 			[]Type{Int64, Int64, Float64, Boolean, String},
 43 | 			[][]interface{}{
 44 | 				{10, 2, 3.1, true, "ho"},
 45 | 				{11, 2, 5.9, false, "la"},
 46 | 				{12, 2, 7.5, false, "que"},
 47 | 				{13, 3, 13.4, true, "tal"},
 48 | 			})
 49 | 		require.NoError(t, err)
 50 | 		sorted, err := b.SortByCol(0)
 51 | 		assert.NoError(t, err)
 52 | 		assert.EqualValues(t, expected.String(), sorted.String())
 53 | 	})
 54 | 
 55 | 	t.Run("unsorted with different cols", func(t *testing.T) {
 56 | 		b, err := NewBowFromRowBasedInterfaces(
 57 | 			[]string{"a", "b", "time"},
 58 | 			[]Type{Float64, Float64, Int64},
 59 | 			[][]interface{}{
 60 | 				{2.4, 3.1, 10},
 61 | 				{2.8, 5.9, 11},
 62 | 				{3.9, 13.4, 13},
 63 | 				{2.9, 7.5, 12},
 64 | 			})
 65 | 		require.NoError(t, err)
 66 | 		expected, err := NewBowFromRowBasedInterfaces(
 67 | 			[]string{"a", "b", "time"},
 68 | 			[]Type{Float64, Float64, Int64},
 69 | 			[][]interface{}{
 70 | 				{2.4, 3.1, 10},
 71 | 				{2.8, 5.9, 11},
 72 | 				{2.9, 7.5, 12},
 73 | 				{3.9, 13.4, 13},
 74 | 			})
 75 | 		require.NoError(t, err)
 76 | 		sorted, err := b.SortByCol(2)
 77 | 		assert.Nil(t, err)
 78 | 		assert.EqualValues(t, expected.String(), sorted.String())
 79 | 	})
 80 | 	// nil values are accepted in the columns that are not the sort-by column.
 81 | 	t.Run("unsorted with nil values and all types", func(t *testing.T) {
 82 | 		b, err := NewBowFromRowBasedInterfaces(
 83 | 			[]string{"time", "int", "float", "string", "bool"},
 84 | 			[]Type{Int64, Int64, Float64, String, Boolean},
 85 | 			[][]interface{}{
 86 | 				{10, 5, nil, "bonjour", true},
 87 | 				{11, 2, 56., "comment", false},
 88 | 				{13, nil, 13.4, "allez", nil},
 89 | 				{12, -1, nil, nil, false},
 90 | 			})
 91 | 		require.NoError(t, err)
 92 | 		expected, err := NewBowFromRowBasedInterfaces(
 93 | 			[]string{"time", "int", "float", "string", "bool"},
 94 | 			[]Type{Int64, Int64, Float64, String, Boolean},
 95 | 			[][]interface{}{
 96 | 				{10, 5, nil, "bonjour", true},
 97 | 				{11, 2, 56., "comment", false},
 98 | 				{12, -1, nil, nil, false},
 99 | 				{13, nil, 13.4, "allez", nil},
100 | 			})
101 | 		require.NoError(t, err)
102 | 		sorted, err := b.SortByCol(0)
103 | 		assert.Nil(t, err)
104 | 		assert.EqualValues(t, expected.String(), sorted.String())
105 | 	})
106 | 	// Input sorted in descending order must come out in ascending order.
107 | 	t.Run("sorted in desc order", func(t *testing.T) {
108 | 		b, err := NewBowFromRowBasedInterfaces(
109 | 			[]string{"time", "a", "b"},
110 | 			[]Type{Int64, Float64, Float64},
111 | 			[][]interface{}{
112 | 				{13, 3.9, 13.4},
113 | 				{12, 2.9, 7.5},
114 | 				{11, 2.8, 5.9},
115 | 				{10, 2.4, 3.1},
116 | 			})
117 | 		require.NoError(t, err)
118 | 		expected, err := NewBowFromRowBasedInterfaces(
119 | 			[]string{"time", "a", "b"},
120 | 			[]Type{Int64, Float64, Float64},
121 | 			[][]interface{}{
122 | 				{10, 2.4, 3.1},
123 | 				{11, 2.8, 5.9},
124 | 				{12, 2.9, 7.5},
125 | 				{13, 3.9, 13.4},
126 | 			})
127 | 		require.NoError(t, err)
128 | 		sorted, err := b.SortByCol(0)
129 | 		assert.Nil(t, err)
130 | 		assert.EqualValues(t, expected.String(), sorted.String())
131 | 	})
132 | 	// Two rows share time 12: the expected bow keeps them in their input order.
133 | 	t.Run("duplicate values in sort by column", func(t *testing.T) {
134 | 		b, err := NewBowFromRowBasedInterfaces(
135 | 			[]string{"time", "a", "b"},
136 | 			[]Type{Int64, Float64, Float64},
137 | 			[][]interface{}{
138 | 				{13, 3.9, 13.4},
139 | 				{12, 2.9, 7.5},
140 | 				{12, 2.8, 5.9},
141 | 				{10, 2.4, 3.1},
142 | 			})
143 | 		require.NoError(t, err)
144 | 		expected, err := NewBowFromRowBasedInterfaces(
145 | 			[]string{"time", "a", "b"},
146 | 			[]Type{Int64, Float64, Float64},
147 | 			[][]interface{}{
148 | 				{10, 2.4, 3.1},
149 | 				{12, 2.9, 7.5},
150 | 				{12, 2.8, 5.9},
151 | 				{13, 3.9, 13.4},
152 | 			})
153 | 		require.NoError(t, err)
154 | 		sorted, err := b.SortByCol(0)
155 | 		assert.Nil(t, err)
156 | 		assert.EqualValues(t, expected.String(), sorted.String())
157 | 	})
158 | 
159 | 	t.Run("empty bow", func(t *testing.T) {
160 | 		b, err := NewBowFromRowBasedInterfaces(
161 | 			[]string{"time", "a"},
162 | 			[]Type{Int64, Float64},
163 | 			[][]interface{}{})
164 | 		require.NoError(t, err)
165 | 		expected := b
166 | 		sorted, err := b.SortByCol(0)
167 | 		assert.Nil(t, err)
168 | 		assert.EqualValues(t, expected.String(), sorted.String())
169 | 	})
170 | 
171 | 	t.Run("with metadata", func(t *testing.T) {
172 | 		b, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}),
173 | 			NewSeries("time", Int64, []int64{1, 3, 2}, nil),
174 | 			NewSeries("value", Float64, []float64{.1, .3, .2}, nil),
175 | 		)
176 | 		require.NoError(t, err)
177 | 
178 | 		expected, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}),
179 | 			NewSeries("time", Int64, []int64{1, 2, 3}, nil),
180 | 			NewSeries("value", Float64, []float64{.1, .2, .3}, nil),
181 | 		)
182 | 		require.NoError(t, err)
183 | 
184 | 		sorted, err := b.SortByCol(0)
185 | 		assert.NoError(t, err)
186 | 
187 | 		assert.Equal(t, expected.String(), sorted.String())
188 | 	})
189 | 	// A nil value in the sort-by column itself must be reported as an error.
190 | 	t.Run("ERR: nil values in sort by column", func(t *testing.T) {
191 | 		b, err := NewBowFromRowBasedInterfaces(
192 | 			[]string{"time", "a", "b"},
193 | 			[]Type{Int64, Float64, Float64},
194 | 			[][]interface{}{
195 | 				{13, 3.9, 13.4},
196 | 				{12, 2.9, 7.5},
197 | 				{nil, 2.8, 5.9},
198 | 				{10, 2.4, 3.1},
199 | 			})
200 | 		require.NoError(t, err)
201 | 		_, err = b.SortByCol(0)
202 | 		assert.Error(t, err)
203 | 	})
204 | }
205 | 
206 | func BenchmarkBow_SortByCol(b *testing.B) {
207 | 	for rows := 10; rows <= 100000; rows *= 10 {
208 | 		data, err := NewBowFromParquet(fmt.Sprintf(
209 | 			"%sbow1-%d-rows.parquet", benchmarkBowsDirPath, rows), false)
210 | 		require.NoError(b, err)
211 | 
212 | 		b.Run(fmt.Sprintf("%d_rows", rows), func(b *testing.B) {
213 | 			for n := 0; n < b.N; n++ {
214 | 				_, err = data.SortByCol(1)
215 | 				require.NoError(b, err)
216 | 			}
217 | 		})
218 | 	}
219 | }
220 | 


--------------------------------------------------------------------------------
/bowstring.go:
--------------------------------------------------------------------------------
 1 | package bow
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"strings"
 6 | 	"text/tabwriter"
 7 | )
 8 | 
 9 | // String returns a formatted representation of the Bow.
10 | func (b *bow) String() string {
11 | 	if b.NumCols() == 0 {
12 | 		return ""
13 | 	}
14 | 
15 | 	w := new(tabwriter.Writer)
16 | 	writer := new(strings.Builder)
17 | 	// tabs will be replaced by two spaces by formatter
18 | 	w.Init(writer, 0, 4, 2, ' ', 0)
19 | 
20 | 	var cells []string
21 | 	for colIndex := 0; colIndex < b.NumCols(); colIndex++ {
22 | 		cells = append(cells, fmt.Sprintf(
23 | 			"%v", fmt.Sprintf(
24 | 				"%s:%v", b.Schema().Field(colIndex).Name, b.ColumnType(colIndex))))
25 | 	}
26 | 	_, err := fmt.Fprintln(w, strings.Join(cells, "\t"))
27 | 	if err != nil {
28 | 		panic(err)
29 | 	}
30 | 
31 | 	for row := range b.GetRowsChan() {
32 | 		cells = []string{}
33 | 		for colIndex := 0; colIndex < b.NumCols(); colIndex++ {
34 | 			cells = append(cells, fmt.Sprintf("%v", row[b.Schema().Field(colIndex).Name]))
35 | 		}
36 | 		if _, err = fmt.Fprintln(w, strings.Join(cells, "\t")); err != nil {
37 | 			panic(err)
38 | 		}
39 | 	}
40 | 
41 | 	if b.Metadata().Len() > 0 {
42 | 		if _, err = fmt.Fprintf(w, "metadata: %+v\n", b.Metadata()); err != nil {
43 | 			panic(err)
44 | 		}
45 | 	}
46 | 
47 | 	if err = w.Flush(); err != nil {
48 | 		panic(err)
49 | 	}
50 | 
51 | 	return writer.String()
52 | }
53 | 


--------------------------------------------------------------------------------
/bowtypes.go:
--------------------------------------------------------------------------------
  1 | package bow
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 
  6 | 	"github.com/apache/arrow/go/v8/arrow"
  7 | )
  8 | 
// Type enumerates the data types known to bow; its values are the constants
// declared in this file.
type Type int
 10 | 
 11 | // How to add a Type:
 12 | // - Seek corresponding arrow.DataType and add it in `mapBowToArrowTypes`
 13 | // - add a convert function with desired logic and add case in other conversion func
 14 | // - add necessary case in buffer file
 15 | // - complete GetValue bow method
 16 | 
 17 | const (
 18 | 	// Unknown is placed first to be the default when allocating Type or []Type.
 19 | 	Unknown = Type(iota)
 20 | 
 21 | 	// Float64 and following types are native arrow type supported by bow.
 22 | 	Float64
 23 | 	Int64
 24 | 	Boolean
 25 | 	String
 26 | 
 27 | 	// InputDependent is used in aggregations when the output type is dependent on the input type.
 28 | 	InputDependent
 29 | 
 30 | 	// IteratorDependent is used in aggregations when the output type is dependent on the iterator type.
 31 | 	IteratorDependent
 32 | )
 33 | 
 34 | var (
 35 | 	mapBowToArrowTypes = map[Type]arrow.DataType{
 36 | 		Float64: arrow.PrimitiveTypes.Float64,
 37 | 		Int64:   arrow.PrimitiveTypes.Int64,
 38 | 		Boolean: arrow.FixedWidthTypes.Boolean,
 39 | 		String:  arrow.BinaryTypes.String,
 40 | 	}
 41 | 	mapArrowNameToBowTypes = func() map[string]Type {
 42 | 		res := make(map[string]Type)
 43 | 		for bowType, arrowDataType := range mapBowToArrowTypes {
 44 | 			res[arrowDataType.Name()] = bowType
 45 | 		}
 46 | 		return res
 47 | 	}()
 48 | 	mapArrowFingerprintToBowTypes = func() map[string]Type {
 49 | 		res := make(map[string]Type)
 50 | 		for bowType, arrowDataType := range mapBowToArrowTypes {
 51 | 			res[arrowDataType.Fingerprint()] = bowType
 52 | 		}
 53 | 		return res
 54 | 	}()
 55 | 	allType = func() []Type {
 56 | 		res := make([]Type, InputDependent-1)
 57 | 		for typ := Type(1); typ < InputDependent; typ++ {
 58 | 			res[typ-1] = typ
 59 | 		}
 60 | 		return res
 61 | 	}()
 62 | )
 63 | 
 64 | func (t Type) Convert(input interface{}) interface{} {
 65 | 	var output interface{}
 66 | 	var ok bool
 67 | 	switch t {
 68 | 	case Float64:
 69 | 		output, ok = ToFloat64(input)
 70 | 	case Int64:
 71 | 		output, ok = ToInt64(input)
 72 | 	case Boolean:
 73 | 		output, ok = ToBoolean(input)
 74 | 	case String:
 75 | 		output, ok = ToString(input)
 76 | 	}
 77 | 	if ok {
 78 | 		return output
 79 | 	}
 80 | 	return nil
 81 | }
 82 | 
 83 | // IsSupported ensures that the Type t is currently supported by Bow and matches a convertible concrete type.
 84 | func (t Type) IsSupported() bool {
 85 | 	_, ok := mapBowToArrowTypes[t]
 86 | 	return ok
 87 | }
 88 | 
 89 | // String returns the string representation of the Type t.
 90 | func (t Type) String() string {
 91 | 	at, ok := mapBowToArrowTypes[t]
 92 | 	if !ok {
 93 | 		return "undefined"
 94 | 	}
 95 | 	return fmt.Sprintf("%s", at)
 96 | }
 97 | 
 98 | func getBowTypeFromArrowFingerprint(fingerprint string) Type {
 99 | 	typ, ok := mapArrowFingerprintToBowTypes[fingerprint]
100 | 	if !ok {
101 | 		return Unknown
102 | 	}
103 | 	return typ
104 | }
105 | 
106 | func getBowTypeFromArrowName(name string) Type {
107 | 	typ, ok := mapArrowNameToBowTypes[name]
108 | 	if !ok {
109 | 		return Unknown
110 | 	}
111 | 	return typ
112 | }
113 | 
114 | // GetAllTypes returns all Bow types.
115 | func GetAllTypes() []Type {
116 | 	res := make([]Type, len(allType))
117 | 	copy(res, allType)
118 | 	return res
119 | }
120 | 


--------------------------------------------------------------------------------
/bowtypes_test.go:
--------------------------------------------------------------------------------
 1 | package bow
 2 | 
 3 | import (
 4 | 	"testing"
 5 | 
 6 | 	"github.com/stretchr/testify/assert"
 7 | )
 8 | 
 9 | func TestGetAllTypes(t *testing.T) {
10 | 	cp := GetAllTypes()
11 | 	cp[0] = 10
12 | 	assert.NotEqual(t, allType, cp)
13 | }
14 | 


--------------------------------------------------------------------------------
/bowvalues.go:
--------------------------------------------------------------------------------
 1 | package bow
 2 | 
 3 | import "github.com/apache/arrow/go/v8/arrow/array"
 4 | 
// int64Values returns the result of arr.Int64Values().
// NOTE(review): whether the returned slice aliases the array's buffer is
// decided by the arrow library — confirm before mutating it.
func int64Values(arr *array.Int64) []int64 {
	return arr.Int64Values()
}
 8 | 
// float64Values returns the result of arr.Float64Values().
// NOTE(review): whether the returned slice aliases the array's buffer is
// decided by the arrow library — confirm before mutating it.
func float64Values(arr *array.Float64) []float64 {
	return arr.Float64Values()
}
12 | 
13 | func booleanValues(arr *array.Boolean) []bool {
14 | 	var res = make([]bool, arr.Len())
15 | 	for i := range res {
16 | 		res[i] = arr.Value(i)
17 | 	}
18 | 	return res
19 | }
20 | 
21 | func stringValues(arr *array.String) []string {
22 | 	var res = make([]string, arr.Len())
23 | 	for i := range res {
24 | 		res[i] = arr.Value(i)
25 | 	}
26 | 	return res
27 | }
28 | 


--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
 1 | module github.com/metronlab/bow
 2 | 
 3 | go 1.18
 4 | 
 5 | require (
 6 | 	github.com/apache/arrow/go/arrow v0.0.0-20211112161151-bc219186db40 // indirect
 7 | 	github.com/apache/arrow/go/v8 v8.0.1
 8 | 	github.com/apache/thrift v0.16.0 // indirect
 9 | 	github.com/davecgh/go-spew v1.1.1 // indirect
10 | 	github.com/google/uuid v1.3.0
11 | 	github.com/pmezard/go-difflib v1.0.0 // indirect
12 | 	github.com/stretchr/testify v1.8.2
13 | 	github.com/xitongsys/parquet-go v1.6.2
14 | 	github.com/xitongsys/parquet-go-source v0.0.0-20220723234337-052319f3f36b
15 | 	golang.org/x/xerrors v0.0.0-20220609144429-65e65417b02f // indirect
16 | 	gopkg.in/yaml.v3 v3.0.1 // indirect
17 | )
18 | 
19 | require (
20 | 	github.com/andybalholm/brotli v1.0.4 // indirect
21 | 	github.com/goccy/go-json v0.9.10 // indirect
22 | 	github.com/golang/snappy v0.0.4 // indirect
23 | 	github.com/klauspost/asmfmt v1.3.2 // indirect
24 | 	github.com/klauspost/compress v1.15.9 // indirect
25 | 	github.com/klauspost/cpuid/v2 v2.1.0 // indirect
26 | 	github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 // indirect
27 | 	github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 // indirect
28 | 	github.com/pierrec/lz4/v4 v4.1.15 // indirect
29 | 	github.com/zeebo/xxh3 v1.0.2 // indirect
30 | 	golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4 // indirect
31 | 	golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f // indirect
32 | 	golang.org/x/tools v0.1.11 // indirect
33 | )
34 | 


--------------------------------------------------------------------------------
/rolling/aggregation.go:
--------------------------------------------------------------------------------
  1 | package rolling
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 
  6 | 	"github.com/metronlab/bow"
  7 | 	"github.com/metronlab/bow/rolling/transformation"
  8 | )
  9 | 
// ColAggregation is a set of methods to aggregate and transform a Window.
// It describes which input column is read, how a Window of that column is
// reduced to a single value, and how the resulting column is named and typed.
type ColAggregation interface {
	// InputName returns the name of the input column.
	InputName() string
	// InputIndex returns the index of the input column.
	InputIndex() int
	// SetInputIndex sets the index of the input column.
	SetInputIndex(int)

	// OutputName returns the name of the output column.
	OutputName() string
	// RenameOutput returns a copy of the ColAggregation with a new output column name.
	RenameOutput(string) ColAggregation
	// NeedInclusiveWindow returns true if the ColAggregation needs to have inclusive windows.
	NeedInclusiveWindow() bool

	// Type returns the declared return type of the ColAggregation, which may
	// be bow.InputDependent or bow.IteratorDependent.
	Type() bow.Type
	// GetReturnType returns the return type of the ColAggregation depending on an input and an iterator type.
	GetReturnType(inputType, iteratorType bow.Type) bow.Type

	// Func returns the ColAggregationFunc of the ColAggregation.
	Func() ColAggregationFunc

	// Transformations returns the transformation functions of the ColAggregation.
	Transformations() []transformation.Func
	// SetTransformations returns a copy of the ColAggregation with new transformation functions.
	SetTransformations(...transformation.Func) ColAggregation
}
 39 | 
// colAggregation is the ColAggregation implementation returned by NewColAggregation.
type colAggregation struct {
	// inputName is the name of the column read by the aggregation.
	inputName string
	// inputIndex is the index of the input column; NewColAggregation sets it
	// to -1 and SetInputIndex overwrites it.
	inputIndex int
	// needInclusiveWindow is the value reported by NeedInclusiveWindow.
	needInclusiveWindow bool

	// aggregationFn computes the aggregated value of a Window.
	aggregationFn ColAggregationFunc
	// transformationFns is the list returned by Transformations.
	transformationFns []transformation.Func

	// outputName is the output column name set by RenameOutput; empty by default.
	outputName string
	// typ is the declared return type of the aggregation.
	typ bow.Type
}
 51 | 
 52 | // NewColAggregation returns a new ColAggregation.
 53 | func NewColAggregation(inputName string, needInclusiveWindow bool, typ bow.Type, fn ColAggregationFunc) ColAggregation {
 54 | 	return &colAggregation{
 55 | 		inputName:           inputName,
 56 | 		inputIndex:          -1,
 57 | 		needInclusiveWindow: needInclusiveWindow,
 58 | 		aggregationFn:       fn,
 59 | 		typ:                 typ,
 60 | 	}
 61 | }
 62 | 
// ColAggregationConstruct builds a ColAggregation for the column named colName.
type ColAggregationConstruct func(colName string) ColAggregation

// ColAggregationFunc computes the aggregated value of column colIndex over the Window w.
type ColAggregationFunc func(colIndex int, w Window) (interface{}, error)
 65 | 
// InputName returns the name of the input column.
func (a *colAggregation) InputName() string {
	return a.inputName
}
 69 | 
// InputIndex returns the stored index of the input column; it is -1 after
// NewColAggregation until SetInputIndex is called.
func (a *colAggregation) InputIndex() int {
	return a.inputIndex
}
 73 | 
// SetInputIndex stores i as the index of the input column, modifying the
// receiver in place.
func (a *colAggregation) SetInputIndex(i int) {
	a.inputIndex = i
}
 77 | 
// OutputName returns the name of the output column; it is empty unless set
// through RenameOutput.
func (a *colAggregation) OutputName() string {
	return a.outputName
}
 81 | 
 82 | func (a *colAggregation) RenameOutput(name string) ColAggregation {
 83 | 	aCopy := *a
 84 | 	aCopy.outputName = name
 85 | 	return &aCopy
 86 | }
 87 | 
// NeedInclusiveWindow returns the needInclusiveWindow flag given to NewColAggregation.
func (a *colAggregation) NeedInclusiveWindow() bool {
	return a.needInclusiveWindow
}
 91 | 
// Type returns the declared return type given to NewColAggregation.
func (a *colAggregation) Type() bow.Type {
	return a.typ
}
 95 | 
// Func returns the aggregation function given to NewColAggregation.
func (a *colAggregation) Func() ColAggregationFunc {
	return a.aggregationFn
}
 99 | 
// Transformations returns the stored transformation functions; the slice is
// returned as is, not copied.
func (a *colAggregation) Transformations() []transformation.Func {
	return a.transformationFns
}
103 | 
104 | func (a *colAggregation) SetTransformations(transformations ...transformation.Func) ColAggregation {
105 | 	aCopy := *a
106 | 	aCopy.transformationFns = transformations
107 | 	return &aCopy
108 | }
109 | 
110 | func (a *colAggregation) GetReturnType(inputType, iteratorType bow.Type) bow.Type {
111 | 	switch a.Type() {
112 | 	case bow.Int64, bow.Float64, bow.Boolean, bow.String:
113 | 		return a.Type()
114 | 	case bow.InputDependent:
115 | 		return inputType
116 | 	case bow.IteratorDependent:
117 | 		return iteratorType
118 | 	default:
119 | 		panic(fmt.Errorf("invalid return type %v", a.Type()))
120 | 	}
121 | }
122 | 
123 | func (r *intervalRolling) Aggregate(aggrs ...ColAggregation) Rolling {
124 | 	if r.err != nil {
125 | 		return r
126 | 	}
127 | 
128 | 	rCopy := *r
129 | 	newIntervalCol, aggrs, err := rCopy.indexedAggregations(aggrs)
130 | 	if err != nil {
131 | 		return rCopy.setError(fmt.Errorf("intervalRolling.indexedAggregations: %w", err))
132 | 	}
133 | 
134 | 	b, err := rCopy.aggregateWindows(aggrs)
135 | 	if err != nil {
136 | 		return rCopy.setError(fmt.Errorf("intervalRolling.aggregateWindows: %w", err))
137 | 	}
138 | 
139 | 	newR, err := newIntervalRolling(b, newIntervalCol, rCopy.interval, rCopy.options)
140 | 	if err != nil {
141 | 		return rCopy.setError(fmt.Errorf("newIntervalRolling: %w", err))
142 | 	}
143 | 
144 | 	return newR
145 | }
146 | 
147 | func (r *intervalRolling) indexedAggregations(aggrs []ColAggregation) (int, []ColAggregation, error) {
148 | 	if len(aggrs) == 0 {
149 | 		return -1, nil, fmt.Errorf("at least one column aggregation is required")
150 | 	}
151 | 
152 | 	newIntervalCol := -1
153 | 	for i := range aggrs {
154 | 		isInterval, err := r.validateAggregation(aggrs[i], i)
155 | 		if err != nil {
156 | 			return -1, nil, err
157 | 		}
158 | 		if isInterval {
159 | 			newIntervalCol = i
160 | 		}
161 | 	}
162 | 
163 | 	if newIntervalCol == -1 {
164 | 		return -1, nil, fmt.Errorf(
165 | 			"must keep interval column '%s'", r.bow.ColumnName(r.intervalColIndex))
166 | 	}
167 | 
168 | 	return newIntervalCol, aggrs, nil
169 | }
170 | 
171 | func (r *intervalRolling) validateAggregation(aggr ColAggregation, newIndex int) (isInterval bool, err error) {
172 | 	if aggr.InputName() == "" {
173 | 		return false, fmt.Errorf("aggregation %d has no column name", newIndex)
174 | 	}
175 | 
176 | 	readIndex, err := r.bow.ColumnIndex(aggr.InputName())
177 | 	if err != nil {
178 | 		return false, err
179 | 	}
180 | 
181 | 	aggr.SetInputIndex(readIndex)
182 | 
183 | 	if aggr.NeedInclusiveWindow() {
184 | 		r.options.Inclusive = true
185 | 	}
186 | 
187 | 	return readIndex == r.intervalColIndex, nil
188 | }
189 | 
// aggregateWindows computes one output series per aggregation by running the
// aggregation function, then its transformations, on every window, and
// assembles the series into a new bow. The first error met is returned.
func (r *intervalRolling) aggregateWindows(aggrs []ColAggregation) (bow.Bow, error) {
	series := make([]bow.Series, len(aggrs))

	for colIndex, aggr := range aggrs {
		// Each aggregation iterates over its own copy of r.
		// NOTE(review): presumably so the iteration state consumed by
		// HasNext/Next is not shared between aggregations — confirm.
		rCopy := *r
		typ := aggr.GetReturnType(
			rCopy.bow.ColumnType(aggr.InputIndex()),
			rCopy.bow.ColumnType(rCopy.intervalColIndex))
		buf := bow.NewBuffer(rCopy.numWindows, typ)

		for rCopy.HasNext() {
			winIndex, w, err := rCopy.Next()
			if err != nil {
				return nil, err
			}

			var val interface{}
			// An aggregation that did not ask for an inclusive window gets
			// the result of UnsetInclusive when the window is inclusive.
			if !aggr.NeedInclusiveWindow() && w.IsInclusive {
				val, err = aggr.Func()(aggr.InputIndex(), (*w).UnsetInclusive())
			} else {
				val, err = aggr.Func()(aggr.InputIndex(), *w)
			}
			if err != nil {
				return nil, err
			}

			// Transformations are chained in order, each receiving the
			// previous result (which may be nil).
			for _, trans := range aggr.Transformations() {
				val, err = trans(val)
				if err != nil {
					return nil, err
				}
			}

			// A nil result is not written to the buffer for this window.
			if val == nil {
				continue
			}

			buf.SetOrDrop(winIndex, val)
		}

		// Without an explicit output name, the input column name is reused.
		if aggr.OutputName() == "" {
			series[colIndex] = bow.NewSeriesFromBuffer(rCopy.bow.ColumnName(aggr.InputIndex()), buf)
		} else {
			series[colIndex] = bow.NewSeriesFromBuffer(aggr.OutputName(), buf)
		}
	}

	return bow.NewBow(series...)
}
239 | 


--------------------------------------------------------------------------------
/rolling/aggregation/XXXbenchmarks_test.go:
--------------------------------------------------------------------------------
  1 | package aggregation
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"math/rand"
  6 | 	"testing"
  7 | 
  8 | 	"github.com/metronlab/bow"
  9 | 	"github.com/metronlab/bow/rolling"
 10 | 	"github.com/stretchr/testify/require"
 11 | )
 12 | 
// BenchSize is the number of rows used by benchmarkBow; BenchmarkBow sets it
// before each run. A BenchSize of 1e8 runs out of memory on a machine with
// 16 GB of RAM.
var BenchSize int64
 15 | 
 16 | func BenchmarkBow(b *testing.B) {
 17 | 	for _, BenchSize = range []int64{1, 1e3, 1e5} {
 18 | 		b.Run(fmt.Sprintf("%d_rows", BenchSize), benchmarkBow)
 19 | 	}
 20 | }
 21 | 
// benchmarkBow benchmarks, for BenchSize rows, several ways of building a bow
// (row-based interfaces, column-based interfaces, series with and without a
// validity bitmap), then rolling.IntervalRolling and Rolling.Aggregate.
//
// NOTE(review): benchBow and r are only assigned inside sub-benchmarks, and
// the later sub-benchmarks read them. Selecting a single late sub-benchmark
// with a -bench filter would leave them unset — confirm whether that matters.
func benchmarkBow(b *testing.B) {
	var benchBow bow.Bow
	var err error

	// Row-based input: one {time, random value} pair per row.
	rows := make([][]interface{}, BenchSize)
	rand.Seed(42)
	for i := int64(0); i < BenchSize; i++ {
		rows[i] = []interface{}{i, rand.Float64()}
	}

	b.Run("NewBowFromRowBasedInterfaces", func(b *testing.B) {
		for n := 0; n < b.N; n++ {
			benchBow, err = bow.NewBowFromRowBasedInterfaces(
				[]string{timeCol, valueCol},
				[]bow.Type{bow.Int64, bow.Float64},
				rows,
			)
			require.NoError(b, err)
		}
	})

	// Column-based input holding the same kind of data; the generator is
	// re-seeded with the same seed.
	columns := make([][]interface{}, 2)
	rand.Seed(42)

	columns[0] = func(size int64) []interface{} {
		// This local timeCol shadows the package-level constant of the same name.
		timeCol := make([]interface{}, size)
		for i := int64(0); i < size; i++ {
			timeCol[i] = i
		}
		return timeCol
	}(BenchSize)

	columns[1] = func(size int64) []interface{} {
		// This local valueCol shadows the package-level constant of the same name.
		valueCol := make([]interface{}, size)
		for i := int64(0); i < size; i++ {
			valueCol[i] = rand.Float64()
		}
		return valueCol
	}(BenchSize)

	b.Run("NewBowFromColBasedInterfaces", func(b *testing.B) {
		for n := 0; n < b.N; n++ {
			benchBow, err = bow.NewBowFromColBasedInterfaces(
				[]string{timeCol, valueCol},
				[]bow.Type{bow.Int64, bow.Float64},
				columns,
			)
			require.NoError(b, err)
		}
	})

	// Series built from buffers filled through SetOrDrop.
	series := make([]bow.Series, 2)
	rand.Seed(42)

	series[0] = func(size int64) bow.Series {
		buf := bow.NewBuffer(int(size), bow.Int64)
		for i := int64(0); i < size; i++ {
			buf.SetOrDrop(int(i), i)
		}
		return bow.NewSeriesFromBuffer(timeCol, buf)
	}(BenchSize)

	series[1] = func(size int64) bow.Series {
		buf := bow.NewBuffer(int(size), bow.Float64)
		for i := int64(0); i < size; i++ {
			buf.SetOrDrop(int(i), rand.Float64())
		}
		return bow.NewSeriesFromBuffer(valueCol, buf)
	}(BenchSize)

	b.Run("NewBow with validity bitmap", func(b *testing.B) {
		for n := 0; n < b.N; n++ {
			benchBow, err = bow.NewBow(series...)
			require.NoError(b, err)
		}
	})

	// Series built from the raw buffer data, passing nil as the last
	// NewSeries argument (the "without validity bitmap" case).
	series = make([]bow.Series, 2)
	rand.Seed(42)

	series[0] = func(size int64) bow.Series {
		buf := bow.NewBuffer(int(size), bow.Int64)
		for i := int64(0); i < size; i++ {
			buf.Data.([]int64)[i] = i
		}
		return bow.NewSeries(timeCol, bow.Int64, buf.Data, nil)
	}(BenchSize)

	series[1] = func(size int64) bow.Series {
		buf := bow.NewBuffer(int(size), bow.Float64)
		for i := int64(0); i < size; i++ {
			buf.Data.([]float64)[i] = rand.Float64()
		}
		return bow.NewSeries(valueCol, bow.Float64, buf.Data, nil)
	}(BenchSize)

	b.Run("NewBow without validity bitmap", func(b *testing.B) {
		for n := 0; n < b.N; n++ {
			benchBow, err = bow.NewBow(series...)
			require.NoError(b, err)
		}
	})

	// Uses the benchBow left by the last sub-benchmark that ran above.
	var r rolling.Rolling
	b.Run("rolling.IntervalRolling", func(b *testing.B) {
		for n := 0; n < b.N; n++ {
			r, err = rolling.IntervalRolling(benchBow, timeCol, 10, rolling.Options{})
			require.NoError(b, err)
		}
	})

	// Uses the r left by the "rolling.IntervalRolling" sub-benchmark.
	b.Run("rolling.Rolling.Aggregate", func(b *testing.B) {
		for n := 0; n < b.N; n++ {
			_, err = r.Aggregate(WindowStart(timeCol), ArithmeticMean(valueCol)).Bow()
			require.NoError(b, err)
		}
	})
}
140 | 


--------------------------------------------------------------------------------
/rolling/aggregation/arithmeticmean.go:
--------------------------------------------------------------------------------
 1 | package aggregation
 2 | 
 3 | import (
 4 | 	"github.com/metronlab/bow"
 5 | 	"github.com/metronlab/bow/rolling"
 6 | )
 7 | 
 8 | func ArithmeticMean(col string) rolling.ColAggregation {
 9 | 	return rolling.NewColAggregation(col, false, bow.Float64,
10 | 		func(col int, w rolling.Window) (interface{}, error) {
11 | 			if w.Bow.NumRows() == 0 {
12 | 				return nil, nil
13 | 			}
14 | 
15 | 			var sum float64
16 | 			var count int
17 | 			for i := 0; i < w.Bow.NumRows(); i++ {
18 | 				value, ok := w.Bow.GetFloat64(col, i)
19 | 				if !ok {
20 | 					continue
21 | 				}
22 | 				sum += value
23 | 				count++
24 | 			}
25 | 			if count == 0 {
26 | 				return nil, nil
27 | 			}
28 | 			return sum / float64(count), nil
29 | 		})
30 | }
31 | 


--------------------------------------------------------------------------------
/rolling/aggregation/arithmeticmean_test.go:
--------------------------------------------------------------------------------
 1 | package aggregation
 2 | 
 3 | import (
 4 | 	"testing"
 5 | 
 6 | 	"github.com/metronlab/bow"
 7 | 	"github.com/stretchr/testify/assert"
 8 | )
 9 | 
// TestArithmeticMean runs ArithmeticMean, without transformations, through
// runTestCases on the empty, float, boolean and string fixtures. The output
// value column is always Float64.
func TestArithmeticMean(t *testing.T) {
	runTestCases(t, ArithmeticMean, nil, []testCase{
		{
			name:      "empty",
			testedBow: emptyBow,
			expectedBow: func() bow.Bow {
				b, err := bow.NewBow(
					bow.NewSeries(timeCol, bow.Int64, []int64{}, nil),
					bow.NewSeries(valueCol, bow.Float64, []float64{}, nil),
				)
				assert.NoError(t, err)
				return b
			}(),
		},
		{
			name:      "sparse",
			testedBow: sparseFloatBow,
			expectedBow: func() bow.Bow {
				b, err := bow.NewBowFromRowBasedInterfaces(
					[]string{timeCol, valueCol},
					[]bow.Type{bow.Int64, bow.Float64},
					[][]interface{}{
						{10, 10.},
						{20, nil},
						{30, nil},
						{40, 10.},
						{50, 15.},
						{60, 15.},
					})
				assert.NoError(t, err)
				return b
			}(),
		},
		{
			// Expected means are 1 for a true-only window, 0 for a
			// false-only window and 0.5 for one true and one false.
			name:      "sparse bool",
			testedBow: sparseBoolBow,
			expectedBow: func() bow.Bow {
				b, err := bow.NewBowFromRowBasedInterfaces(
					[]string{timeCol, valueCol},
					[]bow.Type{bow.Int64, bow.Float64},
					[][]interface{}{
						{10, 1.},
						{20, nil},
						{30, nil},
						{40, 0.},
						{50, 0.5},
						{60, 0.5},
					})
				assert.NoError(t, err)
				return b
			}(),
		},
		{
			// The last window of the fixture holds "test" and "20."; the
			// expected mean is 20, i.e. "test" does not contribute.
			name:      "sparse string",
			testedBow: sparseStringBow,
			expectedBow: func() bow.Bow {
				b, err := bow.NewBowFromRowBasedInterfaces(
					[]string{timeCol, valueCol},
					[]bow.Type{bow.Int64, bow.Float64},
					[][]interface{}{
						{10, 10.},
						{20, nil},
						{30, nil},
						{40, 10.},
						{50, 15.},
						{60, 20.},
					})
				assert.NoError(t, err)
				return b
			}(),
		},
	})
}
83 | 


--------------------------------------------------------------------------------
/rolling/aggregation/core_test.go:
--------------------------------------------------------------------------------
  1 | package aggregation
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"testing"
  6 | 
  7 | 	"github.com/metronlab/bow"
  8 | 	"github.com/metronlab/bow/rolling"
  9 | 	"github.com/metronlab/bow/rolling/transformation"
 10 | 	"github.com/stretchr/testify/assert"
 11 | )
 12 | 
// Column names shared by the fixtures and tests of this package.
const (
	timeCol  = "time"
	valueCol = "value"
)
 17 | 
// testCase is one scenario run by runTestCases.
type testCase struct {
	// name is the sub-test name.
	name string
	// testedBow is the input given to rolling.IntervalRolling.
	testedBow bow.Bow
	// expectedBow is compared to the aggregated bow with Equal.
	expectedBow bow.Bow
}
 23 | 
// Shared input fixtures. Each has an Int64 time column and a value column.
// NOTE(review): construction errors are discarded with `_`, so a broken
// fixture would only show up later as a nil bow.
var (
	// emptyBow has no rows.
	emptyBow, _ = bow.NewBow(
		bow.NewSeries(timeCol, bow.Int64, []int64{}, nil),
		bow.NewSeries(valueCol, bow.Float64, []float64{}, nil),
	)
	// nilBow has only nil values.
	nilBow, _ = bow.NewBowFromRowBasedInterfaces(
		[]string{timeCol, valueCol},
		[]bow.Type{bow.Int64, bow.Float64},
		[][]interface{}{
			{10, nil},
			{11, nil},
			{20, nil},
		})
	// sparseFloatBow mixes valid and nil Float64 values; no row falls in [30, 40).
	sparseFloatBow, _ = bow.NewBowFromRowBasedInterfaces(
		[]string{timeCol, valueCol},
		[]bow.Type{bow.Int64, bow.Float64},
		[][]interface{}{
			{10, 10.}, // partially valid window
			{11, nil},
			{20, nil}, // only invalid window

			// empty window

			{40, nil}, // partially valid with start of window invalid
			{41, 10.},
			{50, 10.}, // valid with two values on start of window
			{51, 20.},
			{61, 10.}, // valid with two values NOT on start of window
			{69, 20.},
		})
	// sparseBoolBow has the same layout as sparseFloatBow with Boolean values.
	sparseBoolBow, _ = bow.NewBowFromRowBasedInterfaces(
		[]string{timeCol, valueCol},
		[]bow.Type{bow.Int64, bow.Boolean},
		[][]interface{}{
			{10, true}, // partially valid window
			{11, nil},
			{20, nil}, // only invalid window

			// empty window

			{40, nil}, // partially valid with start of window invalid
			{41, false},
			{50, true}, // valid with two values on start of window
			{51, false},
			{61, true}, // valid with two values NOT on start of window
			{69, false},
		})
	// sparseStringBow has the same layout with String values; all but "test"
	// are numeric-looking strings.
	sparseStringBow, _ = bow.NewBowFromRowBasedInterfaces(
		[]string{timeCol, valueCol},
		[]bow.Type{bow.Int64, bow.String},
		[][]interface{}{
			{10, "10."}, // partially valid window
			{11, nil},
			{20, nil}, // only invalid window

			// empty window

			{40, nil}, // partially valid with start of window invalid
			{41, "10."},
			{50, "10."}, // valid with two values on start of window
			{51, "20."},
			{61, "test"}, // valid with two values NOT on start of window
			{69, "20."},
		})
)
 89 | 
 90 | func runTestCases(t *testing.T, aggrConstruct rolling.ColAggregationConstruct,
 91 | 	aggrTransforms []transformation.Func, testCases []testCase) {
 92 | 	for _, testCase := range testCases {
 93 | 		t.Run(testCase.name, func(t *testing.T) {
 94 | 			r, err := rolling.IntervalRolling(testCase.testedBow, timeCol, 10, rolling.Options{})
 95 | 			assert.NoError(t, err)
 96 | 			aggregated, err := r.
 97 | 				Aggregate(
 98 | 					WindowStart(timeCol),
 99 | 					aggrConstruct(valueCol).SetTransformations(aggrTransforms...)).
100 | 				Bow()
101 | 			assert.NoError(t, err)
102 | 			assert.NotNil(t, aggregated)
103 | 
104 | 			assert.Equal(t, true, aggregated.Equal(testCase.expectedBow),
105 | 				fmt.Sprintf("expect:\n%v\nhave:\n%v", testCase.expectedBow, aggregated))
106 | 		})
107 | 	}
108 | }
109 | 


--------------------------------------------------------------------------------
/rolling/aggregation/count.go:
--------------------------------------------------------------------------------
 1 | package aggregation
 2 | 
 3 | import (
 4 | 	"github.com/metronlab/bow"
 5 | 	"github.com/metronlab/bow/rolling"
 6 | )
 7 | 
 8 | func Count(col string) rolling.ColAggregation {
 9 | 	return rolling.NewColAggregation(col, false, bow.Int64,
10 | 		func(col int, w rolling.Window) (interface{}, error) {
11 | 			var count int64
12 | 			for i := 0; i < w.Bow.NumRows(); i++ {
13 | 				v := w.Bow.GetValue(col, i)
14 | 				if v != nil {
15 | 					count++
16 | 				}
17 | 			}
18 | 			return count, nil
19 | 		})
20 | }
21 | 


--------------------------------------------------------------------------------
/rolling/aggregation/count_test.go:
--------------------------------------------------------------------------------
 1 | package aggregation
 2 | 
 3 | import (
 4 | 	"testing"
 5 | 
 6 | 	"github.com/metronlab/bow"
 7 | 	"github.com/stretchr/testify/assert"
 8 | )
 9 | 
// TestCount runs Count, without transformations, through runTestCases on the
// empty and sparse float fixtures. The output value column is Int64, and
// windows without any valid value are expected to count 0 rather than nil.
func TestCount(t *testing.T) {
	runTestCases(t, Count, nil, []testCase{
		{
			name:      "empty",
			testedBow: emptyBow,
			expectedBow: func() bow.Bow {
				b, err := bow.NewBow(
					bow.NewSeries(timeCol, bow.Int64, []int64{}, nil),
					bow.NewSeries(valueCol, bow.Int64, []int64{}, nil),
				)
				assert.NoError(t, err)
				return b
			}(),
		},
		{
			name:      "sparse",
			testedBow: sparseFloatBow,
			expectedBow: func() bow.Bow {
				b, err := bow.NewBowFromRowBasedInterfaces(
					[]string{timeCol, valueCol},
					[]bow.Type{bow.Int64, bow.Int64},
					[][]interface{}{
						{10, 1},
						{20, 0},
						{30, 0},
						{40, 1},
						{50, 2},
						{60, 2},
					})
				assert.NoError(t, err)
				return b
			}(),
		},
	})
}
45 | 


--------------------------------------------------------------------------------
/rolling/aggregation/firstlast.go:
--------------------------------------------------------------------------------
 1 | package aggregation
 2 | 
 3 | import (
 4 | 	"github.com/metronlab/bow"
 5 | 	"github.com/metronlab/bow/rolling"
 6 | )
 7 | 
 8 | func First(col string) rolling.ColAggregation {
 9 | 	return rolling.NewColAggregation(col, false, bow.InputDependent,
10 | 		func(col int, w rolling.Window) (interface{}, error) {
11 | 			if w.Bow.NumRows() == 0 {
12 | 				return nil, nil
13 | 			}
14 | 
15 | 			value, irow := w.Bow.GetNextValue(col, 0)
16 | 			if irow == -1 {
17 | 				return nil, nil
18 | 			}
19 | 			return value, nil
20 | 		})
21 | }
22 | 
23 | func Last(col string) rolling.ColAggregation {
24 | 	return rolling.NewColAggregation(col, false, bow.InputDependent,
25 | 		func(col int, w rolling.Window) (interface{}, error) {
26 | 			if w.Bow.NumRows() == 0 {
27 | 				return nil, nil
28 | 			}
29 | 
30 | 			value, irow := w.Bow.GetPrevValue(col, w.Bow.NumRows()-1)
31 | 			if irow == -1 {
32 | 				return nil, nil
33 | 			}
34 | 			return value, nil
35 | 		})
36 | }
37 | 


--------------------------------------------------------------------------------
/rolling/aggregation/firstlast_test.go:
--------------------------------------------------------------------------------
  1 | package aggregation
  2 | 
  3 | import (
  4 | 	"testing"
  5 | 
  6 | 	"github.com/metronlab/bow"
  7 | 	"github.com/stretchr/testify/assert"
  8 | )
  9 | 
 10 | // TestFirst runs First through runTestCases on the empty bow and on the
 11 | // sparse float, bool and string fixtures.
 12 | func TestFirst(t *testing.T) {
 13 | 	// expected builds a bow with columns timeCol (Int64) and valueCol
 14 | 	// (valueType) from row-based data, reporting errors via assert.NoError.
 15 | 	expected := func(valueType bow.Type, rows [][]interface{}) bow.Bow {
 16 | 		res, err := bow.NewBowFromRowBasedInterfaces(
 17 | 			[]string{timeCol, valueCol}, []bow.Type{bow.Int64, valueType}, rows)
 18 | 		assert.NoError(t, err)
 19 | 		return res
 20 | 	}
 21 | 
 22 | 	noRows, err := bow.NewBow(
 23 | 		bow.NewSeries(timeCol, bow.Int64, []int64{}, nil),
 24 | 		bow.NewSeries(valueCol, bow.Float64, []float64{}, nil),
 25 | 	)
 26 | 	assert.NoError(t, err)
 27 | 
 28 | 	runTestCases(t, First, nil, []testCase{
 29 | 		{name: "empty", testedBow: emptyBow, expectedBow: noRows},
 30 | 		{
 31 | 			name:      "sparse",
 32 | 			testedBow: sparseFloatBow,
 33 | 			expectedBow: expected(bow.Float64, [][]interface{}{
 34 | 				{10, 10.},
 35 | 				{20, nil},
 36 | 				{30, nil},
 37 | 				{40, 10.},
 38 | 				{50, 10.},
 39 | 				{60, 10.},
 40 | 			}),
 41 | 		},
 42 | 		{
 43 | 			name:      "sparse bool",
 44 | 			testedBow: sparseBoolBow,
 45 | 			expectedBow: expected(bow.Boolean, [][]interface{}{
 46 | 				{10, true},
 47 | 				{20, nil},
 48 | 				{30, nil},
 49 | 				{40, false},
 50 | 				{50, true},
 51 | 				{60, true},
 52 | 			}),
 53 | 		},
 54 | 		{
 55 | 			name:      "sparse string",
 56 | 			testedBow: sparseStringBow,
 57 | 			expectedBow: expected(bow.String, [][]interface{}{
 58 | 				{10, "10."},
 59 | 				{20, nil},
 60 | 				{30, nil},
 61 | 				{40, "10."},
 62 | 				{50, "10."},
 63 | 				{60, "test"},
 64 | 			}),
 65 | 		},
 66 | 	})
 67 | }
 83 | 
 84 | // TestLast runs Last through runTestCases on the empty bow and on the sparse
 85 | // float, bool and string fixtures. Column names come from the shared timeCol
 86 | // and valueCol constants, as in the sibling tests of this package.
 87 | // NOTE(review): assumes timeCol == "time" (the literal used previously) - confirm.
 88 | func TestLast(t *testing.T) {
 89 | 	// expected builds a bow with columns timeCol (Int64) and valueCol
 90 | 	// (valueType) from row-based data, reporting errors via assert.NoError.
 91 | 	expected := func(valueType bow.Type, rows [][]interface{}) bow.Bow {
 92 | 		res, err := bow.NewBowFromRowBasedInterfaces(
 93 | 			[]string{timeCol, valueCol}, []bow.Type{bow.Int64, valueType}, rows)
 94 | 		assert.NoError(t, err)
 95 | 		return res
 96 | 	}
 97 | 
 98 | 	noRows, err := bow.NewBow(
 99 | 		bow.NewSeries(timeCol, bow.Int64, []int64{}, nil),
100 | 		bow.NewSeries(valueCol, bow.Float64, []float64{}, nil),
101 | 	)
102 | 	assert.NoError(t, err)
103 | 
104 | 	runTestCases(t, Last, nil, []testCase{
105 | 		{name: "empty", testedBow: emptyBow, expectedBow: noRows},
106 | 		{
107 | 			name:      "sparse float",
108 | 			testedBow: sparseFloatBow,
109 | 			expectedBow: expected(bow.Float64, [][]interface{}{
110 | 				{10, 10.},
111 | 				{20, nil},
112 | 				{30, nil},
113 | 				{40, 10.},
114 | 				{50, 20.},
115 | 				{60, 20.},
116 | 			}),
117 | 		},
118 | 		{
119 | 			name:      "sparse bool",
120 | 			testedBow: sparseBoolBow,
121 | 			expectedBow: expected(bow.Boolean, [][]interface{}{
122 | 				{10, true},
123 | 				{20, nil},
124 | 				{30, nil},
125 | 				{40, false},
126 | 				{50, false},
127 | 				{60, false},
128 | 			}),
129 | 		},
130 | 		{
131 | 			name:      "sparse string",
132 | 			testedBow: sparseStringBow,
133 | 			expectedBow: expected(bow.String, [][]interface{}{
134 | 				{10, "10."},
135 | 				{20, nil},
136 | 				{30, nil},
137 | 				{40, "10."},
138 | 				{50, "20."},
139 | 				{60, "20."},
140 | 			}),
141 | 		},
142 | 	})
143 | }
157 | 


--------------------------------------------------------------------------------
/rolling/aggregation/integral.go:
--------------------------------------------------------------------------------
 1 | package aggregation
 2 | 
 3 | import (
 4 | 	"github.com/metronlab/bow"
 5 | 	"github.com/metronlab/bow/rolling"
 6 | )
 7 | 
 8 | func IntegralTrapezoid(col string) rolling.ColAggregation {
 9 | 	return rolling.NewColAggregation(col, true, bow.Float64,
10 | 		func(colIndex int, w rolling.Window) (interface{}, error) {
11 | 			if w.Bow.NumRows() == 0 {
12 | 				return nil, nil
13 | 			}
14 | 
15 | 			var sum float64
16 | 			var ok bool
17 | 			t0, v0, rowIndex := w.Bow.GetNextFloat64s(w.IntervalColIndex, colIndex, 0)
18 | 			if rowIndex < 0 {
19 | 				return nil, nil
20 | 			}
21 | 
22 | 			for rowIndex >= 0 {
23 | 				t1, v1, nextRowIndex := w.Bow.GetNextFloat64s(w.IntervalColIndex, colIndex, rowIndex+1)
24 | 				if nextRowIndex < 0 {
25 | 					break
26 | 				}
27 | 
28 | 				sum += (v0 + v1) / 2 * (t1 - t0)
29 | 				ok = true
30 | 
31 | 				t0, v0, rowIndex = t1, v1, nextRowIndex
32 | 			}
33 | 			if !ok {
34 | 				return nil, nil
35 | 			}
36 | 			return sum, nil
37 | 		})
38 | }
39 | 
40 | // IntegralStep builds a ColAggregation (Float64 output) that integrates each
41 | // window as a step function: every point returned by Bow.GetNextFloat64s keeps
42 | // its value until the next point. It yields nil when no point is found.
43 | func IntegralStep(col string) rolling.ColAggregation {
44 | 	step := func(colIndex int, win rolling.Window) (interface{}, error) {
45 | 		if win.Bow.NumRows() == 0 {
46 | 			return nil, nil
47 | 		}
48 | 		curT, curV, curRow := win.Bow.GetNextFloat64s(win.IntervalColIndex, colIndex, 0)
49 | 		if curRow < 0 {
50 | 			return nil, nil
51 | 		}
52 | 
53 | 		var area float64
54 | 		for {
55 | 			nextT, nextV, nextRow := win.Bow.GetNextFloat64s(win.IntervalColIndex, colIndex, curRow+1)
56 | 			isLast := nextRow < 0
57 | 			// No further point: the last value is held up to the window's LastValue.
58 | 			if isLast {
59 | 				nextT = float64(win.LastValue)
60 | 			}
61 | 			area += curV * (nextT - curT)
62 | 			if isLast {
63 | 				return area, nil
64 | 			}
65 | 			curT, curV, curRow = nextT, nextV, nextRow
66 | 		}
67 | 	}
68 | 	return rolling.NewColAggregation(col, false, bow.Float64, step)
69 | }
70 | 


--------------------------------------------------------------------------------
/rolling/aggregation/integral_test.go:
--------------------------------------------------------------------------------
  1 | package aggregation
  2 | 
  3 | import (
  4 | 	"testing"
  5 | 
  6 | 	"github.com/metronlab/bow"
  7 | 	"github.com/metronlab/bow/rolling/transformation"
  8 | 	"github.com/stretchr/testify/assert"
  9 | )
 10 | 
 11 | // TestIntegralStep runs IntegralStep through runTestCases on the empty bow and
 12 | // on the sparse float, bool and string fixtures; every output is Float64.
 13 | func TestIntegralStep(t *testing.T) {
 14 | 	// expected builds a bow with columns timeCol (Int64) and valueCol
 15 | 	// (Float64) from row-based data, reporting errors via assert.NoError.
 16 | 	expected := func(rows [][]interface{}) bow.Bow {
 17 | 		res, err := bow.NewBowFromRowBasedInterfaces(
 18 | 			[]string{timeCol, valueCol}, []bow.Type{bow.Int64, bow.Float64}, rows)
 19 | 		assert.NoError(t, err)
 20 | 		return res
 21 | 	}
 22 | 
 23 | 	noRows, err := bow.NewBow(
 24 | 		bow.NewSeries(timeCol, bow.Int64, []int64{}, nil),
 25 | 		bow.NewSeries(valueCol, bow.Float64, []float64{}, nil),
 26 | 	)
 27 | 	assert.NoError(t, err)
 28 | 
 29 | 	runTestCases(t, IntegralStep, nil, []testCase{
 30 | 		{name: "empty", testedBow: emptyBow, expectedBow: noRows},
 31 | 		{
 32 | 			name:      "sparse float",
 33 | 			testedBow: sparseFloatBow,
 34 | 			expectedBow: expected([][]interface{}{
 35 | 				{10, 100.},
 36 | 				{20, nil},
 37 | 				{30, nil},
 38 | 				{40, 100 * 0.9},
 39 | 				{50, 100*0.1 + 200*0.9},
 40 | 				{60, 100*0.8 + 200*0.1},
 41 | 			}),
 42 | 		},
 43 | 		{
 44 | 			name:      "sparse bool",
 45 | 			testedBow: sparseBoolBow,
 46 | 			expectedBow: expected([][]interface{}{
 47 | 				{10, 10.},
 48 | 				{20, nil},
 49 | 				{30, nil},
 50 | 				{40, 0.},
 51 | 				{50, 1.},
 52 | 				{60, 8.},
 53 | 			}),
 54 | 		},
 55 | 		{
 56 | 			name:      "sparse string",
 57 | 			testedBow: sparseStringBow,
 58 | 			expectedBow: expected([][]interface{}{
 59 | 				{10, 100.},
 60 | 				{20, nil},
 61 | 				{30, nil},
 62 | 				{40, 100 * 0.9},
 63 | 				{50, 100*0.1 + 200*0.9},
 64 | 				{60, 20.},
 65 | 			}),
 66 | 		},
 67 | 	})
 68 | }
 84 | 
 85 | // TestIntegralStep_scaled runs IntegralStep with one transformation that
 86 | // multiplies every non-nil result by factor, and expects the scaled integrals.
 87 | func TestIntegralStep_scaled(t *testing.T) {
 88 | 	factor := 0.1
 89 | 	scale := func(x interface{}) (interface{}, error) {
 90 | 		if x != nil {
 91 | 			return x.(float64) * factor, nil
 92 | 		}
 93 | 		return nil, nil
 94 | 	}
 95 | 
 96 | 	// expected builds a bow with columns timeCol (Int64) and valueCol
 97 | 	// (Float64) from row-based data, reporting errors via assert.NoError.
 98 | 	expected := func(rows [][]interface{}) bow.Bow {
 99 | 		res, err := bow.NewBowFromRowBasedInterfaces(
100 | 			[]string{timeCol, valueCol}, []bow.Type{bow.Int64, bow.Float64}, rows)
101 | 		assert.NoError(t, err)
102 | 		return res
103 | 	}
104 | 
105 | 	noRows, err := bow.NewBow(
106 | 		bow.NewSeries(timeCol, bow.Int64, []int64{}, nil),
107 | 		bow.NewSeries(valueCol, bow.Float64, []float64{}, nil),
108 | 	)
109 | 	assert.NoError(t, err)
110 | 
111 | 	runTestCases(t, IntegralStep, []transformation.Func{scale}, []testCase{
112 | 		{name: "empty", testedBow: emptyBow, expectedBow: noRows},
113 | 		{
114 | 			name:      "sparse",
115 | 			testedBow: sparseFloatBow,
116 | 			expectedBow: expected([][]interface{}{
117 | 				{10, factor * (100.)},
118 | 				{20, nil},
119 | 				{30, nil},
120 | 				{40, factor * (100 * 0.9)},
121 | 				{50, factor * (100*0.1 + 200*0.9)},
122 | 				{60, factor * (100*0.8 + 200*0.1)},
123 | 			}),
124 | 		},
125 | 	})
126 | }
129 | 
130 | // TestIntegralTrapezoid runs IntegralTrapezoid through runTestCases on the
131 | // empty bow and on the sparse float, bool and string fixtures.
132 | func TestIntegralTrapezoid(t *testing.T) {
133 | 	// expected builds a bow with columns timeCol (Int64) and valueCol
134 | 	// (Float64) from row-based data, reporting errors via assert.NoError.
135 | 	expected := func(rows [][]interface{}) bow.Bow {
136 | 		res, err := bow.NewBowFromRowBasedInterfaces(
137 | 			[]string{timeCol, valueCol}, []bow.Type{bow.Int64, bow.Float64}, rows)
138 | 		assert.NoError(t, err)
139 | 		return res
140 | 	}
141 | 
142 | 	noRows, err := bow.NewBow(
143 | 		bow.NewSeries(timeCol, bow.Int64, []int64{}, nil),
144 | 		bow.NewSeries(valueCol, bow.Float64, []float64{}, nil),
145 | 	)
146 | 	assert.NoError(t, err)
147 | 
148 | 	runTestCases(t, IntegralTrapezoid, nil, []testCase{
149 | 		{name: "empty", testedBow: emptyBow, expectedBow: noRows},
150 | 		{
151 | 			name:      "sparse float",
152 | 			testedBow: sparseFloatBow,
153 | 			expectedBow: expected([][]interface{}{
154 | 				{10, nil},
155 | 				{20, nil},
156 | 				{30, nil},
157 | 				{40, 9 * 10.},
158 | 				{50, 15.},
159 | 				{60, 8 * (15.)},
160 | 			}),
161 | 		},
162 | 		{
163 | 			name:      "sparse bool",
164 | 			testedBow: sparseBoolBow,
165 | 			expectedBow: expected([][]interface{}{
166 | 				{10, nil},
167 | 				{20, nil},
168 | 				{30, nil},
169 | 				{40, 4.5},
170 | 				{50, 0.5},
171 | 				{60, 4.},
172 | 			}),
173 | 		},
174 | 		{
175 | 			name:      "sparse string",
176 | 			testedBow: sparseStringBow,
177 | 			expectedBow: expected([][]interface{}{
178 | 				{10, nil},
179 | 				{20, nil},
180 | 				{30, nil},
181 | 				{40, 9 * 10.},
182 | 				{50, 15.},
183 | 				{60, nil},
184 | 			}),
185 | 		},
186 | 	})
187 | }
203 | 


--------------------------------------------------------------------------------
/rolling/aggregation/minmax.go:
--------------------------------------------------------------------------------
 1 | package aggregation
 2 | 
 3 | import (
 4 | 	"github.com/metronlab/bow"
 5 | 	"github.com/metronlab/bow/rolling"
 6 | )
 7 | 
 8 | // Min builds a ColAggregation (Float64 output) giving, for each window, the
 9 | // smallest value of column col among the rows for which Bow.GetFloat64
10 | // reports ok; the result is nil when the window has no such row.
11 | func Min(col string) rolling.ColAggregation {
12 | 	lowest := func(colIndex int, win rolling.Window) (interface{}, error) {
13 | 		if win.Bow.NumRows() == 0 {
14 | 			return nil, nil
15 | 		}
16 | 
17 | 		var smallest float64
18 | 		seen := false
19 | 		for row := 0; row < win.Bow.NumRows(); row++ {
20 | 			v, valid := win.Bow.GetFloat64(colIndex, row)
21 | 			if valid && (!seen || v < smallest) {
22 | 				smallest, seen = v, true
23 | 			}
24 | 		}
25 | 		if !seen {
26 | 			return nil, nil
27 | 		}
28 | 		return smallest, nil
29 | 	}
30 | 	return rolling.NewColAggregation(col, false, bow.Float64, lowest)
31 | }
32 | 
33 | // Max builds a ColAggregation (Float64 output) giving, for each window, the
34 | // largest value of column col among the rows for which Bow.GetFloat64
35 | // reports ok; the result is nil when the window has no such row.
36 | func Max(col string) rolling.ColAggregation {
37 | 	highest := func(colIndex int, win rolling.Window) (interface{}, error) {
38 | 		if win.Bow.NumRows() == 0 {
39 | 			return nil, nil
40 | 		}
41 | 
42 | 		var largest float64 // running maximum; meaningful only once seen is true
43 | 		seen := false
44 | 		for row := 0; row < win.Bow.NumRows(); row++ {
45 | 			v, valid := win.Bow.GetFloat64(colIndex, row)
46 | 			if valid && (!seen || v > largest) {
47 | 				largest, seen = v, true
48 | 			}
49 | 		}
50 | 		if !seen {
51 | 			return nil, nil
52 | 		}
53 | 		return largest, nil
54 | 	}
55 | 	return rolling.NewColAggregation(col, false, bow.Float64, highest)
56 | }
57 | 


--------------------------------------------------------------------------------
/rolling/aggregation/minmax_test.go:
--------------------------------------------------------------------------------
  1 | package aggregation
  2 | 
  3 | import (
  4 | 	"testing"
  5 | 
  6 | 	"github.com/metronlab/bow"
  7 | 	"github.com/stretchr/testify/assert"
  8 | )
  9 | 
 10 | // TestMin runs Min through runTestCases on the empty bow and on the sparse
 11 | // float, bool and string fixtures; every expected output column is Float64.
 12 | func TestMin(t *testing.T) {
 13 | 	// expected builds a bow with columns timeCol (Int64) and valueCol
 14 | 	// (Float64) from row-based data, reporting errors via assert.NoError.
 15 | 	expected := func(rows [][]interface{}) bow.Bow {
 16 | 		res, err := bow.NewBowFromRowBasedInterfaces(
 17 | 			[]string{timeCol, valueCol}, []bow.Type{bow.Int64, bow.Float64}, rows)
 18 | 		assert.NoError(t, err)
 19 | 		return res
 20 | 	}
 21 | 
 22 | 	noRows, err := bow.NewBow(
 23 | 		bow.NewSeries(timeCol, bow.Int64, []int64{}, nil),
 24 | 		bow.NewSeries(valueCol, bow.Float64, []float64{}, nil),
 25 | 	)
 26 | 	assert.NoError(t, err)
 27 | 
 28 | 	runTestCases(t, Min, nil, []testCase{
 29 | 		{name: "empty", testedBow: emptyBow, expectedBow: noRows},
 30 | 		{
 31 | 			name:      "sparse float",
 32 | 			testedBow: sparseFloatBow,
 33 | 			expectedBow: expected([][]interface{}{
 34 | 				{10, 10.},
 35 | 				{20, nil},
 36 | 				{30, nil},
 37 | 				{40, 10.},
 38 | 				{50, 10.},
 39 | 				{60, 10.},
 40 | 			}),
 41 | 		},
 42 | 		{
 43 | 			name:      "sparse bool",
 44 | 			testedBow: sparseBoolBow,
 45 | 			expectedBow: expected([][]interface{}{
 46 | 				{10, 1.},
 47 | 				{20, nil},
 48 | 				{30, nil},
 49 | 				{40, 0.},
 50 | 				{50, 0.},
 51 | 				{60, 0.},
 52 | 			}),
 53 | 		},
 54 | 		{
 55 | 			name:      "sparse string",
 56 | 			testedBow: sparseStringBow,
 57 | 			expectedBow: expected([][]interface{}{
 58 | 				{10, 10.},
 59 | 				{20, nil},
 60 | 				{30, nil},
 61 | 				{40, 10.},
 62 | 				{50, 10.},
 63 | 				{60, 20.},
 64 | 			}),
 65 | 		},
 66 | 	})
 67 | }
 83 | 
 84 | // TestMax runs Max through runTestCases on the empty bow and on the sparse
 85 | // float, bool and string fixtures; every expected output column is Float64.
 86 | func TestMax(t *testing.T) {
 87 | 	// expected builds a bow with columns timeCol (Int64) and valueCol
 88 | 	// (Float64) from row-based data, reporting errors via assert.NoError.
 89 | 	expected := func(rows [][]interface{}) bow.Bow {
 90 | 		res, err := bow.NewBowFromRowBasedInterfaces(
 91 | 			[]string{timeCol, valueCol}, []bow.Type{bow.Int64, bow.Float64}, rows)
 92 | 		assert.NoError(t, err)
 93 | 		return res
 94 | 	}
 95 | 
 96 | 	noRows, err := bow.NewBow(
 97 | 		bow.NewSeries(timeCol, bow.Int64, []int64{}, nil),
 98 | 		bow.NewSeries(valueCol, bow.Float64, []float64{}, nil),
 99 | 	)
100 | 	assert.NoError(t, err)
101 | 
102 | 	runTestCases(t, Max, nil, []testCase{
103 | 		{name: "empty", testedBow: emptyBow, expectedBow: noRows},
104 | 		{
105 | 			name:      "sparse float",
106 | 			testedBow: sparseFloatBow,
107 | 			expectedBow: expected([][]interface{}{
108 | 				{10, 10.},
109 | 				{20, nil},
110 | 				{30, nil},
111 | 				{40, 10.},
112 | 				{50, 20.},
113 | 				{60, 20.},
114 | 			}),
115 | 		},
116 | 		{
117 | 			name:      "sparse bool",
118 | 			testedBow: sparseBoolBow,
119 | 			expectedBow: expected([][]interface{}{
120 | 				{10, 1.},
121 | 				{20, nil},
122 | 				{30, nil},
123 | 				{40, 0.},
124 | 				{50, 1.},
125 | 				{60, 1.},
126 | 			}),
127 | 		},
128 | 		{
129 | 			name:      "sparse string",
130 | 			testedBow: sparseStringBow,
131 | 			expectedBow: expected([][]interface{}{
132 | 				{10, 10.},
133 | 				{20, nil},
134 | 				{30, nil},
135 | 				{40, 10.},
136 | 				{50, 20.},
137 | 				{60, 20.},
138 | 			}),
139 | 		},
140 | 	})
141 | }
157 | 


--------------------------------------------------------------------------------
/rolling/aggregation/mode.go:
--------------------------------------------------------------------------------
 1 | package aggregation
 2 | 
 3 | import (
 4 | 	"github.com/metronlab/bow"
 5 | 	"github.com/metronlab/bow/rolling"
 6 | )
 7 | 
 8 | // Mode builds a ColAggregation yielding, for each window, the non-nil value of
 9 | // column col that occurs most often; on a tie the value that first reaches
10 | // the highest count wins. The result is nil when every value is nil.
11 | func Mode(col string) rolling.ColAggregation {
12 | 	mostFrequent := func(colIndex int, win rolling.Window) (interface{}, error) {
13 | 		if win.Bow.NumRows() == 0 {
14 | 			return nil, nil
15 | 		}
16 | 		var winner interface{}
17 | 		bestCount, counts := 0, map[interface{}]int{}
18 | 		for row := 0; row < win.Bow.NumRows(); row++ {
19 | 			val := win.Bow.GetValue(colIndex, row)
20 | 			if val == nil {
21 | 				continue
22 | 			}
23 | 			seen := counts[val] + 1
24 | 			counts[val] = seen
25 | 			if seen > bestCount {
26 | 				winner, bestCount = val, seen
27 | 			}
28 | 		}
29 | 		return winner, nil
30 | 	}
31 | 	return rolling.NewColAggregation(col, false, bow.InputDependent, mostFrequent)
32 | }
33 | 


--------------------------------------------------------------------------------
/rolling/aggregation/mode_test.go:
--------------------------------------------------------------------------------
  1 | package aggregation
  2 | 
  3 | import (
  4 | 	"testing"
  5 | 
  6 | 	"github.com/metronlab/bow"
  7 | 	"github.com/stretchr/testify/assert"
  8 | )
  9 | 
 10 | func TestMode(t *testing.T) {
 11 | 	var modeFloatBow, _ = bow.NewBowFromRowBasedInterfaces(
 12 | 		[]string{timeCol, valueCol},
 13 | 		[]bow.Type{bow.Int64, bow.Float64},
 14 | 		[][]interface{}{
 15 | 			{10, 10.}, // same value window
 16 | 			{11, 10.},
 17 | 
 18 | 			{20, 42.}, // most occurrences to 42
 19 | 			{21, 42.},
 20 | 			{22, 10.},
 21 | 
 22 | 			{30, nil}, // most occurrences to 10
 23 | 			{31, nil},
 24 | 			{32, 10.},
 25 | 
 26 | 			// Empty window
 27 | 
 28 | 			{50, nil}, // only nil values to nil
 29 | 			{51, nil},
 30 | 		})
 31 | 
 32 | 	runTestCases(t, Mode, nil, []testCase{
 33 | 		{
 34 | 			name:      "empty",
 35 | 			testedBow: emptyBow,
 36 | 			expectedBow: func() bow.Bow {
 37 | 				b, err := bow.NewBow(
 38 | 					bow.NewSeries(timeCol, bow.Int64, []int64{}, nil),
 39 | 					bow.NewSeries(valueCol, bow.Float64, []float64{}, nil),
 40 | 				)
 41 | 				assert.NoError(t, err)
 42 | 				return b
 43 | 			}(),
 44 | 		},
 45 | 		{
 46 | 			name:      "mode float",
 47 | 			testedBow: modeFloatBow,
 48 | 			expectedBow: func() bow.Bow {
 49 | 				b, err := bow.NewBowFromRowBasedInterfaces(
 50 | 					[]string{timeCol, valueCol},
 51 | 					[]bow.Type{bow.Int64, bow.Float64},
 52 | 					[][]interface{}{
 53 | 						{10, 10.},
 54 | 						{20, 42.},
 55 | 						{30, 10.},
 56 | 						{40, nil},
 57 | 						{50, nil},
 58 | 					})
 59 | 				assert.NoError(t, err)
 60 | 				return b
 61 | 			}(),
 62 | 		},
 63 | 		{
 64 | 			name:      "sparse bool",
 65 | 			testedBow: sparseBoolBow,
 66 | 			expectedBow: func() bow.Bow {
 67 | 				b, err := bow.NewBowFromRowBasedInterfaces(
 68 | 					[]string{timeCol, valueCol},
 69 | 					[]bow.Type{bow.Int64, bow.Boolean},
 70 | 					[][]interface{}{
 71 | 						{10, true},
 72 | 						{20, nil},
 73 | 						{30, nil},
 74 | 						{40, false},
 75 | 						{50, true},
 76 | 						{60, true},
 77 | 					})
 78 | 				assert.NoError(t, err)
 79 | 				return b
 80 | 			}(),
 81 | 		},
 82 | 		{
 83 | 			name:      "sparse string",
 84 | 			testedBow: sparseStringBow,
 85 | 			expectedBow: func() bow.Bow {
 86 | 				b, err := bow.NewBowFromRowBasedInterfaces(
 87 | 					[]string{timeCol, valueCol},
 88 | 					[]bow.Type{bow.Int64, bow.String},
 89 | 					[][]interface{}{
 90 | 						{10, "10."},
 91 | 						{20, nil},
 92 | 						{30, nil},
 93 | 						{40, "10."},
 94 | 						{50, "10."},
 95 | 						{60, "test"},
 96 | 					})
 97 | 				assert.NoError(t, err)
 98 | 				return b
 99 | 			}(),
100 | 		},
101 | 	})
102 | }
103 | 


--------------------------------------------------------------------------------
/rolling/aggregation/sum.go:
--------------------------------------------------------------------------------
 1 | package aggregation
 2 | 
 3 | import (
 4 | 	"github.com/metronlab/bow"
 5 | 	"github.com/metronlab/bow/rolling"
 6 | )
 7 | 
 8 | func Sum(col string) rolling.ColAggregation {
 9 | 	return rolling.NewColAggregation(col, false, bow.Float64,
10 | 		func(col int, w rolling.Window) (interface{}, error) {
11 | 			if w.Bow.NumRows() == 0 {
12 | 				return 0., nil
13 | 			}
14 | 
15 | 			var sum float64
16 | 			for i := 0; i < w.Bow.NumRows(); i++ {
17 | 				value, ok := w.Bow.GetFloat64(col, i)
18 | 				if !ok {
19 | 					continue
20 | 				}
21 | 				sum += value
22 | 			}
23 | 			return sum, nil
24 | 		})
25 | }
26 | 


--------------------------------------------------------------------------------
/rolling/aggregation/sum_test.go:
--------------------------------------------------------------------------------
 1 | package aggregation
 2 | 
 3 | import (
 4 | 	"testing"
 5 | 
 6 | 	"github.com/metronlab/bow"
 7 | 	"github.com/stretchr/testify/assert"
 8 | )
 9 | 
10 | // TestSum runs Sum through runTestCases on the empty bow and on the sparse
11 | // float, bool and string fixtures; every expected output column is Float64.
12 | func TestSum(t *testing.T) {
13 | 	// expected builds a bow with columns timeCol (Int64) and valueCol
14 | 	// (Float64) from row-based data, reporting errors via assert.NoError.
15 | 	expected := func(rows [][]interface{}) bow.Bow {
16 | 		res, err := bow.NewBowFromRowBasedInterfaces(
17 | 			[]string{timeCol, valueCol}, []bow.Type{bow.Int64, bow.Float64}, rows)
18 | 		assert.NoError(t, err)
19 | 		return res
20 | 	}
21 | 
22 | 	noRows, err := bow.NewBow(
23 | 		bow.NewSeries(timeCol, bow.Int64, []int64{}, nil),
24 | 		bow.NewSeries(valueCol, bow.Float64, []float64{}, nil),
25 | 	)
26 | 	assert.NoError(t, err)
27 | 
28 | 	runTestCases(t, Sum, nil, []testCase{
29 | 		{name: "empty", testedBow: emptyBow, expectedBow: noRows},
30 | 		{
31 | 			name:      "sparse float",
32 | 			testedBow: sparseFloatBow,
33 | 			expectedBow: expected([][]interface{}{
34 | 				{10, 10.},
35 | 				{20, 0.},
36 | 				{30, 0.},
37 | 				{40, 10.},
38 | 				{50, 30.},
39 | 				{60, 30.},
40 | 			}),
41 | 		},
42 | 		{
43 | 			name:      "sparse bool",
44 | 			testedBow: sparseBoolBow,
45 | 			expectedBow: expected([][]interface{}{
46 | 				{10, 1.},
47 | 				{20, 0.},
48 | 				{30, 0.},
49 | 				{40, 0.},
50 | 				{50, 1.},
51 | 				{60, 1.},
52 | 			}),
53 | 		},
54 | 		{
55 | 			name:      "sparse string",
56 | 			testedBow: sparseStringBow,
57 | 			expectedBow: expected([][]interface{}{
58 | 				{10, 10.},
59 | 				{20, 0.},
60 | 				{30, 0.},
61 | 				{40, 10.},
62 | 				{50, 30.},
63 | 				{60, 20.},
64 | 			}),
65 | 		},
66 | 	})
67 | }
83 | 


--------------------------------------------------------------------------------
/rolling/aggregation/weightedmean.go:
--------------------------------------------------------------------------------
 1 | package aggregation
 2 | 
 3 | import (
 4 | 	"github.com/metronlab/bow"
 5 | 	"github.com/metronlab/bow/rolling"
 6 | )
 7 | 
 8 | // WeightedAverageStep builds a ColAggregation that divides each window's
 9 | // IntegralStep by its width (LastValue - FirstValue); a zero width is not guarded.
10 | func WeightedAverageStep(col string) rolling.ColAggregation {
11 | 	integrate := IntegralStep(col).Func()
12 | 	average := func(colIndex int, win rolling.Window) (interface{}, error) {
13 | 		area, err := integrate(colIndex, win)
14 | 		if err != nil || area == nil {
15 | 			return area, err
16 | 		}
17 | 		return area.(float64) / float64(win.LastValue-win.FirstValue), nil
18 | 	}
19 | 	return rolling.NewColAggregation(col, false, bow.Float64, average)
20 | }
21 | 
22 | // WeightedAverageLinear builds a ColAggregation that divides each window's
23 | // IntegralTrapezoid by its width (LastValue - FirstValue); a zero width is not guarded.
24 | func WeightedAverageLinear(col string) rolling.ColAggregation {
25 | 	integrate := IntegralTrapezoid(col).Func()
26 | 	average := func(colIndex int, win rolling.Window) (interface{}, error) {
27 | 		area, err := integrate(colIndex, win)
28 | 		if err != nil || area == nil {
29 | 			return area, err
30 | 		}
31 | 		return area.(float64) / float64(win.LastValue-win.FirstValue), nil
32 | 	}
33 | 	return rolling.NewColAggregation(col, true, bow.Float64, average)
34 | }
35 | 


--------------------------------------------------------------------------------
/rolling/aggregation/weightedmean_test.go:
--------------------------------------------------------------------------------
  1 | package aggregation
  2 | 
  3 | import (
  4 | 	"testing"
  5 | 
  6 | 	"github.com/metronlab/bow"
  7 | 	"github.com/stretchr/testify/assert"
  8 | )
  9 | 
 10 | // TestWeightedAverageStep runs WeightedAverageStep through runTestCases on the
 11 | // empty bow, the all-nil bow and the sparse float, bool and string fixtures.
 12 | func TestWeightedAverageStep(t *testing.T) {
 13 | 	// expected builds a bow with columns timeCol (Int64) and valueCol
 14 | 	// (Float64) from row-based data, reporting errors via assert.NoError.
 15 | 	expected := func(rows [][]interface{}) bow.Bow {
 16 | 		res, err := bow.NewBowFromRowBasedInterfaces(
 17 | 			[]string{timeCol, valueCol}, []bow.Type{bow.Int64, bow.Float64}, rows)
 18 | 		assert.NoError(t, err)
 19 | 		return res
 20 | 	}
 21 | 
 22 | 	noRows, err := bow.NewBow(
 23 | 		bow.NewSeries(timeCol, bow.Int64, []int64{}, nil),
 24 | 		bow.NewSeries(valueCol, bow.Float64, []float64{}, nil),
 25 | 	)
 26 | 	assert.NoError(t, err)
 27 | 
 28 | 	runTestCases(t, WeightedAverageStep, nil, []testCase{
 29 | 		{name: "empty", testedBow: emptyBow, expectedBow: noRows},
 30 | 		{
 31 | 			name:      "sparse float",
 32 | 			testedBow: sparseFloatBow,
 33 | 			expectedBow: expected([][]interface{}{
 34 | 				{10, 10.},
 35 | 				{20, nil},
 36 | 				{30, nil},
 37 | 				{40, 10 * 0.9},
 38 | 				{50, 10*0.1 + 20*0.9},
 39 | 				{60, 10*0.8 + 20*0.1},
 40 | 			}),
 41 | 		},
 42 | 		{
 43 | 			name:      "float only nil",
 44 | 			testedBow: nilBow,
 45 | 			expectedBow: expected([][]interface{}{
 46 | 				{10, nil},
 47 | 				{20, nil},
 48 | 			}),
 49 | 		},
 50 | 		{
 51 | 			name:      "sparse bool",
 52 | 			testedBow: sparseBoolBow,
 53 | 			expectedBow: expected([][]interface{}{
 54 | 				{10, 1.},
 55 | 				{20, nil},
 56 | 				{30, nil},
 57 | 				{40, 0.},
 58 | 				{50, 0.1},
 59 | 				{60, 0.8},
 60 | 			}),
 61 | 		},
 62 | 		{
 63 | 			name:      "sparse string",
 64 | 			testedBow: sparseStringBow,
 65 | 			expectedBow: expected([][]interface{}{
 66 | 				{10, 10.},
 67 | 				{20, nil},
 68 | 				{30, nil},
 69 | 				{40, 9.},
 70 | 				{50, 19.},
 71 | 				{60, 2.},
 72 | 			}),
 73 | 		},
 74 | 	})
 75 | }
 98 | 
 99 | // TestWeightedAverageLinear runs WeightedAverageLinear through runTestCases on
100 | // the empty bow and on the sparse float, bool and string fixtures.
101 | func TestWeightedAverageLinear(t *testing.T) {
102 | 	// expected builds a bow with columns timeCol (Int64) and valueCol
103 | 	// (Float64) from row-based data, reporting errors via assert.NoError.
104 | 	expected := func(rows [][]interface{}) bow.Bow {
105 | 		res, err := bow.NewBowFromRowBasedInterfaces(
106 | 			[]string{timeCol, valueCol}, []bow.Type{bow.Int64, bow.Float64}, rows)
107 | 		assert.NoError(t, err)
108 | 		return res
109 | 	}
110 | 
111 | 	noRows, err := bow.NewBow(
112 | 		bow.NewSeries(timeCol, bow.Int64, []int64{}, nil),
113 | 		bow.NewSeries(valueCol, bow.Float64, []float64{}, nil),
114 | 	)
115 | 	assert.NoError(t, err)
116 | 
117 | 	runTestCases(t, WeightedAverageLinear, nil, []testCase{
118 | 		{name: "empty", testedBow: emptyBow, expectedBow: noRows},
119 | 		{
120 | 			name:      "sparse float",
121 | 			testedBow: sparseFloatBow,
122 | 			expectedBow: expected([][]interface{}{
123 | 				{10, nil},
124 | 				{20, nil},
125 | 				{30, nil},
126 | 				{40, 10 * 0.9},
127 | 				{50, 15 * 0.1},
128 | 				{60, 15 * 0.8},
129 | 			}),
130 | 		},
131 | 		{
132 | 			name:      "sparse bool",
133 | 			testedBow: sparseBoolBow,
134 | 			expectedBow: expected([][]interface{}{
135 | 				{10, nil},
136 | 				{20, nil},
137 | 				{30, nil},
138 | 				{40, 0.45},
139 | 				{50, 0.05},
140 | 				{60, 0.4},
141 | 			}),
142 | 		},
143 | 		{
144 | 			name:      "sparse string",
145 | 			testedBow: sparseStringBow,
146 | 			expectedBow: expected([][]interface{}{
147 | 				{10, nil},
148 | 				{20, nil},
149 | 				{30, nil},
150 | 				{40, 9.},
151 | 				{50, 1.5},
152 | 				{60, nil},
153 | 			}),
154 | 		},
155 | 	})
156 | }
172 | 


--------------------------------------------------------------------------------
/rolling/aggregation/whole.go:
--------------------------------------------------------------------------------
 1 | package aggregation
 2 | 
 3 | import (
 4 | 	"errors"
 5 | 	"fmt"
 6 | 
 7 | 	"github.com/metronlab/bow"
 8 | 	"github.com/metronlab/bow/rolling"
 9 | )
10 | 
11 | // Aggregate the whole dataframe on column intervalColName with one or several rolling.ColAggregation.
12 | func Aggregate(b bow.Bow, intervalColName string, aggrs ...rolling.ColAggregation) (bow.Bow, error) {
13 | 	if b == nil {
14 | 		return nil, errors.New("nil bow")
15 | 	}
16 | 	if len(aggrs) == 0 {
17 | 		return nil, errors.New("at least one column aggregation is required")
18 | 	}
19 | 
20 | 	intervalColIndex, err := b.ColumnIndex(intervalColName)
21 | 	if err != nil {
22 | 		return nil, err
23 | 	}
24 | 
25 | 	series := make([]bow.Series, len(aggrs))
26 | 
27 | 	for aggrIndex, aggr := range aggrs {
28 | 		if aggr.InputName() == "" {
29 | 			return nil, fmt.Errorf("column aggregation %d: no input name", aggrIndex)
30 | 		}
31 | 
32 | 		inputColIndex, err := b.ColumnIndex(aggr.InputName())
33 | 		if err != nil {
34 | 			return nil, fmt.Errorf("column aggregation %d: %w", aggrIndex, err)
35 | 		}
36 | 
37 | 		aggr.SetInputIndex(inputColIndex)
38 | 
39 | 		name := aggr.OutputName()
40 | 		if name == "" {
41 | 			name = b.ColumnName(aggr.InputIndex())
42 | 		}
43 | 
44 | 		typ := aggr.GetReturnType(
45 | 			b.ColumnType(aggr.InputIndex()),
46 | 			b.ColumnType(aggr.InputIndex()))
47 | 
48 | 		var buf bow.Buffer
49 | 		if b.NumRows() == 0 {
50 | 			buf = bow.NewBuffer(0, typ)
51 | 		} else {
52 | 			buf = bow.NewBuffer(1, typ)
53 | 
54 | 			firstValue, firstValueIndex := b.GetNextFloat64(intervalColIndex, 0)
55 | 			if firstValueIndex == -1 {
56 | 				firstValue = -1
57 | 			}
58 | 
59 | 			lastValue, lastValueIndex := b.GetPrevFloat64(intervalColIndex, b.NumRows()-1)
60 | 			if lastValueIndex == -1 {
61 | 				lastValue = -1
62 | 			}
63 | 
64 | 			w := rolling.Window{
65 | 				Bow:              b,
66 | 				IntervalColIndex: intervalColIndex,
67 | 				IsInclusive:      true,
68 | 				FirstIndex:       0,
69 | 				FirstValue:       int64(firstValue),
70 | 				LastValue:        int64(lastValue),
71 | 			}
72 | 
73 | 			aggrValue, err := aggr.Func()(aggr.InputIndex(), w)
74 | 			if err != nil {
75 | 				return nil, fmt.Errorf("column aggregation %d: %w", aggrIndex, err)
76 | 			}
77 | 
78 | 			for transIndex, trans := range aggr.Transformations() {
79 | 				aggrValue, err = trans(aggrValue)
80 | 				if err != nil {
81 | 					return nil, fmt.Errorf("column aggregation %d: transIndex %d: %w",
82 | 						aggrIndex, transIndex, err)
83 | 				}
84 | 			}
85 | 
86 | 			buf.SetOrDropStrict(0, aggrValue)
87 | 		}
88 | 
89 | 		series[aggrIndex] = bow.NewSeriesFromBuffer(name, buf)
90 | 	}
91 | 
92 | 	return bow.NewBow(series...)
93 | }
94 | 


--------------------------------------------------------------------------------
/rolling/aggregation/windowstart.go:
--------------------------------------------------------------------------------
 1 | package aggregation
 2 | 
 3 | import (
 4 | 	"github.com/metronlab/bow"
 5 | 	"github.com/metronlab/bow/rolling"
 6 | )
 7 | 
 8 | func WindowStart(col string) rolling.ColAggregation {
 9 | 	return rolling.NewColAggregation(col, false, bow.IteratorDependent,
10 | 		func(col int, w rolling.Window) (interface{}, error) {
11 | 			return w.FirstValue, nil
12 | 		})
13 | }
14 | 


--------------------------------------------------------------------------------
/rolling/aggregation_test.go:
--------------------------------------------------------------------------------
  1 | package rolling
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"testing"
  6 | 
  7 | 	"github.com/metronlab/bow"
  8 | 	"github.com/stretchr/testify/assert"
  9 | 	"github.com/stretchr/testify/require"
 10 | )
 11 | 
 12 | func TestIntervalRolling_Aggregate(t *testing.T) {
 13 | 	b, err := bow.NewBowFromColBasedInterfaces(
 14 | 		[]string{timeCol, valueCol},
 15 | 		[]bow.Type{bow.Int64, bow.Float64},
 16 | 		[][]interface{}{
 17 | 			{10, 15, 16, 25, 29},
 18 | 			{1.0, 1.5, 1.6, 2.5, 2.9},
 19 | 		})
 20 | 	require.NoError(t, err)
 21 | 	r, err := IntervalRolling(b, timeCol, 10, Options{})
 22 | 	require.NoError(t, err)
 23 | 
 24 | 	timeAggr := NewColAggregation(timeCol, false, bow.Int64,
 25 | 		func(col int, w Window) (interface{}, error) {
 26 | 			return w.FirstValue, nil
 27 | 		})
 28 | 	valueAggr := NewColAggregation(valueCol, false, bow.Float64,
 29 | 		func(col int, w Window) (interface{}, error) {
 30 | 			return float64(w.Bow.NumRows()), nil
 31 | 		})
 32 | 	doubleAggr := NewColAggregation(valueCol, false, bow.Float64,
 33 | 		func(col int, w Window) (interface{}, error) {
 34 | 			return float64(w.Bow.NumRows()) * 2, nil
 35 | 		})
 36 | 
 37 | 	t.Run("keep columns", func(t *testing.T) {
 38 | 		aggregated, err := r.
 39 | 			Aggregate(timeAggr, valueAggr).
 40 | 			Bow()
 41 | 		assert.NoError(t, err)
 42 | 		assert.NotNil(t, aggregated)
 43 | 		expected, _ := bow.NewBowFromColBasedInterfaces(
 44 | 			[]string{timeCol, valueCol},
 45 | 			[]bow.Type{bow.Int64, bow.Float64},
 46 | 			[][]interface{}{
 47 | 				{10, 20},
 48 | 				{3., 2.},
 49 | 			})
 50 | 		assert.True(t, aggregated.Equal(expected))
 51 | 	})
 52 | 
 53 | 	t.Run("swap columns", func(t *testing.T) {
 54 | 		aggregated, err := r.
 55 | 			Aggregate(valueAggr, timeAggr).
 56 | 			Bow()
 57 | 		assert.NoError(t, err)
 58 | 		assert.NotNil(t, aggregated)
 59 | 		expected, _ := bow.NewBowFromColBasedInterfaces(
 60 | 			[]string{valueCol, timeCol},
 61 | 			[]bow.Type{bow.Float64, bow.Int64},
 62 | 			[][]interface{}{
 63 | 				{3., 2.},
 64 | 				{10, 20},
 65 | 			})
 66 | 		assert.True(t, aggregated.Equal(expected))
 67 | 	})
 68 | 
 69 | 	t.Run("rename columns", func(t *testing.T) {
 70 | 		aggregated, err := r.Aggregate(timeAggr.RenameOutput("a"), valueAggr.RenameOutput("b")).Bow()
 71 | 		assert.NoError(t, err)
 72 | 		assert.NotNil(t, aggregated)
 73 | 		expected, _ := bow.NewBowFromColBasedInterfaces(
 74 | 			[]string{"a", "b"},
 75 | 			[]bow.Type{bow.Int64, bow.Float64},
 76 | 			[][]interface{}{
 77 | 				{10, 20},
 78 | 				{3., 2.},
 79 | 			})
 80 | 		assert.True(t, aggregated.Equal(expected))
 81 | 	})
 82 | 
 83 | 	t.Run("less than in original", func(t *testing.T) {
 84 | 		aggregated, err := r.Aggregate(timeAggr).Bow()
 85 | 		assert.NoError(t, err)
 86 | 		assert.NotNil(t, aggregated)
 87 | 		expected, _ := bow.NewBowFromColBasedInterfaces(
 88 | 			[]string{timeCol},
 89 | 			[]bow.Type{bow.Int64},
 90 | 			[][]interface{}{
 91 | 				{10, 20},
 92 | 			})
 93 | 		assert.True(t, aggregated.Equal(expected))
 94 | 	})
 95 | 
 96 | 	t.Run("more than in original", func(t *testing.T) {
 97 | 		aggregated, err := r.Aggregate(timeAggr, doubleAggr.RenameOutput("double"), valueAggr).Bow()
 98 | 		assert.NoError(t, err)
 99 | 		assert.NotNil(t, aggregated)
100 | 		expected, _ := bow.NewBowFromColBasedInterfaces(
101 | 			[]string{timeCol, "double", valueCol},
102 | 			[]bow.Type{bow.Int64, bow.Float64, bow.Float64},
103 | 			[][]interface{}{
104 | 				{10, 20},
105 | 				{6., 4.},
106 | 				{3., 2.},
107 | 			})
108 | 		assert.True(t, aggregated.Equal(expected))
109 | 	})
110 | 
111 | 	t.Run("missing interval colIndex", func(t *testing.T) {
112 | 		_, err := r.Aggregate(valueAggr).Bow()
113 | 		assert.EqualError(t, err, fmt.Sprintf(
114 | 			"intervalRolling.indexedAggregations: must keep interval column '%s'", timeCol))
115 | 	})
116 | 
117 | 	t.Run("invalid colIndex", func(t *testing.T) {
118 | 		_, err := r.Aggregate(timeAggr, NewColAggregation("-", false, bow.Int64,
119 | 			func(col int, w Window) (interface{}, error) { return nil, nil })).Bow()
120 | 		assert.EqualError(t, err,
121 | 			"intervalRolling.indexedAggregations: no column '-'")
122 | 	})
123 | }
124 | 
125 | func TestWindow_UnsetInclusive(t *testing.T) {
126 | 	inclusiveBow, err := bow.NewBowFromColBasedInterfaces(
127 | 		[]string{timeCol, valueCol},
128 | 		[]bow.Type{bow.Int64, bow.Int64},
129 | 		[][]interface{}{
130 | 			{1, 2},
131 | 			{1, 2}})
132 | 	assert.NoError(t, err)
133 | 	exclusiveBow, err := bow.NewBowFromColBasedInterfaces(
134 | 		[]string{timeCol, valueCol},
135 | 		[]bow.Type{bow.Int64, bow.Int64},
136 | 		[][]interface{}{
137 | 			{1},
138 | 			{1}})
139 | 	assert.NoError(t, err)
140 | 
141 | 	inclusiveWindow := Window{
142 | 		Bow:              inclusiveBow,
143 | 		FirstIndex:       0,
144 | 		IntervalColIndex: 0,
145 | 		FirstValue:       0,
146 | 		LastValue:        2,
147 | 		IsInclusive:      true,
148 | 	}
149 | 
150 | 	exclusiveWindow := inclusiveWindow.UnsetInclusive()
151 | 	assert.True(t, exclusiveWindow.Bow.Equal(exclusiveBow))
152 | 	exclusiveWindow.Bow = nil
153 | 	assert.Equal(t, Window{
154 | 		Bow:              nil,
155 | 		FirstIndex:       0,
156 | 		IntervalColIndex: 0,
157 | 		FirstValue:       0,
158 | 		LastValue:        2,
159 | 		IsInclusive:      false,
160 | 	}, exclusiveWindow)
161 | 
162 | 	// inclusive window should not be modified
163 | 	assert.Equal(t, Window{
164 | 		Bow:              inclusiveBow,
165 | 		FirstIndex:       0,
166 | 		IntervalColIndex: 0,
167 | 		FirstValue:       0,
168 | 		LastValue:        2,
169 | 		IsInclusive:      true,
170 | 	}, inclusiveWindow)
171 | }
172 | 


--------------------------------------------------------------------------------
/rolling/interpolation.go:
--------------------------------------------------------------------------------
  1 | package rolling
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 
  6 | 	"github.com/metronlab/bow"
  7 | )
  8 | 
// ColInterpolation is used to interpolate a column.
// It pairs a column name with the column types the interpolation accepts and the
// function computing the interpolated value.
type ColInterpolation struct {
	// colName is the name of the column to interpolate.
	colName string
	// inputTypes lists the column types accepted by this interpolation.
	inputTypes []bow.Type
	// fn computes the interpolated value.
	fn ColInterpolationFunc

	// colIndex is the index of colName in the rolling's bow; it is resolved by
	// validateInterpolation and is not set by NewColInterpolation.
	colIndex int
}
 17 | 
// ColInterpolationFunc is a function that takes a column index, a Window, the full
// bow.Bow and the previous row, and provides a value at the start of the Window.
// It may return a nil value together with a nil error.
type ColInterpolationFunc func(colIndex int, window Window, fullBow, prevRow bow.Bow) (interface{}, error)
 20 | 
 21 | // NewColInterpolation returns a new ColInterpolation.
 22 | func NewColInterpolation(colName string, inputTypes []bow.Type, fn ColInterpolationFunc) ColInterpolation {
 23 | 	return ColInterpolation{
 24 | 		colName:    colName,
 25 | 		inputTypes: inputTypes,
 26 | 		fn:         fn,
 27 | 	}
 28 | }
 29 | 
 30 | func (r *intervalRolling) Interpolate(interps ...ColInterpolation) Rolling {
 31 | 	if r.err != nil {
 32 | 		return r
 33 | 	}
 34 | 
 35 | 	rCopy := *r
 36 | 	if len(interps) == 0 {
 37 | 		return rCopy.setError(fmt.Errorf("at least one column interpolation is required"))
 38 | 	}
 39 | 
 40 | 	newIntervalCol := -1
 41 | 	for i := range interps {
 42 | 		isInterval, err := r.validateInterpolation(&interps[i], i)
 43 | 		if err != nil {
 44 | 			return rCopy.setError(fmt.Errorf("intervalRolling.validateInterpolation: %w", err))
 45 | 		}
 46 | 		if isInterval {
 47 | 			newIntervalCol = i
 48 | 		}
 49 | 	}
 50 | 
 51 | 	if newIntervalCol == -1 {
 52 | 		return rCopy.setError(fmt.Errorf("must keep interval column '%s'", r.bow.ColumnName(r.intervalColIndex)))
 53 | 	}
 54 | 
 55 | 	b, err := rCopy.interpolateWindows(interps)
 56 | 	if err != nil {
 57 | 		return rCopy.setError(fmt.Errorf("intervalRolling.interpolateWindows: %w", err))
 58 | 	}
 59 | 	if b == nil {
 60 | 		b = r.bow.NewEmptySlice()
 61 | 	}
 62 | 
 63 | 	newR, err := newIntervalRolling(b, newIntervalCol, rCopy.interval, rCopy.options)
 64 | 	if err != nil {
 65 | 		return rCopy.setError(fmt.Errorf("newIntervalRolling: %w", err))
 66 | 	}
 67 | 
 68 | 	return newR
 69 | }
 70 | 
 71 | func (r *intervalRolling) validateInterpolation(interp *ColInterpolation, newIndex int) (bool, error) {
 72 | 	if interp.colName == "" {
 73 | 		return false, fmt.Errorf("interpolation %d has no column name", newIndex)
 74 | 	}
 75 | 
 76 | 	var err error
 77 | 	interp.colIndex, err = r.bow.ColumnIndex(interp.colName)
 78 | 	if err != nil {
 79 | 		return false, err
 80 | 	}
 81 | 
 82 | 	var typeOk bool
 83 | 	colType := r.bow.ColumnType(interp.colIndex)
 84 | 	for _, inputType := range interp.inputTypes {
 85 | 		if colType == inputType {
 86 | 			typeOk = true
 87 | 			break
 88 | 		}
 89 | 	}
 90 | 	if !typeOk {
 91 | 		return false, fmt.Errorf("accepts types %v, got type %s",
 92 | 			interp.inputTypes, colType)
 93 | 	}
 94 | 
 95 | 	return interp.colIndex == r.intervalColIndex, nil
 96 | }
 97 | 
 98 | func (r *intervalRolling) interpolateWindows(interps []ColInterpolation) (bow.Bow, error) {
 99 | 	rCopy := *r
100 | 
101 | 	bows := make([]bow.Bow, rCopy.numWindows)
102 | 
103 | 	for rCopy.HasNext() {
104 | 		winIndex, w, err := rCopy.Next()
105 | 		if err != nil {
106 | 			return nil, err
107 | 		}
108 | 
109 | 		bows[winIndex], err = rCopy.interpolateWindow(interps, w)
110 | 		if err != nil {
111 | 			return nil, err
112 | 		}
113 | 	}
114 | 
115 | 	return bow.AppendBows(bows...)
116 | }
117 | 
118 | func (r *intervalRolling) interpolateWindow(interps []ColInterpolation, window *Window) (bow.Bow, error) {
119 | 	var firstColValue int64 = -1
120 | 	if window.Bow.NumRows() > 0 {
121 | 		firstColVal, i := window.Bow.GetNextFloat64(r.intervalColIndex, 0)
122 | 		if i > -1 {
123 | 			firstColValue = int64(firstColVal)
124 | 		}
125 | 	}
126 | 
127 | 	// has start: call interpolation anyway for those stateful
128 | 	if firstColValue == window.FirstValue {
129 | 		for _, interpolation := range interps {
130 | 			_, err := interpolation.fn(interpolation.colIndex, *window, r.bow, r.options.PrevRow)
131 | 			if err != nil {
132 | 				return nil, err
133 | 			}
134 | 		}
135 | 
136 | 		return window.Bow, nil
137 | 	}
138 | 
139 | 	// missing start
140 | 	series := make([]bow.Series, len(interps))
141 | 	for colIndex, interpolation := range interps {
142 | 		colType := window.Bow.ColumnType(interpolation.colIndex)
143 | 
144 | 		interpolatedValue, err := interpolation.fn(interpolation.colIndex, *window, r.bow, r.options.PrevRow)
145 | 		if err != nil {
146 | 			return nil, err
147 | 		}
148 | 
149 | 		buf := bow.NewBuffer(1, colType)
150 | 		buf.SetOrDrop(0, interpolatedValue)
151 | 
152 | 		series[colIndex] = bow.NewSeriesFromBuffer(window.Bow.ColumnName(interpolation.colIndex), buf)
153 | 	}
154 | 
155 | 	startBow, err := bow.NewBow(series...)
156 | 	if err != nil {
157 | 		return nil, err
158 | 	}
159 | 
160 | 	return bow.AppendBows(startBow, window.Bow)
161 | }
162 | 


--------------------------------------------------------------------------------
/rolling/interpolation/linear.go:
--------------------------------------------------------------------------------
 1 | package interpolation
 2 | 
 3 | import (
 4 | 	"github.com/metronlab/bow"
 5 | 	"github.com/metronlab/bow/rolling"
 6 | )
 7 | 
 8 | func Linear(colName string) rolling.ColInterpolation {
 9 | 	var prevT0, prevV0 float64
10 | 	var prevValid bool
11 | 	return rolling.NewColInterpolation(colName, []bow.Type{bow.Int64, bow.Float64},
12 | 		func(colIndexToFill int, w rolling.Window, fullBow, prevRow bow.Bow) (interface{}, error) {
13 | 			var prevValidT0, prevValidV0 bool
14 | 			if w.FirstIndex == 0 && prevRow != nil {
15 | 				prevT0, prevValidT0 = prevRow.GetFloat64(w.IntervalColIndex, prevRow.NumRows()-1)
16 | 				prevV0, prevValidV0 = prevRow.GetFloat64(colIndexToFill, prevRow.NumRows()-1)
17 | 				prevValid = prevValidT0 && prevValidV0
18 | 			}
19 | 
20 | 			t0, v0, prevIndex := fullBow.GetPrevFloat64s(w.IntervalColIndex, colIndexToFill, w.FirstIndex-1)
21 | 			if prevIndex == -1 {
22 | 				if !prevValid {
23 | 					return nil, nil
24 | 				}
25 | 				t0 = prevT0
26 | 				v0 = prevV0
27 | 			}
28 | 
29 | 			t2, v2, nextIndex := fullBow.GetNextFloat64s(w.IntervalColIndex, colIndexToFill, w.FirstIndex)
30 | 			if nextIndex == -1 {
31 | 				return nil, nil
32 | 			}
33 | 
34 | 			coef := (float64(w.FirstValue) - t0) / (t2 - t0)
35 | 			return ((v2 - v0) * coef) + v0, nil
36 | 		},
37 | 	)
38 | }
39 | 


--------------------------------------------------------------------------------
/rolling/interpolation/linear_test.go:
--------------------------------------------------------------------------------
  1 | package interpolation
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"testing"
  6 | 
  7 | 	"github.com/metronlab/bow"
  8 | 	"github.com/metronlab/bow/rolling"
  9 | 	"github.com/stretchr/testify/assert"
 10 | 	"github.com/stretchr/testify/require"
 11 | )
 12 | 
 13 | func TestLinear(t *testing.T) {
 14 | 	var interval int64 = 2
 15 | 
 16 | 	ascLinearTestBow, err := bow.NewBowFromRowBasedInterfaces(
 17 | 		[]string{timeCol, valueCol},
 18 | 		[]bow.Type{bow.Int64, bow.Float64},
 19 | 		[][]interface{}{
 20 | 			{10, 10.},
 21 | 			{15, 15.},
 22 | 			{17, 17.},
 23 | 		})
 24 | 	require.NoError(t, err)
 25 | 
 26 | 	t.Run("asc no options", func(t *testing.T) {
 27 | 		expected, err := bow.NewBowFromRowBasedInterfaces(
 28 | 			[]string{timeCol, valueCol},
 29 | 			[]bow.Type{bow.Int64, bow.Float64},
 30 | 			[][]interface{}{
 31 | 				{10, 10.},
 32 | 				{12, 12.},
 33 | 				{14, 14.},
 34 | 				{15, 15.},
 35 | 				{16, 16.},
 36 | 				{17, 17.},
 37 | 			})
 38 | 		require.NoError(t, err)
 39 | 
 40 | 		r, err := rolling.IntervalRolling(ascLinearTestBow, timeCol, interval, rolling.Options{})
 41 | 		require.NoError(t, err)
 42 | 
 43 | 		filled, err := r.Interpolate(WindowStart(timeCol), Linear(valueCol)).Bow()
 44 | 		assert.NoError(t, err)
 45 | 		assert.True(t, filled.Equal(expected),
 46 | 			fmt.Sprintf("expected:\n%s\nactual:\n%s", expected.String(), filled.String()))
 47 | 	})
 48 | 
 49 | 	t.Run("asc with offset", func(t *testing.T) {
 50 | 		expected, err := bow.NewBowFromRowBasedInterfaces(
 51 | 			[]string{timeCol, valueCol},
 52 | 			[]bow.Type{bow.Int64, bow.Float64},
 53 | 			[][]interface{}{
 54 | 				{9, nil},
 55 | 				{10, 10.},
 56 | 				{11, 11.},
 57 | 				{13, 13.},
 58 | 				{15, 15.},
 59 | 				{17, 17.},
 60 | 			})
 61 | 		require.NoError(t, err)
 62 | 
 63 | 		r, err := rolling.IntervalRolling(ascLinearTestBow, timeCol, interval, rolling.Options{Offset: 3})
 64 | 		require.NoError(t, err)
 65 | 
 66 | 		filled, err := r.Interpolate(WindowStart(timeCol), Linear(valueCol)).Bow()
 67 | 		assert.NoError(t, err)
 68 | 		assert.True(t, filled.Equal(expected),
 69 | 			fmt.Sprintf("expected:\n%s\nactual:\n%s", expected.String(), filled.String()))
 70 | 	})
 71 | 
 72 | 	descLinearTestBow, err := bow.NewBowFromRowBasedInterfaces(
 73 | 		[]string{timeCol, valueCol},
 74 | 		[]bow.Type{bow.Int64, bow.Float64},
 75 | 		[][]interface{}{
 76 | 			{10, 30.},
 77 | 			{15, 25.},
 78 | 			{17, 24.},
 79 | 		})
 80 | 	require.NoError(t, err)
 81 | 
 82 | 	t.Run("desc no options", func(t *testing.T) {
 83 | 		expected, err := bow.NewBowFromRowBasedInterfaces(
 84 | 			[]string{timeCol, valueCol},
 85 | 			[]bow.Type{bow.Int64, bow.Float64},
 86 | 			[][]interface{}{
 87 | 				{10, 30.},
 88 | 				{12, 28.},
 89 | 				{14, 26.},
 90 | 				{15, 25.},
 91 | 				{16, 24.5},
 92 | 				{17, 24.},
 93 | 			})
 94 | 		require.NoError(t, err)
 95 | 
 96 | 		r, err := rolling.IntervalRolling(descLinearTestBow, timeCol, interval, rolling.Options{})
 97 | 		require.NoError(t, err)
 98 | 
 99 | 		filled, err := r.Interpolate(WindowStart(timeCol), Linear(valueCol)).Bow()
100 | 		assert.NoError(t, err)
101 | 		assert.True(t, filled.Equal(expected),
102 | 			fmt.Sprintf("expected:\n%s\nactual:\n%s", expected.String(), filled.String()))
103 | 	})
104 | 
105 | 	t.Run("desc with offset", func(t *testing.T) {
106 | 		expected, err := bow.NewBowFromRowBasedInterfaces(
107 | 			[]string{timeCol, valueCol},
108 | 			[]bow.Type{bow.Int64, bow.Float64},
109 | 			[][]interface{}{
110 | 				{9, nil},
111 | 				{10, 30.},
112 | 				{11, 29.},
113 | 				{13, 27.},
114 | 				{15, 25.},
115 | 				{17, 24.},
116 | 			})
117 | 		require.NoError(t, err)
118 | 
119 | 		r, err := rolling.IntervalRolling(descLinearTestBow, timeCol, interval, rolling.Options{Offset: 3})
120 | 		require.NoError(t, err)
121 | 
122 | 		filled, err := r.Interpolate(WindowStart(timeCol), Linear(valueCol)).Bow()
123 | 		assert.NoError(t, err)
124 | 		assert.True(t, filled.Equal(expected),
125 | 			fmt.Sprintf("expected:\n%s\nactual:\n%s", expected.String(), filled.String()))
126 | 	})
127 | 
128 | 	t.Run("string error", func(t *testing.T) {
129 | 		b, err := bow.NewBowFromRowBasedInterfaces(
130 | 			[]string{timeCol, valueCol},
131 | 			[]bow.Type{bow.Int64, bow.String},
132 | 			[][]interface{}{
133 | 				{10, "test"},
134 | 				{15, "test2"},
135 | 			})
136 | 		require.NoError(t, err)
137 | 
138 | 		r, err := rolling.IntervalRolling(b, timeCol, interval, rolling.Options{})
139 | 		require.NoError(t, err)
140 | 
141 | 		_, err = r.Interpolate(WindowStart(timeCol), Linear(valueCol)).Bow()
142 | 		assert.EqualError(t, err,
143 | 			"intervalRolling.validateInterpolation: accepts types [int64 float64], got type utf8")
144 | 	})
145 | 
146 | 	t.Run("bool error", func(t *testing.T) {
147 | 		b, err := bow.NewBowFromRowBasedInterfaces(
148 | 			[]string{timeCol, valueCol},
149 | 			[]bow.Type{bow.Int64, bow.Boolean},
150 | 			[][]interface{}{
151 | 				{10, true},
152 | 				{15, false},
153 | 			})
154 | 		require.NoError(t, err)
155 | 
156 | 		r, err := rolling.IntervalRolling(b, timeCol, interval, rolling.Options{})
157 | 		require.NoError(t, err)
158 | 
159 | 		res, err := r.Interpolate(WindowStart(timeCol), Linear(valueCol)).Bow()
160 | 		assert.EqualError(t, err,
161 | 			"intervalRolling.validateInterpolation: accepts types [int64 float64], got type bool",
162 | 			"have res: %v", res)
163 | 	})
164 | }
165 | 


--------------------------------------------------------------------------------
/rolling/interpolation/none.go:
--------------------------------------------------------------------------------
 1 | package interpolation
 2 | 
 3 | import (
 4 | 	"github.com/metronlab/bow"
 5 | 	"github.com/metronlab/bow/rolling"
 6 | )
 7 | 
 8 | func None(colName string) rolling.ColInterpolation {
 9 | 	return rolling.NewColInterpolation(colName, []bow.Type{bow.Int64, bow.Float64, bow.Boolean},
10 | 		func(colIndexToFill int, w rolling.Window, fullBow, prevRow bow.Bow) (interface{}, error) {
11 | 			return nil, nil
12 | 		},
13 | 	)
14 | }
15 | 


--------------------------------------------------------------------------------
/rolling/interpolation/none_test.go:
--------------------------------------------------------------------------------
 1 | package interpolation
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"testing"
 6 | 
 7 | 	"github.com/metronlab/bow"
 8 | 	"github.com/metronlab/bow/rolling"
 9 | 	"github.com/stretchr/testify/assert"
10 | 	"github.com/stretchr/testify/require"
11 | )
12 | 
13 | func TestNone(t *testing.T) {
14 | 	var interval int64 = 2
15 | 
16 | 	b, err := bow.NewBowFromRowBasedInterfaces(
17 | 		[]string{timeCol, valueCol},
18 | 		[]bow.Type{bow.Int64, bow.Float64},
19 | 		[][]interface{}{
20 | 			{10, 1.0},
21 | 			{13, 1.3},
22 | 		})
23 | 	require.NoError(t, err)
24 | 
25 | 	t.Run("no options", func(t *testing.T) {
26 | 		expected, err := bow.NewBowFromRowBasedInterfaces(
27 | 			[]string{timeCol, valueCol},
28 | 			[]bow.Type{bow.Int64, bow.Float64},
29 | 			[][]interface{}{
30 | 				{10, 1.0},
31 | 				{12, nil},
32 | 				{13, 1.3},
33 | 			})
34 | 		require.NoError(t, err)
35 | 
36 | 		r, err := rolling.IntervalRolling(b, timeCol, interval, rolling.Options{})
37 | 		require.NoError(t, err)
38 | 
39 | 		filled, err := r.Interpolate(WindowStart(timeCol), None(valueCol)).Bow()
40 | 		assert.NoError(t, err)
41 | 		assert.True(t, filled.Equal(expected),
42 | 			fmt.Sprintf("expected:\n%s\nactual:\n%s", expected.String(), filled.String()))
43 | 	})
44 | 
45 | 	t.Run("with offset", func(t *testing.T) {
46 | 		expected, err := bow.NewBowFromRowBasedInterfaces(
47 | 			[]string{timeCol, valueCol},
48 | 			[]bow.Type{bow.Int64, bow.Float64},
49 | 			[][]interface{}{
50 | 				{9, nil},
51 | 				{10, 1.0},
52 | 				{11, nil},
53 | 				{13, 1.3},
54 | 			})
55 | 		require.NoError(t, err)
56 | 
57 | 		r, err := rolling.IntervalRolling(b, timeCol, interval, rolling.Options{Offset: 1})
58 | 		require.NoError(t, err)
59 | 
60 | 		filled, err := r.Interpolate(WindowStart(timeCol), None(valueCol)).Bow()
61 | 		assert.NoError(t, err)
62 | 		assert.True(t, filled.Equal(expected),
63 | 			fmt.Sprintf("expected:\n%s\nactual:\n%s", expected.String(), filled.String()))
64 | 	})
65 | }
66 | 


--------------------------------------------------------------------------------
/rolling/interpolation/stepprevious.go:
--------------------------------------------------------------------------------
 1 | package interpolation
 2 | 
 3 | import (
 4 | 	"github.com/metronlab/bow"
 5 | 	"github.com/metronlab/bow/rolling"
 6 | )
 7 | 
 8 | func StepPrevious(colName string) rolling.ColInterpolation {
 9 | 	var prevVal interface{}
10 | 	return rolling.NewColInterpolation(colName, []bow.Type{bow.Int64, bow.Float64, bow.Boolean, bow.String},
11 | 		func(colIndexToFill int, w rolling.Window, fullBow, prevRow bow.Bow) (interface{}, error) {
12 | 			// For the first window, add the previous row to interpolate correctly
13 | 			if w.FirstIndex == 0 && prevRow != nil {
14 | 				prevVal = prevRow.GetValue(colIndexToFill, prevRow.NumRows()-1)
15 | 			}
16 | 
17 | 			var v interface{}
18 | 			_, v, _ = fullBow.GetPrevValues(w.IntervalColIndex, colIndexToFill, w.FirstIndex-1)
19 | 			if v != nil {
20 | 				prevVal = v
21 | 			}
22 | 
23 | 			return prevVal, nil
24 | 		},
25 | 	)
26 | }
27 | 


--------------------------------------------------------------------------------
/rolling/interpolation/stepprevious_test.go:
--------------------------------------------------------------------------------
  1 | package interpolation
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"testing"
  6 | 
  7 | 	"github.com/metronlab/bow"
  8 | 	"github.com/metronlab/bow/rolling"
  9 | 	"github.com/stretchr/testify/assert"
 10 | 	"github.com/stretchr/testify/require"
 11 | )
 12 | 
// Column names used by the test bows in this file.
const (
	timeCol  = "time"
	valueCol = "value"
)
 17 | 
 18 | func TestStepPrevious(t *testing.T) {
 19 | 	t.Run("no options", func(t *testing.T) {
 20 | 		b, err := bow.NewBowFromRowBasedInterfaces(
 21 | 			[]string{timeCol, valueCol},
 22 | 			[]bow.Type{bow.Int64, bow.Float64},
 23 | 			[][]interface{}{
 24 | 				{10, 1.0},
 25 | 				{13, 1.3},
 26 | 			})
 27 | 		require.NoError(t, err)
 28 | 
 29 | 		expected, err := bow.NewBowFromRowBasedInterfaces(
 30 | 			[]string{timeCol, valueCol},
 31 | 			[]bow.Type{bow.Int64, bow.Float64},
 32 | 			[][]interface{}{
 33 | 				{10, 1.0},
 34 | 				{12, 1.0},
 35 | 				{13, 1.3},
 36 | 			})
 37 | 		require.NoError(t, err)
 38 | 
 39 | 		r, err := rolling.IntervalRolling(b, timeCol, 2, rolling.Options{})
 40 | 		require.NoError(t, err)
 41 | 
 42 | 		filled, err := r.Interpolate(WindowStart(timeCol), StepPrevious(valueCol)).Bow()
 43 | 		assert.NoError(t, err)
 44 | 		assert.True(t, filled.Equal(expected),
 45 | 			fmt.Sprintf("expected:\n%s\nactual:\n%s", expected.String(), filled.String()))
 46 | 	})
 47 | 
 48 | 	t.Run("bool", func(t *testing.T) {
 49 | 		b, err := bow.NewBowFromRowBasedInterfaces(
 50 | 			[]string{timeCol, valueCol},
 51 | 			[]bow.Type{bow.Int64, bow.Boolean},
 52 | 			[][]interface{}{
 53 | 				{10, true},
 54 | 				{13, false},
 55 | 			})
 56 | 		require.NoError(t, err)
 57 | 
 58 | 		expected, err := bow.NewBowFromRowBasedInterfaces(
 59 | 			[]string{timeCol, valueCol},
 60 | 			[]bow.Type{bow.Int64, bow.Boolean},
 61 | 			[][]interface{}{
 62 | 				{10, true},
 63 | 				{12, true},
 64 | 				{13, false},
 65 | 			})
 66 | 		require.NoError(t, err)
 67 | 
 68 | 		r, err := rolling.IntervalRolling(b, timeCol, 2, rolling.Options{})
 69 | 		require.NoError(t, err)
 70 | 
 71 | 		filled, err := r.Interpolate(WindowStart(timeCol), StepPrevious(valueCol)).Bow()
 72 | 		assert.NoError(t, err)
 73 | 		assert.True(t, filled.Equal(expected),
 74 | 			fmt.Sprintf("expected:\n%s\nactual:\n%s", expected.String(), filled.String()))
 75 | 	})
 76 | 
 77 | 	t.Run("string", func(t *testing.T) {
 78 | 		b, err := bow.NewBowFromRowBasedInterfaces(
 79 | 			[]string{timeCol, valueCol},
 80 | 			[]bow.Type{bow.Int64, bow.String},
 81 | 			[][]interface{}{
 82 | 				{10, "test"},
 83 | 				{13, "test2"},
 84 | 			})
 85 | 		require.NoError(t, err)
 86 | 
 87 | 		expected, err := bow.NewBowFromRowBasedInterfaces(
 88 | 			[]string{timeCol, valueCol},
 89 | 			[]bow.Type{bow.Int64, bow.String},
 90 | 			[][]interface{}{
 91 | 				{10, "test"},
 92 | 				{12, "test"},
 93 | 				{13, "test2"},
 94 | 			})
 95 | 		require.NoError(t, err)
 96 | 
 97 | 		r, err := rolling.IntervalRolling(b, timeCol, 2, rolling.Options{})
 98 | 		require.NoError(t, err)
 99 | 
100 | 		filled, err := r.Interpolate(WindowStart(timeCol), StepPrevious(valueCol)).Bow()
101 | 		assert.NoError(t, err)
102 | 		assert.True(t, filled.Equal(expected),
103 | 			fmt.Sprintf("expected:\n%s\nactual:\n%s", expected.String(), filled.String()))
104 | 	})
105 | 
106 | 	t.Run("with offset", func(t *testing.T) {
107 | 		b, err := bow.NewBowFromRowBasedInterfaces(
108 | 			[]string{timeCol, valueCol},
109 | 			[]bow.Type{bow.Int64, bow.Float64},
110 | 			[][]interface{}{
111 | 				{10, 1.0},
112 | 				{13, 1.3},
113 | 			})
114 | 		require.NoError(t, err)
115 | 
116 | 		expected, err := bow.NewBowFromRowBasedInterfaces(
117 | 			[]string{timeCol, valueCol},
118 | 			[]bow.Type{bow.Int64, bow.Float64},
119 | 			[][]interface{}{
120 | 				{9, nil},
121 | 				{10, 1.0},
122 | 				{11, 1.0},
123 | 				{13, 1.3},
124 | 			})
125 | 		require.NoError(t, err)
126 | 
127 | 		r, err := rolling.IntervalRolling(b, timeCol, 2, rolling.Options{Offset: 1})
128 | 		require.NoError(t, err)
129 | 
130 | 		filled, err := r.Interpolate(WindowStart(timeCol), StepPrevious(valueCol)).Bow()
131 | 		assert.NoError(t, err)
132 | 		assert.True(t, filled.Equal(expected),
133 | 			fmt.Sprintf("expected:\n%s\nactual:\n%s", expected.String(), filled.String()))
134 | 	})
135 | 
136 | 	t.Run("with nils", func(t *testing.T) {
137 | 		b, err := bow.NewBowFromRowBasedInterfaces(
138 | 			[]string{timeCol, valueCol},
139 | 			[]bow.Type{bow.Int64, bow.Float64},
140 | 			[][]interface{}{
141 | 				{10, 1.0},
142 | 				{11, nil},
143 | 				{13, nil},
144 | 				{15, 1.5},
145 | 			})
146 | 		require.NoError(t, err)
147 | 
148 | 		expected, err := bow.NewBowFromRowBasedInterfaces(
149 | 			[]string{timeCol, valueCol},
150 | 			[]bow.Type{bow.Int64, bow.Float64},
151 | 			[][]interface{}{
152 | 				{10, 1.0},
153 | 				{11, nil},
154 | 				{12, 1.0},
155 | 				{13, nil},
156 | 				{14, 1.0},
157 | 				{15, 1.5},
158 | 			})
159 | 		require.NoError(t, err)
160 | 
161 | 		r, err := rolling.IntervalRolling(b, timeCol, 2, rolling.Options{})
162 | 		require.NoError(t, err)
163 | 
164 | 		filled, err := r.Interpolate(WindowStart(timeCol), StepPrevious(valueCol)).Bow()
165 | 		assert.NoError(t, err)
166 | 		assert.True(t, filled.Equal(expected),
167 | 			fmt.Sprintf("expected:\n%s\nactual:\n%s", expected.String(), filled.String()))
168 | 	})
169 | }
170 | 


--------------------------------------------------------------------------------
/rolling/interpolation/windowstart.go:
--------------------------------------------------------------------------------
 1 | package interpolation
 2 | 
 3 | import (
 4 | 	"github.com/metronlab/bow"
 5 | 	"github.com/metronlab/bow/rolling"
 6 | )
 7 | 
 8 | func WindowStart(colName string) rolling.ColInterpolation {
 9 | 	return rolling.NewColInterpolation(colName, []bow.Type{bow.Int64},
10 | 		func(colIndexToFill int, w rolling.Window, fullBow, prevRow bow.Bow) (interface{}, error) {
11 | 			return w.FirstValue, nil
12 | 		},
13 | 	)
14 | }
15 | 


--------------------------------------------------------------------------------
/rolling/interpolation/windowstart_test.go:
--------------------------------------------------------------------------------
 1 | package interpolation
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"testing"
 6 | 
 7 | 	"github.com/metronlab/bow"
 8 | 	"github.com/metronlab/bow/rolling"
 9 | 	"github.com/stretchr/testify/assert"
10 | 	"github.com/stretchr/testify/require"
11 | )
12 | 
13 | func TestWindowStart(t *testing.T) {
14 | 	var interval int64 = 2
15 | 
16 | 	b, err := bow.NewBowFromRowBasedInterfaces(
17 | 		[]string{timeCol},
18 | 		[]bow.Type{bow.Int64},
19 | 		[][]interface{}{
20 | 			{10},
21 | 			{13},
22 | 		})
23 | 	require.NoError(t, err)
24 | 
25 | 	t.Run("no options", func(t *testing.T) {
26 | 		expected, err := bow.NewBowFromRowBasedInterfaces(
27 | 			[]string{timeCol},
28 | 			[]bow.Type{bow.Int64},
29 | 			[][]interface{}{
30 | 				{10},
31 | 				{12},
32 | 				{13},
33 | 			})
34 | 		require.NoError(t, err)
35 | 
36 | 		r, err := rolling.IntervalRolling(b, timeCol, interval, rolling.Options{})
37 | 		require.NoError(t, err)
38 | 
39 | 		filled, err := r.Interpolate(WindowStart(timeCol)).Bow()
40 | 		assert.NoError(t, err)
41 | 		assert.True(t, filled.Equal(expected),
42 | 			fmt.Sprintf("expected:\n%s\nactual:\n%s", expected.String(), filled.String()))
43 | 	})
44 | 
45 | 	t.Run("with offset", func(t *testing.T) {
46 | 		expected, err := bow.NewBowFromRowBasedInterfaces(
47 | 			[]string{timeCol},
48 | 			[]bow.Type{bow.Int64},
49 | 			[][]interface{}{
50 | 				{9},
51 | 				{10},
52 | 				{11},
53 | 				{13},
54 | 			})
55 | 		require.NoError(t, err)
56 | 
57 | 		r, err := rolling.IntervalRolling(b, timeCol, interval, rolling.Options{Offset: 1.})
58 | 		require.NoError(t, err)
59 | 
60 | 		filled, err := r.Interpolate(WindowStart(timeCol)).Bow()
61 | 		assert.NoError(t, err)
62 | 		assert.True(t, filled.Equal(expected),
63 | 			fmt.Sprintf("expected:\n%s\nactual:\n%s", expected.String(), filled.String()))
64 | 	})
65 | }
66 | 


--------------------------------------------------------------------------------
/rolling/interpolation_test.go:
--------------------------------------------------------------------------------
  1 | package rolling
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"testing"
  6 | 
  7 | 	"github.com/metronlab/bow"
  8 | 	"github.com/stretchr/testify/assert"
  9 | )
 10 | 
 11 | func TestIntervalRollingIter_Interpolate(t *testing.T) {
 12 | 	timeInterp := NewColInterpolation(timeCol, []bow.Type{bow.Int64},
 13 | 		func(colIndex int, w Window, full, prevRow bow.Bow) (interface{}, error) {
 14 | 			return w.FirstValue, nil
 15 | 		})
 16 | 	valueInterp := NewColInterpolation(valueCol, []bow.Type{bow.Int64, bow.Float64},
 17 | 		func(colIndex int, w Window, full, prevRow bow.Bow) (interface{}, error) {
 18 | 			return 9.9, nil
 19 | 		})
 20 | 
 21 | 	t.Run("invalid input type", func(t *testing.T) {
 22 | 		b, _ := bow.NewBowFromColBasedInterfaces([]string{timeCol, valueCol}, []bow.Type{bow.Int64, bow.Float64}, [][]interface{}{
 23 | 			{10, 13},
 24 | 			{1.0, 1.3},
 25 | 		})
 26 | 		r, _ := IntervalRolling(b, timeCol, 2, Options{})
 27 | 		interp := NewColInterpolation(valueCol, []bow.Type{bow.Int64, bow.Boolean},
 28 | 			func(colIndex int, w Window, full, prevRow bow.Bow) (interface{}, error) {
 29 | 				return true, nil
 30 | 			})
 31 | 		_, err := r.
 32 | 			Interpolate(timeInterp, interp).
 33 | 			Bow()
 34 | 		assert.EqualError(t, err, "intervalRolling.validateInterpolation: accepts types [int64 bool], got type float64")
 35 | 	})
 36 | 
 37 | 	t.Run("missing interval column", func(t *testing.T) {
 38 | 		b, _ := bow.NewBowFromColBasedInterfaces([]string{timeCol, valueCol}, []bow.Type{bow.Int64, bow.Float64}, [][]interface{}{
 39 | 			{10, 13},
 40 | 			{1.0, 1.3},
 41 | 		})
 42 | 		r, _ := IntervalRolling(b, timeCol, 2, Options{})
 43 | 		_, err := r.
 44 | 			Interpolate(valueInterp).
 45 | 			Bow()
 46 | 		assert.EqualError(t, err, fmt.Sprintf("must keep interval column '%s'", timeCol))
 47 | 	})
 48 | 
 49 | 	t.Run("empty bow", func(t *testing.T) {
 50 | 		b, _ := bow.NewBowFromColBasedInterfaces([]string{timeCol, valueCol}, []bow.Type{bow.Int64, bow.Float64}, [][]interface{}{
 51 | 			{},
 52 | 			{},
 53 | 		})
 54 | 		r, _ := IntervalRolling(b, timeCol, 2, Options{})
 55 | 
 56 | 		filled, err := r.
 57 | 			Interpolate(timeInterp, valueInterp).
 58 | 			Bow()
 59 | 		assert.Nil(t, err)
 60 | 
 61 | 		assert.True(t, filled.Equal(b), fmt.Sprintf("expected %v\nactual  %v", b, filled))
 62 | 	})
 63 | 
 64 | 	t.Run("no options", func(t *testing.T) {
 65 | 		b, _ := bow.NewBowFromColBasedInterfaces([]string{timeCol, valueCol}, []bow.Type{bow.Int64, bow.Float64}, [][]interface{}{
 66 | 			{10, 13},
 67 | 			{1.0, 1.3},
 68 | 		})
 69 | 		r, _ := IntervalRolling(b, timeCol, 2, Options{})
 70 | 
 71 | 		filled, err := r.
 72 | 			Interpolate(timeInterp, valueInterp).
 73 | 			Bow()
 74 | 		assert.Nil(t, err)
 75 | 
 76 | 		expected, _ := bow.NewBowFromColBasedInterfaces([]string{timeCol, valueCol}, []bow.Type{bow.Int64, bow.Float64}, [][]interface{}{
 77 | 			{10, 12, 13},
 78 | 			{1.0, 9.9, 1.3},
 79 | 		})
 80 | 		assert.True(t, filled.Equal(expected), fmt.Sprintf("expected %v\nactual  %v", expected, filled))
 81 | 	})
 82 | 
 83 | 	t.Run("with offset", func(t *testing.T) {
 84 | 		b, _ := bow.NewBowFromColBasedInterfaces([]string{timeCol, valueCol}, []bow.Type{bow.Int64, bow.Float64}, [][]interface{}{
 85 | 			{10, 13},
 86 | 			{1.0, 1.3},
 87 | 		})
 88 | 		r, _ := IntervalRolling(b, timeCol, 2, Options{Offset: 1})
 89 | 
 90 | 		filled, err := r.
 91 | 			Interpolate(timeInterp, valueInterp).
 92 | 			Bow()
 93 | 		assert.Nil(t, err)
 94 | 
 95 | 		expected, _ := bow.NewBowFromColBasedInterfaces([]string{timeCol, valueCol}, []bow.Type{bow.Int64, bow.Float64}, [][]interface{}{
 96 | 			{9, 10, 11, 13},
 97 | 			{9.9, 1.0, 9.9, 1.3},
 98 | 		})
 99 | 		assert.True(t, filled.Equal(expected), fmt.Sprintf("expected %v\nactual  %v", expected, filled))
100 | 	})
101 | }
102 | 


--------------------------------------------------------------------------------
/rolling/rolling.go:
--------------------------------------------------------------------------------
  1 | package rolling
  2 | 
  3 | import (
  4 | 	"errors"
  5 | 	"fmt"
  6 | 
  7 | 	"github.com/metronlab/bow"
  8 | )
  9 | 
// Rolling enables processing a Bow via windows.
// Use Interpolate() and/or Aggregate() to transform windows.
// Use Next() to iterate over windows.
// Use Bow() to get the processed Bow.
type Rolling interface {
	// Aggregate aggregates each column by using a ColAggregation.
	// It returns a Rolling so that calls can be chained.
	Aggregate(...ColAggregation) Rolling
	// Interpolate fills each window by interpolating its start if missing.
	// It returns a Rolling so that calls can be chained.
	Interpolate(...ColInterpolation) Rolling

	// NumWindows returns the total number of windows in the Bow.
	NumWindows() (int, error)
	// HasNext returns true if the next call to Next() will return a new Window.
	HasNext() bool
	// Next returns the next Window, along with its index.
	// The interval-based implementation in this file returns a nil Window
	// and a nil error once HasNext() is false.
	Next() (windowIndex int, window *Window, err error)

	// Bow returns the Bow from the Rolling, along with any error recorded on it.
	Bow() (bow.Bow, error)
}
 30 | 
// intervalRolling is the interval-based implementation of Rolling.
// It is both the configuration of the rolling and the iterator state
// advanced by Next().
type intervalRolling struct {
	// TODO: sync.Mutex
	bow              bow.Bow // Bow being processed
	intervalColIndex int     // index of the Int64 column the windows are based on
	interval         int64   // length of a window, strictly positive
	options          Options // options as normalized by newIntervalRolling
	numWindows       int     // number of windows, computed once at construction

	currWindowFirstValue int64 // first value of the window the next call to Next() returns
	currRowIndex         int   // row at which the next call to Next() starts scanning
	currWindowIndex      int   // index of the window the next call to Next() returns
	err                  error // error returned by NumWindows() and Bow(), set via setError
}
 44 | 
// Options sets options for IntervalRolling:
// - Offset: interval to move the window start, can be negative.
//   It is normalized into [0, interval) when the Rolling is created.
// - Inclusive: sets if the window needs to be inclusive; i.e., includes the last point.
//   The last point is a row whose interval value equals the window last value.
// - PrevRow: extra point before the window to enable better interpolation.
//   It must hold at most one row; a nil or empty PrevRow is treated as absent.
type Options struct {
	Offset    int64
	Inclusive bool
	PrevRow   bow.Bow
}
 54 | 
 55 | // IntervalRolling returns a new interval-based Rolling with:
 56 | // - b: Bow to process in windows
 57 | // - colName: column on which the interval is based on
 58 | // - interval: numeric value independent of any unit, length of the windows
 59 | // All windows except the last one may be empty.
 60 | func IntervalRolling(b bow.Bow, colName string, interval int64, options Options) (Rolling, error) {
 61 | 	colIndex, err := b.ColumnIndex(colName)
 62 | 	if err != nil {
 63 | 		return nil, err
 64 | 	}
 65 | 
 66 | 	return newIntervalRolling(b, colIndex, interval, options)
 67 | }
 68 | 
 69 | func newIntervalRolling(b bow.Bow, intervalColIndex int, interval int64, options Options) (Rolling, error) {
 70 | 	if b.ColumnType(intervalColIndex) != bow.Int64 {
 71 | 		return nil, fmt.Errorf("impossible to create a new intervalRolling on column of type %v",
 72 | 			b.ColumnType(intervalColIndex))
 73 | 	}
 74 | 
 75 | 	var err error
 76 | 	options.Offset, err = enforceIntervalAndOffset(interval, options.Offset)
 77 | 	if err != nil {
 78 | 		return nil, fmt.Errorf("enforceIntervalAndOffset: %w", err)
 79 | 	}
 80 | 
 81 | 	options.PrevRow, err = enforcePrevRow(options.PrevRow)
 82 | 	if err != nil {
 83 | 		return nil, fmt.Errorf("enforcePrevRow: %w", err)
 84 | 	}
 85 | 
 86 | 	var windowFirstValue int64
 87 | 	if b.NumRows() > 0 {
 88 | 		firstBowValue, valid := b.GetInt64(intervalColIndex, 0)
 89 | 		if !valid {
 90 | 			return nil, fmt.Errorf(
 91 | 				"the first value of the column should be convertible to int64, got %v",
 92 | 				b.GetValue(intervalColIndex, 0))
 93 | 		}
 94 | 
 95 | 		// align window first value on interval
 96 | 		windowFirstValue = (firstBowValue/interval)*interval + options.Offset
 97 | 		if windowFirstValue > firstBowValue {
 98 | 			windowFirstValue -= interval
 99 | 		}
100 | 	}
101 | 
102 | 	numWindows := countWindows(b, intervalColIndex, windowFirstValue, interval)
103 | 
104 | 	return &intervalRolling{
105 | 		bow:                  b,
106 | 		intervalColIndex:     intervalColIndex,
107 | 		interval:             interval,
108 | 		options:              options,
109 | 		numWindows:           numWindows,
110 | 		currWindowFirstValue: windowFirstValue,
111 | 	}, nil
112 | }
113 | 
// enforceIntervalAndOffset validates interval and returns offset normalized
// into [0, interval).
// It returns -1 and an error when interval is not strictly positive.
func enforceIntervalAndOffset(interval, offset int64) (int64, error) {
	if interval < 1 {
		return -1, errors.New("strictly positive interval required")
	}

	// The remainder keeps the sign of offset and lies in (-interval, interval);
	// it leaves an offset already in that range unchanged.
	normalized := offset % interval
	if normalized < 0 {
		normalized += interval
	}

	return normalized, nil
}
129 | 
130 | func enforcePrevRow(prevRow bow.Bow) (bow.Bow, error) {
131 | 	if prevRow == nil || prevRow.NumRows() == 0 {
132 | 		return nil, nil
133 | 	}
134 | 
135 | 	if prevRow.NumRows() != 1 {
136 | 		return nil, fmt.Errorf("prevRow must have only one row, have %d",
137 | 			prevRow.NumRows())
138 | 	}
139 | 
140 | 	return prevRow, nil
141 | }
142 | 
143 | func countWindows(b bow.Bow, colIndex int, firstWindowStart, interval int64) int {
144 | 	if b.NumRows() == 0 {
145 | 		return 0
146 | 	}
147 | 
148 | 	lastBowValue, lastBowValueRowIndex := b.GetPrevInt64(colIndex, b.NumRows()-1)
149 | 	if lastBowValueRowIndex == -1 || firstWindowStart > lastBowValue {
150 | 		return 0
151 | 	}
152 | 
153 | 	return int((lastBowValue-firstWindowStart)/interval + 1)
154 | }
155 | 
// NumWindows returns the number of windows computed when the Rolling was
// created, along with the error currently recorded on the Rolling, if any.
func (r *intervalRolling) NumWindows() (int, error) {
	return r.numWindows, r.err
}
159 | 
160 | // TODO: concurrent-safe
161 | 
162 | func (r *intervalRolling) HasNext() bool {
163 | 	if r.currRowIndex >= r.bow.NumRows() {
164 | 		return false
165 | 	}
166 | 
167 | 	lastBowValue, lastBowValueIsValid := r.bow.GetInt64(r.intervalColIndex, r.bow.NumRows()-1)
168 | 	if !lastBowValueIsValid {
169 | 		return false
170 | 	}
171 | 
172 | 	return r.currWindowFirstValue <= lastBowValue
173 | }
174 | 
175 | // TODO: concurrent-safe
176 | 
177 | func (r *intervalRolling) Next() (windowIndex int, window *Window, err error) {
178 | 	if !r.HasNext() {
179 | 		return r.currWindowIndex, nil, nil
180 | 	}
181 | 
182 | 	firstValue := r.currWindowFirstValue
183 | 	lastValue := r.currWindowFirstValue + r.interval // include last position even if last point is excluded
184 | 
185 | 	rowIndex := 0
186 | 	isInclusive := false
187 | 	firstRowIndex := r.currRowIndex
188 | 	lastRowIndex := -1
189 | 	for rowIndex = firstRowIndex; rowIndex < r.bow.NumRows(); rowIndex++ {
190 | 		val, ok := r.bow.GetInt64(r.intervalColIndex, rowIndex)
191 | 		if !ok {
192 | 			continue
193 | 		}
194 | 		if val < firstValue {
195 | 			continue
196 | 		}
197 | 		if val > lastValue {
198 | 			break
199 | 		}
200 | 
201 | 		if val == lastValue {
202 | 			if isInclusive {
203 | 				break
204 | 			}
205 | 			if !r.options.Inclusive {
206 | 				break
207 | 			}
208 | 			isInclusive = true
209 | 		}
210 | 
211 | 		lastRowIndex = rowIndex
212 | 	}
213 | 
214 | 	if !isInclusive {
215 | 		r.currRowIndex = rowIndex
216 | 	} else {
217 | 		r.currRowIndex = rowIndex - 1
218 | 	}
219 | 
220 | 	r.currWindowFirstValue = lastValue
221 | 	windowIndex = r.currWindowIndex
222 | 	r.currWindowIndex++
223 | 
224 | 	var b bow.Bow
225 | 	if lastRowIndex == -1 {
226 | 		b = r.bow.NewEmptySlice()
227 | 	} else {
228 | 		b = r.bow.NewSlice(firstRowIndex, lastRowIndex+1)
229 | 	}
230 | 
231 | 	return windowIndex, &Window{
232 | 		Bow:              b,
233 | 		FirstIndex:       firstRowIndex,
234 | 		IntervalColIndex: r.intervalColIndex,
235 | 		FirstValue:       firstValue,
236 | 		LastValue:        lastValue,
237 | 		IsInclusive:      isInclusive,
238 | 	}, nil
239 | }
240 | 
// Bow returns the Bow currently held by the Rolling, along with the error
// currently recorded on the Rolling, if any.
func (r *intervalRolling) Bow() (bow.Bow, error) {
	return r.bow, r.err
}
244 | 
// setError records err on the Rolling and returns the Rolling itself, so the
// error is later reported by NumWindows() and Bow().
func (r *intervalRolling) setError(err error) Rolling {
	r.err = err
	return r
}
249 | 


--------------------------------------------------------------------------------
/rolling/transformation/factor.go:
--------------------------------------------------------------------------------
 1 | package transformation
 2 | 
 3 | import "fmt"
 4 | 
 5 | type Func func(interface{}) (interface{}, error)
 6 | 
 7 | func Factor(n float64) Func {
 8 | 	return func(x interface{}) (interface{}, error) {
 9 | 		switch x := x.(type) {
10 | 		case float64:
11 | 			return x * n, nil
12 | 		case int64:
13 | 			return int64(float64(x) * n), nil
14 | 		case nil:
15 | 			return x, nil
16 | 		default:
17 | 			return nil, fmt.Errorf("factor: invalid type %T", x)
18 | 		}
19 | 	}
20 | }
21 | 


--------------------------------------------------------------------------------
/rolling/transformation/factor_test.go:
--------------------------------------------------------------------------------
 1 | package transformation
 2 | 
 3 | import (
 4 | 	"testing"
 5 | 
 6 | 	"github.com/stretchr/testify/assert"
 7 | )
 8 | 
 9 | func TestFactor(t *testing.T) {
10 | 	transform := Factor(0.1)
11 | 
12 | 	t.Run("invalid input", func(t *testing.T) {
13 | 		res, err := transform("11")
14 | 		assert.EqualError(t, err, "factor: invalid type string")
15 | 		assert.Nil(t, res)
16 | 	})
17 | 
18 | 	t.Run("preserve nil", func(t *testing.T) {
19 | 		res, err := transform(nil)
20 | 		assert.Nil(t, err)
21 | 		assert.Nil(t, res)
22 | 	})
23 | 
24 | 	t.Run("preserve int64", func(t *testing.T) {
25 | 		res, err := transform(int64(11))
26 | 		assert.Nil(t, err)
27 | 		assert.Equal(t, int64(1), res)
28 | 	})
29 | 
30 | 	t.Run("preserve float64", func(t *testing.T) {
31 | 		res, err := transform(11.)
32 | 		assert.Nil(t, err)
33 | 		assert.Equal(t, 1.1, res)
34 | 	})
35 | }
36 | 


--------------------------------------------------------------------------------
/rolling/window.go:
--------------------------------------------------------------------------------
 1 | package rolling
 2 | 
 3 | import "github.com/metronlab/bow"
 4 | 
// Window represents an interval-based window of data with:
// Bow: data
// FirstIndex: index (across all windows) of first row in this window
// (intervalRolling.Next sets it to the row its scan started from, even when
// the window holds no row)
// IntervalColIndex: index of the interval column
// FirstValue: Window first value
// LastValue: Window last value
// IsInclusive: Window is inclusive, i.e. includes the last point at the end of the interval
type Window struct {
	Bow              bow.Bow
	FirstIndex       int
	IntervalColIndex int
	FirstValue       int64
	LastValue        int64
	IsInclusive      bool
}
20 | 
21 | // UnsetInclusive returns a copy of the Window with the IsInclusive parameter set to false and with the last row sliced off.
22 | // Returns the unchanged Window if the IsInclusive parameter is not set.
23 | func (w Window) UnsetInclusive() Window {
24 | 	if !w.IsInclusive {
25 | 		return w
26 | 	}
27 | 	wCopy := w
28 | 	wCopy.IsInclusive = false
29 | 	wCopy.Bow = wCopy.Bow.NewSlice(0, wCopy.Bow.NumRows()-1)
30 | 	return wCopy
31 | }
32 | 


--------------------------------------------------------------------------------
/scripts/benchmark.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | # The -o pipefail option is important for the trap to be executed if the "go test" command fails
 4 | set -o pipefail
 5 | 
 6 | TIMESTAMP=$(date +%Y-%m-%d_%H-%M-%S)
 7 | 
 8 | : "${PKG:="./..."}"
 9 | : "${TIMEOUT:="1h"}"
10 | : "${RUN:=".*"}"
11 | : "${BENCH_RESULTS_DIR_PATH:="/tmp/benchmarks"}"
12 | : "${BENCH_RESULTS_FILE_PATH:="/tmp/benchmarks/${TIMESTAMP}.txt"}"
13 | 
14 | mkdir -p "$BENCH_RESULTS_DIR_PATH"
15 | 
16 | printf "Run benchmarks into file %s\n" "$BENCH_RESULTS_FILE_PATH"
17 | go test $PKG -run XXX -bench="$RUN" -benchmem -timeout "$TIMEOUT" | tee "$BENCH_RESULTS_FILE_PATH"
18 | 
19 | printf "Run benchstat on file %s\n" "$BENCH_RESULTS_FILE_PATH"
20 | benchstat "$BENCH_RESULTS_FILE_PATH"


--------------------------------------------------------------------------------
/scripts/benchstat.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | OLD_BENCH_FILE_PATH=$1
 4 | NEW_BENCH_FILE_PATH=$2
 5 | 
 6 | : "${BENCH_RESULTS_DIR_PATH:="/tmp/benchmarks"}"
 7 | : "${BENCH_COMPARISON_FILE_PATH:="$BENCH_RESULTS_DIR_PATH/benchstat.$(date +%Y-%m-%d_%H-%M-%S).txt"}"
 8 | 
 9 | echo
10 | printf "Running benchstat to compare %s and %s in %s\n" "$OLD_BENCH_FILE_PATH" "$NEW_BENCH_FILE_PATH" "$BENCH_COMPARISON_FILE_PATH"
11 | 
12 | if [ ! -f "$OLD_BENCH_FILE_PATH" ]
13 | then
14 |     printf "%s does not exist\n" "$OLD_BENCH_FILE_PATH"
15 |     exit 0
16 | fi
17 | 
18 | if [ ! -f "$NEW_BENCH_FILE_PATH" ]
19 | then
20 |     printf "%s does not exist\n" "$NEW_BENCH_FILE_PATH"
21 |     exit 0
22 | fi
23 | 
24 | mkdir -p "$BENCH_RESULTS_DIR_PATH"
25 | 
26 | benchstat -delta-test none "$OLD_BENCH_FILE_PATH" "$NEW_BENCH_FILE_PATH" | tee "$BENCH_COMPARISON_FILE_PATH"


--------------------------------------------------------------------------------
/scripts/test.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | # The -o pipefail option is important for the trap to be executed if the "go test" command fails
 4 | set -o pipefail
 5 | 
 6 | : ${TEST_RESULTS:=/tmp/test-results}
 7 | : ${COVER_RESULTS:=/tmp/cover-results}
 8 | : ${PKG:=./...}
 9 | : ${RUN:=".*"}
10 | : ${TIMEOUT:="5m"}
11 | 
12 | mkdir -p ${COVER_RESULTS}
13 | mkdir -p ${TEST_RESULTS}
14 | 
15 | trap "go-junit-report <${TEST_RESULTS}/go-test.out > ${TEST_RESULTS}/go-test-report.xml" EXIT
16 | go test ${PKG} -v -race -cover -covermode=atomic -coverprofile=${COVER_RESULTS}/coverage.cover -timeout ${TIMEOUT} -run ${RUN} \
17 |     | tee ${TEST_RESULTS}/go-test.out \
18 |     | sed ''/PASS/s//$(printf "\033[32mPASS\033[0m")/'' \
19 |     | sed ''/FAIL/s//$(printf "\033[31mFAIL\033[0m")/'' \
20 |     | sed ''/RUN/s//$(printf "\033[34mRUN\033[0m")/''
21 | 
22 | go tool cover -html=${COVER_RESULTS}/coverage.cover -o ${COVER_RESULTS}/coverage.html
23 | 
24 | echo "To open the html coverage file use one of the following commands:"
25 | echo "open file://$COVER_RESULTS/coverage.html on mac"
26 | echo "xdg-open file://$COVER_RESULTS/coverage.html on linux"


--------------------------------------------------------------------------------