├── .github ├── dependabot.yml └── workflows │ ├── ci.yml │ └── golangci-lint.yml ├── .gitignore ├── .golangci.yml ├── CHANGELOG.md ├── LICENSE ├── Makefile ├── README.md ├── XXXexamples_test.go ├── arrowtests ├── arrow.go └── arrow_test.go ├── benchmarks ├── bow1-10-rows.parquet ├── bow1-100-rows.parquet ├── bow1-1000-rows.parquet ├── bow1-10000-rows.parquet ├── bow1-100000-rows.parquet ├── bow2-10-rows.parquet ├── bow2-100-rows.parquet ├── bow2-1000-rows.parquet ├── bow2-10000-rows.parquet ├── bow2-100000-rows.parquet └── generator_test.go ├── bow.go ├── bow_test.go ├── bowappend.go ├── bowappend_test.go ├── bowassertion.go ├── bowassertion_test.go ├── bowbuffer.go ├── bowbuffer_test.go ├── bowconvert.go ├── bowconvert_test.go ├── bowdiff.go ├── bowdiff_test.go ├── bowfill.go ├── bowfill_test.go ├── bowfind.go ├── bowfind_test.go ├── bowgenerator.go ├── bowgenerator_test.go ├── bowgetters.go ├── bowgetters_test.go ├── bowjoin.go ├── bowjoin_test.go ├── bowjson.go ├── bowjson_test.go ├── bowmetadata.go ├── bowmetadata_test.go ├── bowparquet.go ├── bowparquet_test.go ├── bowparquet_test_input.parquet ├── bowrecord.go ├── bowseries.go ├── bowseries_test.go ├── bowsetters.go ├── bowsetters_test.go ├── bowsort.go ├── bowsort_test.go ├── bowstring.go ├── bowtypes.go ├── bowtypes_test.go ├── bowvalues.go ├── go.mod ├── go.sum ├── rolling ├── aggregation.go ├── aggregation │ ├── XXXbenchmarks_test.go │ ├── arithmeticmean.go │ ├── arithmeticmean_test.go │ ├── core_test.go │ ├── count.go │ ├── count_test.go │ ├── firstlast.go │ ├── firstlast_test.go │ ├── integral.go │ ├── integral_test.go │ ├── minmax.go │ ├── minmax_test.go │ ├── mode.go │ ├── mode_test.go │ ├── sum.go │ ├── sum_test.go │ ├── weightedmean.go │ ├── weightedmean_test.go │ ├── whole.go │ ├── whole_test.go │ └── windowstart.go ├── aggregation_test.go ├── interpolation.go ├── interpolation │ ├── linear.go │ ├── linear_test.go │ ├── none.go │ ├── none_test.go │ ├── stepprevious.go │ ├── stepprevious_test.go │ 
├── windowstart.go │ └── windowstart_test.go ├── interpolation_test.go ├── rolling.go ├── rolling_test.go ├── transformation │ ├── factor.go │ └── factor_test.go └── window.go └── scripts ├── benchmark.sh ├── benchstat.sh └── test.sh /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "github-actions" # See documentation for possible values 9 | directory: "/" # Location of package manifests 10 | schedule: 11 | interval: "weekly" 12 | 13 | - package-ecosystem: "gomod" # See documentation for possible values 14 | directory: "/" # Location of package manifests 15 | schedule: 16 | interval: "weekly" 17 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | ci: 7 | runs-on: ubuntu-latest 8 | env: 9 | BENCH_RESULTS_DIR_PATH: benchmarks 10 | BENCH_COMPARISON_FILE_PATH: ${{ format('benchmarks/comparison-{0}-vs-{1}.txt', github.base_ref, github.sha) }} 11 | steps: 12 | - uses: actions/checkout@v3 13 | - uses: actions/setup-go@v4 14 | with: 15 | go-version-file: 'go.mod' 16 | cache: true 17 | 18 | - name: Run tests 19 | run: | 20 | go install github.com/jstemmer/go-junit-report@latest 21 | bash -c ./scripts/test.sh 22 | 23 | - name: ${{ format('Run benchmarks on sha {0}', github.sha) }} 24 | run: | 25 | go install golang.org/x/perf/cmd/benchstat@latest 26 | bash ./scripts/benchmark.sh 27 | env: 28 | BENCH_RESULTS_FILE_PATH: ${{ 
format('benchmarks/{0}.txt', github.sha) }} 29 | 30 | - name: ${{ format('Uploading artifact of sha {0} benchmark results', github.sha) }} 31 | uses: actions/upload-artifact@v3 32 | with: 33 | name: ${{ format('{0}-sha-benchmark-results', github.sha) }} 34 | path: ${{ format('benchmarks/{0}.txt', github.sha) }} 35 | 36 | - uses: actions/checkout@v3 37 | if: ${{ github.event_name == 'pull_request' }} 38 | with: 39 | ref: ${{ github.base_ref }} 40 | 41 | - uses: actions/download-artifact@v3 42 | if: ${{ github.event_name == 'pull_request' }} 43 | with: 44 | name: ${{ format('{0}-sha-benchmark-results', github.sha) }} 45 | path: benchmarks 46 | 47 | - name: ${{ format('Run benchmarks on base ref {0}', github.base_ref) }} 48 | if: ${{ github.event_name == 'pull_request' }} 49 | run: | 50 | bash ./scripts/benchmark.sh 51 | env: 52 | BENCH_RESULTS_FILE_PATH: ${{ format('benchmarks/{0}.txt', github.base_ref) }} 53 | 54 | - name: ${{ format('Uploading artifact of base ref {0} benchmark results', github.base_ref) }} 55 | if: ${{ github.event_name == 'pull_request' }} 56 | uses: actions/upload-artifact@v3 57 | with: 58 | name: ${{ format('{0}-base-ref-benchmark-results', github.base_ref) }} 59 | path: ${{ format('benchmarks/{0}.txt', github.base_ref) }} 60 | 61 | - name: ${{ format('Compare benchmarks of base ref {0} with sha {1}', github.base_ref, github.sha) }} 62 | if: ${{ github.event_name == 'pull_request' }} 63 | run: | 64 | bash ./scripts/benchstat.sh "${{ format('benchmarks/{0}.txt', github.base_ref) }}" "${{ format('benchmarks/{0}.txt', github.sha) }}" 65 | 66 | - name: Upload artifact of benchmark comparison results 67 | if: ${{ github.event_name == 'pull_request' }} 68 | uses: actions/upload-artifact@v3 69 | with: 70 | name: benchmark-comparison-results 71 | path: ${{ format('benchmarks/comparison-{0}-vs-{1}.txt', github.base_ref, github.sha) }} 72 | -------------------------------------------------------------------------------- 
/.github/workflows/golangci-lint.yml: -------------------------------------------------------------------------------- 1 | name: golangci-lint 2 | 3 | on: [push] 4 | 5 | jobs: 6 | lint: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v3 10 | - uses: actions/setup-go@v4 11 | with: 12 | go-version-file: 'go.mod' 13 | cache: true 14 | - name: golangci-lint 15 | uses: golangci/golangci-lint-action@v3 16 | with: 17 | version: latest 18 | args: --verbose 19 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Go template 3 | # Binaries for programs and plugins 4 | *.exe 5 | *.dll 6 | *.so 7 | *.dylib 8 | 9 | # Test binary, build with `go test -c` 10 | *.test 11 | mocks 12 | 13 | # Output of the go coverage tool, specifically when used with LiteIDE 14 | *.out 15 | 16 | # Contains docker temp files 17 | .tmp 18 | .ssh 19 | 20 | # ide 21 | .idea/* 22 | .vscode 23 | 24 | # Apple Desktop Services Store 25 | .DS_Store 26 | 27 | # Ansible 28 | *.retry 29 | 30 | # GENERAL PURPOSE 31 | vendor -------------------------------------------------------------------------------- /.golangci.yml: -------------------------------------------------------------------------------- 1 | linters: 2 | enable: 3 | - gofmt 4 | - gci 5 | - goimports 6 | 7 | run: 8 | timeout: 5m 9 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | UNRELEASED [XXXX-XX-XX] 2 | ------------------- 3 | 4 | v1.0.0 [2023-04-07] 5 | ------------------- 6 | 7 | - General 8 | - bump to go 1.18 9 | - bump to arrow/go/v8 10 | - remove useless count script 11 | - add gci linter 12 | - improve documentation 13 | - improve error handling 14 | - improve code readability 15 | - remove code gen to prepare 
for Timestamp support 16 | - add Bow data type in Buffer to decouple Go native types from Arrow types 17 | - expose arrow record 18 | 19 | v0.18.0 [2022-02-16] 20 | ------------------- 21 | 22 | - General 23 | - bump to arrow/go/v7 24 | - improve CI to run benchmarks comparison for PR with the same runner 25 | - Parquet 26 | - add new GetParquetMetaColTimeUnit method to extract column time unit from the metadata of a bow read from a parquet file 27 | - remove deprecated ConvertedType from the metadata 28 | 29 | 30 | v0.17.0 [2021-10-27] 31 | ------------------- 32 | 33 | - new Manipulation features 34 | - bump Go to version 1.17 35 | - bow interface: switch from colNames to colIndices arguments 36 | - support special characters in Parquet read/write 37 | 38 | 39 | v0.16.0 [2021-08-25] 40 | ------------------- 41 | 42 | - general code refactoring 43 | - improved performance, mostly with better memory usage and buffers rework 44 | - introduced code generation with Metronlab/genius framework 45 | - changed Find method and add Contains and FindNext 46 | - improved bow generator by simplification and made it extensible by user for value creation strategy 47 | 48 | 49 | v0.15.0 [2021-08-04] 50 | ------------------- 51 | 52 | - Benchmarks and Profiling: 53 | - Moved benchmarks closer to functions 54 | - Simplified and faster benchmarks 55 | - Added AppendBows and NewBufferFromInterfaces benchmarks 56 | - Added Makefile rules for tests and benchmarks profiling 57 | 58 | 59 | v0.14.0 [2021-07-20] 60 | ------------------- 61 | 62 | - Adding `SetMetadata` method to `Bow`'s interface 63 | 64 | 65 | v0.13.0 [2021-06-17] 66 | ------------------- 67 | 68 | - Adding `AddCols` method to `Bow`'s interface 69 | 70 | 71 | v0.12.1 [2021-06-16] 72 | ------------------- 73 | 74 | - Apache Parquet: new tests and UX improvements 75 | 76 | 77 | v0.12.0 [2021-06-10] 78 | ------------------- 79 | 80 | - Apache Parquet file read/write support 81 | - Add Schema Metadata support 82 | - Add 
golangci-lint usage 83 | 84 | 85 | v0.11.0 [2021-05-17] 86 | ------------------- 87 | 88 | - Add new bow.Diff function 89 | - Deprecate Difference aggregation 90 | 91 | 92 | v0.10.0 [2021-05-11] 93 | ------------------- 94 | 95 | - Rolling: 96 | - improved code readability 97 | - aggregation/fill: it is now possible to pass a previous row option to the rolling to enable the correct interpolation of the first row of its first window, in the case of missing window start row 98 | 99 | 100 | v0.9.0 [2021-03-24] 101 | ------------------- 102 | 103 | - General: 104 | - Fix typos 105 | - Improve robustness and code clarity of functions IsColEmpty, IsColSorted and FillLinear with better error management 106 | - Remove unused variables 107 | - Remove bow.marshalJSONRowBased 108 | 109 | - Bow Generator: 110 | - Improve randomness of values 111 | - Added support for String and Bool data types 112 | - New ColNames and DataTypes options for more flexibility 113 | - Improve user experience with better error management 114 | 115 | - Benchmarks improvements: 116 | - Added new test cases 117 | - Added usage of benchstat on the CircleCI pipeline to compare benchmark results with master branch 118 | 119 | - New Functions: 120 | - NewValuesFromJSON 121 | 122 | - Bug fix: 123 | - Rolling inclusive window with duplicated indexes now correctly iterates keeping windowing integrity 124 | 125 | 126 | v0.8.0 [2021-02-12] 127 | ------------------- 128 | 129 | - New functions: 130 | - IsEmpty 131 | - FindFirst 132 | - IsSupported 133 | - GetReturnType 134 | - Adding strong typing support 135 | - Refactoring Bow's logic to return a valid schema instead of nil when no data is found 136 | - Fixing tests 137 | 138 | 139 | v0.7.3 [2021-01-12] 140 | ------------------- 141 | 142 | - New functions: 143 | - NewBowEmpty 144 | - NewBowFromColNames 145 | - EncodeBowToJSONBody 146 | - DecodeJSONRespToBow 147 | - New aggregation tests 148 | - Minor code refactoring 149 | 150 | 151 | v0.7.2 [2020-09-14] 152 
| ------------------- 153 | 154 | ### Bugfixes 155 | - OuterJoin: support of bow without rows returning correct schema 156 | 157 | 158 | v0.7.1 [2020-08-03] 159 | ------------------- 160 | 161 | ### Features 162 | - Add SortByCol method to sort a bow by a column name 163 | 164 | 165 | v0.6.2 [2020-06-02] 166 | ------------------- 167 | 168 | #### Bugfixes 169 | - InnerJoin 170 | 171 | 172 | v0.6.1 [2020-04-22] 173 | ------------------- 174 | 175 | #### Bugfixes 176 | - bump arrow to apache-arrow-0.17.0 177 | 178 | #### Known issues 179 | arrow now allows several columns with the same name, introducing new panics in bow if that happens. 180 | [corresponding issue](https://github.com/Metronlab/bow/issues/12) 181 | 182 | 183 | v0.6.0 [2020-04-22] 184 | ------------------- 185 | 186 | #### Features 187 | - Add Fill functions for missing data interpolation 188 | - Add OuterJoin method 189 | - Refactor InnerJoin method 190 | - Add new CI with CircleCI 191 | - Refactor the sub package bow to have the main functionalities available in the root module 192 | 193 | #### How to migrate to v0.6.0 194 | It is necessary to replace the library import path from github.com/Metronlab/bow/bow to github.com/Metronlab/bow 195 | 196 | 197 | 0.2.0 [2019-02-19] 198 | ------------------- 199 | 200 | #### Features 201 | 202 | - Deprecate method to print in favor of a stringer interface 203 | - Innerjoin based on column name for now, we'll have to let more liberty over the join later on 204 | - Map based indexes for join optimisation (divides time by 5 on a simple short sample) 205 | 206 | #### Bugfixes 207 | 208 | - Fix empty series that made the code segfault in arrow; it is now possible to have an empty dataframe with schema/record set. 
209 | 210 | 211 | 0.1.0 [2019-02-01] 212 | ------------------- 213 | 214 | #### Features 215 | 216 | - Row based json encoding and decoding 217 | - New Bow from row and column based [][]interfaces{} 218 | - Method to print 219 | 220 | 221 | 0.0.0 [2019-01-11] 222 | ------------------- 223 | 224 | #### Features 225 | 226 | - Initial Release 227 | - Simple dataframe with type and series based on apache arrow 228 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | #user overridable variables 2 | all: lint test 3 | 4 | install: 5 | @go install golang.org/x/perf/cmd/benchstat@latest 6 | @go install github.com/jstemmer/go-junit-report@latest 7 | @go install github.com/Metronlab/genius@latest 8 | curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin latest 9 | 10 | lint: 11 | golangci-lint run --fix -v $(PKG) 12 | 13 | test: 14 | @RUN=$(RUN) PKG=$(PKG) TIMEOUT=$(TIMEOUT) bash -c $(PWD)/scripts/test.sh 15 | 16 | bench: 17 | @RUN=$(RUN) PKG=$(PKG) TIMEOUT=$(TIMEOUT) bash -c $(PWD)/scripts/benchmark.sh 18 | 19 | CPUPROFILE=/tmp/$(shell basename $(PWD))$(shell echo $(PKG) | sed 's/[^[:alnum:]\t]//g').cpu.prof 20 | MEMPROFILE=/tmp/$(shell basename $(PWD))$(shell echo $(PKG) | sed 's/[^[:alnum:]\t]//g').mem.prof 21 | 22 | test-profile: 23 | go test $(PKG) -v -run $(RUN) -cpuprofile $(CPUPROFILE) -memprofile $(MEMPROFILE) 24 | -lsof -ti tcp:8888 | xargs kill -9 2> /dev/null 25 | -lsof -ti tcp:8989 | xargs kill -9 2> /dev/null 26 | go tool pprof -http=:8888 $(CPUPROFILE) & 27 | go tool pprof -http=:8989 $(MEMPROFILE) & 28 | 29 | bench-profile: 30 | go test $(PKG) -run XXX -bench $(RUN) -cpuprofile $(CPUPROFILE) -memprofile $(MEMPROFILE) 31 | -lsof -ti tcp:9090 | xargs kill -9 2> /dev/null 32 | -lsof -ti tcp:9191 | xargs kill -9 2> /dev/null 33 | go tool pprof -http=:9090 
$(CPUPROFILE) & 34 | go tool pprof -http=:9191 $(MEMPROFILE) & 35 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Bow 2 | 3 | ![lint](https://github.com/Metronlab/bow/actions/workflows/golangci-lint.yml/badge.svg) 4 | ![ci](https://github.com/Metronlab/bow/actions/workflows/ci.yml/badge.svg) 5 | 6 | Bow is meant to be an efficient data manipulation framework based on [Apache Arrow](https://arrow.apache.org/) for the Go programming language. 7 | Inspired by [Pandas](https://pandas.pydata.org/), Bow aims to bring the last missing block required to make Go a data science ready language. 8 | 9 | The `Bow` interface is stable and frozen; you can use it at will, and all further changes will be planned for a v2. 10 | 11 | This project has been used for years in production at [Metron](https://www.metron.energy/), 12 | however it's still an incomplete pet project compared to [Pandas](https://pandas.pydata.org/). 13 | Bow is currently developed internally at Metronlab with primary concerns about timeseries. 14 | Recently [empowill](https://www.empowill.com/) decided to contribute in order to confront this library with more general-purpose usage. 15 | 16 | We are looking for a foundation / group of people that could help take this library to the next level! 17 | 18 | ## CONTRIBUTE 19 | Don't hesitate to send issues and contribute to the library design. 20 | 21 | This library is in pure Go; to contribute, you just need a recent Go version installed and you can directly use `make` to validate your contribution. 
22 | 23 | - Create an issue 24 | - Create a branch from main 25 | - Implement and comply with the Github Actions CI 26 | - Submit a PR 27 | 28 | -------------------------------------------------------------------------------- /XXXexamples_test.go: -------------------------------------------------------------------------------- 1 | package bow 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "fmt" 7 | ) 8 | 9 | func ExampleNewBow() { 10 | b, err := NewBow( 11 | NewSeries("col1", Int64, []int64{1, 2, 3, 4}, nil), 12 | NewSeries("col2", Float64, []float64{1.1, 2.2, 3.3, 4}, []bool{true, false, true, true}), 13 | NewSeries("col3", Boolean, []bool{true, false, true, false}, []bool{true, false, true, true}), 14 | ) 15 | if err != nil { 16 | panic(err) 17 | } 18 | 19 | fmt.Println(b) 20 | // Output: 21 | // col1:int64 col2:float64 col3:bool 22 | // 1 1.1 true 23 | // 2 24 | // 3 3.3 true 25 | // 4 4 false 26 | } 27 | 28 | func ExampleNewBowFromColBasedInterfaces() { 29 | colNames := []string{"time", "value", "valueFromJSON"} 30 | colTypes := make([]Type, len(colNames)) 31 | colTypes[0] = Int64 32 | colBasedData := [][]interface{}{ 33 | {1, 1.2, json.Number("3")}, 34 | {1, json.Number("1.2"), 3}, 35 | {json.Number("1.1"), 2, 1.3}, 36 | } 37 | 38 | b, err := NewBowFromColBasedInterfaces(colNames, colTypes, colBasedData) 39 | if err != nil { 40 | panic(err) 41 | } 42 | 43 | fmt.Println(b) 44 | // Output: 45 | // time:int64 value:int64 valueFromJSON:float64 46 | // 1 1 1.1 47 | // 1 2 48 | // 3 3 1.3 49 | } 50 | 51 | func ExampleNewBowFromRowBasedInterfaces() { 52 | colNames := []string{"time", "value", "valueFromJSON"} 53 | colTypes := []Type{Int64, Int64, Float64} 54 | rowBasedData := [][]interface{}{ 55 | {1, 1, json.Number("1.1")}, 56 | {1.2, json.Number("1.2"), 2}, 57 | {json.Number("3"), 3, 1.3}, 58 | } 59 | 60 | b, err := NewBowFromRowBasedInterfaces(colNames, colTypes, rowBasedData) 61 | if err != nil { 62 | panic(err) 63 | } 64 | 65 | fmt.Println(b) 66 | // 
Output: 67 | // time:int64 value:int64 valueFromJSON:float64 68 | // 1 1 1.1 69 | // 1 2 70 | // 3 3 1.3 71 | } 72 | 73 | func ExampleBow_MarshalJSON() { 74 | colNames := []string{"time", "value", "valueFromJSON"} 75 | colTypes := make([]Type, len(colNames)) 76 | colTypes[0] = Int64 77 | colBasedData := [][]interface{}{ 78 | {1, 1.2, json.Number("3")}, 79 | {1, json.Number("1.2"), 3}, 80 | {json.Number("1.1"), 2, 1.3}, 81 | } 82 | 83 | b, err := NewBowFromColBasedInterfaces(colNames, colTypes, colBasedData) 84 | if err != nil { 85 | panic(err) 86 | } 87 | 88 | js, err := b.MarshalJSON() 89 | if err != nil { 90 | panic(err) 91 | } 92 | 93 | // pretty print json 94 | var out bytes.Buffer 95 | if err = json.Indent(&out, js, "", "\t"); err != nil { 96 | panic(err) 97 | } 98 | 99 | fmt.Println(out.String()) 100 | // Output: 101 | // { 102 | // "schema": { 103 | // "fields": [ 104 | // { 105 | // "name": "time", 106 | // "type": "int64" 107 | // }, 108 | // { 109 | // "name": "value", 110 | // "type": "int64" 111 | // }, 112 | // { 113 | // "name": "valueFromJSON", 114 | // "type": "float64" 115 | // } 116 | // ] 117 | // }, 118 | // "data": [ 119 | // { 120 | // "time": 1, 121 | // "value": 1, 122 | // "valueFromJSON": 1.1 123 | // }, 124 | // { 125 | // "time": 1, 126 | // "valueFromJSON": 2 127 | // }, 128 | // { 129 | // "time": 3, 130 | // "value": 3, 131 | // "valueFromJSON": 1.3 132 | // } 133 | // ] 134 | // } 135 | } 136 | -------------------------------------------------------------------------------- /arrowtests/arrow.go: -------------------------------------------------------------------------------- 1 | package arrowtests 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/apache/arrow/go/v8/arrow" 7 | "github.com/apache/arrow/go/v8/arrow/array" 8 | "github.com/apache/arrow/go/v8/arrow/memory" 9 | ) 10 | 11 | var ( 12 | EventSchema = arrow.NewSchema( 13 | []arrow.Field{ 14 | {Name: "time", Type: arrow.FixedWidthTypes.Time32ms}, 15 | {Name: "value", Type: 
arrow.PrimitiveTypes.Float64}, 16 | {Name: "quality", Type: arrow.PrimitiveTypes.Int64}, 17 | }, nil, 18 | ) 19 | ) 20 | 21 | type Event struct { 22 | Time arrow.Time32 23 | Value interface{} 24 | quality int64 25 | } 26 | 27 | // NewTSRecord Create a new sample base on eventSchema 28 | func NewTSRecord() (*arrow.Schema, arrow.Record) { 29 | pool := memory.NewGoAllocator() 30 | b := array.NewRecordBuilder(pool, EventSchema) 31 | defer b.Release() 32 | 33 | b.Field(0).(*array.Time32Builder).AppendValues([]arrow.Time32{1, 2, 3, 4}, nil) 34 | b.Field(1).(*array.Float64Builder).AppendValues([]float64{7, 8, 9, 10}, []bool{true, true, false, true}) 35 | b.Field(2).(*array.Int64Builder).AppendValues([]int64{42, 42, 41, 42}, nil) 36 | 37 | return EventSchema, b.NewRecord() 38 | } 39 | 40 | // PrintRecordColumns Print a columns based output 41 | func PrintRecordColumns(rec arrow.Record) { 42 | for i, col := range rec.Columns() { 43 | fmt.Printf("column[%d] %q: %v\n", i, rec.ColumnName(i), col) 44 | } 45 | } 46 | 47 | // PrintRecordRows Print a row based output 48 | func PrintRecordRows(schema *arrow.Schema, recs []arrow.Record) { 49 | // Make a table read only based on many records 50 | table := array.NewTableFromRecords(schema, recs) 51 | defer table.Release() 52 | 53 | // makes a events series 54 | events := make([]Event, table.NumRows()) 55 | 56 | // Seek schema index for event 57 | timeIndex := table.Schema().FieldIndices("time")[0] 58 | valueIndex := table.Schema().FieldIndices("value")[0] 59 | qualityIndex := table.Schema().FieldIndices("quality")[0] 60 | 61 | // TableReader is able to iter on a table grouping by indexes, 62 | // marvelous to do calculation in parallel 63 | // Underutilized in this case, for a naive implementation iteration is done 1 by 1 64 | tr := array.NewTableReader(table, 1) 65 | defer tr.Release() 66 | 67 | // fill series with TableReader iteration 68 | index := 0 69 | for tr.Next() { 70 | rec := tr.Record() 71 | 72 | td := 
array.NewTime32Data(rec.Column(timeIndex).Data()) 73 | if td.IsValid(0) { 74 | events[index].Time = td.Time32Values()[0] 75 | } 76 | vd := array.NewFloat64Data(rec.Column(valueIndex).Data()) 77 | if vd.IsValid(0) { 78 | events[index].Value = vd.Float64Values()[0] 79 | } 80 | qd := array.NewInt64Data(rec.Column(qualityIndex).Data()) 81 | if qd.IsValid(0) { 82 | events[index].quality = qd.Int64Values()[0] 83 | } 84 | 85 | index++ 86 | } 87 | 88 | // Prints series 89 | for _, e := range events { 90 | fmt.Println("time:", e.Time, ", value:", e.Value, ", quality:", e.quality) 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /arrowtests/arrow_test.go: -------------------------------------------------------------------------------- 1 | package arrowtests 2 | 3 | import ( 4 | "github.com/apache/arrow/go/v8/arrow" 5 | ) 6 | 7 | func ExamplePrintRecordColumns() { 8 | _, rec := NewTSRecord() 9 | defer rec.Release() 10 | 11 | PrintRecordColumns(rec) 12 | 13 | // Output: 14 | //column[0] "time": [1 2 3 4] 15 | //column[1] "value": [7 8 (null) 10] 16 | //column[2] "quality": [42 42 41 42] 17 | } 18 | 19 | func ExamplePrintRecordRows() { 20 | s, rec := NewTSRecord() 21 | defer rec.Release() 22 | 23 | PrintRecordRows(s, []arrow.Record{rec}) 24 | 25 | // Output: 26 | //time: 1 , value: 7 , quality: 42 27 | //time: 2 , value: 8 , quality: 42 28 | //time: 3 , value: , quality: 41 29 | //time: 4 , value: 10 , quality: 42 30 | } 31 | -------------------------------------------------------------------------------- /benchmarks/bow1-10-rows.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Metronlab/bow/e1e1bd5c179e1541de46061796bb13736caadac2/benchmarks/bow1-10-rows.parquet -------------------------------------------------------------------------------- /benchmarks/bow1-100-rows.parquet: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Metronlab/bow/e1e1bd5c179e1541de46061796bb13736caadac2/benchmarks/bow1-100-rows.parquet -------------------------------------------------------------------------------- /benchmarks/bow1-1000-rows.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Metronlab/bow/e1e1bd5c179e1541de46061796bb13736caadac2/benchmarks/bow1-1000-rows.parquet -------------------------------------------------------------------------------- /benchmarks/bow1-10000-rows.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Metronlab/bow/e1e1bd5c179e1541de46061796bb13736caadac2/benchmarks/bow1-10000-rows.parquet -------------------------------------------------------------------------------- /benchmarks/bow1-100000-rows.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Metronlab/bow/e1e1bd5c179e1541de46061796bb13736caadac2/benchmarks/bow1-100000-rows.parquet -------------------------------------------------------------------------------- /benchmarks/bow2-10-rows.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Metronlab/bow/e1e1bd5c179e1541de46061796bb13736caadac2/benchmarks/bow2-10-rows.parquet -------------------------------------------------------------------------------- /benchmarks/bow2-100-rows.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Metronlab/bow/e1e1bd5c179e1541de46061796bb13736caadac2/benchmarks/bow2-100-rows.parquet -------------------------------------------------------------------------------- /benchmarks/bow2-1000-rows.parquet: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Metronlab/bow/e1e1bd5c179e1541de46061796bb13736caadac2/benchmarks/bow2-1000-rows.parquet -------------------------------------------------------------------------------- /benchmarks/bow2-10000-rows.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Metronlab/bow/e1e1bd5c179e1541de46061796bb13736caadac2/benchmarks/bow2-10000-rows.parquet -------------------------------------------------------------------------------- /benchmarks/bow2-100000-rows.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Metronlab/bow/e1e1bd5c179e1541de46061796bb13736caadac2/benchmarks/bow2-100000-rows.parquet -------------------------------------------------------------------------------- /benchmarks/generator_test.go: -------------------------------------------------------------------------------- 1 | package benchmarks 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/metronlab/bow" 8 | "github.com/stretchr/testify/assert" 9 | "github.com/stretchr/testify/require" 10 | ) 11 | 12 | func TestGeneratorForBenchmarks(t *testing.T) { 13 | t.Skip("comment this skip to generate new bows for benchmarks") 14 | 15 | for rows := 10; rows <= 100000; rows *= 10 { 16 | b1, err := bow.NewGenBow(rows, 17 | bow.GenSeriesOptions{ 18 | Name: "Int64_ref", 19 | GenStrategy: bow.GenStrategyRandomIncremental, 20 | }, 21 | bow.GenSeriesOptions{ 22 | Name: "Int64_no_nils_bow1", 23 | GenStrategy: bow.GenStrategyRandom, 24 | }, 25 | bow.GenSeriesOptions{ 26 | Name: "Int64_bow1", 27 | GenStrategy: bow.GenStrategyRandom, 28 | MissingData: true, 29 | }, 30 | bow.GenSeriesOptions{ 31 | Name: "Float64_bow1", 32 | GenStrategy: bow.GenStrategyRandom, 33 | MissingData: true, 34 | Type: bow.Float64, 35 | }, 36 | bow.GenSeriesOptions{ 37 | Name: 
"Boolean_bow1", 38 | GenStrategy: bow.GenStrategyRandom, 39 | MissingData: true, 40 | Type: bow.Boolean, 41 | }, 42 | bow.GenSeriesOptions{ 43 | Name: "String_bow1", 44 | GenStrategy: bow.GenStrategyRandom, 45 | MissingData: true, 46 | Type: bow.String, 47 | }, 48 | ) 49 | require.NoError(t, err) 50 | 51 | b2, err := bow.NewGenBow(rows, 52 | bow.GenSeriesOptions{ 53 | Name: "Int64_ref", 54 | GenStrategy: bow.GenStrategyRandomIncremental, 55 | }, 56 | bow.GenSeriesOptions{ 57 | Name: "Int64_bow2", 58 | GenStrategy: bow.GenStrategyRandom, 59 | MissingData: true, 60 | }, 61 | bow.GenSeriesOptions{ 62 | Name: "Float64_bow2", 63 | GenStrategy: bow.GenStrategyRandom, 64 | MissingData: true, 65 | Type: bow.Float64, 66 | }, 67 | bow.GenSeriesOptions{ 68 | Name: "Boolean_bow2", 69 | GenStrategy: bow.GenStrategyRandom, 70 | MissingData: true, 71 | Type: bow.Boolean, 72 | }, 73 | bow.GenSeriesOptions{ 74 | Name: "String_bow2", 75 | GenStrategy: bow.GenStrategyRandom, 76 | MissingData: true, 77 | Type: bow.String, 78 | }, 79 | ) 80 | require.NoError(t, err) 81 | 82 | assert.NoError(t, b1.WriteParquet(fmt.Sprintf("./bow1-%d-rows", rows), false)) 83 | _, err = bow.NewBowFromParquet(fmt.Sprintf("./bow1-%d-rows.parquet", rows), false) 84 | assert.NoError(t, err) 85 | 86 | assert.NoError(t, b2.WriteParquet(fmt.Sprintf("./bow2-%d-rows", rows), false)) 87 | _, err = bow.NewBowFromParquet(fmt.Sprintf("./bow2-%d-rows.parquet", rows), false) 88 | assert.NoError(t, err) 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /bowappend.go: -------------------------------------------------------------------------------- 1 | package bow 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/apache/arrow/go/v8/arrow" 7 | "github.com/apache/arrow/go/v8/arrow/array" 8 | "github.com/apache/arrow/go/v8/arrow/memory" 9 | ) 10 | 11 | // AppendBows attempts to append bows with equal schemas. 12 | // Different schemas will lead to undefined behavior. 
13 | // Resulting metadata is copied from the first bow. 14 | func AppendBows(bows ...Bow) (Bow, error) { 15 | if len(bows) == 0 { 16 | return nil, nil 17 | } 18 | 19 | if len(bows) == 1 { 20 | return bows[0], nil 21 | } 22 | 23 | numRows := 0 24 | for _, b := range bows { 25 | numRows += b.NumRows() 26 | } 27 | 28 | refBow := bows[0] 29 | series := make([]Series, refBow.NumCols()) 30 | 31 | mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) 32 | for colIndex := 0; colIndex < refBow.NumCols(); colIndex++ { 33 | var newArray arrow.Array 34 | refType := refBow.ColumnType(colIndex) 35 | switch refType { 36 | case Int64: 37 | builder := array.NewInt64Builder(mem) 38 | builder.Resize(numRows) 39 | for _, b := range bows { 40 | if colType := b.ColumnType(colIndex); colType != refType { 41 | return nil, fmt.Errorf("incompatible types '%s' and '%s'", refType, colType) 42 | } 43 | data := b.(*bow).Column(colIndex).Data() 44 | arr := array.NewInt64Data(data) 45 | v := int64Values(arr) 46 | valid := getValiditySlice(arr) 47 | builder.AppendValues(v, valid) 48 | } 49 | newArray = builder.NewArray() 50 | case Float64: 51 | builder := array.NewFloat64Builder(mem) 52 | builder.Resize(numRows) 53 | for _, b := range bows { 54 | if colType := b.ColumnType(colIndex); colType != refType { 55 | return nil, fmt.Errorf("incompatible types '%s' and '%s'", refType, colType) 56 | } 57 | data := b.(*bow).Column(colIndex).Data() 58 | arr := array.NewFloat64Data(data) 59 | v := float64Values(arr) 60 | valid := getValiditySlice(arr) 61 | builder.AppendValues(v, valid) 62 | } 63 | newArray = builder.NewArray() 64 | case Boolean: 65 | builder := array.NewBooleanBuilder(mem) 66 | builder.Resize(numRows) 67 | for _, b := range bows { 68 | if colType := b.ColumnType(colIndex); colType != refType { 69 | return nil, fmt.Errorf("incompatible types '%s' and '%s'", refType, colType) 70 | } 71 | data := b.(*bow).Column(colIndex).Data() 72 | arr := array.NewBooleanData(data) 73 | v := 
booleanValues(arr) 74 | valid := getValiditySlice(arr) 75 | builder.AppendValues(v, valid) 76 | } 77 | newArray = builder.NewArray() 78 | case String: 79 | builder := array.NewStringBuilder(mem) 80 | builder.Resize(numRows) 81 | for _, b := range bows { 82 | if colType := b.ColumnType(colIndex); colType != refType { 83 | return nil, fmt.Errorf("incompatible types '%s' and '%s'", refType, colType) 84 | } 85 | data := b.(*bow).Column(colIndex).Data() 86 | arr := array.NewStringData(data) 87 | v := stringValues(arr) 88 | valid := getValiditySlice(arr) 89 | builder.AppendValues(v, valid) 90 | } 91 | newArray = builder.NewArray() 92 | default: 93 | return nil, fmt.Errorf("unsupported type '%s'", refType) 94 | } 95 | 96 | series[colIndex] = Series{ 97 | Name: refBow.ColumnName(colIndex), 98 | Array: newArray, 99 | } 100 | } 101 | 102 | return NewBowWithMetadata(refBow.Metadata(), series...) 103 | } 104 | -------------------------------------------------------------------------------- /bowappend_test.go: -------------------------------------------------------------------------------- 1 | package bow 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | "github.com/stretchr/testify/require" 9 | ) 10 | 11 | func TestAppendBows(t *testing.T) { 12 | t.Run("no bows", func(t *testing.T) { 13 | appended, err := AppendBows() 14 | assert.NoError(t, err) 15 | assert.Nil(t, appended) 16 | }) 17 | 18 | t.Run("one empty bow", func(t *testing.T) { 19 | b, _ := NewBowFromColBasedInterfaces( 20 | []string{"a"}, 21 | []Type{Int64}, 22 | [][]interface{}{{}}) 23 | appended, err := AppendBows(b) 24 | assert.NoError(t, err) 25 | assert.True(t, appended.Equal(b), fmt.Sprintf( 26 | "want:\n%v\nhave:\n%v", b, appended)) 27 | }) 28 | 29 | t.Run("first empty bow", func(t *testing.T) { 30 | b1, _ := NewBowFromColBasedInterfaces( 31 | []string{"a"}, 32 | []Type{Int64}, 33 | [][]interface{}{{}}) 34 | b2, _ := NewBowFromColBasedInterfaces( 35 | []string{"a"}, 36 | 
[]Type{Int64}, 37 | [][]interface{}{ 38 | {1}, 39 | }) 40 | appended, err := AppendBows(b1, b2) 41 | assert.NoError(t, err) 42 | assert.True(t, appended.Equal(b2), fmt.Sprintf( 43 | "want:\n%v\nhave:\n%v", b2, appended)) 44 | }) 45 | 46 | t.Run("several empty bows", func(t *testing.T) { 47 | b, _ := NewBowFromColBasedInterfaces( 48 | []string{"a"}, 49 | []Type{Int64}, 50 | [][]interface{}{{}}) 51 | appended, err := AppendBows(b, b) 52 | assert.NoError(t, err) 53 | assert.True(t, appended.Equal(b), fmt.Sprintf( 54 | "want:\n%v\nhave:\n%v", b, appended)) 55 | }) 56 | 57 | t.Run("schema mismatch", func(t *testing.T) { 58 | b1, _ := NewBowFromColBasedInterfaces( 59 | []string{"i", "s"}, 60 | []Type{Int64, String}, 61 | [][]interface{}{ 62 | {"hey"}, 63 | {1}, 64 | }) 65 | b2, _ := NewBowFromColBasedInterfaces( 66 | []string{"a"}, 67 | []Type{Int64}, 68 | [][]interface{}{ 69 | {1}, 70 | }) 71 | 72 | assert.Panics(t, func() { _, _ = AppendBows(b1, b2) }) 73 | }) 74 | 75 | t.Run("type mismatch", func(t *testing.T) { 76 | b1, _ := NewBowFromColBasedInterfaces( 77 | []string{"i", "s"}, 78 | []Type{Int64, Int64}, 79 | [][]interface{}{ 80 | {1}, 81 | {1}, 82 | }) 83 | b2, _ := NewBowFromColBasedInterfaces( 84 | []string{"a"}, 85 | []Type{Int64, Float64}, 86 | [][]interface{}{ 87 | {1}, 88 | {1.}, 89 | }) 90 | 91 | assert.Panics(t, func() { _, _ = AppendBows(b1, b2) }) 92 | }) 93 | 94 | t.Run("3 bows of 2 cols", func(t *testing.T) { 95 | b1, _ := NewBowFromColBasedInterfaces( 96 | []string{"a", "b"}, 97 | []Type{Int64, Float64}, 98 | [][]interface{}{ 99 | {1, 2, 3}, 100 | {.1, .2, .3}, 101 | }) 102 | b2, _ := NewBowFromColBasedInterfaces( 103 | []string{"a", "b"}, 104 | []Type{Int64, Float64}, 105 | [][]interface{}{ 106 | {4, 5}, 107 | {.4, .5}, 108 | }) 109 | b3, _ := NewBowFromColBasedInterfaces( 110 | []string{"a", "b"}, 111 | []Type{Int64, Float64}, 112 | [][]interface{}{ 113 | {6}, 114 | {.6}, 115 | }) 116 | 117 | appended, err := AppendBows(b1, b2, b3) 118 | expected, _ 
:= NewBowFromColBasedInterfaces( 119 | []string{"a", "b"}, 120 | []Type{Int64, Float64}, 121 | [][]interface{}{ 122 | {1, 2, 3, 4, 5, 6}, 123 | {.1, .2, .3, .4, .5, .6}, 124 | }) 125 | assert.NoError(t, err) 126 | assert.True(t, appended.Equal(expected), fmt.Sprintf( 127 | "want:\n%v\nhave:\n%v", expected, appended)) 128 | }) 129 | 130 | t.Run("2 bows with the same metadata", func(t *testing.T) { 131 | b1, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}), 132 | NewSeries("time", Int64, []int64{1, 2}, nil), 133 | NewSeries("value", Float64, []float64{.1, .2}, nil), 134 | ) 135 | require.NoError(t, err) 136 | 137 | b2, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}), 138 | NewSeries("time", Int64, []int64{3, 4}, nil), 139 | NewSeries("value", Float64, []float64{.3, .4}, nil), 140 | ) 141 | require.NoError(t, err) 142 | 143 | expected, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}), 144 | NewSeries("time", Int64, []int64{1, 2, 3, 4}, nil), 145 | NewSeries("value", Float64, []float64{.1, .2, .3, .4}, nil), 146 | ) 147 | require.NoError(t, err) 148 | 149 | appended, err := AppendBows(b1, b2) 150 | assert.NoError(t, err) 151 | 152 | assert.Equal(t, expected.String(), appended.String()) 153 | }) 154 | 155 | t.Run("same column names but different types", func(t *testing.T) { 156 | b1, err := NewBowFromColBasedInterfaces( 157 | []string{"a", "b"}, 158 | []Type{Int64, Float64}, 159 | [][]interface{}{ 160 | {1, 2}, 161 | {.1, .2}, 162 | }) 163 | require.NoError(t, err) 164 | b2, err := NewBowFromColBasedInterfaces( 165 | []string{"a", "b"}, 166 | []Type{Int64, Int64}, 167 | [][]interface{}{ 168 | {3}, 169 | {3}, 170 | }) 171 | require.NoError(t, err) 172 | 173 | _, err = AppendBows(b1, b2) 174 | assert.Error(t, err) 175 | }) 176 | } 177 | 178 | func BenchmarkAppendBows(b *testing.B) { 179 | for rows := 10; rows <= 100000; rows *= 10 { 180 | b1, err := NewBow( 181 | NewSeries("time", Int64, make([]int64, rows), nil), 
182 | NewSeries("value", Float64, make([]float64, rows), nil)) 183 | require.NoError(b, err) 184 | 185 | b2, err := NewBow( 186 | NewSeries("time", Int64, make([]int64, rows), nil), 187 | NewSeries("value", Float64, make([]float64, rows), nil)) 188 | require.NoError(b, err) 189 | 190 | b.Run(fmt.Sprintf("%d_rows", rows), func(b *testing.B) { 191 | for n := 0; n < b.N; n++ { 192 | _, err := AppendBows(b1, b2) 193 | require.NoError(b, err) 194 | } 195 | }) 196 | } 197 | } 198 | -------------------------------------------------------------------------------- /bowassertion.go: -------------------------------------------------------------------------------- 1 | package bow 2 | 3 | import ( 4 | "github.com/apache/arrow/go/v8/arrow/array" 5 | ) 6 | 7 | const ( 8 | orderUndefined = iota 9 | orderASC 10 | orderDESC 11 | ) 12 | 13 | // IsColSorted returns a boolean whether the column colIndex is sorted or not, skipping nil values. 14 | // An empty column or an unsupported data type returns false. 
15 | func (b *bow) IsColSorted(colIndex int) bool { 16 | if b.IsColEmpty(colIndex) { 17 | return false 18 | } 19 | var rowIndex int 20 | var order = orderUndefined 21 | 22 | switch b.ColumnType(colIndex) { 23 | case Int64: 24 | arr := array.NewInt64Data(b.Column(colIndex).Data()) 25 | values := arr.Int64Values() 26 | for arr.IsNull(rowIndex) { 27 | rowIndex++ 28 | } 29 | curr := values[rowIndex] 30 | var next int64 31 | rowIndex++ 32 | for ; rowIndex < len(values); rowIndex++ { 33 | if !arr.IsValid(rowIndex) { 34 | continue 35 | } 36 | next = values[rowIndex] 37 | if order == orderUndefined { 38 | if curr < next { 39 | order = orderASC 40 | } else if curr > next { 41 | order = orderDESC 42 | } 43 | } 44 | if order == orderASC && next < curr || 45 | order == orderDESC && next > curr { 46 | return false 47 | } 48 | curr = next 49 | } 50 | case Float64: 51 | arr := array.NewFloat64Data(b.Column(colIndex).Data()) 52 | values := arr.Float64Values() 53 | for arr.IsNull(rowIndex) { 54 | rowIndex++ 55 | } 56 | curr := values[rowIndex] 57 | var next float64 58 | rowIndex++ 59 | for ; rowIndex < len(values); rowIndex++ { 60 | if !arr.IsValid(rowIndex) { 61 | continue 62 | } 63 | next = values[rowIndex] 64 | if order == orderUndefined { 65 | if curr < next { 66 | order = orderASC 67 | } else if curr > next { 68 | order = orderDESC 69 | } 70 | } 71 | if order == orderASC && next < curr || 72 | order == orderDESC && next > curr { 73 | return false 74 | } 75 | curr = next 76 | } 77 | default: 78 | return false 79 | } 80 | return true 81 | } 82 | 83 | // IsColEmpty returns false if the column has at least one non-nil value, and true otherwise. 
84 | func (b *bow) IsColEmpty(colIndex int) bool { 85 | return b.Column(colIndex).NullN() == b.Column(colIndex).Len() 86 | } 87 | -------------------------------------------------------------------------------- /bowassertion_test.go: -------------------------------------------------------------------------------- 1 | package bow 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | "github.com/stretchr/testify/require" 9 | ) 10 | 11 | func TestBow_IsColSorted(t *testing.T) { 12 | t.Run("int64", func(t *testing.T) { 13 | b, _ := NewBowFromRowBasedInterfaces( 14 | []string{"a", "b", "c", "d", "e"}, 15 | []Type{Int64, Int64, Int64, Int64, Int64}, 16 | [][]interface{}{ 17 | {-2, 1, nil, nil, -8}, 18 | {0, nil, 3, 4, 0}, 19 | {1, nil, nil, 120, nil}, 20 | {10, 4, 10, 10, -5}, 21 | {13, nil, nil, nil, nil}, 22 | {20, 6, 30, 400, -10}, 23 | }) 24 | sorted := b.IsColSorted(0) 25 | assert.True(t, sorted) 26 | sorted = b.IsColSorted(1) 27 | assert.True(t, sorted) 28 | sorted = b.IsColSorted(2) 29 | assert.True(t, sorted) 30 | sorted = b.IsColSorted(3) 31 | assert.False(t, sorted) 32 | sorted = b.IsColSorted(4) 33 | assert.False(t, sorted) 34 | }) 35 | 36 | t.Run("float64", func(t *testing.T) { 37 | b, _ := NewBowFromRowBasedInterfaces([]string{"a", "b", "c", "d", "e"}, []Type{Float64, Float64, Float64, Float64, Float64}, [][]interface{}{ 38 | {-2.0, 1.0, nil, nil, -8.0}, 39 | {0.0, nil, 3.0, 4.0, 0.0}, 40 | {1.0, nil, nil, 120.0, nil}, 41 | {10.0, 4.0, 10.0, 10.0, -5.0}, 42 | {13.0, nil, nil, nil, nil}, 43 | {20.0, 6.0, 30.0, 400.0, -10.0}, 44 | }) 45 | sorted := b.IsColSorted(0) 46 | assert.True(t, sorted) 47 | sorted = b.IsColSorted(1) 48 | assert.True(t, sorted) 49 | sorted = b.IsColSorted(2) 50 | assert.True(t, sorted) 51 | sorted = b.IsColSorted(3) 52 | assert.False(t, sorted) 53 | sorted = b.IsColSorted(4) 54 | assert.False(t, sorted) 55 | }) 56 | 57 | t.Run("string (unsupported type)", func(t *testing.T) { 58 | b, _ := 
NewBowFromRowBasedInterfaces([]string{"a", "b"}, []Type{String, String}, [][]interface{}{ 59 | {"egr", "rgr"}, 60 | {"zrr", nil}, 61 | {"zrfr", nil}, 62 | {"rgrg", "zefe"}, 63 | {"zfer", nil}, 64 | {"sffe", "srre"}, 65 | }) 66 | sorted := b.IsColSorted(0) 67 | assert.False(t, sorted) 68 | sorted = b.IsColSorted(1) 69 | assert.False(t, sorted) 70 | }) 71 | } 72 | 73 | func TestBow_IsColEmpty(t *testing.T) { 74 | b, err := NewBowFromRowBasedInterfaces( 75 | []string{"a", "b", "c"}, 76 | []Type{Int64, Int64, Int64}, 77 | [][]interface{}{ 78 | {-2, 1, nil}, 79 | {0, nil, nil}, 80 | {1, nil, nil}, 81 | }) 82 | require.NoError(t, err) 83 | 84 | empty := b.IsColEmpty(0) 85 | assert.False(t, empty) 86 | empty = b.IsColEmpty(1) 87 | assert.False(t, empty) 88 | empty = b.IsColEmpty(2) 89 | assert.True(t, empty) 90 | } 91 | 92 | func BenchmarkBow_IsColSorted(b *testing.B) { 93 | for rows := 10; rows <= 100000; rows *= 10 { 94 | data, err := NewBowFromParquet(fmt.Sprintf( 95 | "%sbow1-%d-rows.parquet", benchmarkBowsDirPath, rows), false) 96 | require.NoError(b, err) 97 | 98 | b.Run(fmt.Sprintf("sorted_%d_rows", rows), func(b *testing.B) { 99 | for n := 0; n < b.N; n++ { 100 | data.IsColSorted(0) 101 | } 102 | }) 103 | 104 | b.Run(fmt.Sprintf("not_sorted_%d_rows", rows), func(b *testing.B) { 105 | for n := 0; n < b.N; n++ { 106 | data.IsColSorted(1) 107 | } 108 | }) 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /bowbuffer.go: -------------------------------------------------------------------------------- 1 | package bow 2 | 3 | import ( 4 | "fmt" 5 | "sort" 6 | 7 | "github.com/apache/arrow/go/v8/arrow/array" 8 | "github.com/apache/arrow/go/v8/arrow/bitutil" 9 | ) 10 | 11 | // Buffer is a mutable data structure with the purpose of easily building data Series with: 12 | // - Data: slice of data. 13 | // - DataType: type of the data. 14 | // - nullBitmapBytes: slice of bytes representing valid or null values. 
15 | type Buffer struct { 16 | Data interface{} 17 | DataType Type 18 | nullBitmapBytes []byte 19 | } 20 | 21 | // NewBuffer returns a new Buffer of size `size` and Type `typ`. 22 | func NewBuffer(size int, typ Type) Buffer { 23 | buf := Buffer{ 24 | DataType: typ, 25 | nullBitmapBytes: make([]byte, bitutil.CeilByte(size)/8), 26 | } 27 | switch typ { 28 | case Int64: 29 | buf.Data = make([]int64, size) 30 | case Float64: 31 | buf.Data = make([]float64, size) 32 | case Boolean: 33 | buf.Data = make([]bool, size) 34 | case String: 35 | buf.Data = make([]string, size) 36 | default: 37 | panic(fmt.Errorf("unsupported type '%s'", typ)) 38 | } 39 | return buf 40 | } 41 | 42 | // Len returns the size of the underlying slice of data in the Buffer. 43 | func (b Buffer) Len() int { 44 | switch b.DataType { 45 | case Int64: 46 | return len(b.Data.([]int64)) 47 | case Float64: 48 | return len(b.Data.([]float64)) 49 | case Boolean: 50 | return len(b.Data.([]bool)) 51 | case String: 52 | return len(b.Data.([]string)) 53 | default: 54 | panic(fmt.Errorf("unsupported type '%s'", b.DataType)) 55 | } 56 | } 57 | 58 | // SetOrDrop sets the Buffer data at index `i` by attempting to convert `value` to its DataType. 59 | // Sets the value to nil if the conversion failed or if `value` is nil. 
60 | func (b *Buffer) SetOrDrop(i int, value interface{}) { 61 | var valid bool 62 | switch b.DataType { 63 | case Int64: 64 | b.Data.([]int64)[i], valid = Int64.Convert(value).(int64) 65 | case Float64: 66 | b.Data.([]float64)[i], valid = Float64.Convert(value).(float64) 67 | case Boolean: 68 | b.Data.([]bool)[i], valid = Boolean.Convert(value).(bool) 69 | case String: 70 | b.Data.([]string)[i], valid = String.Convert(value).(string) 71 | default: 72 | panic(fmt.Errorf("unsupported type '%s'", b.DataType)) 73 | } 74 | 75 | if valid { 76 | bitutil.SetBit(b.nullBitmapBytes, i) 77 | } else { 78 | bitutil.ClearBit(b.nullBitmapBytes, i) 79 | } 80 | } 81 | 82 | // SetOrDropStrict sets the Buffer data at index `i` by attempting a type assertion of `value` to its DataType. 83 | // Sets the value to nil if the assertion failed or if `value` is nil. 84 | func (b *Buffer) SetOrDropStrict(i int, value interface{}) { 85 | var valid bool 86 | switch b.DataType { 87 | case Int64: 88 | b.Data.([]int64)[i], valid = value.(int64) 89 | case Float64: 90 | b.Data.([]float64)[i], valid = value.(float64) 91 | case Boolean: 92 | b.Data.([]bool)[i], valid = value.(bool) 93 | case String: 94 | b.Data.([]string)[i], valid = value.(string) 95 | default: 96 | panic(fmt.Errorf("unsupported type '%s'", b.DataType)) 97 | } 98 | 99 | if valid { 100 | bitutil.SetBit(b.nullBitmapBytes, i) 101 | } else { 102 | bitutil.ClearBit(b.nullBitmapBytes, i) 103 | } 104 | } 105 | 106 | // GetValue gets the value at index `i` from the Buffer 107 | func (b *Buffer) GetValue(i int) interface{} { 108 | if bitutil.BitIsNotSet(b.nullBitmapBytes, i) { 109 | return nil 110 | } 111 | 112 | switch b.DataType { 113 | case Int64: 114 | return b.Data.([]int64)[i] 115 | case Float64: 116 | return b.Data.([]float64)[i] 117 | case Boolean: 118 | return b.Data.([]bool)[i] 119 | case String: 120 | return b.Data.([]string)[i] 121 | default: 122 | panic(fmt.Errorf("unsupported type '%s'", b.DataType)) 123 | } 124 | } 125 | 126 | 
func (b Buffer) Less(i, j int) bool { 127 | switch b.DataType { 128 | case Int64: 129 | return b.Data.([]int64)[i] < b.Data.([]int64)[j] 130 | case Float64: 131 | return b.Data.([]float64)[i] < b.Data.([]float64)[j] 132 | case String: 133 | return b.Data.([]string)[i] < b.Data.([]string)[j] 134 | case Boolean: 135 | return !b.Data.([]bool)[i] && b.Data.([]bool)[j] 136 | default: 137 | panic(fmt.Errorf("unsupported type '%s'", b.DataType)) 138 | } 139 | } 140 | 141 | func (b *bow) NewBufferFromCol(colIndex int) Buffer { 142 | data := b.Column(colIndex).Data() 143 | res := Buffer{DataType: b.ColumnType(colIndex)} 144 | switch b.ColumnType(colIndex) { 145 | case Int64: 146 | arr := array.NewInt64Data(data) 147 | nullBitmapBytes := arr.NullBitmapBytes()[:bitutil.CeilByte(arr.Data().Len())/8] 148 | nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes)) 149 | copy(nullBitmapBytesCopy, nullBitmapBytes) 150 | res.Data = int64Values(arr) 151 | res.nullBitmapBytes = nullBitmapBytesCopy 152 | case Float64: 153 | arr := array.NewFloat64Data(data) 154 | nullBitmapBytes := arr.NullBitmapBytes()[:bitutil.CeilByte(arr.Data().Len())/8] 155 | nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes)) 156 | copy(nullBitmapBytesCopy, nullBitmapBytes) 157 | res.Data = float64Values(arr) 158 | res.nullBitmapBytes = nullBitmapBytesCopy 159 | case Boolean: 160 | arr := array.NewBooleanData(data) 161 | nullBitmapBytes := arr.NullBitmapBytes()[:bitutil.CeilByte(arr.Data().Len())/8] 162 | nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes)) 163 | copy(nullBitmapBytesCopy, nullBitmapBytes) 164 | res.Data = booleanValues(arr) 165 | res.nullBitmapBytes = nullBitmapBytesCopy 166 | case String: 167 | arr := array.NewStringData(data) 168 | nullBitmapBytes := arr.NullBitmapBytes()[:bitutil.CeilByte(arr.Data().Len())/8] 169 | nullBitmapBytesCopy := make([]byte, len(nullBitmapBytes)) 170 | copy(nullBitmapBytesCopy, nullBitmapBytes) 171 | res.Data = stringValues(arr) 172 | res.nullBitmapBytes = 
nullBitmapBytesCopy 173 | default: 174 | panic(fmt.Errorf("unsupported type '%s'", b.ColumnType(colIndex))) 175 | } 176 | return res 177 | } 178 | 179 | // NewBufferFromInterfaces returns a new typed Buffer with the data represented as a slice of interface{}, with eventual nil values. 180 | func NewBufferFromInterfaces(typ Type, data []interface{}) (Buffer, error) { 181 | buf := NewBuffer(len(data), typ) 182 | for i, c := range data { 183 | buf.SetOrDrop(i, c) 184 | } 185 | return buf, nil 186 | } 187 | 188 | // IsValid return true if the value at row `rowIndex` is valid. 189 | func (b Buffer) IsValid(rowIndex int) bool { 190 | return bitutil.BitIsSet(b.nullBitmapBytes, rowIndex) 191 | } 192 | 193 | // IsNull return true if the value at row `rowIndex` is nil. 194 | func (b Buffer) IsNull(rowIndex int) bool { 195 | return bitutil.BitIsNotSet(b.nullBitmapBytes, rowIndex) 196 | } 197 | 198 | // IsSorted returns true if the values of the Buffer are sorted in ascending order. 199 | func (b Buffer) IsSorted() bool { return sort.IsSorted(b) } 200 | 201 | // Swap swaps the values of the Buffer at indices i and j. 
202 | func (b Buffer) Swap(i, j int) { 203 | v1, v2 := b.GetValue(i), b.GetValue(j) 204 | b.SetOrDropStrict(i, v2) 205 | b.SetOrDropStrict(j, v1) 206 | } 207 | -------------------------------------------------------------------------------- /bowbuffer_test.go: -------------------------------------------------------------------------------- 1 | package bow 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/require" 8 | ) 9 | 10 | func BenchmarkNewBufferFromInterfaces(b *testing.B) { 11 | for rows := 10; rows <= 100000; rows *= 10 { 12 | cells := make([]interface{}, rows) 13 | for i := range cells { 14 | cells[i] = int64(i) 15 | } 16 | 17 | b.Run(fmt.Sprintf("%d_rows", rows), func(b *testing.B) { 18 | for n := 0; n < b.N; n++ { 19 | _, err := NewBufferFromInterfaces(Int64, cells) 20 | require.NoError(b, err) 21 | } 22 | }) 23 | } 24 | } 25 | 26 | func BenchmarkBuffer_SetOrDrop(b *testing.B) { 27 | buf := NewBuffer(10, Int64) 28 | b.ResetTimer() 29 | for n := 0; n < b.N; n++ { 30 | buf.SetOrDrop(9, int64(3)) 31 | buf.SetOrDrop(9, nil) 32 | } 33 | } 34 | 35 | func BenchmarkBuffer_SetOrStrict(b *testing.B) { 36 | buf := NewBuffer(10, Int64) 37 | b.ResetTimer() 38 | for n := 0; n < b.N; n++ { 39 | buf.SetOrDropStrict(9, int64(3)) 40 | buf.SetOrDropStrict(9, nil) 41 | } 42 | } 43 | 44 | func BenchmarkBuffer_GetValue(b *testing.B) { 45 | buf := NewBuffer(10, Int64) 46 | b.ResetTimer() 47 | for n := 0; n < b.N; n++ { 48 | _ = buf.GetValue(9) 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /bowconvert.go: -------------------------------------------------------------------------------- 1 | package bow 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "strconv" 7 | ) 8 | 9 | // ToInt64 attempts to convert `input` to int64. 10 | // Return also a false boolean if the conversion failed. 
11 | func ToInt64(input interface{}) (output int64, ok bool) { 12 | switch input := input.(type) { 13 | case json.Number: 14 | output, err := input.Int64() 15 | return output, err == nil 16 | case int: 17 | return int64(input), true 18 | case int8: 19 | return int64(input), true 20 | case int16: 21 | return int64(input), true 22 | case int32: 23 | return int64(input), true 24 | case int64: 25 | return input, true 26 | case float32: 27 | return int64(input), true 28 | case float64: 29 | return int64(input), true 30 | case bool: 31 | if input { 32 | return 1, true 33 | } 34 | return 0, true 35 | case string: 36 | output, err := strconv.ParseInt(input, 10, 64) 37 | return output, err == nil 38 | } 39 | return 40 | } 41 | 42 | // ToFloat64 attempts to convert `input` to float64. 43 | // Return also a false boolean if the conversion failed. 44 | func ToFloat64(input interface{}) (output float64, ok bool) { 45 | switch input := input.(type) { 46 | case float64: 47 | return input, true 48 | case json.Number: 49 | output, err := input.Float64() 50 | return output, err == nil 51 | case int: 52 | return float64(input), true 53 | case int8: 54 | return float64(input), true 55 | case int16: 56 | return float64(input), true 57 | case int32: 58 | return float64(input), true 59 | case int64: 60 | return float64(input), true 61 | case float32: 62 | return float64(input), true 63 | case bool: 64 | if input { 65 | return 1., true 66 | } 67 | return 0., true 68 | case string: 69 | output, err := strconv.ParseFloat(input, 64) 70 | return output, err == nil 71 | } 72 | return 73 | } 74 | 75 | // ToBoolean attempts to convert `input` to bool. 76 | // Return also a false boolean if the conversion failed. 77 | // In case of numeric type, returns true if the value is non-zero. 
func ToBoolean(input interface{}) (output bool, ok bool) {
	switch input := input.(type) {
	case bool:
		return input, true
	case string:
		output, err := strconv.ParseBool(input)
		return output, err == nil
	case json.Number:
		number, err := input.Float64()
		// The conversion succeeded only when parsing returned no error.
		return number != 0., err == nil
	case int:
		return input != 0, true
	case int8:
		return input != 0, true
	case int16:
		return input != 0, true
	case int32:
		return input != 0, true
	case int64:
		return input != 0, true
	case float32:
		return input != 0., true
	case float64:
		return input != 0., true
	}
	return
}

// ToString attempts to convert `input` to string.
// The returned boolean is false if the conversion failed.
// Floating-point values are formatted with the "%f" verb.
func ToString(input interface{}) (output string, ok bool) {
	switch input := input.(type) {
	case bool:
		if input {
			return "true", true
		}
		return "false", true
	case string:
		return input, true
	case json.Number:
		return input.String(), true
	case int:
		return strconv.Itoa(input), true
	case int8:
		return strconv.FormatInt(int64(input), 10), true
	case int16:
		return strconv.FormatInt(int64(input), 10), true
	case int32:
		return strconv.FormatInt(int64(input), 10), true
	case int64:
		// FormatInt keeps the full 64-bit value on platforms where int is 32 bits wide.
		return strconv.FormatInt(input, 10), true
	case float32:
		return fmt.Sprintf("%f", input), true
	case float64:
		return fmt.Sprintf("%f", input), true
	}
	return
}
136 | -------------------------------------------------------------------------------- /bowconvert_test.go: -------------------------------------------------------------------------------- 1 | package bow 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | "github.com/stretchr/testify/require" 8 | ) 9 | 10 | func TestToBool(t *testing.T) { 11 | var v bool 12 | var ok bool 13 | 14 | v, ok = 
ToBoolean(true) 15 | require.True(t, ok) 16 | assert.Equal(t, true, v) 17 | 18 | v, ok = ToBoolean(false) 19 | require.True(t, ok) 20 | assert.Equal(t, false, v) 21 | 22 | v, ok = ToBoolean("true") 23 | require.True(t, ok) 24 | assert.Equal(t, true, v) 25 | 26 | v, ok = ToBoolean("True") 27 | require.True(t, ok) 28 | assert.Equal(t, true, v) 29 | 30 | v, ok = ToBoolean("false") 31 | require.True(t, ok) 32 | assert.Equal(t, false, v) 33 | 34 | v, ok = ToBoolean("False") 35 | require.True(t, ok) 36 | assert.Equal(t, false, v) 37 | 38 | v, ok = ToBoolean(1) 39 | require.True(t, v) 40 | require.True(t, ok) 41 | v, ok = ToBoolean(0) 42 | require.False(t, v) 43 | require.True(t, ok) 44 | 45 | v, ok = ToBoolean(1.) 46 | require.True(t, v) 47 | require.True(t, ok) 48 | v, ok = ToBoolean(0.) 49 | require.False(t, v) 50 | require.True(t, ok) 51 | } 52 | 53 | func TestToFloat64(t *testing.T) { 54 | var v float64 55 | var ok bool 56 | 57 | v, ok = ToFloat64(true) 58 | require.True(t, ok) 59 | assert.Equal(t, 1., v) 60 | 61 | v, ok = ToFloat64(false) 62 | require.True(t, ok) 63 | assert.Equal(t, 0., v) 64 | 65 | v, ok = ToFloat64(0.) 66 | require.True(t, ok) 67 | assert.Equal(t, 0., v) 68 | 69 | v, ok = ToFloat64(0) 70 | require.True(t, ok) 71 | assert.Equal(t, 0., v) 72 | 73 | v, ok = ToFloat64("0") 74 | require.True(t, ok) 75 | assert.Equal(t, 0., v) 76 | } 77 | 78 | func TestToInt64(t *testing.T) { 79 | var v int64 80 | var ok bool 81 | 82 | v, ok = ToInt64(true) 83 | require.True(t, ok) 84 | assert.Equal(t, int64(1), v) 85 | 86 | v, ok = ToInt64(false) 87 | require.True(t, ok) 88 | assert.Equal(t, int64(0), v) 89 | 90 | v, ok = ToInt64(0.) 
91 | require.True(t, ok) 92 | assert.Equal(t, int64(0), v) 93 | 94 | v, ok = ToInt64(0) 95 | require.True(t, ok) 96 | assert.Equal(t, int64(0), v) 97 | 98 | v, ok = ToInt64("0") 99 | require.True(t, ok) 100 | assert.Equal(t, int64(0), v) 101 | } 102 | 103 | func TestToString(t *testing.T) { 104 | var v string 105 | var ok bool 106 | 107 | v, ok = ToString(true) 108 | require.True(t, ok) 109 | assert.Equal(t, "true", v) 110 | 111 | v, ok = ToString(false) 112 | require.True(t, ok) 113 | assert.Equal(t, "false", v) 114 | 115 | v, ok = ToString(0.) 116 | require.True(t, ok) 117 | assert.Equal(t, "0.000000", v) 118 | 119 | v, ok = ToString(0) 120 | require.True(t, ok) 121 | assert.Equal(t, "0", v) 122 | 123 | v, ok = ToString("0") 124 | require.True(t, ok) 125 | assert.Equal(t, "0", v) 126 | } 127 | -------------------------------------------------------------------------------- /bowdiff.go: -------------------------------------------------------------------------------- 1 | package bow 2 | 3 | import ( 4 | "fmt" 5 | "sync" 6 | ) 7 | 8 | // Diff calculates the first discrete difference of each row compared with the previous row. 9 | // If any of the current or the previous row is nil, the result will be nil. 10 | // For boolean columns, XOR operation is used. 
11 | // TODO: directly mutate bow && only read currVal at each iteration for performance improvement 12 | func (b *bow) Diff(colIndices ...int) (Bow, error) { 13 | selectedCols, err := selectCols(b, colIndices) 14 | if err != nil { 15 | return nil, err 16 | } 17 | 18 | for colIndex, col := range b.Schema().Fields() { 19 | switch b.ColumnType(colIndex) { 20 | case Int64: 21 | case Float64: 22 | case Boolean: 23 | default: 24 | return nil, fmt.Errorf( 25 | "column '%s' is of unsupported type '%s'", 26 | col.Name, b.ColumnType(colIndex)) 27 | } 28 | } 29 | 30 | var wg sync.WaitGroup 31 | calcSeries := make([]Series, b.NumCols()) 32 | for colIndex, col := range b.Schema().Fields() { 33 | if !selectedCols[colIndex] { 34 | calcSeries[colIndex] = b.NewSeriesFromCol(colIndex) 35 | continue 36 | } 37 | 38 | wg.Add(1) 39 | go func(colIndex int, colName string) { 40 | defer wg.Done() 41 | colType := b.ColumnType(colIndex) 42 | colBuf := b.NewBufferFromCol(colIndex) 43 | calcBuf := NewBuffer(b.NumRows(), colType) 44 | for rowIndex := 1; rowIndex < b.NumRows(); rowIndex++ { 45 | valid := b.Column(colIndex).IsValid(rowIndex) && 46 | b.Column(colIndex).IsValid(rowIndex-1) 47 | if !valid { 48 | continue 49 | } 50 | switch colType { 51 | case Int64: 52 | currVal := colBuf.GetValue(rowIndex).(int64) 53 | prevVal := colBuf.GetValue(rowIndex - 1).(int64) 54 | calcBuf.SetOrDrop(rowIndex, currVal-prevVal) 55 | case Float64: 56 | currVal := colBuf.GetValue(rowIndex).(float64) 57 | prevVal := colBuf.GetValue(rowIndex - 1).(float64) 58 | calcBuf.SetOrDrop(rowIndex, currVal-prevVal) 59 | case Boolean: 60 | currVal := colBuf.GetValue(rowIndex).(bool) 61 | prevVal := colBuf.GetValue(rowIndex - 1).(bool) 62 | calcBuf.SetOrDrop(rowIndex, currVal != prevVal) 63 | } 64 | } 65 | 66 | calcSeries[colIndex] = NewSeriesFromBuffer(colName, calcBuf) 67 | 68 | }(colIndex, col.Name) 69 | } 70 | wg.Wait() 71 | 72 | return NewBowWithMetadata(b.Metadata(), calcSeries...) 
73 | } 74 | -------------------------------------------------------------------------------- /bowdiff_test.go: -------------------------------------------------------------------------------- 1 | package bow 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | "github.com/stretchr/testify/require" 8 | ) 9 | 10 | func TestDiff(t *testing.T) { 11 | t.Run("all columns all supported types with nils and metadata", func(t *testing.T) { 12 | b, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}), 13 | NewSeries("a", Int64, 14 | []int64{1, 2, 3, 4, 0, 5}, 15 | []bool{true, true, true, true, false, true}), 16 | NewSeries("b", Float64, 17 | []float64{1., 2., 3., 4., 0., 5.}, 18 | []bool{true, true, true, true, false, true}), 19 | NewSeries("c", Boolean, 20 | []bool{false, false, true, true, false, false}, 21 | []bool{true, true, true, true, false, true}), 22 | ) 23 | require.NoError(t, err) 24 | 25 | expected, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}), 26 | NewSeries("a", Int64, 27 | []int64{0, 1, 1, 1, 0, 0}, 28 | []bool{false, true, true, true, false, false}), 29 | NewSeries("b", Float64, 30 | []float64{0., 1., 1., 1., 0., 0.}, 31 | []bool{false, true, true, true, false, false}), 32 | NewSeries("c", Boolean, 33 | []bool{false, false, true, false, false, false}, 34 | []bool{false, true, true, true, false, false}), 35 | ) 36 | require.NoError(t, err) 37 | 38 | calc, err := b.Diff() 39 | assert.NoError(t, err) 40 | assert.EqualValues(t, expected.String(), calc.String()) 41 | }) 42 | 43 | t.Run("one column all supported types", func(t *testing.T) { 44 | b, err := NewBowFromRowBasedInterfaces( 45 | []string{"a", "b", "c"}, 46 | []Type{Int64, Float64, Boolean}, 47 | [][]interface{}{ 48 | {1, 1., false}, 49 | {2, 2., false}, 50 | {3, 3., true}, 51 | }) 52 | require.NoError(t, err) 53 | 54 | expected, err := NewBowFromRowBasedInterfaces( 55 | []string{"a", "b", "c"}, 56 | []Type{Int64, Float64, Boolean}, 57 | 
[][]interface{}{ 58 | {1, nil, false}, 59 | {2, 1., false}, 60 | {3, 1., true}, 61 | }) 62 | require.NoError(t, err) 63 | calc, err := b.Diff(1) 64 | assert.NoError(t, err) 65 | assert.EqualValues(t, expected.String(), calc.String()) 66 | }) 67 | 68 | t.Run("unsupported type string", func(t *testing.T) { 69 | b, err := NewBowFromRowBasedInterfaces([]string{"a"}, []Type{String}, [][]interface{}{}) 70 | require.NoError(t, err) 71 | 72 | calc, err := b.Diff() 73 | assert.Error(t, err) 74 | assert.Nil(t, calc) 75 | }) 76 | 77 | t.Run("empty", func(t *testing.T) { 78 | b, err := NewBowFromRowBasedInterfaces([]string{"a"}, []Type{Int64}, [][]interface{}{}) 79 | require.NoError(t, err) 80 | 81 | calc, err := b.Diff() 82 | assert.NoError(t, err) 83 | assert.EqualValues(t, b.String(), calc.String()) 84 | }) 85 | 86 | t.Run("missing column", func(t *testing.T) { 87 | b, err := NewBowFromRowBasedInterfaces([]string{"a"}, []Type{Int64}, [][]interface{}{}) 88 | require.NoError(t, err) 89 | 90 | calc, err := b.Diff(1) 91 | assert.Error(t, err) 92 | assert.Nil(t, calc) 93 | }) 94 | } 95 | -------------------------------------------------------------------------------- /bowfind.go: -------------------------------------------------------------------------------- 1 | package bow 2 | 3 | // Find returns the index of the row where `value` is found in the `colIndex` column. 4 | // Returns -1 if the value is not found. 5 | func (b *bow) Find(colIndex int, value interface{}) int { 6 | return b.FindNext(colIndex, 0, value) 7 | } 8 | 9 | // FindNext returns the index of the row where `value` is found in the `colIndex` column, starting from the `rowIndex` row. 10 | // Returns -1 if the value is not found. 
11 | func (b *bow) FindNext(colIndex, rowIndex int, value interface{}) int { 12 | if value == nil { 13 | // A nil value matches the first row whose validity bit is unset; scan from rowIndex like the non-nil search below. 14 | for i := rowIndex; i < b.NumRows(); i++ { 15 | if !b.Column(colIndex).IsValid(i) { 16 | return i 17 | } 18 | } 19 | return -1 20 | } 21 | 22 | for i := rowIndex; i < b.NumRows(); i++ { 23 | if value == b.GetValue(colIndex, i) { 24 | return i 25 | } 26 | } 27 | return -1 28 | } 29 | 30 | // Contains returns whether `value` is found in the `colIndex` column. 31 | func (b *bow) Contains(colIndex int, value interface{}) bool { 32 | return b.Find(colIndex, value) != -1 33 | } 34 | -------------------------------------------------------------------------------- /bowfind_test.go: -------------------------------------------------------------------------------- 1 | package bow 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | var sortedTestBow, _ = NewBow( 10 | NewSeries(Int64.String(), Int64, 11 | []int64{0, 1, 0, 0}, 12 | []bool{true, true, false, true}), 13 | NewSeries(Float64.String(), Float64, 14 | []float64{0., 1., 0., 0.}, 15 | []bool{true, true, false, true}), 16 | NewSeries(String.String(), String, 17 | []string{"0", "1", "0", "0"}, 18 | []bool{true, true, false, true}), 19 | NewSeries(Boolean.String(), Boolean, 20 | []bool{false, true, false, false}, 21 | []bool{true, true, false, true}), 22 | ) 23 | 24 | func TestBow_Find(t *testing.T) { 25 | type toto int 26 | for i := 0; i < sortedTestBow.NumCols(); i++ { 27 | t.Run(sortedTestBow.ColumnName(i), func(t *testing.T) { 28 | v := sortedTestBow.GetValue(i, 0) 29 | assert.Equal(t, 0, sortedTestBow.Find(i, v)) 30 | assert.Equal(t, 2, sortedTestBow.Find(i, nil)) 31 | assert.Equal(t, -1, sortedTestBow.Find(i, toto(0))) 32 | assert.False(t, sortedTestBow.Contains(i, toto(0))) 33 | assert.True(t, sortedTestBow.Contains(i, v)) 34 | assert.Equal(t, 3, sortedTestBow.FindNext(i, 1, v)) 35 | 36 | empty := sortedTestBow.NewEmptySlice() 37 | assert.Equal(t, -1, empty.Find(i, v)) 38 | 
assert.Equal(t, -1, empty.Find(i, nil)) 39 | assert.Equal(t, -1, empty.Find(i, toto(0))) 40 | assert.False(t, empty.Contains(i, v)) 41 | assert.Equal(t, -1, empty.FindNext(i, 1, v)) 42 | }) 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /bowgenerator.go: -------------------------------------------------------------------------------- 1 | package bow 2 | 3 | import ( 4 | crand "crypto/rand" 5 | "fmt" 6 | "math/big" 7 | 8 | "github.com/google/uuid" 9 | ) 10 | 11 | const genDefaultNumRows = 3 12 | 13 | // GenSeriesOptions are options to generate random Series: 14 | // - NumRows: number of rows of the resulting Series 15 | // - Name: name of the Series 16 | // - Type: data type of the Series 17 | // - GenStrategy: strategy of data generation 18 | // - MissingData: sets whether the Series includes random nil values 19 | type GenSeriesOptions struct { 20 | NumRows int 21 | Name string 22 | Type Type 23 | GenStrategy GenStrategy 24 | MissingData bool 25 | } 26 | 27 | // NewGenBow generates a new random Bow with `numRows` rows and eventual GenSeriesOptions. 28 | func NewGenBow(numRows int, options ...GenSeriesOptions) (Bow, error) { 29 | series := make([]Series, len(options)) 30 | nameMap := make(map[string]struct{}) 31 | for i, o := range options { 32 | o.NumRows = numRows 33 | o.validate() 34 | if _, ok := nameMap[o.Name]; ok { 35 | o.Name = fmt.Sprintf("%s_%d", o.Name, i) 36 | } 37 | nameMap[o.Name] = struct{}{} 38 | series[i] = o.genSeries() 39 | } 40 | 41 | return NewBow(series...) 42 | } 43 | 44 | // NewGenSeries returns a new randomly generated Series. 
45 | func NewGenSeries(o GenSeriesOptions) Series { 46 | o.validate() 47 | return o.genSeries() 48 | } 49 | 50 | func (o *GenSeriesOptions) validate() { 51 | if o.NumRows <= 0 { 52 | o.NumRows = genDefaultNumRows 53 | } 54 | if o.Name == "" { 55 | o.Name = "default" 56 | } 57 | if o.Type == Unknown { 58 | o.Type = Int64 59 | } 60 | if o.GenStrategy == nil { 61 | o.GenStrategy = GenStrategyIncremental 62 | } 63 | } 64 | 65 | func (o *GenSeriesOptions) genSeries() Series { 66 | buf := NewBuffer(o.NumRows, o.Type) 67 | for rowIndex := 0; rowIndex < o.NumRows; rowIndex++ { 68 | if !o.MissingData || 69 | // ~30% of nil values: the draw is uniform in [0, 9] and only 3..9 set the row 70 | (newRandomNumber(Int64).(int64) > 2) { 71 | buf.SetOrDrop(rowIndex, o.GenStrategy(o.Type, rowIndex)) 72 | } 73 | } 74 | 75 | return NewSeriesFromBuffer(o.Name, buf) 76 | } 77 | 78 | // GenStrategy defines how random values are generated. 79 | type GenStrategy func(typ Type, seed int) interface{} 80 | 81 | // GenStrategyRandom generates a random value of type `typ`; `seed` is not used. 82 | func GenStrategyRandom(typ Type, seed int) interface{} { 83 | return newRandomNumber(typ) 84 | } 85 | 86 | // GenStrategyIncremental generates a number of type `typ` equal to the converted `seed` value. 87 | func GenStrategyIncremental(typ Type, seed int) interface{} { 88 | return typ.Convert(seed) 89 | } 90 | 91 | // GenStrategyDecremental generates a number of type `typ` equal to the opposite of the converted `seed` value. 92 | func GenStrategyDecremental(typ Type, seed int) interface{} { 93 | return typ.Convert(-seed) 94 | } 95 | 96 | // GenStrategyRandomIncremental generates a random number of type `typ` by using the `seed` value. 
97 | func GenStrategyRandomIncremental(typ Type, seed int) interface{} { 98 | i := int64(seed) * 10 99 | switch typ { 100 | case Float64: 101 | add, _ := ToFloat64(newRandomNumber(Float64)) 102 | return float64(i) + add 103 | default: 104 | add, _ := ToInt64(newRandomNumber(Int64)) 105 | return typ.Convert(i + add) 106 | } 107 | } 108 | 109 | // GenStrategyRandomDecremental generates a random number of type `typ` by using the `seed` value: 110 | // it converts -10 * `seed` minus an offset drawn with newRandomNumber(Int64). 111 | func GenStrategyRandomDecremental(typ Type, seed int) interface{} { 112 | i := -int64(seed) * 10 113 | add, _ := ToInt64(newRandomNumber(Int64)) 114 | return typ.Convert(i - add) 115 | } 116 | 117 | // newRandomNumber draws a uniform integer in [0, 9] with crypto/rand and derives an Int64, Float64 or Boolean from it; 118 | // a String is the first 8 characters of a fresh UUID. It panics if the draw fails or if `typ` is unsupported. 119 | func newRandomNumber(typ Type) interface{} { 120 | n, err := crand.Int(crand.Reader, big.NewInt(10)) 121 | if err != nil { 122 | panic(err) 123 | } 124 | switch typ { 125 | case Int64: 126 | return n.Int64() 127 | case Float64: 128 | return float64(n.Int64()) + 0.5 129 | case Boolean: 130 | // `>= 5` keeps true and false equally likely over [0, 9] (5 outcomes each). 131 | return n.Int64() >= 5 132 | case String: 133 | return uuid.New().String()[:8] 134 | default: 135 | panic("unsupported data type") 136 | } 137 | } 138 | -------------------------------------------------------------------------------- /bowgenerator_test.go: -------------------------------------------------------------------------------- 1 | package bow 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestGenerator(t *testing.T) { 11 | t.Run("default", func(t *testing.T) { 12 | b, err := NewGenBow(0, GenSeriesOptions{}) 13 | assert.NoError(t, err) 14 | assert.Equal(t, genDefaultNumRows, b.NumRows()) 15 | assert.Equal(t, 1, b.NumCols()) 16 | assert.Equal(t, Int64, b.ColumnType(0)) 17 | 18 | b2, err := b.DropNils() 19 | assert.NoError(t, err) 20 | assert.Equal(t, b, b2) 21 | assert.True(t, b2.Equal(b), fmt.Sprintf("want %v\ngot %v", b, b2)) 22 | }) 23 | 24 | t.Run("with missing data", func(t *testing.T) { 25 | b, err := NewGenBow(100, 
GenSeriesOptions{MissingData: true}) 26 | assert.NoError(t, err) 27 | 28 | b2, err := b.DropNils() 29 | assert.NoError(t, err) 30 | assert.Less(t, b2.NumRows(), b.NumRows()) 31 | }) 32 | 33 | t.Run("float64 with all columns sorted", func(t *testing.T) { 34 | b, err := NewGenBow(8, 35 | GenSeriesOptions{}, 36 | GenSeriesOptions{Type: Float64}, 37 | ) 38 | assert.NoError(t, err) 39 | 40 | assert.Equal(t, 8, b.NumRows()) 41 | assert.Equal(t, 2, b.NumCols()) 42 | assert.Equal(t, Int64, b.ColumnType(0)) 43 | assert.Equal(t, Float64, b.ColumnType(1)) 44 | assert.True(t, b.IsColSorted(0)) 45 | }) 46 | 47 | t.Run("descending sort on last column", func(t *testing.T) { 48 | b, err := NewGenBow(3, 49 | GenSeriesOptions{GenStrategy: GenStrategyIncremental}, 50 | GenSeriesOptions{GenStrategy: GenStrategyDecremental}, 51 | ) 52 | assert.NoError(t, err) 53 | assert.True(t, b.IsColSorted(0)) 54 | assert.True(t, b.IsColSorted(1)) 55 | }) 56 | 57 | t.Run("custom names and types", func(t *testing.T) { 58 | b, err := NewGenBow(4, 59 | GenSeriesOptions{Name: "A", Type: Int64}, 60 | GenSeriesOptions{Name: "B", Type: Float64}, 61 | GenSeriesOptions{Name: "C", Type: String}, 62 | GenSeriesOptions{Name: "D", Type: Boolean}, 63 | ) 64 | assert.NoError(t, err) 65 | 66 | assert.Equal(t, "A", b.ColumnName(0)) 67 | assert.Equal(t, "B", b.ColumnName(1)) 68 | assert.Equal(t, "C", b.ColumnName(2)) 69 | assert.Equal(t, "D", b.ColumnName(3)) 70 | 71 | assert.Equal(t, Int64, b.ColumnType(0)) 72 | assert.Equal(t, Float64, b.ColumnType(1)) 73 | assert.Equal(t, String, b.ColumnType(2)) 74 | assert.Equal(t, Boolean, b.ColumnType(3)) 75 | }) 76 | } 77 | -------------------------------------------------------------------------------- /bowgetters_test.go: -------------------------------------------------------------------------------- 1 | package bow 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | "github.com/stretchr/testify/require" 8 | ) 9 | 10 | func TestBow_GetValue(t 
*testing.T) { 11 | colNames := []string{"time", "value", "meta"} 12 | colTypes := []Type{Int64, Float64, String} 13 | colData := [][]interface{}{ 14 | {1, 2, 3}, 15 | {1.1, 2.2, 3.3}, 16 | {"", "test", "3.3"}, 17 | } 18 | 19 | b, err := NewBowFromColBasedInterfaces(colNames, colTypes, colData) 20 | require.NoError(t, err) 21 | 22 | assert.Equal(t, 3.3, b.GetValue(1, 2)) 23 | assert.Equal(t, map[string]interface{}{ 24 | "time": int64(2), 25 | "value": 2.2, 26 | "meta": "test", 27 | }, b.GetRow(1)) 28 | 29 | res, ok := b.GetFloat64(2, 2) 30 | assert.True(t, ok) 31 | assert.Equal(t, 3.3, res) 32 | } 33 | 34 | func TestBow_Distinct(t *testing.T) { 35 | colNames := []string{"time", "value", "meta"} 36 | colTypes := []Type{Int64, Float64, String} 37 | colData := [][]interface{}{ 38 | {1, 1, 2, nil, 3}, 39 | {1.1, 1.1, 2.2, nil, 3.3}, 40 | {"", "test", "test", nil, "3.3"}, 41 | } 42 | 43 | b, err := NewBowFromColBasedInterfaces(colNames, colTypes, colData) 44 | require.NoError(t, err) 45 | 46 | t.Run(Int64.String(), func(t *testing.T) { 47 | res := b.Distinct(0) 48 | expect, err := NewBow(NewSeries("time", Int64, []int64{1, 2, 3}, nil)) 49 | require.NoError(t, err) 50 | 51 | ExpectEqual(t, expect, res) 52 | }) 53 | 54 | t.Run(Float64.String(), func(t *testing.T) { 55 | res := b.Distinct(1) 56 | expect, err := NewBow(NewSeries("value", Float64, []float64{1.1, 2.2, 3.3}, nil)) 57 | require.NoError(t, err) 58 | 59 | ExpectEqual(t, expect, res) 60 | }) 61 | 62 | t.Run(String.String(), func(t *testing.T) { 63 | res := b.Distinct(2) 64 | expect, err := NewBow(NewSeries("meta", String, []string{"", "3.3", "test"}, nil)) 65 | require.NoError(t, err) 66 | 67 | ExpectEqual(t, expect, res) 68 | }) 69 | } 70 | -------------------------------------------------------------------------------- /bowjson.go: -------------------------------------------------------------------------------- 1 | package bow 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | ) 7 | 8 | type jsonField struct { 9 
| Name string `json:"name"` 10 | Type string `json:"type"` 11 | } 12 | 13 | type JSONSchema struct { 14 | Fields []jsonField `json:"fields"` 15 | } 16 | 17 | // JSONBow is a structure representing a Bow for JSON marshaling purpose. 18 | type JSONBow struct { 19 | Schema JSONSchema `json:"schema"` 20 | RowBasedData []map[string]interface{} `json:"data"` 21 | } 22 | 23 | // MarshalJSON returns the marshal encoding of the bow. 24 | func (b bow) MarshalJSON() ([]byte, error) { 25 | return json.Marshal(NewJSONBow(&b)) 26 | } 27 | 28 | // NewJSONBow returns a new JSONBow structure from a Bow. 29 | func NewJSONBow(b Bow) JSONBow { 30 | if b == nil { 31 | return JSONBow{} 32 | } 33 | 34 | res := JSONBow{ 35 | RowBasedData: make([]map[string]interface{}, 0, b.NumRows()), 36 | } 37 | 38 | for _, col := range b.Schema().Fields() { 39 | res.Schema.Fields = append( 40 | res.Schema.Fields, 41 | jsonField{ 42 | Name: col.Name, 43 | Type: col.Type.Name(), 44 | }) 45 | } 46 | 47 | for row := range b.GetRowsChan() { 48 | if len(row) == 0 { 49 | continue 50 | } 51 | res.RowBasedData = append(res.RowBasedData, row) 52 | } 53 | 54 | return res 55 | } 56 | 57 | // UnmarshalJSON parses the JSON-encoded data and stores the result in the bow. 58 | func (b *bow) UnmarshalJSON(data []byte) error { 59 | jsonB := JSONBow{} 60 | if err := json.Unmarshal(data, &jsonB); err != nil { 61 | return fmt.Errorf("json.Unmarshal: %w", err) 62 | } 63 | 64 | if err := b.NewValuesFromJSON(jsonB); err != nil { 65 | return fmt.Errorf("bow.NewValuesFromJSON: %w", err) 66 | } 67 | 68 | return nil 69 | 70 | } 71 | 72 | // NewValuesFromJSON replaces the bow arrow.Record by a new one represented by the JSONBow structure. 
73 | func (b *bow) NewValuesFromJSON(jsonB JSONBow) error { 74 | if len(jsonB.Schema.Fields) == 0 { 75 | b.Record = NewBowEmpty().(*bow).Record 76 | return nil 77 | } 78 | 79 | /* 80 | Convert back json_table data types to bow data types 81 | From pandas / io / json / _table_schema.py / as_json_table_type(x: DtypeObj) -> str: 82 | This table shows the relationship between NumPy / pandas dtypes, 83 | and Table Schema dtypes. 84 | ============== ================= 85 | Pandas type Table Schema type 86 | ============== ================= 87 | int64 integer 88 | float64 number 89 | bool boolean 90 | datetime64[ns] datetime 91 | timedelta64[ns] duration 92 | object str 93 | categorical any 94 | =============== ================= 95 | */ 96 | 97 | for fieldIndex, field := range jsonB.Schema.Fields { 98 | if _, ok := mapArrowNameToBowTypes[field.Type]; ok { 99 | continue 100 | } 101 | switch field.Type { 102 | case "integer": 103 | jsonB.Schema.Fields[fieldIndex].Type = "int64" 104 | case "number": 105 | jsonB.Schema.Fields[fieldIndex].Type = "float64" 106 | case "boolean": 107 | jsonB.Schema.Fields[fieldIndex].Type = "bool" 108 | } 109 | } 110 | 111 | series := make([]Series, len(jsonB.Schema.Fields)) 112 | 113 | if jsonB.RowBasedData == nil { 114 | for fieldIndex, field := range jsonB.Schema.Fields { 115 | typ := getBowTypeFromArrowName(field.Type) 116 | buf := NewBuffer(0, typ) 117 | series[fieldIndex] = NewSeriesFromBuffer(field.Name, buf) 118 | } 119 | 120 | tmpBow, err := NewBow(series...) 
121 | if err != nil { 122 | return err 123 | } 124 | 125 | b.Record = tmpBow.(*bow).Record 126 | return nil 127 | } 128 | 129 | for fieldIndex, field := range jsonB.Schema.Fields { 130 | typ := getBowTypeFromArrowName(field.Type) 131 | buf := NewBuffer(len(jsonB.RowBasedData), typ) 132 | for rowIndex, row := range jsonB.RowBasedData { 133 | buf.SetOrDrop(rowIndex, row[field.Name]) 134 | } 135 | 136 | series[fieldIndex] = NewSeriesFromBuffer(field.Name, buf) 137 | } 138 | 139 | tmpBow, err := NewBow(series...) 140 | if err != nil { 141 | return err 142 | } 143 | 144 | b.Record = tmpBow.(*bow).Record 145 | return nil 146 | } 147 | -------------------------------------------------------------------------------- /bowjson_test.go: -------------------------------------------------------------------------------- 1 | package bow 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "testing" 7 | 8 | "github.com/stretchr/testify/assert" 9 | "github.com/stretchr/testify/require" 10 | ) 11 | 12 | func TestJSON(t *testing.T) { 13 | t.Run("MarshalJSON", func(t *testing.T) { 14 | t.Run("empty", func(t *testing.T) { 15 | b := NewBowEmpty() 16 | 17 | byteB, err := json.Marshal(b) 18 | require.NoError(t, err) 19 | 20 | jsonB := JSONBow{} 21 | err = json.Unmarshal(byteB, &jsonB) 22 | require.NoError(t, err) 23 | 24 | expected := JSONBow{ 25 | Schema: JSONSchema{}, 26 | RowBasedData: []map[string]interface{}{}, 27 | } 28 | assert.Equal(t, expected, jsonB) 29 | }) 30 | 31 | t.Run("simple", func(t *testing.T) { 32 | b, err := NewBowFromRowBasedInterfaces( 33 | []string{"a", "b", "c"}, 34 | []Type{Int64, Float64, Boolean}, 35 | [][]interface{}{ 36 | {100, 200., false}, 37 | {110, 220., true}, 38 | {111, 222., false}, 39 | }) 40 | require.NoError(t, err) 41 | 42 | byteB, err := json.Marshal(b) 43 | require.NoError(t, err) 44 | 45 | jsonB := JSONBow{} 46 | err = json.Unmarshal(byteB, &jsonB) 47 | require.NoError(t, err) 48 | 49 | expected := JSONBow{ 50 | Schema: JSONSchema{ 51 | Fields: 
[]jsonField{ 52 | {Name: "a", Type: "int64"}, 53 | {Name: "b", Type: "float64"}, 54 | {Name: "c", Type: "bool"}, 55 | }, 56 | }, 57 | RowBasedData: []map[string]interface{}{ 58 | {"a": 100., "b": 200., "c": false}, 59 | {"a": 110., "b": 220., "c": true}, 60 | {"a": 111., "b": 222., "c": false}, 61 | }, 62 | } 63 | assert.Equal(t, expected, jsonB) 64 | }) 65 | }) 66 | 67 | t.Run("UnmarshalJSON", func(t *testing.T) { 68 | t.Run("empty", func(t *testing.T) { 69 | b := NewBowEmpty() 70 | 71 | byteB, err := json.Marshal(b) 72 | require.NoError(t, err) 73 | 74 | bCopy := NewBowEmpty() // decode into a fresh bow: reusing b would compare b with itself 75 | err = json.Unmarshal(byteB, bCopy) 76 | require.NoError(t, err) 77 | 78 | assert.True(t, b.Equal(bCopy), 79 | fmt.Sprintf("have:\n%vexpect:\n%v", bCopy, b)) 80 | }) 81 | 82 | t.Run("simple", func(t *testing.T) { 83 | b, err := NewBowFromRowBasedInterfaces( 84 | []string{"a", "b", "c"}, 85 | []Type{Int64, Float64, Boolean}, 86 | [][]interface{}{ 87 | {100, 200., false}, 88 | {110, 220., true}, 89 | {111, 222., false}, 90 | }) 91 | require.NoError(t, err) 92 | 93 | byteB, err := json.Marshal(b) 94 | require.NoError(t, err) 95 | 96 | bCopy := NewBowEmpty() // decode into a fresh bow: reusing b would compare b with itself 97 | err = json.Unmarshal(byteB, bCopy) 98 | require.NoError(t, err) 99 | 100 | assert.True(t, b.Equal(bCopy), 101 | fmt.Sprintf("have:\n%vexpect:\n%v", bCopy, b)) 102 | }) 103 | 104 | t.Run("simple no data", func(t *testing.T) { 105 | b, err := NewBowFromRowBasedInterfaces( 106 | []string{"a", "b", "c"}, 107 | []Type{Int64, Float64, Boolean}, 108 | [][]interface{}{}) 109 | require.NoError(t, err) 110 | 111 | byteB, err := json.Marshal(b) 112 | require.NoError(t, err) 113 | 114 | bCopy := NewBowEmpty() // decode into a fresh bow: reusing b would compare b with itself 115 | err = json.Unmarshal(byteB, bCopy) 116 | require.NoError(t, err) 117 | 118 | assert.True(t, b.Equal(bCopy), 119 | fmt.Sprintf("have:\n%vexpect:\n%v", bCopy, b)) 120 | }) 121 | }) 122 | } 123 | 124 | func BenchmarkBow_JSON(b *testing.B) { 125 | for rows := 10; rows <= 100000; rows *= 10 { 126 | data, err := NewBowFromParquet(fmt.Sprintf( 127 | "%sbow1-%d-rows.parquet", 
benchmarkBowsDirPath, rows), false) 128 | require.NoError(b, err) 129 | 130 | var j []byte 131 | b.Run(fmt.Sprintf("Marshal/%d_rows", rows), func(b *testing.B) { 132 | for n := 0; n < b.N; n++ { 133 | j, err = data.MarshalJSON() 134 | require.NoError(b, err) 135 | } 136 | }) 137 | 138 | b.Run(fmt.Sprintf("Unmarshal/%d_rows", rows), func(b *testing.B) { 139 | for n := 0; n < b.N; n++ { 140 | require.NoError(b, NewBowEmpty().UnmarshalJSON(j)) 141 | } 142 | }) 143 | } 144 | } 145 | -------------------------------------------------------------------------------- /bowmetadata.go: -------------------------------------------------------------------------------- 1 | package bow 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/apache/arrow/go/v8/arrow" 7 | "github.com/apache/arrow/go/v8/arrow/array" 8 | ) 9 | 10 | // Metadata is wrapping arrow.Metadata. 11 | type Metadata struct { 12 | arrow.Metadata 13 | } 14 | 15 | // NewMetadata returns a new Metadata. 16 | func NewMetadata(keys, values []string) Metadata { 17 | return Metadata{arrow.NewMetadata(keys, values)} 18 | } 19 | 20 | // NewBowWithMetadata returns a new Bow from Metadata and Series. 21 | func NewBowWithMetadata(metadata Metadata, series ...Series) (Bow, error) { 22 | rec, err := newRecord(metadata, series...) 23 | if err != nil { 24 | return nil, fmt.Errorf("newRecord: %w", err) 25 | } 26 | 27 | return &bow{Record: rec}, nil 28 | } 29 | 30 | // Metadata return a copy of the bow Schema Metadata. 31 | func (b *bow) Metadata() Metadata { 32 | return NewMetadata( 33 | b.Schema().Metadata().Keys(), 34 | b.Schema().Metadata().Values()) 35 | } 36 | 37 | // SetMetadata sets a value for a given key and return a Bow with freshly created Metadata. 
38 | func (b *bow) SetMetadata(key, value string) Bow { 39 | m := b.Metadata() 40 | m = m.Set(key, value) 41 | return &bow{Record: array.NewRecord( 42 | arrow.NewSchema(b.Schema().Fields(), &m.Metadata), 43 | b.Columns(), 44 | b.Record.NumRows())} 45 | } 46 | 47 | // WithMetadata returns a new Bow built on the same fields and columns, with `metadata` as its schema Metadata. 48 | func (b *bow) WithMetadata(metadata Metadata) Bow { 49 | m := arrow.NewMetadata(metadata.Keys(), metadata.Values()) 50 | return &bow{Record: array.NewRecord( 51 | arrow.NewSchema(b.Schema().Fields(), &m), 52 | b.Columns(), 53 | b.Record.NumRows())} 54 | } 55 | 56 | // Set returns a new Metadata with the key/value pair set; it works on copies so the receiver is never modified. 57 | // If the key already exists, it replaces its value. 58 | func (m *Metadata) Set(newKey, newValue string) Metadata { 59 | keys := append([]string(nil), m.Keys()...) // copy: Keys may expose the receiver's backing slice 60 | values := append([]string(nil), m.Values()...) // copy: the in-place update below must not reach the receiver 61 | keyIndex := m.FindKey(newKey) 62 | 63 | if keyIndex == -1 { 64 | keys = append(keys, newKey) 65 | values = append(values, newValue) 66 | } else { 67 | values[keyIndex] = newValue 68 | } 69 | 70 | return Metadata{arrow.NewMetadata(keys, values)} 71 | } 72 | 73 | // SetMany returns a new Metadata with the key/value pairs set. 74 | // If a key already exists, it replaces its value. 
75 | func (m *Metadata) SetMany(newKeys, newValues []string) Metadata { 76 | if len(newKeys) != len(newValues) { 77 | panic("metadata len mismatch") 78 | } 79 | if len(newKeys) == 0 { 80 | return *m 81 | } 82 | 83 | // Work on copies: Keys/Values may expose the receiver's backing slices, which must stay untouched. 84 | keys := append([]string(nil), m.Keys()...) 85 | values := append([]string(nil), m.Values()...) 86 | 87 | for i, newKey := range newKeys { 88 | // Look the key up in the working copy so that a key repeated in newKeys is updated instead of appended twice. 89 | newKeyIndex := -1 90 | for j, key := range keys { 91 | if key == newKey { 92 | newKeyIndex = j 93 | break 94 | } 95 | } 96 | if newKeyIndex == -1 { 97 | keys = append(keys, newKey) 98 | values = append(values, newValues[i]) 99 | } else { 100 | values[newKeyIndex] = newValues[i] 101 | } 102 | } 103 | 104 | return Metadata{arrow.NewMetadata(keys, values)} 105 | } 106 | -------------------------------------------------------------------------------- /bowmetadata_test.go: -------------------------------------------------------------------------------- 1 | package bow 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestBow_WithMetadata(t *testing.T) { 10 | t.Run("adding meta should not modify bow, but correctly change schema", func(t *testing.T) { 11 | metadata := NewMetadata([]string{"testKey"}, []string{"testValue"}) 12 | b, _ := NewBow(NewSeries("test", Int64, []int64{1, 2}, nil)) 13 | 14 | res := b.WithMetadata(metadata) 15 | assert.True(t, res.Metadata().Equal(metadata.Metadata), 16 | "expected %q have %q", metadata.String(), res.Metadata().String()) 17 | assert.Equal(t, 0, b.Metadata().Len()) 18 | assert.Equal(t, 1, res.Metadata().Len()) 19 | }) 20 | } 21 | 22 | func TestMetadataSetMany(t *testing.T) { 23 | t.Run("single set on existing key", func(t *testing.T) { 24 | metadata := NewMetadata([]string{"testKey"}, []string{"testValue"}) 25 | expected := NewMetadata([]string{"testKey"}, []string{"updatedValue"}) 26 | 27 | res := metadata.SetMany([]string{"testKey"}, []string{"updatedValue"}) 28 | assert.Equal(t, expected, res, "expected %q have %q", expected.String(), res.String()) 29 | }) 30 | 31 | t.Run("single set on new key", func(t *testing.T) { 32 | metadata := NewMetadata([]string{"testKey1"}, 
[]string{"testValue1"}) 33 | expected := NewMetadata([]string{"testKey1", "testKey2"}, []string{"testValue1", "testValue2"}) 34 | 35 | res := metadata.SetMany([]string{"testKey2"}, []string{"testValue2"}) 36 | assert.Equal(t, expected, res, "expected %q have %q", expected.String(), res.String()) 37 | }) 38 | 39 | t.Run("set many", func(t *testing.T) { 40 | metadata := NewMetadata( 41 | []string{"testKey1", "testKey2", "testKey3"}, 42 | []string{"testValue1", "testValue2", "testValue3"}) 43 | 44 | expectedKeys := []string{"testKey1", "testKey2", "testKey3", "testKey4", "testKey5", "testKey6"} 45 | expectedValues := []string{"testValue1", "updatedValue2", "testValue3", "testValue4", "testValue5", "testValue6"} 46 | expected := NewMetadata(expectedKeys, expectedValues) 47 | 48 | res := metadata.SetMany( 49 | []string{"testKey2", "testKey4", "testKey5", "testKey6"}, 50 | []string{"updatedValue2", "testValue4", "testValue5", "testValue6"}) 51 | assert.Equal(t, expected, res, "expected %q have %q", expected.String(), res.String()) 52 | }) 53 | } 54 | 55 | func TestMetadataSet(t *testing.T) { 56 | t.Run("single set on existing key", func(t *testing.T) { 57 | metadata := NewMetadata([]string{"testKey"}, []string{"testValue"}) 58 | expected := NewMetadata([]string{"testKey"}, []string{"updatedValue"}) 59 | 60 | res := metadata.Set("testKey", "updatedValue") 61 | assert.Equal(t, expected, res, "expected %q have %q", expected.String(), res.String()) 62 | }) 63 | 64 | t.Run("single set on new key", func(t *testing.T) { 65 | metadata := NewMetadata([]string{"testKey1"}, []string{"testValue1"}) 66 | expected := NewMetadata([]string{"testKey1", "testKey2"}, []string{"testValue1", "testValue2"}) 67 | 68 | res := metadata.Set("testKey2", "testValue2") 69 | assert.Equal(t, expected, res, "expected %q have %q", expected.String(), res.String()) 70 | }) 71 | } 72 | -------------------------------------------------------------------------------- /bowparquet_test.go: 
-------------------------------------------------------------------------------- 1 | package bow 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "os" 7 | "testing" 8 | "time" 9 | 10 | "github.com/apache/arrow/go/v8/arrow" 11 | "github.com/stretchr/testify/assert" 12 | "github.com/stretchr/testify/require" 13 | "github.com/xitongsys/parquet-go/parquet" 14 | ) 15 | 16 | const ( 17 | testInputFileName = "bowparquet_test_input.parquet" 18 | testOutputFileName = "/tmp/bowparquet_test_output" 19 | ) 20 | 21 | func TestParquet(t *testing.T) { 22 | t.Run("read/write input file", func(t *testing.T) { 23 | bBefore, err := NewBowFromParquet(testInputFileName, false) 24 | assert.NoError(t, err) 25 | 26 | assert.NoError(t, bBefore.WriteParquet(testOutputFileName, false)) 27 | 28 | bAfter, err := NewBowFromParquet(testOutputFileName+".parquet", false) 29 | assert.NoError(t, err) 30 | 31 | assert.Equal(t, bBefore.String(), bAfter.String()) 32 | 33 | require.NoError(t, os.Remove(testOutputFileName+".parquet")) 34 | }) 35 | 36 | t.Run("all supported types with rows and nil values", func(t *testing.T) { 37 | bBefore, err := NewBowFromRowBasedInterfaces( 38 | []string{"int", "float", "bool", "string"}, 39 | []Type{Int64, Float64, Boolean, String}, 40 | [][]interface{}{ 41 | {1, 1., true, "hi"}, 42 | {2, 2., false, "ho"}, 43 | {nil, nil, nil, nil}, 44 | {3, 3., true, "hu"}, 45 | }) 46 | require.NoError(t, err) 47 | 48 | assert.NoError(t, bBefore.WriteParquet(testOutputFileName+"_withrows", false)) 49 | 50 | bAfter, err := NewBowFromParquet(testOutputFileName+"_withrows.parquet", false) 51 | assert.NoError(t, err) 52 | 53 | assert.Equal(t, bBefore.String(), bAfter.String()) 54 | 55 | require.NoError(t, os.Remove(testOutputFileName+"_withrows.parquet")) 56 | }) 57 | 58 | t.Run("bow supported types without rows", func(t *testing.T) { 59 | bBefore, err := NewBowFromRowBasedInterfaces( 60 | []string{"int", "float", "bool", "string"}, 61 | []Type{Int64, Float64, Boolean, String}, 62 | 
[][]interface{}{}) 63 | require.NoError(t, err) 64 | 65 | assert.NoError(t, bBefore.WriteParquet(testOutputFileName+"_norows", false)) 66 | 67 | bAfter, err := NewBowFromParquet(testOutputFileName+"_norows.parquet", false) 68 | assert.NoError(t, err) 69 | 70 | assert.Equal(t, bBefore.String(), bAfter.String()) 71 | 72 | require.NoError(t, os.Remove(testOutputFileName+"_norows.parquet")) 73 | }) 74 | 75 | t.Run("write empty bow", func(t *testing.T) { 76 | bBefore := NewBowEmpty() 77 | 78 | assert.Errorf(t, 79 | bBefore.WriteParquet(testOutputFileName+"_empty", false), 80 | "bow.WriteParquet: no columns", 81 | ) 82 | }) 83 | 84 | t.Run("bow with context and col_types metadata", func(t *testing.T) { 85 | var series = make([]Series, 2) 86 | series[0] = NewSeries("time", Int64, []int64{0}, []bool{true}) 87 | series[1] = NewSeries(" va\"lue ", Float64, []float64{0.}, []bool{true}) 88 | 89 | var keys, values []string 90 | type Unit struct { 91 | Symbol string `json:"symbol"` 92 | } 93 | type Meta struct { 94 | Unit Unit `json:"unit"` 95 | } 96 | type Context map[string]Meta 97 | 98 | var ctx = Context{ 99 | "time": Meta{Unit{Symbol: "microseconds"}}, 100 | " va\"lue ": Meta{Unit{Symbol: "kWh"}}, 101 | } 102 | 103 | contextJSON, err := json.Marshal(ctx) 104 | require.NoError(t, err) 105 | 106 | keys = append(keys, "context") 107 | values = append(values, string(contextJSON)) 108 | 109 | bBefore, err := NewBowWithMetadata( 110 | newMetaWithParquetTimestampCol(keys, values, "time", time.Microsecond), 111 | series...) 
112 | require.NoError(t, err) 113 | 114 | err = bBefore.WriteParquet(testOutputFileName+"_meta", false) 115 | assert.NoError(t, err) 116 | 117 | bAfter, err := NewBowFromParquet(testOutputFileName+"_meta.parquet", false) 118 | assert.NoError(t, err) 119 | 120 | assert.Equal(t, bBefore.String(), bAfter.String()) 121 | 122 | require.NoError(t, os.Remove(testOutputFileName+"_meta.parquet")) 123 | }) 124 | 125 | t.Run("bow with wrong col_types metadata", func(t *testing.T) { 126 | var series = make([]Series, 2) 127 | 128 | series[0] = NewSeries("time", Int64, []int64{0}, []bool{true}) 129 | series[1] = NewSeries("value", Float64, []float64{0.}, []bool{true}) 130 | 131 | var keys, values []string 132 | 133 | bBefore, err := NewBowWithMetadata( 134 | newMetaWithParquetTimestampCol(keys, values, "unknown", time.Microsecond), 135 | series...) 136 | assert.NoError(t, err) 137 | 138 | assert.Error(t, bBefore.WriteParquet(testOutputFileName+"_wrong", false)) 139 | }) 140 | } 141 | 142 | func TestBowGetParquetMetaColTimeUnit(t *testing.T) { 143 | timeCol := "time" 144 | var series = make([]Series, 2) 145 | series[0] = NewSeries(timeCol, Int64, []int64{0}, nil) 146 | series[1] = NewSeries("value", Float64, []float64{0.}, nil) 147 | 148 | t.Run("time.Millisecond", func(t *testing.T) { 149 | b, err := NewBowWithMetadata( 150 | newMetaWithParquetTimestampCol([]string{}, []string{}, timeCol, time.Millisecond), 151 | series...) 152 | require.NoError(t, err) 153 | 154 | got, err := b.GetParquetMetaColTimeUnit(0) 155 | require.NoError(t, err) 156 | assert.Equal(t, time.Millisecond, got) 157 | }) 158 | 159 | t.Run("time.Microsecond", func(t *testing.T) { 160 | b, err := NewBowWithMetadata( 161 | newMetaWithParquetTimestampCol([]string{}, []string{}, timeCol, time.Microsecond), 162 | series...) 
163 | require.NoError(t, err) 164 | 165 | got, err := b.GetParquetMetaColTimeUnit(0) 166 | require.NoError(t, err) 167 | assert.Equal(t, time.Microsecond, got) 168 | }) 169 | 170 | t.Run("time.Nanosecond", func(t *testing.T) { 171 | b, err := NewBowWithMetadata( 172 | newMetaWithParquetTimestampCol([]string{}, []string{}, timeCol, time.Nanosecond), 173 | series...) 174 | require.NoError(t, err) 175 | 176 | got, err := b.GetParquetMetaColTimeUnit(0) 177 | require.NoError(t, err) 178 | assert.Equal(t, time.Nanosecond, got) 179 | }) 180 | 181 | t.Run("column without timestamp metadata", func(t *testing.T) { 182 | b, err := NewBowWithMetadata( 183 | newMetaWithParquetTimestampCol([]string{}, []string{}, timeCol, time.Nanosecond), 184 | series...) 185 | require.NoError(t, err) 186 | 187 | got, err := b.GetParquetMetaColTimeUnit(1) 188 | require.ErrorIs(t, err, ErrColTimeUnitNotFound) 189 | require.Equal(t, time.Duration(0), got) 190 | }) 191 | 192 | t.Run("column out of range", func(t *testing.T) { 193 | b, err := NewBowWithMetadata( 194 | newMetaWithParquetTimestampCol([]string{}, []string{}, timeCol, time.Nanosecond), 195 | series...) 
196 | require.NoError(t, err) 197 | 198 | assert.Panics(t, func() { 199 | _, _ = b.GetParquetMetaColTimeUnit(42) 200 | }) 201 | }) 202 | } 203 | 204 | func newMetaWithParquetTimestampCol(keys, values []string, colName string, timeUnit time.Duration) Metadata { 205 | var colTypes = make([]parquetColTypesMeta, 1) 206 | 207 | unit := parquet.TimeUnit{} 208 | switch timeUnit { 209 | case time.Millisecond: 210 | unit.MILLIS = &parquet.MilliSeconds{} 211 | case time.Microsecond: 212 | unit.MICROS = &parquet.MicroSeconds{} 213 | case time.Nanosecond: 214 | unit.NANOS = &parquet.NanoSeconds{} 215 | default: 216 | panic(fmt.Errorf("unsupported time unit '%s'", timeUnit)) 217 | } 218 | 219 | logicalType := parquet.LogicalType{ 220 | TIMESTAMP: &parquet.TimestampType{ 221 | IsAdjustedToUTC: true, 222 | Unit: &unit, 223 | }} 224 | colTypes[0] = parquetColTypesMeta{ 225 | Name: colName, 226 | LogicalType: &logicalType, 227 | } 228 | 229 | colTypesJSON, err := json.Marshal(colTypes) 230 | if err != nil { 231 | panic(err) 232 | } 233 | 234 | keys = append(keys, keyParquetColTypesMeta) 235 | values = append(values, string(colTypesJSON)) 236 | 237 | return Metadata{arrow.NewMetadata(keys, values)} 238 | } 239 | -------------------------------------------------------------------------------- /bowparquet_test_input.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Metronlab/bow/e1e1bd5c179e1541de46061796bb13736caadac2/bowparquet_test_input.parquet -------------------------------------------------------------------------------- /bowrecord.go: -------------------------------------------------------------------------------- 1 | package bow 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | 7 | "github.com/apache/arrow/go/v8/arrow" 8 | "github.com/apache/arrow/go/v8/arrow/array" 9 | ) 10 | 11 | func newRecord(metadata Metadata, series ...Series) (arrow.Record, error) { 12 | var fields []arrow.Field 13 | var arrays 
[]arrow.Array 14 | var nRows int64 15 | 16 | if len(series) != 0 && series[0].Array != nil { 17 | nRows = int64(series[0].Array.Len()) 18 | } 19 | 20 | for _, s := range series { 21 | if s.Array == nil { 22 | return nil, errors.New("empty Series") 23 | } 24 | if s.Name == "" { 25 | return nil, errors.New("empty Series name") 26 | } 27 | if getBowTypeFromArrowFingerprint(s.Array.DataType().Fingerprint()) == Unknown { 28 | return nil, fmt.Errorf("unsupported type '%s'", s.Array.DataType()) 29 | } 30 | if int64(s.Array.Len()) != nRows { 31 | return nil, 32 | fmt.Errorf( 33 | "bow.Series '%s' has a length of %d, which is different from the previous ones", 34 | s.Name, s.Array.Len()) 35 | } 36 | fields = append(fields, arrow.Field{ 37 | Name: s.Name, 38 | Type: s.Array.DataType(), 39 | Nullable: true, 40 | }) 41 | arrays = append(arrays, s.Array) 42 | } 43 | 44 | return array.NewRecord( 45 | arrow.NewSchema(fields, &metadata.Metadata), 46 | arrays, nRows), nil 47 | } 48 | -------------------------------------------------------------------------------- /bowseries_test.go: -------------------------------------------------------------------------------- 1 | package bow 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | "github.com/stretchr/testify/require" 9 | ) 10 | 11 | func TestNewSeriesFromInterfaces(t *testing.T) { 12 | for _, typ := range allType { 13 | t.Run(typ.String(), func(t *testing.T) { 14 | testcase := []interface{}{typ.Convert(0), nil} 15 | res, err := NewBow(NewSeriesFromInterfaces(typ.String(), typ, testcase)) 16 | require.NoError(t, err) 17 | assert.Equal(t, typ.Convert(0), res.GetValue(0, 0)) 18 | assert.Equal(t, nil, res.GetValue(0, 1)) 19 | }) 20 | } 21 | } 22 | 23 | func BenchmarkNewSeries(b *testing.B) { 24 | for rows := 10; rows <= 100000; rows *= 10 { 25 | dataArray := make([]int64, rows) 26 | validArray := make([]bool, rows) 27 | for i := range dataArray { 28 | dataArray[i] = int64(i) 29 | validArray[i] = i%2 
== 0 30 | } 31 | 32 | b.Run(fmt.Sprintf("%d_rows", rows), func(b *testing.B) { 33 | for n := 0; n < b.N; n++ { 34 | NewSeries("test", Int64, dataArray, validArray) 35 | } 36 | }) 37 | } 38 | } 39 | 40 | func BenchmarkNewSeriesFromInterfaces(b *testing.B) { 41 | for rows := 10; rows <= 100000; rows *= 10 { 42 | cells := make([]interface{}, rows) 43 | for i := range cells { 44 | cells[i] = int64(i) 45 | } 46 | 47 | b.Run(fmt.Sprintf("%d_rows", rows), func(b *testing.B) { 48 | for n := 0; n < b.N; n++ { 49 | NewSeriesFromInterfaces("test", Int64, cells) 50 | } 51 | }) 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /bowsetters.go: -------------------------------------------------------------------------------- 1 | package bow 2 | 3 | import ( 4 | "fmt" 5 | ) 6 | 7 | // RenameCol returns a new Bow with the column `colIndex` renamed. 8 | func (b *bow) RenameCol(colIndex int, newName string) (Bow, error) { 9 | if colIndex >= b.NumCols() { 10 | return nil, fmt.Errorf("column index out of bound") 11 | } 12 | 13 | if newName == "" { 14 | return nil, fmt.Errorf("newName cannot be empty") 15 | } 16 | 17 | series := make([]Series, b.NumCols()) 18 | for i, col := range b.Columns() { 19 | if i == colIndex { 20 | series[i] = Series{ 21 | Name: newName, 22 | Array: col, 23 | } 24 | } else { 25 | series[i] = b.NewSeriesFromCol(i) 26 | } 27 | } 28 | 29 | return NewBowWithMetadata(b.Metadata(), series...) 30 | } 31 | 32 | // Apply uses the given function to transform the values of column `colIndex`. 33 | // Its expected return type has to be supported otherwise given results will be stored as nil values. 
34 | func (b *bow) Apply(colIndex int, returnType Type, fn func(interface{}) interface{}) (Bow, error) { 35 | buf := NewBuffer(b.NumRows(), returnType) 36 | for i := 0; i < b.NumRows(); i++ { 37 | buf.SetOrDropStrict(i, fn(b.GetValue(colIndex, i))) 38 | } 39 | 40 | series := make([]Series, b.NumCols()) 41 | for i := range b.Columns() { 42 | if i == colIndex { 43 | series[i] = NewSeriesFromBuffer(b.ColumnName(colIndex), buf) 44 | } else { 45 | series[i] = b.NewSeriesFromCol(i) 46 | } 47 | } 48 | 49 | return NewBowWithMetadata(b.Metadata(), series...) 50 | } 51 | 52 | // Convert transforms a column type into another, 53 | // if default behavior is not the one expected, you can use Apply with any implementation needed 54 | func (b *bow) Convert(colIndex int, t Type) (Bow, error) { 55 | return b.Apply(colIndex, t, t.Convert) 56 | } 57 | 58 | // RowCmp implementation is required for Filter 59 | // passing full dataset multidimensional comparators implementations, cross column for instance 60 | // index argument is the current row to compare 61 | type RowCmp func(b Bow, i int) bool 62 | 63 | // Filter only preserves the rows where all given comparators return true 64 | // Filter with no argument return the original bow without copy, but it's not recommended, 65 | // If all filters result in concomitant rows, Filter is as optimal as Slicing in terms of copying 66 | func (b *bow) Filter(fns ...RowCmp) Bow { 67 | var indices []int 68 | for i := 0; i < b.NumRows(); i++ { 69 | if matchRowCmps(b, i, fns...) 
{ 70 | indices = append(indices, i) 71 | } 72 | } 73 | 74 | if len(indices) == 0 { 75 | return b.NewEmptySlice() 76 | } 77 | 78 | // If all indices are concomitant, slicing is more performent than copying 79 | lastInclusive := indices[len(indices)-1] + 1 80 | if len(indices) == lastInclusive-indices[0] { 81 | return b.NewSlice(indices[0], lastInclusive) 82 | } 83 | 84 | filteredSeries := make([]Series, b.NumCols()) 85 | for colIndex := 0; colIndex < b.NumCols(); colIndex++ { 86 | buf := NewBuffer(len(indices), b.ColumnType(colIndex)) 87 | for i, j := range indices { 88 | buf.SetOrDropStrict(i, b.GetValue(colIndex, j)) 89 | } 90 | filteredSeries[colIndex] = NewSeriesFromBuffer(b.ColumnName(colIndex), buf) 91 | } 92 | 93 | res, err := NewBowWithMetadata(b.Metadata(), filteredSeries...) 94 | if err != nil { 95 | panic(err) 96 | } 97 | 98 | return res 99 | } 100 | 101 | func matchRowCmps(b Bow, i int, fns ...RowCmp) bool { 102 | for _, fn := range fns { 103 | if !fn(b, i) { 104 | return false 105 | } 106 | } 107 | 108 | return true 109 | } 110 | 111 | // MakeFilterValues prepares a valid comparator for Filter, it is lazy on given type. 112 | // Be careful about number to string though, for instance 0.1 give "0.100000", which could be unexpected 113 | // If value is of the wrong type and not convertible to column type, comparison will be done on null values! 
114 | func (b *bow) MakeFilterValues(colIndex int, values ...interface{}) RowCmp { 115 | for i := range values { 116 | values[i] = b.ColumnType(colIndex).Convert(values[i]) 117 | } 118 | 119 | return func(b Bow, i int) bool { 120 | return contains(values, b.GetValue(colIndex, i)) 121 | } 122 | } 123 | 124 | func contains(values []interface{}, value interface{}) bool { 125 | for _, val := range values { 126 | if val == value { 127 | return true 128 | } 129 | } 130 | 131 | return false 132 | } 133 | -------------------------------------------------------------------------------- /bowsetters_test.go: -------------------------------------------------------------------------------- 1 | package bow 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | "github.com/stretchr/testify/require" 8 | ) 9 | 10 | func TestBow_SetColName(t *testing.T) { 11 | b, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}), 12 | NewSeries("oldName", Float64, []float64{0.1, 0.2}, nil), 13 | ) 14 | require.NoError(t, err) 15 | 16 | expected, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}), 17 | NewSeries("newName", Float64, []float64{0.1, 0.2}, nil), 18 | ) 19 | require.NoError(t, err) 20 | 21 | t.Run("valid", func(t *testing.T) { 22 | res, err := b.RenameCol(0, "newName") 23 | require.NoError(t, err) 24 | assert.EqualValues(t, expected.String(), res.String()) 25 | }) 26 | 27 | t.Run("invalid colIndex", func(t *testing.T) { 28 | _, err = b.RenameCol(1, "newName") 29 | require.Error(t, err) 30 | }) 31 | 32 | t.Run("invalid newName", func(t *testing.T) { 33 | _, err = b.RenameCol(0, "") 34 | require.Error(t, err) 35 | }) 36 | } 37 | 38 | func TestBow_Apply(t *testing.T) { 39 | b, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}), 40 | NewSeries("unchanged", Float64, []float64{0.1, 0.2}, nil), 41 | NewSeries("apply", Float64, []float64{0.1, 0.2}, nil), 42 | ) 43 | require.NoError(t, err) 44 | 45 | expect, err := 
NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}), 46 | NewSeries("unchanged", Float64, []float64{0.1, 0.2}, nil), 47 | NewSeries("apply", String, []string{"0.100000", "0.200000"}, nil), 48 | ) 49 | require.NoError(t, err) 50 | 51 | res, err := b.Apply(1, String, String.Convert) 52 | require.NoError(t, err) 53 | ExpectEqual(t, expect, res) 54 | } 55 | 56 | func TestBow_Filter(t *testing.T) { 57 | b, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}), 58 | NewSeries("string", String, []string{"0.1", "0.2"}, nil), 59 | NewSeries("float", Float64, []float64{0.1, 0.2}, nil), 60 | ) 61 | require.NoError(t, err) 62 | 63 | t.Run("empty filter", func(t *testing.T) { 64 | res := b.Filter() 65 | ExpectEqual(t, b, res) 66 | }) 67 | 68 | t.Run("empty result", func(t *testing.T) { 69 | res := b.Filter(b.MakeFilterValues(0, "not found")) 70 | ExpectEqual(t, b.NewEmptySlice(), res) 71 | }) 72 | 73 | t.Run("match one comparator", func(t *testing.T) { 74 | res := b.Filter(b.MakeFilterValues(0, "0.1")) 75 | ExpectEqual(t, b.NewSlice(0, 1), res) 76 | }) 77 | 78 | t.Run("match two", func(t *testing.T) { 79 | res := b.Filter( 80 | b.MakeFilterValues(0, "0.1"), 81 | b.MakeFilterValues(1, 0.1), 82 | ) 83 | ExpectEqual(t, b.NewSlice(0, 1), res) 84 | }) 85 | 86 | t.Run("match half", func(t *testing.T) { 87 | res := b.Filter( 88 | b.MakeFilterValues(0, "0.1"), 89 | b.MakeFilterValues(1, 0.2), 90 | ) 91 | ExpectEqual(t, b.NewEmptySlice(), res) 92 | }) 93 | 94 | t.Run("match all", func(t *testing.T) { 95 | res := b.Filter( 96 | b.MakeFilterValues(0, "0.1", "0.2"), 97 | b.MakeFilterValues(1, 0.1, 0.2), 98 | ) 99 | ExpectEqual(t, b, res) 100 | }) 101 | 102 | t.Run("match all lazy", func(t *testing.T) { 103 | res := b.Filter( 104 | b.MakeFilterValues(0, "0.1", "0.2"), 105 | b.MakeFilterValues(1, "0.1", "0.2"), 106 | ) 107 | ExpectEqual(t, b, res) 108 | }) 109 | 110 | t.Run("not convertible", func(t *testing.T) { 111 | res := b.Filter( 112 | b.MakeFilterValues(1, "not 
a number"), 113 | ) 114 | ExpectEqual(t, b.NewEmptySlice(), res) 115 | }) 116 | 117 | t.Run("match non concomitant", func(t *testing.T) { 118 | b, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}), 119 | NewSeries("string", String, []string{"0.1", "0.2", "0.3"}, nil), 120 | NewSeries("float", Float64, []float64{0.1, 0.2, 0.3}, nil), 121 | ) 122 | require.NoError(t, err) 123 | expect, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}), 124 | NewSeries("string", String, []string{"0.1", "0.3"}, nil), 125 | NewSeries("float", Float64, []float64{0.1, 0.3}, nil), 126 | ) 127 | require.NoError(t, err) 128 | 129 | res := b.Filter( 130 | b.MakeFilterValues(0, "0.1", "0.3"), 131 | ) 132 | ExpectEqual(t, expect, res) 133 | }) 134 | 135 | } 136 | -------------------------------------------------------------------------------- /bowsort.go: -------------------------------------------------------------------------------- 1 | package bow 2 | 3 | import ( 4 | "fmt" 5 | "sort" 6 | ) 7 | 8 | // SortByCol returns a new Bow with the rows sorted by a column in ascending order. 9 | // Returns the same Bow if the column is already sorted. 
10 | func (b *bow) SortByCol(colIndex int) (Bow, error) { 11 | if b.Column(colIndex).NullN() != 0 { 12 | return nil, fmt.Errorf( 13 | "column to sort by has %d nil values", 14 | b.Column(colIndex).NullN()) 15 | } 16 | 17 | sortableBuf := newBufferWithIndices(b.NewBufferFromCol(colIndex)) 18 | // Stop if sort by column is already sorted 19 | if sortableBuf.IsSorted() { 20 | return b, nil 21 | } 22 | 23 | // Sort the column by ascending values 24 | sort.Sort(sortableBuf) 25 | 26 | // Fill the sort by column with sorted values 27 | sortedSeries := make([]Series, b.NumCols()) 28 | for i := 0; i < b.NumCols(); i++ { 29 | if i == colIndex { 30 | sortedSeries[i] = NewSeriesFromBuffer(b.ColumnName(i), sortableBuf.Buffer) 31 | continue 32 | } 33 | buf := NewBuffer(b.NumRows(), b.ColumnType(i)) 34 | for j, index := range sortableBuf.indices { 35 | buf.SetOrDropStrict(j, b.GetValue(i, index)) 36 | } 37 | sortedSeries[i] = NewSeriesFromBuffer(b.ColumnName(i), buf) 38 | } 39 | 40 | return NewBowWithMetadata(b.Metadata(), sortedSeries...) 41 | } 42 | 43 | // bufferWithIndices implements the methods of sort.Interface, sorting in ascending order. 
44 | type bufferWithIndices struct { 45 | Buffer 46 | indices []int 47 | } 48 | 49 | func newBufferWithIndices(buf Buffer) bufferWithIndices { 50 | indices := make([]int, buf.Len()) 51 | for i := 0; i < buf.Len(); i++ { 52 | indices[i] = i 53 | } 54 | return bufferWithIndices{Buffer: buf, indices: indices} 55 | } 56 | 57 | func (p bufferWithIndices) Swap(i, j int) { 58 | p.Buffer.Swap(i, j) 59 | p.indices[i], p.indices[j] = p.indices[j], p.indices[i] 60 | } 61 | -------------------------------------------------------------------------------- /bowsort_test.go: -------------------------------------------------------------------------------- 1 | package bow 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | "github.com/stretchr/testify/require" 9 | ) 10 | 11 | func TestBow_SortByCol(t *testing.T) { 12 | t.Run("sorted", func(t *testing.T) { 13 | b, err := NewBowFromRowBasedInterfaces( 14 | []string{"time", "a", "b"}, 15 | []Type{Int64, Float64, Float64}, 16 | [][]interface{}{ 17 | {10, 2.4, 3.1}, 18 | {11, 2.8, 5.9}, 19 | {12, 2.9, 7.5}, 20 | {13, 3.9, 13.4}, 21 | }) 22 | require.NoError(t, err) 23 | 24 | sorted, err := b.SortByCol(0) 25 | assert.Nil(t, err) 26 | assert.EqualValues(t, b.String(), sorted.String()) 27 | }) 28 | 29 | t.Run("unsorted with all types", func(t *testing.T) { 30 | b, err := NewBowFromRowBasedInterfaces( 31 | []string{"time", "i", "f", "b", "s"}, 32 | []Type{Int64, Int64, Float64, Boolean, String}, 33 | [][]interface{}{ 34 | {10, 2, 3.1, true, "ho"}, 35 | {11, 2, 5.9, false, "la"}, 36 | {13, 3, 13.4, true, "tal"}, 37 | {12, 2, 7.5, false, "que"}, 38 | }) 39 | require.NoError(t, err) 40 | expected, err := NewBowFromRowBasedInterfaces( 41 | []string{"time", "i", "f", "b", "s"}, 42 | []Type{Int64, Int64, Float64, Boolean, String}, 43 | [][]interface{}{ 44 | {10, 2, 3.1, true, "ho"}, 45 | {11, 2, 5.9, false, "la"}, 46 | {12, 2, 7.5, false, "que"}, 47 | {13, 3, 13.4, true, "tal"}, 48 | }) 49 | require.NoError(t, 
err) 50 | sorted, err := b.SortByCol(0) 51 | assert.NoError(t, err) 52 | assert.EqualValues(t, expected.String(), sorted.String()) 53 | }) 54 | 55 | t.Run("unsorted with different cols", func(t *testing.T) { 56 | b, err := NewBowFromRowBasedInterfaces( 57 | []string{"a", "b", "time"}, 58 | []Type{Float64, Float64, Int64}, 59 | [][]interface{}{ 60 | {2.4, 3.1, 10}, 61 | {2.8, 5.9, 11}, 62 | {3.9, 13.4, 13}, 63 | {2.9, 7.5, 12}, 64 | }) 65 | require.NoError(t, err) 66 | expected, err := NewBowFromRowBasedInterfaces( 67 | []string{"a", "b", "time"}, 68 | []Type{Float64, Float64, Int64}, 69 | [][]interface{}{ 70 | {2.4, 3.1, 10}, 71 | {2.8, 5.9, 11}, 72 | {2.9, 7.5, 12}, 73 | {3.9, 13.4, 13}, 74 | }) 75 | require.NoError(t, err) 76 | sorted, err := b.SortByCol(2) 77 | assert.Nil(t, err) 78 | assert.EqualValues(t, expected.String(), sorted.String()) 79 | }) 80 | 81 | t.Run("unsorted with nil values and all types", func(t *testing.T) { 82 | b, err := NewBowFromRowBasedInterfaces( 83 | []string{"time", "int", "float", "string", "bool"}, 84 | []Type{Int64, Int64, Float64, String, Boolean}, 85 | [][]interface{}{ 86 | {10, 5, nil, "bonjour", true}, 87 | {11, 2, 56., "comment", false}, 88 | {13, nil, 13.4, "allez", nil}, 89 | {12, -1, nil, nil, false}, 90 | }) 91 | require.NoError(t, err) 92 | expected, err := NewBowFromRowBasedInterfaces( 93 | []string{"time", "int", "float", "string", "bool"}, 94 | []Type{Int64, Int64, Float64, String, Boolean}, 95 | [][]interface{}{ 96 | {10, 5, nil, "bonjour", true}, 97 | {11, 2, 56., "comment", false}, 98 | {12, -1, nil, nil, false}, 99 | {13, nil, 13.4, "allez", nil}, 100 | }) 101 | require.NoError(t, err) 102 | sorted, err := b.SortByCol(0) 103 | assert.Nil(t, err) 104 | assert.EqualValues(t, expected.String(), sorted.String()) 105 | }) 106 | 107 | t.Run("sorted in desc order", func(t *testing.T) { 108 | b, err := NewBowFromRowBasedInterfaces( 109 | []string{"time", "a", "b"}, 110 | []Type{Int64, Float64, Float64}, 111 | 
[][]interface{}{ 112 | {13, 3.9, 13.4}, 113 | {12, 2.9, 7.5}, 114 | {11, 2.8, 5.9}, 115 | {10, 2.4, 3.1}, 116 | }) 117 | require.NoError(t, err) 118 | expected, err := NewBowFromRowBasedInterfaces( 119 | []string{"time", "a", "b"}, 120 | []Type{Int64, Float64, Float64}, 121 | [][]interface{}{ 122 | {10, 2.4, 3.1}, 123 | {11, 2.8, 5.9}, 124 | {12, 2.9, 7.5}, 125 | {13, 3.9, 13.4}, 126 | }) 127 | require.NoError(t, err) 128 | sorted, err := b.SortByCol(0) 129 | assert.Nil(t, err) 130 | assert.EqualValues(t, expected.String(), sorted.String()) 131 | }) 132 | 133 | t.Run("duplicate values in sort by column", func(t *testing.T) { 134 | b, err := NewBowFromRowBasedInterfaces( 135 | []string{"time", "a", "b"}, 136 | []Type{Int64, Float64, Float64}, 137 | [][]interface{}{ 138 | {13, 3.9, 13.4}, 139 | {12, 2.9, 7.5}, 140 | {12, 2.8, 5.9}, 141 | {10, 2.4, 3.1}, 142 | }) 143 | require.NoError(t, err) 144 | expected, err := NewBowFromRowBasedInterfaces( 145 | []string{"time", "a", "b"}, 146 | []Type{Int64, Float64, Float64}, 147 | [][]interface{}{ 148 | {10, 2.4, 3.1}, 149 | {12, 2.9, 7.5}, 150 | {12, 2.8, 5.9}, 151 | {13, 3.9, 13.4}, 152 | }) 153 | require.NoError(t, err) 154 | sorted, err := b.SortByCol(0) 155 | assert.Nil(t, err) 156 | assert.EqualValues(t, expected.String(), sorted.String()) 157 | }) 158 | 159 | t.Run("empty bow", func(t *testing.T) { 160 | b, err := NewBowFromRowBasedInterfaces( 161 | []string{"time", "a"}, 162 | []Type{Int64, Float64}, 163 | [][]interface{}{}) 164 | require.NoError(t, err) 165 | expected := b 166 | sorted, err := b.SortByCol(0) 167 | assert.Nil(t, err) 168 | assert.EqualValues(t, expected.String(), sorted.String()) 169 | }) 170 | 171 | t.Run("with metadata", func(t *testing.T) { 172 | b, err := NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}), 173 | NewSeries("time", Int64, []int64{1, 3, 2}, nil), 174 | NewSeries("value", Float64, []float64{.1, .3, .2}, nil), 175 | ) 176 | require.NoError(t, err) 177 | 178 | expected, err := 
NewBowWithMetadata(NewMetadata([]string{"k"}, []string{"v"}), 179 | NewSeries("time", Int64, []int64{1, 2, 3}, nil), 180 | NewSeries("value", Float64, []float64{.1, .2, .3}, nil), 181 | ) 182 | require.NoError(t, err) 183 | 184 | sorted, err := b.SortByCol(0) 185 | assert.NoError(t, err) 186 | 187 | assert.Equal(t, expected.String(), sorted.String()) 188 | }) 189 | 190 | t.Run("ERR: nil values in sort by column", func(t *testing.T) { 191 | b, err := NewBowFromRowBasedInterfaces( 192 | []string{"time", "a", "b"}, 193 | []Type{Int64, Float64, Float64}, 194 | [][]interface{}{ 195 | {13, 3.9, 13.4}, 196 | {12, 2.9, 7.5}, 197 | {nil, 2.8, 5.9}, 198 | {10, 2.4, 3.1}, 199 | }) 200 | require.NoError(t, err) 201 | _, err = b.SortByCol(0) 202 | assert.Error(t, err) 203 | }) 204 | } 205 | 206 | func BenchmarkBow_SortByCol(b *testing.B) { 207 | for rows := 10; rows <= 100000; rows *= 10 { 208 | data, err := NewBowFromParquet(fmt.Sprintf( 209 | "%sbow1-%d-rows.parquet", benchmarkBowsDirPath, rows), false) 210 | require.NoError(b, err) 211 | 212 | b.Run(fmt.Sprintf("%d_rows", rows), func(b *testing.B) { 213 | for n := 0; n < b.N; n++ { 214 | _, err = data.SortByCol(1) 215 | require.NoError(b, err) 216 | } 217 | }) 218 | } 219 | } 220 | -------------------------------------------------------------------------------- /bowstring.go: -------------------------------------------------------------------------------- 1 | package bow 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | "text/tabwriter" 7 | ) 8 | 9 | // String returns a formatted representation of the Bow. 
10 | func (b *bow) String() string { 11 | if b.NumCols() == 0 { 12 | return "" 13 | } 14 | 15 | w := new(tabwriter.Writer) 16 | writer := new(strings.Builder) 17 | // tabs will be replaced by two spaces by formatter 18 | w.Init(writer, 0, 4, 2, ' ', 0) 19 | 20 | var cells []string 21 | for colIndex := 0; colIndex < b.NumCols(); colIndex++ { 22 | cells = append(cells, fmt.Sprintf( 23 | "%v", fmt.Sprintf( 24 | "%s:%v", b.Schema().Field(colIndex).Name, b.ColumnType(colIndex)))) 25 | } 26 | _, err := fmt.Fprintln(w, strings.Join(cells, "\t")) 27 | if err != nil { 28 | panic(err) 29 | } 30 | 31 | for row := range b.GetRowsChan() { 32 | cells = []string{} 33 | for colIndex := 0; colIndex < b.NumCols(); colIndex++ { 34 | cells = append(cells, fmt.Sprintf("%v", row[b.Schema().Field(colIndex).Name])) 35 | } 36 | if _, err = fmt.Fprintln(w, strings.Join(cells, "\t")); err != nil { 37 | panic(err) 38 | } 39 | } 40 | 41 | if b.Metadata().Len() > 0 { 42 | if _, err = fmt.Fprintf(w, "metadata: %+v\n", b.Metadata()); err != nil { 43 | panic(err) 44 | } 45 | } 46 | 47 | if err = w.Flush(); err != nil { 48 | panic(err) 49 | } 50 | 51 | return writer.String() 52 | } 53 | -------------------------------------------------------------------------------- /bowtypes.go: -------------------------------------------------------------------------------- 1 | package bow 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/apache/arrow/go/v8/arrow" 7 | ) 8 | 9 | type Type int 10 | 11 | // How to add a Type: 12 | // - Seek corresponding arrow.DataType and add it in `mapBowToArrowTypes` 13 | // - add a convert function with desired logic and add case in other conversion func 14 | // - add necessary case in buffer file 15 | // - complete GetValue bow method 16 | 17 | const ( 18 | // Unknown is placed first to be the default when allocating Type or []Type. 19 | Unknown = Type(iota) 20 | 21 | // Float64 and following types are native arrow type supported by bow. 
22 | Float64 23 | Int64 24 | Boolean 25 | String 26 | 27 | // InputDependent is used in aggregations when the output type is dependent on the input type. 28 | InputDependent 29 | 30 | // IteratorDependent is used in aggregations when the output type is dependent on the iterator type. 31 | IteratorDependent 32 | ) 33 | 34 | var ( 35 | mapBowToArrowTypes = map[Type]arrow.DataType{ 36 | Float64: arrow.PrimitiveTypes.Float64, 37 | Int64: arrow.PrimitiveTypes.Int64, 38 | Boolean: arrow.FixedWidthTypes.Boolean, 39 | String: arrow.BinaryTypes.String, 40 | } 41 | mapArrowNameToBowTypes = func() map[string]Type { 42 | res := make(map[string]Type) 43 | for bowType, arrowDataType := range mapBowToArrowTypes { 44 | res[arrowDataType.Name()] = bowType 45 | } 46 | return res 47 | }() 48 | mapArrowFingerprintToBowTypes = func() map[string]Type { 49 | res := make(map[string]Type) 50 | for bowType, arrowDataType := range mapBowToArrowTypes { 51 | res[arrowDataType.Fingerprint()] = bowType 52 | } 53 | return res 54 | }() 55 | allType = func() []Type { 56 | res := make([]Type, InputDependent-1) 57 | for typ := Type(1); typ < InputDependent; typ++ { 58 | res[typ-1] = typ 59 | } 60 | return res 61 | }() 62 | ) 63 | 64 | func (t Type) Convert(input interface{}) interface{} { 65 | var output interface{} 66 | var ok bool 67 | switch t { 68 | case Float64: 69 | output, ok = ToFloat64(input) 70 | case Int64: 71 | output, ok = ToInt64(input) 72 | case Boolean: 73 | output, ok = ToBoolean(input) 74 | case String: 75 | output, ok = ToString(input) 76 | } 77 | if ok { 78 | return output 79 | } 80 | return nil 81 | } 82 | 83 | // IsSupported ensures that the Type t is currently supported by Bow and matches a convertible concrete type. 84 | func (t Type) IsSupported() bool { 85 | _, ok := mapBowToArrowTypes[t] 86 | return ok 87 | } 88 | 89 | // String returns the string representation of the Type t. 
90 | func (t Type) String() string { 91 | at, ok := mapBowToArrowTypes[t] 92 | if !ok { 93 | return "undefined" 94 | } 95 | return fmt.Sprintf("%s", at) 96 | } 97 | 98 | func getBowTypeFromArrowFingerprint(fingerprint string) Type { 99 | typ, ok := mapArrowFingerprintToBowTypes[fingerprint] 100 | if !ok { 101 | return Unknown 102 | } 103 | return typ 104 | } 105 | 106 | func getBowTypeFromArrowName(name string) Type { 107 | typ, ok := mapArrowNameToBowTypes[name] 108 | if !ok { 109 | return Unknown 110 | } 111 | return typ 112 | } 113 | 114 | // GetAllTypes returns all Bow types. 115 | func GetAllTypes() []Type { 116 | res := make([]Type, len(allType)) 117 | copy(res, allType) 118 | return res 119 | } 120 | -------------------------------------------------------------------------------- /bowtypes_test.go: -------------------------------------------------------------------------------- 1 | package bow 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestGetAllTypes(t *testing.T) { 10 | cp := GetAllTypes() 11 | cp[0] = 10 12 | assert.NotEqual(t, allType, cp) 13 | } 14 | -------------------------------------------------------------------------------- /bowvalues.go: -------------------------------------------------------------------------------- 1 | package bow 2 | 3 | import "github.com/apache/arrow/go/v8/arrow/array" 4 | 5 | func int64Values(arr *array.Int64) []int64 { 6 | return arr.Int64Values() 7 | } 8 | 9 | func float64Values(arr *array.Float64) []float64 { 10 | return arr.Float64Values() 11 | } 12 | 13 | func booleanValues(arr *array.Boolean) []bool { 14 | var res = make([]bool, arr.Len()) 15 | for i := range res { 16 | res[i] = arr.Value(i) 17 | } 18 | return res 19 | } 20 | 21 | func stringValues(arr *array.String) []string { 22 | var res = make([]string, arr.Len()) 23 | for i := range res { 24 | res[i] = arr.Value(i) 25 | } 26 | return res 27 | } 28 | 
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
module github.com/metronlab/bow

go 1.18

require (
	github.com/apache/arrow/go/arrow v0.0.0-20211112161151-bc219186db40 // indirect
	github.com/apache/arrow/go/v8 v8.0.1
	github.com/apache/thrift v0.16.0 // indirect
	github.com/davecgh/go-spew v1.1.1 // indirect
	github.com/google/uuid v1.3.0
	github.com/pmezard/go-difflib v1.0.0 // indirect
	github.com/stretchr/testify v1.8.2
	github.com/xitongsys/parquet-go v1.6.2
	github.com/xitongsys/parquet-go-source v0.0.0-20220723234337-052319f3f36b
	golang.org/x/xerrors v0.0.0-20220609144429-65e65417b02f // indirect
	gopkg.in/yaml.v3 v3.0.1 // indirect
)

require (
	github.com/andybalholm/brotli v1.0.4 // indirect
	github.com/goccy/go-json v0.9.10 // indirect
	github.com/golang/snappy v0.0.4 // indirect
	github.com/klauspost/asmfmt v1.3.2 // indirect
	github.com/klauspost/compress v1.15.9 // indirect
	github.com/klauspost/cpuid/v2 v2.1.0 // indirect
	github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 // indirect
	github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 // indirect
	github.com/pierrec/lz4/v4 v4.1.15 // indirect
	github.com/zeebo/xxh3 v1.0.2 // indirect
	golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4 // indirect
	golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f // indirect
	golang.org/x/tools v0.1.11 // indirect
)

--------------------------------------------------------------------------------
/rolling/aggregation.go:
--------------------------------------------------------------------------------
package rolling

import (
	"fmt"

	"github.com/metronlab/bow"
	"github.com/metronlab/bow/rolling/transformation"
)

// ColAggregation is a set of methods to aggregate and transform a Window.
type ColAggregation interface {
	// InputName returns the name of the input column.
	InputName() string
	// InputIndex returns the index of the input column.
	InputIndex() int
	// SetInputIndex sets the index of the input column.
	SetInputIndex(int)

	// OutputName returns the name of the output column.
	OutputName() string
	// RenameOutput returns a copy of the ColAggregation with a new output column name.
	RenameOutput(string) ColAggregation
	// NeedInclusiveWindow returns true if the ColAggregation needs to have inclusive windows.
	NeedInclusiveWindow() bool

	// Type returns the return type of the ColAggregation.
	Type() bow.Type
	// GetReturnType returns the return type of the ColAggregation depending on an input and an iterator type.
	GetReturnType(inputType, iteratorType bow.Type) bow.Type

	// Func returns the ColAggregationFunc of the ColAggregation.
	Func() ColAggregationFunc

	// Transformations returns the transformation functions of the ColAggregation.
	Transformations() []transformation.Func
	// SetTransformations returns a copy of the ColAggregation with new transformations functions.
	SetTransformations(...transformation.Func) ColAggregation
}

// colAggregation is the ColAggregation implementation returned by NewColAggregation.
type colAggregation struct {
	// inputName is the name of the column to aggregate.
	inputName string
	// inputIndex is the index of the input column; NewColAggregation initializes
	// it to -1 and SetInputIndex assigns the actual value.
	inputIndex int
	// needInclusiveWindow is the value reported by NeedInclusiveWindow.
	needInclusiveWindow bool

	// aggregationFn is the function returned by Func.
	aggregationFn ColAggregationFunc
	// transformationFns are the functions returned by Transformations.
	transformationFns []transformation.Func

	// outputName is the output column name set through RenameOutput.
	outputName string
	// typ is the declared return type, which GetReturnType resolves when it is
	// bow.InputDependent or bow.IteratorDependent.
	typ bow.Type
}

// NewColAggregation returns a new ColAggregation.
53 | func NewColAggregation(inputName string, needInclusiveWindow bool, typ bow.Type, fn ColAggregationFunc) ColAggregation { 54 | return &colAggregation{ 55 | inputName: inputName, 56 | inputIndex: -1, 57 | needInclusiveWindow: needInclusiveWindow, 58 | aggregationFn: fn, 59 | typ: typ, 60 | } 61 | } 62 | 63 | type ColAggregationConstruct func(colName string) ColAggregation 64 | type ColAggregationFunc func(colIndex int, w Window) (interface{}, error) 65 | 66 | func (a *colAggregation) InputName() string { 67 | return a.inputName 68 | } 69 | 70 | func (a *colAggregation) InputIndex() int { 71 | return a.inputIndex 72 | } 73 | 74 | func (a *colAggregation) SetInputIndex(i int) { 75 | a.inputIndex = i 76 | } 77 | 78 | func (a *colAggregation) OutputName() string { 79 | return a.outputName 80 | } 81 | 82 | func (a *colAggregation) RenameOutput(name string) ColAggregation { 83 | aCopy := *a 84 | aCopy.outputName = name 85 | return &aCopy 86 | } 87 | 88 | func (a *colAggregation) NeedInclusiveWindow() bool { 89 | return a.needInclusiveWindow 90 | } 91 | 92 | func (a *colAggregation) Type() bow.Type { 93 | return a.typ 94 | } 95 | 96 | func (a *colAggregation) Func() ColAggregationFunc { 97 | return a.aggregationFn 98 | } 99 | 100 | func (a *colAggregation) Transformations() []transformation.Func { 101 | return a.transformationFns 102 | } 103 | 104 | func (a *colAggregation) SetTransformations(transformations ...transformation.Func) ColAggregation { 105 | aCopy := *a 106 | aCopy.transformationFns = transformations 107 | return &aCopy 108 | } 109 | 110 | func (a *colAggregation) GetReturnType(inputType, iteratorType bow.Type) bow.Type { 111 | switch a.Type() { 112 | case bow.Int64, bow.Float64, bow.Boolean, bow.String: 113 | return a.Type() 114 | case bow.InputDependent: 115 | return inputType 116 | case bow.IteratorDependent: 117 | return iteratorType 118 | default: 119 | panic(fmt.Errorf("invalid return type %v", a.Type())) 120 | } 121 | } 122 | 123 | func (r 
*intervalRolling) Aggregate(aggrs ...ColAggregation) Rolling { 124 | if r.err != nil { 125 | return r 126 | } 127 | 128 | rCopy := *r 129 | newIntervalCol, aggrs, err := rCopy.indexedAggregations(aggrs) 130 | if err != nil { 131 | return rCopy.setError(fmt.Errorf("intervalRolling.indexedAggregations: %w", err)) 132 | } 133 | 134 | b, err := rCopy.aggregateWindows(aggrs) 135 | if err != nil { 136 | return rCopy.setError(fmt.Errorf("intervalRolling.aggregateWindows: %w", err)) 137 | } 138 | 139 | newR, err := newIntervalRolling(b, newIntervalCol, rCopy.interval, rCopy.options) 140 | if err != nil { 141 | return rCopy.setError(fmt.Errorf("newIntervalRolling: %w", err)) 142 | } 143 | 144 | return newR 145 | } 146 | 147 | func (r *intervalRolling) indexedAggregations(aggrs []ColAggregation) (int, []ColAggregation, error) { 148 | if len(aggrs) == 0 { 149 | return -1, nil, fmt.Errorf("at least one column aggregation is required") 150 | } 151 | 152 | newIntervalCol := -1 153 | for i := range aggrs { 154 | isInterval, err := r.validateAggregation(aggrs[i], i) 155 | if err != nil { 156 | return -1, nil, err 157 | } 158 | if isInterval { 159 | newIntervalCol = i 160 | } 161 | } 162 | 163 | if newIntervalCol == -1 { 164 | return -1, nil, fmt.Errorf( 165 | "must keep interval column '%s'", r.bow.ColumnName(r.intervalColIndex)) 166 | } 167 | 168 | return newIntervalCol, aggrs, nil 169 | } 170 | 171 | func (r *intervalRolling) validateAggregation(aggr ColAggregation, newIndex int) (isInterval bool, err error) { 172 | if aggr.InputName() == "" { 173 | return false, fmt.Errorf("aggregation %d has no column name", newIndex) 174 | } 175 | 176 | readIndex, err := r.bow.ColumnIndex(aggr.InputName()) 177 | if err != nil { 178 | return false, err 179 | } 180 | 181 | aggr.SetInputIndex(readIndex) 182 | 183 | if aggr.NeedInclusiveWindow() { 184 | r.options.Inclusive = true 185 | } 186 | 187 | return readIndex == r.intervalColIndex, nil 188 | } 189 | 190 | func (r *intervalRolling) 
aggregateWindows(aggrs []ColAggregation) (bow.Bow, error) { 191 | series := make([]bow.Series, len(aggrs)) 192 | 193 | for colIndex, aggr := range aggrs { 194 | rCopy := *r 195 | typ := aggr.GetReturnType( 196 | rCopy.bow.ColumnType(aggr.InputIndex()), 197 | rCopy.bow.ColumnType(rCopy.intervalColIndex)) 198 | buf := bow.NewBuffer(rCopy.numWindows, typ) 199 | 200 | for rCopy.HasNext() { 201 | winIndex, w, err := rCopy.Next() 202 | if err != nil { 203 | return nil, err 204 | } 205 | 206 | var val interface{} 207 | if !aggr.NeedInclusiveWindow() && w.IsInclusive { 208 | val, err = aggr.Func()(aggr.InputIndex(), (*w).UnsetInclusive()) 209 | } else { 210 | val, err = aggr.Func()(aggr.InputIndex(), *w) 211 | } 212 | if err != nil { 213 | return nil, err 214 | } 215 | 216 | for _, trans := range aggr.Transformations() { 217 | val, err = trans(val) 218 | if err != nil { 219 | return nil, err 220 | } 221 | } 222 | 223 | if val == nil { 224 | continue 225 | } 226 | 227 | buf.SetOrDrop(winIndex, val) 228 | } 229 | 230 | if aggr.OutputName() == "" { 231 | series[colIndex] = bow.NewSeriesFromBuffer(rCopy.bow.ColumnName(aggr.InputIndex()), buf) 232 | } else { 233 | series[colIndex] = bow.NewSeriesFromBuffer(aggr.OutputName(), buf) 234 | } 235 | } 236 | 237 | return bow.NewBow(series...) 
238 | } 239 | -------------------------------------------------------------------------------- /rolling/aggregation/XXXbenchmarks_test.go: -------------------------------------------------------------------------------- 1 | package aggregation 2 | 3 | import ( 4 | "fmt" 5 | "math/rand" 6 | "testing" 7 | 8 | "github.com/metronlab/bow" 9 | "github.com/metronlab/bow/rolling" 10 | "github.com/stretchr/testify/require" 11 | ) 12 | 13 | // BenchSize of 1e8 triggers out of memory on a 16Go mem computer 14 | var BenchSize int64 15 | 16 | func BenchmarkBow(b *testing.B) { 17 | for _, BenchSize = range []int64{1, 1e3, 1e5} { 18 | b.Run(fmt.Sprintf("%d_rows", BenchSize), benchmarkBow) 19 | } 20 | } 21 | 22 | func benchmarkBow(b *testing.B) { 23 | var benchBow bow.Bow 24 | var err error 25 | 26 | rows := make([][]interface{}, BenchSize) 27 | rand.Seed(42) 28 | for i := int64(0); i < BenchSize; i++ { 29 | rows[i] = []interface{}{i, rand.Float64()} 30 | } 31 | 32 | b.Run("NewBowFromRowBasedInterfaces", func(b *testing.B) { 33 | for n := 0; n < b.N; n++ { 34 | benchBow, err = bow.NewBowFromRowBasedInterfaces( 35 | []string{timeCol, valueCol}, 36 | []bow.Type{bow.Int64, bow.Float64}, 37 | rows, 38 | ) 39 | require.NoError(b, err) 40 | } 41 | }) 42 | 43 | columns := make([][]interface{}, 2) 44 | rand.Seed(42) 45 | 46 | columns[0] = func(size int64) []interface{} { 47 | timeCol := make([]interface{}, size) 48 | for i := int64(0); i < size; i++ { 49 | timeCol[i] = i 50 | } 51 | return timeCol 52 | }(BenchSize) 53 | 54 | columns[1] = func(size int64) []interface{} { 55 | valueCol := make([]interface{}, size) 56 | for i := int64(0); i < size; i++ { 57 | valueCol[i] = rand.Float64() 58 | } 59 | return valueCol 60 | }(BenchSize) 61 | 62 | b.Run("NewBowFromColBasedInterfaces", func(b *testing.B) { 63 | for n := 0; n < b.N; n++ { 64 | benchBow, err = bow.NewBowFromColBasedInterfaces( 65 | []string{timeCol, valueCol}, 66 | []bow.Type{bow.Int64, bow.Float64}, 67 | columns, 68 | ) 69 | 
require.NoError(b, err) 70 | } 71 | }) 72 | 73 | series := make([]bow.Series, 2) 74 | rand.Seed(42) 75 | 76 | series[0] = func(size int64) bow.Series { 77 | buf := bow.NewBuffer(int(size), bow.Int64) 78 | for i := int64(0); i < size; i++ { 79 | buf.SetOrDrop(int(i), i) 80 | } 81 | return bow.NewSeriesFromBuffer(timeCol, buf) 82 | }(BenchSize) 83 | 84 | series[1] = func(size int64) bow.Series { 85 | buf := bow.NewBuffer(int(size), bow.Float64) 86 | for i := int64(0); i < size; i++ { 87 | buf.SetOrDrop(int(i), rand.Float64()) 88 | } 89 | return bow.NewSeriesFromBuffer(valueCol, buf) 90 | }(BenchSize) 91 | 92 | b.Run("NewBow with validity bitmap", func(b *testing.B) { 93 | for n := 0; n < b.N; n++ { 94 | benchBow, err = bow.NewBow(series...) 95 | require.NoError(b, err) 96 | } 97 | }) 98 | 99 | series = make([]bow.Series, 2) 100 | rand.Seed(42) 101 | 102 | series[0] = func(size int64) bow.Series { 103 | buf := bow.NewBuffer(int(size), bow.Int64) 104 | for i := int64(0); i < size; i++ { 105 | buf.Data.([]int64)[i] = i 106 | } 107 | return bow.NewSeries(timeCol, bow.Int64, buf.Data, nil) 108 | }(BenchSize) 109 | 110 | series[1] = func(size int64) bow.Series { 111 | buf := bow.NewBuffer(int(size), bow.Float64) 112 | for i := int64(0); i < size; i++ { 113 | buf.Data.([]float64)[i] = rand.Float64() 114 | } 115 | return bow.NewSeries(valueCol, bow.Float64, buf.Data, nil) 116 | }(BenchSize) 117 | 118 | b.Run("NewBow without validity bitmap", func(b *testing.B) { 119 | for n := 0; n < b.N; n++ { 120 | benchBow, err = bow.NewBow(series...) 
121 | require.NoError(b, err) 122 | } 123 | }) 124 | 125 | var r rolling.Rolling 126 | b.Run("rolling.IntervalRolling", func(b *testing.B) { 127 | for n := 0; n < b.N; n++ { 128 | r, err = rolling.IntervalRolling(benchBow, timeCol, 10, rolling.Options{}) 129 | require.NoError(b, err) 130 | } 131 | }) 132 | 133 | b.Run("rolling.Rolling.Aggregate", func(b *testing.B) { 134 | for n := 0; n < b.N; n++ { 135 | _, err = r.Aggregate(WindowStart(timeCol), ArithmeticMean(valueCol)).Bow() 136 | require.NoError(b, err) 137 | } 138 | }) 139 | } 140 | -------------------------------------------------------------------------------- /rolling/aggregation/arithmeticmean.go: -------------------------------------------------------------------------------- 1 | package aggregation 2 | 3 | import ( 4 | "github.com/metronlab/bow" 5 | "github.com/metronlab/bow/rolling" 6 | ) 7 | 8 | func ArithmeticMean(col string) rolling.ColAggregation { 9 | return rolling.NewColAggregation(col, false, bow.Float64, 10 | func(col int, w rolling.Window) (interface{}, error) { 11 | if w.Bow.NumRows() == 0 { 12 | return nil, nil 13 | } 14 | 15 | var sum float64 16 | var count int 17 | for i := 0; i < w.Bow.NumRows(); i++ { 18 | value, ok := w.Bow.GetFloat64(col, i) 19 | if !ok { 20 | continue 21 | } 22 | sum += value 23 | count++ 24 | } 25 | if count == 0 { 26 | return nil, nil 27 | } 28 | return sum / float64(count), nil 29 | }) 30 | } 31 | -------------------------------------------------------------------------------- /rolling/aggregation/arithmeticmean_test.go: -------------------------------------------------------------------------------- 1 | package aggregation 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/metronlab/bow" 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestArithmeticMean(t *testing.T) { 11 | runTestCases(t, ArithmeticMean, nil, []testCase{ 12 | { 13 | name: "empty", 14 | testedBow: emptyBow, 15 | expectedBow: func() bow.Bow { 16 | b, err := bow.NewBow( 17 | 
bow.NewSeries(timeCol, bow.Int64, []int64{}, nil), 18 | bow.NewSeries(valueCol, bow.Float64, []float64{}, nil), 19 | ) 20 | assert.NoError(t, err) 21 | return b 22 | }(), 23 | }, 24 | { 25 | name: "sparse", 26 | testedBow: sparseFloatBow, 27 | expectedBow: func() bow.Bow { 28 | b, err := bow.NewBowFromRowBasedInterfaces( 29 | []string{timeCol, valueCol}, 30 | []bow.Type{bow.Int64, bow.Float64}, 31 | [][]interface{}{ 32 | {10, 10.}, 33 | {20, nil}, 34 | {30, nil}, 35 | {40, 10.}, 36 | {50, 15.}, 37 | {60, 15.}, 38 | }) 39 | assert.NoError(t, err) 40 | return b 41 | }(), 42 | }, 43 | { 44 | name: "sparse bool", 45 | testedBow: sparseBoolBow, 46 | expectedBow: func() bow.Bow { 47 | b, err := bow.NewBowFromRowBasedInterfaces( 48 | []string{timeCol, valueCol}, 49 | []bow.Type{bow.Int64, bow.Float64}, 50 | [][]interface{}{ 51 | {10, 1.}, 52 | {20, nil}, 53 | {30, nil}, 54 | {40, 0.}, 55 | {50, 0.5}, 56 | {60, 0.5}, 57 | }) 58 | assert.NoError(t, err) 59 | return b 60 | }(), 61 | }, 62 | { 63 | name: "sparse string", 64 | testedBow: sparseStringBow, 65 | expectedBow: func() bow.Bow { 66 | b, err := bow.NewBowFromRowBasedInterfaces( 67 | []string{timeCol, valueCol}, 68 | []bow.Type{bow.Int64, bow.Float64}, 69 | [][]interface{}{ 70 | {10, 10.}, 71 | {20, nil}, 72 | {30, nil}, 73 | {40, 10.}, 74 | {50, 15.}, 75 | {60, 20.}, 76 | }) 77 | assert.NoError(t, err) 78 | return b 79 | }(), 80 | }, 81 | }) 82 | } 83 | -------------------------------------------------------------------------------- /rolling/aggregation/core_test.go: -------------------------------------------------------------------------------- 1 | package aggregation 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/metronlab/bow" 8 | "github.com/metronlab/bow/rolling" 9 | "github.com/metronlab/bow/rolling/transformation" 10 | "github.com/stretchr/testify/assert" 11 | ) 12 | 13 | const ( 14 | timeCol = "time" 15 | valueCol = "value" 16 | ) 17 | 18 | type testCase struct { 19 | name string 20 | testedBow 
bow.Bow 21 | expectedBow bow.Bow 22 | } 23 | 24 | var ( 25 | emptyBow, _ = bow.NewBow( 26 | bow.NewSeries(timeCol, bow.Int64, []int64{}, nil), 27 | bow.NewSeries(valueCol, bow.Float64, []float64{}, nil), 28 | ) 29 | nilBow, _ = bow.NewBowFromRowBasedInterfaces( 30 | []string{timeCol, valueCol}, 31 | []bow.Type{bow.Int64, bow.Float64}, 32 | [][]interface{}{ 33 | {10, nil}, 34 | {11, nil}, 35 | {20, nil}, 36 | }) 37 | sparseFloatBow, _ = bow.NewBowFromRowBasedInterfaces( 38 | []string{timeCol, valueCol}, 39 | []bow.Type{bow.Int64, bow.Float64}, 40 | [][]interface{}{ 41 | {10, 10.}, // partially valid window 42 | {11, nil}, 43 | {20, nil}, // only invalid window 44 | 45 | // empty window 46 | 47 | {40, nil}, // partially valid with start of window invalid 48 | {41, 10.}, 49 | {50, 10.}, // valid with two values on start of window 50 | {51, 20.}, 51 | {61, 10.}, // valid with two values NOT on start of window 52 | {69, 20.}, 53 | }) 54 | sparseBoolBow, _ = bow.NewBowFromRowBasedInterfaces( 55 | []string{timeCol, valueCol}, 56 | []bow.Type{bow.Int64, bow.Boolean}, 57 | [][]interface{}{ 58 | {10, true}, // partially valid window 59 | {11, nil}, 60 | {20, nil}, // only invalid window 61 | 62 | // empty window 63 | 64 | {40, nil}, // partially valid with start of window invalid 65 | {41, false}, 66 | {50, true}, // valid with two values on start of window 67 | {51, false}, 68 | {61, true}, // valid with two values NOT on start of window 69 | {69, false}, 70 | }) 71 | sparseStringBow, _ = bow.NewBowFromRowBasedInterfaces( 72 | []string{timeCol, valueCol}, 73 | []bow.Type{bow.Int64, bow.String}, 74 | [][]interface{}{ 75 | {10, "10."}, // partially valid window 76 | {11, nil}, 77 | {20, nil}, // only invalid window 78 | 79 | // empty window 80 | 81 | {40, nil}, // partially valid with start of window invalid 82 | {41, "10."}, 83 | {50, "10."}, // valid with two values on start of window 84 | {51, "20."}, 85 | {61, "test"}, // valid with two values NOT on start of window 86 | 
{69, "20."}, 87 | }) 88 | ) 89 | 90 | func runTestCases(t *testing.T, aggrConstruct rolling.ColAggregationConstruct, 91 | aggrTransforms []transformation.Func, testCases []testCase) { 92 | for _, testCase := range testCases { 93 | t.Run(testCase.name, func(t *testing.T) { 94 | r, err := rolling.IntervalRolling(testCase.testedBow, timeCol, 10, rolling.Options{}) 95 | assert.NoError(t, err) 96 | aggregated, err := r. 97 | Aggregate( 98 | WindowStart(timeCol), 99 | aggrConstruct(valueCol).SetTransformations(aggrTransforms...)). 100 | Bow() 101 | assert.NoError(t, err) 102 | assert.NotNil(t, aggregated) 103 | 104 | assert.Equal(t, true, aggregated.Equal(testCase.expectedBow), 105 | fmt.Sprintf("expect:\n%v\nhave:\n%v", testCase.expectedBow, aggregated)) 106 | }) 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /rolling/aggregation/count.go: -------------------------------------------------------------------------------- 1 | package aggregation 2 | 3 | import ( 4 | "github.com/metronlab/bow" 5 | "github.com/metronlab/bow/rolling" 6 | ) 7 | 8 | func Count(col string) rolling.ColAggregation { 9 | return rolling.NewColAggregation(col, false, bow.Int64, 10 | func(col int, w rolling.Window) (interface{}, error) { 11 | var count int64 12 | for i := 0; i < w.Bow.NumRows(); i++ { 13 | v := w.Bow.GetValue(col, i) 14 | if v != nil { 15 | count++ 16 | } 17 | } 18 | return count, nil 19 | }) 20 | } 21 | -------------------------------------------------------------------------------- /rolling/aggregation/count_test.go: -------------------------------------------------------------------------------- 1 | package aggregation 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/metronlab/bow" 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestCount(t *testing.T) { 11 | runTestCases(t, Count, nil, []testCase{ 12 | { 13 | name: "empty", 14 | testedBow: emptyBow, 15 | expectedBow: func() bow.Bow { 16 | b, err := bow.NewBow( 
17 | bow.NewSeries(timeCol, bow.Int64, []int64{}, nil), 18 | bow.NewSeries(valueCol, bow.Int64, []int64{}, nil), 19 | ) 20 | assert.NoError(t, err) 21 | return b 22 | }(), 23 | }, 24 | { 25 | name: "sparse", 26 | testedBow: sparseFloatBow, 27 | expectedBow: func() bow.Bow { 28 | b, err := bow.NewBowFromRowBasedInterfaces( 29 | []string{timeCol, valueCol}, 30 | []bow.Type{bow.Int64, bow.Int64}, 31 | [][]interface{}{ 32 | {10, 1}, 33 | {20, 0}, 34 | {30, 0}, 35 | {40, 1}, 36 | {50, 2}, 37 | {60, 2}, 38 | }) 39 | assert.NoError(t, err) 40 | return b 41 | }(), 42 | }, 43 | }) 44 | } 45 | -------------------------------------------------------------------------------- /rolling/aggregation/firstlast.go: -------------------------------------------------------------------------------- 1 | package aggregation 2 | 3 | import ( 4 | "github.com/metronlab/bow" 5 | "github.com/metronlab/bow/rolling" 6 | ) 7 | 8 | func First(col string) rolling.ColAggregation { 9 | return rolling.NewColAggregation(col, false, bow.InputDependent, 10 | func(col int, w rolling.Window) (interface{}, error) { 11 | if w.Bow.NumRows() == 0 { 12 | return nil, nil 13 | } 14 | 15 | value, irow := w.Bow.GetNextValue(col, 0) 16 | if irow == -1 { 17 | return nil, nil 18 | } 19 | return value, nil 20 | }) 21 | } 22 | 23 | func Last(col string) rolling.ColAggregation { 24 | return rolling.NewColAggregation(col, false, bow.InputDependent, 25 | func(col int, w rolling.Window) (interface{}, error) { 26 | if w.Bow.NumRows() == 0 { 27 | return nil, nil 28 | } 29 | 30 | value, irow := w.Bow.GetPrevValue(col, w.Bow.NumRows()-1) 31 | if irow == -1 { 32 | return nil, nil 33 | } 34 | return value, nil 35 | }) 36 | } 37 | -------------------------------------------------------------------------------- /rolling/aggregation/firstlast_test.go: -------------------------------------------------------------------------------- 1 | package aggregation 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/metronlab/bow" 7 | 
"github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestFirst(t *testing.T) { 11 | runTestCases(t, First, nil, []testCase{ 12 | { 13 | name: "empty", 14 | testedBow: emptyBow, 15 | expectedBow: func() bow.Bow { 16 | b, err := bow.NewBow( 17 | bow.NewSeries(timeCol, bow.Int64, []int64{}, nil), 18 | bow.NewSeries(valueCol, bow.Float64, []float64{}, nil), 19 | ) 20 | assert.NoError(t, err) 21 | return b 22 | }(), 23 | }, 24 | { 25 | name: "sparse", 26 | testedBow: sparseFloatBow, 27 | expectedBow: func() bow.Bow { 28 | b, err := bow.NewBowFromRowBasedInterfaces( 29 | []string{timeCol, valueCol}, 30 | []bow.Type{bow.Int64, bow.Float64}, 31 | [][]interface{}{ 32 | {10, 10.}, 33 | {20, nil}, 34 | {30, nil}, 35 | {40, 10.}, 36 | {50, 10.}, 37 | {60, 10.}, 38 | }) 39 | assert.NoError(t, err) 40 | return b 41 | }(), 42 | }, 43 | { 44 | name: "sparse bool", 45 | testedBow: sparseBoolBow, 46 | expectedBow: func() bow.Bow { 47 | b, err := bow.NewBowFromRowBasedInterfaces( 48 | []string{timeCol, valueCol}, 49 | []bow.Type{bow.Int64, bow.Boolean}, 50 | [][]interface{}{ 51 | {10, true}, 52 | {20, nil}, 53 | {30, nil}, 54 | {40, false}, 55 | {50, true}, 56 | {60, true}, 57 | }) 58 | assert.NoError(t, err) 59 | return b 60 | }(), 61 | }, 62 | { 63 | name: "sparse string", 64 | testedBow: sparseStringBow, 65 | expectedBow: func() bow.Bow { 66 | b, err := bow.NewBowFromRowBasedInterfaces( 67 | []string{timeCol, valueCol}, 68 | []bow.Type{bow.Int64, bow.String}, 69 | [][]interface{}{ 70 | {10, "10."}, 71 | {20, nil}, 72 | {30, nil}, 73 | {40, "10."}, 74 | {50, "10."}, 75 | {60, "test"}, 76 | }) 77 | assert.NoError(t, err) 78 | return b 79 | }(), 80 | }, 81 | }) 82 | } 83 | 84 | func TestLast(t *testing.T) { 85 | runTestCases(t, Last, nil, []testCase{ 86 | { 87 | name: "empty", 88 | testedBow: emptyBow, 89 | expectedBow: func() bow.Bow { 90 | b, err := bow.NewBow( 91 | bow.NewSeries("time", bow.Int64, []int64{}, nil), 92 | bow.NewSeries(valueCol, bow.Float64, []float64{}, nil), 93 
| ) 94 | assert.NoError(t, err) 95 | return b 96 | }(), 97 | }, 98 | { 99 | name: "sparse float", 100 | testedBow: sparseFloatBow, 101 | expectedBow: func() bow.Bow { 102 | b, err := bow.NewBowFromRowBasedInterfaces( 103 | []string{"time", valueCol}, 104 | []bow.Type{bow.Int64, bow.Float64}, 105 | [][]interface{}{ 106 | {10, 10.}, 107 | {20, nil}, 108 | {30, nil}, 109 | {40, 10.}, 110 | {50, 20.}, 111 | {60, 20.}, 112 | }) 113 | assert.NoError(t, err) 114 | return b 115 | }(), 116 | }, 117 | { 118 | name: "sparse bool", 119 | testedBow: sparseBoolBow, 120 | expectedBow: func() bow.Bow { 121 | b, err := bow.NewBowFromRowBasedInterfaces( 122 | []string{"time", valueCol}, 123 | []bow.Type{bow.Int64, bow.Boolean}, 124 | [][]interface{}{ 125 | {10, true}, 126 | {20, nil}, 127 | {30, nil}, 128 | {40, false}, 129 | {50, false}, 130 | {60, false}, 131 | }) 132 | assert.NoError(t, err) 133 | return b 134 | }(), 135 | }, 136 | { 137 | name: "sparse string", 138 | testedBow: sparseStringBow, 139 | expectedBow: func() bow.Bow { 140 | b, err := bow.NewBowFromRowBasedInterfaces( 141 | []string{"time", valueCol}, 142 | []bow.Type{bow.Int64, bow.String}, 143 | [][]interface{}{ 144 | {10, "10."}, 145 | {20, nil}, 146 | {30, nil}, 147 | {40, "10."}, 148 | {50, "20."}, 149 | {60, "20."}, 150 | }) 151 | assert.NoError(t, err) 152 | return b 153 | }(), 154 | }, 155 | }) 156 | } 157 | -------------------------------------------------------------------------------- /rolling/aggregation/integral.go: -------------------------------------------------------------------------------- 1 | package aggregation 2 | 3 | import ( 4 | "github.com/metronlab/bow" 5 | "github.com/metronlab/bow/rolling" 6 | ) 7 | 8 | func IntegralTrapezoid(col string) rolling.ColAggregation { 9 | return rolling.NewColAggregation(col, true, bow.Float64, 10 | func(colIndex int, w rolling.Window) (interface{}, error) { 11 | if w.Bow.NumRows() == 0 { 12 | return nil, nil 13 | } 14 | 15 | var sum float64 16 | var ok bool 17 | 
t0, v0, rowIndex := w.Bow.GetNextFloat64s(w.IntervalColIndex, colIndex, 0) 18 | if rowIndex < 0 { 19 | return nil, nil 20 | } 21 | 22 | for rowIndex >= 0 { 23 | t1, v1, nextRowIndex := w.Bow.GetNextFloat64s(w.IntervalColIndex, colIndex, rowIndex+1) 24 | if nextRowIndex < 0 { 25 | break 26 | } 27 | 28 | sum += (v0 + v1) / 2 * (t1 - t0) 29 | ok = true 30 | 31 | t0, v0, rowIndex = t1, v1, nextRowIndex 32 | } 33 | if !ok { 34 | return nil, nil 35 | } 36 | return sum, nil 37 | }) 38 | } 39 | 40 | func IntegralStep(col string) rolling.ColAggregation { 41 | return rolling.NewColAggregation(col, false, bow.Float64, 42 | func(colIndex int, w rolling.Window) (interface{}, error) { 43 | if w.Bow.NumRows() == 0 { 44 | return nil, nil 45 | } 46 | var sum float64 47 | var ok bool 48 | t0, v0, rowIndex := w.Bow.GetNextFloat64s(w.IntervalColIndex, colIndex, 0) 49 | for rowIndex >= 0 { 50 | t1, v1, nextRowIndex := w.Bow.GetNextFloat64s(w.IntervalColIndex, colIndex, rowIndex+1) 51 | if nextRowIndex < 0 { 52 | t1 = float64(w.LastValue) 53 | } 54 | 55 | sum += v0 * (t1 - t0) 56 | ok = true 57 | 58 | if nextRowIndex < 0 { 59 | break 60 | } 61 | 62 | t0, v0, rowIndex = t1, v1, nextRowIndex 63 | } 64 | if !ok { 65 | return nil, nil 66 | } 67 | return sum, nil 68 | }) 69 | } 70 | -------------------------------------------------------------------------------- /rolling/aggregation/integral_test.go: -------------------------------------------------------------------------------- 1 | package aggregation 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/metronlab/bow" 7 | "github.com/metronlab/bow/rolling/transformation" 8 | "github.com/stretchr/testify/assert" 9 | ) 10 | 11 | func TestIntegralStep(t *testing.T) { 12 | runTestCases(t, IntegralStep, nil, []testCase{ 13 | { 14 | name: "empty", 15 | testedBow: emptyBow, 16 | expectedBow: func() bow.Bow { 17 | b, err := bow.NewBow( 18 | bow.NewSeries(timeCol, bow.Int64, []int64{}, nil), 19 | bow.NewSeries(valueCol, bow.Float64, []float64{}, 
nil), 20 | ) 21 | assert.NoError(t, err) 22 | return b 23 | }(), 24 | }, 25 | { 26 | name: "sparse float", 27 | testedBow: sparseFloatBow, 28 | expectedBow: func() bow.Bow { 29 | b, err := bow.NewBowFromRowBasedInterfaces( 30 | []string{timeCol, valueCol}, 31 | []bow.Type{bow.Int64, bow.Float64}, 32 | [][]interface{}{ 33 | {10, 100.}, 34 | {20, nil}, 35 | {30, nil}, 36 | {40, 100 * 0.9}, 37 | {50, 100*0.1 + 200*0.9}, 38 | {60, 100*0.8 + 200*0.1}, 39 | }) 40 | assert.NoError(t, err) 41 | return b 42 | }(), 43 | }, 44 | { 45 | name: "sparse bool", 46 | testedBow: sparseBoolBow, 47 | expectedBow: func() bow.Bow { 48 | b, err := bow.NewBowFromRowBasedInterfaces( 49 | []string{timeCol, valueCol}, 50 | []bow.Type{bow.Int64, bow.Float64}, 51 | [][]interface{}{ 52 | {10, 10.}, 53 | {20, nil}, 54 | {30, nil}, 55 | {40, 0.}, 56 | {50, 1.}, 57 | {60, 8.}, 58 | }) 59 | assert.NoError(t, err) 60 | return b 61 | }(), 62 | }, 63 | { 64 | name: "sparse string", 65 | testedBow: sparseStringBow, 66 | expectedBow: func() bow.Bow { 67 | b, err := bow.NewBowFromRowBasedInterfaces( 68 | []string{timeCol, valueCol}, 69 | []bow.Type{bow.Int64, bow.Float64}, 70 | [][]interface{}{ 71 | {10, 100.}, 72 | {20, nil}, 73 | {30, nil}, 74 | {40, 100 * 0.9}, 75 | {50, 100*0.1 + 200*0.9}, 76 | {60, 20.}, 77 | }) 78 | assert.NoError(t, err) 79 | return b 80 | }(), 81 | }, 82 | }) 83 | } 84 | 85 | func TestIntegralStep_scaled(t *testing.T) { 86 | factor := 0.1 87 | transforms := []transformation.Func{ 88 | func(x interface{}) (interface{}, error) { 89 | if x == nil { 90 | return nil, nil 91 | } 92 | return x.(float64) * factor, nil 93 | }, 94 | } 95 | runTestCases(t, IntegralStep, transforms, []testCase{ 96 | { 97 | name: "empty", 98 | testedBow: emptyBow, 99 | expectedBow: func() bow.Bow { 100 | b, err := bow.NewBow( 101 | bow.NewSeries(timeCol, bow.Int64, []int64{}, nil), 102 | bow.NewSeries(valueCol, bow.Float64, []float64{}, nil), 103 | ) 104 | assert.NoError(t, err) 105 | return b 106 | }(), 107 
| }, 108 | { 109 | name: "sparse", 110 | testedBow: sparseFloatBow, 111 | expectedBow: func() bow.Bow { 112 | b, err := bow.NewBowFromRowBasedInterfaces( 113 | []string{timeCol, valueCol}, 114 | []bow.Type{bow.Int64, bow.Float64}, 115 | [][]interface{}{ 116 | {10, factor * (100.)}, 117 | {20, nil}, 118 | {30, nil}, 119 | {40, factor * (100 * 0.9)}, 120 | {50, factor * (100*0.1 + 200*0.9)}, 121 | {60, factor * (100*0.8 + 200*0.1)}, 122 | }) 123 | assert.NoError(t, err) 124 | return b 125 | }(), 126 | }, 127 | }) 128 | } 129 | 130 | func TestIntegralTrapezoid(t *testing.T) { 131 | runTestCases(t, IntegralTrapezoid, nil, []testCase{ 132 | { 133 | name: "empty", 134 | testedBow: emptyBow, 135 | expectedBow: func() bow.Bow { 136 | b, err := bow.NewBow( 137 | bow.NewSeries(timeCol, bow.Int64, []int64{}, nil), 138 | bow.NewSeries(valueCol, bow.Float64, []float64{}, nil), 139 | ) 140 | assert.NoError(t, err) 141 | return b 142 | }(), 143 | }, 144 | { 145 | name: "sparse float", 146 | testedBow: sparseFloatBow, 147 | expectedBow: func() bow.Bow { 148 | b, err := bow.NewBowFromRowBasedInterfaces( 149 | []string{timeCol, valueCol}, 150 | []bow.Type{bow.Int64, bow.Float64}, 151 | [][]interface{}{ 152 | {10, nil}, 153 | {20, nil}, 154 | {30, nil}, 155 | {40, 9 * 10.}, 156 | {50, 15.}, 157 | {60, 8 * (15.)}, 158 | }) 159 | assert.NoError(t, err) 160 | return b 161 | }(), 162 | }, 163 | { 164 | name: "sparse bool", 165 | testedBow: sparseBoolBow, 166 | expectedBow: func() bow.Bow { 167 | b, err := bow.NewBowFromRowBasedInterfaces( 168 | []string{timeCol, valueCol}, 169 | []bow.Type{bow.Int64, bow.Float64}, 170 | [][]interface{}{ 171 | {10, nil}, 172 | {20, nil}, 173 | {30, nil}, 174 | {40, 4.5}, 175 | {50, 0.5}, 176 | {60, 4.}, 177 | }) 178 | assert.NoError(t, err) 179 | return b 180 | }(), 181 | }, 182 | { 183 | name: "sparse string", 184 | testedBow: sparseStringBow, 185 | expectedBow: func() bow.Bow { 186 | b, err := bow.NewBowFromRowBasedInterfaces( 187 | []string{timeCol, 
valueCol}, 188 | []bow.Type{bow.Int64, bow.Float64}, 189 | [][]interface{}{ 190 | {10, nil}, 191 | {20, nil}, 192 | {30, nil}, 193 | {40, 9 * 10.}, 194 | {50, 15.}, 195 | {60, nil}, 196 | }) 197 | assert.NoError(t, err) 198 | return b 199 | }(), 200 | }, 201 | }) 202 | } 203 | -------------------------------------------------------------------------------- /rolling/aggregation/minmax.go: -------------------------------------------------------------------------------- 1 | package aggregation 2 | 3 | import ( 4 | "github.com/metronlab/bow" 5 | "github.com/metronlab/bow/rolling" 6 | ) 7 | 8 | func Min(col string) rolling.ColAggregation { 9 | return rolling.NewColAggregation(col, false, bow.Float64, 10 | func(col int, w rolling.Window) (interface{}, error) { 11 | if w.Bow.NumRows() == 0 { 12 | return nil, nil 13 | } 14 | 15 | var min interface{} 16 | for i := 0; i < w.Bow.NumRows(); i++ { 17 | value, ok := w.Bow.GetFloat64(col, i) 18 | if !ok { 19 | continue 20 | } 21 | if min != nil { 22 | if value < min.(float64) { 23 | min = value 24 | } 25 | continue 26 | } 27 | min = value 28 | } 29 | return min, nil 30 | }) 31 | } 32 | 33 | func Max(col string) rolling.ColAggregation { 34 | return rolling.NewColAggregation(col, false, bow.Float64, 35 | func(col int, w rolling.Window) (interface{}, error) { 36 | if w.Bow.NumRows() == 0 { 37 | return nil, nil 38 | } 39 | 40 | var min interface{} 41 | for i := 0; i < w.Bow.NumRows(); i++ { 42 | value, ok := w.Bow.GetFloat64(col, i) 43 | if !ok { 44 | continue 45 | } 46 | if min != nil { 47 | if value > min.(float64) { 48 | min = value 49 | } 50 | continue 51 | } 52 | min = value 53 | } 54 | return min, nil 55 | }) 56 | } 57 | -------------------------------------------------------------------------------- /rolling/aggregation/minmax_test.go: -------------------------------------------------------------------------------- 1 | package aggregation 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/metronlab/bow" 7 | 
"github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestMin(t *testing.T) { 11 | runTestCases(t, Min, nil, []testCase{ 12 | { 13 | name: "empty", 14 | testedBow: emptyBow, 15 | expectedBow: func() bow.Bow { 16 | b, err := bow.NewBow( 17 | bow.NewSeries(timeCol, bow.Int64, []int64{}, nil), 18 | bow.NewSeries(valueCol, bow.Float64, []float64{}, nil), 19 | ) 20 | assert.NoError(t, err) 21 | return b 22 | }(), 23 | }, 24 | { 25 | name: "sparse float", 26 | testedBow: sparseFloatBow, 27 | expectedBow: func() bow.Bow { 28 | b, err := bow.NewBowFromRowBasedInterfaces( 29 | []string{timeCol, valueCol}, 30 | []bow.Type{bow.Int64, bow.Float64}, 31 | [][]interface{}{ 32 | {10, 10.}, 33 | {20, nil}, 34 | {30, nil}, 35 | {40, 10.}, 36 | {50, 10.}, 37 | {60, 10.}, 38 | }) 39 | assert.NoError(t, err) 40 | return b 41 | }(), 42 | }, 43 | { 44 | name: "sparse bool", 45 | testedBow: sparseBoolBow, 46 | expectedBow: func() bow.Bow { 47 | b, err := bow.NewBowFromRowBasedInterfaces( 48 | []string{timeCol, valueCol}, 49 | []bow.Type{bow.Int64, bow.Float64}, 50 | [][]interface{}{ 51 | {10, 1.}, 52 | {20, nil}, 53 | {30, nil}, 54 | {40, 0.}, 55 | {50, 0.}, 56 | {60, 0.}, 57 | }) 58 | assert.NoError(t, err) 59 | return b 60 | }(), 61 | }, 62 | { 63 | name: "sparse string", 64 | testedBow: sparseStringBow, 65 | expectedBow: func() bow.Bow { 66 | b, err := bow.NewBowFromRowBasedInterfaces( 67 | []string{timeCol, valueCol}, 68 | []bow.Type{bow.Int64, bow.Float64}, 69 | [][]interface{}{ 70 | {10, 10.}, 71 | {20, nil}, 72 | {30, nil}, 73 | {40, 10.}, 74 | {50, 10.}, 75 | {60, 20.}, 76 | }) 77 | assert.NoError(t, err) 78 | return b 79 | }(), 80 | }, 81 | }) 82 | } 83 | 84 | func TestMax(t *testing.T) { 85 | runTestCases(t, Max, nil, []testCase{ 86 | { 87 | name: "empty", 88 | testedBow: emptyBow, 89 | expectedBow: func() bow.Bow { 90 | b, err := bow.NewBow( 91 | bow.NewSeries(timeCol, bow.Int64, []int64{}, nil), 92 | bow.NewSeries(valueCol, bow.Float64, []float64{}, nil), 93 | ) 94 | 
assert.NoError(t, err) 95 | return b 96 | }(), 97 | }, 98 | { 99 | name: "sparse float", 100 | testedBow: sparseFloatBow, 101 | expectedBow: func() bow.Bow { 102 | b, err := bow.NewBowFromRowBasedInterfaces( 103 | []string{timeCol, valueCol}, 104 | []bow.Type{bow.Int64, bow.Float64}, 105 | [][]interface{}{ 106 | {10, 10.}, 107 | {20, nil}, 108 | {30, nil}, 109 | {40, 10.}, 110 | {50, 20.}, 111 | {60, 20.}, 112 | }) 113 | assert.NoError(t, err) 114 | return b 115 | }(), 116 | }, 117 | { 118 | name: "sparse bool", 119 | testedBow: sparseBoolBow, 120 | expectedBow: func() bow.Bow { 121 | b, err := bow.NewBowFromRowBasedInterfaces( 122 | []string{timeCol, valueCol}, 123 | []bow.Type{bow.Int64, bow.Float64}, 124 | [][]interface{}{ 125 | {10, 1.}, 126 | {20, nil}, 127 | {30, nil}, 128 | {40, 0.}, 129 | {50, 1.}, 130 | {60, 1.}, 131 | }) 132 | assert.NoError(t, err) 133 | return b 134 | }(), 135 | }, 136 | { 137 | name: "sparse string", 138 | testedBow: sparseStringBow, 139 | expectedBow: func() bow.Bow { 140 | b, err := bow.NewBowFromRowBasedInterfaces( 141 | []string{timeCol, valueCol}, 142 | []bow.Type{bow.Int64, bow.Float64}, 143 | [][]interface{}{ 144 | {10, 10.}, 145 | {20, nil}, 146 | {30, nil}, 147 | {40, 10.}, 148 | {50, 20.}, 149 | {60, 20.}, 150 | }) 151 | assert.NoError(t, err) 152 | return b 153 | }(), 154 | }, 155 | }) 156 | } 157 | -------------------------------------------------------------------------------- /rolling/aggregation/mode.go: -------------------------------------------------------------------------------- 1 | package aggregation 2 | 3 | import ( 4 | "github.com/metronlab/bow" 5 | "github.com/metronlab/bow/rolling" 6 | ) 7 | 8 | func Mode(col string) rolling.ColAggregation { 9 | return rolling.NewColAggregation(col, false, bow.InputDependent, 10 | func(col int, w rolling.Window) (interface{}, error) { 11 | if w.Bow.NumRows() == 0 { 12 | return nil, nil 13 | } 14 | 15 | occurrences := make(map[interface{}]int) 16 | max := 0 17 | var res 
interface{} 18 | for i := 0; i < w.Bow.NumRows(); i++ { 19 | v := w.Bow.GetValue(col, i) 20 | if v != nil { 21 | nb := occurrences[v] 22 | nb++ 23 | occurrences[v] = nb 24 | if nb > max { 25 | max = nb 26 | res = v 27 | } 28 | } 29 | } 30 | return res, nil 31 | }) 32 | } 33 | -------------------------------------------------------------------------------- /rolling/aggregation/mode_test.go: -------------------------------------------------------------------------------- 1 | package aggregation 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/metronlab/bow" 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestMode(t *testing.T) { 11 | var modeFloatBow, _ = bow.NewBowFromRowBasedInterfaces( 12 | []string{timeCol, valueCol}, 13 | []bow.Type{bow.Int64, bow.Float64}, 14 | [][]interface{}{ 15 | {10, 10.}, // same value window 16 | {11, 10.}, 17 | 18 | {20, 42.}, // most occurrences to 42 19 | {21, 42.}, 20 | {22, 10.}, 21 | 22 | {30, nil}, // most occurrences to 10 23 | {31, nil}, 24 | {32, 10.}, 25 | 26 | // Empty window 27 | 28 | {50, nil}, // only nil values to nil 29 | {51, nil}, 30 | }) 31 | 32 | runTestCases(t, Mode, nil, []testCase{ 33 | { 34 | name: "empty", 35 | testedBow: emptyBow, 36 | expectedBow: func() bow.Bow { 37 | b, err := bow.NewBow( 38 | bow.NewSeries(timeCol, bow.Int64, []int64{}, nil), 39 | bow.NewSeries(valueCol, bow.Float64, []float64{}, nil), 40 | ) 41 | assert.NoError(t, err) 42 | return b 43 | }(), 44 | }, 45 | { 46 | name: "mode float", 47 | testedBow: modeFloatBow, 48 | expectedBow: func() bow.Bow { 49 | b, err := bow.NewBowFromRowBasedInterfaces( 50 | []string{timeCol, valueCol}, 51 | []bow.Type{bow.Int64, bow.Float64}, 52 | [][]interface{}{ 53 | {10, 10.}, 54 | {20, 42.}, 55 | {30, 10.}, 56 | {40, nil}, 57 | {50, nil}, 58 | }) 59 | assert.NoError(t, err) 60 | return b 61 | }(), 62 | }, 63 | { 64 | name: "sparse bool", 65 | testedBow: sparseBoolBow, 66 | expectedBow: func() bow.Bow { 67 | b, err := 
bow.NewBowFromRowBasedInterfaces( 68 | []string{timeCol, valueCol}, 69 | []bow.Type{bow.Int64, bow.Boolean}, 70 | [][]interface{}{ 71 | {10, true}, 72 | {20, nil}, 73 | {30, nil}, 74 | {40, false}, 75 | {50, true}, 76 | {60, true}, 77 | }) 78 | assert.NoError(t, err) 79 | return b 80 | }(), 81 | }, 82 | { 83 | name: "sparse string", 84 | testedBow: sparseStringBow, 85 | expectedBow: func() bow.Bow { 86 | b, err := bow.NewBowFromRowBasedInterfaces( 87 | []string{timeCol, valueCol}, 88 | []bow.Type{bow.Int64, bow.String}, 89 | [][]interface{}{ 90 | {10, "10."}, 91 | {20, nil}, 92 | {30, nil}, 93 | {40, "10."}, 94 | {50, "10."}, 95 | {60, "test"}, 96 | }) 97 | assert.NoError(t, err) 98 | return b 99 | }(), 100 | }, 101 | }) 102 | } 103 | -------------------------------------------------------------------------------- /rolling/aggregation/sum.go: -------------------------------------------------------------------------------- 1 | package aggregation 2 | 3 | import ( 4 | "github.com/metronlab/bow" 5 | "github.com/metronlab/bow/rolling" 6 | ) 7 | 8 | func Sum(col string) rolling.ColAggregation { 9 | return rolling.NewColAggregation(col, false, bow.Float64, 10 | func(col int, w rolling.Window) (interface{}, error) { 11 | if w.Bow.NumRows() == 0 { 12 | return 0., nil 13 | } 14 | 15 | var sum float64 16 | for i := 0; i < w.Bow.NumRows(); i++ { 17 | value, ok := w.Bow.GetFloat64(col, i) 18 | if !ok { 19 | continue 20 | } 21 | sum += value 22 | } 23 | return sum, nil 24 | }) 25 | } 26 | -------------------------------------------------------------------------------- /rolling/aggregation/sum_test.go: -------------------------------------------------------------------------------- 1 | package aggregation 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/metronlab/bow" 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestSum(t *testing.T) { 11 | runTestCases(t, Sum, nil, []testCase{ 12 | { 13 | name: "empty", 14 | testedBow: emptyBow, 15 | expectedBow: func() 
bow.Bow { 16 | b, err := bow.NewBow( 17 | bow.NewSeries(timeCol, bow.Int64, []int64{}, nil), 18 | bow.NewSeries(valueCol, bow.Float64, []float64{}, nil), 19 | ) 20 | assert.NoError(t, err) 21 | return b 22 | }(), 23 | }, 24 | { 25 | name: "sparse float", 26 | testedBow: sparseFloatBow, 27 | expectedBow: func() bow.Bow { 28 | b, err := bow.NewBowFromRowBasedInterfaces( 29 | []string{timeCol, valueCol}, 30 | []bow.Type{bow.Int64, bow.Float64}, 31 | [][]interface{}{ 32 | {10, 10.}, 33 | {20, 0.}, 34 | {30, 0.}, 35 | {40, 10.}, 36 | {50, 30.}, 37 | {60, 30.}, 38 | }) 39 | assert.NoError(t, err) 40 | return b 41 | }(), 42 | }, 43 | { 44 | name: "sparse bool", 45 | testedBow: sparseBoolBow, 46 | expectedBow: func() bow.Bow { 47 | b, err := bow.NewBowFromRowBasedInterfaces( 48 | []string{timeCol, valueCol}, 49 | []bow.Type{bow.Int64, bow.Float64}, 50 | [][]interface{}{ 51 | {10, 1.}, 52 | {20, 0.}, 53 | {30, 0.}, 54 | {40, 0.}, 55 | {50, 1.}, 56 | {60, 1.}, 57 | }) 58 | assert.NoError(t, err) 59 | return b 60 | }(), 61 | }, 62 | { 63 | name: "sparse string", 64 | testedBow: sparseStringBow, 65 | expectedBow: func() bow.Bow { 66 | b, err := bow.NewBowFromRowBasedInterfaces( 67 | []string{timeCol, valueCol}, 68 | []bow.Type{bow.Int64, bow.Float64}, 69 | [][]interface{}{ 70 | {10, 10.}, 71 | {20, 0.}, 72 | {30, 0.}, 73 | {40, 10.}, 74 | {50, 30.}, 75 | {60, 20.}, 76 | }) 77 | assert.NoError(t, err) 78 | return b 79 | }(), 80 | }, 81 | }) 82 | } 83 | -------------------------------------------------------------------------------- /rolling/aggregation/weightedmean.go: -------------------------------------------------------------------------------- 1 | package aggregation 2 | 3 | import ( 4 | "github.com/metronlab/bow" 5 | "github.com/metronlab/bow/rolling" 6 | ) 7 | 8 | func WeightedAverageStep(col string) rolling.ColAggregation { 9 | integralFunc := IntegralStep(col).Func() 10 | return rolling.NewColAggregation(col, false, bow.Float64, 11 | func(colIndex int, w 
rolling.Window) (interface{}, error) { 12 | v, err := integralFunc(colIndex, w) 13 | if v == nil || err != nil { 14 | return v, err 15 | } 16 | 17 | windowsWide := float64(w.LastValue - w.FirstValue) 18 | return v.(float64) / windowsWide, nil 19 | }) 20 | } 21 | 22 | func WeightedAverageLinear(col string) rolling.ColAggregation { 23 | integralFunc := IntegralTrapezoid(col).Func() 24 | return rolling.NewColAggregation(col, true, bow.Float64, 25 | func(colIndex int, w rolling.Window) (interface{}, error) { 26 | v, err := integralFunc(colIndex, w) 27 | if v == nil || err != nil { 28 | return v, err 29 | } 30 | 31 | windowsWide := float64(w.LastValue - w.FirstValue) 32 | return v.(float64) / windowsWide, nil 33 | }) 34 | } 35 | -------------------------------------------------------------------------------- /rolling/aggregation/weightedmean_test.go: -------------------------------------------------------------------------------- 1 | package aggregation 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/metronlab/bow" 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestWeightedAverageStep(t *testing.T) { 11 | runTestCases(t, WeightedAverageStep, nil, []testCase{ 12 | { 13 | name: "empty", 14 | testedBow: emptyBow, 15 | expectedBow: func() bow.Bow { 16 | b, err := bow.NewBow( 17 | bow.NewSeries(timeCol, bow.Int64, []int64{}, nil), 18 | bow.NewSeries(valueCol, bow.Float64, []float64{}, nil), 19 | ) 20 | assert.NoError(t, err) 21 | return b 22 | }(), 23 | }, 24 | { 25 | name: "sparse float", 26 | testedBow: sparseFloatBow, 27 | expectedBow: func() bow.Bow { 28 | b, err := bow.NewBowFromRowBasedInterfaces( 29 | []string{timeCol, valueCol}, 30 | []bow.Type{bow.Int64, bow.Float64}, 31 | [][]interface{}{ 32 | {10, 10.}, 33 | {20, nil}, 34 | {30, nil}, 35 | {40, 10 * 0.9}, 36 | {50, 10*0.1 + 20*0.9}, 37 | {60, 10*0.8 + 20*0.1}, 38 | }) 39 | assert.NoError(t, err) 40 | return b 41 | }(), 42 | }, 43 | { 44 | name: "float only nil", 45 | testedBow: nilBow, 46 | 
expectedBow: func() bow.Bow { 47 | b, err := bow.NewBowFromRowBasedInterfaces( 48 | []string{timeCol, valueCol}, 49 | []bow.Type{bow.Int64, bow.Float64}, 50 | [][]interface{}{ 51 | {10, nil}, 52 | {20, nil}, 53 | }) 54 | assert.NoError(t, err) 55 | return b 56 | }(), 57 | }, 58 | { 59 | name: "sparse bool", 60 | testedBow: sparseBoolBow, 61 | expectedBow: func() bow.Bow { 62 | b, err := bow.NewBowFromRowBasedInterfaces( 63 | []string{timeCol, valueCol}, 64 | []bow.Type{bow.Int64, bow.Float64}, 65 | [][]interface{}{ 66 | {10, 1.}, 67 | {20, nil}, 68 | {30, nil}, 69 | {40, 0.}, 70 | {50, 0.1}, 71 | {60, 0.8}, 72 | }) 73 | assert.NoError(t, err) 74 | return b 75 | }(), 76 | }, 77 | { 78 | name: "sparse string", 79 | testedBow: sparseStringBow, 80 | expectedBow: func() bow.Bow { 81 | b, err := bow.NewBowFromRowBasedInterfaces( 82 | []string{timeCol, valueCol}, 83 | []bow.Type{bow.Int64, bow.Float64}, 84 | [][]interface{}{ 85 | {10, 10.}, 86 | {20, nil}, 87 | {30, nil}, 88 | {40, 9.}, 89 | {50, 19.}, 90 | {60, 2.}, 91 | }) 92 | assert.NoError(t, err) 93 | return b 94 | }(), 95 | }, 96 | }) 97 | } 98 | 99 | func TestWeightedAverageLinear(t *testing.T) { 100 | runTestCases(t, WeightedAverageLinear, nil, []testCase{ 101 | { 102 | name: "empty", 103 | testedBow: emptyBow, 104 | expectedBow: func() bow.Bow { 105 | b, err := bow.NewBow( 106 | bow.NewSeries(timeCol, bow.Int64, []int64{}, nil), 107 | bow.NewSeries(valueCol, bow.Float64, []float64{}, nil), 108 | ) 109 | assert.NoError(t, err) 110 | return b 111 | }(), 112 | }, 113 | { 114 | name: "sparse float", 115 | testedBow: sparseFloatBow, 116 | expectedBow: func() bow.Bow { 117 | b, err := bow.NewBowFromRowBasedInterfaces( 118 | []string{timeCol, valueCol}, 119 | []bow.Type{bow.Int64, bow.Float64}, 120 | [][]interface{}{ 121 | {10, nil}, 122 | {20, nil}, 123 | {30, nil}, 124 | {40, 10 * 0.9}, 125 | {50, 15 * 0.1}, 126 | {60, 15 * 0.8}, 127 | }) 128 | assert.NoError(t, err) 129 | return b 130 | }(), 131 | }, 132 | { 133 | 
name: "sparse bool", 134 | testedBow: sparseBoolBow, 135 | expectedBow: func() bow.Bow { 136 | b, err := bow.NewBowFromRowBasedInterfaces( 137 | []string{timeCol, valueCol}, 138 | []bow.Type{bow.Int64, bow.Float64}, 139 | [][]interface{}{ 140 | {10, nil}, 141 | {20, nil}, 142 | {30, nil}, 143 | {40, 0.45}, 144 | {50, 0.05}, 145 | {60, 0.4}, 146 | }) 147 | assert.NoError(t, err) 148 | return b 149 | }(), 150 | }, 151 | { 152 | name: "sparse string", 153 | testedBow: sparseStringBow, 154 | expectedBow: func() bow.Bow { 155 | b, err := bow.NewBowFromRowBasedInterfaces( 156 | []string{timeCol, valueCol}, 157 | []bow.Type{bow.Int64, bow.Float64}, 158 | [][]interface{}{ 159 | {10, nil}, 160 | {20, nil}, 161 | {30, nil}, 162 | {40, 9.}, 163 | {50, 1.5}, 164 | {60, nil}, 165 | }) 166 | assert.NoError(t, err) 167 | return b 168 | }(), 169 | }, 170 | }) 171 | } 172 | -------------------------------------------------------------------------------- /rolling/aggregation/whole.go: -------------------------------------------------------------------------------- 1 | package aggregation 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | 7 | "github.com/metronlab/bow" 8 | "github.com/metronlab/bow/rolling" 9 | ) 10 | 11 | // Aggregate the whole dataframe on column intervalColName with one or several rolling.ColAggregation. 
12 | func Aggregate(b bow.Bow, intervalColName string, aggrs ...rolling.ColAggregation) (bow.Bow, error) { 13 | if b == nil { 14 | return nil, errors.New("nil bow") 15 | } 16 | if len(aggrs) == 0 { 17 | return nil, errors.New("at least one column aggregation is required") 18 | } 19 | 20 | intervalColIndex, err := b.ColumnIndex(intervalColName) 21 | if err != nil { 22 | return nil, err 23 | } 24 | 25 | series := make([]bow.Series, len(aggrs)) 26 | 27 | for aggrIndex, aggr := range aggrs { 28 | if aggr.InputName() == "" { 29 | return nil, fmt.Errorf("column aggregation %d: no input name", aggrIndex) 30 | } 31 | 32 | inputColIndex, err := b.ColumnIndex(aggr.InputName()) 33 | if err != nil { 34 | return nil, fmt.Errorf("column aggregation %d: %w", aggrIndex, err) 35 | } 36 | 37 | aggr.SetInputIndex(inputColIndex) 38 | 39 | name := aggr.OutputName() 40 | if name == "" { 41 | name = b.ColumnName(aggr.InputIndex()) 42 | } 43 | 44 | typ := aggr.GetReturnType( 45 | b.ColumnType(aggr.InputIndex()), 46 | b.ColumnType(aggr.InputIndex())) 47 | 48 | var buf bow.Buffer 49 | if b.NumRows() == 0 { 50 | buf = bow.NewBuffer(0, typ) 51 | } else { 52 | buf = bow.NewBuffer(1, typ) 53 | 54 | firstValue, firstValueIndex := b.GetNextFloat64(intervalColIndex, 0) 55 | if firstValueIndex == -1 { 56 | firstValue = -1 57 | } 58 | 59 | lastValue, lastValueIndex := b.GetPrevFloat64(intervalColIndex, b.NumRows()-1) 60 | if lastValueIndex == -1 { 61 | lastValue = -1 62 | } 63 | 64 | w := rolling.Window{ 65 | Bow: b, 66 | IntervalColIndex: intervalColIndex, 67 | IsInclusive: true, 68 | FirstIndex: 0, 69 | FirstValue: int64(firstValue), 70 | LastValue: int64(lastValue), 71 | } 72 | 73 | aggrValue, err := aggr.Func()(aggr.InputIndex(), w) 74 | if err != nil { 75 | return nil, fmt.Errorf("column aggregation %d: %w", aggrIndex, err) 76 | } 77 | 78 | for transIndex, trans := range aggr.Transformations() { 79 | aggrValue, err = trans(aggrValue) 80 | if err != nil { 81 | return nil, fmt.Errorf("column 
aggregation %d: transIndex %d: %w", 82 | aggrIndex, transIndex, err) 83 | } 84 | } 85 | 86 | buf.SetOrDropStrict(0, aggrValue) 87 | } 88 | 89 | series[aggrIndex] = bow.NewSeriesFromBuffer(name, buf) 90 | } 91 | 92 | return bow.NewBow(series...) 93 | } 94 | -------------------------------------------------------------------------------- /rolling/aggregation/windowstart.go: -------------------------------------------------------------------------------- 1 | package aggregation 2 | 3 | import ( 4 | "github.com/metronlab/bow" 5 | "github.com/metronlab/bow/rolling" 6 | ) 7 | 8 | func WindowStart(col string) rolling.ColAggregation { 9 | return rolling.NewColAggregation(col, false, bow.IteratorDependent, 10 | func(col int, w rolling.Window) (interface{}, error) { 11 | return w.FirstValue, nil 12 | }) 13 | } 14 | -------------------------------------------------------------------------------- /rolling/aggregation_test.go: -------------------------------------------------------------------------------- 1 | package rolling 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/metronlab/bow" 8 | "github.com/stretchr/testify/assert" 9 | "github.com/stretchr/testify/require" 10 | ) 11 | 12 | func TestIntervalRolling_Aggregate(t *testing.T) { 13 | b, err := bow.NewBowFromColBasedInterfaces( 14 | []string{timeCol, valueCol}, 15 | []bow.Type{bow.Int64, bow.Float64}, 16 | [][]interface{}{ 17 | {10, 15, 16, 25, 29}, 18 | {1.0, 1.5, 1.6, 2.5, 2.9}, 19 | }) 20 | require.NoError(t, err) 21 | r, err := IntervalRolling(b, timeCol, 10, Options{}) 22 | require.NoError(t, err) 23 | 24 | timeAggr := NewColAggregation(timeCol, false, bow.Int64, 25 | func(col int, w Window) (interface{}, error) { 26 | return w.FirstValue, nil 27 | }) 28 | valueAggr := NewColAggregation(valueCol, false, bow.Float64, 29 | func(col int, w Window) (interface{}, error) { 30 | return float64(w.Bow.NumRows()), nil 31 | }) 32 | doubleAggr := NewColAggregation(valueCol, false, bow.Float64, 33 | func(col int, w 
Window) (interface{}, error) { 34 | return float64(w.Bow.NumRows()) * 2, nil 35 | }) 36 | 37 | t.Run("keep columns", func(t *testing.T) { 38 | aggregated, err := r. 39 | Aggregate(timeAggr, valueAggr). 40 | Bow() 41 | assert.NoError(t, err) 42 | assert.NotNil(t, aggregated) 43 | expected, _ := bow.NewBowFromColBasedInterfaces( 44 | []string{timeCol, valueCol}, 45 | []bow.Type{bow.Int64, bow.Float64}, 46 | [][]interface{}{ 47 | {10, 20}, 48 | {3., 2.}, 49 | }) 50 | assert.True(t, aggregated.Equal(expected)) 51 | }) 52 | 53 | t.Run("swap columns", func(t *testing.T) { 54 | aggregated, err := r. 55 | Aggregate(valueAggr, timeAggr). 56 | Bow() 57 | assert.NoError(t, err) 58 | assert.NotNil(t, aggregated) 59 | expected, _ := bow.NewBowFromColBasedInterfaces( 60 | []string{valueCol, timeCol}, 61 | []bow.Type{bow.Float64, bow.Int64}, 62 | [][]interface{}{ 63 | {3., 2.}, 64 | {10, 20}, 65 | }) 66 | assert.True(t, aggregated.Equal(expected)) 67 | }) 68 | 69 | t.Run("rename columns", func(t *testing.T) { 70 | aggregated, err := r.Aggregate(timeAggr.RenameOutput("a"), valueAggr.RenameOutput("b")).Bow() 71 | assert.NoError(t, err) 72 | assert.NotNil(t, aggregated) 73 | expected, _ := bow.NewBowFromColBasedInterfaces( 74 | []string{"a", "b"}, 75 | []bow.Type{bow.Int64, bow.Float64}, 76 | [][]interface{}{ 77 | {10, 20}, 78 | {3., 2.}, 79 | }) 80 | assert.True(t, aggregated.Equal(expected)) 81 | }) 82 | 83 | t.Run("less than in original", func(t *testing.T) { 84 | aggregated, err := r.Aggregate(timeAggr).Bow() 85 | assert.NoError(t, err) 86 | assert.NotNil(t, aggregated) 87 | expected, _ := bow.NewBowFromColBasedInterfaces( 88 | []string{timeCol}, 89 | []bow.Type{bow.Int64}, 90 | [][]interface{}{ 91 | {10, 20}, 92 | }) 93 | assert.True(t, aggregated.Equal(expected)) 94 | }) 95 | 96 | t.Run("more than in original", func(t *testing.T) { 97 | aggregated, err := r.Aggregate(timeAggr, doubleAggr.RenameOutput("double"), valueAggr).Bow() 98 | assert.NoError(t, err) 99 | assert.NotNil(t, 
aggregated) 100 | expected, _ := bow.NewBowFromColBasedInterfaces( 101 | []string{timeCol, "double", valueCol}, 102 | []bow.Type{bow.Int64, bow.Float64, bow.Float64}, 103 | [][]interface{}{ 104 | {10, 20}, 105 | {6., 4.}, 106 | {3., 2.}, 107 | }) 108 | assert.True(t, aggregated.Equal(expected)) 109 | }) 110 | 111 | t.Run("missing interval colIndex", func(t *testing.T) { 112 | _, err := r.Aggregate(valueAggr).Bow() 113 | assert.EqualError(t, err, fmt.Sprintf( 114 | "intervalRolling.indexedAggregations: must keep interval column '%s'", timeCol)) 115 | }) 116 | 117 | t.Run("invalid colIndex", func(t *testing.T) { 118 | _, err := r.Aggregate(timeAggr, NewColAggregation("-", false, bow.Int64, 119 | func(col int, w Window) (interface{}, error) { return nil, nil })).Bow() 120 | assert.EqualError(t, err, 121 | "intervalRolling.indexedAggregations: no column '-'") 122 | }) 123 | } 124 | 125 | func TestWindow_UnsetInclusive(t *testing.T) { 126 | inclusiveBow, err := bow.NewBowFromColBasedInterfaces( 127 | []string{timeCol, valueCol}, 128 | []bow.Type{bow.Int64, bow.Int64}, 129 | [][]interface{}{ 130 | {1, 2}, 131 | {1, 2}}) 132 | assert.NoError(t, err) 133 | exclusiveBow, err := bow.NewBowFromColBasedInterfaces( 134 | []string{timeCol, valueCol}, 135 | []bow.Type{bow.Int64, bow.Int64}, 136 | [][]interface{}{ 137 | {1}, 138 | {1}}) 139 | assert.NoError(t, err) 140 | 141 | inclusiveWindow := Window{ 142 | Bow: inclusiveBow, 143 | FirstIndex: 0, 144 | IntervalColIndex: 0, 145 | FirstValue: 0, 146 | LastValue: 2, 147 | IsInclusive: true, 148 | } 149 | 150 | exclusiveWindow := inclusiveWindow.UnsetInclusive() 151 | assert.True(t, exclusiveWindow.Bow.Equal(exclusiveBow)) 152 | exclusiveWindow.Bow = nil 153 | assert.Equal(t, Window{ 154 | Bow: nil, 155 | FirstIndex: 0, 156 | IntervalColIndex: 0, 157 | FirstValue: 0, 158 | LastValue: 2, 159 | IsInclusive: false, 160 | }, exclusiveWindow) 161 | 162 | // inclusive window should not be modified 163 | assert.Equal(t, Window{ 164 | Bow: 
inclusiveBow, 165 | FirstIndex: 0, 166 | IntervalColIndex: 0, 167 | FirstValue: 0, 168 | LastValue: 2, 169 | IsInclusive: true, 170 | }, inclusiveWindow) 171 | } 172 | -------------------------------------------------------------------------------- /rolling/interpolation.go: -------------------------------------------------------------------------------- 1 | package rolling 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/metronlab/bow" 7 | ) 8 | 9 | // ColInterpolation is used to interpolate a column. 10 | type ColInterpolation struct { 11 | colName string 12 | inputTypes []bow.Type 13 | fn ColInterpolationFunc 14 | 15 | colIndex int 16 | } 17 | 18 | // ColInterpolationFunc is a function that take a column index, a Window, the full bow.Bow and the previous row, and provides a value at the start of the Window. 19 | type ColInterpolationFunc func(colIndex int, window Window, fullBow, prevRow bow.Bow) (interface{}, error) 20 | 21 | // NewColInterpolation returns a new ColInterpolation. 22 | func NewColInterpolation(colName string, inputTypes []bow.Type, fn ColInterpolationFunc) ColInterpolation { 23 | return ColInterpolation{ 24 | colName: colName, 25 | inputTypes: inputTypes, 26 | fn: fn, 27 | } 28 | } 29 | 30 | func (r *intervalRolling) Interpolate(interps ...ColInterpolation) Rolling { 31 | if r.err != nil { 32 | return r 33 | } 34 | 35 | rCopy := *r 36 | if len(interps) == 0 { 37 | return rCopy.setError(fmt.Errorf("at least one column interpolation is required")) 38 | } 39 | 40 | newIntervalCol := -1 41 | for i := range interps { 42 | isInterval, err := r.validateInterpolation(&interps[i], i) 43 | if err != nil { 44 | return rCopy.setError(fmt.Errorf("intervalRolling.validateInterpolation: %w", err)) 45 | } 46 | if isInterval { 47 | newIntervalCol = i 48 | } 49 | } 50 | 51 | if newIntervalCol == -1 { 52 | return rCopy.setError(fmt.Errorf("must keep interval column '%s'", r.bow.ColumnName(r.intervalColIndex))) 53 | } 54 | 55 | b, err := 
rCopy.interpolateWindows(interps) 56 | if err != nil { 57 | return rCopy.setError(fmt.Errorf("intervalRolling.interpolateWindows: %w", err)) 58 | } 59 | if b == nil { 60 | b = r.bow.NewEmptySlice() 61 | } 62 | 63 | newR, err := newIntervalRolling(b, newIntervalCol, rCopy.interval, rCopy.options) 64 | if err != nil { 65 | return rCopy.setError(fmt.Errorf("newIntervalRolling: %w", err)) 66 | } 67 | 68 | return newR 69 | } 70 | 71 | func (r *intervalRolling) validateInterpolation(interp *ColInterpolation, newIndex int) (bool, error) { 72 | if interp.colName == "" { 73 | return false, fmt.Errorf("interpolation %d has no column name", newIndex) 74 | } 75 | 76 | var err error 77 | interp.colIndex, err = r.bow.ColumnIndex(interp.colName) 78 | if err != nil { 79 | return false, err 80 | } 81 | 82 | var typeOk bool 83 | colType := r.bow.ColumnType(interp.colIndex) 84 | for _, inputType := range interp.inputTypes { 85 | if colType == inputType { 86 | typeOk = true 87 | break 88 | } 89 | } 90 | if !typeOk { 91 | return false, fmt.Errorf("accepts types %v, got type %s", 92 | interp.inputTypes, colType) 93 | } 94 | 95 | return interp.colIndex == r.intervalColIndex, nil 96 | } 97 | 98 | func (r *intervalRolling) interpolateWindows(interps []ColInterpolation) (bow.Bow, error) { 99 | rCopy := *r 100 | 101 | bows := make([]bow.Bow, rCopy.numWindows) 102 | 103 | for rCopy.HasNext() { 104 | winIndex, w, err := rCopy.Next() 105 | if err != nil { 106 | return nil, err 107 | } 108 | 109 | bows[winIndex], err = rCopy.interpolateWindow(interps, w) 110 | if err != nil { 111 | return nil, err 112 | } 113 | } 114 | 115 | return bow.AppendBows(bows...) 
116 | } 117 | 118 | func (r *intervalRolling) interpolateWindow(interps []ColInterpolation, window *Window) (bow.Bow, error) { 119 | var firstColValue int64 = -1 120 | if window.Bow.NumRows() > 0 { 121 | firstColVal, i := window.Bow.GetNextFloat64(r.intervalColIndex, 0) 122 | if i > -1 { 123 | firstColValue = int64(firstColVal) 124 | } 125 | } 126 | 127 | // has start: call interpolation anyway for those stateful 128 | if firstColValue == window.FirstValue { 129 | for _, interpolation := range interps { 130 | _, err := interpolation.fn(interpolation.colIndex, *window, r.bow, r.options.PrevRow) 131 | if err != nil { 132 | return nil, err 133 | } 134 | } 135 | 136 | return window.Bow, nil 137 | } 138 | 139 | // missing start 140 | series := make([]bow.Series, len(interps)) 141 | for colIndex, interpolation := range interps { 142 | colType := window.Bow.ColumnType(interpolation.colIndex) 143 | 144 | interpolatedValue, err := interpolation.fn(interpolation.colIndex, *window, r.bow, r.options.PrevRow) 145 | if err != nil { 146 | return nil, err 147 | } 148 | 149 | buf := bow.NewBuffer(1, colType) 150 | buf.SetOrDrop(0, interpolatedValue) 151 | 152 | series[colIndex] = bow.NewSeriesFromBuffer(window.Bow.ColumnName(interpolation.colIndex), buf) 153 | } 154 | 155 | startBow, err := bow.NewBow(series...) 
156 | if err != nil { 157 | return nil, err 158 | } 159 | 160 | return bow.AppendBows(startBow, window.Bow) 161 | } 162 | -------------------------------------------------------------------------------- /rolling/interpolation/linear.go: -------------------------------------------------------------------------------- 1 | package interpolation 2 | 3 | import ( 4 | "github.com/metronlab/bow" 5 | "github.com/metronlab/bow/rolling" 6 | ) 7 | 8 | func Linear(colName string) rolling.ColInterpolation { 9 | var prevT0, prevV0 float64 10 | var prevValid bool 11 | return rolling.NewColInterpolation(colName, []bow.Type{bow.Int64, bow.Float64}, 12 | func(colIndexToFill int, w rolling.Window, fullBow, prevRow bow.Bow) (interface{}, error) { 13 | var prevValidT0, prevValidV0 bool 14 | if w.FirstIndex == 0 && prevRow != nil { 15 | prevT0, prevValidT0 = prevRow.GetFloat64(w.IntervalColIndex, prevRow.NumRows()-1) 16 | prevV0, prevValidV0 = prevRow.GetFloat64(colIndexToFill, prevRow.NumRows()-1) 17 | prevValid = prevValidT0 && prevValidV0 18 | } 19 | 20 | t0, v0, prevIndex := fullBow.GetPrevFloat64s(w.IntervalColIndex, colIndexToFill, w.FirstIndex-1) 21 | if prevIndex == -1 { 22 | if !prevValid { 23 | return nil, nil 24 | } 25 | t0 = prevT0 26 | v0 = prevV0 27 | } 28 | 29 | t2, v2, nextIndex := fullBow.GetNextFloat64s(w.IntervalColIndex, colIndexToFill, w.FirstIndex) 30 | if nextIndex == -1 { 31 | return nil, nil 32 | } 33 | 34 | coef := (float64(w.FirstValue) - t0) / (t2 - t0) 35 | return ((v2 - v0) * coef) + v0, nil 36 | }, 37 | ) 38 | } 39 | -------------------------------------------------------------------------------- /rolling/interpolation/linear_test.go: -------------------------------------------------------------------------------- 1 | package interpolation 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/metronlab/bow" 8 | "github.com/metronlab/bow/rolling" 9 | "github.com/stretchr/testify/assert" 10 | "github.com/stretchr/testify/require" 11 | ) 12 | 13 | func 
TestLinear(t *testing.T) { 14 | var interval int64 = 2 15 | 16 | ascLinearTestBow, err := bow.NewBowFromRowBasedInterfaces( 17 | []string{timeCol, valueCol}, 18 | []bow.Type{bow.Int64, bow.Float64}, 19 | [][]interface{}{ 20 | {10, 10.}, 21 | {15, 15.}, 22 | {17, 17.}, 23 | }) 24 | require.NoError(t, err) 25 | 26 | t.Run("asc no options", func(t *testing.T) { 27 | expected, err := bow.NewBowFromRowBasedInterfaces( 28 | []string{timeCol, valueCol}, 29 | []bow.Type{bow.Int64, bow.Float64}, 30 | [][]interface{}{ 31 | {10, 10.}, 32 | {12, 12.}, 33 | {14, 14.}, 34 | {15, 15.}, 35 | {16, 16.}, 36 | {17, 17.}, 37 | }) 38 | require.NoError(t, err) 39 | 40 | r, err := rolling.IntervalRolling(ascLinearTestBow, timeCol, interval, rolling.Options{}) 41 | require.NoError(t, err) 42 | 43 | filled, err := r.Interpolate(WindowStart(timeCol), Linear(valueCol)).Bow() 44 | assert.NoError(t, err) 45 | assert.True(t, filled.Equal(expected), 46 | fmt.Sprintf("expected:\n%s\nactual:\n%s", expected.String(), filled.String())) 47 | }) 48 | 49 | t.Run("asc with offset", func(t *testing.T) { 50 | expected, err := bow.NewBowFromRowBasedInterfaces( 51 | []string{timeCol, valueCol}, 52 | []bow.Type{bow.Int64, bow.Float64}, 53 | [][]interface{}{ 54 | {9, nil}, 55 | {10, 10.}, 56 | {11, 11.}, 57 | {13, 13.}, 58 | {15, 15.}, 59 | {17, 17.}, 60 | }) 61 | require.NoError(t, err) 62 | 63 | r, err := rolling.IntervalRolling(ascLinearTestBow, timeCol, interval, rolling.Options{Offset: 3}) 64 | require.NoError(t, err) 65 | 66 | filled, err := r.Interpolate(WindowStart(timeCol), Linear(valueCol)).Bow() 67 | assert.NoError(t, err) 68 | assert.True(t, filled.Equal(expected), 69 | fmt.Sprintf("expected:\n%s\nactual:\n%s", expected.String(), filled.String())) 70 | }) 71 | 72 | descLinearTestBow, err := bow.NewBowFromRowBasedInterfaces( 73 | []string{timeCol, valueCol}, 74 | []bow.Type{bow.Int64, bow.Float64}, 75 | [][]interface{}{ 76 | {10, 30.}, 77 | {15, 25.}, 78 | {17, 24.}, 79 | }) 80 | require.NoError(t, 
err) 81 | 82 | t.Run("desc no options", func(t *testing.T) { 83 | expected, err := bow.NewBowFromRowBasedInterfaces( 84 | []string{timeCol, valueCol}, 85 | []bow.Type{bow.Int64, bow.Float64}, 86 | [][]interface{}{ 87 | {10, 30.}, 88 | {12, 28.}, 89 | {14, 26.}, 90 | {15, 25.}, 91 | {16, 24.5}, 92 | {17, 24.}, 93 | }) 94 | require.NoError(t, err) 95 | 96 | r, err := rolling.IntervalRolling(descLinearTestBow, timeCol, interval, rolling.Options{}) 97 | require.NoError(t, err) 98 | 99 | filled, err := r.Interpolate(WindowStart(timeCol), Linear(valueCol)).Bow() 100 | assert.NoError(t, err) 101 | assert.True(t, filled.Equal(expected), 102 | fmt.Sprintf("expected:\n%s\nactual:\n%s", expected.String(), filled.String())) 103 | }) 104 | 105 | t.Run("desc with offset", func(t *testing.T) { 106 | expected, err := bow.NewBowFromRowBasedInterfaces( 107 | []string{timeCol, valueCol}, 108 | []bow.Type{bow.Int64, bow.Float64}, 109 | [][]interface{}{ 110 | {9, nil}, 111 | {10, 30.}, 112 | {11, 29.}, 113 | {13, 27.}, 114 | {15, 25.}, 115 | {17, 24.}, 116 | }) 117 | require.NoError(t, err) 118 | 119 | r, err := rolling.IntervalRolling(descLinearTestBow, timeCol, interval, rolling.Options{Offset: 3}) 120 | require.NoError(t, err) 121 | 122 | filled, err := r.Interpolate(WindowStart(timeCol), Linear(valueCol)).Bow() 123 | assert.NoError(t, err) 124 | assert.True(t, filled.Equal(expected), 125 | fmt.Sprintf("expected:\n%s\nactual:\n%s", expected.String(), filled.String())) 126 | }) 127 | 128 | t.Run("string error", func(t *testing.T) { 129 | b, err := bow.NewBowFromRowBasedInterfaces( 130 | []string{timeCol, valueCol}, 131 | []bow.Type{bow.Int64, bow.String}, 132 | [][]interface{}{ 133 | {10, "test"}, 134 | {15, "test2"}, 135 | }) 136 | require.NoError(t, err) 137 | 138 | r, err := rolling.IntervalRolling(b, timeCol, interval, rolling.Options{}) 139 | require.NoError(t, err) 140 | 141 | _, err = r.Interpolate(WindowStart(timeCol), Linear(valueCol)).Bow() 142 | assert.EqualError(t, err, 
143 | "intervalRolling.validateInterpolation: accepts types [int64 float64], got type utf8") 144 | }) 145 | 146 | t.Run("bool error", func(t *testing.T) { 147 | b, err := bow.NewBowFromRowBasedInterfaces( 148 | []string{timeCol, valueCol}, 149 | []bow.Type{bow.Int64, bow.Boolean}, 150 | [][]interface{}{ 151 | {10, true}, 152 | {15, false}, 153 | }) 154 | require.NoError(t, err) 155 | 156 | r, err := rolling.IntervalRolling(b, timeCol, interval, rolling.Options{}) 157 | require.NoError(t, err) 158 | 159 | res, err := r.Interpolate(WindowStart(timeCol), Linear(valueCol)).Bow() 160 | assert.EqualError(t, err, 161 | "intervalRolling.validateInterpolation: accepts types [int64 float64], got type bool", 162 | "have res: %v", res) 163 | }) 164 | } 165 | -------------------------------------------------------------------------------- /rolling/interpolation/none.go: -------------------------------------------------------------------------------- 1 | package interpolation 2 | 3 | import ( 4 | "github.com/metronlab/bow" 5 | "github.com/metronlab/bow/rolling" 6 | ) 7 | 8 | func None(colName string) rolling.ColInterpolation { 9 | return rolling.NewColInterpolation(colName, []bow.Type{bow.Int64, bow.Float64, bow.Boolean}, 10 | func(colIndexToFill int, w rolling.Window, fullBow, prevRow bow.Bow) (interface{}, error) { 11 | return nil, nil 12 | }, 13 | ) 14 | } 15 | -------------------------------------------------------------------------------- /rolling/interpolation/none_test.go: -------------------------------------------------------------------------------- 1 | package interpolation 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/metronlab/bow" 8 | "github.com/metronlab/bow/rolling" 9 | "github.com/stretchr/testify/assert" 10 | "github.com/stretchr/testify/require" 11 | ) 12 | 13 | func TestNone(t *testing.T) { 14 | var interval int64 = 2 15 | 16 | b, err := bow.NewBowFromRowBasedInterfaces( 17 | []string{timeCol, valueCol}, 18 | []bow.Type{bow.Int64, 
bow.Float64}, 19 | [][]interface{}{ 20 | {10, 1.0}, 21 | {13, 1.3}, 22 | }) 23 | require.NoError(t, err) 24 | 25 | t.Run("no options", func(t *testing.T) { 26 | expected, err := bow.NewBowFromRowBasedInterfaces( 27 | []string{timeCol, valueCol}, 28 | []bow.Type{bow.Int64, bow.Float64}, 29 | [][]interface{}{ 30 | {10, 1.0}, 31 | {12, nil}, 32 | {13, 1.3}, 33 | }) 34 | require.NoError(t, err) 35 | 36 | r, err := rolling.IntervalRolling(b, timeCol, interval, rolling.Options{}) 37 | require.NoError(t, err) 38 | 39 | filled, err := r.Interpolate(WindowStart(timeCol), None(valueCol)).Bow() 40 | assert.NoError(t, err) 41 | assert.True(t, filled.Equal(expected), 42 | fmt.Sprintf("expected:\n%s\nactual:\n%s", expected.String(), filled.String())) 43 | }) 44 | 45 | t.Run("with offset", func(t *testing.T) { 46 | expected, err := bow.NewBowFromRowBasedInterfaces( 47 | []string{timeCol, valueCol}, 48 | []bow.Type{bow.Int64, bow.Float64}, 49 | [][]interface{}{ 50 | {9, nil}, 51 | {10, 1.0}, 52 | {11, nil}, 53 | {13, 1.3}, 54 | }) 55 | require.NoError(t, err) 56 | 57 | r, err := rolling.IntervalRolling(b, timeCol, interval, rolling.Options{Offset: 1}) 58 | require.NoError(t, err) 59 | 60 | filled, err := r.Interpolate(WindowStart(timeCol), None(valueCol)).Bow() 61 | assert.NoError(t, err) 62 | assert.True(t, filled.Equal(expected), 63 | fmt.Sprintf("expected:\n%s\nactual:\n%s", expected.String(), filled.String())) 64 | }) 65 | } 66 | -------------------------------------------------------------------------------- /rolling/interpolation/stepprevious.go: -------------------------------------------------------------------------------- 1 | package interpolation 2 | 3 | import ( 4 | "github.com/metronlab/bow" 5 | "github.com/metronlab/bow/rolling" 6 | ) 7 | 8 | func StepPrevious(colName string) rolling.ColInterpolation { 9 | var prevVal interface{} 10 | return rolling.NewColInterpolation(colName, []bow.Type{bow.Int64, bow.Float64, bow.Boolean, bow.String}, 11 | func(colIndexToFill int, 
w rolling.Window, fullBow, prevRow bow.Bow) (interface{}, error) { 12 | // For the first window, add the previous row to interpolate correctly 13 | if w.FirstIndex == 0 && prevRow != nil { 14 | prevVal = prevRow.GetValue(colIndexToFill, prevRow.NumRows()-1) 15 | } 16 | 17 | var v interface{} 18 | _, v, _ = fullBow.GetPrevValues(w.IntervalColIndex, colIndexToFill, w.FirstIndex-1) 19 | if v != nil { 20 | prevVal = v 21 | } 22 | 23 | return prevVal, nil 24 | }, 25 | ) 26 | } 27 | -------------------------------------------------------------------------------- /rolling/interpolation/stepprevious_test.go: -------------------------------------------------------------------------------- 1 | package interpolation 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/metronlab/bow" 8 | "github.com/metronlab/bow/rolling" 9 | "github.com/stretchr/testify/assert" 10 | "github.com/stretchr/testify/require" 11 | ) 12 | 13 | const ( 14 | timeCol = "time" 15 | valueCol = "value" 16 | ) 17 | 18 | func TestStepPrevious(t *testing.T) { 19 | t.Run("no options", func(t *testing.T) { 20 | b, err := bow.NewBowFromRowBasedInterfaces( 21 | []string{timeCol, valueCol}, 22 | []bow.Type{bow.Int64, bow.Float64}, 23 | [][]interface{}{ 24 | {10, 1.0}, 25 | {13, 1.3}, 26 | }) 27 | require.NoError(t, err) 28 | 29 | expected, err := bow.NewBowFromRowBasedInterfaces( 30 | []string{timeCol, valueCol}, 31 | []bow.Type{bow.Int64, bow.Float64}, 32 | [][]interface{}{ 33 | {10, 1.0}, 34 | {12, 1.0}, 35 | {13, 1.3}, 36 | }) 37 | require.NoError(t, err) 38 | 39 | r, err := rolling.IntervalRolling(b, timeCol, 2, rolling.Options{}) 40 | require.NoError(t, err) 41 | 42 | filled, err := r.Interpolate(WindowStart(timeCol), StepPrevious(valueCol)).Bow() 43 | assert.NoError(t, err) 44 | assert.True(t, filled.Equal(expected), 45 | fmt.Sprintf("expected:\n%s\nactual:\n%s", expected.String(), filled.String())) 46 | }) 47 | 48 | t.Run("bool", func(t *testing.T) { 49 | b, err := bow.NewBowFromRowBasedInterfaces( 
50 | []string{timeCol, valueCol}, 51 | []bow.Type{bow.Int64, bow.Boolean}, 52 | [][]interface{}{ 53 | {10, true}, 54 | {13, false}, 55 | }) 56 | require.NoError(t, err) 57 | 58 | expected, err := bow.NewBowFromRowBasedInterfaces( 59 | []string{timeCol, valueCol}, 60 | []bow.Type{bow.Int64, bow.Boolean}, 61 | [][]interface{}{ 62 | {10, true}, 63 | {12, true}, 64 | {13, false}, 65 | }) 66 | require.NoError(t, err) 67 | 68 | r, err := rolling.IntervalRolling(b, timeCol, 2, rolling.Options{}) 69 | require.NoError(t, err) 70 | 71 | filled, err := r.Interpolate(WindowStart(timeCol), StepPrevious(valueCol)).Bow() 72 | assert.NoError(t, err) 73 | assert.True(t, filled.Equal(expected), 74 | fmt.Sprintf("expected:\n%s\nactual:\n%s", expected.String(), filled.String())) 75 | }) 76 | 77 | t.Run("string", func(t *testing.T) { 78 | b, err := bow.NewBowFromRowBasedInterfaces( 79 | []string{timeCol, valueCol}, 80 | []bow.Type{bow.Int64, bow.String}, 81 | [][]interface{}{ 82 | {10, "test"}, 83 | {13, "test2"}, 84 | }) 85 | require.NoError(t, err) 86 | 87 | expected, err := bow.NewBowFromRowBasedInterfaces( 88 | []string{timeCol, valueCol}, 89 | []bow.Type{bow.Int64, bow.String}, 90 | [][]interface{}{ 91 | {10, "test"}, 92 | {12, "test"}, 93 | {13, "test2"}, 94 | }) 95 | require.NoError(t, err) 96 | 97 | r, err := rolling.IntervalRolling(b, timeCol, 2, rolling.Options{}) 98 | require.NoError(t, err) 99 | 100 | filled, err := r.Interpolate(WindowStart(timeCol), StepPrevious(valueCol)).Bow() 101 | assert.NoError(t, err) 102 | assert.True(t, filled.Equal(expected), 103 | fmt.Sprintf("expected:\n%s\nactual:\n%s", expected.String(), filled.String())) 104 | }) 105 | 106 | t.Run("with offset", func(t *testing.T) { 107 | b, err := bow.NewBowFromRowBasedInterfaces( 108 | []string{timeCol, valueCol}, 109 | []bow.Type{bow.Int64, bow.Float64}, 110 | [][]interface{}{ 111 | {10, 1.0}, 112 | {13, 1.3}, 113 | }) 114 | require.NoError(t, err) 115 | 116 | expected, err := 
bow.NewBowFromRowBasedInterfaces( 117 | []string{timeCol, valueCol}, 118 | []bow.Type{bow.Int64, bow.Float64}, 119 | [][]interface{}{ 120 | {9, nil}, 121 | {10, 1.0}, 122 | {11, 1.0}, 123 | {13, 1.3}, 124 | }) 125 | require.NoError(t, err) 126 | 127 | r, err := rolling.IntervalRolling(b, timeCol, 2, rolling.Options{Offset: 1}) 128 | require.NoError(t, err) 129 | 130 | filled, err := r.Interpolate(WindowStart(timeCol), StepPrevious(valueCol)).Bow() 131 | assert.NoError(t, err) 132 | assert.True(t, filled.Equal(expected), 133 | fmt.Sprintf("expected:\n%s\nactual:\n%s", expected.String(), filled.String())) 134 | }) 135 | 136 | t.Run("with nils", func(t *testing.T) { 137 | b, err := bow.NewBowFromRowBasedInterfaces( 138 | []string{timeCol, valueCol}, 139 | []bow.Type{bow.Int64, bow.Float64}, 140 | [][]interface{}{ 141 | {10, 1.0}, 142 | {11, nil}, 143 | {13, nil}, 144 | {15, 1.5}, 145 | }) 146 | require.NoError(t, err) 147 | 148 | expected, err := bow.NewBowFromRowBasedInterfaces( 149 | []string{timeCol, valueCol}, 150 | []bow.Type{bow.Int64, bow.Float64}, 151 | [][]interface{}{ 152 | {10, 1.0}, 153 | {11, nil}, 154 | {12, 1.0}, 155 | {13, nil}, 156 | {14, 1.0}, 157 | {15, 1.5}, 158 | }) 159 | require.NoError(t, err) 160 | 161 | r, err := rolling.IntervalRolling(b, timeCol, 2, rolling.Options{}) 162 | require.NoError(t, err) 163 | 164 | filled, err := r.Interpolate(WindowStart(timeCol), StepPrevious(valueCol)).Bow() 165 | assert.NoError(t, err) 166 | assert.True(t, filled.Equal(expected), 167 | fmt.Sprintf("expected:\n%s\nactual:\n%s", expected.String(), filled.String())) 168 | }) 169 | } 170 | -------------------------------------------------------------------------------- /rolling/interpolation/windowstart.go: -------------------------------------------------------------------------------- 1 | package interpolation 2 | 3 | import ( 4 | "github.com/metronlab/bow" 5 | "github.com/metronlab/bow/rolling" 6 | ) 7 | 8 | func WindowStart(colName string) 
rolling.ColInterpolation { 9 | return rolling.NewColInterpolation(colName, []bow.Type{bow.Int64}, 10 | func(colIndexToFill int, w rolling.Window, fullBow, prevRow bow.Bow) (interface{}, error) { 11 | return w.FirstValue, nil 12 | }, 13 | ) 14 | } 15 | -------------------------------------------------------------------------------- /rolling/interpolation/windowstart_test.go: -------------------------------------------------------------------------------- 1 | package interpolation 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/metronlab/bow" 8 | "github.com/metronlab/bow/rolling" 9 | "github.com/stretchr/testify/assert" 10 | "github.com/stretchr/testify/require" 11 | ) 12 | 13 | func TestWindowStart(t *testing.T) { 14 | var interval int64 = 2 15 | 16 | b, err := bow.NewBowFromRowBasedInterfaces( 17 | []string{timeCol}, 18 | []bow.Type{bow.Int64}, 19 | [][]interface{}{ 20 | {10}, 21 | {13}, 22 | }) 23 | require.NoError(t, err) 24 | 25 | t.Run("no options", func(t *testing.T) { 26 | expected, err := bow.NewBowFromRowBasedInterfaces( 27 | []string{timeCol}, 28 | []bow.Type{bow.Int64}, 29 | [][]interface{}{ 30 | {10}, 31 | {12}, 32 | {13}, 33 | }) 34 | require.NoError(t, err) 35 | 36 | r, err := rolling.IntervalRolling(b, timeCol, interval, rolling.Options{}) 37 | require.NoError(t, err) 38 | 39 | filled, err := r.Interpolate(WindowStart(timeCol)).Bow() 40 | assert.NoError(t, err) 41 | assert.True(t, filled.Equal(expected), 42 | fmt.Sprintf("expected:\n%s\nactual:\n%s", expected.String(), filled.String())) 43 | }) 44 | 45 | t.Run("with offset", func(t *testing.T) { 46 | expected, err := bow.NewBowFromRowBasedInterfaces( 47 | []string{timeCol}, 48 | []bow.Type{bow.Int64}, 49 | [][]interface{}{ 50 | {9}, 51 | {10}, 52 | {11}, 53 | {13}, 54 | }) 55 | require.NoError(t, err) 56 | 57 | r, err := rolling.IntervalRolling(b, timeCol, interval, rolling.Options{Offset: 1.}) 58 | require.NoError(t, err) 59 | 60 | filled, err := 
r.Interpolate(WindowStart(timeCol)).Bow() 61 | assert.NoError(t, err) 62 | assert.True(t, filled.Equal(expected), 63 | fmt.Sprintf("expected:\n%s\nactual:\n%s", expected.String(), filled.String())) 64 | }) 65 | } 66 | -------------------------------------------------------------------------------- /rolling/interpolation_test.go: -------------------------------------------------------------------------------- 1 | package rolling 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/metronlab/bow" 8 | "github.com/stretchr/testify/assert" 9 | ) 10 | 11 | func TestIntervalRollingIter_Interpolate(t *testing.T) { 12 | timeInterp := NewColInterpolation(timeCol, []bow.Type{bow.Int64}, 13 | func(colIndex int, w Window, full, prevRow bow.Bow) (interface{}, error) { 14 | return w.FirstValue, nil 15 | }) 16 | valueInterp := NewColInterpolation(valueCol, []bow.Type{bow.Int64, bow.Float64}, 17 | func(colIndex int, w Window, full, prevRow bow.Bow) (interface{}, error) { 18 | return 9.9, nil 19 | }) 20 | 21 | t.Run("invalid input type", func(t *testing.T) { 22 | b, _ := bow.NewBowFromColBasedInterfaces([]string{timeCol, valueCol}, []bow.Type{bow.Int64, bow.Float64}, [][]interface{}{ 23 | {10, 13}, 24 | {1.0, 1.3}, 25 | }) 26 | r, _ := IntervalRolling(b, timeCol, 2, Options{}) 27 | interp := NewColInterpolation(valueCol, []bow.Type{bow.Int64, bow.Boolean}, 28 | func(colIndex int, w Window, full, prevRow bow.Bow) (interface{}, error) { 29 | return true, nil 30 | }) 31 | _, err := r. 32 | Interpolate(timeInterp, interp). 33 | Bow() 34 | assert.EqualError(t, err, "intervalRolling.validateInterpolation: accepts types [int64 bool], got type float64") 35 | }) 36 | 37 | t.Run("missing interval column", func(t *testing.T) { 38 | b, _ := bow.NewBowFromColBasedInterfaces([]string{timeCol, valueCol}, []bow.Type{bow.Int64, bow.Float64}, [][]interface{}{ 39 | {10, 13}, 40 | {1.0, 1.3}, 41 | }) 42 | r, _ := IntervalRolling(b, timeCol, 2, Options{}) 43 | _, err := r. 
44 | Interpolate(valueInterp). 45 | Bow() 46 | assert.EqualError(t, err, fmt.Sprintf("must keep interval column '%s'", timeCol)) 47 | }) 48 | 49 | t.Run("empty bow", func(t *testing.T) { 50 | b, _ := bow.NewBowFromColBasedInterfaces([]string{timeCol, valueCol}, []bow.Type{bow.Int64, bow.Float64}, [][]interface{}{ 51 | {}, 52 | {}, 53 | }) 54 | r, _ := IntervalRolling(b, timeCol, 2, Options{}) 55 | 56 | filled, err := r. 57 | Interpolate(timeInterp, valueInterp). 58 | Bow() 59 | assert.Nil(t, err) 60 | 61 | assert.True(t, filled.Equal(b), fmt.Sprintf("expected %v\nactual %v", b, filled)) 62 | }) 63 | 64 | t.Run("no options", func(t *testing.T) { 65 | b, _ := bow.NewBowFromColBasedInterfaces([]string{timeCol, valueCol}, []bow.Type{bow.Int64, bow.Float64}, [][]interface{}{ 66 | {10, 13}, 67 | {1.0, 1.3}, 68 | }) 69 | r, _ := IntervalRolling(b, timeCol, 2, Options{}) 70 | 71 | filled, err := r. 72 | Interpolate(timeInterp, valueInterp). 73 | Bow() 74 | assert.Nil(t, err) 75 | 76 | expected, _ := bow.NewBowFromColBasedInterfaces([]string{timeCol, valueCol}, []bow.Type{bow.Int64, bow.Float64}, [][]interface{}{ 77 | {10, 12, 13}, 78 | {1.0, 9.9, 1.3}, 79 | }) 80 | assert.True(t, filled.Equal(expected), fmt.Sprintf("expected %v\nactual %v", expected, filled)) 81 | }) 82 | 83 | t.Run("with offset", func(t *testing.T) { 84 | b, _ := bow.NewBowFromColBasedInterfaces([]string{timeCol, valueCol}, []bow.Type{bow.Int64, bow.Float64}, [][]interface{}{ 85 | {10, 13}, 86 | {1.0, 1.3}, 87 | }) 88 | r, _ := IntervalRolling(b, timeCol, 2, Options{Offset: 1}) 89 | 90 | filled, err := r. 91 | Interpolate(timeInterp, valueInterp). 
92 | Bow() 93 | assert.Nil(t, err) 94 | 95 | expected, _ := bow.NewBowFromColBasedInterfaces([]string{timeCol, valueCol}, []bow.Type{bow.Int64, bow.Float64}, [][]interface{}{ 96 | {9, 10, 11, 13}, 97 | {9.9, 1.0, 9.9, 1.3}, 98 | }) 99 | assert.True(t, filled.Equal(expected), fmt.Sprintf("expected %v\nactual %v", expected, filled)) 100 | }) 101 | } 102 | -------------------------------------------------------------------------------- /rolling/rolling.go: -------------------------------------------------------------------------------- 1 | package rolling 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | 7 | "github.com/metronlab/bow" 8 | ) 9 | 10 | // Rolling enables processing a Bow via windows. 11 | // Use Interpolate() and/or Aggregate() to transform windows. 12 | // Use Next() to iterate over windows. 13 | // Use Bow() to get the processed Bow. 14 | type Rolling interface { 15 | // Aggregate aggregates each column by using a ColAggregation. 16 | Aggregate(...ColAggregation) Rolling 17 | // Interpolate fills each window by interpolating its start if missing. 18 | Interpolate(...ColInterpolation) Rolling 19 | 20 | // NumWindows returns the total number of windows in the Bow. 21 | NumWindows() (int, error) 22 | // HasNext returns true if the next call to Next() will return a new Window. 23 | HasNext() bool 24 | // Next returns the next Window, along with its index. 25 | Next() (windowIndex int, window *Window, err error) 26 | 27 | // Bow returns the Bow from the Rolling. 28 | Bow() (bow.Bow, error) 29 | } 30 | 31 | type intervalRolling struct { 32 | // TODO: sync.Mutex 33 | bow bow.Bow 34 | intervalColIndex int 35 | interval int64 36 | options Options 37 | numWindows int 38 | 39 | currWindowFirstValue int64 40 | currRowIndex int 41 | currWindowIndex int 42 | err error 43 | } 44 | 45 | // Options sets options for IntervalRolling: 46 | // - Offset: interval to move the window start, can be negative. 
47 | // - Inclusive: sets if the window needs to be inclusive; i.e., includes the last point. 48 | // - PrevRow: extra point before the window to enable better interpolation. 49 | type Options struct { 50 | Offset int64 51 | Inclusive bool 52 | PrevRow bow.Bow 53 | } 54 | 55 | // IntervalRolling returns a new interval-based Rolling with: 56 | // - b: Bow to process in windows 57 | // - colName: column on which the interval is based on 58 | // - interval: numeric value independent of any unit, length of the windows 59 | // All windows except the last one may be empty. 60 | func IntervalRolling(b bow.Bow, colName string, interval int64, options Options) (Rolling, error) { 61 | colIndex, err := b.ColumnIndex(colName) 62 | if err != nil { 63 | return nil, err 64 | } 65 | 66 | return newIntervalRolling(b, colIndex, interval, options) 67 | } 68 | 69 | func newIntervalRolling(b bow.Bow, intervalColIndex int, interval int64, options Options) (Rolling, error) { 70 | if b.ColumnType(intervalColIndex) != bow.Int64 { 71 | return nil, fmt.Errorf("impossible to create a new intervalRolling on column of type %v", 72 | b.ColumnType(intervalColIndex)) 73 | } 74 | 75 | var err error 76 | options.Offset, err = enforceIntervalAndOffset(interval, options.Offset) 77 | if err != nil { 78 | return nil, fmt.Errorf("enforceIntervalAndOffset: %w", err) 79 | } 80 | 81 | options.PrevRow, err = enforcePrevRow(options.PrevRow) 82 | if err != nil { 83 | return nil, fmt.Errorf("enforcePrevRow: %w", err) 84 | } 85 | 86 | var windowFirstValue int64 87 | if b.NumRows() > 0 { 88 | firstBowValue, valid := b.GetInt64(intervalColIndex, 0) 89 | if !valid { 90 | return nil, fmt.Errorf( 91 | "the first value of the column should be convertible to int64, got %v", 92 | b.GetValue(intervalColIndex, 0)) 93 | } 94 | 95 | // align window first value on interval 96 | windowFirstValue = (firstBowValue/interval)*interval + options.Offset 97 | if windowFirstValue > firstBowValue { 98 | windowFirstValue -= interval 99 
| } 100 | } 101 | 102 | numWindows := countWindows(b, intervalColIndex, windowFirstValue, interval) 103 | 104 | return &intervalRolling{ 105 | bow: b, 106 | intervalColIndex: intervalColIndex, 107 | interval: interval, 108 | options: options, 109 | numWindows: numWindows, 110 | currWindowFirstValue: windowFirstValue, 111 | }, nil 112 | } 113 | 114 | func enforceIntervalAndOffset(interval, offset int64) (int64, error) { 115 | if interval <= 0 { 116 | return -1, errors.New("strictly positive interval required") 117 | } 118 | 119 | if offset >= interval || offset <= -interval { 120 | offset = offset % interval 121 | } 122 | 123 | if offset < 0 { 124 | offset += interval 125 | } 126 | 127 | return offset, nil 128 | } 129 | 130 | func enforcePrevRow(prevRow bow.Bow) (bow.Bow, error) { 131 | if prevRow == nil || prevRow.NumRows() == 0 { 132 | return nil, nil 133 | } 134 | 135 | if prevRow.NumRows() != 1 { 136 | return nil, fmt.Errorf("prevRow must have only one row, have %d", 137 | prevRow.NumRows()) 138 | } 139 | 140 | return prevRow, nil 141 | } 142 | 143 | func countWindows(b bow.Bow, colIndex int, firstWindowStart, interval int64) int { 144 | if b.NumRows() == 0 { 145 | return 0 146 | } 147 | 148 | lastBowValue, lastBowValueRowIndex := b.GetPrevInt64(colIndex, b.NumRows()-1) 149 | if lastBowValueRowIndex == -1 || firstWindowStart > lastBowValue { 150 | return 0 151 | } 152 | 153 | return int((lastBowValue-firstWindowStart)/interval + 1) 154 | } 155 | 156 | func (r *intervalRolling) NumWindows() (int, error) { 157 | return r.numWindows, r.err 158 | } 159 | 160 | // TODO: concurrent-safe 161 | 162 | func (r *intervalRolling) HasNext() bool { 163 | if r.currRowIndex >= r.bow.NumRows() { 164 | return false 165 | } 166 | 167 | lastBowValue, lastBowValueIsValid := r.bow.GetInt64(r.intervalColIndex, r.bow.NumRows()-1) 168 | if !lastBowValueIsValid { 169 | return false 170 | } 171 | 172 | return r.currWindowFirstValue <= lastBowValue 173 | } 174 | 175 | // TODO: 
concurrent-safe 176 | 177 | func (r *intervalRolling) Next() (windowIndex int, window *Window, err error) { 178 | if !r.HasNext() { 179 | return r.currWindowIndex, nil, nil 180 | } 181 | 182 | firstValue := r.currWindowFirstValue 183 | lastValue := r.currWindowFirstValue + r.interval // include last position even if last point is excluded 184 | 185 | rowIndex := 0 186 | isInclusive := false 187 | firstRowIndex := r.currRowIndex 188 | lastRowIndex := -1 189 | for rowIndex = firstRowIndex; rowIndex < r.bow.NumRows(); rowIndex++ { 190 | val, ok := r.bow.GetInt64(r.intervalColIndex, rowIndex) 191 | if !ok { 192 | continue 193 | } 194 | if val < firstValue { 195 | continue 196 | } 197 | if val > lastValue { 198 | break 199 | } 200 | 201 | if val == lastValue { 202 | if isInclusive { 203 | break 204 | } 205 | if !r.options.Inclusive { 206 | break 207 | } 208 | isInclusive = true 209 | } 210 | 211 | lastRowIndex = rowIndex 212 | } 213 | 214 | if !isInclusive { 215 | r.currRowIndex = rowIndex 216 | } else { 217 | r.currRowIndex = rowIndex - 1 218 | } 219 | 220 | r.currWindowFirstValue = lastValue 221 | windowIndex = r.currWindowIndex 222 | r.currWindowIndex++ 223 | 224 | var b bow.Bow 225 | if lastRowIndex == -1 { 226 | b = r.bow.NewEmptySlice() 227 | } else { 228 | b = r.bow.NewSlice(firstRowIndex, lastRowIndex+1) 229 | } 230 | 231 | return windowIndex, &Window{ 232 | Bow: b, 233 | FirstIndex: firstRowIndex, 234 | IntervalColIndex: r.intervalColIndex, 235 | FirstValue: firstValue, 236 | LastValue: lastValue, 237 | IsInclusive: isInclusive, 238 | }, nil 239 | } 240 | 241 | func (r *intervalRolling) Bow() (bow.Bow, error) { 242 | return r.bow, r.err 243 | } 244 | 245 | func (r *intervalRolling) setError(err error) Rolling { 246 | r.err = err 247 | return r 248 | } 249 | -------------------------------------------------------------------------------- /rolling/transformation/factor.go: -------------------------------------------------------------------------------- 1 | 
package transformation

import "fmt"

// Func transforms a single value and returns the transformed value or an error.
type Func func(interface{}) (interface{}, error)

// Factor returns a Func multiplying its input by n.
// A float64 input yields a float64, an int64 input yields the product
// converted back to int64, and nil is returned unchanged.
// Any other input type yields an error.
func Factor(n float64) Func {
	return func(x interface{}) (interface{}, error) {
		if x == nil {
			return nil, nil
		}
		if f, isFloat := x.(float64); isFloat {
			return f * n, nil
		}
		if i, isInt := x.(int64); isInt {
			return int64(float64(i) * n), nil
		}
		return nil, fmt.Errorf("factor: invalid type %T", x)
	}
}
--------------------------------------------------------------------------------
/rolling/transformation/factor_test.go:
--------------------------------------------------------------------------------
package transformation

import (
	"testing"

	"github.com/stretchr/testify/assert"
)

// TestFactor checks Factor(0.1) on an unsupported type, nil, int64 and float64.
func TestFactor(t *testing.T) {
	transform := Factor(0.1)

	testCases := []struct {
		name    string
		input   interface{}
		want    interface{}
		wantErr string
	}{
		{name: "invalid input", input: "11", want: nil, wantErr: "factor: invalid type string"},
		{name: "preserve nil", input: nil, want: nil},
		{name: "preserve int64", input: int64(11), want: int64(1)},
		{name: "preserve float64", input: 11., want: 1.1},
	}

	for _, tc := range testCases {
		tc := tc
		t.Run(tc.name, func(t *testing.T) {
			res, err := transform(tc.input)
			if tc.wantErr != "" {
				assert.EqualError(t, err, tc.wantErr)
			} else {
				assert.Nil(t, err)
			}
			if tc.want == nil {
				assert.Nil(t, res)
			} else {
				assert.Equal(t, tc.want, res)
			}
		})
	}
}
--------------------------------------------------------------------------------
/rolling/window.go:
--------------------------------------------------------------------------------
package rolling

import "github.com/metronlab/bow"

// Window represents an interval-based window of data with:
// Bow: data
// FirstIndex: row index at which this window starts in the Bow being rolled over;
// intervalRolling.Next sets it to the row its scan starts from, including for an empty window
// IntervalColIndex: index of the interval column
// FirstValue: Window first value
// LastValue: Window last value
// IsInclusive: Window is inclusive, i.e. includes the last point at the end of the interval
type Window struct {
	Bow              bow.Bow
	FirstIndex       int
	IntervalColIndex int
	FirstValue       int64
	LastValue        int64
	IsInclusive      bool
}

// UnsetInclusive returns a copy of the Window with the IsInclusive parameter set to false and with the last row sliced off.
// Returns the unchanged Window if the IsInclusive parameter is not set.
func (w Window) UnsetInclusive() Window {
	// Nothing to strip when the window does not hold the boundary point.
	if !w.IsInclusive {
		return w
	}
	// w is received by value, so wCopy never aliases the caller's Window.
	wCopy := w
	wCopy.IsInclusive = false
	// Drop the last row, i.e. the point sitting on the end of the interval.
	wCopy.Bow = wCopy.Bow.NewSlice(0, wCopy.Bow.NumRows()-1)
	return wCopy
}
--------------------------------------------------------------------------------
/scripts/benchmark.sh:
--------------------------------------------------------------------------------
#!/bin/bash -e

# With pipefail the "go test ... | tee" pipeline fails when "go test" fails,
# instead of reporting the exit status of "tee".
set -o pipefail

TIMESTAMP=$(date +%Y-%m-%d_%H-%M-%S)

: "${PKG:="./..."}"
: "${TIMEOUT:="1h"}"
: "${RUN:=".*"}"
: "${BENCH_RESULTS_DIR_PATH:="/tmp/benchmarks"}"
# Default the results file into the (possibly overridden) results directory.
: "${BENCH_RESULTS_FILE_PATH:="${BENCH_RESULTS_DIR_PATH}/${TIMESTAMP}.txt"}"

mkdir -p "$BENCH_RESULTS_DIR_PATH"
# The results file may have been overridden to live somewhere else.
mkdir -p "$(dirname "$BENCH_RESULTS_FILE_PATH")"

printf "Run benchmarks into file %s\n" "$BENCH_RESULTS_FILE_PATH"
# PKG is left unquoted on purpose so that it can list several packages.
go test $PKG -run XXX -bench="$RUN" -benchmem -timeout "$TIMEOUT" | tee "$BENCH_RESULTS_FILE_PATH"

printf "Run benchstat on file %s\n" "$BENCH_RESULTS_FILE_PATH"
benchstat "$BENCH_RESULTS_FILE_PATH"
--------------------------------------------------------------------------------
/scripts/benchstat.sh:
--------------------------------------------------------------------------------
#!/bin/bash -e

# Usage: benchstat.sh OLD_BENCH_FILE NEW_BENCH_FILE
# Compares two benchmark result files with benchstat and stores the comparison.

OLD_BENCH_FILE_PATH=$1
NEW_BENCH_FILE_PATH=$2

: "${BENCH_RESULTS_DIR_PATH:="/tmp/benchmarks"}"
: "${BENCH_COMPARISON_FILE_PATH:="$BENCH_RESULTS_DIR_PATH/benchstat.$(date +%Y-%m-%d_%H-%M-%S).txt"}"

echo
printf "Running benchstat to compare %s and %s in %s\n" "$OLD_BENCH_FILE_PATH" "$NEW_BENCH_FILE_PATH" "$BENCH_COMPARISON_FILE_PATH"

# A missing input file is reported but exits with status 0.
if [ ! -f "$OLD_BENCH_FILE_PATH" ]
then
  printf "%s does not exist\n" "$OLD_BENCH_FILE_PATH"
  exit 0
fi

if [ ! -f "$NEW_BENCH_FILE_PATH" ]
then
  printf "%s does not exist\n" "$NEW_BENCH_FILE_PATH"
  exit 0
fi

mkdir -p "$BENCH_RESULTS_DIR_PATH"

benchstat -delta-test none "$OLD_BENCH_FILE_PATH" "$NEW_BENCH_FILE_PATH" | tee "$BENCH_COMPARISON_FILE_PATH"
--------------------------------------------------------------------------------
/scripts/test.sh:
--------------------------------------------------------------------------------
#!/bin/bash -e

# The -o pipefail option is important for the trap to be executed if the "go test" command fails
set -o pipefail

: "${TEST_RESULTS:=/tmp/test-results}"
: "${COVER_RESULTS:=/tmp/cover-results}"
: "${PKG:=./...}"
: "${RUN:=.*}"
: "${TIMEOUT:=5m}"

mkdir -p "${COVER_RESULTS}"
mkdir -p "${TEST_RESULTS}"

# Always convert the raw "go test" output to a JUnit report on exit.
trap 'go-junit-report <"${TEST_RESULTS}/go-test.out" >"${TEST_RESULTS}/go-test-report.xml"' EXIT
# PKG is left unquoted on purpose so that it can list several packages.
# RUN must be quoted: its default ".*" would otherwise undergo pathname expansion.
go test ${PKG} -v -race -cover -covermode=atomic -coverprofile="${COVER_RESULTS}/coverage.cover" -timeout "${TIMEOUT}" -run "${RUN}" \
  | tee "${TEST_RESULTS}/go-test.out" \
  | sed "/PASS/s//$(printf '\033[32mPASS\033[0m')/" \
  | sed "/FAIL/s//$(printf '\033[31mFAIL\033[0m')/" \
  | sed "/RUN/s//$(printf '\033[34mRUN\033[0m')/"

go tool cover -html="${COVER_RESULTS}/coverage.cover" -o "${COVER_RESULTS}/coverage.html"

echo "To open the html coverage file use one of the following commands:"
echo "open file://$COVER_RESULTS/coverage.html on mac"
echo "xdg-open file://$COVER_RESULTS/coverage.html on linux"
--------------------------------------------------------------------------------