├── .circleci └── config.yml ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── dataframe ├── constructors.go ├── dataframe.go ├── dataframe_test.go ├── doc.go ├── element.go ├── element_numeric.gen.go ├── element_numeric.gen.go.tmpl ├── example_test.go ├── mutations.go ├── smartbuilder.go ├── smartbuilder_test.go ├── stepvalue.go └── tablefacade.go ├── datatype_numeric.gen.go.tmpldata ├── doc.go ├── go.mod ├── go.sum ├── internal ├── cast │ ├── dense.go │ ├── doc.go │ └── sparse.go ├── constructors │ ├── doc.go │ └── interface.go └── debug │ ├── assert_disabled.go │ ├── assert_enabled.go │ ├── debug_disabled.go │ ├── debug_enabled.go │ ├── doc.go │ ├── warn_disabled.go │ └── warn_enabled.go ├── iterator ├── booleaniterator.go ├── chunkiterator.gen.go ├── chunkiterator.gen.go.tmpl ├── chunkiterator.go ├── chunkiterator_test.go ├── doc.go ├── stepiterator.go ├── stepiterator_test.go ├── stringiterator.go ├── valueiterator.gen.go ├── valueiterator.gen.go.tmpl ├── valueiterator.go └── valueiterator_test.go ├── numeric.tmpldata └── tools.go /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | # Golang CircleCI 2.0 configuration file 2 | # 3 | # Check https://circleci.com/docs/2.0/language-go/ for more details 4 | version: 2 5 | jobs: 6 | build: 7 | docker: 8 | # specify the version 9 | - image: circleci/golang:1.12 10 | steps: 11 | - checkout 12 | 13 | - run: make ci 14 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | tmp/ 2 | *.test 3 | bin/ 4 | vendor/ 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 
6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2019 Nick Poorman 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | GO_BUILD=go build 2 | GO_TEST?=go test 3 | GO_MOD=go mod 4 | 5 | GO_SOURCES := $(shell find . -path -prune -o -name '*.go' -not -name '*_test.go') 6 | SOURCES_NO_VENDOR := $(shell find . -path ./vendor -prune -o -name "*.go" -not -name '*_test.go' -print) 7 | GO_TEMPLATES := $(shell find . -path ./vendor -prune -o -name "*.tmpl" -print) 8 | GO_COMPILED_TEMPLATES = $(patsubst %.gen.go.tmpl,%.gen.go,$(GO_TEMPLATES)) 9 | 10 | default: build 11 | 12 | build: vendor go-templates 13 | 14 | clean: 15 | find . -type f -name '*.gen.go' -exec rm {} + 16 | rm -rf bin/ 17 | rm -rf vendor/ 18 | 19 | test: $(GO_SOURCES) 20 | $(GO_TEST) $(GO_TEST_ARGS) ./... 21 | 22 | ci: test-debug-assert 23 | 24 | test-debug-assert: $(GO_SOURCES) 25 | $(GO_TEST) $(GO_TEST_ARGS) -tags='debug assert' ./... 26 | 27 | bench: $(GO_SOURCES) 28 | $(GO_TEST) $(GO_TEST_ARGS) -bench=. -run=- ./... 
29 | 30 | go-templates: bin/tmpl $(GO_COMPILED_TEMPLATES) 31 | 32 | %.gen.go: %.gen.go.tmpl 33 | bin/tmpl -i -data=numeric.tmpldata $< 34 | 35 | fmt: $(SOURCES_NO_VENDOR) 36 | goimports -w $^ 37 | 38 | bin/tmpl: ./vendor/github.com/apache/arrow/go/arrow/_tools/tmpl/main.go 39 | $(GO_BUILD) -o $@ "./$( 23 | 24 | ## Installation 25 | 26 | Add the package to your `go.mod` file: 27 | 28 | require github.com/go-bullseye/bullseye master 29 | 30 | Or, clone the repository: 31 | 32 | git clone --branch master https://github.com/go-bullseye/bullseye.git $GOPATH/src/github.com/go-bullseye/bullseye 33 | 34 | A complete example: 35 | 36 | ```bash 37 | mkdir my-dataframe-app && cd my-dataframe-app 38 | 39 | cat > go.mod <<-END 40 | module my-dataframe-app 41 | 42 | require github.com/go-bullseye/bullseye master 43 | END 44 | 45 | cat > main.go <<-END 46 | package main 47 | 48 | import ( 49 | "fmt" 50 | 51 | "github.com/apache/arrow/go/arrow/memory" 52 | "github.com/go-bullseye/bullseye/dataframe" 53 | ) 54 | 55 | func main() { 56 | pool := memory.NewGoAllocator() 57 | df, _ := dataframe.NewDataFrameFromMem(pool, dataframe.Dict{ 58 | "col1": []int32{1, 2, 3, 4, 5}, 59 | "col2": []float64{1.1, 2.2, 3.3, 4.4, 5}, 60 | "col3": []string{"foo", "bar", "ping", "", "pong"}, 61 | "col4": []interface{}{2, 4, 6, nil, 8}, 62 | }) 63 | defer df.Release() 64 | fmt.Printf("DataFrame:\n%s\n", df.Display(0)) 65 | } 66 | 67 | // DataFrame: 68 | // rec[0]["col1"]: [1 2 3 4 5] 69 | // rec[0]["col2"]: [1.1 2.2 3.3 4.4 5] 70 | // rec[0]["col3"]: ["foo" "bar" "ping" "" "pong"] 71 | // rec[0]["col4"]: [2 4 6 (null) 8] 72 | END 73 | 74 | go run main.go 75 | ``` 76 | 77 | 78 | 79 | ## Usage 80 | 81 | See the [DataFrame tests](dataframe/dataframe_test.go) for extensive usage examples. 82 | 83 | ## Reference Counting 84 | 85 | From the [arrow/go README](https://github.com/apache/arrow/blob/master/go/README.md)... 
86 | 87 | > The library makes use of reference counting so that it can track when memory 88 | > buffers are no longer used. This allows Arrow to update resource accounting, 89 | > pool memory such and track overall memory usage as objects are created and 90 | > released. Types expose two methods to deal with this pattern. The `Retain` 91 | > method will increase the reference count by 1 and `Release` method will reduce 92 | > the count by 1. Once the reference count of an object is zero, any associated 93 | > object will be freed. `Retain` and `Release` are safe to call from multiple 94 | > goroutines. 95 | 96 | ### When to call `Retain` / `Release`? 97 | 98 | - If you are passed an object and wish to take ownership of it, you must call 99 | `Retain`. You must later pair this with a call to `Release` when you no 100 | longer need the object. "Taking ownership" typically means you wish to 101 | access the object outside the scope of the current function call. 102 | 103 | - You own any object you create via functions whose name begins with `New` or 104 | `Copy` or **any operation that results in a new immutable DataFrame being returned** 105 | or when receiving an object over a channel. Therefore you must call 106 | `Release` once you no longer need the object. 107 | 108 | - If you send an object over a channel, you must call `Retain` before sending 109 | it as the receiver is assumed to own the object and will later call `Release` 110 | when it no longer needs the object. 111 | 112 | _Note:_ You can write a test using `memory.NewCheckedAllocator` to assert that you have 113 | released all resources properly. See: [tests](https://github.com/go-bullseye/bullseye/blob/e0958263a91ec914aa4cd0a1b26e43aab29b4c74/dataframe/dataframe_test.go#L234) 114 | 115 | ## TODO 116 | 117 | This DataFrame currently implements most of the scalar types we've come across. 118 | There is still work to be done on some of the list and struct types. 
Feel free 119 | to submit a PR if you find you need them. This library will let you know when you do. 120 | 121 | - [ ] Implement all Arrow DataTypes. 122 | - [ ] Add a filter function to DataFrame. 123 | - [ ] Add an order by function to DataFrame. 124 | 125 | ## License 126 | 127 | (c) 2019 Nick Poorman. Licensed under the Apache License, Version 2.0. 128 | -------------------------------------------------------------------------------- /dataframe/constructors.go: -------------------------------------------------------------------------------- 1 | package dataframe 2 | 3 | import ( 4 | "github.com/apache/arrow/go/arrow/array" 5 | "github.com/apache/arrow/go/arrow/memory" 6 | "github.com/go-bullseye/bullseye/internal/cast" 7 | "github.com/go-bullseye/bullseye/internal/constructors" 8 | ) 9 | 10 | // NewColumnFromMem is a helper for creating a new Column from memory. 11 | func NewColumnFromMem(mem memory.Allocator, name string, values interface{}) (*array.Column, error) { 12 | arr, field, err := constructors.NewInterfaceFromMem(mem, name, values, nil) 13 | if err != nil { 14 | return nil, err 15 | } 16 | defer arr.Release() 17 | 18 | // create the chunk from the data 19 | chunk := array.NewChunked(arr.DataType(), []array.Interface{arr}) 20 | defer chunk.Release() 21 | 22 | // create the column from the schema and chunk 23 | col := array.NewColumn(*field, chunk) 24 | 25 | return col, nil 26 | } 27 | 28 | // NewColumnFromSparseMem is a helper for creating a new Column from sparse memory. 
29 | func NewColumnFromSparseMem(mem memory.Allocator, name string, values []interface{}, valueIndexes []int, size int) (*array.Column, error) { 30 | // build valid mask 31 | valid := make([]bool, size) 32 | for _, idx := range valueIndexes { 33 | valid[idx] = true 34 | } 35 | 36 | ifaceDense, err := cast.SparseCollectionToInterface(values, valueIndexes, size) 37 | if err != nil { 38 | return nil, err 39 | } 40 | 41 | arr, field, err := constructors.NewInterfaceFromMem(mem, name, ifaceDense, valid) 42 | if err != nil { 43 | return nil, err 44 | } 45 | defer arr.Release() 46 | 47 | // create the chunk from the data 48 | chunk := array.NewChunked(arr.DataType(), []array.Interface{arr}) 49 | defer chunk.Release() 50 | 51 | // create the column from the schema and chunk 52 | col := array.NewColumn(*field, chunk) 53 | 54 | return col, nil 55 | } 56 | -------------------------------------------------------------------------------- /dataframe/dataframe.go: -------------------------------------------------------------------------------- 1 | package dataframe 2 | 3 | import ( 4 | "fmt" 5 | "sort" 6 | "strings" 7 | "sync/atomic" 8 | 9 | "github.com/apache/arrow/go/arrow" 10 | "github.com/apache/arrow/go/arrow/array" 11 | "github.com/apache/arrow/go/arrow/memory" 12 | "github.com/go-bullseye/bullseye/internal/constructors" 13 | "github.com/go-bullseye/bullseye/internal/debug" 14 | "github.com/go-bullseye/bullseye/iterator" 15 | "github.com/pkg/errors" 16 | ) 17 | 18 | // Dict is a map of string to array of data. 19 | type Dict map[string]interface{} 20 | 21 | // Option is an option that may be passed to a function. 22 | type Option func(interface{}) error 23 | 24 | // NewDataFrame creates a new data frame from the provided schema and arrays. 
25 | func NewDataFrame(mem memory.Allocator, schema *arrow.Schema, arrs []array.Interface) (*DataFrame, error) { 26 | df := &DataFrame{ 27 | refs: 1, 28 | mem: mem, 29 | schema: schema, 30 | rows: -1, 31 | mutator: NewMutator(mem), 32 | } 33 | 34 | if df.rows < 0 { 35 | switch len(arrs) { 36 | case 0: 37 | df.rows = 0 38 | default: 39 | df.rows = int64(arrs[0].Len()) 40 | } 41 | } 42 | 43 | if df.schema == nil { 44 | return nil, errors.Errorf("dataframe: nil schema") 45 | } 46 | 47 | if len(df.schema.Fields()) != len(arrs) { 48 | return nil, errors.Errorf("dataframe: inconsistent schema/arrays") 49 | } 50 | 51 | for i, arr := range arrs { 52 | ft := df.schema.Field(i) 53 | if arr.DataType() != ft.Type { 54 | return nil, errors.Errorf("dataframe: column %q is inconsitent with schema", ft.Name) 55 | } 56 | 57 | if int64(arr.Len()) < df.rows { 58 | return nil, errors.Errorf("dataframe: column %q expected length >= %d but got length %d", ft.Name, df.rows, arr.Len()) 59 | } 60 | } 61 | 62 | df.cols = make([]array.Column, len(arrs)) 63 | for i := range arrs { 64 | func(i int) { 65 | chunk := array.NewChunked(arrs[i].DataType(), []array.Interface{arrs[i]}) 66 | defer chunk.Release() 67 | 68 | col := array.NewColumn(df.schema.Field(i), chunk) 69 | df.cols[i] = *col 70 | }(i) 71 | } 72 | 73 | return df, nil 74 | } 75 | 76 | // NewDataFrameFromColumns returns a DataFrame interface. 77 | func NewDataFrameFromColumns(mem memory.Allocator, cols []array.Column) (*DataFrame, error) { 78 | var rows int64 79 | if len(cols) > 0 { 80 | rows = columnLen(cols[0]) 81 | } 82 | 83 | return NewDataFrameFromShape(mem, cols, rows) 84 | } 85 | 86 | // NewDataFrameFromMem creates a new data frame from the provided in-memory data. 
87 | func NewDataFrameFromMem(mem memory.Allocator, dict Dict) (*DataFrame, error) { 88 | var ( 89 | err error 90 | arrs = make([]array.Interface, 0, len(dict)) 91 | fields = make([]arrow.Field, 0, len(dict)) 92 | ) 93 | 94 | keys := make([]string, 0, len(dict)) 95 | for k := range dict { 96 | keys = append(keys, k) 97 | } 98 | sort.Strings(keys) 99 | for _, k := range keys { 100 | v := dict[k] 101 | arr, field, newInterfaceErr := constructors.NewInterfaceFromMem(mem, k, v, nil) 102 | if newInterfaceErr != nil { 103 | err = newInterfaceErr 104 | break 105 | } 106 | arrs = append(arrs, arr) 107 | fields = append(fields, *field) 108 | } 109 | 110 | defer func() { 111 | for i := range arrs { 112 | arrs[i].Release() 113 | } 114 | }() 115 | 116 | if err != nil { 117 | return nil, err 118 | } 119 | 120 | schema := arrow.NewSchema(fields, nil) 121 | return NewDataFrame(mem, schema, arrs) 122 | } 123 | 124 | // NewDataFrameFromShape is the same as NewDataFrameFromColumns only it allows you to specify the number 125 | // of rows in the DataFrame. 126 | func NewDataFrameFromShape(mem memory.Allocator, cols []array.Column, rows int64) (*DataFrame, error) { 127 | df := &DataFrame{ 128 | refs: 1, 129 | mem: mem, 130 | schema: buildSchema(cols), 131 | cols: cols, 132 | rows: rows, 133 | mutator: NewMutator(mem), 134 | } 135 | 136 | // validate the data frame and its constituents. 137 | // note we retain the columns after having validated the data frame 138 | // in case the validation fails and panics (and would otherwise leak 139 | // a ref-count on the columns.) 
140 | 	if err := df.validate(); err != nil {
141 | 		return nil, err
142 | 	}
143 | 
144 | 	for i := range df.cols {
145 | 		df.cols[i].Retain()
146 | 	}
147 | 
148 | 	return df, nil
149 | }
150 | // NewDataFrameFromTable creates a new DataFrame from the provided Arrow table.
151 | func NewDataFrameFromTable(mem memory.Allocator, table array.Table) (*DataFrame, error) {
152 | 	cols := make([]array.Column, table.NumCols())
153 | 	for i := range cols {
154 | 		col := table.Column(i)
155 | 		cols[i] = *col // shallow copy of the column value; the underlying chunked data is shared
156 | 	}
157 | 
158 | 	return NewDataFrameFromShape(mem, cols, table.NumRows())
159 | }
160 | 
161 | // DataFrame is an immutable DataFrame that uses Arrow
162 | // to store its data in a standard columnar format.
163 | type DataFrame struct {
164 | 	refs   int64 // reference count
165 | 	mem    memory.Allocator
166 | 	schema *arrow.Schema
167 | 
168 | 	cols []array.Column
169 | 	rows int64
170 | 
171 | 	// Mutations that can be performed on this DataFrame
172 | 	// require the Mutator to be set up.
173 | 	mutator *Mutator
174 | }
175 | 
176 | // Allocator returns the memory allocator for this DataFrame.
177 | func (df *DataFrame) Allocator() memory.Allocator {
178 | 	return df.mem
179 | }
180 | 
181 | // Column returns the column matching the given name.
182 | func (df *DataFrame) Column(name string) *array.Column {
183 | 	for i, col := range df.cols {
184 | 		if col.Name() == name {
185 | 			return &df.cols[i]
186 | 		}
187 | 	}
188 | 	return nil
189 | }
190 | 
191 | // ColumnAt returns the i-th column of this Frame.
192 | func (df *DataFrame) ColumnAt(i int) *array.Column {
193 | 	return &df.cols[i]
194 | }
195 | 
196 | // Columns is the slice of Columns that make up this DataFrame.
197 | func (df *DataFrame) Columns() []array.Column {
198 | 	return df.cols
199 | }
200 | 
201 | // ColumnNames returns the column names that make up this DataFrame.
202 | func (df *DataFrame) ColumnNames() []string { 203 | fields := df.schema.Fields() 204 | names := make([]string, len(fields)) 205 | for i, field := range fields { 206 | names[i] = field.Name 207 | } 208 | return names 209 | } 210 | 211 | // ColumnTypes is the slice of column types that make up this DataFrame. 212 | func (df *DataFrame) ColumnTypes() []arrow.Field { 213 | return df.schema.Fields() 214 | } 215 | 216 | // Equals checks for equality between this DataFrame and DataFrame d. 217 | // nil elements at the same location are considered equal. 218 | func (df *DataFrame) Equals(d *DataFrame) bool { 219 | if !df.schema.Equal(d.schema) { 220 | return false 221 | } 222 | 223 | // compare the columns 224 | leftCols := df.Columns() 225 | rightCols := d.Columns() 226 | 227 | if len(leftCols) != len(rightCols) { 228 | return false 229 | } 230 | 231 | for i := range leftCols { 232 | leftCol := leftCols[i] 233 | rightCol := rightCols[i] 234 | 235 | // Could do this with a column iterator? 236 | same := compareColumns(&leftCol, &rightCol) 237 | if !same { 238 | return false 239 | } 240 | } 241 | 242 | return true 243 | } 244 | 245 | // NumCols returns the number of columns of this DataFrame using Go's len(). 246 | func (df *DataFrame) NumCols() int { 247 | return len(df.cols) 248 | } 249 | 250 | // NumRows returns the number of rows of this DataFrame. 251 | func (df *DataFrame) NumRows() int64 { 252 | return df.rows 253 | } 254 | 255 | // Name returns the name of the i-th column of this DataFrame. 256 | func (df *DataFrame) Name(i int) string { 257 | return df.schema.Field(i).Name 258 | } 259 | 260 | // Dims retrieves the dimensions of a DataFrame. 261 | func (df *DataFrame) Dims() (int, int64) { 262 | return len(df.cols), df.rows 263 | } 264 | 265 | // Display builds out a string representation of the DataFrame that is useful for debugging. 266 | // if chunkSize is <= 0, the biggest possible chunk will be selected. 
267 | func (df *DataFrame) Display(chunkSize int64) string { 268 | tr := array.NewTableReader(NewTableFacade(df), chunkSize) 269 | defer tr.Release() 270 | 271 | n := 0 272 | var output strings.Builder 273 | for tr.Next() { 274 | rec := tr.Record() 275 | for i, col := range rec.Columns() { 276 | fmt.Fprintf(&output, "rec[%d][%q]: %v\n", n, rec.ColumnName(i), col) 277 | } 278 | n++ 279 | } 280 | 281 | return output.String() 282 | } 283 | 284 | /** 285 | * These are column specific helpers 286 | */ 287 | 288 | // SelectColumns returns only columns matching names. 289 | func (df *DataFrame) SelectColumns(names ...string) []array.Column { 290 | if len(names) == 0 { 291 | return []array.Column{} 292 | } 293 | 294 | set := make(map[string]struct{}, len(names)) 295 | for _, name := range names { 296 | set[name] = struct{}{} 297 | } 298 | 299 | cols := make([]array.Column, 0, len(names)) 300 | 301 | dfColumns := df.Columns() 302 | for i := range dfColumns { 303 | if _, ok := set[dfColumns[i].Name()]; !ok { 304 | continue 305 | } 306 | cols = append(cols, dfColumns[i]) 307 | } 308 | 309 | return cols[:len(cols):len(cols)] 310 | } 311 | 312 | // RejectColumns returns only columns not matching names. 313 | func (df *DataFrame) RejectColumns(names ...string) []array.Column { 314 | if len(names) == 0 { 315 | return df.Columns() 316 | } 317 | 318 | set := make(map[string]struct{}, len(names)) 319 | for _, name := range names { 320 | set[name] = struct{}{} 321 | } 322 | 323 | cols := make([]array.Column, 0, df.NumCols()-len(names)) 324 | 325 | dfColumns := df.Columns() 326 | for i := range dfColumns { 327 | if _, drop := set[dfColumns[i].Name()]; drop { 328 | continue 329 | } 330 | cols = append(cols, dfColumns[i]) 331 | } 332 | 333 | return cols[:len(cols):len(cols)] 334 | } 335 | 336 | // Apply takes a series of MutationFunc and calls them with the existing DataFrame on the left. 
337 | func (df *DataFrame) Apply(fns ...MutationFunc) (*DataFrame, error) {
338 | 	left, err := df.Copy()
339 | 	if err != nil {
340 | 		return nil, err
341 | 	}
342 | 	if len(fns) == 0 {
343 | 		return left, err
344 | 	}
345 | 	for i := range fns {
346 | 		left, err = func() (*DataFrame, error) {
347 | 			defer left.Release() // release the intermediate frame once this mutation has produced its result
348 | 			return fns[i](left)
349 | 		}()
350 | 		if err != nil {
351 | 			return nil, err
352 | 		}
353 | 	}
354 | 	return left, err
355 | }
356 | 
357 | // ApplyToColumnFunc is a function type that will be called for each element
358 | // that is iterated over in a column. The return value will be appended to the new column being built.
359 | type ApplyToColumnFunc func(v interface{}) (interface{}, error)
360 | 
361 | // ApplyToColumn creates a new DataFrame with the new column appended. The new column is built
362 | // with the response values obtained from ApplyToColumnFunc. An error response value from
363 | // ApplyToColumnFunc will cause ApplyToColumn to return immediately.
364 | func (df *DataFrame) ApplyToColumn(columnName, newColumnName string, fn ApplyToColumnFunc) (*DataFrame, error) {
365 | 	return df.Apply(func(df *DataFrame) (*DataFrame, error) {
366 | 		// TODO(nickpoorman): refactor this
367 | 		col := df.Column(columnName)
368 | 		field := col.Field()
369 | 		field.Name = newColumnName
370 | 		schema := arrow.NewSchema([]arrow.Field{field}, nil)
371 | 		builder := array.NewRecordBuilder(df.Allocator(), schema)
372 | 		defer builder.Release()
373 | 		smartBuilder := NewSmartBuilder(builder, schema)
374 | 		valueIterator := iterator.NewValueIterator(col)
375 | 		defer valueIterator.Release()
376 | 		for valueIterator.Next() {
377 | 			value := valueIterator.ValueInterface()
378 | 			res, err := fn(value)
379 | 			if err != nil {
380 | 				return nil, err
381 | 			}
382 | 			smartBuilder.Append(0, res)
383 | 		}
384 | 		rec := builder.NewRecord()
385 | 		defer rec.Release()
386 | 		chunk := array.NewChunked(col.DataType(), rec.Columns())
387 | 		defer chunk.Release()
388 | 		newCol := array.NewColumn(field, chunk)
389 | 		defer newCol.Release()
390 | return df.AppendColumn(newCol) 391 | }) 392 | } 393 | 394 | /** 395 | * The following functions will always return a new DataFrame. 396 | */ 397 | 398 | // AppendColumn builds a new DataFrame with the provided Column included. 399 | func (df *DataFrame) AppendColumn(c *array.Column) (*DataFrame, error) { 400 | nCols := len(df.cols) 401 | cols := make([]array.Column, nCols+1) 402 | copy(cols, df.cols) 403 | cols[nCols] = *c 404 | return NewDataFrameFromShape(df.mem, cols, df.rows) 405 | } 406 | 407 | // Copy returns a copy of this dataframe. The underlying byte buffers will not be copied. 408 | func (df *DataFrame) Copy() (*DataFrame, error) { 409 | nCols := len(df.cols) 410 | cols := make([]array.Column, nCols) 411 | copy(cols, df.cols) 412 | return NewDataFrameFromShape(df.mem, cols, df.rows) 413 | } 414 | 415 | // CrossJoin returns a DataFrame containing the cross join of two DataFrames. 416 | func (df *DataFrame) CrossJoin(right *DataFrame, opts ...Option) (*DataFrame, error) { 417 | fn := df.mutator.CrossJoin(right, opts...) 418 | return fn(df) 419 | } 420 | 421 | // Select the given DataFrame columns by name. 422 | func (df *DataFrame) Select(names ...string) (*DataFrame, error) { 423 | fn := df.mutator.Select(names...) 424 | return fn(df) 425 | } 426 | 427 | // Drop the given DataFrame columns by name. 428 | func (df *DataFrame) Drop(names ...string) (*DataFrame, error) { 429 | fn := df.mutator.Drop(names...) 430 | return fn(df) 431 | } 432 | 433 | // InnerJoin returns a DataFrame containing the inner join of two DataFrames. 434 | func (df *DataFrame) InnerJoin(right *DataFrame, columns []string, opts ...Option) (*DataFrame, error) { 435 | fn := df.mutator.InnerJoin(right, columns, opts...) 436 | return fn(df) 437 | } 438 | 439 | // LeftJoin returns a DataFrame containing the left join of two DataFrames. 
440 | func (df *DataFrame) LeftJoin(right *DataFrame, columns []string, opts ...Option) (*DataFrame, error) { 441 | fn := df.mutator.LeftJoin(right, columns, opts...) 442 | return fn(df) 443 | } 444 | 445 | // OuterJoin returns a DataFrame containing the outer join of two DataFrames. 446 | // Use union of keys from both frames, similar to a SQL full outer join. 447 | func (df *DataFrame) OuterJoin(right *DataFrame, columns []string, opts ...Option) (*DataFrame, error) { 448 | fn := df.mutator.OuterJoin(right, columns, opts...) 449 | return fn(df) 450 | } 451 | 452 | // RightJoin returns a DataFrame containing the right join of two DataFrames. 453 | func (df *DataFrame) RightJoin(right *DataFrame, columns []string, opts ...Option) (*DataFrame, error) { 454 | fn := df.mutator.RightJoin(right, columns, opts...) 455 | return fn(df) 456 | } 457 | 458 | // Slice creates a new DataFrame consisting of rows[beg:end]. 459 | func (df *DataFrame) Slice(beg, end int64) (*DataFrame, error) { 460 | return df.mutator.Slice(beg, end)(df) 461 | } 462 | 463 | // Schema returns the schema of this Frame. 464 | func (df *DataFrame) Schema() *arrow.Schema { 465 | return df.schema 466 | } 467 | 468 | // Retain increases the reference count by 1. 469 | // Retain may be called simultaneously from multiple goroutines. 470 | func (df *DataFrame) Retain() { 471 | atomic.AddInt64(&df.refs, 1) 472 | } 473 | 474 | // Release decreases the reference count by 1. 475 | // When the reference count goes to zero, the memory is freed. 476 | // Release may be called simultaneously from multiple goroutines. 
477 | func (df *DataFrame) Release() { 478 | refs := atomic.AddInt64(&df.refs, -1) 479 | debug.Assert(refs >= 0, "too many releases") 480 | 481 | if refs == 0 { 482 | for i := range df.cols { 483 | df.cols[i].Release() 484 | } 485 | df.cols = nil 486 | } 487 | } 488 | 489 | func (df *DataFrame) validate() error { 490 | if len(df.Columns()) != len(df.schema.Fields()) { 491 | return errors.New("dataframe validate(): table schema mismatch") 492 | } 493 | for i, col := range df.cols { 494 | if !col.Field().Equal(df.schema.Field(i)) { 495 | return errors.Errorf("dataframe validate(): column field %q is inconsistent with schema", col.Name()) 496 | } 497 | colLen := columnLen(col) 498 | if colLen < df.rows { 499 | return errors.Errorf("dataframe validate(): column %q expected length >= %d but got length %d", col.Name(), df.rows, colLen) 500 | } 501 | } 502 | return nil 503 | } 504 | 505 | func compareColumns(left, right *array.Column) bool { 506 | // We have to use value iterators and the only way to do that is to switch on the type 507 | leftDtype := left.DataType() 508 | rightDtype := right.DataType() 509 | if leftDtype.ID() != rightDtype.ID() { 510 | debug.Warnf("warning: comparing different types of columns: %v | %v", leftDtype.Name(), rightDtype.Name()) 511 | return false 512 | } 513 | 514 | // Let's use the stuff we already have to do all columns 515 | it := iterator.NewStepIteratorForColumns([]array.Column{*left, *right}) 516 | defer it.Release() 517 | 518 | for it.Next() { 519 | stepValue := it.Values() 520 | var elTPrev Element 521 | for i := range stepValue.Values { 522 | elT := StepValueElementAt(stepValue, i) 523 | if elTPrev == nil { 524 | elTPrev = elT 525 | continue 526 | } 527 | eq, err := elT.EqStrict(elTPrev) 528 | if err != nil { 529 | debug.Warnf("warning: bullseye/dataframe#compareColumns: %v\n", err) 530 | // types must not be equal 531 | return false 532 | } 533 | if !eq { 534 | return false 535 | } 536 | } 537 | } 538 | 539 | return true 540 | } 

// buildSchema derives an arrow.Schema from the fields of the given columns.
func buildSchema(cols []array.Column) *arrow.Schema {
	fields := make([]arrow.Field, 0, len(cols))
	for i := range cols {
		fields = append(fields, cols[i].Field())
	}
	return arrow.NewSchema(fields, nil)
}

// columnLen returns the number of rows in the Column.
// Because Arrow chunks arrays, you may encounter an overflow if
// there are MaxInt64 rows, i.e. 9223372036854775807.
func columnLen(col array.Column) int64 {
	var length int64
	for _, chunk := range col.Data().Chunks() {
		// Keep our own counters instead of Chunked's
		length += int64(chunk.Len())
	}
	return length
}
-------------------------------------------------------------------------------- /dataframe/dataframe_test.go: --------------------------------------------------------------------------------
package dataframe

import (
	"reflect"
	"testing"

	"github.com/apache/arrow/go/arrow"
	"github.com/apache/arrow/go/arrow/array"
	"github.com/apache/arrow/go/arrow/memory"
	"github.com/go-bullseye/bullseye/iterator"
	"github.com/pkg/errors"
)

const (
	NUMROWS  = int64(30)
	NUMCOLS  = 2
	COL0NAME = "f1-i32"
	COL1NAME = "f2-f64"
)

// buildRecords builds three 10-row records over an (int32, float64) schema.
// `last` replaces the final int32 value of the third record so callers can
// force two otherwise-identical datasets to differ.
func buildRecords(pool *memory.CheckedAllocator, t *testing.T, last int32) ([]array.Record, *arrow.Schema) {
	schema := arrow.NewSchema(
		[]arrow.Field{
			{Name: COL0NAME, Type: arrow.PrimitiveTypes.Int32},
			{Name: COL1NAME, Type: arrow.PrimitiveTypes.Float64},
		},
		nil,
	)

	b := array.NewRecordBuilder(pool, schema)
	defer b.Release()

	b.Field(0).(*array.Int32Builder).AppendValues([]int32{1, 2, 3, 4, 5, 6}, nil)
	b.Field(0).(*array.Int32Builder).AppendValues([]int32{7, 8, 9, 10}, []bool{true, true, false, true})
	b.Field(1).(*array.Float64Builder).AppendValues([]float64{1, 2, 3, 4, 5, 6}, nil)
b.Field(1).(*array.Float64Builder).AppendValues([]float64{7, 8, 9, 10}, []bool{true, true, false, true}) 37 | 38 | rec1 := b.NewRecord() 39 | 40 | b.Field(0).(*array.Int32Builder).AppendValues([]int32{11, 12, 13, 14, 15, 16, 17, 18, 19, 20}, nil) 41 | b.Field(1).(*array.Float64Builder).AppendValues([]float64{11, 12, 13, 14, 15, 16, 17, 18, 19, 20}, nil) 42 | 43 | rec2 := b.NewRecord() 44 | 45 | b.Field(0).(*array.Int32Builder).AppendValues([]int32{31, 32, 33, 34, 35, 36, 37, 38, 39, last}, nil) 46 | b.Field(1).(*array.Float64Builder).AppendValues([]float64{31, 32, 33, 34, 35, 36, 37, 38, 39, 40}, nil) 47 | 48 | rec3 := b.NewRecord() 49 | 50 | return []array.Record{rec1, rec2, rec3}, schema 51 | } 52 | 53 | func getColumns(pool *memory.CheckedAllocator, t *testing.T, last int32) []array.Column { 54 | records, schema := buildRecords(pool, t, last) 55 | for i := range records { 56 | defer records[i].Release() 57 | } 58 | 59 | tbl := array.NewTableFromRecords(schema, records) 60 | defer tbl.Release() 61 | 62 | cols := make([]array.Column, tbl.NumCols()) 63 | for i := range cols { 64 | col := tbl.Column(i) 65 | col.Retain() 66 | cols[i] = *col 67 | } 68 | 69 | return cols 70 | } 71 | 72 | func genValues(length int) []int32 { 73 | colVals := make([]int32, 30) 74 | for i := range colVals { 75 | colVals[i] = int32(i) 76 | } 77 | return colVals 78 | } 79 | 80 | func TestNewDataFrameFromColumns(t *testing.T) { 81 | pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) 82 | defer pool.AssertSize(t, 0) 83 | 84 | cols := getColumns(pool, t, 40) 85 | for i := range cols { 86 | defer cols[i].Release() 87 | } 88 | 89 | df, err := NewDataFrameFromColumns(pool, cols) 90 | if err != nil { 91 | t.Fatal(err) 92 | } 93 | defer df.Release() 94 | 95 | if got, want := df.NumRows(), NUMROWS; got != want { 96 | t.Fatalf("got=%d, want=%d", got, want) 97 | } 98 | } 99 | 100 | func TestNumCols(t *testing.T) { 101 | pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) 102 | defer 
pool.AssertSize(t, 0) 103 | 104 | cols := getColumns(pool, t, 40) 105 | for i := range cols { 106 | defer cols[i].Release() 107 | } 108 | 109 | df, err := NewDataFrameFromColumns(pool, cols) 110 | if err != nil { 111 | t.Fatal(err) 112 | } 113 | defer df.Release() 114 | 115 | if got, want := df.NumCols(), NUMCOLS; got != want { 116 | t.Fatalf("got=%d, want=%d", got, want) 117 | } 118 | } 119 | 120 | func TestNumRows(t *testing.T) { 121 | pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) 122 | defer pool.AssertSize(t, 0) 123 | 124 | cols := getColumns(pool, t, 40) 125 | for i := range cols { 126 | defer cols[i].Release() 127 | } 128 | 129 | df, err := NewDataFrameFromColumns(pool, cols) 130 | if err != nil { 131 | t.Fatal(err) 132 | } 133 | defer df.Release() 134 | 135 | if got, want := df.NumRows(), NUMROWS; got != want { 136 | t.Fatalf("got=%d, want=%d", got, want) 137 | } 138 | } 139 | 140 | func TestDims(t *testing.T) { 141 | pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) 142 | defer pool.AssertSize(t, 0) 143 | 144 | cols := getColumns(pool, t, 40) 145 | for i := range cols { 146 | defer cols[i].Release() 147 | } 148 | 149 | df, err := NewDataFrameFromColumns(pool, cols) 150 | if err != nil { 151 | t.Fatal(err) 152 | } 153 | defer df.Release() 154 | 155 | w, l := df.Dims() 156 | 157 | if got, want := w, NUMCOLS; got != want { 158 | t.Fatalf("got=%d, want=%d", got, want) 159 | } 160 | 161 | if got, want := l, NUMROWS; got != want { 162 | t.Fatalf("got=%d, want=%d", got, want) 163 | } 164 | } 165 | 166 | func TestEquals(t *testing.T) { 167 | pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) 168 | defer pool.AssertSize(t, 0) 169 | 170 | cols := getColumns(pool, t, 40) 171 | for i := range cols { 172 | defer cols[i].Release() 173 | } 174 | 175 | df, err := NewDataFrameFromColumns(pool, cols) 176 | if err != nil { 177 | t.Fatal(err) 178 | } 179 | defer df.Release() 180 | 181 | cols2 := getColumns(pool, t, 40) 182 | defer func() { 183 
		for _, col := range cols2 {
			col.Release()
		}
	}()

	df2, err := NewDataFrameFromColumns(pool, cols2)
	if err != nil {
		t.Fatal(err)
	}
	defer df2.Release()

	if got, want := df.Equals(df2), true; got != want {
		t.Fatalf("got=%v, want=%v", got, want)
	}
}

func TestEqualsFalse(t *testing.T) {
	// This test makes sure Equals returns false as well as true.
	pool := memory.NewCheckedAllocator(memory.NewGoAllocator())
	defer pool.AssertSize(t, 0)

	cols := getColumns(pool, t, 40)
	for i := range cols {
		defer cols[i].Release()
	}

	df, err := NewDataFrameFromColumns(pool, cols)
	if err != nil {
		t.Fatal(err)
	}
	defer df.Release()

	// Use a different `last` value (99) so the third record differs.
	cols2 := getColumns(pool, t, 99)
	defer func() {
		for _, col := range cols2 {
			col.Release()
		}
	}()

	df2, err := NewDataFrameFromColumns(pool, cols2)
	if err != nil {
		t.Fatal(err)
	}
	defer df2.Release()

	if got, want := df.Equals(df2), false; got != want {
		t.Fatalf("got=%v, want=%v", got, want)
	}
}

// TestName checks that Name returns each column's declared name.
func TestName(t *testing.T) {
	pool := memory.NewCheckedAllocator(memory.NewGoAllocator())
	defer pool.AssertSize(t, 0)

	cols := getColumns(pool, t, 40)
	for i := range cols {
		defer cols[i].Release()
	}

	df, err := NewDataFrameFromColumns(pool, cols)
	if err != nil {
		t.Fatal(err)
	}
	defer df.Release()

	if got, want := df.Name(0), COL0NAME; got != want {
		t.Fatalf("got=%s, want=%s", got, want)
	}

	if got, want := df.Name(1), COL1NAME; got != want {
		t.Fatalf("got=%s, want=%s", got, want)
	}
}

// TestSlice checks that Slice(0, 5) yields a 5-row frame.
func TestSlice(t *testing.T) {
	pool := memory.NewCheckedAllocator(memory.NewGoAllocator())
	defer pool.AssertSize(t, 0)

	cols := getColumns(pool, t, 40)
	for i := range cols {
		defer cols[i].Release()
	}

	df, err := NewDataFrameFromColumns(pool, cols)
	if err != nil {
		t.Fatal(err)
	}
	defer df.Release()

	df2, err := df.Slice(0, 5)
	if err != nil {
		t.Fatal(err)
	}
	defer df2.Release()

	if got, want := df2.NumRows(), int64(5); got != want {
		t.Fatalf("got=%d, want=%d", got, want)
	}
}

// TestColumnNames checks the name list of a frame built from a Dict.
func TestColumnNames(t *testing.T) {
	pool := memory.NewCheckedAllocator(memory.NewGoAllocator())
	defer pool.AssertSize(t, 0)

	df, err := NewDataFrameFromMem(pool, Dict{
		"col1-i32": []int32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
		"col2-f64": []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
	})
	if err != nil {
		t.Fatal(err)
	}
	defer df.Release()

	if got, want := df.ColumnNames(), []string{"col1-i32", "col2-f64"}; !reflect.DeepEqual(got, want) {
		t.Fatalf("got=%v, want=%v", got, want)
	}
}

// TestColumnTypes checks the field list of a frame built from a Dict.
func TestColumnTypes(t *testing.T) {
	pool := memory.NewCheckedAllocator(memory.NewGoAllocator())
	defer pool.AssertSize(t, 0)

	df, err := NewDataFrameFromMem(pool, Dict{
		"col1-i32": []int32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
		"col2-f64": []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
	})
	if err != nil {
		t.Fatal(err)
	}
	defer df.Release()

	field1 := arrow.Field{
		Name:     "col1-i32",
		Type:     arrow.PrimitiveTypes.Int32,
		Nullable: false,
		Metadata: arrow.Metadata{},
	}
	field2 := arrow.Field{
		Name:     "col2-f64",
		Type:     arrow.PrimitiveTypes.Float64,
		Nullable: false,
		Metadata: arrow.Metadata{},
	}

	if got, want := df.ColumnTypes(), []arrow.Field{field1, field2}; !reflect.DeepEqual(got, want) {
		t.Fatalf("got=%v, want=%v", got, want)
	}
}

// TestAppendColumn checks that a generated column is appended and displayed.
func TestAppendColumn(t *testing.T) {
	pool :=
memory.NewCheckedAllocator(memory.NewGoAllocator())
	defer pool.AssertSize(t, 0)

	cols := getColumns(pool, t, 40)
	for i := range cols {
		defer cols[i].Release()
	}

	df, err := NewDataFrameFromColumns(pool, cols)
	if err != nil {
		t.Fatal(err)
	}
	defer df.Release()

	// Create a new Column to append
	col, err := NewColumnFromMem(pool, "col3-i32", genValues(int(df.NumRows())))
	if err != nil {
		t.Fatal(err)
	}
	defer col.Release()

	largerDf, err := df.AppendColumn(col)
	if err != nil {
		t.Fatal(err)
	}
	defer largerDf.Release()

	got := largerDf.Display(-1)
	want := `rec[0]["f1-i32"]: [1 2 3 4 5 6 7 8 (null) 10]
rec[0]["f2-f64"]: [1 2 3 4 5 6 7 8 (null) 10]
rec[0]["col3-i32"]: [0 1 2 3 4 5 6 7 8 9]
rec[1]["f1-i32"]: [11 12 13 14 15 16 17 18 19 20]
rec[1]["f2-f64"]: [11 12 13 14 15 16 17 18 19 20]
rec[1]["col3-i32"]: [10 11 12 13 14 15 16 17 18 19]
rec[2]["f1-i32"]: [31 32 33 34 35 36 37 38 39 40]
rec[2]["f2-f64"]: [31 32 33 34 35 36 37 38 39 40]
rec[2]["col3-i32"]: [20 21 22 23 24 25 26 27 28 29]
`

	if got != want {
		t.Fatalf("\ngot=\n%v\nwant=\n%v", got, want)
	}
}

// TestCopy checks that Copy yields an equal-looking but distinct frame.
func TestCopy(t *testing.T) {
	pool := memory.NewCheckedAllocator(memory.NewGoAllocator())
	defer pool.AssertSize(t, 0)

	df, err := NewDataFrameFromMem(pool, Dict{
		"col1-i32": []int32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
		"col2-f64": []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
	})
	if err != nil {
		t.Fatal(err)
	}
	defer df.Release()

	df2, err := df.Copy()
	if err != nil {
		t.Fatal(err)
	}
	defer df2.Release()

	got := df2.Display(-1)
	want := `rec[0]["col1-i32"]: [1 2 3 4 5 6 7 8 9 10]
rec[0]["col2-f64"]: [1 2 3 4 5 6 7 8 9 10]
`
	if got != want {
		t.Fatalf("\ngot=\n%v\nwant=\n%v", got, want)
	}

	if &df == &df2 {
		t.Fatalf("references are the same. df is not a copy of df2 (%v) == (%v)", &df, &df2)
	}
}

// TestSelect checks projection by column name.
func TestSelect(t *testing.T) {
	pool := memory.NewCheckedAllocator(memory.NewGoAllocator())
	defer pool.AssertSize(t, 0)

	df, err := NewDataFrameFromMem(pool, Dict{
		"col1-i32": []int32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
		"col2-f64": []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
		"col3-i32": []int32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
		"col4-f64": []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
		"col5-i32": []int32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
		"col6-f64": []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
	})
	if err != nil {
		t.Fatal(err)
	}
	defer df.Release()

	names := []string{"col1-i32", "col3-i32", "col6-f64"}
	df2, err := df.Select(names...)
	if err != nil {
		t.Fatal(err)
	}
	defer df2.Release()

	got := df2.Display(-1)
	want := `rec[0]["col1-i32"]: [1 2 3 4 5 6 7 8 9 10]
rec[0]["col3-i32"]: [1 2 3 4 5 6 7 8 9 10]
rec[0]["col6-f64"]: [1 2 3 4 5 6 7 8 9 10]
`

	if got != want {
		t.Fatalf("\ngot=\n%v\nwant=\n%v", got, want)
	}
}

// TestDrop checks removal of columns by name.
func TestDrop(t *testing.T) {
	pool := memory.NewCheckedAllocator(memory.NewGoAllocator())
	defer pool.AssertSize(t, 0)

	df, err := NewDataFrameFromMem(pool, Dict{
		"col1-i32": []int32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
		"col2-f64": []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
		"col3-i32": []int32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
		"col4-f64": []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
		"col5-i32": []int32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
		"col6-f64": []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
	})
	if err != nil {
		t.Fatal(err)
	}
	defer df.Release()

	names := []string{"col1-i32", "col3-i32", "col6-f64"}
	df2, err := df.Drop(names...)
	if err != nil {
		t.Fatal(err)
	}
	defer df2.Release()

	got := df2.Display(-1)
	want := `rec[0]["col2-f64"]: [1 2 3 4 5 6 7 8 9 10]
rec[0]["col4-f64"]: [1 2 3 4 5 6 7 8 9 10]
rec[0]["col5-i32"]: [1 2 3 4 5 6 7 8 9 10]
`

	if got != want {
		t.Fatalf("\ngot=\n%v\nwant=\n%v", got, want)
	}
}

// TestNewDataFrameFromMem checks Display chunking for a Dict-built frame.
func TestNewDataFrameFromMem(t *testing.T) {
	pool := memory.NewCheckedAllocator(memory.NewGoAllocator())
	defer pool.AssertSize(t, 0)

	df, err := NewDataFrameFromMem(pool, Dict{
		"col1-i32": []int32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
		"col2-f64": []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
	})
	if err != nil {
		t.Fatal(err)
	}
	defer df.Release()

	got := df.Display(5)
	want := `rec[0]["col1-i32"]: [1 2 3 4 5]
rec[0]["col2-f64"]: [1 2 3 4 5]
rec[1]["col1-i32"]: [6 7 8 9 10]
rec[1]["col2-f64"]: [6 7 8 9 10]
`
	if got != want {
		t.Fatalf("\ngot=\n%v\nwant=\n%v", got, want)
	}
}

// TestNewColumnFromSparseMem checks a sparse column with explicit value indexes.
func TestNewColumnFromSparseMem(t *testing.T) {
	pool := memory.NewCheckedAllocator(memory.NewGoAllocator())
	defer pool.AssertSize(t, 0)

	values := []interface{}{1, nil, 3}
	valueIndexes := []int{0, 2, 4}
	col, err := NewColumnFromSparseMem(pool, "sparse-col-i32", values, valueIndexes, 10)
	if err != nil {
		t.Fatal(err)
	}
	defer col.Release()

	df, err := NewDataFrameFromColumns(pool, []array.Column{*col})
	if err != nil {
		t.Fatal(err)
	}
	defer df.Release()

	got := df.Display(-1)
	want := `rec[0]["sparse-col-i32"]: [1 (null) 0 (null) 3 (null) (null) (null) (null) (null)]
`
	if got != want {
		t.Fatalf("\ngot=\n%v\nwant=\n%v", got, want)
	}
}

// TestColumn checks lookup of a column by name.
func TestColumn(t *testing.T) {
	pool := memory.NewCheckedAllocator(memory.NewGoAllocator())
	defer pool.AssertSize(t, 0)

	df, err := NewDataFrameFromMem(pool, Dict{
		"col1-i32": []int32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
		"col2-f64": []float64{10, 12, 13, 14, 15, 16, 17, 18, 19, 20},
	})
	if err != nil {
		t.Fatal(err)
	}
	defer df.Release()

	name := "col2-f64"

	col := df.Column(name)
	if col == nil {
		t.Fatal("col should not be nil")
	}

	// Column should have the same name
	if got, want := col.Name(), name; got != want {
		t.Fatalf("got=%v, want=%v", got, want)
	}

	// Pointer should be the same
	cols := df.Columns()
	if got, want := &cols[1], col; got != want {
		t.Fatalf("got=%v, want=%v", got, want)
	}
}

// TestColumnAt checks lookup of a column by index.
func TestColumnAt(t *testing.T) {
	pool := memory.NewCheckedAllocator(memory.NewGoAllocator())
	defer pool.AssertSize(t, 0)

	df, err := NewDataFrameFromMem(pool, Dict{
		"col1-i32": []int32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
		"col2-f64": []float64{10, 12, 13, 14, 15, 16, 17, 18, 19, 20},
	})
	if err != nil {
		t.Fatal(err)
	}
	defer df.Release()

	col := df.ColumnAt(1)
	if col == nil {
		t.Fatal("col should not be nil")
	}

	// Pointer should be the same
	cols := df.Columns()
	if got, want := &cols[1], col; got != want {
		t.Fatalf("got=%v, want=%v", got, want)
	}
}

// TestLeftJoin checks a basic left join on columns A and D.
func TestLeftJoin(t *testing.T) {
	pool := memory.NewCheckedAllocator(memory.NewGoAllocator())
	defer pool.AssertSize(t, 0)

	leftDf, err := NewDataFrameFromMem(pool, Dict{
		"A": []float32{5, 2, 3, 1},
		"B": []float64{6, 4, 3, 2},
		"C": []float64{1.7, 2.3, 2.3, 7.8},
		"D": []float64{5, 1, 0, 0},
	})
	if err != nil {
		t.Fatal(err)
	}
	defer leftDf.Release()

	rightDf, err :=
NewDataFrameFromMem(pool, Dict{
		"A": []float32{5, 4, 2, 5},
		"F": []float64{7, 3, 5, 8},
		"D": []float64{5, 0, 0, 0},
	})
	if err != nil {
		t.Fatal(err)
	}
	defer rightDf.Release()

	joinedDf, err := leftDf.LeftJoin(rightDf, []string{"A", "D"})
	if err != nil {
		t.Fatal(err)
	}
	defer joinedDf.Release()

	got := joinedDf.Display(-1)
	want := `rec[0]["A"]: [5 2 3 1]
rec[0]["D"]: [5 1 0 0]
rec[0]["B"]: [6 4 3 2]
rec[0]["C"]: [1.7 2.3 2.3 7.8]
rec[0]["F"]: [7 (null) (null) (null)]
`
	if got != want {
		t.Fatalf("\ngot=\n%v\nwant=\n%v", got, want)
	}
}

func TestLeftJoinCase2(t *testing.T) {
	pool := memory.NewCheckedAllocator(memory.NewGoAllocator())
	defer pool.AssertSize(t, 0)

	// This test is meant to test LeftJoin
	// when there will be duplicate leftDf rows
	// because they matched more than one rightDf row.
	leftDf, err := NewDataFrameFromMem(pool, Dict{
		"A": []float64{5, 2, 3, 1},
		"B": []float64{6, 4, 3, 2},
		"C": []float64{1.7, 2.3, 2.3, 7.8},
		"D": []float64{5, 1, 0, 0},
	})
	if err != nil {
		t.Fatal(err)
	}
	defer leftDf.Release()

	rightDf, err := NewDataFrameFromMem(pool, Dict{
		"A": []float64{5, 4, 2, 5, 3, 3},
		"F": []float64{7, 3, 5, 8, 99, 44},
		"D": []float64{5, 0, 0, 0, 0, 0},
	})
	if err != nil {
		t.Fatal(err)
	}
	defer rightDf.Release()

	joinedDf, err := leftDf.LeftJoin(rightDf, []string{"A", "D"})
	if err != nil {
		t.Fatal(err)
	}
	defer joinedDf.Release()

	got := joinedDf.Display(-1)
	want := `rec[0]["A"]: [5 2 3 3 1]
rec[0]["D"]: [5 1 0 0 0]
rec[0]["B"]: [6 4 3 3 2]
rec[0]["C"]: [1.7 2.3 2.3 2.3 7.8]
rec[0]["F"]: [7 (null) 99 44 (null)]
`
	if got != want {
		t.Fatalf("\ngot=\n%v\nwant=\n%v", got, want)
	}
}

func TestLeftJoinCase3(t *testing.T) {
	pool := memory.NewCheckedAllocator(memory.NewGoAllocator())
	defer pool.AssertSize(t, 0)

	// This test is meant to test LeftJoin
	// when there is only one column to match on
	// that would result in duplicate columns.
	leftDf, err := NewDataFrameFromMem(pool, Dict{
		"A": []float64{5, 2, 3, 1},
		"B": []float64{6, 4, 3, 2},
		"C": []float64{1.7, 2.3, 2.3, 7.8},
		"D": []float64{5, 1, 0, 0},
	})
	if err != nil {
		t.Fatal(err)
	}
	defer leftDf.Release()

	rightDf, err := NewDataFrameFromMem(pool, Dict{
		"A": []float64{5, 4, 2, 5, 3, 3},
		"F": []float64{7, 3, 5, 8, 99, 44},
		"D": []float64{5, 0, 0, 0, 0, 0},
	})
	if err != nil {
		t.Fatal(err)
	}
	defer rightDf.Release()

	joinedDf, err := leftDf.LeftJoin(rightDf, []string{"A"})
	if err != nil {
		t.Fatal(err)
	}
	defer joinedDf.Release()

	got := joinedDf.Display(-1)
	want := `rec[0]["A"]: [5 5 2 3 3 1]
rec[0]["B"]: [6 6 4 3 3 2]
rec[0]["C"]: [1.7 1.7 2.3 2.3 2.3 7.8]
rec[0]["D_0"]: [5 5 1 0 0 0]
rec[0]["D_1"]: [5 0 0 0 0 (null)]
rec[0]["F"]: [7 8 5 99 44 (null)]
`
	if got != want {
		t.Fatalf("\ngot=\n%v\nwant=\n%v", got, want)
	}
}

// TestRightJoin checks a basic right join on columns A and D.
func TestRightJoin(t *testing.T) {
	pool := memory.NewCheckedAllocator(memory.NewGoAllocator())
	defer pool.AssertSize(t, 0)

	leftDf, err := NewDataFrameFromMem(pool, Dict{
		"A": []float64{5, 2, 3, 1},
		"B": []float64{6, 4, 3, 2},
		"C": []float64{1.7, 2.3, 2.3, 7.8},
		"D": []float64{5, 1, 0, 0},
	})
	if err != nil {
		t.Fatal(err)
	}
	defer leftDf.Release()

	rightDf, err := NewDataFrameFromMem(pool, Dict{
		"A": []float64{5, 4, 2, 5},
		"F": []float64{7, 3, 5,
8},
		"D": []float64{5, 0, 0, 0},
	})
	if err != nil {
		t.Fatal(err)
	}
	defer rightDf.Release()

	joinedDf, err := leftDf.RightJoin(rightDf, []string{"A", "D"})
	if err != nil {
		t.Fatal(err)
	}
	defer joinedDf.Release()

	got := joinedDf.Display(-1)
	want := `rec[0]["A"]: [5 4 2 5]
rec[0]["D"]: [5 0 0 0]
rec[0]["F"]: [7 3 5 8]
rec[0]["B"]: [6 (null) (null) (null)]
rec[0]["C"]: [1.7 (null) (null) (null)]
`
	if got != want {
		t.Fatalf("\ngot=\n%v\nwant=\n%v", got, want)
	}
}

func TestRightJoinCase2(t *testing.T) {
	pool := memory.NewCheckedAllocator(memory.NewGoAllocator())
	defer pool.AssertSize(t, 0)

	// This test is meant to test RightJoin
	// when there will be duplicate rightDf rows
	// because they matched more than one leftDf row.
	leftDf, err := NewDataFrameFromMem(pool, Dict{
		"A": []float64{5, 2, 5, 1, 4},
		"B": []float64{6, 4, 3, 2, 9},
		"C": []float64{1.7, 2.3, 2.3, 7.8, 9.1},
		"D": []float64{5, 1, 5, 0, 0},
	})
	if err != nil {
		t.Fatal(err)
	}
	defer leftDf.Release()

	rightDf, err := NewDataFrameFromMem(pool, Dict{
		"A": []float64{5, 4, 8},
		"F": []float64{7, 3, 8},
		"D": []float64{5, 0, 8},
	})
	if err != nil {
		t.Fatal(err)
	}
	defer rightDf.Release()

	joinedDf, err := leftDf.RightJoin(rightDf, []string{"A", "D"})
	if err != nil {
		t.Fatal(err)
	}
	defer joinedDf.Release()

	got := joinedDf.Display(-1)
	want := `rec[0]["A"]: [5 5 4 8]
rec[0]["D"]: [5 5 0 8]
rec[0]["F"]: [7 7 3 8]
rec[0]["B"]: [6 3 9 (null)]
rec[0]["C"]: [1.7 2.3 9.1 (null)]
`
	if got != want {
		t.Fatalf("\ngot=\n%v\nwant=\n%v", got, want)
	}
}

func TestRightJoinCase3(t *testing.T) {
	pool := memory.NewCheckedAllocator(memory.NewGoAllocator())
	defer pool.AssertSize(t, 0)

	// This test is meant to test RightJoin
	// when there is only one column to match on
	// that would result in duplicate columns.
	leftDf, err := NewDataFrameFromMem(pool, Dict{
		"A": []float64{5, 2, 3, 1},
		"B": []float64{6, 4, 3, 2},
		"C": []float64{1.7, 2.3, 2.3, 7.8},
		"D": []float64{5, 1, 0, 0},
	})
	if err != nil {
		t.Fatal(err)
	}
	defer leftDf.Release()

	rightDf, err := NewDataFrameFromMem(pool, Dict{
		"A": []float64{5, 4, 2, 5, 3, 3},
		"F": []float64{7, 3, 5, 8, 99, 44},
		"D": []float64{5, 0, 0, 0, 0, 0},
	})
	if err != nil {
		t.Fatal(err)
	}
	defer rightDf.Release()

	joinedDf, err := leftDf.RightJoin(rightDf, []string{"A"})
	if err != nil {
		t.Fatal(err)
	}
	defer joinedDf.Release()

	got := joinedDf.Display(-1)
	want := `rec[0]["A"]: [5 4 2 5 3 3]
rec[0]["D_1"]: [5 0 0 0 0 0]
rec[0]["F"]: [7 3 5 8 99 44]
rec[0]["B"]: [6 (null) 4 6 3 3]
rec[0]["C"]: [1.7 (null) 2.3 1.7 2.3 2.3]
rec[0]["D_0"]: [5 (null) 1 5 0 0]
`
	if got != want {
		t.Fatalf("\ngot=\n%v\nwant=\n%v", got, want)
	}
}

// TestInnerJoinCase1 checks an inner join that keeps only matching rows.
func TestInnerJoinCase1(t *testing.T) {
	pool := memory.NewCheckedAllocator(memory.NewGoAllocator())
	defer pool.AssertSize(t, 0)

	leftDf, err := NewDataFrameFromMem(pool, Dict{
		"A": []float64{1, 7, 6, 1},
		"B": []float64{2.1, 2.2, 2.3, 2.4},
		"C": []float64{3.3, 8.0, 8.0, 1.1},
		"D": []float64{5, 3, 2, 0},
	})
	if err != nil {
		t.Fatal(err)
	}
	defer leftDf.Release()

	rightDf, err := NewDataFrameFromMem(pool, Dict{
		"A": []float64{2, 0, 6, 1, 6},
		"F": []float64{2, 5, 2, 8, 9},
		"D": []float64{2, 7, 2, 2, 2},
	})
	if err != nil {
		t.Fatal(err)
	}
	defer rightDf.Release()

	joinedDf, err := leftDf.InnerJoin(rightDf, []string{"A", "D"})
	if err != nil {
		t.Fatal(err)
	}
	defer joinedDf.Release()

	got := joinedDf.Display(-1)
	want := `rec[0]["A"]: [6 6]
rec[0]["D"]: [2 2]
rec[0]["B"]: [2.3 2.3]
rec[0]["C"]: [8 8]
rec[0]["F"]: [2 9]
`
	if got != want {
		t.Fatalf("\ngot=\n%v\nwant=\n%v", got, want)
	}
}

// TestOuterJoin checks a full outer join on columns A and D.
func TestOuterJoin(t *testing.T) {
	pool := memory.NewCheckedAllocator(memory.NewGoAllocator())
	defer pool.AssertSize(t, 0)

	leftDf, err := NewDataFrameFromMem(pool, Dict{
		"A": []int32{5, 2, 3, 1},
		"B": []float64{6, 4, 3, 2},
		"C": []float64{1.7, 2.3, 2.3, 7.8},
		"D": []int64{5, 1, 0, 0},
	})
	if err != nil {
		t.Fatal(err)
	}
	defer leftDf.Release()

	rightDf, err := NewDataFrameFromMem(pool, Dict{
		"A": []int32{5, 4, 2, 5},
		"F": []float64{7, 3, 5, 8},
		"D": []int64{5, 0, 0, 0},
	})
	if err != nil {
		t.Fatal(err)
	}
	defer rightDf.Release()

	joinedDf, err := leftDf.OuterJoin(rightDf, []string{"A", "D"})
	if err != nil {
		t.Fatal(err)
	}
	defer joinedDf.Release()

	got := joinedDf.Display(-1)
	want := `rec[0]["A"]: [5 2 3 1 4 2 5]
rec[0]["D"]: [5 1 0 0 0 0 0]
rec[0]["B"]: [6 4 3 2 (null) (null) (null)]
rec[0]["C"]: [1.7 2.3 2.3 7.8 (null) (null) (null)]
rec[0]["F"]: [7 (null) (null) (null) 3 5 8]
`
	if got != want {
		t.Fatalf("\ngot=\n%v\nwant=\n%v", got, want)
	}
}

// TestOuterJoinCase2 checks an outer join on a single key with mixed int widths.
func TestOuterJoinCase2(t *testing.T) {
	pool := memory.NewCheckedAllocator(memory.NewGoAllocator())
	defer pool.AssertSize(t, 0)

	leftDf, err := NewDataFrameFromMem(pool, Dict{
		"A": []uint8{5, 2, 3, 1},
		"B": []float64{6, 4, 3, 2},
		"C": []float64{1.7, 2.3, 2.3, 7.8},
		"D": []int16{5, 1, 0, 0},
	})
	if err != nil {
		t.Fatal(err)
	}
	defer leftDf.Release()

	rightDf, err := NewDataFrameFromMem(pool, Dict{
		"A": []uint8{5, 4, 2, 5},
		"F": []int8{7, 3, 5, 8},
		"D": []int16{5, 0, 0, 0},
	})
	if err != nil {
		t.Fatal(err)
	}
	defer rightDf.Release()

	joinedDf, err := leftDf.OuterJoin(rightDf, []string{"A"})
	if err != nil {
		t.Fatal(err)
	}
	defer joinedDf.Release()

	got := joinedDf.Display(-1)
	want := `rec[0]["A"]: [5 5 2 3 1 4]
rec[0]["B"]: [6 6 4 3 2 (null)]
rec[0]["C"]: [1.7 1.7 2.3 2.3 7.8 (null)]
rec[0]["D_0"]: [5 5 1 0 0 (null)]
rec[0]["D_1"]: [5 0 0 (null) (null) 0]
rec[0]["F"]: [7 8 5 (null) (null) 3]
`
	if got != want {
		t.Fatalf("\ngot=\n%v\nwant=\n%v", got, want)
	}
}

func TestOuterJoinCase3(t *testing.T) {
	pool := memory.NewCheckedAllocator(memory.NewGoAllocator())
	defer pool.AssertSize(t, 0)

	// When elements are nil at the same location we should not consider them equal as they are unknown.
	// This follows SQL practices.
994 | leftDf, err := NewDataFrameFromMem(pool, Dict{ 995 | "A": []interface{}{nil, 2, 3, 1}, 996 | "B": []float64{6, 4, 3, 2}, 997 | "C": []float64{1.7, 2.3, 2.3, 7.8}, 998 | "D": []int64{5, 1, 0, 0}, 999 | }) 1000 | if err != nil { 1001 | t.Fatal(err) 1002 | } 1003 | defer leftDf.Release() 1004 | 1005 | rightDf, err := NewDataFrameFromMem(pool, Dict{ 1006 | "A": []interface{}{nil, 4, 2, 5}, 1007 | "F": []float64{7, 3, 5, 8}, 1008 | "D": []int64{5, 0, 0, 0}, 1009 | }) 1010 | if err != nil { 1011 | t.Fatal(err) 1012 | } 1013 | defer rightDf.Release() 1014 | 1015 | joinedDf, err := leftDf.OuterJoin(rightDf, []string{"A", "D"}) 1016 | if err != nil { 1017 | t.Fatal(err) 1018 | } 1019 | defer joinedDf.Release() 1020 | 1021 | got := joinedDf.Display(-1) 1022 | want := `rec[0]["A"]: [(null) 2 3 1 (null) 4 2 5] 1023 | rec[0]["D"]: [5 1 0 0 5 0 0 0] 1024 | rec[0]["B"]: [6 4 3 2 (null) (null) (null) (null)] 1025 | rec[0]["C"]: [1.7 2.3 2.3 7.8 (null) (null) (null) (null)] 1026 | rec[0]["F"]: [(null) (null) (null) (null) 7 3 5 8] 1027 | ` 1028 | if got != want { 1029 | t.Fatalf("\ngot=\n%v\nwant=\n%v", got, want) 1030 | } 1031 | } 1032 | 1033 | func TestCrossJoin(t *testing.T) { 1034 | pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) 1035 | defer pool.AssertSize(t, 0) 1036 | 1037 | leftDf, err := NewDataFrameFromMem(pool, Dict{ 1038 | "A": []int64{5, 2, 3, 1}, 1039 | "B": []float64{6, 4, 3, 2}, 1040 | "C": []float64{1.7, 2.3, 2.3, 7.8}, 1041 | "D": []float32{5, 1, 0, 0}, 1042 | }) 1043 | if err != nil { 1044 | t.Fatal(err) 1045 | } 1046 | defer leftDf.Release() 1047 | 1048 | rightDf, err := NewDataFrameFromMem(pool, Dict{ 1049 | "A": []int64{5, 4, 2, 5, 10}, 1050 | "F": []int32{7, 3, 5, 8, 11}, 1051 | "D": []float32{5, 0, 0, 0, 12}, 1052 | }) 1053 | if err != nil { 1054 | t.Fatal(err) 1055 | } 1056 | defer rightDf.Release() 1057 | 1058 | joinedDf, err := leftDf.CrossJoin(rightDf) 1059 | if err != nil { 1060 | t.Fatal(err) 1061 | } 1062 | defer joinedDf.Release() 
1063 | 1064 | got := joinedDf.Display(-1) 1065 | want := `rec[0]["A_0"]: [5 5 5 5 5 2 2 2 2 2 3 3 3 3 3 1 1 1 1 1] 1066 | rec[0]["B"]: [6 6 6 6 6 4 4 4 4 4 3 3 3 3 3 2 2 2 2 2] 1067 | rec[0]["C"]: [1.7 1.7 1.7 1.7 1.7 2.3 2.3 2.3 2.3 2.3 2.3 2.3 2.3 2.3 2.3 7.8 7.8 7.8 7.8 7.8] 1068 | rec[0]["D_0"]: [5 5 5 5 5 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0] 1069 | rec[0]["A_1"]: [5 4 2 5 10 5 4 2 5 10 5 4 2 5 10 5 4 2 5 10] 1070 | rec[0]["D_1"]: [5 0 0 0 12 5 0 0 0 12 5 0 0 0 12 5 0 0 0 12] 1071 | rec[0]["F"]: [7 3 5 8 11 7 3 5 8 11 7 3 5 8 11 7 3 5 8 11] 1072 | ` 1073 | if got != want { 1074 | t.Fatalf("\ngot=\n%v\nwant=\n%v", got, want) 1075 | } 1076 | } 1077 | 1078 | func TestJoinSuffix(t *testing.T) { 1079 | pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) 1080 | defer pool.AssertSize(t, 0) 1081 | 1082 | // This test is meant to test RightJoin 1083 | // when there is only one column to match on 1084 | // that would result in duplicate columns. 1085 | leftDf, err := NewDataFrameFromMem(pool, Dict{ 1086 | "A": []float64{5, 2, 3, 1}, 1087 | "B": []float64{6, 4, 3, 2}, 1088 | "C": []float64{1.7, 2.3, 2.3, 7.8}, 1089 | "D": []float64{5, 1, 0, 0}, 1090 | }) 1091 | if err != nil { 1092 | t.Fatal(err) 1093 | } 1094 | defer leftDf.Release() 1095 | 1096 | rightDf, err := NewDataFrameFromMem(pool, Dict{ 1097 | "A": []float64{5, 4, 2, 5, 3, 3}, 1098 | "F": []float64{7, 3, 5, 8, 99, 44}, 1099 | "D": []float64{5, 0, 0, 0, 0, 0}, 1100 | }) 1101 | if err != nil { 1102 | t.Fatal(err) 1103 | } 1104 | defer rightDf.Release() 1105 | 1106 | joinedDf, err := leftDf.RightJoin(rightDf, []string{"A"}, WithLsuffix("_left"), WithRsuffix("_right")) 1107 | if err != nil { 1108 | t.Fatal(err) 1109 | } 1110 | defer joinedDf.Release() 1111 | 1112 | got := joinedDf.Display(-1) 1113 | want := `rec[0]["A"]: [5 4 2 5 3 3] 1114 | rec[0]["D_right"]: [5 0 0 0 0 0] 1115 | rec[0]["F"]: [7 3 5 8 99 44] 1116 | rec[0]["B"]: [6 (null) 4 6 3 3] 1117 | rec[0]["C"]: [1.7 (null) 2.3 1.7 2.3 2.3] 1118 | 
rec[0]["D_left"]: [5 (null) 1 5 0 0] 1119 | ` 1120 | if got != want { 1121 | t.Fatalf("\ngot=\n%v\nwant=\n%v", got, want) 1122 | } 1123 | } 1124 | 1125 | func TestInconsistentDataTypesError(t *testing.T) { 1126 | // When elements are nil at the same location we should not consider them equal as they are unknown. 1127 | // This follows SQL practices. 1128 | pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) 1129 | defer pool.AssertSize(t, 0) 1130 | 1131 | df, err := NewDataFrameFromMem(pool, Dict{ 1132 | "A": []interface{}{nil, 2, 3, 1.2}, 1133 | "B": []float64{6, 4, 3, 2}, 1134 | "C": []float64{1.7, 2.3, 2.3, 7.8}, 1135 | "D": []int64{5, 1, 0, 0}, 1136 | }) 1137 | if err == nil { 1138 | defer df.Release() 1139 | } 1140 | 1141 | var v int 1142 | got := err 1143 | want := errors.Errorf("inconsistent data types for elements, expecting %v to be of type (%T)", 1.2, v) 1144 | if got.Error() != want.Error() { 1145 | t.Fatalf("\ngot=\n%v\nwant=\n%v", got, want) 1146 | } 1147 | } 1148 | 1149 | // multByN takes a DataFrame and multiplies a the column by the provided multipier. 
1150 | func multByN(columnName string, multipier float64) MutationFunc { 1151 | return func(df *DataFrame) (*DataFrame, error) { 1152 | col := df.Column(columnName) 1153 | schema := arrow.NewSchema([]arrow.Field{col.Field()}, nil) 1154 | builder := array.NewRecordBuilder(df.Allocator(), schema) 1155 | defer builder.Release() 1156 | smartBuilder := NewSmartBuilder(builder, schema) 1157 | valueIterator := iterator.NewFloat64ValueIterator(col) 1158 | defer valueIterator.Release() 1159 | for valueIterator.Next() { 1160 | value, isNil := valueIterator.Value() 1161 | if isNil { 1162 | smartBuilder.Append(0, nil) 1163 | } else { 1164 | value *= multipier 1165 | smartBuilder.Append(0, value) 1166 | } 1167 | } 1168 | rec := builder.NewRecord() 1169 | defer rec.Release() 1170 | chunk := array.NewChunked(col.DataType(), rec.Columns()) 1171 | defer chunk.Release() 1172 | newCol := array.NewColumn(col.Field(), chunk) 1173 | defer newCol.Release() 1174 | df2, err := df.Drop(columnName) 1175 | if err != nil { 1176 | return nil, err 1177 | } 1178 | defer df2.Release() 1179 | return df2.AppendColumn(newCol) 1180 | } 1181 | } 1182 | 1183 | func TestApply(t *testing.T) { 1184 | pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) 1185 | defer pool.AssertSize(t, 0) 1186 | 1187 | df, err := NewDataFrameFromMem(pool, Dict{ 1188 | "col1-i32": []int32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, 1189 | "col2-f64": []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, 1190 | }) 1191 | if err != nil { 1192 | t.Fatal(err) 1193 | } 1194 | defer df.Release() 1195 | 1196 | df2, err := df.Apply(multByN("col2-f64", 2.0), multByN("col2-f64", -1.0)) 1197 | if err != nil { 1198 | t.Fatal(err) 1199 | } 1200 | defer df2.Release() 1201 | 1202 | got := df2.Display(-1) 1203 | want := `rec[0]["col1-i32"]: [1 2 3 4 5 6 7 8 9 10] 1204 | rec[0]["col2-f64"]: [-2 -4 -6 -8 -10 -12 -14 -16 -18 -20] 1205 | ` 1206 | if got != want { 1207 | t.Fatalf("\ngot=\n%v\nwant=\n%v", got, want) 1208 | } 1209 | 1210 | if &df == &df2 { 1211 | 
t.Fatalf("references are the same. df is not a copy of df2 (%v) == (%v)", &df, &df2) 1212 | } 1213 | } 1214 | 1215 | func TestApplyToColumn(t *testing.T) { 1216 | pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) 1217 | defer pool.AssertSize(t, 0) 1218 | 1219 | df, err := NewDataFrameFromMem(pool, Dict{ 1220 | "col1-i32": []int32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, 1221 | "col2-f64": []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, 1222 | }) 1223 | if err != nil { 1224 | t.Fatal(err) 1225 | } 1226 | defer df.Release() 1227 | 1228 | df2, err := df.ApplyToColumn("col2-f64", "col2-f64-x2", func(v interface{}) (interface{}, error) { 1229 | // This function will be called for every element in "col2-f64" 1230 | if v == nil { 1231 | // can't multiply nil by anything 1232 | return nil, nil 1233 | } 1234 | value, ok := v.(float64) 1235 | if !ok { 1236 | return nil, errors.New("v is not a float64") 1237 | } 1238 | value *= 2 1239 | return value, nil 1240 | }) 1241 | 1242 | if err != nil { 1243 | t.Fatal(err) 1244 | } 1245 | defer df2.Release() 1246 | 1247 | got := df2.Display(-1) 1248 | want := `rec[0]["col1-i32"]: [1 2 3 4 5 6 7 8 9 10] 1249 | rec[0]["col2-f64"]: [1 2 3 4 5 6 7 8 9 10] 1250 | rec[0]["col2-f64-x2"]: [2 4 6 8 10 12 14 16 18 20] 1251 | ` 1252 | if got != want { 1253 | t.Fatalf("\ngot=\n%v\nwant=\n%v", got, want) 1254 | } 1255 | 1256 | if &df == &df2 { 1257 | t.Fatalf("references are the same. 
df is not a copy of df2 (%v) == (%v)", &df, &df2) 1258 | } 1259 | } 1260 | 1261 | func TestNewDataFrameFromTable(t *testing.T) { 1262 | pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) 1263 | defer pool.AssertSize(t, 0) 1264 | 1265 | records, schema := buildRecords(pool, t, 48) 1266 | for i := range records { 1267 | defer records[i].Release() 1268 | } 1269 | 1270 | table := array.NewTableFromRecords(schema, records) 1271 | defer table.Release() 1272 | 1273 | df, err := NewDataFrameFromTable(pool, table) 1274 | if err != nil { 1275 | t.Fatal(err) 1276 | } 1277 | defer df.Release() 1278 | 1279 | got := df.Display(-1) 1280 | want := `rec[0]["f1-i32"]: [1 2 3 4 5 6 7 8 (null) 10] 1281 | rec[0]["f2-f64"]: [1 2 3 4 5 6 7 8 (null) 10] 1282 | rec[1]["f1-i32"]: [11 12 13 14 15 16 17 18 19 20] 1283 | rec[1]["f2-f64"]: [11 12 13 14 15 16 17 18 19 20] 1284 | rec[2]["f1-i32"]: [31 32 33 34 35 36 37 38 39 48] 1285 | rec[2]["f2-f64"]: [31 32 33 34 35 36 37 38 39 40] 1286 | ` 1287 | if got != want { 1288 | t.Fatalf("\ngot=\n%v\nwant=\n%v", got, want) 1289 | } 1290 | } 1291 | -------------------------------------------------------------------------------- /dataframe/doc.go: -------------------------------------------------------------------------------- 1 | /* 2 | Package dataframe provides the DataFrame implementation. 3 | 4 | */ 5 | package dataframe 6 | -------------------------------------------------------------------------------- /dataframe/element.go: -------------------------------------------------------------------------------- 1 | package dataframe 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/apache/arrow/go/arrow" 7 | ) 8 | 9 | // Element is an interface for Elements within a Column. 10 | type Element interface { 11 | // Compare methods 12 | // Eq returns true if the left Element is equal to the right Element. 
 13 | // When both are nil Eq returns false because nil actually signifies "unknown"
 14 | // and you can't compare two things when you don't know what they are.
 15 | 	Eq(Element) (bool, error)
 16 | 	// EqStrict returns true if the left Element is equal to the right Element.
 17 | 	// When both are nil EqStrict returns true.
 18 | 	EqStrict(Element) (bool, error)
 19 | 	// Neq returns true when Eq returns false.
 20 | 	Neq(Element) (bool, error)
 21 | 	// Less returns true if the left Element is less than the right Element.
 22 | 	Less(Element) (bool, error)
 23 | 	// LessEq returns true if the left Element is less than or equal to the right Element.
 24 | 	LessEq(Element) (bool, error)
 25 | 	// Greater returns true if the left Element is greater than the right Element.
 26 | 	Greater(Element) (bool, error)
 27 | 	// GreaterEq returns true if the left Element is greater than or equal to the right Element.
 28 | 	GreaterEq(Element) (bool, error)
 29 | 
 30 | 	// Accessor/conversion methods
 31 | 
 32 | 	// Copy returns a copy of this Element.
 33 | 	Copy() Element
 34 | 
 35 | 	// Information methods
 36 | 
 37 | 	// String prints the value of this element as a string.
 38 | 	String() string
 39 | 	// IsNil returns true when the underlying value is nil.
 40 | 	IsNil() bool
 41 | }
 42 | 
 43 | // CastElement returns an Element type for the passed DataType and value v.
44 | func CastElement(dtype arrow.DataType, v interface{}) Element { 45 | switch dtype.(type) { 46 | // case *arrow.NullType: // TODO: implement 47 | // case *arrow.BooleanType: // TODO: implement 48 | case *arrow.Uint8Type: 49 | return NewUint8Element(v) 50 | case *arrow.Int8Type: 51 | return NewInt8Element(v) 52 | case *arrow.Uint16Type: 53 | return NewUint16Element(v) 54 | case *arrow.Int16Type: 55 | return NewInt16Element(v) 56 | case *arrow.Uint32Type: 57 | return NewUint32Element(v) 58 | case *arrow.Int32Type: 59 | return NewInt32Element(v) 60 | case *arrow.Uint64Type: 61 | return NewUint64Element(v) 62 | case *arrow.Int64Type: 63 | return NewInt64Element(v) 64 | // case arrow.HALF_FLOAT: // TODO: implement? 65 | case *arrow.Float32Type: 66 | return NewFloat32Element(v) 67 | case *arrow.Float64Type: 68 | return NewFloat64Element(v) 69 | case *arrow.Date32Type: 70 | return NewDate32Element(v) 71 | case *arrow.Date64Type: 72 | return NewDate64Element(v) 73 | // case *arrow.StringType: // TODO: implement 74 | 75 | } 76 | panic(fmt.Errorf("bullseye/element: unsupported element for %T", dtype)) 77 | } 78 | -------------------------------------------------------------------------------- /dataframe/element_numeric.gen.go.tmpl: -------------------------------------------------------------------------------- 1 | package dataframe 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/apache/arrow/go/arrow" 7 | "github.com/go-bullseye/bullseye" 8 | ) 9 | 10 | {{range .In}} 11 | // {{.Name}}Element has logic to apply to this type. 12 | type {{.Name}}Element struct { 13 | v interface{} 14 | } 15 | 16 | // New{{.Name}}Element creates a new {{.Name}}Element logic wrapper 17 | // from the given value provided as v. 18 | func New{{.Name}}Element(v interface{}) *{{.Name}}Element { 19 | return &{{.Name}}Element{ 20 | v: v, 21 | } 22 | } 23 | 24 | // compare takes the left and right elements and applies the comparator function to them. 
 25 | func (e {{.Name}}Element) compare(r Element, f func(left, right {{or .QualifiedType .Type}}) bool) (bool, error) {
 26 | 	rE, ok := r.(*{{.Name}}Element)
 27 | 	if !ok {
 28 | 		return false, fmt.Errorf("cannot cast %v to {{.Name}}Element", r)
 29 | 	}
 30 | 
 31 | 	// When their nil status isn't the same, we can't compare them.
 32 | 	// Explicit both nil should be handled elsewhere.
 33 | 	if e.IsNil() != rE.IsNil() {
 34 | 		return false, nil
 35 | 	}
 36 | 
 37 | 	lv, lok := e.v.({{or .QualifiedType .Type}})
 38 | 	if !lok {
 39 | 		return false, fmt.Errorf("cannot assert %v is a {{or .QualifiedType .Type}}", e.v)
 40 | 	}
 41 | 	rv, rok := rE.v.({{or .QualifiedType .Type}})
 42 | 	if !rok {
 43 | 		return false, fmt.Errorf("cannot assert %v is a {{or .QualifiedType .Type}}", rE.v)
 44 | 	}
 45 | 
 46 | 	return f(lv, rv), nil
 47 | }
 48 | 
 49 | // Comparison methods
 50 | 
 51 | // Eq returns true if the left {{.Name}}Element is equal to the right {{.Name}}Element.
 52 | // When both are nil Eq returns false because nil actually signifies "unknown"
 53 | // and you can't compare two things when you don't know what they are.
 54 | func (e {{.Name}}Element) Eq(r Element) (bool, error) {
 55 | 	if e.IsNil() && r.IsNil() {
 56 | 		return false, nil
 57 | 	}
 58 | 	return e.compare(r, func(left, right {{or .QualifiedType .Type}}) bool {
 59 | 		return left == right
 60 | 	})
 61 | }
 62 | 
 63 | // EqStrict returns true if the left {{.Name}}Element is equal to the right {{.Name}}Element.
 64 | // When both are nil EqStrict returns true.
 65 | func (e {{.Name}}Element) EqStrict(r Element) (bool, error) {
 66 | 	if e.IsNil() && r.IsNil() {
 67 | 		return true, nil
 68 | 	}
 69 | 	return e.compare(r, func(left, right {{or .QualifiedType .Type}}) bool {
 70 | 		return left == right
 71 | 	})
 72 | }
 73 | 
 74 | // Neq returns true if the left {{.Name}}Element
 75 | // is not equal to the right {{.Name}}Element.
76 | func (e {{.Name}}Element) Neq(r Element) (bool, error) { 77 | v, ok := e.Eq(r) 78 | return !v, ok 79 | } 80 | 81 | // Less returns true if the left {{.Name}}Element 82 | // is less than the right {{.Name}}Element. 83 | func (e {{.Name}}Element) Less(r Element) (bool, error) { 84 | return e.compare(r, func(left, right {{or .QualifiedType .Type}}) bool { 85 | return left < right 86 | }) 87 | } 88 | 89 | // LessEq returns true if the left {{.Name}}Element 90 | // is less than or equal to the right {{.Name}}Element. 91 | func (e {{.Name}}Element) LessEq(r Element) (bool, error) { 92 | return e.compare(r, func(left, right {{or .QualifiedType .Type}}) bool { 93 | return left <= right 94 | }) 95 | } 96 | 97 | // Greater returns true if the left {{.Name}}Element 98 | // is greter than the right {{.Name}}Element. 99 | func (e {{.Name}}Element) Greater(r Element) (bool, error) { 100 | return e.compare(r, func(left, right {{or .QualifiedType .Type}}) bool { 101 | return left > right 102 | }) 103 | } 104 | 105 | // GreaterEq returns true if the left {{.Name}}Element 106 | // is greter than or equal to the right {{.Name}}Element. 107 | func (e {{.Name}}Element) GreaterEq(r Element) (bool, error) { 108 | return e.compare(r, func(left, right {{or .QualifiedType .Type}}) bool { 109 | return left >= right 110 | }) 111 | } 112 | 113 | // Accessor/conversion methods 114 | 115 | // Copy returns a copy of this {{.Name}}Element. 116 | func (e {{.Name}}Element) Copy() Element { 117 | return e 118 | } 119 | 120 | // String prints the value of this element as a string. 121 | func (e {{.Name}}Element) String() string { 122 | return fmt.Sprintf("%v", e.v) 123 | } 124 | 125 | // Information methods 126 | 127 | // IsNil returns true when the underlying value is nil. 
128 | func (e {{.Name}}Element) IsNil() bool { 129 | return e.v == nil 130 | } 131 | 132 | 133 | {{end}} 134 | -------------------------------------------------------------------------------- /dataframe/example_test.go: -------------------------------------------------------------------------------- 1 | package dataframe_test 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/apache/arrow/go/arrow/memory" 7 | "github.com/go-bullseye/bullseye/dataframe" 8 | ) 9 | 10 | // This example demonstrates creating a new DataFrame from memory 11 | // using a Dict and displaying the contents of it. 12 | func Example_newDataFrameFromMemory() { 13 | pool := memory.NewGoAllocator() 14 | df, _ := dataframe.NewDataFrameFromMem(pool, dataframe.Dict{ 15 | "col1": []int32{1, 2, 3, 4, 5}, 16 | "col2": []float64{1.1, 2.2, 3.3, 4.4, 5}, 17 | "col3": []string{"foo", "bar", "ping", "", "pong"}, 18 | "col4": []interface{}{2, 4, 6, nil, 8}, 19 | }) 20 | defer df.Release() 21 | fmt.Printf("DataFrame:\n%s\n", df.Display(0)) 22 | 23 | // Output: 24 | // DataFrame: 25 | // rec[0]["col1"]: [1 2 3 4 5] 26 | // rec[0]["col2"]: [1.1 2.2 3.3 4.4 5] 27 | // rec[0]["col3"]: ["foo" "bar" "ping" "" "pong"] 28 | // rec[0]["col4"]: [2 4 6 (null) 8] 29 | } 30 | -------------------------------------------------------------------------------- /dataframe/mutations.go: -------------------------------------------------------------------------------- 1 | package dataframe 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/apache/arrow/go/arrow" 7 | "github.com/apache/arrow/go/arrow/array" 8 | "github.com/apache/arrow/go/arrow/memory" 9 | "github.com/go-bullseye/bullseye/iterator" 10 | "github.com/pkg/errors" 11 | ) 12 | 13 | // Mutator is a type that has some standard mutations. 14 | type Mutator struct { 15 | // Almost all mutations will require setting up new memory as they create new a DataFrame. 16 | // So we need to provide the ability to set the Allocator. 
17 | mem memory.Allocator 18 | } 19 | 20 | // NewMutator creates a new mutator. 21 | func NewMutator(mem memory.Allocator) *Mutator { 22 | return &Mutator{ 23 | mem: mem, 24 | } 25 | } 26 | 27 | // MutationFunc is a function that mutates an existing DataFrame and returns a new DataFrame or an error. 28 | type MutationFunc func(*DataFrame) (*DataFrame, error) 29 | 30 | // Select the given DataFrame columns by name. 31 | func (m *Mutator) Select(names ...string) MutationFunc { 32 | return func(df *DataFrame) (*DataFrame, error) { 33 | cols := df.SelectColumns(names...) 34 | return NewDataFrameFromShape(m.mem, cols, df.NumRows()) 35 | } 36 | } 37 | 38 | // Drop the given DataFrame columns by name. 39 | func (m *Mutator) Drop(names ...string) MutationFunc { 40 | return func(df *DataFrame) (*DataFrame, error) { 41 | cols := df.RejectColumns(names...) 42 | return NewDataFrameFromShape(m.mem, cols, df.NumRows()) 43 | } 44 | } 45 | 46 | // Slice creates a new DataFrame consisting of rows[beg:end]. 47 | func (m *Mutator) Slice(beg, end int64) MutationFunc { 48 | return func(df *DataFrame) (*DataFrame, error) { 49 | if end > df.NumRows() || beg > end { 50 | return nil, errors.Errorf("mutation: index out of range") 51 | } 52 | 53 | dfCols := df.Columns() 54 | 55 | cols := make([]array.Column, len(dfCols)) 56 | for i, col := range dfCols { 57 | cols[i] = *col.NewSlice(beg, end) 58 | } 59 | 60 | defer func() { 61 | for i := range cols { 62 | cols[i].Release() 63 | } 64 | }() 65 | 66 | rows := end - beg 67 | return NewDataFrameFromShape(m.mem, cols, rows) 68 | } 69 | } 70 | 71 | // leftJoinConfig are the config params for LeftJoin. 72 | type leftJoinConfig struct { 73 | lsuffix string 74 | rsuffix string 75 | } 76 | 77 | // newLeftJoinConfig creates a new config using options and validates it. 
78 | func newLeftJoinConfig(opts ...Option) (*leftJoinConfig, error) { 79 | cfg := defaultLeftJoinConfig() 80 | for _, opt := range opts { 81 | if err := opt(cfg); err != nil { 82 | return cfg, err 83 | } 84 | } 85 | err := cfg.validate() 86 | return cfg, err 87 | } 88 | 89 | func (c *leftJoinConfig) validate() error { 90 | if c.lsuffix == c.rsuffix { 91 | return errors.Errorf("lsuffix (%s) cannot be the same as rsuffix (%s)", c.lsuffix, c.rsuffix) 92 | } 93 | return nil 94 | } 95 | 96 | // defaultLeftJoinConfig returns the default defaultLeftJoinConfig. 97 | func defaultLeftJoinConfig() *leftJoinConfig { 98 | return &leftJoinConfig{ 99 | lsuffix: "_0", 100 | rsuffix: "_1", 101 | } 102 | } 103 | 104 | // WithLsuffix configures a right or left join to use the provided left suffix. 105 | func WithLsuffix(lsuffix string) Option { 106 | return func(p interface{}) error { 107 | o, ok := p.(*leftJoinConfig) 108 | if !ok { 109 | return errors.Errorf("cannot apply WithLsuffix to: %T", p) 110 | } 111 | o.lsuffix = lsuffix 112 | return nil 113 | } 114 | } 115 | 116 | // WithRsuffix configures a right or left join to use the provided left suffix. 117 | func WithRsuffix(rsuffix string) Option { 118 | return func(p interface{}) error { 119 | o, ok := p.(*leftJoinConfig) 120 | if !ok { 121 | return errors.Errorf("cannot apply WithRsuffix to: %T", p) 122 | } 123 | o.rsuffix = rsuffix 124 | return nil 125 | } 126 | } 127 | 128 | // RightJoin returns a DataFrame containing the right join of two DataFrames. 129 | // Acts like SQL in that nil elements are treated as unknown so nil != nil. 130 | func (m *Mutator) RightJoin(rightDf *DataFrame, columnNames []string, opts ...Option) MutationFunc { 131 | // RightJoin is just a LeftJoin in reverse order. 132 | cfg, err := newLeftJoinConfig(opts...) 
133 | if err == nil { 134 | // Swap lsuffix and rsuffix 135 | lsuffix := cfg.lsuffix 136 | cfg.lsuffix = cfg.rsuffix 137 | cfg.rsuffix = lsuffix 138 | } 139 | 140 | return func(leftDf *DataFrame) (*DataFrame, error) { 141 | if err != nil { 142 | return nil, err 143 | } 144 | 145 | // We swap leftDf and rightDf 146 | data, err := m.leftJoin(cfg, rightDf, leftDf, columnNames) 147 | if err != nil { 148 | return nil, err 149 | } 150 | defer data.Release() 151 | // return fn(rightDf) 152 | return data.buildDataFrame() 153 | } 154 | } 155 | 156 | // LeftJoin returns a DataFrame containing the left join of two DataFrames. 157 | // Acts like SQL in that nil elements are treated as unknown so nil != nil. 158 | func (m *Mutator) LeftJoin(rightDf *DataFrame, columnNames []string, opts ...Option) MutationFunc { 159 | cfg, err := newLeftJoinConfig(opts...) 160 | return func(leftDf *DataFrame) (*DataFrame, error) { 161 | if err != nil { 162 | return nil, err 163 | } 164 | 165 | data, err := m.leftJoin(cfg, leftDf, rightDf, columnNames) 166 | if err != nil { 167 | return nil, err 168 | } 169 | defer data.Release() 170 | return data.buildDataFrame() 171 | } 172 | } 173 | 174 | type joinFuncConfig struct { 175 | // Keep a ref to the mutator that created it 176 | mutator *Mutator 177 | 178 | matchingLeftColsLen int 179 | matchingRightColsLen int 180 | additionalLeftColsLen int 181 | additionalRightColsLen int 182 | columnNames []string 183 | leftColumns []array.Column 184 | rightColumns []array.Column 185 | schema *arrow.Schema 186 | recordBuilder *array.RecordBuilder 187 | smartBuilder *SmartBuilder 188 | } 189 | 190 | // newJoinFuncConfig builds up all the data needed to do a join. 
191 | // TODO(nickpoorman): maybe rename leftJoinConfig if this is going to be used for other joins 192 | func (m *Mutator) newJoinFuncConfig(cfg *leftJoinConfig, leftDf *DataFrame, rightDf *DataFrame, columnNames []string, forceNullable bool) (*joinFuncConfig, error) { 193 | jc := &joinFuncConfig{ 194 | mutator: m, 195 | columnNames: columnNames, 196 | leftColumns: make([]array.Column, 0, leftDf.NumCols()), 197 | rightColumns: make([]array.Column, 0, rightDf.NumCols()), 198 | } 199 | 200 | // Start by making sure that both DataFrames have the columns we are looking for. 201 | for _, name := range columnNames { 202 | leftColumn := leftDf.Column(name) 203 | if leftColumn == nil { 204 | return nil, errors.Errorf("bullseye/mutations: column %s is not in left DataFrame: (%v)", name, leftDf.ColumnNames()) 205 | } 206 | rightColumn := rightDf.Column(name) 207 | if rightColumn == nil { 208 | return nil, errors.Errorf("bullseye/mutations: column %s is not in right DataFrame: (%v)", name, rightDf.ColumnNames()) 209 | } 210 | 211 | jc.leftColumns = append(jc.leftColumns, *leftColumn) 212 | jc.rightColumns = append(jc.rightColumns, *rightColumn) 213 | } 214 | // Keep track of the number of matching left and right columns. (They should be the same number) 215 | jc.matchingLeftColsLen = len(jc.leftColumns) 216 | jc.matchingRightColsLen = len(jc.rightColumns) 217 | 218 | // We will end up needing to iterate over the columns for left in step so join them back together. 219 | jc.leftColumns = append(jc.leftColumns, leftDf.RejectColumns(columnNames...)...) 220 | jc.rightColumns = append(jc.rightColumns, rightDf.RejectColumns(columnNames...)...) 221 | 222 | // Keep track of the lengths. Now that we have appended the other columns. 
223 | jc.additionalLeftColsLen = len(jc.leftColumns) - jc.matchingLeftColsLen 224 | jc.additionalRightColsLen = len(jc.rightColumns) - jc.matchingRightColsLen 225 | 226 | // get all the fields that make up the schema 227 | fields := make([]arrow.Field, 0, jc.matchingLeftColsLen+jc.additionalLeftColsLen+jc.additionalRightColsLen) 228 | for i := 0; i < len(jc.leftColumns); i++ { 229 | fields = append(fields, jc.leftColumns[i].Field()) 230 | } 231 | for i := jc.matchingRightColsLen; i < len(jc.rightColumns); i++ { 232 | fcopy := jc.rightColumns[i].Field() 233 | if forceNullable { 234 | // This column's values must be nullable since there may not be any matches. 235 | fcopy.Nullable = true 236 | } 237 | // If there are any existing fields that have this name we must change the names. 238 | name := fcopy.Name 239 | // Start at the end of the matching ones because those clearly wont have a conflict. 240 | for i := jc.matchingLeftColsLen; i < len(jc.leftColumns); i++ { 241 | if fields[i].Name == name { 242 | fields[i].Name = fmt.Sprintf("%s%s", name, cfg.lsuffix) 243 | fcopy.Name = fmt.Sprintf("%s%s", name, cfg.rsuffix) 244 | break 245 | } 246 | } 247 | 248 | fields = append(fields, fcopy) 249 | } 250 | 251 | jc.schema = arrow.NewSchema(fields, nil) 252 | jc.recordBuilder = array.NewRecordBuilder(m.mem, jc.schema) 253 | jc.smartBuilder = NewSmartBuilder(jc.recordBuilder, jc.schema) 254 | 255 | return jc, nil 256 | } 257 | 258 | func (jc *joinFuncConfig) Release() { 259 | jc.recordBuilder.Release() 260 | } 261 | 262 | func (jc *joinFuncConfig) buildDataFrame() (*DataFrame, error) { 263 | rec := jc.recordBuilder.NewRecord() 264 | defer rec.Release() 265 | return NewDataFrame(jc.mutator.mem, jc.schema, rec.Columns()) 266 | } 267 | 268 | // This leftJoin implementation is shared by both LeftJoin and RightJoin. 269 | // Acts like SQL in that nil elements are treated as unknown so nil != nil. 
// leftJoin runs the shared left-join pass over leftDf and rightDf and returns
// the populated joinFuncConfig; the caller (e.g. OuterJoin below) finishes
// building the resulting DataFrame from it and is responsible for Release.
// Acts like SQL in that nil elements are treated as unknown so nil != nil.
func (m *Mutator) leftJoin(cfg *leftJoinConfig, leftDf *DataFrame, rightDf *DataFrame, columnNames []string) (*joinFuncConfig, error) {
	data, err := m.newJoinFuncConfig(cfg, leftDf, rightDf, columnNames, true)
	if err != nil {
		return nil, err
	}

	sharedLeftJoinLogic(data, func(appendEmptyRow bool, leftStepValues *iterator.StepValue) {
		if appendEmptyRow {
			// If nothing matched then we append the row once with nil for additional right columns.
			cIdx := 0

			// Add all the values from left columns.
			for i := range leftStepValues.Values {
				data.smartBuilder.Append(cIdx, leftStepValues.Values[i])
				cIdx++
			}

			for i := 0; i < data.additionalRightColsLen; i++ {
				// cIdx is the offset to the start of the additionalRightCols in smartBuilder.
				data.smartBuilder.Append(cIdx+i, nil)
			}
		}
	})

	return data, nil
}

// sharedLeftJoinLogic is the nested-loop core shared by the left-style joins:
// for every left row it scans every right row (O(left*right)), appending one
// combined output row per match, then calls iterationEndFunc with a flag that
// is true when the left row matched nothing (so LeftJoin can null-pad and
// InnerJoin can ignore it).
// Acts like SQL in that nil elements are treated as unknown so nil != nil.
func sharedLeftJoinLogic(data *joinFuncConfig, iterationEndFunc func(bool, *iterator.StepValue)) {
	// What I want here is a step iterator for the matchingLeftCols.
	leftMatchingIterator := iterator.NewStepIteratorForColumns(data.leftColumns)
	defer leftMatchingIterator.Release()
	for leftMatchingIterator.Next() { // Iterate through every row in the left df.
		leftStepValues := leftMatchingIterator.Values()
		// If we don't find a match, we'll need to append an empty row.
		appendEmptyRow := true

		// The inner scan runs inside its own closure so the right iterator's
		// deferred Release fires once per left row, not at function exit.
		func() {
			// What I want here is a step iterator for the matchingRightCols.
			rightMatchingIterator := iterator.NewStepIteratorForColumns(data.rightColumns)
			defer rightMatchingIterator.Release()
			for rightMatchingIterator.Next() { // Iterate through every row in the right df.
				rightStepValues := rightMatchingIterator.Values()
				match := true

				// For each matching column,
				// check if the row on the left,
				// matches with the rows on the right.
				for columnIndex := range data.columnNames {
					match = match && stepValueEqAt(leftStepValues, rightStepValues, columnIndex)
				}

				if match {
					// For each match, we append a new row with the
					// left columns values and the additional right column values.
					appendEmptyRow = false

					// Keep track of the number of columns we need to offset by so we know what index we are on.
					cIdx := 0

					// Add all the values from left columns.
					for i := range leftStepValues.Values {
						data.smartBuilder.Append(cIdx, leftStepValues.Values[i])
						cIdx++
					}

					// Do the dance we did above and append the elements to each column for additionalRightCols.
					for i := data.matchingRightColsLen; i < len(data.rightColumns); i++ {
						value := rightStepValues.Values[i]
						data.smartBuilder.Append(cIdx, value)
						cIdx++
					}
				}
			}
		}()
		iterationEndFunc(appendEmptyRow, leftStepValues)
	}
}

// InnerJoin returns a DataFrame containing the inner join of two DataFrames.
// Acts like SQL in that nil elements are treated as unknown so nil != nil.
func (m *Mutator) InnerJoin(rightDf *DataFrame, columnNames []string, opts ...Option) MutationFunc {
	cfg, err := newLeftJoinConfig(opts...)
	return func(leftDf *DataFrame) (*DataFrame, error) {
		// The option-parsing error is surfaced when the MutationFunc runs,
		// keeping the outer signature free of an error return.
		if err != nil {
			return nil, err
		}

		data, err := m.newJoinFuncConfig(cfg, leftDf, rightDf, columnNames, false)
		if err != nil {
			return nil, err
		}
		defer data.Release()

		// InnerJoin is basically LeftJoin without appending nulls in iterationEndFunc so we stub that callback.
		sharedLeftJoinLogic(data, func(bool, *iterator.StepValue) {})

		return data.buildDataFrame()
	}
}

// OuterJoin returns a DataFrame containing the outer join of two DataFrames.
// Use union of keys from both frames, similar to a SQL full outer join.
// Acts like SQL in that nil elements are treated as unknown so nil != nil.
func (m *Mutator) OuterJoin(rightDf *DataFrame, columnNames []string, opts ...Option) MutationFunc {
	cfg, err := newLeftJoinConfig(opts...)
	return func(leftDf *DataFrame) (*DataFrame, error) {
		if err != nil {
			return nil, err
		}

		// First do a full left join; that covers every left row.
		data, err := m.leftJoin(cfg, leftDf, rightDf, columnNames)
		if err != nil {
			return nil, err
		}
		defer data.Release()

		// Now we iterate over the right first, appending only the right rows
		// that matched no left row (the left-matched ones are already built).
		rightIterator := iterator.NewStepIteratorForColumns(data.rightColumns)
		defer rightIterator.Release()
		for rightIterator.Next() { // Iterate through every row in the right df.
			rightStepValues := rightIterator.Values()
			// If we don't find a match, we'll need to append an empty row.

			if !outerJoinAnyRowsMatch(rightStepValues, data) {
				// Keep track of the number of columns we need to offset by so we know what index we are on.
				cIdx := 0

				// Add all the values from right matching columns.
				for i := 0; i < data.matchingRightColsLen; i++ {
					value := rightStepValues.Values[i]
					data.smartBuilder.Append(cIdx, value)
					cIdx++
				}

				// Add nil for not matching left columns.
				for i := 0; i < data.additionalLeftColsLen; i++ {
					data.smartBuilder.Append(cIdx, nil)
					cIdx++
				}

				// Add the additional values from the right.
				for i := data.matchingRightColsLen; i < data.matchingRightColsLen+data.additionalRightColsLen; i++ {
					value := rightStepValues.Values[i]
					data.smartBuilder.Append(cIdx, value)
					cIdx++
				}
			}
		}

		return data.buildDataFrame()
	}
}

// outerJoinAnyRowsMatch reports whether any left row matches rightStepValues
// on all of the join columns. Note this rescans the whole left side per right
// row, so OuterJoin's right pass is O(left*right) like the join core itself.
func outerJoinAnyRowsMatch(rightStepValues *iterator.StepValue, data *joinFuncConfig) bool {
	leftIterator := iterator.NewStepIteratorForColumns(data.leftColumns)
	defer leftIterator.Release()
	for leftIterator.Next() { // Iterate through every row in the left df.
		leftStepValues := leftIterator.Values()
		match := true

		// For each matching column,
		// check if the row on the left,
		// matches with the rows on the right.
		for columnIndex := range data.columnNames {
			match = match && stepValueEqAt(leftStepValues, rightStepValues, columnIndex)
		}

		if match {
			return true
		}
	}

	return false
}

// CrossJoin returns a DataFrame containing the cross join of two DataFrames.
// Every left row is paired with every right row; no columnNames are passed to
// newJoinFuncConfig because there is no match condition.
func (m *Mutator) CrossJoin(rightDf *DataFrame, opts ...Option) MutationFunc {
	cfg, err := newLeftJoinConfig(opts...)
	return func(leftDf *DataFrame) (*DataFrame, error) {
		if err != nil {
			return nil, err
		}

		data, err := m.newJoinFuncConfig(cfg, leftDf, rightDf, nil, false)
		if err != nil {
			return nil, err
		}
		defer data.Release()

		leftMatchingIterator := iterator.NewStepIteratorForColumns(data.leftColumns)
		defer leftMatchingIterator.Release()
		for leftMatchingIterator.Next() { // Iterate through every row in the left df.
			leftStepValues := leftMatchingIterator.Values()

			// Closure scoping so the right iterator is released per left row.
			func() {
				rightMatchingIterator := iterator.NewStepIteratorForColumns(data.rightColumns)
				defer rightMatchingIterator.Release()
				for rightMatchingIterator.Next() { // Iterate through every row in the right df.
					rightStepValues := rightMatchingIterator.Values()

					cIdx := 0

					// Add all columns from both frames.
					for i := range leftStepValues.Values {
						data.smartBuilder.Append(cIdx, leftStepValues.Values[i])
						cIdx++
					}
					for i := range rightStepValues.Values {
						data.smartBuilder.Append(cIdx, rightStepValues.Values[i])
						cIdx++
					}
				}
			}()
		}

		return data.buildDataFrame()
	}
}

// stepValueEqAt reports whether the i-th values of left and right are equal,
// comparing them via the Element API. It panics if Eq returns an error (i.e.
// the two elements cannot be compared).
func stepValueEqAt(left *iterator.StepValue, right *iterator.StepValue, i int) bool {
	lElem := StepValueElementAt(left, i)
	rElem := StepValueElementAt(right, i)

	v, err := lElem.Eq(rElem)
	if err != nil {
		panic(err)
	}

	return v
}
22 | func NewSmartBuilder(recordBuilder *array.RecordBuilder, schema *arrow.Schema) *SmartBuilder { 23 | sb := &SmartBuilder{ 24 | recordBuilder: recordBuilder, 25 | schema: schema, 26 | fieldAppenders: make([]AppenderFunc, 0, len(schema.Fields())), 27 | } 28 | 29 | fields := sb.schema.Fields() 30 | for i := range fields { 31 | fn := initFieldAppender(&fields[i]) 32 | sb.fieldAppenders = append(sb.fieldAppenders, fn) 33 | } 34 | 35 | return sb 36 | } 37 | 38 | // Append will append the value to the builder. 39 | func (sb *SmartBuilder) Append(fieldIndex int, v interface{}) { 40 | field := sb.recordBuilder.Field(fieldIndex) 41 | appendFunc := sb.fieldAppenders[fieldIndex] 42 | if appendFunc == nil { 43 | fmt.Fprintln(os.Stderr, "warn: appendFunc is nil") 44 | } 45 | appendFunc(field, v) 46 | } 47 | 48 | func initFieldAppender(field *arrow.Field) AppenderFunc { 49 | switch field.Type.(type) { 50 | case *arrow.BooleanType: 51 | return func(field array.Builder, v interface{}) { 52 | builder := field.(*array.BooleanBuilder) 53 | if v == nil { 54 | builder.AppendNull() 55 | } else { 56 | vT := v.(bool) 57 | builder.Append(vT) 58 | } 59 | } 60 | case *arrow.Int8Type: 61 | return func(field array.Builder, v interface{}) { 62 | builder := field.(*array.Int8Builder) 63 | if v == nil { 64 | builder.AppendNull() 65 | } else { 66 | vT := v.(int8) 67 | builder.Append(vT) 68 | } 69 | } 70 | case *arrow.Int16Type: 71 | return func(field array.Builder, v interface{}) { 72 | builder := field.(*array.Int16Builder) 73 | if v == nil { 74 | builder.AppendNull() 75 | } else { 76 | vT := v.(int16) 77 | builder.Append(vT) 78 | } 79 | } 80 | case *arrow.Int32Type: 81 | return func(field array.Builder, v interface{}) { 82 | builder := field.(*array.Int32Builder) 83 | if v == nil { 84 | builder.AppendNull() 85 | } else { 86 | vT := v.(int32) 87 | builder.Append(vT) 88 | } 89 | } 90 | case *arrow.Int64Type: 91 | return func(field array.Builder, v interface{}) { 92 | builder := 
field.(*array.Int64Builder) 93 | if v == nil { 94 | builder.AppendNull() 95 | } else { 96 | vT := v.(int64) 97 | builder.Append(vT) 98 | } 99 | } 100 | case *arrow.Uint8Type: 101 | return func(field array.Builder, v interface{}) { 102 | builder := field.(*array.Uint8Builder) 103 | if v == nil { 104 | builder.AppendNull() 105 | } else { 106 | vT := v.(uint8) 107 | builder.Append(vT) 108 | } 109 | } 110 | case *arrow.Uint16Type: 111 | return func(field array.Builder, v interface{}) { 112 | builder := field.(*array.Uint16Builder) 113 | if v == nil { 114 | builder.AppendNull() 115 | } else { 116 | vT := v.(uint16) 117 | builder.Append(vT) 118 | } 119 | } 120 | case *arrow.Uint32Type: 121 | return func(field array.Builder, v interface{}) { 122 | builder := field.(*array.Uint32Builder) 123 | if v == nil { 124 | builder.AppendNull() 125 | } else { 126 | vT := v.(uint32) 127 | builder.Append(vT) 128 | } 129 | } 130 | case *arrow.Uint64Type: 131 | return func(field array.Builder, v interface{}) { 132 | builder := field.(*array.Uint64Builder) 133 | if v == nil { 134 | builder.AppendNull() 135 | } else { 136 | vT := v.(uint64) 137 | builder.Append(vT) 138 | } 139 | } 140 | case *arrow.Float32Type: 141 | return func(field array.Builder, v interface{}) { 142 | builder := field.(*array.Float32Builder) 143 | if v == nil { 144 | builder.AppendNull() 145 | } else { 146 | vT := v.(float32) 147 | builder.Append(vT) 148 | } 149 | } 150 | case *arrow.Float64Type: 151 | return func(field array.Builder, v interface{}) { 152 | builder := field.(*array.Float64Builder) 153 | if v == nil { 154 | builder.AppendNull() 155 | } else { 156 | vT := v.(float64) 157 | builder.Append(vT) 158 | } 159 | } 160 | case *arrow.StringType: 161 | return func(field array.Builder, v interface{}) { 162 | builder := field.(*array.StringBuilder) 163 | if v == nil { 164 | builder.AppendNull() 165 | } else { 166 | vT := v.(string) 167 | builder.Append(vT) 168 | } 169 | } 170 | 171 | default: 172 | 
panic(fmt.Errorf("dataframe/smartbuilder: unhandled field type %T", field.Type)) 173 | } 174 | } 175 | -------------------------------------------------------------------------------- /dataframe/smartbuilder_test.go: -------------------------------------------------------------------------------- 1 | package dataframe 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/apache/arrow/go/arrow" 8 | "github.com/apache/arrow/go/arrow/array" 9 | "github.com/apache/arrow/go/arrow/memory" 10 | ) 11 | 12 | func TestNewSmartBuilder(t *testing.T) { 13 | pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) 14 | defer pool.AssertSize(t, 0) 15 | 16 | schema := arrow.NewSchema( 17 | []arrow.Field{ 18 | {Name: COL0NAME, Type: arrow.PrimitiveTypes.Int32}, 19 | {Name: COL1NAME, Type: arrow.PrimitiveTypes.Float64}, 20 | }, 21 | nil, 22 | ) 23 | 24 | b := array.NewRecordBuilder(pool, schema) 25 | defer b.Release() 26 | 27 | smartBuilder := NewSmartBuilder(b, schema) 28 | 29 | int32Vals := []int32{1, 2, 3, 4, 5, 6, 7, 8, 9} 30 | for _, v := range int32Vals { 31 | smartBuilder.Append(0, v) 32 | } 33 | smartBuilder.Append(0, nil) 34 | 35 | float64Vals := []float64{1, 2, 3, 4, 5, 6, 7, 8, 9} 36 | for _, v := range float64Vals { 37 | smartBuilder.Append(1, v) 38 | } 39 | smartBuilder.Append(1, nil) 40 | 41 | rec1 := b.NewRecord() 42 | defer rec1.Release() 43 | 44 | cols := make([]array.Column, 0, len(rec1.Columns())) 45 | for i, cI := range rec1.Columns() { 46 | field := rec1.Schema().Field(i) 47 | chunk := array.NewChunked(field.Type, []array.Interface{cI}) 48 | col := array.NewColumn(field, chunk) 49 | defer col.Release() 50 | cols = append(cols, *col) 51 | chunk.Release() 52 | } 53 | 54 | df, err := NewDataFrameFromColumns(pool, cols) 55 | if err != nil { 56 | t.Fatal(err) 57 | } 58 | defer df.Release() 59 | 60 | got := df.Display(-1) 61 | want := `rec[0]["f1-i32"]: [1 2 3 4 5 6 7 8 9 (null)] 62 | rec[0]["f2-f64"]: [1 2 3 4 5 6 7 8 9 (null)] 63 | ` 64 | 65 | if got != 
want { 66 | t.Fatalf("\ngot=\n%v\nwant=\n%v", got, want) 67 | } 68 | } 69 | 70 | func buildDf(pool *memory.CheckedAllocator, dtype arrow.DataType, vals []interface{}) (*DataFrame, error) { 71 | schema := arrow.NewSchema( 72 | []arrow.Field{ 73 | {Name: fmt.Sprintf("col-%s", dtype.Name()), Type: dtype}, 74 | }, 75 | nil, 76 | ) 77 | 78 | b := array.NewRecordBuilder(pool, schema) 79 | defer b.Release() 80 | 81 | smartBuilder := NewSmartBuilder(b, schema) 82 | for i := range schema.Fields() { 83 | for j := range vals { 84 | smartBuilder.Append(i, vals[j]) 85 | } 86 | smartBuilder.Append(i, nil) 87 | } 88 | 89 | rec1 := b.NewRecord() 90 | defer rec1.Release() 91 | 92 | cols := make([]array.Column, 0, len(rec1.Columns())) 93 | for i, cI := range rec1.Columns() { 94 | field := rec1.Schema().Field(i) 95 | chunk := array.NewChunked(field.Type, []array.Interface{cI}) 96 | col := array.NewColumn(field, chunk) 97 | defer col.Release() 98 | cols = append(cols, *col) 99 | chunk.Release() 100 | } 101 | 102 | return NewDataFrameFromColumns(pool, cols) 103 | } 104 | 105 | func TestNewSmartBuilderBoolean(t *testing.T) { 106 | pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) 107 | defer pool.AssertSize(t, 0) 108 | 109 | vals := make([]interface{}, 9) 110 | for i := range vals { 111 | vals[i] = (i%2 == 0) 112 | } 113 | df, err := buildDf(pool, arrow.FixedWidthTypes.Boolean, vals) 114 | if err != nil { 115 | t.Fatal(err) 116 | } 117 | defer df.Release() 118 | 119 | got := df.Display(-1) 120 | want := `rec[0]["col-bool"]: [true false true false true false true false true (null)] 121 | ` 122 | 123 | if got != want { 124 | t.Fatalf("\ngot=\n%v\nwant=\n%v", got, want) 125 | } 126 | } 127 | 128 | func TestNewSmartBuilderInt8(t *testing.T) { 129 | pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) 130 | defer pool.AssertSize(t, 0) 131 | 132 | vals := make([]interface{}, 9) 133 | for i := range vals { 134 | vals[i] = int8(i) 135 | } 136 | df, err := buildDf(pool, 
arrow.PrimitiveTypes.Int8, vals) 137 | if err != nil { 138 | t.Fatal(err) 139 | } 140 | defer df.Release() 141 | 142 | got := df.Display(-1) 143 | want := `rec[0]["col-int8"]: [0 1 2 3 4 5 6 7 8 (null)] 144 | ` 145 | 146 | if got != want { 147 | t.Fatalf("\ngot=\n%v\nwant=\n%v", got, want) 148 | } 149 | } 150 | 151 | func TestNewSmartBuilderInt16(t *testing.T) { 152 | pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) 153 | defer pool.AssertSize(t, 0) 154 | 155 | vals := make([]interface{}, 9) 156 | for i := range vals { 157 | vals[i] = int16(i) 158 | } 159 | df, err := buildDf(pool, arrow.PrimitiveTypes.Int16, vals) 160 | if err != nil { 161 | t.Fatal(err) 162 | } 163 | defer df.Release() 164 | 165 | got := df.Display(-1) 166 | want := `rec[0]["col-int16"]: [0 1 2 3 4 5 6 7 8 (null)] 167 | ` 168 | 169 | if got != want { 170 | t.Fatalf("\ngot=\n%v\nwant=\n%v", got, want) 171 | } 172 | } 173 | 174 | func TestNewSmartBuilderInt32(t *testing.T) { 175 | pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) 176 | defer pool.AssertSize(t, 0) 177 | 178 | vals := make([]interface{}, 9) 179 | for i := range vals { 180 | vals[i] = int32(i) 181 | } 182 | df, err := buildDf(pool, arrow.PrimitiveTypes.Int32, vals) 183 | if err != nil { 184 | t.Fatal(err) 185 | } 186 | defer df.Release() 187 | 188 | got := df.Display(-1) 189 | want := `rec[0]["col-int32"]: [0 1 2 3 4 5 6 7 8 (null)] 190 | ` 191 | 192 | if got != want { 193 | t.Fatalf("\ngot=\n%v\nwant=\n%v", got, want) 194 | } 195 | } 196 | 197 | func TestNewSmartBuilderInt64(t *testing.T) { 198 | pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) 199 | defer pool.AssertSize(t, 0) 200 | 201 | vals := make([]interface{}, 9) 202 | for i := range vals { 203 | vals[i] = int64(i) 204 | } 205 | df, err := buildDf(pool, arrow.PrimitiveTypes.Int64, vals) 206 | if err != nil { 207 | t.Fatal(err) 208 | } 209 | defer df.Release() 210 | 211 | got := df.Display(-1) 212 | want := `rec[0]["col-int64"]: [0 1 2 3 4 5 6 7 
8 (null)] 213 | ` 214 | 215 | if got != want { 216 | t.Fatalf("\ngot=\n%v\nwant=\n%v", got, want) 217 | } 218 | } 219 | 220 | func TestNewSmartBuilderUint8(t *testing.T) { 221 | pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) 222 | defer pool.AssertSize(t, 0) 223 | 224 | vals := make([]interface{}, 9) 225 | for i := range vals { 226 | vals[i] = uint8(i) 227 | } 228 | df, err := buildDf(pool, arrow.PrimitiveTypes.Uint8, vals) 229 | if err != nil { 230 | t.Fatal(err) 231 | } 232 | defer df.Release() 233 | 234 | got := df.Display(-1) 235 | want := `rec[0]["col-uint8"]: [0 1 2 3 4 5 6 7 8 (null)] 236 | ` 237 | 238 | if got != want { 239 | t.Fatalf("\ngot=\n%v\nwant=\n%v", got, want) 240 | } 241 | } 242 | func TestNewSmartBuilderUint16(t *testing.T) { 243 | pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) 244 | defer pool.AssertSize(t, 0) 245 | 246 | vals := make([]interface{}, 9) 247 | for i := range vals { 248 | vals[i] = uint16(i) 249 | } 250 | df, err := buildDf(pool, arrow.PrimitiveTypes.Uint16, vals) 251 | if err != nil { 252 | t.Fatal(err) 253 | } 254 | defer df.Release() 255 | 256 | got := df.Display(-1) 257 | want := `rec[0]["col-uint16"]: [0 1 2 3 4 5 6 7 8 (null)] 258 | ` 259 | 260 | if got != want { 261 | t.Fatalf("\ngot=\n%v\nwant=\n%v", got, want) 262 | } 263 | } 264 | 265 | func TestNewSmartBuilderUint32(t *testing.T) { 266 | pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) 267 | defer pool.AssertSize(t, 0) 268 | 269 | vals := make([]interface{}, 9) 270 | for i := range vals { 271 | vals[i] = uint32(i) 272 | } 273 | df, err := buildDf(pool, arrow.PrimitiveTypes.Uint32, vals) 274 | if err != nil { 275 | t.Fatal(err) 276 | } 277 | defer df.Release() 278 | 279 | got := df.Display(-1) 280 | want := `rec[0]["col-uint32"]: [0 1 2 3 4 5 6 7 8 (null)] 281 | ` 282 | 283 | if got != want { 284 | t.Fatalf("\ngot=\n%v\nwant=\n%v", got, want) 285 | } 286 | } 287 | 288 | func TestNewSmartBuilderUint64(t *testing.T) { 289 | pool := 
memory.NewCheckedAllocator(memory.NewGoAllocator()) 290 | defer pool.AssertSize(t, 0) 291 | 292 | vals := make([]interface{}, 9) 293 | for i := range vals { 294 | vals[i] = uint64(i) 295 | } 296 | df, err := buildDf(pool, arrow.PrimitiveTypes.Uint64, vals) 297 | if err != nil { 298 | t.Fatal(err) 299 | } 300 | defer df.Release() 301 | 302 | got := df.Display(-1) 303 | want := `rec[0]["col-uint64"]: [0 1 2 3 4 5 6 7 8 (null)] 304 | ` 305 | 306 | if got != want { 307 | t.Fatalf("\ngot=\n%v\nwant=\n%v", got, want) 308 | } 309 | } 310 | 311 | func TestNewSmartBuilderFloat32(t *testing.T) { 312 | pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) 313 | defer pool.AssertSize(t, 0) 314 | 315 | vals := make([]interface{}, 9) 316 | for i := range vals { 317 | vals[i] = float32(i) 318 | } 319 | df, err := buildDf(pool, arrow.PrimitiveTypes.Float32, vals) 320 | if err != nil { 321 | t.Fatal(err) 322 | } 323 | defer df.Release() 324 | 325 | got := df.Display(-1) 326 | want := `rec[0]["col-float32"]: [0 1 2 3 4 5 6 7 8 (null)] 327 | ` 328 | 329 | if got != want { 330 | t.Fatalf("\ngot=\n%v\nwant=\n%v", got, want) 331 | } 332 | } 333 | 334 | func TestNewSmartBuilderFloat64(t *testing.T) { 335 | pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) 336 | defer pool.AssertSize(t, 0) 337 | 338 | vals := make([]interface{}, 9) 339 | for i := range vals { 340 | vals[i] = float64(i) 341 | } 342 | df, err := buildDf(pool, arrow.PrimitiveTypes.Float64, vals) 343 | if err != nil { 344 | t.Fatal(err) 345 | } 346 | defer df.Release() 347 | 348 | got := df.Display(-1) 349 | want := `rec[0]["col-float64"]: [0 1 2 3 4 5 6 7 8 (null)] 350 | ` 351 | 352 | if got != want { 353 | t.Fatalf("\ngot=\n%v\nwant=\n%v", got, want) 354 | } 355 | } 356 | 357 | func TestNewSmartBuilderString(t *testing.T) { 358 | pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) 359 | defer pool.AssertSize(t, 0) 360 | 361 | vals := make([]interface{}, 9) 362 | for i := range vals { 363 | vals[i] = 
// StepValueElementAt gets the value at i from the StepValue and casts it to an
// Element so callers can use the Element comparison API (e.g. Eq).
func StepValueElementAt(stepValue *iterator.StepValue, i int) Element {
	stepValueEl, dtype := stepValue.Value(i)
	return CastElement(dtype, stepValueEl)
}

// I don't want to force the DataFrame API to conform to the TableReader API.
// (i.e. forcing NumCols to return int64 doesn't make sense in Go).
// So this is a facade the DataFrame TableReader expects.

// TableFacade is a simple facade for a TableReader.
// It is just array.Table; the named type documents intent at the call site.
type TableFacade interface {
	array.Table
}

// tableReaderFacade adapts a DataFrame to the array.Table interface by
// delegating every method to the wrapped DataFrame.
type tableReaderFacade struct {
	df *DataFrame
}

// NewTableFacade creates a new TableFacade for a DataFrame.
// It does not retain the DataFrame; lifetime is managed through the facade's
// own Retain/Release, which forward to the DataFrame.
func NewTableFacade(df *DataFrame) TableFacade {
	return &tableReaderFacade{
		df: df,
	}
}

// Schema returns the schema of the underlying DataFrame.
func (f *tableReaderFacade) Schema() *arrow.Schema {
	return f.df.Schema()
}

// NumRows returns the number of rows in the underlying DataFrame.
func (f *tableReaderFacade) NumRows() int64 {
	return f.df.NumRows()
}

// NumCols returns the number of columns, widened to int64 as the
// array.Table interface requires (DataFrame.NumCols returns int).
func (f *tableReaderFacade) NumCols() int64 {
	return int64(f.df.NumCols())
}

// Column is an immutable column data structure consisting of
// a field (type metadata) and a chunked data array.
func (f *tableReaderFacade) Column(i int) *array.Column {
	return f.df.ColumnAt(i)
}

// Retain increments the reference count of the underlying DataFrame.
func (f *tableReaderFacade) Retain() {
	f.df.Retain()
}

// Release decrements the reference count of the underlying DataFrame.
func (f *tableReaderFacade) Release() {
	f.df.Release()
}
-------------------------------------------------------------------------------- 1 | /* 2 | Package bullseye provides an implementation of a DataFrame using Apache Arrow. 3 | 4 | Basics 5 | 6 | The DataFrame is an immutable heterogeneous tabular data structure with labeled columns. 7 | It stores its raw bytes using a provided Arrow Allocator by using the fundamental data 8 | structure of Array (columns), which holds a sequence of values of the same type. An array 9 | consists of memory holding the data and an additional validity bitmap that indicates if 10 | the corresponding entry in the array is valid (not null). 11 | 12 | Any DataFrames created should be released using Release() to decrement the reference 13 | and free up the memory managed by the Arrow implementation. 14 | 15 | Getting Started 16 | 17 | Look in the dataframe package to get started. 18 | */ 19 | package bullseye -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/go-bullseye/bullseye 2 | 3 | go 1.12 4 | 5 | require ( 6 | github.com/apache/arrow/go/arrow v0.0.0-20190615061817-720be32a0bb5 7 | github.com/pkg/errors v0.8.1 8 | ) 9 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/apache/arrow/go/arrow v0.0.0-20190615061817-720be32a0bb5 h1:EGCJTEx+tkmZuz6Wbc0zkA+Dgf7UXKu+126krteiZJQ= 2 | github.com/apache/arrow/go/arrow v0.0.0-20190615061817-720be32a0bb5/go.mod h1:NG5SvIQXIxzJR5lGmoXTX9R/EmkArKbPPFu0DUFSz10= 3 | github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= 4 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 5 | github.com/google/flatbuffers v1.11.0/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= 6 | github.com/pkg/errors v0.8.1
// DenseCollectionToInterface casts a slice of interfaces to an interface of the correct type.
// The target element type is inferred from the first non-nil element; every
// other non-nil element must have that same dynamic type or an error is
// returned. nil elements are left as the zero value of the target slice.
// Returns (nil, nil) for an empty or all-nil input.
//
// NOTE(review): the int case widens to []int64 — presumably because the
// downstream Arrow builders have no machine-sized int variant (confirm) —
// but the uint case returns []uint without widening; verify consumers
// accept []uint or whether it should mirror the int case as []uint64.
func DenseCollectionToInterface(elms []interface{}) (interface{}, error) {
	if len(elms) == 0 {
		return nil, nil
	}

	// find the first one that is not nil
	var first interface{}
	for i := range elms {
		if elms[i] != nil {
			first = elms[i]
			break
		}
	}

	// Every element was nil: there is no type to infer.
	if first == nil {
		return nil, nil
	}

	var ok bool
	switch v := first.(type) {
	case bool:
		arr := make([]bool, len(elms))
		for i, e := range elms {
			if e == nil {
				continue
			}
			if arr[i], ok = e.(bool); !ok {
				return nil, errors.Errorf(inconsistentDataTypesErrMsg, e, v)
			}
		}
		return arr, nil

	case int8:
		arr := make([]int8, len(elms))
		for i, e := range elms {
			if e == nil {
				continue
			}
			if arr[i], ok = e.(int8); !ok {
				return nil, errors.Errorf(inconsistentDataTypesErrMsg, e, v)
			}
		}
		return arr, nil

	case int16:
		arr := make([]int16, len(elms))
		for i, e := range elms {
			if e == nil {
				continue
			}
			if arr[i], ok = e.(int16); !ok {
				return nil, errors.Errorf(inconsistentDataTypesErrMsg, e, v)
			}
		}
		return arr, nil

	case int32:
		arr := make([]int32, len(elms))
		for i, e := range elms {
			if e == nil {
				continue
			}
			if arr[i], ok = e.(int32); !ok {
				return nil, errors.Errorf(inconsistentDataTypesErrMsg, e, v)
			}
		}
		return arr, nil

	case int64:
		arr := make([]int64, len(elms))
		for i, e := range elms {
			if e == nil {
				continue
			}
			if arr[i], ok = e.(int64); !ok {
				return nil, errors.Errorf(inconsistentDataTypesErrMsg, e, v)
			}
		}
		return arr, nil

	case uint8:
		arr := make([]uint8, len(elms))
		for i, e := range elms {
			if e == nil {
				continue
			}
			if arr[i], ok = e.(uint8); !ok {
				return nil, errors.Errorf(inconsistentDataTypesErrMsg, e, v)
			}
		}
		return arr, nil

	case uint16:
		arr := make([]uint16, len(elms))
		for i, e := range elms {
			if e == nil {
				continue
			}
			if arr[i], ok = e.(uint16); !ok {
				return nil, errors.Errorf(inconsistentDataTypesErrMsg, e, v)
			}
		}
		return arr, nil

	case uint32:
		arr := make([]uint32, len(elms))
		for i, e := range elms {
			if e == nil {
				continue
			}
			if arr[i], ok = e.(uint32); !ok {
				return nil, errors.Errorf(inconsistentDataTypesErrMsg, e, v)
			}
		}
		return arr, nil

	case uint64:
		arr := make([]uint64, len(elms))
		for i, e := range elms {
			if e == nil {
				continue
			}
			if arr[i], ok = e.(uint64); !ok {
				return nil, errors.Errorf(inconsistentDataTypesErrMsg, e, v)
			}
		}
		return arr, nil

	case float32:
		arr := make([]float32, len(elms))
		for i, e := range elms {
			if e == nil {
				continue
			}
			if arr[i], ok = e.(float32); !ok {
				return nil, errors.Errorf(inconsistentDataTypesErrMsg, e, v)
			}
		}
		return arr, nil

	case float64:
		arr := make([]float64, len(elms))
		for i, e := range elms {
			if e == nil {
				continue
			}
			if arr[i], ok = e.(float64); !ok {
				return nil, errors.Errorf(inconsistentDataTypesErrMsg, e, v)
			}
		}
		return arr, nil

	case string:
		arr := make([]string, len(elms))
		for i, e := range elms {
			if e == nil {
				continue
			}
			if arr[i], ok = e.(string); !ok {
				return nil, errors.Errorf(inconsistentDataTypesErrMsg, e, v)
			}
		}
		return arr, nil

	case uint:
		// See the NOTE(review) in the doc comment: uint is NOT widened.
		arr := make([]uint, len(elms))
		for i, e := range elms {
			if e == nil {
				continue
			}
			if arr[i], ok = e.(uint); !ok {
				return nil, errors.Errorf(inconsistentDataTypesErrMsg, e, v)
			}
		}
		return arr, nil

	case int:
		// int is widened element-by-element into []int64.
		arr := make([]int64, len(elms))
		for i, e := range elms {
			if e == nil {
				continue
			}
			vv, okk := e.(int)
			if !okk {
				return nil, errors.Errorf(inconsistentDataTypesErrMsg, e, v)
			}
			arr[i] = int64(vv)
		}
		return arr, nil

	default:
		return nil, errors.Errorf("dataframe/dense: invalid data type for %v (%T)", elms, v)
	}
}
3 | 4 | */ 5 | package cast 6 | -------------------------------------------------------------------------------- /internal/cast/sparse.go: -------------------------------------------------------------------------------- 1 | package cast 2 | 3 | import "github.com/pkg/errors" 4 | 5 | const inconsistentDataTypesErrMsg = "inconsistent data types for elements, expecting %v to be of type (%T)" 6 | 7 | // SparseCollectionToInterface casts a slice of interfaces to an interface of the correct type 8 | // for the provided sparse collection. 9 | // This should be used for sparse as it should be faster for larger arrays. 10 | func SparseCollectionToInterface(elms []interface{}, indexes []int, size int) (interface{}, error) { 11 | if len(elms) == 0 { 12 | return nil, nil 13 | } 14 | 15 | first := elms[0] 16 | 17 | var ok bool 18 | switch v := first.(type) { 19 | case bool: 20 | arr := make([]bool, size) 21 | for i, idx := range indexes { 22 | e := elms[i] 23 | if e == nil { 24 | continue 25 | } 26 | if arr[idx], ok = e.(bool); !ok { 27 | return nil, errors.Errorf(inconsistentDataTypesErrMsg, e, v) 28 | } 29 | } 30 | return arr, nil 31 | 32 | case int8: 33 | arr := make([]int8, size) 34 | for i, idx := range indexes { 35 | e := elms[i] 36 | if e == nil { 37 | continue 38 | } 39 | if arr[idx], ok = e.(int8); !ok { 40 | return nil, errors.Errorf(inconsistentDataTypesErrMsg, e, v) 41 | } 42 | } 43 | return arr, nil 44 | 45 | case int16: 46 | arr := make([]int16, size) 47 | for i, idx := range indexes { 48 | e := elms[i] 49 | if e == nil { 50 | continue 51 | } 52 | if arr[idx], ok = e.(int16); !ok { 53 | return nil, errors.Errorf(inconsistentDataTypesErrMsg, e, v) 54 | } 55 | } 56 | return arr, nil 57 | 58 | case int32: 59 | arr := make([]int32, size) 60 | for i, idx := range indexes { 61 | e := elms[i] 62 | if e == nil { 63 | continue 64 | } 65 | if arr[idx], ok = e.(int32); !ok { 66 | return nil, errors.Errorf(inconsistentDataTypesErrMsg, e, v) 67 | } 68 | } 69 | return arr, nil 
70 | 71 | case int64: 72 | arr := make([]int64, size) 73 | for i, idx := range indexes { 74 | e := elms[i] 75 | if e == nil { 76 | continue 77 | } 78 | if arr[idx], ok = e.(int64); !ok { 79 | return nil, errors.Errorf(inconsistentDataTypesErrMsg, e, v) 80 | } 81 | } 82 | return arr, nil 83 | 84 | case uint8: 85 | arr := make([]uint8, size) 86 | for i, idx := range indexes { 87 | e := elms[i] 88 | if e == nil { 89 | continue 90 | } 91 | if arr[idx], ok = e.(uint8); !ok { 92 | return nil, errors.Errorf(inconsistentDataTypesErrMsg, e, v) 93 | } 94 | } 95 | return arr, nil 96 | 97 | case uint16: 98 | arr := make([]uint16, size) 99 | for i, idx := range indexes { 100 | e := elms[i] 101 | if e == nil { 102 | continue 103 | } 104 | if arr[idx], ok = e.(uint16); !ok { 105 | return nil, errors.Errorf(inconsistentDataTypesErrMsg, e, v) 106 | } 107 | } 108 | return arr, nil 109 | 110 | case uint32: 111 | arr := make([]uint32, size) 112 | for i, idx := range indexes { 113 | e := elms[i] 114 | if e == nil { 115 | continue 116 | } 117 | if arr[idx], ok = e.(uint32); !ok { 118 | return nil, errors.Errorf(inconsistentDataTypesErrMsg, e, v) 119 | } 120 | } 121 | return arr, nil 122 | 123 | case uint64: 124 | arr := make([]uint64, size) 125 | for i, idx := range indexes { 126 | e := elms[i] 127 | if e == nil { 128 | continue 129 | } 130 | if arr[idx], ok = e.(uint64); !ok { 131 | return nil, errors.Errorf(inconsistentDataTypesErrMsg, e, v) 132 | } 133 | } 134 | return arr, nil 135 | 136 | case float32: 137 | arr := make([]float32, size) 138 | for i, idx := range indexes { 139 | e := elms[i] 140 | if e == nil { 141 | continue 142 | } 143 | if arr[idx], ok = e.(float32); !ok { 144 | return nil, errors.Errorf(inconsistentDataTypesErrMsg, e, v) 145 | } 146 | } 147 | return arr, nil 148 | 149 | case float64: 150 | arr := make([]float64, size) 151 | for i, idx := range indexes { 152 | e := elms[i] 153 | if e == nil { 154 | continue 155 | } 156 | if arr[idx], ok = e.(float64); !ok { 157 | 
return nil, errors.Errorf(inconsistentDataTypesErrMsg, e, v) 158 | } 159 | } 160 | return arr, nil 161 | 162 | case string: 163 | arr := make([]string, size) 164 | for i, idx := range indexes { 165 | e := elms[i] 166 | if e == nil { 167 | continue 168 | } 169 | if arr[idx], ok = e.(string); !ok { 170 | return nil, errors.Errorf(inconsistentDataTypesErrMsg, e, v) 171 | } 172 | } 173 | return arr, nil 174 | 175 | case uint: 176 | arr := make([]uint, size) 177 | for i, idx := range indexes { 178 | e := elms[i] 179 | if e == nil { 180 | continue 181 | } 182 | if arr[idx], ok = e.(uint); !ok { 183 | return nil, errors.Errorf(inconsistentDataTypesErrMsg, e, v) 184 | } 185 | } 186 | return arr, nil 187 | 188 | case int: 189 | arr := make([]int64, size) 190 | for i, idx := range indexes { 191 | e := elms[i] 192 | if e == nil { 193 | continue 194 | } 195 | vv, okk := e.(int) 196 | if !okk { 197 | return nil, errors.Errorf(inconsistentDataTypesErrMsg, e, v) 198 | } 199 | arr[idx] = int64(vv) 200 | } 201 | return arr, nil 202 | 203 | default: 204 | return nil, errors.Errorf("dataframe/sparse: invalid data type for %v (%T)", elms, v) 205 | } 206 | } 207 | -------------------------------------------------------------------------------- /internal/constructors/doc.go: -------------------------------------------------------------------------------- 1 | /* 2 | Package constructors provides constructors for arrow types. 
3 | 4 | */ 5 | package constructors 6 | -------------------------------------------------------------------------------- /internal/constructors/interface.go: -------------------------------------------------------------------------------- 1 | package constructors 2 | 3 | import ( 4 | "github.com/apache/arrow/go/arrow" 5 | "github.com/apache/arrow/go/arrow/array" 6 | "github.com/apache/arrow/go/arrow/memory" 7 | "github.com/go-bullseye/bullseye/internal/cast" 8 | "github.com/pkg/errors" 9 | ) 10 | 11 | // NewInterfaceFromMem builds a new column from memory 12 | // valid is an optional array of booleans. If not specified, all values are valid. 13 | func NewInterfaceFromMem(mem memory.Allocator, name string, values interface{}, valid []bool) (array.Interface, *arrow.Field, error) { 14 | var arr array.Interface 15 | 16 | switch v := values.(type) { 17 | case []bool: 18 | bld := array.NewBooleanBuilder(mem) 19 | defer bld.Release() 20 | 21 | bld.AppendValues(v, valid) 22 | arr = bld.NewArray() 23 | 24 | case []int8: 25 | bld := array.NewInt8Builder(mem) 26 | defer bld.Release() 27 | 28 | bld.AppendValues(v, valid) 29 | arr = bld.NewArray() 30 | 31 | case []int16: 32 | bld := array.NewInt16Builder(mem) 33 | defer bld.Release() 34 | 35 | bld.AppendValues(v, valid) 36 | arr = bld.NewArray() 37 | 38 | case []int32: 39 | bld := array.NewInt32Builder(mem) 40 | defer bld.Release() 41 | 42 | bld.AppendValues(v, valid) 43 | arr = bld.NewArray() 44 | 45 | case []int64: 46 | bld := array.NewInt64Builder(mem) 47 | defer bld.Release() 48 | 49 | bld.AppendValues(v, valid) 50 | arr = bld.NewArray() 51 | 52 | case []uint8: 53 | bld := array.NewUint8Builder(mem) 54 | defer bld.Release() 55 | 56 | bld.AppendValues(v, valid) 57 | arr = bld.NewArray() 58 | 59 | case []uint16: 60 | bld := array.NewUint16Builder(mem) 61 | defer bld.Release() 62 | 63 | bld.AppendValues(v, valid) 64 | arr = bld.NewArray() 65 | 66 | case []uint32: 67 | bld := array.NewUint32Builder(mem) 68 | defer bld.Release() 
69 | 70 | bld.AppendValues(v, valid) 71 | arr = bld.NewArray() 72 | 73 | case []uint64: 74 | bld := array.NewUint64Builder(mem) 75 | defer bld.Release() 76 | 77 | bld.AppendValues(v, valid) 78 | arr = bld.NewArray() 79 | 80 | case []float32: 81 | bld := array.NewFloat32Builder(mem) 82 | defer bld.Release() 83 | 84 | bld.AppendValues(v, valid) 85 | arr = bld.NewArray() 86 | 87 | case []float64: 88 | bld := array.NewFloat64Builder(mem) 89 | defer bld.Release() 90 | 91 | bld.AppendValues(v, valid) 92 | arr = bld.NewArray() 93 | 94 | case []string: 95 | bld := array.NewStringBuilder(mem) 96 | defer bld.Release() 97 | 98 | bld.AppendValues(v, valid) 99 | arr = bld.NewArray() 100 | 101 | case []uint: 102 | bld := array.NewUint64Builder(mem) 103 | defer bld.Release() 104 | 105 | vs := make([]uint64, len(v)) 106 | for i, e := range v { 107 | vs[i] = uint64(e) 108 | } 109 | 110 | bld.AppendValues(vs, valid) 111 | arr = bld.NewArray() 112 | 113 | case []int: 114 | bld := array.NewInt64Builder(mem) 115 | defer bld.Release() 116 | 117 | vs := make([]int64, len(v)) 118 | for i, e := range v { 119 | vs[i] = int64(e) 120 | } 121 | 122 | bld.AppendValues(vs, valid) 123 | arr = bld.NewArray() 124 | 125 | case []interface{}: 126 | validDense := valid 127 | if len(validDense) == 0 { 128 | // build valid mask 129 | validDense = make([]bool, len(v)) 130 | for idx, value := range v { 131 | validDense[idx] = value != nil 132 | } 133 | } 134 | ifaceDense, err := cast.DenseCollectionToInterface(v) 135 | if err != nil { 136 | return nil, nil, err 137 | } 138 | return NewInterfaceFromMem(mem, name, ifaceDense, validDense) 139 | 140 | default: 141 | err := errors.Errorf("dataframe/interface: invalid data type for %q (%T)", name, v) 142 | return nil, nil, err 143 | } 144 | 145 | field := &arrow.Field{Name: name, Type: arr.DataType()} 146 | return arr, field, nil 147 | } 148 | -------------------------------------------------------------------------------- /internal/debug/assert_disabled.go: 
--------------------------------------------------------------------------------

// +build !assert

package debug

// Assert will panic with msg if cond is false.
// This is the no-op variant compiled when the "assert" build tag is absent,
// so assertions cost nothing in normal builds.
func Assert(cond bool, msg interface{}) {}

-------------------------------------------------------------------------------- /internal/debug/assert_enabled.go: --------------------------------------------------------------------------------

// +build assert

package debug

// Assert will panic with msg if cond is false.
// Compiled in only when building with the "assert" tag; pairs with the
// no-op variant in assert_disabled.go.
func Assert(cond bool, msg interface{}) {
	if !cond {
		panic(msg)
	}
}

-------------------------------------------------------------------------------- /internal/debug/debug_disabled.go: --------------------------------------------------------------------------------

// +build !debug

package debug

// Debug is the no-op variant of debug-level logging, compiled when the
// "debug" build tag is absent.
func Debug(interface{}) {}

-------------------------------------------------------------------------------- /internal/debug/debug_enabled.go: --------------------------------------------------------------------------------

// +build debug

package debug

import (
	"log"
	"os"
)

var (
	// debug is the debug-level logger, writing to stderr with a "(debug) " prefix.
	debug = log.New(os.Stderr, "(debug) ", log.LstdFlags)
)

// Debug logs msg at debug level. Compiled in only with the "debug" build tag.
func Debug(msg interface{}) {
	debug.Print(msg)
}

-------------------------------------------------------------------------------- /internal/debug/doc.go: --------------------------------------------------------------------------------

/*
Package debug provides compiled assertions, debug and warn level logging.

To enable runtime debug or warn level logging, build with the debug or warn tags
respectively. Building with the debug tag will enable the warn level logger automatically.
When the debug and warn tags are omitted, the code for the logging will be omitted from
the binary.

To enable runtime assertions, build with the assert tag.
When the assert tag is omitted,
the code for the assertions will be omitted from the binary.
*/
package debug

-------------------------------------------------------------------------------- /internal/debug/warn_disabled.go: --------------------------------------------------------------------------------

// +build !debug,!warn

package debug

// Warn is the no-op variant of warn-level logging, compiled when neither
// the "debug" nor the "warn" build tag is set.
func Warn(interface{}) {}

// Warnf is the no-op variant of formatted warn-level logging.
func Warnf(format string, v ...interface{}) {}

-------------------------------------------------------------------------------- /internal/debug/warn_enabled.go: --------------------------------------------------------------------------------

// +build debug warn

package debug

import (
	"log"
	"os"
)

var (
	// warn is the warn-level logger, writing to stderr with a "(warn) " prefix.
	warn = log.New(os.Stderr, "(warn) ", log.LstdFlags)
)

// Warn logs msg at warn level. Compiled in with the "debug" or "warn" tag.
func Warn(msg interface{}) {
	warn.Print(msg)
}

// Warnf logs a formatted message at warn level.
func Warnf(format string, v ...interface{}) {
	warn.Printf(format, v...)
}

-------------------------------------------------------------------------------- /iterator/booleaniterator.go: --------------------------------------------------------------------------------

package iterator

import (
	"sync/atomic"

	"github.com/apache/arrow/go/arrow/array"
	"github.com/go-bullseye/bullseye/internal/debug"
)

// BooleanValueIterator is an iterator for reading an Arrow Column value by value.
type BooleanValueIterator struct {
	refCount      int64
	chunkIterator *ChunkIterator

	// Things we need to maintain for the iterator
	index int            // current value index within ref
	ref   *array.Boolean // the chunk reference (retained; swapped as chunks advance)
	done  bool           // there are no more elements for this iterator
}

// NewBooleanValueIterator creates a new BooleanValueIterator for reading an Arrow Column.
func NewBooleanValueIterator(col *array.Column) *BooleanValueIterator {
	// We need a ChunkIterator to read the chunks
	chunkIterator := NewChunkIterator(col)

	return &BooleanValueIterator{
		refCount:      1,
		chunkIterator: chunkIterator,

		index: 0,
		ref:   nil,
	}
}

// Value will return the current value that the iterator is on and boolean value indicating if the value is actually null.
// NOTE: the second result is true when the value IS null (it is ref.IsNull),
// i.e. the opposite of a comma-ok "valid" flag.
func (vr *BooleanValueIterator) Value() (bool, bool) {
	return vr.ref.Value(vr.index), vr.ref.IsNull(vr.index)
}

// ValuePointer will return a pointer to the current value that the iterator is on. It will return nil if the value is actually null.
func (vr *BooleanValueIterator) ValuePointer() *bool {
	if vr.ref.IsNull(vr.index) {
		return nil
	}
	value := vr.ref.Value(vr.index)
	return &value
}

// ValueInterface returns the value as an interface{}.
// It returns nil (rather than a boxed bool) for null entries.
func (vr *BooleanValueIterator) ValueInterface() interface{} {
	if vr.ref.IsNull(vr.index) {
		return nil
	}
	return vr.ref.Value(vr.index)
}

// Next moves the iterator to the next value. This will return false
// when there are no more values.
// The iterator starts positioned before the first element, so Next must be
// called once before the first Value/ValuePointer/ValueInterface call.
func (vr *BooleanValueIterator) Next() bool {
	if vr.done {
		return false
	}

	// Move the index up
	vr.index++

	// Keep moving the chunk up until we get one with data
	// (zero-length chunks are skipped; nextChunk resets vr.index to 0).
	for vr.ref == nil || vr.index >= vr.ref.Len() {
		if !vr.nextChunk() {
			// There were no more chunks with data in them
			vr.done = true
			return false
		}
	}

	return true
}

// nextChunk advances the underlying chunk iterator, swapping the retained
// chunk reference over to the new chunk and resetting index to 0.
// It returns false when there are no more chunks.
func (vr *BooleanValueIterator) nextChunk() bool {
	// Advance the chunk until we get one with data in it or we are done
	if !vr.chunkIterator.Next() {
		// No more chunks
		return false
	}

	// There was another chunk.
	// We maintain the ref and the values because the ref is going to allow us to retain the memory.
	ref := vr.chunkIterator.Chunk()
	ref.Retain()

	// Release the previous chunk only after retaining the new one.
	if vr.ref != nil {
		vr.ref.Release()
	}

	vr.ref = ref.(*array.Boolean)
	vr.index = 0
	return true
}

// Retain keeps a reference to the BooleanValueIterator
func (vr *BooleanValueIterator) Retain() {
	atomic.AddInt64(&vr.refCount, 1)
}

// Release removes a reference to the BooleanValueIterator.
// When the count reaches zero it releases the chunk iterator and any
// retained chunk; the iterator must not be used afterwards.
func (vr *BooleanValueIterator) Release() {
	debug.Assert(atomic.LoadInt64(&vr.refCount) > 0, "too many releases")

	if atomic.AddInt64(&vr.refCount, -1) == 0 {
		if vr.chunkIterator != nil {
			vr.chunkIterator.Release()
			vr.chunkIterator = nil
		}

		if vr.ref != nil {
			vr.ref.Release()
			vr.ref = nil
		}
	}
}

-------------------------------------------------------------------------------- /iterator/chunkiterator.gen.go: --------------------------------------------------------------------------------

// Code generated by iterator/chunkiterator.gen.go.tmpl. DO NOT EDIT.

package iterator

import (
	"sync/atomic"

	"github.com/apache/arrow/go/arrow"
	"github.com/apache/arrow/go/arrow/array"
	"github.com/go-bullseye/bullseye/internal/debug"
)

// Int64ChunkIterator is an iterator for reading an Arrow Column value by value.
type Int64ChunkIterator struct {
	refCount int64
	col      *array.Column

	// Things Chunked maintains. We're going to maintain it ourselves.
	chunks []*array.Int64 // cache the chunks on this iterator
	length int64          // this isn't set right on Chunked so we won't rely on it there. Instead we keep the correct value here.
21 | nulls int64 22 | dtype arrow.DataType 23 | 24 | // Things we need to maintain for the iterator 25 | currentIndex int // current chunk 26 | currentChunk *array.Int64 // current chunk 27 | } 28 | 29 | // NewInt64ChunkIterator creates a new Int64ChunkIterator for reading an Arrow Column. 30 | func NewInt64ChunkIterator(col *array.Column) *Int64ChunkIterator { 31 | col.Retain() 32 | 33 | // Chunked is not using the correct type to keep track of length so we have to recalculate it. 34 | columnChunks := col.Data().Chunks() 35 | chunks := make([]*array.Int64, len(columnChunks)) 36 | var length int64 37 | var nulls int64 38 | 39 | for i, chunk := range columnChunks { 40 | // Keep our own refs to chunks 41 | chunks[i] = chunk.(*array.Int64) 42 | // Retain the chunk 43 | chunks[i].Retain() 44 | 45 | // Keep our own counters instead of Chunked's 46 | length += int64(chunk.Len()) 47 | nulls += int64(chunk.NullN()) 48 | } 49 | 50 | return &Int64ChunkIterator{ 51 | refCount: 1, 52 | col: col, 53 | 54 | chunks: chunks, 55 | length: length, 56 | nulls: nulls, 57 | dtype: col.DataType(), 58 | 59 | currentIndex: 0, 60 | currentChunk: nil, 61 | } 62 | } 63 | 64 | // Chunk will return the current chunk that the iterator is on. 65 | func (cr *Int64ChunkIterator) Chunk() *array.Int64 { return cr.currentChunk } 66 | 67 | // ChunkValues returns the underlying []int64 chunk values. 68 | // Keep in mind the []int64 type might not be able 69 | // to account for nil values. You must check for those explicitly via the chunk. 70 | func (cr *Int64ChunkIterator) ChunkValues() []int64 { return cr.Chunk().Int64Values() } 71 | 72 | // Next moves the iterator to the next chunk. This will return false 73 | // when there are no more chunks. 
74 | func (cr *Int64ChunkIterator) Next() bool { 75 | if cr.currentIndex >= len(cr.chunks) { 76 | return false 77 | } 78 | 79 | if cr.currentChunk != nil { 80 | cr.currentChunk.Release() 81 | } 82 | 83 | cr.currentChunk = cr.chunks[cr.currentIndex] 84 | cr.currentChunk.Retain() 85 | cr.currentIndex++ 86 | 87 | return true 88 | } 89 | 90 | // Retain keeps a reference to the Int64ChunkIterator 91 | func (cr *Int64ChunkIterator) Retain() { 92 | atomic.AddInt64(&cr.refCount, 1) 93 | } 94 | 95 | // Release removes a reference to the Int64ChunkIterator 96 | func (cr *Int64ChunkIterator) Release() { 97 | debug.Assert(atomic.LoadInt64(&cr.refCount) > 0, "too many releases") 98 | ref := atomic.AddInt64(&cr.refCount, -1) 99 | if ref == 0 { 100 | cr.col.Release() 101 | for i := range cr.chunks { 102 | cr.chunks[i].Release() 103 | } 104 | if cr.currentChunk != nil { 105 | cr.currentChunk.Release() 106 | cr.currentChunk = nil 107 | } 108 | cr.col = nil 109 | cr.chunks = nil 110 | cr.dtype = nil 111 | } 112 | } 113 | 114 | // Uint64ChunkIterator is an iterator for reading an Arrow Column value by value. 115 | type Uint64ChunkIterator struct { 116 | refCount int64 117 | col *array.Column 118 | 119 | // Things Chunked maintains. We're going to maintain it ourselves. 120 | chunks []*array.Uint64 // cache the chunks on this iterator 121 | length int64 // this isn't set right on Chunked so we won't rely on it there. Instead we keep the correct value here. 122 | nulls int64 123 | dtype arrow.DataType 124 | 125 | // Things we need to maintain for the iterator 126 | currentIndex int // current chunk 127 | currentChunk *array.Uint64 // current chunk 128 | } 129 | 130 | // NewUint64ChunkIterator creates a new Uint64ChunkIterator for reading an Arrow Column. 131 | func NewUint64ChunkIterator(col *array.Column) *Uint64ChunkIterator { 132 | col.Retain() 133 | 134 | // Chunked is not using the correct type to keep track of length so we have to recalculate it. 
135 | columnChunks := col.Data().Chunks() 136 | chunks := make([]*array.Uint64, len(columnChunks)) 137 | var length int64 138 | var nulls int64 139 | 140 | for i, chunk := range columnChunks { 141 | // Keep our own refs to chunks 142 | chunks[i] = chunk.(*array.Uint64) 143 | // Retain the chunk 144 | chunks[i].Retain() 145 | 146 | // Keep our own counters instead of Chunked's 147 | length += int64(chunk.Len()) 148 | nulls += int64(chunk.NullN()) 149 | } 150 | 151 | return &Uint64ChunkIterator{ 152 | refCount: 1, 153 | col: col, 154 | 155 | chunks: chunks, 156 | length: length, 157 | nulls: nulls, 158 | dtype: col.DataType(), 159 | 160 | currentIndex: 0, 161 | currentChunk: nil, 162 | } 163 | } 164 | 165 | // Chunk will return the current chunk that the iterator is on. 166 | func (cr *Uint64ChunkIterator) Chunk() *array.Uint64 { return cr.currentChunk } 167 | 168 | // ChunkValues returns the underlying []uint64 chunk values. 169 | // Keep in mind the []uint64 type might not be able 170 | // to account for nil values. You must check for those explicitly via the chunk. 171 | func (cr *Uint64ChunkIterator) ChunkValues() []uint64 { return cr.Chunk().Uint64Values() } 172 | 173 | // Next moves the iterator to the next chunk. This will return false 174 | // when there are no more chunks. 
175 | func (cr *Uint64ChunkIterator) Next() bool { 176 | if cr.currentIndex >= len(cr.chunks) { 177 | return false 178 | } 179 | 180 | if cr.currentChunk != nil { 181 | cr.currentChunk.Release() 182 | } 183 | 184 | cr.currentChunk = cr.chunks[cr.currentIndex] 185 | cr.currentChunk.Retain() 186 | cr.currentIndex++ 187 | 188 | return true 189 | } 190 | 191 | // Retain keeps a reference to the Uint64ChunkIterator 192 | func (cr *Uint64ChunkIterator) Retain() { 193 | atomic.AddInt64(&cr.refCount, 1) 194 | } 195 | 196 | // Release removes a reference to the Uint64ChunkIterator 197 | func (cr *Uint64ChunkIterator) Release() { 198 | debug.Assert(atomic.LoadInt64(&cr.refCount) > 0, "too many releases") 199 | ref := atomic.AddInt64(&cr.refCount, -1) 200 | if ref == 0 { 201 | cr.col.Release() 202 | for i := range cr.chunks { 203 | cr.chunks[i].Release() 204 | } 205 | if cr.currentChunk != nil { 206 | cr.currentChunk.Release() 207 | cr.currentChunk = nil 208 | } 209 | cr.col = nil 210 | cr.chunks = nil 211 | cr.dtype = nil 212 | } 213 | } 214 | 215 | // Float64ChunkIterator is an iterator for reading an Arrow Column value by value. 216 | type Float64ChunkIterator struct { 217 | refCount int64 218 | col *array.Column 219 | 220 | // Things Chunked maintains. We're going to maintain it ourselves. 221 | chunks []*array.Float64 // cache the chunks on this iterator 222 | length int64 // this isn't set right on Chunked so we won't rely on it there. Instead we keep the correct value here. 223 | nulls int64 224 | dtype arrow.DataType 225 | 226 | // Things we need to maintain for the iterator 227 | currentIndex int // current chunk 228 | currentChunk *array.Float64 // current chunk 229 | } 230 | 231 | // NewFloat64ChunkIterator creates a new Float64ChunkIterator for reading an Arrow Column. 
232 | func NewFloat64ChunkIterator(col *array.Column) *Float64ChunkIterator { 233 | col.Retain() 234 | 235 | // Chunked is not using the correct type to keep track of length so we have to recalculate it. 236 | columnChunks := col.Data().Chunks() 237 | chunks := make([]*array.Float64, len(columnChunks)) 238 | var length int64 239 | var nulls int64 240 | 241 | for i, chunk := range columnChunks { 242 | // Keep our own refs to chunks 243 | chunks[i] = chunk.(*array.Float64) 244 | // Retain the chunk 245 | chunks[i].Retain() 246 | 247 | // Keep our own counters instead of Chunked's 248 | length += int64(chunk.Len()) 249 | nulls += int64(chunk.NullN()) 250 | } 251 | 252 | return &Float64ChunkIterator{ 253 | refCount: 1, 254 | col: col, 255 | 256 | chunks: chunks, 257 | length: length, 258 | nulls: nulls, 259 | dtype: col.DataType(), 260 | 261 | currentIndex: 0, 262 | currentChunk: nil, 263 | } 264 | } 265 | 266 | // Chunk will return the current chunk that the iterator is on. 267 | func (cr *Float64ChunkIterator) Chunk() *array.Float64 { return cr.currentChunk } 268 | 269 | // ChunkValues returns the underlying []float64 chunk values. 270 | // Keep in mind the []float64 type might not be able 271 | // to account for nil values. You must check for those explicitly via the chunk. 272 | func (cr *Float64ChunkIterator) ChunkValues() []float64 { return cr.Chunk().Float64Values() } 273 | 274 | // Next moves the iterator to the next chunk. This will return false 275 | // when there are no more chunks. 
276 | func (cr *Float64ChunkIterator) Next() bool { 277 | if cr.currentIndex >= len(cr.chunks) { 278 | return false 279 | } 280 | 281 | if cr.currentChunk != nil { 282 | cr.currentChunk.Release() 283 | } 284 | 285 | cr.currentChunk = cr.chunks[cr.currentIndex] 286 | cr.currentChunk.Retain() 287 | cr.currentIndex++ 288 | 289 | return true 290 | } 291 | 292 | // Retain keeps a reference to the Float64ChunkIterator 293 | func (cr *Float64ChunkIterator) Retain() { 294 | atomic.AddInt64(&cr.refCount, 1) 295 | } 296 | 297 | // Release removes a reference to the Float64ChunkIterator 298 | func (cr *Float64ChunkIterator) Release() { 299 | debug.Assert(atomic.LoadInt64(&cr.refCount) > 0, "too many releases") 300 | ref := atomic.AddInt64(&cr.refCount, -1) 301 | if ref == 0 { 302 | cr.col.Release() 303 | for i := range cr.chunks { 304 | cr.chunks[i].Release() 305 | } 306 | if cr.currentChunk != nil { 307 | cr.currentChunk.Release() 308 | cr.currentChunk = nil 309 | } 310 | cr.col = nil 311 | cr.chunks = nil 312 | cr.dtype = nil 313 | } 314 | } 315 | 316 | // Int32ChunkIterator is an iterator for reading an Arrow Column value by value. 317 | type Int32ChunkIterator struct { 318 | refCount int64 319 | col *array.Column 320 | 321 | // Things Chunked maintains. We're going to maintain it ourselves. 322 | chunks []*array.Int32 // cache the chunks on this iterator 323 | length int64 // this isn't set right on Chunked so we won't rely on it there. Instead we keep the correct value here. 324 | nulls int64 325 | dtype arrow.DataType 326 | 327 | // Things we need to maintain for the iterator 328 | currentIndex int // current chunk 329 | currentChunk *array.Int32 // current chunk 330 | } 331 | 332 | // NewInt32ChunkIterator creates a new Int32ChunkIterator for reading an Arrow Column. 333 | func NewInt32ChunkIterator(col *array.Column) *Int32ChunkIterator { 334 | col.Retain() 335 | 336 | // Chunked is not using the correct type to keep track of length so we have to recalculate it. 
337 | columnChunks := col.Data().Chunks() 338 | chunks := make([]*array.Int32, len(columnChunks)) 339 | var length int64 340 | var nulls int64 341 | 342 | for i, chunk := range columnChunks { 343 | // Keep our own refs to chunks 344 | chunks[i] = chunk.(*array.Int32) 345 | // Retain the chunk 346 | chunks[i].Retain() 347 | 348 | // Keep our own counters instead of Chunked's 349 | length += int64(chunk.Len()) 350 | nulls += int64(chunk.NullN()) 351 | } 352 | 353 | return &Int32ChunkIterator{ 354 | refCount: 1, 355 | col: col, 356 | 357 | chunks: chunks, 358 | length: length, 359 | nulls: nulls, 360 | dtype: col.DataType(), 361 | 362 | currentIndex: 0, 363 | currentChunk: nil, 364 | } 365 | } 366 | 367 | // Chunk will return the current chunk that the iterator is on. 368 | func (cr *Int32ChunkIterator) Chunk() *array.Int32 { return cr.currentChunk } 369 | 370 | // ChunkValues returns the underlying []int32 chunk values. 371 | // Keep in mind the []int32 type might not be able 372 | // to account for nil values. You must check for those explicitly via the chunk. 373 | func (cr *Int32ChunkIterator) ChunkValues() []int32 { return cr.Chunk().Int32Values() } 374 | 375 | // Next moves the iterator to the next chunk. This will return false 376 | // when there are no more chunks. 
377 | func (cr *Int32ChunkIterator) Next() bool { 378 | if cr.currentIndex >= len(cr.chunks) { 379 | return false 380 | } 381 | 382 | if cr.currentChunk != nil { 383 | cr.currentChunk.Release() 384 | } 385 | 386 | cr.currentChunk = cr.chunks[cr.currentIndex] 387 | cr.currentChunk.Retain() 388 | cr.currentIndex++ 389 | 390 | return true 391 | } 392 | 393 | // Retain keeps a reference to the Int32ChunkIterator 394 | func (cr *Int32ChunkIterator) Retain() { 395 | atomic.AddInt64(&cr.refCount, 1) 396 | } 397 | 398 | // Release removes a reference to the Int32ChunkIterator 399 | func (cr *Int32ChunkIterator) Release() { 400 | debug.Assert(atomic.LoadInt64(&cr.refCount) > 0, "too many releases") 401 | ref := atomic.AddInt64(&cr.refCount, -1) 402 | if ref == 0 { 403 | cr.col.Release() 404 | for i := range cr.chunks { 405 | cr.chunks[i].Release() 406 | } 407 | if cr.currentChunk != nil { 408 | cr.currentChunk.Release() 409 | cr.currentChunk = nil 410 | } 411 | cr.col = nil 412 | cr.chunks = nil 413 | cr.dtype = nil 414 | } 415 | } 416 | 417 | // Uint32ChunkIterator is an iterator for reading an Arrow Column value by value. 418 | type Uint32ChunkIterator struct { 419 | refCount int64 420 | col *array.Column 421 | 422 | // Things Chunked maintains. We're going to maintain it ourselves. 423 | chunks []*array.Uint32 // cache the chunks on this iterator 424 | length int64 // this isn't set right on Chunked so we won't rely on it there. Instead we keep the correct value here. 425 | nulls int64 426 | dtype arrow.DataType 427 | 428 | // Things we need to maintain for the iterator 429 | currentIndex int // current chunk 430 | currentChunk *array.Uint32 // current chunk 431 | } 432 | 433 | // NewUint32ChunkIterator creates a new Uint32ChunkIterator for reading an Arrow Column. 434 | func NewUint32ChunkIterator(col *array.Column) *Uint32ChunkIterator { 435 | col.Retain() 436 | 437 | // Chunked is not using the correct type to keep track of length so we have to recalculate it. 
438 | columnChunks := col.Data().Chunks() 439 | chunks := make([]*array.Uint32, len(columnChunks)) 440 | var length int64 441 | var nulls int64 442 | 443 | for i, chunk := range columnChunks { 444 | // Keep our own refs to chunks 445 | chunks[i] = chunk.(*array.Uint32) 446 | // Retain the chunk 447 | chunks[i].Retain() 448 | 449 | // Keep our own counters instead of Chunked's 450 | length += int64(chunk.Len()) 451 | nulls += int64(chunk.NullN()) 452 | } 453 | 454 | return &Uint32ChunkIterator{ 455 | refCount: 1, 456 | col: col, 457 | 458 | chunks: chunks, 459 | length: length, 460 | nulls: nulls, 461 | dtype: col.DataType(), 462 | 463 | currentIndex: 0, 464 | currentChunk: nil, 465 | } 466 | } 467 | 468 | // Chunk will return the current chunk that the iterator is on. 469 | func (cr *Uint32ChunkIterator) Chunk() *array.Uint32 { return cr.currentChunk } 470 | 471 | // ChunkValues returns the underlying []uint32 chunk values. 472 | // Keep in mind the []uint32 type might not be able 473 | // to account for nil values. You must check for those explicitly via the chunk. 474 | func (cr *Uint32ChunkIterator) ChunkValues() []uint32 { return cr.Chunk().Uint32Values() } 475 | 476 | // Next moves the iterator to the next chunk. This will return false 477 | // when there are no more chunks. 
478 | func (cr *Uint32ChunkIterator) Next() bool { 479 | if cr.currentIndex >= len(cr.chunks) { 480 | return false 481 | } 482 | 483 | if cr.currentChunk != nil { 484 | cr.currentChunk.Release() 485 | } 486 | 487 | cr.currentChunk = cr.chunks[cr.currentIndex] 488 | cr.currentChunk.Retain() 489 | cr.currentIndex++ 490 | 491 | return true 492 | } 493 | 494 | // Retain keeps a reference to the Uint32ChunkIterator 495 | func (cr *Uint32ChunkIterator) Retain() { 496 | atomic.AddInt64(&cr.refCount, 1) 497 | } 498 | 499 | // Release removes a reference to the Uint32ChunkIterator 500 | func (cr *Uint32ChunkIterator) Release() { 501 | debug.Assert(atomic.LoadInt64(&cr.refCount) > 0, "too many releases") 502 | ref := atomic.AddInt64(&cr.refCount, -1) 503 | if ref == 0 { 504 | cr.col.Release() 505 | for i := range cr.chunks { 506 | cr.chunks[i].Release() 507 | } 508 | if cr.currentChunk != nil { 509 | cr.currentChunk.Release() 510 | cr.currentChunk = nil 511 | } 512 | cr.col = nil 513 | cr.chunks = nil 514 | cr.dtype = nil 515 | } 516 | } 517 | 518 | // Float32ChunkIterator is an iterator for reading an Arrow Column value by value. 519 | type Float32ChunkIterator struct { 520 | refCount int64 521 | col *array.Column 522 | 523 | // Things Chunked maintains. We're going to maintain it ourselves. 524 | chunks []*array.Float32 // cache the chunks on this iterator 525 | length int64 // this isn't set right on Chunked so we won't rely on it there. Instead we keep the correct value here. 526 | nulls int64 527 | dtype arrow.DataType 528 | 529 | // Things we need to maintain for the iterator 530 | currentIndex int // current chunk 531 | currentChunk *array.Float32 // current chunk 532 | } 533 | 534 | // NewFloat32ChunkIterator creates a new Float32ChunkIterator for reading an Arrow Column. 
535 | func NewFloat32ChunkIterator(col *array.Column) *Float32ChunkIterator { 536 | col.Retain() 537 | 538 | // Chunked is not using the correct type to keep track of length so we have to recalculate it. 539 | columnChunks := col.Data().Chunks() 540 | chunks := make([]*array.Float32, len(columnChunks)) 541 | var length int64 542 | var nulls int64 543 | 544 | for i, chunk := range columnChunks { 545 | // Keep our own refs to chunks 546 | chunks[i] = chunk.(*array.Float32) 547 | // Retain the chunk 548 | chunks[i].Retain() 549 | 550 | // Keep our own counters instead of Chunked's 551 | length += int64(chunk.Len()) 552 | nulls += int64(chunk.NullN()) 553 | } 554 | 555 | return &Float32ChunkIterator{ 556 | refCount: 1, 557 | col: col, 558 | 559 | chunks: chunks, 560 | length: length, 561 | nulls: nulls, 562 | dtype: col.DataType(), 563 | 564 | currentIndex: 0, 565 | currentChunk: nil, 566 | } 567 | } 568 | 569 | // Chunk will return the current chunk that the iterator is on. 570 | func (cr *Float32ChunkIterator) Chunk() *array.Float32 { return cr.currentChunk } 571 | 572 | // ChunkValues returns the underlying []float32 chunk values. 573 | // Keep in mind the []float32 type might not be able 574 | // to account for nil values. You must check for those explicitly via the chunk. 575 | func (cr *Float32ChunkIterator) ChunkValues() []float32 { return cr.Chunk().Float32Values() } 576 | 577 | // Next moves the iterator to the next chunk. This will return false 578 | // when there are no more chunks. 
579 | func (cr *Float32ChunkIterator) Next() bool { 580 | if cr.currentIndex >= len(cr.chunks) { 581 | return false 582 | } 583 | 584 | if cr.currentChunk != nil { 585 | cr.currentChunk.Release() 586 | } 587 | 588 | cr.currentChunk = cr.chunks[cr.currentIndex] 589 | cr.currentChunk.Retain() 590 | cr.currentIndex++ 591 | 592 | return true 593 | } 594 | 595 | // Retain keeps a reference to the Float32ChunkIterator 596 | func (cr *Float32ChunkIterator) Retain() { 597 | atomic.AddInt64(&cr.refCount, 1) 598 | } 599 | 600 | // Release removes a reference to the Float32ChunkIterator 601 | func (cr *Float32ChunkIterator) Release() { 602 | debug.Assert(atomic.LoadInt64(&cr.refCount) > 0, "too many releases") 603 | ref := atomic.AddInt64(&cr.refCount, -1) 604 | if ref == 0 { 605 | cr.col.Release() 606 | for i := range cr.chunks { 607 | cr.chunks[i].Release() 608 | } 609 | if cr.currentChunk != nil { 610 | cr.currentChunk.Release() 611 | cr.currentChunk = nil 612 | } 613 | cr.col = nil 614 | cr.chunks = nil 615 | cr.dtype = nil 616 | } 617 | } 618 | 619 | // Int16ChunkIterator is an iterator for reading an Arrow Column value by value. 620 | type Int16ChunkIterator struct { 621 | refCount int64 622 | col *array.Column 623 | 624 | // Things Chunked maintains. We're going to maintain it ourselves. 625 | chunks []*array.Int16 // cache the chunks on this iterator 626 | length int64 // this isn't set right on Chunked so we won't rely on it there. Instead we keep the correct value here. 627 | nulls int64 628 | dtype arrow.DataType 629 | 630 | // Things we need to maintain for the iterator 631 | currentIndex int // current chunk 632 | currentChunk *array.Int16 // current chunk 633 | } 634 | 635 | // NewInt16ChunkIterator creates a new Int16ChunkIterator for reading an Arrow Column. 636 | func NewInt16ChunkIterator(col *array.Column) *Int16ChunkIterator { 637 | col.Retain() 638 | 639 | // Chunked is not using the correct type to keep track of length so we have to recalculate it. 
640 | columnChunks := col.Data().Chunks() 641 | chunks := make([]*array.Int16, len(columnChunks)) 642 | var length int64 643 | var nulls int64 644 | 645 | for i, chunk := range columnChunks { 646 | // Keep our own refs to chunks 647 | chunks[i] = chunk.(*array.Int16) 648 | // Retain the chunk 649 | chunks[i].Retain() 650 | 651 | // Keep our own counters instead of Chunked's 652 | length += int64(chunk.Len()) 653 | nulls += int64(chunk.NullN()) 654 | } 655 | 656 | return &Int16ChunkIterator{ 657 | refCount: 1, 658 | col: col, 659 | 660 | chunks: chunks, 661 | length: length, 662 | nulls: nulls, 663 | dtype: col.DataType(), 664 | 665 | currentIndex: 0, 666 | currentChunk: nil, 667 | } 668 | } 669 | 670 | // Chunk will return the current chunk that the iterator is on. 671 | func (cr *Int16ChunkIterator) Chunk() *array.Int16 { return cr.currentChunk } 672 | 673 | // ChunkValues returns the underlying []int16 chunk values. 674 | // Keep in mind the []int16 type might not be able 675 | // to account for nil values. You must check for those explicitly via the chunk. 676 | func (cr *Int16ChunkIterator) ChunkValues() []int16 { return cr.Chunk().Int16Values() } 677 | 678 | // Next moves the iterator to the next chunk. This will return false 679 | // when there are no more chunks. 
680 | func (cr *Int16ChunkIterator) Next() bool { 681 | if cr.currentIndex >= len(cr.chunks) { 682 | return false 683 | } 684 | 685 | if cr.currentChunk != nil { 686 | cr.currentChunk.Release() 687 | } 688 | 689 | cr.currentChunk = cr.chunks[cr.currentIndex] 690 | cr.currentChunk.Retain() 691 | cr.currentIndex++ 692 | 693 | return true 694 | } 695 | 696 | // Retain keeps a reference to the Int16ChunkIterator 697 | func (cr *Int16ChunkIterator) Retain() { 698 | atomic.AddInt64(&cr.refCount, 1) 699 | } 700 | 701 | // Release removes a reference to the Int16ChunkIterator 702 | func (cr *Int16ChunkIterator) Release() { 703 | debug.Assert(atomic.LoadInt64(&cr.refCount) > 0, "too many releases") 704 | ref := atomic.AddInt64(&cr.refCount, -1) 705 | if ref == 0 { 706 | cr.col.Release() 707 | for i := range cr.chunks { 708 | cr.chunks[i].Release() 709 | } 710 | if cr.currentChunk != nil { 711 | cr.currentChunk.Release() 712 | cr.currentChunk = nil 713 | } 714 | cr.col = nil 715 | cr.chunks = nil 716 | cr.dtype = nil 717 | } 718 | } 719 | 720 | // Uint16ChunkIterator is an iterator for reading an Arrow Column value by value. 721 | type Uint16ChunkIterator struct { 722 | refCount int64 723 | col *array.Column 724 | 725 | // Things Chunked maintains. We're going to maintain it ourselves. 726 | chunks []*array.Uint16 // cache the chunks on this iterator 727 | length int64 // this isn't set right on Chunked so we won't rely on it there. Instead we keep the correct value here. 728 | nulls int64 729 | dtype arrow.DataType 730 | 731 | // Things we need to maintain for the iterator 732 | currentIndex int // current chunk 733 | currentChunk *array.Uint16 // current chunk 734 | } 735 | 736 | // NewUint16ChunkIterator creates a new Uint16ChunkIterator for reading an Arrow Column. 737 | func NewUint16ChunkIterator(col *array.Column) *Uint16ChunkIterator { 738 | col.Retain() 739 | 740 | // Chunked is not using the correct type to keep track of length so we have to recalculate it. 
741 | columnChunks := col.Data().Chunks() 742 | chunks := make([]*array.Uint16, len(columnChunks)) 743 | var length int64 744 | var nulls int64 745 | 746 | for i, chunk := range columnChunks { 747 | // Keep our own refs to chunks 748 | chunks[i] = chunk.(*array.Uint16) 749 | // Retain the chunk 750 | chunks[i].Retain() 751 | 752 | // Keep our own counters instead of Chunked's 753 | length += int64(chunk.Len()) 754 | nulls += int64(chunk.NullN()) 755 | } 756 | 757 | return &Uint16ChunkIterator{ 758 | refCount: 1, 759 | col: col, 760 | 761 | chunks: chunks, 762 | length: length, 763 | nulls: nulls, 764 | dtype: col.DataType(), 765 | 766 | currentIndex: 0, 767 | currentChunk: nil, 768 | } 769 | } 770 | 771 | // Chunk will return the current chunk that the iterator is on. 772 | func (cr *Uint16ChunkIterator) Chunk() *array.Uint16 { return cr.currentChunk } 773 | 774 | // ChunkValues returns the underlying []uint16 chunk values. 775 | // Keep in mind the []uint16 type might not be able 776 | // to account for nil values. You must check for those explicitly via the chunk. 777 | func (cr *Uint16ChunkIterator) ChunkValues() []uint16 { return cr.Chunk().Uint16Values() } 778 | 779 | // Next moves the iterator to the next chunk. This will return false 780 | // when there are no more chunks. 
781 | func (cr *Uint16ChunkIterator) Next() bool { 782 | if cr.currentIndex >= len(cr.chunks) { 783 | return false 784 | } 785 | 786 | if cr.currentChunk != nil { 787 | cr.currentChunk.Release() 788 | } 789 | 790 | cr.currentChunk = cr.chunks[cr.currentIndex] 791 | cr.currentChunk.Retain() 792 | cr.currentIndex++ 793 | 794 | return true 795 | } 796 | 797 | // Retain keeps a reference to the Uint16ChunkIterator 798 | func (cr *Uint16ChunkIterator) Retain() { 799 | atomic.AddInt64(&cr.refCount, 1) 800 | } 801 | 802 | // Release removes a reference to the Uint16ChunkIterator 803 | func (cr *Uint16ChunkIterator) Release() { 804 | debug.Assert(atomic.LoadInt64(&cr.refCount) > 0, "too many releases") 805 | ref := atomic.AddInt64(&cr.refCount, -1) 806 | if ref == 0 { 807 | cr.col.Release() 808 | for i := range cr.chunks { 809 | cr.chunks[i].Release() 810 | } 811 | if cr.currentChunk != nil { 812 | cr.currentChunk.Release() 813 | cr.currentChunk = nil 814 | } 815 | cr.col = nil 816 | cr.chunks = nil 817 | cr.dtype = nil 818 | } 819 | } 820 | 821 | // Int8ChunkIterator is an iterator for reading an Arrow Column value by value. 822 | type Int8ChunkIterator struct { 823 | refCount int64 824 | col *array.Column 825 | 826 | // Things Chunked maintains. We're going to maintain it ourselves. 827 | chunks []*array.Int8 // cache the chunks on this iterator 828 | length int64 // this isn't set right on Chunked so we won't rely on it there. Instead we keep the correct value here. 829 | nulls int64 830 | dtype arrow.DataType 831 | 832 | // Things we need to maintain for the iterator 833 | currentIndex int // current chunk 834 | currentChunk *array.Int8 // current chunk 835 | } 836 | 837 | // NewInt8ChunkIterator creates a new Int8ChunkIterator for reading an Arrow Column. 838 | func NewInt8ChunkIterator(col *array.Column) *Int8ChunkIterator { 839 | col.Retain() 840 | 841 | // Chunked is not using the correct type to keep track of length so we have to recalculate it. 
842 | columnChunks := col.Data().Chunks() 843 | chunks := make([]*array.Int8, len(columnChunks)) 844 | var length int64 845 | var nulls int64 846 | 847 | for i, chunk := range columnChunks { 848 | // Keep our own refs to chunks 849 | chunks[i] = chunk.(*array.Int8) 850 | // Retain the chunk 851 | chunks[i].Retain() 852 | 853 | // Keep our own counters instead of Chunked's 854 | length += int64(chunk.Len()) 855 | nulls += int64(chunk.NullN()) 856 | } 857 | 858 | return &Int8ChunkIterator{ 859 | refCount: 1, 860 | col: col, 861 | 862 | chunks: chunks, 863 | length: length, 864 | nulls: nulls, 865 | dtype: col.DataType(), 866 | 867 | currentIndex: 0, 868 | currentChunk: nil, 869 | } 870 | } 871 | 872 | // Chunk will return the current chunk that the iterator is on. 873 | func (cr *Int8ChunkIterator) Chunk() *array.Int8 { return cr.currentChunk } 874 | 875 | // ChunkValues returns the underlying []int8 chunk values. 876 | // Keep in mind the []int8 type might not be able 877 | // to account for nil values. You must check for those explicitly via the chunk. 878 | func (cr *Int8ChunkIterator) ChunkValues() []int8 { return cr.Chunk().Int8Values() } 879 | 880 | // Next moves the iterator to the next chunk. This will return false 881 | // when there are no more chunks. 
882 | func (cr *Int8ChunkIterator) Next() bool { 883 | if cr.currentIndex >= len(cr.chunks) { 884 | return false 885 | } 886 | 887 | if cr.currentChunk != nil { 888 | cr.currentChunk.Release() 889 | } 890 | 891 | cr.currentChunk = cr.chunks[cr.currentIndex] 892 | cr.currentChunk.Retain() 893 | cr.currentIndex++ 894 | 895 | return true 896 | } 897 | 898 | // Retain keeps a reference to the Int8ChunkIterator 899 | func (cr *Int8ChunkIterator) Retain() { 900 | atomic.AddInt64(&cr.refCount, 1) 901 | } 902 | 903 | // Release removes a reference to the Int8ChunkIterator 904 | func (cr *Int8ChunkIterator) Release() { 905 | debug.Assert(atomic.LoadInt64(&cr.refCount) > 0, "too many releases") 906 | ref := atomic.AddInt64(&cr.refCount, -1) 907 | if ref == 0 { 908 | cr.col.Release() 909 | for i := range cr.chunks { 910 | cr.chunks[i].Release() 911 | } 912 | if cr.currentChunk != nil { 913 | cr.currentChunk.Release() 914 | cr.currentChunk = nil 915 | } 916 | cr.col = nil 917 | cr.chunks = nil 918 | cr.dtype = nil 919 | } 920 | } 921 | 922 | // Uint8ChunkIterator is an iterator for reading an Arrow Column value by value. 923 | type Uint8ChunkIterator struct { 924 | refCount int64 925 | col *array.Column 926 | 927 | // Things Chunked maintains. We're going to maintain it ourselves. 928 | chunks []*array.Uint8 // cache the chunks on this iterator 929 | length int64 // this isn't set right on Chunked so we won't rely on it there. Instead we keep the correct value here. 930 | nulls int64 931 | dtype arrow.DataType 932 | 933 | // Things we need to maintain for the iterator 934 | currentIndex int // current chunk 935 | currentChunk *array.Uint8 // current chunk 936 | } 937 | 938 | // NewUint8ChunkIterator creates a new Uint8ChunkIterator for reading an Arrow Column. 939 | func NewUint8ChunkIterator(col *array.Column) *Uint8ChunkIterator { 940 | col.Retain() 941 | 942 | // Chunked is not using the correct type to keep track of length so we have to recalculate it. 
943 | columnChunks := col.Data().Chunks() 944 | chunks := make([]*array.Uint8, len(columnChunks)) 945 | var length int64 946 | var nulls int64 947 | 948 | for i, chunk := range columnChunks { 949 | // Keep our own refs to chunks 950 | chunks[i] = chunk.(*array.Uint8) 951 | // Retain the chunk 952 | chunks[i].Retain() 953 | 954 | // Keep our own counters instead of Chunked's 955 | length += int64(chunk.Len()) 956 | nulls += int64(chunk.NullN()) 957 | } 958 | 959 | return &Uint8ChunkIterator{ 960 | refCount: 1, 961 | col: col, 962 | 963 | chunks: chunks, 964 | length: length, 965 | nulls: nulls, 966 | dtype: col.DataType(), 967 | 968 | currentIndex: 0, 969 | currentChunk: nil, 970 | } 971 | } 972 | 973 | // Chunk will return the current chunk that the iterator is on. 974 | func (cr *Uint8ChunkIterator) Chunk() *array.Uint8 { return cr.currentChunk } 975 | 976 | // ChunkValues returns the underlying []uint8 chunk values. 977 | // Keep in mind the []uint8 type might not be able 978 | // to account for nil values. You must check for those explicitly via the chunk. 979 | func (cr *Uint8ChunkIterator) ChunkValues() []uint8 { return cr.Chunk().Uint8Values() } 980 | 981 | // Next moves the iterator to the next chunk. This will return false 982 | // when there are no more chunks. 
983 | func (cr *Uint8ChunkIterator) Next() bool { 984 | if cr.currentIndex >= len(cr.chunks) { 985 | return false 986 | } 987 | 988 | if cr.currentChunk != nil { 989 | cr.currentChunk.Release() 990 | } 991 | 992 | cr.currentChunk = cr.chunks[cr.currentIndex] 993 | cr.currentChunk.Retain() 994 | cr.currentIndex++ 995 | 996 | return true 997 | } 998 | 999 | // Retain keeps a reference to the Uint8ChunkIterator 1000 | func (cr *Uint8ChunkIterator) Retain() { 1001 | atomic.AddInt64(&cr.refCount, 1) 1002 | } 1003 | 1004 | // Release removes a reference to the Uint8ChunkIterator 1005 | func (cr *Uint8ChunkIterator) Release() { 1006 | debug.Assert(atomic.LoadInt64(&cr.refCount) > 0, "too many releases") 1007 | ref := atomic.AddInt64(&cr.refCount, -1) 1008 | if ref == 0 { 1009 | cr.col.Release() 1010 | for i := range cr.chunks { 1011 | cr.chunks[i].Release() 1012 | } 1013 | if cr.currentChunk != nil { 1014 | cr.currentChunk.Release() 1015 | cr.currentChunk = nil 1016 | } 1017 | cr.col = nil 1018 | cr.chunks = nil 1019 | cr.dtype = nil 1020 | } 1021 | } 1022 | 1023 | // TimestampChunkIterator is an iterator for reading an Arrow Column value by value. 1024 | type TimestampChunkIterator struct { 1025 | refCount int64 1026 | col *array.Column 1027 | 1028 | // Things Chunked maintains. We're going to maintain it ourselves. 1029 | chunks []*array.Timestamp // cache the chunks on this iterator 1030 | length int64 // this isn't set right on Chunked so we won't rely on it there. Instead we keep the correct value here. 1031 | nulls int64 1032 | dtype arrow.DataType 1033 | 1034 | // Things we need to maintain for the iterator 1035 | currentIndex int // current chunk 1036 | currentChunk *array.Timestamp // current chunk 1037 | } 1038 | 1039 | // NewTimestampChunkIterator creates a new TimestampChunkIterator for reading an Arrow Column. 
1040 | func NewTimestampChunkIterator(col *array.Column) *TimestampChunkIterator { 1041 | col.Retain() 1042 | 1043 | // Chunked is not using the correct type to keep track of length so we have to recalculate it. 1044 | columnChunks := col.Data().Chunks() 1045 | chunks := make([]*array.Timestamp, len(columnChunks)) 1046 | var length int64 1047 | var nulls int64 1048 | 1049 | for i, chunk := range columnChunks { 1050 | // Keep our own refs to chunks 1051 | chunks[i] = chunk.(*array.Timestamp) 1052 | // Retain the chunk 1053 | chunks[i].Retain() 1054 | 1055 | // Keep our own counters instead of Chunked's 1056 | length += int64(chunk.Len()) 1057 | nulls += int64(chunk.NullN()) 1058 | } 1059 | 1060 | return &TimestampChunkIterator{ 1061 | refCount: 1, 1062 | col: col, 1063 | 1064 | chunks: chunks, 1065 | length: length, 1066 | nulls: nulls, 1067 | dtype: col.DataType(), 1068 | 1069 | currentIndex: 0, 1070 | currentChunk: nil, 1071 | } 1072 | } 1073 | 1074 | // Chunk will return the current chunk that the iterator is on. 1075 | func (cr *TimestampChunkIterator) Chunk() *array.Timestamp { return cr.currentChunk } 1076 | 1077 | // ChunkValues returns the underlying []arrow.Timestamp chunk values. 1078 | // Keep in mind the []arrow.Timestamp type might not be able 1079 | // to account for nil values. You must check for those explicitly via the chunk. 1080 | func (cr *TimestampChunkIterator) ChunkValues() []arrow.Timestamp { return cr.Chunk().TimestampValues() } 1081 | 1082 | // Next moves the iterator to the next chunk. This will return false 1083 | // when there are no more chunks. 
1084 | func (cr *TimestampChunkIterator) Next() bool { 1085 | if cr.currentIndex >= len(cr.chunks) { 1086 | return false 1087 | } 1088 | 1089 | if cr.currentChunk != nil { 1090 | cr.currentChunk.Release() 1091 | } 1092 | 1093 | cr.currentChunk = cr.chunks[cr.currentIndex] 1094 | cr.currentChunk.Retain() 1095 | cr.currentIndex++ 1096 | 1097 | return true 1098 | } 1099 | 1100 | // Retain keeps a reference to the TimestampChunkIterator 1101 | func (cr *TimestampChunkIterator) Retain() { 1102 | atomic.AddInt64(&cr.refCount, 1) 1103 | } 1104 | 1105 | // Release removes a reference to the TimestampChunkIterator 1106 | func (cr *TimestampChunkIterator) Release() { 1107 | debug.Assert(atomic.LoadInt64(&cr.refCount) > 0, "too many releases") 1108 | ref := atomic.AddInt64(&cr.refCount, -1) 1109 | if ref == 0 { 1110 | cr.col.Release() 1111 | for i := range cr.chunks { 1112 | cr.chunks[i].Release() 1113 | } 1114 | if cr.currentChunk != nil { 1115 | cr.currentChunk.Release() 1116 | cr.currentChunk = nil 1117 | } 1118 | cr.col = nil 1119 | cr.chunks = nil 1120 | cr.dtype = nil 1121 | } 1122 | } 1123 | 1124 | // Time32ChunkIterator is an iterator for reading an Arrow Column value by value. 1125 | type Time32ChunkIterator struct { 1126 | refCount int64 1127 | col *array.Column 1128 | 1129 | // Things Chunked maintains. We're going to maintain it ourselves. 1130 | chunks []*array.Time32 // cache the chunks on this iterator 1131 | length int64 // this isn't set right on Chunked so we won't rely on it there. Instead we keep the correct value here. 1132 | nulls int64 1133 | dtype arrow.DataType 1134 | 1135 | // Things we need to maintain for the iterator 1136 | currentIndex int // current chunk 1137 | currentChunk *array.Time32 // current chunk 1138 | } 1139 | 1140 | // NewTime32ChunkIterator creates a new Time32ChunkIterator for reading an Arrow Column. 
1141 | func NewTime32ChunkIterator(col *array.Column) *Time32ChunkIterator { 1142 | col.Retain() 1143 | 1144 | // Chunked is not using the correct type to keep track of length so we have to recalculate it. 1145 | columnChunks := col.Data().Chunks() 1146 | chunks := make([]*array.Time32, len(columnChunks)) 1147 | var length int64 1148 | var nulls int64 1149 | 1150 | for i, chunk := range columnChunks { 1151 | // Keep our own refs to chunks 1152 | chunks[i] = chunk.(*array.Time32) 1153 | // Retain the chunk 1154 | chunks[i].Retain() 1155 | 1156 | // Keep our own counters instead of Chunked's 1157 | length += int64(chunk.Len()) 1158 | nulls += int64(chunk.NullN()) 1159 | } 1160 | 1161 | return &Time32ChunkIterator{ 1162 | refCount: 1, 1163 | col: col, 1164 | 1165 | chunks: chunks, 1166 | length: length, 1167 | nulls: nulls, 1168 | dtype: col.DataType(), 1169 | 1170 | currentIndex: 0, 1171 | currentChunk: nil, 1172 | } 1173 | } 1174 | 1175 | // Chunk will return the current chunk that the iterator is on. 1176 | func (cr *Time32ChunkIterator) Chunk() *array.Time32 { return cr.currentChunk } 1177 | 1178 | // ChunkValues returns the underlying []arrow.Time32 chunk values. 1179 | // Keep in mind the []arrow.Time32 type might not be able 1180 | // to account for nil values. You must check for those explicitly via the chunk. 1181 | func (cr *Time32ChunkIterator) ChunkValues() []arrow.Time32 { return cr.Chunk().Time32Values() } 1182 | 1183 | // Next moves the iterator to the next chunk. This will return false 1184 | // when there are no more chunks. 
1185 | func (cr *Time32ChunkIterator) Next() bool { 1186 | if cr.currentIndex >= len(cr.chunks) { 1187 | return false 1188 | } 1189 | 1190 | if cr.currentChunk != nil { 1191 | cr.currentChunk.Release() 1192 | } 1193 | 1194 | cr.currentChunk = cr.chunks[cr.currentIndex] 1195 | cr.currentChunk.Retain() 1196 | cr.currentIndex++ 1197 | 1198 | return true 1199 | } 1200 | 1201 | // Retain keeps a reference to the Time32ChunkIterator 1202 | func (cr *Time32ChunkIterator) Retain() { 1203 | atomic.AddInt64(&cr.refCount, 1) 1204 | } 1205 | 1206 | // Release removes a reference to the Time32ChunkIterator 1207 | func (cr *Time32ChunkIterator) Release() { 1208 | debug.Assert(atomic.LoadInt64(&cr.refCount) > 0, "too many releases") 1209 | ref := atomic.AddInt64(&cr.refCount, -1) 1210 | if ref == 0 { 1211 | cr.col.Release() 1212 | for i := range cr.chunks { 1213 | cr.chunks[i].Release() 1214 | } 1215 | if cr.currentChunk != nil { 1216 | cr.currentChunk.Release() 1217 | cr.currentChunk = nil 1218 | } 1219 | cr.col = nil 1220 | cr.chunks = nil 1221 | cr.dtype = nil 1222 | } 1223 | } 1224 | 1225 | // Time64ChunkIterator is an iterator for reading an Arrow Column value by value. 1226 | type Time64ChunkIterator struct { 1227 | refCount int64 1228 | col *array.Column 1229 | 1230 | // Things Chunked maintains. We're going to maintain it ourselves. 1231 | chunks []*array.Time64 // cache the chunks on this iterator 1232 | length int64 // this isn't set right on Chunked so we won't rely on it there. Instead we keep the correct value here. 1233 | nulls int64 1234 | dtype arrow.DataType 1235 | 1236 | // Things we need to maintain for the iterator 1237 | currentIndex int // current chunk 1238 | currentChunk *array.Time64 // current chunk 1239 | } 1240 | 1241 | // NewTime64ChunkIterator creates a new Time64ChunkIterator for reading an Arrow Column. 
1242 | func NewTime64ChunkIterator(col *array.Column) *Time64ChunkIterator { 1243 | col.Retain() 1244 | 1245 | // Chunked is not using the correct type to keep track of length so we have to recalculate it. 1246 | columnChunks := col.Data().Chunks() 1247 | chunks := make([]*array.Time64, len(columnChunks)) 1248 | var length int64 1249 | var nulls int64 1250 | 1251 | for i, chunk := range columnChunks { 1252 | // Keep our own refs to chunks 1253 | chunks[i] = chunk.(*array.Time64) 1254 | // Retain the chunk 1255 | chunks[i].Retain() 1256 | 1257 | // Keep our own counters instead of Chunked's 1258 | length += int64(chunk.Len()) 1259 | nulls += int64(chunk.NullN()) 1260 | } 1261 | 1262 | return &Time64ChunkIterator{ 1263 | refCount: 1, 1264 | col: col, 1265 | 1266 | chunks: chunks, 1267 | length: length, 1268 | nulls: nulls, 1269 | dtype: col.DataType(), 1270 | 1271 | currentIndex: 0, 1272 | currentChunk: nil, 1273 | } 1274 | } 1275 | 1276 | // Chunk will return the current chunk that the iterator is on. 1277 | func (cr *Time64ChunkIterator) Chunk() *array.Time64 { return cr.currentChunk } 1278 | 1279 | // ChunkValues returns the underlying []arrow.Time64 chunk values. 1280 | // Keep in mind the []arrow.Time64 type might not be able 1281 | // to account for nil values. You must check for those explicitly via the chunk. 1282 | func (cr *Time64ChunkIterator) ChunkValues() []arrow.Time64 { return cr.Chunk().Time64Values() } 1283 | 1284 | // Next moves the iterator to the next chunk. This will return false 1285 | // when there are no more chunks. 
1286 | func (cr *Time64ChunkIterator) Next() bool { 1287 | if cr.currentIndex >= len(cr.chunks) { 1288 | return false 1289 | } 1290 | 1291 | if cr.currentChunk != nil { 1292 | cr.currentChunk.Release() 1293 | } 1294 | 1295 | cr.currentChunk = cr.chunks[cr.currentIndex] 1296 | cr.currentChunk.Retain() 1297 | cr.currentIndex++ 1298 | 1299 | return true 1300 | } 1301 | 1302 | // Retain keeps a reference to the Time64ChunkIterator 1303 | func (cr *Time64ChunkIterator) Retain() { 1304 | atomic.AddInt64(&cr.refCount, 1) 1305 | } 1306 | 1307 | // Release removes a reference to the Time64ChunkIterator 1308 | func (cr *Time64ChunkIterator) Release() { 1309 | debug.Assert(atomic.LoadInt64(&cr.refCount) > 0, "too many releases") 1310 | ref := atomic.AddInt64(&cr.refCount, -1) 1311 | if ref == 0 { 1312 | cr.col.Release() 1313 | for i := range cr.chunks { 1314 | cr.chunks[i].Release() 1315 | } 1316 | if cr.currentChunk != nil { 1317 | cr.currentChunk.Release() 1318 | cr.currentChunk = nil 1319 | } 1320 | cr.col = nil 1321 | cr.chunks = nil 1322 | cr.dtype = nil 1323 | } 1324 | } 1325 | 1326 | // Date32ChunkIterator is an iterator for reading an Arrow Column value by value. 1327 | type Date32ChunkIterator struct { 1328 | refCount int64 1329 | col *array.Column 1330 | 1331 | // Things Chunked maintains. We're going to maintain it ourselves. 1332 | chunks []*array.Date32 // cache the chunks on this iterator 1333 | length int64 // this isn't set right on Chunked so we won't rely on it there. Instead we keep the correct value here. 1334 | nulls int64 1335 | dtype arrow.DataType 1336 | 1337 | // Things we need to maintain for the iterator 1338 | currentIndex int // current chunk 1339 | currentChunk *array.Date32 // current chunk 1340 | } 1341 | 1342 | // NewDate32ChunkIterator creates a new Date32ChunkIterator for reading an Arrow Column. 
1343 | func NewDate32ChunkIterator(col *array.Column) *Date32ChunkIterator { 1344 | col.Retain() 1345 | 1346 | // Chunked is not using the correct type to keep track of length so we have to recalculate it. 1347 | columnChunks := col.Data().Chunks() 1348 | chunks := make([]*array.Date32, len(columnChunks)) 1349 | var length int64 1350 | var nulls int64 1351 | 1352 | for i, chunk := range columnChunks { 1353 | // Keep our own refs to chunks 1354 | chunks[i] = chunk.(*array.Date32) 1355 | // Retain the chunk 1356 | chunks[i].Retain() 1357 | 1358 | // Keep our own counters instead of Chunked's 1359 | length += int64(chunk.Len()) 1360 | nulls += int64(chunk.NullN()) 1361 | } 1362 | 1363 | return &Date32ChunkIterator{ 1364 | refCount: 1, 1365 | col: col, 1366 | 1367 | chunks: chunks, 1368 | length: length, 1369 | nulls: nulls, 1370 | dtype: col.DataType(), 1371 | 1372 | currentIndex: 0, 1373 | currentChunk: nil, 1374 | } 1375 | } 1376 | 1377 | // Chunk will return the current chunk that the iterator is on. 1378 | func (cr *Date32ChunkIterator) Chunk() *array.Date32 { return cr.currentChunk } 1379 | 1380 | // ChunkValues returns the underlying []arrow.Date32 chunk values. 1381 | // Keep in mind the []arrow.Date32 type might not be able 1382 | // to account for nil values. You must check for those explicitly via the chunk. 1383 | func (cr *Date32ChunkIterator) ChunkValues() []arrow.Date32 { return cr.Chunk().Date32Values() } 1384 | 1385 | // Next moves the iterator to the next chunk. This will return false 1386 | // when there are no more chunks. 
1387 | func (cr *Date32ChunkIterator) Next() bool { 1388 | if cr.currentIndex >= len(cr.chunks) { 1389 | return false 1390 | } 1391 | 1392 | if cr.currentChunk != nil { 1393 | cr.currentChunk.Release() 1394 | } 1395 | 1396 | cr.currentChunk = cr.chunks[cr.currentIndex] 1397 | cr.currentChunk.Retain() 1398 | cr.currentIndex++ 1399 | 1400 | return true 1401 | } 1402 | 1403 | // Retain keeps a reference to the Date32ChunkIterator 1404 | func (cr *Date32ChunkIterator) Retain() { 1405 | atomic.AddInt64(&cr.refCount, 1) 1406 | } 1407 | 1408 | // Release removes a reference to the Date32ChunkIterator 1409 | func (cr *Date32ChunkIterator) Release() { 1410 | debug.Assert(atomic.LoadInt64(&cr.refCount) > 0, "too many releases") 1411 | ref := atomic.AddInt64(&cr.refCount, -1) 1412 | if ref == 0 { 1413 | cr.col.Release() 1414 | for i := range cr.chunks { 1415 | cr.chunks[i].Release() 1416 | } 1417 | if cr.currentChunk != nil { 1418 | cr.currentChunk.Release() 1419 | cr.currentChunk = nil 1420 | } 1421 | cr.col = nil 1422 | cr.chunks = nil 1423 | cr.dtype = nil 1424 | } 1425 | } 1426 | 1427 | // Date64ChunkIterator is an iterator for reading an Arrow Column value by value. 1428 | type Date64ChunkIterator struct { 1429 | refCount int64 1430 | col *array.Column 1431 | 1432 | // Things Chunked maintains. We're going to maintain it ourselves. 1433 | chunks []*array.Date64 // cache the chunks on this iterator 1434 | length int64 // this isn't set right on Chunked so we won't rely on it there. Instead we keep the correct value here. 1435 | nulls int64 1436 | dtype arrow.DataType 1437 | 1438 | // Things we need to maintain for the iterator 1439 | currentIndex int // current chunk 1440 | currentChunk *array.Date64 // current chunk 1441 | } 1442 | 1443 | // NewDate64ChunkIterator creates a new Date64ChunkIterator for reading an Arrow Column. 
1444 | func NewDate64ChunkIterator(col *array.Column) *Date64ChunkIterator { 1445 | col.Retain() 1446 | 1447 | // Chunked is not using the correct type to keep track of length so we have to recalculate it. 1448 | columnChunks := col.Data().Chunks() 1449 | chunks := make([]*array.Date64, len(columnChunks)) 1450 | var length int64 1451 | var nulls int64 1452 | 1453 | for i, chunk := range columnChunks { 1454 | // Keep our own refs to chunks 1455 | chunks[i] = chunk.(*array.Date64) 1456 | // Retain the chunk 1457 | chunks[i].Retain() 1458 | 1459 | // Keep our own counters instead of Chunked's 1460 | length += int64(chunk.Len()) 1461 | nulls += int64(chunk.NullN()) 1462 | } 1463 | 1464 | return &Date64ChunkIterator{ 1465 | refCount: 1, 1466 | col: col, 1467 | 1468 | chunks: chunks, 1469 | length: length, 1470 | nulls: nulls, 1471 | dtype: col.DataType(), 1472 | 1473 | currentIndex: 0, 1474 | currentChunk: nil, 1475 | } 1476 | } 1477 | 1478 | // Chunk will return the current chunk that the iterator is on. 1479 | func (cr *Date64ChunkIterator) Chunk() *array.Date64 { return cr.currentChunk } 1480 | 1481 | // ChunkValues returns the underlying []arrow.Date64 chunk values. 1482 | // Keep in mind the []arrow.Date64 type might not be able 1483 | // to account for nil values. You must check for those explicitly via the chunk. 1484 | func (cr *Date64ChunkIterator) ChunkValues() []arrow.Date64 { return cr.Chunk().Date64Values() } 1485 | 1486 | // Next moves the iterator to the next chunk. This will return false 1487 | // when there are no more chunks. 
1488 | func (cr *Date64ChunkIterator) Next() bool { 1489 | if cr.currentIndex >= len(cr.chunks) { 1490 | return false 1491 | } 1492 | 1493 | if cr.currentChunk != nil { 1494 | cr.currentChunk.Release() 1495 | } 1496 | 1497 | cr.currentChunk = cr.chunks[cr.currentIndex] 1498 | cr.currentChunk.Retain() 1499 | cr.currentIndex++ 1500 | 1501 | return true 1502 | } 1503 | 1504 | // Retain keeps a reference to the Date64ChunkIterator 1505 | func (cr *Date64ChunkIterator) Retain() { 1506 | atomic.AddInt64(&cr.refCount, 1) 1507 | } 1508 | 1509 | // Release removes a reference to the Date64ChunkIterator 1510 | func (cr *Date64ChunkIterator) Release() { 1511 | debug.Assert(atomic.LoadInt64(&cr.refCount) > 0, "too many releases") 1512 | ref := atomic.AddInt64(&cr.refCount, -1) 1513 | if ref == 0 { 1514 | cr.col.Release() 1515 | for i := range cr.chunks { 1516 | cr.chunks[i].Release() 1517 | } 1518 | if cr.currentChunk != nil { 1519 | cr.currentChunk.Release() 1520 | cr.currentChunk = nil 1521 | } 1522 | cr.col = nil 1523 | cr.chunks = nil 1524 | cr.dtype = nil 1525 | } 1526 | } 1527 | -------------------------------------------------------------------------------- /iterator/chunkiterator.gen.go.tmpl: -------------------------------------------------------------------------------- 1 | package iterator 2 | 3 | import ( 4 | "sync/atomic" 5 | 6 | "github.com/go-bullseye/bullseye/internal/debug" 7 | "github.com/apache/arrow/go/arrow" 8 | "github.com/apache/arrow/go/arrow/array" 9 | ) 10 | 11 | {{range .In}} 12 | // {{.Name}}ChunkIterator is an iterator for reading an Arrow Column value by value. 13 | type {{.Name}}ChunkIterator struct { 14 | refCount int64 15 | col *array.Column 16 | 17 | // Things Chunked maintains. We're going to maintain it ourselves. 18 | chunks []*array.{{.Name}} // cache the chunks on this iterator 19 | length int64 // this isn't set right on Chunked so we won't rely on it there. Instead we keep the correct value here. 
20 | nulls int64 21 | dtype arrow.DataType 22 | 23 | // Things we need to maintain for the iterator 24 | currentIndex int // current chunk 25 | currentChunk *array.{{.Name}} // current chunk 26 | } 27 | 28 | // New{{.Name}}ChunkIterator creates a new {{.Name}}ChunkIterator for reading an Arrow Column. 29 | func New{{.Name}}ChunkIterator(col *array.Column) *{{.Name}}ChunkIterator { 30 | col.Retain() 31 | 32 | // Chunked is not using the correct type to keep track of length so we have to recalculate it. 33 | columnChunks := col.Data().Chunks() 34 | chunks := make([]*array.{{.Name}}, len(columnChunks)) 35 | var length int64 36 | var nulls int64 37 | 38 | for i, chunk := range columnChunks { 39 | // Keep our own refs to chunks 40 | chunks[i] = chunk.(*array.{{.Name}}) 41 | // Retain the chunk 42 | chunks[i].Retain() 43 | 44 | // Keep our own counters instead of Chunked's 45 | length += int64(chunk.Len()) 46 | nulls += int64(chunk.NullN()) 47 | } 48 | 49 | return &{{.Name}}ChunkIterator{ 50 | refCount: 1, 51 | col: col, 52 | 53 | chunks: chunks, 54 | length: length, 55 | nulls: nulls, 56 | dtype: col.DataType(), 57 | 58 | currentIndex: 0, 59 | currentChunk: nil, 60 | } 61 | } 62 | 63 | // Chunk will return the current chunk that the iterator is on. 64 | func (cr *{{.Name}}ChunkIterator) Chunk() *array.{{.Name}} { return cr.currentChunk } 65 | 66 | // ChunkValues returns the underlying []{{or .QualifiedType .Type}} chunk values. 67 | // Keep in mind the []{{or .QualifiedType .Type}} type might not be able 68 | // to account for nil values. You must check for those explicitly via the chunk. 69 | func (cr *{{.Name}}ChunkIterator) ChunkValues() []{{or .QualifiedType .Type}} { return cr.Chunk().{{.Name}}Values() } 70 | 71 | // Next moves the iterator to the next chunk. This will return false 72 | // when there are no more chunks. 
73 | func (cr *{{.Name}}ChunkIterator) Next() bool { 74 | if cr.currentIndex >= len(cr.chunks) { 75 | return false 76 | } 77 | 78 | if cr.currentChunk != nil { 79 | cr.currentChunk.Release() 80 | } 81 | 82 | cr.currentChunk = cr.chunks[cr.currentIndex] 83 | cr.currentChunk.Retain() 84 | cr.currentIndex++ 85 | 86 | return true 87 | } 88 | 89 | // Retain keeps a reference to the {{.Name}}ChunkIterator 90 | func (cr *{{.Name}}ChunkIterator) Retain() { 91 | atomic.AddInt64(&cr.refCount, 1) 92 | } 93 | 94 | // Release removes a reference to the {{.Name}}ChunkIterator 95 | func (cr *{{.Name}}ChunkIterator) Release() { 96 | debug.Assert(atomic.LoadInt64(&cr.refCount) > 0, "too many releases") 97 | ref := atomic.AddInt64(&cr.refCount, -1) 98 | if ref == 0 { 99 | cr.col.Release() 100 | for i := range cr.chunks { 101 | cr.chunks[i].Release() 102 | } 103 | if cr.currentChunk != nil { 104 | cr.currentChunk.Release() 105 | cr.currentChunk = nil 106 | } 107 | cr.col = nil 108 | cr.chunks = nil 109 | cr.dtype = nil 110 | } 111 | } 112 | 113 | 114 | {{end}} 115 | -------------------------------------------------------------------------------- /iterator/chunkiterator.go: -------------------------------------------------------------------------------- 1 | package iterator 2 | 3 | import ( 4 | "sync/atomic" 5 | 6 | "github.com/apache/arrow/go/arrow" 7 | "github.com/apache/arrow/go/arrow/array" 8 | "github.com/go-bullseye/bullseye/internal/debug" 9 | ) 10 | 11 | // ChunkIterator is a generic iterator for reading an Arrow Column chunk by chunk. 12 | type ChunkIterator struct { 13 | refCount int64 14 | col *array.Column 15 | 16 | // Things Chunked maintains. We're going to maintain it ourselves. 17 | chunks []array.Interface // cache the chunks on this iterator 18 | length int64 // this isn't set right on Chunked so we won't rely on it there. Instead we keep the correct value here. 
19 | nulls int64 20 | dtype arrow.DataType 21 | 22 | // Things we need to maintain for the iterator 23 | currentIndex int // current chunk 24 | currentChunk array.Interface // current chunk 25 | } 26 | 27 | // NewChunkIterator creates a new ChunkIterator for reading an Arrow Column. 28 | func NewChunkIterator(col *array.Column) *ChunkIterator { 29 | col.Retain() 30 | 31 | // Chunked is not using the correct type to keep track of length so we have to recalculate it. 32 | columnChunks := col.Data().Chunks() 33 | chunks := make([]array.Interface, len(columnChunks)) 34 | var length int64 35 | var nulls int64 36 | 37 | for i, chunk := range columnChunks { 38 | // Retain the chunk 39 | chunk.Retain() 40 | 41 | // Keep our own refs to chunks 42 | chunks[i] = chunk 43 | 44 | // Keep our own counters instead of Chunked's 45 | length += int64(chunk.Len()) 46 | nulls += int64(chunk.NullN()) 47 | } 48 | 49 | return &ChunkIterator{ 50 | refCount: 1, 51 | col: col, 52 | 53 | chunks: chunks, 54 | length: length, 55 | nulls: nulls, 56 | dtype: col.DataType(), 57 | 58 | currentIndex: 0, 59 | currentChunk: nil, 60 | } 61 | } 62 | 63 | // Chunk will return the current chunk that the iterator is on. 64 | func (cr *ChunkIterator) Chunk() array.Interface { return cr.currentChunk } 65 | 66 | // Next moves the iterator to the next chunk. This will return false 67 | // when there are no more chunks. 
68 | func (cr *ChunkIterator) Next() bool { 69 | if cr.currentIndex >= len(cr.chunks) { 70 | return false 71 | } 72 | 73 | if cr.currentChunk != nil { 74 | cr.currentChunk.Release() 75 | } 76 | 77 | cr.currentChunk = cr.chunks[cr.currentIndex] 78 | cr.currentChunk.Retain() 79 | cr.currentIndex++ 80 | 81 | return true 82 | } 83 | 84 | // Retain keeps a reference to the ChunkIterator 85 | func (cr *ChunkIterator) Retain() { 86 | atomic.AddInt64(&cr.refCount, 1) 87 | } 88 | 89 | // Release removes a reference to the ChunkIterator 90 | func (cr *ChunkIterator) Release() { 91 | debug.Assert(atomic.LoadInt64(&cr.refCount) > 0, "too many releases") 92 | ref := atomic.AddInt64(&cr.refCount, -1) 93 | if ref == 0 { 94 | cr.col.Release() 95 | for i := range cr.chunks { 96 | cr.chunks[i].Release() 97 | } 98 | if cr.currentChunk != nil { 99 | cr.currentChunk.Release() 100 | cr.currentChunk = nil 101 | } 102 | cr.col = nil 103 | cr.chunks = nil 104 | cr.dtype = nil 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /iterator/chunkiterator_test.go: -------------------------------------------------------------------------------- 1 | package iterator_test 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/apache/arrow/go/arrow" 7 | "github.com/apache/arrow/go/arrow/array" 8 | "github.com/apache/arrow/go/arrow/memory" 9 | "github.com/go-bullseye/bullseye/iterator" 10 | ) 11 | 12 | func buildRecords(pool *memory.CheckedAllocator, t *testing.T) ([]array.Record, *arrow.Schema) { 13 | schema := arrow.NewSchema( 14 | []arrow.Field{ 15 | {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, 16 | {Name: "f2-f64", Type: arrow.PrimitiveTypes.Float64}, 17 | }, 18 | nil, 19 | ) 20 | 21 | b := array.NewRecordBuilder(pool, schema) 22 | defer b.Release() 23 | 24 | b.Field(0).(*array.Int32Builder).AppendValues([]int32{1, 2, 3, 4, 5, 6}, nil) 25 | b.Field(0).(*array.Int32Builder).AppendValues([]int32{7, 8, 9, 10}, []bool{true, true, false, true}) 26 
| b.Field(1).(*array.Float64Builder).AppendValues([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, nil) 27 | 28 | rec1 := b.NewRecord() 29 | 30 | b.Field(0).(*array.Int32Builder).AppendValues([]int32{11, 12, 13, 14, 15, 16, 17, 18, 19, 20}, nil) 31 | b.Field(1).(*array.Float64Builder).AppendValues([]float64{11, 12, 13, 14, 15, 16, 17, 18, 19, 20}, nil) 32 | 33 | rec2 := b.NewRecord() 34 | 35 | b.Field(0).(*array.Int32Builder).AppendValues([]int32{31, 32, 33, 34, 35, 36, 37, 38, 39, 40}, nil) 36 | b.Field(1).(*array.Float64Builder).AppendValues([]float64{31, 32, 33, 34, 35, 36, 37, 38, 39, 40}, nil) 37 | 38 | rec3 := b.NewRecord() 39 | 40 | return []array.Record{rec1, rec2, rec3}, schema 41 | } 42 | 43 | func TestChunkIterator(t *testing.T) { 44 | pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) 45 | defer pool.AssertSize(t, 0) 46 | 47 | records, schema := buildRecords(pool, t) 48 | defer func() { 49 | for i := range records { 50 | records[i].Release() 51 | } 52 | }() 53 | 54 | expectedPtrs := make([]*int32, len(records)) 55 | for i := range expectedPtrs { 56 | expectedPtrs[i] = &records[i].Column(0).(*array.Int32).Int32Values()[0] 57 | } 58 | 59 | tbl := array.NewTableFromRecords(schema, records) 60 | defer tbl.Release() 61 | 62 | column := tbl.Column(0) 63 | cr := iterator.NewChunkIterator(column) 64 | defer cr.Release() 65 | 66 | n := 0 67 | for cr.Next() { 68 | values := cr.Chunk().(*array.Int32).Int32Values() 69 | if got, want := &values[0], expectedPtrs[n]; got != want { 70 | t.Fatalf("got=%d, want=%d", got, want) 71 | } 72 | n++ 73 | } 74 | } 75 | 76 | func TestInt32ChunkIterator(t *testing.T) { 77 | pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) 78 | defer pool.AssertSize(t, 0) 79 | 80 | records, schema := buildRecords(pool, t) 81 | defer func() { 82 | for i := range records { 83 | records[i].Release() 84 | } 85 | }() 86 | 87 | expectedPtrs := make([]*int32, len(records)) 88 | for i := range expectedPtrs { 89 | expectedPtrs[i] = 
&records[i].Column(0).(*array.Int32).Int32Values()[0] 90 | } 91 | 92 | tbl := array.NewTableFromRecords(schema, records) 93 | defer tbl.Release() 94 | 95 | column := tbl.Column(0) 96 | cr := iterator.NewInt32ChunkIterator(column) 97 | defer cr.Release() 98 | 99 | n := 0 100 | for cr.Next() { 101 | values := cr.ChunkValues() 102 | if got, want := &values[0], expectedPtrs[n]; got != want { 103 | t.Fatalf("got=%d, want=%d", got, want) 104 | } 105 | n++ 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /iterator/doc.go: -------------------------------------------------------------------------------- 1 | /* 2 | Package iterator provides iterators for chunks and values. 3 | 4 | Since Arrow can store chunks larger than the max int64 (9223372036854775807) due to how it 5 | store chunks, it's best to use iterators to iterate over chunks and their values. 6 | 7 | There are generic ChunkIterator and ValueIterator implementations as well as specific 8 | generated Arrow types for each of them, i.e. Float64ChunkIterator and Float64ValueIterator. 9 | 10 | */ 11 | package iterator 12 | -------------------------------------------------------------------------------- /iterator/stepiterator.go: -------------------------------------------------------------------------------- 1 | package iterator 2 | 3 | import ( 4 | "sync/atomic" 5 | 6 | "github.com/apache/arrow/go/arrow" 7 | "github.com/apache/arrow/go/arrow/array" 8 | "github.com/go-bullseye/bullseye/internal/debug" 9 | ) 10 | 11 | // StepValue holds the value for a given step. 12 | type StepValue struct { 13 | Values []interface{} 14 | Exists []bool 15 | Dtypes []arrow.DataType 16 | } 17 | 18 | // Value returns the value at index i and the data type for that value. 19 | func (sv StepValue) Value(i int) (interface{}, arrow.DataType) { 20 | return sv.Values[i], sv.Dtypes[i] 21 | } 22 | 23 | // StepIterator iterates over multiple iterators in step. 
24 | type StepIterator interface { 25 | Values() *StepValue 26 | Next() bool 27 | Retain() 28 | Release() 29 | } 30 | 31 | // stepIterator has a max number of elements it 32 | // can iterator over that must fit into uint64 33 | // which I doubt anyone is going to go over. 34 | type stepIterator struct { 35 | refCount int64 36 | iterators []ValueIterator 37 | index uint64 38 | stepValue *StepValue 39 | dtypes []arrow.DataType 40 | } 41 | 42 | // NewStepIteratorForColumns creates a new StepIterator given a slice of columns. 43 | func NewStepIteratorForColumns(cols []array.Column) StepIterator { 44 | itrs := make([]ValueIterator, 0, len(cols)) 45 | dtypes := make([]arrow.DataType, 0, len(cols)) 46 | for i := range cols { 47 | itrs = append(itrs, NewValueIterator(&cols[i])) 48 | dtypes = append(dtypes, cols[i].DataType()) 49 | } 50 | // NewStepIterator will retain the value iterators refs 51 | // so we need to remove our ref to them. 52 | for i := range itrs { 53 | defer itrs[i].Release() 54 | } 55 | return NewStepIterator(dtypes, itrs...) 56 | } 57 | 58 | // NewStepIterator creates a new StepIterator given a bunch of ValueIterators. 59 | func NewStepIterator(dtypes []arrow.DataType, iterators ...ValueIterator) StepIterator { 60 | for i := range iterators { 61 | iterators[i].Retain() 62 | } 63 | return &stepIterator{ 64 | refCount: 1, 65 | iterators: iterators, 66 | index: 0, 67 | dtypes: dtypes, 68 | } 69 | } 70 | 71 | // Values returns the values in the current step as a StepValue. 72 | func (s *stepIterator) Values() *StepValue { 73 | return s.stepValue 74 | } 75 | 76 | // Next returns false when there are no more rows in any iterator. 
77 | func (s *stepIterator) Next() bool { 78 | // build the step values 79 | step := &StepValue{ 80 | Values: make([]interface{}, len(s.iterators)), 81 | Exists: make([]bool, len(s.iterators)), 82 | Dtypes: s.dtypes, 83 | } 84 | 85 | next := false 86 | for i, iterator := range s.iterators { 87 | exists := iterator.Next() 88 | next = exists || next 89 | step.Exists[i] = exists 90 | 91 | if exists { 92 | step.Values[i] = iterator.ValueInterface() 93 | } else { 94 | step.Values[i] = nil 95 | } 96 | } 97 | 98 | s.stepValue = step 99 | return next 100 | } 101 | 102 | func (s *stepIterator) Retain() { 103 | atomic.AddInt64(&s.refCount, 1) 104 | } 105 | 106 | func (s *stepIterator) Release() { 107 | refs := atomic.AddInt64(&s.refCount, -1) 108 | debug.Assert(refs >= 0, "too many releases") 109 | if refs == 0 { 110 | for i := range s.iterators { 111 | s.iterators[i].Release() 112 | } 113 | s.iterators = nil 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /iterator/stepiterator_test.go: -------------------------------------------------------------------------------- 1 | package iterator_test 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/apache/arrow/go/arrow/array" 7 | "github.com/apache/arrow/go/arrow/memory" 8 | "github.com/go-bullseye/bullseye/iterator" 9 | ) 10 | 11 | func TestNewStepIteratorForColumns(t *testing.T) { 12 | pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) 13 | defer pool.AssertSize(t, 0) 14 | 15 | records, schema := buildRecords(pool, t) 16 | for i := range records { 17 | defer records[i].Release() 18 | } 19 | 20 | tbl := array.NewTableFromRecords(schema, records) 21 | defer tbl.Release() 22 | 23 | cols := make([]array.Column, 0, tbl.NumCols()) 24 | for i := 0; i < int(tbl.NumCols()); i++ { 25 | cols = append(cols, *tbl.Column(i)) 26 | } 27 | 28 | it := iterator.NewStepIteratorForColumns(cols) 29 | defer it.Release() 30 | } 31 | 
-------------------------------------------------------------------------------- /iterator/stringiterator.go: -------------------------------------------------------------------------------- 1 | package iterator 2 | 3 | import ( 4 | "sync/atomic" 5 | 6 | "github.com/apache/arrow/go/arrow/array" 7 | "github.com/go-bullseye/bullseye/internal/debug" 8 | ) 9 | 10 | // StringValueIterator is an iterator for reading an Arrow Column 11 | // value by value for variable-length UTF-8 strings. 12 | type StringValueIterator struct { 13 | refCount int64 14 | chunkIterator *ChunkIterator 15 | 16 | // Things we need to maintain for the iterator 17 | index int // current value index 18 | ref *array.String // the chunk reference 19 | done bool // there are no more elements for this iterator 20 | } 21 | 22 | // NewStringValueIterator creates a new StringValueIterator for reading an Arrow Column. 23 | func NewStringValueIterator(col *array.Column) *StringValueIterator { 24 | // We need a ChunkIterator to read the chunks 25 | chunkIterator := NewChunkIterator(col) 26 | 27 | return &StringValueIterator{ 28 | refCount: 1, 29 | chunkIterator: chunkIterator, 30 | 31 | index: 0, 32 | ref: nil, 33 | } 34 | } 35 | 36 | // Value will return the current value that the iterator is on and boolean value indicating if the value is actually null. 37 | func (vr *StringValueIterator) Value() (string, bool) { 38 | return vr.ref.Value(vr.index), vr.ref.IsNull(vr.index) 39 | } 40 | 41 | // ValuePointer will return a pointer to the current value that the iterator is on. It will return nil if the value is actually null. 42 | func (vr *StringValueIterator) ValuePointer() *string { 43 | if vr.ref.IsNull(vr.index) { 44 | return nil 45 | } 46 | value := vr.ref.Value(vr.index) 47 | return &value 48 | } 49 | 50 | // ValueInterface returns the value as an interface{}. 
51 | func (vr *StringValueIterator) ValueInterface() interface{} { 52 | if vr.ref.IsNull(vr.index) { 53 | return nil 54 | } 55 | return vr.ref.Value(vr.index) 56 | } 57 | 58 | // Next moves the iterator to the next value. This will return false 59 | // when there are no more values. 60 | func (vr *StringValueIterator) Next() bool { 61 | if vr.done { 62 | return false 63 | } 64 | 65 | // Move the index up 66 | vr.index++ 67 | 68 | // Keep moving the chunk up until we get one with data 69 | for vr.ref == nil || vr.index >= vr.ref.Len() { 70 | if !vr.nextChunk() { 71 | // There were no more chunks with data in them 72 | vr.done = true 73 | return false 74 | } 75 | } 76 | 77 | return true 78 | } 79 | 80 | func (vr *StringValueIterator) nextChunk() bool { 81 | // Advance the chunk until we get one with data in it or we are done 82 | if !vr.chunkIterator.Next() { 83 | // No more chunks 84 | return false 85 | } 86 | 87 | // There was another chunk. 88 | // We maintain the ref and the values because the ref is going to allow us to retain the memory. 
89 | ref := vr.chunkIterator.Chunk() 90 | ref.Retain() 91 | 92 | if vr.ref != nil { 93 | vr.ref.Release() 94 | } 95 | 96 | vr.ref = ref.(*array.String) 97 | vr.index = 0 98 | return true 99 | } 100 | 101 | // Retain keeps a reference to the StringValueIterator 102 | func (vr *StringValueIterator) Retain() { 103 | atomic.AddInt64(&vr.refCount, 1) 104 | } 105 | 106 | // Release removes a reference to the StringValueIterator 107 | func (vr *StringValueIterator) Release() { 108 | debug.Assert(atomic.LoadInt64(&vr.refCount) > 0, "too many releases") 109 | 110 | if atomic.AddInt64(&vr.refCount, -1) == 0 { 111 | if vr.chunkIterator != nil { 112 | vr.chunkIterator.Release() 113 | vr.chunkIterator = nil 114 | } 115 | 116 | if vr.ref != nil { 117 | vr.ref.Release() 118 | vr.ref = nil 119 | } 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /iterator/valueiterator.gen.go.tmpl: -------------------------------------------------------------------------------- 1 | package iterator 2 | 3 | import ( 4 | "sync/atomic" 5 | 6 | "github.com/go-bullseye/bullseye/internal/debug" 7 | "github.com/apache/arrow/go/arrow/array" 8 | ) 9 | 10 | {{range .In}} 11 | // {{.Name}}ValueIterator is an iterator for reading an Arrow Column value by value. 12 | type {{.Name}}ValueIterator struct { 13 | refCount int64 14 | chunkIterator *{{.Name}}ChunkIterator 15 | 16 | // Things we need to maintain for the iterator 17 | index int // current value index 18 | values []{{or .QualifiedType .Type}} // current chunk values 19 | ref *array.{{.Name}} // the chunk reference 20 | done bool // there are no more elements for this iterator 21 | } 22 | 23 | // New{{.Name}}ValueIterator creates a new {{.Name}}ValueIterator for reading an Arrow Column. 
24 | func New{{.Name}}ValueIterator(col *array.Column) *{{.Name}}ValueIterator { 25 | // We need a ChunkIterator to read the chunks 26 | chunkIterator := New{{.Name}}ChunkIterator(col) 27 | 28 | return &{{.Name}}ValueIterator{ 29 | refCount: 1, 30 | chunkIterator: chunkIterator, 31 | 32 | index: 0, 33 | values: nil, 34 | } 35 | } 36 | 37 | // Value will return the current value that the iterator is on and boolean value indicating if the value is actually null. 38 | func (vr *{{.Name}}ValueIterator) Value() ({{or .QualifiedType .Type}}, bool) { 39 | return vr.values[vr.index], vr.ref.IsNull(vr.index) 40 | } 41 | 42 | // ValuePointer will return a pointer to the current value that the iterator is on. It will return nil if the value is actually null. 43 | func (vr *{{.Name}}ValueIterator) ValuePointer() *{{or .QualifiedType .Type}} { 44 | if vr.ref.IsNull(vr.index) { 45 | return nil 46 | } 47 | return &vr.values[vr.index] 48 | } 49 | 50 | // ValueInterface returns the current value as an interface{}. 51 | func (vr *{{.Name}}ValueIterator) ValueInterface() interface{} { 52 | if vr.ref.IsNull(vr.index) { 53 | return nil 54 | } 55 | return vr.values[vr.index] 56 | } 57 | 58 | // Next moves the iterator to the next value. This will return false 59 | // when there are no more values. 60 | func (vr *{{.Name}}ValueIterator) Next() bool { 61 | if vr.done { 62 | return false 63 | } 64 | 65 | // Move the index up 66 | vr.index++ 67 | 68 | // Keep moving the chunk up until we get one with data 69 | for vr.values == nil || vr.index >= len(vr.values) { 70 | if !vr.nextChunk() { 71 | // There were no more chunks with data in them 72 | vr.done = true 73 | return false 74 | } 75 | } 76 | 77 | return true 78 | } 79 | 80 | func (vr *{{.Name}}ValueIterator) nextChunk() bool { 81 | // Advance the chunk until we get one with data in it or we are done 82 | if !vr.chunkIterator.Next() { 83 | // No more chunks 84 | return false 85 | } 86 | 87 | // There was another chunk. 
88 | // We maintain the ref and the values because the ref is going to allow us to retain the memory. 89 | ref := vr.chunkIterator.Chunk() 90 | ref.Retain() 91 | 92 | if vr.ref != nil { 93 | vr.ref.Release() 94 | } 95 | 96 | vr.ref = ref 97 | vr.values = vr.chunkIterator.ChunkValues() 98 | vr.index = 0 99 | return true 100 | } 101 | 102 | // Retain keeps a reference to the {{.Name}}ValueIterator. 103 | func (vr *{{.Name}}ValueIterator) Retain() { 104 | atomic.AddInt64(&vr.refCount, 1) 105 | } 106 | 107 | // Release removes a reference to the {{.Name}}ValueIterator. 108 | func (vr *{{.Name}}ValueIterator) Release() { 109 | refs := atomic.AddInt64(&vr.refCount, -1) 110 | debug.Assert(refs >= 0, "too many releases") 111 | if refs == 0 { 112 | if vr.chunkIterator != nil { 113 | vr.chunkIterator.Release() 114 | vr.chunkIterator = nil 115 | } 116 | 117 | if vr.ref != nil { 118 | vr.ref.Release() 119 | vr.ref = nil 120 | } 121 | vr.values = nil 122 | } 123 | } 124 | 125 | 126 | {{end}} 127 | -------------------------------------------------------------------------------- /iterator/valueiterator.go: -------------------------------------------------------------------------------- 1 | package iterator 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/apache/arrow/go/arrow" 7 | "github.com/apache/arrow/go/arrow/array" 8 | ) 9 | 10 | // ValueIterator is a generic iterator for scanning over values. 11 | type ValueIterator interface { 12 | // ValueInterface returns the current value as an interface{}. 13 | ValueInterface() interface{} 14 | 15 | // Next moves the iterator to the next value. This will return false when there are no more values. 16 | Next() bool 17 | 18 | // Retain keeps a reference to the ValueIterator. 19 | Retain() 20 | 21 | // Release removes a reference to the ValueIterator. 22 | Release() 23 | } 24 | 25 | // NewValueIterator creates a new generic ValueIterator. 
26 | func NewValueIterator(column *array.Column) ValueIterator { 27 | field := column.Field() 28 | switch field.Type.(type) { 29 | case *arrow.Int8Type: 30 | return NewInt8ValueIterator(column) 31 | case *arrow.Int16Type: 32 | return NewInt16ValueIterator(column) 33 | case *arrow.Int32Type: 34 | return NewInt32ValueIterator(column) 35 | case *arrow.Int64Type: 36 | return NewInt64ValueIterator(column) 37 | case *arrow.Uint8Type: 38 | return NewUint8ValueIterator(column) 39 | case *arrow.Uint16Type: 40 | return NewUint16ValueIterator(column) 41 | case *arrow.Uint32Type: 42 | return NewUint32ValueIterator(column) 43 | case *arrow.Uint64Type: 44 | return NewUint64ValueIterator(column) 45 | case *arrow.Float32Type: 46 | return NewFloat32ValueIterator(column) 47 | case *arrow.Float64Type: 48 | return NewFloat64ValueIterator(column) 49 | case *arrow.Date32Type: 50 | return NewDate32ValueIterator(column) 51 | case *arrow.Date64Type: 52 | return NewDate64ValueIterator(column) 53 | case *arrow.BooleanType: 54 | return NewBooleanValueIterator(column) 55 | case *arrow.StringType: 56 | return NewStringValueIterator(column) 57 | 58 | default: 59 | panic(fmt.Errorf("dataframe/valueiterator: unhandled field type %T", field.Type)) 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /iterator/valueiterator_test.go: -------------------------------------------------------------------------------- 1 | package iterator_test 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/apache/arrow/go/arrow" 7 | "github.com/apache/arrow/go/arrow/array" 8 | "github.com/apache/arrow/go/arrow/memory" 9 | "github.com/go-bullseye/bullseye/iterator" 10 | ) 11 | 12 | func TestInt32ValueIterator(t *testing.T) { 13 | pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) 14 | defer pool.AssertSize(t, 0) 15 | 16 | records, schema := buildRecords(pool, t) 17 | var numRows int64 18 | for i := range records { 19 | defer records[i].Release() 20 | numRows += 
records[i].NumRows() 21 | } 22 | 23 | expectedValues := make([]int32, 0, numRows) 24 | expectedValuesBool := make([]bool, 0, numRows) 25 | for i := range records { 26 | ref := records[i].Column(0).(*array.Int32) 27 | values := ref.Int32Values() 28 | for j := range values { 29 | expectedValues = append(expectedValues, values[j]) 30 | expectedValuesBool = append(expectedValuesBool, ref.IsNull(j)) 31 | } 32 | } 33 | 34 | tbl := array.NewTableFromRecords(schema, records) 35 | defer tbl.Release() 36 | 37 | column := tbl.Column(0) 38 | cr := iterator.NewInt32ValueIterator(column) 39 | defer cr.Release() 40 | 41 | n := 0 42 | for cr.Next() { 43 | value, null := cr.Value() 44 | if got, want := value, expectedValues[n]; got != want { 45 | t.Fatalf("got=%d, want=%d", got, want) 46 | } 47 | if got, want := null, expectedValuesBool[n]; got != want { 48 | t.Fatalf("got=%v, want=%v", got, want) 49 | } 50 | n++ 51 | } 52 | } 53 | 54 | func TestInt32ValueIteratorPointer(t *testing.T) { 55 | pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) 56 | defer pool.AssertSize(t, 0) 57 | 58 | records, schema := buildRecords(pool, t) 59 | var numRows int64 60 | for i := range records { 61 | defer records[i].Release() 62 | numRows += records[i].NumRows() 63 | } 64 | 65 | expectedPtrs := make([]*int32, 0, numRows) 66 | for i := range records { 67 | ref := records[i].Column(0).(*array.Int32) 68 | values := ref.Int32Values() 69 | for j := range values { 70 | if ref.IsNull(j) { 71 | expectedPtrs = append(expectedPtrs, nil) 72 | } else { 73 | expectedPtrs = append(expectedPtrs, &values[j]) 74 | } 75 | } 76 | } 77 | 78 | tbl := array.NewTableFromRecords(schema, records) 79 | defer tbl.Release() 80 | 81 | column := tbl.Column(0) 82 | cr := iterator.NewInt32ValueIterator(column) 83 | defer cr.Release() 84 | 85 | n := 0 86 | for cr.Next() { 87 | value := cr.ValuePointer() 88 | if got, want := value, expectedPtrs[n]; got != want { 89 | t.Fatalf("got=%d, want=%d", got, want) 90 | } 91 | n++ 92 
| } 93 | } 94 | 95 | func TestFloat64ValueIterator(t *testing.T) { 96 | pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) 97 | defer pool.AssertSize(t, 0) 98 | 99 | schema := arrow.NewSchema( 100 | []arrow.Field{ 101 | {Name: "f2-f64", Type: arrow.PrimitiveTypes.Float64}, 102 | }, 103 | nil, 104 | ) 105 | 106 | b := array.NewRecordBuilder(pool, schema) 107 | defer b.Release() 108 | 109 | expectedValues := []float64{ 110 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 111 | 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 112 | 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 113 | } 114 | 115 | expectedValuesBool := []bool{ 116 | true, true, true, true, true, true, true, true, true, true, 117 | true, false, true, false, true, true, true, true, true, false, 118 | true, true, true, true, true, true, true, true, true, true, 119 | } 120 | 121 | b.Field(0).(*array.Float64Builder).AppendValues(expectedValues[0:10], nil) 122 | rec1 := b.NewRecord() 123 | defer rec1.Release() 124 | 125 | b.Field(0).(*array.Float64Builder).AppendValues(expectedValues[10:20], expectedValuesBool[10:20]) 126 | rec2 := b.NewRecord() 127 | defer rec2.Release() 128 | 129 | b.Field(0).(*array.Float64Builder).AppendValues(expectedValues[20:30], nil) 130 | rec3 := b.NewRecord() 131 | defer rec3.Release() 132 | 133 | records := []array.Record{rec1, rec2, rec3} 134 | tbl := array.NewTableFromRecords(schema, records) 135 | defer tbl.Release() 136 | column := tbl.Column(0) 137 | vr := iterator.NewFloat64ValueIterator(column) 138 | defer vr.Release() 139 | 140 | n := 0 141 | for vr.Next() { 142 | value, null := vr.Value() 143 | if got, want := value, expectedValues[n]; got != want { 144 | t.Fatalf("got=%f, want=%f", got, want) 145 | } 146 | if got, want := !null, expectedValuesBool[n]; got != want { 147 | t.Fatalf("got=%v, want=%v (n=%d)", got, want, n) 148 | } 149 | n++ 150 | } 151 | } 152 | 153 | func TestDate32ValueIterator(t *testing.T) { 154 | t.Skip("TODO: Implement.") 155 | } 156 | 157 | func TestDate64ValueIterator(t 
*testing.T) { 158 | t.Skip("TODO: Implement.") 159 | } 160 | 161 | func TestBooleanValueIterator(t *testing.T) { 162 | pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) 163 | defer pool.AssertSize(t, 0) 164 | 165 | schema := arrow.NewSchema( 166 | []arrow.Field{ 167 | {Name: "c1-bools", Type: arrow.FixedWidthTypes.Boolean}, 168 | }, 169 | nil, 170 | ) 171 | 172 | b := array.NewRecordBuilder(pool, schema) 173 | defer b.Release() 174 | 175 | expectedValues := []bool{ 176 | true, true, true, true, true, true, true, true, true, true, 177 | false, false, false, false, false, false, false, false, false, false, 178 | true, true, false, true, true, true, true, true, true, true, 179 | } 180 | 181 | expectedValuesBool := []bool{ 182 | true, true, true, true, true, true, true, true, true, true, 183 | true, false, true, false, true, true, true, true, true, false, 184 | true, true, true, true, true, true, true, true, true, true, 185 | } 186 | 187 | b.Field(0).(*array.BooleanBuilder).AppendValues(expectedValues[0:10], nil) 188 | rec1 := b.NewRecord() 189 | defer rec1.Release() 190 | 191 | b.Field(0).(*array.BooleanBuilder).AppendValues(expectedValues[10:20], expectedValuesBool[10:20]) 192 | rec2 := b.NewRecord() 193 | defer rec2.Release() 194 | 195 | b.Field(0).(*array.BooleanBuilder).AppendValues(expectedValues[20:30], nil) 196 | rec3 := b.NewRecord() 197 | defer rec3.Release() 198 | 199 | records := []array.Record{rec1, rec2, rec3} 200 | tbl := array.NewTableFromRecords(schema, records) 201 | defer tbl.Release() 202 | column := tbl.Column(0) 203 | vr := iterator.NewBooleanValueIterator(column) 204 | defer vr.Release() 205 | 206 | n := 0 207 | for vr.Next() { 208 | value, null := vr.Value() 209 | if got, want := value, expectedValues[n]; got != want { 210 | t.Fatalf("got=%t, want=%t", got, want) 211 | } 212 | if got, want := !null, expectedValuesBool[n]; got != want { 213 | t.Fatalf("got=%v, want=%v (n=%d)", got, want, n) 214 | } 215 | n++ 216 | } 217 | } 218 | 219 | 
func TestStringValueIterator(t *testing.T) { 220 | pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) 221 | defer pool.AssertSize(t, 0) 222 | 223 | schema := arrow.NewSchema( 224 | []arrow.Field{ 225 | {Name: "c1-strings", Type: arrow.BinaryTypes.String}, 226 | }, 227 | nil, 228 | ) 229 | 230 | b := array.NewRecordBuilder(pool, schema) 231 | defer b.Release() 232 | 233 | expectedValues := []string{ 234 | "true", "aaa", "true", "true", "true", "ccc", "true", "d", "true", "e", 235 | "false", "false", "false", "false", "false", "false", "false", "dog", "false", "false", 236 | "true", "true", "bbb", "true", "true", "true", "true", "true", "cat", "true", 237 | } 238 | 239 | expectedValuesBool := []bool{ 240 | true, true, true, true, true, true, true, true, true, true, 241 | true, false, true, false, true, true, true, true, true, false, 242 | true, true, true, true, true, true, true, true, true, true, 243 | } 244 | 245 | b.Field(0).(*array.StringBuilder).AppendValues(expectedValues[0:10], nil) 246 | rec1 := b.NewRecord() 247 | defer rec1.Release() 248 | 249 | b.Field(0).(*array.StringBuilder).AppendValues(expectedValues[10:20], expectedValuesBool[10:20]) 250 | rec2 := b.NewRecord() 251 | defer rec2.Release() 252 | 253 | b.Field(0).(*array.StringBuilder).AppendValues(expectedValues[20:30], nil) 254 | rec3 := b.NewRecord() 255 | defer rec3.Release() 256 | 257 | records := []array.Record{rec1, rec2, rec3} 258 | tbl := array.NewTableFromRecords(schema, records) 259 | defer tbl.Release() 260 | column := tbl.Column(0) 261 | vr := iterator.NewStringValueIterator(column) 262 | defer vr.Release() 263 | 264 | n := 0 265 | for vr.Next() { 266 | value, null := vr.Value() 267 | if got, want := value, expectedValues[n]; got != want { 268 | t.Fatalf("got=%s, want=%s", got, want) 269 | } 270 | if got, want := !null, expectedValuesBool[n]; got != want { 271 | t.Fatalf("got=%v, want=%v (n=%d)", got, want, n) 272 | } 273 | n++ 274 | } 275 | } 276 | 
-------------------------------------------------------------------------------- /numeric.tmpldata: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "Name": "Int64", 4 | "name": "int64", 5 | "Type": "int64", 6 | "Default": "0", 7 | "Size": "8" 8 | }, 9 | { 10 | "Name": "Uint64", 11 | "name": "uint64", 12 | "Type": "uint64", 13 | "Default": "0", 14 | "Size": "8" 15 | }, 16 | { 17 | "Name": "Float64", 18 | "name": "float64", 19 | "Type": "float64", 20 | "Default": "0", 21 | "Size": "8" 22 | }, 23 | { 24 | "Name": "Int32", 25 | "name": "int32", 26 | "Type": "int32", 27 | "Default": "0", 28 | "Size": "4", 29 | "Opt": { 30 | "BufferBuilder": true 31 | } 32 | }, 33 | { 34 | "Name": "Uint32", 35 | "name": "uint32", 36 | "Type": "uint32", 37 | "Default": "0", 38 | "Size": "4" 39 | }, 40 | { 41 | "Name": "Float32", 42 | "name": "float32", 43 | "Type": "float32", 44 | "Default": "0", 45 | "Size": "4" 46 | }, 47 | { 48 | "Name": "Int16", 49 | "name": "int16", 50 | "Type": "int16", 51 | "Default": "0", 52 | "Size": "2" 53 | }, 54 | { 55 | "Name": "Uint16", 56 | "name": "uint16", 57 | "Type": "uint16", 58 | "Default": "0", 59 | "Size": "2" 60 | }, 61 | { 62 | "Name": "Int8", 63 | "name": "int8", 64 | "Type": "int8", 65 | "Default": "0", 66 | "Size": "1" 67 | }, 68 | { 69 | "Name": "Uint8", 70 | "name": "uint8", 71 | "Type": "uint8", 72 | "Default": "0", 73 | "Size": "1" 74 | }, 75 | { 76 | "Name": "Timestamp", 77 | "name": "timestamp", 78 | "Type": "Timestamp", 79 | "QualifiedType": "arrow.Timestamp", 80 | "InternalType": "int64", 81 | "Default": "0", 82 | "Size": "8", 83 | "Opt": { 84 | "Parametric": true 85 | } 86 | }, 87 | { 88 | "Name": "Time32", 89 | "name": "time32", 90 | "Type": "Time32", 91 | "QualifiedType": "arrow.Time32", 92 | "InternalType": "int32", 93 | "Default": "0", 94 | "Size": "4", 95 | "Opt": { 96 | "Parametric": true 97 | } 98 | }, 99 | { 100 | "Name": "Time64", 101 | "name": "time64", 102 | "Type": "Time64", 
103 | "QualifiedType": "arrow.Time64", 104 | "InternalType": "int64", 105 | "Default": "0", 106 | "Size": "8", 107 | "Opt": { 108 | "Parametric": true 109 | } 110 | }, 111 | { 112 | "Name": "Date32", 113 | "name": "date32", 114 | "Type": "Date32", 115 | "QualifiedType": "arrow.Date32", 116 | "InternalType": "int32", 117 | "Default": "0", 118 | "Size": "4" 119 | }, 120 | { 121 | "Name": "Date64", 122 | "name": "date64", 123 | "Type": "Date64", 124 | "QualifiedType": "arrow.Date64", 125 | "InternalType": "int64", 126 | "Default": "0", 127 | "Size": "8" 128 | } 129 | ] 130 | -------------------------------------------------------------------------------- /tools.go: -------------------------------------------------------------------------------- 1 | // +build tools 2 | 3 | package tools 4 | 5 | import ( 6 | _ "github.com/apache/arrow/go/arrow/_tools/tmpl" 7 | ) --------------------------------------------------------------------------------