├── tests
├── config.nims
├── test_tables.nim
├── test_parquet.nim
└── test_arrays.nim
├── src
├── nimarrow.nim
└── nimarrow
│ ├── bitarray.nim
│ ├── parquet.nim
│ ├── tables.nim
│ └── arrays.nim
├── nimarrow.nimble
├── docs
├── nimarrow
│ ├── bitarray.idx
│ ├── parquet.idx
│ ├── tables.idx
│ ├── arrays.idx
│ ├── bitarray.html
│ ├── parquet.html
│ └── tables.html
├── nimarrow.html
└── nimdoc.out.css
├── .github
└── workflows
│ └── ci.yaml
└── README.md
/tests/config.nims:
--------------------------------------------------------------------------------
1 | switch("path", "$projectDir/../src")
--------------------------------------------------------------------------------
/src/nimarrow.nim:
--------------------------------------------------------------------------------
1 | import ./nimarrow/arrays
2 | import ./nimarrow/parquet
3 | import ./nimarrow/tables
4 |
5 | export arrays
6 | export parquet
7 | export tables
--------------------------------------------------------------------------------
/nimarrow.nimble:
--------------------------------------------------------------------------------
1 | # Package
2 |
3 | version = "0.1.3"
4 | author = "Matt Forbes"
5 | description = "apache arrow bindings for nim"
6 | license = "Apache-2.0"
7 | srcDir = "src"
8 |
9 |
10 | # Dependencies
11 |
12 | requires "nim >= 1.0.0",
13 | "nimarrow_glib >= 0.1.3"
14 |
--------------------------------------------------------------------------------
/docs/nimarrow/bitarray.idx:
--------------------------------------------------------------------------------
1 | Units nimarrow/bitarray.html#Units bitarray: Units
2 | BitVector nimarrow/bitarray.html#BitVector bitarray: BitVector
3 | newBitVector nimarrow/bitarray.html#newBitVector,int,int bitarray: newBitVector[T](size: int; init = 0): BitVector[T]
4 | `[]` nimarrow/bitarray.html#[],BitVector[T],int bitarray: `[]`[T](b: BitVector[T]; i: int): Bit
5 | `[]=` nimarrow/bitarray.html#[]=,BitVector[T],int,Bit bitarray: `[]=`[T](b: var BitVector[T]; i: int; value: Bit)
6 | add nimarrow/bitarray.html#add,BitVector[T],Bit bitarray: add[T](b: var BitVector[T]; value: Bit)
7 | cap nimarrow/bitarray.html#cap,BitVector[T] bitarray: cap[T](b: BitVector[T]): int
8 | len nimarrow/bitarray.html#len,BitVector[T] bitarray: len[T](b: BitVector[T]): int
9 | `==` nimarrow/bitarray.html#==,BitVector,BitVector bitarray: `==`(x, y: BitVector): bool
10 | `$` nimarrow/bitarray.html#$,BitVector[T] bitarray: `$`[T](b: BitVector[T]): string
11 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yaml:
--------------------------------------------------------------------------------
1 | name: nimarrow CI
2 | on: [push, pull_request]
3 |
4 | jobs:
5 | build:
6 | runs-on: ubuntu-latest
7 | steps:
8 | - uses: actions/checkout@v2
9 | with:
10 | path: nimarrow
11 |
12 | - name: Install build dependencies
13 | run: |
14 | sudo dpkg --add-architecture i386
15 | sudo apt-fast install -y -V ca-certificates lsb-release wget
16 | wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
17 | sudo apt-fast install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
18 | sudo apt-fast update -qq
19 | sudo DEBIAN_FRONTEND='noninteractive' apt-fast install \
20 | --no-install-recommends -yq libarrow-glib-dev libparquet-glib-dev
21 | wget https://nim-lang.org/download/nim-1.4.8-linux_x64.tar.xz
22 | tar xf nim-1.4.8-linux_x64.tar.xz
23 | echo '${{ github.workspace }}/nim-1.4.8/bin' >> $GITHUB_PATH
24 |
25 | - name: Run nimarrow tests
26 | shell: bash
27 | working-directory: nimarrow
28 | run: |
29 | nimble install -y --depsOnly
30 | nimble test
--------------------------------------------------------------------------------
/tests/test_tables.nim:
--------------------------------------------------------------------------------
1 | import std/unittest
2 |
3 | import nimarrow
4 |
5 | type
6 | MyType* = object
7 | a*: string
8 | b*: int32
9 | c*: uint8
10 |
11 | registerTypedTable(MyType)
12 |
13 | test "can construct table from arrays":
14 | let field1 = newArrowField("a", int32)
15 | let field2 = newArrowField("b", string)
16 |
17 | let data1 = newArrowArray(@[1'i32, 2'i32, 3'i32])
18 | let data2 = newArrowArray(@["first", "second", "third"])
19 |
20 | let schema = newArrowSchema(@[field1, field2])
21 |
22 | let tableBuilder = newArrowTableBuilder(schema)
23 | tableBuilder.add data1
24 | tableBuilder.add data2
25 | let table = tableBuilder.build
26 |
27 | check table.len == 3
28 | check table.col(string, "b")[1] == "second"
29 | check @(table.col(int32, "a")) == @[1'i32, 2'i32, 3'i32]
30 |
31 | test "can build typed tables":
32 | let typedBuilder = newTypedBuilder(MyType)
33 | typedBuilder.add MyType(a: "a", b: 1'i32, c: 0'u8)
34 | typedBuilder.add MyType(a: "b", b: 2'i32, c: 1'u8)
35 | typedBuilder.add MyType(a: "c", b: 3'i32, c: 2'u8)
36 | let table = typedBuilder.build
37 |
38 | check table.len == 3
39 |
40 | test "can iterate over typed table":
41 | let expected = @[
42 | MyType(a: "a", b: 1'i32, c: 0'u8),
43 | MyType(a: "b", b: 2'i32, c: 1'u8),
44 | MyType(a: "c", b: 3'i32, c: 2'u8)
45 | ]
46 |
47 | let table = newArrowTable(MyType, expected)
48 | var rebuilt = newSeq[MyType]()
49 | for x in table.iter(MyType):
50 | rebuilt.add x
51 |
52 | check rebuilt == expected
53 |
54 |
--------------------------------------------------------------------------------
/tests/test_parquet.nim:
--------------------------------------------------------------------------------
1 | import std/os
2 | import std/unittest
3 |
4 | import nimarrow
5 |
6 | type
7 | CustomType* = object
8 | x*: int32
9 | y*: string
10 | z*: uint8
11 |
12 | registerTypedTable(CustomType)
13 |
14 | test "can read and write parquet":
15 | let
16 | path = getTempDir() / "test.parquet"
17 | schema = newArrowSchema(@[
18 | newArrowField("a", int32),
19 | newArrowField("b", string)
20 | ])
21 |
22 | let
23 | col1 = newArrowArray(@[1'i32, 2'i32, 3'i32])
24 | col2 = newArrowArray(@["first", "second", "third"])
25 | tableBuilder = newArrowTableBuilder(schema)
26 |
27 | tableBuilder.add col1
28 | tableBuilder.add col2
29 | let table = tableBuilder.build
30 |
31 | # write the table to a local parquet file
32 | table.toParquet(path)
33 |
34 | # read an entire parquet file into an ArrowTable
35 | let rereadTable = fromParquet(path)
36 |
37 | # the re-read table should be the same as the original
38 | check table == rereadTable
39 |
40 | test "can read and write with custom types":
41 | let
42 | path = getTempDir() / "typed.parquet"
43 | expected = @[
44 | CustomType(x: 0'i32, y: "y", z: 0'u8),
45 | CustomType(x: 1'i32, y: "yy", z: 10'u8),
46 | CustomType(x: 2'i32, y: "yyy", z: 100'u8)
47 | ]
48 |
49 | typedWriter = newTypedParquetWriter[CustomType](path)
50 |
51 | # append each record to the writer
52 | for x in expected:
53 | typedWriter.add x
54 |
55 | # important: close the parquet file to write the footer/metadata
56 | typedWriter.close()
57 |
58 | # re-read the parquet file into custom types
59 | let reader = newParquetReader(path)
60 | var reread = newSeq[CustomType]()
61 | for x in reader.iter(CustomType):
62 | reread.add x
63 |
64 | # re-read elements should be the same as were written
65 | check reread == expected
66 |
--------------------------------------------------------------------------------
/src/nimarrow/bitarray.nim:
--------------------------------------------------------------------------------
1 | # Copyright (C) Marc Azar. All rights reserved.
2 | # MIT License. Look at LICENSE.txt for more info
type
  # Unsigned integer types that may be used as the storage word of a BitVector.
  Units* = SomeUnsignedInt
  # A single bit value, 0 or 1.
  Bit = range[0..1]
  BitVector*[T: Units] = object
    ## Growable sequence of bits packed into storage words of type `T`.
    base*: seq[T]  ## packed storage; bit `i` lives in word `i div bitsPerWord`
    bitlength: int ## number of bits logically stored; index used by `add`

template unitBits(T: typedesc): int =
  ## Number of bits held by one storage word of type `T`.
  sizeof(T) * 8

# Forward declarations
func `len`*[T](b: BitVector[T]): int {.inline.}
func cap*[T](b: BitVector[T]): int {.inline.}
func `[]`*[T](b: BitVector[T], i: int): Bit {.inline.}

func newBitVector*[T](size: int, init = 0): BitVector[T] {.inline.} =
  ## Creates an in-memory BitVector backed by words of type `T` that holds
  ## `size` bits. Storage is rounded up to a whole number of words and at
  ## least one word is always allocated. Passing any non-zero `init` sets
  ## the first `size` bits to 1; padding bits in the last word stay 0.
  let bits = unitBits(T)
  var blocks = size div bits
  if blocks == 0:
    blocks = 1
  elif size mod bits > 0:
    blocks += 1
  result.base = newSeq[T](blocks)
  # `bitlength` counts bits, not bytes: a later `add` appends at index `size`.
  result.bitlength = size
  if init != 0:
    for i in 0 ..< size:
      # `i` is a bit index, so locate its word before setting the bit.
      let word = i div bits
      result.base[word] = result.base[word] or (T(1) shl (i and (bits - 1)))

func `[]`*[T](b: BitVector[T], i: int): Bit {.inline.} =
  ## Returns the bit at index `i`. `i` must lie within `0 ..< b.cap`; this is
  ## only verified when assertions are enabled.
  assert(i < b.cap and i >= 0, "Index out of range")
  let bits = unitBits(T)
  # `bits` is a power of two, so `i and (bits - 1)` equals `i mod bits`.
  Bit((b.base[i div bits] shr (i and (bits - 1))) and 1)

func `[]=`*[T](b: var BitVector[T], i: int, value: Bit) {.inline.} =
  ## Sets the bit at index `i` to `value`. `i` must lie within `0 ..< b.cap`;
  ## this is only verified when assertions are enabled.
  assert(i < b.cap and i >= 0, "Index out of range")
  let
    bits = unitBits(T)
    word = i div bits
    mask = T(1) shl (i and (bits - 1))
  if value == 0:
    b.base[word] = b.base[word] and not mask
  else:
    b.base[word] = b.base[word] or mask

func add*[T](b: var BitVector[T], value: Bit) {.inline.} =
  ## Add an element to the end of the BitVector, growing the storage by one
  ## word when the current words are full.
  let i = b.bitlength
  if i div unitBits(T) >= b.base.len:
    b.base.add T(0)

  b[i] = value
  inc b.bitlength

func cap*[T](b: BitVector[T]): int {.inline.} =
  ## Returns capacity, i.e. the number of bits the allocated words can hold.
  b.len * unitBits(T)

func `len`*[T](b: BitVector[T]): int {.inline.} =
  ## Returns the number of allocated storage words of type `T` (this is not
  ## the number of bits that have been added).
  b.base.len

func `==`*(x, y: BitVector): bool =
  ## Two vectors are equal when every bit across their whole capacity
  ## matches, which is the same as their storage words being equal.
  x.base == y.base

func `$`*[T](b: BitVector[T]): string {.inline.} =
  ## Describes the vector by its capacity in bits and its number of
  ## storage words; the bit contents themselves are not printed.
  result =
    "BitVector with capacity of " & $b.cap & " bits and " & $b.len &
    " unique elements"
--------------------------------------------------------------------------------
/tests/test_arrays.nim:
--------------------------------------------------------------------------------
1 | import std/options
2 | import std/unittest
3 |
4 | import nimarrow
5 |
6 | test "can construct simple arrays":
7 | let arr = newArrowArray[int32](@[1'i32, 2'i32, 3'i32])
8 | check arr[0] == 1'i32
9 | check @arr[1..2] == @[2'i32, 3'i32]
10 |
11 | let arr2 = newArrowArray[float32](@[1'f32, 2'f32, 3'f32])
12 | check arr2[0] == 1'f32
13 | check @arr2[1..2] == @[2'f32, 3'f32]
14 |
15 | let arr3 = newEmptyArrowArray[int32]()
16 | check arr3.len == 0
17 | check @arr3 == newSeq[int32]()
18 |
19 | let arr4 = newArrowArray(@[some(1'i64), none(int64), some(2'i64),
20 | none(int64), none(int64)])
21 | check arr4.len == 5
22 | check @arr4 == @[1'i64, 0'i64, 2'i64, 0'i64, 0'i64]
23 | check arr4.isNullAt(1)
24 | check arr4.isNullAt(3)
25 | check arr4.isNullAt(4)
26 |
27 | test "can build arrays with builder":
28 | let builder = newArrowArrayBuilder[int64]()
29 | builder.add 1'i64
30 | builder.add 2'i64
31 | builder.add(3'i64)
32 | builder.add(none(int64))
33 | let arr = builder.build()
34 |
35 | check arr.len == 4
36 | check arr[0] == 1'i64
37 | check @arr == @[1'i64, 2'i64, 3'i64, 0'i64]
38 | check arr.isNullAt(3)
39 |
40 | test "can construct string arrays":
41 | let builder = newArrowArrayBuilder[string]()
42 | builder.add "1"
43 | builder.add "two"
44 | builder.add none(string)
45 | builder.add "three"
46 | let arr = builder.build()
47 |
48 | check arr.len == 4
49 | check arr[0] == "1"
50 | check arr[1] == "two"
51 | check arr[2] == ""
52 | check @arr == @["1", "two", "", "three"]
53 | check arr.isNullAt(2)
54 |
55 | let bigBuilder = newArrowArrayBuilder[string]()
56 | for i in 0..<1000:
57 | if i mod 5 == 4:
58 | bigBuilder.add none(string)
59 | else:
60 | bigBuilder.add "element_" & $i
61 |
62 | let bigArr = bigBuilder.build()
63 | check bigArr.len == 1000
64 | check bigArr.isNullAt(99)
65 | check bigArr[100] == "element_100"
66 |
67 | test "can construct binary arrays":
68 | let builder = newArrowArrayBuilder[Bytes]()
69 | builder.add @[0'u8, 1'u8, 2'u8, 3'u8]
70 | builder.add @[4'u8, 5'u8, 6'u8, 7'u8]
71 | builder.add none(Bytes)
72 | builder.add @[0'u8]
73 | builder.add @[]
74 | let arr = builder.build
75 |
76 | check arr[0] == @[0'u8, 1'u8, 2'u8, 3'u8]
77 | check arr[1] == @[4'u8, 5'u8, 6'u8, 7'u8]
78 | check arr[2] == Bytes(@[])
79 | check arr[4] == Bytes(@[])
80 | check arr.isNullAt(2)
81 | check not arr.isNullAt(4)
82 |
83 | test "can iterate over arrays":
84 | let
85 | expected = @[1'i32, 2'i32, 3'i32]
86 | arr = newArrowArray[int32](expected)
87 |
88 | var rebuilt = newSeq[int32]()
89 | for x in arr:
90 | rebuilt.add x
91 |
92 | check rebuilt == expected
--------------------------------------------------------------------------------
/docs/nimarrow/parquet.idx:
--------------------------------------------------------------------------------
1 | ParquetWriterProps nimarrow/parquet.html#ParquetWriterProps parquet: ParquetWriterProps
2 | ParquetWriter nimarrow/parquet.html#ParquetWriter parquet: ParquetWriter
3 | ParquetReader nimarrow/parquet.html#ParquetReader parquet: ParquetReader
4 | TypedParquetWriter nimarrow/parquet.html#TypedParquetWriter parquet: TypedParquetWriter
5 | `=destroy` nimarrow/parquet.html#=destroy,ParquetWriterPropsObj parquet: `=destroy`(x: var ParquetWriterPropsObj)
6 | `=destroy` nimarrow/parquet.html#=destroy,ParquetWriterObj parquet: `=destroy`(x: var ParquetWriterObj)
7 | `=destroy` nimarrow/parquet.html#=destroy,ParquetReaderObj parquet: `=destroy`(x: var ParquetReaderObj)
8 | newParquetWriterProps nimarrow/parquet.html#newParquetWriterProps,GArrowCompressionType,bool,Option[int64],Option[int64],Option[int64],Option[int64] parquet: newParquetWriterProps(compression: GArrowCompressionType = GARROW_COMPRESSION_TYPE_SNAPPY;\n enableDictionary: bool = true;\n dictionaryPageSizeLimit: Option[int64] = none(int64);\n batchSize: Option[int64] = none(int64);\n maxRowGroupLength: Option[int64] = none(int64);\n dataPageSize: Option[int64] = none(int64)): ParquetWriterProps
9 | newParquetWriter nimarrow/parquet.html#newParquetWriter,ArrowSchema,string,Option[ParquetWriterProps] parquet: newParquetWriter(schema: ArrowSchema; path: string;\n props: Option[ParquetWriterProps] = none(ParquetWriterProps)): ParquetWriter
10 | add nimarrow/parquet.html#add,ParquetWriter,ArrowTable parquet: add(w: ParquetWriter; table: ArrowTable)
11 | close nimarrow/parquet.html#close,ParquetWriter parquet: close(w: ParquetWriter)
12 | toParquet nimarrow/parquet.html#toParquet,ArrowTable,string,Option[ParquetWriterProps] parquet: toParquet(t: ArrowTable; path: string;\n props: Option[ParquetWriterProps] = none(ParquetWriterProps))
13 | fromParquet nimarrow/parquet.html#fromParquet,string parquet: fromParquet(path: string): ArrowTable
14 | newTypedParquetWriter nimarrow/parquet.html#newTypedParquetWriter,string,Option[ParquetWriterProps] parquet: newTypedParquetWriter[T: TypeRegistered](path: string;\n props: Option[ParquetWriterProps] = none(ParquetWriterProps)): TypedParquetWriter[\n T]
15 | add nimarrow/parquet.html#add,TypedParquetWriter[T],T parquet: add[T](w: TypedParquetWriter[T]; x: T)
16 | close nimarrow/parquet.html#close,TypedParquetWriter[T] parquet: close[T](w: TypedParquetWriter[T])
17 | newParquetReader nimarrow/parquet.html#newParquetReader,string,bool parquet: newParquetReader(path: string; useThreads: bool = true): ParquetReader
18 | rowGroups nimarrow/parquet.html#rowGroups,ParquetReader parquet: rowGroups(r: ParquetReader): int
19 | read nimarrow/parquet.html#read,ParquetReader,int parquet: read(r: ParquetReader; rowGroup: int): ArrowTable
20 | readFully nimarrow/parquet.html#readFully,ParquetReader parquet: readFully(r: ParquetReader): ArrowTable
21 | iter nimarrow/parquet.html#iter.i,ParquetReader,typedesc parquet: iter(r: ParquetReader; T: typedesc): T:type
22 |
--------------------------------------------------------------------------------
/docs/nimarrow/tables.idx:
--------------------------------------------------------------------------------
1 | ArrowField nimarrow/tables.html#ArrowField tables: ArrowField
2 | ArrowSchema nimarrow/tables.html#ArrowSchema tables: ArrowSchema
3 | ArrowTable nimarrow/tables.html#ArrowTable tables: ArrowTable
4 | ArrowTableBuilder nimarrow/tables.html#ArrowTableBuilder tables: ArrowTableBuilder
5 | `=destroy` nimarrow/tables.html#=destroy,ArrowFieldObj tables: `=destroy`(x: var ArrowFieldObj)
6 | `=destroy` nimarrow/tables.html#=destroy,ArrowSchemaObj tables: `=destroy`(x: var ArrowSchemaObj)
7 | `=destroy` nimarrow/tables.html#=destroy,ArrowTableObj tables: `=destroy`(x: var ArrowTableObj)
8 | newArrowField nimarrow/tables.html#newArrowField,string,typedesc tables: newArrowField(name: string; T: typedesc): ArrowField
9 | glibPtr nimarrow/tables.html#glibPtr,ArrowField tables: glibPtr(field: ArrowField): GArrowFieldPtr
10 | newArrowSchema nimarrow/tables.html#newArrowSchema,openArray[ArrowField] tables: newArrowSchema(fields: openArray[ArrowField]): ArrowSchema
11 | newArrowSchema nimarrow/tables.html#newArrowSchema,GArrowSchemaPtr tables: newArrowSchema(glibSchema: GArrowSchemaPtr): ArrowSchema
12 | glibPtr nimarrow/tables.html#glibPtr,ArrowSchema tables: glibPtr(schema: ArrowSchema): GArrowSchemaPtr
13 | newArrowTable nimarrow/tables.html#newArrowTable,ArrowSchema,GArrowTablePtr tables: newArrowTable(schema: ArrowSchema; glibTable: GArrowTablePtr): ArrowTable
14 | glibPtr nimarrow/tables.html#glibPtr,ArrowTable tables: glibPtr(table: ArrowTable): GArrowTablePtr
15 | len nimarrow/tables.html#len,ArrowTable tables: len(table: ArrowTable): uint64
16 | `$` nimarrow/tables.html#$,ArrowTable tables: `$`(table: ArrowTable): string
17 | `==` nimarrow/tables.html#==,ArrowTable,ArrowTable tables: `==`(table, other: ArrowTable): bool
18 | schema nimarrow/tables.html#schema,ArrowTable tables: schema(table: ArrowTable): ArrowSchema
19 | col nimarrow/tables.html#col,ArrowTable,typedesc,int tables: col(table: ArrowTable; T: typedesc; i: int): ArrowChunkedArray[T]
20 | col nimarrow/tables.html#col,ArrowTable,typedesc,string tables: col(table: ArrowTable; T: typedesc; name: string): ArrowChunkedArray[T]
21 | newArrowTableBuilder nimarrow/tables.html#newArrowTableBuilder,ArrowSchema tables: newArrowTableBuilder(schema: ArrowSchema): ArrowTableBuilder
22 | add nimarrow/tables.html#add,ArrowTableBuilder,ArrowArray[T] tables: add[T](b: ArrowTableBuilder; arr: ArrowArray[T])
23 | build nimarrow/tables.html#build,ArrowTableBuilder tables: build(b: ArrowTableBuilder): ArrowTable
24 | TypedBuilder nimarrow/tables.html#TypedBuilder tables: TypedBuilder
25 | TypeRegistered nimarrow/tables.html#TypeRegistered tables: TypeRegistered
26 | newArrowTable nimarrow/tables.html#newArrowTable.t,typedesc,openArray[typedesc] tables: newArrowTable(T: typedesc; ts: openArray[T:type]): ArrowTable
27 | add nimarrow/tables.html#add,TypedBuilder[T],openArray[T] tables: add[T](typedBuilder: TypedBuilder[T]; ts: openArray[T])
28 | registerTypedTable nimarrow/tables.html#registerTypedTable.m,typedesc tables: registerTypedTable(typ: typedesc): untyped
29 | newTypedBuilder nimarrow/tables.html#newTypedBuilder.t,typedesc tables: newTypedBuilder(T: typedesc): TypedBuilder[T]
30 | iter nimarrow/tables.html#iter.t,ArrowTable,typedesc tables: iter(tbl: ArrowTable; T: typedesc): untyped
31 |
--------------------------------------------------------------------------------
/docs/nimarrow/arrays.idx:
--------------------------------------------------------------------------------
1 | ArrowArray nimarrow/arrays.html#ArrowArray arrays: ArrowArray
2 | NullBitmap nimarrow/arrays.html#NullBitmap arrays: NullBitmap
3 | ArrowArrayBuilder nimarrow/arrays.html#ArrowArrayBuilder arrays: ArrowArrayBuilder
4 | ArrowChunkedArray nimarrow/arrays.html#ArrowChunkedArray arrays: ArrowChunkedArray
5 | Bytes nimarrow/arrays.html#Bytes arrays: Bytes
6 | TypeTag nimarrow/arrays.html#TypeTag arrays: TypeTag
7 | `=destroy` nimarrow/arrays.html#=destroy,ArrowArrayObj[T] arrays: `=destroy`[T](x: var ArrowArrayObj[T])
8 | `=destroy` nimarrow/arrays.html#=destroy,ArrowChunkedArrayObj[T] arrays: `=destroy`[T](x: var ArrowChunkedArrayObj[T])
9 | `=destroy` nimarrow/arrays.html#=destroy,WrappedBufferObj[T] arrays: `=destroy`[T](x: var WrappedBufferObj[T])
10 | getDataType nimarrow/arrays.html#getDataType arrays: getDataType(tag`gensym7: TypeTag[bool]): GArrowDataTypePtr
11 | getDataType nimarrow/arrays.html#getDataType_2 arrays: getDataType(tag`gensym9: TypeTag[int8]): GArrowDataTypePtr
12 | getDataType nimarrow/arrays.html#getDataType_3 arrays: getDataType(tag`gensym11: TypeTag[uint8]): GArrowDataTypePtr
13 | getDataType nimarrow/arrays.html#getDataType_4 arrays: getDataType(tag`gensym13: TypeTag[int16]): GArrowDataTypePtr
14 | getDataType nimarrow/arrays.html#getDataType_5 arrays: getDataType(tag`gensym15: TypeTag[uint16]): GArrowDataTypePtr
15 | getDataType nimarrow/arrays.html#getDataType_6 arrays: getDataType(tag`gensym17: TypeTag[int32]): GArrowDataTypePtr
16 | getDataType nimarrow/arrays.html#getDataType_7 arrays: getDataType(tag`gensym19: TypeTag[uint32]): GArrowDataTypePtr
17 | getDataType nimarrow/arrays.html#getDataType_8 arrays: getDataType(tag`gensym21: TypeTag[int64]): GArrowDataTypePtr
18 | getDataType nimarrow/arrays.html#getDataType_9 arrays: getDataType(tag`gensym23: TypeTag[uint64]): GArrowDataTypePtr
19 | getDataType nimarrow/arrays.html#getDataType_10 arrays: getDataType(tag`gensym25: TypeTag[float32]): GArrowDataTypePtr
20 | getDataType nimarrow/arrays.html#getDataType_11 arrays: getDataType(tag`gensym27: TypeTag[float64]): GArrowDataTypePtr
21 | getDataType nimarrow/arrays.html#getDataType_12 arrays: getDataType(tag`gensym29: TypeTag[string]): GArrowDataTypePtr
22 | getDataType nimarrow/arrays.html#getDataType_13 arrays: getDataType(tag`gensym32: TypeTag[Bytes]): GArrowDataTypePtr
23 | newEmptyArrowArray nimarrow/arrays.html#newEmptyArrowArray arrays: newEmptyArrowArray[T](): ArrowArray[T]
24 | newArrowArray nimarrow/arrays.html#newArrowArray,openArray[T] arrays: newArrowArray[T](data: openArray[T]): ArrowArray[T]
25 | newArrowArray nimarrow/arrays.html#newArrowArray,openArray[Option[T]] arrays: newArrowArray[T](data: openArray[Option[T]]): ArrowArray[T]
26 | len nimarrow/arrays.html#len,ArrowArray[T] arrays: len[T](arr: ArrowArray[T]): int64
27 | isNullAt nimarrow/arrays.html#isNullAt,ArrowArray[T],int64 arrays: isNullAt[T](arr: ArrowArray[T]; i: int64): bool
28 | toSeq nimarrow/arrays.html#toSeq,ArrowArray[T] arrays: toSeq[T](arr: ArrowArray[T]): seq[T]
29 | `@` nimarrow/arrays.html#@,ArrowArray[T] arrays: `@`[T](arr: ArrowArray[T]): seq[T]
30 | `$` nimarrow/arrays.html#$,ArrowArray[T] arrays: `$`[T](arr: ArrowArray[T]): string
31 | `[]` nimarrow/arrays.html#[],ArrowArray[T],int64 arrays: `[]`[T](arr: ArrowArray[T]; i: int64): T
32 | `[]` nimarrow/arrays.html#[],ArrowArray[T],int arrays: `[]`[T](arr: ArrowArray[T]; i: int): T
33 | `[]` nimarrow/arrays.html#[],ArrowArray[T],Slice[int] arrays: `[]`[T](arr: ArrowArray[T]; slice: Slice[int]): ArrowArray[T]
34 | `[]` nimarrow/arrays.html#[],ArrowArray[T],Slice[int64] arrays: `[]`[T](arr: ArrowArray[T]; slice: Slice[int64]): ArrowArray[T]
35 | items nimarrow/arrays.html#items.i,ArrowArray[T] arrays: items[T](arr: ArrowArray[T]): T
36 | glibPtr nimarrow/arrays.html#glibPtr,ArrowArray[T] arrays: glibPtr[T](arr: ArrowArray[T]): GArrowArrayPtr
37 | newArrowArrayBuilder nimarrow/arrays.html#newArrowArrayBuilder arrays: newArrowArrayBuilder[T](): ArrowArrayBuilder[T]
38 | add nimarrow/arrays.html#add,ArrowArrayBuilder[T],T arrays: add[T](builder: ArrowArrayBuilder[T]; x: T)
39 | add nimarrow/arrays.html#add,ArrowArrayBuilder[T],Option[T] arrays: add[T](builder: ArrowArrayBuilder[T]; x: Option[T])
40 | build nimarrow/arrays.html#build,ArrowArrayBuilder[T] arrays: build[T](builder: ArrowArrayBuilder[T]): ArrowArray[T]
41 | newArrowChunkedArray nimarrow/arrays.html#newArrowChunkedArray,GArrowChunkedArrayPtr arrays: newArrowChunkedArray[T](glibChunkedArray: GArrowChunkedArrayPtr): ArrowChunkedArray[\n T]
42 | len nimarrow/arrays.html#len,ArrowChunkedArray[T] arrays: len[T](chunkedArray: ArrowChunkedArray[T]): uint64
43 | `$` nimarrow/arrays.html#$,ArrowChunkedArray[T] arrays: `$`[T](chunkedArray: ArrowChunkedArray[T]): string
44 | toSeq nimarrow/arrays.html#toSeq,ArrowChunkedArray[T] arrays: toSeq[T](chunkedArray: ArrowChunkedArray[T]): seq[T]
45 | `@` nimarrow/arrays.html#@,ArrowChunkedArray[T] arrays: `@`[T](chunkedArray: ArrowChunkedArray[T]): seq[T]
46 | `[]` nimarrow/arrays.html#[],ArrowChunkedArray[T],int64 arrays: `[]`[T](chunkedArray: ArrowChunkedArray[T]; i: int64): T
47 | `==` nimarrow/arrays.html#==,ArrowChunkedArray[T],ArrowChunkedArray[T] arrays: `==`[T](a, b: ArrowChunkedArray[T]): bool
48 | chunks nimarrow/arrays.html#chunks,ArrowChunkedArray[T] arrays: chunks[T](chunkedArray: ArrowChunkedArray[T]): uint
49 | chunk nimarrow/arrays.html#chunk,ArrowChunkedArray[T],uint arrays: chunk[T](chunkedArray: ArrowChunkedArray[T]; i: uint): ArrowArray[T]
50 | combine nimarrow/arrays.html#combine,ArrowChunkedArray[T] arrays: combine[T](chunkedArray: ArrowChunkedArray[T]): ArrowArray[T]
51 | items nimarrow/arrays.html#items.i,ArrowChunkedArray[T] arrays: items[T](chunkedArray: ArrowChunkedArray[T]): T
52 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [CI status](https://github.com/emef/nimarrow/actions/workflows/ci.yaml) [License: Apache-2.0](https://opensource.org/licenses/Apache-2.0)
2 |
3 | # nimarrow - libarrow bindings for nim
4 |
5 | [API Documentation](https://emef.github.io/nimarrow/theindex.html)
6 |
7 | "[Apache Arrow](https://arrow.apache.org/) defines a language-independent columnar memory format for flat and hierarchical data, organized for efficient analytic operations on modern hardware like CPUs and GPUs. The Arrow memory format also supports zero-copy reads for lightning-fast data access without serialization overhead."
8 |
9 | `nimarrow` provides an ergonomic nim interface to the lower level libarrow c api.
10 |
11 | # Dependencies
12 |
13 | `nimarrow` depends on the package `nimarrow_glib` which provides bindings to the libarrow-glib and libparquet-glib shared libraries. See the [installation notes](https://github.com/emef/nimarrow_glib/#installation-notes) for instructions on how to install those libraries.
14 |
15 | # Project Status
16 |
17 | This library is still a WIP and will be developed alongside the [nimarrow_glib](https://github.com/emef/nimarrow_glib/) library which exposes the libarrow-glib c API.
18 |
19 | - [x] arrays (with Option support)
20 | - [ ] date/timestamp/decimal types
21 | - [x] tables
22 | - [x] parquet read/write
23 | - [x] typed API (without Option support)
24 | - [ ] typed API (with Option support)
25 | - [ ] IPC format
26 |
27 | # Code Samples
28 |
29 | ## Arrays
30 |
31 | An ArrowArray[T] is simply a 1D array of type T. It manages its own data on the heap in 64-byte-aligned buffers so that it can interoperate with the libarrow-glib C API.
32 |
33 | ```nim
34 | import options
35 | import nimarrow
36 |
37 | let arr = newArrowArray[int32](@[1'i32, 2'i32, 3'i32])
38 | doAssert arr[0] == 1'i32
39 | doAssert @arr == @[1'i32, 2'i32, 3'i32]
40 |
41 | # can take a slice of an existing array, returning a view (no copy).
42 | let s = arr[1..2]
43 | doAssert @s == @[2'i32, 3'i32]
44 |
45 | # use array builders to avoid creating a copy of the data, .build()
46 | # transfers ownership of its buffer into the newly-created array.
47 | let builder = newArrowArrayBuilder[int64]()
48 | builder.add 1'i64
49 | builder.add 2'i64
50 | builder.add none(int64)
51 | let withNulls = builder.build()
52 |
53 | # nulls show up as 0, must check isNullAt(i)
54 | doAssert @withNulls == @[1'i64, 2'i64, 0'i64]
55 | doAssert withNulls.isNullAt(2)
56 | ```
57 |
58 | ## Tables
59 |
60 | An ArrowTable is an ordered collection of named arrays (columns). Each column name and type is described by its ArrowField, and an ArrowSchema describes all of the columns in a table.
61 |
62 | To construct a table, we use an ArrowTableBuilder which is constructed with the intended schema. Each column's data must then be added to the builder in the order specified by the schema. Creating a table does not copy any of the column data, it will share the internal buffers of the arrays used to construct it.
63 |
64 | ```nim
65 | import nimarrow
66 |
67 | # Schema will be (a: int32, b: string)
68 | let field1 = newArrowField("a", int32)
69 | let field2 = newArrowField("b", string)
70 | let schema = newArrowSchema(@[field1, field2])
71 |
72 | # Column data for the described fields in the schema.
73 | let data1 = newArrowArray(@[1'i32, 2'i32, 3'i32])
74 | let data2 = newArrowArray(@["first", "second", "third"])
75 |
76 | # Add each column to the table in order specified by the schema.
77 | let tableBuilder = newArrowTableBuilder(schema)
78 | tableBuilder.add data1
79 | tableBuilder.add data2
80 | let table = tableBuilder.build
81 |
82 | # Convert the table into its string representation, including
83 | # its metadata and all contents.
84 | discard $table
85 | ```
86 |
87 | ## Basic parquet I/O
88 |
89 | ```nim
90 | import nimarrow
91 |
92 | # write to a parquet file with default properties
93 | let table: ArrowTable = ...
94 | table.toParquet("/tmp/test.parquet")
95 |
96 | # can specify additional writer properties (passed as an Option, so `import std/options` is required)
97 | let props = newParquetWriterProps(compression = GARROW_COMPRESSION_TYPE_SNAPPY)
98 | table.toParquet("/tmp/test.parquet.snappy", some(props))
99 |
100 | # read a parquet file into an arrow table
101 | let reread = fromParquet("/tmp/test.parquet")
102 | ```
103 |
104 | ## Typed API
105 |
106 | The Typed API provides convenience methods for creating ArrowTables and
107 | reading/writing from parquet for a custom nim object. In order to use
108 | the typed API, the macro `registerTypedTable(T)` must be called for the
109 | nim object `T`. This generates all of the methods to fulfill the type-registration
110 | concept and unlocks access to the typed API functions for that type.
111 |
112 | Functions generated for the type by `registerTypedTable` do not use any
113 | runtime-reflection and should roughly match what hand-written versions of
114 | those functions would look like. Every effort is made to avoid copying data
115 | needlessly, and as much is inlined as possible. Numeric columns are accessed
116 | via their raw storage arrays, however strings/binary types do need to be
117 | copied in order to convert to nim values.
118 |
119 | The following conditions must be met for a type to be eligible for registration:
120 |
121 | * The object may be an `object` or a `ref object`.
122 | * The object itself is public.
123 | * All of its fields are public.
124 | * All of its fields are numeric types, string, or `Bytes`.
125 |
126 | Failure to meet these criteria may lead to some strange compile errors when
127 | running the `registerTypedTable` macro.
128 |
129 | #### Register a type for the Typed API
130 |
131 | ```nim
132 | type
133 | CustomType* = object
134 | a*: int32
135 | b*: string
136 | c*: uint8
137 |
138 | registerTypedTable(CustomType)
139 | ```
140 |
141 | #### Use a TypedBuilder to construct an ArrowTable from custom objects
142 |
143 | ```nim
144 | # construct a new TypedBuilder for our type.
145 | let typedBuilder = newTypedBuilder(CustomType)
146 |
147 | # append rows to the builder using CustomType objects.
148 | typedBuilder.add CustomType(a: 0'i32, b: "some string", c: 0'u8)
149 | typedBuilder.add CustomType(a: 1'i32, b: "another", c: 10'u8)
150 | typedBuilder.add CustomType(a: 2'i32, b: "three", c: 100'u8)
151 |
152 | # build an ArrowTable from the typedBuilder.
153 | let tbl = typedBuilder.build
154 | ```
155 |
156 | #### Iterate over a Table, treating each row as a custom object
157 |
158 | ```nim
159 | # some ArrowTable with compatible schema of CustomType.
160 | let tbl: ArrowTable = ...
161 |
162 | for x in tbl.iter(CustomType):
163 | # x is a CustomType object
164 | echo $x
165 | ```
166 |
167 | #### Stream custom objects to a parquet file (writer)
168 |
169 | ```nim
170 | # create a new typed parquet writer for our custom type.
171 | let typedWriter = newTypedParquetWriter[CustomType]("/path/to/file.parquet")
172 |
173 | # stream records to the parquet file.
typedWriter.add CustomType(a: 0'i32, b: "some string", c: 0'u8)
typedWriter.add CustomType(a: 1'i32, b: "another", c: 10'u8)
176 | ...
177 |
178 | # don't forget to close the parquet file or it will be corrupt!
179 | typedWriter.close()
180 | ```
181 |
182 | #### Stream custom objects from a parquet file (reader)
183 |
184 | ```nim
185 | # create a generic parquet reader for a parquet file.
186 | let reader = newParquetReader("/path/to/file.parquet")
187 |
188 | # use the .iter(T) iterator to access each row as a custom type
189 | for x in reader.iter(CustomType):
190 | # x is a CustomType object
191 | ...
192 | ```
193 |
--------------------------------------------------------------------------------
/src/nimarrow/parquet.nim:
--------------------------------------------------------------------------------
1 | import std/options
2 |
3 | import nimarrow_glib
4 |
5 | import ./arrays
6 | import ./tables
7 |
type
  # Holds the glib writer-properties handle; the `=destroy` hook declared
  # for this object unrefs it.
  ParquetWriterPropsObj = object
    glibProps: GParquetWriterPropertiesPtr
  ParquetWriterProps* = ref ParquetWriterPropsObj

  # State behind a ParquetWriter: the glib file-writer handle plus a flag
  # that `close` sets once the file has been finalized. `add` asserts the
  # flag is still false.
  ParquetWriterObj = object
    closed: bool
    glibWriter: GParquetArrowFileWriterPtr
  ParquetWriter* = ref ParquetWriterObj

  # State behind a ParquetReader: the glib file-reader handle and the
  # schema that was read from the file when the reader was created.
  ParquetReaderObj = object
    schema: ArrowSchema
    glibReader: GParquetArrowFileReaderPtr
  ParquetReader* = ref ParquetReaderObj

  # Writer for rows of a registered type `T`: rows are collected in
  # `tableBuilder` and handed to `writer` when the typed writer is closed.
  TypedParquetWriter*[T] = ref object
    writer: ParquetWriter
    tableBuilder: TypedBuilder[T]
26 |
# Forward declaration; the implementation appears further down in this
# module.
proc close*(w: ParquetWriter)
28 |
proc `=destroy`*(x: var ParquetWriterPropsObj) =
  ## Drops this object's reference on the glib writer properties, if any.
  if x.glibProps == nil:
    return
  gObjectUnref(x.glibProps)
32 |
proc `=destroy`*(x: var ParquetWriterObj) =
  ## Drops this object's reference on the glib file writer, if any.
  ## This hook only unrefs the handle; it does not call `close`.
  if x.glibWriter == nil:
    return
  gObjectUnref(x.glibWriter)
36 |
proc `=destroy`*(x: var ParquetReaderObj) =
  ## Drops this object's reference on the glib file reader, if any.
  if x.glibReader == nil:
    return
  gObjectUnref(x.glibReader)
40 |
proc newParquetWriterProps*(
  compression: GArrowCompressionType = GARROW_COMPRESSION_TYPE_SNAPPY,
  enableDictionary: bool = true,
  dictionaryPageSizeLimit: Option[int64] = none(int64),
  batchSize: Option[int64] = none(int64),
  maxRowGroupLength: Option[int64] = none(int64),
  dataPageSize: Option[int64] = none(int64)
): ParquetWriterProps =
  ## Construct a new parquet writer properties object, optionally overriding
  ## the default settings.
  ##
  ## `compression` and `enableDictionary` are always applied. Each of the
  ## `Option` parameters is applied only when it holds a value; otherwise
  ## the setting of a freshly created properties object is left untouched.
  let props = writerPropertiesNew()
  # NOTE(review): `nil` is passed as the path argument; presumably that
  # selects all columns - confirm against the glib documentation.
  props.writerPropertiesSetCompression(compression, nil)

  if enableDictionary:
    props.writerPropertiesEnableDictionary(nil)
  else:
    props.writerPropertiesDisableDictionary(nil)

  # This parameter used to be accepted and silently ignored.
  if dictionaryPageSizeLimit.isSome:
    props.writerPropertiesSetDictionaryPageSizeLimit(
      dictionaryPageSizeLimit.get)

  if batchSize.isSome:
    props.writerPropertiesSetBatchSize(batchSize.get)

  if maxRowGroupLength.isSome:
    props.writerPropertiesSetMaxRowGroupLength(maxRowGroupLength.get)

  if dataPageSize.isSome:
    props.writerPropertiesSetDataPageSize(dataPageSize.get)

  ParquetWriterProps(glibProps: props)
69 |
proc newParquetWriter*(
  schema: ArrowSchema,
  path: string,
  props: Option[ParquetWriterProps] = none(ParquetWriterProps)
): ParquetWriter =
  ## Open a parquet writer for the local file at `path`, writing tables
  ## that follow `schema`. When `props` is empty, default writer properties
  ## are created. Raises `IOError` with the glib message when the writer
  ## cannot be created.
  let effectiveProps =
    if props.isNone: newParquetWriterProps()
    else: props.get

  var gerr: GErrorPtr
  let handle = parquetFileWriterNewPath(
    schema.glibPtr(), path, effectiveProps.glibProps, gerr)

  if gerr != nil:
    # copy the message out before the glib error is released.
    let msg = $gerr.message
    gErrorFree(gerr)
    raise newException(IOError, msg)

  result = ParquetWriter(glibWriter: handle)
96 |
proc add*(w: ParquetWriter, table: ArrowTable, chunkSize: uint64 = 1024'u64) =
  ## Add this table to the parquet file being written.
  ##
  ## `chunkSize` is forwarded to the glib writer as its chunk-size argument;
  ## it defaults to 1024, the value that was previously hard-coded.
  ## The writer must not have been closed. Raises `IOError` when glib
  ## reports an error or the write is reported as unsuccessful.
  doAssert not w.closed

  var error: GErrorPtr
  let success = parquetFileWriterWriteTable(
    w.glibWriter, table.glibPtr(), chunkSize, error)

  if error != nil:
    defer: gErrorFree(error)
    raise newException(IOError, $error.message)

  if not success:
    raise newException(IOError, "Error adding table to parquet writer")
112 |
proc close*(w: ParquetWriter) =
  ## Finalize the parquet file. NOTE: this MUST be called once writing is
  ## done, otherwise the file will not be valid: closing writes the parquet
  ## footer/metadata rather than merely releasing the file descriptor.
  ## Raises `IOError` when glib reports an error or an unsuccessful close.
  var gerr: GErrorPtr
  let ok = parquetFileWriterClose(w.glibWriter, gerr)

  if gerr != nil:
    # copy the message out before the glib error is released.
    let msg = $gerr.message
    gErrorFree(gerr)
    raise newException(IOError, msg)

  if not ok:
    raise newException(IOError, "Error closing parquet writer")

  # only mark the writer closed after glib confirmed the close.
  w.closed = true
128 |
proc toParquet*(
  t: ArrowTable,
  path: string,
  props: Option[ParquetWriterProps] = none(ParquetWriterProps)
) =
  ## Write table `t` as a parquet file at local path `path`: opens a
  ## writer for the table's schema, adds the table and closes the writer.
  let sink = newParquetWriter(t.schema, path, props)
  add(sink, t)
  close(sink)
138 |
proc fromParquet*(path: string): ArrowTable =
  ## Load the parquet file at local path `path` into an ArrowTable.
  ## Raises `IOError` with the glib message when opening the file, reading
  ## its schema or reading its contents fails.
  var gerr: GErrorPtr
  let fileReader = parquetFileReaderNewPath(path, gerr)
  if gerr != nil:
    let msg = $gerr.message
    gErrorFree(gerr)
    raise newException(IOError, msg)

  try:
    let rawSchema = parquetFileReaderGetSchema(fileReader, gerr)
    if gerr != nil:
      let msg = $gerr.message
      gErrorFree(gerr)
      raise newException(IOError, msg)

    let tableSchema = newArrowSchema(rawSchema)

    let rawTable = parquetFileReaderReadTable(fileReader, gerr)
    if gerr != nil:
      let msg = $gerr.message
      gErrorFree(gerr)
      raise newException(IOError, msg)

    result = newArrowTable(tableSchema, rawTable)
  finally:
    # the reader is only needed while loading; release it on every path.
    gObjectUnref(fileReader)
162 |
template newTypedParquetWriterTmpl(
  T: typedesc[TypeRegistered],
  path: string,
  props: Option[ParquetWriterProps] = none(ParquetWriterProps)
): TypedParquetWriter[T] =
  # Creates the typed builder first so that its schema can be used to open
  # the underlying parquet writer.
  block:
    let tw = new(TypedParquetWriter[T])
    tw.tableBuilder = newTypedBuilder(T)
    tw.writer = newParquetWriter(tw.tableBuilder.schema, path, props)
    tw
174 |
proc newTypedParquetWriter*[T: TypeRegistered](
  path: string,
  props: Option[ParquetWriterProps] = none(ParquetWriterProps)
): TypedParquetWriter[T] =
  ## Create a typed parquet writer for rows of type `T`, writing to the
  ## local file at `path` with the optional writer properties `props`.
  result = newTypedParquetWriterTmpl(T, path, props)
181 |
proc add*[T](w: TypedParquetWriter[T], x: T) =
  ## Append row `x`. The row is added to the writer's table builder; the
  ## file itself is written when the typed writer is closed.
  add(w.tableBuilder, x)
185 |
proc close*[T](w: TypedParquetWriter[T]) =
  ## Finalize the parquet file. NOTE: this MUST be called once writing is
  ## done, otherwise the file will not be valid. The rows collected so far
  ## are built into a table, written, and then the underlying writer is
  ## closed, which writes the parquet footer/metadata.
  let pending = w.tableBuilder.build
  w.writer.add(pending)
  w.writer.close()
193 |
proc newParquetReader*(path: string, useThreads: bool = true): ParquetReader =
  ## Create a new parquet reader, reading the local path `path`.
  ##
  ## `useThreads` is forwarded to the glib reader's use-threads setting.
  ## The file's schema is read eagerly and stored on the reader.
  ## Raises `IOError` with the glib message when the file cannot be opened
  ## or its schema cannot be read.
  var err: GErrorPtr
  let glibReader = parquetFileReaderNewPath(path, err)
  if err != nil:
    defer: gErrorFree(err)
    raise newException(IOError, $err.message)

  parquetFileReaderSetUseThreads(glibReader, useThreads)

  let glibSchema = parquetFileReaderGetSchema(glibReader, err)
  if err != nil:
    # The handle has not been handed to a ParquetReader yet, so no destroy
    # hook will release it; unref it here instead of leaking it.
    gObjectUnref(glibReader)
    defer: gErrorFree(err)
    raise newException(IOError, $err.message)

  let schema = newArrowSchema(glibSchema)

  ParquetReader(schema: schema, glibReader: glibReader)
212 |
proc rowGroups*(r: ParquetReader): int =
  ## Number of row groups in the file this reader was opened on.
  result = parquetFileReaderGetNRowGroups(r.glibReader)
216 |
proc read*(r: ParquetReader, rowGroup: int): ArrowTable =
  ## Read the row group with index `rowGroup` into an ArrowTable that uses
  ## the reader's schema. Raises `IOError` with the glib message on failure.
  var gerr: GErrorPtr
  let rawTable = parquetFileReaderReadRowGroup(
    r.glibReader, rowGroup, nil, 0, gerr)

  if gerr != nil:
    # copy the message out before the glib error is released.
    let msg = $gerr.message
    gErrorFree(gerr)
    raise newException(IOError, msg)

  result = newArrowTable(r.schema, rawTable)
228 |
proc readFully*(r: ParquetReader): ArrowTable =
  ## Read the whole parquet file into a single ArrowTable that uses the
  ## reader's schema. Raises `IOError` with the glib message on failure.
  var gerr: GErrorPtr
  let rawTable = parquetFileReaderReadTable(r.glibReader, gerr)

  if gerr != nil:
    # copy the message out before the glib error is released.
    let msg = $gerr.message
    gErrorFree(gerr)
    raise newException(IOError, msg)

  result = newArrowTable(r.schema, rawTable)
238 |
iterator iter*(r: ParquetReader, T: typedesc): T {.inline.} =
  ## Yield every row of the file as a value of the custom type `T`,
  ## reading one row group at a time.
  # TODO: check schema
  for groupIdx in 0 ..< r.rowGroups:
    let table = r.read(groupIdx)
    for row in table.iter(T):
      yield row
--------------------------------------------------------------------------------
/src/nimarrow/tables.nim:
--------------------------------------------------------------------------------
1 | import std/macros
2 |
3 | import nimarrow_glib
4 |
5 | import ./arrays
6 |
7 | ## An ArrowTable is an ordered collection of named arrays (columns).
8 | ## Each column name and type is described by its ArrowField,
9 | ## and an ArrowSchema describes all of the columns in a table.
10 | ##
11 | ## To construct a table, we use an ArrowTableBuilder which is
12 | ## constructed with the intended schema. Each column's data must
13 | ## then be added to the builder in the order specified by the
14 | ## schema. Creating a table does not copy any of the column
15 | ## data, it will share the internal buffers of the arrays used
16 | ## to construct it.
17 |
type
  # Holds a glib field handle; unref'd by the `=destroy` hook declared for
  # this object.
  ArrowFieldObj = object
    glibField: GArrowFieldPtr
  ArrowField* = ref ArrowFieldObj

  # Holds a glib schema handle; unref'd by the `=destroy` hook declared for
  # this object.
  ArrowSchemaObj = object
    glibSchema: GArrowSchemaPtr
  ArrowSchema* = ref ArrowSchemaObj

  # Holds a glib table handle together with the schema the table was
  # created with.
  ArrowTableObj = object
    schema: ArrowSchema
    glibTable: GArrowTablePtr
  ArrowTable* = ref ArrowTableObj

  # Collects column arrays for `schema` in order. `valid` is set on
  # construction, asserted by `add`, and cleared by `build`.
  ArrowTableBuilder* = ref object
    valid: bool
    schema: ArrowSchema
    glibArrays: seq[GArrowArrayPtr]
36 |
proc `=destroy`*(x: var ArrowFieldObj) =
  ## Drops this object's reference on the glib field, if any.
  if x.glibField == nil:
    return
  gObjectUnref(x.glibField)
40 |
proc `=destroy`*(x: var ArrowSchemaObj) =
  ## Drops this object's reference on the glib schema, if any.
  if x.glibSchema == nil:
    return
  gObjectUnref(x.glibSchema)
44 |
proc `=destroy`*(x: var ArrowTableObj) =
  ## Drops this object's reference on the glib table, if any.
  if x.glibTable == nil:
    return
  gObjectUnref(x.glibTable)
48 |
proc newArrowField[T](name: string, typeTag: TypeTag[T]): ArrowField =
  # Builds a field named `name` whose data type is looked up from
  # `typeTag`. The data type handle is released again once the field has
  # been created.
  let dtype = getDataType(typeTag)
  try:
    result = ArrowField(glibField: fieldNew(name, dtype))
  finally:
    gObjectUnref(dtype)
53 |
proc newArrowField*(name: string, T: typedesc): ArrowField =
  ## Create a new field of type T named `name`.
  # Spelled exactly like the declaration; the previous `newArrowFIeld`
  # only resolved through identifier style-insensitivity and is rejected
  # under --styleCheck:error.
  newArrowField[T](name, TypeTag[T]())
57 |
proc glibPtr*(field: ArrowField): GArrowFieldPtr =
  ## Returns the glib field pointer wrapped by `field`.
  result = field.glibField
61 |
proc newArrowSchema*(fields: openArray[ArrowField]): ArrowSchema =
  ## Build a schema whose columns are `fields`, in the given order.
  var glist: GListPtr
  for f in fields:
    glist = glistAppend(glist, f.glibField)

  try:
    result = ArrowSchema(glibSchema: schemaNew(glist))
  finally:
    # the temporary glib list is only needed while creating the schema.
    glistFree(glist)
71 |
proc newArrowSchema*(glibSchema: GArrowSchemaPtr): ArrowSchema =
  ## Wrap an existing glib schema pointer, which must not be nil.
  ## NOTE: ownership of the pointer is taken over as-is; the refcount is
  ## not increased.
  doAssert glibSchema != nil
  result = ArrowSchema(glibSchema: glibSchema)
77 |
proc glibPtr*(schema: ArrowSchema): GArrowSchemaPtr =
  ## Returns the glib schema pointer wrapped by `schema`.
  result = schema.glibSchema
81 |
proc newArrowTable*(schema: ArrowSchema, glibTable: GArrowTablePtr): ArrowTable =
  ## Wrap an existing glib table pointer (must not be nil) together with
  ## its schema. NOTE: ownership of the pointer is taken over as-is; the
  ## refcount is not increased.
  doAssert glibTable != nil
  result = ArrowTable(schema: schema, glibTable: glibTable)
87 |
proc glibPtr*(table: ArrowTable): GArrowTablePtr =
  ## Returns the glib table pointer wrapped by `table`.
  result = table.glibTable
91 |
proc len*(table: ArrowTable): uint64 =
  ## Number of rows in the table.
  result = tableGetNRows(table.glibTable)
95 |
proc `$`*(table: ArrowTable): string =
  ## Render the table (schema and full contents) as a string.
  ## Raises `ValueError` with the glib message when rendering fails.
  var gerr: GErrorPtr
  let rendered = tableToString(table.glibTable, gerr)
  result = $rendered

  if gerr != nil:
    # copy the message out before the glib error is released.
    let msg = $gerr.message
    gErrorFree(gerr)
    raise newException(ValueError, msg)
103 |
proc `==`*(table, other: ArrowTable): bool =
  ## Compares the two tables through glib's table equality check.
  result = tableEqual(glibPtr(table), glibPtr(other))
106 |
proc schema*(table: ArrowTable): ArrowSchema =
  ## Returns the schema stored on the table.
  result = table.schema
109 |
proc col*(table: ArrowTable, T: typedesc, i: int): ArrowChunkedArray[T] =
  ## Returns column `i` of the table as a chunked array of element type
  ## `T`. Asserts that glib returned column data for that index.
  let glibChunkedArray = tableGetColumnData(table.glibTable, i)
  doAssert glibChunkedArray != nil
  result = newArrowChunkedArray[T](glibChunkedArray)
115 |
proc col*(table: ArrowTable, T: typedesc, name: string): ArrowChunkedArray[T] =
  ## Returns the column called `name` as a chunked array of element type
  ## `T`.
  ##
  ## Raises `KeyError` when the schema has no field with that name.
  ## `KeyError` derives from `CatchableError`, so handlers written against
  ## the previously raised bare `CatchableError` keep working.
  let index = schemaGetFieldIndex(table.schema.glibSchema, name)
  if index == -1:
    raise newException(KeyError, "invalid column " & name)

  table.col(T, index)
122 |
proc newArrowTableBuilder*(schema: ArrowSchema): ArrowTableBuilder =
  ## Create a table builder for `schema`. Every column named in the schema
  ## has to be supplied through `add`, in schema order.
  result = ArrowTableBuilder(valid: true, schema: schema)
127 |
proc add*[T](b: ArrowTableBuilder, arr: ArrowArray[T]) =
  ## Add the next column to the builder, must be of the correct type
  ## specified in the schema.
  ##
  ## Asserts that the builder has not been built yet, that the schema still
  ## has a column left to fill, and that the data type for `T` equals the
  ## data type of the schema field at this position.
  doAssert b.valid

  let i = b.glibArrays.len
  # Reject surplus columns before asking glib for a field index past the
  # end of the schema.
  doAssert uint(i) < schemaNFields(b.schema.glibSchema)

  # NOTE(review): the field returned by schemaGetField is never released
  # here; confirm against the glib docs whether the caller owns it.
  let expectedField = schemaGetField(b.schema.glibSchema, uint(i))
  let expectedDtype = fieldGetDataType(expectedField)

  # The data type obtained from getDataType is released after use, the same
  # way the private newArrowField in this module does; previously it leaked
  # on every call.
  let actualDtype = getDataType(TypeTag[T]())
  defer: gObjectUnref(actualDtype)
  doAssert dataTypeEqual(expectedDtype, actualDtype)

  # NOTE(review): only the raw pointer is stored; presumably `arr` has to
  # outlive the builder - confirm.
  b.glibArrays.add arr.glibPtr
139 |
proc build*(b: ArrowTableBuilder): ArrowTable =
  ## Build the table, invalidating this builder.
  ##
  ## Asserts that exactly as many columns were added as the schema has
  ## fields. Raises `ValueError` with the glib message when the table
  ## cannot be created.
  doAssert uint(b.glibArrays.len) == schemaNFields(b.schema.glibPtr)
  b.valid = false

  let nArrays = uint64(b.glibArrays.len)
  # Taking the address of element 0 raises an index defect on an empty seq,
  # so a schema with zero fields passes a nil array pointer instead.
  let glibArraysPtr =
    if b.glibArrays.len > 0:
      cast[ptr UncheckedArray[GArrowArrayPtr]](addr b.glibArrays[0])
    else:
      nil

  var error: GErrorPtr
  let glibTable = tableNewArrays(b.schema.glibSchema, glibArraysPtr, nArrays, error)
  if error != nil:
    defer: gErrorFree(error)
    raise newException(ValueError, $error.message)

  newArrowTable(b.schema, glibTable)
154 |
type
  # Base type that the per-type builders generated by `registerTypedTable`
  # inherit from.
  TypedBuilder*[T] = ref object of RootObj

  # Concept listing the operations that must exist for a type `T` to be
  # usable with the typed API: add/schema/build on its TypedBuilder, a
  # `newTypedBuilder` constructor, and an `iter` over an ArrowTable that
  # yields `T` values.
  TypeRegistered* {.explain.} = concept b, type T
    add(TypedBuilder[T], b)
    schema(TypedBuilder[T]) is ArrowSchema
    build(TypedBuilder[T]) is ArrowTable
    newTypedBuilder(T) is TypedBuilder[T]
    for x in iter(ArrowTable, T):
      x is T
165 |
template newArrowTable*(T: typedesc, ts: openArray[T]): ArrowTable =
  ## Builds an ArrowTable from the rows `ts` by feeding them through a
  ## typed builder for `T`.
  let rowBuilder = newTypedBuilder(T)
  add(rowBuilder, ts)
  build(rowBuilder)
170 |
proc add*[T](typedBuilder: TypedBuilder[T], ts: openArray[T]) =
  ## Appends every element of `ts` to the builder, one row at a time.
  for row in ts:
    add(typedBuilder, row)
174 |
macro registerTypedTable*(typ: typedesc): untyped =
  ## Macro which registers a type to be used in the "Typed" API.
  ##
  ## For the object type `typ` this generates, in the calling module:
  ## * a `<typ>TableBuilder` ref object inheriting from `TypedBuilder[typ]`
  ##   with a `schema` field and one `ArrowArrayBuilder` per field of `typ`;
  ## * `newTypedBuilder(tag: TypeTag[typ])`, `add`, `build` and `schema`
  ##   procs operating on `TypedBuilder[typ]`;
  ## * an inline `iter(tbl: ArrowTable, tag: TypeTag[typ])` iterator.
  ##
  ## All generated routines are exported. Field names are read as
  ## `identDefs[0][1]`, i.e. each field must be declared with an export
  ## marker.
  result = newStmtList()

  let
    typDef = getImpl(typ)
    # `ref object` types wrap the object definition in one extra node.
    recList =
      if typDef[2].kind == nnkRefTy: typDef[2][0][2]
      else: typDef[2][2]
    builderTypName = ident($typ & "TableBuilder")
    newBuilderProcName = ident("newTypedBuilder")
    addProcName = ident("add")
    buildProcName = ident("build")
    schemaProcName = ident("schema")
    iterProcName = ident("iter")
    paramBuilder = ident("builder")
    paramValue = ident("x")
    paramTable = ident("tbl")
    fields = ident("fields")
    tblBuilder = ident("tblBuilder")
    castBuilder = ident("castBuilder")
    tag = ident("tag")
    typTag = quote do:
      TypeTag[`typ`]
    typedBuilder = quote do:
      TypedBuilder[`typ`]
    builderRecList = newNimNode(nnkRecList)

  # --- builder type: schema + one array builder per field ---
  builderRecList.add newIdentDefs(ident("schema"), ident("ArrowSchema"))

  for i, identDefs in recList:
    let
      fieldName = identDefs[0][1]
      fieldType = identDefs[1]
      arrayBuilderType = quote do:
        ArrowArrayBuilder[`fieldType`]

    builderRecList.add newIdentDefs(fieldName, arrayBuilderType)

  let
    inheritTypedBuilder = newTree(nnkOfInherit, typedBuilder)
    builderObj = newTree(nnkObjectTy, newEmptyNode(), inheritTypedBuilder, builderRecList)
    refBuilderObj = newTree(nnkRefTy, builderObj)
    builderTypDef = newTree(
      nnkTypeDef, builderTypName, newEmptyNode(), refBuilderObj)
    typSection = newTree(nnkTypeSection, builderTypDef)

  # --- newTypedBuilder: build the schema, then create the array builders ---
  let newBuilderProcBody = newStmtList()

  newBuilderProcBody.add quote do:
    var `fields` = newSeq[ArrowField]()

  for i, identDefs in recList:
    let
      fieldName = newStrLitNode($identDefs[0][1])
      fieldType = identDefs[1]

    newBuilderProcBody.add quote do:
      `fields`.add newArrowField(`fieldName`, TypeTag[`fieldType`]())

  newBuilderProcBody.add quote do:
    let `castBuilder` = new(`builderTypName`)
    `castBuilder`.schema = newArrowSchema(`fields`)

  for i, identDefs in recList:
    let
      fieldName = identDefs[0][1]
      fieldType = identDefs[1]
      newArrayBuilderCall = quote do:
        newArrowArrayBuilder[`fieldType`]()

      resultDotBuilder = newDotExpr(castBuilder, fieldName)
      assignBuilder = newAssignment(resultDotBuilder, newArrayBuilderCall)

    newBuilderProcBody.add assignBuilder

  newBuilderProcBody.add quote do:
    cast[`typedBuilder`](`castBuilder`)

  # Exported like the other generated routines: the `newTypedBuilder(T)`
  # template expands to a call of this proc at the use site, which fails to
  # resolve from another module when the proc is module-private.
  let newBuilderProc = newProc(
    name = postfix(newBuilderProcName, "*"),
    params = [typedBuilder, nnkIdentDefs.newTree(tag, typTag, newEmptyNode())],
    body = newBuilderProcBody
  )

  # --- add: append each field of the row to its array builder ---
  let addBody = newStmtList()

  addBody.add quote do:
    let `castBuilder` = cast[`builderTypName`](`paramBuilder`)

  for i, identDefs in recList:
    let
      fieldName = identDefs[0][1]
      fieldBuilder = newDotExpr(castBuilder, fieldName)
      fieldAccess = newDotExpr(paramValue, fieldName)

    addBody.add quote do:
      `fieldBuilder`.add(`fieldAccess`)

  let addProc = newProc(
    name = postfix(addProcName, "*"),
    params = [
      newEmptyNode(),
      nnkIdentDefs.newTree(paramBuilder, typedBuilder, newEmptyNode()),
      nnkIdentDefs.newTree(paramValue, typ, newEmptyNode())
    ],
    body = addBody
  )

  # --- build: build every column and assemble them into a table ---
  let buildBody = newStmtList()
  buildBody.add quote do:
    let `castBuilder` = cast[`builderTypName`](`paramBuilder`)
    let `tblBuilder` = newArrowTableBuilder(`castBuilder`.schema)

  for i, identDefs in recList:
    let
      fieldName = identDefs[0][1]
      fieldBuilder = newDotExpr(castBuilder, fieldName)

    buildBody.add quote do:
      `tblBuilder`.add(`fieldBuilder`.build)

  buildBody.add quote do:
    `tblBuilder`.build

  let buildProc = newProc(
    name = postfix(buildProcName, "*"),
    params = [
      ident("ArrowTable"),
      nnkIdentDefs.newTree(paramBuilder, typedBuilder, newEmptyNode())
    ],
    body = buildBody
  )

  # --- schema: expose the schema stored on the generated builder ---
  let schemaBody = quote do:
    let `castBuilder` = cast[`builderTypName`](`paramBuilder`)
    `castBuilder`.schema

  let schemaProc = newProc(
    name = postfix(schemaProcName, "*"),
    params = [
      ident("ArrowSchema"),
      nnkIdentDefs.newTree(paramBuilder, typedBuilder, newEmptyNode())
    ],
    body = schemaBody
  )

  # --- iter: convert each column to a seq, then yield one object per row ---
  let
    size = ident("size")
    iterBody = newStmtList()
    iterIndex = ident("i")
    objConstr = newNimNode(nnkObjConstr)

  iterBody.add quote do:
    let `size` = `paramTable`.len

  objConstr.add ident($typ)

  for i, identDefs in recList:
    let
      fieldName = identDefs[0][1]
      fieldType = ident($(identDefs[1]))
      arrName = ident($fieldName & "Arr")

    # `i` here is the macro's loop counter, so each column is looked up by
    # the position of its field in the object definition.
    iterBody.add quote do:
      let `arrName` = @(`paramTable`.col(`fieldType`, `i`))

    objConstr.add newColonExpr(
      fieldName,
      nnkBracketExpr.newTree(arrName, iterIndex))

  iterBody.add quote do:
    for `iterIndex` in 0 ..< `size`:
      yield `objConstr`

  let iterProc = nnkIteratorDef.newTree(
    postfix(iterProcName, "*"),
    newEmptyNode(),
    newEmptyNode(),
    nnkFormalParams.newTree(
      typ,
      nnkIdentDefs.newTree(paramTable, ident("ArrowTable"), newEmptyNode()),
      nnkIdentDefs.newTree(tag, typTag, newEmptyNode())
    ),
    nnkPragma.newTree(ident("inline")),
    newEmptyNode(),
    iterBody
  )

  result.add typSection
  result.add newBuilderProc
  result.add addProc
  result.add buildProc
  result.add schemaProc
  result.add iterProc
369 |
template newTypedBuilder*(T: typedesc): TypedBuilder[T] =
  ## Create a typed builder for `T` by forwarding to the
  ## `newTypedBuilder(TypeTag[T])` overload.
  newTypedBuilder(TypeTag[T]())
372 |
template iter*(tbl: ArrowTable, T: typedesc): untyped =
  ## Iterate over the ArrowTable, converting rows into the type `T`.
  ## Forwards to the `iter(tbl, TypeTag[T])` overload.
  iter(tbl, TypeTag[T]())
--------------------------------------------------------------------------------
/docs/nimarrow.html:
--------------------------------------------------------------------------------
1 |
2 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 | nimarrow
21 |
22 |
23 |
24 |
25 |
60 |
61 |
62 |
63 |
64 |
65 |
nimarrow
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
Dark Mode
74 |
75 |
76 |
77 |
78 | Index
79 |
80 |
81 |
82 |
83 | Search:
85 |
86 |
87 | Group by:
88 |
89 | Section
90 | Type
91 |
92 |
93 |
94 |
95 | Imports
96 |
99 |
100 |
101 | Exports
102 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
119 |
120 |
121 |
122 | getDataType , getDataType , build , =destroy , combine , newArrowChunkedArray , @ , getDataType , =destroy , toSeq , $ , NullBitmap , [] , getDataType , getDataType , $ , toSeq , chunks , newArrowArray , getDataType , newArrowArrayBuilder , getDataType , newEmptyArrowArray , getDataType , items , =destroy , getDataType , items , ArrowArray , ArrowArrayBuilder , getDataType , == , [] , TypeTag , glibPtr , getDataType , len , len , add , isNullAt , Bytes , newArrowArray , add , @ , chunk , [] , ArrowChunkedArray , getDataType , [] , getDataType , [] , toParquet , ParquetWriter , readFully , newParquetReader , close , close , =destroy , read , =destroy , ParquetReader , newParquetWriter , newParquetWriterProps , fromParquet , ParquetWriterProps , iter , rowGroups , add , newTypedParquetWriter , TypedParquetWriter , =destroy , add , newArrowTable , glibPtr , ArrowTable , ArrowField , newArrowField , ArrowSchema , glibPtr , newArrowSchema , =destroy , newArrowTableBuilder , iter , == , schema , col , =destroy , =destroy , newTypedBuilder , len , $ , add , TypedBuilder , col , TypeRegistered , newArrowTable , build , glibPtr , add , ArrowTableBuilder , newArrowSchema , registerTypedTable
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 | Made with Nim. Generated: 2021-07-08 21:05:38 UTC
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
--------------------------------------------------------------------------------
/src/nimarrow/arrays.nim:
--------------------------------------------------------------------------------
1 | import std/macros
2 | import std/options
3 |
4 | import nimarrow_glib
5 |
6 | import ./bitarray
7 |
type
  # An arrow array: the glib array handle plus the wrapped buffers it was
  # constructed from.
  ArrowArrayObj[T] = object
    offsets: WrappedBuffer[uint32]
    data: WrappedBuffer[T]
    nullBitmap: WrappedBuffer[NullBitmapBase]
    glibArray: GArrowArrayPtr

  ArrowArray*[T] = ref ArrowArrayObj[T]

  NullBitmapBase = uint32
  NullBitmap* = BitVector[NullBitmapBase]

  # A manually allocated buffer (`raw`) with an optional glib buffer handle
  # (`buf`) created over it. `bytes` is the allocated size in bytes;
  # `length` is advanced by one per `addElement` and by the byte count per
  # `addBytes`.
  WrappedBufferObj[T] = object
    raw: ptr UncheckedArray[T]
    buf: GArrowBufferPtr
    bytes: int64
    length: int64

  WrappedBuffer[T] = ref WrappedBufferObj[T]

  # Accumulates values, offsets and a null bitmap for an array under
  # construction.
  ArrowArrayBuilderObj[T] = object
    offsets: WrappedBuffer[uint32]
    data: WrappedBuffer[T]
    nullBitmap: NullBitmap
    nNulls: int64
    valid: bool

  ArrowArrayBuilder*[T] = ref ArrowArrayBuilderObj[T]

  # Wraps a glib chunked-array handle.
  ArrowChunkedArrayObj[T] = object
    glibChunkedArray: GArrowChunkedArrayPtr

  ArrowChunkedArray*[T] = ref ArrowChunkedArrayObj[T]

  Bytes* = seq[byte] ## Binary type

  TypeTag*[T] = object ## Empty container used to map generic type T into
                       ## the appropriate glib arrow data type internally.
46 |
proc `=destroy`*[T](x: var ArrowArrayObj[T]) =
  ## Drops this object's reference on the glib array, if any.
  if x.glibArray == nil:
    return
  gObjectUnref(x.glibArray)
50 |
proc `=destroy`*[T](x: var ArrowChunkedArrayObj[T]) =
  ## Drops this object's reference on the glib chunked array, if any.
  if x.glibChunkedArray == nil:
    return
  gObjectUnref(x.glibChunkedArray)
54 |
proc `=destroy`*[T](x: var WrappedBufferObj[T]) =
  ## Frees the raw allocation and drops the reference on the glib buffer
  ## handle; either step is skipped when its pointer is nil.
  if x.raw != nil: dealloc(x.raw)
  if x.buf != nil: gObjectUnref(x.buf)
61 |
proc isBinary(t: typedesc): bool =
  # True for the variable-length types: `string` and `Bytes`.
  result = (t is Bytes) or (t is string)
64 |
proc reserveBytes[T](b: WrappedBuffer[T], bytes: int64) =
  # Ensures the buffer has room for at least `bytes` bytes.
  #
  # The first allocation is rounded up to a multiple of 64 bytes. After
  # that, capacity grows by repeated doubling until it covers the request.
  # A single doubling is not enough when one append is larger than the
  # current capacity, and the following copy would overrun the allocation.
  if b.raw == nil:
    let newBytes = 64 * ((bytes + 64) div 64)
    b.raw = cast[ptr UncheckedArray[T]](alloc(newBytes))
    b.bytes = newBytes
  elif bytes > b.bytes:
    # start from at least 64 so the loop terminates even for a zero-sized
    # existing capacity.
    var newBytes = max(2 * b.bytes, 64)
    while newBytes < bytes:
      newBytes *= 2
    b.raw = cast[ptr UncheckedArray[T]](realloc(b.raw, newBytes))
    b.bytes = newBytes
74 |
proc reserveElements[T](b: WrappedBuffer[T], elems: int64) =
  # Ensures the buffer has room for at least `elems` elements of type T.
  #
  # The first allocation starts at max(64, 2 * sizeof(T)) bytes; capacity
  # then grows by repeated doubling until the requested element count
  # fits. Previously the first allocation ignored `elems` and later growth
  # doubled only once, which is only enough when elements arrive one at a
  # time.
  let needed = elems * sizeof(T)
  if b.raw == nil:
    var newBytes: int64 = max(64, 2 * sizeof(T))
    while newBytes < needed:
      newBytes *= 2
    b.raw = cast[ptr UncheckedArray[T]](alloc(newBytes))
    b.bytes = newBytes
  elif needed > b.bytes:
    # start from at least 64 so the loop terminates even for a zero-sized
    # existing capacity.
    var newBytes = max(2 * b.bytes, 64)
    while newBytes < needed:
      newBytes *= 2
    b.raw = cast[ptr UncheckedArray[T]](realloc(b.raw, newBytes))
    b.bytes = newBytes
84 |
proc addElement[T](b: WrappedBuffer[T], elem: T) =
  # Appends one element, growing the buffer first when necessary.
  let slot = b.length
  b.reserveElements(slot + 1)
  b.raw[slot] = elem
  b.length = slot + 1
89 |
proc addBytes[T](b: WrappedBuffer[T], data: pointer, size: Natural) =
  # Appends `size` raw bytes read from `data`. For buffers filled this way
  # `length` counts bytes, not elements of T.
  let writeAt = b.length
  b.reserveBytes(writeAt + size)
  let byteView = cast[ptr UncheckedArray[char]](b.raw)
  copyMem(addr byteView[writeAt], data, size)
  b.length = writeAt + size
95 |
proc setGlibBuffer[T](b: WrappedBuffer[T]) =
  # Creates the glib buffer handle over the raw allocation, passing the
  # allocated size in bytes (`b.bytes`).
  let handle = bufferNew(b.raw, b.bytes)
  b.buf = handle
98 |
# Forward declarations of the array builder API, so that procs defined
# earlier in this module (such as newArrowArray) can use it.
proc newArrowArrayBuilder*[T](): ArrowArrayBuilder[T]
proc add*[T](builder: ArrowArrayBuilder[T], x: T)
proc add*[T](builder: ArrowArrayBuilder[T], x: Option[T])
proc build*[T](builder: ArrowArrayBuilder[T]): ArrowArray[T]
103 |
proc copyToBuffer[T](arr: openArray[T]): WrappedBuffer[T] =
  # Copies `arr` into a freshly allocated WrappedBuffer.
  #
  # The allocation is the payload size rounded up to a multiple of 64
  # bytes, the same rounding reserveBytes uses. The previous expression
  # divided by 64 without multiplying back, so it produced a count of
  # 64-byte units instead of bytes; the buffer was far too small and the
  # copy loop wrote past it.
  let bytes = 64 * ((sizeof(T) * arr.len + 64) div 64)
  let raw = cast[ptr UncheckedArray[T]](alloc(bytes))
  for i, x in arr:
    raw[i] = x

  WrappedBuffer[T](
    raw: raw,
    bytes: bytes,
    length: arr.len)
114 |
proc emptyBuffer[T](): WrappedBuffer[T] =
  # A buffer with no allocation and no glib handle: a freshly created ref
  # object is zero-initialized, so raw/buf are nil and bytes/length are 0.
  new(result)
121 |
proc declareTypeTagProc(dtype, name: NimNode): NimNode =
  # Generates the exported `getDataType(tag: TypeTag[dtype])` overload,
  # whose body calls the data-type constructor identifier derived from
  # `name`.
  # NOTE(review): `getDataTypeIdent` is not defined in this module;
  # presumably it comes from nimarrow_glib - confirm.
  let dataTypeNew = getDataTypeIdent(name)
  let getDataType = ident"getDataType"
  quote do:
    proc `getDataType`*(tag: TypeTag[`dtype`]): GArrowDataTypePtr =
      `dataTypeNew`()
128 |
macro DeclareNumericArray(dtype, name: untyped): untyped =
  # Generates the per-type plumbing for a fixed-width element type
  # `dtype`: the `getDataType` overload plus module-private `construct`,
  # `getValue` and `iter` routines for ArrowArray[dtype]. `name` selects
  # the glib identifiers through the *Ident helpers.
  let
    construct = ident"construct"
    getValue = ident"getValue"
    iter = ident"iter"
    glibArrayNew = arrayNewIdent(name)
    glibGetValue = arrayGetValueIdent(name)
    glibGetValues = arrayGetValuesIdent(name)

  result = newStmtList()
  result.add declareTypeTagProc(dtype, name)
  result.add quote do:
    # `offsets` is accepted for signature parity with the binary variant
    # but is not passed on to the glib constructor.
    proc `construct`(arr: var ArrowArray[`dtype`], length: int64,
                     offsets: GArrowBufferPtr, data: GArrowBufferPtr,
                     nullBitmap: GArrowBufferPtr, nNulls: int64) =
      arr.glibArray = `glibArrayNew`(length, data, nullBitmap, nNulls)

    proc `getValue`(arr: ArrowArray[`dtype`], i: int64): `dtype` =
      `glibGetValue`(arr.glibArray, i)


    # Yields the values from the block returned by the glib get-values
    # call; this iterator does not consult the null bitmap.
    iterator `iter`(arr: ArrowArray[`dtype`]): `dtype` {.inline.} =
      var valuesRead: int64
      let values = `glibGetValues`(arr.glibArray, valuesRead)

      for i in 0 .. valuesRead - 1:
        yield values[i]
156 |
proc convertBytes[T](gbytes: GBytesPtr): T =
  # Copies the payload of `gbytes` into a new nim value of type T, which
  # must be `string` or `Bytes`; any other T trips the assertion.
  var nBytes: uint64
  let src = gbytesGetData(gbytes, nBytes)

  when T is Bytes:
    result = newSeq[byte](nBytes)
  elif T is string:
    result = newString(nBytes)
  else:
    doAssert false

  # an empty payload has no first element to take the address of.
  if nBytes == 0:
    return
  copyMem(addr result[0], src, nBytes)
169 |
macro DeclareBinaryArray(dtype, name: untyped): untyped =
  # Generates the per-type plumbing for a variable-length element type
  # `dtype`: the `getDataType` overload plus module-private `construct`,
  # `getValue` and `iter` routines for ArrowArray[dtype]. Unlike the
  # numeric variant, the glib constructor also receives the offsets buffer.
  let
    construct = ident"construct"
    getValue = ident"getValue"
    iter = ident"iter"
    glibArrayNew = arrayNewIdent(name)

  result = newStmtList()
  result.add declareTypeTagProc(dtype, name)
  result.add quote do:
    proc `construct`(arr: var ArrowArray[`dtype`], length: int64,
                     offsets: GArrowBufferPtr, data: GArrowBufferPtr,
                     nullBitmap: GArrowBufferPtr, nNulls: int64) =
      arr.glibArray = `glibArrayNew`(length, offsets, data, nullBitmap, nNulls)

    # A null element leaves `result` at its default (empty) value.
    proc `getValue`(arr: ArrowArray[`dtype`], i: int64): `dtype` =
      if not arrayIsNull(arr.glibArray, i):
        let gbytes = binaryArrayGetValue(arr.glibArray, i)
        result = convertBytes[`dtype`](gbytes)

    # Yields every element in order; null elements are yielded as the
    # default (empty) value of the element type.
    iterator `iter`(arr: ArrowArray[`dtype`]): `dtype` {.inline.} =
      var empty: `dtype`
      let size = arrayGetlength(arr.glibArray)
      for i in 0 ..< size:
        if arrayIsNull(arr.glibArray, i):
          yield empty
        else:
          yield arr.getValue(i)
198 |
# Instantiate the generated routines for every supported element type.
# The first argument is the nim type, the second the name fragment used to
# derive the glib identifiers.
DeclareNumericArray(bool, boolean)
DeclareNumericArray(int8, int8)
DeclareNumericArray(uint8, uint8)
DeclareNumericArray(int16, int16)
DeclareNumericArray(uint16, uint16)
DeclareNumericArray(int32, int32)
DeclareNumericArray(uint32, uint32)
DeclareNumericArray(int64, int64)
DeclareNumericArray(uint64, uint64)
DeclareNumericArray(float32, float)
DeclareNumericArray(float64, double)

# TODO: handle dates
# DeclareNumericArray(int32, date32)
# DeclareNumericArray(int64, date64)

# Variable-length types.
DeclareBinaryArray(string, string)
DeclareBinaryArray(Bytes, binary)
217 |
proc newArrowArray[T](offsets: WrappedBuffer[uint32], data: WrappedBuffer[T],
                      nullBitmap: WrappedBuffer[NullBitmapBase],
                      nNulls: int64): ArrowArray[T] =
  # Assembles an ArrowArray from already-filled buffers: creates the glib
  # buffer handles, works out the element count and calls the generated
  # `construct` for T. The buffers are stored on the result.
  #
  # `offsets` may be nil. It used to be nil-checked only before
  # setGlibBuffer and then dereferenced unconditionally further down.
  if offsets != nil:
    offsets.setGlibBuffer()
  data.setGlibBuffer()
  nullBitmap.setGlibBuffer()

  let length = when isBinary(T):
    # n elements are described by n + 1 offsets. With no offsets at all
    # (the empty array) the count is 0, not -1.
    if offsets != nil and offsets.length > 0: offsets.length - 1
    else: 0'i64
  else:
    data.length

  let offsetsBuf: GArrowBufferPtr =
    if offsets != nil: offsets.buf
    else: nil

  result = new(ArrowArray[T])
  result.offsets = offsets
  result.data = data
  result.nullBitmap = nullBitmap
  construct[T](result, length, offsetsBuf, data.buf, nullBitmap.buf, nNulls)
236 |
proc newEmptyArrowArray*[T](): ArrowArray[T] =
  ## Builds an arrow array of type T from empty offsets, data and null
  ## bitmap buffers, with a null count of zero.
  let
    noOffsets = emptyBuffer[uint32]()
    noData = emptyBuffer[T]()
    noBitmap = emptyBuffer[NullBitmapBase]()
  result = newArrowArray[T](noOffsets, noData, noBitmap, 0)
241 |
proc newArrowArray*[T](data: openArray[T]): ArrowArray[T] =
  ## Builds an arrow array of type T holding the elements of `data`.
  ## Every element is appended to a fresh builder, so `data` is copied
  ## into a new internal buffer. Use an ArrowArrayBuilder[T] directly to
  ## avoid the copy.
  let filler = newArrowArrayBuilder[T]()
  for element in data:
    filler.add(element)
  result = filler.build()
251 |
proc newArrowArray*[T](data: openArray[Option[T]]): ArrowArray[T] =
  ## Builds an arrow array of type T from optional values; `none(T)`
  ## entries become nulls. Every element is appended to a fresh builder,
  ## so `data` is copied into a new internal buffer. Use an
  ## ArrowArrayBuilder[T] directly to avoid the copy.
  let filler = newArrowArrayBuilder[T]()
  for maybeElement in data:
    filler.add(maybeElement)
  result = filler.build()
262 |
proc len*[T](arr: ArrowArray[T]): int64 =
  ## Number of elements in the arrow array, as reported by glib.
  result = arr.glibArray.arrayGetLength
266 |
proc isNullAt*[T](arr: ArrowArray[T], i: int64): bool =
  ## True when the element at index `i` is null.
  result = arr.glibArray.arrayIsNull(i)
270 |
proc toSeq*[T](arr: ArrowArray[T]): seq[T] =
  ## Copies every element of the arrow array into a new seq[T].
  let capacity = arr.len
  result = newSeqOfCap[T](capacity)
  for element in arr:
    result.add(element)
276 |
proc `@`*[T](arr: ArrowArray[T]): seq[T] =
  ## Operator form of `toSeq`: copies the arrow array into a seq[T].
  result = toSeq(arr)
280 |
proc `$`*[T](arr: ArrowArray[T]): string =
  ## String representation of the array as produced by glib. If glib
  ## reports an error, the error message is returned instead.
  var err: GErrorPtr
  let rendered = arrayToString(arr.glibArray, err)
  if err == nil:
    # Copy into a Nim string before releasing the glib-owned memory.
    result = $rendered
    gfree(rendered)
    return

  result = $err.message
  gErrorFree(err)
291 |
proc `[]`*[T](arr: ArrowArray[T], i: int64): T =
  ## Element at index `i`. Null entries come back as 0, so call
  ## `isNullAt` first when the array may contain nulls.
  result = getValue(arr, i)
297 |
proc `[]`*[T](arr: ArrowArray[T], i: int): T =
  ## `int` convenience overload of the int64 index accessor. Null entries
  ## come back as 0, so call `isNullAt` first when the array may contain
  ## nulls.
  let wideIndex = int64(i)
  result = arr[wideIndex]
303 |
proc `[]`*[T](arr: ArrowArray[T], slice: Slice[int]): ArrowArray[T] =
  ## `int` convenience overload: widens both bounds to int64 and forwards
  ## to the Slice[int64] accessor.
  let
    first = int64(slice.a)
    last = int64(slice.b)
  result = arr[first .. last]
307 |
proc `[]`*[T](arr: ArrowArray[T], slice: Slice[int64]): ArrowArray[T] =
  ## Returns a slice of this array for the given range.
  ##
  ## The slice starts at `slice.a` and holds `slice.b - slice.a` elements,
  ## i.e. `slice.b` is treated as an exclusive end (it may equal the array
  ## length). Out-of-range or reversed bounds fail the assertion.
  let length = arr.len
  doAssert(slice.a >= 0 and slice.a < length and slice.b >= 0 and
           slice.b <= length and slice.a <= slice.b)

  let count = slice.b - slice.a
  # NOTE(review): the result carries only the glib handle, not `arr`'s
  # offsets/data/nullBitmap buffers -- confirm the glib slice keeps the
  # underlying memory alive on its own.
  result = ArrowArray[T](glibArray: arraySlice(arr.glibArray, slice.a, count))
318 |
iterator items*[T](arr: ArrowArray[T]): T {.inline.} =
  ## Yields each element of the array in order by delegating to the
  ## type-specific `iter` iterator.
  for element in iter(arr):
    yield element
323 |
proc glibPtr*[T](arr: ArrowArray[T]): GArrowArrayPtr =
  ## Exposes the underlying glib array pointer.
  result = arr.glibArray
327 |
proc newArrowArrayBuilder*[T](): ArrowArrayBuilder[T] =
  ## Creates an empty, valid builder: empty offsets and data buffers, an
  ## empty null bitmap and a null count of zero.
  result = ArrowArrayBuilder[T](
    valid: true,
    nNulls: 0,
    nullBitmap: newBitVector[NullBitmapBase](0),
    data: emptyBuffer[T](),
    offsets: emptyBuffer[uint32]())
336 |
proc addNumeric[T](builder: ArrowArrayBuilder[T], x: T) =
  ## Appends one fixed-width element to the builder's data buffer.
  ## Asserts that the builder is still valid.
  doAssert(builder.valid)
  addElement(builder.data, x)
340 |
proc addBinary[T](builder: ArrowArrayBuilder[T], x: T) =
  ## Appends one variable-length element: its bytes go to the data buffer
  ## and its start position (the data length before the append) goes to
  ## the offsets buffer. Asserts that the builder is still valid.
  # Same guard as addNumeric: `build` clears `builder.data`, so adding
  # afterwards would otherwise dereference a nil buffer.
  doAssert(builder.valid)
  let currentOffset = builder.data.length
  if x.len > 0:
    # An empty value has no first element to take the address of.
    builder.data.addBytes(unsafeAddr x[0], x.len)

  builder.offsets.addElement uint32(currentOffset)
347 |
proc add*[T](builder: ArrowArrayBuilder[T], x: T) =
  ## Appends a non-null element to the array being built and marks its
  ## slot as valid in the null bitmap.
  when not isBinary(T):
    addNumeric(builder, x)
  else:
    addBinary(builder, x)

  builder.nullBitmap.add(1)
356 |
proc add*[T](builder: ArrowArrayBuilder[T], x: Option[T]) =
  ## Add a value or null to the array, none(T) is treated as null.
  ##
  ## A null still occupies a slot: a zero-initialized placeholder element
  ## is appended, its bit in the null bitmap is cleared and the null count
  ## is incremented.
  if x.isSome:
    when isBinary(T):
      builder.addBinary(x.get)
    else:
      builder.addNumeric(x.get)

    builder.nullBitmap.add 1

  else:
    # Use T's zero value rather than the literal `0`, which does not
    # convert to every element type (e.g. bool).
    var placeholder: T
    when isBinary(T):
      builder.addBinary(placeholder)
    else:
      builder.addNumeric(placeholder)

    builder.nullBitmap.add 0
    builder.nNulls += 1
376 |
proc build*[T](builder: ArrowArrayBuilder[T]): ArrowArray[T] =
  ## Construct an arrow array from the builder's buffer. This does NOT
  ## create a copy of the data, and instead transfers ownership of the
  ## internal buffer to the array. After this is called, the builder
  ## is no longer valid and cannot be mutated.
  ##
  ## Asserts that the builder has not already been built.
  # A second call would otherwise hand the nil `data` buffer left behind
  # by the first call to newArrowArray.
  doAssert(builder.valid)

  when isBinary(T):
    # Close the last element: the final offset is the total data length.
    let currentOffset = builder.data.length
    builder.offsets.addElement uint32(currentOffset)

  let nullBitmapBuf = copyToBuffer[NullBitmapBase](builder.nullBitmap.base)
  result = newArrowArray[T](builder.offsets, builder.data,
                            nullBitmapBuf, builder.nNulls)
  builder.valid = false
  builder.data = nil
390 | builder.data = nil
391 |
proc newArrowChunkedArray*[T](
    glibChunkedArray: GArrowChunkedArrayPtr): ArrowChunkedArray[T] =
  ## Wraps a glib chunked array pointer in an ArrowChunkedArray[T].
  ## Asserts that the value data type of the glib array equals the data
  ## type registered for T. Both data type handles are unreferenced
  ## before returning.
  let dtype = chunkedArrayGetValueDataType(glibChunkedArray)
  try:
    let expectedDtype = getDataType(TypeTag[T]())
    try:
      doAssert dataTypeEqual(dtype, expectedDtype)
      result = ArrowChunkedArray[T](glibChunkedArray: glibChunkedArray)
    finally:
      gObjectUnref(expectedDtype)
  finally:
    gObjectUnref(dtype)
403 |
proc len*[T](chunkedArray: ArrowChunkedArray[T]): uint64 =
  ## Total number of elements across all chunks.
  result = chunkedArray.glibChunkedArray.chunkedArrayGetNRows
407 |
proc `$`*[T](chunkedArray: ArrowChunkedArray[T]): string =
  ## String representation of the chunked array as produced by glib.
  ## Raises CatchableError carrying the glib message if glib reports an
  ## error.
  var err: GErrorPtr
  let rendered = chunkedArrayToString(chunkedArray.glibChunkedArray, err)
  if err != nil:
    # Copy the message out before the error object is freed.
    let message = $err.message
    gErrorFree(err)
    raise newException(CatchableError, message)

  result = $rendered
  gfree(rendered)
418 |
proc toSeq*[T](chunkedArray: ArrowChunkedArray[T]): seq[T] =
  ## Copies the elements of every chunk, in chunk order, into one seq[T].
  result = @[]
  let chunkCount = chunkedArray.chunks
  var index = 0'u
  while index < chunkCount:
    result.add @(chunkedArray.chunk(index))
    inc index
424 |
proc `@`*[T](chunkedArray: ArrowChunkedArray[T]): seq[T] =
  ## Operator form of `toSeq`: copies the chunked array into a seq[T].
  result = toSeq(chunkedArray)
428 |
proc `[]`*[T](chunkedArray: ArrowChunkedArray[T], i: int64): T =
  ## Element at logical index `i`, counting across all chunks. Asserts
  ## that `i` is below the total length.
  doAssert uint64(i) < chunkedArray.len

  # TODO: lookup table + binsearch?
  # Walk the chunks, tracking how many elements precede the current one,
  # until the current chunk covers index `i`.
  var chunkIndex = 0'u
  var consumed = 0'i64
  var current = chunkedArray.chunk(chunkIndex)
  while consumed + current.len <= i:
    consumed += current.len
    inc chunkIndex
    current = chunkedArray.chunk(chunkIndex)

  result = current[i - consumed]
447 |
proc `==`*[T](a, b: ArrowChunkedArray[T]): bool =
  ## Equality of two chunked arrays, as decided by glib.
  let
    lhs = a.glibChunkedArray
    rhs = b.glibChunkedArray
  result = chunkedArrayEqual(lhs, rhs)
451 |
proc chunks*[T](chunkedArray: ArrowChunkedArray[T]): uint =
  ## Number of chunks in the chunked array.
  result = chunkedArray.glibChunkedArray.chunkedArrayGetNChunks
455 |
proc chunk*[T](chunkedArray: ArrowChunkedArray[T], i: uint): ArrowArray[T] =
  ## Returns the chunk at index `i` wrapped as an ArrowArray[T]. Asserts
  ## that `i` is a valid chunk index and that glib returned a chunk.
  doAssert i < chunkedArray.chunks
  let glibArray = chunkedArray.glibChunkedArray.chunkedArrayGetChunk(i)
  doAssert glibArray != nil
  result = ArrowArray[T](glibArray: glibArray)
462 |
proc combine*[T](chunkedArray: ArrowChunkedArray[T]): ArrowArray[T] =
  ## Merges all chunks into a single array (this creates a copy). Raises
  ## CatchableError carrying the glib message if glib reports an error.
  # NOTE(review): `x.f[T]` may parse as indexing the result of `x.f`
  # rather than as a generic call -- confirm this line compiles when
  # `combine` is instantiated.
  doAssert chunkedArray.isCorrectType[T]
  var err: GErrorPtr
  let glibArray = chunkedArrayCombine(chunkedArray.glibChunkedArray, err)
  if err != nil:
    # Copy the message out before the error object is freed.
    let message = $err.message
    gErrorFree(err)
    raise newException(CatchableError, message)

  doAssert glibArray != nil

  result = ArrowArray[T](glibArray: glibArray)
476 |
iterator items*[T](chunkedArray: ArrowChunkedArray[T]): T {.inline.} =
  ## Yields every element of the logical array, chunk by chunk in order.
  let chunkCount = chunkedArray.chunks
  var index = 0'u
  while index < chunkCount:
    for element in chunkedArray.chunk(index):
      yield element
    inc index
485 |
--------------------------------------------------------------------------------
/docs/nimarrow/bitarray.html:
--------------------------------------------------------------------------------
1 |
2 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 | nimarrow/bitarray
21 |
22 |
23 |
24 |
25 |
60 |
61 |
62 |
63 |
64 |
65 |
nimarrow/bitarray
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
Dark Mode
74 |
75 |
76 |
77 |
78 | Index
79 |
80 |
81 |
82 |
83 | Search:
85 |
86 |
87 | Group by:
88 |
89 | Section
90 | Type
91 |
92 |
93 |
94 |
95 | Types
96 |
105 |
106 |
107 | Funcs
108 |
109 |
114 |
119 |
124 |
129 |
134 |
139 |
144 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 | Units = SomeUnsignedInt
165 |
166 |
167 |
168 |
169 |
170 |
171 | BitVector [ T ] = object
172 | base * : seq [ T ]
173 | bitlength : int
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 |
182 |
183 |
184 |
185 |
186 | func newBitVector [ T ] ( size : int ; init = 0 ) : BitVector [ T ] { ... } {. inline .}
187 |
188 |
189 | Create new in-memory BitVector of type T and number of elements is size rounded up to the nearest byte. You can initialize the bitvector to 1 by passing any value other than zero to init.
190 |
191 |
192 |
193 | func `[]` [ T ] ( b : BitVector [ T ] ; i : int ) : Bit { ... } {. inline .}
194 |
195 |
196 |
197 |
198 |
199 |
200 | func `[]=` [ T ] ( b : var BitVector [ T ] ; i : int ; value : Bit ) { ... } {. inline .}
201 |
202 |
203 |
204 |
205 |
206 |
207 | func add [ T ] ( b : var BitVector [ T ] ; value : Bit ) { ... } {. inline .}
208 |
209 |
210 | Add an element to the end of the BitVector.
211 |
212 |
213 |
214 | func cap [ T ] ( b : BitVector [ T ] ) : int { ... } {. inline .}
215 |
216 |
217 | Returns the capacity, i.e. the number of bits
218 |
219 |
220 |
221 | func len [ T ] ( b : BitVector [ T ] ) : int { ... } {. inline .}
222 |
223 |
224 | Returns the length, i.e. the number of elements
225 |
226 |
227 |
228 | func `==` ( x , y : BitVector ) : bool
229 |
230 |
231 |
232 |
233 |
234 |
235 | func `$` [ T ] ( b : BitVector [ T ] ) : string { ... } {. inline .}
236 |
237 |
238 | Prints number of bits and elements the BitVector is capable of handling. It also prints out a slice if specified in little endian format.
239 |
240 |
241 |
242 |
243 |
244 |
245 |
246 |
247 |
248 |
249 |
250 |
251 | Made with Nim. Generated: 2021-07-08 21:05:38 UTC
252 |
253 |
254 |
255 |
256 |
257 |
258 |
259 |
--------------------------------------------------------------------------------
/docs/nimarrow/parquet.html:
--------------------------------------------------------------------------------
1 |
2 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 | nimarrow/parquet
21 |
22 |
23 |
24 |
25 |
60 |
61 |
62 |
63 |
64 |
65 |
nimarrow/parquet
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
Dark Mode
74 |
75 |
76 |
77 |
78 | Index
79 |
80 |
81 |
82 |
83 | Search:
85 |
86 |
87 | Group by:
88 |
89 | Section
90 | Type
91 |
92 |
93 |
94 |
95 | Imports
96 |
99 |
100 |
101 | Types
102 |
115 |
116 |
117 | Procs
118 |
119 |
126 |
133 |
138 |
148 |
153 |
160 |
165 |
171 |
176 |
185 |
191 |
196 |
197 |
198 |
199 |
200 | Iterators
201 |
206 |
207 |
208 |
209 |
210 |
211 |
212 |
213 |
214 |
215 |
220 |
256 |
257 |
258 |
259 |
260 | proc `=destroy` ( x : var ParquetWriterPropsObj ) { ... } {. raises : [ ] , tags : [ ] .}
261 |
262 |
263 |
264 |
265 |
266 |
267 | proc `=destroy` ( x : var ParquetWriterObj ) { ... } {. raises : [ ] , tags : [ ] .}
268 |
269 |
270 |
271 |
272 |
273 |
274 | proc `=destroy` ( x : var ParquetReaderObj ) { ... } {. raises : [ ] , tags : [ ] .}
275 |
276 |
277 |
278 |
279 |
280 |
281 | proc newParquetWriterProps ( compression : GArrowCompressionType = GARROW_COMPRESSION_TYPE_SNAPPY ;
282 | enableDictionary : bool = true ;
283 | dictionaryPageSizeLimit : Option [ int64 ] = none ( int64 ) ;
284 | batchSize : Option [ int64 ] = none ( int64 ) ;
285 | maxRowGroupLength : Option [ int64 ] = none ( int64 ) ;
286 | dataPageSize : Option [ int64 ] = none ( int64 ) ) : ParquetWriterProps { ... } {.
287 | raises : [ ] , tags : [ ] .}
288 |
289 |
290 | Construct a new parquet writer properties object, optionally overriding the default settings.
291 |
292 |
293 |
294 | proc newParquetWriter ( schema : ArrowSchema ; path : string ; props : Option [
295 | ParquetWriterProps ] = none ( ParquetWriterProps ) ) : ParquetWriter { ... } {.
296 | raises : [ IOError ] , tags : [ ] .}
297 |
298 |
299 | Construct a new parquet writer which will write to the local file at path .
300 |
301 |
302 |
303 | proc add ( w : ParquetWriter ; table : ArrowTable ) { ... } {. raises : [ IOError ] , tags : [ ] .}
304 |
305 |
306 | Add this table to the parquet file being written.
307 |
308 |
309 |
310 | proc close ( w : ParquetWriter ) { ... } {. raises : [ IOError ] , tags : [ ] .}
311 |
312 |
313 | Close the parquet file for writing. NOTE: this MUST be called when done writing or the file will not be valid! This does not simply close the file descriptor, it finalizes the file by writing the parquet footer/metadata.
314 |
315 |
316 |
317 | proc toParquet ( t : ArrowTable ; path : string ;
318 | props : Option [ ParquetWriterProps ] = none ( ParquetWriterProps ) ) { ... } {.
319 | raises : [ IOError ] , tags : [ ] .}
320 |
321 |
322 | Write this table to a parquet file on the local filesystem at path .
323 |
324 |
325 |
326 | proc fromParquet ( path : string ) : ArrowTable { ... } {. raises : [ IOError ] , tags : [ ] .}
327 |
328 |
329 | Read a parquet file from the local filesystem at path into a Table.
330 |
331 |
332 |
333 | proc newTypedParquetWriter [ T : TypeRegistered ] ( path : string ;
334 | props : Option [ ParquetWriterProps ] = none ( ParquetWriterProps ) ) : TypedParquetWriter [
335 | T ]
336 |
337 |
338 | Create a new typed parquet writer, writing to local path path .
339 |
340 |
341 |
342 | proc add [ T ] ( w : TypedParquetWriter [ T ] ; x : T )
343 |
344 |
345 | Append an element to the parquet file being written.
346 |
347 |
348 |
349 | proc close [ T ] ( w : TypedParquetWriter [ T ] )
350 |
351 |
352 | Close the parquet file for writing. NOTE: this MUST be called when done writing or the file will not be valid! This does not simply close the file descriptor, it finalizes the file by writing the parquet footer/metadata.
353 |
354 |
355 |
356 | proc newParquetReader ( path : string ; useThreads : bool = true ) : ParquetReader { ... } {.
357 | raises : [ IOError ] , tags : [ ] .}
358 |
359 |
360 | Create a new parquet reader, reading the local path path .
361 |
362 |
363 |
364 | proc rowGroups ( r : ParquetReader ) : int { ... } {. raises : [ ] , tags : [ ] .}
365 |
366 |
367 | Return the number of row groups in the file being read.
368 |
369 |
370 |
371 | proc read ( r : ParquetReader ; rowGroup : int ) : ArrowTable { ... } {. raises : [ IOError ] ,
372 | tags : [ ] .}
373 |
374 |
375 | Read the row group at index rowGroup as an ArrowTable.
376 |
377 |
378 |
379 | proc readFully ( r : ParquetReader ) : ArrowTable { ... } {. raises : [ IOError ] , tags : [ ] .}
380 |
381 |
382 | Read the entire parquet file into an ArrowTable.
383 |
384 |
385 |
386 |
387 |
388 |
389 |
390 |
391 | iterator iter ( r : ParquetReader ; T : typedesc ) : T:type { ... } {. inline .}
392 |
393 |
394 | Iterate over the file, converting the rows into the custom type T .
395 |
396 |
397 |
398 |
399 |
400 |
401 |
402 |
403 |
404 |
405 |
406 |
407 | Made with Nim. Generated: 2021-07-08 21:05:38 UTC
408 |
409 |
410 |
411 |
412 |
413 |
414 |
415 |
--------------------------------------------------------------------------------
/docs/nimdoc.out.css:
--------------------------------------------------------------------------------
1 | /*
2 | Stylesheet for use with Docutils/rst2html.
3 |
4 | See http://docutils.sf.net/docs/howto/html-stylesheets.html for how to
5 | customize this style sheet.
6 |
7 | Modified from Chad Skeeters' rst2html-style
8 | https://bitbucket.org/cskeeters/rst2html-style/
9 |
10 | Modified by Boyd Greenfield and narimiran
11 | */
12 |
13 | :root {
14 | --primary-background: #fff;
15 | --secondary-background: ghostwhite;
16 | --third-background: #e8e8e8;
17 | --border: #dde;
18 | --text: #222;
19 | --anchor: #07b;
20 | --anchor-focus: #607c9f;
21 | --input-focus: #1fa0eb;
22 | --strong: #3c3c3c;
23 | --hint: #9A9A9A;
24 | --nim-sprite-base64: url("");
25 |
26 | --keyword: #5e8f60;
27 | --identifier: #222;
28 | --comment: #484a86;
29 | --operator: #155da4;
30 | --punctuation: black;
31 | --other: black;
32 | --escapeSequence: #c4891b;
33 | --number: #252dbe;
34 | --literal: #a4255b;
35 | --raw-data: #a4255b;
36 | }
37 |
38 | [data-theme="dark"] {
39 | --primary-background: #171921;
40 | --secondary-background: #1e202a;
41 | --third-background: #2b2e3b;
42 | --border: #0e1014;
43 | --text: #fff;
44 | --anchor: #8be9fd;
45 | --anchor-focus: #8be9fd;
46 | --input-focus: #8be9fd;
47 | --strong: #bd93f9;
48 | --hint: #7A7C85;
49 | --nim-sprite-base64: url("");
50 |
51 | --keyword: #ff79c6;
52 | --identifier: #f8f8f2;
53 | --comment: #6272a4;
54 | --operator: #ff79c6;
55 | --punctuation: #f8f8f2;
56 | --other: #f8f8f2;
57 | --escapeSequence: #bd93f9;
58 | --number: #bd93f9;
59 | --literal: #f1fa8c;
60 | --raw-data: #8be9fd;
61 | }
62 |
63 | .theme-switch-wrapper {
64 | display: flex;
65 | align-items: center;
66 | }
67 |
68 | .theme-switch-wrapper em {
69 | margin-left: 10px;
70 | font-size: 1rem;
71 | }
72 |
73 | .theme-switch {
74 | display: inline-block;
75 | height: 22px;
76 | position: relative;
77 | width: 50px;
78 | }
79 |
80 | .theme-switch input {
81 | display: none;
82 | }
83 |
84 | .slider {
85 | background-color: #ccc;
86 | bottom: 0;
87 | cursor: pointer;
88 | left: 0;
89 | position: absolute;
90 | right: 0;
91 | top: 0;
92 | transition: .4s;
93 | }
94 |
95 | .slider:before {
96 | background-color: #fff;
97 | bottom: 4px;
98 | content: "";
99 | height: 13px;
100 | left: 4px;
101 | position: absolute;
102 | transition: .4s;
103 | width: 13px;
104 | }
105 |
106 | input:checked + .slider {
107 | background-color: #66bb6a;
108 | }
109 |
110 | input:checked + .slider:before {
111 | transform: translateX(26px);
112 | }
113 |
114 | .slider.round {
115 | border-radius: 17px;
116 | }
117 |
118 | .slider.round:before {
119 | border-radius: 50%;
120 | }
121 |
122 | html {
123 | font-size: 100%;
124 | -webkit-text-size-adjust: 100%;
125 | -ms-text-size-adjust: 100%; }
126 |
127 | body {
128 | font-family: "Lato", "Helvetica Neue", "HelveticaNeue", Helvetica, Arial, sans-serif;
129 | font-weight: 400;
130 | font-size: 1.125em;
131 | line-height: 1.5;
132 | color: var(--text);
133 | background-color: var(--primary-background); }
134 |
135 | /* Skeleton grid */
136 | .container {
137 | position: relative;
138 | width: 100%;
139 | max-width: 1050px;
140 | margin: 0 auto;
141 | padding: 0;
142 | box-sizing: border-box; }
143 |
144 | .column,
145 | .columns {
146 | width: 100%;
147 | float: left;
148 | box-sizing: border-box;
149 | margin-left: 1%;
150 | }
151 |
152 | .column:first-child,
153 | .columns:first-child {
154 | margin-left: 0; }
155 |
156 | .three.columns {
157 | width: 22%;
158 | }
159 |
160 | .nine.columns {
161 | width: 77.0%; }
162 |
163 | .twelve.columns {
164 | width: 100%;
165 | margin-left: 0; }
166 |
167 | @media screen and (max-width: 860px) {
168 | .three.columns {
169 | display: none;
170 | }
171 | .nine.columns {
172 | width: 98.0%;
173 | }
174 | body {
175 | font-size: 1em;
176 | line-height: 1.35;
177 | }
178 | }
179 |
180 | cite {
181 | font-style: italic !important; }
182 |
183 |
184 | /* Nim search input */
185 | div#searchInputDiv {
186 | margin-bottom: 1em;
187 | }
188 | input#searchInput {
189 | width: 80%;
190 | }
191 |
192 | /*
193 | * Some custom formatting for input forms.
194 | * This also fixes input form colors on Firefox with a dark system theme on Linux.
195 | */
196 | input {
197 | -moz-appearance: none;
198 | background-color: var(--secondary-background);
199 | color: var(--text);
200 | border: 1px solid var(--border);
201 | font-family: "Lato", "Helvetica Neue", "HelveticaNeue", Helvetica, Arial, sans-serif;
202 | font-size: 0.9em;
203 | padding: 6px;
204 | }
205 |
206 | input:focus {
207 | border: 1px solid var(--input-focus);
208 | box-shadow: 0 0 3px var(--input-focus);
209 | }
210 |
211 | select {
212 | -moz-appearance: none;
213 | background-color: var(--secondary-background);
214 | color: var(--text);
215 | border: 1px solid var(--border);
216 | font-family: "Lato", "Helvetica Neue", "HelveticaNeue", Helvetica, Arial, sans-serif;
217 | font-size: 0.9em;
218 | padding: 6px;
219 | }
220 |
221 | select:focus {
222 | border: 1px solid var(--input-focus);
223 | box-shadow: 0 0 3px var(--input-focus);
224 | }
225 |
226 | /* Docgen styles */
227 | /* Links */
228 | a {
229 | color: var(--anchor);
230 | text-decoration: none;
231 | }
232 |
233 | a span.Identifier {
234 | text-decoration: underline;
235 | text-decoration-color: #aab;
236 | }
237 |
238 | a.reference-toplevel {
239 | font-weight: bold;
240 | }
241 |
242 | a.toc-backref {
243 | text-decoration: none;
244 | color: var(--text); }
245 |
246 | a.link-seesrc {
247 | color: #607c9f;
248 | font-size: 0.9em;
249 | font-style: italic; }
250 |
251 | a:hover,
252 | a:focus {
253 | color: var(--anchor-focus);
254 | text-decoration: underline; }
255 |
256 | a:hover span.Identifier {
257 | color: var(--anchor);
258 | }
259 |
260 |
261 | sub,
262 | sup {
263 | position: relative;
264 | font-size: 75%;
265 | line-height: 0;
266 | vertical-align: baseline; }
267 |
268 | sup {
269 | top: -0.5em; }
270 |
271 | sub {
272 | bottom: -0.25em; }
273 |
274 | img {
275 | width: auto;
276 | height: auto;
277 | max-width: 100%;
278 | vertical-align: middle;
279 | border: 0;
280 | -ms-interpolation-mode: bicubic; }
281 |
282 | @media print {
283 | * {
284 | color: black !important;
285 | text-shadow: none !important;
286 | background: transparent !important;
287 | box-shadow: none !important; }
288 |
289 | a,
290 | a:visited {
291 | text-decoration: underline; }
292 |
293 | a[href]:after {
294 | content: " (" attr(href) ")"; }
295 |
296 | abbr[title]:after {
297 | content: " (" attr(title) ")"; }
298 |
299 | .ir a:after,
300 | a[href^="javascript:"]:after,
301 | a[href^="#"]:after {
302 | content: ""; }
303 |
304 | pre,
305 | blockquote {
306 | border: 1px solid #999;
307 | page-break-inside: avoid; }
308 |
309 | thead {
310 | display: table-header-group; }
311 |
312 | tr,
313 | img {
314 | page-break-inside: avoid; }
315 |
316 | img {
317 | max-width: 100% !important; }
318 |
319 | @page {
320 | margin: 0.5cm; }
321 |
322 | h1 {
323 | page-break-before: always; }
324 |
325 | h1.title {
326 | page-break-before: avoid; }
327 |
328 | p,
329 | h2,
330 | h3 {
331 | orphans: 3;
332 | widows: 3; }
333 |
334 | h2,
335 | h3 {
336 | page-break-after: avoid; }
337 | }
338 |
339 |
340 | p {
341 | margin-top: 0.5em;
342 | margin-bottom: 0.5em;
343 | }
344 |
345 | small {
346 | font-size: 85%; }
347 |
348 | strong {
349 | font-weight: 600;
350 | font-size: 0.95em;
351 | color: var(--strong);
352 | }
353 |
354 | em {
355 | font-style: italic; }
356 |
357 | h1 {
358 | font-size: 1.8em;
359 | font-weight: 400;
360 | padding-bottom: .25em;
361 | border-bottom: 6px solid var(--third-background);
362 | margin-top: 2.5em;
363 | margin-bottom: 1em;
364 | line-height: 1.2em; }
365 |
366 | h1.title {
367 | padding-bottom: 1em;
368 | border-bottom: 0px;
369 | font-size: 2.5em;
370 | text-align: center;
371 | font-weight: 900;
372 | margin-top: 0.75em;
373 | margin-bottom: 0em;
374 | }
375 |
376 | h2 {
377 | font-size: 1.3em;
378 | margin-top: 2em; }
379 |
380 | h2.subtitle {
381 | text-align: center; }
382 |
383 | h3 {
384 | font-size: 1.125em;
385 | font-style: italic;
386 | margin-top: 1.5em; }
387 |
388 | h4 {
389 | font-size: 1.125em;
390 | margin-top: 1em; }
391 |
392 | h5 {
393 | font-size: 1.125em;
394 | margin-top: 0.75em; }
395 |
396 | h6 {
397 | font-size: 1.1em; }
398 |
399 |
400 | ul,
401 | ol {
402 | padding: 0;
403 | margin-top: 0.5em;
404 | margin-left: 0.75em; }
405 |
406 | ul ul,
407 | ul ol,
408 | ol ol,
409 | ol ul {
410 | margin-bottom: 0;
411 | margin-left: 1.25em; }
412 |
413 | ul.simple > li {
414 | list-style-type: circle;
415 | }
416 |
417 | ul.simple-boot li {
418 | list-style-type: none;
419 | margin-left: 0em;
420 | margin-bottom: 0.5em;
421 | }
422 |
423 | ol.simple > li, ul.simple > li {
424 | margin-bottom: 0.2em;
425 | margin-left: 0.4em }
426 |
427 | ul.simple.simple-toc > li {
428 | margin-top: 1em;
429 | }
430 |
431 | ul.simple-toc {
432 | list-style: none;
433 | font-size: 0.9em;
434 | margin-left: -0.3em;
435 | margin-top: 1em; }
436 |
437 | ul.simple-toc > li {
438 | list-style-type: none;
439 | }
440 |
441 | ul.simple-toc-section {
442 | list-style-type: circle;
443 | margin-left: 0.8em;
444 | color: #6c9aae; }
445 |
446 | ul.nested-toc-section {
447 | list-style-type: circle;
448 | margin-left: -0.75em;
449 | color: var(--text);
450 | }
451 |
452 | ul.nested-toc-section > li {
453 | margin-left: 1.25em;
454 | }
455 |
456 |
457 | ol.arabic {
458 | list-style: decimal; }
459 |
460 | ol.loweralpha {
461 | list-style: lower-alpha; }
462 |
463 | ol.upperalpha {
464 | list-style: upper-alpha; }
465 |
466 | ol.lowerroman {
467 | list-style: lower-roman; }
468 |
469 | ol.upperroman {
470 | list-style: upper-roman; }
471 |
472 | ul.auto-toc {
473 | list-style-type: none; }
474 |
475 |
476 | dl {
477 | margin-bottom: 1.5em; }
478 |
479 | dt {
480 | margin-bottom: -0.5em;
481 | margin-left: 0.0em; }
482 |
483 | dd {
484 | margin-left: 2.0em;
485 | margin-bottom: 3.0em;
486 | margin-top: 0.5em; }
487 |
488 |
489 | hr {
490 | margin: 2em 0;
491 | border: 0;
492 | border-top: 1px solid #aaa; }
493 |
494 | blockquote {
495 | font-size: 0.9em;
496 | font-style: italic;
497 | padding-left: 0.5em;
498 | margin-left: 0;
499 | border-left: 5px solid #bbc;
500 | }
501 |
502 | .pre {
503 | font-family: "Source Code Pro", Monaco, Menlo, Consolas, "Courier New", monospace;
504 | font-weight: 500;
505 | font-size: 0.85em;
506 | color: var(--text);
507 | background-color: var(--third-background);
508 | padding-left: 3px;
509 | padding-right: 3px;
510 | border-radius: 4px;
511 | }
512 |
513 | pre {
514 | font-family: "Source Code Pro", Monaco, Menlo, Consolas, "Courier New", monospace;
515 | color: var(--text);
516 | font-weight: 500;
517 | display: inline-block;
518 | box-sizing: border-box;
519 | min-width: 100%;
520 | padding: 0.5em;
521 | margin-top: 0.5em;
522 | margin-bottom: 0.5em;
523 | font-size: 0.85em;
524 | white-space: pre !important;
525 | overflow-y: hidden;
526 | overflow-x: visible;
527 | background-color: var(--secondary-background);
528 | border: 1px solid var(--border);
529 | -webkit-border-radius: 6px;
530 | -moz-border-radius: 6px;
531 | border-radius: 6px; }
532 |
533 | .pre-scrollable {
534 | max-height: 340px;
535 | overflow-y: scroll; }
536 |
537 |
538 | /* Nim line-numbered tables */
539 | .line-nums-table {
540 | width: 100%;
541 | table-layout: fixed; }
542 |
/* The background uses the theme variable so the table follows the dark
   theme; in the light theme --secondary-background is ghostwhite, the
   value that was hard-coded here. */
table.line-nums-table {
  border-radius: 4px;
  border: 1px solid #cccccc;
  background-color: var(--secondary-background);
  border-collapse: separate;
  margin-top: 15px;
  margin-bottom: 25px; }
550 |
551 | .line-nums-table tbody {
552 | border: none; }
553 |
554 | .line-nums-table td pre {
555 | border: none;
556 | background-color: transparent; }
557 |
558 | .line-nums-table td.blob-line-nums {
559 | width: 28px; }
560 |
/* Line-number gutter: dimmed, right-aligned, no box of its own. */
.line-nums-table td.blob-line-nums pre {
  color: #b0b0b0;
  -webkit-filter: opacity(75%);
  /* Standard property, for engines that ignore the -webkit- prefix. */
  filter: opacity(75%);
  text-align: right;
  border-color: transparent;
  background-color: transparent;
  padding-left: 0px;
  margin-left: 0px;
  padding-right: 0px;
  margin-right: 0px; }
571 |
572 |
573 | table {
574 | max-width: 100%;
575 | background-color: transparent;
576 | margin-top: 0.5em;
577 | margin-bottom: 1.5em;
578 | border-collapse: collapse;
579 | border-color: var(--third-background);
580 | border-spacing: 0;
581 | font-size: 0.9em;
582 | }
583 |
584 | table th, table td {
585 | padding: 0px 0.5em 0px;
586 | border-color: var(--third-background);
587 | }
588 |
589 | table th {
590 | background-color: var(--third-background);
591 | border-color: var(--third-background);
592 | font-weight: bold; }
593 |
594 | table th.docinfo-name {
595 | background-color: transparent;
596 | text-align: right;
597 | }
598 |
599 | table tr:hover {
600 | background-color: var(--third-background); }
601 |
602 |
603 | /* rst2html default used to remove borders from tables and images */
604 | .borderless, table.borderless td, table.borderless th {
605 | border: 0; }
606 |
607 | table.borderless td, table.borderless th {
608 | /* Override padding for "table.docutils td" with "! important".
609 | The right padding separates the table cells. */
610 | padding: 0 0.5em 0 0 !important; }
611 |
612 | .first {
613 | /* Override more specific margin styles with "! important". */
614 | margin-top: 0 !important; }
615 |
616 | .last, .with-subtitle {
617 | margin-bottom: 0 !important; }
618 |
619 | .hidden {
620 | display: none; }
621 |
622 | blockquote.epigraph {
623 | margin: 2em 5em; }
624 |
625 | dl.docutils dd {
626 | margin-bottom: 0.5em; }
627 |
628 | object[type="image/svg+xml"], object[type="application/x-shockwave-flash"] {
629 | overflow: hidden; }
630 |
631 |
632 | div.figure {
633 | margin-left: 2em;
634 | margin-right: 2em; }
635 |
636 | div.footer, div.header {
637 | clear: both;
638 | text-align: center;
639 | color: #666;
640 | font-size: smaller; }
641 |
642 | div.footer {
643 | padding-top: 5em;
644 | }
645 |
646 | div.line-block {
647 | display: block;
648 | margin-top: 1em;
649 | margin-bottom: 1em; }
650 |
651 | div.line-block div.line-block {
652 | margin-top: 0;
653 | margin-bottom: 0;
654 | margin-left: 1.5em; }
655 |
656 | div.topic {
657 | margin: 2em; }
658 |
659 | div.search_results {
660 | background-color: var(--third-background);
661 | margin: 3em;
662 | padding: 1em;
663 | border: 1px solid #4d4d4d;
664 | }
665 |
666 | div#global-links ul {
667 | margin-left: 0;
668 | list-style-type: none;
669 | }
670 |
/* `simple-boot` is a class (see `ul.simple-boot li`), so it needs the
   leading dot; a bare `simple-boot` is a type selector that matches no
   HTML element. */
div#global-links > .simple-boot {
  margin-left: 3em;
}
674 |
675 | hr.docutils {
676 | width: 75%; }
677 |
678 | img.align-left, .figure.align-left, object.align-left {
679 | clear: left;
680 | float: left;
681 | margin-right: 1em; }
682 |
683 | img.align-right, .figure.align-right, object.align-right {
684 | clear: right;
685 | float: right;
686 | margin-left: 1em; }
687 |
688 | img.align-center, .figure.align-center, object.align-center {
689 | display: block;
690 | margin-left: auto;
691 | margin-right: auto; }
692 |
693 | .align-left {
694 | text-align: left; }
695 |
696 | .align-center {
697 | clear: both;
698 | text-align: center; }
699 |
700 | .align-right {
701 | text-align: right; }
702 |
703 | /* reset inner alignment in figures */
704 | div.align-right {
705 | text-align: inherit; }
706 |
707 | p.attribution {
708 | text-align: right;
709 | margin-left: 50%; }
710 |
711 | p.caption {
712 | font-style: italic; }
713 |
714 | p.credits {
715 | font-style: italic;
716 | font-size: smaller; }
717 |
718 | p.label {
719 | white-space: nowrap; }
720 |
721 | p.rubric {
722 | font-weight: bold;
723 | font-size: larger;
724 | color: maroon;
725 | text-align: center; }
726 |
727 | p.topic-title {
728 | font-weight: bold; }
729 |
730 | pre.address {
731 | margin-bottom: 0;
732 | margin-top: 0;
733 | font: inherit; }
734 |
735 | pre.literal-block, pre.doctest-block, pre.math, pre.code {
736 | margin-left: 2em;
737 | margin-right: 2em; }
738 |
739 | pre.code .ln {
740 | color: grey; } /* line numbers */
741 |
742 | /* background for pre.code blocks and code elements */
743 | pre.code, code {
744 | background-color: #eeeeee; }
745 |
746 | pre.code .comment, code .comment {
747 | color: #5c6576; }
748 |
749 | pre.code .keyword, code .keyword {
750 | color: #3B0D06;
751 | font-weight: bold; }
752 |
753 | pre.code .literal.string, code .literal.string {
754 | color: #0c5404; }
755 |
756 | pre.code .name.builtin, code .name.builtin {
757 | color: #352b84; }
758 |
759 | pre.code .deleted, code .deleted {
760 | background-color: #DEB0A1; }
761 |
762 | pre.code .inserted, code .inserted {
763 | background-color: #A3D289; }
764 |
765 | span.classifier {
766 | font-style: oblique; }
767 |
768 | span.classifier-delimiter {
769 | font-weight: bold; }
770 |
771 | span.option {
772 | white-space: nowrap; }
773 |
774 | span.problematic {
775 | color: #b30000; }
776 |
777 | span.section-subtitle {
778 | /* font-size relative to parent (h1..h6 element) */
779 | font-size: 80%; }
780 |
781 | span.DecNumber {
782 | color: var(--number); }
783 |
784 | span.BinNumber {
785 | color: var(--number); }
786 |
787 | span.HexNumber {
788 | color: var(--number); }
789 |
790 | span.OctNumber {
791 | color: var(--number); }
792 |
793 | span.FloatNumber {
794 | color: var(--number); }
795 |
796 | span.Identifier {
797 | color: var(--identifier); }
798 |
799 | span.Keyword {
800 | font-weight: 600;
801 | color: var(--keyword); }
802 |
803 | span.StringLit {
804 | color: var(--literal); }
805 |
806 | span.LongStringLit {
807 | color: var(--literal); }
808 |
809 | span.CharLit {
810 | color: var(--literal); }
811 |
812 | span.EscapeSequence {
813 | color: var(--escapeSequence); }
814 |
815 | span.Operator {
816 | color: var(--operator); }
817 |
818 | span.Punctuation {
819 | color: var(--punctuation); }
820 |
821 | span.Comment, span.LongComment {
822 | font-style: italic;
823 | font-weight: 400;
824 | color: var(--comment); }
825 |
826 | span.RegularExpression {
827 | color: darkviolet; }
828 |
829 | span.TagStart {
830 | color: darkviolet; }
831 |
832 | span.TagEnd {
833 | color: darkviolet; }
834 |
835 | span.Key {
836 | color: #252dbe; }
837 |
838 | span.Value {
839 | color: #252dbe; }
840 |
841 | span.RawData {
842 | color: var(--raw-data); }
843 |
844 | span.Assembler {
845 | color: #252dbe; }
846 |
847 | span.Preprocessor {
848 | color: #252dbe; }
849 |
850 | span.Directive {
851 | color: #252dbe; }
852 |
853 | span.Command, span.Rule, span.Hyperlink, span.Label, span.Reference,
854 | span.Other {
855 | color: var(--other); }
856 |
857 | /* Pop type, const, proc, and iterator defs in nim def blocks */
858 | dt pre > span.Identifier, dt pre > span.Operator {
859 | color: var(--identifier);
860 | font-weight: 700; }
861 |
862 | dt pre > span.Keyword ~ span.Identifier, dt pre > span.Identifier ~ span.Identifier,
863 | dt pre > span.Operator ~ span.Identifier, dt pre > span.Other ~ span.Identifier {
864 | color: var(--identifier);
865 | font-weight: inherit; }
866 |
867 | /* Nim sprite for the footer (taken from main page favicon) */
868 | .nim-sprite {
869 | display: inline-block;
870 | width: 51px;
871 | height: 14px;
872 | background-position: 0 0;
873 | background-size: 51px 14px;
874 | -webkit-filter: opacity(50%); filter: opacity(50%); /* legacy -webkit- prefix first, then the standard property */
875 | background-repeat: no-repeat;
876 | background-image: var(--nim-sprite-base64);
877 | margin-bottom: 5px; }
878 |
879 | span.pragmadots {
880 | /* Position: relative frees us up to make the dots
881 | look really nice without disturbing the layout or
882 | causing bulging in the parent container */
883 | position: relative;
884 | /* 1px down looks slightly nicer */
885 | top: 1px;
886 | padding: 2px;
887 | background-color: var(--third-background);
888 | border-radius: 4px;
889 | margin: 0 2px;
890 | cursor: pointer;
891 | font-size: 0.8em;
892 | }
893 |
894 | span.pragmadots:hover {
895 | background-color: var(--hint);
896 | }
897 | span.pragmawrap {
898 | display: none;
899 | }
900 |
901 | span.attachedType {
902 | display: none;
903 | visibility: hidden;
904 | }
905 |
--------------------------------------------------------------------------------
/docs/nimarrow/tables.html:
--------------------------------------------------------------------------------
1 |
2 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 | nimarrow/tables
21 |
22 |
23 |
24 |
25 |
60 |
61 |
62 |
63 |
64 |
65 |
nimarrow/tables
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
Dark Mode
74 |
75 |
76 |
77 |
78 | Index
79 |
80 |
81 |
82 |
83 | Search:
85 |
86 |
87 | Group by:
88 |
89 | Section
90 | Type
91 |
92 |
93 |
94 |
95 | Imports
96 |
99 |
100 |
101 | Types
102 |
126 |
127 |
128 | Procs
129 |
130 |
135 |
140 |
145 |
152 |
157 |
162 |
169 |
178 |
183 |
188 |
197 |
202 |
209 |
210 |
211 |
212 |
213 | Macros
214 |
219 |
220 |
221 | Templates
222 |
231 |
232 |
233 |
234 |
235 |
236 |
237 |
238 |
239 |
An ArrowTable is an ordered collection of named arrays (columns). Each column name and type is described by its ArrowField, and an ArrowSchema describes all of the columns in a table.
240 |
240 | To construct a table, we use an ArrowTableBuilder, which is constructed with the intended schema. Each column's data must then be added to the builder in the order specified by the schema. Creating a table does not copy any of the column data; the table shares the internal buffers of the arrays used to construct it.
241 |
242 |
243 |
244 |
245 | arrays
246 |
247 |
304 |
305 |
306 |
307 |
308 | proc `=destroy` ( x : var ArrowFieldObj ) { ... } {. raises : [ ] , tags : [ ] .}
309 |
310 |
311 |
312 |
313 |
314 |
315 | proc `=destroy` ( x : var ArrowSchemaObj ) { ... } {. raises : [ ] , tags : [ ] .}
316 |
317 |
318 |
319 |
320 |
321 |
322 | proc `=destroy` ( x : var ArrowTableObj ) { ... } {. raises : [ ] , tags : [ ] .}
323 |
324 |
325 |
326 |
327 |
328 |
329 | proc newArrowField ( name : string ; T : typedesc ) : ArrowField
330 |
331 |
332 | Create a new field of type T named name .
333 |
334 |
335 |
336 | proc glibPtr ( field : ArrowField ) : GArrowFieldPtr { ... } {. raises : [ ] , tags : [ ] .}
337 |
338 |
339 | Access the underlying glib field pointer.
340 |
341 |
342 |
343 | proc newArrowSchema ( fields : openArray [ ArrowField ] ) : ArrowSchema { ... } {. raises : [ ] ,
344 | tags : [ ] .}
345 |
346 |
347 | Construct a new schema from a sequence of fields.
348 |
349 |
350 |
351 | proc newArrowSchema ( glibSchema : GArrowSchemaPtr ) : ArrowSchema { ... } {. raises : [ ] ,
352 | tags : [ ] .}
353 |
354 |
355 | Construct an ArrowSchema from a glib schema pointer. NOTE: this takes ownership of the pointer and does not increase the refcount.
356 |
357 |
358 |
359 | proc glibPtr ( schema : ArrowSchema ) : GArrowSchemaPtr { ... } {. raises : [ ] , tags : [ ] .}
360 |
361 |
362 | Access the underlying glib schema pointer.
363 |
364 |
365 |
366 | proc newArrowTable ( schema : ArrowSchema ; glibTable : GArrowTablePtr ) : ArrowTable { ... } {.
367 | raises : [ ] , tags : [ ] .}
368 |
369 |
370 | Construct an ArrowTable from schema and glib table pointer. NOTE: this takes ownership of the pointer and does not increase the refcount.
371 |
372 |
373 |
374 | proc glibPtr ( table : ArrowTable ) : GArrowTablePtr { ... } {. raises : [ ] , tags : [ ] .}
375 |
376 |
377 | Access the underlying glib table pointer.
378 |
379 |
380 |
381 | proc len ( table : ArrowTable ) : uint64 { ... } {. raises : [ ] , tags : [ ] .}
382 |
383 |
384 | Get the length (number of rows) of the table.
385 |
386 |
387 |
388 | proc `$` ( table : ArrowTable ) : string { ... } {. raises : [ ValueError ] , tags : [ ] .}
389 |
390 |
391 | String representation of the table's schema and full contents.
392 |
393 |
394 |
395 | proc `==` ( table , other : ArrowTable ) : bool { ... } {. raises : [ ] , tags : [ ] .}
396 |
397 |
398 |
399 |
400 |
401 |
402 | proc schema ( table : ArrowTable ) : ArrowSchema { ... } {. raises : [ ] , tags : [ ] .}
403 |
404 |
405 |
406 |
407 |
408 |
409 | proc col ( table : ArrowTable ; T : typedesc ; i : int ) : ArrowChunkedArray [ T ]
410 |
411 |
412 |
413 |
414 |
415 |
416 | proc col ( table : ArrowTable ; T : typedesc ; name : string ) : ArrowChunkedArray [ T ]
417 |
418 |
419 |
420 |
421 |
422 |
423 | proc newArrowTableBuilder ( schema : ArrowSchema ) : ArrowTableBuilder { ... } {. raises : [ ] ,
424 | tags : [ ] .}
425 |
426 |
427 | Construct a new table builder for a given schema. Each column specified in the schema must be added using add in order.
428 |
429 |
430 |
431 | proc add [ T ] ( b : ArrowTableBuilder ; arr : ArrowArray [ T ] )
432 |
433 |
434 | Add the next column to the builder; the array must be of the type specified in the schema.
435 |
436 |
437 |
438 | proc build ( b : ArrowTableBuilder ) : ArrowTable { ... } {. raises : [ ValueError ] , tags : [ ] .}
439 |
440 |
441 | Build the table, invalidating this builder.
442 |
443 |
444 |
445 | proc add [ T ] ( typedBuilder : TypedBuilder [ T ] ; ts : openArray [ T ] )
446 |
447 |
448 |
449 |
450 |
451 |
452 |
453 |
454 |
455 |
456 |
457 | macro registerTypedTable ( typ : typedesc ) : untyped
458 |
459 |
460 | Macro which registers a type to be used in the "Typed" API.
461 |
462 |
463 |
464 |
465 |
466 |
467 |
468 |
469 | template newArrowTable ( T : typedesc ; ts : openArray [ T:type ] ) : ArrowTable
470 |
471 |
472 |
473 |
474 |
475 |
476 | template newTypedBuilder ( T : typedesc ) : TypedBuilder [ T ]
477 |
478 |
479 |
480 |
481 |
482 |
483 | template iter ( tbl : ArrowTable ; T : typedesc ) : untyped
484 |
485 |
486 | Iterate over the ArrowTable, converting rows into the type T .
487 |
488 |
489 |
490 |
491 |
492 |
493 |
494 |
495 |
496 |
497 |
498 |
499 | Made with Nim. Generated: 2021-07-08 21:05:38 UTC
500 |
501 |
502 |
503 |
504 |
505 |
506 |
507 |
--------------------------------------------------------------------------------