├── .gitignore ├── matrix.go ├── columninterface.go ├── dataset_bench_test.go ├── records_test.go ├── record_test.go ├── row_test.go ├── Makefile ├── clasetinterface.go ├── maprows_test.go ├── records.go ├── columns_test.go ├── column_test.go ├── maprows.go ├── LICENSE ├── tabula_test.go ├── tabula.go ├── row.go ├── columns.go ├── rows_test.go ├── README.md ├── rows.go ├── record.go ├── column.go ├── claset.go ├── dataset_test.go ├── datasetinterface.go └── dataset.go /.gitignore: -------------------------------------------------------------------------------- 1 | cover.html 2 | cover.out 3 | *.bench 4 | *.prof 5 | *.test 6 | -------------------------------------------------------------------------------- /matrix.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 M. Shulhan . All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be found 3 | // in the LICENSE file. 4 | 5 | package tabula 6 | 7 | // 8 | // Matrix is a combination of columns and rows. 9 | // 10 | type Matrix struct { 11 | Columns *Columns 12 | Rows *Rows 13 | } 14 | -------------------------------------------------------------------------------- /columninterface.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 M. Shulhan . All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be found 3 | // in the LICENSE file. 4 | 5 | package tabula 6 | 7 | // 8 | // ColumnInterface define an interface for working with Column. 
9 | // 10 | type ColumnInterface interface { 11 | SetType(tipe int) 12 | SetName(name string) 13 | 14 | GetType() int 15 | GetName() string 16 | 17 | SetRecords(recs *Records) 18 | 19 | Interface() interface{} 20 | } 21 | -------------------------------------------------------------------------------- /dataset_bench_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 M. Shulhan . All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be found 3 | // in the LICENSE file. 4 | 5 | package tabula_test 6 | 7 | import ( 8 | "github.com/shuLhan/tabula" 9 | "testing" 10 | ) 11 | 12 | func BenchmarkPushRow(b *testing.B) { 13 | dataset := tabula.NewDataset(tabula.DatasetModeRows, nil, nil) 14 | 15 | for i := 0; i < b.N; i++ { 16 | e := populateWithRows(dataset) 17 | if e != nil { 18 | b.Fatal(e) 19 | } 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /records_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 M. Shulhan . All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be found 3 | // in the LICENSE file. 
4 | 5 | package tabula_test 6 | 7 | import ( 8 | "fmt" 9 | "github.com/shuLhan/tabula" 10 | "testing" 11 | ) 12 | 13 | func TestSortByIndex(t *testing.T) { 14 | data := make(tabula.Records, 3) 15 | data[0] = tabula.NewRecordInt(3) 16 | data[1] = tabula.NewRecordInt(2) 17 | data[2] = tabula.NewRecordInt(1) 18 | 19 | sortedIdx := []int{2, 1, 0} 20 | expect := []int{1, 2, 3} 21 | 22 | sorted := data.SortByIndex(sortedIdx) 23 | 24 | got := fmt.Sprint(sorted) 25 | exp := fmt.Sprint(&expect) 26 | 27 | assert(t, exp, got, true) 28 | } 29 | -------------------------------------------------------------------------------- /record_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 M. Shulhan . All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be found 3 | // in the LICENSE file. 4 | 5 | package tabula_test 6 | 7 | import ( 8 | "fmt" 9 | "github.com/shuLhan/tabula" 10 | "testing" 11 | ) 12 | 13 | // 14 | // TestRecord simply check how the stringer work. 15 | // 16 | func TestRecord(t *testing.T) { 17 | expec := []string{"test", "1", "2"} 18 | expType := []int{tabula.TString, tabula.TInteger, tabula.TInteger} 19 | 20 | row := make(tabula.Row, 0) 21 | 22 | for i := range expec { 23 | r, e := tabula.NewRecordBy(expec[i], expType[i]) 24 | if nil != e { 25 | t.Error(e) 26 | } 27 | 28 | row = append(row, r) 29 | } 30 | 31 | exp := fmt.Sprint(expec) 32 | got := fmt.Sprint(row) 33 | assert(t, exp, got, true) 34 | } 35 | -------------------------------------------------------------------------------- /row_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 M. Shulhan . All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be found 3 | // in the LICENSE file. 
4 | 5 | package tabula_test 6 | 7 | import ( 8 | "github.com/shuLhan/tabula" 9 | "testing" 10 | ) 11 | 12 | var dataFloat64 = []float64{0.1, 0.2, 0.3, 0.4, 0.5} 13 | 14 | func createRow() (row tabula.Row) { 15 | for _, v := range dataFloat64 { 16 | row.PushBack(tabula.NewRecordReal(v)) 17 | } 18 | return 19 | } 20 | 21 | func TestClone(t *testing.T) { 22 | row := createRow() 23 | rowClone := row.Clone() 24 | rowClone2 := row.Clone() 25 | 26 | assert(t, &row, rowClone, true) 27 | 28 | // changing the clone value should not change the original copy. 29 | (*rowClone2)[0].SetFloat(0) 30 | assert(t, &row, rowClone, true) 31 | assert(t, &row, rowClone2, false) 32 | } 33 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | #!/bin/make 2 | 3 | ## Copyright 2017 M. Shulhan . All rights reserved. 4 | ## Use of this source code is governed by a BSD-style license that can be found 5 | ## in the LICENSE file. 6 | 7 | SRC_FILES :=$(shell go list -f '{{ join .GoFiles " " }}') 8 | TEST_FILES :=$(shell go list -f '{{ join .TestGoFiles " " }}') 9 | XTEST_FILES :=$(shell go list -f '{{ join .XTestGoFiles " " }}') 10 | COVER_OUT :=cover.out 11 | COVER_HTML :=cover.html 12 | TARGET :=$(shell go list -f '{{ .Target }}') 13 | 14 | .PHONY: all clean coverbrowse 15 | 16 | all: ${TARGET} 17 | 18 | ${TARGET}: ${COVER_HTML} 19 | go install -a . 20 | 21 | ${COVER_HTML}: ${COVER_OUT} 22 | go tool cover -html=$< -o $@ 23 | 24 | ${COVER_OUT}: ${SRC_FILES} ${TEST_FILES} ${XTEST_FILES} 25 | go test -v -coverprofile $@ 26 | 27 | coverbrowse: ${COVER_HTML} 28 | xdg-open $< 29 | 30 | clean: 31 | rm -f ${COVER_HTML} ${COVER_OUT} *.bench *.prof *.test 32 | -------------------------------------------------------------------------------- /clasetinterface.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 M. Shulhan . 
All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be found 3 | // in the LICENSE file. 4 | 5 | package tabula 6 | 7 | // 8 | // ClasetInterface is the interface for working with dataset containing class 9 | // or target attribute. It embed dataset interface. 10 | // 11 | // Yes, the name is Claset with single `s` not Classset with triple `s` to 12 | // minimize typo. 13 | // 14 | type ClasetInterface interface { 15 | DatasetInterface 16 | 17 | GetClassType() int 18 | GetClassValueSpace() []string 19 | GetClassColumn() *Column 20 | GetClassRecords() *Records 21 | GetClassAsStrings() []string 22 | GetClassAsReals() []float64 23 | GetClassIndex() int 24 | MajorityClass() string 25 | MinorityClass() string 26 | Counts() []int 27 | 28 | SetDataset(DatasetInterface) 29 | SetClassIndex(int) 30 | SetMajorityClass(string) 31 | SetMinorityClass(string) 32 | 33 | CountValueSpaces() 34 | RecountMajorMinor() 35 | IsInSingleClass() (bool, string) 36 | 37 | GetMinorityRows() *Rows 38 | } 39 | -------------------------------------------------------------------------------- /maprows_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 M. Shulhan . All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be found 3 | // in the LICENSE file. 
4 | 5 | package tabula_test 6 | 7 | import ( 8 | "fmt" 9 | "github.com/shuLhan/tabula" 10 | "testing" 11 | ) 12 | 13 | func TestAddRow(t *testing.T) { 14 | mapRows := tabula.MapRows{} 15 | rows, e := initRows() 16 | 17 | if e != nil { 18 | t.Fatal(e) 19 | } 20 | 21 | for _, row := range rows { 22 | key := fmt.Sprint((*row)[testClassIdx].Interface()) 23 | mapRows.AddRow(key, row) 24 | } 25 | 26 | got := fmt.Sprint(mapRows) 27 | 28 | assert(t, groupByExpect, got, true) 29 | } 30 | 31 | func TestGetMinority(t *testing.T) { 32 | mapRows := tabula.MapRows{} 33 | rows, e := initRows() 34 | 35 | if e != nil { 36 | t.Fatal(e) 37 | } 38 | 39 | for _, row := range rows { 40 | key := fmt.Sprint((*row)[testClassIdx].Interface()) 41 | mapRows.AddRow(key, row) 42 | } 43 | 44 | // remove the first row in the first key, so we can make it minority. 45 | mapRows[0].Value.PopFront() 46 | 47 | _, minRows := mapRows.GetMinority() 48 | 49 | exp := rowsExpect[3] 50 | got := fmt.Sprint(minRows) 51 | 52 | assert(t, exp, got, true) 53 | } 54 | -------------------------------------------------------------------------------- /records.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 M. Shulhan . All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be found 3 | // in the LICENSE file. 4 | 5 | package tabula 6 | 7 | // 8 | // Records define slice of pointer to Record. 9 | // 10 | type Records []*Record 11 | 12 | // 13 | // Len will return the length of records. 14 | // 15 | func (recs *Records) Len() int { 16 | return len(*recs) 17 | } 18 | 19 | // 20 | // SortByIndex will sort the records using slice of index `sortedIDx` and 21 | // return it. 
22 | // 23 | func (recs *Records) SortByIndex(sortedIdx []int) *Records { 24 | sorted := make(Records, len(*recs)) 25 | 26 | for x, v := range sortedIdx { 27 | sorted[x] = (*recs)[v] 28 | } 29 | return &sorted 30 | } 31 | 32 | // 33 | // CountWhere return number of record where its value is equal to `v` type and 34 | // value. 35 | // 36 | func (recs *Records) CountWhere(v interface{}) (c int) { 37 | for _, r := range *recs { 38 | if r.IsEqualToInterface(v) { 39 | c++ 40 | } 41 | } 42 | return 43 | } 44 | 45 | // 46 | // CountsWhere will return count of each value in slice `sv`. 47 | // 48 | func (recs *Records) CountsWhere(vs []interface{}) (counts []int) { 49 | for _, v := range vs { 50 | c := recs.CountWhere(v) 51 | counts = append(counts, c) 52 | } 53 | return 54 | } 55 | -------------------------------------------------------------------------------- /columns_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 M. Shulhan . All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be found 3 | // in the LICENSE file. 4 | 5 | package tabula_test 6 | 7 | import ( 8 | "github.com/shuLhan/tabula" 9 | "testing" 10 | ) 11 | 12 | func TestRandomPickColumns(t *testing.T) { 13 | var dataset tabula.Dataset 14 | var e error 15 | 16 | dataset.Init(tabula.DatasetModeRows, testColTypes, testColNames) 17 | 18 | dataset.Rows, e = initRows() 19 | if e != nil { 20 | t.Fatal(e) 21 | } 22 | 23 | dataset.TransposeToColumns() 24 | 25 | // random pick with duplicate 26 | ncols := 6 27 | dup := true 28 | excludeIdx := []int{3} 29 | 30 | for i := 0; i < 5; i++ { 31 | picked, unpicked, _, _ := 32 | dataset.Columns.RandomPick(ncols, dup, excludeIdx) 33 | 34 | // check if unpicked item exist in picked items. 
35 | for _, un := range unpicked { 36 | for _, pick := range picked { 37 | assert(t, un, pick, false) 38 | } 39 | } 40 | } 41 | 42 | // random pick without duplicate 43 | dup = false 44 | for i := 0; i < 5; i++ { 45 | picked, unpicked, _, _ := 46 | dataset.Columns.RandomPick(ncols, dup, excludeIdx) 47 | 48 | // check if unpicked item exist in picked items. 49 | for _, un := range unpicked { 50 | for _, pick := range picked { 51 | assert(t, un, pick, false) 52 | } 53 | } 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /column_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 M. Shulhan . All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be found 3 | // in the LICENSE file. 4 | 5 | package tabula_test 6 | 7 | import ( 8 | "github.com/shuLhan/tabula" 9 | "testing" 10 | ) 11 | 12 | var data = []string{"9.987654321", "8.8", "7.7", "6.6", "5.5", "4.4", "3.3"} 13 | var expFloat = []float64{9.987654321, 8.8, 7.7, 6.6, 5.5, 4.4, 3.3} 14 | 15 | func initColReal(t *testing.T) (col *tabula.Column) { 16 | col = tabula.NewColumn(tabula.TReal, "TREAL") 17 | 18 | for x := range data { 19 | rec, e := tabula.NewRecordBy(data[x], tabula.TReal) 20 | if e != nil { 21 | t.Fatal(e) 22 | } 23 | 24 | col.PushBack(rec) 25 | } 26 | 27 | return col 28 | } 29 | 30 | func TestToFloatSlice(t *testing.T) { 31 | col := initColReal(t) 32 | got := col.ToFloatSlice() 33 | 34 | assert(t, expFloat, got, true) 35 | } 36 | 37 | func TestToStringSlice(t *testing.T) { 38 | var col tabula.Column 39 | 40 | for x := range data { 41 | rec, e := tabula.NewRecordBy(data[x], tabula.TString) 42 | if e != nil { 43 | t.Fatal(e) 44 | } 45 | 46 | col.PushBack(rec) 47 | } 48 | 49 | got := col.ToStringSlice() 50 | 51 | assert(t, data, got, true) 52 | } 53 | 54 | func TestDeleteRecordAt(t *testing.T) { 55 | var exp []float64 56 | del := 2 57 | 58 | exp = 
append(exp, expFloat[:del]...) 59 | exp = append(exp, expFloat[del+1:]...) 60 | 61 | col := initColReal(t) 62 | col.DeleteRecordAt(del) 63 | got := col.ToFloatSlice() 64 | 65 | assert(t, exp, got, true) 66 | } 67 | -------------------------------------------------------------------------------- /maprows.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 M. Shulhan . All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be found 3 | // in the LICENSE file. 4 | 5 | package tabula 6 | 7 | import ( 8 | "math" 9 | ) 10 | 11 | // 12 | // MapRowsElement represent a single mapping of string key to rows. 13 | // 14 | type MapRowsElement struct { 15 | Key string 16 | Value Rows 17 | } 18 | 19 | // 20 | // MapRows represent a list of mapping between string key and rows. 21 | // 22 | type MapRows []MapRowsElement 23 | 24 | // 25 | // insertRow will insert a row `v` into map using key `k`. 26 | // 27 | func (mapRows *MapRows) insertRow(k string, v *Row) { 28 | rows := Rows{} 29 | rows.PushBack(v) 30 | el := MapRowsElement{k, rows} 31 | (*mapRows) = append((*mapRows), el) 32 | } 33 | 34 | // 35 | // AddRow will append a row `v` into map value if they key `k` exist in map, 36 | // otherwise it will insert a new map element. 37 | // 38 | func (mapRows *MapRows) AddRow(k string, v *Row) { 39 | for x := range *mapRows { 40 | if (*mapRows)[x].Key == k { 41 | (*mapRows)[x].Value.PushBack(v) 42 | return 43 | } 44 | } 45 | // no key found on map 46 | mapRows.insertRow(k, v) 47 | } 48 | 49 | // 50 | // GetMinority return map value which contain the minimum rows. 
51 | // 52 | func (mapRows *MapRows) GetMinority() (keyMin string, valMin Rows) { 53 | min := math.MaxInt32 54 | 55 | for k := range *mapRows { 56 | v := (*mapRows)[k].Value 57 | l := len(v) 58 | if l < min { 59 | keyMin = (*mapRows)[k].Key 60 | valMin = v 61 | min = l 62 | } 63 | } 64 | return 65 | } 66 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2017, M. Shulhan (ms@kilabit.info). 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | 3. Neither the name of copyright holder nor the names of its contributors may be 15 | used to endorse or promote products derived from this software without 16 | specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 19 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR 22 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 25 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 27 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | 29 | --- --- --- --- --- --- --- 30 | 31 | TT TT II BB AAAA LLLLLL II KKKKKKKK 32 | TT TT II BB AA AA LL LL II KK 33 | TTTT II BB AA AA LL LL II KK 34 | TT TT II BB AAAAAAAA LLLLLL II KK 35 | TT TT II BB AA AA LL LL II KK 36 | TT TT II BBBBBBBB AA AA LLLLLL II KK 37 | 38 | Website: http://kilabit.info 39 | Contact: ms@kilabit.info 40 | -------------------------------------------------------------------------------- /tabula_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 M. Shulhan . All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be found 3 | // in the LICENSE file. 
var (
	// traces is the scratch buffer that receives the output of
	// runtime.Stack when an assertion fails.
	traces = make([]byte, 1024)
)

//
// printStackTrace write an excerpt of the stack trace currently stored in
// `traces` to standard error. Unused (zero) bytes at the end of the buffer
// are not considered part of the trace.
//
func printStackTrace() {
	n := len(traces)
	for n > 0 && traces[n-1] == 0 {
		n--
	}
	writeStackExcerpt(traces[:n])
}

//
// writeStackExcerpt write the part of `buf` that starts at the third newline
// and ends just after the fifth newline to standard error.
//
// When `buf` has fewer newlines than that, the excerpt extends to the start
// and/or the end of `buf`, so the slice bounds are always valid.
//
func writeStackExcerpt(buf []byte) {
	start, end := 0, len(buf)
	lines := 0

scan:
	for x, b := range buf {
		if b != '\n' {
			continue
		}
		lines++
		switch lines {
		case 3:
			start = x
		case 5:
			end = x + 1
			break scan
		}
	}

	// Best-effort diagnostics, a failed write to stderr is not actionable.
	_, _ = os.Stderr.Write(buf[start:end])
}

//
// assert stop the test with a fatal error when the deep equality of `exp`
// and `got` is different from `equal`. An excerpt of the stack trace is
// printed first to help locate the failing call.
//
func assert(t *testing.T, exp, got interface{}, equal bool) {
	if reflect.DeepEqual(exp, got) == equal {
		return
	}

	// Only the first n bytes were written by this call; anything after
	// them is left over from a previous failure.
	n := runtime.Stack(traces, true)
	writeStackExcerpt(traces[:n])

	t.Fatalf("\n"+
		">>> Expecting '%v'\n"+
		"          got '%v'\n", exp, got)
}
-------------------------------------------------------------------------------- 1 | // Copyright 2017 M. Shulhan . All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be found 3 | // in the LICENSE file. 4 | 5 | // 6 | // Package tabula is a Go library for working with rows, columns, or matrix 7 | // (table), or in other terms, working with data sets. 8 | // 9 | // # Overview 10 | // 11 | // Go's slices give a flexible way to manage a sequence of data of one type, but what 12 | // if you want to manage a sequence of values with different types of data? 13 | // Or manage a bunch of values like a table? 14 | // 15 | // You can use this library to manage a sequence of values with different types 16 | // and manage data in a two-dimensional tuple. 17 | // 18 | // ## Terminology 19 | // 20 | // Here are some terms that we used in developing this library, which may 21 | // help the reader understand the internals and the API. 22 | // 23 | // Record is a single cell in a row or column, or the smallest building block of 24 | // a dataset. 25 | // 26 | // Row is a horizontal representation of records in a dataset. 27 | // 28 | // Column is a vertical representation of records in a dataset. 29 | // Each column has a unique name and holds data of a single type. 30 | // 31 | // Dataset is a collection of rows and columns. 32 | // 33 | // Given those definitions we can draw the representation of rows, columns, or 34 | // matrix: 35 | // 36 | // COL-0 COL-1 ... COL-x 37 | // ROW-0: record record ... record 38 | // ROW-1: record record ... record 39 | // ... 40 | // ROW-y: record record ... record 41 | // 42 | // ## Record Type 43 | // 44 | // There are only three valid types for a record: int64, float64, and string. 45 | // 46 | // ## Dataset Mode 47 | // 48 | // Tabula has three modes for a dataset: rows, columns, or matrix.
49 | // 50 | // For example, given a table of data, 51 | // 52 | // col1,col2,col3 53 | // a,b,c 54 | // 1,2,3 55 | // 56 | // "rows" mode is where each line is saved in its own slice, resulting in Rows: 57 | // 58 | // Rows[0]: [a b c] 59 | // Rows[1]: [1 2 3] 60 | // 61 | // "columns" mode is where each line is saved by columns, resulting in Columns: 62 | // 63 | // Columns[0]: {col1 0 0 [] [a 1]} 64 | // Columns[1]: {col2 0 0 [] [b 2]} 65 | // Columns[2]: {col3 0 0 [] [c 3]} 66 | // 67 | // Unlike rows mode, each column contains metadata including column name, type, 68 | // flag, and value space (all possible values that the column _may_ contain). 69 | // 70 | // "matrix" mode is where each record is saved both in a row and in a column. 71 | // 72 | // Matrix mode consumes more memory but gives a flexible way to manage records. 73 | // 74 | // 75 | package tabula 76 | 77 | import ( 78 | "os" 79 | "strconv" 80 | ) 81 | 82 | var ( 83 | // DEBUG debug level, set using environment TABULA_DEBUG 84 | DEBUG = 0 85 | ) 86 | 87 | func init() { 88 | var e error 89 | DEBUG, e = strconv.Atoi(os.Getenv("TABULA_DEBUG")) 90 | if e != nil { 91 | DEBUG = 0 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /row.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 M. Shulhan . All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be found 3 | // in the LICENSE file. 4 | 5 | package tabula 6 | 7 | // 8 | // Row represent slice of record. 9 | // 10 | type Row []*Record 11 | 12 | // 13 | // Len return number of record in row. 14 | // 15 | func (row *Row) Len() int { 16 | return len(*row) 17 | } 18 | 19 | // 20 | // PushBack will add new record to the end of row. 21 | // 22 | func (row *Row) PushBack(r *Record) { 23 | *row = append(*row, r) 24 | } 25 | 26 | // 27 | // Types return type of all records.
28 | // 29 | func (row *Row) Types() (types []int) { 30 | for _, r := range *row { 31 | types = append(types, r.Type()) 32 | } 33 | return 34 | } 35 | 36 | // 37 | // Clone create and return a clone of row. 38 | // 39 | func (row *Row) Clone() *Row { 40 | clone := make(Row, len(*row)) 41 | 42 | for x, rec := range *row { 43 | clone[x] = rec.Clone() 44 | } 45 | return &clone 46 | } 47 | 48 | // 49 | // IsNilAt return true if there is no record value in row at `idx`, otherwise 50 | // return false. 51 | // 52 | func (row *Row) IsNilAt(idx int) bool { 53 | if idx < 0 { 54 | return true 55 | } 56 | if idx >= len(*row) { 57 | return true 58 | } 59 | if (*row)[idx] == nil { 60 | return true 61 | } 62 | return (*row)[idx].IsNil() 63 | } 64 | 65 | // 66 | // SetValueAt will set the value of row at cell index `idx` with record `rec`. 67 | // 68 | func (row *Row) SetValueAt(idx int, rec *Record) { 69 | (*row)[idx] = rec 70 | } 71 | 72 | // 73 | // GetRecord will return pointer to record at index `i`, or nil if index 74 | // is out of range. 75 | // 76 | func (row *Row) GetRecord(i int) *Record { 77 | if i < 0 { 78 | return nil 79 | } 80 | if i >= row.Len() { 81 | return nil 82 | } 83 | return (*row)[i] 84 | } 85 | 86 | // 87 | // GetValueAt return the value of row record at index `idx`. If the index is 88 | // out of range it will return nil and false 89 | // 90 | func (row *Row) GetValueAt(idx int) (interface{}, bool) { 91 | if row.Len() <= idx { 92 | return nil, false 93 | } 94 | return (*row)[idx].Interface(), true 95 | } 96 | 97 | // 98 | // GetIntAt return the integer value of row record at index `idx`. 99 | // If the index is out of range it will return 0 and false. 100 | // 101 | func (row *Row) GetIntAt(idx int) (int64, bool) { 102 | if row.Len() <= idx { 103 | return 0, false 104 | } 105 | 106 | return (*row)[idx].Integer(), true 107 | } 108 | 109 | // 110 | // IsEqual return true if row content equal with `other` row, otherwise return 111 | // false. 
112 | // 113 | func (row *Row) IsEqual(other *Row) bool { 114 | if len(*row) != len(*other) { 115 | return false 116 | } 117 | for x, xrec := range *row { 118 | if !xrec.IsEqual((*other)[x]) { 119 | return false 120 | } 121 | } 122 | return true 123 | } 124 | -------------------------------------------------------------------------------- /columns.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 M. Shulhan . All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be found 3 | // in the LICENSE file. 4 | 5 | package tabula 6 | 7 | import ( 8 | "github.com/shuLhan/numerus" 9 | "github.com/shuLhan/tekstus" 10 | ) 11 | 12 | // 13 | // Columns represent slice of Column. 14 | // 15 | type Columns []Column 16 | 17 | // 18 | // Len return length of columns. 19 | // 20 | func (cols *Columns) Len() int { 21 | return len(*cols) 22 | } 23 | 24 | // 25 | // Reset each data and attribute in all columns. 26 | // 27 | func (cols *Columns) Reset() { 28 | for x := range *cols { 29 | (*cols)[x].Reset() 30 | } 31 | } 32 | 33 | // 34 | // SetTypes of each column. The length of type must be equal with the number of 35 | // column, otherwise it will used the minimum length between types or columns. 36 | // 37 | func (cols *Columns) SetTypes(types []int) { 38 | typeslen := len(types) 39 | colslen := len(*cols) 40 | minlen := typeslen 41 | 42 | if colslen < minlen { 43 | minlen = colslen 44 | } 45 | 46 | for x := 0; x < minlen; x++ { 47 | (*cols)[x].Type = types[x] 48 | } 49 | } 50 | 51 | // 52 | // RandomPick column in columns until n item and return it like its has been 53 | // shuffled. If duplicate is true, column that has been picked can be picked up 54 | // again, otherwise it will only picked up once. 55 | // 56 | // This function return picked and unpicked column and index of them. 
57 | // 58 | func (cols *Columns) RandomPick(n int, dup bool, excludeIdx []int) ( 59 | picked Columns, 60 | unpicked Columns, 61 | pickedIdx []int, 62 | unpickedIdx []int, 63 | ) { 64 | excLen := len(excludeIdx) 65 | colsLen := len(*cols) 66 | allowedLen := colsLen - excLen 67 | 68 | // if duplication is not allowed, limit the number of selected 69 | // column. 70 | if n > allowedLen && !dup { 71 | n = allowedLen 72 | } 73 | 74 | for ; n >= 1; n-- { 75 | idx := numerus.IntPickRandPositive(colsLen, dup, pickedIdx, 76 | excludeIdx) 77 | 78 | pickedIdx = append(pickedIdx, idx) 79 | picked = append(picked, (*cols)[idx]) 80 | } 81 | 82 | // select unpicked columns using picked index. 83 | for cid := range *cols { 84 | // check if column index has been picked up 85 | isPicked := false 86 | for _, idx := range pickedIdx { 87 | if cid == idx { 88 | isPicked = true 89 | break 90 | } 91 | } 92 | if !isPicked { 93 | unpicked = append(unpicked, (*cols)[cid]) 94 | unpickedIdx = append(unpickedIdx, cid) 95 | } 96 | } 97 | 98 | return 99 | } 100 | 101 | // 102 | // GetMinMaxLength given a slice of column, find the minimum and maximum column 103 | // length among them. 104 | // 105 | func (cols *Columns) GetMinMaxLength() (min, max int) { 106 | for _, col := range *cols { 107 | collen := col.Len() 108 | if collen < min { 109 | min = collen 110 | } else if collen > max { 111 | max = collen 112 | } 113 | } 114 | return 115 | } 116 | 117 | // 118 | // Join all column records value at index `row` using separator `sep` and make 119 | // sure if there is a separator in value it will be escaped with `esc`. 120 | // 121 | // Given slice of columns, where row is 1 and sep is `,` and escape is `\` 122 | // 123 | // 0 1 2 124 | // 0 A B C 125 | // 1 D , F <- row 126 | // 2 G H I 127 | // 128 | // this function will return "D,\,,F" in bytes. 
129 | // 130 | // 131 | func (cols *Columns) Join(row int, sep, esc []byte) (v []byte) { 132 | for y, col := range *cols { 133 | if y > 0 { 134 | v = append(v, sep...) 135 | } 136 | 137 | rec := col.Records[row] 138 | recV := rec.Bytes() 139 | 140 | if rec.Type() == TString { 141 | recV, _ = tekstus.BytesEncapsulate(sep, recV, esc, nil) 142 | } 143 | 144 | v = append(v, recV...) 145 | } 146 | return 147 | } 148 | -------------------------------------------------------------------------------- /rows_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 M. Shulhan . All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be found 3 | // in the LICENSE file. 4 | 5 | package tabula_test 6 | 7 | import ( 8 | "fmt" 9 | "strings" 10 | "testing" 11 | ) 12 | 13 | func TestPushBack(t *testing.T) { 14 | rows, e := initRows() 15 | if e != nil { 16 | t.Fatal(e) 17 | } 18 | 19 | exp := strings.Join(rowsExpect, "") 20 | got := fmt.Sprint(rows) 21 | 22 | assert(t, exp, got, true) 23 | } 24 | 25 | func TestPopFront(t *testing.T) { 26 | rows, e := initRows() 27 | if e != nil { 28 | t.Fatal(e) 29 | } 30 | 31 | l := len(rows) - 1 32 | for i := range rows { 33 | row := rows.PopFront() 34 | 35 | exp := rowsExpect[i] 36 | got := fmt.Sprint(row) 37 | 38 | assert(t, exp, got, true) 39 | 40 | if i < l { 41 | exp = strings.Join(rowsExpect[i+1:], "") 42 | } else { 43 | exp = "" 44 | } 45 | got = fmt.Sprint(rows) 46 | 47 | assert(t, exp, got, true) 48 | } 49 | 50 | // empty rows 51 | row := rows.PopFront() 52 | 53 | exp := "" 54 | got := fmt.Sprint(row) 55 | 56 | assert(t, exp, got, true) 57 | } 58 | 59 | func TestPopFrontRow(t *testing.T) { 60 | rows, e := initRows() 61 | if e != nil { 62 | t.Fatal(e) 63 | } 64 | 65 | l := len(rows) - 1 66 | for i := range rows { 67 | newRows := rows.PopFrontAsRows() 68 | 69 | exp := rowsExpect[i] 70 | got := fmt.Sprint(newRows) 71 | 72 | assert(t, exp, got, 
true) 73 | 74 | if i < l { 75 | exp = strings.Join(rowsExpect[i+1:], "") 76 | } else { 77 | exp = "" 78 | } 79 | got = fmt.Sprint(rows) 80 | 81 | assert(t, exp, got, true) 82 | } 83 | 84 | // empty rows 85 | row := rows.PopFrontAsRows() 86 | 87 | exp := "" 88 | got := fmt.Sprint(row) 89 | 90 | assert(t, exp, got, true) 91 | } 92 | 93 | func TestGroupByValue(t *testing.T) { 94 | rows, e := initRows() 95 | if e != nil { 96 | t.Fatal(e) 97 | } 98 | 99 | mapRows := rows.GroupByValue(testClassIdx) 100 | 101 | got := fmt.Sprint(mapRows) 102 | 103 | assert(t, groupByExpect, got, true) 104 | } 105 | 106 | func TestRandomPick(t *testing.T) { 107 | rows, e := initRows() 108 | if e != nil { 109 | t.Fatal(e) 110 | } 111 | 112 | // random pick with duplicate 113 | for i := 0; i < 5; i++ { 114 | picked, unpicked, pickedIdx, unpickedIdx := rows.RandomPick(6, 115 | true) 116 | 117 | // check if unpicked item exist in picked items. 118 | isin, _ := picked.Contains(unpicked) 119 | 120 | if isin { 121 | fmt.Println("Random pick with duplicate rows") 122 | fmt.Println("==> picked rows :", picked) 123 | fmt.Println("==> picked idx :", pickedIdx) 124 | fmt.Println("==> unpicked rows :", unpicked) 125 | fmt.Println("==> unpicked idx :", unpickedIdx) 126 | t.Fatal("random pick: unpicked is false") 127 | } 128 | } 129 | 130 | // random pick without duplication 131 | for i := 0; i < 5; i++ { 132 | picked, unpicked, pickedIdx, unpickedIdx := rows.RandomPick(3, 133 | false) 134 | 135 | // check if picked rows is duplicate 136 | assert(t, picked[0], picked[1], false) 137 | 138 | // check if unpicked item exist in picked items. 
139 | isin, _ := picked.Contains(unpicked) 140 | 141 | if isin { 142 | fmt.Println("Random pick with no duplicate rows") 143 | fmt.Println("==> picked rows :", picked) 144 | fmt.Println("==> picked idx :", pickedIdx) 145 | fmt.Println("==> unpicked rows :", unpicked) 146 | fmt.Println("==> unpicked idx :", unpickedIdx) 147 | t.Fatal("random pick: unpicked is false") 148 | } 149 | } 150 | } 151 | 152 | func TestRowsDel(t *testing.T) { 153 | rows, e := initRows() 154 | if e != nil { 155 | t.Fatal(e) 156 | } 157 | 158 | // Test deleting a row at an out-of-range index. 159 | row := rows.Del(-1) 160 | if row != nil { 161 | t.Fatal("row should be nil!") 162 | } 163 | 164 | row = rows.Del(rows.Len()) 165 | if row != nil { 166 | t.Fatal("row should be nil!") 167 | } 168 | 169 | // Test deleting a row at an index that actually exists. 170 | row = rows.Del(0) 171 | 172 | exp := strings.Join(rowsExpect[1:], "") 173 | got := fmt.Sprint(rows) 174 | 175 | assert(t, exp, got, true) 176 | 177 | got = fmt.Sprint(row) 178 | assert(t, rowsExpect[0], got, true) 179 | } 180 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![GoDoc](https://godoc.org/github.com/shuLhan/tabula?status.svg)](https://godoc.org/github.com/shuLhan/tabula) 2 | [![Go Report Card](https://goreportcard.com/badge/github.com/shuLhan/tabula)](https://goreportcard.com/report/github.com/shuLhan/tabula) 3 | ![cover.run go](https://cover.run/go/github.com/shuLhan/tabula.svg) 4 | 5 | Package tabula is a Go library for working with rows, columns, or matrix 6 | (table), or in another terms working with data set. 7 | 8 | NOTE: This package has been deprecated. See 9 | https://github.com/shuLhan/share/tree/master/lib/tabula for latest implementation. 
10 | 11 | # Overview 12 | 13 | Go's slice gives a flexible way to manage a sequence of data of one type, but what 14 | if you want to manage a sequence of values with different types of data? 15 | Or manage a bunch of values like a table? 16 | 17 | You can use this library to manage a sequence of values with different types 18 | and to manage data in a two-dimensional tuple. 19 | 20 | ## Terminology 21 | 22 | Here are some terms that we used in developing this library, which may 23 | help the reader understand the internals and the API. 24 | 25 | Record is a single cell in a row or column, or the smallest building block of 26 | a dataset. 27 | 28 | Row is a horizontal representation of records in a dataset. 29 | 30 | Column is a vertical representation of records in a dataset. 31 | Each column has a unique name and all of its records have the same data type. 32 | 33 | Dataset is a collection of rows and columns. 34 | 35 | Given those definitions we can draw the representation of rows, columns, or 36 | matrix: 37 | 38 | COL-0 COL-1 ... COL-x 39 | ROW-0: record record ... record 40 | ROW-1: record record ... record 41 | ... 42 | ROW-y: record record ... record 43 | 44 | ## What makes this package different from other dataset packages? 45 | 46 | ### Record Type 47 | 48 | There are only three valid types in a record: int64, float64, and string. 49 | 50 | Each record is a pointer to an interface value, which means, 51 | 52 | - Switching from rows to columns mode, or vice versa, is only a matter of 53 | pointer switching, with no memory relocation. 54 | - When using matrix mode, additional memory is used only to allocate the slices; the 55 | records in the rows and columns are shared. 56 | 57 | ### Dataset Mode 58 | 59 | Tabula has three modes for a dataset: rows, columns, or matrix. 
60 | 61 | For example, given a table of data, 62 | 63 | col1,col2,col3 64 | a,b,c 65 | 1,2,3 66 | 67 | - When in "rows" mode, each line is saved in its own slice, resulting in Rows: 68 | 69 | ``` 70 | Rows[0]: [a b c] 71 | Rows[1]: [1 2 3] 72 | ``` 73 | 74 | Columns is used only to save record metadata: column name, type, flag and 75 | value space. 76 | 77 | - When in "columns" mode, each line is saved in columns, resulting in Columns: 78 | 79 | ``` 80 | Columns[0]: {col1 0 0 [] [a 1]} 81 | Columns[1]: {col2 0 0 [] [b 2]} 82 | Columns[2]: {col3 0 0 [] [c 3]} 83 | ``` 84 | 85 | Each column will contain metadata including column name, type, flag, and 86 | value space (all possible values that the column _may_ contain). 87 | 88 | Rows in "columns" mode is empty. 89 | 90 | - When in "matrix" mode, each record is saved both in a row and in a column using 91 | a shared pointer to the record. 92 | 93 | Matrix mode consumes more memory by allocating two slices, one for rows and one for columns, 94 | but gives a flexible way to manage records. 95 | 96 | ## Features 97 | 98 | - **Switching between rows and columns mode**. 99 | 100 | - [**Random pick rows with or without replacement**](https://godoc.org/github.com/shuLhan/tabula#RandomPickRows). 101 | 102 | - [**Random pick columns with or without replacement**](https://godoc.org/github.com/shuLhan/tabula#RandomPickColumns). 103 | 104 | - [**Select column from dataset by index**](https://godoc.org/github.com/shuLhan/tabula#SelectColumnsByIdx). 105 | 106 | - [**Sort columns by index**](https://godoc.org/github.com/shuLhan/tabula#SortColumnsByIndex), 107 | or indirect sort. 108 | 109 | - [**Split rows value by numeric**](https://godoc.org/github.com/shuLhan/tabula#SplitRowsByNumeric). 
110 | For example, given two numeric rows, 111 | 112 | ``` 113 | A: {1,2,3,4} 114 | B: {5,6,7,8} 115 | ``` 116 | 117 | if we split the rows by value 7 (in B), the data will be split into the left set 118 | 119 | ``` 120 | A': {1,2} 121 | B': {5,6} 122 | ``` 123 | 124 | and the right set would be 125 | 126 | ``` 127 | A'': {3,4} 128 | B'': {7,8} 129 | ``` 130 | 131 | - [**Split rows by string**](https://godoc.org/github.com/shuLhan/tabula#SplitRowsByCategorical). 132 | For example, given two rows, 133 | 134 | ``` 135 | X: [A,B,A,B,C,D,C,D] 136 | Y: [1,2,3,4,5,6,7,8] 137 | ``` 138 | 139 | if we split the rows with value set `[A,C]`, the data will be split into the left 140 | set, which contains all rows that have A or C, 141 | 142 | ``` 143 | X': [A,A,C,C] 144 | Y': [1,3,5,7] 145 | ``` 146 | 147 | and the right set, the excluded set, which contains all rows that have neither A nor C, 148 | 149 | ``` 150 | X'': [B,B,D,D] 151 | Y'': [2,4,6,8] 152 | ``` 153 | 154 | - [**Select row where**](https://godoc.org/github.com/shuLhan/tabula#SelectRowsWhere). 155 | Select rows whose value at column index x is equal to y (an analogy to 156 | _select where_ in SQL). 157 | For example, given the rows of a dataset, 158 | ``` 159 | ROW-1: {1,A} 160 | ROW-2: {2,B} 161 | ROW-3: {3,A} 162 | ROW-4: {4,C} 163 | ``` 164 | we can select the rows where the second column contains 'A', which results in, 165 | ``` 166 | ROW-1: {1,A} 167 | ROW-3: {3,A} 168 | ``` 169 | -------------------------------------------------------------------------------- /rows.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 M. Shulhan . All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be found 3 | // in the LICENSE file. 4 | 5 | package tabula 6 | 7 | import ( 8 | "fmt" 9 | "math/rand" 10 | "time" 11 | ) 12 | 13 | // 14 | // Rows represent slice of Row. 15 | // 16 | type Rows []*Row 17 | 18 | // 19 | // Len return number of row. 
20 | // 21 | func (rows *Rows) Len() int { 22 | return len(*rows) 23 | } 24 | 25 | // 26 | // PushBack append record r to the end of rows. 27 | // 28 | func (rows *Rows) PushBack(r *Row) { 29 | if r != nil { 30 | (*rows) = append((*rows), r) 31 | } 32 | } 33 | 34 | // 35 | // PopFront remove the head, return the record value. 36 | // 37 | func (rows *Rows) PopFront() (row *Row) { 38 | l := len(*rows) 39 | if l > 0 { 40 | row = (*rows)[0] 41 | (*rows) = (*rows)[1:] 42 | } 43 | return 44 | } 45 | 46 | // 47 | // PopFrontAsRows remove the head and return ex-head as new rows. 48 | // 49 | func (rows *Rows) PopFrontAsRows() (newRows Rows) { 50 | row := rows.PopFront() 51 | if nil == row { 52 | return 53 | } 54 | newRows.PushBack(row) 55 | return 56 | } 57 | 58 | // 59 | // Del will detach row at index `i` from slice and return it. 60 | // 61 | func (rows *Rows) Del(i int) (row *Row) { 62 | if i < 0 { 63 | return 64 | } 65 | if i >= rows.Len() { 66 | return 67 | } 68 | 69 | row = (*rows)[i] 70 | 71 | last := len(*rows) - 1 72 | copy((*rows)[i:], (*rows)[i+1:]) 73 | (*rows)[last] = nil 74 | (*rows) = (*rows)[0:last] 75 | 76 | return row 77 | } 78 | 79 | // 80 | // GroupByValue will group each row based on record value in index recGroupIdx 81 | // into map of string -> *Row. 82 | // 83 | // WARNING: returned rows will be empty! 
84 | // 85 | // For example, given rows with target group in column index 1, 86 | // 87 | // [1 +] 88 | // [2 -] 89 | // [3 -] 90 | // [4 +] 91 | // 92 | // this function will create a map with key is string of target and value is 93 | // pointer to sub-rows, 94 | // 95 | // + -> [1 +] 96 | // [4 +] 97 | // - -> [2 -] 98 | // [3 -] 99 | // 100 | // 101 | func (rows *Rows) GroupByValue(GroupIdx int) (mapRows MapRows) { 102 | for { 103 | row := rows.PopFront() 104 | if nil == row { 105 | break 106 | } 107 | 108 | key := fmt.Sprint((*row)[GroupIdx]) 109 | 110 | mapRows.AddRow(key, row) 111 | } 112 | return 113 | } 114 | 115 | // 116 | // RandomPick row in rows until n item and return it like its has been shuffled. 117 | // If duplicate is true, row that has been picked can be picked up again, 118 | // otherwise it will only picked up once. 119 | // 120 | // This function return picked and unpicked rows and index of them. 121 | // 122 | func (rows *Rows) RandomPick(n int, duplicate bool) ( 123 | picked Rows, 124 | unpicked Rows, 125 | pickedIdx []int, 126 | unpickedIdx []int, 127 | ) { 128 | rowsLen := len(*rows) 129 | 130 | // if duplication is not allowed, we can only select as many as rows 131 | // that we have. 132 | if n > rowsLen && !duplicate { 133 | n = rowsLen 134 | } 135 | 136 | rand.Seed(time.Now().UnixNano()) 137 | 138 | for ; n >= 1; n-- { 139 | idx := 0 140 | for { 141 | idx = rand.Intn(len(*rows)) 142 | 143 | if duplicate { 144 | // allow duplicate idx 145 | pickedIdx = append(pickedIdx, idx) 146 | break 147 | } 148 | 149 | // check if its already picked 150 | isPicked := false 151 | for _, pastIdx := range pickedIdx { 152 | if idx == pastIdx { 153 | isPicked = true 154 | break 155 | } 156 | } 157 | // get another random idx again 158 | if isPicked { 159 | continue 160 | } 161 | 162 | // bingo, we found unique idx that has not been picked. 
163 | pickedIdx = append(pickedIdx, idx) 164 | break 165 | } 166 | 167 | row := (*rows)[idx] 168 | 169 | picked.PushBack(row) 170 | } 171 | 172 | // select unpicked rows using picked index. 173 | for rid := range *rows { 174 | // check if row index has been picked up 175 | isPicked := false 176 | for _, idx := range pickedIdx { 177 | if rid == idx { 178 | isPicked = true 179 | break 180 | } 181 | } 182 | if !isPicked { 183 | unpicked.PushBack((*rows)[rid]) 184 | unpickedIdx = append(unpickedIdx, rid) 185 | } 186 | } 187 | return 188 | } 189 | 190 | // 191 | // Contain return true and index of row, if rows has data that has the same value 192 | // with `row`, otherwise return false and -1 as index. 193 | // 194 | func (rows *Rows) Contain(xrow *Row) (bool, int) { 195 | for x, row := range *rows { 196 | if xrow.IsEqual(row) { 197 | return true, x 198 | } 199 | } 200 | return false, -1 201 | } 202 | 203 | // 204 | // Contains return true and indices of row, if rows has data that has the same 205 | // value with `rows`, otherwise return false and empty indices. 206 | // 207 | func (rows *Rows) Contains(xrows Rows) (isin bool, indices []int) { 208 | // No data to compare. 209 | if len(xrows) <= 0 { 210 | return 211 | } 212 | 213 | for _, xrow := range xrows { 214 | isin, idx := rows.Contain(xrow) 215 | 216 | if isin { 217 | indices = append(indices, idx) 218 | } 219 | } 220 | 221 | // Check if indices length equal to searched rows 222 | if len(indices) == len(xrows) { 223 | return true, indices 224 | } 225 | 226 | return false, nil 227 | } 228 | 229 | // 230 | // SelectWhere return all rows which column value in `colidx` is equal 231 | // to `colval`. 
232 | // NOTE: `colidx` is not range-checked before indexing each row. 233 | func (rows *Rows) SelectWhere(colidx int, colval string) (selected Rows) { 234 | for _, row := range *rows { 235 | col := (*row)[colidx] 236 | if col.IsEqualToString(colval) { 237 | selected.PushBack(row) 238 | } 239 | } 240 | return 241 | } 242 | 243 | // 244 | // String returns the concatenation of the string representation of every row, in order. 245 | // 246 | func (rows Rows) String() (s string) { 247 | for x := range rows { 248 | s += fmt.Sprint(rows[x]) 249 | } 250 | return 251 | } 252 | -------------------------------------------------------------------------------- /record.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 M. Shulhan . All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be found 3 | // in the LICENSE file. 4 | 5 | package tabula 6 | 7 | import ( 8 | "math" 9 | "reflect" 10 | "strconv" 11 | ) 12 | 13 | const ( 14 | // TUndefined for undefined type 15 | TUndefined = -1 16 | // TString string type. 17 | TString = 0 18 | // TInteger integer type (64 bit). 19 | TInteger = 1 20 | // TReal float type (64 bit). 21 | TReal = 2 22 | ) 23 | 24 | // 25 | // Record represent the smallest building block of data-set. 26 | // 27 | type Record struct { 28 | v interface{} 29 | } 30 | 31 | // 32 | // NewRecord creates and returns a record whose value is nil. 33 | // 34 | func NewRecord() *Record { 35 | return &Record{v: nil} 36 | } 37 | 38 | // 39 | // NewRecordBy creates a new record by passing string `v` and type `t` to SetValue; the error from SetValue, if any, is returned in `e`. 40 | // 41 | func NewRecordBy(v string, t int) (r *Record, e error) { 42 | r = NewRecord() 43 | e = r.SetValue(v, t) 44 | return 45 | } 46 | 47 | // 48 | // NewRecordString creates a new record that holds string `v`. 49 | // 50 | func NewRecordString(v string) (r *Record) { 51 | return &Record{v: v} 52 | } 53 | 54 | // 55 | // NewRecordInt create new record from integer value. 
56 | // 57 | func NewRecordInt(v int64) (r *Record) { 58 | return &Record{v: v} 59 | } 60 | 61 | // 62 | // NewRecordReal create new record from float value. 63 | // 64 | func NewRecordReal(v float64) (r *Record) { 65 | return &Record{v: v} 66 | } 67 | 68 | // 69 | // Clone will create and return a clone of record. 70 | // 71 | func (r *Record) Clone() *Record { 72 | return &Record{v: r.v} 73 | } 74 | 75 | // 76 | // IsNil return true if record has not been set with value, or nil. 77 | // 78 | func (r *Record) IsNil() bool { 79 | return r.v == nil 80 | } 81 | 82 | // 83 | // Type of record. 84 | // 85 | func (r *Record) Type() int { 86 | switch r.v.(type) { 87 | case int64: 88 | return TInteger 89 | case float64: 90 | return TReal 91 | } 92 | return TString 93 | } 94 | 95 | // 96 | // SetValue set the record value from string using type `t`. If value can not 97 | // be converted to type, it will return an error. 98 | // 99 | func (r *Record) SetValue(v string, t int) error { 100 | switch t { 101 | case TString: 102 | r.v = v 103 | 104 | case TInteger: 105 | i64, e := strconv.ParseInt(v, 10, 64) 106 | if nil != e { 107 | return e 108 | } 109 | 110 | r.v = i64 111 | 112 | case TReal: 113 | f64, e := strconv.ParseFloat(v, 64) 114 | if nil != e { 115 | return e 116 | } 117 | 118 | r.v = f64 119 | } 120 | return nil 121 | } 122 | 123 | // 124 | // SetString will set the record value with string value. 125 | // 126 | func (r *Record) SetString(v string) { 127 | r.v = v 128 | } 129 | 130 | // 131 | // SetFloat will set the record value with float 64bit. 132 | // 133 | func (r *Record) SetFloat(v float64) { 134 | r.v = v 135 | } 136 | 137 | // 138 | // SetInteger will set the record value with integer 64bit. 139 | // 140 | func (r *Record) SetInteger(v int64) { 141 | r.v = v 142 | } 143 | 144 | // 145 | // IsMissingValue check wether the value is a missing attribute. 146 | // 147 | // If its string the missing value is indicated by character '?'. 
148 | // 149 | // If its integer the missing value is indicated by minimum negative integer, 150 | // or math.MinInt64. 151 | // 152 | // If its real the missing value is indicated by -Inf. 153 | // 154 | func (r *Record) IsMissingValue() bool { 155 | switch r.v.(type) { 156 | case string: 157 | str := r.v.(string) 158 | if str == "?" { 159 | return true 160 | } 161 | 162 | case int64: 163 | i64 := r.v.(int64) 164 | if i64 == math.MinInt64 { 165 | return true 166 | } 167 | 168 | case float64: 169 | f64 := r.v.(float64) 170 | return math.IsInf(f64, -1) 171 | } 172 | 173 | return false 174 | } 175 | 176 | // 177 | // Interface return record value as interface. 178 | // 179 | func (r *Record) Interface() interface{} { 180 | return r.v 181 | } 182 | 183 | // 184 | // Bytes convert record value to slice of byte. 185 | // 186 | func (r *Record) Bytes() []byte { 187 | return []byte(r.String()) 188 | } 189 | 190 | // 191 | // String convert record value to string. 192 | // 193 | func (r Record) String() (s string) { 194 | switch r.v.(type) { 195 | case string: 196 | s = r.v.(string) 197 | 198 | case int64: 199 | s = strconv.FormatInt(r.v.(int64), 10) 200 | 201 | case float64: 202 | s = strconv.FormatFloat(r.v.(float64), 'f', -1, 64) 203 | } 204 | return 205 | } 206 | 207 | // 208 | // Float convert given record to float value. If its failed it will return 209 | // the -Infinity value. 210 | // 211 | func (r *Record) Float() (f64 float64) { 212 | var e error 213 | 214 | switch r.v.(type) { 215 | case string: 216 | f64, e = strconv.ParseFloat(r.v.(string), 64) 217 | 218 | if nil != e { 219 | f64 = math.Inf(-1) 220 | } 221 | 222 | case int64: 223 | f64 = float64(r.v.(int64)) 224 | 225 | case float64: 226 | f64 = r.v.(float64) 227 | } 228 | 229 | return 230 | } 231 | 232 | // 233 | // Integer convert given record to integer value. If its failed, it will return 234 | // the minimum integer in 64bit. 
235 | // A float64 value is converted with int64(), which discards the fraction. 236 | func (r *Record) Integer() (i64 int64) { 237 | var e error 238 | 239 | switch r.v.(type) { 240 | case string: 241 | i64, e = strconv.ParseInt(r.v.(string), 10, 64) 242 | 243 | if nil != e { 244 | i64 = math.MinInt64 245 | } 246 | 247 | case int64: 248 | i64 = r.v.(int64) 249 | 250 | case float64: 251 | i64 = int64(r.v.(float64)) 252 | } 253 | 254 | return 255 | } 256 | 257 | // 258 | // IsEqual returns true if the value of record is deeply equal (reflect.DeepEqual) to the value of `o`, otherwise false. 259 | // 260 | func (r *Record) IsEqual(o *Record) bool { 261 | return reflect.DeepEqual(r.v, o.Interface()) 262 | } 263 | 264 | // 265 | // IsEqualToString return true if string representation of record value is 266 | // equal to string `v`. 267 | // 268 | func (r *Record) IsEqualToString(v string) bool { 269 | return r.String() == v 270 | } 271 | 272 | // 273 | // IsEqualToInterface return true if interface type and value equal to record 274 | // type and value. 275 | // 276 | func (r *Record) IsEqualToInterface(v interface{}) bool { 277 | return reflect.DeepEqual(r.v, v) 278 | } 279 | 280 | // 281 | // Reset sets the record value to an empty string or zero, depending on the type of its current value; any other value is left as is. 282 | // 283 | func (r *Record) Reset() { 284 | switch r.v.(type) { 285 | case string: 286 | r.v = "" 287 | case int64: 288 | r.v = int64(0) 289 | case float64: 290 | r.v = float64(0) 291 | } 292 | } 293 | -------------------------------------------------------------------------------- /column.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 M. Shulhan . All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be found 3 | // in the LICENSE file. 4 | 5 | package tabula 6 | 7 | import ( 8 | "strconv" 9 | ) 10 | 11 | // 12 | // Column represent slice of record. A vertical representation of data. 13 | // 14 | type Column struct { 15 | // Name of column. String identifier for the column. 16 | Name string 17 | // Type of column. 
All record in column have the same type. 18 | Type int 19 | // Flag additional attribute that can be set to mark some value on this 20 | // column 21 | Flag int 22 | // ValueSpace contains the possible values in records 23 | ValueSpace []string 24 | // Records contain column data. 25 | Records Records 26 | } 27 | 28 | // 29 | // NewColumn return new column with type and name. 30 | // 31 | func NewColumn(colType int, colName string) (col *Column) { 32 | col = &Column{ 33 | Type: colType, 34 | Name: colName, 35 | Flag: 0, 36 | } 37 | 38 | col.Records = make([]*Record, 0) 39 | 40 | return 41 | } 42 | 43 | // 44 | // NewColumnString initializes a column of type `colType` with `data` stored as string records. NOTE(review): every record is created with NewRecordString regardless of `colType`, and `e` is never set to a non-nil error. 45 | // 46 | func NewColumnString(data []string, colType int, colName string) ( 47 | col *Column, 48 | e error, 49 | ) { 50 | col = NewColumn(colType, colName) 51 | 52 | datalen := len(data) 53 | 54 | if datalen <= 0 { 55 | return 56 | } 57 | 58 | col.Records = make([]*Record, datalen) 59 | 60 | for x := 0; x < datalen; x++ { 61 | col.Records[x] = NewRecordString(data[x]) 62 | } 63 | 64 | return col, nil 65 | } 66 | 67 | // 68 | // NewColumnInt create new column with record type as integer, and fill it 69 | // with `data`. 70 | // 71 | func NewColumnInt(data []int64, colName string) (col *Column) { 72 | col = NewColumn(TInteger, colName) 73 | 74 | datalen := len(data) 75 | if datalen <= 0 { 76 | return 77 | } 78 | 79 | col.Records = make([]*Record, datalen) 80 | 81 | for x, v := range data { 82 | col.Records[x] = NewRecordInt(v) 83 | } 84 | return 85 | } 86 | 87 | // 88 | // NewColumnReal create new column with record type is real. 
89 | // 90 | func NewColumnReal(data []float64, colName string) (col *Column) { 91 | col = NewColumn(TReal, colName) 92 | 93 | datalen := len(data) 94 | 95 | if datalen <= 0 { 96 | return 97 | } 98 | 99 | col.Records = make([]*Record, datalen) 100 | 101 | for x := 0; x < datalen; x++ { 102 | rec := NewRecordReal(data[x]) 103 | col.Records[x] = rec 104 | } 105 | 106 | return 107 | } 108 | 109 | // 110 | // SetType will set the type of column to `tipe`. 111 | // 112 | func (col *Column) SetType(tipe int) { 113 | col.Type = tipe 114 | } 115 | 116 | // 117 | // SetName will set the name of column to `name`. 118 | // 119 | func (col *Column) SetName(name string) { 120 | col.Name = name 121 | } 122 | 123 | // 124 | // GetType return the type of column. 125 | // 126 | func (col *Column) GetType() int { 127 | return col.Type 128 | } 129 | 130 | // 131 | // GetName return the column name. 132 | // 133 | func (col *Column) GetName() string { 134 | return col.Name 135 | } 136 | 137 | // 138 | // SetRecords will set records in column to `recs`. 139 | // 140 | func (col *Column) SetRecords(recs *Records) { 141 | col.Records = *recs 142 | } 143 | 144 | // 145 | // Interface return the column object as an interface. 146 | // 147 | func (col *Column) Interface() interface{} { 148 | return col 149 | } 150 | 151 | // 152 | // Reset clears the column flag and replaces its records with a new empty slice. 153 | // 154 | func (col *Column) Reset() { 155 | col.Flag = 0 156 | col.Records = make([]*Record, 0) 157 | } 158 | 159 | // 160 | // Len return number of record. 161 | // 162 | func (col *Column) Len() int { 163 | return len(col.Records) 164 | } 165 | 166 | // 167 | // PushBack pushes record `r` to the end of column. 168 | // 169 | func (col *Column) PushBack(r *Record) { 170 | col.Records = append(col.Records, r) 171 | } 172 | 173 | // 174 | // PushRecords append slice of record to the end of column's records. 175 | // 176 | func (col *Column) PushRecords(rs []*Record) { 177 | col.Records = append(col.Records, rs...) 
178 | } 179 | 180 | // 181 | // ToIntegers convert slice of record to slice of int64. 182 | // 183 | func (col *Column) ToIntegers() []int64 { 184 | newcol := make([]int64, col.Len()) 185 | 186 | for x := range col.Records { 187 | newcol[x] = col.Records[x].Integer() 188 | } 189 | 190 | return newcol 191 | } 192 | 193 | // 194 | // ToFloatSlice convert slice of record to slice of float64. 195 | // 196 | func (col *Column) ToFloatSlice() (newcol []float64) { 197 | newcol = make([]float64, col.Len()) 198 | 199 | for i := range col.Records { 200 | newcol[i] = col.Records[i].Float() 201 | } 202 | 203 | return 204 | } 205 | 206 | // 207 | // ToStringSlice convert slice of record to slice of string. 208 | // 209 | func (col *Column) ToStringSlice() (newcol []string) { 210 | newcol = make([]string, col.Len()) 211 | 212 | for i := range col.Records { 213 | newcol[i] = col.Records[i].String() 214 | } 215 | 216 | return 217 | } 218 | 219 | // 220 | // ClearValues calls Reset on every record in the column. 221 | // 222 | // 223 | func (col *Column) ClearValues() { 224 | for _, r := range col.Records { 225 | r.Reset() 226 | } 227 | } 228 | 229 | // 230 | // SetValueAt will set column value at cell `idx` with `v`, unless the index 231 | // is out of range. 232 | // The error returned by Record.SetValue is discarded. 233 | func (col *Column) SetValueAt(idx int, v string) { 234 | if idx < 0 { 235 | return 236 | } 237 | if col.Records.Len() <= idx { 238 | return 239 | } 240 | _ = col.Records[idx].SetValue(v, col.Type) 241 | } 242 | 243 | // 244 | // SetValueByNumericAt will set column value at cell `idx` with numeric value 245 | // `v`, unless the index is out of range. 
246 | // 247 | func (col *Column) SetValueByNumericAt(idx int, v float64) { 248 | if idx < 0 { 249 | return 250 | } 251 | if col.Records.Len() <= idx { 252 | return 253 | } 254 | switch col.Type { 255 | case TString: 256 | col.Records[idx].SetString(strconv.FormatFloat(v, 'f', -1, 64)) 257 | case TInteger: 258 | col.Records[idx].SetInteger(int64(v)) 259 | case TReal: 260 | col.Records[idx].SetFloat(v) 261 | } 262 | } 263 | 264 | // 265 | // SetValues of all column record. 266 | // 267 | func (col *Column) SetValues(values []string) { 268 | vallen := len(values) 269 | reclen := col.Len() 270 | 271 | // initialize column record if its empty. 272 | if reclen <= 0 { 273 | col.Records = make([]*Record, vallen) 274 | reclen = vallen 275 | } 276 | 277 | // pick the least length 278 | minlen := reclen 279 | if vallen < reclen { 280 | minlen = vallen 281 | } 282 | 283 | for x := 0; x < minlen; x++ { 284 | _ = col.Records[x].SetValue(values[x], col.Type) 285 | } 286 | } 287 | 288 | // 289 | // DeleteRecordAt will delete record at index `i` and return it. 290 | // 291 | func (col *Column) DeleteRecordAt(i int) *Record { 292 | if i < 0 { 293 | return nil 294 | } 295 | 296 | clen := col.Len() 297 | if i >= clen { 298 | return nil 299 | } 300 | 301 | r := col.Records[i] 302 | 303 | last := clen - 1 304 | copy(col.Records[i:], col.Records[i+1:]) 305 | col.Records[last] = nil 306 | col.Records = col.Records[0:last] 307 | 308 | return r 309 | } 310 | -------------------------------------------------------------------------------- /claset.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 M. Shulhan . All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be found 3 | // in the LICENSE file. 
4 | 5 | package tabula 6 | 7 | import ( 8 | "fmt" 9 | "github.com/shuLhan/numerus" 10 | "github.com/shuLhan/tekstus" 11 | "strconv" 12 | ) 13 | 14 | // 15 | // Claset define a dataset with class attribute. 16 | // 17 | type Claset struct { 18 | // Dataset embedded, for implementing the dataset interface. 19 | Dataset 20 | // ClassIndex contain index for target classification in columns. 21 | ClassIndex int `json:"ClassIndex"` 22 | 23 | // vs contain a copy of value space. 24 | vs []string 25 | // counts number of value space in current set. 26 | counts []int 27 | 28 | // major contain the name of majority class in dataset. 29 | major string 30 | // minor contain the name of minority class in dataset. 31 | minor string 32 | } 33 | 34 | // 35 | // NewClaset create and return new Claset object. 36 | // 37 | func NewClaset(mode int, types []int, names []string) (claset *Claset) { 38 | claset = &Claset{ 39 | ClassIndex: -1, 40 | } 41 | 42 | claset.Init(mode, types, names) 43 | 44 | return 45 | } 46 | 47 | // 48 | // Clone return a copy of current claset object. 49 | // 50 | func (claset *Claset) Clone() interface{} { 51 | clone := Claset{ 52 | ClassIndex: claset.GetClassIndex(), 53 | major: claset.MajorityClass(), 54 | minor: claset.MinorityClass(), 55 | } 56 | clone.SetDataset(claset.GetDataset().Clone().(DatasetInterface)) 57 | return &clone 58 | } 59 | 60 | // 61 | // GetDataset return the dataset. 62 | // 63 | func (claset *Claset) GetDataset() DatasetInterface { 64 | return &claset.Dataset 65 | } 66 | 67 | // 68 | // GetClassType return type of class in dataset. 69 | // 70 | func (claset *Claset) GetClassType() int { 71 | if claset.Columns.Len() <= 0 { 72 | return TString 73 | } 74 | return claset.Columns[claset.ClassIndex].Type 75 | } 76 | 77 | // 78 | // GetClassValueSpace return the class value space. 
79 | // 80 | func (claset *Claset) GetClassValueSpace() []string { 81 | if claset.Columns.Len() <= 0 { 82 | return nil 83 | } 84 | return claset.Columns[claset.ClassIndex].ValueSpace 85 | } 86 | 87 | // 88 | // GetClassColumn return dataset class values in column. 89 | // 90 | func (claset *Claset) GetClassColumn() *Column { 91 | if claset.Mode == DatasetModeRows { 92 | claset.TransposeToColumns() 93 | } 94 | if claset.Columns.Len() <= 0 { 95 | return nil 96 | } 97 | return &claset.Columns[claset.ClassIndex] 98 | } 99 | 100 | // 101 | // GetClassRecords return class values as records. 102 | // 103 | func (claset *Claset) GetClassRecords() *Records { 104 | if claset.Mode == DatasetModeRows { 105 | claset.TransposeToColumns() 106 | } 107 | if claset.Columns.Len() <= 0 { 108 | return nil 109 | } 110 | return &claset.Columns[claset.ClassIndex].Records 111 | } 112 | 113 | // 114 | // GetClassAsStrings return all class values as slice of string. 115 | // 116 | func (claset *Claset) GetClassAsStrings() []string { 117 | if claset.Mode == DatasetModeRows { 118 | claset.TransposeToColumns() 119 | } 120 | if claset.Columns.Len() <= 0 { 121 | return nil 122 | } 123 | return claset.Columns[claset.ClassIndex].ToStringSlice() 124 | } 125 | 126 | // 127 | // GetClassAsReals return class record value as slice of float64. 128 | // 129 | func (claset *Claset) GetClassAsReals() []float64 { 130 | if claset.Mode == DatasetModeRows { 131 | claset.TransposeToColumns() 132 | } 133 | if claset.Columns.Len() <= 0 { 134 | return nil 135 | } 136 | return claset.Columns[claset.ClassIndex].ToFloatSlice() 137 | } 138 | 139 | // 140 | // GetClassAsInteger return class record value as slice of int64. 
141 | // 142 | func (claset *Claset) GetClassAsInteger() []int64 { 143 | if claset.Mode == DatasetModeRows { 144 | claset.TransposeToColumns() 145 | } 146 | if claset.Columns.Len() <= 0 { 147 | return nil 148 | } 149 | return claset.Columns[claset.ClassIndex].ToIntegers() 150 | } 151 | 152 | // 153 | // GetClassIndex return index of class attribute in dataset. 154 | // 155 | func (claset *Claset) GetClassIndex() int { 156 | return claset.ClassIndex 157 | } 158 | 159 | // 160 | // MajorityClass return the majority class of data. 161 | // 162 | func (claset *Claset) MajorityClass() string { 163 | return claset.major 164 | } 165 | 166 | // 167 | // MinorityClass return the minority class in dataset. 168 | // 169 | func (claset *Claset) MinorityClass() string { 170 | return claset.minor 171 | } 172 | 173 | // 174 | // Counts return the number of each class in value-space. 175 | // 176 | func (claset *Claset) Counts() []int { 177 | if len(claset.counts) <= 0 { 178 | claset.CountValueSpaces() 179 | } 180 | return claset.counts 181 | } 182 | 183 | // 184 | // SetDataset in class set. 185 | // 186 | func (claset *Claset) SetDataset(dataset DatasetInterface) { 187 | claset.Dataset = *(dataset.(*Dataset)) 188 | } 189 | 190 | // 191 | // SetClassIndex will set the class index to `v`. 192 | // 193 | func (claset *Claset) SetClassIndex(v int) { 194 | claset.ClassIndex = v 195 | } 196 | 197 | // 198 | // SetMajorityClass will set the majority class to `v`. 199 | // 200 | func (claset *Claset) SetMajorityClass(v string) { 201 | claset.major = v 202 | } 203 | 204 | // 205 | // SetMinorityClass will set the minority class to `v`. 206 | // 207 | func (claset *Claset) SetMinorityClass(v string) { 208 | claset.minor = v 209 | } 210 | 211 | // 212 | // CountValueSpaces will count number of value space in current dataset. 
213 | // 214 | func (claset *Claset) CountValueSpaces() { 215 | classv := claset.GetClassAsStrings() 216 | claset.vs = claset.GetClassValueSpace() 217 | 218 | claset.counts = tekstus.WordsCountTokens(classv, claset.vs, false) 219 | } 220 | 221 | // 222 | // RecountMajorMinor recount major and minor class in claset. 223 | // 224 | func (claset *Claset) RecountMajorMinor() { 225 | claset.CountValueSpaces() 226 | 227 | _, maxIdx, maxok := numerus.IntsFindMax(claset.counts) 228 | _, minIdx, minok := numerus.IntsFindMin(claset.counts) 229 | 230 | if maxok { 231 | claset.major = claset.vs[maxIdx] 232 | } 233 | if minok { 234 | claset.minor = claset.vs[minIdx] 235 | } 236 | } 237 | 238 | // 239 | // IsInSingleClass check whether all target class contain only single value. 240 | // Return true and name of target if all rows is in the same class, 241 | // false and empty string otherwise. 242 | // 243 | func (claset *Claset) IsInSingleClass() (single bool, class string) { 244 | classv := claset.GetClassAsStrings() 245 | 246 | for i, t := range classv { 247 | if i == 0 { 248 | single = true 249 | class = t 250 | continue 251 | } 252 | if t != class { 253 | return false, "" 254 | } 255 | } 256 | return 257 | } 258 | 259 | // 260 | // GetMinorityRows return rows where their class is minority in dataset, or nil 261 | // if dataset is empty. 262 | // 263 | func (claset *Claset) GetMinorityRows() *Rows { 264 | if claset.Len() == 0 { 265 | return nil 266 | } 267 | if claset.vs == nil { 268 | claset.RecountMajorMinor() 269 | } 270 | 271 | minRows := claset.GetRows().SelectWhere(claset.ClassIndex, 272 | claset.minor) 273 | 274 | return &minRows 275 | } 276 | 277 | // 278 | // String, yes it will pretty print the meta-data in JSON format. 
//
// The output uses single quotes around keys and string values. It lists the
// number of rows and columns, every value-space entry with its count, and the
// majority and minority class. Majority and minority are recounted first when
// no value space has been stored yet.
//
func (claset *Claset) String() (s string) {
	if claset.vs == nil {
		claset.RecountMajorMinor()
	}

	s = fmt.Sprintf("'claset':{'rows': %d, 'columns': %d, ", claset.Len(),
		claset.GetNColumn())

	s += "'vs':{"
	for x, v := range claset.vs {
		if x > 0 {
			s += ", "
		}
		s += "'" + v + "':" + strconv.Itoa(claset.counts[x])
	}
	s += "}"

	s += ", 'major': '" + claset.major + "'"
	s += ", 'minor': '" + claset.minor + "'"
	s += "}"

	return
}

--------------------------------------------------------------------------------
/dataset_test.go:
--------------------------------------------------------------------------------
// Copyright 2017 M. Shulhan . All rights reserved.
// Use of this source code is governed by a BSD-style license that can be found
// in the LICENSE file.

package tabula_test

import (
	"fmt"
	"github.com/shuLhan/tabula"
	"testing"
)

// datasetRows is the test fixture in row orientation: ten rows of three
// string-encoded cells each.
var datasetRows = [][]string{
	{"0", "1", "A"},
	{"1", "1.1", "B"},
	{"2", "1.2", "A"},
	{"3", "1.3", "B"},
	{"4", "1.4", "C"},
	{"5", "1.5", "D"},
	{"6", "1.6", "C"},
	{"7", "1.7", "D"},
	{"8", "1.8", "E"},
	{"9", "1.9", "F"},
}

// datasetCols holds the same values as datasetRows in column orientation.
var datasetCols = [][]string{
	{"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"},
	{"1", "1.1", "1.2", "1.3", "1.4", "1.5", "1.6", "1.7", "1.8", "1.9"},
	{"A", "B", "A", "B", "C", "D", "C", "D", "E", "F"},
}

// datasetTypes is the record type of each fixture column, by position.
var datasetTypes = []int{
	tabula.TInteger,
	tabula.TReal,
	tabula.TString,
}

// datasetNames is the name of each fixture column, by position.
var datasetNames = []string{"int", "real", "string"}

// populateWithRows pushes every row of datasetRows into dataset, creating
// each record with tabula.NewRecordBy and the type found in datasetTypes at
// the same position. It stops and returns the first error reported by
// NewRecordBy.
func populateWithRows(dataset *tabula.Dataset) error {
	for _, rowin := range datasetRows {
		row := make(tabula.Row, len(rowin))

		for x, recin := range rowin {
			rec, e := tabula.NewRecordBy(recin, datasetTypes[x])
			if e != nil {
				return e
			}

			row[x] = rec
		}

		dataset.PushRow(&row)
	}
	return nil
}

// populateWithColumns pushes every column of datasetCols into dataset, using
// the matching entries of datasetTypes and datasetNames. The test fails
// immediately when a column cannot be created.
func populateWithColumns(t *testing.T, dataset *tabula.Dataset) {
	for x := range datasetCols {
		col, e := tabula.NewColumnString(datasetCols[x], datasetTypes[x],
			datasetNames[x])
		if e != nil {
			t.Fatal(e)
		}

		dataset.PushColumn(*col)
	}
}

// createDataset returns a rows-mode dataset initialised with the fixture
// types and names and filled with the fixture rows.
func createDataset(t *testing.T) (dataset *tabula.Dataset) {
	dataset = tabula.NewDataset(tabula.DatasetModeRows, datasetTypes,
		datasetNames)

	e := populateWithRows(dataset)
	if e != nil {
		t.Fatal(e)
	}

	return
}

// DatasetStringJoinByIndex concatenates, for each index in indis, "&"
// followed by the default fmt formatting of dataset[index]. The tests compare
// the result with the formatted rows of a dataset.
func DatasetStringJoinByIndex(t *testing.T, dataset [][]string, indis []int) (res string) {
	for x := range indis {
		res += fmt.Sprint("&", dataset[indis[x]])
	}
	return res
}

// DatasetRowsJoin concatenates "&" followed by the default fmt formatting of
// each row in datasetRows.
func DatasetRowsJoin(t *testing.T) (s string) {
	for x := range datasetRows {
		s += fmt.Sprint("&", datasetRows[x])
	}
	return
}

// DatasetColumnsJoin concatenates the default fmt formatting of each column
// in datasetCols.
func DatasetColumnsJoin(t *testing.T) (s string) {
	for x := range datasetCols {
		s += fmt.Sprint(datasetCols[x])
	}
	return
}

// TestSplitRowsByNumeric splits the fixture on the integer column at 4.5 and
// on the real column at 1.8, and checks which rows end up on each side.
func TestSplitRowsByNumeric(t *testing.T) {
	dataset := createDataset(t)

	// Split integer by float
	splitL, splitR, e := tabula.SplitRowsByNumeric(dataset, 0, 4.5)
	if e != nil {
		t.Fatal(e)
	}

	expIdx := []int{0, 1, 2, 3, 4}
	exp := DatasetStringJoinByIndex(t, datasetRows, expIdx)
	rows := splitL.GetDataAsRows()
	got := fmt.Sprint(rows)

	assert(t, exp, got, true)

	expIdx = []int{5, 6, 7, 8, 9}
	exp = DatasetStringJoinByIndex(t, datasetRows, expIdx)
	got = fmt.Sprint(splitR.GetDataAsRows())

	assert(t, exp, got, true)

	// Split by float
	splitL, splitR, e = tabula.SplitRowsByNumeric(dataset, 1, 1.8)
	if e != nil {
		t.Fatal(e)
	}

	expIdx = []int{0, 1, 2, 3, 4, 5, 6, 7}
	exp = DatasetStringJoinByIndex(t, datasetRows, expIdx)
	got = fmt.Sprint(splitL.GetDataAsRows())

	assert(t, exp, got, true)

	expIdx = []int{8, 9}
	exp = DatasetStringJoinByIndex(t, datasetRows, expIdx)
	got = fmt.Sprint(splitR.GetDataAsRows())

	assert(t, exp, got, true)
}

// TestSplitRowsByCategorical splits the fixture on the string column with the
// value set {"A", "D"} and checks the included and the excluded rows.
func TestSplitRowsByCategorical(t *testing.T) {
	dataset := createDataset(t)
	splitval := []string{"A", "D"}

	splitL, splitR, e := tabula.SplitRowsByCategorical(dataset, 2,
		splitval)
	if e != nil {
		t.Fatal(e)
	}

	expIdx := []int{0, 2, 5, 7}
	exp := DatasetStringJoinByIndex(t, datasetRows, expIdx)
	got := fmt.Sprint(splitL.GetDataAsRows())

	assert(t, exp, got, true)

	expIdx = []int{1, 3, 4, 6, 8, 9}
	exp = DatasetStringJoinByIndex(t, datasetRows, expIdx)
	got = fmt.Sprint(splitR.GetDataAsRows())

	assert(t, exp, got, true)
}

// TestModeColumnsPushColumn pushes the fixture columns into a columns-mode
// dataset and expects the column records to be filled while the rows format
// to an empty string.
func TestModeColumnsPushColumn(t *testing.T) {
	dataset := tabula.NewDataset(tabula.DatasetModeColumns, nil, nil)

	exp := ""
	got := ""
	for x := range datasetCols {
		col, e := tabula.NewColumnString(datasetCols[x], datasetTypes[x],
			datasetNames[x])
		if e != nil {
			t.Fatal(e)
		}

		dataset.PushColumn(*col)

		exp += fmt.Sprint(datasetCols[x])
		got += fmt.Sprint(dataset.Columns[x].Records)
	}

	assert(t, exp, got, true)

	// Check rows
	exp = ""
	got = fmt.Sprint(dataset.Rows)
	assert(t, exp, got, true)
}

// TestModeRowsPushColumn pushes the fixture columns into a rows-mode dataset
// and expects the rows to be filled while the columns keep only their
// attributes, as pinned by the expected string below.
func TestModeRowsPushColumn(t *testing.T) {
	dataset := tabula.NewDataset(tabula.DatasetModeRows, nil, nil)

	populateWithColumns(t, dataset)

	// Check rows
	exp := DatasetRowsJoin(t)
	got := fmt.Sprint(dataset.Rows)

	assert(t, exp, got, true)

	// Check columns
	exp = "[{int 1 0 [] []} {real 2 0 [] []} {string 0 0 [] []}]"
	got = fmt.Sprint(dataset.Columns)

	assert(t, exp, got, true)
}

// TestModeMatrixPushColumn pushes the fixture columns into a matrix-mode
// dataset and expects both the column records and the rows to be filled.
func TestModeMatrixPushColumn(t *testing.T) {
	dataset := tabula.NewDataset(tabula.DatasetModeMatrix, nil, nil)

	exp := ""
	got := ""
	for x := range datasetCols {
		col, e := tabula.NewColumnString(datasetCols[x], datasetTypes[x],
			datasetNames[x])
		if e != nil {
			t.Fatal(e)
		}

		dataset.PushColumn(*col)

		exp += fmt.Sprint(datasetCols[x])
		got += fmt.Sprint(dataset.Columns[x].Records)
	}

	assert(t, exp, got, true)

	// Check rows
	exp = DatasetRowsJoin(t)
	got = fmt.Sprint(dataset.Rows)

	assert(t, exp, got, true)
}

// TestModeRowsPushRows pushes the fixture rows into a rows-mode dataset and
// checks the stored rows.
func TestModeRowsPushRows(t *testing.T) {
	dataset := tabula.NewDataset(tabula.DatasetModeRows, nil, nil)

	e := populateWithRows(dataset)
	if e != nil {
		t.Fatal(e)
	}

	exp := DatasetRowsJoin(t)
	got := fmt.Sprint(dataset.Rows)

	assert(t, exp, got, true)
}

// TestModeColumnsPushRows pushes the fixture rows into a columns-mode dataset
// and expects the rows to format to an empty string while the column records
// hold the data.
func TestModeColumnsPushRows(t *testing.T) {
	dataset := tabula.NewDataset(tabula.DatasetModeColumns, nil, nil)

	e := populateWithRows(dataset)
	if e != nil {
		t.Fatal(e)
	}

	// check rows
	exp := ""
	got := fmt.Sprint(dataset.Rows)

	assert(t, exp, got, true)

	// check columns
	exp = DatasetColumnsJoin(t)
	got = ""
	for x := range dataset.Columns {
		got += fmt.Sprint(dataset.Columns[x].Records)
	}

	assert(t, exp, got, true)
}

// TestModeMatrixPushRows pushes the fixture rows into a matrix-mode dataset
// and expects both the rows and the column records to hold the data.
func TestModeMatrixPushRows(t *testing.T) {
	dataset := tabula.NewDataset(tabula.DatasetModeMatrix, nil, nil)

	e := populateWithRows(dataset)
	if e != nil {
		t.Fatal(e)
	}

	exp := DatasetRowsJoin(t)
	got := fmt.Sprint(dataset.Rows)

	assert(t, exp, got, true)

	// check columns
	exp = DatasetColumnsJoin(t)
	got = ""
	for x := range dataset.Columns {
		got += fmt.Sprint(dataset.Columns[x].Records)
	}

	assert(t, exp, got, true)
}

// TestSelectRowsWhere checks that selecting on the first column with the
// value "9" yields the last fixture row as the first selected row.
func TestSelectRowsWhere(t *testing.T) {
	dataset := tabula.NewDataset(tabula.DatasetModeMatrix, nil, nil)

	e := populateWithRows(dataset)
	if e != nil {
		t.Fatal(e)
	}

	// select all rows where the first column value is 9.
	selected := tabula.SelectRowsWhere(dataset, 0, "9")
	exp := dataset.GetRow(9)
	got := selected.GetRow(0)

	assert(t, exp, got, true)
}

// TestDeleteRow deletes one row from a matrix-mode dataset and checks that
// the row count, every column length, the remaining rows and the remaining
// column records all reflect the deletion.
func TestDeleteRow(t *testing.T) {
	dataset := tabula.NewDataset(tabula.DatasetModeMatrix, nil, nil)

	e := populateWithRows(dataset)
	if e != nil {
		t.Fatal(e)
	}

	delIdx := 2

	// Check rows len.
	exp := dataset.Len() - 1
	dataset.DeleteRow(delIdx)
	got := dataset.Len()

	assert(t, exp, got, true)

	// Check columns len.
	for _, col := range dataset.Columns {
		got = col.Len()

		assert(t, exp, got, true)
	}

	// Check rows data.
	ridx := 0
	for x, row := range datasetRows {
		if x == delIdx {
			continue
		}
		exp := fmt.Sprint("&", row)
		got := fmt.Sprint(dataset.GetRow(ridx))
		ridx++

		assert(t, exp, got, true)
	}

	// Check columns data.
	for x := range dataset.Columns {
		col := datasetCols[x]

		// Expected column: the fixture column without the deleted
		// position.
		coldel := []string{}
		coldel = append(coldel, col[:delIdx]...)
		coldel = append(coldel, col[delIdx+1:]...)

		exp := fmt.Sprint(coldel)
		got := fmt.Sprint(dataset.Columns[x].Records)
		assert(t, exp, got, true)
	}
}

--------------------------------------------------------------------------------
/datasetinterface.go:
--------------------------------------------------------------------------------
// Copyright 2017 M. Shulhan . All rights reserved.
// Use of this source code is governed by a BSD-style license that can be found
// in the LICENSE file.

package tabula

import (
	"encoding/json"
	"fmt"
	"io/ioutil"
)

//
// DatasetInterface is the interface for working with DSV data.
//
// The package-level helpers in this file (sorting, splitting, selecting and
// random picking) take a DatasetInterface and only use the methods listed
// here.
//
type DatasetInterface interface {
	Init(mode int, types []int, names []string)
	Clone() interface{}
	Reset() error

	GetMode() int
	SetMode(mode int)

	GetNColumn() int
	GetNRow() int
	Len() int

	GetColumnsType() []int
	SetColumnsType(types []int)

	GetColumnTypeAt(idx int) (int, error)
	SetColumnTypeAt(idx, tipe int) error

	GetColumnsName() []string
	SetColumnsName(names []string)

	AddColumn(tipe int, name string, vs []string)
	GetColumn(idx int) *Column
	GetColumnByName(name string) *Column
	GetColumns() *Columns
	SetColumns(*Columns)

	GetRow(idx int) *Row
	GetRows() *Rows
	SetRows(*Rows)
	DeleteRow(idx int) *Row

	GetData() interface{}
	GetDataAsRows() *Rows
	GetDataAsColumns() *Columns

	TransposeToColumns()
	TransposeToRows()

	PushRow(r *Row)
	PushRowToColumns(r *Row)
	FillRowsWithColumn(colidx int, col Column)
	PushColumn(col Column)
	PushColumnToRows(col Column)

	MergeColumns(DatasetInterface)
	MergeRows(DatasetInterface)
}

//
// ReadDatasetConfig open dataset configuration file and initialize dataset
// field from there.
68 | // 69 | func ReadDatasetConfig(ds interface{}, fcfg string) (e error) { 70 | cfg, e := ioutil.ReadFile(fcfg) 71 | 72 | if nil != e { 73 | return e 74 | } 75 | 76 | return json.Unmarshal(cfg, ds) 77 | } 78 | 79 | // 80 | // SortColumnsByIndex will sort all columns using sorted index. 81 | // 82 | func SortColumnsByIndex(di DatasetInterface, sortedIdx []int) { 83 | if di.GetMode() == DatasetModeRows { 84 | di.TransposeToColumns() 85 | } 86 | 87 | cols := di.GetColumns() 88 | for x, col := range *cols { 89 | colsorted := col.Records.SortByIndex(sortedIdx) 90 | (*cols)[x].SetRecords(colsorted) 91 | } 92 | } 93 | 94 | // 95 | // SplitRowsByNumeric will split the data using splitVal in column `colidx`. 96 | // 97 | // For example, given two continuous attribute, 98 | // 99 | // A: {1,2,3,4} 100 | // B: {5,6,7,8} 101 | // 102 | // if colidx is (1) B and splitVal is 7, the data will splitted into left set 103 | // 104 | // A': {1,2} 105 | // B': {5,6} 106 | // 107 | // and right set 108 | // 109 | // A'': {3,4} 110 | // B'': {7,8} 111 | // 112 | func SplitRowsByNumeric(di DatasetInterface, colidx int, splitVal float64) ( 113 | splitLess DatasetInterface, 114 | splitGreater DatasetInterface, 115 | e error, 116 | ) { 117 | // check type of column 118 | coltype, e := di.GetColumnTypeAt(colidx) 119 | if e != nil { 120 | return 121 | } 122 | 123 | if !(coltype == TInteger || coltype == TReal) { 124 | return splitLess, splitGreater, ErrInvalidColType 125 | } 126 | 127 | // Should we convert the data mode back later. 
128 | orgmode := di.GetMode() 129 | 130 | if orgmode == DatasetModeColumns { 131 | di.TransposeToRows() 132 | } 133 | 134 | if DEBUG >= 2 { 135 | fmt.Println("[tabula] dataset:", di) 136 | } 137 | 138 | splitLess = di.Clone().(DatasetInterface) 139 | splitGreater = di.Clone().(DatasetInterface) 140 | 141 | rows := di.GetRows() 142 | for _, row := range *rows { 143 | if (*row)[colidx].Float() < splitVal { 144 | splitLess.PushRow(row) 145 | } else { 146 | splitGreater.PushRow(row) 147 | } 148 | } 149 | 150 | if DEBUG >= 2 { 151 | fmt.Println("[tabula] split less:", splitLess) 152 | fmt.Println("[tabula] split greater:", splitGreater) 153 | } 154 | 155 | switch orgmode { 156 | case DatasetModeColumns: 157 | di.TransposeToColumns() 158 | splitLess.TransposeToColumns() 159 | splitGreater.TransposeToColumns() 160 | case DatasetModeMatrix: 161 | // do nothing, since its already filled when pushing new row. 162 | } 163 | 164 | return 165 | } 166 | 167 | // 168 | // SplitRowsByCategorical will split the data using a set of split value in 169 | // column `colidx`. 
170 | // 171 | // For example, given two attributes, 172 | // 173 | // X: [A,B,A,B,C,D,C,D] 174 | // Y: [1,2,3,4,5,6,7,8] 175 | // 176 | // if colidx is (0) or A and split value is a set `[A,C]`, the data will 177 | // splitted into left set which contain all rows that have A or C, 178 | // 179 | // X': [A,A,C,C] 180 | // Y': [1,3,5,7] 181 | // 182 | // and the right set, excluded set, will contain all rows which is not A or C, 183 | // 184 | // X'': [B,B,D,D] 185 | // Y'': [2,4,6,8] 186 | // 187 | func SplitRowsByCategorical(di DatasetInterface, colidx int, 188 | splitVal []string) ( 189 | splitIn DatasetInterface, 190 | splitEx DatasetInterface, 191 | e error, 192 | ) { 193 | // check type of column 194 | coltype, e := di.GetColumnTypeAt(colidx) 195 | if e != nil { 196 | return 197 | } 198 | 199 | if coltype != TString { 200 | return splitIn, splitEx, ErrInvalidColType 201 | } 202 | 203 | // should we convert the data mode back? 204 | orgmode := di.GetMode() 205 | 206 | if orgmode == DatasetModeColumns { 207 | di.TransposeToRows() 208 | } 209 | 210 | splitIn = di.Clone().(DatasetInterface) 211 | splitEx = di.Clone().(DatasetInterface) 212 | 213 | for _, row := range *di.GetRows() { 214 | found := false 215 | for _, val := range splitVal { 216 | if (*row)[colidx].String() == val { 217 | splitIn.PushRow(row) 218 | found = true 219 | break 220 | } 221 | } 222 | if !found { 223 | splitEx.PushRow(row) 224 | } 225 | } 226 | 227 | // convert all dataset based on original 228 | switch orgmode { 229 | case DatasetModeColumns: 230 | di.TransposeToColumns() 231 | splitIn.TransposeToColumns() 232 | splitEx.TransposeToColumns() 233 | case DatasetModeMatrix, DatasetNoMode: 234 | splitIn.TransposeToColumns() 235 | splitEx.TransposeToColumns() 236 | } 237 | 238 | return 239 | } 240 | 241 | // 242 | // SplitRowsByValue generic function to split data by value. This function will 243 | // split data using value in column `colidx`. 
If value is numeric it will return 244 | // any rows that have column value less than `value` in `splitL`, and any column 245 | // value greater or equal to `value` in `splitR`. 246 | // 247 | func SplitRowsByValue(di DatasetInterface, colidx int, value interface{}) ( 248 | splitL DatasetInterface, 249 | splitR DatasetInterface, 250 | e error, 251 | ) { 252 | coltype, e := di.GetColumnTypeAt(colidx) 253 | if e != nil { 254 | return 255 | } 256 | 257 | if coltype == TString { 258 | splitL, splitR, e = SplitRowsByCategorical(di, colidx, 259 | value.([]string)) 260 | } else { 261 | var splitval float64 262 | 263 | switch value.(type) { 264 | case int: 265 | splitval = float64(value.(int)) 266 | case int64: 267 | splitval = float64(value.(int64)) 268 | case float32: 269 | splitval = float64(value.(float32)) 270 | case float64: 271 | splitval = value.(float64) 272 | } 273 | 274 | splitL, splitR, e = SplitRowsByNumeric(di, colidx, 275 | splitval) 276 | } 277 | 278 | if e != nil { 279 | return nil, nil, e 280 | } 281 | 282 | return 283 | } 284 | 285 | // 286 | // SelectRowsWhere return all rows which column value in `colidx` is equal to 287 | // `colval`. 288 | // 289 | func SelectRowsWhere(dataset DatasetInterface, colidx int, colval string) DatasetInterface { 290 | orgmode := dataset.GetMode() 291 | 292 | if orgmode == DatasetModeColumns { 293 | dataset.TransposeToRows() 294 | } 295 | 296 | selected := NewDataset(dataset.GetMode(), nil, nil) 297 | 298 | selected.Rows = dataset.GetRows().SelectWhere(colidx, colval) 299 | 300 | switch orgmode { 301 | case DatasetModeColumns: 302 | dataset.TransposeToColumns() 303 | selected.TransposeToColumns() 304 | case DatasetModeMatrix, DatasetNoMode: 305 | selected.TransposeToColumns() 306 | } 307 | 308 | return selected 309 | } 310 | 311 | // 312 | // RandomPickRows return `n` item of row that has been selected randomly from 313 | // dataset.Rows. The ids of rows that has been picked is saved id `pickedIdx`. 
314 | // 315 | // If duplicate is true, the row that has been picked can be picked up again, 316 | // otherwise it only allow one pick. This is also called as random selection 317 | // with or without replacement in machine learning domain. 318 | // 319 | // If output mode is columns, it will be transposed to rows. 320 | // 321 | func RandomPickRows(dataset DatasetInterface, n int, duplicate bool) ( 322 | picked DatasetInterface, 323 | unpicked DatasetInterface, 324 | pickedIdx []int, 325 | unpickedIdx []int, 326 | ) { 327 | orgmode := dataset.GetMode() 328 | 329 | if orgmode == DatasetModeColumns { 330 | dataset.TransposeToRows() 331 | } 332 | 333 | picked = dataset.Clone().(DatasetInterface) 334 | unpicked = dataset.Clone().(DatasetInterface) 335 | 336 | pickedRows, unpickedRows, pickedIdx, unpickedIdx := 337 | dataset.GetRows().RandomPick(n, duplicate) 338 | 339 | picked.SetRows(&pickedRows) 340 | unpicked.SetRows(&unpickedRows) 341 | 342 | // switch the dataset based on original mode 343 | switch orgmode { 344 | case DatasetModeColumns: 345 | dataset.TransposeToColumns() 346 | // transform the picked and unpicked set. 347 | picked.TransposeToColumns() 348 | unpicked.TransposeToColumns() 349 | 350 | case DatasetModeMatrix, DatasetNoMode: 351 | // transform the picked and unpicked set. 352 | picked.TransposeToColumns() 353 | unpicked.TransposeToColumns() 354 | } 355 | 356 | return 357 | } 358 | 359 | // 360 | // RandomPickColumns will select `n` column randomly from dataset and return 361 | // new dataset with picked and unpicked columns, and their column index. 362 | // 363 | // If duplicate is true, column that has been pick up can be pick up again. 364 | // 365 | // If dataset output mode is rows, it will transposed to columns. 
366 | // 367 | func RandomPickColumns(dataset DatasetInterface, n int, dup bool, 368 | excludeIdx []int) ( 369 | picked DatasetInterface, 370 | unpicked DatasetInterface, 371 | pickedIdx []int, 372 | unpickedIdx []int, 373 | ) { 374 | orgmode := dataset.GetMode() 375 | 376 | if orgmode == DatasetModeRows { 377 | dataset.TransposeToColumns() 378 | } 379 | 380 | picked = dataset.Clone().(DatasetInterface) 381 | unpicked = dataset.Clone().(DatasetInterface) 382 | 383 | pickedColumns, unpickedColumns, pickedIdx, unpickedIdx := 384 | dataset.GetColumns().RandomPick(n, dup, excludeIdx) 385 | 386 | picked.SetColumns(&pickedColumns) 387 | unpicked.SetColumns(&unpickedColumns) 388 | 389 | // transpose picked and unpicked dataset based on original mode 390 | switch orgmode { 391 | case DatasetModeRows: 392 | dataset.TransposeToRows() 393 | picked.TransposeToRows() 394 | unpicked.TransposeToRows() 395 | case DatasetModeMatrix, DatasetNoMode: 396 | picked.TransposeToRows() 397 | unpicked.TransposeToRows() 398 | } 399 | 400 | return 401 | } 402 | 403 | // 404 | // SelectColumnsByIdx return new dataset with selected column index. 
//
// Indices for which GetColumn returns nil are skipped. A dataset in rows mode
// is transposed to columns for the selection and back to rows afterwards,
// together with the returned set.
//
// NOTE(review): the new set starts as a Clone of `dataset`, and Clone pushes
// one record-less column per source column; the selected columns are pushed
// after those. Presumably the result then also holds the cloned columns --
// confirm against PushColumn.
//
func SelectColumnsByIdx(dataset DatasetInterface, colsIdx []int) (
	newset DatasetInterface,
) {
	var col *Column

	orgmode := dataset.GetMode()

	if orgmode == DatasetModeRows {
		dataset.TransposeToColumns()
	}

	newset = dataset.Clone().(DatasetInterface)

	for _, idx := range colsIdx {
		col = dataset.GetColumn(idx)
		if col == nil {
			continue
		}

		newset.PushColumn(*col)
	}

	// revert the mode back
	switch orgmode {
	case DatasetModeRows:
		dataset.TransposeToRows()
		newset.TransposeToRows()
	case DatasetModeColumns:
		// do nothing
	case DatasetModeMatrix:
		// do nothing
	}

	return
}

--------------------------------------------------------------------------------
/dataset.go:
--------------------------------------------------------------------------------
// Copyright 2017 M. Shulhan . All rights reserved.
// Use of this source code is governed by a BSD-style license that can be found
// in the LICENSE file.

package tabula

import (
	"errors"
	"math"
)

const (
	// DatasetNoMode default to matrix.
	DatasetNoMode = 0
	// DatasetModeRows for output mode in rows.
	DatasetModeRows = 1
	// DatasetModeColumns for output mode in columns.
	DatasetModeColumns = 2
	// DatasetModeMatrix will save data in rows and columns.
	DatasetModeMatrix = 4
)

var (
	// ErrColIdxOutOfRange operation on column index is invalid
	ErrColIdxOutOfRange = errors.New("tabula: Column index out of range")
	// ErrInvalidColType operation on column with different type
	ErrInvalidColType = errors.New("tabula: Invalid column type")
	// ErrMisColLength returned when operation on columns does not match
	// between parameter and their length
	ErrMisColLength = errors.New("tabula: mismatch on column length")
)

//
// Dataset contain the data, mode of saved data, number of columns and rows in
// data.
//
type Dataset struct {
	// Mode define the numeric value of output mode.
	Mode int
	// Columns is input data that has been parsed.
	Columns Columns
	// Rows is input data that has been parsed.
	Rows Rows
}

//
// NewDataset create new dataset, use the mode to initialize the dataset.
//
// `types` and `names` are passed to Init together with `mode`; both may be
// nil.
//
func NewDataset(mode int, types []int, names []string) (
	dataset *Dataset,
) {
	dataset = &Dataset{}

	dataset.Init(mode, types, names)

	return
}

//
// Init will set the dataset using mode and types.
//
// With nil `types` the dataset starts with an empty column set, otherwise
// with one column per type. The column names and the mode are applied after
// that, in this order, using SetColumnsName and SetMode.
//
func (dataset *Dataset) Init(mode int, types []int, names []string) {
	if types == nil {
		dataset.Columns = make(Columns, 0)
	} else {
		dataset.Columns = make(Columns, len(types))
		dataset.Columns.SetTypes(types)
	}

	dataset.SetColumnsName(names)
	dataset.SetMode(mode)
}

//
// Clone return a copy of current dataset.
76 | // 77 | func (dataset *Dataset) Clone() interface{} { 78 | clone := NewDataset(dataset.GetMode(), nil, nil) 79 | 80 | for _, col := range dataset.Columns { 81 | newcol := Column{ 82 | Type: col.Type, 83 | Name: col.Name, 84 | ValueSpace: col.ValueSpace, 85 | } 86 | clone.PushColumn(newcol) 87 | } 88 | 89 | return clone 90 | } 91 | 92 | // 93 | // Reset all data and attributes. 94 | // 95 | func (dataset *Dataset) Reset() error { 96 | dataset.Rows = Rows{} 97 | dataset.Columns.Reset() 98 | return nil 99 | } 100 | 101 | // 102 | // GetMode return mode of data. 103 | // 104 | func (dataset *Dataset) GetMode() int { 105 | return dataset.Mode 106 | } 107 | 108 | // 109 | // SetMode of saved data to `mode`. 110 | // 111 | func (dataset *Dataset) SetMode(mode int) { 112 | switch mode { 113 | case DatasetModeRows: 114 | dataset.Mode = DatasetModeRows 115 | dataset.Rows = make(Rows, 0) 116 | case DatasetModeColumns: 117 | dataset.Mode = DatasetModeColumns 118 | dataset.Columns.Reset() 119 | default: 120 | dataset.Mode = DatasetModeMatrix 121 | dataset.Rows = make(Rows, 0) 122 | dataset.Columns.Reset() 123 | } 124 | } 125 | 126 | // 127 | // GetNColumn return the number of column in dataset. 128 | // 129 | func (dataset *Dataset) GetNColumn() (ncol int) { 130 | ncol = len(dataset.Columns) 131 | 132 | if ncol > 0 { 133 | return 134 | } 135 | 136 | switch dataset.Mode { 137 | case DatasetModeRows: 138 | if len(dataset.Rows) <= 0 { 139 | return 0 140 | } 141 | return dataset.Rows[0].Len() 142 | } 143 | 144 | return 145 | } 146 | 147 | // 148 | // GetNRow return number of rows in dataset. 
149 | // 150 | func (dataset *Dataset) GetNRow() (nrow int) { 151 | switch dataset.Mode { 152 | case DatasetModeRows: 153 | nrow = len(dataset.Rows) 154 | case DatasetModeColumns: 155 | if len(dataset.Columns) <= 0 { 156 | nrow = 0 157 | } else { 158 | // get length of record in the first column 159 | nrow = dataset.Columns[0].Len() 160 | } 161 | case DatasetModeMatrix, DatasetNoMode: 162 | // matrix mode could have empty either in rows or column. 163 | nrow = len(dataset.Rows) 164 | } 165 | return 166 | } 167 | 168 | // 169 | // Len return number of row in dataset. 170 | // 171 | func (dataset *Dataset) Len() int { 172 | return dataset.GetNRow() 173 | } 174 | 175 | // 176 | // GetColumnsType return the type of all columns. 177 | // 178 | func (dataset *Dataset) GetColumnsType() (types []int) { 179 | for x := range dataset.Columns { 180 | types = append(types, dataset.Columns[x].Type) 181 | } 182 | 183 | return 184 | } 185 | 186 | // 187 | // SetColumnsType of data in all columns. 188 | // 189 | func (dataset *Dataset) SetColumnsType(types []int) { 190 | dataset.Columns = make(Columns, len(types)) 191 | dataset.Columns.SetTypes(types) 192 | } 193 | 194 | // 195 | // GetColumnTypeAt return type of column in index `colidx` in dataset. 196 | // 197 | func (dataset *Dataset) GetColumnTypeAt(idx int) (int, error) { 198 | if idx >= dataset.GetNColumn() { 199 | return TUndefined, ErrColIdxOutOfRange 200 | } 201 | 202 | return dataset.Columns[idx].Type, nil 203 | } 204 | 205 | // 206 | // SetColumnTypeAt will set column type at index `colidx` to `tipe`. 207 | // 208 | func (dataset *Dataset) SetColumnTypeAt(idx, tipe int) error { 209 | if idx >= dataset.GetNColumn() { 210 | return ErrColIdxOutOfRange 211 | } 212 | 213 | dataset.Columns[idx].Type = tipe 214 | return nil 215 | } 216 | 217 | // 218 | // GetColumnsName return name of all columns. 
219 | // 220 | func (dataset *Dataset) GetColumnsName() (names []string) { 221 | for x := range dataset.Columns { 222 | names = append(names, dataset.Columns[x].Name) 223 | } 224 | 225 | return 226 | } 227 | 228 | // 229 | // SetColumnsName set column name. 230 | // 231 | func (dataset *Dataset) SetColumnsName(names []string) { 232 | nameslen := len(names) 233 | 234 | if nameslen <= 0 { 235 | // empty names, return immediately. 236 | return 237 | } 238 | 239 | collen := dataset.GetNColumn() 240 | 241 | if collen <= 0 { 242 | dataset.Columns = make(Columns, nameslen) 243 | collen = nameslen 244 | } 245 | 246 | // find minimum length 247 | minlen := collen 248 | if nameslen < collen { 249 | minlen = nameslen 250 | } 251 | 252 | for x := 0; x < minlen; x++ { 253 | dataset.Columns[x].Name = names[x] 254 | } 255 | } 256 | 257 | // 258 | // AddColumn will create and add new empty column with specific type and name 259 | // into dataset. 260 | // 261 | func (dataset *Dataset) AddColumn(tipe int, name string, vs []string) { 262 | col := Column{ 263 | Type: tipe, 264 | Name: name, 265 | ValueSpace: vs, 266 | } 267 | dataset.PushColumn(col) 268 | } 269 | 270 | // 271 | // GetColumn return pointer to column object at index `idx`. If `idx` is out of 272 | // range return nil. 273 | // 274 | func (dataset *Dataset) GetColumn(idx int) (col *Column) { 275 | if idx > dataset.GetNColumn() { 276 | return 277 | } 278 | 279 | switch dataset.Mode { 280 | case DatasetModeRows: 281 | dataset.TransposeToColumns() 282 | case DatasetModeColumns: 283 | // do nothing 284 | case DatasetModeMatrix: 285 | // do nothing 286 | } 287 | 288 | return &dataset.Columns[idx] 289 | } 290 | 291 | // 292 | // GetColumnByName return column based on their `name`. 
293 | // 294 | func (dataset *Dataset) GetColumnByName(name string) (col *Column) { 295 | switch dataset.Mode { 296 | case DatasetModeRows: 297 | dataset.TransposeToColumns() 298 | } 299 | 300 | for x, col := range dataset.Columns { 301 | if col.Name == name { 302 | return &dataset.Columns[x] 303 | } 304 | } 305 | return 306 | } 307 | 308 | // 309 | // GetColumns return columns in dataset, without transposing. 310 | // 311 | func (dataset *Dataset) GetColumns() *Columns { 312 | return &dataset.Columns 313 | } 314 | 315 | // 316 | // SetColumns will replace current columns with new one from parameter. 317 | // 318 | func (dataset *Dataset) SetColumns(cols *Columns) { 319 | dataset.Columns = *cols 320 | } 321 | 322 | // 323 | // GetRow return pointer to row at index `idx` or nil if index is out of range. 324 | // 325 | func (dataset *Dataset) GetRow(idx int) *Row { 326 | if idx < 0 { 327 | return nil 328 | } 329 | if idx >= dataset.Rows.Len() { 330 | return nil 331 | } 332 | return dataset.Rows[idx] 333 | } 334 | 335 | // 336 | // GetRows return rows in dataset, without transposing. 337 | // 338 | func (dataset *Dataset) GetRows() *Rows { 339 | return &dataset.Rows 340 | } 341 | 342 | // 343 | // SetRows will replace current rows with new one from parameter. 344 | // 345 | func (dataset *Dataset) SetRows(rows *Rows) { 346 | dataset.Rows = *rows 347 | } 348 | 349 | // 350 | // GetData return the data, based on mode (rows, columns, or matrix). 351 | // 352 | func (dataset *Dataset) GetData() interface{} { 353 | switch dataset.Mode { 354 | case DatasetModeRows: 355 | return &dataset.Rows 356 | case DatasetModeColumns: 357 | return &dataset.Columns 358 | case DatasetModeMatrix, DatasetNoMode: 359 | return &Matrix{ 360 | Columns: &dataset.Columns, 361 | Rows: &dataset.Rows, 362 | } 363 | } 364 | 365 | return nil 366 | } 367 | 368 | // 369 | // GetDataAsRows return data in rows mode. 
370 | // 371 | func (dataset *Dataset) GetDataAsRows() *Rows { 372 | if dataset.Mode == DatasetModeColumns { 373 | dataset.TransposeToRows() 374 | } 375 | return &dataset.Rows 376 | } 377 | 378 | // 379 | // GetDataAsColumns return data in columns mode. 380 | // 381 | func (dataset *Dataset) GetDataAsColumns() (columns *Columns) { 382 | if dataset.Mode == DatasetModeRows { 383 | dataset.TransposeToColumns() 384 | } 385 | return &dataset.Columns 386 | } 387 | 388 | // 389 | // TransposeToColumns move all data from rows (horizontal) to columns 390 | // (vertical) mode. 391 | // 392 | func (dataset *Dataset) TransposeToColumns() { 393 | if dataset.GetNRow() <= 0 { 394 | // nothing to transpose 395 | return 396 | } 397 | 398 | ncol := dataset.GetNColumn() 399 | if ncol <= 0 { 400 | // if no columns defined, initialize it using record type 401 | // in the first row. 402 | types := dataset.GetRow(0).Types() 403 | dataset.SetColumnsType(types) 404 | ncol = len(types) 405 | } 406 | 407 | orgmode := dataset.GetMode() 408 | 409 | switch orgmode { 410 | case DatasetModeRows: 411 | // do nothing. 412 | case DatasetModeColumns, DatasetModeMatrix, DatasetNoMode: 413 | // check if column records contain data. 414 | nrow := dataset.Columns[0].Len() 415 | if nrow > 0 { 416 | // return if column record is not empty, its already 417 | // transposed 418 | return 419 | } 420 | } 421 | 422 | // use the least length 423 | minlen := len(*dataset.GetRow(0)) 424 | 425 | if minlen > ncol { 426 | minlen = ncol 427 | } 428 | 429 | switch orgmode { 430 | case DatasetModeRows, DatasetNoMode: 431 | dataset.SetMode(DatasetModeColumns) 432 | } 433 | 434 | for _, row := range dataset.Rows { 435 | for y := 0; y < minlen; y++ { 436 | dataset.Columns[y].PushBack((*row)[y]) 437 | } 438 | } 439 | 440 | // reset the rows data only if original mode is rows 441 | // this to prevent empty data when mode is matrix. 
442 | switch orgmode { 443 | case DatasetModeRows: 444 | dataset.Rows = nil 445 | } 446 | } 447 | 448 | // 449 | // TransposeToRows will move all data from columns (vertical) to rows 450 | // (horizontal) mode. 451 | // 452 | func (dataset *Dataset) TransposeToRows() { 453 | orgmode := dataset.GetMode() 454 | 455 | if orgmode == DatasetModeRows { 456 | // already transposed 457 | return 458 | } 459 | 460 | if orgmode == DatasetModeColumns { 461 | // only set mode if transposing from columns to rows 462 | dataset.SetMode(DatasetModeRows) 463 | } 464 | 465 | // Get the max length of columns. 466 | rowlen := math.MinInt32 467 | flen := len(dataset.Columns) 468 | 469 | for f := 0; f < flen; f++ { 470 | l := dataset.Columns[f].Len() 471 | 472 | if l > rowlen { 473 | rowlen = l 474 | } 475 | } 476 | 477 | dataset.Rows = make(Rows, 0) 478 | 479 | // Transpose record from column to row. 480 | for r := 0; r < rowlen; r++ { 481 | row := make(Row, flen) 482 | 483 | for f := 0; f < flen; f++ { 484 | if dataset.Columns[f].Len() > r { 485 | row[f] = dataset.Columns[f].Records[r] 486 | } else { 487 | row[f] = NewRecord() 488 | } 489 | } 490 | 491 | dataset.Rows = append(dataset.Rows, &row) 492 | } 493 | 494 | // Only reset the columns if original dataset mode is "columns". 495 | // This to prevent empty data when mode is matrix. 496 | if orgmode == DatasetModeColumns { 497 | dataset.Columns.Reset() 498 | } 499 | } 500 | 501 | // 502 | // PushRow save the data, which is already in row object, to Rows. 503 | // 504 | func (dataset *Dataset) PushRow(row *Row) { 505 | switch dataset.GetMode() { 506 | case DatasetModeRows: 507 | dataset.Rows = append(dataset.Rows, row) 508 | case DatasetModeColumns: 509 | dataset.PushRowToColumns(row) 510 | case DatasetModeMatrix, DatasetNoMode: 511 | dataset.Rows = append(dataset.Rows, row) 512 | dataset.PushRowToColumns(row) 513 | } 514 | } 515 | 516 | // 517 | // PushRowToColumns push each data in Row to Columns. 
518 | // 519 | func (dataset *Dataset) PushRowToColumns(row *Row) { 520 | rowlen := row.Len() 521 | if rowlen <= 0 { 522 | // return immediately if no data in row. 523 | return 524 | } 525 | 526 | // check if columns is initialize. 527 | collen := len(dataset.Columns) 528 | if collen <= 0 { 529 | dataset.Columns = make(Columns, rowlen) 530 | collen = rowlen 531 | } 532 | 533 | // pick the minimum length. 534 | min := rowlen 535 | if collen < rowlen { 536 | min = collen 537 | } 538 | 539 | for x := 0; x < min; x++ { 540 | dataset.Columns[x].PushBack((*row)[x]) 541 | } 542 | } 543 | 544 | // 545 | // FillRowsWithColumn given a column, fill the dataset with row where the record 546 | // only set at index `colIdx`. 547 | // 548 | // Example, content of dataset was, 549 | // 550 | // index: 0 1 2 551 | // A B C 552 | // X (step 1) nrow = 2 553 | // 554 | // If we filled column at index 2 with [Y Z], the dataset will become: 555 | // 556 | // index: 0 1 2 557 | // A B C 558 | // X Y (step 2) fill the empty row 559 | // Z (step 3) create dummy row which contain the rest of column data. 560 | // 561 | func (dataset *Dataset) FillRowsWithColumn(colIdx int, col Column) { 562 | if dataset.GetMode() != DatasetModeRows { 563 | // Only work if dataset mode is ROWS 564 | return 565 | } 566 | 567 | nrow := dataset.GetNRow() 568 | emptyAt := nrow 569 | 570 | // (step 1) Find the row with empty records 571 | for x, row := range dataset.Rows { 572 | if row.IsNilAt(colIdx) { 573 | emptyAt = x 574 | break 575 | } 576 | } 577 | 578 | // (step 2) Fill the empty rows using column records. 579 | y := 0 580 | for x := emptyAt; x < nrow; x++ { 581 | dataset.Rows[x].SetValueAt(colIdx, col.Records[y]) 582 | y++ 583 | } 584 | 585 | // (step 3) Continue filling the column but using dummy row which 586 | // contain only record at index `colIdx`. 
587 | ncol := dataset.GetNColumn() 588 | nrow = col.Len() 589 | for ; y < nrow; y++ { 590 | row := make(Row, ncol) 591 | 592 | for z := 0; z < ncol; z++ { 593 | if z == colIdx { 594 | row[colIdx] = col.Records[y] 595 | } else { 596 | row[z] = NewRecord() 597 | } 598 | } 599 | 600 | dataset.PushRow(&row) 601 | } 602 | } 603 | 604 | // 605 | // PushColumn will append new column to the end of slice if no existing column 606 | // with the same name. If it exist, the records will be merged. 607 | // 608 | func (dataset *Dataset) PushColumn(col Column) { 609 | exist := false 610 | colIdx := 0 611 | for x, c := range dataset.Columns { 612 | if c.Name == col.Name { 613 | exist = true 614 | colIdx = x 615 | break 616 | } 617 | } 618 | 619 | switch dataset.GetMode() { 620 | case DatasetModeRows: 621 | if exist { 622 | dataset.FillRowsWithColumn(colIdx, col) 623 | } else { 624 | // append new column 625 | dataset.Columns = append(dataset.Columns, col) 626 | dataset.PushColumnToRows(col) 627 | // Remove records in column 628 | dataset.Columns[dataset.GetNColumn()-1].Reset() 629 | } 630 | case DatasetModeColumns: 631 | if exist { 632 | dataset.Columns[colIdx].PushRecords(col.Records) 633 | } else { 634 | dataset.Columns = append(dataset.Columns, col) 635 | } 636 | case DatasetModeMatrix, DatasetNoMode: 637 | if exist { 638 | dataset.Columns[colIdx].PushRecords(col.Records) 639 | } else { 640 | dataset.Columns = append(dataset.Columns, col) 641 | dataset.PushColumnToRows(col) 642 | } 643 | } 644 | } 645 | 646 | // 647 | // PushColumnToRows add each record in column to each rows, from top to bottom. 648 | // 649 | func (dataset *Dataset) PushColumnToRows(col Column) { 650 | colsize := col.Len() 651 | if colsize <= 0 { 652 | // Do nothing if column is empty. 653 | return 654 | } 655 | 656 | nrow := dataset.GetNRow() 657 | if nrow <= 0 { 658 | // If no existing rows in dataset, initialize the rows slice. 
659 | dataset.Rows = make(Rows, colsize) 660 | 661 | for nrow = 0; nrow < colsize; nrow++ { 662 | row := make(Row, 0) 663 | dataset.Rows[nrow] = &row 664 | } 665 | } 666 | 667 | // Pick the minimum length between column or current row length. 668 | minrow := nrow 669 | 670 | if colsize < nrow { 671 | minrow = colsize 672 | } 673 | 674 | // Push each record in column to each rows 675 | var row *Row 676 | var rec *Record 677 | 678 | for x := 0; x < minrow; x++ { 679 | row = dataset.Rows[x] 680 | rec = col.Records[x] 681 | 682 | row.PushBack(rec) 683 | } 684 | } 685 | 686 | // 687 | // MergeColumns append columns from other dataset into current dataset. 688 | // 689 | func (dataset *Dataset) MergeColumns(other DatasetInterface) { 690 | othermode := other.GetMode() 691 | if othermode == DatasetModeRows { 692 | other.TransposeToColumns() 693 | } 694 | 695 | cols := other.GetDataAsColumns() 696 | for _, col := range *cols { 697 | dataset.PushColumn(col) 698 | } 699 | 700 | switch othermode { 701 | case DatasetModeRows: 702 | other.TransposeToRows() 703 | } 704 | } 705 | 706 | // 707 | // MergeRows append rows from other dataset into current dataset. 708 | // 709 | func (dataset *Dataset) MergeRows(other DatasetInterface) { 710 | rows := other.GetDataAsRows() 711 | for _, row := range *rows { 712 | dataset.PushRow(row) 713 | } 714 | } 715 | 716 | // 717 | // DeleteRow will detach row at index `i` from dataset and return it. 718 | // 719 | func (dataset *Dataset) DeleteRow(i int) (row *Row) { 720 | if i < 0 { 721 | return 722 | } 723 | if i >= dataset.Rows.Len() { 724 | return 725 | } 726 | 727 | orgmode := dataset.GetMode() 728 | if orgmode == DatasetModeColumns { 729 | dataset.TransposeToRows() 730 | } 731 | 732 | row = dataset.Rows.Del(i) 733 | 734 | if orgmode == DatasetModeColumns { 735 | dataset.TransposeToColumns() 736 | } 737 | 738 | if orgmode != DatasetModeRows { 739 | // Delete record in each columns as the same index as deleted 740 | // row. 
741 | for x := range dataset.Columns { 742 | dataset.Columns[x].DeleteRecordAt(i) 743 | } 744 | } 745 | 746 | return row 747 | } 748 | --------------------------------------------------------------------------------