├── series
    ├── type-string.go
    ├── type-int.go
    ├── type-float.go
    ├── type-bool.go
    ├── benchmarks_test.go
    ├── series.go
    └── series_test.go
├── dataframe
    ├── benchmark_test.go
    ├── examples_test.go
    └── dataframe.go
├── CHANGELOG.md
├── LICENSE.md
└── README.md


/series/type-string.go:
--------------------------------------------------------------------------------
  1 | package series
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"math"
  6 | 	"strconv"
  7 | 	"strings"
  8 | )
  9 | 
// stringElement is the Element whose Type is String. It pairs the stored
// string with a flag that marks the element as a missing value (NA).
type stringElement struct {
	// e is the stored string; accessors ignore it while nan is true.
	e   string
	// nan is true when the element is NA.
	nan bool
}
 14 | 
 15 | func (e *stringElement) Set(value interface{}) {
 16 | 	e.nan = false
 17 | 	switch value.(type) {
 18 | 	case string:
 19 | 		e.e = string(value.(string))
 20 | 		if e.e == "NaN" {
 21 | 			e.nan = true
 22 | 			return
 23 | 		}
 24 | 	case int:
 25 | 		e.e = strconv.Itoa(value.(int))
 26 | 	case float64:
 27 | 		e.e = strconv.FormatFloat(value.(float64), 'f', 6, 64)
 28 | 	case bool:
 29 | 		b := value.(bool)
 30 | 		if b {
 31 | 			e.e = "true"
 32 | 		} else {
 33 | 			e.e = "false"
 34 | 		}
 35 | 	case Element:
 36 | 		e.e = value.(Element).String()
 37 | 	default:
 38 | 		e.nan = true
 39 | 		return
 40 | 	}
 41 | 	return
 42 | }
 43 | 
 44 | func (e stringElement) Copy() Element {
 45 | 	if e.IsNA() {
 46 | 		return &stringElement{"", true}
 47 | 	}
 48 | 	return &stringElement{e.e, false}
 49 | }
 50 | 
 51 | func (e stringElement) IsNA() bool {
 52 | 	if e.nan {
 53 | 		return true
 54 | 	}
 55 | 	return false
 56 | }
 57 | 
 58 | func (e stringElement) Type() Type {
 59 | 	return String
 60 | }
 61 | 
 62 | func (e stringElement) Val() ElementValue {
 63 | 	if e.IsNA() {
 64 | 		return nil
 65 | 	}
 66 | 	return string(e.e)
 67 | }
 68 | 
 69 | func (e stringElement) String() string {
 70 | 	if e.IsNA() {
 71 | 		return "NaN"
 72 | 	}
 73 | 	return string(e.e)
 74 | }
 75 | 
 76 | func (e stringElement) Int() (int, error) {
 77 | 	if e.IsNA() {
 78 | 		return 0, fmt.Errorf("can't convert NaN to int")
 79 | 	}
 80 | 	return strconv.Atoi(e.e)
 81 | }
 82 | 
 83 | func (e stringElement) Float() float64 {
 84 | 	if e.IsNA() {
 85 | 		return math.NaN()
 86 | 	}
 87 | 	f, err := strconv.ParseFloat(e.e, 64)
 88 | 	if err != nil {
 89 | 		return math.NaN()
 90 | 	}
 91 | 	return f
 92 | }
 93 | 
 94 | func (e stringElement) Bool() (bool, error) {
 95 | 	if e.IsNA() {
 96 | 		return false, fmt.Errorf("can't convert NaN to bool")
 97 | 	}
 98 | 	switch strings.ToLower(e.e) {
 99 | 	case "true", "t", "1":
100 | 		return true, nil
101 | 	case "false", "f", "0":
102 | 		return false, nil
103 | 	}
104 | 	return false, fmt.Errorf("can't convert String \"%v\" to bool", e.e)
105 | }
106 | 
107 | func (e stringElement) Eq(elem Element) bool {
108 | 	if e.IsNA() || elem.IsNA() {
109 | 		return false
110 | 	}
111 | 	return e.e == elem.String()
112 | }
113 | 
114 | func (e stringElement) Neq(elem Element) bool {
115 | 	if e.IsNA() || elem.IsNA() {
116 | 		return false
117 | 	}
118 | 	return e.e != elem.String()
119 | }
120 | 
121 | func (e stringElement) Less(elem Element) bool {
122 | 	if e.IsNA() || elem.IsNA() {
123 | 		return false
124 | 	}
125 | 	return e.e < elem.String()
126 | }
127 | 
128 | func (e stringElement) LessEq(elem Element) bool {
129 | 	if e.IsNA() || elem.IsNA() {
130 | 		return false
131 | 	}
132 | 	return e.e <= elem.String()
133 | }
134 | 
135 | func (e stringElement) Greater(elem Element) bool {
136 | 	if e.IsNA() || elem.IsNA() {
137 | 		return false
138 | 	}
139 | 	return e.e > elem.String()
140 | }
141 | 
142 | func (e stringElement) GreaterEq(elem Element) bool {
143 | 	if e.IsNA() || elem.IsNA() {
144 | 		return false
145 | 	}
146 | 	return e.e >= elem.String()
147 | }
148 | 


--------------------------------------------------------------------------------
/series/type-int.go:
--------------------------------------------------------------------------------
  1 | package series
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"math"
  6 | 	"strconv"
  7 | )
  8 | 
// intElement is the Element whose Type is Int. It pairs the stored integer
// with a flag that marks the element as a missing value (NA).
type intElement struct {
	// e is the stored integer; accessors ignore it while nan is true.
	e   int
	// nan is true when the element is NA.
	nan bool
}
 13 | 
 14 | func (e *intElement) Set(value interface{}) {
 15 | 	e.nan = false
 16 | 	switch value.(type) {
 17 | 	case string:
 18 | 		if value.(string) == "NaN" {
 19 | 			e.nan = true
 20 | 			return
 21 | 		}
 22 | 		i, err := strconv.Atoi(value.(string))
 23 | 		if err != nil {
 24 | 			e.nan = true
 25 | 			return
 26 | 		}
 27 | 		e.e = i
 28 | 	case int:
 29 | 		e.e = int(value.(int))
 30 | 	case float64:
 31 | 		f := value.(float64)
 32 | 		if math.IsNaN(f) ||
 33 | 			math.IsInf(f, 0) ||
 34 | 			math.IsInf(f, 1) {
 35 | 			e.nan = true
 36 | 			return
 37 | 		}
 38 | 		e.e = int(f)
 39 | 	case bool:
 40 | 		b := value.(bool)
 41 | 		if b {
 42 | 			e.e = 1
 43 | 		} else {
 44 | 			e.e = 0
 45 | 		}
 46 | 	case Element:
 47 | 		v, err := value.(Element).Int()
 48 | 		if err != nil {
 49 | 			e.nan = true
 50 | 			return
 51 | 		}
 52 | 		e.e = v
 53 | 	default:
 54 | 		e.nan = true
 55 | 		return
 56 | 	}
 57 | 	return
 58 | }
 59 | 
 60 | func (e intElement) Copy() Element {
 61 | 	if e.IsNA() {
 62 | 		return &intElement{0, true}
 63 | 	}
 64 | 	return &intElement{e.e, false}
 65 | }
 66 | 
 67 | func (e intElement) IsNA() bool {
 68 | 	if e.nan {
 69 | 		return true
 70 | 	}
 71 | 	return false
 72 | }
 73 | 
 74 | func (e intElement) Type() Type {
 75 | 	return Int
 76 | }
 77 | 
 78 | func (e intElement) Val() ElementValue {
 79 | 	if e.IsNA() {
 80 | 		return nil
 81 | 	}
 82 | 	return int(e.e)
 83 | }
 84 | 
 85 | func (e intElement) String() string {
 86 | 	if e.IsNA() {
 87 | 		return "NaN"
 88 | 	}
 89 | 	return fmt.Sprint(e.e)
 90 | }
 91 | 
 92 | func (e intElement) Int() (int, error) {
 93 | 	if e.IsNA() {
 94 | 		return 0, fmt.Errorf("can't convert NaN to int")
 95 | 	}
 96 | 	return int(e.e), nil
 97 | }
 98 | 
 99 | func (e intElement) Float() float64 {
100 | 	if e.IsNA() {
101 | 		return math.NaN()
102 | 	}
103 | 	return float64(e.e)
104 | }
105 | 
106 | func (e intElement) Bool() (bool, error) {
107 | 	if e.IsNA() {
108 | 		return false, fmt.Errorf("can't convert NaN to bool")
109 | 	}
110 | 	switch e.e {
111 | 	case 1:
112 | 		return true, nil
113 | 	case 0:
114 | 		return false, nil
115 | 	}
116 | 	return false, fmt.Errorf("can't convert Int \"%v\" to bool", e.e)
117 | }
118 | 
119 | func (e intElement) Eq(elem Element) bool {
120 | 	i, err := elem.Int()
121 | 	if err != nil || e.IsNA() {
122 | 		return false
123 | 	}
124 | 	return e.e == i
125 | }
126 | 
127 | func (e intElement) Neq(elem Element) bool {
128 | 	i, err := elem.Int()
129 | 	if err != nil || e.IsNA() {
130 | 		return false
131 | 	}
132 | 	return e.e != i
133 | }
134 | 
135 | func (e intElement) Less(elem Element) bool {
136 | 	i, err := elem.Int()
137 | 	if err != nil || e.IsNA() {
138 | 		return false
139 | 	}
140 | 	return e.e < i
141 | }
142 | 
143 | func (e intElement) LessEq(elem Element) bool {
144 | 	i, err := elem.Int()
145 | 	if err != nil || e.IsNA() {
146 | 		return false
147 | 	}
148 | 	return e.e <= i
149 | }
150 | 
151 | func (e intElement) Greater(elem Element) bool {
152 | 	i, err := elem.Int()
153 | 	if err != nil || e.IsNA() {
154 | 		return false
155 | 	}
156 | 	return e.e > i
157 | }
158 | 
159 | func (e intElement) GreaterEq(elem Element) bool {
160 | 	i, err := elem.Int()
161 | 	if err != nil || e.IsNA() {
162 | 		return false
163 | 	}
164 | 	return e.e >= i
165 | }
166 | 


--------------------------------------------------------------------------------
/series/type-float.go:
--------------------------------------------------------------------------------
  1 | package series
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"math"
  6 | 	"strconv"
  7 | )
  8 | 
// floatElement is the Element whose Type is Float. It pairs the stored
// float64 with a flag that marks the element as a missing value (NA).
type floatElement struct {
	// e is the stored value; IsNA also treats a NaN stored here as missing.
	e   float64
	// nan is true when the element was explicitly marked as NA.
	nan bool
}
 13 | 
 14 | func (e *floatElement) Set(value interface{}) {
 15 | 	e.nan = false
 16 | 	switch value.(type) {
 17 | 	case string:
 18 | 		if value.(string) == "NaN" {
 19 | 			e.nan = true
 20 | 			return
 21 | 		}
 22 | 		f, err := strconv.ParseFloat(value.(string), 64)
 23 | 		if err != nil {
 24 | 			e.nan = true
 25 | 			return
 26 | 		}
 27 | 		e.e = f
 28 | 	case int:
 29 | 		e.e = float64(value.(int))
 30 | 	case float64:
 31 | 		e.e = float64(value.(float64))
 32 | 	case bool:
 33 | 		b := value.(bool)
 34 | 		if b {
 35 | 			e.e = 1
 36 | 		} else {
 37 | 			e.e = 0
 38 | 		}
 39 | 	case Element:
 40 | 		e.e = value.(Element).Float()
 41 | 	default:
 42 | 		e.nan = true
 43 | 		return
 44 | 	}
 45 | 	return
 46 | }
 47 | 
 48 | func (e floatElement) Copy() Element {
 49 | 	if e.IsNA() {
 50 | 		return &floatElement{0.0, true}
 51 | 	}
 52 | 	return &floatElement{e.e, false}
 53 | }
 54 | 
 55 | func (e floatElement) IsNA() bool {
 56 | 	if e.nan || math.IsNaN(e.e) {
 57 | 		return true
 58 | 	}
 59 | 	return false
 60 | }
 61 | 
 62 | func (e floatElement) Type() Type {
 63 | 	return Float
 64 | }
 65 | 
 66 | func (e floatElement) Val() ElementValue {
 67 | 	if e.IsNA() {
 68 | 		return nil
 69 | 	}
 70 | 	return float64(e.e)
 71 | }
 72 | 
 73 | func (e floatElement) String() string {
 74 | 	if e.IsNA() {
 75 | 		return "NaN"
 76 | 	}
 77 | 	return fmt.Sprintf("%f", e.e)
 78 | }
 79 | 
 80 | func (e floatElement) Int() (int, error) {
 81 | 	if e.IsNA() {
 82 | 		return 0, fmt.Errorf("can't convert NaN to int")
 83 | 	}
 84 | 	f := e.e
 85 | 	if math.IsInf(f, 1) || math.IsInf(f, -1) {
 86 | 		return 0, fmt.Errorf("can't convert Inf to int")
 87 | 	}
 88 | 	if math.IsNaN(f) {
 89 | 		return 0, fmt.Errorf("can't convert NaN to int")
 90 | 	}
 91 | 	return int(f), nil
 92 | }
 93 | 
 94 | func (e floatElement) Float() float64 {
 95 | 	if e.IsNA() {
 96 | 		return math.NaN()
 97 | 	}
 98 | 	return float64(e.e)
 99 | }
100 | 
101 | func (e floatElement) Bool() (bool, error) {
102 | 	if e.IsNA() {
103 | 		return false, fmt.Errorf("can't convert NaN to bool")
104 | 	}
105 | 	switch e.e {
106 | 	case 1:
107 | 		return true, nil
108 | 	case 0:
109 | 		return false, nil
110 | 	}
111 | 	return false, fmt.Errorf("can't convert Float \"%v\" to bool", e.e)
112 | }
113 | 
114 | func (e floatElement) Eq(elem Element) bool {
115 | 	f := elem.Float()
116 | 	if e.IsNA() || math.IsNaN(f) {
117 | 		return false
118 | 	}
119 | 	return e.e == f
120 | }
121 | 
122 | func (e floatElement) Neq(elem Element) bool {
123 | 	f := elem.Float()
124 | 	if e.IsNA() || math.IsNaN(f) {
125 | 		return false
126 | 	}
127 | 	return e.e != f
128 | }
129 | 
130 | func (e floatElement) Less(elem Element) bool {
131 | 	f := elem.Float()
132 | 	if e.IsNA() || math.IsNaN(f) {
133 | 		return false
134 | 	}
135 | 	return e.e < f
136 | }
137 | 
138 | func (e floatElement) LessEq(elem Element) bool {
139 | 	f := elem.Float()
140 | 	if e.IsNA() || math.IsNaN(f) {
141 | 		return false
142 | 	}
143 | 	return e.e <= f
144 | }
145 | 
146 | func (e floatElement) Greater(elem Element) bool {
147 | 	f := elem.Float()
148 | 	if e.IsNA() || math.IsNaN(f) {
149 | 		return false
150 | 	}
151 | 	return e.e > f
152 | }
153 | 
154 | func (e floatElement) GreaterEq(elem Element) bool {
155 | 	f := elem.Float()
156 | 	if e.IsNA() || math.IsNaN(f) {
157 | 		return false
158 | 	}
159 | 	return e.e >= f
160 | }
161 | 


--------------------------------------------------------------------------------
/series/type-bool.go:
--------------------------------------------------------------------------------
  1 | package series
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"math"
  6 | 	"strings"
  7 | )
  8 | 
// boolElement is the Element whose Type is Bool. It pairs the stored boolean
// with a flag that marks the element as a missing value (NA).
type boolElement struct {
	// e is the stored boolean; accessors ignore it while nan is true.
	e   bool
	// nan is true when the element is NA.
	nan bool
}
 13 | 
 14 | func (e *boolElement) Set(value interface{}) {
 15 | 	e.nan = false
 16 | 	switch value.(type) {
 17 | 	case string:
 18 | 		if value.(string) == "NaN" {
 19 | 			e.nan = true
 20 | 			return
 21 | 		}
 22 | 		switch strings.ToLower(value.(string)) {
 23 | 		case "true", "t", "1":
 24 | 			e.e = true
 25 | 		case "false", "f", "0":
 26 | 			e.e = false
 27 | 		default:
 28 | 			e.nan = true
 29 | 			return
 30 | 		}
 31 | 	case int:
 32 | 		switch value.(int) {
 33 | 		case 1:
 34 | 			e.e = true
 35 | 		case 0:
 36 | 			e.e = false
 37 | 		default:
 38 | 			e.nan = true
 39 | 			return
 40 | 		}
 41 | 	case float64:
 42 | 		switch value.(float64) {
 43 | 		case 1:
 44 | 			e.e = true
 45 | 		case 0:
 46 | 			e.e = false
 47 | 		default:
 48 | 			e.nan = true
 49 | 			return
 50 | 		}
 51 | 	case bool:
 52 | 		e.e = value.(bool)
 53 | 	case Element:
 54 | 		b, err := value.(Element).Bool()
 55 | 		if err != nil {
 56 | 			e.nan = true
 57 | 			return
 58 | 		}
 59 | 		e.e = b
 60 | 	default:
 61 | 		e.nan = true
 62 | 		return
 63 | 	}
 64 | 	return
 65 | }
 66 | 
 67 | func (e boolElement) Copy() Element {
 68 | 	if e.IsNA() {
 69 | 		return &boolElement{false, true}
 70 | 	}
 71 | 	return &boolElement{e.e, false}
 72 | }
 73 | 
 74 | func (e boolElement) IsNA() bool {
 75 | 	if e.nan {
 76 | 		return true
 77 | 	}
 78 | 	return false
 79 | }
 80 | 
 81 | func (e boolElement) Type() Type {
 82 | 	return Bool
 83 | }
 84 | 
 85 | func (e boolElement) Val() ElementValue {
 86 | 	if e.IsNA() {
 87 | 		return nil
 88 | 	}
 89 | 	return bool(e.e)
 90 | }
 91 | 
 92 | func (e boolElement) String() string {
 93 | 	if e.IsNA() {
 94 | 		return "NaN"
 95 | 	}
 96 | 	if e.e {
 97 | 		return "true"
 98 | 	}
 99 | 	return "false"
100 | }
101 | 
102 | func (e boolElement) Int() (int, error) {
103 | 	if e.IsNA() {
104 | 		return 0, fmt.Errorf("can't convert NaN to int")
105 | 	}
106 | 	if e.e == true {
107 | 		return 1, nil
108 | 	}
109 | 	return 0, nil
110 | }
111 | 
112 | func (e boolElement) Float() float64 {
113 | 	if e.IsNA() {
114 | 		return math.NaN()
115 | 	}
116 | 	if e.e {
117 | 		return 1.0
118 | 	}
119 | 	return 0.0
120 | }
121 | 
122 | func (e boolElement) Bool() (bool, error) {
123 | 	if e.IsNA() {
124 | 		return false, fmt.Errorf("can't convert NaN to bool")
125 | 	}
126 | 	return bool(e.e), nil
127 | }
128 | 
129 | func (e boolElement) Eq(elem Element) bool {
130 | 	b, err := elem.Bool()
131 | 	if err != nil || e.IsNA() {
132 | 		return false
133 | 	}
134 | 	return e.e == b
135 | }
136 | 
137 | func (e boolElement) Neq(elem Element) bool {
138 | 	b, err := elem.Bool()
139 | 	if err != nil || e.IsNA() {
140 | 		return false
141 | 	}
142 | 	return e.e != b
143 | }
144 | 
145 | func (e boolElement) Less(elem Element) bool {
146 | 	b, err := elem.Bool()
147 | 	if err != nil || e.IsNA() {
148 | 		return false
149 | 	}
150 | 	return !e.e && b
151 | }
152 | 
153 | func (e boolElement) LessEq(elem Element) bool {
154 | 	b, err := elem.Bool()
155 | 	if err != nil || e.IsNA() {
156 | 		return false
157 | 	}
158 | 	return !e.e || b
159 | }
160 | 
161 | func (e boolElement) Greater(elem Element) bool {
162 | 	b, err := elem.Bool()
163 | 	if err != nil || e.IsNA() {
164 | 		return false
165 | 	}
166 | 	return e.e && !b
167 | }
168 | 
169 | func (e boolElement) GreaterEq(elem Element) bool {
170 | 	b, err := elem.Bool()
171 | 	if err != nil || e.IsNA() {
172 | 		return false
173 | 	}
174 | 	return e.e || !b
175 | }
176 | 


--------------------------------------------------------------------------------
/series/benchmarks_test.go:
--------------------------------------------------------------------------------
  1 | package series_test
  2 | 
  3 | import (
  4 | 	"math/rand"
  5 | 	"strconv"
  6 | 	"testing"
  7 | 
  8 | 	"github.com/libonomy/libonomy-gota/series"
  9 | )
 10 | 
 11 | func generateInts(n int) (data []int) {
 12 | 	for i := 0; i < n; i++ {
 13 | 		data = append(data, rand.Int())
 14 | 	}
 15 | 	return
 16 | }
 17 | 
 18 | func generateFloats(n int) (data []float64) {
 19 | 	for i := 0; i < n; i++ {
 20 | 		data = append(data, rand.Float64())
 21 | 	}
 22 | 	return
 23 | }
 24 | 
 25 | func generateStrings(n int) (data []string) {
 26 | 	for i := 0; i < n; i++ {
 27 | 		data = append(data, strconv.Itoa(rand.Int()))
 28 | 	}
 29 | 	return
 30 | }
 31 | 
 32 | func generateBools(n int) (data []bool) {
 33 | 	for i := 0; i < n; i++ {
 34 | 		r := rand.Intn(2)
 35 | 		b := false
 36 | 		if r == 1 {
 37 | 			b = true
 38 | 		}
 39 | 		data = append(data, b)
 40 | 	}
 41 | 	return
 42 | }
 43 | 
 44 | func generateIntsN(n, k int) (data []int) {
 45 | 	for i := 0; i < n; i++ {
 46 | 		data = append(data, rand.Intn(k))
 47 | 	}
 48 | 	return
 49 | }
 50 | 
 51 | func BenchmarkSeries_New(b *testing.B) {
 52 | 	rand.Seed(100)
 53 | 	table := []struct {
 54 | 		name       string
 55 | 		data       interface{}
 56 | 		seriesType series.Type
 57 | 	}{
 58 | 		{
 59 | 			"[]bool(100000)_Int",
 60 | 			generateBools(100000),
 61 | 			series.Int,
 62 | 		},
 63 | 		{
 64 | 			"[]bool(100000)_String",
 65 | 			generateBools(100000),
 66 | 			series.String,
 67 | 		},
 68 | 		{
 69 | 			"[]bool(100000)_Bool",
 70 | 			generateBools(100000),
 71 | 			series.Bool,
 72 | 		},
 73 | 		{
 74 | 			"[]bool(100000)_Float",
 75 | 			generateBools(100000),
 76 | 			series.Float,
 77 | 		},
 78 | 		{
 79 | 			"[]string(100000)_Int",
 80 | 			generateStrings(100000),
 81 | 			series.Int,
 82 | 		},
 83 | 		{
 84 | 			"[]string(100000)_String",
 85 | 			generateStrings(100000),
 86 | 			series.String,
 87 | 		},
 88 | 		{
 89 | 			"[]string(100000)_Bool",
 90 | 			generateStrings(100000),
 91 | 			series.Bool,
 92 | 		},
 93 | 		{
 94 | 			"[]string(100000)_Float",
 95 | 			generateStrings(100000),
 96 | 			series.Float,
 97 | 		},
 98 | 		{
 99 | 			"[]float64(100000)_Int",
100 | 			generateFloats(100000),
101 | 			series.Int,
102 | 		},
103 | 		{
104 | 			"[]float64(100000)_String",
105 | 			generateFloats(100000),
106 | 			series.String,
107 | 		},
108 | 		{
109 | 			"[]float64(100000)_Bool",
110 | 			generateFloats(100000),
111 | 			series.Bool,
112 | 		},
113 | 		{
114 | 			"[]float64(100000)_Float",
115 | 			generateFloats(100000),
116 | 			series.Float,
117 | 		},
118 | 		{
119 | 			"[]int(100000)_Int",
120 | 			generateInts(100000),
121 | 			series.Int,
122 | 		},
123 | 		{
124 | 			"[]int(100000)_String",
125 | 			generateInts(100000),
126 | 			series.String,
127 | 		},
128 | 		{
129 | 			"[]int(100000)_Bool",
130 | 			generateInts(100000),
131 | 			series.Bool,
132 | 		},
133 | 		{
134 | 			"[]int(100000)_Float",
135 | 			generateInts(100000),
136 | 			series.Float,
137 | 		},
138 | 	}
139 | 	for _, test := range table {
140 | 		b.Run(test.name, func(b *testing.B) {
141 | 			for i := 0; i < b.N; i++ {
142 | 				series.New(test.data, test.seriesType, test.name)
143 | 			}
144 | 		})
145 | 	}
146 | }
147 | 
148 | func BenchmarkSeries_Copy(b *testing.B) {
149 | 	rand.Seed(100)
150 | 	table := []struct {
151 | 		name   string
152 | 		series series.Series
153 | 	}{
154 | 		{
155 | 			"[]int(100000)_Int",
156 | 			series.Ints(generateInts(100000)),
157 | 		},
158 | 		{
159 | 			"[]int(100000)_String",
160 | 			series.Strings(generateInts(100000)),
161 | 		},
162 | 		{
163 | 			"[]int(100000)_Bool",
164 | 			series.Bools(generateInts(100000)),
165 | 		},
166 | 		{
167 | 			"[]int(100000)_Float",
168 | 			series.Floats(generateInts(100000)),
169 | 		},
170 | 	}
171 | 	for _, test := range table {
172 | 		b.Run(test.name, func(b *testing.B) {
173 | 			for i := 0; i < b.N; i++ {
174 | 				test.series.Copy()
175 | 			}
176 | 		})
177 | 	}
178 | }
179 | 
180 | func BenchmarkSeries_Subset(b *testing.B) {
181 | 	rand.Seed(100)
182 | 	table := []struct {
183 | 		name    string
184 | 		indexes interface{}
185 | 		series  series.Series
186 | 	}{
187 | 		{
188 | 			"[]int(100000)_Int",
189 | 			generateIntsN(10000, 2),
190 | 			series.Ints(generateInts(100000)),
191 | 		},
192 | 		{
193 | 			"[]int(100000)_String",
194 | 			generateIntsN(10000, 2),
195 | 			series.Strings(generateInts(100000)),
196 | 		},
197 | 		{
198 | 			"[]int(100000)_Bool",
199 | 			generateIntsN(10000, 2),
200 | 			series.Bools(generateInts(100000)),
201 | 		},
202 | 		{
203 | 			"[]int(100000)_Float",
204 | 			generateIntsN(10000, 2),
205 | 			series.Floats(generateInts(100000)),
206 | 		},
207 | 	}
208 | 	for _, test := range table {
209 | 		b.Run(test.name, func(b *testing.B) {
210 | 			for i := 0; i < b.N; i++ {
211 | 				test.series.Subset(test.indexes)
212 | 			}
213 | 		})
214 | 	}
215 | }
216 | 
217 | func BenchmarkSeries_Set(b *testing.B) {
218 | 	rand.Seed(100)
219 | 	table := []struct {
220 | 		name      string
221 | 		indexes   interface{}
222 | 		newValues series.Series
223 | 		series    series.Series
224 | 	}{
225 | 		{
226 | 			"[]int(100000)_Int",
227 | 			generateIntsN(10000, 2),
228 | 			series.Ints(generateIntsN(10000, 2)),
229 | 			series.Ints(generateInts(100000)),
230 | 		},
231 | 		{
232 | 			"[]int(100000)_String",
233 | 			generateIntsN(10000, 2),
234 | 			series.Strings(generateIntsN(10000, 2)),
235 | 			series.Strings(generateInts(100000)),
236 | 		},
237 | 		{
238 | 			"[]int(100000)_Bool",
239 | 			generateIntsN(10000, 2),
240 | 			series.Bools(generateIntsN(10000, 2)),
241 | 			series.Bools(generateInts(100000)),
242 | 		},
243 | 		{
244 | 			"[]int(100000)_Float",
245 | 			generateIntsN(10000, 2),
246 | 			series.Floats(generateIntsN(10000, 2)),
247 | 			series.Floats(generateInts(100000)),
248 | 		},
249 | 	}
250 | 	for _, test := range table {
251 | 		s := test.series.Copy()
252 | 		b.Run(test.name, func(b *testing.B) {
253 | 			for i := 0; i < b.N; i++ {
254 | 				s.Set(test.indexes, test.newValues)
255 | 			}
256 | 		})
257 | 	}
258 | }
259 | 


--------------------------------------------------------------------------------
/dataframe/benchmark_test.go:
--------------------------------------------------------------------------------
  1 | package dataframe_test
  2 | 
  3 | import (
  4 | 	"math/rand"
  5 | 	"strconv"
  6 | 	"testing"
  7 | 
  8 | 	"github.com/libonomy/libonomy-gota/dataframe"
  9 | 	"github.com/libonomy/libonomy-gota/series"
 10 | )
 11 | 
 12 | func generateSeries(n, rep int) (data []series.Series) {
 13 | 	rand.Seed(100)
 14 | 	for j := 0; j < rep; j++ {
 15 | 		var is []int
 16 | 		var bs []bool
 17 | 		var fs []float64
 18 | 		var ss []string
 19 | 		for i := 0; i < n; i++ {
 20 | 			is = append(is, rand.Int())
 21 | 		}
 22 | 		for i := 0; i < n; i++ {
 23 | 			fs = append(fs, rand.Float64())
 24 | 		}
 25 | 		for i := 0; i < n; i++ {
 26 | 			ss = append(ss, strconv.Itoa(rand.Int()))
 27 | 		}
 28 | 		for i := 0; i < n; i++ {
 29 | 			r := rand.Intn(2)
 30 | 			b := false
 31 | 			if r == 1 {
 32 | 				b = true
 33 | 			}
 34 | 			bs = append(bs, b)
 35 | 		}
 36 | 		data = append(data, series.Ints(is))
 37 | 		data = append(data, series.Bools(bs))
 38 | 		data = append(data, series.Floats(fs))
 39 | 		data = append(data, series.Strings(ss))
 40 | 	}
 41 | 	return
 42 | }
 43 | 
 44 | func generateIntsN(n, k int) (data []int) {
 45 | 	for i := 0; i < n; i++ {
 46 | 		data = append(data, rand.Intn(k))
 47 | 	}
 48 | 	return
 49 | }
 50 | 
 51 | func BenchmarkNew(b *testing.B) {
 52 | 	table := []struct {
 53 | 		name string
 54 | 		data []series.Series
 55 | 	}{
 56 | 		{
 57 | 			"100000x4",
 58 | 			generateSeries(100000, 1),
 59 | 		},
 60 | 		{
 61 | 			"100000x40",
 62 | 			generateSeries(100000, 10),
 63 | 		},
 64 | 		{
 65 | 			"100000x400",
 66 | 			generateSeries(100000, 100),
 67 | 		},
 68 | 		{
 69 | 			"1000x40",
 70 | 			generateSeries(1000, 10),
 71 | 		},
 72 | 		{
 73 | 			"1000x4000",
 74 | 			generateSeries(1000, 1000),
 75 | 		},
 76 | 		{
 77 | 			"1000x40000",
 78 | 			generateSeries(1000, 10000),
 79 | 		},
 80 | 	}
 81 | 	for _, test := range table {
 82 | 		b.Run(test.name, func(b *testing.B) {
 83 | 			for i := 0; i < b.N; i++ {
 84 | 				dataframe.New(test.data...)
 85 | 			}
 86 | 		})
 87 | 	}
 88 | }
 89 | 
 90 | func BenchmarkDataFrame_Arrange(b *testing.B) {
 91 | 	data := dataframe.New(generateSeries(100000, 5)...)
 92 | 	table := []struct {
 93 | 		name string
 94 | 		data dataframe.DataFrame
 95 | 		key  []dataframe.Order
 96 | 	}{
 97 | 		{
 98 | 			"100000x20_1",
 99 | 			data,
100 | 			[]dataframe.Order{dataframe.Sort("X0")},
101 | 		},
102 | 		{
103 | 			"100000x20_2",
104 | 			data,
105 | 			[]dataframe.Order{
106 | 				dataframe.Sort("X0"),
107 | 				dataframe.Sort("X1"),
108 | 			},
109 | 		},
110 | 		{
111 | 			"100000x20_3",
112 | 			data,
113 | 			[]dataframe.Order{
114 | 				dataframe.Sort("X0"),
115 | 				dataframe.Sort("X1"),
116 | 				dataframe.Sort("X2"),
117 | 			},
118 | 		},
119 | 	}
120 | 	for _, test := range table {
121 | 		b.Run(test.name, func(b *testing.B) {
122 | 			for i := 0; i < b.N; i++ {
123 | 				test.data.Arrange(test.key...)
124 | 			}
125 | 		})
126 | 	}
127 | }
128 | 
129 | func BenchmarkDataFrame_Subset(b *testing.B) {
130 | 	b.ReportAllocs()
131 | 	data1000x20 := dataframe.New(generateSeries(1000, 5)...)
132 | 	data1000x200 := dataframe.New(generateSeries(1000, 50)...)
133 | 	data1000x2000 := dataframe.New(generateSeries(1000, 500)...)
134 | 	data100000x20 := dataframe.New(generateSeries(100000, 5)...)
135 | 	data1000000x20 := dataframe.New(generateSeries(1000000, 5)...)
136 | 	idx10 := generateIntsN(10, 10)
137 | 	idx100 := generateIntsN(100, 100)
138 | 	idx1000 := generateIntsN(1000, 1000)
139 | 	idx10000 := generateIntsN(10000, 10000)
140 | 	idx100000 := generateIntsN(100000, 100000)
141 | 	idx1000000 := generateIntsN(1000000, 1000000)
142 | 	table := []struct {
143 | 		name    string
144 | 		data    dataframe.DataFrame
145 | 		indexes interface{}
146 | 	}{
147 | 		{
148 | 			"1000000x20_100",
149 | 			data1000000x20,
150 | 			idx100,
151 | 		},
152 | 		{
153 | 			"1000000x20_1000",
154 | 			data1000000x20,
155 | 			idx1000,
156 | 		},
157 | 		{
158 | 			"1000000x20_10000",
159 | 			data1000000x20,
160 | 			idx10000,
161 | 		},
162 | 		{
163 | 			"1000000x20_100000",
164 | 			data1000000x20,
165 | 			idx100000,
166 | 		},
167 | 		{
168 | 			"1000000x20_1000000",
169 | 			data1000000x20,
170 | 			idx1000000,
171 | 		},
172 | 		{
173 | 			"100000x20_100",
174 | 			data100000x20,
175 | 			idx100,
176 | 		},
177 | 		{
178 | 			"100000x20_1000",
179 | 			data100000x20,
180 | 			idx1000,
181 | 		},
182 | 		{
183 | 			"100000x20_10000",
184 | 			data100000x20,
185 | 			idx10000,
186 | 		},
187 | 		{
188 | 			"100000x20_100000",
189 | 			data100000x20,
190 | 			idx100000,
191 | 		},
192 | 		{
193 | 			"1000x20_10",
194 | 			data1000x20,
195 | 			idx10,
196 | 		},
197 | 		{
198 | 			"1000x20_100",
199 | 			data1000x20,
200 | 			idx100,
201 | 		},
202 | 		{
203 | 			"1000x20_1000",
204 | 			data1000x20,
205 | 			idx1000,
206 | 		},
207 | 		{
208 | 			"1000x200_10",
209 | 			data1000x200,
210 | 			idx10,
211 | 		},
212 | 		{
213 | 			"1000x200_100",
214 | 			data1000x200,
215 | 			idx100,
216 | 		},
217 | 		{
218 | 			"1000x200_1000",
219 | 			data1000x200,
220 | 			idx1000,
221 | 		},
222 | 		{
223 | 			"1000x2000_10",
224 | 			data1000x2000,
225 | 			idx10,
226 | 		},
227 | 		{
228 | 			"1000x2000_100",
229 | 			data1000x2000,
230 | 			idx100,
231 | 		},
232 | 		{
233 | 			"1000x2000_1000",
234 | 			data1000x2000,
235 | 			idx1000,
236 | 		},
237 | 	}
238 | 	for _, test := range table {
239 | 		b.Run(test.name, func(b *testing.B) {
240 | 			for i := 0; i < b.N; i++ {
241 | 				test.data.Subset(test.indexes)
242 | 			}
243 | 		})
244 | 	}
245 | }
246 | 
247 | func BenchmarkDataFrame_Elem(b *testing.B) {
248 | 	data := dataframe.New(generateSeries(100000, 5)...)
249 | 	table := []struct {
250 | 		name string
251 | 		data dataframe.DataFrame
252 | 	}{
253 | 		{
254 | 			"100000x20_ALL",
255 | 			data,
256 | 		},
257 | 	}
258 | 	for _, test := range table {
259 | 		b.Run(test.name, func(b *testing.B) {
260 | 			for i := 0; i < b.N; i++ {
261 | 				for k := 0; k < 100000; k++ {
262 | 					test.data.Elem(k, 0)
263 | 				}
264 | 			}
265 | 		})
266 | 	}
267 | }
268 | 


--------------------------------------------------------------------------------
/dataframe/examples_test.go:
--------------------------------------------------------------------------------
  1 | package dataframe_test
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"strings"
  6 | 
  7 | 	"github.com/libonomy/libonomy-gota/dataframe"
  8 | 	"github.com/libonomy/libonomy-gota/series"
  9 | )
 10 | 
 11 | func ExampleNew() {
 12 | 	df := dataframe.New(
 13 | 		series.New([]string{"b", "a"}, series.String, "COL.1"),
 14 | 		series.New([]int{1, 2}, series.Int, "COL.2"),
 15 | 		series.New([]float64{3.0, 4.0}, series.Float, "COL.3"),
 16 | 	)
 17 | 	fmt.Println(df)
 18 | }
 19 | 
 20 | func ExampleLoadStructs() {
 21 | 	type User struct {
 22 | 		Name     string
 23 | 		Age      int
 24 | 		Accuracy float64
 25 | 	}
 26 | 	users := []User{
 27 | 		User{"Aram", 17, 0.2},
 28 | 		User{"Juan", 18, 0.8},
 29 | 		User{"Ana", 22, 0.5},
 30 | 	}
 31 | 	df := dataframe.LoadStructs(users)
 32 | 	fmt.Println(df)
 33 | }
 34 | 
 35 | func ExampleLoadRecords() {
 36 | 	df := dataframe.LoadRecords(
 37 | 		[][]string{
 38 | 			[]string{"A", "B", "C", "D"},
 39 | 			[]string{"a", "4", "5.1", "true"},
 40 | 			[]string{"k", "5", "7.0", "true"},
 41 | 			[]string{"k", "4", "6.0", "true"},
 42 | 			[]string{"a", "2", "7.1", "false"},
 43 | 		},
 44 | 	)
 45 | 	fmt.Println(df)
 46 | }
 47 | 
 48 | func ExampleLoadRecords_options() {
 49 | 	df := dataframe.LoadRecords(
 50 | 		[][]string{
 51 | 			[]string{"A", "B", "C", "D"},
 52 | 			[]string{"a", "4", "5.1", "true"},
 53 | 			[]string{"k", "5", "7.0", "true"},
 54 | 			[]string{"k", "4", "6.0", "true"},
 55 | 			[]string{"a", "2", "7.1", "false"},
 56 | 		},
 57 | 		dataframe.DetectTypes(false),
 58 | 		dataframe.DefaultType(series.Float),
 59 | 		dataframe.WithTypes(map[string]series.Type{
 60 | 			"A": series.String,
 61 | 			"D": series.Bool,
 62 | 		}),
 63 | 	)
 64 | 	fmt.Println(df)
 65 | }
 66 | 
 67 | func ExampleLoadMaps() {
 68 | 	df := dataframe.LoadMaps(
 69 | 		[]map[string]interface{}{
 70 | 			map[string]interface{}{
 71 | 				"A": "a",
 72 | 				"B": 1,
 73 | 				"C": true,
 74 | 				"D": 0,
 75 | 			},
 76 | 			map[string]interface{}{
 77 | 				"A": "b",
 78 | 				"B": 2,
 79 | 				"C": true,
 80 | 				"D": 0.5,
 81 | 			},
 82 | 		},
 83 | 	)
 84 | 	fmt.Println(df)
 85 | }
 86 | 
 87 | func ExampleReadCSV() {
 88 | 	csvStr := `
 89 | Country,Date,Age,Amount,Id
 90 | "United States",2012-02-01,50,112.1,01234
 91 | "United States",2012-02-01,32,321.31,54320
 92 | "United Kingdom",2012-02-01,17,18.2,12345
 93 | "United States",2012-02-01,32,321.31,54320
 94 | "United Kingdom",2012-02-01,NA,18.2,12345
 95 | "United States",2012-02-01,32,321.31,54320
 96 | "United States",2012-02-01,32,321.31,54320
 97 | Spain,2012-02-01,66,555.42,00241
 98 | `
 99 | 	df := dataframe.ReadCSV(strings.NewReader(csvStr))
100 | 	fmt.Println(df)
101 | }
102 | 
103 | func ExampleReadJSON() {
104 | 	jsonStr := `[{"COL.2":1,"COL.3":3},{"COL.1":5,"COL.2":2,"COL.3":2},{"COL.1":6,"COL.2":3,"COL.3":1}]`
105 | 	df := dataframe.ReadJSON(strings.NewReader(jsonStr))
106 | 	fmt.Println(df)
107 | }
108 | 
109 | func ExampleDataFrame_Subset() {
110 | 	records := [][]string{
111 | 		{"A", "B", "C", "D"},
112 | 		{"a", "4", "5.1", "true"},
113 | 		{"k", "5", "7.0", "true"},
114 | 		{"k", "4", "6.0", "true"},
115 | 		{"a", "2", "7.1", "false"},
116 | 	}
117 | 	df := dataframe.LoadRecords(records)
118 | 	// Row indexes 0 and 2 are requested from the loaded DataFrame.
119 | 	rowIdx := []int{0, 2}
120 | 	fmt.Println(df.Subset(rowIdx))
121 | }
122 | 
123 | func ExampleDataFrame_Select() {
124 | 	records := [][]string{
125 | 		{"A", "B", "C", "D"},
126 | 		{"a", "4", "5.1", "true"},
127 | 		{"k", "5", "7.0", "true"},
128 | 		{"k", "4", "6.0", "true"},
129 | 		{"a", "2", "7.1", "false"},
130 | 	}
131 | 	df := dataframe.LoadRecords(records)
132 | 	// Columns are picked first by integer index, then by name.
133 | 	byIndex := df.Select([]int{0, 2})
134 | 	byName := df.Select([]string{"A", "C"})
135 | 	fmt.Println(byIndex)
136 | 	fmt.Println(byName)
137 | }
138 | 
139 | func ExampleDataFrame_Filter() {
140 | 	records := [][]string{
141 | 		{"A", "B", "C", "D"},
142 | 		{"a", "4", "5.1", "true"},
143 | 		{"k", "5", "7.0", "true"},
144 | 		{"k", "4", "6.0", "true"},
145 | 		{"a", "2", "7.1", "false"},
146 | 	}
147 | 	df := dataframe.LoadRecords(records)
148 | 	// Conditions handed together to the first Filter call:
149 | 	// column A equal to "a", column B greater than 4.
150 | 	isA := dataframe.F{
151 | 		Colname:    "A",
152 | 		Comparator: series.Eq,
153 | 		Comparando: "a",
154 | 	}
155 | 	overFour := dataframe.F{
156 | 		Colname:    "B",
157 | 		Comparator: series.Greater,
158 | 		Comparando: 4,
159 | 	}
160 | 	// Condition applied afterwards to the result of the first call.
161 | 	isTrue := dataframe.F{
162 | 		Colname:    "D",
163 | 		Comparator: series.Eq,
164 | 		Comparando: true,
165 | 	}
166 | 	fil := df.Filter(isA, overFour)
167 | 	fil2 := fil.Filter(isTrue)
168 | 	fmt.Println(fil)
169 | 	fmt.Println(fil2)
170 | }
171 | 
172 | func ExampleDataFrame_Mutate() {
173 | 	records := [][]string{
174 | 		{"A", "B", "C", "D"},
175 | 		{"a", "4", "5.1", "true"},
176 | 		{"k", "5", "7.0", "true"},
177 | 		{"k", "4", "6.0", "true"},
178 | 		{"a", "2", "7.1", "false"},
179 | 	}
180 | 	df := dataframe.LoadRecords(records)
181 | 
182 | 	// A Series named after the existing column C changes that column.
183 | 	replacement := series.New([]string{"a", "b", "c", "d"}, series.String, "C")
184 | 	mut := df.Mutate(replacement)
185 | 
186 | 	// A Series with the so far unused name E adds a new column.
187 | 	extra := series.New([]string{"a", "b", "c", "d"}, series.String, "E")
188 | 	mut2 := df.Mutate(extra)
189 | 
190 | 	fmt.Println(mut)
191 | 	fmt.Println(mut2)
192 | }
193 | 
194 | func ExampleDataFrame_InnerJoin() {
195 | 	leftRecords := [][]string{
196 | 		{"A", "B", "C", "D"},
197 | 		{"a", "4", "5.1", "true"},
198 | 		{"k", "5", "7.0", "true"},
199 | 		{"k", "4", "6.0", "true"},
200 | 		{"a", "2", "7.1", "false"},
201 | 	}
202 | 	rightRecords := [][]string{
203 | 		{"A", "F", "D"},
204 | 		{"1", "1", "true"},
205 | 		{"4", "2", "false"},
206 | 		{"2", "8", "false"},
207 | 		{"5", "9", "false"},
208 | 	}
209 | 	left := dataframe.LoadRecords(leftRecords)
210 | 	right := dataframe.LoadRecords(rightRecords)
211 | 
212 | 	// D is passed as the join key; both inputs also have a column named A.
213 | 	joined := left.InnerJoin(right, "D")
214 | 	fmt.Println(joined)
215 | }
216 | 
217 | func ExampleDataFrame_Set() {
218 | 	records := [][]string{
219 | 		{"A", "B", "C", "D"},
220 | 		{"a", "4", "5.1", "true"},
221 | 		{"k", "5", "7.0", "true"},
222 | 		{"k", "4", "6.0", "true"},
223 | 		{"a", "2", "7.1", "false"},
224 | 	}
225 | 	df := dataframe.LoadRecords(records)
226 | 
227 | 	// Indexes 0 and 2 plus a two-row DataFrame are handed to Set.
228 | 	rowIdx := series.Ints([]int{0, 2})
229 | 	newRecords := [][]string{
230 | 		{"A", "B", "C", "D"},
231 | 		{"b", "4", "6.0", "true"},
232 | 		{"c", "3", "6.0", "false"},
233 | 	}
234 | 	newValues := dataframe.LoadRecords(newRecords)
235 | 
236 | 	df2 := df.Set(rowIdx, newValues)
237 | 	fmt.Println(df2)
238 | }
239 | 
240 | func ExampleDataFrame_Arrange() {
241 | 	records := [][]string{
242 | 		{"A", "B", "C", "D"},
243 | 		{"a", "4", "5.1", "true"},
244 | 		{"b", "4", "6.0", "true"},
245 | 		{"c", "3", "6.0", "false"},
246 | 		{"a", "2", "7.1", "false"},
247 | 	}
248 | 	df := dataframe.LoadRecords(records)
249 | 
250 | 	// Sort on column A first, then reverse-sort on column B.
251 | 	byA := dataframe.Sort("A")
252 | 	byBDesc := dataframe.RevSort("B")
253 | 	sorted := df.Arrange(byA, byBDesc)
254 | 	fmt.Println(sorted)
255 | }
256 | 
257 | func ExampleDataFrame_Describe() {
258 | 	records := [][]string{
259 | 		{"A", "B", "C", "D"},
260 | 		{"a", "4", "5.1", "true"},
261 | 		{"b", "4", "6.0", "true"},
262 | 		{"c", "3", "6.0", "false"},
263 | 		{"a", "2", "7.1", "false"},
264 | 	}
265 | 	// Describe is called on the freshly loaded DataFrame and its result printed.
266 | 	summary := dataframe.LoadRecords(records).Describe()
267 | 	fmt.Println(summary)
268 | }
269 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
  1 | # Change Log
  2 | 
  3 | All notable changes to this project will be documented in this file.
  4 | This project adheres to [Semantic Versioning](http://semver.org/).
  5 | 
  6 | ## [0.10.1] - 2019-11-08
  7 | 
  8 | ### Fixed
  9 | 
 10 | - LoadRecords printing type debug information
 11 | - Missing closing brackets in series.go
 12 | - Fix gonum import path in dataframe_test
 13 | 
 14 | ## [0.10.0] - 2019-11-08
 15 | 
 16 | ### Changed
 17 | 
 18 | - Merged dev branch changes from multiple collaborators (Sam Zaydel, Kyle
 19 |   Ellrott, Daniela Petruzalek, Christoph Laaber).
 20 | 
 21 | ## [0.9.0] - 2016-10-03
 22 | 
 23 | ### Added
 24 | 
 25 | - Additional method to load arbitrary struct slices to DataFrames (Juan Álvarez)
 26 | - New LoadOption Names to set initial column names (Sander van Harmelen).
 27 | - Parser option for csv delimiter (Kyle Ellrott)
 28 | - New Describe method for reporting summary statistics (Daniela Petruzalek)
 29 | 
 30 | ### Changed
 31 | 
 32 | - Improve the performance of multiple operations.
 33 | - Code cleanup for better consistency (Sander van Harmelen)
 34 | - Renamed 'Deselect' function to 'Drop' (Ben Marshall)
 35 | 
 36 | ## [0.8.0] - 2016-12-12
 37 | 
 38 | ### Added
 39 | 
 40 | - Series.Order method and tests.
 41 | - Series.IsNaN method and tests.
 42 | - DataFrame.Arrange method and tests.
 43 | - DataFrame.Capply method and tests.
 44 | - DataFrame.Rapply method and tests.
 45 | - Benchmarks for several operations on both the `series` and
 46 |   `dataframe` packages.
 47 | - Many optimizations that increase the performance dramatically.
 48 | - New LoadOption where the elements to be parsed as NaN from string
 49 |   can be selected.
 50 | - Gota can now return an implementation of `gonum/mat64.Matrix`
 51 |   interface via `DataFrame.Matrix()` and load a `mat64.Matrix` via
 52 |   `dataframe.LoadMatrix()`.
 53 | 
 54 | ### Changed
 55 | 
 56 | - elementInterface is now exported as Element.
 57 | - Split element.go into separate files for the implementations of the
 58 |   Element interface.
 59 | - LoadOptions API has been renamed for better documentation via `godoc`.
 60 | - `Series.Set` and `DataFrame.Set` now modify the structure in place
 61 |   for performance considerations. If one wants to use the old
 62 |   behaviour, it is suggested to use `DataFrame.Copy().Set(...)`
 63 |   instead of `DataFrame.Set(...)`.
 64 | - `DataFrame.Dim` has been changed to `DataFrame.Dims` for consistency
 65 |   with the `mat64.Matrix` interface.
 66 | - When printing a large `DataFrame` now the behaviour of the stringer
 67 |   interface is much nicer, showing only the first 10 rows and limiting
 68 |   the number of characters that can be shown by line
 69 | 
 70 | ### Removed
 71 | 
 72 | - Some unused functions from the helpers.go file.
 73 | 
 74 | ### Fixed
 75 | 
 76 | - Linter errors.
 77 | - stringElement.Float now returns NaN instead of 0 when applicable.
 78 | - Autorenaming column names when `hasHeaders == false` now is
 79 |   consistent with the autorename used with `dataframe.New`
 80 | - Bug where duplicated column names were not being assigned consecutive
 81 |   suffix numbers if the number of duplicates was greater than two.
 82 | 
 83 | ## [0.7.0] - 2016-11-27
 84 | 
 85 | ### Added
 86 | 
 87 | - Many more table tests for both `series` and `dataframe`
 88 | - Set method for `Series` and `DataFrame`
 89 | - When loading data from CSV, JSON, or Records, different
 90 |   `LoadOptions` can now be configured. This includes assigning
 91 |   a default type, manually specifying the column types and others.
 92 | - More documentation for previously undocumented functions.
 93 | 
 94 | ### Changed
 95 | 
 96 | - The project has been restructured on separated `dataframe` and
 97 |   `series` packages.
 98 | - Reviewed entire `Series` codebase for better style and
 99 |   maintainability.
100 | - `DataFrame.Select` now accepts several types of indexes
101 | - Error messages are now more consistent.
102 | - The standard way of checking for errors on both `series` and
103 |   `dataframe` is to check the `Err` field on each structure.
104 | - `ReadCSV`/`ReadJSON` and `WriteCSV`/`WriteJSON` now accept
105 |   `io.Reader` and `io.Writer` respectively.
106 | - Updated README with the new changes.
107 | 
108 | ### Removed
109 | 
110 | - Removed unnecessary abstraction layer on `Series.elements`
111 | 
112 | ## [0.6.0] - 2016-10-29
113 | 
114 | ### Added
115 | 
116 | - InnerJoin, CrossJoin, RightJoin, LeftJoin, OuterJoin functions
117 | 
118 | ### Changed
119 | 
120 | - More code refactoring for easier maintenance and management
121 | - Add more documentation to the exported functions
122 | - Remove unnecessary methods and structures from the exported API
123 | 
124 | ### Removed
125 | 
126 | - colnames and coltypes from the DataFrame structure
127 | 
128 | ## [0.5.0] - 2016-08-09
129 | 
130 | ### Added
131 | 
132 | - Read and write DataFrames from CSV, JSON, []map[string]interface{},
133 |   [][]string.
134 | - New constructor for DataFrame accepts Series and NamedSeries as
135 |   arguments.
136 | - Subset, Select, Rename, Mutate, Filter, RBind and CBind methods
137 | - Much Better error handling
138 | 
139 | ### Changed
140 | 
141 | - Almost complete rewrite of DataFrame code.
142 | - Now using Series as first class citizens and building blocks for
143 |   DataFrames.
144 | 
145 | ### Removed
146 | 
147 | - Merge/Join functions have been temporarily removed to be adapted to
148 |   the new architecture.
149 | - Cell interface for allowing custom types into the system.
150 | 
151 | ## [0.4.0] - 2016-02-18
152 | 
153 | ### Added
154 | 
155 | - Getter methods for nrows and ncols.
156 | - An InnerJoin function that performs an Inner Merge/Join of two
157 |   DataFrames by the given keys.
158 | - RightJoin and LeftJoin functions that perform outer right/outer
159 |   left joins of two DataFrames by the given keys.
160 | - A CrossJoin function that performs a Cross Merge/Join of two
161 |   DataFrames.
162 | - The Cell interface now requires an NA() method that returns an
163 |   empty cell for the given type.
164 | - The Cell interface now requires a Copy method.
165 | 
166 | ### Changed
167 | 
168 | - The `cell` interface is now exported: `Cell`.
169 | - Cell method NA() is now IsNA().
170 | - The function parseColumn is now a method.
171 | - A number of fields and methods are now exported.
172 | 
173 | ### Fixed
174 | 
175 | - Now ensuring that generated subsets are in fact new copies entirely,
176 |   not copying pointers to the same memory address.
177 | 
178 | ## [0.3.0] - 2016-02-18
179 | 
180 | ### Added
181 | 
182 | - Getter and setter methods for the column names of a DataFrame
183 | - Bool column type has been made available
184 | - New Bool() interface
185 | - A `column` can now know if any of its elements is NA and report a
186 |   list of said NA elements ([]bool).
187 | 
188 | ### Changed
189 | 
190 | - Renamed `cell` interface elements to be more idiomatic:
191 |   - ToInteger() is now Int()
192 |   - ToFloat() is now Float()
193 | - The `cell` interface has changed. Int() and Float() now
194 |   return pointers instead of values to prevent future conflicts when
195 |   returning an error.
196 | - The `cell` interface has changed. Checksum() [16]byte added.
197 | - Using cell.Checksum() for identification of unique elements instead
198 |   of raw strings.
199 | - The `cell` interface has changed, now also requires ToBool() method.
200 | - String type now does not contain a string, but a pointer to a string.
201 | 
202 | ### Fixed
203 | 
204 | - Bool type constructor function Bools now parses `bool` and `[]bool`
205 |   elements correctly.
206 | - Int type constructor function Ints now parses `bool` and `[]bool`
207 |   elements correctly.
208 | - Float type constructor function Floats now parses `bool` and `[]bool`
209 |   elements correctly.
210 | - String type constructor function Strings now parses `bool` and `[]bool`
211 |   elements correctly.
212 | 
213 | ## [0.2.1] - 2016-02-14
214 | 
215 | ### Fixed
216 | 
217 | - Fixed a bug when the maximum number of characters on a column was
218 |   not being updated properly when subsetting.
219 | 
220 | ## [0.2.0] - 2016-02-13
221 | 
222 | ### Added
223 | 
224 | - Added a lot of unit tests
225 | 
226 | ### Changed
227 | 
228 | - The base types are now `df.String`, `df.Int`, and `df.Float`.
229 | - Restructured the project in different files.
230 | - Refactored the project so that it will allow columns to be of any
231 |   type as long as it complies with the necessary interfaces.
232 | 
233 | ## [0.1.0] - 2016-02-06
234 | 
235 | ### Added
236 | 
237 | - Load csv data to DataFrame.
238 | - Parse data to four supported types: `int`, `float64`, `date`
239 |   & `string`.
240 | - Row/Column subsetting (Indexing, column names, row numbers, range).
241 | - Unique/Duplicated row subsetting.
242 | - DataFrame combinations by rows and columns (cbind/rbind).
243 | 
244 | [0.1.0]: https://github.com/go-gota/gota/compare/v0.1.0...v0.1.0
245 | [0.2.0]: https://github.com/go-gota/gota/compare/v0.1.0...v0.2.0
246 | [0.2.1]: https://github.com/go-gota/gota/compare/v0.2.0...v0.2.1
247 | [0.3.0]: https://github.com/go-gota/gota/compare/v0.2.1...v0.3.0
248 | [0.4.0]: https://github.com/go-gota/gota/compare/v0.3.0...v0.4.0
249 | [0.5.0]: https://github.com/go-gota/gota/compare/v0.4.0...v0.5.0
250 | [0.6.0]: https://github.com/go-gota/gota/compare/v0.5.0...v0.6.0
251 | [0.7.0]: https://github.com/go-gota/gota/compare/v0.6.0...v0.7.0
252 | [0.8.0]: https://github.com/go-gota/gota/compare/v0.7.0...v0.8.0
253 | [0.9.0]: https://github.com/go-gota/gota/compare/v0.8.0...v0.9.0
254 | [0.10.0]: https://github.com/go-gota/gota/compare/v0.9.0...v0.10.0
255 | [0.10.1]: https://github.com/go-gota/gota/compare/v0.10.0...v0.10.1
256 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
  1 | Apache License
  2 | ==============
  3 | 
  4 | _Version 2.0, January 2004_  
  5 | _<http://www.apache.org/licenses/>_
  6 | 
  7 | ### Terms and Conditions for use, reproduction, and distribution
  8 | 
  9 | #### 1. Definitions
 10 | 
 11 | “License” shall mean the terms and conditions for use, reproduction,
 12 | and distribution as defined by Sections 1 through 9 of this document.
 13 | 
 14 | “Licensor” shall mean the copyright owner or entity authorized by the
 15 | copyright owner that is granting the License.
 16 | 
 17 | “Legal Entity” shall mean the union of the acting entity and all other
 18 | entities that control, are controlled by, or are under common control
 19 | with that entity.  For the purposes of this definition, “control”
 20 | means **(i)** the power, direct or indirect, to cause the direction or
 21 | management of such entity, whether by contract or otherwise, or
 22 | **(ii)** ownership of fifty percent (50%) or more of the outstanding
 23 | shares, or **(iii)** beneficial ownership of such entity.
 24 | 
 25 | “You” (or “Your”) shall mean an individual or Legal Entity exercising
 26 | permissions granted by this License.
 27 | 
 28 | “Source” form shall mean the preferred form for making modifications,
 29 | including but not limited to software source code, documentation
 30 | source, and configuration files.
 31 | 
 32 | “Object” form shall mean any form resulting from mechanical
 33 | transformation or translation of a Source form, including but not
 34 | limited to compiled object code, generated documentation, and
 35 | conversions to other media types.
 36 | 
 37 | “Work” shall mean the work of authorship, whether in Source or Object
 38 | form, made available under the License, as indicated by a copyright
 39 | notice that is included in or attached to the work (an example is
 40 | provided in the Appendix below).
 41 | 
 42 | “Derivative Works” shall mean any work, whether in Source or Object
 43 | form, that is based on (or derived from) the Work and for which the
 44 | editorial revisions, annotations, elaborations, or other modifications
 45 | represent, as a whole, an original work of authorship. For the
 46 | purposes of this License, Derivative Works shall not include works
 47 | that remain separable from, or merely link (or bind by name) to the
 48 | interfaces of, the Work and Derivative Works thereof.
 49 | 
 50 | “Contribution” shall mean any work of authorship, including the
 51 | original version of the Work and any modifications or additions to
 52 | that Work or Derivative Works thereof, that is intentionally submitted
 53 | to Licensor for inclusion in the Work by the copyright owner or by an
 54 | individual or Legal Entity authorized to submit on behalf of the
 55 | copyright owner. For the purposes of this definition, “submitted”
 56 | means any form of electronic, verbal, or written communication sent to
 57 | the Licensor or its representatives, including but not limited to
 58 | communication on electronic mailing lists, source code control
 59 | systems, and issue tracking systems that are managed by, or on behalf
 60 | of, the Licensor for the purpose of discussing and improving the Work,
 61 | but excluding communication that is conspicuously marked or otherwise
 62 | designated in writing by the copyright owner as “Not a Contribution.”
 63 | 
 64 | “Contributor” shall mean Licensor and any individual or Legal Entity
 65 | on behalf of whom a Contribution has been received by Licensor and
 66 | subsequently incorporated within the Work.
 67 | 
 68 | #### 2. Grant of Copyright License
 69 | 
 70 | Subject to the terms and conditions of this License, each Contributor
 71 | hereby grants to You a perpetual, worldwide, non-exclusive, no-charge,
 72 | royalty-free, irrevocable copyright license to reproduce, prepare
 73 | Derivative Works of, publicly display, publicly perform, sublicense,
 74 | and distribute the Work and such Derivative Works in Source or Object
 75 | form.
 76 | 
 77 | #### 3. Grant of Patent License
 78 | 
 79 | Subject to the terms and conditions of this License, each Contributor
 80 | hereby grants to You a perpetual, worldwide, non-exclusive, no-charge,
 81 | royalty-free, irrevocable (except as stated in this section) patent
 82 | license to make, have made, use, offer to sell, sell, import, and
 83 | otherwise transfer the Work, where such license applies only to those
 84 | patent claims licensable by such Contributor that are necessarily
 85 | infringed by their Contribution(s) alone or by combination of their
 86 | Contribution(s) with the Work to which such Contribution(s) was
 87 | submitted. If You institute patent litigation against any entity
 88 | (including a cross-claim or counterclaim in a lawsuit) alleging that
 89 | the Work or a Contribution incorporated within the Work constitutes
 90 | direct or contributory patent infringement, then any patent licenses
 91 | granted to You under this License for that Work shall terminate as of
 92 | the date such litigation is filed.
 93 | 
 94 | #### 4. Redistribution
 95 | 
 96 | You may reproduce and distribute copies of the Work or Derivative
 97 | Works thereof in any medium, with or without modifications, and in
 98 | Source or Object form, provided that You meet the following
 99 | conditions:
100 | 
101 | * **(a)** You must give any other recipients of the Work or Derivative
102 |   Works a copy of this License; and
103 | * **(b)** You must cause any modified files to carry prominent notices
104 |   stating that You changed the files; and
105 | * **(c)** You must retain, in the Source form of any Derivative Works
106 |   that You distribute, all copyright, patent, trademark, and
107 |   attribution notices from the Source form of the Work, excluding
108 |   those notices that do not pertain to any part of the Derivative
109 |   Works; and
110 | * **(d)** If the Work includes a “NOTICE” text file as part of its
111 |   distribution, then any Derivative Works that You distribute must
112 |   include a readable copy of the attribution notices contained within
113 |   such NOTICE file, excluding those notices that do not pertain to any
114 |   part of the Derivative Works, in at least one of the following
115 |   places: within a NOTICE text file distributed as part of the
116 |   Derivative Works; within the Source form or documentation, if
117 |   provided along with the Derivative Works; or, within a display
118 |   generated by the Derivative Works, if and wherever such third-party
119 |   notices normally appear. The contents of the NOTICE file are for
120 |   informational purposes only and do not modify the License. You may
121 |   add Your own attribution notices within Derivative Works that You
122 |   distribute, alongside or as an addendum to the NOTICE text from the
123 |   Work, provided that such additional attribution notices cannot be
124 |   construed as modifying the License.
125 | 
126 | You may add Your own copyright statement to Your modifications and may
127 | provide additional or different license terms and conditions for use,
128 | reproduction, or distribution of Your modifications, or for any such
129 | Derivative Works as a whole, provided Your use, reproduction, and
130 | distribution of the Work otherwise complies with the conditions stated
131 | in this License.
132 | 
133 | #### 5. Submission of Contributions
134 | 
135 | Unless You explicitly state otherwise, any Contribution intentionally
136 | submitted for inclusion in the Work by You to the Licensor shall be
137 | under the terms and conditions of this License, without any additional
138 | terms or conditions.  Notwithstanding the above, nothing herein shall
139 | supersede or modify the terms of any separate license agreement you
140 | may have executed with Licensor regarding such Contributions.
141 | 
142 | #### 6. Trademarks
143 | 
144 | This License does not grant permission to use the trade names,
145 | trademarks, service marks, or product names of the Licensor, except as
146 | required for reasonable and customary use in describing the origin of
147 | the Work and reproducing the content of the NOTICE file.
148 | 
149 | #### 7. Disclaimer of Warranty
150 | 
151 | Unless required by applicable law or agreed to in writing, Licensor
152 | provides the Work (and each Contributor provides its Contributions) on
153 | an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
154 | express or implied, including, without limitation, any warranties or
155 | conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR
156 | A PARTICULAR PURPOSE. You are solely responsible for determining the
157 | appropriateness of using or redistributing the Work and assume any
158 | risks associated with Your exercise of permissions under this License.
159 | 
160 | #### 8. Limitation of Liability
161 | 
162 | In no event and under no legal theory, whether in tort (including
163 | negligence), contract, or otherwise, unless required by applicable law
164 | (such as deliberate and grossly negligent acts) or agreed to in
165 | writing, shall any Contributor be liable to You for damages, including
166 | any direct, indirect, special, incidental, or consequential damages of
167 | any character arising as a result of this License or out of the use or
168 | inability to use the Work (including but not limited to damages for
169 | loss of goodwill, work stoppage, computer failure or malfunction, or
170 | any and all other commercial damages or losses), even if such
171 | Contributor has been advised of the possibility of such damages.
172 | 
173 | #### 9. Accepting Warranty or Additional Liability
174 | 
175 | While redistributing the Work or Derivative Works thereof, You may
176 | choose to offer, and charge a fee for, acceptance of support,
177 | warranty, indemnity, or other liability obligations and/or rights
178 | consistent with this License. However, in accepting such obligations,
179 | You may act only on Your own behalf and on Your sole responsibility,
180 | not on behalf of any other Contributor, and only if You agree to
181 | indemnify, defend, and hold each Contributor harmless for any
182 | liability incurred by, or claims asserted against, such Contributor by
183 | reason of your accepting any such warranty or additional liability.
184 | 
185 | _END OF TERMS AND CONDITIONS_
186 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Gota: DataFrames, Series and Data Wrangling for Go
  2 | 
  3 | This is an implementation of DataFrames, Series and data wrangling
  4 | methods for the Go programming language. The API is still in flux so
  5 | _use at your own risk_.
  6 | 
  7 | ## DataFrame
  8 | 
  9 | The term DataFrame typically refers to a tabular dataset that can be
 10 | viewed as a two-dimensional table. Often the columns of this dataset
 11 | refer to a list of features, while the rows represent a number of
 12 | measurements. As data in the real world is not perfect, DataFrames
 13 | support missing measurements, represented as NaN elements.
 14 | 
 15 | Common examples of DataFrames can be found in Excel sheets, CSV files
 16 | or SQL database tables, but this data can come in a variety of other
 17 | formats, like a collection of JSON objects or XML files.
 18 | 
 19 | The utility of DataFrames resides in the ability to subset them, merge
 20 | them, summarize the data for individual features or apply functions to
 21 | entire rows or columns, all while keeping column type integrity.
 22 | 
 23 | ### Usage
 24 | 
 25 | #### Loading data
 26 | 
 27 | DataFrames can be constructed passing Series to the dataframe.New constructor
 28 | function:
 29 | 
 30 | ```go
 31 | df := dataframe.New(
 32 | 	series.New([]string{"b", "a"}, series.String, "COL.1"),
 33 | 	series.New([]int{1, 2}, series.Int, "COL.2"),
 34 | 	series.New([]float64{3.0, 4.0}, series.Float, "COL.3"),
 35 | )
 36 | ```
 37 | 
 38 | You can also load the data directly from other formats.
 39 | The base loading function takes some records in the
 40 | form `[][]string` and returns a new DataFrame from there:
 41 | 
 42 | ```go
 43 | df := dataframe.LoadRecords(
 44 |     [][]string{
 45 |         []string{"A", "B", "C", "D"},
 46 |         []string{"a", "4", "5.1", "true"},
 47 |         []string{"k", "5", "7.0", "true"},
 48 |         []string{"k", "4", "6.0", "true"},
 49 |         []string{"a", "2", "7.1", "false"},
 50 |     },
 51 | )
 52 | ```
 53 | 
 54 | Now you can also create DataFrames by loading a slice of arbitrary structs:
 55 | 
 56 | ```go
 57 | type User struct {
 58 | 	Name     string
 59 | 	Age      int
 60 | 	Accuracy float64
 61 |     ignored  bool // ignored since unexported
 62 | }
 63 | users := []User{
 64 | 	{"Aram", 17, 0.2, true},
 65 | 	{"Juan", 18, 0.8, true},
 66 | 	{"Ana", 22, 0.5, true},
 67 | }
 68 | df := dataframe.LoadStructs(users)
 69 | ```
 70 | 
 71 | By default, the column types will be auto detected but this can be
 72 | configured. For example, if we wish the default type to be `Float` but
 73 | columns `A` and `D` are `String` and `Bool` respectively:
 74 | 
 75 | ```go
 76 | df := dataframe.LoadRecords(
 77 |     [][]string{
 78 |         []string{"A", "B", "C", "D"},
 79 |         []string{"a", "4", "5.1", "true"},
 80 |         []string{"k", "5", "7.0", "true"},
 81 |         []string{"k", "4", "6.0", "true"},
 82 |         []string{"a", "2", "7.1", "false"},
 83 |     },
 84 |     dataframe.DetectTypes(false),
 85 |     dataframe.DefaultType(series.Float),
 86 |     dataframe.WithTypes(map[string]series.Type{
 87 |         "A": series.String,
 88 |         "D": series.Bool,
 89 |     }),
 90 | )
 91 | ```
 92 | 
 93 | Similarly, you can load the data stored in a `[]map[string]interface{}`:
 94 | 
 95 | ```go
 96 | df := dataframe.LoadMaps(
 97 |     []map[string]interface{}{
 98 |         map[string]interface{}{
 99 |             "A": "a",
100 |             "B": 1,
101 |             "C": true,
102 |             "D": 0,
103 |         },
104 |         map[string]interface{}{
105 |             "A": "b",
106 |             "B": 2,
107 |             "C": true,
108 |             "D": 0.5,
109 |         },
110 |     },
111 | )
112 | ```
113 | 
114 | You can also pass an `io.Reader` to the functions `ReadCSV`/`ReadJSON`
115 | and it will work as expected given that the data is correct:
116 | 
117 | ```go
118 | csvStr := `
119 | Country,Date,Age,Amount,Id
120 | "United States",2012-02-01,50,112.1,01234
121 | "United States",2012-02-01,32,321.31,54320
122 | "United Kingdom",2012-02-01,17,18.2,12345
123 | "United States",2012-02-01,32,321.31,54320
124 | "United Kingdom",2012-02-01,NA,18.2,12345
125 | "United States",2012-02-01,32,321.31,54320
126 | "United States",2012-02-01,32,321.31,54320
127 | Spain,2012-02-01,66,555.42,00241
128 | `
129 | df := dataframe.ReadCSV(strings.NewReader(csvStr))
130 | ```
131 | 
132 | ```go
133 | jsonStr := `[{"COL.2":1,"COL.3":3},{"COL.1":5,"COL.2":2,"COL.3":2},{"COL.1":6,"COL.2":3,"COL.3":1}]`
134 | df := dataframe.ReadJSON(strings.NewReader(jsonStr))
135 | ```
136 | 
137 | #### Subsetting
138 | 
139 | We can subset our DataFrames with the Subset method. For example if we
140 | want the first and third rows we can do the following:
141 | 
142 | ```go
143 | sub := df.Subset([]int{0, 2})
144 | ```
145 | 
146 | #### Column selection
147 | 
148 | If instead of subsetting the rows we want to select specific columns,
149 | we can do so by index or by column name:
150 | 
151 | ```go
152 | sel1 := df.Select([]int{0, 2})
153 | sel2 := df.Select([]string{"A", "C"})
154 | ```
155 | 
156 | #### Updating values
157 | 
158 | In order to update the values of a DataFrame we can use the Set
159 | method:
160 | 
161 | ```go
162 | df2 := df.Set(
163 |     []int{0, 2},
164 |     dataframe.LoadRecords(
165 |         [][]string{
166 |             []string{"A", "B", "C", "D"},
167 |             []string{"b", "4", "6.0", "true"},
168 |             []string{"c", "3", "6.0", "false"},
169 |         },
170 |     ),
171 | )
172 | ```
173 | 
174 | #### Filtering
175 | 
176 | For more complex row subsetting we can use the Filter method. For
177 | example, if we want the rows where the column "A" is equal to "a" or
178 | column "B" is greater than 4:
179 | 
180 | ```go
181 | fil := df.Filter(
182 |     dataframe.F{"A", series.Eq, "a"},
183 |     dataframe.F{"B", series.Greater, 4},
184 | )
185 | fil2 := fil.Filter(
186 |     dataframe.F{"D", series.Eq, true},
187 | )
188 | ```
189 | 
190 | Filters inside Filter are combined as OR operations whereas if we chain
191 | Filter methods, they will behave as AND.
192 | 
193 | #### Arrange
194 | 
195 | With Arrange a DataFrame can be sorted by the given column names:
196 | 
197 | ```go
198 | sorted := df.Arrange(
199 |     dataframe.Sort("A"),    // Sort in ascending order
200 |     dataframe.RevSort("B"), // Sort in descending order
201 | )
202 | ```
203 | 
204 | #### Mutate
205 | 
206 | If we want to replace an existing column, or append a new one at the end,
207 | based on a given Series, we can use the Mutate method:
208 | 
209 | ```go
210 | // Change column C with a new one
211 | mut := df.Mutate(
212 |     series.New([]string{"a", "b", "c", "d"}, series.String, "C"),
213 | )
214 | // Add a new column E
215 | mut2 := df.Mutate(
216 |     series.New([]string{"a", "b", "c", "d"}, series.String, "E"),
217 | )
218 | ```
219 | 
220 | #### Joins
221 | 
222 | Different Join operations are supported (`InnerJoin`, `LeftJoin`,
223 | `RightJoin`, `CrossJoin`). In order to use these methods you have to
224 | specify which are the keys to be used for joining the DataFrames:
225 | 
226 | ```go
227 | df := dataframe.LoadRecords(
228 |     [][]string{
229 |         []string{"A", "B", "C", "D"},
230 |         []string{"a", "4", "5.1", "true"},
231 |         []string{"k", "5", "7.0", "true"},
232 |         []string{"k", "4", "6.0", "true"},
233 |         []string{"a", "2", "7.1", "false"},
234 |     },
235 | )
236 | df2 := dataframe.LoadRecords(
237 |     [][]string{
238 |         []string{"A", "F", "D"},
239 |         []string{"1", "1", "true"},
240 |         []string{"4", "2", "false"},
241 |         []string{"2", "8", "false"},
242 |         []string{"5", "9", "false"},
243 |     },
244 | )
245 | join := df.InnerJoin(df2, "D")
246 | ```
247 | 
248 | #### Function application
249 | 
250 | Functions can be applied to the rows or columns of a DataFrame,
251 | casting the types as necessary:
252 | 
253 | ```go
254 | mean := func(s series.Series) series.Series {
255 |     floats := s.Float()
256 |     sum := 0.0
257 |     for _, f := range floats {
258 |         sum += f
259 |     }
260 |     return series.Floats(sum / float64(len(floats)))
261 | }
262 | df.Capply(mean)
263 | df.Rapply(mean)
264 | ```
265 | 
266 | #### Chaining operations
267 | 
268 | DataFrames support a number of methods for wrangling the data,
269 | filtering, subsetting, selecting columns, adding new columns or
270 | modifying existing ones. All these methods can be chained one after
271 | another, and at the end of the chain we can check whether any errors
272 | occurred through the DataFrame Err field. If any of the methods in the chain
273 | returns an error, the remaining operations on the chain will become
274 | a no-op.
275 | 
276 | ```go
277 | a = a.Rename("Origin", "Country").
278 |     Filter(dataframe.F{"Age", "<", 50}).
279 |     Filter(dataframe.F{"Origin", "==", "United States"}).
280 |     Select("Id", "Origin", "Date").
281 |     Subset([]int{1, 3})
282 | if a.Err != nil {
283 |     log.Fatal("Oh noes!")
284 | }
285 | ```
286 | 
287 | #### Print to console
288 | 
289 | ```go
290 | fmt.Println(flights)
291 | 
292 | > [336776x20] DataFrame
293 | >
294 | >     X0    year  month day   dep_time sched_dep_time dep_delay arr_time ...
295 | >  0: 1     2013  1     1     517      515            2         830      ...
296 | >  1: 2     2013  1     1     533      529            4         850      ...
297 | >  2: 3     2013  1     1     542      540            2         923      ...
298 | >  3: 4     2013  1     1     544      545            -1        1004     ...
299 | >  4: 5     2013  1     1     554      600            -6        812      ...
300 | >  5: 6     2013  1     1     554      558            -4        740      ...
301 | >  6: 7     2013  1     1     555      600            -5        913      ...
302 | >  7: 8     2013  1     1     557      600            -3        709      ...
303 | >  8: 9     2013  1     1     557      600            -3        838      ...
304 | >  9: 10    2013  1     1     558      600            -2        753      ...
305 | >     ...   ...   ...   ...   ...      ...            ...       ...      ...
306 | >     <int> <int> <int> <int> <int>    <int>          <int>     <int>    ...
307 | >
308 | > Not Showing: sched_arr_time <int>, arr_delay <int>, carrier <string>, flight <int>,
309 | > tailnum <string>, origin <string>, dest <string>, air_time <int>, distance <int>, hour <int>,
310 | > minute <int>, time_hour <string>
311 | ```
312 | 
313 | #### Interfacing with gonum
314 | 
315 | A `gonum/mat.Matrix` or any object that implements the `dataframe.Matrix`
316 | interface can be loaded as a `DataFrame` by using the `LoadMatrix()` method. If
317 | one wants to convert a `DataFrame` to a `mat.Matrix`, one has to define
318 | the required struct and method implementations. Since a `DataFrame` already
319 | implements the `Dims() (r, c int)` method, only implementations for the `At` and
320 | `T` methods are necessary:
321 | 
322 | ```go
323 | type matrix struct {
324 | 	DataFrame
325 | }
326 | 
327 | func (m matrix) At(i, j int) float64 {
328 | 	return m.columns[j].Elem(i).Float()
329 | }
330 | 
331 | func (m matrix) T() mat.Matrix {
332 | 	return mat.Transpose{Matrix: m}
333 | }
334 | ```
335 | 
336 | ## Series
337 | 
338 | Series are essentially vectors of elements of the same type with
339 | support for missing values. Series are the building blocks for
340 | DataFrame columns.
341 | 
342 | Four types are currently supported:
343 | 
344 | ```go
345 | Int
346 | Float
347 | String
348 | Bool
349 | ```
350 | 
351 | For more information about the API, make sure to check:
352 | 
353 | - [dataframe godoc][3]
354 | - [series godoc][4]
355 | 
356 | ## License
357 | 
358 | Copyright 2016 Alejandro Sanchez Brotons
359 | 
360 | Licensed under the Apache License, Version 2.0 (the "License"); you
361 | may not use this file except in compliance with the License. You may
362 | obtain a copy of the License at
363 | 
364 |     http://www.apache.org/licenses/LICENSE-2.0
365 | 
366 | Unless required by applicable law or agreed to in writing, software
367 | distributed under the License is distributed on an "AS IS" BASIS,
368 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
369 | implied. See the License for the specific language governing
370 | permissions and limitations under the License.
371 | 
372 | [1]: https://github.com/gonum
373 | [2]: https://github.com/go-gota/gota
374 | [3]: https://godoc.org/github.com/go-gota/gota/dataframe
375 | [4]: https://godoc.org/github.com/go-gota/gota/series
376 | 


--------------------------------------------------------------------------------
/series/series.go:
--------------------------------------------------------------------------------
  1 | package series
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"reflect"
  6 | 	"sort"
  7 | 	"strings"
  8 | 
  9 | 	"math"
 10 | 
 11 | 	"gonum.org/v1/gonum/stat"
 12 | )
 13 | 
// Series is a data structure designed for operating on arrays of elements that
// should comply with a certain type structure. They are flexible enough that
// they can be transformed to other Series types and account for missing or
// non-valid elements. Most of the power of Series resides in the ability to
// compare and subset Series of different types.
type Series struct {
	Name     string   // The name of the series
	elements Elements // The values of the elements
	t        Type     // The type of the series
	Err      error    // If there are errors they are stored here
}
 25 | 
// Elements is the interface that represents the array of elements contained in
// a Series.
type Elements interface {
	// Elem returns the element stored at the given position.
	Elem(int) Element
	// Len returns the number of elements.
	Len() int
}
 32 | 
// Element is the interface that defines the methods that every element of a
// Series must implement.
type Element interface {
	// Setter method
	Set(interface{})

	// Comparison methods
	Eq(Element) bool
	Neq(Element) bool
	Less(Element) bool
	LessEq(Element) bool
	Greater(Element) bool
	GreaterEq(Element) bool

	// Accessor/conversion methods
	Copy() Element     // FIXME: Returning interface is a recipe for pain
	Val() ElementValue // FIXME: Returning interface is a recipe for pain
	String() string
	Int() (int, error)
	Float() float64
	Bool() (bool, error)

	// Information methods
	IsNA() bool
	Type() Type
}
 59 | 
// intElements is the concrete implementation of Elements for Int elements.
// Elem returns a pointer to the i-th slot of the slice rather than a copy.
type intElements []intElement

func (e intElements) Len() int           { return len(e) }
func (e intElements) Elem(i int) Element { return &e[i] }

// stringElements is the concrete implementation of Elements for String elements.
// Elem returns a pointer to the i-th slot of the slice rather than a copy.
type stringElements []stringElement

func (e stringElements) Len() int           { return len(e) }
func (e stringElements) Elem(i int) Element { return &e[i] }

// floatElements is the concrete implementation of Elements for Float elements.
// Elem returns a pointer to the i-th slot of the slice rather than a copy.
type floatElements []floatElement

func (e floatElements) Len() int           { return len(e) }
func (e floatElements) Elem(i int) Element { return &e[i] }

// boolElements is the concrete implementation of Elements for Bool elements.
// Elem returns a pointer to the i-th slot of the slice rather than a copy.
type boolElements []boolElement

func (e boolElements) Len() int           { return len(e) }
func (e boolElements) Elem(i int) Element { return &e[i] }
 83 | 
// ElementValue represents the value that can be used for marshaling or
// unmarshaling Elements.
type ElementValue interface{}

// MapFunction is the signature of the functions accepted by Series.Map: it
// receives one Element and returns the Element that should take its place.
type MapFunction func(Element) Element
 89 | 
// Comparator is a convenience alias that provides a more type-safe way of
// reasoning about and using comparators.
type Comparator string

// Supported Comparators
const (
	Eq        Comparator = "==" // Equal
	Neq       Comparator = "!=" // Not equal
	Greater   Comparator = ">"  // Greater than
	GreaterEq Comparator = ">=" // Greater than or equal to
	Less      Comparator = "<"  // Less than
	LessEq    Comparator = "<=" // Less than or equal to
	In        Comparator = "in" // Inside
)
104 | 
// Type is a convenience alias that provides a more type-safe way of reasoning
// about and using Series types.
type Type string

// Supported Series Types
const (
	String Type = "string"
	Int    Type = "int"
	Float  Type = "float"
	Bool   Type = "bool"
)
116 | 
// Indexes represent the elements that can be used for selecting a subset of
// elements within a Series. Currently supported are:
//
//     int            // Matches the given index number
//     []int          // Matches all given index numbers
//     []bool         // Matches all elements in a Series marked as true
//     Series [Int]   // Same as []int
//     Series [Bool]  // Same as []bool
//
// Any other value is rejected with an "unknown indexing mode" error when the
// indexes are parsed.
type Indexes interface{}
126 | 
127 | // New is the generic Series constructor
128 | func New(values interface{}, t Type, name string) Series {
129 | 	ret := Series{
130 | 		Name: name,
131 | 		t:    t,
132 | 	}
133 | 
134 | 	// Pre-allocate elements
135 | 	preAlloc := func(n int) {
136 | 		switch t {
137 | 		case String:
138 | 			ret.elements = make(stringElements, n)
139 | 		case Int:
140 | 			ret.elements = make(intElements, n)
141 | 		case Float:
142 | 			ret.elements = make(floatElements, n)
143 | 		case Bool:
144 | 			ret.elements = make(boolElements, n)
145 | 		default:
146 | 			panic(fmt.Sprintf("unknown type %v", t))
147 | 		}
148 | 	}
149 | 
150 | 	if values == nil {
151 | 		preAlloc(1)
152 | 		ret.elements.Elem(0).Set(nil)
153 | 		return ret
154 | 	}
155 | 
156 | 	switch values.(type) {
157 | 	case []string:
158 | 		v := values.([]string)
159 | 		l := len(v)
160 | 		preAlloc(l)
161 | 		for i := 0; i < l; i++ {
162 | 			ret.elements.Elem(i).Set(v[i])
163 | 		}
164 | 	case []float64:
165 | 		v := values.([]float64)
166 | 		l := len(v)
167 | 		preAlloc(l)
168 | 		for i := 0; i < l; i++ {
169 | 			ret.elements.Elem(i).Set(v[i])
170 | 		}
171 | 	case []int:
172 | 		v := values.([]int)
173 | 		l := len(v)
174 | 		preAlloc(l)
175 | 		for i := 0; i < l; i++ {
176 | 			ret.elements.Elem(i).Set(v[i])
177 | 		}
178 | 	case []bool:
179 | 		v := values.([]bool)
180 | 		l := len(v)
181 | 		preAlloc(l)
182 | 		for i := 0; i < l; i++ {
183 | 			ret.elements.Elem(i).Set(v[i])
184 | 		}
185 | 	case Series:
186 | 		v := values.(Series)
187 | 		l := v.Len()
188 | 		preAlloc(l)
189 | 		for i := 0; i < l; i++ {
190 | 			ret.elements.Elem(i).Set(v.elements.Elem(i))
191 | 		}
192 | 	default:
193 | 		switch reflect.TypeOf(values).Kind() {
194 | 		case reflect.Slice:
195 | 			v := reflect.ValueOf(values)
196 | 			l := v.Len()
197 | 			preAlloc(v.Len())
198 | 			for i := 0; i < l; i++ {
199 | 				val := v.Index(i).Interface()
200 | 				ret.elements.Elem(i).Set(val)
201 | 			}
202 | 		default:
203 | 			preAlloc(1)
204 | 			v := reflect.ValueOf(values)
205 | 			val := v.Interface()
206 | 			ret.elements.Elem(0).Set(val)
207 | 		}
208 | 	}
209 | 
210 | 	return ret
211 | }
212 | 
// Strings is a constructor for a String Series. The returned Series has an
// empty name; values is interpreted exactly as by New.
func Strings(values interface{}) Series {
	return New(values, String, "")
}
217 | 
// Ints is a constructor for an Int Series. The returned Series has an empty
// name; values is interpreted exactly as by New.
func Ints(values interface{}) Series {
	return New(values, Int, "")
}
222 | 
// Floats is a constructor for a Float Series. The returned Series has an
// empty name; values is interpreted exactly as by New.
func Floats(values interface{}) Series {
	return New(values, Float, "")
}
227 | 
// Bools is a constructor for a Bool Series. The returned Series has an empty
// name; values is interpreted exactly as by New.
func Bools(values interface{}) Series {
	return New(values, Bool, "")
}
232 | 
// Empty returns an empty (zero-length) Series of the same type and with the
// same name as the receiver.
func (s Series) Empty() Series {
	// An empty []int is used only to request zero elements; the element type
	// of the result is dictated by s.t, not by the slice type.
	return New([]int{}, s.t, s.Name)
}
237 | 
238 | // Append adds new elements to the end of the Series. When using Append, the
239 | // Series is modified in place.
240 | func (s *Series) Append(values interface{}) {
241 | 	if err := s.Err; err != nil {
242 | 		return
243 | 	}
244 | 	news := New(values, s.t, s.Name)
245 | 	switch s.t {
246 | 	case String:
247 | 		s.elements = append(s.elements.(stringElements), news.elements.(stringElements)...)
248 | 	case Int:
249 | 		s.elements = append(s.elements.(intElements), news.elements.(intElements)...)
250 | 	case Float:
251 | 		s.elements = append(s.elements.(floatElements), news.elements.(floatElements)...)
252 | 	case Bool:
253 | 		s.elements = append(s.elements.(boolElements), news.elements.(boolElements)...)
254 | 	}
255 | }
256 | 
257 | // Concat concatenates two series together. It will return a new Series with the
258 | // combined elements of both Series.
259 | func (s Series) Concat(x Series) Series {
260 | 	if err := s.Err; err != nil {
261 | 		return s
262 | 	}
263 | 	if err := x.Err; err != nil {
264 | 		s.Err = fmt.Errorf("concat error: argument has errors: %v", err)
265 | 		return s
266 | 	}
267 | 	y := s.Copy()
268 | 	y.Append(x)
269 | 	return y
270 | }
271 | 
272 | // Subset returns a subset of the series based on the given Indexes.
273 | func (s Series) Subset(indexes Indexes) Series {
274 | 	if err := s.Err; err != nil {
275 | 		return s
276 | 	}
277 | 	idx, err := parseIndexes(s.Len(), indexes)
278 | 	if err != nil {
279 | 		s.Err = err
280 | 		return s
281 | 	}
282 | 	ret := Series{
283 | 		Name: s.Name,
284 | 		t:    s.t,
285 | 	}
286 | 	switch s.t {
287 | 	case String:
288 | 		elements := make(stringElements, len(idx))
289 | 		for k, i := range idx {
290 | 			elements[k] = s.elements.(stringElements)[i]
291 | 		}
292 | 		ret.elements = elements
293 | 	case Int:
294 | 		elements := make(intElements, len(idx))
295 | 		for k, i := range idx {
296 | 			elements[k] = s.elements.(intElements)[i]
297 | 		}
298 | 		ret.elements = elements
299 | 	case Float:
300 | 		elements := make(floatElements, len(idx))
301 | 		for k, i := range idx {
302 | 			elements[k] = s.elements.(floatElements)[i]
303 | 		}
304 | 		ret.elements = elements
305 | 	case Bool:
306 | 		elements := make(boolElements, len(idx))
307 | 		for k, i := range idx {
308 | 			elements[k] = s.elements.(boolElements)[i]
309 | 		}
310 | 		ret.elements = elements
311 | 	default:
312 | 		panic("unknown series type")
313 | 	}
314 | 	return ret
315 | }
316 | 
317 | // Set sets the values on the indexes of a Series and returns the reference
318 | // for itself. The original Series is modified.
319 | func (s Series) Set(indexes Indexes, newvalues Series) Series {
320 | 	if err := s.Err; err != nil {
321 | 		return s
322 | 	}
323 | 	if err := newvalues.Err; err != nil {
324 | 		s.Err = fmt.Errorf("set error: argument has errors: %v", err)
325 | 		return s
326 | 	}
327 | 	idx, err := parseIndexes(s.Len(), indexes)
328 | 	if err != nil {
329 | 		s.Err = err
330 | 		return s
331 | 	}
332 | 	if len(idx) != newvalues.Len() {
333 | 		s.Err = fmt.Errorf("set error: dimensions mismatch")
334 | 		return s
335 | 	}
336 | 	for k, i := range idx {
337 | 		if i < 0 || i >= s.Len() {
338 | 			s.Err = fmt.Errorf("set error: index out of range")
339 | 			return s
340 | 		}
341 | 		s.elements.Elem(i).Set(newvalues.elements.Elem(k))
342 | 	}
343 | 	return s
344 | }
345 | 
346 | // HasNaN checks whether the Series contain NaN elements.
347 | func (s Series) HasNaN() bool {
348 | 	for i := 0; i < s.Len(); i++ {
349 | 		if s.elements.Elem(i).IsNA() {
350 | 			return true
351 | 		}
352 | 	}
353 | 	return false
354 | }
355 | 
356 | // IsNaN returns an array that identifies which of the elements are NaN.
357 | func (s Series) IsNaN() []bool {
358 | 	ret := make([]bool, s.Len())
359 | 	for i := 0; i < s.Len(); i++ {
360 | 		ret[i] = s.elements.Elem(i).IsNA()
361 | 	}
362 | 	return ret
363 | }
364 | 
365 | // Compare compares the values of a Series with other elements. To do so, the
366 | // elements with are to be compared are first transformed to a Series of the same
367 | // type as the caller.
368 | func (s Series) Compare(comparator Comparator, comparando interface{}) Series {
369 | 	if err := s.Err; err != nil {
370 | 		return s
371 | 	}
372 | 	compareElements := func(a, b Element, c Comparator) (bool, error) {
373 | 		var ret bool
374 | 		switch c {
375 | 		case Eq:
376 | 			ret = a.Eq(b)
377 | 		case Neq:
378 | 			ret = a.Neq(b)
379 | 		case Greater:
380 | 			ret = a.Greater(b)
381 | 		case GreaterEq:
382 | 			ret = a.GreaterEq(b)
383 | 		case Less:
384 | 			ret = a.Less(b)
385 | 		case LessEq:
386 | 			ret = a.LessEq(b)
387 | 		default:
388 | 			return false, fmt.Errorf("unknown comparator: %v", c)
389 | 		}
390 | 		return ret, nil
391 | 	}
392 | 
393 | 	comp := New(comparando, s.t, "")
394 | 	bools := make([]bool, s.Len())
395 | 	// In comparator comparation
396 | 	if comparator == In {
397 | 		for i := 0; i < s.Len(); i++ {
398 | 			e := s.elements.Elem(i)
399 | 			b := false
400 | 			for j := 0; j < comp.Len(); j++ {
401 | 				m := comp.elements.Elem(j)
402 | 				c, err := compareElements(e, m, Eq)
403 | 				if err != nil {
404 | 					s = s.Empty()
405 | 					s.Err = err
406 | 					return s
407 | 				}
408 | 				if c {
409 | 					b = true
410 | 					break
411 | 				}
412 | 			}
413 | 			bools[i] = b
414 | 		}
415 | 		return Bools(bools)
416 | 	}
417 | 
418 | 	// Single element comparison
419 | 	if comp.Len() == 1 {
420 | 		for i := 0; i < s.Len(); i++ {
421 | 			e := s.elements.Elem(i)
422 | 			c, err := compareElements(e, comp.elements.Elem(0), comparator)
423 | 			if err != nil {
424 | 				s = s.Empty()
425 | 				s.Err = err
426 | 				return s
427 | 			}
428 | 			bools[i] = c
429 | 		}
430 | 		return Bools(bools)
431 | 	}
432 | 
433 | 	// Multiple element comparison
434 | 	if s.Len() != comp.Len() {
435 | 		s := s.Empty()
436 | 		s.Err = fmt.Errorf("can't compare: length mismatch")
437 | 		return s
438 | 	}
439 | 	for i := 0; i < s.Len(); i++ {
440 | 		e := s.elements.Elem(i)
441 | 		c, err := compareElements(e, comp.elements.Elem(i), comparator)
442 | 		if err != nil {
443 | 			s = s.Empty()
444 | 			s.Err = err
445 | 			return s
446 | 		}
447 | 		bools[i] = c
448 | 	}
449 | 	return Bools(bools)
450 | }
451 | 
452 | // Copy will return a copy of the Series.
453 | func (s Series) Copy() Series {
454 | 	name := s.Name
455 | 	t := s.t
456 | 	err := s.Err
457 | 	var elements Elements
458 | 	switch s.t {
459 | 	case String:
460 | 		elements = make(stringElements, s.Len())
461 | 		copy(elements.(stringElements), s.elements.(stringElements))
462 | 	case Float:
463 | 		elements = make(floatElements, s.Len())
464 | 		copy(elements.(floatElements), s.elements.(floatElements))
465 | 	case Bool:
466 | 		elements = make(boolElements, s.Len())
467 | 		copy(elements.(boolElements), s.elements.(boolElements))
468 | 	case Int:
469 | 		elements = make(intElements, s.Len())
470 | 		copy(elements.(intElements), s.elements.(intElements))
471 | 	}
472 | 	ret := Series{
473 | 		Name:     name,
474 | 		t:        t,
475 | 		elements: elements,
476 | 		Err:      err,
477 | 	}
478 | 	return ret
479 | }
480 | 
481 | // Records returns the elements of a Series as a []string
482 | func (s Series) Records() []string {
483 | 	ret := make([]string, s.Len())
484 | 	for i := 0; i < s.Len(); i++ {
485 | 		e := s.elements.Elem(i)
486 | 		ret[i] = e.String()
487 | 	}
488 | 	return ret
489 | }
490 | 
491 | // Float returns the elements of a Series as a []float64. If the elements can not
492 | // be converted to float64 or contains a NaN returns the float representation of
493 | // NaN.
494 | func (s Series) Float() []float64 {
495 | 	ret := make([]float64, s.Len())
496 | 	for i := 0; i < s.Len(); i++ {
497 | 		e := s.elements.Elem(i)
498 | 		ret[i] = e.Float()
499 | 	}
500 | 	return ret
501 | }
502 | 
503 | // Int returns the elements of a Series as a []int or an error if the
504 | // transformation is not possible.
505 | func (s Series) Int() ([]int, error) {
506 | 	ret := make([]int, s.Len())
507 | 	for i := 0; i < s.Len(); i++ {
508 | 		e := s.elements.Elem(i)
509 | 		val, err := e.Int()
510 | 		if err != nil {
511 | 			return nil, err
512 | 		}
513 | 		ret[i] = val
514 | 	}
515 | 	return ret, nil
516 | }
517 | 
518 | // Bool returns the elements of a Series as a []bool or an error if the
519 | // transformation is not possible.
520 | func (s Series) Bool() ([]bool, error) {
521 | 	ret := make([]bool, s.Len())
522 | 	for i := 0; i < s.Len(); i++ {
523 | 		e := s.elements.Elem(i)
524 | 		val, err := e.Bool()
525 | 		if err != nil {
526 | 			return nil, err
527 | 		}
528 | 		ret[i] = val
529 | 	}
530 | 	return ret, nil
531 | }
532 | 
// Type returns the type of a given Series (one of the supported Series
// types, as recorded when the Series was built).
func (s Series) Type() Type {
	return s.t
}
537 | 
// Len returns the length of a given Series, i.e. the number of elements it
// holds.
func (s Series) Len() int {
	return s.elements.Len()
}
542 | 
// String implements the Stringer interface for Series. Only the elements are
// formatted; name and type are not part of the output (see Str for those).
func (s Series) String() string {
	return fmt.Sprint(s.elements)
}
547 | 
548 | // Str prints some extra information about a given series
549 | func (s Series) Str() string {
550 | 	var ret []string
551 | 	// If name exists print name
552 | 	if s.Name != "" {
553 | 		ret = append(ret, "Name: "+s.Name)
554 | 	}
555 | 	ret = append(ret, "Type: "+fmt.Sprint(s.t))
556 | 	ret = append(ret, "Length: "+fmt.Sprint(s.Len()))
557 | 	if s.Len() != 0 {
558 | 		ret = append(ret, "Values: "+fmt.Sprint(s))
559 | 	}
560 | 	return strings.Join(ret, "\n")
561 | }
562 | 
// Val returns the value of a series for the given index, as reported by the
// element's Val method. Will panic if the index is out of bounds.
func (s Series) Val(i int) interface{} {
	return s.elements.Elem(i).Val()
}
568 | 
// Elem returns the element of a series for the given index. The element is
// handed out by the underlying container, not copied. Will panic if the index
// is out of bounds.
func (s Series) Elem(i int) Element {
	return s.elements.Elem(i)
}
574 | 
575 | // parseIndexes will parse the given indexes for a given series of length `l`. No
576 | // out of bounds checks is performed.
577 | func parseIndexes(l int, indexes Indexes) ([]int, error) {
578 | 	var idx []int
579 | 	switch indexes.(type) {
580 | 	case []int:
581 | 		idx = indexes.([]int)
582 | 	case int:
583 | 		idx = []int{indexes.(int)}
584 | 	case []bool:
585 | 		bools := indexes.([]bool)
586 | 		if len(bools) != l {
587 | 			return nil, fmt.Errorf("indexing error: index dimensions mismatch")
588 | 		}
589 | 		for i, b := range bools {
590 | 			if b {
591 | 				idx = append(idx, i)
592 | 			}
593 | 		}
594 | 	case Series:
595 | 		s := indexes.(Series)
596 | 		if err := s.Err; err != nil {
597 | 			return nil, fmt.Errorf("indexing error: new values has errors: %v", err)
598 | 		}
599 | 		if s.HasNaN() {
600 | 			return nil, fmt.Errorf("indexing error: indexes contain NaN")
601 | 		}
602 | 		switch s.t {
603 | 		case Int:
604 | 			return s.Int()
605 | 		case Bool:
606 | 			bools, err := s.Bool()
607 | 			if err != nil {
608 | 				return nil, fmt.Errorf("indexing error: %v", err)
609 | 			}
610 | 			return parseIndexes(l, bools)
611 | 		default:
612 | 			return nil, fmt.Errorf("indexing error: unknown indexing mode")
613 | 		}
614 | 	default:
615 | 		return nil, fmt.Errorf("indexing error: unknown indexing mode")
616 | 	}
617 | 	return idx, nil
618 | }
619 | 
620 | // Order returns the indexes for sorting a Series. NaN elements are pushed to the
621 | // end by order of appearance.
622 | func (s Series) Order(reverse bool) []int {
623 | 	var ie indexedElements
624 | 	var nasIdx []int
625 | 	for i := 0; i < s.Len(); i++ {
626 | 		e := s.elements.Elem(i)
627 | 		if e.IsNA() {
628 | 			nasIdx = append(nasIdx, i)
629 | 		} else {
630 | 			ie = append(ie, indexedElement{i, e})
631 | 		}
632 | 	}
633 | 	var srt sort.Interface
634 | 	srt = ie
635 | 	if reverse {
636 | 		srt = sort.Reverse(srt)
637 | 	}
638 | 	sort.Sort(srt)
639 | 	var ret []int
640 | 	for _, e := range ie {
641 | 		ret = append(ret, e.index)
642 | 	}
643 | 	return append(ret, nasIdx...)
644 | }
645 | 
// indexedElement pairs an Element with the position it occupies in its
// Series, so the original position survives sorting.
type indexedElement struct {
	index   int
	element Element
}

// indexedElements implements sort.Interface, ordering the entries by their
// element via Element.Less.
type indexedElements []indexedElement

func (e indexedElements) Len() int           { return len(e) }
func (e indexedElements) Less(i, j int) bool { return e[i].element.Less(e[j].element) }
func (e indexedElements) Swap(i, j int)      { e[i], e[j] = e[j], e[i] }
656 | 
657 | // StdDev calculates the standard deviation of a series
658 | func (s Series) StdDev() float64 {
659 | 	stdDev := stat.StdDev(s.Float(), nil)
660 | 	return stdDev
661 | }
662 | 
663 | // Mean calculates the average value of a series
664 | func (s Series) Mean() float64 {
665 | 	stdDev := stat.Mean(s.Float(), nil)
666 | 	return stdDev
667 | }
668 | 
669 | // Median calculates the middle or median value, as opposed to
670 | // mean, and there is less susceptible to being affected by outliers.
671 | func (s Series) Median() float64 {
672 | 	if s.elements.Len() == 0 ||
673 | 		s.Type() == String ||
674 | 		s.Type() == Bool {
675 | 		return math.NaN()
676 | 	}
677 | 	ix := s.Order(false)
678 | 	newElem := make([]Element, len(ix))
679 | 
680 | 	for newpos, oldpos := range ix {
681 | 		newElem[newpos] = s.elements.Elem(oldpos)
682 | 	}
683 | 
684 | 	// When length is odd, we just take length(list)/2
685 | 	// value as the median.
686 | 	if len(newElem)%2 != 0 {
687 | 		return newElem[len(newElem)/2].Float()
688 | 	}
689 | 	// When length is even, we take middle two elements of
690 | 	// list and the median is an average of the two of them.
691 | 	return (newElem[(len(newElem)/2)-1].Float() +
692 | 		newElem[len(newElem)/2].Float()) * 0.5
693 | }
694 | 
695 | // Max return the biggest element in the series
696 | func (s Series) Max() float64 {
697 | 	if s.elements.Len() == 0 || s.Type() == String {
698 | 		return math.NaN()
699 | 	}
700 | 
701 | 	max := s.elements.Elem(0)
702 | 	for i := 1; i < s.elements.Len(); i++ {
703 | 		elem := s.elements.Elem(i)
704 | 		if elem.Greater(max) {
705 | 			max = elem
706 | 		}
707 | 	}
708 | 	return max.Float()
709 | }
710 | 
711 | // MaxStr return the biggest element in a series of type String
712 | func (s Series) MaxStr() string {
713 | 	if s.elements.Len() == 0 || s.Type() != String {
714 | 		return ""
715 | 	}
716 | 
717 | 	max := s.elements.Elem(0)
718 | 	for i := 1; i < s.elements.Len(); i++ {
719 | 		elem := s.elements.Elem(i)
720 | 		if elem.Greater(max) {
721 | 			max = elem
722 | 		}
723 | 	}
724 | 	return max.String()
725 | }
726 | 
727 | // Min return the lowest element in the series
728 | func (s Series) Min() float64 {
729 | 	if s.elements.Len() == 0 || s.Type() == String {
730 | 		return math.NaN()
731 | 	}
732 | 
733 | 	min := s.elements.Elem(0)
734 | 	for i := 1; i < s.elements.Len(); i++ {
735 | 		elem := s.elements.Elem(i)
736 | 		if elem.Less(min) {
737 | 			min = elem
738 | 		}
739 | 	}
740 | 	return min.Float()
741 | }
742 | 
743 | // MinStr return the lowest element in a series of type String
744 | func (s Series) MinStr() string {
745 | 	if s.elements.Len() == 0 || s.Type() != String {
746 | 		return ""
747 | 	}
748 | 
749 | 	min := s.elements.Elem(0)
750 | 	for i := 1; i < s.elements.Len(); i++ {
751 | 		elem := s.elements.Elem(i)
752 | 		if elem.Less(min) {
753 | 			min = elem
754 | 		}
755 | 	}
756 | 	return min.String()
757 | }
758 | 
759 | // Quantile returns the sample of x such that x is greater than or
760 | // equal to the fraction p of samples.
761 | // Note: gonum/stat panics when called with strings
762 | func (s Series) Quantile(p float64) float64 {
763 | 	if s.Type() == String || s.Len() == 0 {
764 | 		return math.NaN()
765 | 	}
766 | 
767 | 	ordered := s.Subset(s.Order(false)).Float()
768 | 
769 | 	return stat.Quantile(p, stat.Empirical, ordered, nil)
770 | }
771 | 
772 | // Map applies a function matching MapFunction signature, which itself
773 | // allowing for a fairly flexible MAP implementation, intended for mapping
774 | // the function over each element in Series and returning a new Series object.
775 | // Function must be compatible with the underlying type of data in the Series.
776 | // In other words it is expected that when working with a Float Series, that
777 | // the function passed in via argument `f` will not expect another type, but
778 | // instead expects to handle Element(s) of type Float.
779 | func (s Series) Map(f MapFunction) Series {
780 | 
781 | 	mappedValues := make([]Element, s.Len())
782 | 	for i := 0; i < s.Len(); i++ {
783 | 		value := f(s.elements.Elem(i))
784 | 		mappedValues[i] = value
785 | 	}
786 | 	return New(mappedValues, s.Type(), s.Name)
787 | }
788 | 


--------------------------------------------------------------------------------
/series/series_test.go:
--------------------------------------------------------------------------------
   1 | package series
   2 | 
   3 | import (
   4 | 	"fmt"
   5 | 	"math"
   6 | 	"reflect"
   7 | 	"testing"
   8 | 	"strings"
   9 | )
  10 | 
  11 | // Check that there are no shared memory addresses between the elements of two Series
  12 | //func checkAddr(addra, addrb []string) error {
  13 | //for i := 0; i < len(addra); i++ {
  14 | //for j := 0; j < len(addrb); j++ {
  15 | //if addra[i] == "<nil>" || addrb[j] == "<nil>" {
  16 | //continue
  17 | //}
  18 | //if addra[i] == addrb[j] {
  19 | //return fmt.Errorf("found same address on\nA:%v\nB:%v", i, j)
  20 | //}
  21 | //}
  22 | //}
  23 | //return nil
  24 | //}
  25 | 
  26 | // Check that all the types on a Series are the same type and that it matches with
  27 | // Series.t
  28 | func checkTypes(s Series) error {
  29 | 	var types []Type
  30 | 	for i := 0; i < s.Len(); i++ {
  31 | 		e := s.elements.Elem(i)
  32 | 		types = append(types, e.Type())
  33 | 	}
  34 | 	for _, t := range types {
  35 | 		if t != s.t {
  36 | 			return fmt.Errorf("bad types for %v Series:\n%v", s.t, types)
  37 | 		}
  38 | 	}
  39 | 	return nil
  40 | }
  41 | 
  42 | // compareFloats compares floating point values up to the number of digits specified.
  43 | // Returns true if both values are equal with the given precision
  44 | func compareFloats(lvalue, rvalue float64, digits int) bool {
  45 | 	if math.IsNaN(lvalue) || math.IsNaN(rvalue) {
  46 | 		return math.IsNaN(lvalue) && math.IsNaN(rvalue)
  47 | 	}
  48 | 	d := math.Pow(10.0, float64(digits))
  49 | 	lv := int(lvalue * d)
  50 | 	rv := int(rvalue * d)
  51 | 	return lv == rv
  52 | }
  53 | 
  54 | func TestSeries_Compare(t *testing.T) {
  55 | 	table := []struct {
  56 | 		series     Series
  57 | 		comparator Comparator
  58 | 		comparando interface{}
  59 | 		expected   Series
  60 | 	}{
  61 | 		{
  62 | 			Strings([]string{"A", "B", "C", "B", "D", "BADA"}),
  63 | 			Eq,
  64 | 			"B",
  65 | 			Bools([]bool{false, true, false, true, false, false}),
  66 | 		},
  67 | 		{
  68 | 			Strings([]string{"A", "B", "C", "B", "D", "BADA"}),
  69 | 			Eq,
  70 | 			[]string{"B", "B", "C", "D", "A", "A"},
  71 | 			Bools([]bool{false, true, true, false, false, false}),
  72 | 		},
  73 | 		{
  74 | 			Ints([]int{0, 2, 1, 5, 9}),
  75 | 			Eq,
  76 | 			"2",
  77 | 			Bools([]bool{false, true, false, false, false}),
  78 | 		},
  79 | 		{
  80 | 			Ints([]int{0, 2, 1, 5, 9}),
  81 | 			Eq,
  82 | 			[]int{0, 2, 0, 5, 10},
  83 | 			Bools([]bool{true, true, false, true, false}),
  84 | 		},
  85 | 		{
  86 | 			Floats([]float64{0.1, 2, 1, 5, 9}),
  87 | 			Eq,
  88 | 			"2",
  89 | 			Bools([]bool{false, true, false, false, false}),
  90 | 		},
  91 | 		{
  92 | 			Floats([]float64{0.1, 2, 1, 5, 9}),
  93 | 			Eq,
  94 | 			[]float64{0.1, 2, 0, 5, 10},
  95 | 			Bools([]bool{true, true, false, true, false}),
  96 | 		},
  97 | 		{
  98 | 			Bools([]bool{true, true, false}),
  99 | 			Eq,
 100 | 			"true",
 101 | 			Bools([]bool{true, true, false}),
 102 | 		},
 103 | 		{
 104 | 			Bools([]bool{true, true, false}),
 105 | 			Eq,
 106 | 			[]bool{true, false, false},
 107 | 			Bools([]bool{true, false, true}),
 108 | 		},
 109 | 		{
 110 | 			Strings([]string{"A", "B", "C", "B", "D", "BADA"}),
 111 | 			Neq,
 112 | 			"B",
 113 | 			Bools([]bool{true, false, true, false, true, true}),
 114 | 		},
 115 | 		{
 116 | 			Strings([]string{"A", "B", "C", "B", "D", "BADA"}),
 117 | 			Neq,
 118 | 			[]string{"B", "B", "C", "D", "A", "A"},
 119 | 			Bools([]bool{true, false, false, true, true, true}),
 120 | 		},
 121 | 		{
 122 | 			Ints([]int{0, 2, 1, 5, 9}),
 123 | 			Neq,
 124 | 			"2",
 125 | 			Bools([]bool{true, false, true, true, true}),
 126 | 		},
 127 | 		{
 128 | 			Ints([]int{0, 2, 1, 5, 9}),
 129 | 			Neq,
 130 | 			[]int{0, 2, 0, 5, 10},
 131 | 			Bools([]bool{false, false, true, false, true}),
 132 | 		},
 133 | 		{
 134 | 			Floats([]float64{0.1, 2, 1, 5, 9}),
 135 | 			Neq,
 136 | 			"2",
 137 | 			Bools([]bool{true, false, true, true, true}),
 138 | 		},
 139 | 		{
 140 | 			Floats([]float64{0.1, 2, 1, 5, 9}),
 141 | 			Neq,
 142 | 			[]float64{0.1, 2, 0, 5, 10},
 143 | 			Bools([]bool{false, false, true, false, true}),
 144 | 		},
 145 | 		{
 146 | 			Bools([]bool{true, true, false}),
 147 | 			Neq,
 148 | 			"true",
 149 | 			Bools([]bool{false, false, true}),
 150 | 		},
 151 | 		{
 152 | 			Bools([]bool{true, true, false}),
 153 | 			Neq,
 154 | 			[]bool{true, false, false},
 155 | 			Bools([]bool{false, true, false}),
 156 | 		},
 157 | 		{
 158 | 			Strings([]string{"A", "B", "C", "B", "D", "BADA"}),
 159 | 			Greater,
 160 | 			"B",
 161 | 			Bools([]bool{false, false, true, false, true, true}),
 162 | 		},
 163 | 		{
 164 | 			Strings([]string{"A", "B", "C", "B", "D", "BADA"}),
 165 | 			Greater,
 166 | 			[]string{"B", "B", "C", "D", "A", "A"},
 167 | 			Bools([]bool{false, false, false, false, true, true}),
 168 | 		},
 169 | 		{
 170 | 			Ints([]int{0, 2, 1, 5, 9}),
 171 | 			Greater,
 172 | 			"2",
 173 | 			Bools([]bool{false, false, false, true, true}),
 174 | 		},
 175 | 		{
 176 | 			Ints([]int{0, 2, 1, 5, 9}),
 177 | 			Greater,
 178 | 			[]int{0, 2, 0, 5, 10},
 179 | 			Bools([]bool{false, false, true, false, false}),
 180 | 		},
 181 | 		{
 182 | 			Floats([]float64{0.1, 2, 1, 5, 9}),
 183 | 			Greater,
 184 | 			"2",
 185 | 			Bools([]bool{false, false, false, true, true}),
 186 | 		},
 187 | 		{
 188 | 			Floats([]float64{0.1, 2, 1, 5, 9}),
 189 | 			Greater,
 190 | 			[]float64{0.1, 2, 0, 5, 10},
 191 | 			Bools([]bool{false, false, true, false, false}),
 192 | 		},
 193 | 		{
 194 | 			Bools([]bool{true, true, false}),
 195 | 			Greater,
 196 | 			"true",
 197 | 			Bools([]bool{false, false, false}),
 198 | 		},
 199 | 		{
 200 | 			Bools([]bool{true, true, false}),
 201 | 			Greater,
 202 | 			[]bool{true, false, false},
 203 | 			Bools([]bool{false, true, false}),
 204 | 		},
 205 | 		{
 206 | 			Strings([]string{"A", "B", "C", "B", "D", "BADA"}),
 207 | 			GreaterEq,
 208 | 			"B",
 209 | 			Bools([]bool{false, true, true, true, true, true}),
 210 | 		},
 211 | 		{
 212 | 			Strings([]string{"A", "B", "C", "B", "D", "BADA"}),
 213 | 			GreaterEq,
 214 | 			[]string{"B", "B", "C", "D", "A", "A"},
 215 | 			Bools([]bool{false, true, true, false, true, true}),
 216 | 		},
 217 | 		{
 218 | 			Ints([]int{0, 2, 1, 5, 9}),
 219 | 			GreaterEq,
 220 | 			"2",
 221 | 			Bools([]bool{false, true, false, true, true}),
 222 | 		},
 223 | 		{
 224 | 			Ints([]int{0, 2, 1, 5, 9}),
 225 | 			GreaterEq,
 226 | 			[]int{0, 2, 0, 5, 10},
 227 | 			Bools([]bool{true, true, true, true, false}),
 228 | 		},
 229 | 		{
 230 | 			Floats([]float64{0.1, 2, 1, 5, 9}),
 231 | 			GreaterEq,
 232 | 			"2",
 233 | 			Bools([]bool{false, true, false, true, true}),
 234 | 		},
 235 | 		{
 236 | 			Floats([]float64{0.1, 2, 1, 5, 9}),
 237 | 			GreaterEq,
 238 | 			[]float64{0.1, 2, 0, 5, 10},
 239 | 			Bools([]bool{true, true, true, true, false}),
 240 | 		},
 241 | 		{
 242 | 			Bools([]bool{true, true, false}),
 243 | 			GreaterEq,
 244 | 			"true",
 245 | 			Bools([]bool{true, true, false}),
 246 | 		},
 247 | 		{
 248 | 			Bools([]bool{true, true, false}),
 249 | 			GreaterEq,
 250 | 			[]bool{true, false, false},
 251 | 			Bools([]bool{true, true, true}),
 252 | 		},
 253 | 		{
 254 | 			Strings([]string{"A", "B", "C", "B", "D", "BADA"}),
 255 | 			Less,
 256 | 			"B",
 257 | 			Bools([]bool{true, false, false, false, false, false}),
 258 | 		},
 259 | 		{
 260 | 			Strings([]string{"A", "B", "C", "B", "D", "BADA"}),
 261 | 			Less,
 262 | 			[]string{"B", "B", "C", "D", "A", "A"},
 263 | 			Bools([]bool{true, false, false, true, false, false}),
 264 | 		},
 265 | 		{
 266 | 			Ints([]int{0, 2, 1, 5, 9}),
 267 | 			Less,
 268 | 			"2",
 269 | 			Bools([]bool{true, false, true, false, false}),
 270 | 		},
 271 | 		{
 272 | 			Ints([]int{0, 2, 1, 5, 9}),
 273 | 			Less,
 274 | 			[]int{0, 2, 0, 5, 10},
 275 | 			Bools([]bool{false, false, false, false, true}),
 276 | 		},
 277 | 		{
 278 | 			Floats([]float64{0.1, 2, 1, 5, 9}),
 279 | 			Less,
 280 | 			"2",
 281 | 			Bools([]bool{true, false, true, false, false}),
 282 | 		},
 283 | 		{
 284 | 			Floats([]float64{0.1, 2, 1, 5, 9}),
 285 | 			Less,
 286 | 			[]float64{0.1, 2, 0, 5, 10},
 287 | 			Bools([]bool{false, false, false, false, true}),
 288 | 		},
 289 | 		{
 290 | 			Bools([]bool{true, true, false}),
 291 | 			Less,
 292 | 			"true",
 293 | 			Bools([]bool{false, false, true}),
 294 | 		},
 295 | 		{
 296 | 			Bools([]bool{true, true, false}),
 297 | 			Less,
 298 | 			[]bool{true, false, false},
 299 | 			Bools([]bool{false, false, false}),
 300 | 		},
 301 | 		{
 302 | 			Strings([]string{"A", "B", "C", "B", "D", "BADA"}),
 303 | 			LessEq,
 304 | 			"B",
 305 | 			Bools([]bool{true, true, false, true, false, false}),
 306 | 		},
 307 | 		{
 308 | 			Strings([]string{"A", "B", "C", "B", "D", "BADA"}),
 309 | 			LessEq,
 310 | 			[]string{"B", "B", "C", "D", "A", "A"},
 311 | 			Bools([]bool{true, true, true, true, false, false}),
 312 | 		},
 313 | 		{
 314 | 			Ints([]int{0, 2, 1, 5, 9}),
 315 | 			LessEq,
 316 | 			"2",
 317 | 			Bools([]bool{true, true, true, false, false}),
 318 | 		},
 319 | 		{
 320 | 			Ints([]int{0, 2, 1, 5, 9}),
 321 | 			LessEq,
 322 | 			[]int{0, 2, 0, 5, 10},
 323 | 			Bools([]bool{true, true, false, true, true}),
 324 | 		},
 325 | 		{
 326 | 			Floats([]float64{0.1, 2, 1, 5, 9}),
 327 | 			LessEq,
 328 | 			"2",
 329 | 			Bools([]bool{true, true, true, false, false}),
 330 | 		},
 331 | 		{
 332 | 			Floats([]float64{0.1, 2, 1, 5, 9}),
 333 | 			LessEq,
 334 | 			[]float64{0.1, 2, 0, 5, 10},
 335 | 			Bools([]bool{true, true, false, true, true}),
 336 | 		},
 337 | 		{
 338 | 			Bools([]bool{true, true, false}),
 339 | 			LessEq,
 340 | 			"true",
 341 | 			Bools([]bool{true, true, true}),
 342 | 		},
 343 | 		{
 344 | 			Bools([]bool{true, true, false}),
 345 | 			LessEq,
 346 | 			[]bool{true, false, false},
 347 | 			Bools([]bool{true, false, true}),
 348 | 		},
 349 | 		{
 350 | 			Strings([]string{"A", "B", "C", "B", "D", "BADA"}),
 351 | 			In,
 352 | 			"B",
 353 | 			Bools([]bool{false, true, false, true, false, false}),
 354 | 		},
 355 | 		{
 356 | 			Strings([]string{"Hello", "world", "this", "is", "a", "test"}),
 357 | 			In,
 358 | 			[]string{"cat", "world", "hello", "a"},
 359 | 			Bools([]bool{false, true, false, false, true, false}),
 360 | 		},
 361 | 		{
 362 | 			Ints([]int{0, 2, 1, 5, 9}),
 363 | 			In,
 364 | 			"2",
 365 | 			Bools([]bool{false, true, false, false, false}),
 366 | 		},
 367 | 		{
 368 | 			Ints([]int{0, 2, 1, 5, 9}),
 369 | 			In,
 370 | 			[]int{2, 99, 1234, 9},
 371 | 			Bools([]bool{false, true, false, false, true}),
 372 | 		},
 373 | 		{
 374 | 			Floats([]float64{0.1, 2, 1, 5, 9}),
 375 | 			In,
 376 | 			"2",
 377 | 			Bools([]bool{false, true, false, false, false}),
 378 | 		},
 379 | 		{
 380 | 			Floats([]float64{0.1, 2, 1, 5, 9}),
 381 | 			In,
 382 | 			[]float64{2, 99, 1234, 9},
 383 | 			Bools([]bool{false, true, false, false, true}),
 384 | 		},
 385 | 		{
 386 | 			Bools([]bool{true, true, false}),
 387 | 			In,
 388 | 			"true",
 389 | 			Bools([]bool{true, true, false}),
 390 | 		},
 391 | 		{
 392 | 			Bools([]bool{true, true, false}),
 393 | 			In,
 394 | 			[]bool{false, false, false},
 395 | 			Bools([]bool{false, false, true}),
 396 | 		},
 397 | 	}
 398 | 	for testnum, test := range table {
 399 | 		a := test.series
 400 | 		b := a.Compare(test.comparator, test.comparando)
 401 | 		if err := b.Err; err != nil {
 402 | 			t.Errorf("Test:%v\nError:%v", testnum, err)
 403 | 		}
 404 | 		expected := test.expected.Records()
 405 | 		received := b.Records()
 406 | 		if !reflect.DeepEqual(expected, received) {
 407 | 			t.Errorf(
 408 | 				"Test:%v\nExpected:\n%v\nReceived:\n%v",
 409 | 				testnum, expected, received,
 410 | 			)
 411 | 		}
 412 | 		if err := checkTypes(b); err != nil {
 413 | 			t.Errorf(
 414 | 				"Test:%v\nError:%v",
 415 | 				testnum, err,
 416 | 			)
 417 | 		}
 418 | 		//if err := checkAddr(a.Addr(), b.Addr()); err != nil {
 419 | 		//t.Errorf("Test:%v\nError:%v\nA:%v\nB:%v", testnum, err, a.Addr(), b.Addr())
 420 | 		//}
 421 | 	}
 422 | }
 423 | 
 424 | func TestSeries_Subset(t *testing.T) {
 425 | 	table := []struct {
 426 | 		series   Series
 427 | 		indexes  Indexes
 428 | 		expected string
 429 | 	}{
 430 | 		{
 431 | 			Strings([]string{"A", "B", "C", "K", "D"}),
 432 | 			[]int{2, 1, 4, 4, 0, 3},
 433 | 			"[C B D D A K]",
 434 | 		},
 435 | 		{
 436 | 			Strings([]string{"A", "B", "C", "K", "D"}),
 437 | 			int(1),
 438 | 			"[B]",
 439 | 		},
 440 | 		{
 441 | 			Strings([]string{"A", "B", "C", "K", "D"}),
 442 | 			[]bool{true, false, false, true, true},
 443 | 			"[A K D]",
 444 | 		},
 445 | 		{
 446 | 			Strings([]string{"A", "B", "C", "K", "D"}),
 447 | 			Ints([]int{3, 2, 1, 0}),
 448 | 			"[K C B A]",
 449 | 		},
 450 | 		{
 451 | 			Strings([]string{"A", "B", "C", "K", "D"}),
 452 | 			Ints([]int{1}),
 453 | 			"[B]",
 454 | 		},
 455 | 		{
 456 | 			Strings([]string{"A", "B", "C", "K", "D"}),
 457 | 			Ints(2),
 458 | 			"[C]",
 459 | 		},
 460 | 		{
 461 | 			Strings([]string{"A", "B", "C", "K", "D"}),
 462 | 			Bools([]bool{true, false, false, true, true}),
 463 | 			"[A K D]",
 464 | 		},
 465 | 	}
 466 | 	for testnum, test := range table {
 467 | 		a := test.series
 468 | 		b := a.Subset(test.indexes)
 469 | 		if err := b.Err; err != nil {
 470 | 			t.Errorf("Test:%v\nError:%v", testnum, err)
 471 | 		}
 472 | 		expected := test.expected
 473 | 		received := fmt.Sprint(b)
 474 | 		if expected != received {
 475 | 			t.Errorf(
 476 | 				"Test:%v\nExpected:\n%v\nReceived:\n%v",
 477 | 				testnum, expected, received,
 478 | 			)
 479 | 		}
 480 | 		if err := checkTypes(b); err != nil {
 481 | 			t.Errorf(
 482 | 				"Test:%v\nError:%v",
 483 | 				testnum, err,
 484 | 			)
 485 | 		}
 486 | 		//if err := checkAddr(a.Addr(), b.Addr()); err != nil {
 487 | 		//t.Errorf("Test:%v\nError:%v\nA:%v\nB:%v", testnum, err, a.Addr(), b.Addr())
 488 | 		//}
 489 | 	}
 490 | }
 491 | 
 492 | func TestSeries_Set(t *testing.T) {
 493 | 	table := []struct {
 494 | 		series   Series
 495 | 		indexes  Indexes
 496 | 		values   Series
 497 | 		expected string
 498 | 	}{
 499 | 		{
 500 | 			Strings([]string{"A", "B", "C", "K", "D"}),
 501 | 			[]int{1, 2, 4},
 502 | 			Ints([]string{"1", "2", "3"}),
 503 | 			"[A 1 2 K 3]",
 504 | 		},
 505 | 		{
 506 | 			Strings([]string{"A", "B", "C", "K", "D"}),
 507 | 			[]bool{false, true, true, false, true},
 508 | 			Ints([]string{"1", "2", "3"}),
 509 | 			"[A 1 2 K 3]",
 510 | 		},
 511 | 		{
 512 | 			Strings([]string{"A", "B", "C", "K", "D"}),
 513 | 			Ints([]int{1, 2, 4}),
 514 | 			Ints([]string{"1", "2", "3"}),
 515 | 			"[A 1 2 K 3]",
 516 | 		},
 517 | 		{
 518 | 			Strings([]string{"A", "B", "C", "K", "D"}),
 519 | 			Bools([]bool{false, true, true, false, true}),
 520 | 			Ints([]string{"1", "2", "3"}),
 521 | 			"[A 1 2 K 3]",
 522 | 		},
 523 | 	}
 524 | 	for testnum, test := range table {
 525 | 		b := test.series.Set(test.indexes, test.values)
 526 | 		if err := b.Err; err != nil {
 527 | 			t.Errorf("Test:%v\nError:%v", testnum, err)
 528 | 		}
 529 | 		expected := test.expected
 530 | 		received := fmt.Sprint(b)
 531 | 		if expected != received {
 532 | 			t.Errorf(
 533 | 				"Test:%v\nExpected:\n%v\nReceived:\n%v",
 534 | 				testnum, expected, received,
 535 | 			)
 536 | 		}
 537 | 		if err := checkTypes(b); err != nil {
 538 | 			t.Errorf(
 539 | 				"Test:%v\nError:%v",
 540 | 				testnum, err,
 541 | 			)
 542 | 		}
 543 | 		//if err := checkAddr(test.values.Addr(), b.Addr()); err != nil {
 544 | 		//t.Errorf("Test:%v\nError:%v\nNV:%v\nB:%v", testnum, err, test.values.Addr(), b.Addr())
 545 | 		//}
 546 | 	}
 547 | }
 548 | 
 549 | func TestStrings(t *testing.T) {
 550 | 	table := []struct {
 551 | 		series   Series
 552 | 		expected string
 553 | 	}{
 554 | 		{
 555 | 			Strings([]string{"A", "B", "C", "D"}),
 556 | 			"[A B C D]",
 557 | 		},
 558 | 		{
 559 | 			Strings([]string{"A"}),
 560 | 			"[A]",
 561 | 		},
 562 | 		{
 563 | 			Strings("A"),
 564 | 			"[A]",
 565 | 		},
 566 | 		{
 567 | 			Strings([]int{1, 2, 3}),
 568 | 			"[1 2 3]",
 569 | 		},
 570 | 		{
 571 | 			Strings([]int{2}),
 572 | 			"[2]",
 573 | 		},
 574 | 		{
 575 | 			Strings(-1),
 576 | 			"[-1]",
 577 | 		},
 578 | 		{
 579 | 			Strings([]float64{1, 2, 3}),
 580 | 			"[1.000000 2.000000 3.000000]",
 581 | 		},
 582 | 		{
 583 | 			Strings([]float64{2}),
 584 | 			"[2.000000]",
 585 | 		},
 586 | 		{
 587 | 			Strings(-1.0),
 588 | 			"[-1.000000]",
 589 | 		},
 590 | 		{
 591 | 			Strings(math.NaN()),
 592 | 			"[NaN]",
 593 | 		},
 594 | 		{
 595 | 			Strings(math.Inf(1)),
 596 | 			"[+Inf]",
 597 | 		},
 598 | 		{
 599 | 			Strings(math.Inf(-1)),
 600 | 			"[-Inf]",
 601 | 		},
 602 | 		{
 603 | 			Strings([]bool{true, true, false}),
 604 | 			"[true true false]",
 605 | 		},
 606 | 		{
 607 | 			Strings([]bool{false}),
 608 | 			"[false]",
 609 | 		},
 610 | 		{
 611 | 			Strings(true),
 612 | 			"[true]",
 613 | 		},
 614 | 		{
 615 | 			Strings([]int{}),
 616 | 			"[]",
 617 | 		},
 618 | 		{
 619 | 			Strings(nil),
 620 | 			"[NaN]",
 621 | 		},
 622 | 		{
 623 | 			Strings(Strings([]string{"A", "B", "C"})),
 624 | 			"[A B C]",
 625 | 		},
 626 | 	}
 627 | 	for testnum, test := range table {
 628 | 		if err := test.series.Err; err != nil {
 629 | 			t.Errorf("Test:%v\nError:%v", testnum, err)
 630 | 		}
 631 | 		expected := test.expected
 632 | 		received := fmt.Sprint(test.series)
 633 | 		if expected != received {
 634 | 			t.Errorf(
 635 | 				"Test:%v\nExpected:\n%v\nReceived:\n%v",
 636 | 				testnum, expected, received,
 637 | 			)
 638 | 		}
 639 | 		if err := checkTypes(test.series); err != nil {
 640 | 			t.Errorf("Test:%v\nError:%v", testnum, err)
 641 | 		}
 642 | 	}
 643 | }
 644 | 
 645 | func TestInts(t *testing.T) {
 646 | 	table := []struct {
 647 | 		series   Series
 648 | 		expected string
 649 | 	}{
 650 | 		{
 651 | 			Ints([]string{"A", "B", "1", "2"}),
 652 | 			"[NaN NaN 1 2]",
 653 | 		},
 654 | 		{
 655 | 			Ints([]string{"1"}),
 656 | 			"[1]",
 657 | 		},
 658 | 		{
 659 | 			Ints("2"),
 660 | 			"[2]",
 661 | 		},
 662 | 		{
 663 | 			Ints([]int{1, 2, 3}),
 664 | 			"[1 2 3]",
 665 | 		},
 666 | 		{
 667 | 			Ints([]int{2}),
 668 | 			"[2]",
 669 | 		},
 670 | 		{
 671 | 			Ints(-1),
 672 | 			"[-1]",
 673 | 		},
 674 | 		{
 675 | 			Ints([]float64{1, 2, 3}),
 676 | 			"[1 2 3]",
 677 | 		},
 678 | 		{
 679 | 			Ints([]float64{2}),
 680 | 			"[2]",
 681 | 		},
 682 | 		{
 683 | 			Ints(-1.0),
 684 | 			"[-1]",
 685 | 		},
 686 | 		{
 687 | 			Ints(math.NaN()),
 688 | 			"[NaN]",
 689 | 		},
 690 | 		{
 691 | 			Ints(math.Inf(1)),
 692 | 			"[NaN]",
 693 | 		},
 694 | 		{
 695 | 			Ints(math.Inf(-1)),
 696 | 			"[NaN]",
 697 | 		},
 698 | 		{
 699 | 			Ints([]bool{true, true, false}),
 700 | 			"[1 1 0]",
 701 | 		},
 702 | 		{
 703 | 			Ints([]bool{false}),
 704 | 			"[0]",
 705 | 		},
 706 | 		{
 707 | 			Ints(true),
 708 | 			"[1]",
 709 | 		},
 710 | 		{
 711 | 			Ints([]int{}),
 712 | 			"[]",
 713 | 		},
 714 | 		{
 715 | 			Ints(nil),
 716 | 			"[NaN]",
 717 | 		},
 718 | 		{
 719 | 			Ints(Strings([]string{"1", "2", "3"})),
 720 | 			"[1 2 3]",
 721 | 		},
 722 | 		{
 723 | 			Ints(Ints([]string{"1", "2", "3"})),
 724 | 			"[1 2 3]",
 725 | 		},
 726 | 	}
 727 | 	for testnum, test := range table {
 728 | 		if err := test.series.Err; err != nil {
 729 | 			t.Errorf("Test:%v\nError:%v", testnum, err)
 730 | 		}
 731 | 		expected := test.expected
 732 | 		received := fmt.Sprint(test.series)
 733 | 		if expected != received {
 734 | 			t.Errorf(
 735 | 				"Test:%v\nExpected:\n%v\nReceived:\n%v",
 736 | 				testnum, expected, received,
 737 | 			)
 738 | 		}
 739 | 		if err := checkTypes(test.series); err != nil {
 740 | 			t.Errorf("Test:%v\nError:%v", testnum, err)
 741 | 		}
 742 | 	}
 743 | }
 744 | 
 745 | func TestFloats(t *testing.T) {
 746 | 	table := []struct {
 747 | 		series   Series
 748 | 		expected string
 749 | 	}{
 750 | 		{
 751 | 			Floats([]string{"A", "B", "1", "2"}),
 752 | 			"[NaN NaN 1.000000 2.000000]",
 753 | 		},
 754 | 		{
 755 | 			Floats([]string{"1"}),
 756 | 			"[1.000000]",
 757 | 		},
 758 | 		{
 759 | 			Floats("2.1"),
 760 | 			"[2.100000]",
 761 | 		},
 762 | 		{
 763 | 			Floats([]int{1, 2, 3}),
 764 | 			"[1.000000 2.000000 3.000000]",
 765 | 		},
 766 | 		{
 767 | 			Floats([]int{2}),
 768 | 			"[2.000000]",
 769 | 		},
 770 | 		{
 771 | 			Floats(-1),
 772 | 			"[-1.000000]",
 773 | 		},
 774 | 		{
 775 | 			Floats([]float64{1.1, 2, 3}),
 776 | 			"[1.100000 2.000000 3.000000]",
 777 | 		},
 778 | 		{
 779 | 			Floats([]float64{2}),
 780 | 			"[2.000000]",
 781 | 		},
 782 | 		{
 783 | 			Floats(-1.0),
 784 | 			"[-1.000000]",
 785 | 		},
 786 | 		{
 787 | 			Floats(math.NaN()),
 788 | 			"[NaN]",
 789 | 		},
 790 | 		{
 791 | 			Floats(math.Inf(1)),
 792 | 			"[+Inf]",
 793 | 		},
 794 | 		{
 795 | 			Floats(math.Inf(-1)),
 796 | 			"[-Inf]",
 797 | 		},
 798 | 		{
 799 | 			Floats([]bool{true, true, false}),
 800 | 			"[1.000000 1.000000 0.000000]",
 801 | 		},
 802 | 		{
 803 | 			Floats([]bool{false}),
 804 | 			"[0.000000]",
 805 | 		},
 806 | 		{
 807 | 			Floats(true),
 808 | 			"[1.000000]",
 809 | 		},
 810 | 		{
 811 | 			Floats([]int{}),
 812 | 			"[]",
 813 | 		},
 814 | 		{
 815 | 			Floats(nil),
 816 | 			"[NaN]",
 817 | 		},
 818 | 		{
 819 | 			Floats(Strings([]string{"1", "2", "3"})),
 820 | 			"[1.000000 2.000000 3.000000]",
 821 | 		},
 822 | 	}
 823 | 	for testnum, test := range table {
 824 | 		if err := test.series.Err; err != nil {
 825 | 			t.Errorf("Test:%v\nError:%v", testnum, err)
 826 | 		}
 827 | 		expected := test.expected
 828 | 		received := fmt.Sprint(test.series)
 829 | 		if expected != received {
 830 | 			t.Errorf(
 831 | 				"Test:%v\nExpected:\n%v\nReceived:\n%v",
 832 | 				testnum, expected, received,
 833 | 			)
 834 | 		}
 835 | 		if err := checkTypes(test.series); err != nil {
 836 | 			t.Errorf("Test:%v\nError:%v", testnum, err)
 837 | 		}
 838 | 	}
 839 | }
 840 | 
 841 | func TestBools(t *testing.T) {
 842 | 	table := []struct {
 843 | 		series   Series
 844 | 		expected string
 845 | 	}{
 846 | 		{
 847 | 			Bools([]string{"A", "true", "1", "f"}),
 848 | 			"[NaN true true false]",
 849 | 		},
 850 | 		{
 851 | 			Bools([]string{"t"}),
 852 | 			"[true]",
 853 | 		},
 854 | 		{
 855 | 			Bools("False"),
 856 | 			"[false]",
 857 | 		},
 858 | 		{
 859 | 			Bools([]int{1, 2, 0}),
 860 | 			"[true NaN false]",
 861 | 		},
 862 | 		{
 863 | 			Bools([]int{1}),
 864 | 			"[true]",
 865 | 		},
 866 | 		{
 867 | 			Bools(-1),
 868 | 			"[NaN]",
 869 | 		},
 870 | 		{
 871 | 			Bools([]float64{1, 2, 0}),
 872 | 			"[true NaN false]",
 873 | 		},
 874 | 		{
 875 | 			Bools([]float64{0}),
 876 | 			"[false]",
 877 | 		},
 878 | 		{
 879 | 			Bools(-1.0),
 880 | 			"[NaN]",
 881 | 		},
 882 | 		{
 883 | 			Bools(math.NaN()),
 884 | 			"[NaN]",
 885 | 		},
 886 | 		{
 887 | 			Bools(math.Inf(1)),
 888 | 			"[NaN]",
 889 | 		},
 890 | 		{
 891 | 			Bools(math.Inf(-1)),
 892 | 			"[NaN]",
 893 | 		},
 894 | 		{
 895 | 			Bools([]bool{true, true, false}),
 896 | 			"[true true false]",
 897 | 		},
 898 | 		{
 899 | 			Bools([]bool{false}),
 900 | 			"[false]",
 901 | 		},
 902 | 		{
 903 | 			Bools(true),
 904 | 			"[true]",
 905 | 		},
 906 | 		{
 907 | 			Bools([]int{}),
 908 | 			"[]",
 909 | 		},
 910 | 		{
 911 | 			Bools(nil),
 912 | 			"[NaN]",
 913 | 		},
 914 | 		{
 915 | 			Bools(Strings([]string{"1", "0", "1"})),
 916 | 			"[true false true]",
 917 | 		},
 918 | 	}
 919 | 	for testnum, test := range table {
 920 | 		if err := test.series.Err; err != nil {
 921 | 			t.Errorf("Test:%v\nError:%v", testnum, err)
 922 | 		}
 923 | 		expected := test.expected
 924 | 		received := fmt.Sprint(test.series)
 925 | 		if expected != received {
 926 | 			t.Errorf(
 927 | 				"Test:%v\nExpected:\n%v\nReceived:\n%v",
 928 | 				testnum, expected, received,
 929 | 			)
 930 | 		}
 931 | 		if err := checkTypes(test.series); err != nil {
 932 | 			t.Errorf("Test:%v\nError:%v", testnum, err)
 933 | 		}
 934 | 	}
 935 | }
 936 | 
 937 | func TestSeries_Copy(t *testing.T) {
 938 | 	tests := []Series{
 939 | 		Strings([]string{"1", "2", "3", "a", "b", "c"}),
 940 | 		Ints([]string{"1", "2", "3", "a", "b", "c"}),
 941 | 		Floats([]string{"1", "2", "3", "a", "b", "c"}),
 942 | 		Bools([]string{"1", "0", "1", "t", "f", "c"}),
 943 | 	}
 944 | 	for testnum, test := range tests {
 945 | 		a := test
 946 | 		b := a.Copy()
 947 | 		if fmt.Sprint(a) != fmt.Sprint(b) {
 948 | 			t.Error("Different values when copying String elements")
 949 | 		}
 950 | 		if err := b.Err; err != nil {
 951 | 			t.Errorf("Test:%v\nError:%v", testnum, err)
 952 | 		}
 953 | 		if err := checkTypes(b); err != nil {
 954 | 			t.Errorf("Test:%v\nError:%v", testnum, err)
 955 | 		}
 956 | 		//if err := checkAddr(a.Addr(), b.Addr()); err != nil {
 957 | 		//t.Errorf("Test:%v\nError:%v\nA:%v\nB:%v", testnum, err, a.Addr(), b.Addr())
 958 | 		//}
 959 | 	}
 960 | }
 961 | 
 962 | func TestSeries_Records(t *testing.T) {
 963 | 	tests := []struct {
 964 | 		series   Series
 965 | 		expected []string
 966 | 	}{
 967 | 		{
 968 | 			Strings([]string{"1", "2", "3", "a", "b", "c"}),
 969 | 			[]string{"1", "2", "3", "a", "b", "c"},
 970 | 		},
 971 | 		{
 972 | 			Ints([]string{"1", "2", "3", "a", "b", "c"}),
 973 | 			[]string{"1", "2", "3", "NaN", "NaN", "NaN"},
 974 | 		},
 975 | 		{
 976 | 			Floats([]string{"1", "2", "3", "a", "b", "c"}),
 977 | 			[]string{"1.000000", "2.000000", "3.000000", "NaN", "NaN", "NaN"},
 978 | 		},
 979 | 		{
 980 | 			Bools([]string{"1", "0", "1", "t", "f", "c"}),
 981 | 			[]string{"true", "false", "true", "true", "false", "NaN"},
 982 | 		},
 983 | 	}
 984 | 	for testnum, test := range tests {
 985 | 		expected := test.expected
 986 | 		received := test.series.Records()
 987 | 		if !reflect.DeepEqual(expected, received) {
 988 | 			t.Errorf(
 989 | 				"Test:%v\nExpected:\n%v\nReceived:\n%v",
 990 | 				testnum, expected, received,
 991 | 			)
 992 | 		}
 993 | 	}
 994 | }
 995 | 
 996 | func TestSeries_Float(t *testing.T) {
 997 | 	precision := 0.0000001
 998 | 	floatEquals := func(x, y []float64) bool {
 999 | 		if len(x) != len(y) {
1000 | 			return false
1001 | 		}
1002 | 		for i := 0; i < len(x); i++ {
1003 | 			a := x[i]
1004 | 			b := y[i]
1005 | 			if (a-b) > precision || (b-a) > precision {
1006 | 				return false
1007 | 			}
1008 | 		}
1009 | 		return true
1010 | 	}
1011 | 	tests := []struct {
1012 | 		series   Series
1013 | 		expected []float64
1014 | 	}{
1015 | 		{
1016 | 			Strings([]string{"1", "2", "3", "a", "b", "c"}),
1017 | 			[]float64{1, 2, 3, math.NaN(), math.NaN(), math.NaN()},
1018 | 		},
1019 | 		{
1020 | 			Ints([]string{"1", "2", "3", "a", "b", "c"}),
1021 | 			[]float64{1, 2, 3, math.NaN(), math.NaN(), math.NaN()},
1022 | 		},
1023 | 		{
1024 | 			Floats([]string{"1", "2", "3", "a", "b", "c"}),
1025 | 			[]float64{1, 2, 3, math.NaN(), math.NaN(), math.NaN()},
1026 | 		},
1027 | 		{
1028 | 			Bools([]string{"1", "0", "1", "t", "f", "c"}),
1029 | 			[]float64{1, 0, 1, 1, 0, math.NaN()},
1030 | 		},
1031 | 	}
1032 | 	for testnum, test := range tests {
1033 | 		expected := test.expected
1034 | 		received := test.series.Float()
1035 | 		if !floatEquals(expected, received) {
1036 | 			t.Errorf(
1037 | 				"Test:%v\nExpected:\n%v\nReceived:\n%v",
1038 | 				testnum, expected, received,
1039 | 			)
1040 | 		}
1041 | 	}
1042 | }
1043 | 
1044 | func TestSeries_Concat(t *testing.T) {
1045 | 	tests := []struct {
1046 | 		a        Series
1047 | 		b        Series
1048 | 		expected []string
1049 | 	}{
1050 | 		{
1051 | 			Strings([]string{"1", "2", "3"}),
1052 | 			Strings([]string{"a", "b", "c"}),
1053 | 			[]string{"1", "2", "3", "a", "b", "c"},
1054 | 		},
1055 | 		{
1056 | 			Ints([]string{"1", "2", "3"}),
1057 | 			Ints([]string{"a", "4", "c"}),
1058 | 			[]string{"1", "2", "3", "NaN", "4", "NaN"},
1059 | 		},
1060 | 		{
1061 | 			Floats([]string{"1", "2", "3"}),
1062 | 			Floats([]string{"a", "4", "c"}),
1063 | 			[]string{"1.000000", "2.000000", "3.000000", "NaN", "4.000000", "NaN"},
1064 | 		},
1065 | 		{
1066 | 			Bools([]string{"1", "1", "0"}),
1067 | 			Bools([]string{"0", "0", "0"}),
1068 | 			[]string{"true", "true", "false", "false", "false", "false"},
1069 | 		},
1070 | 	}
1071 | 	for testnum, test := range tests {
1072 | 		ab := test.a.Concat(test.b)
1073 | 		if err := ab.Err; err != nil {
1074 | 			t.Errorf("Test:%v\nError:%v", testnum, err)
1075 | 		}
1076 | 		received := ab.Records()
1077 | 		expected := test.expected
1078 | 		if !reflect.DeepEqual(expected, received) {
1079 | 			t.Errorf(
1080 | 				"Test:%v\nExpected:\n%v\nReceived:\n%v",
1081 | 				testnum, expected, received,
1082 | 			)
1083 | 		}
1084 | 		//a := test.a
1085 | 		//b := ab
1086 | 		//if err := checkAddr(a.Addr(), b.Addr()); err != nil {
1087 | 		//t.Errorf("Test:%v\nError:%v\nA:%v\nAB:%v", testnum, err, a.Addr(), b.Addr())
1088 | 		//}
1089 | 		//a = test.b
1090 | 		//b = ab
1091 | 		//if err := checkAddr(a.Addr(), b.Addr()); err != nil {
1092 | 		//t.Errorf("Test:%v\nError:%v\nB:%v\nAB:%v", testnum, err, a.Addr(), b.Addr())
1093 | 		//}
1094 | 	}
1095 | }
1096 | 
1097 | func TestSeries_Order(t *testing.T) {
1098 | 	tests := []struct {
1099 | 		series   Series
1100 | 		reverse  bool
1101 | 		expected []int
1102 | 	}{
1103 | 		{
1104 | 			Ints([]string{"2", "1", "3", "NaN", "4", "NaN"}),
1105 | 			false,
1106 | 			[]int{1, 0, 2, 4, 3, 5},
1107 | 		},
1108 | 		{
1109 | 			Floats([]string{"2", "1", "3", "NaN", "4", "NaN"}),
1110 | 			false,
1111 | 			[]int{1, 0, 2, 4, 3, 5},
1112 | 		},
1113 | 		{
1114 | 			Strings([]string{"c", "b", "a"}),
1115 | 			false,
1116 | 			[]int{2, 1, 0},
1117 | 		},
1118 | 		{
1119 | 			Bools([]bool{true, false, false, false, true}),
1120 | 			false,
1121 | 			[]int{1, 2, 3, 0, 4},
1122 | 		},
1123 | 		{
1124 | 			Ints([]string{"2", "1", "3", "NaN", "4", "NaN"}),
1125 | 			true,
1126 | 			[]int{4, 2, 0, 1, 3, 5},
1127 | 		},
1128 | 		{
1129 | 			Floats([]string{"2", "1", "3", "NaN", "4", "NaN"}),
1130 | 			true,
1131 | 			[]int{4, 2, 0, 1, 3, 5},
1132 | 		},
1133 | 		{
1134 | 			Strings([]string{"c", "b", "a"}),
1135 | 			true,
1136 | 			[]int{0, 1, 2},
1137 | 		},
1138 | 		{
1139 | 			Bools([]bool{true, false, false, false, true}),
1140 | 			true,
1141 | 			[]int{0, 4, 1, 2, 3},
1142 | 		},
1143 | 	}
1144 | 	for testnum, test := range tests {
1145 | 		received := test.series.Order(test.reverse)
1146 | 		expected := test.expected
1147 | 		if !reflect.DeepEqual(expected, received) {
1148 | 			t.Errorf(
1149 | 				"Test:%v\nExpected:\n%v\nReceived:\n%v",
1150 | 				testnum, expected, received,
1151 | 			)
1152 | 		}
1153 | 	}
1154 | }
1155 | 
1156 | func TestSeries_IsNaN(t *testing.T) {
1157 | 	tests := []struct {
1158 | 		series   Series
1159 | 		expected []bool
1160 | 	}{
1161 | 		{
1162 | 			Ints([]string{"2", "1", "3", "NaN", "4", "NaN"}),
1163 | 			[]bool{false, false, false, true, false, true},
1164 | 		},
1165 | 		{
1166 | 			Floats([]string{"A", "1", "B", "3"}),
1167 | 			[]bool{true, false, true, false},
1168 | 		},
1169 | 	}
1170 | 	for testnum, test := range tests {
1171 | 		received := test.series.IsNaN()
1172 | 		expected := test.expected
1173 | 		if !reflect.DeepEqual(expected, received) {
1174 | 			t.Errorf(
1175 | 				"Test:%v\nExpected:\n%v\nReceived:\n%v",
1176 | 				testnum, expected, received,
1177 | 			)
1178 | 		}
1179 | 	}
1180 | }
1181 | 
1182 | func TestSeries_StdDev(t *testing.T) {
1183 | 	tests := []struct {
1184 | 		series   Series
1185 | 		expected float64
1186 | 	}{
1187 | 		{
1188 | 			Ints([]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}),
1189 | 			3.02765,
1190 | 		},
1191 | 		{
1192 | 			Floats([]float64{1.0, 2.0, 3.0}),
1193 | 			1.0,
1194 | 		},
1195 | 		{
1196 | 			Strings([]string{"A", "B", "C", "D"}),
1197 | 			math.NaN(),
1198 | 		},
1199 | 		{
1200 | 			Bools([]bool{true, true, false, true}),
1201 | 			0.5,
1202 | 		},
1203 | 		{
1204 | 			Floats([]float64{}),
1205 | 			math.NaN(),
1206 | 		},
1207 | 	}
1208 | 
1209 | 	for testnum, test := range tests {
1210 | 		received := test.series.StdDev()
1211 | 		expected := test.expected
1212 | 		if !compareFloats(received, expected, 6) {
1213 | 			t.Errorf(
1214 | 				"Test:%v\nExpected:\n%v\nReceived:\n%v",
1215 | 				testnum, expected, received,
1216 | 			)
1217 | 		}
1218 | 	}
1219 | }
1220 | 
1221 | func TestSeries_Mean(t *testing.T) {
1222 | 	tests := []struct {
1223 | 		series   Series
1224 | 		expected float64
1225 | 	}{
1226 | 		{
1227 | 			Ints([]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}),
1228 | 			5.5,
1229 | 		},
1230 | 		{
1231 | 			Floats([]float64{1.0, 2.0, 3.0}),
1232 | 			2.0,
1233 | 		},
1234 | 		{
1235 | 			Strings([]string{"A", "B", "C", "D"}),
1236 | 			math.NaN(),
1237 | 		},
1238 | 		{
1239 | 			Bools([]bool{true, true, false, true}),
1240 | 			0.75,
1241 | 		},
1242 | 		{
1243 | 			Floats([]float64{}),
1244 | 			math.NaN(),
1245 | 		},
1246 | 	}
1247 | 
1248 | 	for testnum, test := range tests {
1249 | 		received := test.series.Mean()
1250 | 		expected := test.expected
1251 | 		if !compareFloats(received, expected, 6) {
1252 | 			t.Errorf(
1253 | 				"Test:%v\nExpected:\n%v\nReceived:\n%v",
1254 | 				testnum, expected, received,
1255 | 			)
1256 | 		}
1257 | 	}
1258 | }
1259 | 
1260 | func TestSeries_Max(t *testing.T) {
1261 | 	tests := []struct {
1262 | 		series   Series
1263 | 		expected float64
1264 | 	}{
1265 | 		{
1266 | 			Ints([]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}),
1267 | 			10,
1268 | 		},
1269 | 		{
1270 | 			Floats([]float64{1.0, 2.0, 3.0}),
1271 | 			3.0,
1272 | 		},
1273 | 		{
1274 | 			Strings([]string{"A", "B", "C", "D"}),
1275 | 			math.NaN(),
1276 | 		},
1277 | 		{
1278 | 			Bools([]bool{true, true, false, true}),
1279 | 			1.0,
1280 | 		},
1281 | 		{
1282 | 			Floats([]float64{}),
1283 | 			math.NaN(),
1284 | 		},
1285 | 	}
1286 | 
1287 | 	for testnum, test := range tests {
1288 | 		received := test.series.Max()
1289 | 		expected := test.expected
1290 | 		if !compareFloats(received, expected, 6) {
1291 | 			t.Errorf(
1292 | 				"Test:%v\nExpected:\n%v\nReceived:\n%v",
1293 | 				testnum, expected, received,
1294 | 			)
1295 | 		}
1296 | 	}
1297 | }
1298 | 
1299 | func TestSeries_Median(t *testing.T) {
1300 | 	tests := []struct {
1301 | 		series   Series
1302 | 		expected float64
1303 | 	}{
1304 | 		{
1305 | 			// Extreme observations should not factor in.
1306 | 			Ints([]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 100, 1000, 10000}),
1307 | 			7,
1308 | 		},
1309 | 		{
1310 | 			// Change in order should influence result.
1311 | 			Ints([]int{1, 2, 3, 10, 100, 1000, 10000, 4, 5, 6, 7, 8, 9}),
1312 | 			7,
1313 | 		},
1314 | 		{
1315 | 			Floats([]float64{20.2755, 4.98964, -20.2006, 1.19854, 1.89977,
1316 | 				1.51178, -17.4687, 4.65567, -8.65952, 6.31649,
1317 | 			}),
1318 | 			1.705775,
1319 | 		},
1320 | 		{
1321 | 			// Change in order should not influence result.
1322 | 			Floats([]float64{4.98964, -20.2006, 1.89977, 1.19854,
1323 | 				1.51178, -17.4687, -8.65952, 20.2755, 4.65567, 6.31649,
1324 | 			}),
1325 | 			1.705775,
1326 | 		},
1327 | 		{
1328 | 			Strings([]string{"A", "B", "C", "D"}),
1329 | 			math.NaN(),
1330 | 		},
1331 | 		{
1332 | 			Bools([]bool{true, true, false, true}),
1333 | 			math.NaN(),
1334 | 		},
1335 | 		{
1336 | 			Floats([]float64{}),
1337 | 			math.NaN(),
1338 | 		},
1339 | 	}
1340 | 
1341 | 	for testnum, test := range tests {
1342 | 		received := test.series.Median()
1343 | 		expected := test.expected
1344 | 		if !compareFloats(received, expected, 6) {
1345 | 			t.Errorf(
1346 | 				"Test:%v\nExpected:\n%v\nReceived:\n%v",
1347 | 				testnum, expected, received,
1348 | 			)
1349 | 		}
1350 | 	}
1351 | }
1352 | 
1353 | func TestSeries_Min(t *testing.T) {
1354 | 	tests := []struct {
1355 | 		series   Series
1356 | 		expected float64
1357 | 	}{
1358 | 		{
1359 | 			Ints([]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}),
1360 | 			1.0,
1361 | 		},
1362 | 		{
1363 | 			Floats([]float64{1.0, 2.0, 3.0}),
1364 | 			1.0,
1365 | 		},
1366 | 		{
1367 | 			Strings([]string{"A", "B", "C", "D"}),
1368 | 			math.NaN(),
1369 | 		},
1370 | 		{
1371 | 			Bools([]bool{true, true, false, true}),
1372 | 			0.0,
1373 | 		},
1374 | 		{
1375 | 			Floats([]float64{}),
1376 | 			math.NaN(),
1377 | 		},
1378 | 	}
1379 | 
1380 | 	for testnum, test := range tests {
1381 | 		received := test.series.Min()
1382 | 		expected := test.expected
1383 | 		if !compareFloats(received, expected, 6) {
1384 | 			t.Errorf(
1385 | 				"Test:%v\nExpected:\n%v\nReceived:\n%v",
1386 | 				testnum, expected, received,
1387 | 			)
1388 | 		}
1389 | 	}
1390 | }
1391 | 
1392 | func TestSeries_MaxStr(t *testing.T) {
1393 | 	tests := []struct {
1394 | 		series   Series
1395 | 		expected string
1396 | 	}{
1397 | 		{
1398 | 			Ints([]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}),
1399 | 			"",
1400 | 		},
1401 | 		{
1402 | 			Floats([]float64{1.0, 2.0, 3.0}),
1403 | 			"",
1404 | 		},
1405 | 		{
1406 | 			Strings([]string{"A", "B", "C", "D"}),
1407 | 			"D",
1408 | 		},
1409 | 		{
1410 | 			Strings([]string{"quick", "Brown", "fox", "Lazy", "dog"}),
1411 | 			"quick",
1412 | 		},
1413 | 		{
1414 | 			Bools([]bool{true, true, false, true}),
1415 | 			"",
1416 | 		},
1417 | 		{
1418 | 			Floats([]float64{}),
1419 | 			"",
1420 | 		},
1421 | 	}
1422 | 
1423 | 	for testnum, test := range tests {
1424 | 		received := test.series.MaxStr()
1425 | 		expected := test.expected
1426 | 		if received != expected {
1427 | 			t.Errorf(
1428 | 				"Test:%v\nExpected:\n%v\nReceived:\n%v",
1429 | 				testnum, expected, received,
1430 | 			)
1431 | 		}
1432 | 	}
1433 | }
1434 | 
1435 | func TestSeries_MinStr(t *testing.T) {
1436 | 	tests := []struct {
1437 | 		series   Series
1438 | 		expected string
1439 | 	}{
1440 | 		{
1441 | 			Ints([]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}),
1442 | 			"",
1443 | 		},
1444 | 		{
1445 | 			Floats([]float64{1.0, 2.0, 3.0}),
1446 | 			"",
1447 | 		},
1448 | 		{
1449 | 			Strings([]string{"A", "B", "C", "D"}),
1450 | 			"A",
1451 | 		},
1452 | 		{
1453 | 			Strings([]string{"quick", "Brown", "fox", "Lazy", "dog"}),
1454 | 			"Brown",
1455 | 		},
1456 | 		{
1457 | 			Bools([]bool{true, true, false, true}),
1458 | 			"",
1459 | 		},
1460 | 		{
1461 | 			Floats([]float64{}),
1462 | 			"",
1463 | 		},
1464 | 	}
1465 | 
1466 | 	for testnum, test := range tests {
1467 | 		received := test.series.MinStr()
1468 | 		expected := test.expected
1469 | 		if received != expected {
1470 | 			t.Errorf(
1471 | 				"Test:%v\nExpected:\n%v\nReceived:\n%v",
1472 | 				testnum, expected, received,
1473 | 			)
1474 | 		}
1475 | 	}
1476 | }
1477 | 
1478 | func TestSeries_Quantile(t *testing.T) {
1479 | 	tests := []struct {
1480 | 		series   Series
1481 | 		p        float64
1482 | 		expected float64
1483 | 	}{
1484 | 		{
1485 | 			Ints([]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}),
1486 | 			0.9,
1487 | 			9,
1488 | 		},
1489 | 		{
1490 | 			Floats([]float64{3.141592, math.Sqrt(3), 2.718281, math.Sqrt(2)}),
1491 | 			0.8,
1492 | 			3.141592,
1493 | 		},
1494 | 		{
1495 | 			Floats([]float64{1.0, 2.0, 3.0}),
1496 | 			0.5,
1497 | 			2.0,
1498 | 		},
1499 | 		{
1500 | 			Strings([]string{"A", "B", "C", "D"}),
1501 | 			0.25,
1502 | 			math.NaN(),
1503 | 		},
1504 | 		{
1505 | 			Bools([]bool{false, false, false, true}),
1506 | 			0.75,
1507 | 			0.0,
1508 | 		},
1509 | 		{
1510 | 			Floats([]float64{}),
1511 | 			0.50,
1512 | 			math.NaN(),
1513 | 		},
1514 | 	}
1515 | 
1516 | 	for testnum, test := range tests {
1517 | 		received := test.series.Quantile(test.p)
1518 | 		expected := test.expected
1519 | 		if !compareFloats(received, expected, 6) {
1520 | 			t.Errorf(
1521 | 				"Test:%v\nExpected:\n%v\nReceived:\n%v",
1522 | 				testnum, expected, received,
1523 | 			)
1524 | 		}
1525 | 	}
1526 | }
1527 | 
1528 | 
1529 | func TestSeries_Map(t *testing.T) {
1530 | 		tests := []struct {
1531 | 		series   Series
1532 | 		expected Series
1533 | 	}{
1534 | 		{
1535 | 			Bools([]bool{false, true, false, false, true}),
1536 | 			Bools([]bool{false, true, false, false, true}),
1537 | 		},
1538 | 		{
1539 | 			Floats([]float64{1.5, -3.23, -0.337397, -0.380079, 1.60979, 34.}),
1540 | 			Floats([]float64{3, -6.46, -0.674794, -0.760158, 3.21958, 68.}),
1541 | 		},
1542 | 		{
1543 | 			Floats([]float64{math.Pi, math.Phi, math.SqrtE, math.Cbrt(64)}),
1544 | 			Floats([]float64{2 * math.Pi, 2 * math.Phi, 2 * math.SqrtE, 2 * math.Cbrt(64)}),
1545 | 		},
1546 | 		{
1547 | 			Strings([]string{"XyZApple", "XyZBanana", "XyZCitrus", "XyZDragonfruit"}),
1548 | 			Strings([]string{"Apple", "Banana", "Citrus", "Dragonfruit"}),
1549 | 		},
1550 | 		{
1551 | 			Strings([]string{"San Francisco", "XyZTokyo", "MoscowXyZ", "XyzSydney"}),
1552 | 			Strings([]string{"San Francisco", "Tokyo", "MoscowXyZ", "XyzSydney"}),
1553 | 		},
1554 | 		{
1555 | 			Ints([]int{23, 13, 101, -64, -3}),
1556 | 			Ints([]int{28, 18, 106, -59, 2}),
1557 | 		},
1558 | 		{
1559 | 			Ints([]string{"morning", "noon", "afternoon", "evening", "night"}),
1560 | 			Ints([]int{5, 5, 5, 5, 5}),
1561 | 		},
1562 | 	}
1563 | 
1564 | 	doubleFloat64 := func(e Element) Element {
1565 | 		var result Element
1566 | 		result = e.Copy()
1567 | 		result.Set(result.Float() * 2)		
1568 | 		return Element(result)
1569 | 	}
1570 | 
1571 | 	// and two booleans 
1572 | 	and := func(e Element) Element {
1573 | 		var result Element
1574 | 		result = e.Copy()
1575 | 		b, err := result.Bool()
1576 | 		if err != nil {
1577 | 			t.Errorf("%v", err)
1578 | 			return Element(nil)
1579 | 		}
1580 | 		result.Set(b && true)
1581 | 		return Element(result)
1582 | 	}
1583 | 
1584 | 	// add constant (+5) to value (v)
1585 | 	add5Int := func(e Element) Element {
1586 | 		var result Element
1587 | 		result = e.Copy()
1588 | 		i, err := result.Int()
1589 | 		if err != nil {
1590 | 			return Element(&intElement{
1591 | 				e: +5,
1592 | 				nan: false,
1593 | 			})
1594 | 		}
1595 | 		result.Set(i + 5)		
1596 | 		return Element(result)
1597 | 	}
1598 | 
1599 | 	// trim (XyZ) prefix from string
1600 | 	trimXyZPrefix := func(e Element) Element {
1601 | 		var result Element
1602 | 		result = e.Copy()
1603 | 		result.Set(strings.TrimPrefix(result.String(), "XyZ"))
1604 | 		return Element(result)
1605 | 	}
1606 | 
1607 | 		for testnum, test := range tests {
1608 | 		switch test.series.Type() {
1609 | 		case Bool:
1610 | 			expected := test.expected
1611 | 			received := test.series.Map(and)
1612 | 			for i := 0 ; i<expected.Len() ; i++ {
1613 | 				e, _ := expected.Elem(i).Bool()
1614 | 				r, _ := received.Elem(i).Bool()
1615 | 
1616 | 				if e != r {
1617 | 					t.Errorf(
1618 | 						"Test:%v\nExpected:\n%v\nReceived:\n%v",
1619 | 						testnum, expected, received,
1620 | 					)
1621 | 				}
1622 | 			}
1623 | 			
1624 | 		case Float:
1625 | 			expected := test.expected
1626 | 			received := test.series.Map(doubleFloat64)
1627 | 			for i := 0 ; i<expected.Len() ; i++ {
1628 | 				if !compareFloats(expected.Elem(i).Float(),
1629 | 				received.Elem(i).Float(), 6) {
1630 | 					t.Errorf(
1631 | 						"Test:%v\nExpected:\n%v\nReceived:\n%v",
1632 | 						testnum, expected, received,
1633 | 					)
1634 | 				}
1635 | 			}
1636 | 		case Int:
1637 | 			expected := test.expected
1638 | 			received := test.series.Map(add5Int)
1639 | 			for i := 0 ; i<expected.Len() ; i++ {
1640 | 				e, _ := expected.Elem(i).Int()
1641 | 				r, _ := received.Elem(i).Int()
1642 | 				if e != r {
1643 | 					t.Errorf(
1644 | 						"Test:%v\nExpected:\n%v\nReceived:\n%v",
1645 | 						testnum, expected, received,
1646 | 					)
1647 | 				}
1648 | 			}
1649 | 		case String:
1650 | 			expected := test.expected
1651 | 			received := test.series.Map(trimXyZPrefix)
1652 | 			for i :=0 ; i<expected.Len() ; i++ {
1653 | 				if strings.Compare(expected.Elem(i).String(),
1654 | 				received.Elem(i).String()) != 0 {
1655 | 					t.Errorf(
1656 | 						"Test:%v\nExpected:\n%v\nReceived:\n%v",
1657 | 						testnum, expected, received,
1658 | 					)
1659 | 				}
1660 | 			}
1661 | 		default:
1662 | 		}
1663 | 	}
1664 | }


--------------------------------------------------------------------------------
/dataframe/dataframe.go:
--------------------------------------------------------------------------------
   1 | // Package dataframe provides an implementation of data frames and methods to
   2 | // subset, join, mutate, set, arrange, summarize, etc.
   3 | package dataframe
   4 | 
   5 | import (
   6 | 	"encoding/csv"
   7 | 	"encoding/json"
   8 | 	"fmt"
   9 | 	"io"
  10 | 	"reflect"
  11 | 	"sort"
  12 | 	"strconv"
  13 | 	"strings"
  14 | 	"unicode/utf8"
  15 | 
  16 | 	"github.com/libonomy/libonomy-gota/series"
  17 | )
  18 | 
// DataFrame is a data structure designed for operating on table-like data (such
// as Excel, CSV files, SQL table results...) where every column has to keep type
// integrity. As a general rule of thumb, variables are stored on columns where
// every row of a DataFrame represents an observation for each variable.
//
// In the real world, data is very messy and sometimes there are missing
// measurements or missing data. For this reason, DataFrame has support for NaN
// elements and allows the most common data cleaning and munging operations such
// as subsetting, filtering, type transformations, etc. In addition to this, this
// library provides the necessary functions to concatenate DataFrames (by rows or
// columns), different Join operations (Inner, Outer, Left, Right, Cross) and the
// ability to read and write from different formats (CSV/JSON).
//
// Failures are not returned separately: an operation that fails yields a
// DataFrame whose Err field is set, and the methods in this file return early
// when called on such a DataFrame.
type DataFrame struct {
	// columns holds one Series per column of the table.
	columns []series.Series
	// ncols is the number of columns, as computed by checkColumnsDimensions.
	ncols int
	// nrows is the common length of all columns, as computed by
	// checkColumnsDimensions.
	nrows int
	// Err is non-nil when the operation that produced this DataFrame failed.
	Err error
}
  37 | 
  38 | // New is the generic DataFrame constructor
  39 | func New(se ...series.Series) DataFrame {
  40 | 	if se == nil || len(se) == 0 {
  41 | 		return DataFrame{Err: fmt.Errorf("empty DataFrame")}
  42 | 	}
  43 | 
  44 | 	columns := make([]series.Series, len(se))
  45 | 	for i, s := range se {
  46 | 		columns[i] = s.Copy()
  47 | 	}
  48 | 	nrows, ncols, err := checkColumnsDimensions(columns...)
  49 | 	if err != nil {
  50 | 		return DataFrame{Err: err}
  51 | 	}
  52 | 
  53 | 	// Fill DataFrame base structure
  54 | 	df := DataFrame{
  55 | 		columns: columns,
  56 | 		ncols:   ncols,
  57 | 		nrows:   nrows,
  58 | 	}
  59 | 	colnames := df.Names()
  60 | 	fixColnames(colnames)
  61 | 	for i, colname := range colnames {
  62 | 		df.columns[i].Name = colname
  63 | 	}
  64 | 	return df
  65 | }
  66 | 
  67 | func checkColumnsDimensions(se ...series.Series) (nrows, ncols int, err error) {
  68 | 	ncols = len(se)
  69 | 	nrows = -1
  70 | 	if se == nil || ncols == 0 {
  71 | 		err = fmt.Errorf("no Series given")
  72 | 		return
  73 | 	}
  74 | 	for i, s := range se {
  75 | 		if s.Err != nil {
  76 | 			err = fmt.Errorf("error on series %d: %v", i, s.Err)
  77 | 			return
  78 | 		}
  79 | 		if nrows == -1 {
  80 | 			nrows = s.Len()
  81 | 		}
  82 | 		if nrows != s.Len() {
  83 | 			err = fmt.Errorf("arguments have different dimensions")
  84 | 			return
  85 | 		}
  86 | 	}
  87 | 	return
  88 | }
  89 | 
  90 | // Copy returns a copy of the DataFrame
  91 | func (df DataFrame) Copy() DataFrame {
  92 | 	copy := New(df.columns...)
  93 | 	if df.Err != nil {
  94 | 		copy.Err = df.Err
  95 | 	}
  96 | 	return copy
  97 | }
  98 | 
  99 | // String implements the Stringer interface for DataFrame
 100 | func (df DataFrame) String() (str string) {
 101 | 	return df.print(true, true, true, true, 10, 70, "DataFrame")
 102 | }
 103 | 
 104 | func (df DataFrame) print(
 105 | 	shortRows, shortCols, showDims, showTypes bool,
 106 | 	maxRows int,
 107 | 	maxCharsTotal int,
 108 | 	class string) (str string) {
 109 | 
 110 | 	addRightPadding := func(s string, nchar int) string {
 111 | 		if utf8.RuneCountInString(s) < nchar {
 112 | 			return s + strings.Repeat(" ", nchar-utf8.RuneCountInString(s))
 113 | 		}
 114 | 		return s
 115 | 	}
 116 | 
 117 | 	addLeftPadding := func(s string, nchar int) string {
 118 | 		if utf8.RuneCountInString(s) < nchar {
 119 | 			return strings.Repeat(" ", nchar-utf8.RuneCountInString(s)) + s
 120 | 		}
 121 | 		return s
 122 | 	}
 123 | 
 124 | 	if df.Err != nil {
 125 | 		str = fmt.Sprintf("%s error: %v", class, df.Err)
 126 | 		return
 127 | 	}
 128 | 	nrows, ncols := df.Dims()
 129 | 	if nrows == 0 || ncols == 0 {
 130 | 		str = fmt.Sprintf("Empty %s", class)
 131 | 		return
 132 | 	}
 133 | 	idx := make([]int, maxRows)
 134 | 	for i := 0; i < len(idx); i++ {
 135 | 		idx[i] = i
 136 | 	}
 137 | 	var records [][]string
 138 | 	shortening := false
 139 | 	if shortRows && nrows > maxRows {
 140 | 		shortening = true
 141 | 		df = df.Subset(idx)
 142 | 		records = df.Records()
 143 | 	} else {
 144 | 		records = df.Records()
 145 | 	}
 146 | 
 147 | 	if showDims {
 148 | 		str += fmt.Sprintf("[%dx%d] %s\n\n", nrows, ncols, class)
 149 | 	}
 150 | 
 151 | 	// Add the row numbers
 152 | 	for i := 0; i < df.nrows+1; i++ {
 153 | 		add := ""
 154 | 		if i != 0 {
 155 | 			add = strconv.Itoa(i-1) + ":"
 156 | 		}
 157 | 		records[i] = append([]string{add}, records[i]...)
 158 | 	}
 159 | 	if shortening {
 160 | 		dots := make([]string, ncols+1)
 161 | 		for i := 1; i < ncols+1; i++ {
 162 | 			dots[i] = "..."
 163 | 		}
 164 | 		records = append(records, dots)
 165 | 	}
 166 | 	types := df.Types()
 167 | 	typesrow := make([]string, ncols)
 168 | 	for i := 0; i < ncols; i++ {
 169 | 		typesrow[i] = fmt.Sprintf("<%v>", types[i])
 170 | 	}
 171 | 	typesrow = append([]string{""}, typesrow...)
 172 | 
 173 | 	if showTypes {
 174 | 		records = append(records, typesrow)
 175 | 	}
 176 | 
 177 | 	maxChars := make([]int, df.ncols+1)
 178 | 	for i := 0; i < len(records); i++ {
 179 | 		for j := 0; j < df.ncols+1; j++ {
 180 | 			// Escape special characters
 181 | 			records[i][j] = strconv.Quote(records[i][j])
 182 | 			records[i][j] = records[i][j][1 : len(records[i][j])-1]
 183 | 
 184 | 			// Detect maximum number of characters per column
 185 | 			if len(records[i][j]) > maxChars[j] {
 186 | 				maxChars[j] = utf8.RuneCountInString(records[i][j])
 187 | 			}
 188 | 		}
 189 | 	}
 190 | 	maxCols := len(records[0])
 191 | 	var notShowing []string
 192 | 	if shortCols {
 193 | 		maxCharsCum := 0
 194 | 		for colnum, m := range maxChars {
 195 | 			maxCharsCum += m
 196 | 			if maxCharsCum > maxCharsTotal {
 197 | 				maxCols = colnum
 198 | 				break
 199 | 			}
 200 | 		}
 201 | 		notShowingNames := records[0][maxCols:]
 202 | 		notShowingTypes := typesrow[maxCols:]
 203 | 		notShowing = make([]string, len(notShowingNames))
 204 | 		for i := 0; i < len(notShowingNames); i++ {
 205 | 			notShowing[i] = fmt.Sprintf("%s %s", notShowingNames[i], notShowingTypes[i])
 206 | 		}
 207 | 	}
 208 | 	for i := 0; i < len(records); i++ {
 209 | 		// Add right padding to all elements
 210 | 		records[i][0] = addLeftPadding(records[i][0], maxChars[0]+1)
 211 | 		for j := 1; j < df.ncols+1; j++ {
 212 | 			records[i][j] = addRightPadding(records[i][j], maxChars[j])
 213 | 		}
 214 | 		records[i] = records[i][0:maxCols]
 215 | 		if shortCols && len(notShowing) != 0 {
 216 | 			records[i] = append(records[i], "...")
 217 | 		}
 218 | 		// Create the final string
 219 | 		str += strings.Join(records[i], " ")
 220 | 		str += "\n"
 221 | 	}
 222 | 	if shortCols && len(notShowing) != 0 {
 223 | 		var notShown string
 224 | 		var notShownArr [][]string
 225 | 		cum := 0
 226 | 		i := 0
 227 | 		for n, ns := range notShowing {
 228 | 			cum += len(ns)
 229 | 			if cum > maxCharsTotal {
 230 | 				notShownArr = append(notShownArr, notShowing[i:n])
 231 | 				cum = 0
 232 | 				i = n
 233 | 			}
 234 | 		}
 235 | 		if i < len(notShowing) {
 236 | 			notShownArr = append(notShownArr, notShowing[i:len(notShowing)])
 237 | 		}
 238 | 		for k, ns := range notShownArr {
 239 | 			notShown += strings.Join(ns, ", ")
 240 | 			if k != len(notShownArr)-1 {
 241 | 				notShown += ","
 242 | 			}
 243 | 			notShown += "\n"
 244 | 		}
 245 | 		str += fmt.Sprintf("\nNot Showing: %s", notShown)
 246 | 	}
 247 | 	return str
 248 | }
 249 | 
 250 | // Subsetting, mutating and transforming DataFrame methods
 251 | // =======================================================
 252 | 
 253 | // Set will update the values of a DataFrame for all rows selected via indexes.
 254 | func (df DataFrame) Set(indexes series.Indexes, newvalues DataFrame) DataFrame {
 255 | 	if df.Err != nil {
 256 | 		return df
 257 | 	}
 258 | 	if newvalues.Err != nil {
 259 | 		return DataFrame{Err: fmt.Errorf("argument has errors: %v", newvalues.Err)}
 260 | 	}
 261 | 	if df.ncols != newvalues.ncols {
 262 | 		return DataFrame{Err: fmt.Errorf("different number of columns")}
 263 | 	}
 264 | 	columns := make([]series.Series, df.ncols)
 265 | 	for i, s := range df.columns {
 266 | 		columns[i] = s.Set(indexes, newvalues.columns[i])
 267 | 		if columns[i].Err != nil {
 268 | 			df = DataFrame{Err: fmt.Errorf("setting error on column %d: %v", i, columns[i].Err)}
 269 | 			return df
 270 | 		}
 271 | 	}
 272 | 	return df
 273 | }
 274 | 
 275 | // Subset returns a subset of the rows of the original DataFrame based on the
 276 | // Series subsetting indexes.
 277 | func (df DataFrame) Subset(indexes series.Indexes) DataFrame {
 278 | 	if df.Err != nil {
 279 | 		return df
 280 | 	}
 281 | 	columns := make([]series.Series, df.ncols)
 282 | 	for i, column := range df.columns {
 283 | 		s := column.Subset(indexes)
 284 | 		columns[i] = s
 285 | 	}
 286 | 	nrows, ncols, err := checkColumnsDimensions(columns...)
 287 | 	if err != nil {
 288 | 		return DataFrame{Err: err}
 289 | 	}
 290 | 	return DataFrame{
 291 | 		columns: columns,
 292 | 		ncols:   ncols,
 293 | 		nrows:   nrows,
 294 | 	}
 295 | }
 296 | 
// SelectIndexes are the supported indexes used for the DataFrame.Select and
// DataFrame.Drop methods. The value is resolved to column positions by
// parseSelectIndexes. Currently supported are:
//
//     int              // Matches the given index number
//     []int            // Matches all given index numbers
//     []bool           // Matches all columns marked as true
//     string           // Matches the column with the matching column name
//     []string         // Matches all columns with the matching column names
//     Series [Int]     // Same as []int
//     Series [Bool]    // Same as []bool
//     Series [String]  // Same as []string
type SelectIndexes interface{}
 308 | 
 309 | // Select the given DataFrame columns
 310 | func (df DataFrame) Select(indexes SelectIndexes) DataFrame {
 311 | 	if df.Err != nil {
 312 | 		return df
 313 | 	}
 314 | 	idx, err := parseSelectIndexes(df.ncols, indexes, df.Names())
 315 | 	if err != nil {
 316 | 		return DataFrame{Err: fmt.Errorf("can't select columns: %v", err)}
 317 | 	}
 318 | 	columns := make([]series.Series, len(idx))
 319 | 	for k, i := range idx {
 320 | 		if i < 0 || i >= df.ncols {
 321 | 			return DataFrame{Err: fmt.Errorf("can't select columns: index out of range")}
 322 | 		}
 323 | 		columns[k] = df.columns[i].Copy()
 324 | 	}
 325 | 	nrows, ncols, err := checkColumnsDimensions(columns...)
 326 | 	if err != nil {
 327 | 		return DataFrame{Err: err}
 328 | 	}
 329 | 	df = DataFrame{
 330 | 		columns: columns,
 331 | 		ncols:   ncols,
 332 | 		nrows:   nrows,
 333 | 	}
 334 | 	colnames := df.Names()
 335 | 	fixColnames(colnames)
 336 | 	for i, colname := range colnames {
 337 | 		df.columns[i].Name = colname
 338 | 	}
 339 | 	return df
 340 | }
 341 | 
 342 | // Drop the given DataFrame columns
 343 | func (df DataFrame) Drop(indexes SelectIndexes) DataFrame {
 344 | 	if df.Err != nil {
 345 | 		return df
 346 | 	}
 347 | 	idx, err := parseSelectIndexes(df.ncols, indexes, df.Names())
 348 | 	if err != nil {
 349 | 		return DataFrame{Err: fmt.Errorf("can't select columns: %v", err)}
 350 | 	}
 351 | 	var columns []series.Series
 352 | 	for k, col := range df.columns {
 353 | 		if !inIntSlice(k, idx) {
 354 | 			columns = append(columns, col.Copy())
 355 | 		}
 356 | 	}
 357 | 	nrows, ncols, err := checkColumnsDimensions(columns...)
 358 | 	if err != nil {
 359 | 		return DataFrame{Err: err}
 360 | 	}
 361 | 	df = DataFrame{
 362 | 		columns: columns,
 363 | 		ncols:   ncols,
 364 | 		nrows:   nrows,
 365 | 	}
 366 | 	colnames := df.Names()
 367 | 	fixColnames(colnames)
 368 | 	for i, colname := range colnames {
 369 | 		df.columns[i].Name = colname
 370 | 	}
 371 | 	return df
 372 | }
 373 | 
 374 | // Rename changes the name of one of the columns of a DataFrame
 375 | func (df DataFrame) Rename(newname, oldname string) DataFrame {
 376 | 	if df.Err != nil {
 377 | 		return df
 378 | 	}
 379 | 	// Check that colname exist on dataframe
 380 | 	colnames := df.Names()
 381 | 	idx := findInStringSlice(oldname, colnames)
 382 | 	if idx == -1 {
 383 | 		return DataFrame{Err: fmt.Errorf("rename: can't find column name")}
 384 | 	}
 385 | 
 386 | 	copy := df.Copy()
 387 | 	copy.columns[idx].Name = newname
 388 | 	return copy
 389 | }
 390 | 
 391 | // CBind combines the columns of this DataFrame and dfb DataFrame.
 392 | func (df DataFrame) CBind(dfb DataFrame) DataFrame {
 393 | 	if df.Err != nil {
 394 | 		return df
 395 | 	}
 396 | 	if dfb.Err != nil {
 397 | 		return dfb
 398 | 	}
 399 | 	cols := append(df.columns, dfb.columns...)
 400 | 	return New(cols...)
 401 | }
 402 | 
 403 | // RBind matches the column names of two DataFrames and returns combined
 404 | // rows from both of them.
 405 | func (df DataFrame) RBind(dfb DataFrame) DataFrame {
 406 | 	if df.Err != nil {
 407 | 		return df
 408 | 	}
 409 | 	if dfb.Err != nil {
 410 | 		return dfb
 411 | 	}
 412 | 	expandedSeries := make([]series.Series, df.ncols)
 413 | 	for k, v := range df.Names() {
 414 | 		idx := findInStringSlice(v, dfb.Names())
 415 | 		if idx == -1 {
 416 | 			return DataFrame{Err: fmt.Errorf("rbind: column names are not compatible")}
 417 | 		}
 418 | 
 419 | 		originalSeries := df.columns[k]
 420 | 		addedSeries := dfb.columns[idx]
 421 | 		newSeries := originalSeries.Concat(addedSeries)
 422 | 		if err := newSeries.Err; err != nil {
 423 | 			return DataFrame{Err: fmt.Errorf("rbind: %v", err)}
 424 | 		}
 425 | 		expandedSeries[k] = newSeries
 426 | 	}
 427 | 	return New(expandedSeries...)
 428 | }
 429 | 
 430 | // Mutate changes a column of the DataFrame with the given Series or adds it as
 431 | // a new column if the column name does not exist.
 432 | func (df DataFrame) Mutate(s series.Series) DataFrame {
 433 | 	if df.Err != nil {
 434 | 		return df
 435 | 	}
 436 | 	if s.Len() != df.nrows {
 437 | 		return DataFrame{Err: fmt.Errorf("mutate: wrong dimensions")}
 438 | 	}
 439 | 	df = df.Copy()
 440 | 	// Check that colname exist on dataframe
 441 | 	columns := df.columns
 442 | 	if idx := findInStringSlice(s.Name, df.Names()); idx != -1 {
 443 | 		columns[idx] = s
 444 | 	} else {
 445 | 		columns = append(columns, s)
 446 | 	}
 447 | 	nrows, ncols, err := checkColumnsDimensions(columns...)
 448 | 	if err != nil {
 449 | 		return DataFrame{Err: err}
 450 | 	}
 451 | 	df = DataFrame{
 452 | 		columns: columns,
 453 | 		ncols:   ncols,
 454 | 		nrows:   nrows,
 455 | 	}
 456 | 	colnames := df.Names()
 457 | 	fixColnames(colnames)
 458 | 	for i, colname := range colnames {
 459 | 		df.columns[i].Name = colname
 460 | 	}
 461 | 	return df
 462 | }
 463 | 
// F is the filtering structure used by DataFrame.Filter. Each F describes one
// comparison: the column named Colname is compared, using Comparator, against
// Comparando.
type F struct {
	// Colname is the name of the column the filter applies to.
	Colname string
	// Comparator is the comparison passed to Series.Compare.
	Comparator series.Comparator
	// Comparando is the value the column is compared against; it is passed
	// as is to Series.Compare.
	Comparando interface{}
}
 470 | 
 471 | // Filter will filter the rows of a DataFrame based on the given filters. All
 472 | // filters on the argument of a Filter call are aggregated as an OR operation
 473 | // whereas if we chain Filter calls, every filter will act as an AND operation
 474 | // with regards to the rest.
 475 | func (df DataFrame) Filter(filters ...F) DataFrame {
 476 | 	if df.Err != nil {
 477 | 		return df
 478 | 	}
 479 | 	compResults := make([]series.Series, len(filters))
 480 | 	for i, f := range filters {
 481 | 		idx := findInStringSlice(f.Colname, df.Names())
 482 | 		if idx < 0 {
 483 | 			return DataFrame{Err: fmt.Errorf("filter: can't find column name")}
 484 | 		}
 485 | 		res := df.columns[idx].Compare(f.Comparator, f.Comparando)
 486 | 		if err := res.Err; err != nil {
 487 | 			return DataFrame{Err: fmt.Errorf("filter: %v", err)}
 488 | 		}
 489 | 		compResults[i] = res
 490 | 	}
 491 | 	// Join compResults via "OR"
 492 | 	if len(compResults) == 0 {
 493 | 		return df.Copy()
 494 | 	}
 495 | 	res, err := compResults[0].Bool()
 496 | 	if err != nil {
 497 | 		return DataFrame{Err: fmt.Errorf("filter: %v", err)}
 498 | 	}
 499 | 	for i := 1; i < len(compResults); i++ {
 500 | 		nextRes, err := compResults[i].Bool()
 501 | 		if err != nil {
 502 | 			return DataFrame{Err: fmt.Errorf("filter: %v", err)}
 503 | 		}
 504 | 		for j := 0; j < len(res); j++ {
 505 | 			res[j] = res[j] || nextRes[j]
 506 | 		}
 507 | 	}
 508 | 	return df.Subset(res)
 509 | }
 510 | 
// Order is the ordering structure consumed by DataFrame.Arrange. Colname
// names the column to sort by and Reverse is forwarded to that column's Order
// method to request reverse ordering.
type Order struct {
	Colname string
	Reverse bool
}
 516 | 
 517 | // Sort return an ordering structure for regular column sorting sort.
 518 | func Sort(colname string) Order {
 519 | 	return Order{colname, false}
 520 | }
 521 | 
 522 | // RevSort return an ordering structure for reverse column sorting.
 523 | func RevSort(colname string) Order {
 524 | 	return Order{colname, true}
 525 | }
 526 | 
 527 | // Arrange sort the rows of a DataFrame according to the given Order
 528 | func (df DataFrame) Arrange(order ...Order) DataFrame {
 529 | 	if df.Err != nil {
 530 | 		return df
 531 | 	}
 532 | 	if order == nil || len(order) == 0 {
 533 | 		return DataFrame{Err: fmt.Errorf("rename: no arguments")}
 534 | 	}
 535 | 
 536 | 	// Check that all colnames exist before starting to sort
 537 | 	for i := 0; i < len(order); i++ {
 538 | 		colname := order[i].Colname
 539 | 		if df.colIndex(colname) == -1 {
 540 | 			return DataFrame{Err: fmt.Errorf("colname %s doesn't exist", colname)}
 541 | 		}
 542 | 	}
 543 | 
 544 | 	// Initialize the index that will be used to store temporary and final order
 545 | 	// results.
 546 | 	origIdx := make([]int, df.nrows)
 547 | 	for i := 0; i < df.nrows; i++ {
 548 | 		origIdx[i] = i
 549 | 	}
 550 | 
 551 | 	swapOrigIdx := func(newidx []int) {
 552 | 		newOrigIdx := make([]int, len(newidx))
 553 | 		for k, i := range newidx {
 554 | 			newOrigIdx[k] = origIdx[i]
 555 | 		}
 556 | 		origIdx = newOrigIdx
 557 | 	}
 558 | 
 559 | 	suborder := origIdx
 560 | 	for i := len(order) - 1; i >= 0; i-- {
 561 | 		colname := order[i].Colname
 562 | 		idx := df.colIndex(colname)
 563 | 		nextSeries := df.columns[idx].Subset(suborder)
 564 | 		suborder = nextSeries.Order(order[i].Reverse)
 565 | 		swapOrigIdx(suborder)
 566 | 	}
 567 | 	return df.Subset(origIdx)
 568 | }
 569 | 
 570 | // Capply applies the given function to the columns of a DataFrame
 571 | func (df DataFrame) Capply(f func(series.Series) series.Series) DataFrame {
 572 | 	if df.Err != nil {
 573 | 		return df
 574 | 	}
 575 | 	columns := make([]series.Series, df.ncols)
 576 | 	for i, s := range df.columns {
 577 | 		applied := f(s)
 578 | 		applied.Name = s.Name
 579 | 		columns[i] = applied
 580 | 	}
 581 | 	return New(columns...)
 582 | }
 583 | 
 584 | // Rapply applies the given function to the rows of a DataFrame. Prior to applying
 585 | // the function the elements of each row are cast to a Series of a specific
 586 | // type. In order of priority: String -> Float -> Int -> Bool. This casting also
 587 | // takes place after the function application to equalize the type of the columns.
 588 | func (df DataFrame) Rapply(f func(series.Series) series.Series) DataFrame {
 589 | 	if df.Err != nil {
 590 | 		return df
 591 | 	}
 592 | 
 593 | 	detectType := func(types []series.Type) series.Type {
 594 | 		var hasStrings, hasFloats, hasInts, hasBools bool
 595 | 		for _, t := range types {
 596 | 			switch t {
 597 | 			case series.String:
 598 | 				hasStrings = true
 599 | 			case series.Float:
 600 | 				hasFloats = true
 601 | 			case series.Int:
 602 | 				hasInts = true
 603 | 			case series.Bool:
 604 | 				hasBools = true
 605 | 			}
 606 | 		}
 607 | 		switch {
 608 | 		case hasStrings:
 609 | 			return series.String
 610 | 		case hasBools:
 611 | 			return series.Bool
 612 | 		case hasFloats:
 613 | 			return series.Float
 614 | 		case hasInts:
 615 | 			return series.Int
 616 | 		default:
 617 | 			panic("type not supported")
 618 | 		}
 619 | 	}
 620 | 
 621 | 	// Detect row type prior to function application
 622 | 	types := df.Types()
 623 | 	rowType := detectType(types)
 624 | 
 625 | 	// Create Element matrix
 626 | 	elements := make([][]series.Element, df.nrows)
 627 | 	rowlen := -1
 628 | 	for i := 0; i < df.nrows; i++ {
 629 | 		row := series.New(nil, rowType, "").Empty()
 630 | 		for _, col := range df.columns {
 631 | 			row.Append(col.Elem(i))
 632 | 		}
 633 | 		row = f(row)
 634 | 		if row.Err != nil {
 635 | 			return DataFrame{Err: fmt.Errorf("error applying function on row %d: %v", i, row.Err)}
 636 | 		}
 637 | 
 638 | 		if rowlen != -1 && rowlen != row.Len() {
 639 | 			return DataFrame{Err: fmt.Errorf("error applying function: rows have different lengths")}
 640 | 		}
 641 | 		rowlen = row.Len()
 642 | 
 643 | 		rowElems := make([]series.Element, rowlen)
 644 | 		for j := 0; j < rowlen; j++ {
 645 | 			rowElems[j] = row.Elem(j)
 646 | 		}
 647 | 		elements[i] = rowElems
 648 | 	}
 649 | 
 650 | 	// Cast columns if necessary
 651 | 	columns := make([]series.Series, rowlen)
 652 | 	for j := 0; j < rowlen; j++ {
 653 | 		types := make([]series.Type, df.nrows)
 654 | 		for i := 0; i < df.nrows; i++ {
 655 | 			types[i] = elements[i][j].Type()
 656 | 		}
 657 | 		colType := detectType(types)
 658 | 		s := series.New(nil, colType, "").Empty()
 659 | 		for i := 0; i < df.nrows; i++ {
 660 | 			s.Append(elements[i][j])
 661 | 		}
 662 | 		columns[j] = s
 663 | 	}
 664 | 
 665 | 	nrows, ncols, err := checkColumnsDimensions(columns...)
 666 | 	if err != nil {
 667 | 		return DataFrame{Err: err}
 668 | 	}
 669 | 	df = DataFrame{
 670 | 		columns: columns,
 671 | 		ncols:   ncols,
 672 | 		nrows:   nrows,
 673 | 	}
 674 | 	colnames := df.Names()
 675 | 	fixColnames(colnames)
 676 | 	for i, colname := range colnames {
 677 | 		df.columns[i].Name = colname
 678 | 	}
 679 | 	return df
 680 | }
 681 | 
 682 | // Read/Write Methods
 683 | // =================
 684 | 
// LoadOption is the type used to configure the load of elements. A LoadOption
// is a function that modifies the loadOptions it receives; the Load*/Read*
// functions apply the given options, in order, on top of their defaults.
type LoadOption func(*loadOptions)
 687 | 
// loadOptions collects the settings that the Load*/Read* functions honor.
// Values are set through the LoadOption constructors.
type loadOptions struct {
	// Specifies the type given to a column when no explicit type is
	// configured for it and detectTypes is disabled.
	defaultType series.Type

	// If set, the type of each column will be automatically detected unless
	// otherwise specified.
	detectTypes bool

	// If set, the first row of the tabular structure will be used as column
	// names.
	hasHeader bool

	// The names to set as column names.
	names []string

	// Defines which values are going to be considered as NaN when parsing from string.
	nanValues []string

	// Defines the csv delimiter.
	delimiter rune

	// Defines the csv comment character; the zero value leaves the csv
	// reader's comment handling untouched.
	comment rune

	// The types of specific columns can be specified via column name.
	types map[string]series.Type
}
 715 | 
 716 | // DefaultType sets the defaultType option for loadOptions.
 717 | func DefaultType(t series.Type) LoadOption {
 718 | 	return func(c *loadOptions) {
 719 | 		c.defaultType = t
 720 | 	}
 721 | }
 722 | 
 723 | // DetectTypes sets the detectTypes option for loadOptions.
 724 | func DetectTypes(b bool) LoadOption {
 725 | 	return func(c *loadOptions) {
 726 | 		c.detectTypes = b
 727 | 	}
 728 | }
 729 | 
 730 | // HasHeader sets the hasHeader option for loadOptions.
 731 | func HasHeader(b bool) LoadOption {
 732 | 	return func(c *loadOptions) {
 733 | 		c.hasHeader = b
 734 | 	}
 735 | }
 736 | 
 737 | // Names sets the names option for loadOptions.
 738 | func Names(names ...string) LoadOption {
 739 | 	return func(c *loadOptions) {
 740 | 		c.names = names
 741 | 	}
 742 | }
 743 | 
 744 | // NaNValues sets the nanValues option for loadOptions.
 745 | func NaNValues(nanValues []string) LoadOption {
 746 | 	return func(c *loadOptions) {
 747 | 		c.nanValues = nanValues
 748 | 	}
 749 | }
 750 | 
 751 | // WithTypes sets the types option for loadOptions.
 752 | func WithTypes(coltypes map[string]series.Type) LoadOption {
 753 | 	return func(c *loadOptions) {
 754 | 		c.types = coltypes
 755 | 	}
 756 | }
 757 | 
 758 | // WithDelimiter sets the csv delimiter other than ',', for example '\t'
 759 | func WithDelimiter(b rune) LoadOption {
 760 | 	return func(c *loadOptions) {
 761 | 		c.delimiter = b
 762 | 	}
 763 | }
 764 | 
 765 | // WithComments sets the csv comment line detect to remove lines
 766 | func WithComments(b rune) LoadOption {
 767 | 	return func(c *loadOptions) {
 768 | 		c.comment = b
 769 | 	}
 770 | }
 771 | 
 772 | // LoadStructs creates a new DataFrame from arbitrary struct slices.
 773 | //
 774 | // LoadStructs will ignore unexported fields inside an struct. Note also that
 775 | // unless otherwise specified the column names will correspond with the name of
 776 | // the field.
 777 | //
 778 | // You can configure each field with the `dataframe:"name[,type]"` struct
 779 | // tag. If the name on the tag is the empty string `""` the field name will be
 780 | // used instead. If the name is `"-"` the field will be ignored.
 781 | //
 782 | // Examples:
 783 | //
 784 | //    // field will be ignored
 785 | //    field int
 786 | //
 787 | //    // Field will be ignored
 788 | //    Field int `dataframe:"-"`
 789 | //
 790 | //    // Field will be parsed with column name Field and type int
 791 | //    Field int
 792 | //
 793 | //    // Field will be parsed with column name `field_column` and type int.
 794 | //    Field int `dataframe:"field_column"`
 795 | //
 796 | //    // Field will be parsed with column name `field` and type string.
 797 | //    Field int `dataframe:"field,string"`
 798 | //
 799 | //    // Field will be parsed with column name `Field` and type string.
 800 | //    Field int `dataframe:",string"`
 801 | //
 802 | // If the struct tags and the given LoadOptions contradict each other, the later
 803 | // will have preference over the former.
 804 | func LoadStructs(i interface{}, options ...LoadOption) DataFrame {
 805 | 	if i == nil {
 806 | 		return DataFrame{Err: fmt.Errorf("load: can't create DataFrame from <nil> value")}
 807 | 	}
 808 | 
 809 | 	// Set the default load options
 810 | 	cfg := loadOptions{
 811 | 		defaultType: series.String,
 812 | 		detectTypes: true,
 813 | 		hasHeader:   true,
 814 | 		nanValues:   []string{"NA", "NaN", "<nil>"},
 815 | 	}
 816 | 
 817 | 	// Set any custom load options
 818 | 	for _, option := range options {
 819 | 		option(&cfg)
 820 | 	}
 821 | 
 822 | 	tpy, val := reflect.TypeOf(i), reflect.ValueOf(i)
 823 | 	switch tpy.Kind() {
 824 | 	case reflect.Slice:
 825 | 		if tpy.Elem().Kind() != reflect.Struct {
 826 | 			return DataFrame{Err: fmt.Errorf(
 827 | 				"load: type %s (%s %s) is not supported, must be []struct", tpy.Name(), tpy.Elem().Kind(), tpy.Kind())}
 828 | 		}
 829 | 		if val.Len() == 0 {
 830 | 			return DataFrame{Err: fmt.Errorf("load: can't create DataFrame from empty slice")}
 831 | 		}
 832 | 
 833 | 		numFields := val.Index(0).Type().NumField()
 834 | 		var columns []series.Series
 835 | 		for j := 0; j < numFields; j++ {
 836 | 			// Extract field metadata
 837 | 			if !val.Index(0).Field(j).CanInterface() {
 838 | 				continue
 839 | 			}
 840 | 			field := val.Index(0).Type().Field(j)
 841 | 			fieldName := field.Name
 842 | 			fieldType := field.Type.String()
 843 | 
 844 | 			// Process struct tags
 845 | 			fieldTags := field.Tag.Get("dataframe")
 846 | 			if fieldTags == "-" {
 847 | 				continue
 848 | 			}
 849 | 			tagOpts := strings.Split(fieldTags, ",")
 850 | 			if len(tagOpts) > 2 {
 851 | 				return DataFrame{Err: fmt.Errorf("malformed struct tag on field %s: %s", fieldName, fieldTags)}
 852 | 			}
 853 | 			if len(tagOpts) > 0 {
 854 | 				if name := strings.TrimSpace(tagOpts[0]); name != "" {
 855 | 					fieldName = name
 856 | 				}
 857 | 				if len(tagOpts) == 2 {
 858 | 					if tagType := strings.TrimSpace(tagOpts[1]); tagType != "" {
 859 | 						fieldType = tagType
 860 | 					}
 861 | 				}
 862 | 			}
 863 | 
 864 | 			// Handle `types` option
 865 | 			var t series.Type
 866 | 			if cfgtype, ok := cfg.types[fieldName]; ok {
 867 | 				t = cfgtype
 868 | 			} else {
 869 | 				// Handle `detectTypes` option
 870 | 				if cfg.detectTypes {
 871 | 					// Parse field type
 872 | 					parsedType, err := parseType(fieldType)
 873 | 					if err != nil {
 874 | 						return DataFrame{Err: err}
 875 | 					}
 876 | 					t = parsedType
 877 | 				} else {
 878 | 					t = cfg.defaultType
 879 | 				}
 880 | 			}
 881 | 
 882 | 			// Create Series for this field
 883 | 			elements := make([]interface{}, val.Len())
 884 | 			for i := 0; i < val.Len(); i++ {
 885 | 				fieldValue := val.Index(i).Field(j)
 886 | 				elements[i] = fieldValue.Interface()
 887 | 
 888 | 				// Handle `nanValues` option
 889 | 				if findInStringSlice(fmt.Sprint(elements[i]), cfg.nanValues) != -1 {
 890 | 					elements[i] = nil
 891 | 				}
 892 | 			}
 893 | 
 894 | 			// Handle `hasHeader` option
 895 | 			if !cfg.hasHeader {
 896 | 				tmp := make([]interface{}, 1)
 897 | 				tmp[0] = fieldName
 898 | 				elements = append(tmp, elements...)
 899 | 				fieldName = ""
 900 | 			}
 901 | 			columns = append(columns, series.New(elements, t, fieldName))
 902 | 		}
 903 | 		return New(columns...)
 904 | 	}
 905 | 	return DataFrame{Err: fmt.Errorf(
 906 | 		"load: type %s (%s) is not supported, must be []struct", tpy.Name(), tpy.Kind())}
 907 | }
 908 | 
 909 | func parseType(s string) (series.Type, error) {
 910 | 	switch s {
 911 | 	case "float", "float64", "float32":
 912 | 		return series.Float, nil
 913 | 	case "int", "int64", "int32", "int16", "int8":
 914 | 		return series.Int, nil
 915 | 	case "string":
 916 | 		return series.String, nil
 917 | 	case "bool":
 918 | 		return series.Bool, nil
 919 | 	}
 920 | 	return "", fmt.Errorf("type (%s) is not supported", s)
 921 | }
 922 | 
 923 | // LoadRecords creates a new DataFrame based on the given records.
 924 | func LoadRecords(records [][]string, options ...LoadOption) DataFrame {
 925 | 	// Set the default load options
 926 | 	cfg := loadOptions{
 927 | 		defaultType: series.String,
 928 | 		detectTypes: true,
 929 | 		hasHeader:   true,
 930 | 		nanValues:   []string{"NA", "NaN", "<nil>"},
 931 | 	}
 932 | 
 933 | 	// Set any custom load options
 934 | 	for _, option := range options {
 935 | 		option(&cfg)
 936 | 	}
 937 | 
 938 | 	if len(records) == 0 {
 939 | 		return DataFrame{Err: fmt.Errorf("load records: empty DataFrame")}
 940 | 	}
 941 | 	if cfg.hasHeader && len(records) <= 1 {
 942 | 		return DataFrame{Err: fmt.Errorf("load records: empty DataFrame")}
 943 | 	}
 944 | 	if cfg.names != nil && len(cfg.names) != len(records[0]) {
 945 | 		if len(cfg.names) > len(records[0]) {
 946 | 			return DataFrame{Err: fmt.Errorf("load records: too many column names")}
 947 | 		}
 948 | 		return DataFrame{Err: fmt.Errorf("load records: not enough column names")}
 949 | 	}
 950 | 
 951 | 	// Extract headers
 952 | 	headers := make([]string, len(records[0]))
 953 | 	if cfg.hasHeader {
 954 | 		headers = records[0]
 955 | 		records = records[1:]
 956 | 	}
 957 | 	if cfg.names != nil {
 958 | 		headers = cfg.names
 959 | 	}
 960 | 
 961 | 	types := make([]series.Type, len(headers))
 962 | 	rawcols := make([][]string, len(headers))
 963 | 	for i, colname := range headers {
 964 | 		rawcol := make([]string, len(records))
 965 | 		for j := 0; j < len(records); j++ {
 966 | 			rawcol[j] = records[j][i]
 967 | 			if findInStringSlice(rawcol[j], cfg.nanValues) != -1 {
 968 | 				rawcol[j] = "NaN"
 969 | 			}
 970 | 		}
 971 | 		rawcols[i] = rawcol
 972 | 
 973 | 		t, ok := cfg.types[colname]
 974 | 		if !ok {
 975 | 			t = cfg.defaultType
 976 | 			if cfg.detectTypes {
 977 | 				if l, err := findType(rawcol); err == nil {
 978 | 					t = l
 979 | 				}
 980 | 			}
 981 | 		}
 982 | 		types[i] = t
 983 | 	}
 984 | 
 985 | 	columns := make([]series.Series, len(headers))
 986 | 	for i, colname := range headers {
 987 | 		col := series.New(rawcols[i], types[i], colname)
 988 | 		if col.Err != nil {
 989 | 			return DataFrame{Err: col.Err}
 990 | 		}
 991 | 		columns[i] = col
 992 | 	}
 993 | 	nrows, ncols, err := checkColumnsDimensions(columns...)
 994 | 	if err != nil {
 995 | 		return DataFrame{Err: err}
 996 | 	}
 997 | 	df := DataFrame{
 998 | 		columns: columns,
 999 | 		ncols:   ncols,
1000 | 		nrows:   nrows,
1001 | 	}
1002 | 
1003 | 	colnames := df.Names()
1004 | 	fixColnames(colnames)
1005 | 	for i, colname := range colnames {
1006 | 		df.columns[i].Name = colname
1007 | 	}
1008 | 	return df
1009 | }
1010 | 
1011 | // LoadMaps creates a new DataFrame based on the given maps. This function assumes
1012 | // that every map on the array represents a row of observations.
1013 | func LoadMaps(maps []map[string]interface{}, options ...LoadOption) DataFrame {
1014 | 	if len(maps) == 0 {
1015 | 		return DataFrame{Err: fmt.Errorf("load maps: empty array")}
1016 | 	}
1017 | 	inStrSlice := func(i string, s []string) bool {
1018 | 		for _, v := range s {
1019 | 			if v == i {
1020 | 				return true
1021 | 			}
1022 | 		}
1023 | 		return false
1024 | 	}
1025 | 	// Detect all colnames
1026 | 	var colnames []string
1027 | 	for _, v := range maps {
1028 | 		for k := range v {
1029 | 			if exists := inStrSlice(k, colnames); !exists {
1030 | 				colnames = append(colnames, k)
1031 | 			}
1032 | 		}
1033 | 	}
1034 | 	sort.Strings(colnames)
1035 | 	records := make([][]string, len(maps)+1)
1036 | 	records[0] = colnames
1037 | 	for k, m := range maps {
1038 | 		row := make([]string, len(colnames))
1039 | 		for i, colname := range colnames {
1040 | 			element := ""
1041 | 			val, ok := m[colname]
1042 | 			if ok {
1043 | 				element = fmt.Sprint(val)
1044 | 			}
1045 | 			row[i] = element
1046 | 		}
1047 | 		records[k+1] = row
1048 | 	}
1049 | 	return LoadRecords(records, options...)
1050 | }
1051 | 
1052 | // LoadMatrix loads the given Matrix as a DataFrame
1053 | // TODO: Add Loadoptions
1054 | func LoadMatrix(mat Matrix) DataFrame {
1055 | 	nrows, ncols := mat.Dims()
1056 | 	columns := make([]series.Series, ncols)
1057 | 	for i := 0; i < ncols; i++ {
1058 | 		floats := make([]float64, nrows)
1059 | 		for j := 0; j < nrows; j++ {
1060 | 			floats[j] = mat.At(j, i)
1061 | 		}
1062 | 		columns[i] = series.Floats(floats)
1063 | 	}
1064 | 	nrows, ncols, err := checkColumnsDimensions(columns...)
1065 | 	if err != nil {
1066 | 		return DataFrame{Err: err}
1067 | 	}
1068 | 	df := DataFrame{
1069 | 		columns: columns,
1070 | 		ncols:   ncols,
1071 | 		nrows:   nrows,
1072 | 	}
1073 | 	colnames := df.Names()
1074 | 	fixColnames(colnames)
1075 | 	for i, colname := range colnames {
1076 | 		df.columns[i].Name = colname
1077 | 	}
1078 | 	return df
1079 | }
1080 | 
1081 | // ReadCSV reads a CSV file from a io.Reader and builds a DataFrame with the
1082 | // resulting records.
1083 | func ReadCSV(r io.Reader, options ...LoadOption) DataFrame {
1084 | 	csvReader := csv.NewReader(r)
1085 | 	cfg := loadOptions{
1086 | 		delimiter: ',',
1087 | 	}
1088 | 	for _, option := range options {
1089 | 		option(&cfg)
1090 | 	}
1091 | 	if cfg.delimiter != ',' {
1092 | 		csvReader.Comma = cfg.delimiter
1093 | 	}
1094 | 	if cfg.comment != 0 {
1095 | 		csvReader.Comment = cfg.comment
1096 | 	}
1097 | 
1098 | 	records, err := csvReader.ReadAll()
1099 | 	if err != nil {
1100 | 		return DataFrame{Err: err}
1101 | 	}
1102 | 	return LoadRecords(records, options...)
1103 | }
1104 | 
1105 | // ReadJSON reads a JSON array from a io.Reader and builds a DataFrame with the
1106 | // resulting records.
1107 | func ReadJSON(r io.Reader, options ...LoadOption) DataFrame {
1108 | 	var m []map[string]interface{}
1109 | 	err := json.NewDecoder(r).Decode(&m)
1110 | 	if err != nil {
1111 | 		return DataFrame{Err: err}
1112 | 	}
1113 | 	return LoadMaps(m, options...)
1114 | }
1115 | 
// WriteOption is the type used to configure the writing of elements. A
// WriteOption is a function that modifies the writeOptions it receives.
type WriteOption func(*writeOptions)
1118 | 
// writeOptions collects the settings honored when writing a DataFrame.
type writeOptions struct {
	// Specifies whether the header is also written
	writeHeader bool
}
1123 | 
1124 | // WriteHeader sets the writeHeader option for writeOptions.
1125 | func WriteHeader(b bool) WriteOption {
1126 | 	return func(c *writeOptions) {
1127 | 		c.writeHeader = b
1128 | 	}
1129 | }
1130 | 
1131 | // WriteCSV writes the DataFrame to the given io.Writer as a CSV file.
1132 | func (df DataFrame) WriteCSV(w io.Writer, options ...WriteOption) error {
1133 | 	if df.Err != nil {
1134 | 		return df.Err
1135 | 	}
1136 | 
1137 | 	// Set the default write options
1138 | 	cfg := writeOptions{
1139 | 		writeHeader: true,
1140 | 	}
1141 | 
1142 | 	// Set any custom write options
1143 | 	for _, option := range options {
1144 | 		option(&cfg)
1145 | 	}
1146 | 
1147 | 	records := df.Records()
1148 | 	if !cfg.writeHeader {
1149 | 		records = records[1:]
1150 | 	}
1151 | 
1152 | 	return csv.NewWriter(w).WriteAll(records)
1153 | }
1154 | 
1155 | // WriteJSON writes the DataFrame to the given io.Writer as a JSON array.
1156 | func (df DataFrame) WriteJSON(w io.Writer) error {
1157 | 	if df.Err != nil {
1158 | 		return df.Err
1159 | 	}
1160 | 	return json.NewEncoder(w).Encode(df.Maps())
1161 | }
1162 | 
1163 | // Getters/Setters for DataFrame fields
1164 | // ====================================
1165 | 
1166 | // Names returns the name of the columns on a DataFrame.
1167 | func (df DataFrame) Names() []string {
1168 | 	colnames := make([]string, df.ncols)
1169 | 	for i, s := range df.columns {
1170 | 		colnames[i] = s.Name
1171 | 	}
1172 | 	return colnames
1173 | }
1174 | 
1175 | // Types returns the types of the columns on a DataFrame.
1176 | func (df DataFrame) Types() []series.Type {
1177 | 	coltypes := make([]series.Type, df.ncols)
1178 | 	for i, s := range df.columns {
1179 | 		coltypes[i] = s.Type()
1180 | 	}
1181 | 	return coltypes
1182 | }
1183 | 
1184 | // SetNames changes the column names of a DataFrame to the ones passed as an
1185 | // argument.
1186 | func (df DataFrame) SetNames(colnames ...string) error {
1187 | 	if df.Err != nil {
1188 | 		return df.Err
1189 | 	}
1190 | 	if len(colnames) != df.ncols {
1191 | 		return fmt.Errorf("setting names: wrong dimensions")
1192 | 	}
1193 | 	for k, s := range colnames {
1194 | 		df.columns[k].Name = s
1195 | 	}
1196 | 	return nil
1197 | }
1198 | 
1199 | // Dims retrieves the dimensions of a DataFrame.
1200 | func (df DataFrame) Dims() (int, int) {
1201 | 	return df.Nrow(), df.Ncol()
1202 | }
1203 | 
// Nrow returns the number of rows on a DataFrame, as recorded in its nrows
// field.
func (df DataFrame) Nrow() int {
	return df.nrows
}
1208 | 
// Ncol returns the number of columns on a DataFrame, as recorded in its ncols
// field.
func (df DataFrame) Ncol() int {
	return df.ncols
}
1213 | 
1214 | // Col returns a copy of the Series with the given column name contained in the DataFrame.
1215 | func (df DataFrame) Col(colname string) series.Series {
1216 | 	if df.Err != nil {
1217 | 		return series.Series{Err: df.Err}
1218 | 	}
1219 | 	// Check that colname exist on dataframe
1220 | 	idx := findInStringSlice(colname, df.Names())
1221 | 	if idx < 0 {
1222 | 		return series.Series{Err: fmt.Errorf("unknown column name")}
1223 | 	}
1224 | 	return df.columns[idx].Copy()
1225 | }
1226 | 
1227 | // InnerJoin returns a DataFrame containing the inner join of two DataFrames.
1228 | func (df DataFrame) InnerJoin(b DataFrame, keys ...string) DataFrame {
1229 | 	if len(keys) == 0 {
1230 | 		return DataFrame{Err: fmt.Errorf("join keys not specified")}
1231 | 	}
1232 | 	// Check that we have all given keys in both DataFrames
1233 | 	var iKeysA []int
1234 | 	var iKeysB []int
1235 | 	var errorArr []string
1236 | 	for _, key := range keys {
1237 | 		i := df.colIndex(key)
1238 | 		if i < 0 {
1239 | 			errorArr = append(errorArr, fmt.Sprintf("can't find key %q on left DataFrame", key))
1240 | 		}
1241 | 		iKeysA = append(iKeysA, i)
1242 | 		j := b.colIndex(key)
1243 | 		if j < 0 {
1244 | 			errorArr = append(errorArr, fmt.Sprintf("can't find key %q on right DataFrame", key))
1245 | 		}
1246 | 		iKeysB = append(iKeysB, j)
1247 | 	}
1248 | 	if len(errorArr) != 0 {
1249 | 		return DataFrame{Err: fmt.Errorf(strings.Join(errorArr, "\n"))}
1250 | 	}
1251 | 
1252 | 	aCols := df.columns
1253 | 	bCols := b.columns
1254 | 	// Initialize newCols
1255 | 	var newCols []series.Series
1256 | 	for _, i := range iKeysA {
1257 | 		newCols = append(newCols, aCols[i].Empty())
1258 | 	}
1259 | 	var iNotKeysA []int
1260 | 	for i := 0; i < df.ncols; i++ {
1261 | 		if !inIntSlice(i, iKeysA) {
1262 | 			iNotKeysA = append(iNotKeysA, i)
1263 | 			newCols = append(newCols, aCols[i].Empty())
1264 | 		}
1265 | 	}
1266 | 	var iNotKeysB []int
1267 | 	for i := 0; i < b.ncols; i++ {
1268 | 		if !inIntSlice(i, iKeysB) {
1269 | 			iNotKeysB = append(iNotKeysB, i)
1270 | 			newCols = append(newCols, bCols[i].Empty())
1271 | 		}
1272 | 	}
1273 | 
1274 | 	// Fill newCols
1275 | 	for i := 0; i < df.nrows; i++ {
1276 | 		for j := 0; j < b.nrows; j++ {
1277 | 			match := true
1278 | 			for k := range keys {
1279 | 				aElem := aCols[iKeysA[k]].Elem(i)
1280 | 				bElem := bCols[iKeysB[k]].Elem(j)
1281 | 				match = match && aElem.Eq(bElem)
1282 | 			}
1283 | 			if match {
1284 | 				ii := 0
1285 | 				for _, k := range iKeysA {
1286 | 					elem := aCols[k].Elem(i)
1287 | 					newCols[ii].Append(elem)
1288 | 					ii++
1289 | 				}
1290 | 				for _, k := range iNotKeysA {
1291 | 					elem := aCols[k].Elem(i)
1292 | 					newCols[ii].Append(elem)
1293 | 					ii++
1294 | 				}
1295 | 				for _, k := range iNotKeysB {
1296 | 					elem := bCols[k].Elem(j)
1297 | 					newCols[ii].Append(elem)
1298 | 					ii++
1299 | 				}
1300 | 			}
1301 | 		}
1302 | 	}
1303 | 	return New(newCols...)
1304 | }
1305 | 
1306 | // LeftJoin returns a DataFrame containing the left join of two DataFrames.
1307 | func (df DataFrame) LeftJoin(b DataFrame, keys ...string) DataFrame {
1308 | 	if len(keys) == 0 {
1309 | 		return DataFrame{Err: fmt.Errorf("join keys not specified")}
1310 | 	}
1311 | 	// Check that we have all given keys in both DataFrames
1312 | 	var iKeysA []int
1313 | 	var iKeysB []int
1314 | 	var errorArr []string
1315 | 	for _, key := range keys {
1316 | 		i := df.colIndex(key)
1317 | 		if i < 0 {
1318 | 			errorArr = append(errorArr, fmt.Sprintf("can't find key %q on left DataFrame", key))
1319 | 		}
1320 | 		iKeysA = append(iKeysA, i)
1321 | 		j := b.colIndex(key)
1322 | 		if j < 0 {
1323 | 			errorArr = append(errorArr, fmt.Sprintf("can't find key %q on right DataFrame", key))
1324 | 		}
1325 | 		iKeysB = append(iKeysB, j)
1326 | 	}
1327 | 	if len(errorArr) != 0 {
1328 | 		return DataFrame{Err: fmt.Errorf(strings.Join(errorArr, "\n"))}
1329 | 	}
1330 | 
1331 | 	aCols := df.columns
1332 | 	bCols := b.columns
1333 | 	// Initialize newCols
1334 | 	var newCols []series.Series
1335 | 	for _, i := range iKeysA {
1336 | 		newCols = append(newCols, aCols[i].Empty())
1337 | 	}
1338 | 	var iNotKeysA []int
1339 | 	for i := 0; i < df.ncols; i++ {
1340 | 		if !inIntSlice(i, iKeysA) {
1341 | 			iNotKeysA = append(iNotKeysA, i)
1342 | 			newCols = append(newCols, aCols[i].Empty())
1343 | 		}
1344 | 	}
1345 | 	var iNotKeysB []int
1346 | 	for i := 0; i < b.ncols; i++ {
1347 | 		if !inIntSlice(i, iKeysB) {
1348 | 			iNotKeysB = append(iNotKeysB, i)
1349 | 			newCols = append(newCols, bCols[i].Empty())
1350 | 		}
1351 | 	}
1352 | 
1353 | 	// Fill newCols
1354 | 	for i := 0; i < df.nrows; i++ {
1355 | 		matched := false
1356 | 		for j := 0; j < b.nrows; j++ {
1357 | 			match := true
1358 | 			for k := range keys {
1359 | 				aElem := aCols[iKeysA[k]].Elem(i)
1360 | 				bElem := bCols[iKeysB[k]].Elem(j)
1361 | 				match = match && aElem.Eq(bElem)
1362 | 			}
1363 | 			if match {
1364 | 				matched = true
1365 | 				ii := 0
1366 | 				for _, k := range iKeysA {
1367 | 					elem := aCols[k].Elem(i)
1368 | 					newCols[ii].Append(elem)
1369 | 					ii++
1370 | 				}
1371 | 				for _, k := range iNotKeysA {
1372 | 					elem := aCols[k].Elem(i)
1373 | 					newCols[ii].Append(elem)
1374 | 					ii++
1375 | 				}
1376 | 				for _, k := range iNotKeysB {
1377 | 					elem := bCols[k].Elem(j)
1378 | 					newCols[ii].Append(elem)
1379 | 					ii++
1380 | 				}
1381 | 			}
1382 | 		}
1383 | 		if !matched {
1384 | 			ii := 0
1385 | 			for _, k := range iKeysA {
1386 | 				elem := aCols[k].Elem(i)
1387 | 				newCols[ii].Append(elem)
1388 | 				ii++
1389 | 			}
1390 | 			for _, k := range iNotKeysA {
1391 | 				elem := aCols[k].Elem(i)
1392 | 				newCols[ii].Append(elem)
1393 | 				ii++
1394 | 			}
1395 | 			// for _ = range iNotKeysB {
1396 | 			// 	newCols[ii].Append(nil)
1397 | 			// 	ii++
1398 | 			// }
1399 | 
1400 | 			for _, k := range iNotKeysB {
1401 | 				_ = k
1402 | 				newCols[ii].Append(nil)
1403 | 				ii++
1404 | 			}
1405 | 		}
1406 | 	}
1407 | 	return New(newCols...)
1408 | }
1409 | 
1410 | // RightJoin returns a DataFrame containing the right join of two DataFrames.
1411 | func (df DataFrame) RightJoin(b DataFrame, keys ...string) DataFrame {
1412 | 	if len(keys) == 0 {
1413 | 		return DataFrame{Err: fmt.Errorf("join keys not specified")}
1414 | 	}
1415 | 	// Check that we have all given keys in both DataFrames
1416 | 	var iKeysA []int
1417 | 	var iKeysB []int
1418 | 	var errorArr []string
1419 | 	for _, key := range keys {
1420 | 		i := df.colIndex(key)
1421 | 		if i < 0 {
1422 | 			errorArr = append(errorArr, fmt.Sprintf("can't find key %q on left DataFrame", key))
1423 | 		}
1424 | 		iKeysA = append(iKeysA, i)
1425 | 		j := b.colIndex(key)
1426 | 		if j < 0 {
1427 | 			errorArr = append(errorArr, fmt.Sprintf("can't find key %q on right DataFrame", key))
1428 | 		}
1429 | 		iKeysB = append(iKeysB, j)
1430 | 	}
1431 | 	if len(errorArr) != 0 {
1432 | 		return DataFrame{Err: fmt.Errorf(strings.Join(errorArr, "\n"))}
1433 | 	}
1434 | 
1435 | 	aCols := df.columns
1436 | 	bCols := b.columns
1437 | 	// Initialize newCols
1438 | 	var newCols []series.Series
1439 | 	for _, i := range iKeysA {
1440 | 		newCols = append(newCols, aCols[i].Empty())
1441 | 	}
1442 | 	var iNotKeysA []int
1443 | 	for i := 0; i < df.ncols; i++ {
1444 | 		if !inIntSlice(i, iKeysA) {
1445 | 			iNotKeysA = append(iNotKeysA, i)
1446 | 			newCols = append(newCols, aCols[i].Empty())
1447 | 		}
1448 | 	}
1449 | 	var iNotKeysB []int
1450 | 	for i := 0; i < b.ncols; i++ {
1451 | 		if !inIntSlice(i, iKeysB) {
1452 | 			iNotKeysB = append(iNotKeysB, i)
1453 | 			newCols = append(newCols, bCols[i].Empty())
1454 | 		}
1455 | 	}
1456 | 
1457 | 	// Fill newCols
1458 | 	var yesmatched []struct{ i, j int }
1459 | 	var nonmatched []int
1460 | 	for j := 0; j < b.nrows; j++ {
1461 | 		matched := false
1462 | 		for i := 0; i < df.nrows; i++ {
1463 | 			match := true
1464 | 			for k := range keys {
1465 | 				aElem := aCols[iKeysA[k]].Elem(i)
1466 | 				bElem := bCols[iKeysB[k]].Elem(j)
1467 | 				match = match && aElem.Eq(bElem)
1468 | 			}
1469 | 			if match {
1470 | 				matched = true
1471 | 				yesmatched = append(yesmatched, struct{ i, j int }{i, j})
1472 | 			}
1473 | 		}
1474 | 		if !matched {
1475 | 			nonmatched = append(nonmatched, j)
1476 | 		}
1477 | 	}
1478 | 	for _, v := range yesmatched {
1479 | 		i := v.i
1480 | 		j := v.j
1481 | 		ii := 0
1482 | 		for _, k := range iKeysA {
1483 | 			elem := aCols[k].Elem(i)
1484 | 			newCols[ii].Append(elem)
1485 | 			ii++
1486 | 		}
1487 | 		for _, k := range iNotKeysA {
1488 | 			elem := aCols[k].Elem(i)
1489 | 			newCols[ii].Append(elem)
1490 | 			ii++
1491 | 		}
1492 | 		for _, k := range iNotKeysB {
1493 | 			elem := bCols[k].Elem(j)
1494 | 			newCols[ii].Append(elem)
1495 | 			ii++
1496 | 		}
1497 | 	}
1498 | 	for _, j := range nonmatched {
1499 | 		ii := 0
1500 | 		for _, k := range iKeysB {
1501 | 			elem := bCols[k].Elem(j)
1502 | 			newCols[ii].Append(elem)
1503 | 			ii++
1504 | 		}
1505 | 		// for _ = range iNotKeysA {
1506 | 		// 	newCols[ii].Append(nil)
1507 | 		// 	ii++
1508 | 		// }
1509 | 		for _, k := range iNotKeysA {
1510 | 			_ = k
1511 | 			newCols[ii].Append(nil)
1512 | 			ii++
1513 | 		}
1514 | 		for _, k := range iNotKeysB {
1515 | 			elem := bCols[k].Elem(j)
1516 | 			newCols[ii].Append(elem)
1517 | 			ii++
1518 | 		}
1519 | 	}
1520 | 	return New(newCols...)
1521 | }
1522 | 
1523 | // OuterJoin returns a DataFrame containing the outer join of two DataFrames.
1524 | func (df DataFrame) OuterJoin(b DataFrame, keys ...string) DataFrame {
1525 | 	if len(keys) == 0 {
1526 | 		return DataFrame{Err: fmt.Errorf("join keys not specified")}
1527 | 	}
1528 | 	// Check that we have all given keys in both DataFrames
1529 | 	var iKeysA []int
1530 | 	var iKeysB []int
1531 | 	var errorArr []string
1532 | 	for _, key := range keys {
1533 | 		i := df.colIndex(key)
1534 | 		if i < 0 {
1535 | 			errorArr = append(errorArr, fmt.Sprintf("can't find key %q on left DataFrame", key))
1536 | 		}
1537 | 		iKeysA = append(iKeysA, i)
1538 | 		j := b.colIndex(key)
1539 | 		if j < 0 {
1540 | 			errorArr = append(errorArr, fmt.Sprintf("can't find key %q on right DataFrame", key))
1541 | 		}
1542 | 		iKeysB = append(iKeysB, j)
1543 | 	}
1544 | 	if len(errorArr) != 0 {
1545 | 		return DataFrame{Err: fmt.Errorf(strings.Join(errorArr, "\n"))}
1546 | 	}
1547 | 
1548 | 	aCols := df.columns
1549 | 	bCols := b.columns
1550 | 	// Initialize newCols
1551 | 	var newCols []series.Series
1552 | 	for _, i := range iKeysA {
1553 | 		newCols = append(newCols, aCols[i].Empty())
1554 | 	}
1555 | 	var iNotKeysA []int
1556 | 	for i := 0; i < df.ncols; i++ {
1557 | 		if !inIntSlice(i, iKeysA) {
1558 | 			iNotKeysA = append(iNotKeysA, i)
1559 | 			newCols = append(newCols, aCols[i].Empty())
1560 | 		}
1561 | 	}
1562 | 	var iNotKeysB []int
1563 | 	for i := 0; i < b.ncols; i++ {
1564 | 		if !inIntSlice(i, iKeysB) {
1565 | 			iNotKeysB = append(iNotKeysB, i)
1566 | 			newCols = append(newCols, bCols[i].Empty())
1567 | 		}
1568 | 	}
1569 | 
1570 | 	// Fill newCols
1571 | 	for i := 0; i < df.nrows; i++ {
1572 | 		matched := false
1573 | 		for j := 0; j < b.nrows; j++ {
1574 | 			match := true
1575 | 			for k := range keys {
1576 | 				aElem := aCols[iKeysA[k]].Elem(i)
1577 | 				bElem := bCols[iKeysB[k]].Elem(j)
1578 | 				match = match && aElem.Eq(bElem)
1579 | 			}
1580 | 			if match {
1581 | 				matched = true
1582 | 				ii := 0
1583 | 				for _, k := range iKeysA {
1584 | 					elem := aCols[k].Elem(i)
1585 | 					newCols[ii].Append(elem)
1586 | 					ii++
1587 | 				}
1588 | 				for _, k := range iNotKeysA {
1589 | 					elem := aCols[k].Elem(i)
1590 | 					newCols[ii].Append(elem)
1591 | 					ii++
1592 | 				}
1593 | 				for _, k := range iNotKeysB {
1594 | 					elem := bCols[k].Elem(j)
1595 | 					newCols[ii].Append(elem)
1596 | 					ii++
1597 | 				}
1598 | 			}
1599 | 		}
1600 | 		if !matched {
1601 | 			ii := 0
1602 | 			for _, k := range iKeysA {
1603 | 				elem := aCols[k].Elem(i)
1604 | 				newCols[ii].Append(elem)
1605 | 				ii++
1606 | 			}
1607 | 			for _, k := range iNotKeysA {
1608 | 				elem := aCols[k].Elem(i)
1609 | 				newCols[ii].Append(elem)
1610 | 				ii++
1611 | 			}
1612 | 			// for _,_ = range iNotKeysB {
1613 | 			// 	newCols[ii].Append(nil)
1614 | 			// 	ii++
1615 | 			// }
1616 | 			for _, k := range iNotKeysB {
1617 | 				_ = k
1618 | 				newCols[ii].Append(nil)
1619 | 				ii++
1620 | 			}
1621 | 		}
1622 | 	}
1623 | 	for j := 0; j < b.nrows; j++ {
1624 | 		matched := false
1625 | 		for i := 0; i < df.nrows; i++ {
1626 | 			match := true
1627 | 			for k := range keys {
1628 | 				aElem := aCols[iKeysA[k]].Elem(i)
1629 | 				bElem := bCols[iKeysB[k]].Elem(j)
1630 | 				match = match && aElem.Eq(bElem)
1631 | 			}
1632 | 			if match {
1633 | 				matched = true
1634 | 			}
1635 | 		}
1636 | 		if !matched {
1637 | 			ii := 0
1638 | 			for _, k := range iKeysB {
1639 | 				elem := bCols[k].Elem(j)
1640 | 				newCols[ii].Append(elem)
1641 | 				ii++
1642 | 			}
1643 | 			// for _ = range iNotKeysA {
1644 | 			// 	newCols[ii].Append(nil)
1645 | 			// 	ii++
1646 | 			// }
1647 | 			for _, k := range iNotKeysA {
1648 | 				_ = k
1649 | 				newCols[ii].Append(nil)
1650 | 				ii++
1651 | 			}
1652 | 			for _, k := range iNotKeysB {
1653 | 				elem := bCols[k].Elem(j)
1654 | 				newCols[ii].Append(elem)
1655 | 				ii++
1656 | 			}
1657 | 		}
1658 | 	}
1659 | 	return New(newCols...)
1660 | }
1661 | 
1662 | // CrossJoin returns a DataFrame containing the cross join of two DataFrames.
1663 | func (df DataFrame) CrossJoin(b DataFrame) DataFrame {
1664 | 	aCols := df.columns
1665 | 	bCols := b.columns
1666 | 	// Initialize newCols
1667 | 	var newCols []series.Series
1668 | 	for i := 0; i < df.ncols; i++ {
1669 | 		newCols = append(newCols, aCols[i].Empty())
1670 | 	}
1671 | 	for i := 0; i < b.ncols; i++ {
1672 | 		newCols = append(newCols, bCols[i].Empty())
1673 | 	}
1674 | 	// Fill newCols
1675 | 	for i := 0; i < df.nrows; i++ {
1676 | 		for j := 0; j < b.nrows; j++ {
1677 | 			for ii := 0; ii < df.ncols; ii++ {
1678 | 				elem := aCols[ii].Elem(i)
1679 | 				newCols[ii].Append(elem)
1680 | 			}
1681 | 			for ii := 0; ii < b.ncols; ii++ {
1682 | 				jj := ii + df.ncols
1683 | 				elem := bCols[ii].Elem(j)
1684 | 				newCols[jj].Append(elem)
1685 | 			}
1686 | 		}
1687 | 	}
1688 | 	return New(newCols...)
1689 | }
1690 | 
1691 | // colIndex returns the index of the column with name `s`. If it fails to find the
1692 | // column it returns -1 instead.
1693 | func (df DataFrame) colIndex(s string) int {
1694 | 	for k, v := range df.Names() {
1695 | 		if v == s {
1696 | 			return k
1697 | 		}
1698 | 	}
1699 | 	return -1
1700 | }
1701 | 
1702 | // Records return the string record representation of a DataFrame.
1703 | func (df DataFrame) Records() [][]string {
1704 | 	var records [][]string
1705 | 	records = append(records, df.Names())
1706 | 	if df.ncols == 0 || df.nrows == 0 {
1707 | 		return records
1708 | 	}
1709 | 	var tRecords [][]string
1710 | 	for _, col := range df.columns {
1711 | 		tRecords = append(tRecords, col.Records())
1712 | 	}
1713 | 	records = append(records, transposeRecords(tRecords)...)
1714 | 	return records
1715 | }
1716 | 
1717 | // Maps return the array of maps representation of a DataFrame.
1718 | func (df DataFrame) Maps() []map[string]interface{} {
1719 | 	maps := make([]map[string]interface{}, df.nrows)
1720 | 	colnames := df.Names()
1721 | 	for i := 0; i < df.nrows; i++ {
1722 | 		m := make(map[string]interface{})
1723 | 		for k, v := range colnames {
1724 | 			val := df.columns[k].Val(i)
1725 | 			m[v] = val
1726 | 		}
1727 | 		maps[i] = m
1728 | 	}
1729 | 	return maps
1730 | }
1731 | 
1732 | // Elem returns the element on row `r` and column `c`. Will panic if the index is
1733 | // out of bounds.
1734 | func (df DataFrame) Elem(r, c int) series.Element {
1735 | 	return df.columns[c].Elem(r)
1736 | }
1737 | 
// fixColnames assigns a name to the missing column names and makes it so that the
// column names are unique. The slice is modified in place.
//
// Empty names are replaced by "X<n>", using the lowest counter values that do
// not collide with a name already present. Afterwards every name that occurred
// more than once in the input has all of its occurrences renamed to
// "<name>_<n>", again skipping any candidate that is already taken. Duplicated
// names are processed in sorted order so the outcome is deterministic.
func fixColnames(colnames []string) {
	// inUse counts how often each name currently appears in colnames, which
	// turns the "is this candidate taken?" question into a map lookup.
	// positions groups the indexes of every non-empty name in input order.
	inUse := make(map[string]int, len(colnames))
	positions := make(map[string][]int, len(colnames))
	var missing []int
	for i, name := range colnames {
		inUse[name]++
		if name == "" {
			missing = append(missing, i)
			continue
		}
		positions[name] = append(positions[name], i)
	}

	// rename swaps the name at index i and keeps the usage counts in sync.
	rename := func(i int, name string) {
		inUse[colnames[i]]--
		colnames[i] = name
		inUse[name]++
	}

	// Autofill missing column names
	counter := 0
	for _, i := range missing {
		proposedName := fmt.Sprintf("X%d", counter)
		for inUse[proposedName] > 0 {
			counter++
			proposedName = fmt.Sprintf("X%d", counter)
		}
		rename(i, proposedName)
		counter++
	}

	// Only names that appeared more than once in the input need a suffix.
	// Sort them because map iteration order is not stable.
	var dupnames []string
	for name, idx := range positions {
		if len(idx) > 1 {
			dupnames = append(dupnames, name)
		}
	}
	sort.Strings(dupnames)

	// Add a suffix to the duplicated colnames
	for _, name := range dupnames {
		counter := 0
		for _, i := range positions[name] {
			proposedName := fmt.Sprintf("%s_%d", name, counter)
			for inUse[proposedName] > 0 {
				counter++
				proposedName = fmt.Sprintf("%s_%d", name, counter)
			}
			rename(i, proposedName)
			counter++
		}
	}
}
1798 | 
// findInStringSlice returns the index of the first element of s equal to str,
// or -1 when str does not occur in s.
func findInStringSlice(str string, s []string) int {
	for pos := 0; pos < len(s); pos++ {
		if s[pos] == str {
			return pos
		}
	}
	return -1
}
1807 | 
1808 | func parseSelectIndexes(l int, indexes SelectIndexes, colnames []string) ([]int, error) {
1809 | 	var idx []int
1810 | 	switch indexes.(type) {
1811 | 	case []int:
1812 | 		idx = indexes.([]int)
1813 | 	case int:
1814 | 		idx = []int{indexes.(int)}
1815 | 	case []bool:
1816 | 		bools := indexes.([]bool)
1817 | 		if len(bools) != l {
1818 | 			return nil, fmt.Errorf("indexing error: index dimensions mismatch")
1819 | 		}
1820 | 		for i, b := range bools {
1821 | 			if b {
1822 | 				idx = append(idx, i)
1823 | 			}
1824 | 		}
1825 | 	case string:
1826 | 		s := indexes.(string)
1827 | 		i := findInStringSlice(s, colnames)
1828 | 		if i < 0 {
1829 | 			return nil, fmt.Errorf("can't select columns: column name %q not found", s)
1830 | 		}
1831 | 		idx = append(idx, i)
1832 | 	case []string:
1833 | 		xs := indexes.([]string)
1834 | 		for _, s := range xs {
1835 | 			i := findInStringSlice(s, colnames)
1836 | 			if i < 0 {
1837 | 				return nil, fmt.Errorf("can't select columns: column name %q not found", s)
1838 | 			}
1839 | 			idx = append(idx, i)
1840 | 		}
1841 | 	case series.Series:
1842 | 		s := indexes.(series.Series)
1843 | 		if err := s.Err; err != nil {
1844 | 			return nil, fmt.Errorf("indexing error: new values has errors: %v", err)
1845 | 		}
1846 | 		if s.HasNaN() {
1847 | 			return nil, fmt.Errorf("indexing error: indexes contain NaN")
1848 | 		}
1849 | 		switch s.Type() {
1850 | 		case series.Int:
1851 | 			return s.Int()
1852 | 		case series.Bool:
1853 | 			bools, err := s.Bool()
1854 | 			if err != nil {
1855 | 				return nil, fmt.Errorf("indexing error: %v", err)
1856 | 			}
1857 | 			return parseSelectIndexes(l, bools, colnames)
1858 | 		case series.String:
1859 | 			xs := indexes.(series.Series).Records()
1860 | 			return parseSelectIndexes(l, xs, colnames)
1861 | 		default:
1862 | 			return nil, fmt.Errorf("indexing error: unknown indexing mode")
1863 | 		}
1864 | 	default:
1865 | 		return nil, fmt.Errorf("indexing error: unknown indexing mode")
1866 | 	}
1867 | 	return idx, nil
1868 | }
1869 | 
1870 | func findType(arr []string) (series.Type, error) {
1871 | 	var hasFloats, hasInts, hasBools, hasStrings bool
1872 | 	for _, str := range arr {
1873 | 		if str == "" || str == "NaN" {
1874 | 			continue
1875 | 		}
1876 | 		if _, err := strconv.Atoi(str); err == nil {
1877 | 			hasInts = true
1878 | 			continue
1879 | 		}
1880 | 		if _, err := strconv.ParseFloat(str, 64); err == nil {
1881 | 			hasFloats = true
1882 | 			continue
1883 | 		}
1884 | 		if str == "true" || str == "false" {
1885 | 			hasBools = true
1886 | 			continue
1887 | 		}
1888 | 		hasStrings = true
1889 | 	}
1890 | 
1891 | 	switch {
1892 | 	case hasStrings:
1893 | 		return series.String, nil
1894 | 	case hasBools:
1895 | 		return series.Bool, nil
1896 | 	case hasFloats:
1897 | 		return series.Float, nil
1898 | 	case hasInts:
1899 | 		return series.Int, nil
1900 | 	default:
1901 | 		return series.String, fmt.Errorf("couldn't detect type")
1902 | 	}
1903 | }
1904 | 
// transposeRecords swaps the two dimensions of x, so that element [j][i] of the
// input becomes element [i][j] of the output. The width of the input is taken
// from its first entry. An empty input is returned unchanged.
func transposeRecords(x [][]string) [][]string {
	if len(x) == 0 {
		return x
	}
	height, width := len(x), len(x[0])
	out := make([][]string, width)
	for c := range out {
		out[c] = make([]string, height)
	}
	for r, line := range x {
		for c := 0; c < width; c++ {
			out[c][r] = line[c]
		}
	}
	return out
}
1921 | 
// inIntSlice reports whether i occurs anywhere in is.
func inIntSlice(i int, is []int) bool {
	for pos := 0; pos < len(is); pos++ {
		if is[pos] == i {
			return true
		}
	}
	return false
}
1930 | 
// Matrix is an interface which is compatible with gonum's mat.Matrix interface.
// It only asks for the two read-only accessors declared below, so this package
// can accept matrix-like values without depending on gonum itself.
type Matrix interface {
	// At returns the float64 value stored at position (i, j).
	At(i, j int) float64
	// Dims reports the dimensions of the matrix as (r, c).
	Dims() (r, c int)
}
1936 | 
1937 | // Describe prints the summary statistics for each column of the dataframe
1938 | func (df DataFrame) Describe() DataFrame {
1939 | 	labels := series.Strings([]string{
1940 | 		"mean",
1941 | 		"median",
1942 | 		"stddev",
1943 | 		"min",
1944 | 		"25%",
1945 | 		"50%",
1946 | 		"75%",
1947 | 		"max",
1948 | 	})
1949 | 	labels.Name = "column"
1950 | 
1951 | 	ss := []series.Series{labels}
1952 | 
1953 | 	for _, col := range df.columns {
1954 | 		var newCol series.Series
1955 | 		switch col.Type() {
1956 | 		case series.String:
1957 | 			newCol = series.New([]string{
1958 | 				"-",
1959 | 				"-",
1960 | 				"-",
1961 | 				col.MinStr(),
1962 | 				"-",
1963 | 				"-",
1964 | 				"-",
1965 | 				col.MaxStr(),
1966 | 			},
1967 | 				col.Type(),
1968 | 				col.Name,
1969 | 			)
1970 | 		case series.Bool:
1971 | 			fallthrough
1972 | 		case series.Float:
1973 | 			fallthrough
1974 | 		case series.Int:
1975 | 			newCol = series.New([]float64{
1976 | 				col.Mean(),
1977 | 				col.Median(),
1978 | 				col.StdDev(),
1979 | 				col.Min(),
1980 | 				col.Quantile(0.25),
1981 | 				col.Quantile(0.50),
1982 | 				col.Quantile(0.75),
1983 | 				col.Max(),
1984 | 			},
1985 | 				series.Float,
1986 | 				col.Name,
1987 | 			)
1988 | 		}
1989 | 		ss = append(ss, newCol)
1990 | 	}
1991 | 
1992 | 	ddf := New(ss...)
1993 | 	return ddf
1994 | }
1995 | 


--------------------------------------------------------------------------------