├── series
├── type-string.go
├── type-int.go
├── type-float.go
├── type-bool.go
├── benchmarks_test.go
├── series.go
└── series_test.go
├── dataframe
├── benchmark_test.go
├── examples_test.go
└── dataframe.go
├── CHANGELOG.md
├── LICENSE.md
└── README.md
/series/type-string.go:
--------------------------------------------------------------------------------
1 | package series
2 |
3 | import (
4 | "fmt"
5 | "math"
6 | "strconv"
7 | "strings"
8 | )
9 |
10 | type stringElement struct {
11 | e string
12 | nan bool
13 | }
14 |
15 | func (e *stringElement) Set(value interface{}) {
16 | e.nan = false
17 | switch value.(type) {
18 | case string:
19 | e.e = string(value.(string))
20 | if e.e == "NaN" {
21 | e.nan = true
22 | return
23 | }
24 | case int:
25 | e.e = strconv.Itoa(value.(int))
26 | case float64:
27 | e.e = strconv.FormatFloat(value.(float64), 'f', 6, 64)
28 | case bool:
29 | b := value.(bool)
30 | if b {
31 | e.e = "true"
32 | } else {
33 | e.e = "false"
34 | }
35 | case Element:
36 | e.e = value.(Element).String()
37 | default:
38 | e.nan = true
39 | return
40 | }
41 | return
42 | }
43 |
44 | func (e stringElement) Copy() Element {
45 | if e.IsNA() {
46 | return &stringElement{"", true}
47 | }
48 | return &stringElement{e.e, false}
49 | }
50 |
51 | func (e stringElement) IsNA() bool {
52 | if e.nan {
53 | return true
54 | }
55 | return false
56 | }
57 |
58 | func (e stringElement) Type() Type {
59 | return String
60 | }
61 |
62 | func (e stringElement) Val() ElementValue {
63 | if e.IsNA() {
64 | return nil
65 | }
66 | return string(e.e)
67 | }
68 |
69 | func (e stringElement) String() string {
70 | if e.IsNA() {
71 | return "NaN"
72 | }
73 | return string(e.e)
74 | }
75 |
76 | func (e stringElement) Int() (int, error) {
77 | if e.IsNA() {
78 | return 0, fmt.Errorf("can't convert NaN to int")
79 | }
80 | return strconv.Atoi(e.e)
81 | }
82 |
83 | func (e stringElement) Float() float64 {
84 | if e.IsNA() {
85 | return math.NaN()
86 | }
87 | f, err := strconv.ParseFloat(e.e, 64)
88 | if err != nil {
89 | return math.NaN()
90 | }
91 | return f
92 | }
93 |
94 | func (e stringElement) Bool() (bool, error) {
95 | if e.IsNA() {
96 | return false, fmt.Errorf("can't convert NaN to bool")
97 | }
98 | switch strings.ToLower(e.e) {
99 | case "true", "t", "1":
100 | return true, nil
101 | case "false", "f", "0":
102 | return false, nil
103 | }
104 | return false, fmt.Errorf("can't convert String \"%v\" to bool", e.e)
105 | }
106 |
107 | func (e stringElement) Eq(elem Element) bool {
108 | if e.IsNA() || elem.IsNA() {
109 | return false
110 | }
111 | return e.e == elem.String()
112 | }
113 |
114 | func (e stringElement) Neq(elem Element) bool {
115 | if e.IsNA() || elem.IsNA() {
116 | return false
117 | }
118 | return e.e != elem.String()
119 | }
120 |
121 | func (e stringElement) Less(elem Element) bool {
122 | if e.IsNA() || elem.IsNA() {
123 | return false
124 | }
125 | return e.e < elem.String()
126 | }
127 |
128 | func (e stringElement) LessEq(elem Element) bool {
129 | if e.IsNA() || elem.IsNA() {
130 | return false
131 | }
132 | return e.e <= elem.String()
133 | }
134 |
135 | func (e stringElement) Greater(elem Element) bool {
136 | if e.IsNA() || elem.IsNA() {
137 | return false
138 | }
139 | return e.e > elem.String()
140 | }
141 |
142 | func (e stringElement) GreaterEq(elem Element) bool {
143 | if e.IsNA() || elem.IsNA() {
144 | return false
145 | }
146 | return e.e >= elem.String()
147 | }
148 |
--------------------------------------------------------------------------------
/series/type-int.go:
--------------------------------------------------------------------------------
1 | package series
2 |
3 | import (
4 | "fmt"
5 | "math"
6 | "strconv"
7 | )
8 |
9 | type intElement struct {
10 | e int
11 | nan bool
12 | }
13 |
14 | func (e *intElement) Set(value interface{}) {
15 | e.nan = false
16 | switch value.(type) {
17 | case string:
18 | if value.(string) == "NaN" {
19 | e.nan = true
20 | return
21 | }
22 | i, err := strconv.Atoi(value.(string))
23 | if err != nil {
24 | e.nan = true
25 | return
26 | }
27 | e.e = i
28 | case int:
29 | e.e = int(value.(int))
30 | case float64:
31 | f := value.(float64)
32 | if math.IsNaN(f) ||
33 | math.IsInf(f, 0) ||
34 | math.IsInf(f, 1) {
35 | e.nan = true
36 | return
37 | }
38 | e.e = int(f)
39 | case bool:
40 | b := value.(bool)
41 | if b {
42 | e.e = 1
43 | } else {
44 | e.e = 0
45 | }
46 | case Element:
47 | v, err := value.(Element).Int()
48 | if err != nil {
49 | e.nan = true
50 | return
51 | }
52 | e.e = v
53 | default:
54 | e.nan = true
55 | return
56 | }
57 | return
58 | }
59 |
60 | func (e intElement) Copy() Element {
61 | if e.IsNA() {
62 | return &intElement{0, true}
63 | }
64 | return &intElement{e.e, false}
65 | }
66 |
67 | func (e intElement) IsNA() bool {
68 | if e.nan {
69 | return true
70 | }
71 | return false
72 | }
73 |
74 | func (e intElement) Type() Type {
75 | return Int
76 | }
77 |
78 | func (e intElement) Val() ElementValue {
79 | if e.IsNA() {
80 | return nil
81 | }
82 | return int(e.e)
83 | }
84 |
85 | func (e intElement) String() string {
86 | if e.IsNA() {
87 | return "NaN"
88 | }
89 | return fmt.Sprint(e.e)
90 | }
91 |
92 | func (e intElement) Int() (int, error) {
93 | if e.IsNA() {
94 | return 0, fmt.Errorf("can't convert NaN to int")
95 | }
96 | return int(e.e), nil
97 | }
98 |
99 | func (e intElement) Float() float64 {
100 | if e.IsNA() {
101 | return math.NaN()
102 | }
103 | return float64(e.e)
104 | }
105 |
106 | func (e intElement) Bool() (bool, error) {
107 | if e.IsNA() {
108 | return false, fmt.Errorf("can't convert NaN to bool")
109 | }
110 | switch e.e {
111 | case 1:
112 | return true, nil
113 | case 0:
114 | return false, nil
115 | }
116 | return false, fmt.Errorf("can't convert Int \"%v\" to bool", e.e)
117 | }
118 |
119 | func (e intElement) Eq(elem Element) bool {
120 | i, err := elem.Int()
121 | if err != nil || e.IsNA() {
122 | return false
123 | }
124 | return e.e == i
125 | }
126 |
127 | func (e intElement) Neq(elem Element) bool {
128 | i, err := elem.Int()
129 | if err != nil || e.IsNA() {
130 | return false
131 | }
132 | return e.e != i
133 | }
134 |
135 | func (e intElement) Less(elem Element) bool {
136 | i, err := elem.Int()
137 | if err != nil || e.IsNA() {
138 | return false
139 | }
140 | return e.e < i
141 | }
142 |
143 | func (e intElement) LessEq(elem Element) bool {
144 | i, err := elem.Int()
145 | if err != nil || e.IsNA() {
146 | return false
147 | }
148 | return e.e <= i
149 | }
150 |
151 | func (e intElement) Greater(elem Element) bool {
152 | i, err := elem.Int()
153 | if err != nil || e.IsNA() {
154 | return false
155 | }
156 | return e.e > i
157 | }
158 |
159 | func (e intElement) GreaterEq(elem Element) bool {
160 | i, err := elem.Int()
161 | if err != nil || e.IsNA() {
162 | return false
163 | }
164 | return e.e >= i
165 | }
166 |
--------------------------------------------------------------------------------
/series/type-float.go:
--------------------------------------------------------------------------------
1 | package series
2 |
3 | import (
4 | "fmt"
5 | "math"
6 | "strconv"
7 | )
8 |
9 | type floatElement struct {
10 | e float64
11 | nan bool
12 | }
13 |
14 | func (e *floatElement) Set(value interface{}) {
15 | e.nan = false
16 | switch value.(type) {
17 | case string:
18 | if value.(string) == "NaN" {
19 | e.nan = true
20 | return
21 | }
22 | f, err := strconv.ParseFloat(value.(string), 64)
23 | if err != nil {
24 | e.nan = true
25 | return
26 | }
27 | e.e = f
28 | case int:
29 | e.e = float64(value.(int))
30 | case float64:
31 | e.e = float64(value.(float64))
32 | case bool:
33 | b := value.(bool)
34 | if b {
35 | e.e = 1
36 | } else {
37 | e.e = 0
38 | }
39 | case Element:
40 | e.e = value.(Element).Float()
41 | default:
42 | e.nan = true
43 | return
44 | }
45 | return
46 | }
47 |
48 | func (e floatElement) Copy() Element {
49 | if e.IsNA() {
50 | return &floatElement{0.0, true}
51 | }
52 | return &floatElement{e.e, false}
53 | }
54 |
55 | func (e floatElement) IsNA() bool {
56 | if e.nan || math.IsNaN(e.e) {
57 | return true
58 | }
59 | return false
60 | }
61 |
62 | func (e floatElement) Type() Type {
63 | return Float
64 | }
65 |
66 | func (e floatElement) Val() ElementValue {
67 | if e.IsNA() {
68 | return nil
69 | }
70 | return float64(e.e)
71 | }
72 |
73 | func (e floatElement) String() string {
74 | if e.IsNA() {
75 | return "NaN"
76 | }
77 | return fmt.Sprintf("%f", e.e)
78 | }
79 |
80 | func (e floatElement) Int() (int, error) {
81 | if e.IsNA() {
82 | return 0, fmt.Errorf("can't convert NaN to int")
83 | }
84 | f := e.e
85 | if math.IsInf(f, 1) || math.IsInf(f, -1) {
86 | return 0, fmt.Errorf("can't convert Inf to int")
87 | }
88 | if math.IsNaN(f) {
89 | return 0, fmt.Errorf("can't convert NaN to int")
90 | }
91 | return int(f), nil
92 | }
93 |
94 | func (e floatElement) Float() float64 {
95 | if e.IsNA() {
96 | return math.NaN()
97 | }
98 | return float64(e.e)
99 | }
100 |
101 | func (e floatElement) Bool() (bool, error) {
102 | if e.IsNA() {
103 | return false, fmt.Errorf("can't convert NaN to bool")
104 | }
105 | switch e.e {
106 | case 1:
107 | return true, nil
108 | case 0:
109 | return false, nil
110 | }
111 | return false, fmt.Errorf("can't convert Float \"%v\" to bool", e.e)
112 | }
113 |
114 | func (e floatElement) Eq(elem Element) bool {
115 | f := elem.Float()
116 | if e.IsNA() || math.IsNaN(f) {
117 | return false
118 | }
119 | return e.e == f
120 | }
121 |
122 | func (e floatElement) Neq(elem Element) bool {
123 | f := elem.Float()
124 | if e.IsNA() || math.IsNaN(f) {
125 | return false
126 | }
127 | return e.e != f
128 | }
129 |
130 | func (e floatElement) Less(elem Element) bool {
131 | f := elem.Float()
132 | if e.IsNA() || math.IsNaN(f) {
133 | return false
134 | }
135 | return e.e < f
136 | }
137 |
138 | func (e floatElement) LessEq(elem Element) bool {
139 | f := elem.Float()
140 | if e.IsNA() || math.IsNaN(f) {
141 | return false
142 | }
143 | return e.e <= f
144 | }
145 |
146 | func (e floatElement) Greater(elem Element) bool {
147 | f := elem.Float()
148 | if e.IsNA() || math.IsNaN(f) {
149 | return false
150 | }
151 | return e.e > f
152 | }
153 |
154 | func (e floatElement) GreaterEq(elem Element) bool {
155 | f := elem.Float()
156 | if e.IsNA() || math.IsNaN(f) {
157 | return false
158 | }
159 | return e.e >= f
160 | }
161 |
--------------------------------------------------------------------------------
/series/type-bool.go:
--------------------------------------------------------------------------------
1 | package series
2 |
3 | import (
4 | "fmt"
5 | "math"
6 | "strings"
7 | )
8 |
9 | type boolElement struct {
10 | e bool
11 | nan bool
12 | }
13 |
14 | func (e *boolElement) Set(value interface{}) {
15 | e.nan = false
16 | switch value.(type) {
17 | case string:
18 | if value.(string) == "NaN" {
19 | e.nan = true
20 | return
21 | }
22 | switch strings.ToLower(value.(string)) {
23 | case "true", "t", "1":
24 | e.e = true
25 | case "false", "f", "0":
26 | e.e = false
27 | default:
28 | e.nan = true
29 | return
30 | }
31 | case int:
32 | switch value.(int) {
33 | case 1:
34 | e.e = true
35 | case 0:
36 | e.e = false
37 | default:
38 | e.nan = true
39 | return
40 | }
41 | case float64:
42 | switch value.(float64) {
43 | case 1:
44 | e.e = true
45 | case 0:
46 | e.e = false
47 | default:
48 | e.nan = true
49 | return
50 | }
51 | case bool:
52 | e.e = value.(bool)
53 | case Element:
54 | b, err := value.(Element).Bool()
55 | if err != nil {
56 | e.nan = true
57 | return
58 | }
59 | e.e = b
60 | default:
61 | e.nan = true
62 | return
63 | }
64 | return
65 | }
66 |
67 | func (e boolElement) Copy() Element {
68 | if e.IsNA() {
69 | return &boolElement{false, true}
70 | }
71 | return &boolElement{e.e, false}
72 | }
73 |
74 | func (e boolElement) IsNA() bool {
75 | if e.nan {
76 | return true
77 | }
78 | return false
79 | }
80 |
81 | func (e boolElement) Type() Type {
82 | return Bool
83 | }
84 |
85 | func (e boolElement) Val() ElementValue {
86 | if e.IsNA() {
87 | return nil
88 | }
89 | return bool(e.e)
90 | }
91 |
92 | func (e boolElement) String() string {
93 | if e.IsNA() {
94 | return "NaN"
95 | }
96 | if e.e {
97 | return "true"
98 | }
99 | return "false"
100 | }
101 |
102 | func (e boolElement) Int() (int, error) {
103 | if e.IsNA() {
104 | return 0, fmt.Errorf("can't convert NaN to int")
105 | }
106 | if e.e == true {
107 | return 1, nil
108 | }
109 | return 0, nil
110 | }
111 |
112 | func (e boolElement) Float() float64 {
113 | if e.IsNA() {
114 | return math.NaN()
115 | }
116 | if e.e {
117 | return 1.0
118 | }
119 | return 0.0
120 | }
121 |
122 | func (e boolElement) Bool() (bool, error) {
123 | if e.IsNA() {
124 | return false, fmt.Errorf("can't convert NaN to bool")
125 | }
126 | return bool(e.e), nil
127 | }
128 |
129 | func (e boolElement) Eq(elem Element) bool {
130 | b, err := elem.Bool()
131 | if err != nil || e.IsNA() {
132 | return false
133 | }
134 | return e.e == b
135 | }
136 |
137 | func (e boolElement) Neq(elem Element) bool {
138 | b, err := elem.Bool()
139 | if err != nil || e.IsNA() {
140 | return false
141 | }
142 | return e.e != b
143 | }
144 |
145 | func (e boolElement) Less(elem Element) bool {
146 | b, err := elem.Bool()
147 | if err != nil || e.IsNA() {
148 | return false
149 | }
150 | return !e.e && b
151 | }
152 |
153 | func (e boolElement) LessEq(elem Element) bool {
154 | b, err := elem.Bool()
155 | if err != nil || e.IsNA() {
156 | return false
157 | }
158 | return !e.e || b
159 | }
160 |
161 | func (e boolElement) Greater(elem Element) bool {
162 | b, err := elem.Bool()
163 | if err != nil || e.IsNA() {
164 | return false
165 | }
166 | return e.e && !b
167 | }
168 |
169 | func (e boolElement) GreaterEq(elem Element) bool {
170 | b, err := elem.Bool()
171 | if err != nil || e.IsNA() {
172 | return false
173 | }
174 | return e.e || !b
175 | }
176 |
--------------------------------------------------------------------------------
/series/benchmarks_test.go:
--------------------------------------------------------------------------------
1 | package series_test
2 |
3 | import (
4 | "math/rand"
5 | "strconv"
6 | "testing"
7 |
8 | "github.com/libonomy/libonomy-gota/series"
9 | )
10 |
// generateInts returns n pseudo-random non-negative ints drawn from the
// package-level math/rand source. A non-positive n yields a nil slice.
func generateInts(n int) (data []int) {
	if n <= 0 {
		return nil
	}
	// The length is known up front, so allocate once instead of growing.
	data = make([]int, n)
	for i := range data {
		data[i] = rand.Int()
	}
	return data
}
17 |
// generateFloats returns n pseudo-random float64s in [0, 1) drawn from the
// package-level math/rand source. A non-positive n yields a nil slice.
func generateFloats(n int) (data []float64) {
	if n <= 0 {
		return nil
	}
	// The length is known up front, so allocate once instead of growing.
	data = make([]float64, n)
	for i := range data {
		data[i] = rand.Float64()
	}
	return data
}
24 |
// generateStrings returns n strings, each the decimal form of a pseudo-random
// non-negative int from the package-level math/rand source. A non-positive n
// yields a nil slice.
func generateStrings(n int) (data []string) {
	if n <= 0 {
		return nil
	}
	// The length is known up front, so allocate once instead of growing.
	data = make([]string, n)
	for i := range data {
		data[i] = strconv.Itoa(rand.Int())
	}
	return data
}
31 |
// generateBools returns n pseudo-random bools; each is true exactly when
// rand.Intn(2) yields 1. A non-positive n yields a nil slice.
func generateBools(n int) (data []bool) {
	if n <= 0 {
		return nil
	}
	// The length is known up front, so allocate once instead of growing.
	data = make([]bool, n)
	for i := range data {
		data[i] = rand.Intn(2) == 1
	}
	return data
}
43 |
// generateIntsN returns n pseudo-random ints in [0, k) drawn from the
// package-level math/rand source. A non-positive n yields a nil slice; as with
// rand.Intn, k must be positive whenever n is.
func generateIntsN(n, k int) (data []int) {
	if n <= 0 {
		return nil
	}
	// The length is known up front, so allocate once instead of growing.
	data = make([]int, n)
	for i := range data {
		data[i] = rand.Intn(k)
	}
	return data
}
50 |
51 | func BenchmarkSeries_New(b *testing.B) {
52 | rand.Seed(100)
53 | table := []struct {
54 | name string
55 | data interface{}
56 | seriesType series.Type
57 | }{
58 | {
59 | "[]bool(100000)_Int",
60 | generateBools(100000),
61 | series.Int,
62 | },
63 | {
64 | "[]bool(100000)_String",
65 | generateBools(100000),
66 | series.String,
67 | },
68 | {
69 | "[]bool(100000)_Bool",
70 | generateBools(100000),
71 | series.Bool,
72 | },
73 | {
74 | "[]bool(100000)_Float",
75 | generateBools(100000),
76 | series.Float,
77 | },
78 | {
79 | "[]string(100000)_Int",
80 | generateStrings(100000),
81 | series.Int,
82 | },
83 | {
84 | "[]string(100000)_String",
85 | generateStrings(100000),
86 | series.String,
87 | },
88 | {
89 | "[]string(100000)_Bool",
90 | generateStrings(100000),
91 | series.Bool,
92 | },
93 | {
94 | "[]string(100000)_Float",
95 | generateStrings(100000),
96 | series.Float,
97 | },
98 | {
99 | "[]float64(100000)_Int",
100 | generateFloats(100000),
101 | series.Int,
102 | },
103 | {
104 | "[]float64(100000)_String",
105 | generateFloats(100000),
106 | series.String,
107 | },
108 | {
109 | "[]float64(100000)_Bool",
110 | generateFloats(100000),
111 | series.Bool,
112 | },
113 | {
114 | "[]float64(100000)_Float",
115 | generateFloats(100000),
116 | series.Float,
117 | },
118 | {
119 | "[]int(100000)_Int",
120 | generateInts(100000),
121 | series.Int,
122 | },
123 | {
124 | "[]int(100000)_String",
125 | generateInts(100000),
126 | series.String,
127 | },
128 | {
129 | "[]int(100000)_Bool",
130 | generateInts(100000),
131 | series.Bool,
132 | },
133 | {
134 | "[]int(100000)_Float",
135 | generateInts(100000),
136 | series.Float,
137 | },
138 | }
139 | for _, test := range table {
140 | b.Run(test.name, func(b *testing.B) {
141 | for i := 0; i < b.N; i++ {
142 | series.New(test.data, test.seriesType, test.name)
143 | }
144 | })
145 | }
146 | }
147 |
148 | func BenchmarkSeries_Copy(b *testing.B) {
149 | rand.Seed(100)
150 | table := []struct {
151 | name string
152 | series series.Series
153 | }{
154 | {
155 | "[]int(100000)_Int",
156 | series.Ints(generateInts(100000)),
157 | },
158 | {
159 | "[]int(100000)_String",
160 | series.Strings(generateInts(100000)),
161 | },
162 | {
163 | "[]int(100000)_Bool",
164 | series.Bools(generateInts(100000)),
165 | },
166 | {
167 | "[]int(100000)_Float",
168 | series.Floats(generateInts(100000)),
169 | },
170 | }
171 | for _, test := range table {
172 | b.Run(test.name, func(b *testing.B) {
173 | for i := 0; i < b.N; i++ {
174 | test.series.Copy()
175 | }
176 | })
177 | }
178 | }
179 |
180 | func BenchmarkSeries_Subset(b *testing.B) {
181 | rand.Seed(100)
182 | table := []struct {
183 | name string
184 | indexes interface{}
185 | series series.Series
186 | }{
187 | {
188 | "[]int(100000)_Int",
189 | generateIntsN(10000, 2),
190 | series.Ints(generateInts(100000)),
191 | },
192 | {
193 | "[]int(100000)_String",
194 | generateIntsN(10000, 2),
195 | series.Strings(generateInts(100000)),
196 | },
197 | {
198 | "[]int(100000)_Bool",
199 | generateIntsN(10000, 2),
200 | series.Bools(generateInts(100000)),
201 | },
202 | {
203 | "[]int(100000)_Float",
204 | generateIntsN(10000, 2),
205 | series.Floats(generateInts(100000)),
206 | },
207 | }
208 | for _, test := range table {
209 | b.Run(test.name, func(b *testing.B) {
210 | for i := 0; i < b.N; i++ {
211 | test.series.Subset(test.indexes)
212 | }
213 | })
214 | }
215 | }
216 |
217 | func BenchmarkSeries_Set(b *testing.B) {
218 | rand.Seed(100)
219 | table := []struct {
220 | name string
221 | indexes interface{}
222 | newValues series.Series
223 | series series.Series
224 | }{
225 | {
226 | "[]int(100000)_Int",
227 | generateIntsN(10000, 2),
228 | series.Ints(generateIntsN(10000, 2)),
229 | series.Ints(generateInts(100000)),
230 | },
231 | {
232 | "[]int(100000)_String",
233 | generateIntsN(10000, 2),
234 | series.Strings(generateIntsN(10000, 2)),
235 | series.Strings(generateInts(100000)),
236 | },
237 | {
238 | "[]int(100000)_Bool",
239 | generateIntsN(10000, 2),
240 | series.Bools(generateIntsN(10000, 2)),
241 | series.Bools(generateInts(100000)),
242 | },
243 | {
244 | "[]int(100000)_Float",
245 | generateIntsN(10000, 2),
246 | series.Floats(generateIntsN(10000, 2)),
247 | series.Floats(generateInts(100000)),
248 | },
249 | }
250 | for _, test := range table {
251 | s := test.series.Copy()
252 | b.Run(test.name, func(b *testing.B) {
253 | for i := 0; i < b.N; i++ {
254 | s.Set(test.indexes, test.newValues)
255 | }
256 | })
257 | }
258 | }
259 |
--------------------------------------------------------------------------------
/dataframe/benchmark_test.go:
--------------------------------------------------------------------------------
1 | package dataframe_test
2 |
3 | import (
4 | "math/rand"
5 | "strconv"
6 | "testing"
7 |
8 | "github.com/libonomy/libonomy-gota/dataframe"
9 | "github.com/libonomy/libonomy-gota/series"
10 | )
11 |
12 | func generateSeries(n, rep int) (data []series.Series) {
13 | rand.Seed(100)
14 | for j := 0; j < rep; j++ {
15 | var is []int
16 | var bs []bool
17 | var fs []float64
18 | var ss []string
19 | for i := 0; i < n; i++ {
20 | is = append(is, rand.Int())
21 | }
22 | for i := 0; i < n; i++ {
23 | fs = append(fs, rand.Float64())
24 | }
25 | for i := 0; i < n; i++ {
26 | ss = append(ss, strconv.Itoa(rand.Int()))
27 | }
28 | for i := 0; i < n; i++ {
29 | r := rand.Intn(2)
30 | b := false
31 | if r == 1 {
32 | b = true
33 | }
34 | bs = append(bs, b)
35 | }
36 | data = append(data, series.Ints(is))
37 | data = append(data, series.Bools(bs))
38 | data = append(data, series.Floats(fs))
39 | data = append(data, series.Strings(ss))
40 | }
41 | return
42 | }
43 |
// generateIntsN returns n pseudo-random ints in [0, k) drawn from the
// package-level math/rand source. A non-positive n yields a nil slice; as with
// rand.Intn, k must be positive whenever n is.
func generateIntsN(n, k int) (data []int) {
	if n <= 0 {
		return nil
	}
	// The length is known up front, so allocate once instead of growing.
	data = make([]int, n)
	for i := range data {
		data[i] = rand.Intn(k)
	}
	return data
}
50 |
51 | func BenchmarkNew(b *testing.B) {
52 | table := []struct {
53 | name string
54 | data []series.Series
55 | }{
56 | {
57 | "100000x4",
58 | generateSeries(100000, 1),
59 | },
60 | {
61 | "100000x40",
62 | generateSeries(100000, 10),
63 | },
64 | {
65 | "100000x400",
66 | generateSeries(100000, 100),
67 | },
68 | {
69 | "1000x40",
70 | generateSeries(1000, 10),
71 | },
72 | {
73 | "1000x4000",
74 | generateSeries(1000, 1000),
75 | },
76 | {
77 | "1000x40000",
78 | generateSeries(1000, 10000),
79 | },
80 | }
81 | for _, test := range table {
82 | b.Run(test.name, func(b *testing.B) {
83 | for i := 0; i < b.N; i++ {
84 | dataframe.New(test.data...)
85 | }
86 | })
87 | }
88 | }
89 |
90 | func BenchmarkDataFrame_Arrange(b *testing.B) {
91 | data := dataframe.New(generateSeries(100000, 5)...)
92 | table := []struct {
93 | name string
94 | data dataframe.DataFrame
95 | key []dataframe.Order
96 | }{
97 | {
98 | "100000x20_1",
99 | data,
100 | []dataframe.Order{dataframe.Sort("X0")},
101 | },
102 | {
103 | "100000x20_2",
104 | data,
105 | []dataframe.Order{
106 | dataframe.Sort("X0"),
107 | dataframe.Sort("X1"),
108 | },
109 | },
110 | {
111 | "100000x20_3",
112 | data,
113 | []dataframe.Order{
114 | dataframe.Sort("X0"),
115 | dataframe.Sort("X1"),
116 | dataframe.Sort("X2"),
117 | },
118 | },
119 | }
120 | for _, test := range table {
121 | b.Run(test.name, func(b *testing.B) {
122 | for i := 0; i < b.N; i++ {
123 | test.data.Arrange(test.key...)
124 | }
125 | })
126 | }
127 | }
128 |
129 | func BenchmarkDataFrame_Subset(b *testing.B) {
130 | b.ReportAllocs()
131 | data1000x20 := dataframe.New(generateSeries(1000, 5)...)
132 | data1000x200 := dataframe.New(generateSeries(1000, 50)...)
133 | data1000x2000 := dataframe.New(generateSeries(1000, 500)...)
134 | data100000x20 := dataframe.New(generateSeries(100000, 5)...)
135 | data1000000x20 := dataframe.New(generateSeries(1000000, 5)...)
136 | idx10 := generateIntsN(10, 10)
137 | idx100 := generateIntsN(100, 100)
138 | idx1000 := generateIntsN(1000, 1000)
139 | idx10000 := generateIntsN(10000, 10000)
140 | idx100000 := generateIntsN(100000, 100000)
141 | idx1000000 := generateIntsN(1000000, 1000000)
142 | table := []struct {
143 | name string
144 | data dataframe.DataFrame
145 | indexes interface{}
146 | }{
147 | {
148 | "1000000x20_100",
149 | data1000000x20,
150 | idx100,
151 | },
152 | {
153 | "1000000x20_1000",
154 | data1000000x20,
155 | idx1000,
156 | },
157 | {
158 | "1000000x20_10000",
159 | data1000000x20,
160 | idx10000,
161 | },
162 | {
163 | "1000000x20_100000",
164 | data1000000x20,
165 | idx100000,
166 | },
167 | {
168 | "1000000x20_1000000",
169 | data1000000x20,
170 | idx1000000,
171 | },
172 | {
173 | "100000x20_100",
174 | data100000x20,
175 | idx100,
176 | },
177 | {
178 | "100000x20_1000",
179 | data100000x20,
180 | idx1000,
181 | },
182 | {
183 | "100000x20_10000",
184 | data100000x20,
185 | idx10000,
186 | },
187 | {
188 | "100000x20_100000",
189 | data100000x20,
190 | idx100000,
191 | },
192 | {
193 | "1000x20_10",
194 | data1000x20,
195 | idx10,
196 | },
197 | {
198 | "1000x20_100",
199 | data1000x20,
200 | idx100,
201 | },
202 | {
203 | "1000x20_1000",
204 | data1000x20,
205 | idx1000,
206 | },
207 | {
208 | "1000x200_10",
209 | data1000x200,
210 | idx10,
211 | },
212 | {
213 | "1000x200_100",
214 | data1000x200,
215 | idx100,
216 | },
217 | {
218 | "1000x200_1000",
219 | data1000x200,
220 | idx1000,
221 | },
222 | {
223 | "1000x2000_10",
224 | data1000x2000,
225 | idx10,
226 | },
227 | {
228 | "1000x2000_100",
229 | data1000x2000,
230 | idx100,
231 | },
232 | {
233 | "1000x2000_1000",
234 | data1000x2000,
235 | idx1000,
236 | },
237 | }
238 | for _, test := range table {
239 | b.Run(test.name, func(b *testing.B) {
240 | for i := 0; i < b.N; i++ {
241 | test.data.Subset(test.indexes)
242 | }
243 | })
244 | }
245 | }
246 |
247 | func BenchmarkDataFrame_Elem(b *testing.B) {
248 | data := dataframe.New(generateSeries(100000, 5)...)
249 | table := []struct {
250 | name string
251 | data dataframe.DataFrame
252 | }{
253 | {
254 | "100000x20_ALL",
255 | data,
256 | },
257 | }
258 | for _, test := range table {
259 | b.Run(test.name, func(b *testing.B) {
260 | for i := 0; i < b.N; i++ {
261 | for k := 0; k < 100000; k++ {
262 | test.data.Elem(k, 0)
263 | }
264 | }
265 | })
266 | }
267 | }
268 |
--------------------------------------------------------------------------------
/dataframe/examples_test.go:
--------------------------------------------------------------------------------
1 | package dataframe_test
2 |
3 | import (
4 | "fmt"
5 | "strings"
6 |
7 | "github.com/libonomy/libonomy-gota/dataframe"
8 | "github.com/libonomy/libonomy-gota/series"
9 | )
10 |
11 | func ExampleNew() {
12 | df := dataframe.New(
13 | series.New([]string{"b", "a"}, series.String, "COL.1"),
14 | series.New([]int{1, 2}, series.Int, "COL.2"),
15 | series.New([]float64{3.0, 4.0}, series.Float, "COL.3"),
16 | )
17 | fmt.Println(df)
18 | }
19 |
20 | func ExampleLoadStructs() {
21 | type User struct {
22 | Name string
23 | Age int
24 | Accuracy float64
25 | }
26 | users := []User{
27 | User{"Aram", 17, 0.2},
28 | User{"Juan", 18, 0.8},
29 | User{"Ana", 22, 0.5},
30 | }
31 | df := dataframe.LoadStructs(users)
32 | fmt.Println(df)
33 | }
34 |
35 | func ExampleLoadRecords() {
36 | df := dataframe.LoadRecords(
37 | [][]string{
38 | []string{"A", "B", "C", "D"},
39 | []string{"a", "4", "5.1", "true"},
40 | []string{"k", "5", "7.0", "true"},
41 | []string{"k", "4", "6.0", "true"},
42 | []string{"a", "2", "7.1", "false"},
43 | },
44 | )
45 | fmt.Println(df)
46 | }
47 |
48 | func ExampleLoadRecords_options() {
49 | df := dataframe.LoadRecords(
50 | [][]string{
51 | []string{"A", "B", "C", "D"},
52 | []string{"a", "4", "5.1", "true"},
53 | []string{"k", "5", "7.0", "true"},
54 | []string{"k", "4", "6.0", "true"},
55 | []string{"a", "2", "7.1", "false"},
56 | },
57 | dataframe.DetectTypes(false),
58 | dataframe.DefaultType(series.Float),
59 | dataframe.WithTypes(map[string]series.Type{
60 | "A": series.String,
61 | "D": series.Bool,
62 | }),
63 | )
64 | fmt.Println(df)
65 | }
66 |
67 | func ExampleLoadMaps() {
68 | df := dataframe.LoadMaps(
69 | []map[string]interface{}{
70 | map[string]interface{}{
71 | "A": "a",
72 | "B": 1,
73 | "C": true,
74 | "D": 0,
75 | },
76 | map[string]interface{}{
77 | "A": "b",
78 | "B": 2,
79 | "C": true,
80 | "D": 0.5,
81 | },
82 | },
83 | )
84 | fmt.Println(df)
85 | }
86 |
87 | func ExampleReadCSV() {
88 | csvStr := `
89 | Country,Date,Age,Amount,Id
90 | "United States",2012-02-01,50,112.1,01234
91 | "United States",2012-02-01,32,321.31,54320
92 | "United Kingdom",2012-02-01,17,18.2,12345
93 | "United States",2012-02-01,32,321.31,54320
94 | "United Kingdom",2012-02-01,NA,18.2,12345
95 | "United States",2012-02-01,32,321.31,54320
96 | "United States",2012-02-01,32,321.31,54320
97 | Spain,2012-02-01,66,555.42,00241
98 | `
99 | df := dataframe.ReadCSV(strings.NewReader(csvStr))
100 | fmt.Println(df)
101 | }
102 |
103 | func ExampleReadJSON() {
104 | jsonStr := `[{"COL.2":1,"COL.3":3},{"COL.1":5,"COL.2":2,"COL.3":2},{"COL.1":6,"COL.2":3,"COL.3":1}]`
105 | df := dataframe.ReadJSON(strings.NewReader(jsonStr))
106 | fmt.Println(df)
107 | }
108 |
109 | func ExampleDataFrame_Subset() {
110 | df := dataframe.LoadRecords(
111 | [][]string{
112 | []string{"A", "B", "C", "D"},
113 | []string{"a", "4", "5.1", "true"},
114 | []string{"k", "5", "7.0", "true"},
115 | []string{"k", "4", "6.0", "true"},
116 | []string{"a", "2", "7.1", "false"},
117 | },
118 | )
119 | sub := df.Subset([]int{0, 2})
120 | fmt.Println(sub)
121 | }
122 |
123 | func ExampleDataFrame_Select() {
124 | df := dataframe.LoadRecords(
125 | [][]string{
126 | []string{"A", "B", "C", "D"},
127 | []string{"a", "4", "5.1", "true"},
128 | []string{"k", "5", "7.0", "true"},
129 | []string{"k", "4", "6.0", "true"},
130 | []string{"a", "2", "7.1", "false"},
131 | },
132 | )
133 | sel1 := df.Select([]int{0, 2})
134 | sel2 := df.Select([]string{"A", "C"})
135 | fmt.Println(sel1)
136 | fmt.Println(sel2)
137 | }
138 |
139 | func ExampleDataFrame_Filter() {
140 | df := dataframe.LoadRecords(
141 | [][]string{
142 | []string{"A", "B", "C", "D"},
143 | []string{"a", "4", "5.1", "true"},
144 | []string{"k", "5", "7.0", "true"},
145 | []string{"k", "4", "6.0", "true"},
146 | []string{"a", "2", "7.1", "false"},
147 | },
148 | )
149 | fil := df.Filter(
150 | dataframe.F{
151 | Colname: "A",
152 | Comparator: series.Eq,
153 | Comparando: "a",
154 | },
155 | dataframe.F{
156 | Colname: "B",
157 | Comparator: series.Greater,
158 | Comparando: 4,
159 | },
160 | )
161 | fil2 := fil.Filter(
162 | dataframe.F{
163 | Colname: "D",
164 | Comparator: series.Eq,
165 | Comparando: true,
166 | },
167 | )
168 | fmt.Println(fil)
169 | fmt.Println(fil2)
170 | }
171 |
172 | func ExampleDataFrame_Mutate() {
173 | df := dataframe.LoadRecords(
174 | [][]string{
175 | []string{"A", "B", "C", "D"},
176 | []string{"a", "4", "5.1", "true"},
177 | []string{"k", "5", "7.0", "true"},
178 | []string{"k", "4", "6.0", "true"},
179 | []string{"a", "2", "7.1", "false"},
180 | },
181 | )
182 | // Change column C with a new one
183 | mut := df.Mutate(
184 | series.New([]string{"a", "b", "c", "d"}, series.String, "C"),
185 | )
186 | // Add a new column E
187 | mut2 := df.Mutate(
188 | series.New([]string{"a", "b", "c", "d"}, series.String, "E"),
189 | )
190 | fmt.Println(mut)
191 | fmt.Println(mut2)
192 | }
193 |
194 | func ExampleDataFrame_InnerJoin() {
195 | df := dataframe.LoadRecords(
196 | [][]string{
197 | []string{"A", "B", "C", "D"},
198 | []string{"a", "4", "5.1", "true"},
199 | []string{"k", "5", "7.0", "true"},
200 | []string{"k", "4", "6.0", "true"},
201 | []string{"a", "2", "7.1", "false"},
202 | },
203 | )
204 | df2 := dataframe.LoadRecords(
205 | [][]string{
206 | []string{"A", "F", "D"},
207 | []string{"1", "1", "true"},
208 | []string{"4", "2", "false"},
209 | []string{"2", "8", "false"},
210 | []string{"5", "9", "false"},
211 | },
212 | )
213 | join := df.InnerJoin(df2, "D")
214 | fmt.Println(join)
215 | }
216 |
217 | func ExampleDataFrame_Set() {
218 | df := dataframe.LoadRecords(
219 | [][]string{
220 | []string{"A", "B", "C", "D"},
221 | []string{"a", "4", "5.1", "true"},
222 | []string{"k", "5", "7.0", "true"},
223 | []string{"k", "4", "6.0", "true"},
224 | []string{"a", "2", "7.1", "false"},
225 | },
226 | )
227 | df2 := df.Set(
228 | series.Ints([]int{0, 2}),
229 | dataframe.LoadRecords(
230 | [][]string{
231 | []string{"A", "B", "C", "D"},
232 | []string{"b", "4", "6.0", "true"},
233 | []string{"c", "3", "6.0", "false"},
234 | },
235 | ),
236 | )
237 | fmt.Println(df2)
238 | }
239 |
240 | func ExampleDataFrame_Arrange() {
241 | df := dataframe.LoadRecords(
242 | [][]string{
243 | []string{"A", "B", "C", "D"},
244 | []string{"a", "4", "5.1", "true"},
245 | []string{"b", "4", "6.0", "true"},
246 | []string{"c", "3", "6.0", "false"},
247 | []string{"a", "2", "7.1", "false"},
248 | },
249 | )
250 | sorted := df.Arrange(
251 | dataframe.Sort("A"),
252 | dataframe.RevSort("B"),
253 | )
254 | fmt.Println(sorted)
255 | }
256 |
257 | func ExampleDataFrame_Describe() { // Example: print the summary statistics that Describe reports for a DataFrame.
258 | df := dataframe.LoadRecords( // DataFrame to be summarized
259 | [][]string{
260 | []string{"A", "B", "C", "D"}, // header record: column names
261 | []string{"a", "4", "5.1", "true"},
262 | []string{"b", "4", "6.0", "true"},
263 | []string{"c", "3", "6.0", "false"},
264 | []string{"a", "2", "7.1", "false"},
265 | },
266 | )
267 | fmt.Println(df.Describe()) // the function has no "Output:" comment, so `go test` compiles this example without running it
268 | }
269 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Change Log
2 |
3 | All notable changes to this project will be documented in this file.
4 | This project adheres to [Semantic Versioning](http://semver.org/).
5 |
6 | ## [0.10.1] - 2019-11-08
7 |
8 | ### Fixed
9 |
10 | - LoadRecords printing type debug information
11 | - Missing closing brackets in series.go
12 | - Fix gonum import path in dataframe_test
13 |
14 | ## [0.10.0] - 2019-11-08
15 |
16 | ### Changed
17 |
18 | - Merged dev branch changes from multiple collaborators (Sam Zaydel, Kyle
19 | Ellrott, Daniela Petruzalek, Christoph Laaber).
20 |
21 | ## [0.9.0] - 2016-10-03
22 |
23 | ### Added
24 |
25 | - Additional method to load arbitrary struct slices to DataFrames (Juan Álvarez)
26 | - New LoadOption Names to set initial column names (Sander van Harmelen).
27 | - Parser option for csv delimiter (Kyle Ellrott)
28 | - New Describe method for reporting summary statistics (Daniela Petruzalek)
29 |
30 | ### Changed
31 |
32 | - Improve the performance of multiple operations.
33 | - Code cleanup for better consistency (Sander van Harmelen)
34 | - Renamed 'Deselect' function to 'Drop' (Ben Marshall)
35 |
36 | ## [0.8.0] - 2016-12-12
37 |
38 | ### Added
39 |
40 | - Series.Order method and tests.
41 | - Series.IsNaN method and tests.
42 | - DataFrame.Arrange method and tests.
43 | - DataFrame.Capply method and tests.
44 | - DataFrame.Rapply method and tests.
45 | - Benchmarks for several operations on both the `series` and
46 | `dataframe` packages.
47 | - Many optimizations that increase the performance dramatically.
48 | - New LoadOption where the elements to be parsed as NaN from string
49 | can be selected.
50 | - Gota can now return an implementation of `gonum/mat64.Matrix`
51 | interface via `DataFrame.Matrix()` and load a `mat64.Matrix` via
52 | `dataframe.LoadMatrix()`.
53 |
54 | ### Changed
55 |
56 | - elementInterface is now exported as Element.
57 | - Split element.go into separate files for the implementations of the
58 | Element interface.
59 | - LoadOptions API has been renamed for better documentation via `godoc`.
60 | - `Series.Set` and `DataFrame.Set` now modify the structure in place
61 | for performance considerations. If one wants to use the old
62 | behaviour, it is suggested to use `DataFrame.Copy().Set(...)`
63 | instead of `DataFrame.Set(...)`.
64 | - `DataFrame.Dim` has been changed to `DataFrame.Dims` for consistency
65 | with the `mat64.Matrix` interface.
66 | - When printing a large `DataFrame` now the behaviour of the stringer
67 | interface is much nicer, showing only the first 10 rows and limiting
68 | the number of characters that can be shown by line
69 |
70 | ### Removed
71 |
72 | - Some unused functions from the helpers.go file.
73 |
74 | ### Fixed
75 |
76 | - Linter errors.
77 | - stringElement.Float now returns NaN instead of 0 when applicable.
78 | - Autorenaming column names when `hasHeaders == false` now is
79 | consistent with the autorename used with `dataframe.New`
80 | - Bug where duplicated column names were not being assigned consecutive
81 | suffix numbers if the number of duplicates was greater than two.
82 |
83 | ## [0.7.0] - 2016-11-27
84 |
85 | ### Added
86 |
87 | - Many more table tests for both `series` and `dataframe`
88 | - Set method for `Series` and `DataFrame`
89 | - When loading data from CSV, JSON, or Records, different
90 | `LoadOptions` can now be configured. This includes assigning
91 | a default type, manually specifying the column types and others.
92 | - More documentation for previously undocumented functions.
93 |
94 | ### Changed
95 |
96 | - The project has been restructured on separated `dataframe` and
97 | `series` packages.
98 | - Reviewed entire `Series` codebase for better style and
99 | maintainability.
100 | - `DataFrame.Select` now accepts several types of indexes
101 | - Error messages are now more consistent.
102 | - The standard way of checking for errors on both `series` and
103 | `dataframe` is to check the `Err` field on each structure.
104 | - `ReadCSV`/`ReadJSON` and `WriteCSV`/`WriteJSON` now accept
105 | `io.Reader` and `io.Writer` respectively.
106 | - Updated README with the new changes.
107 |
108 | ### Removed
109 |
110 | - Removed unnecessary abstraction layer on `Series.elements`
111 |
112 | ## [0.6.0] - 2016-10-29
113 |
114 | ### Added
115 |
116 | - InnerJoin, CrossJoin, RightJoin, LeftJoin, OuterJoin functions
117 |
118 | ### Changed
119 |
120 | - More code refactoring for easier maintenance and management
121 | - Add more documentation to the exported functions
122 | - Remove unnecessary methods and structures from the exported API
123 |
124 | ### Removed
125 |
126 | - colnames and coltypes from the DataFrame structure
127 |
128 | ## [0.5.0] - 2016-08-09
129 |
130 | ### Added
131 |
132 | - Read and write DataFrames from CSV, JSON, []map[string]interface{},
133 | [][]string.
134 | - New constructor for DataFrame accept Series and NamedSeries as
135 | arguments.
136 | - Subset, Select, Rename, Mutate, Filter, RBind and CBind methods
137 | - Much Better error handling
138 |
139 | ### Changed
140 |
141 | - Almost complete rewrite of DataFrame code.
142 | - Now using Series as first class citizens and building blocks for
143 | DataFrames.
144 |
145 | ### Removed
146 |
147 | - Merge/Join functions have been temporarily removed to be adapted to
148 | the new architecture.
149 | - Cell interface for allowing custom types into the system.
150 |
151 | ## [0.4.0] - 2016-02-18
152 |
153 | ### Added
154 |
155 | - Getter methods for nrows and ncols.
156 | - An InnerJoin function that performs an Inner Merge/Join of two
157 | DataFrames by the given keys.
158 | - RightJoin and LeftJoin functions that perform outer right/outer
159 | left joins of two DataFrames by the given keys.
160 | - A CrossJoin function that performs a Cross Merge/Join of two
161 | DataFrames.
162 | - Cell interface now has to implement the NA() method that will
163 | return an empty cell for the given type.
164 | - Cell interface now has to implement a Copy method.
165 |
166 | ### Changed
167 |
168 | - The `cell` interface is now exported: `Cell`.
169 | - Cell method NA() is now IsNA().
170 | - The function parseColumn is now a method.
171 | - A number of fields and methods are now exported.
172 |
173 | ### Fixed
174 |
175 | - Now ensuring that generated subsets are in fact new copies entirely,
176 | not copying pointers to the same memory address.
177 |
178 | ## [0.3.0] - 2016-02-18
179 |
180 | ### Added
181 |
182 | - Getter and setter methods for the column names of a DataFrame
183 | - Bool column type has been made available
184 | - New Bool() interface
185 | - A `column` can now know if any of its elements is NA and a list of
186 | said NA elements ([]bool).
187 |
188 | ### Changed
189 |
190 | - Renamed `cell` interface elements to be more idiomatic:
191 | - ToInteger() is now Int()
192 | - ToFloat() is now Float()
193 | - The `cell` interface has changed. Int() and Float() now
194 | return pointers instead of values to prevent future conflicts when
195 | returning an error.
196 | - The `cell` interface has changed. Checksum() [16]byte added.
197 | - Using cell.Checksum() for identification of unique elements instead
198 | of raw strings.
199 | - The `cell` interface has changed, now also requires ToBool() method.
200 | - String type now does not contain a string, but a pointer to a string.
201 |
202 | ### Fixed
203 |
204 | - Bool type constructor function Bools now parses `bool` and `[]bool`
205 | elements correctly.
206 | - Int type constructor function Ints now parses `bool` and `[]bool`
207 | elements correctly.
208 | - Float type constructor function Floats now parses `bool` and `[]bool`
209 | elements correctly.
210 | - String type constructor function Strings now parses `bool` and `[]bool`
211 | elements correctly.
212 |
213 | ## [0.2.1] - 2016-02-14
214 |
215 | ### Fixed
216 |
217 | - Fixed a bug when the maximum number of characters on a column was
218 | not being updated properly when subsetting.
219 |
220 | ## [0.2.0] - 2016-02-13
221 |
222 | ### Added
223 |
224 | - Added a lot of unit tests
225 |
226 | ### Changed
227 |
228 | - The base types are now `df.String`, `df.Int`, and `df.Float`.
229 | - Restructured the project in different files.
230 | - Refactored the project so that it will allow columns to be of any
231 | type as long as it complies with the necessary interfaces.
232 |
233 | ## [0.1.0] - 2016-02-06
234 |
235 | ### Added
236 |
237 | - Load csv data to DataFrame.
238 | - Parse data to four supported types: `int`, `float64`, `date`
239 | & `string`.
240 | - Row/Column subsetting (Indexing, column names, row numbers, range).
241 | - Unique/Duplicated row subsetting.
242 | - DataFrame combinations by rows and columns (cbind/rbind).
243 |
244 | [0.1.0]: https://github.com/go-gota/gota/compare/v0.1.0...v0.1.0
245 | [0.2.0]: https://github.com/go-gota/gota/compare/v0.1.0...v0.2.0
246 | [0.2.1]: https://github.com/go-gota/gota/compare/v0.2.0...v0.2.1
247 | [0.3.0]: https://github.com/go-gota/gota/compare/v0.2.1...v0.3.0
248 | [0.4.0]: https://github.com/go-gota/gota/compare/v0.3.0...v0.4.0
249 | [0.5.0]: https://github.com/go-gota/gota/compare/v0.4.0...v0.5.0
250 | [0.6.0]: https://github.com/go-gota/gota/compare/v0.5.0...v0.6.0
251 | [0.7.0]: https://github.com/go-gota/gota/compare/v0.6.0...v0.7.0
252 | [0.8.0]: https://github.com/go-gota/gota/compare/v0.7.0...v0.8.0
253 | [0.9.0]: https://github.com/go-gota/gota/compare/v0.8.0...v0.9.0
254 | [0.10.0]: https://github.com/go-gota/gota/compare/v0.9.0...v0.10.0
255 | [0.10.1]: https://github.com/go-gota/gota/compare/v0.10.0...v0.10.1
256 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | Apache License
2 | ==============
3 |
4 | _Version 2.0, January 2004_
5 | _<http://www.apache.org/licenses/>_
6 |
7 | ### Terms and Conditions for use, reproduction, and distribution
8 |
9 | #### 1. Definitions
10 |
11 | “License” shall mean the terms and conditions for use, reproduction,
12 | and distribution as defined by Sections 1 through 9 of this document.
13 |
14 | “Licensor” shall mean the copyright owner or entity authorized by the
15 | copyright owner that is granting the License.
16 |
17 | “Legal Entity” shall mean the union of the acting entity and all other
18 | entities that control, are controlled by, or are under common control
19 | with that entity. For the purposes of this definition, “control”
20 | means **(i)** the power, direct or indirect, to cause the direction or
21 | management of such entity, whether by contract or otherwise, or
22 | **(ii)** ownership of fifty percent (50%) or more of the outstanding
23 | shares, or **(iii)** beneficial ownership of such entity.
24 |
25 | “You” (or “Your”) shall mean an individual or Legal Entity exercising
26 | permissions granted by this License.
27 |
28 | “Source” form shall mean the preferred form for making modifications,
29 | including but not limited to software source code, documentation
30 | source, and configuration files.
31 |
32 | “Object” form shall mean any form resulting from mechanical
33 | transformation or translation of a Source form, including but not
34 | limited to compiled object code, generated documentation, and
35 | conversions to other media types.
36 |
37 | “Work” shall mean the work of authorship, whether in Source or Object
38 | form, made available under the License, as indicated by a copyright
39 | notice that is included in or attached to the work (an example is
40 | provided in the Appendix below).
41 |
42 | “Derivative Works” shall mean any work, whether in Source or Object
43 | form, that is based on (or derived from) the Work and for which the
44 | editorial revisions, annotations, elaborations, or other modifications
45 | represent, as a whole, an original work of authorship. For the
46 | purposes of this License, Derivative Works shall not include works
47 | that remain separable from, or merely link (or bind by name) to the
48 | interfaces of, the Work and Derivative Works thereof.
49 |
50 | “Contribution” shall mean any work of authorship, including the
51 | original version of the Work and any modifications or additions to
52 | that Work or Derivative Works thereof, that is intentionally submitted
53 | to Licensor for inclusion in the Work by the copyright owner or by an
54 | individual or Legal Entity authorized to submit on behalf of the
55 | copyright owner. For the purposes of this definition, “submitted”
56 | means any form of electronic, verbal, or written communication sent to
57 | the Licensor or its representatives, including but not limited to
58 | communication on electronic mailing lists, source code control
59 | systems, and issue tracking systems that are managed by, or on behalf
60 | of, the Licensor for the purpose of discussing and improving the Work,
61 | but excluding communication that is conspicuously marked or otherwise
62 | designated in writing by the copyright owner as “Not a Contribution.”
63 |
64 | “Contributor” shall mean Licensor and any individual or Legal Entity
65 | on behalf of whom a Contribution has been received by Licensor and
66 | subsequently incorporated within the Work.
67 |
68 | #### 2. Grant of Copyright License
69 |
70 | Subject to the terms and conditions of this License, each Contributor
71 | hereby grants to You a perpetual, worldwide, non-exclusive, no-charge,
72 | royalty-free, irrevocable copyright license to reproduce, prepare
73 | Derivative Works of, publicly display, publicly perform, sublicense,
74 | and distribute the Work and such Derivative Works in Source or Object
75 | form.
76 |
77 | #### 3. Grant of Patent License
78 |
79 | Subject to the terms and conditions of this License, each Contributor
80 | hereby grants to You a perpetual, worldwide, non-exclusive, no-charge,
81 | royalty-free, irrevocable (except as stated in this section) patent
82 | license to make, have made, use, offer to sell, sell, import, and
83 | otherwise transfer the Work, where such license applies only to those
84 | patent claims licensable by such Contributor that are necessarily
85 | infringed by their Contribution(s) alone or by combination of their
86 | Contribution(s) with the Work to which such Contribution(s) was
87 | submitted. If You institute patent litigation against any entity
88 | (including a cross-claim or counterclaim in a lawsuit) alleging that
89 | the Work or a Contribution incorporated within the Work constitutes
90 | direct or contributory patent infringement, then any patent licenses
91 | granted to You under this License for that Work shall terminate as of
92 | the date such litigation is filed.
93 |
94 | #### 4. Redistribution
95 |
96 | You may reproduce and distribute copies of the Work or Derivative
97 | Works thereof in any medium, with or without modifications, and in
98 | Source or Object form, provided that You meet the following
99 | conditions:
100 |
101 | * **(a)** You must give any other recipients of the Work or Derivative
102 | Works a copy of this License; and
103 | * **(b)** You must cause any modified files to carry prominent notices
104 | stating that You changed the files; and
105 | * **(c)** You must retain, in the Source form of any Derivative Works
106 | that You distribute, all copyright, patent, trademark, and
107 | attribution notices from the Source form of the Work, excluding
108 | those notices that do not pertain to any part of the Derivative
109 | Works; and
110 | * **(d)** If the Work includes a “NOTICE” text file as part of its
111 | distribution, then any Derivative Works that You distribute must
112 | include a readable copy of the attribution notices contained within
113 | such NOTICE file, excluding those notices that do not pertain to any
114 | part of the Derivative Works, in at least one of the following
115 | places: within a NOTICE text file distributed as part of the
116 | Derivative Works; within the Source form or documentation, if
117 | provided along with the Derivative Works; or, within a display
118 | generated by the Derivative Works, if and wherever such third-party
119 | notices normally appear. The contents of the NOTICE file are for
120 | informational purposes only and do not modify the License. You may
121 | add Your own attribution notices within Derivative Works that You
122 | distribute, alongside or as an addendum to the NOTICE text from the
123 | Work, provided that such additional attribution notices cannot be
124 | construed as modifying the License.
125 |
126 | You may add Your own copyright statement to Your modifications and may
127 | provide additional or different license terms and conditions for use,
128 | reproduction, or distribution of Your modifications, or for any such
129 | Derivative Works as a whole, provided Your use, reproduction, and
130 | distribution of the Work otherwise complies with the conditions stated
131 | in this License.
132 |
133 | #### 5. Submission of Contributions
134 |
135 | Unless You explicitly state otherwise, any Contribution intentionally
136 | submitted for inclusion in the Work by You to the Licensor shall be
137 | under the terms and conditions of this License, without any additional
138 | terms or conditions. Notwithstanding the above, nothing herein shall
139 | supersede or modify the terms of any separate license agreement you
140 | may have executed with Licensor regarding such Contributions.
141 |
142 | #### 6. Trademarks
143 |
144 | This License does not grant permission to use the trade names,
145 | trademarks, service marks, or product names of the Licensor, except as
146 | required for reasonable and customary use in describing the origin of
147 | the Work and reproducing the content of the NOTICE file.
148 |
149 | #### 7. Disclaimer of Warranty
150 |
151 | Unless required by applicable law or agreed to in writing, Licensor
152 | provides the Work (and each Contributor provides its Contributions) on
153 | an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
154 | express or implied, including, without limitation, any warranties or
155 | conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR
156 | A PARTICULAR PURPOSE. You are solely responsible for determining the
157 | appropriateness of using or redistributing the Work and assume any
158 | risks associated with Your exercise of permissions under this License.
159 |
160 | #### 8. Limitation of Liability
161 |
162 | In no event and under no legal theory, whether in tort (including
163 | negligence), contract, or otherwise, unless required by applicable law
164 | (such as deliberate and grossly negligent acts) or agreed to in
165 | writing, shall any Contributor be liable to You for damages, including
166 | any direct, indirect, special, incidental, or consequential damages of
167 | any character arising as a result of this License or out of the use or
168 | inability to use the Work (including but not limited to damages for
169 | loss of goodwill, work stoppage, computer failure or malfunction, or
170 | any and all other commercial damages or losses), even if such
171 | Contributor has been advised of the possibility of such damages.
172 |
173 | #### 9. Accepting Warranty or Additional Liability
174 |
175 | While redistributing the Work or Derivative Works thereof, You may
176 | choose to offer, and charge a fee for, acceptance of support,
177 | warranty, indemnity, or other liability obligations and/or rights
178 | consistent with this License. However, in accepting such obligations,
179 | You may act only on Your own behalf and on Your sole responsibility,
180 | not on behalf of any other Contributor, and only if You agree to
181 | indemnify, defend, and hold each Contributor harmless for any
182 | liability incurred by, or claims asserted against, such Contributor by
183 | reason of your accepting any such warranty or additional liability.
184 |
185 | _END OF TERMS AND CONDITIONS_
186 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Gota: DataFrames, Series and Data Wrangling for Go
2 |
3 | This is an implementation of DataFrames, Series and data wrangling
4 | methods for the Go programming language. The API is still in flux so
5 | _use at your own risk_.
6 |
7 | ## DataFrame
8 |
9 | The term DataFrame typically refers to a tabular dataset that can be
10 | viewed as a two dimensional table. Often the columns of this dataset
11 | refer to a list of features, while the rows represent a number of
12 | measurements. As data in the real world is not perfect, DataFrame
13 | supports missing measurements or NaN elements.
14 |
15 | Common examples of DataFrames can be found in Excel sheets, CSV files
16 | or SQL database tables, but this data can come in a variety of other
17 | formats, like a collection of JSON objects or XML files.
18 |
19 | The utility of DataFrames lies in the ability to subset them, merge
20 | them, summarize the data for individual features or apply functions to
21 | entire rows or columns, all while keeping column type integrity.
22 |
23 | ### Usage
24 |
25 | #### Loading data
26 |
27 | DataFrames can be constructed by passing Series to the dataframe.New constructor
28 | function:
29 |
30 | ```go
31 | df := dataframe.New(
32 | series.New([]string{"b", "a"}, series.String, "COL.1"),
33 | series.New([]int{1, 2}, series.Int, "COL.2"),
34 | series.New([]float64{3.0, 4.0}, series.Float, "COL.3"),
35 | )
36 | ```
37 |
38 | You can also load the data directly from other formats.
39 | The base loading function takes some records in the
40 | form `[][]string` and returns a new DataFrame from there:
41 |
42 | ```go
43 | df := dataframe.LoadRecords(
44 | [][]string{
45 | []string{"A", "B", "C", "D"},
46 | []string{"a", "4", "5.1", "true"},
47 | []string{"k", "5", "7.0", "true"},
48 | []string{"k", "4", "6.0", "true"},
49 | []string{"a", "2", "7.1", "false"},
50 | },
51 | )
52 | ```
53 |
54 | Now you can also create DataFrames by loading a slice of arbitrary structs:
55 |
56 | ```go
57 | type User struct {
58 | Name string
59 | Age int
60 | Accuracy float64
61 | ignored bool // ignored since unexported
62 | }
63 | users := []User{
64 | {"Aram", 17, 0.2, true},
65 | {"Juan", 18, 0.8, true},
66 | {"Ana", 22, 0.5, true},
67 | }
68 | df := dataframe.LoadStructs(users)
69 | ```
70 |
71 | By default, the column types will be auto detected but this can be
72 | configured. For example, if we wish the default type to be `Float` but
73 | columns `A` and `D` are `String` and `Bool` respectively:
74 |
75 | ```go
76 | df := dataframe.LoadRecords(
77 | [][]string{
78 | []string{"A", "B", "C", "D"},
79 | []string{"a", "4", "5.1", "true"},
80 | []string{"k", "5", "7.0", "true"},
81 | []string{"k", "4", "6.0", "true"},
82 | []string{"a", "2", "7.1", "false"},
83 | },
84 | dataframe.DetectTypes(false),
85 | dataframe.DefaultType(series.Float),
86 | dataframe.WithTypes(map[string]series.Type{
87 | "A": series.String,
88 | "D": series.Bool,
89 | }),
90 | )
91 | ```
92 |
93 | Similarly, you can load the data stored in a `[]map[string]interface{}`:
94 |
95 | ```go
96 | df := dataframe.LoadMaps(
97 | []map[string]interface{}{
98 | map[string]interface{}{
99 | "A": "a",
100 | "B": 1,
101 | "C": true,
102 | "D": 0,
103 | },
104 | map[string]interface{}{
105 | "A": "b",
106 | "B": 2,
107 | "C": true,
108 | "D": 0.5,
109 | },
110 | },
111 | )
112 | ```
113 |
114 | You can also pass an `io.Reader` to the functions `ReadCSV`/`ReadJSON`
115 | and it will work as expected given that the data is correct:
116 |
117 | ```go
118 | csvStr := `
119 | Country,Date,Age,Amount,Id
120 | "United States",2012-02-01,50,112.1,01234
121 | "United States",2012-02-01,32,321.31,54320
122 | "United Kingdom",2012-02-01,17,18.2,12345
123 | "United States",2012-02-01,32,321.31,54320
124 | "United Kingdom",2012-02-01,NA,18.2,12345
125 | "United States",2012-02-01,32,321.31,54320
126 | "United States",2012-02-01,32,321.31,54320
127 | Spain,2012-02-01,66,555.42,00241
128 | `
129 | df := dataframe.ReadCSV(strings.NewReader(csvStr))
130 | ```
131 |
132 | ```go
133 | jsonStr := `[{"COL.2":1,"COL.3":3},{"COL.1":5,"COL.2":2,"COL.3":2},{"COL.1":6,"COL.2":3,"COL.3":1}]`
134 | df := dataframe.ReadJSON(strings.NewReader(jsonStr))
135 | ```
136 |
137 | #### Subsetting
138 |
139 | We can subset our DataFrames with the Subset method. For example if we
140 | want the first and third rows we can do the following:
141 |
142 | ```go
143 | sub := df.Subset([]int{0, 2})
144 | ```
145 |
146 | #### Column selection
147 |
148 | If instead of subsetting the rows we want to select specific columns,
149 | we can do so by index or column name:
150 |
151 | ```go
152 | sel1 := df.Select([]int{0, 2})
153 | sel2 := df.Select([]string{"A", "C"})
154 | ```
155 |
156 | #### Updating values
157 |
158 | In order to update the values of a DataFrame we can use the Set
159 | method:
160 |
161 | ```go
162 | df2 := df.Set(
163 | []int{0, 2},
164 | dataframe.LoadRecords(
165 | [][]string{
166 | []string{"A", "B", "C", "D"},
167 | []string{"b", "4", "6.0", "true"},
168 | []string{"c", "3", "6.0", "false"},
169 | },
170 | ),
171 | )
172 | ```
173 |
174 | #### Filtering
175 |
176 | For more complex row subsetting we can use the Filter method. For
177 | example, if we want the rows where the column "A" is equal to "a" or
178 | column "B" is greater than 4:
179 |
180 | ```go
181 | fil := df.Filter(
182 | dataframe.F{"A", series.Eq, "a"},
183 | dataframe.F{"B", series.Greater, 4},
184 | )
185 | fil2 := fil.Filter(
186 | dataframe.F{"D", series.Eq, true},
187 | )
188 | ```
189 |
190 | Filters inside Filter are combined as OR operations whereas if we chain
191 | Filter methods, they will behave as AND.
192 |
193 | #### Arrange
194 |
195 | With Arrange a DataFrame can be sorted by the given column names:
196 |
197 | ```go
198 | sorted := df.Arrange(
199 | dataframe.Sort("A"), // Sort in ascending order
200 | dataframe.RevSort("B"), // Sort in descending order
201 | )
202 | ```
203 |
204 | #### Mutate
205 |
206 | If we want to modify a column or add one based on a given Series at
207 | the end we can use the Mutate method:
208 |
209 | ```go
210 | // Change column C with a new one
211 | mut := df.Mutate(
212 | series.New([]string{"a", "b", "c", "d"}, series.String, "C"),
213 | )
214 | // Add a new column E
215 | mut2 := df.Mutate(
216 | series.New([]string{"a", "b", "c", "d"}, series.String, "E"),
217 | )
218 | ```
219 |
220 | #### Joins
221 |
222 | Different Join operations are supported (`InnerJoin`, `LeftJoin`,
223 | `RightJoin`, `CrossJoin`). In order to use these methods you have to
224 | specify which are the keys to be used for joining the DataFrames:
225 |
226 | ```go
227 | df := dataframe.LoadRecords(
228 | [][]string{
229 | []string{"A", "B", "C", "D"},
230 | []string{"a", "4", "5.1", "true"},
231 | []string{"k", "5", "7.0", "true"},
232 | []string{"k", "4", "6.0", "true"},
233 | []string{"a", "2", "7.1", "false"},
234 | },
235 | )
236 | df2 := dataframe.LoadRecords(
237 | [][]string{
238 | []string{"A", "F", "D"},
239 | []string{"1", "1", "true"},
240 | []string{"4", "2", "false"},
241 | []string{"2", "8", "false"},
242 | []string{"5", "9", "false"},
243 | },
244 | )
245 | join := df.InnerJoin(df2, "D")
246 | ```
247 |
248 | #### Function application
249 |
250 | Functions can be applied to the rows or columns of a DataFrame,
251 | casting the types as necessary:
252 |
253 | ```go
254 | mean := func(s series.Series) series.Series {
255 | floats := s.Float()
256 | sum := 0.0
257 | for _, f := range floats {
258 | sum += f
259 | }
260 | return series.Floats(sum / float64(len(floats)))
261 | }
262 | df.Capply(mean)
263 | df.Rapply(mean)
264 | ```
265 |
266 | #### Chaining operations
267 |
268 | DataFrames support a number of methods for wrangling the data,
269 | filtering, subsetting, selecting columns, adding new columns or
270 | modifying existing ones. All these methods can be chained one after
271 | another, and at the end of the procedure we can check if there have been any
272 | errors via the DataFrame Err field. If any of the methods in the chain
273 | returns an error, the remaining operations on the chain will become
274 | a no-op.
275 |
276 | ```go
277 | a = a.Rename("Origin", "Country").
278 | Filter(dataframe.F{"Age", "<", 50}).
279 | Filter(dataframe.F{"Origin", "==", "United States"}).
280 | Select("Id", "Origin", "Date").
281 | Subset([]int{1, 3})
282 | if a.Err != nil {
283 | log.Fatal("Oh noes!")
284 | }
285 | ```
286 |
287 | #### Print to console
288 |
289 | ```go
290 | fmt.Println(flights)
291 |
292 | > [336776x20] DataFrame
293 | >
294 | > X0 year month day dep_time sched_dep_time dep_delay arr_time ...
295 | > 0: 1 2013 1 1 517 515 2 830 ...
296 | > 1: 2 2013 1 1 533 529 4 850 ...
297 | > 2: 3 2013 1 1 542 540 2 923 ...
298 | > 3: 4 2013 1 1 544 545 -1 1004 ...
299 | > 4: 5 2013 1 1 554 600 -6 812 ...
300 | > 5: 6 2013 1 1 554 558 -4 740 ...
301 | > 6: 7 2013 1 1 555 600 -5 913 ...
302 | > 7: 8 2013 1 1 557 600 -3 709 ...
303 | > 8: 9 2013 1 1 557 600 -3 838 ...
304 | > 9: 10 2013 1 1 558 600 -2 753 ...
305 | > ... ... ... ... ... ... ... ... ...
306 | > ...
307 | >
308 | > Not Showing: sched_arr_time , arr_delay , carrier , flight ,
309 | > tailnum , origin , dest , air_time , distance , hour ,
310 | > minute , time_hour
311 | ```
312 |
313 | #### Interfacing with gonum
314 |
315 | A `gonum/mat.Matrix` or any object that implements the `dataframe.Matrix`
316 | interface can be loaded as a `DataFrame` by using the `LoadMatrix()` method. If
317 | one wants to convert a `DataFrame` to a `mat.Matrix` it is necessary to create
318 | the necessary structs and method implementations. Since a `DataFrame` already
319 | implements the `Dims() (r, c int)` method, only implementations for the `At` and
320 | `T` methods are necessary:
321 |
322 | ```go
323 | type matrix struct {
324 | DataFrame
325 | }
326 |
327 | func (m matrix) At(i, j int) float64 {
328 | return m.columns[j].Elem(i).Float()
329 | }
330 |
331 | func (m matrix) T() mat.Matrix {
332 | return mat.Transpose{Matrix: m}
333 | }
334 | ```
335 |
336 | ## Series
337 |
338 | Series are essentially vectors of elements of the same type with
339 | support for missing values. Series are the building blocks for
340 | DataFrame columns.
341 |
342 | Four types are currently supported:
343 |
344 | ```go
345 | Int
346 | Float
347 | String
348 | Bool
349 | ```
350 |
351 | For more information about the API, make sure to check:
352 |
353 | - [dataframe godoc][3]
354 | - [series godoc][4]
355 |
356 | ## License
357 |
358 | Copyright 2016 Alejandro Sanchez Brotons
359 |
360 | Licensed under the Apache License, Version 2.0 (the "License"); you
361 | may not use this file except in compliance with the License. You may
362 | obtain a copy of the License at
363 |
364 | http://www.apache.org/licenses/LICENSE-2.0
365 |
366 | Unless required by applicable law or agreed to in writing, software
367 | distributed under the License is distributed on an "AS IS" BASIS,
368 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
369 | implied. See the License for the specific language governing
370 | permissions and limitations under the License.
371 |
372 | [1]: https://github.com/gonum
373 | [2]: https://github.com/go-gota/gota
374 | [3]: https://godoc.org/github.com/go-gota/gota/dataframe
375 | [4]: https://godoc.org/github.com/go-gota/gota/series
376 |
--------------------------------------------------------------------------------
/series/series.go:
--------------------------------------------------------------------------------
1 | package series
2 |
3 | import (
4 | "fmt"
5 | "reflect"
6 | "sort"
7 | "strings"
8 |
9 | "math"
10 |
11 | "gonum.org/v1/gonum/stat"
12 | )
13 |
14 | // Series is a data structure designed for operating on arrays of elements that
15 | // should comply with a certain type structure. They are flexible enough that can
16 | // be transformed to other Series types and account for missing or non valid
17 | // elements. Most of the power of Series resides on the ability to compare and
18 | // subset Series of different types.
19 | type Series struct {
20 | Name string // The name of the series
21 | elements Elements // The values of the elements
22 | t Type // The type of the series
23 | Err error // If there are errors they are stored here
24 | }
25 |
26 | // Elements is the interface that represents the array of elements contained on
27 | // a Series.
28 | type Elements interface {
29 | Elem(int) Element
30 | Len() int
31 | }
32 |
33 | // Element is the interface that defines the types of methods to be present for
34 | // elements of a Series
35 | type Element interface {
36 | // Setter method
37 | Set(interface{})
38 |
39 | // Comparation methods
40 | Eq(Element) bool
41 | Neq(Element) bool
42 | Less(Element) bool
43 | LessEq(Element) bool
44 | Greater(Element) bool
45 | GreaterEq(Element) bool
46 |
47 | // Accessor/conversion methods
48 | Copy() Element // FIXME: Returning interface is a recipe for pain
49 | Val() ElementValue // FIXME: Returning interface is a recipe for pain
50 | String() string
51 | Int() (int, error)
52 | Float() float64
53 | Bool() (bool, error)
54 |
55 | // Information methods
56 | IsNA() bool
57 | Type() Type
58 | }
59 |
60 | // intElements is the concrete implementation of Elements for Int elements.
61 | type intElements []intElement
62 |
63 | func (e intElements) Len() int { return len(e) }
64 | func (e intElements) Elem(i int) Element { return &e[i] }
65 |
66 | // stringElements is the concrete implementation of Elements for String elements.
67 | type stringElements []stringElement
68 |
69 | func (e stringElements) Len() int { return len(e) }
70 | func (e stringElements) Elem(i int) Element { return &e[i] }
71 |
72 | // floatElements is the concrete implementation of Elements for Float elements.
73 | type floatElements []floatElement
74 |
75 | func (e floatElements) Len() int { return len(e) }
76 | func (e floatElements) Elem(i int) Element { return &e[i] }
77 |
78 | // boolElements is the concrete implementation of Elements for Bool elements.
79 | type boolElements []boolElement
80 |
81 | func (e boolElements) Len() int { return len(e) }
82 | func (e boolElements) Elem(i int) Element { return &e[i] }
83 |
// ElementValue is the untyped value of an Element, suitable for marshaling or
// unmarshaling Elements.
type ElementValue interface{}
87 |
88 | type MapFunction func(Element) Element
89 |
// Comparator names a comparison operation. Using a dedicated type instead of a
// bare string makes working with comparators more type safe.
type Comparator string

// Supported Comparators
const (
	// Eq tests for equality.
	Eq Comparator = "=="
	// Neq tests for inequality.
	Neq Comparator = "!="
	// Greater tests for strictly greater than.
	Greater Comparator = ">"
	// GreaterEq tests for greater than or equal.
	GreaterEq Comparator = ">="
	// Less tests for strictly lesser than.
	Less Comparator = "<"
	// LessEq tests for lesser than or equal.
	LessEq Comparator = "<="
	// In tests for membership.
	In Comparator = "in"
)
104 |
// Type names the kind of elements held by a Series. Using a dedicated type
// instead of a bare string makes working with Series types more type safe.
type Type string

// Supported Series Types
const (
	// String is the type of Series made of string elements.
	String Type = "string"
	// Int is the type of Series made of int elements.
	Int Type = "int"
	// Float is the type of Series made of float elements.
	Float Type = "float"
	// Bool is the type of Series made of bool elements.
	Bool Type = "bool"
)
116 |
// Indexes is any value that can be used to select a subset of the elements of
// a Series. The currently supported forms are:
//
//	int            // Matches the given index number
//	[]int          // Matches all given index numbers
//	[]bool         // Matches all elements in a Series marked as true
//	Series [Int]   // Same as []int
//	Series [Bool]  // Same as []bool
type Indexes interface{}
126 |
127 | // New is the generic Series constructor
128 | func New(values interface{}, t Type, name string) Series {
129 | ret := Series{
130 | Name: name,
131 | t: t,
132 | }
133 |
134 | // Pre-allocate elements
135 | preAlloc := func(n int) {
136 | switch t {
137 | case String:
138 | ret.elements = make(stringElements, n)
139 | case Int:
140 | ret.elements = make(intElements, n)
141 | case Float:
142 | ret.elements = make(floatElements, n)
143 | case Bool:
144 | ret.elements = make(boolElements, n)
145 | default:
146 | panic(fmt.Sprintf("unknown type %v", t))
147 | }
148 | }
149 |
150 | if values == nil {
151 | preAlloc(1)
152 | ret.elements.Elem(0).Set(nil)
153 | return ret
154 | }
155 |
156 | switch values.(type) {
157 | case []string:
158 | v := values.([]string)
159 | l := len(v)
160 | preAlloc(l)
161 | for i := 0; i < l; i++ {
162 | ret.elements.Elem(i).Set(v[i])
163 | }
164 | case []float64:
165 | v := values.([]float64)
166 | l := len(v)
167 | preAlloc(l)
168 | for i := 0; i < l; i++ {
169 | ret.elements.Elem(i).Set(v[i])
170 | }
171 | case []int:
172 | v := values.([]int)
173 | l := len(v)
174 | preAlloc(l)
175 | for i := 0; i < l; i++ {
176 | ret.elements.Elem(i).Set(v[i])
177 | }
178 | case []bool:
179 | v := values.([]bool)
180 | l := len(v)
181 | preAlloc(l)
182 | for i := 0; i < l; i++ {
183 | ret.elements.Elem(i).Set(v[i])
184 | }
185 | case Series:
186 | v := values.(Series)
187 | l := v.Len()
188 | preAlloc(l)
189 | for i := 0; i < l; i++ {
190 | ret.elements.Elem(i).Set(v.elements.Elem(i))
191 | }
192 | default:
193 | switch reflect.TypeOf(values).Kind() {
194 | case reflect.Slice:
195 | v := reflect.ValueOf(values)
196 | l := v.Len()
197 | preAlloc(v.Len())
198 | for i := 0; i < l; i++ {
199 | val := v.Index(i).Interface()
200 | ret.elements.Elem(i).Set(val)
201 | }
202 | default:
203 | preAlloc(1)
204 | v := reflect.ValueOf(values)
205 | val := v.Interface()
206 | ret.elements.Elem(0).Set(val)
207 | }
208 | }
209 |
210 | return ret
211 | }
212 |
213 | // Strings is a constructor for a String Series
214 | func Strings(values interface{}) Series {
215 | return New(values, String, "")
216 | }
217 |
218 | // Ints is a constructor for an Int Series
219 | func Ints(values interface{}) Series {
220 | return New(values, Int, "")
221 | }
222 |
223 | // Floats is a constructor for a Float Series
224 | func Floats(values interface{}) Series {
225 | return New(values, Float, "")
226 | }
227 |
228 | // Bools is a constructor for a Bool Series
229 | func Bools(values interface{}) Series {
230 | return New(values, Bool, "")
231 | }
232 |
233 | // Empty returns an empty Series of the same type
234 | func (s Series) Empty() Series {
235 | return New([]int{}, s.t, s.Name)
236 | }
237 |
238 | // Append adds new elements to the end of the Series. When using Append, the
239 | // Series is modified in place.
240 | func (s *Series) Append(values interface{}) {
241 | if err := s.Err; err != nil {
242 | return
243 | }
244 | news := New(values, s.t, s.Name)
245 | switch s.t {
246 | case String:
247 | s.elements = append(s.elements.(stringElements), news.elements.(stringElements)...)
248 | case Int:
249 | s.elements = append(s.elements.(intElements), news.elements.(intElements)...)
250 | case Float:
251 | s.elements = append(s.elements.(floatElements), news.elements.(floatElements)...)
252 | case Bool:
253 | s.elements = append(s.elements.(boolElements), news.elements.(boolElements)...)
254 | }
255 | }
256 |
257 | // Concat concatenates two series together. It will return a new Series with the
258 | // combined elements of both Series.
259 | func (s Series) Concat(x Series) Series {
260 | if err := s.Err; err != nil {
261 | return s
262 | }
263 | if err := x.Err; err != nil {
264 | s.Err = fmt.Errorf("concat error: argument has errors: %v", err)
265 | return s
266 | }
267 | y := s.Copy()
268 | y.Append(x)
269 | return y
270 | }
271 |
272 | // Subset returns a subset of the series based on the given Indexes.
273 | func (s Series) Subset(indexes Indexes) Series {
274 | if err := s.Err; err != nil {
275 | return s
276 | }
277 | idx, err := parseIndexes(s.Len(), indexes)
278 | if err != nil {
279 | s.Err = err
280 | return s
281 | }
282 | ret := Series{
283 | Name: s.Name,
284 | t: s.t,
285 | }
286 | switch s.t {
287 | case String:
288 | elements := make(stringElements, len(idx))
289 | for k, i := range idx {
290 | elements[k] = s.elements.(stringElements)[i]
291 | }
292 | ret.elements = elements
293 | case Int:
294 | elements := make(intElements, len(idx))
295 | for k, i := range idx {
296 | elements[k] = s.elements.(intElements)[i]
297 | }
298 | ret.elements = elements
299 | case Float:
300 | elements := make(floatElements, len(idx))
301 | for k, i := range idx {
302 | elements[k] = s.elements.(floatElements)[i]
303 | }
304 | ret.elements = elements
305 | case Bool:
306 | elements := make(boolElements, len(idx))
307 | for k, i := range idx {
308 | elements[k] = s.elements.(boolElements)[i]
309 | }
310 | ret.elements = elements
311 | default:
312 | panic("unknown series type")
313 | }
314 | return ret
315 | }
316 |
317 | // Set sets the values on the indexes of a Series and returns the reference
318 | // for itself. The original Series is modified.
319 | func (s Series) Set(indexes Indexes, newvalues Series) Series {
320 | if err := s.Err; err != nil {
321 | return s
322 | }
323 | if err := newvalues.Err; err != nil {
324 | s.Err = fmt.Errorf("set error: argument has errors: %v", err)
325 | return s
326 | }
327 | idx, err := parseIndexes(s.Len(), indexes)
328 | if err != nil {
329 | s.Err = err
330 | return s
331 | }
332 | if len(idx) != newvalues.Len() {
333 | s.Err = fmt.Errorf("set error: dimensions mismatch")
334 | return s
335 | }
336 | for k, i := range idx {
337 | if i < 0 || i >= s.Len() {
338 | s.Err = fmt.Errorf("set error: index out of range")
339 | return s
340 | }
341 | s.elements.Elem(i).Set(newvalues.elements.Elem(k))
342 | }
343 | return s
344 | }
345 |
346 | // HasNaN checks whether the Series contain NaN elements.
347 | func (s Series) HasNaN() bool {
348 | for i := 0; i < s.Len(); i++ {
349 | if s.elements.Elem(i).IsNA() {
350 | return true
351 | }
352 | }
353 | return false
354 | }
355 |
356 | // IsNaN returns an array that identifies which of the elements are NaN.
357 | func (s Series) IsNaN() []bool {
358 | ret := make([]bool, s.Len())
359 | for i := 0; i < s.Len(); i++ {
360 | ret[i] = s.elements.Elem(i).IsNA()
361 | }
362 | return ret
363 | }
364 |
365 | // Compare compares the values of a Series with other elements. To do so, the
366 | // elements with are to be compared are first transformed to a Series of the same
367 | // type as the caller.
368 | func (s Series) Compare(comparator Comparator, comparando interface{}) Series {
369 | if err := s.Err; err != nil {
370 | return s
371 | }
372 | compareElements := func(a, b Element, c Comparator) (bool, error) {
373 | var ret bool
374 | switch c {
375 | case Eq:
376 | ret = a.Eq(b)
377 | case Neq:
378 | ret = a.Neq(b)
379 | case Greater:
380 | ret = a.Greater(b)
381 | case GreaterEq:
382 | ret = a.GreaterEq(b)
383 | case Less:
384 | ret = a.Less(b)
385 | case LessEq:
386 | ret = a.LessEq(b)
387 | default:
388 | return false, fmt.Errorf("unknown comparator: %v", c)
389 | }
390 | return ret, nil
391 | }
392 |
393 | comp := New(comparando, s.t, "")
394 | bools := make([]bool, s.Len())
395 | // In comparator comparation
396 | if comparator == In {
397 | for i := 0; i < s.Len(); i++ {
398 | e := s.elements.Elem(i)
399 | b := false
400 | for j := 0; j < comp.Len(); j++ {
401 | m := comp.elements.Elem(j)
402 | c, err := compareElements(e, m, Eq)
403 | if err != nil {
404 | s = s.Empty()
405 | s.Err = err
406 | return s
407 | }
408 | if c {
409 | b = true
410 | break
411 | }
412 | }
413 | bools[i] = b
414 | }
415 | return Bools(bools)
416 | }
417 |
418 | // Single element comparison
419 | if comp.Len() == 1 {
420 | for i := 0; i < s.Len(); i++ {
421 | e := s.elements.Elem(i)
422 | c, err := compareElements(e, comp.elements.Elem(0), comparator)
423 | if err != nil {
424 | s = s.Empty()
425 | s.Err = err
426 | return s
427 | }
428 | bools[i] = c
429 | }
430 | return Bools(bools)
431 | }
432 |
433 | // Multiple element comparison
434 | if s.Len() != comp.Len() {
435 | s := s.Empty()
436 | s.Err = fmt.Errorf("can't compare: length mismatch")
437 | return s
438 | }
439 | for i := 0; i < s.Len(); i++ {
440 | e := s.elements.Elem(i)
441 | c, err := compareElements(e, comp.elements.Elem(i), comparator)
442 | if err != nil {
443 | s = s.Empty()
444 | s.Err = err
445 | return s
446 | }
447 | bools[i] = c
448 | }
449 | return Bools(bools)
450 | }
451 |
452 | // Copy will return a copy of the Series.
453 | func (s Series) Copy() Series {
454 | name := s.Name
455 | t := s.t
456 | err := s.Err
457 | var elements Elements
458 | switch s.t {
459 | case String:
460 | elements = make(stringElements, s.Len())
461 | copy(elements.(stringElements), s.elements.(stringElements))
462 | case Float:
463 | elements = make(floatElements, s.Len())
464 | copy(elements.(floatElements), s.elements.(floatElements))
465 | case Bool:
466 | elements = make(boolElements, s.Len())
467 | copy(elements.(boolElements), s.elements.(boolElements))
468 | case Int:
469 | elements = make(intElements, s.Len())
470 | copy(elements.(intElements), s.elements.(intElements))
471 | }
472 | ret := Series{
473 | Name: name,
474 | t: t,
475 | elements: elements,
476 | Err: err,
477 | }
478 | return ret
479 | }
480 |
481 | // Records returns the elements of a Series as a []string
482 | func (s Series) Records() []string {
483 | ret := make([]string, s.Len())
484 | for i := 0; i < s.Len(); i++ {
485 | e := s.elements.Elem(i)
486 | ret[i] = e.String()
487 | }
488 | return ret
489 | }
490 |
491 | // Float returns the elements of a Series as a []float64. If the elements can not
492 | // be converted to float64 or contains a NaN returns the float representation of
493 | // NaN.
494 | func (s Series) Float() []float64 {
495 | ret := make([]float64, s.Len())
496 | for i := 0; i < s.Len(); i++ {
497 | e := s.elements.Elem(i)
498 | ret[i] = e.Float()
499 | }
500 | return ret
501 | }
502 |
503 | // Int returns the elements of a Series as a []int or an error if the
504 | // transformation is not possible.
505 | func (s Series) Int() ([]int, error) {
506 | ret := make([]int, s.Len())
507 | for i := 0; i < s.Len(); i++ {
508 | e := s.elements.Elem(i)
509 | val, err := e.Int()
510 | if err != nil {
511 | return nil, err
512 | }
513 | ret[i] = val
514 | }
515 | return ret, nil
516 | }
517 |
518 | // Bool returns the elements of a Series as a []bool or an error if the
519 | // transformation is not possible.
520 | func (s Series) Bool() ([]bool, error) {
521 | ret := make([]bool, s.Len())
522 | for i := 0; i < s.Len(); i++ {
523 | e := s.elements.Elem(i)
524 | val, err := e.Bool()
525 | if err != nil {
526 | return nil, err
527 | }
528 | ret[i] = val
529 | }
530 | return ret, nil
531 | }
532 |
533 | // Type returns the type of a given series
534 | func (s Series) Type() Type {
535 | return s.t
536 | }
537 |
538 | // Len returns the length of a given Series
539 | func (s Series) Len() int {
540 | return s.elements.Len()
541 | }
542 |
543 | // String implements the Stringer interface for Series
544 | func (s Series) String() string {
545 | return fmt.Sprint(s.elements)
546 | }
547 |
548 | // Str prints some extra information about a given series
549 | func (s Series) Str() string {
550 | var ret []string
551 | // If name exists print name
552 | if s.Name != "" {
553 | ret = append(ret, "Name: "+s.Name)
554 | }
555 | ret = append(ret, "Type: "+fmt.Sprint(s.t))
556 | ret = append(ret, "Length: "+fmt.Sprint(s.Len()))
557 | if s.Len() != 0 {
558 | ret = append(ret, "Values: "+fmt.Sprint(s))
559 | }
560 | return strings.Join(ret, "\n")
561 | }
562 |
563 | // Val returns the value of a series for the given index. Will panic if the index
564 | // is out of bounds.
565 | func (s Series) Val(i int) interface{} {
566 | return s.elements.Elem(i).Val()
567 | }
568 |
569 | // Elem returns the element of a series for the given index. Will panic if the
570 | // index is out of bounds.
571 | func (s Series) Elem(i int) Element {
572 | return s.elements.Elem(i)
573 | }
574 |
575 | // parseIndexes will parse the given indexes for a given series of length `l`. No
576 | // out of bounds checks is performed.
577 | func parseIndexes(l int, indexes Indexes) ([]int, error) {
578 | var idx []int
579 | switch indexes.(type) {
580 | case []int:
581 | idx = indexes.([]int)
582 | case int:
583 | idx = []int{indexes.(int)}
584 | case []bool:
585 | bools := indexes.([]bool)
586 | if len(bools) != l {
587 | return nil, fmt.Errorf("indexing error: index dimensions mismatch")
588 | }
589 | for i, b := range bools {
590 | if b {
591 | idx = append(idx, i)
592 | }
593 | }
594 | case Series:
595 | s := indexes.(Series)
596 | if err := s.Err; err != nil {
597 | return nil, fmt.Errorf("indexing error: new values has errors: %v", err)
598 | }
599 | if s.HasNaN() {
600 | return nil, fmt.Errorf("indexing error: indexes contain NaN")
601 | }
602 | switch s.t {
603 | case Int:
604 | return s.Int()
605 | case Bool:
606 | bools, err := s.Bool()
607 | if err != nil {
608 | return nil, fmt.Errorf("indexing error: %v", err)
609 | }
610 | return parseIndexes(l, bools)
611 | default:
612 | return nil, fmt.Errorf("indexing error: unknown indexing mode")
613 | }
614 | default:
615 | return nil, fmt.Errorf("indexing error: unknown indexing mode")
616 | }
617 | return idx, nil
618 | }
619 |
620 | // Order returns the indexes for sorting a Series. NaN elements are pushed to the
621 | // end by order of appearance.
622 | func (s Series) Order(reverse bool) []int {
623 | var ie indexedElements
624 | var nasIdx []int
625 | for i := 0; i < s.Len(); i++ {
626 | e := s.elements.Elem(i)
627 | if e.IsNA() {
628 | nasIdx = append(nasIdx, i)
629 | } else {
630 | ie = append(ie, indexedElement{i, e})
631 | }
632 | }
633 | var srt sort.Interface
634 | srt = ie
635 | if reverse {
636 | srt = sort.Reverse(srt)
637 | }
638 | sort.Sort(srt)
639 | var ret []int
640 | for _, e := range ie {
641 | ret = append(ret, e.index)
642 | }
643 | return append(ret, nasIdx...)
644 | }
645 |
646 | type indexedElement struct {
647 | index int
648 | element Element
649 | }
650 |
651 | type indexedElements []indexedElement
652 |
653 | func (e indexedElements) Len() int { return len(e) }
654 | func (e indexedElements) Less(i, j int) bool { return e[i].element.Less(e[j].element) }
655 | func (e indexedElements) Swap(i, j int) { e[i], e[j] = e[j], e[i] }
656 |
657 | // StdDev calculates the standard deviation of a series
658 | func (s Series) StdDev() float64 {
659 | stdDev := stat.StdDev(s.Float(), nil)
660 | return stdDev
661 | }
662 |
663 | // Mean calculates the average value of a series
664 | func (s Series) Mean() float64 {
665 | stdDev := stat.Mean(s.Float(), nil)
666 | return stdDev
667 | }
668 |
669 | // Median calculates the middle or median value, as opposed to
670 | // mean, and there is less susceptible to being affected by outliers.
671 | func (s Series) Median() float64 {
672 | if s.elements.Len() == 0 ||
673 | s.Type() == String ||
674 | s.Type() == Bool {
675 | return math.NaN()
676 | }
677 | ix := s.Order(false)
678 | newElem := make([]Element, len(ix))
679 |
680 | for newpos, oldpos := range ix {
681 | newElem[newpos] = s.elements.Elem(oldpos)
682 | }
683 |
684 | // When length is odd, we just take length(list)/2
685 | // value as the median.
686 | if len(newElem)%2 != 0 {
687 | return newElem[len(newElem)/2].Float()
688 | }
689 | // When length is even, we take middle two elements of
690 | // list and the median is an average of the two of them.
691 | return (newElem[(len(newElem)/2)-1].Float() +
692 | newElem[len(newElem)/2].Float()) * 0.5
693 | }
694 |
695 | // Max return the biggest element in the series
696 | func (s Series) Max() float64 {
697 | if s.elements.Len() == 0 || s.Type() == String {
698 | return math.NaN()
699 | }
700 |
701 | max := s.elements.Elem(0)
702 | for i := 1; i < s.elements.Len(); i++ {
703 | elem := s.elements.Elem(i)
704 | if elem.Greater(max) {
705 | max = elem
706 | }
707 | }
708 | return max.Float()
709 | }
710 |
711 | // MaxStr return the biggest element in a series of type String
712 | func (s Series) MaxStr() string {
713 | if s.elements.Len() == 0 || s.Type() != String {
714 | return ""
715 | }
716 |
717 | max := s.elements.Elem(0)
718 | for i := 1; i < s.elements.Len(); i++ {
719 | elem := s.elements.Elem(i)
720 | if elem.Greater(max) {
721 | max = elem
722 | }
723 | }
724 | return max.String()
725 | }
726 |
727 | // Min return the lowest element in the series
728 | func (s Series) Min() float64 {
729 | if s.elements.Len() == 0 || s.Type() == String {
730 | return math.NaN()
731 | }
732 |
733 | min := s.elements.Elem(0)
734 | for i := 1; i < s.elements.Len(); i++ {
735 | elem := s.elements.Elem(i)
736 | if elem.Less(min) {
737 | min = elem
738 | }
739 | }
740 | return min.Float()
741 | }
742 |
743 | // MinStr return the lowest element in a series of type String
744 | func (s Series) MinStr() string {
745 | if s.elements.Len() == 0 || s.Type() != String {
746 | return ""
747 | }
748 |
749 | min := s.elements.Elem(0)
750 | for i := 1; i < s.elements.Len(); i++ {
751 | elem := s.elements.Elem(i)
752 | if elem.Less(min) {
753 | min = elem
754 | }
755 | }
756 | return min.String()
757 | }
758 |
759 | // Quantile returns the sample of x such that x is greater than or
760 | // equal to the fraction p of samples.
761 | // Note: gonum/stat panics when called with strings
762 | func (s Series) Quantile(p float64) float64 {
763 | if s.Type() == String || s.Len() == 0 {
764 | return math.NaN()
765 | }
766 |
767 | ordered := s.Subset(s.Order(false)).Float()
768 |
769 | return stat.Quantile(p, stat.Empirical, ordered, nil)
770 | }
771 |
772 | // Map applies a function matching MapFunction signature, which itself
773 | // allowing for a fairly flexible MAP implementation, intended for mapping
774 | // the function over each element in Series and returning a new Series object.
775 | // Function must be compatible with the underlying type of data in the Series.
776 | // In other words it is expected that when working with a Float Series, that
777 | // the function passed in via argument `f` will not expect another type, but
778 | // instead expects to handle Element(s) of type Float.
779 | func (s Series) Map(f MapFunction) Series {
780 |
781 | mappedValues := make([]Element, s.Len())
782 | for i := 0; i < s.Len(); i++ {
783 | value := f(s.elements.Elem(i))
784 | mappedValues[i] = value
785 | }
786 | return New(mappedValues, s.Type(), s.Name)
787 | }
788 |
--------------------------------------------------------------------------------
/series/series_test.go:
--------------------------------------------------------------------------------
1 | package series
2 |
3 | import (
4 | "fmt"
5 | "math"
6 | "reflect"
7 | "testing"
8 | "strings"
9 | )
10 |
11 | // Check that there are no shared memory addreses between the elements of two Series
12 | //func checkAddr(addra, addrb []string) error {
13 | //for i := 0; i < len(addra); i++ {
14 | //for j := 0; j < len(addrb); j++ {
15 | //if addra[i] == "" || addrb[j] == "" {
16 | //continue
17 | //}
18 | //if addra[i] == addrb[j] {
19 | //return fmt.Errorf("found same address on\nA:%v\nB:%v", i, j)
20 | //}
21 | //}
22 | //}
23 | //return nil
24 | //}
25 |
26 | // Check that all the types on a Series are the same type and that it matches with
27 | // Series.t
28 | func checkTypes(s Series) error {
29 | var types []Type
30 | for i := 0; i < s.Len(); i++ {
31 | e := s.elements.Elem(i)
32 | types = append(types, e.Type())
33 | }
34 | for _, t := range types {
35 | if t != s.t {
36 | return fmt.Errorf("bad types for %v Series:\n%v", s.t, types)
37 | }
38 | }
39 | return nil
40 | }
41 |
42 | // compareFloats compares floating point values up to the number of digits specified.
43 | // Returns true if both values are equal with the given precision
44 | func compareFloats(lvalue, rvalue float64, digits int) bool {
45 | if math.IsNaN(lvalue) || math.IsNaN(rvalue) {
46 | return math.IsNaN(lvalue) && math.IsNaN(rvalue)
47 | }
48 | d := math.Pow(10.0, float64(digits))
49 | lv := int(lvalue * d)
50 | rv := int(rvalue * d)
51 | return lv == rv
52 | }
53 |
54 | func TestSeries_Compare(t *testing.T) {
55 | table := []struct {
56 | series Series
57 | comparator Comparator
58 | comparando interface{}
59 | expected Series
60 | }{
61 | {
62 | Strings([]string{"A", "B", "C", "B", "D", "BADA"}),
63 | Eq,
64 | "B",
65 | Bools([]bool{false, true, false, true, false, false}),
66 | },
67 | {
68 | Strings([]string{"A", "B", "C", "B", "D", "BADA"}),
69 | Eq,
70 | []string{"B", "B", "C", "D", "A", "A"},
71 | Bools([]bool{false, true, true, false, false, false}),
72 | },
73 | {
74 | Ints([]int{0, 2, 1, 5, 9}),
75 | Eq,
76 | "2",
77 | Bools([]bool{false, true, false, false, false}),
78 | },
79 | {
80 | Ints([]int{0, 2, 1, 5, 9}),
81 | Eq,
82 | []int{0, 2, 0, 5, 10},
83 | Bools([]bool{true, true, false, true, false}),
84 | },
85 | {
86 | Floats([]float64{0.1, 2, 1, 5, 9}),
87 | Eq,
88 | "2",
89 | Bools([]bool{false, true, false, false, false}),
90 | },
91 | {
92 | Floats([]float64{0.1, 2, 1, 5, 9}),
93 | Eq,
94 | []float64{0.1, 2, 0, 5, 10},
95 | Bools([]bool{true, true, false, true, false}),
96 | },
97 | {
98 | Bools([]bool{true, true, false}),
99 | Eq,
100 | "true",
101 | Bools([]bool{true, true, false}),
102 | },
103 | {
104 | Bools([]bool{true, true, false}),
105 | Eq,
106 | []bool{true, false, false},
107 | Bools([]bool{true, false, true}),
108 | },
109 | {
110 | Strings([]string{"A", "B", "C", "B", "D", "BADA"}),
111 | Neq,
112 | "B",
113 | Bools([]bool{true, false, true, false, true, true}),
114 | },
115 | {
116 | Strings([]string{"A", "B", "C", "B", "D", "BADA"}),
117 | Neq,
118 | []string{"B", "B", "C", "D", "A", "A"},
119 | Bools([]bool{true, false, false, true, true, true}),
120 | },
121 | {
122 | Ints([]int{0, 2, 1, 5, 9}),
123 | Neq,
124 | "2",
125 | Bools([]bool{true, false, true, true, true}),
126 | },
127 | {
128 | Ints([]int{0, 2, 1, 5, 9}),
129 | Neq,
130 | []int{0, 2, 0, 5, 10},
131 | Bools([]bool{false, false, true, false, true}),
132 | },
133 | {
134 | Floats([]float64{0.1, 2, 1, 5, 9}),
135 | Neq,
136 | "2",
137 | Bools([]bool{true, false, true, true, true}),
138 | },
139 | {
140 | Floats([]float64{0.1, 2, 1, 5, 9}),
141 | Neq,
142 | []float64{0.1, 2, 0, 5, 10},
143 | Bools([]bool{false, false, true, false, true}),
144 | },
145 | {
146 | Bools([]bool{true, true, false}),
147 | Neq,
148 | "true",
149 | Bools([]bool{false, false, true}),
150 | },
151 | {
152 | Bools([]bool{true, true, false}),
153 | Neq,
154 | []bool{true, false, false},
155 | Bools([]bool{false, true, false}),
156 | },
157 | {
158 | Strings([]string{"A", "B", "C", "B", "D", "BADA"}),
159 | Greater,
160 | "B",
161 | Bools([]bool{false, false, true, false, true, true}),
162 | },
163 | {
164 | Strings([]string{"A", "B", "C", "B", "D", "BADA"}),
165 | Greater,
166 | []string{"B", "B", "C", "D", "A", "A"},
167 | Bools([]bool{false, false, false, false, true, true}),
168 | },
169 | {
170 | Ints([]int{0, 2, 1, 5, 9}),
171 | Greater,
172 | "2",
173 | Bools([]bool{false, false, false, true, true}),
174 | },
175 | {
176 | Ints([]int{0, 2, 1, 5, 9}),
177 | Greater,
178 | []int{0, 2, 0, 5, 10},
179 | Bools([]bool{false, false, true, false, false}),
180 | },
181 | {
182 | Floats([]float64{0.1, 2, 1, 5, 9}),
183 | Greater,
184 | "2",
185 | Bools([]bool{false, false, false, true, true}),
186 | },
187 | {
188 | Floats([]float64{0.1, 2, 1, 5, 9}),
189 | Greater,
190 | []float64{0.1, 2, 0, 5, 10},
191 | Bools([]bool{false, false, true, false, false}),
192 | },
193 | {
194 | Bools([]bool{true, true, false}),
195 | Greater,
196 | "true",
197 | Bools([]bool{false, false, false}),
198 | },
199 | {
200 | Bools([]bool{true, true, false}),
201 | Greater,
202 | []bool{true, false, false},
203 | Bools([]bool{false, true, false}),
204 | },
205 | {
206 | Strings([]string{"A", "B", "C", "B", "D", "BADA"}),
207 | GreaterEq,
208 | "B",
209 | Bools([]bool{false, true, true, true, true, true}),
210 | },
211 | {
212 | Strings([]string{"A", "B", "C", "B", "D", "BADA"}),
213 | GreaterEq,
214 | []string{"B", "B", "C", "D", "A", "A"},
215 | Bools([]bool{false, true, true, false, true, true}),
216 | },
217 | {
218 | Ints([]int{0, 2, 1, 5, 9}),
219 | GreaterEq,
220 | "2",
221 | Bools([]bool{false, true, false, true, true}),
222 | },
223 | {
224 | Ints([]int{0, 2, 1, 5, 9}),
225 | GreaterEq,
226 | []int{0, 2, 0, 5, 10},
227 | Bools([]bool{true, true, true, true, false}),
228 | },
229 | {
230 | Floats([]float64{0.1, 2, 1, 5, 9}),
231 | GreaterEq,
232 | "2",
233 | Bools([]bool{false, true, false, true, true}),
234 | },
235 | {
236 | Floats([]float64{0.1, 2, 1, 5, 9}),
237 | GreaterEq,
238 | []float64{0.1, 2, 0, 5, 10},
239 | Bools([]bool{true, true, true, true, false}),
240 | },
241 | {
242 | Bools([]bool{true, true, false}),
243 | GreaterEq,
244 | "true",
245 | Bools([]bool{true, true, false}),
246 | },
247 | {
248 | Bools([]bool{true, true, false}),
249 | GreaterEq,
250 | []bool{true, false, false},
251 | Bools([]bool{true, true, true}),
252 | },
253 | {
254 | Strings([]string{"A", "B", "C", "B", "D", "BADA"}),
255 | Less,
256 | "B",
257 | Bools([]bool{true, false, false, false, false, false}),
258 | },
259 | {
260 | Strings([]string{"A", "B", "C", "B", "D", "BADA"}),
261 | Less,
262 | []string{"B", "B", "C", "D", "A", "A"},
263 | Bools([]bool{true, false, false, true, false, false}),
264 | },
265 | {
266 | Ints([]int{0, 2, 1, 5, 9}),
267 | Less,
268 | "2",
269 | Bools([]bool{true, false, true, false, false}),
270 | },
271 | {
272 | Ints([]int{0, 2, 1, 5, 9}),
273 | Less,
274 | []int{0, 2, 0, 5, 10},
275 | Bools([]bool{false, false, false, false, true}),
276 | },
277 | {
278 | Floats([]float64{0.1, 2, 1, 5, 9}),
279 | Less,
280 | "2",
281 | Bools([]bool{true, false, true, false, false}),
282 | },
283 | {
284 | Floats([]float64{0.1, 2, 1, 5, 9}),
285 | Less,
286 | []float64{0.1, 2, 0, 5, 10},
287 | Bools([]bool{false, false, false, false, true}),
288 | },
289 | {
290 | Bools([]bool{true, true, false}),
291 | Less,
292 | "true",
293 | Bools([]bool{false, false, true}),
294 | },
295 | {
296 | Bools([]bool{true, true, false}),
297 | Less,
298 | []bool{true, false, false},
299 | Bools([]bool{false, false, false}),
300 | },
301 | {
302 | Strings([]string{"A", "B", "C", "B", "D", "BADA"}),
303 | LessEq,
304 | "B",
305 | Bools([]bool{true, true, false, true, false, false}),
306 | },
307 | {
308 | Strings([]string{"A", "B", "C", "B", "D", "BADA"}),
309 | LessEq,
310 | []string{"B", "B", "C", "D", "A", "A"},
311 | Bools([]bool{true, true, true, true, false, false}),
312 | },
313 | {
314 | Ints([]int{0, 2, 1, 5, 9}),
315 | LessEq,
316 | "2",
317 | Bools([]bool{true, true, true, false, false}),
318 | },
319 | {
320 | Ints([]int{0, 2, 1, 5, 9}),
321 | LessEq,
322 | []int{0, 2, 0, 5, 10},
323 | Bools([]bool{true, true, false, true, true}),
324 | },
325 | {
326 | Floats([]float64{0.1, 2, 1, 5, 9}),
327 | LessEq,
328 | "2",
329 | Bools([]bool{true, true, true, false, false}),
330 | },
331 | {
332 | Floats([]float64{0.1, 2, 1, 5, 9}),
333 | LessEq,
334 | []float64{0.1, 2, 0, 5, 10},
335 | Bools([]bool{true, true, false, true, true}),
336 | },
337 | {
338 | Bools([]bool{true, true, false}),
339 | LessEq,
340 | "true",
341 | Bools([]bool{true, true, true}),
342 | },
343 | {
344 | Bools([]bool{true, true, false}),
345 | LessEq,
346 | []bool{true, false, false},
347 | Bools([]bool{true, false, true}),
348 | },
349 | {
350 | Strings([]string{"A", "B", "C", "B", "D", "BADA"}),
351 | In,
352 | "B",
353 | Bools([]bool{false, true, false, true, false, false}),
354 | },
355 | {
356 | Strings([]string{"Hello", "world", "this", "is", "a", "test"}),
357 | In,
358 | []string{"cat", "world", "hello", "a"},
359 | Bools([]bool{false, true, false, false, true, false}),
360 | },
361 | {
362 | Ints([]int{0, 2, 1, 5, 9}),
363 | In,
364 | "2",
365 | Bools([]bool{false, true, false, false, false}),
366 | },
367 | {
368 | Ints([]int{0, 2, 1, 5, 9}),
369 | In,
370 | []int{2, 99, 1234, 9},
371 | Bools([]bool{false, true, false, false, true}),
372 | },
373 | {
374 | Floats([]float64{0.1, 2, 1, 5, 9}),
375 | In,
376 | "2",
377 | Bools([]bool{false, true, false, false, false}),
378 | },
379 | {
380 | Floats([]float64{0.1, 2, 1, 5, 9}),
381 | In,
382 | []float64{2, 99, 1234, 9},
383 | Bools([]bool{false, true, false, false, true}),
384 | },
385 | {
386 | Bools([]bool{true, true, false}),
387 | In,
388 | "true",
389 | Bools([]bool{true, true, false}),
390 | },
391 | {
392 | Bools([]bool{true, true, false}),
393 | In,
394 | []bool{false, false, false},
395 | Bools([]bool{false, false, true}),
396 | },
397 | }
398 | for testnum, test := range table {
399 | a := test.series
400 | b := a.Compare(test.comparator, test.comparando)
401 | if err := b.Err; err != nil {
402 | t.Errorf("Test:%v\nError:%v", testnum, err)
403 | }
404 | expected := test.expected.Records()
405 | received := b.Records()
406 | if !reflect.DeepEqual(expected, received) {
407 | t.Errorf(
408 | "Test:%v\nExpected:\n%v\nReceived:\n%v",
409 | testnum, expected, received,
410 | )
411 | }
412 | if err := checkTypes(b); err != nil {
413 | t.Errorf(
414 | "Test:%v\nError:%v",
415 | testnum, err,
416 | )
417 | }
418 | //if err := checkAddr(a.Addr(), b.Addr()); err != nil {
419 | //t.Errorf("Test:%v\nError:%v\nA:%v\nB:%v", testnum, err, a.Addr(), b.Addr())
420 | //}
421 | }
422 | }
423 |
424 | func TestSeries_Subset(t *testing.T) {
425 | table := []struct {
426 | series Series
427 | indexes Indexes
428 | expected string
429 | }{
430 | {
431 | Strings([]string{"A", "B", "C", "K", "D"}),
432 | []int{2, 1, 4, 4, 0, 3},
433 | "[C B D D A K]",
434 | },
435 | {
436 | Strings([]string{"A", "B", "C", "K", "D"}),
437 | int(1),
438 | "[B]",
439 | },
440 | {
441 | Strings([]string{"A", "B", "C", "K", "D"}),
442 | []bool{true, false, false, true, true},
443 | "[A K D]",
444 | },
445 | {
446 | Strings([]string{"A", "B", "C", "K", "D"}),
447 | Ints([]int{3, 2, 1, 0}),
448 | "[K C B A]",
449 | },
450 | {
451 | Strings([]string{"A", "B", "C", "K", "D"}),
452 | Ints([]int{1}),
453 | "[B]",
454 | },
455 | {
456 | Strings([]string{"A", "B", "C", "K", "D"}),
457 | Ints(2),
458 | "[C]",
459 | },
460 | {
461 | Strings([]string{"A", "B", "C", "K", "D"}),
462 | Bools([]bool{true, false, false, true, true}),
463 | "[A K D]",
464 | },
465 | }
466 | for testnum, test := range table {
467 | a := test.series
468 | b := a.Subset(test.indexes)
469 | if err := b.Err; err != nil {
470 | t.Errorf("Test:%v\nError:%v", testnum, err)
471 | }
472 | expected := test.expected
473 | received := fmt.Sprint(b)
474 | if expected != received {
475 | t.Errorf(
476 | "Test:%v\nExpected:\n%v\nReceived:\n%v",
477 | testnum, expected, received,
478 | )
479 | }
480 | if err := checkTypes(b); err != nil {
481 | t.Errorf(
482 | "Test:%v\nError:%v",
483 | testnum, err,
484 | )
485 | }
486 | //if err := checkAddr(a.Addr(), b.Addr()); err != nil {
487 | //t.Errorf("Test:%v\nError:%v\nA:%v\nB:%v", testnum, err, a.Addr(), b.Addr())
488 | //}
489 | }
490 | }
491 |
492 | func TestSeries_Set(t *testing.T) {
493 | table := []struct {
494 | series Series
495 | indexes Indexes
496 | values Series
497 | expected string
498 | }{
499 | {
500 | Strings([]string{"A", "B", "C", "K", "D"}),
501 | []int{1, 2, 4},
502 | Ints([]string{"1", "2", "3"}),
503 | "[A 1 2 K 3]",
504 | },
505 | {
506 | Strings([]string{"A", "B", "C", "K", "D"}),
507 | []bool{false, true, true, false, true},
508 | Ints([]string{"1", "2", "3"}),
509 | "[A 1 2 K 3]",
510 | },
511 | {
512 | Strings([]string{"A", "B", "C", "K", "D"}),
513 | Ints([]int{1, 2, 4}),
514 | Ints([]string{"1", "2", "3"}),
515 | "[A 1 2 K 3]",
516 | },
517 | {
518 | Strings([]string{"A", "B", "C", "K", "D"}),
519 | Bools([]bool{false, true, true, false, true}),
520 | Ints([]string{"1", "2", "3"}),
521 | "[A 1 2 K 3]",
522 | },
523 | }
524 | for testnum, test := range table {
525 | b := test.series.Set(test.indexes, test.values)
526 | if err := b.Err; err != nil {
527 | t.Errorf("Test:%v\nError:%v", testnum, err)
528 | }
529 | expected := test.expected
530 | received := fmt.Sprint(b)
531 | if expected != received {
532 | t.Errorf(
533 | "Test:%v\nExpected:\n%v\nReceived:\n%v",
534 | testnum, expected, received,
535 | )
536 | }
537 | if err := checkTypes(b); err != nil {
538 | t.Errorf(
539 | "Test:%v\nError:%v",
540 | testnum, err,
541 | )
542 | }
543 | //if err := checkAddr(test.values.Addr(), b.Addr()); err != nil {
544 | //t.Errorf("Test:%v\nError:%v\nNV:%v\nB:%v", testnum, err, test.values.Addr(), b.Addr())
545 | //}
546 | }
547 | }
548 |
549 | func TestStrings(t *testing.T) {
550 | table := []struct {
551 | series Series
552 | expected string
553 | }{
554 | {
555 | Strings([]string{"A", "B", "C", "D"}),
556 | "[A B C D]",
557 | },
558 | {
559 | Strings([]string{"A"}),
560 | "[A]",
561 | },
562 | {
563 | Strings("A"),
564 | "[A]",
565 | },
566 | {
567 | Strings([]int{1, 2, 3}),
568 | "[1 2 3]",
569 | },
570 | {
571 | Strings([]int{2}),
572 | "[2]",
573 | },
574 | {
575 | Strings(-1),
576 | "[-1]",
577 | },
578 | {
579 | Strings([]float64{1, 2, 3}),
580 | "[1.000000 2.000000 3.000000]",
581 | },
582 | {
583 | Strings([]float64{2}),
584 | "[2.000000]",
585 | },
586 | {
587 | Strings(-1.0),
588 | "[-1.000000]",
589 | },
590 | {
591 | Strings(math.NaN()),
592 | "[NaN]",
593 | },
594 | {
595 | Strings(math.Inf(1)),
596 | "[+Inf]",
597 | },
598 | {
599 | Strings(math.Inf(-1)),
600 | "[-Inf]",
601 | },
602 | {
603 | Strings([]bool{true, true, false}),
604 | "[true true false]",
605 | },
606 | {
607 | Strings([]bool{false}),
608 | "[false]",
609 | },
610 | {
611 | Strings(true),
612 | "[true]",
613 | },
614 | {
615 | Strings([]int{}),
616 | "[]",
617 | },
618 | {
619 | Strings(nil),
620 | "[NaN]",
621 | },
622 | {
623 | Strings(Strings([]string{"A", "B", "C"})),
624 | "[A B C]",
625 | },
626 | }
627 | for testnum, test := range table {
628 | if err := test.series.Err; err != nil {
629 | t.Errorf("Test:%v\nError:%v", testnum, err)
630 | }
631 | expected := test.expected
632 | received := fmt.Sprint(test.series)
633 | if expected != received {
634 | t.Errorf(
635 | "Test:%v\nExpected:\n%v\nReceived:\n%v",
636 | testnum, expected, received,
637 | )
638 | }
639 | if err := checkTypes(test.series); err != nil {
640 | t.Errorf("Test:%v\nError:%v", testnum, err)
641 | }
642 | }
643 | }
644 |
645 | func TestInts(t *testing.T) {
646 | table := []struct {
647 | series Series
648 | expected string
649 | }{
650 | {
651 | Ints([]string{"A", "B", "1", "2"}),
652 | "[NaN NaN 1 2]",
653 | },
654 | {
655 | Ints([]string{"1"}),
656 | "[1]",
657 | },
658 | {
659 | Ints("2"),
660 | "[2]",
661 | },
662 | {
663 | Ints([]int{1, 2, 3}),
664 | "[1 2 3]",
665 | },
666 | {
667 | Ints([]int{2}),
668 | "[2]",
669 | },
670 | {
671 | Ints(-1),
672 | "[-1]",
673 | },
674 | {
675 | Ints([]float64{1, 2, 3}),
676 | "[1 2 3]",
677 | },
678 | {
679 | Ints([]float64{2}),
680 | "[2]",
681 | },
682 | {
683 | Ints(-1.0),
684 | "[-1]",
685 | },
686 | {
687 | Ints(math.NaN()),
688 | "[NaN]",
689 | },
690 | {
691 | Ints(math.Inf(1)),
692 | "[NaN]",
693 | },
694 | {
695 | Ints(math.Inf(-1)),
696 | "[NaN]",
697 | },
698 | {
699 | Ints([]bool{true, true, false}),
700 | "[1 1 0]",
701 | },
702 | {
703 | Ints([]bool{false}),
704 | "[0]",
705 | },
706 | {
707 | Ints(true),
708 | "[1]",
709 | },
710 | {
711 | Ints([]int{}),
712 | "[]",
713 | },
714 | {
715 | Ints(nil),
716 | "[NaN]",
717 | },
718 | {
719 | Ints(Strings([]string{"1", "2", "3"})),
720 | "[1 2 3]",
721 | },
722 | {
723 | Ints(Ints([]string{"1", "2", "3"})),
724 | "[1 2 3]",
725 | },
726 | }
727 | for testnum, test := range table {
728 | if err := test.series.Err; err != nil {
729 | t.Errorf("Test:%v\nError:%v", testnum, err)
730 | }
731 | expected := test.expected
732 | received := fmt.Sprint(test.series)
733 | if expected != received {
734 | t.Errorf(
735 | "Test:%v\nExpected:\n%v\nReceived:\n%v",
736 | testnum, expected, received,
737 | )
738 | }
739 | if err := checkTypes(test.series); err != nil {
740 | t.Errorf("Test:%v\nError:%v", testnum, err)
741 | }
742 | }
743 | }
744 |
745 | func TestFloats(t *testing.T) {
746 | table := []struct {
747 | series Series
748 | expected string
749 | }{
750 | {
751 | Floats([]string{"A", "B", "1", "2"}),
752 | "[NaN NaN 1.000000 2.000000]",
753 | },
754 | {
755 | Floats([]string{"1"}),
756 | "[1.000000]",
757 | },
758 | {
759 | Floats("2.1"),
760 | "[2.100000]",
761 | },
762 | {
763 | Floats([]int{1, 2, 3}),
764 | "[1.000000 2.000000 3.000000]",
765 | },
766 | {
767 | Floats([]int{2}),
768 | "[2.000000]",
769 | },
770 | {
771 | Floats(-1),
772 | "[-1.000000]",
773 | },
774 | {
775 | Floats([]float64{1.1, 2, 3}),
776 | "[1.100000 2.000000 3.000000]",
777 | },
778 | {
779 | Floats([]float64{2}),
780 | "[2.000000]",
781 | },
782 | {
783 | Floats(-1.0),
784 | "[-1.000000]",
785 | },
786 | {
787 | Floats(math.NaN()),
788 | "[NaN]",
789 | },
790 | {
791 | Floats(math.Inf(1)),
792 | "[+Inf]",
793 | },
794 | {
795 | Floats(math.Inf(-1)),
796 | "[-Inf]",
797 | },
798 | {
799 | Floats([]bool{true, true, false}),
800 | "[1.000000 1.000000 0.000000]",
801 | },
802 | {
803 | Floats([]bool{false}),
804 | "[0.000000]",
805 | },
806 | {
807 | Floats(true),
808 | "[1.000000]",
809 | },
810 | {
811 | Floats([]int{}),
812 | "[]",
813 | },
814 | {
815 | Floats(nil),
816 | "[NaN]",
817 | },
818 | {
819 | Floats(Strings([]string{"1", "2", "3"})),
820 | "[1.000000 2.000000 3.000000]",
821 | },
822 | }
823 | for testnum, test := range table {
824 | if err := test.series.Err; err != nil {
825 | t.Errorf("Test:%v\nError:%v", testnum, err)
826 | }
827 | expected := test.expected
828 | received := fmt.Sprint(test.series)
829 | if expected != received {
830 | t.Errorf(
831 | "Test:%v\nExpected:\n%v\nReceived:\n%v",
832 | testnum, expected, received,
833 | )
834 | }
835 | if err := checkTypes(test.series); err != nil {
836 | t.Errorf("Test:%v\nError:%v", testnum, err)
837 | }
838 | }
839 | }
840 |
841 | func TestBools(t *testing.T) {
842 | table := []struct {
843 | series Series
844 | expected string
845 | }{
846 | {
847 | Bools([]string{"A", "true", "1", "f"}),
848 | "[NaN true true false]",
849 | },
850 | {
851 | Bools([]string{"t"}),
852 | "[true]",
853 | },
854 | {
855 | Bools("False"),
856 | "[false]",
857 | },
858 | {
859 | Bools([]int{1, 2, 0}),
860 | "[true NaN false]",
861 | },
862 | {
863 | Bools([]int{1}),
864 | "[true]",
865 | },
866 | {
867 | Bools(-1),
868 | "[NaN]",
869 | },
870 | {
871 | Bools([]float64{1, 2, 0}),
872 | "[true NaN false]",
873 | },
874 | {
875 | Bools([]float64{0}),
876 | "[false]",
877 | },
878 | {
879 | Bools(-1.0),
880 | "[NaN]",
881 | },
882 | {
883 | Bools(math.NaN()),
884 | "[NaN]",
885 | },
886 | {
887 | Bools(math.Inf(1)),
888 | "[NaN]",
889 | },
890 | {
891 | Bools(math.Inf(-1)),
892 | "[NaN]",
893 | },
894 | {
895 | Bools([]bool{true, true, false}),
896 | "[true true false]",
897 | },
898 | {
899 | Bools([]bool{false}),
900 | "[false]",
901 | },
902 | {
903 | Bools(true),
904 | "[true]",
905 | },
906 | {
907 | Bools([]int{}),
908 | "[]",
909 | },
910 | {
911 | Bools(nil),
912 | "[NaN]",
913 | },
914 | {
915 | Bools(Strings([]string{"1", "0", "1"})),
916 | "[true false true]",
917 | },
918 | }
919 | for testnum, test := range table {
920 | if err := test.series.Err; err != nil {
921 | t.Errorf("Test:%v\nError:%v", testnum, err)
922 | }
923 | expected := test.expected
924 | received := fmt.Sprint(test.series)
925 | if expected != received {
926 | t.Errorf(
927 | "Test:%v\nExpected:\n%v\nReceived:\n%v",
928 | testnum, expected, received,
929 | )
930 | }
931 | if err := checkTypes(test.series); err != nil {
932 | t.Errorf("Test:%v\nError:%v", testnum, err)
933 | }
934 | }
935 | }
936 |
937 | func TestSeries_Copy(t *testing.T) {
938 | tests := []Series{
939 | Strings([]string{"1", "2", "3", "a", "b", "c"}),
940 | Ints([]string{"1", "2", "3", "a", "b", "c"}),
941 | Floats([]string{"1", "2", "3", "a", "b", "c"}),
942 | Bools([]string{"1", "0", "1", "t", "f", "c"}),
943 | }
944 | for testnum, test := range tests {
945 | a := test
946 | b := a.Copy()
947 | if fmt.Sprint(a) != fmt.Sprint(b) {
948 | t.Error("Different values when copying String elements")
949 | }
950 | if err := b.Err; err != nil {
951 | t.Errorf("Test:%v\nError:%v", testnum, err)
952 | }
953 | if err := checkTypes(b); err != nil {
954 | t.Errorf("Test:%v\nError:%v", testnum, err)
955 | }
956 | //if err := checkAddr(a.Addr(), b.Addr()); err != nil {
957 | //t.Errorf("Test:%v\nError:%v\nA:%v\nB:%v", testnum, err, a.Addr(), b.Addr())
958 | //}
959 | }
960 | }
961 |
962 | func TestSeries_Records(t *testing.T) {
963 | tests := []struct {
964 | series Series
965 | expected []string
966 | }{
967 | {
968 | Strings([]string{"1", "2", "3", "a", "b", "c"}),
969 | []string{"1", "2", "3", "a", "b", "c"},
970 | },
971 | {
972 | Ints([]string{"1", "2", "3", "a", "b", "c"}),
973 | []string{"1", "2", "3", "NaN", "NaN", "NaN"},
974 | },
975 | {
976 | Floats([]string{"1", "2", "3", "a", "b", "c"}),
977 | []string{"1.000000", "2.000000", "3.000000", "NaN", "NaN", "NaN"},
978 | },
979 | {
980 | Bools([]string{"1", "0", "1", "t", "f", "c"}),
981 | []string{"true", "false", "true", "true", "false", "NaN"},
982 | },
983 | }
984 | for testnum, test := range tests {
985 | expected := test.expected
986 | received := test.series.Records()
987 | if !reflect.DeepEqual(expected, received) {
988 | t.Errorf(
989 | "Test:%v\nExpected:\n%v\nReceived:\n%v",
990 | testnum, expected, received,
991 | )
992 | }
993 | }
994 | }
995 |
996 | func TestSeries_Float(t *testing.T) {
997 | precision := 0.0000001
998 | floatEquals := func(x, y []float64) bool {
999 | if len(x) != len(y) {
1000 | return false
1001 | }
1002 | for i := 0; i < len(x); i++ {
1003 | a := x[i]
1004 | b := y[i]
1005 | if (a-b) > precision || (b-a) > precision {
1006 | return false
1007 | }
1008 | }
1009 | return true
1010 | }
1011 | tests := []struct {
1012 | series Series
1013 | expected []float64
1014 | }{
1015 | {
1016 | Strings([]string{"1", "2", "3", "a", "b", "c"}),
1017 | []float64{1, 2, 3, math.NaN(), math.NaN(), math.NaN()},
1018 | },
1019 | {
1020 | Ints([]string{"1", "2", "3", "a", "b", "c"}),
1021 | []float64{1, 2, 3, math.NaN(), math.NaN(), math.NaN()},
1022 | },
1023 | {
1024 | Floats([]string{"1", "2", "3", "a", "b", "c"}),
1025 | []float64{1, 2, 3, math.NaN(), math.NaN(), math.NaN()},
1026 | },
1027 | {
1028 | Bools([]string{"1", "0", "1", "t", "f", "c"}),
1029 | []float64{1, 0, 1, 1, 0, math.NaN()},
1030 | },
1031 | }
1032 | for testnum, test := range tests {
1033 | expected := test.expected
1034 | received := test.series.Float()
1035 | if !floatEquals(expected, received) {
1036 | t.Errorf(
1037 | "Test:%v\nExpected:\n%v\nReceived:\n%v",
1038 | testnum, expected, received,
1039 | )
1040 | }
1041 | }
1042 | }
1043 |
1044 | func TestSeries_Concat(t *testing.T) {
1045 | tests := []struct {
1046 | a Series
1047 | b Series
1048 | expected []string
1049 | }{
1050 | {
1051 | Strings([]string{"1", "2", "3"}),
1052 | Strings([]string{"a", "b", "c"}),
1053 | []string{"1", "2", "3", "a", "b", "c"},
1054 | },
1055 | {
1056 | Ints([]string{"1", "2", "3"}),
1057 | Ints([]string{"a", "4", "c"}),
1058 | []string{"1", "2", "3", "NaN", "4", "NaN"},
1059 | },
1060 | {
1061 | Floats([]string{"1", "2", "3"}),
1062 | Floats([]string{"a", "4", "c"}),
1063 | []string{"1.000000", "2.000000", "3.000000", "NaN", "4.000000", "NaN"},
1064 | },
1065 | {
1066 | Bools([]string{"1", "1", "0"}),
1067 | Bools([]string{"0", "0", "0"}),
1068 | []string{"true", "true", "false", "false", "false", "false"},
1069 | },
1070 | }
1071 | for testnum, test := range tests {
1072 | ab := test.a.Concat(test.b)
1073 | if err := ab.Err; err != nil {
1074 | t.Errorf("Test:%v\nError:%v", testnum, err)
1075 | }
1076 | received := ab.Records()
1077 | expected := test.expected
1078 | if !reflect.DeepEqual(expected, received) {
1079 | t.Errorf(
1080 | "Test:%v\nExpected:\n%v\nReceived:\n%v",
1081 | testnum, expected, received,
1082 | )
1083 | }
1084 | //a := test.a
1085 | //b := ab
1086 | //if err := checkAddr(a.Addr(), b.Addr()); err != nil {
1087 | //t.Errorf("Test:%v\nError:%v\nA:%v\nAB:%v", testnum, err, a.Addr(), b.Addr())
1088 | //}
1089 | //a = test.b
1090 | //b = ab
1091 | //if err := checkAddr(a.Addr(), b.Addr()); err != nil {
1092 | //t.Errorf("Test:%v\nError:%v\nB:%v\nAB:%v", testnum, err, a.Addr(), b.Addr())
1093 | //}
1094 | }
1095 | }
1096 |
1097 | func TestSeries_Order(t *testing.T) {
1098 | tests := []struct {
1099 | series Series
1100 | reverse bool
1101 | expected []int
1102 | }{
1103 | {
1104 | Ints([]string{"2", "1", "3", "NaN", "4", "NaN"}),
1105 | false,
1106 | []int{1, 0, 2, 4, 3, 5},
1107 | },
1108 | {
1109 | Floats([]string{"2", "1", "3", "NaN", "4", "NaN"}),
1110 | false,
1111 | []int{1, 0, 2, 4, 3, 5},
1112 | },
1113 | {
1114 | Strings([]string{"c", "b", "a"}),
1115 | false,
1116 | []int{2, 1, 0},
1117 | },
1118 | {
1119 | Bools([]bool{true, false, false, false, true}),
1120 | false,
1121 | []int{1, 2, 3, 0, 4},
1122 | },
1123 | {
1124 | Ints([]string{"2", "1", "3", "NaN", "4", "NaN"}),
1125 | true,
1126 | []int{4, 2, 0, 1, 3, 5},
1127 | },
1128 | {
1129 | Floats([]string{"2", "1", "3", "NaN", "4", "NaN"}),
1130 | true,
1131 | []int{4, 2, 0, 1, 3, 5},
1132 | },
1133 | {
1134 | Strings([]string{"c", "b", "a"}),
1135 | true,
1136 | []int{0, 1, 2},
1137 | },
1138 | {
1139 | Bools([]bool{true, false, false, false, true}),
1140 | true,
1141 | []int{0, 4, 1, 2, 3},
1142 | },
1143 | }
1144 | for testnum, test := range tests {
1145 | received := test.series.Order(test.reverse)
1146 | expected := test.expected
1147 | if !reflect.DeepEqual(expected, received) {
1148 | t.Errorf(
1149 | "Test:%v\nExpected:\n%v\nReceived:\n%v",
1150 | testnum, expected, received,
1151 | )
1152 | }
1153 | }
1154 | }
1155 |
1156 | func TestSeries_IsNaN(t *testing.T) {
1157 | tests := []struct {
1158 | series Series
1159 | expected []bool
1160 | }{
1161 | {
1162 | Ints([]string{"2", "1", "3", "NaN", "4", "NaN"}),
1163 | []bool{false, false, false, true, false, true},
1164 | },
1165 | {
1166 | Floats([]string{"A", "1", "B", "3"}),
1167 | []bool{true, false, true, false},
1168 | },
1169 | }
1170 | for testnum, test := range tests {
1171 | received := test.series.IsNaN()
1172 | expected := test.expected
1173 | if !reflect.DeepEqual(expected, received) {
1174 | t.Errorf(
1175 | "Test:%v\nExpected:\n%v\nReceived:\n%v",
1176 | testnum, expected, received,
1177 | )
1178 | }
1179 | }
1180 | }
1181 |
1182 | func TestSeries_StdDev(t *testing.T) {
1183 | tests := []struct {
1184 | series Series
1185 | expected float64
1186 | }{
1187 | {
1188 | Ints([]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}),
1189 | 3.02765,
1190 | },
1191 | {
1192 | Floats([]float64{1.0, 2.0, 3.0}),
1193 | 1.0,
1194 | },
1195 | {
1196 | Strings([]string{"A", "B", "C", "D"}),
1197 | math.NaN(),
1198 | },
1199 | {
1200 | Bools([]bool{true, true, false, true}),
1201 | 0.5,
1202 | },
1203 | {
1204 | Floats([]float64{}),
1205 | math.NaN(),
1206 | },
1207 | }
1208 |
1209 | for testnum, test := range tests {
1210 | received := test.series.StdDev()
1211 | expected := test.expected
1212 | if !compareFloats(received, expected, 6) {
1213 | t.Errorf(
1214 | "Test:%v\nExpected:\n%v\nReceived:\n%v",
1215 | testnum, expected, received,
1216 | )
1217 | }
1218 | }
1219 | }
1220 |
1221 | func TestSeries_Mean(t *testing.T) {
1222 | tests := []struct {
1223 | series Series
1224 | expected float64
1225 | }{
1226 | {
1227 | Ints([]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}),
1228 | 5.5,
1229 | },
1230 | {
1231 | Floats([]float64{1.0, 2.0, 3.0}),
1232 | 2.0,
1233 | },
1234 | {
1235 | Strings([]string{"A", "B", "C", "D"}),
1236 | math.NaN(),
1237 | },
1238 | {
1239 | Bools([]bool{true, true, false, true}),
1240 | 0.75,
1241 | },
1242 | {
1243 | Floats([]float64{}),
1244 | math.NaN(),
1245 | },
1246 | }
1247 |
1248 | for testnum, test := range tests {
1249 | received := test.series.Mean()
1250 | expected := test.expected
1251 | if !compareFloats(received, expected, 6) {
1252 | t.Errorf(
1253 | "Test:%v\nExpected:\n%v\nReceived:\n%v",
1254 | testnum, expected, received,
1255 | )
1256 | }
1257 | }
1258 | }
1259 |
1260 | func TestSeries_Max(t *testing.T) {
1261 | tests := []struct {
1262 | series Series
1263 | expected float64
1264 | }{
1265 | {
1266 | Ints([]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}),
1267 | 10,
1268 | },
1269 | {
1270 | Floats([]float64{1.0, 2.0, 3.0}),
1271 | 3.0,
1272 | },
1273 | {
1274 | Strings([]string{"A", "B", "C", "D"}),
1275 | math.NaN(),
1276 | },
1277 | {
1278 | Bools([]bool{true, true, false, true}),
1279 | 1.0,
1280 | },
1281 | {
1282 | Floats([]float64{}),
1283 | math.NaN(),
1284 | },
1285 | }
1286 |
1287 | for testnum, test := range tests {
1288 | received := test.series.Max()
1289 | expected := test.expected
1290 | if !compareFloats(received, expected, 6) {
1291 | t.Errorf(
1292 | "Test:%v\nExpected:\n%v\nReceived:\n%v",
1293 | testnum, expected, received,
1294 | )
1295 | }
1296 | }
1297 | }
1298 |
1299 | func TestSeries_Median(t *testing.T) {
1300 | tests := []struct {
1301 | series Series
1302 | expected float64
1303 | }{
1304 | {
1305 | // Extreme observations should not factor in.
1306 | Ints([]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 100, 1000, 10000}),
1307 | 7,
1308 | },
1309 | {
1310 | // Change in order should influence result.
1311 | Ints([]int{1, 2, 3, 10, 100, 1000, 10000, 4, 5, 6, 7, 8, 9}),
1312 | 7,
1313 | },
1314 | {
1315 | Floats([]float64{20.2755, 4.98964, -20.2006, 1.19854, 1.89977,
1316 | 1.51178, -17.4687, 4.65567, -8.65952, 6.31649,
1317 | }),
1318 | 1.705775,
1319 | },
1320 | {
1321 | // Change in order should not influence result.
1322 | Floats([]float64{4.98964, -20.2006, 1.89977, 1.19854,
1323 | 1.51178, -17.4687, -8.65952, 20.2755, 4.65567, 6.31649,
1324 | }),
1325 | 1.705775,
1326 | },
1327 | {
1328 | Strings([]string{"A", "B", "C", "D"}),
1329 | math.NaN(),
1330 | },
1331 | {
1332 | Bools([]bool{true, true, false, true}),
1333 | math.NaN(),
1334 | },
1335 | {
1336 | Floats([]float64{}),
1337 | math.NaN(),
1338 | },
1339 | }
1340 |
1341 | for testnum, test := range tests {
1342 | received := test.series.Median()
1343 | expected := test.expected
1344 | if !compareFloats(received, expected, 6) {
1345 | t.Errorf(
1346 | "Test:%v\nExpected:\n%v\nReceived:\n%v",
1347 | testnum, expected, received,
1348 | )
1349 | }
1350 | }
1351 | }
1352 |
1353 | func TestSeries_Min(t *testing.T) {
1354 | tests := []struct {
1355 | series Series
1356 | expected float64
1357 | }{
1358 | {
1359 | Ints([]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}),
1360 | 1.0,
1361 | },
1362 | {
1363 | Floats([]float64{1.0, 2.0, 3.0}),
1364 | 1.0,
1365 | },
1366 | {
1367 | Strings([]string{"A", "B", "C", "D"}),
1368 | math.NaN(),
1369 | },
1370 | {
1371 | Bools([]bool{true, true, false, true}),
1372 | 0.0,
1373 | },
1374 | {
1375 | Floats([]float64{}),
1376 | math.NaN(),
1377 | },
1378 | }
1379 |
1380 | for testnum, test := range tests {
1381 | received := test.series.Min()
1382 | expected := test.expected
1383 | if !compareFloats(received, expected, 6) {
1384 | t.Errorf(
1385 | "Test:%v\nExpected:\n%v\nReceived:\n%v",
1386 | testnum, expected, received,
1387 | )
1388 | }
1389 | }
1390 | }
1391 |
1392 | func TestSeries_MaxStr(t *testing.T) {
1393 | tests := []struct {
1394 | series Series
1395 | expected string
1396 | }{
1397 | {
1398 | Ints([]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}),
1399 | "",
1400 | },
1401 | {
1402 | Floats([]float64{1.0, 2.0, 3.0}),
1403 | "",
1404 | },
1405 | {
1406 | Strings([]string{"A", "B", "C", "D"}),
1407 | "D",
1408 | },
1409 | {
1410 | Strings([]string{"quick", "Brown", "fox", "Lazy", "dog"}),
1411 | "quick",
1412 | },
1413 | {
1414 | Bools([]bool{true, true, false, true}),
1415 | "",
1416 | },
1417 | {
1418 | Floats([]float64{}),
1419 | "",
1420 | },
1421 | }
1422 |
1423 | for testnum, test := range tests {
1424 | received := test.series.MaxStr()
1425 | expected := test.expected
1426 | if received != expected {
1427 | t.Errorf(
1428 | "Test:%v\nExpected:\n%v\nReceived:\n%v",
1429 | testnum, expected, received,
1430 | )
1431 | }
1432 | }
1433 | }
1434 |
1435 | func TestSeries_MinStr(t *testing.T) {
1436 | tests := []struct {
1437 | series Series
1438 | expected string
1439 | }{
1440 | {
1441 | Ints([]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}),
1442 | "",
1443 | },
1444 | {
1445 | Floats([]float64{1.0, 2.0, 3.0}),
1446 | "",
1447 | },
1448 | {
1449 | Strings([]string{"A", "B", "C", "D"}),
1450 | "A",
1451 | },
1452 | {
1453 | Strings([]string{"quick", "Brown", "fox", "Lazy", "dog"}),
1454 | "Brown",
1455 | },
1456 | {
1457 | Bools([]bool{true, true, false, true}),
1458 | "",
1459 | },
1460 | {
1461 | Floats([]float64{}),
1462 | "",
1463 | },
1464 | }
1465 |
1466 | for testnum, test := range tests {
1467 | received := test.series.MinStr()
1468 | expected := test.expected
1469 | if received != expected {
1470 | t.Errorf(
1471 | "Test:%v\nExpected:\n%v\nReceived:\n%v",
1472 | testnum, expected, received,
1473 | )
1474 | }
1475 | }
1476 | }
1477 |
1478 | func TestSeries_Quantile(t *testing.T) {
1479 | tests := []struct {
1480 | series Series
1481 | p float64
1482 | expected float64
1483 | }{
1484 | {
1485 | Ints([]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}),
1486 | 0.9,
1487 | 9,
1488 | },
1489 | {
1490 | Floats([]float64{3.141592, math.Sqrt(3), 2.718281, math.Sqrt(2)}),
1491 | 0.8,
1492 | 3.141592,
1493 | },
1494 | {
1495 | Floats([]float64{1.0, 2.0, 3.0}),
1496 | 0.5,
1497 | 2.0,
1498 | },
1499 | {
1500 | Strings([]string{"A", "B", "C", "D"}),
1501 | 0.25,
1502 | math.NaN(),
1503 | },
1504 | {
1505 | Bools([]bool{false, false, false, true}),
1506 | 0.75,
1507 | 0.0,
1508 | },
1509 | {
1510 | Floats([]float64{}),
1511 | 0.50,
1512 | math.NaN(),
1513 | },
1514 | }
1515 |
1516 | for testnum, test := range tests {
1517 | received := test.series.Quantile(test.p)
1518 | expected := test.expected
1519 | if !compareFloats(received, expected, 6) {
1520 | t.Errorf(
1521 | "Test:%v\nExpected:\n%v\nReceived:\n%v",
1522 | testnum, expected, received,
1523 | )
1524 | }
1525 | }
1526 | }
1527 |
1528 |
1529 | func TestSeries_Map(t *testing.T) {
1530 | tests := []struct {
1531 | series Series
1532 | expected Series
1533 | }{
1534 | {
1535 | Bools([]bool{false, true, false, false, true}),
1536 | Bools([]bool{false, true, false, false, true}),
1537 | },
1538 | {
1539 | Floats([]float64{1.5, -3.23, -0.337397, -0.380079, 1.60979, 34.}),
1540 | Floats([]float64{3, -6.46, -0.674794, -0.760158, 3.21958, 68.}),
1541 | },
1542 | {
1543 | Floats([]float64{math.Pi, math.Phi, math.SqrtE, math.Cbrt(64)}),
1544 | Floats([]float64{2 * math.Pi, 2 * math.Phi, 2 * math.SqrtE, 2 * math.Cbrt(64)}),
1545 | },
1546 | {
1547 | Strings([]string{"XyZApple", "XyZBanana", "XyZCitrus", "XyZDragonfruit"}),
1548 | Strings([]string{"Apple", "Banana", "Citrus", "Dragonfruit"}),
1549 | },
1550 | {
1551 | Strings([]string{"San Francisco", "XyZTokyo", "MoscowXyZ", "XyzSydney"}),
1552 | Strings([]string{"San Francisco", "Tokyo", "MoscowXyZ", "XyzSydney"}),
1553 | },
1554 | {
1555 | Ints([]int{23, 13, 101, -64, -3}),
1556 | Ints([]int{28, 18, 106, -59, 2}),
1557 | },
1558 | {
1559 | Ints([]string{"morning", "noon", "afternoon", "evening", "night"}),
1560 | Ints([]int{5, 5, 5, 5, 5}),
1561 | },
1562 | }
1563 |
1564 | doubleFloat64 := func(e Element) Element {
1565 | var result Element
1566 | result = e.Copy()
1567 | result.Set(result.Float() * 2)
1568 | return Element(result)
1569 | }
1570 |
1571 | // and two booleans
1572 | and := func(e Element) Element {
1573 | var result Element
1574 | result = e.Copy()
1575 | b, err := result.Bool()
1576 | if err != nil {
1577 | t.Errorf("%v", err)
1578 | return Element(nil)
1579 | }
1580 | result.Set(b && true)
1581 | return Element(result)
1582 | }
1583 |
1584 | // add constant (+5) to value (v)
1585 | add5Int := func(e Element) Element {
1586 | var result Element
1587 | result = e.Copy()
1588 | i, err := result.Int()
1589 | if err != nil {
1590 | return Element(&intElement{
1591 | e: +5,
1592 | nan: false,
1593 | })
1594 | }
1595 | result.Set(i + 5)
1596 | return Element(result)
1597 | }
1598 |
1599 | // trim (XyZ) prefix from string
1600 | trimXyZPrefix := func(e Element) Element {
1601 | var result Element
1602 | result = e.Copy()
1603 | result.Set(strings.TrimPrefix(result.String(), "XyZ"))
1604 | return Element(result)
1605 | }
1606 |
1607 | for testnum, test := range tests {
1608 | switch test.series.Type() {
1609 | case Bool:
1610 | expected := test.expected
1611 | received := test.series.Map(and)
1612 | for i := 0 ; i maxRows {
140 | shortening = true
141 | df = df.Subset(idx)
142 | records = df.Records()
143 | } else {
144 | records = df.Records()
145 | }
146 |
147 | if showDims {
148 | str += fmt.Sprintf("[%dx%d] %s\n\n", nrows, ncols, class)
149 | }
150 |
151 | // Add the row numbers
152 | for i := 0; i < df.nrows+1; i++ {
153 | add := ""
154 | if i != 0 {
155 | add = strconv.Itoa(i-1) + ":"
156 | }
157 | records[i] = append([]string{add}, records[i]...)
158 | }
159 | if shortening {
160 | dots := make([]string, ncols+1)
161 | for i := 1; i < ncols+1; i++ {
162 | dots[i] = "..."
163 | }
164 | records = append(records, dots)
165 | }
166 | types := df.Types()
167 | typesrow := make([]string, ncols)
168 | for i := 0; i < ncols; i++ {
169 | typesrow[i] = fmt.Sprintf("<%v>", types[i])
170 | }
171 | typesrow = append([]string{""}, typesrow...)
172 |
173 | if showTypes {
174 | records = append(records, typesrow)
175 | }
176 |
177 | maxChars := make([]int, df.ncols+1)
178 | for i := 0; i < len(records); i++ {
179 | for j := 0; j < df.ncols+1; j++ {
180 | // Escape special characters
181 | records[i][j] = strconv.Quote(records[i][j])
182 | records[i][j] = records[i][j][1 : len(records[i][j])-1]
183 |
184 | // Detect maximum number of characters per column
185 | if len(records[i][j]) > maxChars[j] {
186 | maxChars[j] = utf8.RuneCountInString(records[i][j])
187 | }
188 | }
189 | }
190 | maxCols := len(records[0])
191 | var notShowing []string
192 | if shortCols {
193 | maxCharsCum := 0
194 | for colnum, m := range maxChars {
195 | maxCharsCum += m
196 | if maxCharsCum > maxCharsTotal {
197 | maxCols = colnum
198 | break
199 | }
200 | }
201 | notShowingNames := records[0][maxCols:]
202 | notShowingTypes := typesrow[maxCols:]
203 | notShowing = make([]string, len(notShowingNames))
204 | for i := 0; i < len(notShowingNames); i++ {
205 | notShowing[i] = fmt.Sprintf("%s %s", notShowingNames[i], notShowingTypes[i])
206 | }
207 | }
208 | for i := 0; i < len(records); i++ {
209 | // Add right padding to all elements
210 | records[i][0] = addLeftPadding(records[i][0], maxChars[0]+1)
211 | for j := 1; j < df.ncols+1; j++ {
212 | records[i][j] = addRightPadding(records[i][j], maxChars[j])
213 | }
214 | records[i] = records[i][0:maxCols]
215 | if shortCols && len(notShowing) != 0 {
216 | records[i] = append(records[i], "...")
217 | }
218 | // Create the final string
219 | str += strings.Join(records[i], " ")
220 | str += "\n"
221 | }
222 | if shortCols && len(notShowing) != 0 {
223 | var notShown string
224 | var notShownArr [][]string
225 | cum := 0
226 | i := 0
227 | for n, ns := range notShowing {
228 | cum += len(ns)
229 | if cum > maxCharsTotal {
230 | notShownArr = append(notShownArr, notShowing[i:n])
231 | cum = 0
232 | i = n
233 | }
234 | }
235 | if i < len(notShowing) {
236 | notShownArr = append(notShownArr, notShowing[i:len(notShowing)])
237 | }
238 | for k, ns := range notShownArr {
239 | notShown += strings.Join(ns, ", ")
240 | if k != len(notShownArr)-1 {
241 | notShown += ","
242 | }
243 | notShown += "\n"
244 | }
245 | str += fmt.Sprintf("\nNot Showing: %s", notShown)
246 | }
247 | return str
248 | }
249 |
250 | // Subsetting, mutating and transforming DataFrame methods
251 | // =======================================================
252 |
253 | // Set will update the values of a DataFrame for all rows selected via indexes.
254 | func (df DataFrame) Set(indexes series.Indexes, newvalues DataFrame) DataFrame {
255 | if df.Err != nil {
256 | return df
257 | }
258 | if newvalues.Err != nil {
259 | return DataFrame{Err: fmt.Errorf("argument has errors: %v", newvalues.Err)}
260 | }
261 | if df.ncols != newvalues.ncols {
262 | return DataFrame{Err: fmt.Errorf("different number of columns")}
263 | }
264 | columns := make([]series.Series, df.ncols)
265 | for i, s := range df.columns {
266 | columns[i] = s.Set(indexes, newvalues.columns[i])
267 | if columns[i].Err != nil {
268 | df = DataFrame{Err: fmt.Errorf("setting error on column %d: %v", i, columns[i].Err)}
269 | return df
270 | }
271 | }
272 | return df
273 | }
274 |
275 | // Subset returns a subset of the rows of the original DataFrame based on the
276 | // Series subsetting indexes.
277 | func (df DataFrame) Subset(indexes series.Indexes) DataFrame {
278 | if df.Err != nil {
279 | return df
280 | }
281 | columns := make([]series.Series, df.ncols)
282 | for i, column := range df.columns {
283 | s := column.Subset(indexes)
284 | columns[i] = s
285 | }
286 | nrows, ncols, err := checkColumnsDimensions(columns...)
287 | if err != nil {
288 | return DataFrame{Err: err}
289 | }
290 | return DataFrame{
291 | columns: columns,
292 | ncols: ncols,
293 | nrows: nrows,
294 | }
295 | }
296 |
// SelectIndexes are the supported indexes used for the DataFrame.Select and
// DataFrame.Drop methods. Currently supported are:
//
//	int              // Matches the given index number
//	[]int            // Matches all given index numbers
//	[]bool           // Matches all columns marked as true
//	string           // Matches the column with the matching column name
//	[]string         // Matches all columns with the matching column names
//	Series [Int]     // Same as []int
//	Series [Bool]    // Same as []bool
//	Series [String]  // Same as []string
type SelectIndexes interface{}
308 |
309 | // Select the given DataFrame columns
310 | func (df DataFrame) Select(indexes SelectIndexes) DataFrame {
311 | if df.Err != nil {
312 | return df
313 | }
314 | idx, err := parseSelectIndexes(df.ncols, indexes, df.Names())
315 | if err != nil {
316 | return DataFrame{Err: fmt.Errorf("can't select columns: %v", err)}
317 | }
318 | columns := make([]series.Series, len(idx))
319 | for k, i := range idx {
320 | if i < 0 || i >= df.ncols {
321 | return DataFrame{Err: fmt.Errorf("can't select columns: index out of range")}
322 | }
323 | columns[k] = df.columns[i].Copy()
324 | }
325 | nrows, ncols, err := checkColumnsDimensions(columns...)
326 | if err != nil {
327 | return DataFrame{Err: err}
328 | }
329 | df = DataFrame{
330 | columns: columns,
331 | ncols: ncols,
332 | nrows: nrows,
333 | }
334 | colnames := df.Names()
335 | fixColnames(colnames)
336 | for i, colname := range colnames {
337 | df.columns[i].Name = colname
338 | }
339 | return df
340 | }
341 |
342 | // Drop the given DataFrame columns
343 | func (df DataFrame) Drop(indexes SelectIndexes) DataFrame {
344 | if df.Err != nil {
345 | return df
346 | }
347 | idx, err := parseSelectIndexes(df.ncols, indexes, df.Names())
348 | if err != nil {
349 | return DataFrame{Err: fmt.Errorf("can't select columns: %v", err)}
350 | }
351 | var columns []series.Series
352 | for k, col := range df.columns {
353 | if !inIntSlice(k, idx) {
354 | columns = append(columns, col.Copy())
355 | }
356 | }
357 | nrows, ncols, err := checkColumnsDimensions(columns...)
358 | if err != nil {
359 | return DataFrame{Err: err}
360 | }
361 | df = DataFrame{
362 | columns: columns,
363 | ncols: ncols,
364 | nrows: nrows,
365 | }
366 | colnames := df.Names()
367 | fixColnames(colnames)
368 | for i, colname := range colnames {
369 | df.columns[i].Name = colname
370 | }
371 | return df
372 | }
373 |
374 | // Rename changes the name of one of the columns of a DataFrame
375 | func (df DataFrame) Rename(newname, oldname string) DataFrame {
376 | if df.Err != nil {
377 | return df
378 | }
379 | // Check that colname exist on dataframe
380 | colnames := df.Names()
381 | idx := findInStringSlice(oldname, colnames)
382 | if idx == -1 {
383 | return DataFrame{Err: fmt.Errorf("rename: can't find column name")}
384 | }
385 |
386 | copy := df.Copy()
387 | copy.columns[idx].Name = newname
388 | return copy
389 | }
390 |
391 | // CBind combines the columns of this DataFrame and dfb DataFrame.
392 | func (df DataFrame) CBind(dfb DataFrame) DataFrame {
393 | if df.Err != nil {
394 | return df
395 | }
396 | if dfb.Err != nil {
397 | return dfb
398 | }
399 | cols := append(df.columns, dfb.columns...)
400 | return New(cols...)
401 | }
402 |
403 | // RBind matches the column names of two DataFrames and returns combined
404 | // rows from both of them.
405 | func (df DataFrame) RBind(dfb DataFrame) DataFrame {
406 | if df.Err != nil {
407 | return df
408 | }
409 | if dfb.Err != nil {
410 | return dfb
411 | }
412 | expandedSeries := make([]series.Series, df.ncols)
413 | for k, v := range df.Names() {
414 | idx := findInStringSlice(v, dfb.Names())
415 | if idx == -1 {
416 | return DataFrame{Err: fmt.Errorf("rbind: column names are not compatible")}
417 | }
418 |
419 | originalSeries := df.columns[k]
420 | addedSeries := dfb.columns[idx]
421 | newSeries := originalSeries.Concat(addedSeries)
422 | if err := newSeries.Err; err != nil {
423 | return DataFrame{Err: fmt.Errorf("rbind: %v", err)}
424 | }
425 | expandedSeries[k] = newSeries
426 | }
427 | return New(expandedSeries...)
428 | }
429 |
430 | // Mutate changes a column of the DataFrame with the given Series or adds it as
431 | // a new column if the column name does not exist.
432 | func (df DataFrame) Mutate(s series.Series) DataFrame {
433 | if df.Err != nil {
434 | return df
435 | }
436 | if s.Len() != df.nrows {
437 | return DataFrame{Err: fmt.Errorf("mutate: wrong dimensions")}
438 | }
439 | df = df.Copy()
440 | // Check that colname exist on dataframe
441 | columns := df.columns
442 | if idx := findInStringSlice(s.Name, df.Names()); idx != -1 {
443 | columns[idx] = s
444 | } else {
445 | columns = append(columns, s)
446 | }
447 | nrows, ncols, err := checkColumnsDimensions(columns...)
448 | if err != nil {
449 | return DataFrame{Err: err}
450 | }
451 | df = DataFrame{
452 | columns: columns,
453 | ncols: ncols,
454 | nrows: nrows,
455 | }
456 | colnames := df.Names()
457 | fixColnames(colnames)
458 | for i, colname := range colnames {
459 | df.columns[i].Name = colname
460 | }
461 | return df
462 | }
463 |
// F is the filtering structure. It describes one comparison to be evaluated
// against a single column of a DataFrame.
type F struct {
	// Colname is the name of the column the comparison applies to.
	Colname string
	// Comparator is the comparison operation handed to the column's Compare
	// method.
	Comparator series.Comparator
	// Comparando is the value the column is compared against.
	Comparando interface{}
}
470 |
471 | // Filter will filter the rows of a DataFrame based on the given filters. All
472 | // filters on the argument of a Filter call are aggregated as an OR operation
473 | // whereas if we chain Filter calls, every filter will act as an AND operation
474 | // with regards to the rest.
475 | func (df DataFrame) Filter(filters ...F) DataFrame {
476 | if df.Err != nil {
477 | return df
478 | }
479 | compResults := make([]series.Series, len(filters))
480 | for i, f := range filters {
481 | idx := findInStringSlice(f.Colname, df.Names())
482 | if idx < 0 {
483 | return DataFrame{Err: fmt.Errorf("filter: can't find column name")}
484 | }
485 | res := df.columns[idx].Compare(f.Comparator, f.Comparando)
486 | if err := res.Err; err != nil {
487 | return DataFrame{Err: fmt.Errorf("filter: %v", err)}
488 | }
489 | compResults[i] = res
490 | }
491 | // Join compResults via "OR"
492 | if len(compResults) == 0 {
493 | return df.Copy()
494 | }
495 | res, err := compResults[0].Bool()
496 | if err != nil {
497 | return DataFrame{Err: fmt.Errorf("filter: %v", err)}
498 | }
499 | for i := 1; i < len(compResults); i++ {
500 | nextRes, err := compResults[i].Bool()
501 | if err != nil {
502 | return DataFrame{Err: fmt.Errorf("filter: %v", err)}
503 | }
504 | for j := 0; j < len(res); j++ {
505 | res[j] = res[j] || nextRes[j]
506 | }
507 | }
508 | return df.Subset(res)
509 | }
510 |
// Order is the ordering structure: it names the column to sort by and states
// whether the sort on that column is reversed.
type Order struct {
	Colname string
	Reverse bool
}

// Sort returns an ordering structure for regular (non reversed) sorting on
// the given column.
func Sort(colname string) Order {
	return Order{Colname: colname, Reverse: false}
}

// RevSort returns an ordering structure for reverse sorting on the given
// column.
func RevSort(colname string) Order {
	return Order{Colname: colname, Reverse: true}
}
526 |
527 | // Arrange sort the rows of a DataFrame according to the given Order
528 | func (df DataFrame) Arrange(order ...Order) DataFrame {
529 | if df.Err != nil {
530 | return df
531 | }
532 | if order == nil || len(order) == 0 {
533 | return DataFrame{Err: fmt.Errorf("rename: no arguments")}
534 | }
535 |
536 | // Check that all colnames exist before starting to sort
537 | for i := 0; i < len(order); i++ {
538 | colname := order[i].Colname
539 | if df.colIndex(colname) == -1 {
540 | return DataFrame{Err: fmt.Errorf("colname %s doesn't exist", colname)}
541 | }
542 | }
543 |
544 | // Initialize the index that will be used to store temporary and final order
545 | // results.
546 | origIdx := make([]int, df.nrows)
547 | for i := 0; i < df.nrows; i++ {
548 | origIdx[i] = i
549 | }
550 |
551 | swapOrigIdx := func(newidx []int) {
552 | newOrigIdx := make([]int, len(newidx))
553 | for k, i := range newidx {
554 | newOrigIdx[k] = origIdx[i]
555 | }
556 | origIdx = newOrigIdx
557 | }
558 |
559 | suborder := origIdx
560 | for i := len(order) - 1; i >= 0; i-- {
561 | colname := order[i].Colname
562 | idx := df.colIndex(colname)
563 | nextSeries := df.columns[idx].Subset(suborder)
564 | suborder = nextSeries.Order(order[i].Reverse)
565 | swapOrigIdx(suborder)
566 | }
567 | return df.Subset(origIdx)
568 | }
569 |
570 | // Capply applies the given function to the columns of a DataFrame
571 | func (df DataFrame) Capply(f func(series.Series) series.Series) DataFrame {
572 | if df.Err != nil {
573 | return df
574 | }
575 | columns := make([]series.Series, df.ncols)
576 | for i, s := range df.columns {
577 | applied := f(s)
578 | applied.Name = s.Name
579 | columns[i] = applied
580 | }
581 | return New(columns...)
582 | }
583 |
584 | // Rapply applies the given function to the rows of a DataFrame. Prior to applying
585 | // the function the elements of each row are cast to a Series of a specific
586 | // type. In order of priority: String -> Float -> Int -> Bool. This casting also
587 | // takes place after the function application to equalize the type of the columns.
588 | func (df DataFrame) Rapply(f func(series.Series) series.Series) DataFrame {
589 | if df.Err != nil {
590 | return df
591 | }
592 |
593 | detectType := func(types []series.Type) series.Type {
594 | var hasStrings, hasFloats, hasInts, hasBools bool
595 | for _, t := range types {
596 | switch t {
597 | case series.String:
598 | hasStrings = true
599 | case series.Float:
600 | hasFloats = true
601 | case series.Int:
602 | hasInts = true
603 | case series.Bool:
604 | hasBools = true
605 | }
606 | }
607 | switch {
608 | case hasStrings:
609 | return series.String
610 | case hasBools:
611 | return series.Bool
612 | case hasFloats:
613 | return series.Float
614 | case hasInts:
615 | return series.Int
616 | default:
617 | panic("type not supported")
618 | }
619 | }
620 |
621 | // Detect row type prior to function application
622 | types := df.Types()
623 | rowType := detectType(types)
624 |
625 | // Create Element matrix
626 | elements := make([][]series.Element, df.nrows)
627 | rowlen := -1
628 | for i := 0; i < df.nrows; i++ {
629 | row := series.New(nil, rowType, "").Empty()
630 | for _, col := range df.columns {
631 | row.Append(col.Elem(i))
632 | }
633 | row = f(row)
634 | if row.Err != nil {
635 | return DataFrame{Err: fmt.Errorf("error applying function on row %d: %v", i, row.Err)}
636 | }
637 |
638 | if rowlen != -1 && rowlen != row.Len() {
639 | return DataFrame{Err: fmt.Errorf("error applying function: rows have different lengths")}
640 | }
641 | rowlen = row.Len()
642 |
643 | rowElems := make([]series.Element, rowlen)
644 | for j := 0; j < rowlen; j++ {
645 | rowElems[j] = row.Elem(j)
646 | }
647 | elements[i] = rowElems
648 | }
649 |
650 | // Cast columns if necessary
651 | columns := make([]series.Series, rowlen)
652 | for j := 0; j < rowlen; j++ {
653 | types := make([]series.Type, df.nrows)
654 | for i := 0; i < df.nrows; i++ {
655 | types[i] = elements[i][j].Type()
656 | }
657 | colType := detectType(types)
658 | s := series.New(nil, colType, "").Empty()
659 | for i := 0; i < df.nrows; i++ {
660 | s.Append(elements[i][j])
661 | }
662 | columns[j] = s
663 | }
664 |
665 | nrows, ncols, err := checkColumnsDimensions(columns...)
666 | if err != nil {
667 | return DataFrame{Err: err}
668 | }
669 | df = DataFrame{
670 | columns: columns,
671 | ncols: ncols,
672 | nrows: nrows,
673 | }
674 | colnames := df.Names()
675 | fixColnames(colnames)
676 | for i, colname := range colnames {
677 | df.columns[i].Name = colname
678 | }
679 | return df
680 | }
681 |
682 | // Read/Write Methods
683 | // =================
684 |
// LoadOption is the type used to configure the load of elements. A LoadOption
// is a function that receives the loadOptions being built and modifies it.
type LoadOption func(*loadOptions)

// loadOptions gathers every setting that can be adjusted through a LoadOption.
type loadOptions struct {
	// Specifies which is the default type in case detectTypes is disabled.
	defaultType series.Type

	// If set, the type of each column will be automatically detected unless
	// otherwise specified.
	detectTypes bool

	// If set, the first row of the tabular structure will be used as column
	// names.
	hasHeader bool

	// The names to set as columns names.
	names []string

	// Defines which values are going to be considered as NaN when parsing from string.
	nanValues []string

	// Defines the csv delimiter
	delimiter rune

	// Defines the comment delimiter
	comment rune

	// The types of specific columns can be specified via column name.
	types map[string]series.Type
}
715 |
716 | // DefaultType sets the defaultType option for loadOptions.
717 | func DefaultType(t series.Type) LoadOption {
718 | return func(c *loadOptions) {
719 | c.defaultType = t
720 | }
721 | }
722 |
723 | // DetectTypes sets the detectTypes option for loadOptions.
724 | func DetectTypes(b bool) LoadOption {
725 | return func(c *loadOptions) {
726 | c.detectTypes = b
727 | }
728 | }
729 |
730 | // HasHeader sets the hasHeader option for loadOptions.
731 | func HasHeader(b bool) LoadOption {
732 | return func(c *loadOptions) {
733 | c.hasHeader = b
734 | }
735 | }
736 |
737 | // Names sets the names option for loadOptions.
738 | func Names(names ...string) LoadOption {
739 | return func(c *loadOptions) {
740 | c.names = names
741 | }
742 | }
743 |
744 | // NaNValues sets the nanValues option for loadOptions.
745 | func NaNValues(nanValues []string) LoadOption {
746 | return func(c *loadOptions) {
747 | c.nanValues = nanValues
748 | }
749 | }
750 |
751 | // WithTypes sets the types option for loadOptions.
752 | func WithTypes(coltypes map[string]series.Type) LoadOption {
753 | return func(c *loadOptions) {
754 | c.types = coltypes
755 | }
756 | }
757 |
758 | // WithDelimiter sets the csv delimiter other than ',', for example '\t'
759 | func WithDelimiter(b rune) LoadOption {
760 | return func(c *loadOptions) {
761 | c.delimiter = b
762 | }
763 | }
764 |
765 | // WithComments sets the csv comment line detect to remove lines
766 | func WithComments(b rune) LoadOption {
767 | return func(c *loadOptions) {
768 | c.comment = b
769 | }
770 | }
771 |
772 | // LoadStructs creates a new DataFrame from arbitrary struct slices.
773 | //
774 | // LoadStructs will ignore unexported fields inside an struct. Note also that
775 | // unless otherwise specified the column names will correspond with the name of
776 | // the field.
777 | //
778 | // You can configure each field with the `dataframe:"name[,type]"` struct
779 | // tag. If the name on the tag is the empty string `""` the field name will be
780 | // used instead. If the name is `"-"` the field will be ignored.
781 | //
782 | // Examples:
783 | //
784 | // // field will be ignored
785 | // field int
786 | //
787 | // // Field will be ignored
788 | // Field int `dataframe:"-"`
789 | //
790 | // // Field will be parsed with column name Field and type int
791 | // Field int
792 | //
793 | // // Field will be parsed with column name `field_column` and type int.
794 | // Field int `dataframe:"field_column"`
795 | //
796 | // // Field will be parsed with column name `field` and type string.
797 | // Field int `dataframe:"field,string"`
798 | //
799 | // // Field will be parsed with column name `Field` and type string.
800 | // Field int `dataframe:",string"`
801 | //
802 | // If the struct tags and the given LoadOptions contradict each other, the later
803 | // will have preference over the former.
804 | func LoadStructs(i interface{}, options ...LoadOption) DataFrame {
805 | if i == nil {
806 | return DataFrame{Err: fmt.Errorf("load: can't create DataFrame from value")}
807 | }
808 |
809 | // Set the default load options
810 | cfg := loadOptions{
811 | defaultType: series.String,
812 | detectTypes: true,
813 | hasHeader: true,
814 | nanValues: []string{"NA", "NaN", ""},
815 | }
816 |
817 | // Set any custom load options
818 | for _, option := range options {
819 | option(&cfg)
820 | }
821 |
822 | tpy, val := reflect.TypeOf(i), reflect.ValueOf(i)
823 | switch tpy.Kind() {
824 | case reflect.Slice:
825 | if tpy.Elem().Kind() != reflect.Struct {
826 | return DataFrame{Err: fmt.Errorf(
827 | "load: type %s (%s %s) is not supported, must be []struct", tpy.Name(), tpy.Elem().Kind(), tpy.Kind())}
828 | }
829 | if val.Len() == 0 {
830 | return DataFrame{Err: fmt.Errorf("load: can't create DataFrame from empty slice")}
831 | }
832 |
833 | numFields := val.Index(0).Type().NumField()
834 | var columns []series.Series
835 | for j := 0; j < numFields; j++ {
836 | // Extract field metadata
837 | if !val.Index(0).Field(j).CanInterface() {
838 | continue
839 | }
840 | field := val.Index(0).Type().Field(j)
841 | fieldName := field.Name
842 | fieldType := field.Type.String()
843 |
844 | // Process struct tags
845 | fieldTags := field.Tag.Get("dataframe")
846 | if fieldTags == "-" {
847 | continue
848 | }
849 | tagOpts := strings.Split(fieldTags, ",")
850 | if len(tagOpts) > 2 {
851 | return DataFrame{Err: fmt.Errorf("malformed struct tag on field %s: %s", fieldName, fieldTags)}
852 | }
853 | if len(tagOpts) > 0 {
854 | if name := strings.TrimSpace(tagOpts[0]); name != "" {
855 | fieldName = name
856 | }
857 | if len(tagOpts) == 2 {
858 | if tagType := strings.TrimSpace(tagOpts[1]); tagType != "" {
859 | fieldType = tagType
860 | }
861 | }
862 | }
863 |
864 | // Handle `types` option
865 | var t series.Type
866 | if cfgtype, ok := cfg.types[fieldName]; ok {
867 | t = cfgtype
868 | } else {
869 | // Handle `detectTypes` option
870 | if cfg.detectTypes {
871 | // Parse field type
872 | parsedType, err := parseType(fieldType)
873 | if err != nil {
874 | return DataFrame{Err: err}
875 | }
876 | t = parsedType
877 | } else {
878 | t = cfg.defaultType
879 | }
880 | }
881 |
882 | // Create Series for this field
883 | elements := make([]interface{}, val.Len())
884 | for i := 0; i < val.Len(); i++ {
885 | fieldValue := val.Index(i).Field(j)
886 | elements[i] = fieldValue.Interface()
887 |
888 | // Handle `nanValues` option
889 | if findInStringSlice(fmt.Sprint(elements[i]), cfg.nanValues) != -1 {
890 | elements[i] = nil
891 | }
892 | }
893 |
894 | // Handle `hasHeader` option
895 | if !cfg.hasHeader {
896 | tmp := make([]interface{}, 1)
897 | tmp[0] = fieldName
898 | elements = append(tmp, elements...)
899 | fieldName = ""
900 | }
901 | columns = append(columns, series.New(elements, t, fieldName))
902 | }
903 | return New(columns...)
904 | }
905 | return DataFrame{Err: fmt.Errorf(
906 | "load: type %s (%s) is not supported, must be []struct", tpy.Name(), tpy.Kind())}
907 | }
908 |
909 | func parseType(s string) (series.Type, error) {
910 | switch s {
911 | case "float", "float64", "float32":
912 | return series.Float, nil
913 | case "int", "int64", "int32", "int16", "int8":
914 | return series.Int, nil
915 | case "string":
916 | return series.String, nil
917 | case "bool":
918 | return series.Bool, nil
919 | }
920 | return "", fmt.Errorf("type (%s) is not supported", s)
921 | }
922 |
923 | // LoadRecords creates a new DataFrame based on the given records.
924 | func LoadRecords(records [][]string, options ...LoadOption) DataFrame {
925 | // Set the default load options
926 | cfg := loadOptions{
927 | defaultType: series.String,
928 | detectTypes: true,
929 | hasHeader: true,
930 | nanValues: []string{"NA", "NaN", ""},
931 | }
932 |
933 | // Set any custom load options
934 | for _, option := range options {
935 | option(&cfg)
936 | }
937 |
938 | if len(records) == 0 {
939 | return DataFrame{Err: fmt.Errorf("load records: empty DataFrame")}
940 | }
941 | if cfg.hasHeader && len(records) <= 1 {
942 | return DataFrame{Err: fmt.Errorf("load records: empty DataFrame")}
943 | }
944 | if cfg.names != nil && len(cfg.names) != len(records[0]) {
945 | if len(cfg.names) > len(records[0]) {
946 | return DataFrame{Err: fmt.Errorf("load records: too many column names")}
947 | }
948 | return DataFrame{Err: fmt.Errorf("load records: not enough column names")}
949 | }
950 |
951 | // Extract headers
952 | headers := make([]string, len(records[0]))
953 | if cfg.hasHeader {
954 | headers = records[0]
955 | records = records[1:]
956 | }
957 | if cfg.names != nil {
958 | headers = cfg.names
959 | }
960 |
961 | types := make([]series.Type, len(headers))
962 | rawcols := make([][]string, len(headers))
963 | for i, colname := range headers {
964 | rawcol := make([]string, len(records))
965 | for j := 0; j < len(records); j++ {
966 | rawcol[j] = records[j][i]
967 | if findInStringSlice(rawcol[j], cfg.nanValues) != -1 {
968 | rawcol[j] = "NaN"
969 | }
970 | }
971 | rawcols[i] = rawcol
972 |
973 | t, ok := cfg.types[colname]
974 | if !ok {
975 | t = cfg.defaultType
976 | if cfg.detectTypes {
977 | if l, err := findType(rawcol); err == nil {
978 | t = l
979 | }
980 | }
981 | }
982 | types[i] = t
983 | }
984 |
985 | columns := make([]series.Series, len(headers))
986 | for i, colname := range headers {
987 | col := series.New(rawcols[i], types[i], colname)
988 | if col.Err != nil {
989 | return DataFrame{Err: col.Err}
990 | }
991 | columns[i] = col
992 | }
993 | nrows, ncols, err := checkColumnsDimensions(columns...)
994 | if err != nil {
995 | return DataFrame{Err: err}
996 | }
997 | df := DataFrame{
998 | columns: columns,
999 | ncols: ncols,
1000 | nrows: nrows,
1001 | }
1002 |
1003 | colnames := df.Names()
1004 | fixColnames(colnames)
1005 | for i, colname := range colnames {
1006 | df.columns[i].Name = colname
1007 | }
1008 | return df
1009 | }
1010 |
1011 | // LoadMaps creates a new DataFrame based on the given maps. This function assumes
1012 | // that every map on the array represents a row of observations.
1013 | func LoadMaps(maps []map[string]interface{}, options ...LoadOption) DataFrame {
1014 | if len(maps) == 0 {
1015 | return DataFrame{Err: fmt.Errorf("load maps: empty array")}
1016 | }
1017 | inStrSlice := func(i string, s []string) bool {
1018 | for _, v := range s {
1019 | if v == i {
1020 | return true
1021 | }
1022 | }
1023 | return false
1024 | }
1025 | // Detect all colnames
1026 | var colnames []string
1027 | for _, v := range maps {
1028 | for k := range v {
1029 | if exists := inStrSlice(k, colnames); !exists {
1030 | colnames = append(colnames, k)
1031 | }
1032 | }
1033 | }
1034 | sort.Strings(colnames)
1035 | records := make([][]string, len(maps)+1)
1036 | records[0] = colnames
1037 | for k, m := range maps {
1038 | row := make([]string, len(colnames))
1039 | for i, colname := range colnames {
1040 | element := ""
1041 | val, ok := m[colname]
1042 | if ok {
1043 | element = fmt.Sprint(val)
1044 | }
1045 | row[i] = element
1046 | }
1047 | records[k+1] = row
1048 | }
1049 | return LoadRecords(records, options...)
1050 | }
1051 |
1052 | // LoadMatrix loads the given Matrix as a DataFrame
1053 | // TODO: Add Loadoptions
1054 | func LoadMatrix(mat Matrix) DataFrame {
1055 | nrows, ncols := mat.Dims()
1056 | columns := make([]series.Series, ncols)
1057 | for i := 0; i < ncols; i++ {
1058 | floats := make([]float64, nrows)
1059 | for j := 0; j < nrows; j++ {
1060 | floats[j] = mat.At(j, i)
1061 | }
1062 | columns[i] = series.Floats(floats)
1063 | }
1064 | nrows, ncols, err := checkColumnsDimensions(columns...)
1065 | if err != nil {
1066 | return DataFrame{Err: err}
1067 | }
1068 | df := DataFrame{
1069 | columns: columns,
1070 | ncols: ncols,
1071 | nrows: nrows,
1072 | }
1073 | colnames := df.Names()
1074 | fixColnames(colnames)
1075 | for i, colname := range colnames {
1076 | df.columns[i].Name = colname
1077 | }
1078 | return df
1079 | }
1080 |
1081 | // ReadCSV reads a CSV file from a io.Reader and builds a DataFrame with the
1082 | // resulting records.
1083 | func ReadCSV(r io.Reader, options ...LoadOption) DataFrame {
1084 | csvReader := csv.NewReader(r)
1085 | cfg := loadOptions{
1086 | delimiter: ',',
1087 | }
1088 | for _, option := range options {
1089 | option(&cfg)
1090 | }
1091 | if cfg.delimiter != ',' {
1092 | csvReader.Comma = cfg.delimiter
1093 | }
1094 | if cfg.comment != 0 {
1095 | csvReader.Comment = cfg.comment
1096 | }
1097 |
1098 | records, err := csvReader.ReadAll()
1099 | if err != nil {
1100 | return DataFrame{Err: err}
1101 | }
1102 | return LoadRecords(records, options...)
1103 | }
1104 |
1105 | // ReadJSON reads a JSON array from a io.Reader and builds a DataFrame with the
1106 | // resulting records.
1107 | func ReadJSON(r io.Reader, options ...LoadOption) DataFrame {
1108 | var m []map[string]interface{}
1109 | err := json.NewDecoder(r).Decode(&m)
1110 | if err != nil {
1111 | return DataFrame{Err: err}
1112 | }
1113 | return LoadMaps(m, options...)
1114 | }
1115 |
// WriteOption is the type used to configure the writing of elements. A
// WriteOption is a function that modifies the writeOptions it is given.
type WriteOption func(*writeOptions)

// writeOptions gathers the settings that can be adjusted through a
// WriteOption.
type writeOptions struct {
	// Specifies whether the header is also written
	writeHeader bool
}

// WriteHeader returns a WriteOption that stores b as the writeHeader flag of
// the writeOptions it is applied to.
func WriteHeader(b bool) WriteOption {
	return func(opts *writeOptions) { opts.writeHeader = b }
}
1130 |
1131 | // WriteCSV writes the DataFrame to the given io.Writer as a CSV file.
1132 | func (df DataFrame) WriteCSV(w io.Writer, options ...WriteOption) error {
1133 | if df.Err != nil {
1134 | return df.Err
1135 | }
1136 |
1137 | // Set the default write options
1138 | cfg := writeOptions{
1139 | writeHeader: true,
1140 | }
1141 |
1142 | // Set any custom write options
1143 | for _, option := range options {
1144 | option(&cfg)
1145 | }
1146 |
1147 | records := df.Records()
1148 | if !cfg.writeHeader {
1149 | records = records[1:]
1150 | }
1151 |
1152 | return csv.NewWriter(w).WriteAll(records)
1153 | }
1154 |
1155 | // WriteJSON writes the DataFrame to the given io.Writer as a JSON array.
1156 | func (df DataFrame) WriteJSON(w io.Writer) error {
1157 | if df.Err != nil {
1158 | return df.Err
1159 | }
1160 | return json.NewEncoder(w).Encode(df.Maps())
1161 | }
1162 |
1163 | // Getters/Setters for DataFrame fields
1164 | // ====================================
1165 |
1166 | // Names returns the name of the columns on a DataFrame.
1167 | func (df DataFrame) Names() []string {
1168 | colnames := make([]string, df.ncols)
1169 | for i, s := range df.columns {
1170 | colnames[i] = s.Name
1171 | }
1172 | return colnames
1173 | }
1174 |
1175 | // Types returns the types of the columns on a DataFrame.
1176 | func (df DataFrame) Types() []series.Type {
1177 | coltypes := make([]series.Type, df.ncols)
1178 | for i, s := range df.columns {
1179 | coltypes[i] = s.Type()
1180 | }
1181 | return coltypes
1182 | }
1183 |
1184 | // SetNames changes the column names of a DataFrame to the ones passed as an
1185 | // argument.
1186 | func (df DataFrame) SetNames(colnames ...string) error {
1187 | if df.Err != nil {
1188 | return df.Err
1189 | }
1190 | if len(colnames) != df.ncols {
1191 | return fmt.Errorf("setting names: wrong dimensions")
1192 | }
1193 | for k, s := range colnames {
1194 | df.columns[k].Name = s
1195 | }
1196 | return nil
1197 | }
1198 |
1199 | // Dims retrieves the dimensions of a DataFrame.
1200 | func (df DataFrame) Dims() (int, int) {
1201 | return df.Nrow(), df.Ncol()
1202 | }
1203 |
// Nrow returns the number of rows on a DataFrame, as recorded in its nrows
// field.
func (df DataFrame) Nrow() int {
	return df.nrows
}
1208 |
// Ncol returns the number of columns on a DataFrame, as recorded in its ncols
// field.
func (df DataFrame) Ncol() int {
	return df.ncols
}
1213 |
1214 | // Col returns a copy of the Series with the given column name contained in the DataFrame.
1215 | func (df DataFrame) Col(colname string) series.Series {
1216 | if df.Err != nil {
1217 | return series.Series{Err: df.Err}
1218 | }
1219 | // Check that colname exist on dataframe
1220 | idx := findInStringSlice(colname, df.Names())
1221 | if idx < 0 {
1222 | return series.Series{Err: fmt.Errorf("unknown column name")}
1223 | }
1224 | return df.columns[idx].Copy()
1225 | }
1226 |
1227 | // InnerJoin returns a DataFrame containing the inner join of two DataFrames.
1228 | func (df DataFrame) InnerJoin(b DataFrame, keys ...string) DataFrame {
1229 | if len(keys) == 0 {
1230 | return DataFrame{Err: fmt.Errorf("join keys not specified")}
1231 | }
1232 | // Check that we have all given keys in both DataFrames
1233 | var iKeysA []int
1234 | var iKeysB []int
1235 | var errorArr []string
1236 | for _, key := range keys {
1237 | i := df.colIndex(key)
1238 | if i < 0 {
1239 | errorArr = append(errorArr, fmt.Sprintf("can't find key %q on left DataFrame", key))
1240 | }
1241 | iKeysA = append(iKeysA, i)
1242 | j := b.colIndex(key)
1243 | if j < 0 {
1244 | errorArr = append(errorArr, fmt.Sprintf("can't find key %q on right DataFrame", key))
1245 | }
1246 | iKeysB = append(iKeysB, j)
1247 | }
1248 | if len(errorArr) != 0 {
1249 | return DataFrame{Err: fmt.Errorf(strings.Join(errorArr, "\n"))}
1250 | }
1251 |
1252 | aCols := df.columns
1253 | bCols := b.columns
1254 | // Initialize newCols
1255 | var newCols []series.Series
1256 | for _, i := range iKeysA {
1257 | newCols = append(newCols, aCols[i].Empty())
1258 | }
1259 | var iNotKeysA []int
1260 | for i := 0; i < df.ncols; i++ {
1261 | if !inIntSlice(i, iKeysA) {
1262 | iNotKeysA = append(iNotKeysA, i)
1263 | newCols = append(newCols, aCols[i].Empty())
1264 | }
1265 | }
1266 | var iNotKeysB []int
1267 | for i := 0; i < b.ncols; i++ {
1268 | if !inIntSlice(i, iKeysB) {
1269 | iNotKeysB = append(iNotKeysB, i)
1270 | newCols = append(newCols, bCols[i].Empty())
1271 | }
1272 | }
1273 |
1274 | // Fill newCols
1275 | for i := 0; i < df.nrows; i++ {
1276 | for j := 0; j < b.nrows; j++ {
1277 | match := true
1278 | for k := range keys {
1279 | aElem := aCols[iKeysA[k]].Elem(i)
1280 | bElem := bCols[iKeysB[k]].Elem(j)
1281 | match = match && aElem.Eq(bElem)
1282 | }
1283 | if match {
1284 | ii := 0
1285 | for _, k := range iKeysA {
1286 | elem := aCols[k].Elem(i)
1287 | newCols[ii].Append(elem)
1288 | ii++
1289 | }
1290 | for _, k := range iNotKeysA {
1291 | elem := aCols[k].Elem(i)
1292 | newCols[ii].Append(elem)
1293 | ii++
1294 | }
1295 | for _, k := range iNotKeysB {
1296 | elem := bCols[k].Elem(j)
1297 | newCols[ii].Append(elem)
1298 | ii++
1299 | }
1300 | }
1301 | }
1302 | }
1303 | return New(newCols...)
1304 | }
1305 |
1306 | // LeftJoin returns a DataFrame containing the left join of two DataFrames.
1307 | func (df DataFrame) LeftJoin(b DataFrame, keys ...string) DataFrame {
1308 | if len(keys) == 0 {
1309 | return DataFrame{Err: fmt.Errorf("join keys not specified")}
1310 | }
1311 | // Check that we have all given keys in both DataFrames
1312 | var iKeysA []int
1313 | var iKeysB []int
1314 | var errorArr []string
1315 | for _, key := range keys {
1316 | i := df.colIndex(key)
1317 | if i < 0 {
1318 | errorArr = append(errorArr, fmt.Sprintf("can't find key %q on left DataFrame", key))
1319 | }
1320 | iKeysA = append(iKeysA, i)
1321 | j := b.colIndex(key)
1322 | if j < 0 {
1323 | errorArr = append(errorArr, fmt.Sprintf("can't find key %q on right DataFrame", key))
1324 | }
1325 | iKeysB = append(iKeysB, j)
1326 | }
1327 | if len(errorArr) != 0 {
1328 | return DataFrame{Err: fmt.Errorf(strings.Join(errorArr, "\n"))}
1329 | }
1330 |
1331 | aCols := df.columns
1332 | bCols := b.columns
1333 | // Initialize newCols
1334 | var newCols []series.Series
1335 | for _, i := range iKeysA {
1336 | newCols = append(newCols, aCols[i].Empty())
1337 | }
1338 | var iNotKeysA []int
1339 | for i := 0; i < df.ncols; i++ {
1340 | if !inIntSlice(i, iKeysA) {
1341 | iNotKeysA = append(iNotKeysA, i)
1342 | newCols = append(newCols, aCols[i].Empty())
1343 | }
1344 | }
1345 | var iNotKeysB []int
1346 | for i := 0; i < b.ncols; i++ {
1347 | if !inIntSlice(i, iKeysB) {
1348 | iNotKeysB = append(iNotKeysB, i)
1349 | newCols = append(newCols, bCols[i].Empty())
1350 | }
1351 | }
1352 |
1353 | // Fill newCols
1354 | for i := 0; i < df.nrows; i++ {
1355 | matched := false
1356 | for j := 0; j < b.nrows; j++ {
1357 | match := true
1358 | for k := range keys {
1359 | aElem := aCols[iKeysA[k]].Elem(i)
1360 | bElem := bCols[iKeysB[k]].Elem(j)
1361 | match = match && aElem.Eq(bElem)
1362 | }
1363 | if match {
1364 | matched = true
1365 | ii := 0
1366 | for _, k := range iKeysA {
1367 | elem := aCols[k].Elem(i)
1368 | newCols[ii].Append(elem)
1369 | ii++
1370 | }
1371 | for _, k := range iNotKeysA {
1372 | elem := aCols[k].Elem(i)
1373 | newCols[ii].Append(elem)
1374 | ii++
1375 | }
1376 | for _, k := range iNotKeysB {
1377 | elem := bCols[k].Elem(j)
1378 | newCols[ii].Append(elem)
1379 | ii++
1380 | }
1381 | }
1382 | }
1383 | if !matched {
1384 | ii := 0
1385 | for _, k := range iKeysA {
1386 | elem := aCols[k].Elem(i)
1387 | newCols[ii].Append(elem)
1388 | ii++
1389 | }
1390 | for _, k := range iNotKeysA {
1391 | elem := aCols[k].Elem(i)
1392 | newCols[ii].Append(elem)
1393 | ii++
1394 | }
1395 | // for _ = range iNotKeysB {
1396 | // newCols[ii].Append(nil)
1397 | // ii++
1398 | // }
1399 |
1400 | for _, k := range iNotKeysB {
1401 | _ = k
1402 | newCols[ii].Append(nil)
1403 | ii++
1404 | }
1405 | }
1406 | }
1407 | return New(newCols...)
1408 | }
1409 |
1410 | // RightJoin returns a DataFrame containing the right join of two DataFrames.
1411 | func (df DataFrame) RightJoin(b DataFrame, keys ...string) DataFrame {
1412 | if len(keys) == 0 {
1413 | return DataFrame{Err: fmt.Errorf("join keys not specified")}
1414 | }
1415 | // Check that we have all given keys in both DataFrames
1416 | var iKeysA []int
1417 | var iKeysB []int
1418 | var errorArr []string
1419 | for _, key := range keys {
1420 | i := df.colIndex(key)
1421 | if i < 0 {
1422 | errorArr = append(errorArr, fmt.Sprintf("can't find key %q on left DataFrame", key))
1423 | }
1424 | iKeysA = append(iKeysA, i)
1425 | j := b.colIndex(key)
1426 | if j < 0 {
1427 | errorArr = append(errorArr, fmt.Sprintf("can't find key %q on right DataFrame", key))
1428 | }
1429 | iKeysB = append(iKeysB, j)
1430 | }
1431 | if len(errorArr) != 0 {
1432 | return DataFrame{Err: fmt.Errorf(strings.Join(errorArr, "\n"))}
1433 | }
1434 |
1435 | aCols := df.columns
1436 | bCols := b.columns
1437 | // Initialize newCols
1438 | var newCols []series.Series
1439 | for _, i := range iKeysA {
1440 | newCols = append(newCols, aCols[i].Empty())
1441 | }
1442 | var iNotKeysA []int
1443 | for i := 0; i < df.ncols; i++ {
1444 | if !inIntSlice(i, iKeysA) {
1445 | iNotKeysA = append(iNotKeysA, i)
1446 | newCols = append(newCols, aCols[i].Empty())
1447 | }
1448 | }
1449 | var iNotKeysB []int
1450 | for i := 0; i < b.ncols; i++ {
1451 | if !inIntSlice(i, iKeysB) {
1452 | iNotKeysB = append(iNotKeysB, i)
1453 | newCols = append(newCols, bCols[i].Empty())
1454 | }
1455 | }
1456 |
1457 | // Fill newCols
1458 | var yesmatched []struct{ i, j int }
1459 | var nonmatched []int
1460 | for j := 0; j < b.nrows; j++ {
1461 | matched := false
1462 | for i := 0; i < df.nrows; i++ {
1463 | match := true
1464 | for k := range keys {
1465 | aElem := aCols[iKeysA[k]].Elem(i)
1466 | bElem := bCols[iKeysB[k]].Elem(j)
1467 | match = match && aElem.Eq(bElem)
1468 | }
1469 | if match {
1470 | matched = true
1471 | yesmatched = append(yesmatched, struct{ i, j int }{i, j})
1472 | }
1473 | }
1474 | if !matched {
1475 | nonmatched = append(nonmatched, j)
1476 | }
1477 | }
1478 | for _, v := range yesmatched {
1479 | i := v.i
1480 | j := v.j
1481 | ii := 0
1482 | for _, k := range iKeysA {
1483 | elem := aCols[k].Elem(i)
1484 | newCols[ii].Append(elem)
1485 | ii++
1486 | }
1487 | for _, k := range iNotKeysA {
1488 | elem := aCols[k].Elem(i)
1489 | newCols[ii].Append(elem)
1490 | ii++
1491 | }
1492 | for _, k := range iNotKeysB {
1493 | elem := bCols[k].Elem(j)
1494 | newCols[ii].Append(elem)
1495 | ii++
1496 | }
1497 | }
1498 | for _, j := range nonmatched {
1499 | ii := 0
1500 | for _, k := range iKeysB {
1501 | elem := bCols[k].Elem(j)
1502 | newCols[ii].Append(elem)
1503 | ii++
1504 | }
1505 | // for _ = range iNotKeysA {
1506 | // newCols[ii].Append(nil)
1507 | // ii++
1508 | // }
1509 | for _, k := range iNotKeysA {
1510 | _ = k
1511 | newCols[ii].Append(nil)
1512 | ii++
1513 | }
1514 | for _, k := range iNotKeysB {
1515 | elem := bCols[k].Elem(j)
1516 | newCols[ii].Append(elem)
1517 | ii++
1518 | }
1519 | }
1520 | return New(newCols...)
1521 | }
1522 |
1523 | // OuterJoin returns a DataFrame containing the outer join of two DataFrames.
1524 | func (df DataFrame) OuterJoin(b DataFrame, keys ...string) DataFrame {
1525 | if len(keys) == 0 {
1526 | return DataFrame{Err: fmt.Errorf("join keys not specified")}
1527 | }
1528 | // Check that we have all given keys in both DataFrames
1529 | var iKeysA []int
1530 | var iKeysB []int
1531 | var errorArr []string
1532 | for _, key := range keys {
1533 | i := df.colIndex(key)
1534 | if i < 0 {
1535 | errorArr = append(errorArr, fmt.Sprintf("can't find key %q on left DataFrame", key))
1536 | }
1537 | iKeysA = append(iKeysA, i)
1538 | j := b.colIndex(key)
1539 | if j < 0 {
1540 | errorArr = append(errorArr, fmt.Sprintf("can't find key %q on right DataFrame", key))
1541 | }
1542 | iKeysB = append(iKeysB, j)
1543 | }
1544 | if len(errorArr) != 0 {
1545 | return DataFrame{Err: fmt.Errorf(strings.Join(errorArr, "\n"))}
1546 | }
1547 |
1548 | aCols := df.columns
1549 | bCols := b.columns
1550 | // Initialize newCols
1551 | var newCols []series.Series
1552 | for _, i := range iKeysA {
1553 | newCols = append(newCols, aCols[i].Empty())
1554 | }
1555 | var iNotKeysA []int
1556 | for i := 0; i < df.ncols; i++ {
1557 | if !inIntSlice(i, iKeysA) {
1558 | iNotKeysA = append(iNotKeysA, i)
1559 | newCols = append(newCols, aCols[i].Empty())
1560 | }
1561 | }
1562 | var iNotKeysB []int
1563 | for i := 0; i < b.ncols; i++ {
1564 | if !inIntSlice(i, iKeysB) {
1565 | iNotKeysB = append(iNotKeysB, i)
1566 | newCols = append(newCols, bCols[i].Empty())
1567 | }
1568 | }
1569 |
1570 | // Fill newCols
1571 | for i := 0; i < df.nrows; i++ {
1572 | matched := false
1573 | for j := 0; j < b.nrows; j++ {
1574 | match := true
1575 | for k := range keys {
1576 | aElem := aCols[iKeysA[k]].Elem(i)
1577 | bElem := bCols[iKeysB[k]].Elem(j)
1578 | match = match && aElem.Eq(bElem)
1579 | }
1580 | if match {
1581 | matched = true
1582 | ii := 0
1583 | for _, k := range iKeysA {
1584 | elem := aCols[k].Elem(i)
1585 | newCols[ii].Append(elem)
1586 | ii++
1587 | }
1588 | for _, k := range iNotKeysA {
1589 | elem := aCols[k].Elem(i)
1590 | newCols[ii].Append(elem)
1591 | ii++
1592 | }
1593 | for _, k := range iNotKeysB {
1594 | elem := bCols[k].Elem(j)
1595 | newCols[ii].Append(elem)
1596 | ii++
1597 | }
1598 | }
1599 | }
1600 | if !matched {
1601 | ii := 0
1602 | for _, k := range iKeysA {
1603 | elem := aCols[k].Elem(i)
1604 | newCols[ii].Append(elem)
1605 | ii++
1606 | }
1607 | for _, k := range iNotKeysA {
1608 | elem := aCols[k].Elem(i)
1609 | newCols[ii].Append(elem)
1610 | ii++
1611 | }
1612 | // for _,_ = range iNotKeysB {
1613 | // newCols[ii].Append(nil)
1614 | // ii++
1615 | // }
1616 | for _, k := range iNotKeysB {
1617 | _ = k
1618 | newCols[ii].Append(nil)
1619 | ii++
1620 | }
1621 | }
1622 | }
1623 | for j := 0; j < b.nrows; j++ {
1624 | matched := false
1625 | for i := 0; i < df.nrows; i++ {
1626 | match := true
1627 | for k := range keys {
1628 | aElem := aCols[iKeysA[k]].Elem(i)
1629 | bElem := bCols[iKeysB[k]].Elem(j)
1630 | match = match && aElem.Eq(bElem)
1631 | }
1632 | if match {
1633 | matched = true
1634 | }
1635 | }
1636 | if !matched {
1637 | ii := 0
1638 | for _, k := range iKeysB {
1639 | elem := bCols[k].Elem(j)
1640 | newCols[ii].Append(elem)
1641 | ii++
1642 | }
1643 | // for _ = range iNotKeysA {
1644 | // newCols[ii].Append(nil)
1645 | // ii++
1646 | // }
1647 | for _, k := range iNotKeysA {
1648 | _ = k
1649 | newCols[ii].Append(nil)
1650 | ii++
1651 | }
1652 | for _, k := range iNotKeysB {
1653 | elem := bCols[k].Elem(j)
1654 | newCols[ii].Append(elem)
1655 | ii++
1656 | }
1657 | }
1658 | }
1659 | return New(newCols...)
1660 | }
1661 |
1662 | // CrossJoin returns a DataFrame containing the cross join of two DataFrames.
1663 | func (df DataFrame) CrossJoin(b DataFrame) DataFrame {
1664 | aCols := df.columns
1665 | bCols := b.columns
1666 | // Initialize newCols
1667 | var newCols []series.Series
1668 | for i := 0; i < df.ncols; i++ {
1669 | newCols = append(newCols, aCols[i].Empty())
1670 | }
1671 | for i := 0; i < b.ncols; i++ {
1672 | newCols = append(newCols, bCols[i].Empty())
1673 | }
1674 | // Fill newCols
1675 | for i := 0; i < df.nrows; i++ {
1676 | for j := 0; j < b.nrows; j++ {
1677 | for ii := 0; ii < df.ncols; ii++ {
1678 | elem := aCols[ii].Elem(i)
1679 | newCols[ii].Append(elem)
1680 | }
1681 | for ii := 0; ii < b.ncols; ii++ {
1682 | jj := ii + df.ncols
1683 | elem := bCols[ii].Elem(j)
1684 | newCols[jj].Append(elem)
1685 | }
1686 | }
1687 | }
1688 | return New(newCols...)
1689 | }
1690 |
1691 | // colIndex returns the index of the column with name `s`. If it fails to find the
1692 | // column it returns -1 instead.
1693 | func (df DataFrame) colIndex(s string) int {
1694 | for k, v := range df.Names() {
1695 | if v == s {
1696 | return k
1697 | }
1698 | }
1699 | return -1
1700 | }
1701 |
1702 | // Records return the string record representation of a DataFrame.
1703 | func (df DataFrame) Records() [][]string {
1704 | var records [][]string
1705 | records = append(records, df.Names())
1706 | if df.ncols == 0 || df.nrows == 0 {
1707 | return records
1708 | }
1709 | var tRecords [][]string
1710 | for _, col := range df.columns {
1711 | tRecords = append(tRecords, col.Records())
1712 | }
1713 | records = append(records, transposeRecords(tRecords)...)
1714 | return records
1715 | }
1716 |
1717 | // Maps return the array of maps representation of a DataFrame.
1718 | func (df DataFrame) Maps() []map[string]interface{} {
1719 | maps := make([]map[string]interface{}, df.nrows)
1720 | colnames := df.Names()
1721 | for i := 0; i < df.nrows; i++ {
1722 | m := make(map[string]interface{})
1723 | for k, v := range colnames {
1724 | val := df.columns[k].Val(i)
1725 | m[v] = val
1726 | }
1727 | maps[i] = m
1728 | }
1729 | return maps
1730 | }
1731 |
1732 | // Elem returns the element on row `r` and column `c`. Will panic if the index is
1733 | // out of bounds.
1734 | func (df DataFrame) Elem(r, c int) series.Element {
1735 | return df.columns[c].Elem(r)
1736 | }
1737 |
// fixColnames assigns a name to the missing column names and makes it so that the
// column names are unique. The slice is modified in place.
//
// Empty names are replaced, in order, by the first unused name of the form
// "X<n>" (n counting up from 0). Afterwards every name that occurred more than
// once is replaced, occurrence by occurrence, by the first unused name of the
// form "<name>_<n>"; duplicated names are processed in sorted order so the
// result is deterministic.
func fixColnames(colnames []string) {
	// Group the positions of every non-empty name in a single pass and note
	// where names are missing. inUse counts how many columns currently carry
	// each name, so availability checks stay exact while names are rewritten.
	positions := make(map[string][]int)
	inUse := make(map[string]int, len(colnames))
	var missingnames []int
	for i, name := range colnames {
		inUse[name]++
		if name == "" {
			missingnames = append(missingnames, i)
			continue
		}
		positions[name] = append(positions[name], i)
	}

	// rename stores name at position i and keeps the usage counts in sync.
	rename := func(i int, name string) {
		inUse[colnames[i]]--
		inUse[name]++
		colnames[i] = name
	}

	// Autofill missing column names
	counter := 0
	for _, i := range missingnames {
		proposedName := fmt.Sprintf("X%d", counter)
		for inUse[proposedName] > 0 {
			counter++
			proposedName = fmt.Sprintf("X%d", counter)
		}
		rename(i, proposedName)
		counter++
	}

	// Sort the duplicated names to make sure we always follow the same order
	// (map iteration order is random).
	var keys []string
	for name, idx := range positions {
		if len(idx) > 1 {
			keys = append(keys, name)
		}
	}
	sort.Strings(keys)

	// Add a suffix to the duplicated colnames
	for _, name := range keys {
		counter := 0
		for _, i := range positions[name] {
			proposedName := fmt.Sprintf("%s_%d", name, counter)
			for inUse[proposedName] > 0 {
				counter++
				proposedName = fmt.Sprintf("%s_%d", name, counter)
			}
			rename(i, proposedName)
			counter++
		}
	}
}
1798 |
// findInStringSlice returns the position of the first element of s equal to
// str, or -1 when s does not contain str.
func findInStringSlice(str string, s []string) int {
	for pos := 0; pos < len(s); pos++ {
		if s[pos] == str {
			return pos
		}
	}
	return -1
}
1807 |
1808 | func parseSelectIndexes(l int, indexes SelectIndexes, colnames []string) ([]int, error) {
1809 | var idx []int
1810 | switch indexes.(type) {
1811 | case []int:
1812 | idx = indexes.([]int)
1813 | case int:
1814 | idx = []int{indexes.(int)}
1815 | case []bool:
1816 | bools := indexes.([]bool)
1817 | if len(bools) != l {
1818 | return nil, fmt.Errorf("indexing error: index dimensions mismatch")
1819 | }
1820 | for i, b := range bools {
1821 | if b {
1822 | idx = append(idx, i)
1823 | }
1824 | }
1825 | case string:
1826 | s := indexes.(string)
1827 | i := findInStringSlice(s, colnames)
1828 | if i < 0 {
1829 | return nil, fmt.Errorf("can't select columns: column name %q not found", s)
1830 | }
1831 | idx = append(idx, i)
1832 | case []string:
1833 | xs := indexes.([]string)
1834 | for _, s := range xs {
1835 | i := findInStringSlice(s, colnames)
1836 | if i < 0 {
1837 | return nil, fmt.Errorf("can't select columns: column name %q not found", s)
1838 | }
1839 | idx = append(idx, i)
1840 | }
1841 | case series.Series:
1842 | s := indexes.(series.Series)
1843 | if err := s.Err; err != nil {
1844 | return nil, fmt.Errorf("indexing error: new values has errors: %v", err)
1845 | }
1846 | if s.HasNaN() {
1847 | return nil, fmt.Errorf("indexing error: indexes contain NaN")
1848 | }
1849 | switch s.Type() {
1850 | case series.Int:
1851 | return s.Int()
1852 | case series.Bool:
1853 | bools, err := s.Bool()
1854 | if err != nil {
1855 | return nil, fmt.Errorf("indexing error: %v", err)
1856 | }
1857 | return parseSelectIndexes(l, bools, colnames)
1858 | case series.String:
1859 | xs := indexes.(series.Series).Records()
1860 | return parseSelectIndexes(l, xs, colnames)
1861 | default:
1862 | return nil, fmt.Errorf("indexing error: unknown indexing mode")
1863 | }
1864 | default:
1865 | return nil, fmt.Errorf("indexing error: unknown indexing mode")
1866 | }
1867 | return idx, nil
1868 | }
1869 |
1870 | func findType(arr []string) (series.Type, error) {
1871 | var hasFloats, hasInts, hasBools, hasStrings bool
1872 | for _, str := range arr {
1873 | if str == "" || str == "NaN" {
1874 | continue
1875 | }
1876 | if _, err := strconv.Atoi(str); err == nil {
1877 | hasInts = true
1878 | continue
1879 | }
1880 | if _, err := strconv.ParseFloat(str, 64); err == nil {
1881 | hasFloats = true
1882 | continue
1883 | }
1884 | if str == "true" || str == "false" {
1885 | hasBools = true
1886 | continue
1887 | }
1888 | hasStrings = true
1889 | }
1890 |
1891 | switch {
1892 | case hasStrings:
1893 | return series.String, nil
1894 | case hasBools:
1895 | return series.Bool, nil
1896 | case hasFloats:
1897 | return series.Float, nil
1898 | case hasInts:
1899 | return series.Int, nil
1900 | default:
1901 | return series.String, fmt.Errorf("couldn't detect type")
1902 | }
1903 | }
1904 |
// transposeRecords swaps rows and columns of x: element [j][i] of the input
// becomes element [i][j] of the result. The width of the result is taken from
// the first row of x. An empty x is returned unchanged.
func transposeRecords(x [][]string) [][]string {
	if len(x) == 0 {
		return x
	}
	height, width := len(x), len(x[0])
	flipped := make([][]string, width)
	for col := range flipped {
		line := make([]string, height)
		for row := range x {
			line[row] = x[row][col]
		}
		flipped[col] = line
	}
	return flipped
}
1921 |
// inIntSlice reports whether the value i occurs anywhere in is.
func inIntSlice(i int, is []int) bool {
	for pos := 0; pos < len(is); pos++ {
		if is[pos] == i {
			return true
		}
	}
	return false
}
1930 |
// Matrix is an interface which is compatible with gonum's mat.Matrix interface.
type Matrix interface {
	// Dims returns the two dimensions of the matrix as (r, c); following
	// gonum's convention these are presumably the row and column counts.
	Dims() (r, c int)
	// At returns the float64 value stored at index pair (i, j); presumably
	// row i and column j, as in gonum.
	At(i, j int) float64
}
1936 |
1937 | // Describe prints the summary statistics for each column of the dataframe
1938 | func (df DataFrame) Describe() DataFrame {
1939 | labels := series.Strings([]string{
1940 | "mean",
1941 | "median",
1942 | "stddev",
1943 | "min",
1944 | "25%",
1945 | "50%",
1946 | "75%",
1947 | "max",
1948 | })
1949 | labels.Name = "column"
1950 |
1951 | ss := []series.Series{labels}
1952 |
1953 | for _, col := range df.columns {
1954 | var newCol series.Series
1955 | switch col.Type() {
1956 | case series.String:
1957 | newCol = series.New([]string{
1958 | "-",
1959 | "-",
1960 | "-",
1961 | col.MinStr(),
1962 | "-",
1963 | "-",
1964 | "-",
1965 | col.MaxStr(),
1966 | },
1967 | col.Type(),
1968 | col.Name,
1969 | )
1970 | case series.Bool:
1971 | fallthrough
1972 | case series.Float:
1973 | fallthrough
1974 | case series.Int:
1975 | newCol = series.New([]float64{
1976 | col.Mean(),
1977 | col.Median(),
1978 | col.StdDev(),
1979 | col.Min(),
1980 | col.Quantile(0.25),
1981 | col.Quantile(0.50),
1982 | col.Quantile(0.75),
1983 | col.Max(),
1984 | },
1985 | series.Float,
1986 | col.Name,
1987 | )
1988 | }
1989 | ss = append(ss, newCol)
1990 | }
1991 |
1992 | ddf := New(ss...)
1993 | return ddf
1994 | }
1995 |
--------------------------------------------------------------------------------