├── csv_tests
    ├── empty.csv
    ├── pipe.csv
    └── pass.csv
├── dataframe
    ├── filter.go
    ├── filter_test.go
    ├── math_test.go
    ├── math.go
    ├── merge.go
    ├── merge_test.go
    ├── dataframe.go
    ├── constructor.go
    ├── columns.go
    ├── pivot.go
    ├── select.go
    ├── group.go
    └── describe_test.go
├── guides
    ├── docker
    │   ├── down.sh
    │   ├── Dockerfile
    │   ├── docker-compose.yml
    │   └── up.sh
    └── Options.ipynb
├── .gitignore
├── .travis.yml
├── go.mod
├── example_pd_test.go
├── internal
    └── values
    │   ├── options_test.go
    │   ├── sharedMeta.go
    │   ├── type-bool.go
    │   ├── type-float.go
    │   ├── sharedMeta_test.go
    │   ├── type-int.go
    │   ├── type-datetime.go
    │   ├── values.go
    │   ├── options.go
    │   ├── type-interface.go
    │   ├── type-string.go
    │   └── shared_template.go
├── benchmarking
    └── profiler
    │   ├── main.go
    │   ├── benchmarks
    │       ├── compare_test.go
    │       ├── benchmarks_test.go
    │       ├── profile.go
    │       ├── benchmarks.go
    │       ├── compare.go
    │       ├── profile.py
    │       └── config.go
    │   └── comparison_summary.txt
├── LICENSE
├── options
    ├── datatypes_test.go
    ├── datatypes.go
    ├── settable_test.go
    └── settable.go
├── series
    ├── constructor_benchmark_test.go
    ├── merge.go
    ├── series.go
    ├── select.go
    ├── constructor.go
    ├── merge_test.go
    ├── filter_test.go
    ├── describe_test.go
    ├── group_test.go
    ├── group.go
    ├── filter.go
    ├── math_test.go
    ├── select_test.go
    └── constructor_test.go
├── README.md
└── pd.go


/csv_tests/empty.csv:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/csv_tests/pipe.csv:
--------------------------------------------------------------------------------
1 | |A|B
2 | foo|1|2


--------------------------------------------------------------------------------
/csv_tests/pass.csv:
--------------------------------------------------------------------------------
1 | ,A
2 | foo,1
3 | bar,2


--------------------------------------------------------------------------------
/dataframe/filter.go:
--------------------------------------------------------------------------------
1 | package dataframe
2 | 


--------------------------------------------------------------------------------
/dataframe/filter_test.go:
--------------------------------------------------------------------------------
1 | package dataframe
2 | 


--------------------------------------------------------------------------------
/guides/docker/down.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | docker-compose down


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .vscode/
 2 | *.out
 3 | *.test
 4 | cleanGeneratedCode.sh
 5 | Makefile
 6 | notebooks/
 7 | output_test.csv
 8 | debug
 9 | go.sum
10 | benchmarking/archive
11 | data*.csv


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: go
 2 | 
 3 | go:
 4 | - 1.12.x
 5 | 
 6 | before_install:
 7 |   - go get -t -v ./...
 8 | 
 9 | script:
10 |   - go test -race -coverprofile=coverage.txt -covermode=atomic
11 | 
12 | after_success:
13 |   - bash <(curl -s https://codecov.io/bash)


--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
 1 | module github.com/ptiger10/pd
 2 | 
 3 | go 1.12
 4 | 
 5 | require (
 6 | 	github.com/araddon/dateparse v0.0.0-20190622164848-0fb0a474d195
 7 | 	github.com/cheekybits/genny v1.0.0
 8 | 	github.com/davecgh/go-spew v1.1.1 // indirect
 9 | 	github.com/stretchr/testify v1.3.0 // indirect
10 | )
11 | 


--------------------------------------------------------------------------------
/guides/docker/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM yunabe/lgo:latest
 2 | 
 3 | # Fetch gopandas
 4 | RUN go get -u github.com/ptiger10/pd/... 
 5 | RUN lgo installpkg github.com/ptiger10/pd/... 
 6 | 
 7 | WORKDIR /notebooks
 8 | 
 9 | # To use JupyterLab, replace "notebook" with "lab".
10 | CMD ["jupyter", "notebook", "--ip=0.0.0.0"]
11 | 


--------------------------------------------------------------------------------
/guides/docker/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | 
 2 | version: "3.3"
 3 | 
 4 | services:
 5 |   jupyter:
 6 |     build:
 7 |       context: .
 8 |     # To use a different port of host, change the first 8888.
 9 |     ports:
10 |       - "8888:8888"
11 |     volumes:
12 |       - type: "bind"
13 |         source: ./../
14 |         target: /notebooks
15 | 


--------------------------------------------------------------------------------
/guides/docker/up.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | echo $1
 4 | if [ "$1" = "-r" ]; then
 5 |     echo rebuilding
 6 |     docker build --no-cache -t docker_default .
 7 | fi
 8 | docker-compose up -d
 9 | 
10 | url=$(docker-compose exec jupyter jupyter notebook list | grep http | awk '{print $1}')
11 | if [[ -z $url ]]; then
12 |     echo Cannot determine url
13 |     exit 1
14 | fi
15 | 
16 | if [[ "$OSTYPE" == "linux-gnu" ]]; then
17 |     xdg-open $url
18 | elif [[ "$OSTYPE" == "darwin"* ]]; then
19 |     open $url
20 | else
21 |     echo $url
22 | fi
23 | 


--------------------------------------------------------------------------------
/example_pd_test.go:
--------------------------------------------------------------------------------
 1 | package pd
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | )
 6 | 
// ExampleSeries_defaultIndex builds a Series from a slice of three strings and
// prints it. No index is supplied; the expected output below shows the rows
// labeled 0 through 2. The constructor's error return is discarded.
func ExampleSeries_defaultIndex() {
	s, _ := Series([]string{"foo", "bar", "baz"})
	fmt.Println(s)
	// Output:
	// 0    foo
	// 1    bar
	// 2    baz
	//
	// datatype: string
}
17 | 
// ExampleDataFrame_default builds a DataFrame from a string slice and an int
// slice and prints it. No index or column names are supplied; the expected
// output below shows columns labeled 0 and 1 and rows labeled 0 through 2.
// The constructor's error return is discarded.
func ExampleDataFrame_default() {
	df, _ := DataFrame([]interface{}{[]string{"foo", "bar", "baz"}, []int{7, 11, 19}})
	fmt.Println(df)
	// Output:
	//        0   1
	// 0    foo   7
	// 1    bar  11
	// 2    baz  19
}
27 | 


--------------------------------------------------------------------------------
/internal/values/options_test.go:
--------------------------------------------------------------------------------
 1 | package values
 2 | 
 3 | import "testing"
 4 | 
 5 | func TestNonsettableOptions(t *testing.T) {
 6 | 	if GetDisplayValuesWhitespaceBuffer() != 4 {
 7 | 		t.Error("Default setting not reading for DisplayValuesWhitespaceBuffer")
 8 | 	}
 9 | 	if GetDisplayColumnsWhitespaceBuffer() != 2 {
10 | 		t.Error("Default setting not reading for DisplayColumnsWhitespaceBuffer")
11 | 	}
12 | 	if GetDisplayElementWhitespaceBuffer() != 1 {
13 | 		t.Errorf("Default setting not reading for DisplayElementWhitespaceBuffer")
14 | 	}
15 | 	if GetDisplayIndexWhitespaceBuffer() != 1 {
16 | 		t.Errorf("Default setting not reading for DisplayIndexWhitespaceBuffer")
17 | 	}
18 | 	if GetMultiColNameSeparator() != " | " {
19 | 		t.Errorf("Default setting not reading for MultiColNameSeparator")
20 | 	}
21 | }
22 | 


--------------------------------------------------------------------------------
/internal/values/sharedMeta.go:
--------------------------------------------------------------------------------
 1 | package values
 2 | 
 3 | // Generic methods for valueTypeValue type converters
 4 | 
// toFloat64 is the placeholder float64 converter for the generic
// valueTypeValue type. It ignores the receiver and returns a zero float64Value.
// NOTE(review): presumably exists only so the generic template compiles — confirm.
func (val valueTypeValue) toFloat64() float64Value {
	return float64Value{}
}
 8 | 
// toInt64 is the placeholder int64 converter for the generic valueTypeValue
// type. It ignores the receiver and returns a zero int64Value.
func (val valueTypeValue) toInt64() int64Value {
	return int64Value{}
}
12 | 
// toString is the placeholder string converter for the generic valueTypeValue
// type. It ignores the receiver and returns a zero stringValue.
func (val valueTypeValue) toString() stringValue {
	return stringValue{}
}
16 | 
// toBool is the placeholder bool converter for the generic valueTypeValue
// type. It ignores the receiver and returns a zero boolValue.
func (val valueTypeValue) toBool() boolValue {
	return boolValue{}
}
20 | 
// toDateTime is the placeholder datetime converter for the generic
// valueTypeValue type. It ignores the receiver and returns a zero dateTimeValue.
func (val valueTypeValue) toDateTime() dateTimeValue {
	return dateTimeValue{}
}
24 | 
// Less is the placeholder comparison for the generic valueTypeValues type.
// It ignores i and j and always reports true.
func (val valueTypeValues) Less(i, j int) bool {
	return true
}
28 | 
// tovalueType is the placeholder converter from interfaceValue to the generic
// valueTypeValue type. It ignores the receiver and returns a zero valueTypeValue.
func (val interfaceValue) tovalueType() valueTypeValue {
	return valueTypeValue{}
}
32 | 
// newvalueType is the placeholder constructor for the generic valueTypeValue
// type. It ignores vals and returns a zero valueTypeValue.
func newvalueType(vals valueType) valueTypeValue {
	return valueTypeValue{}
}
36 | 


--------------------------------------------------------------------------------
/benchmarking/profiler/main.go:
--------------------------------------------------------------------------------
 1 | // +build benchmarks
 2 | 
 3 | package main
 4 | 
 5 | import (
 6 | 	"fmt"
 7 | 	"io/ioutil"
 8 | 	"log"
 9 | 	"path/filepath"
10 | 	"runtime"
11 | 
12 | 	"github.com/ptiger10/pd/benchmarking/profiler/benchmarks"
13 | )
14 | 
15 | func main() {
16 | 	benchmarks.ReadData()
17 | 	goBenchmarks := benchmarks.RunGoProfiler()
18 | 	pyBenchmarks := benchmarks.RunPythonProfiler()
19 | 
20 | 	// fmt.Println(goBenchmarks, pyBenchmarks)
21 | 
22 | 	table := benchmarks.CompareBenchmarks(
23 | 		goBenchmarks, pyBenchmarks,
24 | 		benchmarks.SampleSizes, benchmarks.Descriptions)
25 | 	_, thisFile, _, _ := runtime.Caller(0)
26 | 	basename := "comparison_summary.txt"
27 | 	dest := filepath.Join(filepath.Dir(thisFile), basename)
28 | 	err := ioutil.WriteFile(dest, []byte(table), 0666)
29 | 	if err != nil {
30 | 		log.Fatal(err)
31 | 	}
32 | 	fmt.Printf(">> %v\n", basename)
33 | }
34 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 Dave Fort
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/benchmarking/profiler/benchmarks/compare_test.go:
--------------------------------------------------------------------------------
 1 | // +build benchmarks
 2 | 
 3 | package benchmarks
 4 | 
 5 | // func TestPythonProfiler(t *testing.T) {
 6 | // 	got := RunPythonProfiler()
 7 | // 	fmt.Println(got)
 8 | // }
 9 | 
10 | // func TestCompareBenchmarks(t *testing.T) {
11 | // 	type args struct {
12 | // 		goBenchmarks Results
13 | // 		pyBenchmarks Results
14 | // 		sampleSizes  []string
15 | // 		descs        map[string]desc
16 | // 	}
17 | // 	tests := []struct {
18 | // 		name string
19 | // 		args args
20 | // 	}{
21 | // 		{name: "normal", args: args{
22 | // 			goBenchmarks: Results{"100k": {
23 | // 				"sum": []interface{}{"50ms", 50.0}, "mean": []interface{}{"50ms", 50.0}}},
24 | // 			pyBenchmarks: Results{"100k": {"sum": []interface{}{"100ms", 100.0}}},
25 | // 			descs:        map[string]desc{"sum": desc{1, "Simple sum"}, "mean": desc{2, "Simple mean"}},
26 | // 			sampleSizes:  []string{"100k", "200k"}}},
27 | // 	}
28 | // 	for _, tt := range tests {
29 | // 		t.Run(tt.name, func(t *testing.T) {
30 | // 			got := CompareBenchmarks(tt.args.goBenchmarks, tt.args.pyBenchmarks, tt.args.sampleSizes, tt.args.descs)
31 | // 			print(got)
32 | // 		})
33 | // 	}
34 | // }
35 | 
36 | // func TestProfileGo(t *testing.T) {
37 | // 	ProfileGo(benchmarkMeanFloat64_100000)
38 | 
39 | // }
40 | 


--------------------------------------------------------------------------------
/options/datatypes_test.go:
--------------------------------------------------------------------------------
 1 | package options
 2 | 
 3 | import (
 4 | 	"testing"
 5 | )
 6 | 
 7 | func TestDataType(t *testing.T) {
 8 | 	var tests = []struct {
 9 | 		DataType DataType
10 | 		expected string
11 | 	}{
12 | 
13 | 		{None, "none"},
14 | 		{Float64, "float64"},
15 | 		{Int64, "int64"},
16 | 		{String, "string"},
17 | 		{Bool, "bool"},
18 | 		{DateTime, "dateTime"},
19 | 		{Interface, "interface"},
20 | 		{Unsupported, "unsupported"},
21 | 		{-1, "unknown"},
22 | 		{100, "unknown"},
23 | 	}
24 | 	for _, test := range tests {
25 | 		if test.DataType.String() != test.expected {
26 | 			t.Errorf("DataType.String() for DataType %v returned %v, want %v", test.DataType, test.DataType.String(), test.expected)
27 | 		}
28 | 	}
29 | }
30 | 
31 | func TestGetDataType(t *testing.T) {
32 | 	var tests = []struct {
33 | 		expected DataType
34 | 		dataType string
35 | 	}{
36 | 		{Float64, "float"},
37 | 		{Float64, "float64"},
38 | 		{Float64, "Float64"},
39 | 		{Int64, "int"},
40 | 		{Int64, "int64"},
41 | 		{String, "string"},
42 | 		{String, "STRING"},
43 | 		{Bool, "bool"},
44 | 		{DateTime, "dateTime"},
45 | 		{Interface, "interface"},
46 | 		{Unsupported, "other"},
47 | 	}
48 | 	for _, tt := range tests {
49 | 		got := DT(tt.dataType)
50 | 		if got != tt.expected {
51 | 			t.Errorf("DT() = %v, want %v", got, tt.expected)
52 | 		}
53 | 	}
54 | }
55 | 


--------------------------------------------------------------------------------
/benchmarking/profiler/benchmarks/benchmarks_test.go:
--------------------------------------------------------------------------------
 1 | // +build benchmarks
 2 | 
 3 | package benchmarks
 4 | 
 5 | import (
 6 | 	"testing"
 7 | )
 8 | 
 9 | func BenchmarkMath(b *testing.B) {
10 | 	benchmarks := []struct {
11 | 		name string
12 | 		fn   func(b *testing.B)
13 | 	}{
14 | 		// {"100k sum 1 column", benchmarkSumFloat64_100000},
15 | 		{"100k sum 10 column", benchmarkSumFloat64_100k10x},
16 | 		// {"100k read then sum 1 column", benchmarkReadSumFloat64_100000},
17 | 		// {"100k mean 1 column", benchmarkMeanFloat64_100000},
18 | 		// {"100k sync mean 1 column", benchmarkSyncMeanFloat64_100000},
19 | 		// {"100k median 1 column", benchmarkMedianFloat64_100000},
20 | 		// {"100k min 1 column", benchmarkMinFloat64_100000},
21 | 		// {"100k max 1 column", benchmarkMaxFloat64_100000},
22 | 		// {"100k std 1 column", benchmarkStdFloat64_100000},
23 | 		// {"100k sync std 1 column", benchmarkSyncStdFloat64_100000},
24 | 		// {"500k std 2 columns", benchmarkStdFloat64_500000},
25 | 		// {"500k sync std 2 columns", benchmarkSyncStdFloat64_500000},
26 | 		// {"500k sum 2 columns", benchmarkSumFloat64_500000},
27 | 		// {"500k mean 2 columns", benchmarkMeanFloat64_500000},
28 | 		// {"500k sync mean 2 columns", benchmarkSyncMeanFloat64_500000},
29 | 		// {"5m sum 1 column", benchmarkSumFloat64_5m},
30 | 	}
31 | 	ReadData()
32 | 	for _, bm := range benchmarks {
33 | 		b.Run(bm.name, func(b *testing.B) {
34 | 			bm.fn(b)
35 | 		})
36 | 	}
37 | }
38 | 


--------------------------------------------------------------------------------
/benchmarking/profiler/comparison_summary.txt:
--------------------------------------------------------------------------------
 1 | GoPandas vs Pandas speed comparison
 2 | Fri, 19 Jul 2019 19:56:33 PDT
 3 | +----+----------------------------------------+------+-----------+-----------+---------+
 4 | | #  | DESCRIPTION                            | N    | GOPANDAS  | PANDAS    | SPEED Δ |
 5 | +----+----------------------------------------+------+-----------+-----------+---------+
 6 | | 1  | Sum one column                         | 100k | 248.2μs   | 565.8μs   | 2.28x   |
 7 | +----+----------------------------------------+------+-----------+-----------+---------+
 8 | | 2  | Simple mean of one column              | 100k | 248.1μs   | 588.2μs   | 2.37x   |
 9 | +----+----------------------------------------+------+-----------+-----------+---------+
10 | | 3  | Min of one column                      | 100k | 302.2μs   | 536.6μs   | 1.78x   |
11 | +----+----------------------------------------+------+-----------+-----------+---------+
12 | | 4  | Max of one column                      | 100k | 348.0μs   | 528.6μs   | 1.52x   |
13 | +----+----------------------------------------+------+-----------+-----------+---------+
14 | | 5  | Standard deviation of one column       | 100k | 389.0μs   | 904.9μs   | 2.33x   |
15 | +----+----------------------------------------+------+-----------+-----------+---------+
16 | | 6  | Sum two columns                        | 500k | 2.6ms     | 16.6ms    | 6.46x   |
17 | +----+----------------------------------------+------+-----------+-----------+---------+
18 | 


--------------------------------------------------------------------------------
/options/datatypes.go:
--------------------------------------------------------------------------------
 1 | // Package options defines custom DataTypes and settable options for use in constructing, modifying, and displaying DataFrames and Series.
 2 | package options
 3 | 
 4 | import "strings"
 5 | 
// DataType identifies the type of a data object.
// For most values it is interchangeable with the reflect.Type value, but it supports custom identifiers as well (e.g., DateTime).
type DataType int

// datatype convenience options
//
// The values are consecutive, starting at None = 0. DataType.String indexes
// its name table by these values, so the order here must match that table.
const (
	None DataType = iota
	Float64
	Int64
	String
	Bool
	DateTime
	Interface
	// PlaceholdervalueType renders as "placeholder" in DataType.String.
	// NOTE(review): presumably reserved for the generic valueType template — confirm.
	PlaceholdervalueType
	// Unsupported is the last declared value; DT returns it for names it does not recognize.
	Unsupported
)
22 | 
23 | // DT returns the DataType associated with a string.
24 | func DT(datatype string) DataType {
25 | 	datatype = strings.ToLower(datatype)
26 | 	switch datatype {
27 | 	case "string":
28 | 		return String
29 | 	case "bool":
30 | 		return Bool
31 | 	case "datetime":
32 | 		return DateTime
33 | 	case "interface":
34 | 		return Interface
35 | 	default:
36 | 		if strings.Contains(datatype, "float") {
37 | 			return Float64
38 | 		}
39 | 		if strings.Contains(datatype, "int") {
40 | 			return Int64
41 | 		}
42 | 		return Unsupported
43 | 	}
44 | }
45 | 
46 | func (datatype DataType) String() string {
47 | 	datatypes := []string{
48 | 		"none",
49 | 		"float64",
50 | 		"int64",
51 | 		"string",
52 | 		"bool",
53 | 		"dateTime",
54 | 		"interface",
55 | 		"placeholder",
56 | 		"unsupported",
57 | 	}
58 | 
59 | 	if datatype < None || datatype > Unsupported {
60 | 		return "unknown"
61 | 	}
62 | 	return datatypes[datatype]
63 | }
64 | 


--------------------------------------------------------------------------------
/series/constructor_benchmark_test.go:
--------------------------------------------------------------------------------
 1 | package series
 2 | 
 3 | import (
 4 | 	"log"
 5 | 	"testing"
 6 | )
 7 | 
 8 | // Floats
 9 | func float32Slice(n int) []float32 {
10 | 	var l []float32
11 | 	for i := 0; i < n; i++ {
12 | 		l = append(l, 1)
13 | 	}
14 | 	return l
15 | }
16 | 
// float64Slice returns a slice of n float64 values, each equal to 1.
// It returns nil when n is zero or negative.
func float64Slice(n int) []float64 {
	if n <= 0 {
		return nil
	}
	// Allocate once instead of growing through repeated appends.
	l := make([]float64, n)
	for i := range l {
		l[i] = 1
	}
	return l
}
24 | 
25 | func benchmarkNewFloat32(i int, b *testing.B) {
26 | 	v := float32Slice(i)
27 | 	b.ResetTimer()
28 | 	for n := 0; n < b.N; n++ {
29 | 		_, err := New(v)
30 | 		if err != nil {
31 | 			log.Fatal(err)
32 | 		}
33 | 	}
34 | }
35 | 
36 | func benchmarkNewFloat64(i int, b *testing.B) {
37 | 	v := float64Slice(i)
38 | 	b.ResetTimer()
39 | 	for n := 0; n < b.N; n++ {
40 | 		_, err := New(v)
41 | 		if err != nil {
42 | 			log.Fatal(err)
43 | 		}
44 | 	}
45 | }
46 | 
47 | // func BenchmarkNewFloat32_1(b *testing.B) { benchmarkNewFloat32(10000, b) }
48 | // func BenchmarkNewFloat64_1(b *testing.B) { benchmarkNewFloat64(10000, b) }
49 | // func BenchmarkNewFloat32_2(b *testing.B) { benchmarkNewFloat32(100000, b) }
50 | 
51 | // func BenchmarkNewFloat64_2(b *testing.B) { benchmarkNewFloat64(100000, b) }
52 | // func BenchmarkNewFloat32_3(b *testing.B) { benchmarkNewFloat32(1000000, b) }
53 | 
54 | // func BenchmarkNewFloat64_3(b *testing.B) { benchmarkNewFloat64(1000000, b) }
55 | 
56 | // func BenchmarkNewFloat32_4(b *testing.B) { benchmarkNewFloat32(10000000, b) }
57 | func BenchmarkNewFloat64_4(b *testing.B) { benchmarkNewFloat64(10000000, b) }
58 | 


--------------------------------------------------------------------------------
/dataframe/math_test.go:
--------------------------------------------------------------------------------
 1 | package dataframe
 2 | 
 3 | import (
 4 | 	"math"
 5 | 	"testing"
 6 | 
 7 | 	"github.com/ptiger10/pd/series"
 8 | )
 9 | 
10 | func Test_Math(t *testing.T) {
11 | 	df := MustNew([]interface{}{[]float64{1, 3, 5}, []float64{-3, math.NaN(), -1}, []float64{-5, 0, 5}},
12 | 		Config{Col: []string{"foo", "bar", "baz"}})
13 | 	tests := []struct {
14 | 		name  string
15 | 		input *DataFrame
16 | 		fn    func(*DataFrame) *series.Series
17 | 		want  *series.Series
18 | 	}{
19 | 		{name: "Empty", input: newEmptyDataFrame(), fn: (*DataFrame).Sum, want: series.MustNew(nil)},
20 | 		{"Sum", df, (*DataFrame).Sum,
21 | 			series.MustNew([]float64{9, -4, 0}, series.Config{Index: []string{"foo", "bar", "baz"}, Name: "sum"}),
22 | 		},
23 | 		{"Mean", df, (*DataFrame).Mean,
24 | 			series.MustNew([]float64{3, -2, 0}, series.Config{Index: []string{"foo", "bar", "baz"}, Name: "mean"}),
25 | 		},
26 | 		{"Min", df, (*DataFrame).Min,
27 | 			series.MustNew([]float64{1, -3, -5}, series.Config{Index: []string{"foo", "bar", "baz"}, Name: "min"}),
28 | 		},
29 | 		{"Max", df, (*DataFrame).Max,
30 | 			series.MustNew([]float64{5, -1, 5}, series.Config{Index: []string{"foo", "bar", "baz"}, Name: "max"}),
31 | 		},
32 | 		{"Std", df, (*DataFrame).Std,
33 | 			series.MustNew([]float64{1.632993161855452, 1, 4.08248290463863}, series.Config{Index: []string{"foo", "bar", "baz"}, Name: "std"}),
34 | 		},
35 | 	}
36 | 	for _, tt := range tests {
37 | 		t.Run(tt.name, func(t *testing.T) {
38 | 			got := tt.fn(tt.input)
39 | 			if !series.Equal(got, tt.want) {
40 | 				t.Errorf("%v() got %v, want %v", tt.name, got, tt.want)
41 | 			}
42 | 		})
43 | 	}
44 | }
45 | 


--------------------------------------------------------------------------------
/dataframe/math.go:
--------------------------------------------------------------------------------
 1 | package dataframe
 2 | 
 3 | import (
 4 | 	"math"
 5 | 
 6 | 	"github.com/ptiger10/pd/series"
 7 | )
 8 | 
 9 | func (df *DataFrame) math(name string, fn func(s *series.Series) float64) *series.Series {
10 | 	if Equal(df, newEmptyDataFrame()) {
11 | 		return series.MustNew(nil)
12 | 	}
13 | 	var vals []interface{}
14 | 	var idx []interface{}
15 | 	for m := 0; m < df.NumCols(); m++ {
16 | 		s := df.hydrateSeries(m)
17 | 		if calc := fn(s); !math.IsNaN(calc) {
18 | 			vals = append(vals, calc)
19 | 			idx = append(idx, s.Name())
20 | 		}
21 | 	}
22 | 	ret := series.MustNew(nil)
23 | 	for i := 0; i < len(vals); i++ {
24 | 		// ducks safe method because values are assumed to be supported
25 | 		s := series.MustNew(vals[i], series.Config{Index: idx[i], Name: name})
26 | 		ret.InPlace.Join(s)
27 | 	}
28 | 
29 | 	return ret
30 | }
31 | 
// Sum all numerical or boolean columns.
// The result is a Series named "sum" with one entry per column, produced by
// applying (*series.Series).Sum to each column; columns whose result is NaN
// are omitted.
func (df *DataFrame) Sum() *series.Series {
	return df.math("sum", (*series.Series).Sum)
}
36 | 
// Mean of all numerical or boolean columns.
// The result is a Series named "mean" with one entry per column, produced by
// applying (*series.Series).Mean to each column; columns whose result is NaN
// are omitted.
func (df *DataFrame) Mean() *series.Series {
	return df.math("mean", (*series.Series).Mean)
}
41 | 
// Median of all numerical or boolean columns.
// The result is a Series named "median" with one entry per column, produced
// by applying (*series.Series).Median to each column; columns whose result is
// NaN are omitted.
func (df *DataFrame) Median() *series.Series {
	return df.math("median", (*series.Series).Median)
}
46 | 
// Min returns the minimum of all numerical columns.
// The result is a Series named "min" with one entry per column, produced by
// applying (*series.Series).Min to each column; columns whose result is NaN
// are omitted.
func (df *DataFrame) Min() *series.Series {
	return df.math("min", (*series.Series).Min)
}
51 | 
// Max returns the maximum of all numerical columns.
// The result is a Series named "max" with one entry per column, produced by
// applying (*series.Series).Max to each column; columns whose result is NaN
// are omitted.
func (df *DataFrame) Max() *series.Series {
	return df.math("max", (*series.Series).Max)
}
56 | 
// Std returns the standard deviation of all numerical columns.
// The result is a Series named "std" with one entry per column, produced by
// applying (*series.Series).Std to each column; columns whose result is NaN
// are omitted.
func (df *DataFrame) Std() *series.Series {
	return df.math("std", (*series.Series).Std)
}
61 | 


--------------------------------------------------------------------------------
/internal/values/type-bool.go:
--------------------------------------------------------------------------------
 1 | package values
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"math"
 6 | 	"time"
 7 | 
 8 | 	"github.com/ptiger10/pd/options"
 9 | )
10 | 
11 | // [START Constructor Functions]
12 | 
13 | // newBool creates a boolValue from atomic bool value
14 | func newBool(val bool) boolValue {
15 | 	return boolValue{val, false}
16 | }
17 | 
18 | func (vals *boolValues) Less(i, j int) bool {
19 | 	if !(*vals)[i].v && (*vals)[j].v {
20 | 		return true
21 | 	}
22 | 	return false
23 | }
24 | 
25 | // [END Constructor Functions]
26 | 
27 | // [START Converters]
28 | // toFloat converts boolValues to float64Values.
29 | //
30 | // true: 1.0, false: 0.0, null: NaN
31 | func (val boolValue) toFloat64() float64Value {
32 | 	if val.null {
33 | 		return float64Value{math.NaN(), true}
34 | 	} else if val.v {
35 | 		return float64Value{1, false}
36 | 	} else {
37 | 		return float64Value{0, false}
38 | 	}
39 | }
40 | 
41 | // toInt converts boolValues to int64Values.
42 | //
43 | // true: 1, false: 0, null: 0
44 | func (val boolValue) toInt64() int64Value {
45 | 	if val.null {
46 | 		return int64Value{0, true}
47 | 	} else if val.v {
48 | 		return int64Value{1, false}
49 | 	} else {
50 | 		return int64Value{0, false}
51 | 	}
52 | }
53 | 
54 | func (val boolValue) toString() stringValue {
55 | 	if val.null {
56 | 		return stringValue{options.GetDisplayStringNullFiller(), true}
57 | 	}
58 | 	return stringValue{fmt.Sprint(val.v), false}
59 | }
60 | 
// toBool returns the receiver unchanged, including its null flag.
func (val boolValue) toBool() boolValue {
	return val
}
65 | 
66 | // !null: 1/1/1970; null: time.Time{}
67 | func (val boolValue) toDateTime() dateTimeValue {
68 | 	epochDate := time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC)
69 | 	if val.null {
70 | 		return dateTimeValue{time.Time{}, true}
71 | 	}
72 | 	return dateTimeValue{epochDate, false}
73 | }
74 | 
75 | // [END Converters]
76 | 


--------------------------------------------------------------------------------
/internal/values/type-float.go:
--------------------------------------------------------------------------------
 1 | package values
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"math"
 6 | 	"time"
 7 | 
 8 | 	"github.com/ptiger10/pd/options"
 9 | )
10 | 
11 | // newFloat64 creates a float64Value from atomic float64 value
12 | func newFloat64(val float64) float64Value {
13 | 	if math.IsNaN(val) {
14 | 		return float64Value{val, true}
15 | 	}
16 | 	return float64Value{val, false}
17 | }
18 | 
19 | func (vals *float64Values) Less(i, j int) bool {
20 | 	if (*vals)[i].v < (*vals)[j].v {
21 | 		return true
22 | 	}
23 | 	return false
24 | }
25 | 
26 | // [START Converters]
27 | 
// toFloat64 returns the receiver unchanged, including its null flag.
func (val float64Value) toFloat64() float64Value {
	return val
}
32 | 
33 | // toInt converts a float64Value to int64Value
34 | //
35 | // 1.9: 1, 1.5: 1, null: 0
36 | func (val float64Value) toInt64() int64Value {
37 | 	if val.null {
38 | 		return int64Value{0, true}
39 | 	}
40 | 	v := int64(val.v)
41 | 	return int64Value{v, false}
42 | 
43 | }
44 | 
45 | func (val float64Value) toString() stringValue {
46 | 	if val.null {
47 | 		return stringValue{options.GetDisplayStringNullFiller(), true}
48 | 	}
49 | 	return stringValue{fmt.Sprint(val.v), false}
50 | }
51 | 
52 | // toBool converts float64Value to boolValue
53 | //
54 | // x != 0: true; x == 0: false; null: false
55 | func (val float64Value) toBool() boolValue {
56 | 	if val.null {
57 | 		return boolValue{false, true}
58 | 	}
59 | 	if val.v == 0 {
60 | 		return boolValue{false, false}
61 | 	}
62 | 	return boolValue{true, false}
63 | }
64 | 
65 | // toDateTime converts float64Value to dateTimeValue.
66 | // Tries to convert from Unix EPOCH time, otherwise returns null
67 | func (val float64Value) toDateTime() dateTimeValue {
68 | 	if val.null {
69 | 		return dateTimeValue{time.Time{}, true}
70 | 	}
71 | 	return floatToDateTime(val.v)
72 | }
73 | 
74 | func floatToDateTime(f float64) dateTimeValue {
75 | 	return intToDateTime(int64(f))
76 | }
77 | 
78 | // [END Converters]
79 | 


--------------------------------------------------------------------------------
/internal/values/sharedMeta_test.go:
--------------------------------------------------------------------------------
 1 | package values
 2 | 
 3 | import (
 4 | 	"reflect"
 5 | 	"testing"
 6 | )
 7 | 
 8 | // Pro forma tests for generics
 9 | func TestMeta(t *testing.T) {
10 | 	newSlicevalueType([]valueType{newvalueType("foo")})
11 | 
12 | 	val := newvalueType("foo")
13 | 	f := val.toFloat64()
14 | 	if vType := reflect.TypeOf(f); vType.Name() != "float64Value" {
15 | 		t.Errorf("%v", vType.Name())
16 | 	}
17 | 	i := val.toInt64()
18 | 	if vType := reflect.TypeOf(i); vType.Name() != "int64Value" {
19 | 		t.Errorf("%v", vType.Name())
20 | 	}
21 | 	s := val.toString()
22 | 	if vType := reflect.TypeOf(s); vType.Name() != "stringValue" {
23 | 		t.Errorf("%v", vType.Name())
24 | 	}
25 | 	b := val.toBool()
26 | 	if vType := reflect.TypeOf(b); vType.Name() != "boolValue" {
27 | 		t.Errorf("%v", vType.Name())
28 | 	}
29 | 	dt := val.toDateTime()
30 | 	if vType := reflect.TypeOf(dt); vType.Name() != "dateTimeValue" {
31 | 		t.Errorf("%v", vType.Name())
32 | 	}
33 | 
34 | 	nullVal := valueTypeValue{"foo", true}
35 | 	nullVal.toString()
36 | 	nullVals := valueTypeValues{nullVal}
37 | 	nullVals.ToInterface()
38 | 
39 | 	vals := valueTypeValues{val}
40 | 	vals.Len()
41 | 	vals.Swap(0, 0)
42 | 	vals.Less(0, 0)
43 | 	vals.Values()
44 | 	vals.Vals()
45 | 	vals.Copy()
46 | 	vals.Value(0)
47 | 	vals.Null(0)
48 | 	vals.ToFloat64()
49 | 	vals.ToInt64()
50 | 	vals.ToString()
51 | 	vals.ToBool()
52 | 	vals.ToDateTime()
53 | 	vals.ToInterface()
54 | 
55 | 	vals.Subset([]int{0})
56 | 	vals.Set(0, "bar")
57 | 	vals.Set(0, "")
58 | 	vals.Drop(0)
59 | 	vals.Insert(0, "foo")
60 | 
61 | 	v := interfaceValue{"foo", false}
62 | 	v.tovalueType()
63 | }
64 | 
65 | // No easy way to Convert valueTypeValues, so expect panic
66 | func TestPanic(t *testing.T) {
67 | 	val := newvalueType("foo")
68 | 	vals := valueTypeValues{val}
69 | 	defer func() {
70 | 		if r := recover(); r == nil {
71 | 			t.Errorf("The code did not panic")
72 | 		}
73 | 	}()
74 | 
75 | 	// The following is the code under test
76 | 	vals.Append(&vals)
77 | }
78 | 


--------------------------------------------------------------------------------
/internal/values/type-int.go:
--------------------------------------------------------------------------------
 1 | package values
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"math"
 6 | 	"time"
 7 | 
 8 | 	"github.com/ptiger10/pd/options"
 9 | )
10 | 
11 | // [START Constructor Functions]
12 | 
13 | // newInt64 creates an int64Value from atomic int64 value
14 | func newInt64(val int64) int64Value {
15 | 	return int64Value{val, false}
16 | }
17 | 
18 | func (vals *int64Values) Less(i, j int) bool {
19 | 	if (*vals)[i].v < (*vals)[j].v {
20 | 		return true
21 | 	}
22 | 	return false
23 | }
24 | 
25 | // [END Constructor Functions]
26 | 
27 | // [START Converters]
28 | 
29 | // toFloat converts int64Value to float64Value
30 | //
31 | // 1: 1.0
32 | func (val int64Value) toFloat64() float64Value {
33 | 	if val.null {
34 | 		return float64Value{math.NaN(), true}
35 | 	}
36 | 	v := float64(val.v)
37 | 	return float64Value{v, false}
38 | 
39 | }
40 | 
// toInt64 returns the receiver unchanged (value and null flag alike),
// because it is already an int64Value.
func (val int64Value) toInt64() int64Value {
	return val
}
45 | 
46 | func (val int64Value) toString() stringValue {
47 | 	if val.null {
48 | 		return stringValue{options.GetDisplayStringNullFiller(), true}
49 | 	}
50 | 	return stringValue{fmt.Sprint(val.v), false}
51 | }
52 | 
53 | // toBool converts int64Value to boolValue
54 | //
55 | // x != 0: true; x == 0: false; null: false
56 | func (val int64Value) toBool() boolValue {
57 | 	if val.null {
58 | 		return boolValue{false, true}
59 | 	}
60 | 	if val.v == 0 {
61 | 		return boolValue{false, false}
62 | 	}
63 | 	return boolValue{true, false}
64 | }
65 | 
66 | // toDateTime converts int64Value to dateTimeValue.
67 | // Tries to convert from Unix EPOCH timestamp.
68 | // Defaults to 1970-01-01 00:00:00 +0000 UTC.
69 | func (val int64Value) toDateTime() dateTimeValue {
70 | 	if val.null {
71 | 		return dateTimeValue{time.Time{}, true}
72 | 	}
73 | 	return intToDateTime(val.v)
74 | }
75 | 
76 | func intToDateTime(i int64) dateTimeValue {
77 | 	// convert from nanoseconds to seconds
78 | 	i /= 1000000000
79 | 	v := time.Unix(i, 0).UTC()
80 | 	return dateTimeValue{v, false}
81 | }
82 | 
83 | // [END Converters]
84 | 


--------------------------------------------------------------------------------
/internal/values/type-datetime.go:
--------------------------------------------------------------------------------
 1 | package values
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"math"
 6 | 	"time"
 7 | 
 8 | 	"github.com/ptiger10/pd/options"
 9 | )
10 | 
11 | // [START Constructor Functions]
12 | 
13 | // newDateTime creates a dateTimeValue from atomic time.Time value
14 | func newDateTime(val time.Time) dateTimeValue {
15 | 	if (time.Time{}) == val {
16 | 		return dateTimeValue{val, true}
17 | 	}
18 | 	return dateTimeValue{val, false}
19 | }
20 | 
21 | func (vals *dateTimeValues) Less(i, j int) bool {
22 | 	if (*vals)[i].v.Before((*vals)[j].v) {
23 | 		return true
24 | 	}
25 | 	return false
26 | }
27 | 
28 | // [END Constructor Functions]
29 | 
30 | // [START Converters]
31 | 
32 | // toFloat converts dateTimeValues to float64Values of the Unix EPOCH timestamp
33 | // (seconds since midnight January 1, 1970)
34 | // 2019-05-01 00:00:00 +0000 UTC: 1556757505
35 | func (val dateTimeValue) toFloat64() float64Value {
36 | 	if val.null || val.v == (time.Time{}) {
37 | 		return float64Value{math.NaN(), true}
38 | 	}
39 | 	v := val.v.UnixNano()
40 | 	return float64Value{float64(v), false}
41 | }
42 | 
43 | // ToInt converts dateTimeValues to int64Values of the Unix EPOCH timestamp
44 | // (seconds since midnight January 1, 1970)
45 | //
46 | // 2019-05-01 00:00:00 +0000 UTC: 1556757505
47 | func (val dateTimeValue) toInt64() int64Value {
48 | 	if val.null || val.v == (time.Time{}) {
49 | 		return int64Value{0, true}
50 | 	}
51 | 	v := val.v.UnixNano()
52 | 	return int64Value{v, false}
53 | }
54 | 
55 | func (val dateTimeValue) toString() stringValue {
56 | 	if val.null {
57 | 		return stringValue{options.GetDisplayStringNullFiller(), true}
58 | 	}
59 | 	return stringValue{fmt.Sprint(val.v), false}
60 | }
61 | 
62 | // ToBool converts dateTimeValues to boolValues
63 | //
64 | // x != time.Time{}: true; x == time.Time{}: false; null: false
65 | func (val dateTimeValue) toBool() boolValue {
66 | 	if val.null || val.v == (time.Time{}) {
67 | 		return boolValue{false, true}
68 | 	}
69 | 	return boolValue{true, false}
70 | 
71 | }
72 | 
// toDateTime returns the receiver unchanged (value and null flag alike),
// because it is already a dateTimeValue.
func (val dateTimeValue) toDateTime() dateTimeValue {
	return val
}
77 | 
78 | // [END Converters]
79 | 


--------------------------------------------------------------------------------
/series/merge.go:
--------------------------------------------------------------------------------
 1 | package series
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"log"
 6 | 	"reflect"
 7 | 
 8 | 	"github.com/ptiger10/pd/internal/index"
 9 | 	"github.com/ptiger10/pd/options"
10 | )
11 | 
12 | // Join converts s2 to the same type as the base Series (s), appends s2 to the end, and modifies s in place.
13 | func (ip InPlace) Join(s2 *Series) error {
14 | 	if ip.s == nil || ip.s.datatype == options.None {
15 | 		ip.s.replace(s2)
16 | 		return nil
17 | 	}
18 | 
19 | 	if s2.index.NumLevels() != ip.s.NumLevels() {
20 | 		return fmt.Errorf("Series.Join(): s2 must have same number of index levels as s (%d != %d)", s2.index.NumLevels(), ip.s.NumLevels())
21 | 	}
22 | 	for i := 0; i < s2.Len(); i++ {
23 | 		elem := s2.Element(i)
24 | 		ip.s.InPlace.Append(elem.Value, elem.Labels...)
25 | 	}
26 | 	return nil
27 | }
28 | 
29 | // Join converts s2 to the same type as the base Series (s), appends s2 to the end, and returns a new Series.
30 | func (s *Series) Join(s2 *Series) (*Series, error) {
31 | 	s = s.Copy()
32 | 	err := s.InPlace.Join(s2)
33 | 	return s, err
34 | }
35 | 
36 | // LookupSeries performs a vlookup of each values in one Series against another Series.
37 | func (s *Series) LookupSeries(s2 *Series) *Series {
38 | 	if s2.index.NumLevels() != s.NumLevels() {
39 | 		if options.GetLogWarnings() {
40 | 			log.Printf("Series.LookupSeries(): s2 must have same number of index levels as s (%d != %d)\n", s2.index.NumLevels(), s.NumLevels())
41 | 		}
42 | 		return newEmptySeries()
43 | 	}
44 | 
45 | 	matchShallow := func(s *Series, idx index.Elements) int {
46 | 		for i := 0; i < s.Len(); i++ {
47 | 			if reflect.DeepEqual(idx, s.index.Elements(i)) {
48 | 				return i
49 | 			}
50 | 		}
51 | 		return -1
52 | 	}
53 | 
54 | 	vals := make([]interface{}, 0)
55 | 	for i := 0; i < s.Len(); i++ {
56 | 		elems := s.index.Elements(i)
57 | 		pos := matchShallow(s2, elems)
58 | 		if pos != -1 {
59 | 			vals = append(vals, s2.At(pos))
60 | 		} else {
61 | 			vals = append(vals, "")
62 | 		}
63 | 	}
64 | 	// ducks error because there will be no unsupported values coming from an existing series
65 | 	ret, _ := New(vals, Config{DataType: s2.datatype})
66 | 	ret.index = s.index
67 | 
68 | 	return ret
69 | }
70 | 


--------------------------------------------------------------------------------
/dataframe/merge.go:
--------------------------------------------------------------------------------
 1 | package dataframe
 2 | 
 3 | // // Join extends the columns, rows, or columns and rows of a dataframe by appending s2 and modifies the DataFrame in place.
 4 | // // If extending rows, the values within a Values container are converted to []interface if the container datatypes are not the same.
 5 | // //
 6 | // // Allowable append values: "rows", "cols", "both"
 7 | // //
 8 | // // Allowable method values: "left", "right", "inner", "outer"
 9 | // func (ip InPlace) Join(append string, method string, df2 *DataFrame) error {
10 | // 	if ip.df.vals == nil {
11 | // 		ip.df.replace(df2)
12 | // 		return nil
13 | // 	}
14 | // 	switch append {
15 | // 	case "rows":
16 | // 		switch method {
17 | // 		case "left":
18 | 
19 | // 		}
20 | // 	}
21 | // 	return nil
22 | // }
23 | 
24 | // assumes equivalent index levels and column positions
25 | func (ip InPlace) appendDataFrameRow(df2 *DataFrame) {
26 | 	// Handling empty DataFrame
27 | 	if Equal(ip.df, newEmptyDataFrame()) {
28 | 		ip.df.replace(df2)
29 | 		return
30 | 	}
31 | 
32 | 	// Append
33 | 	// Index Levels
34 | 	for j := 0; j < ip.df.IndexLevels(); j++ {
35 | 		ip.df.index.Levels[j].Labels.Append(df2.index.Levels[j].Labels)
36 | 		ip.df.index.Levels[j].NeedsRefresh = true
37 | 	}
38 | 	// Values
39 | 	for m := 0; m < ip.df.NumCols(); m++ {
40 | 		ip.df.vals[m].Values.Append(df2.vals[m].Values)
41 | 	}
42 | 	return
43 | }
44 | 
45 | 
46 | func (ip InPlace) appendDataFrameColumn(df2 *DataFrame) error {
47 | 	// Handling empty DataFrame
48 | 	if Equal(ip.df, newEmptyDataFrame()) {
49 | 		ip.df.replace(df2)
50 | 		return nil
51 | 	}
52 | 
53 | 	// Append
54 | 	for m := 0; m < df2.NumCols(); m++ {
55 | 		// drop errors for now, because input is controlled
56 | 		// err := ip.AppendCol(
57 | 		ip.AppendCol(
58 | 			df2.hydrateSeries(m),
59 | 			df2.cols.MultiName(m)...,
60 | 		)
61 | 		// if err != nil {
62 | 		// 	return fmt.Errorf("appendDataFrameColumn(): %v", err)
63 | 		// }
64 | 	}
65 | 	return nil
66 | }
67 | 
68 | // // Join extends the columns, rows, or columns and rows of a dataframe by appending s2 and returns a new DataFrame.
69 | // // If extending rows, the values within a Values container are converted to []interface if the container datatypes are not the same.
70 | // func (df *DataFrame) Join(append string, method string, df2 *DataFrame) *DataFrame {
71 | // 	df = df.Copy()
72 | // 	df.InPlace.Join(append, method, df2)
73 | // 	return df
74 | // }
75 | 


--------------------------------------------------------------------------------
/dataframe/merge_test.go:
--------------------------------------------------------------------------------
 1 | package dataframe
 2 | 
 3 | import "testing"
 4 | 
 5 | func TestMerge_appendDataFrameRow(t *testing.T) {
 6 | 	type args struct {
 7 | 		df2 *DataFrame
 8 | 	}
 9 | 	tests := []struct {
10 | 		name  string
11 | 		input *DataFrame
12 | 		args  args
13 | 		want  *DataFrame
14 | 	}{
15 | 		{name: "empty", input: newEmptyDataFrame(),
16 | 			args: args{df2: MustNew([]interface{}{"foo"})},
17 | 			want: MustNew([]interface{}{"foo"})},
18 | 		{"same datatype", MustNew([]interface{}{"foo"}, Config{Index: "1"}),
19 | 			args{MustNew([]interface{}{"bar"}, Config{Index: "2"})},
20 | 			MustNew([]interface{}{[]string{"foo", "bar"}}, Config{Index: []string{"1", "2"}})},
21 | 		{"different datatype", MustNew([]interface{}{"foo"}, Config{Index: "1"}),
22 | 			args{MustNew([]interface{}{10}, Config{Index: "2"})},
23 | 			MustNew([]interface{}{[]string{"foo", "10"}}, Config{Index: []string{"1", "2"}})},
24 | 	}
25 | 	for _, tt := range tests {
26 | 		t.Run(tt.name, func(t *testing.T) {
27 | 			df := tt.input.Copy()
28 | 			df.InPlace.appendDataFrameRow(tt.args.df2)
29 | 			if !Equal(df, tt.want) {
30 | 				t.Errorf("InPlace.appendDataFrameRow() = %v, want %v", df, tt.want)
31 | 			}
32 | 		})
33 | 	}
34 | }
35 | 
36 | func TestMerge_appendDataFrameColumn(t *testing.T) {
37 | 	type args struct {
38 | 		df2 *DataFrame
39 | 	}
40 | 	type want struct {
41 | 		df  *DataFrame
42 | 		err bool
43 | 	}
44 | 	tests := []struct {
45 | 		name  string
46 | 		input *DataFrame
47 | 		args  args
48 | 		want  want
49 | 	}{
50 | 		{name: "empty", input: newEmptyDataFrame(),
51 | 			args: args{df2: MustNew([]interface{}{"foo"})},
52 | 			want: want{df: MustNew([]interface{}{"foo"}), err: false}},
53 | 		{"pass", MustNew([]interface{}{"foo"}, Config{Col: []string{"1"}}),
54 | 			args{MustNew([]interface{}{"bar"}, Config{Col: []string{"2"}})},
55 | 			want{MustNew([]interface{}{"foo", "bar"}, Config{Col: []string{"1", "2"}}), false}},
56 | 		// fix to append multiple columns in order
57 | 		// {"extra columns in df2", MustNew([]interface{}{"foo"}, Config{Col: []string{"1"}}),
58 | 		// 	args{MustNew([]interface{}{"bar", "baz"}, Config{Col: []string{"1", "2"}})},
59 | 		// 	want{MustNew([]interface{}{"foo"}, Config{Col: []string{"1"}}), false}},
60 | 	}
61 | 	for _, tt := range tests {
62 | 		t.Run(tt.name, func(t *testing.T) {
63 | 			df := tt.input.Copy()
64 | 			err := df.InPlace.appendDataFrameColumn(tt.args.df2)
65 | 			if (err != nil) != tt.want.err {
66 | 				t.Errorf("DataFrame.appendDataFrameColumn() error = %v, want %v", err, tt.want.err)
67 | 				return
68 | 			}
69 | 			if !Equal(df, tt.want.df) {
70 | 				t.Errorf("InPlace.appendDataFrameColumn() = %v, want %v", df, tt.want.df)
71 | 			}
72 | 		})
73 | 	}
74 | }
75 | 


--------------------------------------------------------------------------------
/internal/values/values.go:
--------------------------------------------------------------------------------
 1 | // Package values is an internal package that powers the values within pd/Series and pd/DataFrame.
 2 | // This package defines the Values interface and multiple concrete implementations of the interface.
 3 | package values
 4 | 
 5 | import (
 6 | 	"fmt"
 7 | 
 8 | 	"github.com/ptiger10/pd/options"
 9 | )
10 | 
// Values is the primary means of handling a collection of value/null pairs.
// The same interface and value types are used for both Series values and Index labels.
type Values interface {
	// Inspection.
	Len() int                       // number of value/null pairs
	Value(position int) interface{} // the value field at an integer position
	Null(position int) bool         // the null field at an integer position
	Vals() interface{}              // the values as one interface, ready for type assertion into a slice of their native type
	Values() []interface{}          // the values as an interface slice, for handling them as a predictable slice

	// Derivation of new Values.
	Subset(positions []int) Values // a new Values made of the value/null pairs at one or more integer positions
	Copy() Values                  // a clone of the Values

	// Modification.
	Set(position int, val interface{})    // overwrite the value/null pair at an integer position
	Insert(position int, val interface{}) // insert a value/null pair at an integer position
	Append(other Values)                  // append Values together
	Drop(position int)                    // drop the value/null pair at an integer position

	// Sorting support.
	Swap(i, j int)      // swap two values
	Less(i, j int) bool // report whether the value at i is less than the value at j

	// Conversion to each supported type.
	ToFloat64() Values
	ToInt64() Values
	ToString() Values
	ToBool() Values
	ToDateTime() Values
	ToInterface() Values
}
35 | 
// Container pairs Values (a list of Value/Null pairs satisfying the Values interface)
// with an options.DataType.
type Container struct {
	Values   Values
	DataType options.DataType
}
41 | 
42 | // Convert a collection of values from one type to another, and coerce to null if a value cannot be converted sensibly
43 | func Convert(currentVals Values, dataType options.DataType) (Values, error) {
44 | 	var vals Values
45 | 	switch dataType {
46 | 	case options.None:
47 | 		return nil, fmt.Errorf("unable to convert values: must supply a valid Kind")
48 | 	case options.Float64:
49 | 		vals = currentVals.ToFloat64()
50 | 	case options.Int64:
51 | 		vals = currentVals.ToInt64()
52 | 	case options.String:
53 | 		vals = currentVals.ToString()
54 | 	case options.Bool:
55 | 		vals = currentVals.ToBool()
56 | 	case options.DateTime:
57 | 		vals = currentVals.ToDateTime()
58 | 	case options.Interface:
59 | 		vals = currentVals.ToInterface()
60 | 	default:
61 | 		return nil, fmt.Errorf("unable to convert values: kind not supported: %v", dataType)
62 | 	}
63 | 	return vals, nil
64 | }
65 | 


--------------------------------------------------------------------------------
/internal/values/options.go:
--------------------------------------------------------------------------------
 1 | package values
 2 | 
 3 | var displayValuesWhitespaceBuffer = 4
 4 | var displayColumnsWhitespaceBuffer = 2
 5 | var displayElementWhitespaceBuffer = 1
 6 | var displayIndexWhitespaceBuffer = 1
 7 | var multiColNameSeparator = " | "
 8 | var interpolationMaximum = 50
 9 | var interpolationThreshold = .80
10 | 
11 | // GetDisplayValuesWhitespaceBuffer returns displayValuesWhitespaceBuffer.
12 | // displayValuesWhitespaceBuffer is an option when printing a Series or DataFrame.
13 | // It is the number of spaces between the last level of index labels
14 | // and the first collection of values. In a Series, there is only one collection of values.
15 | // In a DataFrame, the first collection of values is the first Series.
16 | //
17 | // Default buffer: 4 spaces
18 | func GetDisplayValuesWhitespaceBuffer() int {
19 | 	return displayValuesWhitespaceBuffer
20 | }
21 | 
22 | // GetDisplayColumnsWhitespaceBuffer returns displayColumnsWhitespaceBuffer.
23 | // displayColumnsWhitespaceBuffer is an option when printing a Series or DataFrame.
24 | // It is the number of spaces between columns in a DataFrame.
25 | //
26 | // Default buffer: 2 spaces
27 | func GetDisplayColumnsWhitespaceBuffer() int {
28 | 	return displayColumnsWhitespaceBuffer
29 | }
30 | 
31 | // GetDisplayElementWhitespaceBuffer returns displayElementWhitespaceBuffer.
32 | // DisplayElementWhitespaceBuffer is an option when printing an Element.
33 | // It is the number of spaces between the last level of index labels and the first value.
34 | //
35 | // // Default buffer: 1 space
36 | func GetDisplayElementWhitespaceBuffer() int {
37 | 	return displayElementWhitespaceBuffer
38 | }
39 | 
40 | // GetDisplayIndexWhitespaceBuffer returns displayIndexWhitespaceBuffer.
41 | // DisplayIndexWhitespaceBuffer is an option when printing a Series.
42 | // It is the number of spaces between index labels. This applies only to a multi-level index.
43 | //
44 | // Default buffer: 1 space
45 | func GetDisplayIndexWhitespaceBuffer() int {
46 | 	return displayIndexWhitespaceBuffer
47 | }
48 | 
49 | // GetMultiColNameSeparator returns the multiColNameSeparator.
50 | // The multiColNameSeparator separates col names whenever a multicol is concatenated together (e.g., into a Series name or index level name).
51 | //
52 | // Default: " | "
53 | func GetMultiColNameSeparator() string {
54 | 	return multiColNameSeparator
55 | }
56 | 
57 | // GetInterpolationMaximum returns the max number of records that will be checked during an interpolation check.
58 | //
59 | // Default: 50
60 | func GetInterpolationMaximum() int {
61 | 	return interpolationMaximum
62 | }
63 | 
64 | // GetInterpolationThreshold returns the ratio of type inclusion required for a dataType to be interpolated.
65 | //
66 | // Default: .80
67 | func GetInterpolationThreshold() float64 {
68 | 	return interpolationThreshold
69 | }
70 | 


--------------------------------------------------------------------------------
/benchmarking/profiler/benchmarks/profile.go:
--------------------------------------------------------------------------------
 1 | // +build benchmarks
 2 | 
 3 | package benchmarks
 4 | 
 5 | import (
 6 | 	"encoding/json"
 7 | 	"fmt"
 8 | 	"log"
 9 | 	"os/exec"
10 | 	"path"
11 | 	"runtime"
12 | 	"testing"
13 | 	"time"
14 | )
15 | 
// desc pairs a string with an integer order.
// NOTE(review): desc is not referenced anywhere else in this file; presumably it
// orders descriptions for display - confirm against the rest of the package.
type desc struct {
	order int
	str   string
}
20 | 
21 | // RunGoProfiler specifies all the benchmarks to profile and return in the benchmark table.
22 | func RunGoProfiler() Results {
23 | 	fmt.Println("Profiling Go")
24 | 	Results := Results{
25 | 		"100k": {
26 | 			"sum": ProfileGo(benchmarkSumFloat64_100000),
27 | 			// "sumx10":        ProfileGo(benchmarkSumFloat64_100k10x),
28 | 			// "readCSVSum10x": ProfileGo(benchmarkReadSumFloat64_100k10x),
29 | 			"mean": ProfileGo(benchmarkMeanFloat64_100000),
30 | 			"min":  ProfileGo(benchmarkMinFloat64_100000),
31 | 			"max":  ProfileGo(benchmarkMaxFloat64_100000),
32 | 			"std":  ProfileGo(benchmarkStdFloat64_100000),
33 | 			// "readCSVSum": ProfileGo(benchmarkReadSumFloat64_100000),
34 | 		},
35 | 		"500k": {
36 | 			"sum2": ProfileGo(benchmarkSumFloat64_500000),
37 | 			// 	"mean2": ProfileGo(benchmarkMeanFloat64_500000),
38 | 		},
39 | 		// "5m": {
40 | 		// 	"sum": ProfileGo(benchmarkSumFloat64_5m),
41 | 		// },
42 | 	}
43 | 	return Results
44 | }
45 | 
// Results contains benchmarking results, keyed first by number of samples and then by test name:
// {"num of samples": {"test1": "10ms"...}}
type Results map[string]map[string]Result

// A Result of benchmarking data in the form [string, float64]:
// a rounded, human-readable duration followed by the raw nanoseconds per operation.
type Result []interface{}

// ProfileGo runs the normal Go benchmarking command on f and returns the time per operation
// both as a rounded string, using the largest of ns, μs, ms, and s that fits, and as a raw ns float.
func ProfileGo(f func(b *testing.B)) Result {
	nsPerOp := testing.Benchmark(f).NsPerOp()
	raw := float64(nsPerOp)

	var speed string
	switch elapsed := time.Duration(nsPerOp); {
	case elapsed < time.Microsecond:
		speed = fmt.Sprintf("%vns", nsPerOp)
	case elapsed < time.Millisecond:
		speed = fmt.Sprintf("%.1fμs", raw/float64(time.Microsecond))
	case elapsed < time.Second:
		speed = fmt.Sprintf("%.1fms", raw/float64(time.Millisecond))
	default:
		speed = fmt.Sprintf("%.2fs", raw/float64(time.Second))
	}
	return Result{speed, raw}
}
70 | 
71 | // RunPythonProfiler executes main.py in this directory, which is expected to return JSON
72 | // in the form of Results. This command is expected to be initiated from the directory above.
73 | func RunPythonProfiler() Results {
74 | 	fmt.Println("Profiling Python")
75 | 	_, thisFile, _, _ := runtime.Caller(0)
76 | 	script := "profile.py"
77 | 	scriptPath := path.Join(path.Dir(thisFile), script)
78 | 	cmd := exec.Command("python3", scriptPath)
79 | 	out, err := cmd.Output()
80 | 	if err != nil {
81 | 		log.Fatal(err)
82 | 	}
83 | 
84 | 	var r Results
85 | 	err = json.Unmarshal(out, &r)
86 | 	if err != nil {
87 | 		log.Fatal(err)
88 | 	}
89 | 	return r
90 | }
91 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # pd
 2 | [![Go Report Card](https://goreportcard.com/badge/github.com/ptiger10/pd)](https://goreportcard.com/report/github.com/ptiger10/pd) 
 3 | [![GoDoc](https://godoc.org/github.com/ptiger10/pd?status.svg)](https://godoc.org/github.com/ptiger10/pd) 
 4 | [![Build Status](https://travis-ci.org/ptiger10/pd.svg?branch=master)](https://travis-ci.org/ptiger10/pd)
 5 | [![codecov](https://codecov.io/gh/ptiger10/pd/branch/master/graph/badge.svg)](https://codecov.io/gh/ptiger10/pd)
 6 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
 7 | 
 8 | pd (informally known as "GoPandas") is a library for cleaning, aggregating, and transforming data using Series and DataFrames. GoPandas combines a flexible API familiar to Python pandas users with the qualities of Go, including type safety, predictable error handling, and fast concurrent processing.
 9 | 
10 | The API is still version 0 and subject to major revisions. Use in production code at your own risk.
11 | 
12 | Some notable features of GoPandas:
13 | * flexible constructor that supports float, int, string, bool, time.Time, and interface Series
14 | * seamlessly handles null data and type conversions
15 | * well-suited to either the Jupyter notebook style of data exploration or conventional programming
16 | * advanced filtering, grouping, and pivoting
17 | * hierarchical indexing (i.e., multi-level indexes and columns)
18 | * reads from either CSV or any spreadsheet or tabular data structured as [][]interface (e.g., Google Sheets)
19 | * complete test coverage
20 | * minimal dependencies (total package size is <10MB, compared to Pandas at >200MB)
* uses concurrent processing to achieve faster speeds than Pandas on many fundamental operations, and the performance differential becomes more pronounced with scale (6x+ superior performance summing two columns in a 500k row spreadsheet - see the most recent [benchmarking table](benchmarking/profiler/comparison_summary.txt))
22 | 
23 | ## Getting Started
Check out the Jupyter notebook examples in the [guides](https://github.com/ptiger10/pd/tree/master/guides). GitHub sometimes has trouble rendering .ipynb files; backup views are available here: [Series](https://nbviewer.jupyter.org/github/ptiger10/pd/blob/master/guides/Series.ipynb?flush_cache=true), [DataFrame](https://nbviewer.jupyter.org/github/ptiger10/pd/blob/master/guides/DataFrame.ipynb?flush_cache=true), [Options](https://nbviewer.jupyter.org/github/ptiger10/pd/blob/master/guides/Options.ipynb?flush_cache=true).
25 | 
26 | To run the Jupyter notebooks yourself, I recommend lgo (Docker required)
27 | * `cd guides/docker`
28 | * start: `./up.sh`
29 | * stop: `./down.sh`
30 | * rebuild package to newest version: `./up.sh -r`
31 | 
32 | ## Replicating Benchmark Tests
33 | * Requires Python 3.x and pandas
34 | * Download data from [here](https://github.com/ptiger10/pdTestData) and save in benchmarking/profiler
35 | * `go run -tags=benchmarks benchmarking/profiler/main.go`


--------------------------------------------------------------------------------
/benchmarking/profiler/benchmarks/benchmarks.go:
--------------------------------------------------------------------------------
  1 | // +build benchmarks
  2 | 
  3 | package benchmarks
  4 | 
  5 | import (
  6 | 	"log"
  7 | 	"testing"
  8 | 
  9 | 	"github.com/ptiger10/pd"
 10 | 	"github.com/ptiger10/pd/options"
 11 | )
 12 | 
 13 | func benchmarkSumFloat64_5m(b *testing.B) {
 14 | 	for n := 0; n < b.N; n++ {
 15 | 		df5m.Sum()
 16 | 	}
 17 | }
 18 | 
 19 | func benchmarkSumFloat64_500000(b *testing.B) {
 20 | 	for n := 0; n < b.N; n++ {
 21 | 		df500k.Sum()
 22 | 	}
 23 | }
 24 | 
 25 | func benchmarkSumFloat64_100k10x(b *testing.B) {
 26 | 	for n := 0; n < b.N; n++ {
 27 | 		df100k10x.Sum()
 28 | 	}
 29 | }
 30 | 
 31 | func benchmarkSumFloat64_100000(b *testing.B) {
 32 | 	for n := 0; n < b.N; n++ {
 33 | 		df100k.Sum()
 34 | 	}
 35 | }
 36 | 
 37 | func benchmarkMeanFloat64_100000(b *testing.B) {
 38 | 	for n := 0; n < b.N; n++ {
 39 | 		df100k.Mean()
 40 | 	}
 41 | }
 42 | 
 43 | func benchmarkSyncMeanFloat64_100000(b *testing.B) {
 44 | 	options.SetAsync(false)
 45 | 	for n := 0; n < b.N; n++ {
 46 | 		df100k.Mean()
 47 | 	}
 48 | 	options.RestoreDefaults()
 49 | }
 50 | 
 51 | func benchmarkMeanFloat64_500000(b *testing.B) {
 52 | 	for n := 0; n < b.N; n++ {
 53 | 		df500k.Mean()
 54 | 	}
 55 | }
 56 | 
 57 | func benchmarkSyncMeanFloat64_500000(b *testing.B) {
 58 | 	options.SetAsync(false)
 59 | 	for n := 0; n < b.N; n++ {
 60 | 		df500k.Mean()
 61 | 	}
 62 | 	options.RestoreDefaults()
 63 | }
 64 | 
 65 | func benchmarkMedianFloat64_100000(b *testing.B) {
 66 | 	for n := 0; n < b.N; n++ {
 67 | 		df100k.Median()
 68 | 	}
 69 | }
 70 | 
 71 | func benchmarkMinFloat64_100000(b *testing.B) {
 72 | 	for n := 0; n < b.N; n++ {
 73 | 		df100k.Min()
 74 | 	}
 75 | }
 76 | 
 77 | func benchmarkMaxFloat64_100000(b *testing.B) {
 78 | 	for n := 0; n < b.N; n++ {
 79 | 		df100k.Max()
 80 | 	}
 81 | }
 82 | 
 83 | func benchmarkStdFloat64_100000(b *testing.B) {
 84 | 	for n := 0; n < b.N; n++ {
 85 | 		df100k.Max()
 86 | 	}
 87 | }
 88 | 
 89 | func benchmarkSyncStdFloat64_100000(b *testing.B) {
 90 | 	options.SetAsync(false)
 91 | 	for n := 0; n < b.N; n++ {
 92 | 		df100k.Std()
 93 | 	}
 94 | 	options.RestoreDefaults()
 95 | }
 96 | 
 97 | func benchmarkStdFloat64_500000(b *testing.B) {
 98 | 	for n := 0; n < b.N; n++ {
 99 | 		df500k.Max()
100 | 	}
101 | }
102 | 
103 | func benchmarkSyncStdFloat64_500000(b *testing.B) {
104 | 	options.SetAsync(false)
105 | 	for n := 0; n < b.N; n++ {
106 | 		df500k.Std()
107 | 	}
108 | 	options.RestoreDefaults()
109 | }
110 | 
111 | func benchmarkReadSumFloat64_100000(b *testing.B) {
112 | 	for n := 0; n < b.N; n++ {
113 | 		df, err := pd.ReadCSV(getPath("100k"), pd.ReadOptions{HeaderRows: 1})
114 | 		if err != nil {
115 | 			log.Fatal(err)
116 | 		}
117 | 		df.Sum()
118 | 	}
119 | }
120 | 
121 | func benchmarkReadSumFloat64_100k10x(b *testing.B) {
122 | 	for n := 0; n < b.N; n++ {
123 | 		df, err := pd.ReadCSV(getPath("100k10x"), pd.ReadOptions{HeaderRows: 1})
124 | 		if err != nil {
125 | 			log.Fatal(err)
126 | 		}
127 | 		df.Sum()
128 | 	}
129 | }
130 | 


--------------------------------------------------------------------------------
/series/series.go:
--------------------------------------------------------------------------------
  1 | // Package series defines the Series, a typed 1-dimensional data structure with an n-level index, analogous to a column in a spreadsheet.
  2 | package series
  3 | 
  4 | import (
  5 | 	"fmt"
  6 | 	"reflect"
  7 | 	"strings"
  8 | 
  9 | 	"github.com/ptiger10/pd/internal/index"
 10 | 	"github.com/ptiger10/pd/internal/values"
 11 | 	"github.com/ptiger10/pd/options"
 12 | )
 13 | 
 14 | // A Series is a 1-D data container with a labeled index, static type, and the ability to handle null values
 15 | type Series struct {
 16 | 	index    index.Index
 17 | 	values   values.Values
 18 | 	datatype options.DataType
 19 | 	name     string
 20 | 	Index    Index
 21 | 	InPlace  InPlace
 22 | }
 23 | 
 24 | func (s *Series) String() string {
 25 | 	if Equal(s, newEmptySeries()) {
 26 | 		return "{Empty Series}"
 27 | 	}
 28 | 	return s.print()
 29 | }
 30 | 
 31 | // InPlace contains methods for modifying a Series in place.
 32 | type InPlace struct {
 33 | 	s *Series
 34 | }
 35 | 
 36 | func (ip InPlace) String() string {
 37 | 	printer := "{InPlace Series Handler}\n"
 38 | 	printer += "Methods:\n"
 39 | 	t := reflect.TypeOf(InPlace{})
 40 | 	for i := 0; i < t.NumMethod(); i++ {
 41 | 		method := t.Method(i)
 42 | 		printer += fmt.Sprintln(method.Name)
 43 | 	}
 44 | 	return printer
 45 | }
 46 | 
 47 | // An Element is a single item in a Series.
 48 | type Element struct {
 49 | 	Value      interface{}
 50 | 	Null       bool
 51 | 	Labels     []interface{}
 52 | 	LabelTypes []options.DataType
 53 | }
 54 | 
 55 | func (el Element) String() string {
 56 | 	var printStr string
 57 | 	for _, pair := range [][]interface{}{
 58 | 		{"Value", el.Value},
 59 | 		{"Null", el.Null},
 60 | 		{"Labels", el.Labels},
 61 | 		{"LabelTypes", el.LabelTypes},
 62 | 	} {
 63 | 		// LabelTypes is 10 characters wide, so left padding set to 10
 64 | 		printStr += fmt.Sprintf("%10v:%v%v\n", pair[0], strings.Repeat(" ", values.GetDisplayElementWhitespaceBuffer()), pair[1])
 65 | 	}
 66 | 	return printStr
 67 | }
 68 | 
// The Config struct can be used in the custom Series constructor to name the Series or specify its data type.
// The remaining fields configure the index of the new Series.
// NOTE(review): the exact meaning of Manual is not visible in this file - confirm against the constructor.
type Config struct {
	Name            string
	DataType        options.DataType
	Index           interface{}
	IndexName       string
	MultiIndex      []interface{}
	MultiIndexNames []string
	Manual          bool
}
 79 | 
 80 | // A Grouping returns a collection of index labels with mutually exclusive integer positions.
 81 | type Grouping struct {
 82 | 	s      *Series
 83 | 	groups map[string]*group
 84 | }
 85 | 
 86 | func (g Grouping) String() string {
 87 | 	printer := fmt.Sprintf("{Series Grouping | NumGroups: %v, Groups: [%v]}\n", len(g.groups), strings.Join(g.Groups(), ", "))
 88 | 	return printer
 89 | }
 90 | 
 91 | // Index contains index selection and conversion
 92 | type Index struct {
 93 | 	s *Series
 94 | }
 95 | 
 96 | func (idx Index) String() string {
 97 | 	printer := fmt.Sprintf("{Series Index | Len: %d, NumLevels: %d}\n", idx.Len(), idx.s.NumLevels())
 98 | 	return printer
 99 | }
100 | 


--------------------------------------------------------------------------------
/options/settable_test.go:
--------------------------------------------------------------------------------
 1 | package options
 2 | 
 3 | import (
 4 | 	"reflect"
 5 | 	"testing"
 6 | )
 7 | 
// TestSettableOptions walks through each package-level option in turn: it first checks that the getter
// reports the value stored in defaultOptions, then sets a new value and checks that the getter reflects it.
// Finally it calls RestoreDefaults and spot-checks two options.
// The options are package-level state, so the order of the statements below matters.
func TestSettableOptions(t *testing.T) {
	// DisplayMaxWidth
	if GetDisplayMaxWidth() != defaultOptions.displayMaxWidth {
		t.Errorf("Default setting not reading for DisplayMaxWidth")
	}
	SetDisplayMaxWidth(15)
	if GetDisplayMaxWidth() != 15 {
		t.Error("Unable to set/get DisplayMaxWidth")
	}

	// DisplayFloatPrecision
	if GetDisplayFloatPrecision() != defaultOptions.displayFloatPrecision {
		t.Errorf("Default setting not reading for DisplayFloatPrecision")
	}
	SetDisplayFloatPrecision(10)
	if GetDisplayFloatPrecision() != 10 {
		t.Error("Unable to set/get DisplayFloatPrecision")
	}

	// DisplayMaxRows
	if GetDisplayMaxRows() != defaultOptions.displayMaxRows {
		t.Errorf("Default setting not reading for DisplayMaxRows")
	}
	SetDisplayMaxRows(10)
	if GetDisplayMaxRows() != 10 {
		t.Error("Unable to set/get DisplayMaxRows")
	}

	// DisplayMaxColumns
	if GetDisplayMaxColumns() != defaultOptions.displayMaxColumns {
		t.Errorf("Default setting not reading for DisplayMaxColumns")
	}
	SetDisplayMaxColumns(10)
	if GetDisplayMaxColumns() != 10 {
		t.Error("Unable to set/get DisplayMaxColumns")
	}

	// DisplayRepeatedLabels
	if GetDisplayRepeatedLabels() != defaultOptions.displayRepeatedLabels {
		t.Errorf("Default setting not reading for DisplayRepeatedLabels")
	}
	SetDisplayRepeatedLabels(true)
	if GetDisplayRepeatedLabels() != true {
		t.Error("Unable to set/get DisplayRepeatedLabels")
	}

	// DisplayStringNullFiller
	if GetDisplayStringNullFiller() != defaultOptions.displayStringNullFiller {
		t.Errorf("Default setting not reading for DisplayStringNullFiller")
	}
	SetDisplayStringNullFiller("Nothing")
	if GetDisplayStringNullFiller() != "Nothing" {
		t.Error("Unable to set/get DisplayStringNullFiller")
	}

	// DisplayTimeFormat
	if GetDisplayTimeFormat() != defaultOptions.displayTimeFormat {
		t.Errorf("Default setting not reading for DisplayTimeFormat")
	}
	SetDisplayTimeFormat("2006")
	if GetDisplayTimeFormat() != "2006" {
		t.Error("Unable to set/get DisplayTimeFormat")
	}

	// StringNullValues: only the set/get round trip is checked (no default comparison);
	// slices are compared with reflect.DeepEqual.
	SetStringNullValues([]string{"Nada", "Nothing"})
	if !reflect.DeepEqual(GetStringNullValues(), []string{"Nada", "Nothing"}) {
		t.Error("Unable to set/get StringNullValues")
	}

	// LogWarnings
	if GetLogWarnings() != defaultOptions.logWarnings {
		t.Errorf("Default setting not reading for LogWarnings")
	}
	SetLogWarnings(false)
	if GetLogWarnings() != false {
		t.Error("Unable to set/get LogWarnings")
	}

	// Async
	if GetAsync() != defaultOptions.async {
		t.Errorf("Default setting not reading for Async")
	}
	SetAsync(false)
	if GetAsync() != false {
		t.Error("Unable to set/get Async")
	}

	// RestoreDefaults is spot-checked against the literal values this test expects as defaults
	// (35 for DisplayMaxWidth, true for LogWarnings).
	RestoreDefaults()
	if GetDisplayMaxWidth() != 35 {
		t.Error("Unable to restore default for DisplayMaxWidth")
	}
	if GetLogWarnings() != true {
		t.Error("Unable to restore default for LogWarnings")
	}

}
95 | 


--------------------------------------------------------------------------------
/dataframe/dataframe.go:
--------------------------------------------------------------------------------
  1 | // Package dataframe defines the DataFrame, a 2-dimensional data structure with an n-level index, n-level column headers,
  2 | // and columns of typed data. It is analogous to a spreadsheet.
  3 | package dataframe
  4 | 
  5 | import (
  6 | 	"fmt"
  7 | 	"reflect"
  8 | 	"strings"
  9 | 
 10 | 	"github.com/ptiger10/pd/internal/index"
 11 | 	"github.com/ptiger10/pd/internal/values"
 12 | 	"github.com/ptiger10/pd/options"
 13 | )
 14 | 
// A DataFrame is a 2D collection of one or more Series with a shared index and associated columns.
// The unexported fields hold the data itself: the DataFrame name, one values.Container per column (vals),
// the column headers (cols), and the shared index. The exported Columns, Index, and InPlace fields are
// handle types, each of which wraps a pointer to a DataFrame.
type DataFrame struct {
	name    string
	vals    []values.Container
	cols    index.Columns
	Columns Columns
	index   index.Index
	Index   Index
	InPlace InPlace
}
 25 | 
 26 | func (df *DataFrame) String() string {
 27 | 	if Equal(df, newEmptyDataFrame()) {
 28 | 		return "{Empty DataFrame}"
 29 | 	}
 30 | 	return df.print()
 31 | }
 32 | 
// Index contains index level data.
// It is a handle that wraps a pointer to the DataFrame whose index it describes.
type Index struct {
	df *DataFrame
}
 37 | 
 38 | func (idx Index) String() string {
 39 | 	printer := fmt.Sprintf("{DataFrame Index | Len: %d, NumLevels: %d}\n", idx.Len(), idx.df.IndexLevels())
 40 | 	return printer
 41 | }
 42 | 
// Columns contains column level data.
// It is a handle that wraps a pointer to the DataFrame whose columns it describes.
type Columns struct {
	df *DataFrame
}
 47 | 
 48 | func (col Columns) String() string {
 49 | 	printer := fmt.Sprintf("{DataFrame Columns | NumCols: %d, NumLevels: %d}\n", col.df.NumCols(), col.df.ColLevels())
 50 | 	return printer
 51 | }
 52 | 
// A Row is a single row in a DataFrame.
// Values, Nulls, and ValueTypes describe the row's values; Labels and LabelTypes describe its index labels.
// NOTE(review): the slices in each group are presumably parallel (one entry per column / per index level) — confirm against the code that builds a Row.
type Row struct {
	Values     []interface{}
	Nulls      []bool
	ValueTypes []options.DataType
	Labels     []interface{}
	LabelTypes []options.DataType
}
 61 | 
 62 | func (r Row) String() string {
 63 | 	var printStr string
 64 | 	for _, pair := range [][]interface{}{
 65 | 		{"Values", r.Values},
 66 | 		{"IsNull", r.Nulls},
 67 | 		{"ValueTypes", r.ValueTypes},
 68 | 		{"Labels", r.Labels},
 69 | 		{"LabelTypes", r.LabelTypes},
 70 | 	} {
 71 | 		// LabelTypes is 10 characters wide, so left padding set to 10
 72 | 		printStr += fmt.Sprintf("%10v:%v%v\n", pair[0], strings.Repeat(" ", values.GetDisplayElementWhitespaceBuffer()), pair[1])
 73 | 	}
 74 | 	return printStr
 75 | }
 76 | 
// Config customizes the DataFrame constructor.
// Name and DataType apply to the DataFrame as a whole; Index/IndexName and MultiIndex/MultiIndexNames
// describe the row index; Col/ColName and MultiCol/MultiColNames describe the column headers.
// NOTE(review): how each field (in particular Manual) is interpreted is decided by the constructor,
// which is not fully visible here — confirm against dataframe.New.
type Config struct {
	Name            string
	DataType        options.DataType
	Index           interface{}
	IndexName       string
	MultiIndex      []interface{}
	MultiIndexNames []string
	Col             []string
	ColName         string
	MultiCol        [][]string
	MultiColNames   []string
	Manual          bool
}
 91 | 
// A Grouping returns a collection of index labels with mutually exclusive integer positions.
// It holds a reference to the DataFrame being grouped (df) and the groups themselves, keyed by a string label.
// NOTE(review): err presumably records that the grouping could not be built — confirm where it is set and read.
type Grouping struct {
	df     *DataFrame
	groups map[string]*group
	err    bool
}
 98 | 
 99 | func (g Grouping) String() string {
100 | 	printer := fmt.Sprintf("{DataFrame Grouping | NumGroups: %v, Groups: [%v]}\n", len(g.groups), strings.Join(g.Groups(), ", "))
101 | 	return printer
102 | }
103 | 
// InPlace contains methods for modifying a DataFrame in place.
// It is a handle that wraps a pointer to the DataFrame being modified.
type InPlace struct {
	df *DataFrame
}
108 | 
109 | func (ip InPlace) String() string {
110 | 	printer := "{InPlace DataFrame Handler}\n"
111 | 	printer += "Methods:\n"
112 | 	t := reflect.TypeOf(InPlace{})
113 | 	for i := 0; i < t.NumMethod(); i++ {
114 | 		method := t.Method(i)
115 | 		printer += fmt.Sprintln(method.Name)
116 | 	}
117 | 	return printer
118 | }
119 | 


--------------------------------------------------------------------------------
/benchmarking/profiler/benchmarks/compare.go:
--------------------------------------------------------------------------------
  1 | // +build benchmarks
  2 | 
  3 | package benchmarks
  4 | 
  5 | import (
  6 | 	"fmt"
  7 | 	"log"
  8 | 	"sort"
  9 | 	"strconv"
 10 | 	"strings"
 11 | 	"time"
 12 | )
 13 | 
 14 | // CompareBenchmarks creates a comparison table of GoPandas <> Pandas for equivalent operations
 15 | func CompareBenchmarks(
 16 | 	goBenchmarks, pyBenchmarks Results,
 17 | 	sampleSizes []string,
 18 | 	descs map[string]desc,
 19 | ) string {
 20 | 
 21 | 	var printer string
 22 | 	printer += "GoPandas vs Pandas speed comparison\n"
 23 | 	printer += time.Now().In(time.Local).Format(time.RFC1123) + "\n"
 24 | 	// model
 25 | 	// +-----+-----+
 26 | 	// | foo | bar |
 27 | 	// +-----+-----+
 28 | 	spacerChar := "-"
 29 | 	sepChar := "+"
 30 | 	vChar := "|"
 31 | 
 32 | 	// Sections
 33 | 	type section struct {
 34 | 		name  string
 35 | 		width int
 36 | 	}
 37 | 	num := section{name: "#", width: 4}
 38 | 	desc := section{name: "DESCRIPTION", width: 40}
 39 | 	sample := section{name: "N", width: 6}
 40 | 
 41 | 	goBenchmark := section{name: "GOPANDAS", width: 11}
 42 | 	pyBenchmark := section{name: "PANDAS", width: 11}
 43 | 	comparison := section{name: "SPEED Δ", width: 9}
 44 | 	sections := []section{num, desc, sample, goBenchmark, pyBenchmark, comparison}
 45 | 
 46 | 	// Break Line
 47 | 	breakLineComponents := make([]string, len(sections))
 48 | 	for i := 0; i < len(sections); i++ {
 49 | 		breakLineComponents[i] = strings.Repeat(spacerChar, sections[i].width)
 50 | 	}
 51 | 	breakLine := sepChar + strings.Join(breakLineComponents, sepChar) + sepChar + "\n"
 52 | 
 53 | 	// Header
 54 | 	headerComponents := make([]string, len(sections))
 55 | 	for i := 0; i < len(sections); i++ {
 56 | 		headerComponents[i] = fmt.Sprintf(" %-*v", sections[i].width-1, sections[i].name)
 57 | 	}
 58 | 	header := vChar + strings.Join(headerComponents, vChar) + vChar + "\n"
 59 | 	printer += breakLine + header + breakLine
 60 | 
 61 | 	// Rows
 62 | 	var i int
 63 | 	type orderedDesc struct {
 64 | 		n     int
 65 | 		label string
 66 | 	}
 67 | 	var orderedDescs []orderedDesc
 68 | 	for k, v := range descs {
 69 | 		orderedDescs = append(orderedDescs, orderedDesc{v.order, k})
 70 | 	}
 71 | 	sort.Slice(orderedDescs, func(i, j int) bool {
 72 | 		if orderedDescs[i].n < orderedDescs[j].n {
 73 | 			return true
 74 | 		}
 75 | 		return false
 76 | 	})
 77 | 	for _, sample := range sampleSizes {
 78 | 		results, ok := goBenchmarks[sample]
 79 | 		if !ok {
 80 | 			log.Printf("sample size %v not in %v", sample, goBenchmarks)
 81 | 			continue
 82 | 		}
 83 | 		for _, desc := range orderedDescs {
 84 | 			testName := desc.label
 85 | 			goResult, ok := results[desc.label]
 86 | 			if !ok {
 87 | 				continue
 88 | 			}
 89 | 			i++
 90 | 			gospeed, gons := goResult[0], goResult[1]
 91 | 			goSpeed := gospeed.(string)
 92 | 			goNS := gons.(float64)
 93 | 			pySpeed := "n/a"
 94 | 			comparison := "n/a"
 95 | 			py, ok := pyBenchmarks[sample]
 96 | 			if ok {
 97 | 				pyResult, ok := py[testName]
 98 | 				if ok {
 99 | 					pyspeed, pyns := pyResult[0], pyResult[1]
100 | 					pySpeed = pyspeed.(string)
101 | 					pyNS := pyns.(float64)
102 | 					comparison = fmt.Sprintf("%.2fx", pyNS/goNS)
103 | 				}
104 | 			}
105 | 
106 | 			rowComponents := []string{
107 | 				strconv.Itoa(i), descs[desc.label].str, sample, goSpeed, pySpeed, comparison,
108 | 			}
109 | 			for i := range rowComponents {
110 | 				rowComponents[i] = fmt.Sprintf(
111 | 					" %-*v", sections[i].width-1, rowComponents[i])
112 | 			}
113 | 			printer += vChar + strings.Join(rowComponents, vChar) + vChar + "\n"
114 | 			printer += breakLine
115 | 
116 | 		}
117 | 	}
118 | 	return printer
119 | }
120 | 


--------------------------------------------------------------------------------
/series/select.go:
--------------------------------------------------------------------------------
  1 | package series
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"log"
  6 | 
  7 | 	"github.com/ptiger10/pd/internal/values"
  8 | 
  9 | 	"github.com/ptiger10/pd/options"
 10 | )
 11 | 
 12 | // Element returns information about the value and index labels at this position but panics if an out-of-range position is provided.
 13 | func (s *Series) Element(p int) Element {
 14 | 	idxElems := s.index.Elements(p)
 15 | 	return Element{
 16 | 		s.values.Value(p),
 17 | 		s.values.Null(p),
 18 | 		idxElems.Labels,
 19 | 		idxElems.DataTypes,
 20 | 	}
 21 | }
 22 | 
 23 | // At returns the value at a single integer position, but returns nil if value is null. Panics if position is out of range.
 24 | func (s *Series) At(position int) interface{} {
 25 | 	if position >= s.Len() {
 26 | 		if options.GetLogWarnings() {
 27 | 			log.Printf("s.Index.At(): invalid position: %d (max: %v)", position, s.Len()-1)
 28 | 		}
 29 | 	}
 30 | 	if s.values.Null(position) {
 31 | 		return nil
 32 | 	}
 33 | 	return s.values.Value(position)
 34 | }
 35 | 
 36 | // From subsets the Series from start to end (inclusive) and returns a new Series.
 37 | // If an invalid position is provided, returns empty Series.
 38 | func (s *Series) From(start int, end int) *Series {
 39 | 	rowPositions := values.MakeIntRangeInclusive(start, end)
 40 | 	var err error
 41 | 	s, err = s.Subset(rowPositions)
 42 | 	if err != nil {
 43 | 		if options.GetLogWarnings() {
 44 | 			log.Printf("s.From(): %v", err)
 45 | 		}
 46 | 		return newEmptySeries()
 47 | 	}
 48 | 	return s
 49 | }
 50 | 
 51 | // [END Series methods]
 52 | 
 53 | // [START Selection]
 54 | 
 55 | // XS returns a new Series with only the rows and index levels at the specified positions.
 56 | func (s *Series) XS(rowPositions []int, levelPositions []int) (*Series, error) {
 57 | 	var err error
 58 | 	s, err = s.Subset(rowPositions)
 59 | 	if err != nil {
 60 | 		return newEmptySeries(), fmt.Errorf("s.XS() rows: %v", err)
 61 | 	}
 62 | 	err = s.Index.SubsetLevels(levelPositions)
 63 | 	if err != nil {
 64 | 		return newEmptySeries(), fmt.Errorf("s.XS() index levels: %v", err)
 65 | 	}
 66 | 	return s, nil
 67 | }
 68 | 
 69 | // SelectLabel returns the integer location of the first row in index level 0 with the supplied label, or -1 if the label does not exist.
 70 | func (s *Series) SelectLabel(label string) int {
 71 | 	if s.NumLevels() == 0 {
 72 | 		if options.GetLogWarnings() {
 73 | 			log.Println("Series.SelectLabel(): index has no length")
 74 | 		}
 75 | 		return -1
 76 | 	}
 77 | 	s.index.Levels[0].UpdateLabelMap()
 78 | 	val, ok := s.index.Levels[0].LabelMap[label]
 79 | 	if !ok {
 80 | 		if options.GetLogWarnings() {
 81 | 			log.Printf("Series.SelectLabel(): %v not in label map\n", label)
 82 | 		}
 83 | 		return -1
 84 | 	}
 85 | 	return val[0]
 86 | }
 87 | 
 88 | // SelectLabels returns the integer locations of all rows with the supplied labels within the supplied level.
 89 | // If an error is encountered, returns a new slice of 0 length.
 90 | func (s *Series) SelectLabels(labels []string, level int) []int {
 91 | 	empty := make([]int, 0)
 92 | 	err := s.ensureLevelPositions([]int{level})
 93 | 	if err != nil {
 94 | 		if options.GetLogWarnings() {
 95 | 			log.Printf("Series.SelectLabels(): %v", err)
 96 | 		}
 97 | 		return empty
 98 | 	}
 99 | 	s.index.Levels[level].UpdateLabelMap()
100 | 	include := make([]int, 0)
101 | 	for _, label := range labels {
102 | 		val, ok := s.index.Levels[level].LabelMap[label]
103 | 		if !ok {
104 | 			if options.GetLogWarnings() {
105 | 				log.Printf("Series.SelectLabels(): %v not in label map", label)
106 | 			}
107 | 			return empty
108 | 		}
109 | 		include = append(include, val...)
110 | 	}
111 | 	return include
112 | }
113 | 


--------------------------------------------------------------------------------
/benchmarking/profiler/benchmarks/profile.py:
--------------------------------------------------------------------------------
  1 | import pandas as pd
  2 | import datetime
  3 | import json
  4 | import sys
  5 | import os
  6 | 
  7 | 
  8 | def main():
  9 |     # Start tests
 10 |     results = {
 11 |         "100k": {
 12 |             "sum": sumTest(),
 13 |             # "sumx10": sumTest100k10x(),
 14 |             # "readCSVSum10x": readCSVSumTest10x(),
 15 |             "mean": meanTest(),
 16 |             "min": minTest(),
 17 |             "max": maxTest(),
 18 |             "std": stdTest(),
 19 |             # "readCSVSum": readCSVSumTest(),
 20 |             },
 21 |         "500k": {
 22 |             "sum2": sumTest500(),
 23 |         #     "mean2": meanTest500(),
 24 |         },
 25 |         # "5m": {
 26 |         #     "sum": sumTest5m(),
 27 |         # }
 28 |     }
 29 |     json.dump(results, sys.stdout)
 30 | 
 31 | 
 32 | # timer computes the average duration across n tests
 33 | # returns the duration as string and nanoseconds
 34 | def timer(n):
 35 |     def decorator(fn):
 36 |         def wrapper(*args, **kwargs):
 37 |             times = []
 38 |             for i in range(n):
 39 |                 start = datetime.datetime.now()
 40 |                 fn(*args, **kwargs)
 41 |                 end = datetime.datetime.now()
 42 |                 duration = (end-start).total_seconds()
 43 |                 times.append(duration)
 44 |             duration = sum(times)/len(times)
 45 |             ns = 1000000000
 46 |             mcs = 1000000
 47 |             ms = 1000
 48 |             if duration * mcs < 1:
 49 |                 speed = "{:.1f}ns".format(duration*ns)
 50 |             if duration * ms < 1:
 51 |                 speed = "{:.1f}μs".format(duration*mcs)
 52 |             elif duration < 1:
 53 |                 speed = "{:.1f}ms".format(duration*ms)
 54 |             else:
 55 |                 speed = "{:.1f}s".format(duration)
 56 |             return speed, int(duration*ns)
 57 |         return wrapper
 58 |     return decorator
 59 | 
 60 | 
 61 | def get_filepath(s):
 62 |     basename = files[s]
 63 |     thisFile = sys.argv[0]
 64 |     path = os.path.join(os.path.dirname(thisFile), basename)
 65 |     return path
 66 | 
 67 | 
# Benchmark fixture CSVs keyed by sample-size label. The paths are relative to the directory of
# the running script (get_filepath joins them onto dirname(sys.argv[0])).
files = {
    '100k': '../dataRandom100k1Col.csv',
    '100k10x': '../dataRandom100k10Col.csv',
    '500k': '../dataRandom500k2Col.csv',
    '5m': '../dataRandom5m1Col.csv',
}
# The frames below are read once, when this module is loaded, and shared by the benchmark functions.
df100 = pd.read_csv(get_filepath('100k'))
df100k10x = pd.read_csv(get_filepath('100k10x'))
df500 = pd.read_csv(get_filepath('500k'))
# The 5m fixture is currently not loaded.
# df5m = pd.read_csv(get_filepath('5m'))
 78 | 
 79 | 
 80 | @timer(1000)
 81 | def sumTest():
 82 |     s = df100.sum()
 83 |     assert round(s.iloc[0], 2) == 50408.63
 84 | 
 85 | 
 86 | @timer(100)
 87 | def sumTest100k10x():
 88 |     s = df100k10x.sum()
 89 |     assert round(s.iloc[0], 2) == 50408.63
 90 | 
 91 | 
 92 | @timer(100)
 93 | def sumTest500():
 94 |     s = df500.sum()
 95 |     assert round(s.iloc[0], 2) == 130598.19
 96 | 
 97 | 
 98 | # @timer(20)
 99 | # def sumTest5m():
100 | #     s = df5m.sum()
101 | #     assert round(s.iloc[0], 2) == 2520431.67
102 | 
103 | 
@timer(1000)
def meanTest():
    """Benchmark: column means of df100, sanity-checking the first column's mean."""
    first_mean = df100.mean().iloc[0]
    assert round(first_mean, 2) == 0.5
108 | 
109 | 
@timer(100)
def meanTest500():
    """Benchmark: column means of df500, sanity-checking the first column's mean."""
    first_mean = df500.mean().iloc[0]
    assert round(first_mean, 2) == 0.26
114 | 
115 | 
@timer(1000)
def minTest():
    """Benchmark: column minimums of df100, sanity-checking the first column's minimum."""
    first_min = df100.min().iloc[0]
    assert round(first_min, 2) == 0.0
120 | 
121 | 
@timer(1000)
def maxTest():
    """Benchmark: column maximums of df100, sanity-checking the first column's maximum."""
    first_max = df100.max().iloc[0]
    assert round(first_max, 2) == 1.0
126 | 
127 | 
@timer(1000)
def stdTest():
    """Benchmark: column standard deviations of df100, sanity-checking the first column's value."""
    first_std = df100.std().iloc[0]
    assert round(first_std, 2) == 0.29
132 | 
133 | 
@timer(100)
def medianTest():
    """Benchmark: column medians of df100, sanity-checking the first column's median."""
    first_median = df100.median().iloc[0]
    assert round(first_median, 2) == 0.5
138 | 
139 | 
@timer(50)
def readCSVSumTest():
    """Benchmark: read the '100k' fixture from disk and sum its columns, sanity-checking the first total."""
    frame = pd.read_csv(get_filepath('100k'))
    first_total = frame.sum().iloc[0]
    assert round(first_total, 2) == 50408.63
145 | 
146 | 
@timer(20)
def readCSVSumTest10x():
    """Benchmark: read the '100k10x' fixture from disk and sum its columns, sanity-checking the first total."""
    frame = pd.read_csv(get_filepath('100k10x'))
    first_total = frame.sum().iloc[0]
    assert round(first_total, 2) == 50408.63
152 | 
153 | 
# Run the benchmarks only when this file is executed as a script; main() writes the JSON results to stdout.
if __name__ == "__main__":
    main()
156 | 


--------------------------------------------------------------------------------
/series/constructor.go:
--------------------------------------------------------------------------------
  1 | package series
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"log"
  6 | 
  7 | 	"github.com/ptiger10/pd/internal/index"
  8 | 	"github.com/ptiger10/pd/internal/values"
  9 | 	"github.com/ptiger10/pd/options"
 10 | )
 11 | 
 12 | // New creates a new Series with the supplied values and an optional config.
 13 | func New(data interface{}, config ...Config) (*Series, error) {
 14 | 	var idx index.Index
 15 | 	configuration := index.Config{} // Series config
 16 | 
 17 | 	if data == nil {
 18 | 		return newEmptySeries(), nil
 19 | 	}
 20 | 
 21 | 	// Handling config
 22 | 	if config != nil {
 23 | 		if len(config) > 1 {
 24 | 			return newEmptySeries(), fmt.Errorf("series.New(): can supply at most one Config (%d > 1)", len(config))
 25 | 		}
 26 | 		tmp := config[0]
 27 | 		configuration = index.Config{
 28 | 			Name: tmp.Name, DataType: tmp.DataType,
 29 | 			Index: tmp.Index, IndexName: tmp.IndexName,
 30 | 			MultiIndex: tmp.MultiIndex, MultiIndexNames: tmp.MultiIndexNames,
 31 | 		}
 32 | 	}
 33 | 
 34 | 	// Handling values
 35 | 	container, err := values.InterfaceFactory(data)
 36 | 	if err != nil {
 37 | 		return newEmptySeries(), fmt.Errorf("series.New(): %v", err)
 38 | 	}
 39 | 
 40 | 	// Handling index
 41 | 	// empty data: return empty index
 42 | 	if lenValues := container.Values.Len(); lenValues == 0 {
 43 | 		idx = index.New()
 44 | 		// not empty data: use config
 45 | 	} else {
 46 | 		idx, err = index.NewFromConfig(configuration, lenValues)
 47 | 		if err != nil {
 48 | 			return newEmptySeries(), fmt.Errorf("series.New(): %v", err)
 49 | 		}
 50 | 	}
 51 | 
 52 | 	s := &Series{
 53 | 		values:   container.Values,
 54 | 		index:    idx,
 55 | 		datatype: container.DataType,
 56 | 		name:     configuration.Name,
 57 | 	}
 58 | 
 59 | 	// Optional datatype conversion
 60 | 	if configuration.DataType != options.None {
 61 | 		s.values, err = values.Convert(s.values, configuration.DataType)
 62 | 		if err != nil {
 63 | 			return newEmptySeries(), fmt.Errorf("series.New(): %v", err)
 64 | 		}
 65 | 		s.datatype = configuration.DataType
 66 | 	}
 67 | 
 68 | 	s.Index = Index{s: s}
 69 | 	s.InPlace = InPlace{s: s}
 70 | 
 71 | 	// Alignment check
 72 | 	if err := s.ensureAlignment(); err != nil {
 73 | 		return newEmptySeries(), fmt.Errorf("series.New(): %v", err)
 74 | 	}
 75 | 
 76 | 	return s, err
 77 | }
 78 | 
 79 | // MustNew returns a new Series or logs an error and returns an empty Series.
 80 | func MustNew(data interface{}, config ...Config) *Series {
 81 | 	s, err := New(data, config...)
 82 | 	if err != nil {
 83 | 		if options.GetLogWarnings() {
 84 | 			log.Printf("series.MustNew(): %v", err)
 85 | 		}
 86 | 		return newEmptySeries()
 87 | 	}
 88 | 	return s
 89 | }
 90 | 
 91 | func newEmptySeries() *Series {
 92 | 	// ducks error because InterfaceFactory supports nil data
 93 | 	container, _ := values.InterfaceFactory(nil)
 94 | 	s := &Series{index: index.New(), values: container.Values, datatype: container.DataType}
 95 | 	s.Index = Index{s: s}
 96 | 	s.InPlace = InPlace{s: s}
 97 | 	return s
 98 | }
 99 | 
100 | // Copy creates a new deep copy of a Series.
101 | func (s *Series) Copy() *Series {
102 | 	idx := s.index.Copy()
103 | 	valsCopy := s.values.Copy()
104 | 	copyS := &Series{
105 | 		values:   valsCopy,
106 | 		index:    idx,
107 | 		datatype: s.datatype,
108 | 		name:     s.name,
109 | 	}
110 | 	copyS.Index = Index{s: copyS}
111 | 	copyS.InPlace = InPlace{s: copyS}
112 | 	return copyS
113 | }
114 | 
115 | // [START semi-private methods]
116 | 
117 | // FromInternalComponents is a semi-private method for hydrating Series within the DataFrame module.
118 | // The required inputs are not available to the caller.
119 | func FromInternalComponents(container values.Container, index index.Index, name string) *Series {
120 | 	s := &Series{
121 | 		values:   container.Values,
122 | 		index:    index,
123 | 		datatype: container.DataType,
124 | 		name:     name,
125 | 	}
126 | 	s.Index = Index{s: s}
127 | 	s.InPlace = InPlace{s: s}
128 | 	return s
129 | }
130 | 
131 | // ToInternalComponents is a semi-private method for using a Series within the DataFrame module.
132 | // The required inputs are not available to the caller.
133 | func (s *Series) ToInternalComponents() (values.Container, index.Index) {
134 | 	return values.Container{Values: s.values.Copy(), DataType: s.datatype}, s.index.Copy()
135 | }
136 | 
137 | // [END semi-private methods]
138 | 


--------------------------------------------------------------------------------
/series/merge_test.go:
--------------------------------------------------------------------------------
  1 | package series
  2 | 
  3 | import (
  4 | 	"bytes"
  5 | 	"log"
  6 | 	"os"
  7 | 	"reflect"
  8 | 	"strings"
  9 | 	"testing"
 10 | )
 11 | 
// TestSeries_Join is a table-driven test that exercises both InPlace.Join (which modifies the receiver)
// and Series.Join (which returns a new Series) with the same inputs and expectations.
func TestSeries_Join(t *testing.T) {
	// Fixtures: single-level and two-level Series, each with one row.
	single := MustNew("foo", Config{Index: []int{1}, IndexName: "foobar"})
	single2 := MustNew("bar", Config{Index: []int{2}, IndexName: "corge"})
	single3 := MustNew(7.11, Config{Index: []int{2}, IndexName: "corge"})
	multi := MustNew("foo", Config{MultiIndex: []interface{}{[]string{"A"}, []int{1}}, MultiIndexNames: []string{"foobar", "corge"}})
	multi2 := MustNew("bar", Config{MultiIndex: []interface{}{[]string{"B"}, []int{2}}, MultiIndexNames: []string{"waldo", "fred"}})
	type args struct {
		s2 *Series
	}
	type want struct {
		series *Series
		err    bool
	}
	// In the failure cases the expected Series is the unmodified input.
	var tests = []struct {
		name  string
		input *Series
		args  args
		want  want
	}{
		{name: "singleIndex",
			input: single, args: args{s2: single2},
			want: want{series: MustNew([]string{"foo", "bar"}, Config{Index: []int{1, 2}, IndexName: "foobar"}), err: false}},
		{"replace empty s",
			newEmptySeries(), args{s2: single2},
			want{MustNew([]string{"bar"}, Config{Index: []int{2}, IndexName: "corge"}), false}},
		{"singleIndex convert",
			single, args{single3},
			want{MustNew([]string{"foo", "7.11"}, Config{Index: []int{1, 2}, IndexName: "foobar"}), false}},
		{"multiIndex",
			multi, args{multi2},
			want{MustNew([]string{"foo", "bar"}, Config{MultiIndex: []interface{}{[]string{"A", "B"}, []int{1, 2}}, MultiIndexNames: []string{"foobar", "corge"}}), false}},
		{"fail: empty s2",
			single, args{newEmptySeries()},
			want{single, true}},
		{"fail: nil s2",
			single, args{&Series{}},
			want{single, true}},
		{"fail: invalid num levels",
			single, args{multi},
			want{single, true}},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// s is joined in place; sArchive is kept for the copying variant below.
			s := tt.input.Copy()
			sArchive := tt.input.Copy()
			err := s.InPlace.Join(tt.args.s2)
			if (err != nil) != tt.want.err {
				t.Errorf("InPlace.Join() error = %v, want %v", err, tt.want.err)
				return
			}

			if !Equal(s, tt.want.series) {
				t.Errorf("InPlace.Join() got %v, want %v", s, tt.want.series)
			}

			sCopy, err := sArchive.Join(tt.args.s2)
			if (err != nil) != tt.want.err {
				t.Errorf("Series.Join() error = %v, want %v", err, tt.want.err)
				return
			}
			if !Equal(sCopy, tt.want.series) {
				t.Errorf("Series.Join() got %v, want %v", sCopy, tt.want.series)
			}
			// For successful joins (unless the case name contains "same"), the result must differ from
			// the receiver, which shows that Series.Join left the receiver untouched.
			if !strings.Contains(tt.name, "fail") {
				if !strings.Contains(tt.name, "same") {
					if Equal(sArchive, sCopy) {
						t.Errorf("Series.Join() retained access to original, want copy")
					}
				}
			}
		})
	}
}
 85 | 
 86 | func TestSeries_LookupSeries(t *testing.T) {
 87 | 	multi := MustNew([]string{"foo", "bar"}, Config{MultiIndex: []interface{}{[]string{"baz", "qux"}, []int{1, 2}}})
 88 | 	multi2 := MustNew("corge", Config{MultiIndex: []interface{}{[]string{"baz"}, []int{1}}})
 89 | 	type args struct {
 90 | 		s2 *Series
 91 | 	}
 92 | 	tests := []struct {
 93 | 		name     string
 94 | 		input    *Series
 95 | 		args     args
 96 | 		want     *Series
 97 | 		wantFail bool
 98 | 	}{
 99 | 		{name: "single", input: MustNew("foo"), args: args{s2: MustNew("bar")},
100 | 			want: MustNew("bar"), wantFail: false},
101 | 		{"multi", multi, args{multi2},
102 | 			MustNew([]string{"corge", ""}, Config{MultiIndex: []interface{}{[]string{"baz", "qux"}, []int{1, 2}}}), false},
103 | 		{"fail", MustNew("foo"), args{multi2},
104 | 			newEmptySeries(), true},
105 | 	}
106 | 	for _, tt := range tests {
107 | 		t.Run(tt.name, func(t *testing.T) {
108 | 			var buf bytes.Buffer
109 | 			log.SetOutput(&buf)
110 | 			defer log.SetOutput(os.Stderr)
111 | 
112 | 			if got := tt.input.LookupSeries(tt.args.s2); !reflect.DeepEqual(got, tt.want) {
113 | 				t.Errorf("Series.LookupSeries() = %v, want %v", got.index, tt.want.index)
114 | 			}
115 | 			if tt.wantFail {
116 | 				if buf.String() == "" {
117 | 					t.Errorf("Series.LookupSeries() returned no log message, want log due to fail")
118 | 				}
119 | 			}
120 | 		})
121 | 	}
122 | }
123 | 


--------------------------------------------------------------------------------
/internal/values/type-interface.go:
--------------------------------------------------------------------------------
  1 | package values
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"math"
  6 | 	"reflect"
  7 | 	"time"
  8 | 
  9 | 	"github.com/ptiger10/pd/options"
 10 | )
 11 | 
 12 | // [START Convenience Functions]
 13 | 
 14 | func isNullInterface(i interface{}) bool {
 15 | 	switch i.(type) {
 16 | 	case string:
 17 | 		s := i.(string)
 18 | 		if isNullString(s) {
 19 | 			return true
 20 | 		}
 21 | 	case float32, float64:
 22 | 		f := reflect.ValueOf(i).Float()
 23 | 		if math.IsNaN(f) {
 24 | 			return true
 25 | 		}
 26 | 	}
 27 | 	return false
 28 | }
 29 | 
 30 | func (vals *interfaceValues) Less(i, j int) bool {
 31 | 	if fmt.Sprint((*vals)[i].v) < fmt.Sprint((*vals)[j].v) {
 32 | 		return true
 33 | 	}
 34 | 	return false
 35 | }
 36 | 
 37 | // [END Convenience Functions]
 38 | 
 39 | // newInterface creates an interfaceValue from atomic interface{} value
 40 | func newInterface(val interface{}) interfaceValue {
 41 | 	if isNullInterface(val) {
 42 | 		return interfaceValue{val, true}
 43 | 	}
 44 | 	return interfaceValue{val, false}
 45 | }
 46 | 
 47 | // [START Converters]
 48 | func (val interfaceValue) toFloat64() float64Value {
 49 | 	if val.null {
 50 | 		return float64Value{math.NaN(), true}
 51 | 	}
 52 | 	switch val.v.(type) {
 53 | 	case float32, float64:
 54 | 		v := reflect.ValueOf(val.v).Float()
 55 | 		return newFloat64(v)
 56 | 	case int, int8, int16, int32, int64:
 57 | 		v := reflect.ValueOf(val.v).Int()
 58 | 		return newInt64(v).toFloat64()
 59 | 	case uint, uint8, uint16, uint32, uint64:
 60 | 		v := reflect.ValueOf(val.v).Uint()
 61 | 		return newInt64(int64(v)).toFloat64()
 62 | 	case string:
 63 | 		return newString(val.v.(string)).toFloat64()
 64 | 	case bool:
 65 | 		return newBool(val.v.(bool)).toFloat64()
 66 | 	case time.Time:
 67 | 		return newDateTime(val.v.(time.Time)).toFloat64()
 68 | 	}
 69 | 	return float64Value{math.NaN(), true}
 70 | }
 71 | 
 72 | func (val interfaceValue) toInt64() int64Value {
 73 | 	if val.null {
 74 | 		return int64Value{0, true}
 75 | 	}
 76 | 	switch val.v.(type) {
 77 | 	case float32, float64:
 78 | 		v := reflect.ValueOf(val.v).Float()
 79 | 		return newFloat64(v).toInt64()
 80 | 	case int, int8, int16, int32, int64:
 81 | 		v := reflect.ValueOf(val.v).Int()
 82 | 		return newInt64(v)
 83 | 	case uint, uint8, uint16, uint32, uint64:
 84 | 		v := reflect.ValueOf(val.v).Uint()
 85 | 		return int64Value{int64(v), false}
 86 | 	case string:
 87 | 		return newString(val.v.(string)).toInt64()
 88 | 	case bool:
 89 | 		return newBool(val.v.(bool)).toInt64()
 90 | 	case time.Time:
 91 | 		return newDateTime(val.v.(time.Time)).toInt64()
 92 | 	}
 93 | 	return int64Value{0, true}
 94 | }
 95 | 
 96 | func (val interfaceValue) toString() stringValue {
 97 | 	if isNullString(fmt.Sprint(val.v)) || val.null {
 98 | 		return stringValue{options.GetDisplayStringNullFiller(), true}
 99 | 	}
100 | 	return stringValue{fmt.Sprint(val.v), false}
101 | }
102 | 
103 | func (val interfaceValue) toBool() boolValue {
104 | 	if val.null {
105 | 		return boolValue{false, true}
106 | 	}
107 | 	switch val.v.(type) {
108 | 	case float32, float64:
109 | 		v := reflect.ValueOf(val.v).Float()
110 | 		return newFloat64(v).toBool()
111 | 	case int, int8, int16, int32, int64:
112 | 		v := reflect.ValueOf(val.v).Int()
113 | 		return newInt64(v).toBool()
114 | 	case uint, uint8, uint16, uint32, uint64:
115 | 		v := reflect.ValueOf(val.v).Uint()
116 | 		return newInt64(int64(v)).toBool()
117 | 	case string:
118 | 		return newString(val.v.(string)).toBool()
119 | 	case bool:
120 | 		return newBool(val.v.(bool))
121 | 	case time.Time:
122 | 		return newDateTime(val.v.(time.Time)).toBool()
123 | 	}
124 | 	return boolValue{false, true}
125 | }
126 | 
127 | func (val interfaceValue) toDateTime() dateTimeValue {
128 | 	if val.null {
129 | 		return dateTimeValue{time.Time{}, true}
130 | 	}
131 | 	switch val.v.(type) {
132 | 	case float32, float64:
133 | 		v := reflect.ValueOf(val.v).Float()
134 | 		return newFloat64(v).toDateTime()
135 | 	case int, int8, int16, int32, int64:
136 | 		v := reflect.ValueOf(val.v).Int()
137 | 		return newInt64(v).toDateTime()
138 | 	case uint, uint8, uint16, uint32, uint64:
139 | 		v := reflect.ValueOf(val.v).Uint()
140 | 		return newInt64(int64(v)).toDateTime()
141 | 	case string:
142 | 		return newString(val.v.(string)).toDateTime()
143 | 	case bool:
144 | 		return newBool(val.v.(bool)).toDateTime()
145 | 	case time.Time:
146 | 		return newDateTime(val.v.(time.Time))
147 | 	}
148 | 	return dateTimeValue{time.Time{}, true}
149 | }
150 | 
// toInterface returns the receiver unchanged, since it is already an interfaceValue.
func (val interfaceValue) toInterface() interfaceValue {
	return val
}
154 | 
155 | // [END Converters]
156 | 
// emptyValues returns a pointer to a new, empty interfaceValues as a Values.
func emptyValues() Values {
	return &interfaceValues{}
}
161 | 


--------------------------------------------------------------------------------
/dataframe/constructor.go:
--------------------------------------------------------------------------------
  1 | package dataframe
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"log"
  6 | 
  7 | 	"github.com/ptiger10/pd/internal/index"
  8 | 	"github.com/ptiger10/pd/internal/values"
  9 | 	"github.com/ptiger10/pd/options"
 10 | 	"github.com/ptiger10/pd/series"
 11 | )
 12 | 
 13 | // New creates a new DataFrame with default column names.
 14 | func New(data []interface{}, config ...Config) (*DataFrame, error) {
 15 | 	var vals []values.Container
 16 | 	var idx index.Index
 17 | 	var cols index.Columns
 18 | 	configuration := index.Config{}
 19 | 	tmp := Config{}
 20 | 	var err error
 21 | 
 22 | 	if len(data) == 0 {
 23 | 		return newEmptyDataFrame(), nil
 24 | 	}
 25 | 	// Handling config
 26 | 	if config != nil {
 27 | 		if len(config) > 1 {
 28 | 			return newEmptyDataFrame(), fmt.Errorf("dataframe.New(): can supply at most one Config (%d > 1)", len(config))
 29 | 		}
 30 | 		tmp = config[0]
 31 | 		configuration = index.Config{
 32 | 			Name:     tmp.Name,
 33 | 			DataType: tmp.DataType,
 34 | 			Index:    tmp.Index, IndexName: tmp.IndexName,
 35 | 			MultiIndex: tmp.MultiIndex, MultiIndexNames: tmp.MultiIndexNames,
 36 | 			Col: tmp.Col, ColName: tmp.ColName,
 37 | 			MultiCol: tmp.MultiCol, MultiColNames: tmp.MultiColNames,
 38 | 		}
 39 | 	}
 40 | 
 41 | 	// Handling map
 42 | 	isSplit, extractedData, extractedColumns := values.MapSplitter(data)
 43 | 	if isSplit {
 44 | 		data = extractedData
 45 | 		configuration.Col = extractedColumns
 46 | 	}
 47 | 
 48 | 	// Handling values
 49 | 	vals, err = values.InterfaceSliceFactory(data, tmp.Manual, configuration.DataType)
 50 | 	if err != nil {
 51 | 		return newEmptyDataFrame(), fmt.Errorf("dataframe.New(): %v", err)
 52 | 	}
 53 | 
 54 | 	// Handling index
 55 | 	idx, err = index.NewFromConfig(configuration, vals[0].Values.Len())
 56 | 	if err != nil {
 57 | 		return newEmptyDataFrame(), fmt.Errorf("dataframe.New(): %v", err)
 58 | 	}
 59 | 	//Handling columns
 60 | 	cols, err = index.NewColumnsFromConfig(configuration, len(data))
 61 | 	if err != nil {
 62 | 		return newEmptyDataFrame(), fmt.Errorf("dataframe.New(): %v", err)
 63 | 	}
 64 | 
 65 | 	df := &DataFrame{
 66 | 		vals:  vals,
 67 | 		index: idx,
 68 | 		cols:  cols,
 69 | 		name:  configuration.Name,
 70 | 	}
 71 | 
 72 | 	df.Columns = Columns{df: df}
 73 | 	df.Index = Index{df: df}
 74 | 	df.InPlace = InPlace{df: df}
 75 | 
 76 | 	if err := df.ensureAlignment(); err != nil {
 77 | 		return newEmptyDataFrame(), fmt.Errorf("dataframe.New(): %v", err)
 78 | 	}
 79 | 
 80 | 	return df, err
 81 | }
 82 | 
 83 | func newEmptyDataFrame() *DataFrame {
 84 | 	df := &DataFrame{vals: nil, index: index.New(), cols: index.NewColumns()}
 85 | 	df.Columns = Columns{df: df}
 86 | 	df.Index = Index{df: df}
 87 | 	df.InPlace = InPlace{df: df}
 88 | 	return df
 89 | }
 90 | 
 91 | // MustNew constructs a new DataFrame or logs an error and returns an empty DataFrame.
 92 | func MustNew(data []interface{}, config ...Config) *DataFrame {
 93 | 	df, err := New(data, config...)
 94 | 	if err != nil {
 95 | 		if options.GetLogWarnings() {
 96 | 			log.Printf("dataframe.MustNew(): %v", err)
 97 | 		}
 98 | 		return newEmptyDataFrame()
 99 | 	}
100 | 	return df
101 | }
102 | 
103 | // newFromComponents constructs a dataframe from its constituent parts but returns an empty dataframe if series is nil
104 | func newFromComponents(vals []values.Container, idx index.Index, cols index.Columns, name string) *DataFrame {
105 | 	if vals == nil {
106 | 		return newEmptyDataFrame()
107 | 	}
108 | 	df := &DataFrame{
109 | 		vals:  vals,
110 | 		index: idx,
111 | 		cols:  cols,
112 | 		name:  name,
113 | 	}
114 | 	df.Columns = Columns{df: df}
115 | 	df.Index = Index{df: df}
116 | 	df.InPlace = InPlace{df: df}
117 | 
118 | 	return df
119 | }
120 | 
121 | func (df *DataFrame) valsAligned() error {
122 | 	if df.NumCols() == 0 {
123 | 		return nil
124 | 	}
125 | 	lvl0 := df.vals[0].Values.Len()
126 | 	for i := 1; i < df.NumCols(); i++ {
127 | 		if cmpLvl := df.vals[i].Values.Len(); lvl0 != cmpLvl {
128 | 			return fmt.Errorf("df.valsAligned(): values container at %v must have same number of labels as container 0, %d != %d",
129 | 				i, cmpLvl, lvl0)
130 | 		}
131 | 	}
132 | 	return nil
133 | }
134 | 
135 | // Copy creates a new deep copy of a Series.
136 | func (df *DataFrame) Copy() *DataFrame {
137 | 	var valsCopy []values.Container
138 | 	for i := 0; i < len(df.vals); i++ {
139 | 		valsCopy = append(valsCopy, df.vals[i].Copy())
140 | 	}
141 | 	idxCopy := df.index.Copy()
142 | 	colsCopy := df.cols.Copy()
143 | 	dfCopy := &DataFrame{
144 | 		vals:  valsCopy,
145 | 		index: idxCopy,
146 | 		cols:  colsCopy,
147 | 		name:  df.name,
148 | 	}
149 | 	dfCopy.Columns = Columns{df: dfCopy}
150 | 	dfCopy.Index = Index{df: dfCopy}
151 | 	dfCopy.InPlace = InPlace{df: dfCopy}
152 | 	return dfCopy
153 | }
154 | 
155 | // hydrateSeries converts a column of values.Values into a Series with the same index as df.
156 | func (df *DataFrame) hydrateSeries(col int) *series.Series {
157 | 	return series.FromInternalComponents(
158 | 		df.vals[col], df.index, df.cols.Name(col))
159 | }
160 | 


--------------------------------------------------------------------------------
/guides/Options.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import \"github.com/ptiger10/pd/options\"\n",
 10 |     "import \"github.com/ptiger10/pd\""
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "code",
 15 |    "execution_count": 2,
 16 |    "metadata": {},
 17 |    "outputs": [
 18 |     {
 19 |      "name": "stdout",
 20 |      "output_type": "stream",
 21 |      "text": [
 22 |       "0           foo\n",
 23 |       "1           bar\n",
 24 |       "2    foobarb...\n",
 25 |       "\n",
 26 |       "datatype: string\n",
 27 |       "\n",
 28 |       "<nil>\n"
 29 |      ]
 30 |     }
 31 |    ],
 32 |    "source": [
 33 |     "options.RestoreDefaults()\n",
 34 |     "options.SetDisplayMaxWidth(10)\n",
 35 |     "pd.Series([]string{\"foo\", \"bar\", \"foobarbazbang\"})"
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "code",
 40 |    "execution_count": 3,
 41 |    "metadata": {},
 42 |    "outputs": [
 43 |     {
 44 |      "name": "stdout",
 45 |      "output_type": "stream",
 46 |      "text": [
 47 |       "0    1\n",
 48 |       "1    2\n",
 49 |       "2    3\n",
 50 |       "\n",
 51 |       "datatype: float64\n",
 52 |       "\n",
 53 |       "<nil>\n"
 54 |      ]
 55 |     }
 56 |    ],
 57 |    "source": [
 58 |     "options.RestoreDefaults()\n",
 59 |     "options.SetDisplayFloatPrecision(0)\n",
 60 |     "pd.Series([]float64{1, 2, 3})"
 61 |    ]
 62 |   },
 63 |   {
 64 |    "cell_type": "code",
 65 |    "execution_count": 8,
 66 |    "metadata": {},
 67 |    "outputs": [
 68 |     {
 69 |      "name": "stdout",
 70 |      "output_type": "stream",
 71 |      "text": [
 72 |       "foo A    1.00\n",
 73 |       "    B    2.00\n",
 74 |       "bar C    3.00\n",
 75 |       "\n",
 76 |       "datatype: float64\n",
 77 |       "\n",
 78 |       "<nil>\n"
 79 |      ]
 80 |     }
 81 |    ],
 82 |    "source": [
 83 |     "pd.Series([]float64{1, 2, 3}, pd.Config{\n",
 84 |     "    MultiIndex: []interface{}{[]string{\"foo\", \"foo\", \"bar\"}, []string{\"A\", \"B\", \"C\"}}})"
 85 |    ]
 86 |   },
 87 |   {
 88 |    "cell_type": "code",
 89 |    "execution_count": 9,
 90 |    "metadata": {},
 91 |    "outputs": [
 92 |     {
 93 |      "name": "stdout",
 94 |      "output_type": "stream",
 95 |      "text": [
 96 |       "foo A    1.00\n",
 97 |       "foo B    2.00\n",
 98 |       "bar C    3.00\n",
 99 |       "\n",
100 |       "datatype: float64\n",
101 |       "\n",
102 |       "<nil>\n"
103 |      ]
104 |     }
105 |    ],
106 |    "source": [
107 |     "options.RestoreDefaults()\n",
108 |     "options.SetDisplayRepeatedLabels(true)\n",
109 |     "pd.Series([]float64{1, 2, 3}, pd.Config{\n",
110 |     "    MultiIndex: []interface{}{[]string{\"foo\", \"foo\", \"bar\"}, []string{\"A\", \"B\", \"C\"}}})"
111 |    ]
112 |   },
113 |   {
114 |    "cell_type": "code",
115 |    "execution_count": 5,
116 |    "metadata": {
117 |     "scrolled": true
118 |    },
119 |    "outputs": [
120 |     {
121 |      "name": "stdout",
122 |      "output_type": "stream",
123 |      "text": [
124 |       "0    #ERR\n",
125 |       "1     foo\n",
126 |       "\n",
127 |       "datatype: string\n",
128 |       "\n",
129 |       "<nil>\n"
130 |      ]
131 |     }
132 |    ],
133 |    "source": [
134 |     "options.RestoreDefaults()\n",
135 |     "options.SetDisplayStringNullFiller(\"#ERR\")\n",
136 |     "pd.Series([]string{\"\", \"foo\"})"
137 |    ]
138 |   },
139 |   {
140 |    "cell_type": "code",
141 |    "execution_count": 6,
142 |    "metadata": {},
143 |    "outputs": [
144 |     {
145 |      "name": "stdout",
146 |      "output_type": "stream",
147 |      "text": [
148 |       "0    01/01/2019\n",
149 |       "\n",
150 |       "datatype: dateTime\n",
151 |       "\n",
152 |       "<nil>\n"
153 |      ]
154 |     }
155 |    ],
156 |    "source": [
157 |     "import \"time\"\n",
158 |     "options.RestoreDefaults()\n",
159 |     "options.SetDisplayTimeFormat(\"01/02/2006\")\n",
160 |     "pd.Series([]time.Time{time.Date(2019,1,1,0,0,0,0,time.UTC)})"
161 |    ]
162 |   },
163 |   {
164 |    "cell_type": "code",
165 |    "execution_count": 7,
166 |    "metadata": {},
167 |    "outputs": [
168 |     {
169 |      "name": "stdout",
170 |      "output_type": "stream",
171 |      "text": [
172 |       "0    NaN\n",
173 |       "1    foo\n",
174 |       "\n",
175 |       "datatype: string\n",
176 |       "\n",
177 |       "<nil>\n"
178 |      ]
179 |     }
180 |    ],
181 |    "source": [
182 |     "options.RestoreDefaults()\n",
183 |     "options.SetStringNullValues([]string{\"#REF\"})\n",
184 |     "pd.Series([]string{\"#REF\", \"foo\"})"
185 |    ]
186 |   }
187 |  ],
188 |  "metadata": {
189 |   "kernelspec": {
190 |    "display_name": "Go (lgo)",
191 |    "language": "go",
192 |    "name": "lgo"
193 |   },
194 |   "language_info": {
195 |    "file_extension": "",
196 |    "mimetype": "",
197 |    "name": "go",
198 |    "version": ""
199 |   }
200 |  },
201 |  "nbformat": 4,
202 |  "nbformat_minor": 2
203 | }
204 | 


--------------------------------------------------------------------------------
/benchmarking/profiler/benchmarks/config.go:
--------------------------------------------------------------------------------
  1 | // +build benchmarks
  2 | 
  3 | package benchmarks
  4 | 
  5 | import (
  6 | 	"log"
  7 | 	"math"
  8 | 	"os"
  9 | 	"path/filepath"
 10 | 	"runtime"
 11 | 
 12 | 	"github.com/ptiger10/pd"
 13 | 	"github.com/ptiger10/pd/dataframe"
 14 | )
 15 | 
// Descriptions of the benchmarking tests, keyed by benchmark name.
// NOTE(review): "readCSVSum" and "readCSVSum10x" both carry 7 as their first
// field while every other entry has a distinct number — presumably this field
// is a display order; confirm whether the duplicate is intended.
var Descriptions = map[string]desc{
	"sum":           {1, "Sum one column"},
	"sumx10":        {2, "Sum 10 columns individually"},
	"mean":          {3, "Simple mean of one column"},
	"min":           {4, "Min of one column"},
	"max":           {5, "Max of one column"},
	"std":           {6, "Standard deviation of one column"},
	"readCSVSum":    {7, "Read in CSV then calculate sum"},
	"readCSVSum10x": {7, "Read CSV, sum 10 cols individually"},
	"sum2":          {8, "Sum two columns"},
	"mean2":         {9, "Mean of two columns"},
}
 29 | 
// SampleSizes is all the potential sample sizes and the order in which they should appear in the comparison table.
// The "5m" size is currently disabled.
var SampleSizes = []string{
	"100k",
	"500k",
	// "5m",
}
 36 | 
 37 | var df100k *dataframe.DataFrame
 38 | var df100k10x *dataframe.DataFrame
 39 | var df500k *dataframe.DataFrame
 40 | var df5m *dataframe.DataFrame
 41 | 
 42 | func read100k() {
 43 | 	var err error
 44 | 	df100k, err = pd.ReadCSV(getPath("100k"), pd.ReadOptions{HeaderRows: 1})
 45 | 	if err != nil {
 46 | 		log.Fatal(err)
 47 | 	}
 48 | 
 49 | 	got := math.Round(df100k.Sum().At(0).(float64)*100) / 100
 50 | 	want := 50408.63
 51 | 	if got != want {
 52 | 		log.Fatalf("profiler/config.go: reading in test data: df.Sum() got %v, want %v", got, want)
 53 | 	}
 54 | 
 55 | 	got = math.Round(df100k.Mean().At(0).(float64)*100) / 100
 56 | 	want = 0.5
 57 | 	if got != want {
 58 | 		log.Fatalf("profiler/config.go: reading in test data: df.Mean() got %v, want %v", got, want)
 59 | 	}
 60 | 
 61 | 	got = math.Round(df100k.Median().At(0).(float64)*100) / 100
 62 | 	want = 0.50
 63 | 	if got != want {
 64 | 		log.Fatalf("profiler/config.go: reading in test data: df.Median() got %v, want %v", got, want)
 65 | 	}
 66 | 
 67 | 	got = math.Round(df100k.Min().At(0).(float64)*100) / 100
 68 | 	want = 0.0
 69 | 	if got != want {
 70 | 		log.Fatalf("profiler/config.go: reading in test data: df.Min() got %v, want %v", got, want)
 71 | 	}
 72 | 
 73 | 	got = math.Round(df100k.Max().At(0).(float64)*100) / 100
 74 | 	want = 1.0
 75 | 	if got != want {
 76 | 		log.Fatalf("profiler/config.go: reading in test data: df.Max() got %v, want %v", got, want)
 77 | 	}
 78 | 
 79 | 	got = math.Round(df100k.Std().At(0).(float64)*100) / 100
 80 | 	want = 0.29
 81 | 	if got != want {
 82 | 		log.Fatalf("profiler/config.go: reading in test data: df.Std() got %v, want %v", got, want)
 83 | 	}
 84 | 
 85 | }
 86 | 
 87 | func read100k10x() {
 88 | 	var err error
 89 | 	df100k10x, err = pd.ReadCSV(getPath("100k10x"), pd.ReadOptions{HeaderRows: 1})
 90 | 	if err != nil {
 91 | 		log.Fatal(err)
 92 | 	}
 93 | 
 94 | 	got := math.Round(df100k10x.Sum().At(0).(float64)*100) / 100
 95 | 	want := 50408.63
 96 | 	if got != want {
 97 | 		log.Fatalf("profiler/config.go: reading in test data: df.Sum() got %v, want %v", got, want)
 98 | 	}
 99 | }
100 | 
101 | func read500k() {
102 | 	var err error
103 | 	df500k, err = pd.ReadCSV(getPath("500k"), pd.ReadOptions{HeaderRows: 1})
104 | 	if err != nil {
105 | 		log.Fatal(err)
106 | 	}
107 | 
108 | 	got := math.Round(df500k.Sum().At(0).(float64)*100) / 100
109 | 	want := 130598.19
110 | 	if got != want {
111 | 		log.Fatalf("profiler/config.go: reading in test data: df.Sum500() got %v, want %v", got, want)
112 | 	}
113 | 
114 | 	got = math.Round(df500k.Mean().At(0).(float64)*100) / 100
115 | 	want = 0.26
116 | 	if got != want {
117 | 		log.Fatalf("profiler/config.go: reading in test data: df.Mean() got %v, want %v", got, want)
118 | 	}
119 | }
120 | 
121 | func read5m() {
122 | 	var err error
123 | 	df5m, err = pd.ReadCSV(getPath("5m"), pd.ReadOptions{HeaderRows: 1})
124 | 	if err != nil {
125 | 		log.Fatal(err)
126 | 	}
127 | 
128 | 	got := math.Round(df5m.Sum().At(0).(float64)*100) / 100
129 | 	want := 2520431.67
130 | 	if got != want {
131 | 		log.Fatalf("profiler/config.go: reading in test data: df.Sum() got %v, want %v", got, want)
132 | 	}
133 | }
134 | 
// ReadData initializes data for use in comparison tests.
// The 5m dataset is currently not loaded.
func ReadData() {
	read100k()
	read100k10x()
	read500k()
	// read5m()
}
142 | 
// files maps a sample-size key to the CSV path that getPath joins onto the
// directory of this source file.
var files = map[string]string{
	"100k":    "../dataRandom100k1Col.csv",
	"100k10x": "../dataRandom100k10Col.csv",
	"500k":    "../dataRandom500k2Col.csv",
	"5m":      "../dataRandom5m1Col.csv",
}
149 | 
150 | func getPath(s string) string {
151 | 	basename, ok := files[s]
152 | 	if !ok {
153 | 		log.Fatalf("profiler/config.go: reading in test data: df.%v not in %v", s, files)
154 | 	}
155 | 	_, thisFile, _, _ := runtime.Caller(0)
156 | 	path := filepath.Join(filepath.Dir(thisFile), basename)
157 | 	if _, err := os.Stat(path); os.IsNotExist(err) {
158 | 		log.Fatalf("profiler/config.go: reading in test data: df.File does not exist at %s", path)
159 | 	}
160 | 	return path
161 | }
162 | 


--------------------------------------------------------------------------------
/series/filter_test.go:
--------------------------------------------------------------------------------
  1 | package series
  2 | 
  3 | import (
  4 | 	"reflect"
  5 | 	"strings"
  6 | 	"testing"
  7 | 	"time"
  8 | )
  9 | 
 10 | func TestApply(t *testing.T) {
 11 | 	s := MustNew([]float64{1, 2, 3})
 12 | 	sArchive := s.Copy()
 13 | 
 14 | 	s.InPlace.Apply(func(val interface{}) interface{} {
 15 | 		v, ok := val.(float64)
 16 | 		if !ok {
 17 | 			return ""
 18 | 		}
 19 | 		return ((v - s.Mean()) / s.Std())
 20 | 	})
 21 | 	want := MustNew([]float64{-1.224744871391589, 0, 1.224744871391589})
 22 | 	if !Equal(s, want) {
 23 | 		t.Errorf("InPlace.Apply() returned %v, want %v", s, want)
 24 | 	}
 25 | 
 26 | 	sCopy := sArchive.Apply(func(val interface{}) interface{} {
 27 | 		v, ok := val.(float64)
 28 | 		if !ok {
 29 | 			return ""
 30 | 		}
 31 | 		return ((v - sArchive.Mean()) / sArchive.Std())
 32 | 	})
 33 | 	if !Equal(sCopy, want) {
 34 | 		t.Errorf("Apply() returned %v, want %v", sCopy, want)
 35 | 	}
 36 | 	if Equal(sArchive, sCopy) {
 37 | 		t.Errorf("Apply() retained access to original, want copy")
 38 | 	}
 39 | }
 40 | 
 41 | func TestApply_riskier(t *testing.T) {
 42 | 	s := MustNew([]float64{1, 2, 3})
 43 | 	got := s.Apply(func(val interface{}) interface{} {
 44 | 		return (val.(float64) - s.Mean()) / s.Std()
 45 | 	})
 46 | 	want := MustNew([]float64{-1.224744871391589, 0, 1.224744871391589})
 47 | 	if !Equal(got, want) {
 48 | 		t.Errorf("Apply() returned %v, want %v", got, want)
 49 | 	}
 50 | }
 51 | 
 52 | func TestFilterFloat64(t *testing.T) {
 53 | 	tests := []struct {
 54 | 		name string
 55 | 		fn   func(*Series, float64) []int
 56 | 		arg  float64
 57 | 		want []int
 58 | 	}{
 59 | 		{"GT", (*Series).GT, 2, []int{2}},
 60 | 		{"GTE", (*Series).GTE, 2, []int{1, 2}},
 61 | 		{"LT", (*Series).LT, 2, []int{0}},
 62 | 		{"LTE", (*Series).LTE, 2, []int{0, 1}},
 63 | 		{"EQ", (*Series).EQ, 2, []int{1}},
 64 | 		{"NEQ", (*Series).NEQ, 2, []int{0, 2}},
 65 | 	}
 66 | 	for _, tt := range tests {
 67 | 		t.Run(tt.name, func(t *testing.T) {
 68 | 			s := MustNew([]float64{1, 2, 3})
 69 | 			got := tt.fn(s, tt.arg)
 70 | 			if !reflect.DeepEqual(got, tt.want) {
 71 | 				t.Errorf("s.Filter() got %v, want %v for arg %v", got, tt.want, tt.arg)
 72 | 			}
 73 | 		})
 74 | 	}
 75 | }
 76 | 
 77 | func TestFilterBool(t *testing.T) {
 78 | 	tests := []struct {
 79 | 		name string
 80 | 		fn   func(*Series) []int
 81 | 		want []int
 82 | 	}{
 83 | 		{"True", (*Series).True, []int{1}},
 84 | 		{"False", (*Series).False, []int{0}},
 85 | 	}
 86 | 	for _, tt := range tests {
 87 | 		t.Run(tt.name, func(t *testing.T) {
 88 | 			s := MustNew([]bool{false, true})
 89 | 			got := tt.fn(s)
 90 | 			if !reflect.DeepEqual(got, tt.want) {
 91 | 				t.Errorf("s.Filter() got %v, want %v", got, tt.want)
 92 | 			}
 93 | 		})
 94 | 	}
 95 | }
 96 | 
 97 | func TestFilterDateTime(t *testing.T) {
 98 | 	tests := []struct {
 99 | 		name string
100 | 		fn   func(*Series, time.Time) []int
101 | 		arg  time.Time
102 | 		want []int
103 | 	}{
104 | 		{"Before", (*Series).Before, time.Date(2019, 1, 2, 0, 0, 0, 0, time.UTC), []int{0}},
105 | 		{"After", (*Series).After, time.Date(2019, 1, 2, 0, 0, 0, 0, time.UTC), []int{1}},
106 | 	}
107 | 	for _, tt := range tests {
108 | 		t.Run(tt.name, func(t *testing.T) {
109 | 			s := MustNew([]time.Time{time.Date(2019, 1, 1, 0, 0, 0, 0, time.UTC), time.Date(2019, 3, 1, 0, 0, 0, 0, time.UTC)})
110 | 			got := tt.fn(s, tt.arg)
111 | 			if !reflect.DeepEqual(got, tt.want) {
112 | 				t.Errorf("s.Filter() got %v, want %v", got, tt.want)
113 | 			}
114 | 		})
115 | 	}
116 | }
117 | 
118 | func TestFilter_Contains(t *testing.T) {
119 | 	s := MustNew([]string{"foo", "bar", "baz"})
120 | 	got := s.Contains("ba")
121 | 	want := []int{1, 2}
122 | 	if !reflect.DeepEqual(got, want) {
123 | 		t.Errorf("s.Contains() got %v, want %v", got, want)
124 | 	}
125 | 
126 | 	got = s.InList([]string{"foo", "bar"})
127 | 	want = []int{0, 1}
128 | 	if !reflect.DeepEqual(got, want) {
129 | 		t.Errorf("s.In() got %v, want %v", got, want)
130 | 	}
131 | }
132 | 
133 | func TestFilter_float(t *testing.T) {
134 | 	s := MustNew([]float64{1, 2, 3})
135 | 	got := s.Filter(func(val interface{}) bool {
136 | 		v, ok := val.(float64)
137 | 		if !ok {
138 | 			return false
139 | 		}
140 | 		if v > 2 {
141 | 			return true
142 | 		}
143 | 		return false
144 | 	})
145 | 	want := []int{2}
146 | 	if !reflect.DeepEqual(got, want) {
147 | 		t.Errorf("s.Filter() got %v, want %v", got, want)
148 | 	}
149 | }
150 | 
151 | func TestFilter_string(t *testing.T) {
152 | 	s := MustNew([]string{"bamboo", "leaves", "taboo"})
153 | 	got := s.Filter(func(val interface{}) bool {
154 | 		v, ok := val.(string)
155 | 		if !ok {
156 | 			return false
157 | 		}
158 | 		if strings.HasSuffix(v, "boo") {
159 | 			return true
160 | 		}
161 | 		return false
162 | 	})
163 | 	want := []int{0, 2}
164 | 	if !reflect.DeepEqual(got, want) {
165 | 		t.Errorf("s.Filter() got %v, want %v", got, want)
166 | 	}
167 | }
168 | 
169 | func TestFilter_string_riskier(t *testing.T) {
170 | 	s := MustNew([]string{"bamboo", "leaves", "taboo"})
171 | 	got := s.Filter(func(val interface{}) bool {
172 | 		if strings.HasSuffix(val.(string), "boo") {
173 | 			return true
174 | 		}
175 | 		return false
176 | 	})
177 | 	want := []int{0, 2}
178 | 	if !reflect.DeepEqual(got, want) {
179 | 		t.Errorf("s.Filter() got %v, want %v", got, want)
180 | 	}
181 | }
182 | 


--------------------------------------------------------------------------------
/dataframe/columns.go:
--------------------------------------------------------------------------------
  1 | package dataframe
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"log"
  6 | 
  7 | 	"github.com/ptiger10/pd/internal/index"
  8 | 
  9 | 	"github.com/ptiger10/pd/options"
 10 | )
 11 | 
 12 | // Values returns an []string of the values at each level of the cols.
 13 | func (col Columns) Values() [][]string {
 14 | 	ret := make([][]string, col.df.ColLevels())
 15 | 	for j := 0; j < col.df.ColLevels(); j++ {
 16 | 		ret[j] = col.df.cols.Levels[j].Labels
 17 | 	}
 18 | 	return ret
 19 | }
 20 | 
 21 | // Reorder reorders the columns in the order in which the labels are supplied and excludes any unsupplied labels.
 22 | // Reorder looks for these labels in level 0 and modifies the DataFrame in place.
 23 | func (col Columns) Reorder(labels []string) {
 24 | 	positions := col.df.SelectCols(labels, 0)
 25 | 	col.df.InPlace.SubsetColumns(positions)
 26 | }
 27 | 
 28 | // SwapLevels swaps two column levels and modifies the cols in place.
 29 | func (col Columns) SwapLevels(i, j int) error {
 30 | 	if err := col.df.ensureColumnLevelPositions([]int{i, j}); err != nil {
 31 | 		return fmt.Errorf("Columns.SwapLevels(): %v", err)
 32 | 	}
 33 | 	col.df.cols.Levels[i], col.df.cols.Levels[j] = col.df.cols.Levels[j], col.df.cols.Levels[i]
 34 | 	col.df.cols.Refresh()
 35 | 	return nil
 36 | }
 37 | 
 38 | // At returns the cols values at a specified col level and column position but returns nil if either integer is out of range.
 39 | func (col Columns) At(level int, column int) string {
 40 | 	if err := col.df.ensureColumnLevelPositions([]int{level}); err != nil {
 41 | 		if options.GetLogWarnings() {
 42 | 			log.Printf("Columns.At(): %v", err)
 43 | 		}
 44 | 		return ""
 45 | 	}
 46 | 	if err := col.df.ensureColumnPositions([]int{column}); err != nil {
 47 | 		if options.GetLogWarnings() {
 48 | 			log.Printf("Columns.At(): %v", err)
 49 | 		}
 50 | 		return ""
 51 | 	}
 52 | 	return col.df.cols.Levels[level].Labels[column]
 53 | }
 54 | 
 55 | // RenameLevel renames an cols level in place but does not change anything if level is out of range.
 56 | func (col Columns) RenameLevel(level int, name string) error {
 57 | 	if err := col.df.ensureColumnLevelPositions([]int{level}); err != nil {
 58 | 		return fmt.Errorf("df.cols.RenameLevel(): %v", err)
 59 | 	}
 60 | 	col.df.cols.Levels[level].Name = name
 61 | 	col.df.cols.Refresh()
 62 | 	return nil
 63 | }
 64 | 
 65 | // InsertLevel inserts a level into the cols and modifies the DataFrame in place.
 66 | func (col Columns) InsertLevel(pos int, labels []string, name string) error {
 67 | 	if err := col.df.cols.InsertLevel(pos, labels, name); err != nil {
 68 | 		return fmt.Errorf("df.Column.InsertLevel(): %v", err)
 69 | 	}
 70 | 	return nil
 71 | }
 72 | 
 73 | // AppendLevel adds a new cols level to the end of the current cols  and modifies the DataFrame in place.
 74 | func (col Columns) AppendLevel(labels []string, name string) error {
 75 | 	err := col.InsertLevel(col.df.ColLevels(), labels, name)
 76 | 	if err != nil {
 77 | 		return fmt.Errorf("df.cols.AppendLevel(): %v", err)
 78 | 	}
 79 | 	return nil
 80 | }
 81 | 
 82 | // SubsetLevels modifies the DataFrame in place with only the specified cols levels.
 83 | func (col Columns) SubsetLevels(levelPositions []int) error {
 84 | 
 85 | 	err := col.df.ensureColumnLevelPositions(levelPositions)
 86 | 	if err != nil {
 87 | 		return fmt.Errorf("df.cols.SubsetLevels(): %v", err)
 88 | 	}
 89 | 	if len(levelPositions) == 0 {
 90 | 		return fmt.Errorf("df.cols.SubsetLevels(): no levels provided")
 91 | 	}
 92 | 
 93 | 	levels := make([]index.ColLevel, len(levelPositions))
 94 | 	for j := 0; j < len(levelPositions); j++ {
 95 | 		levels[j] = col.df.cols.Levels[levelPositions[j]]
 96 | 	}
 97 | 	col.df.cols.Levels = levels
 98 | 	col.df.cols.Refresh()
 99 | 	return nil
100 | }
101 | 
102 | // DropLevel drops the specified cols level and modifies the DataFrame in place.
103 | // If there is only one col level remaining, replaces with a new default col level.
104 | func (col Columns) DropLevel(level int) error {
105 | 	if err := col.df.ensureColumnLevelPositions([]int{level}); err != nil {
106 | 		return fmt.Errorf("Columns.DropLevel(): %v", err)
107 | 	}
108 | 	if col.df.ColLevels() == 1 {
109 | 		col.df.cols.Levels = append(col.df.cols.Levels, index.NewDefaultColLevel(col.df.NumCols(), ""))
110 | 	}
111 | 	col.df.cols.Levels = append(col.df.cols.Levels[:level], col.df.cols.Levels[level+1:]...)
112 | 	col.df.cols.Refresh()
113 | 	return nil
114 | }
115 | 
116 | // SelectName returns the integer position of the cols level at the first occurrence of the supplied name, or -1 if not a valid cols level name.
117 | func (col Columns) SelectName(name string) int {
118 | 	v, ok := col.df.cols.NameMap[name]
119 | 	if !ok {
120 | 		if options.GetLogWarnings() {
121 | 			log.Printf("Columns.SelectName(): name not in cols level names: %v\n", name)
122 | 		}
123 | 		return -1
124 | 	}
125 | 	return v[0]
126 | }
127 | 
128 | // SelectNames returns the integer positions of the cols levels with the supplied names.
129 | func (col Columns) SelectNames(names []string) []int {
130 | 	include := make([]int, 0)
131 | 	empty := make([]int, 0)
132 | 	for _, name := range names {
133 | 		v, ok := col.df.cols.NameMap[name]
134 | 		if !ok {
135 | 			if options.GetLogWarnings() {
136 | 				log.Printf("Columns.SelectNames(): name not in cols level names: %v\n", name)
137 | 			}
138 | 			return empty
139 | 		}
140 | 		include = append(include, v...)
141 | 	}
142 | 	return include
143 | }
144 | 


--------------------------------------------------------------------------------
/series/describe_test.go:
--------------------------------------------------------------------------------
  1 | package series
  2 | 
  3 | import (
  4 | 	"bytes"
  5 | 	"log"
  6 | 	"os"
  7 | 	"reflect"
  8 | 	"testing"
  9 | 	"time"
 10 | 
 11 | 	"github.com/ptiger10/pd/options"
 12 | )
 13 | 
 14 | func TestSeries_Describe(t *testing.T) {
 15 | 	type want struct {
 16 | 		len          int
 17 | 		numIdxLevels int
 18 | 		maxWidth     int
 19 | 		values       []interface{}
 20 | 		vals         interface{}
 21 | 		datatype     string
 22 | 		name         string
 23 | 		valid        []int
 24 | 		null         []int
 25 | 	}
 26 | 	tests := []struct {
 27 | 		name  string
 28 | 		input *Series
 29 | 		want  want
 30 | 	}{
 31 | 		{"empty",
 32 | 			newEmptySeries(),
 33 | 			want{len: 0, numIdxLevels: 0, maxWidth: 0,
 34 | 				values: []interface{}{}, vals: []interface{}{}, datatype: "none", name: "",
 35 | 				valid: []int{}, null: []int{}}},
 36 | 		{name: "default index",
 37 | 			input: MustNew([]string{"foo", "", "bar", ""}),
 38 | 			want: want{len: 4, numIdxLevels: 1, maxWidth: 3,
 39 | 				values:   []interface{}{"foo", "NaN", "bar", "NaN"},
 40 | 				vals:     []string{"foo", "NaN", "bar", "NaN"},
 41 | 				datatype: "string", name: "",
 42 | 				valid: []int{0, 2}, null: []int{1, 3}}},
 43 | 		{"multi index",
 44 | 			MustNew(
 45 | 				1.0,
 46 | 				Config{MultiIndex: []interface{}{"baz", "qux"}, Name: "foo"},
 47 | 			),
 48 | 			want{len: 1, numIdxLevels: 2, maxWidth: 4,
 49 | 				values: []interface{}{1.0}, vals: []float64{1},
 50 | 				datatype: "float64", name: "foo",
 51 | 				valid: []int{0}, null: []int{}}},
 52 | 	}
 53 | 	for _, tt := range tests {
 54 | 		t.Run(tt.name, func(t *testing.T) {
 55 | 			s := tt.input.Copy()
 56 | 			gotLen := s.Len()
 57 | 			if gotLen != tt.want.len {
 58 | 				t.Errorf("s.Len(): got %v, want %v", gotLen, tt.want.len)
 59 | 			}
 60 | 			gotNumIdxLevels := s.NumLevels()
 61 | 			if gotNumIdxLevels != tt.want.numIdxLevels {
 62 | 				t.Errorf("s.NumLevels(): got %v, want %v", gotNumIdxLevels, tt.want.numIdxLevels)
 63 | 			}
 64 | 			gotMaxWidth := s.MaxWidth()
 65 | 			if gotMaxWidth != tt.want.maxWidth {
 66 | 				t.Errorf("s.MaxWidth(): got %v, want %v", gotMaxWidth, tt.want.maxWidth)
 67 | 			}
 68 | 			gotValues := s.Values()
 69 | 			if !reflect.DeepEqual(gotValues, tt.want.values) {
 70 | 				t.Errorf("s.Values(): got %v, want %v", gotValues, tt.want.values)
 71 | 			}
 72 | 			gotVals := s.Vals()
 73 | 			if !reflect.DeepEqual(gotVals, tt.want.vals) {
 74 | 				t.Errorf("s.Vals(): got %#v, want %v", gotVals, tt.want.vals)
 75 | 			}
 76 | 			gotDatatype := s.DataType()
 77 | 			if gotDatatype != tt.want.datatype {
 78 | 				t.Errorf("s.Datatype(): got %v, want %v", gotDatatype, tt.want.datatype)
 79 | 			}
 80 | 			gotName := s.Name()
 81 | 			if gotName != tt.want.name {
 82 | 				t.Errorf("s.Name(): got %v, want %v", gotName, tt.want.name)
 83 | 			}
 84 | 			gotValid := s.valid()
 85 | 			if !reflect.DeepEqual(gotValid, tt.want.valid) {
 86 | 				t.Errorf("s.valid(): got %v, want %v", gotValid, tt.want.valid)
 87 | 			}
 88 | 			gotNull := s.null()
 89 | 			if !reflect.DeepEqual(gotNull, tt.want.null) {
 90 | 				t.Errorf("s.null(): got %v, want %v", gotNull, tt.want.null)
 91 | 			}
 92 | 		})
 93 | 	}
 94 | }
 95 | 
 96 | func TestSeries_Equal(t *testing.T) {
 97 | 	s, err := New("foo", Config{Index: "bar", Name: "baz"})
 98 | 	if err != nil {
 99 | 		t.Error(err)
100 | 	}
101 | 	s2, _ := New("foo", Config{Index: "bar", Name: "baz"})
102 | 	if !Equal(s, s2) {
103 | 		t.Errorf("Equal() returned false, want true")
104 | 	}
105 | 	s2.datatype = options.Bool
106 | 	if Equal(s, s2) {
107 | 		t.Errorf("Equal() returned true for different kind, want false")
108 | 	}
109 | 
110 | 	s2, _ = New("quux", Config{Index: "bar", Name: "baz"})
111 | 	if Equal(s, s2) {
112 | 		t.Errorf("Equal() returned true for different values, want false")
113 | 	}
114 | 	s2, _ = New("foo", Config{Index: "corge", Name: "baz"})
115 | 	if Equal(s, s2) {
116 | 		t.Errorf("Equal() returned true for different index, want false")
117 | 	}
118 | 	s2, _ = New("foo", Config{Index: "bar", Name: "qux"})
119 | 	if Equal(s, s2) {
120 | 		t.Errorf("Equal() returned true for different name, want false")
121 | 	}
122 | }
123 | 
124 | func TestSeries_ReplaceNil(t *testing.T) {
125 | 	s := MustNew(nil)
126 | 	s2 := MustNew([]int{1, 2})
127 | 	s.replace(s2)
128 | 	if !Equal(s, s2) {
129 | 		t.Errorf("Series.replace() returned %v, want %v", s, s2)
130 | 	}
131 | }
132 | 
133 | func TestSeries_Describe_unsupported(t *testing.T) {
134 | 	s := MustNew([]float64{1, 2, 3})
135 | 	tm := s.Earliest()
136 | 	if (time.Time{}) != tm {
137 | 		t.Errorf("Earliest() got %v, want time.Time{} for unsupported type", tm)
138 | 	}
139 | 	tm = s.Latest()
140 | 	if (time.Time{}) != tm {
141 | 		t.Errorf("Latest() got %v, want time.Time{} for unsupported type", tm)
142 | 	}
143 | }
144 | 
145 | // [START ensure tests]
146 | func TestSeries_EnsureTypes_fail(t *testing.T) {
147 | 	defer log.SetOutput(os.Stderr)
148 | 	vals := []interface{}{1, 2, 3}
149 | 
150 | 	var buf bytes.Buffer
151 | 	log.SetOutput(&buf)
152 | 	ensureFloatFromNumerics(vals)
153 | 	if buf.String() == "" {
154 | 		t.Errorf("ensureNumerics() returned no log message, want log due to fail")
155 | 	}
156 | 	buf.Reset()
157 | 
158 | 	ensureDateTime(vals)
159 | 	if buf.String() == "" {
160 | 		t.Errorf("ensureDateTime() returned no log message, want log due to fail")
161 | 	}
162 | 	buf.Reset()
163 | 
164 | 	ensureBools(vals)
165 | 	if buf.String() == "" {
166 | 		t.Errorf("ensureBools() returned no log message, want log due to fail")
167 | 	}
168 | 	buf.Reset()
169 | }
170 | 
171 | // [END ensure tests]
172 | 


--------------------------------------------------------------------------------
/internal/values/type-string.go:
--------------------------------------------------------------------------------
  1 | package values
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"math"
  6 | 	"strconv"
  7 | 	"strings"
  8 | 	"time"
  9 | 
 10 | 	"github.com/araddon/dateparse"
 11 | 	"github.com/ptiger10/pd/options"
 12 | )
 13 | 
 14 | // [START Constructor Functions]
 15 | 
 16 | func isNullString(s string) bool {
 17 | 	nullStrings := options.GetStringNullValues()
 18 | 	for _, ns := range nullStrings {
 19 | 		if strings.TrimSpace(s) == ns {
 20 | 			return true
 21 | 		}
 22 | 	}
 23 | 	return false
 24 | }
 25 | 
 26 | // newString creates an stringValue from atomic string value
 27 | func newString(val string) stringValue {
 28 | 	if isNullString(val) {
 29 | 		return stringValue{options.GetDisplayStringNullFiller(), true}
 30 | 	}
 31 | 	return stringValue{val, false}
 32 | }
 33 | 
 34 | func (vals *stringValues) Less(i, j int) bool {
 35 | 	if (*vals)[i].v < (*vals)[j].v {
 36 | 		return true
 37 | 	}
 38 | 	return false
 39 | }
 40 | 
 41 | // [END Constructor Functions]
 42 | 
 43 | // [START Converters]
 44 | 
 45 | // toFloat converts stringValue to float64Value
 46 | //
 47 | // "1": 1.0, Null: NaN
 48 | func (val stringValue) toFloat64() float64Value {
 49 | 	f, err := strconv.ParseFloat(val.v, 64)
 50 | 	if math.IsNaN(f) || err != nil {
 51 | 		return float64Value{math.NaN(), true}
 52 | 	}
 53 | 	return float64Value{f, false}
 54 | }
 55 | 
 56 | // toInt converts stringValue to int64Value
 57 | //
 58 | // "1": 1, null: NaN
 59 | func (val stringValue) toInt64() int64Value {
 60 | 	if val.null {
 61 | 		return int64Value{0, true}
 62 | 	}
 63 | 	f, err := strconv.ParseFloat(val.v, 64)
 64 | 	if err != nil {
 65 | 		return int64Value{0, true}
 66 | 	}
 67 | 	return int64Value{int64(f), false}
 68 | }
 69 | 
 70 | func (val stringValue) toString() stringValue {
 71 | 	if isNullString(val.v) || val.null {
 72 | 		return stringValue{options.GetDisplayStringNullFiller(), true}
 73 | 	}
 74 | 	return stringValue{fmt.Sprint(val.v), false}
 75 | }
 76 | 
 77 | // toBool converts stringValue to boolValue
 78 | //
 79 | // null: false; notnull: true
 80 | func (val stringValue) toBool() boolValue {
 81 | 	if val.null {
 82 | 		return boolValue{false, true}
 83 | 	}
 84 | 	return boolValue{true, false}
 85 | }
 86 | 
 87 | // toDateTime converts stringValue to dateTimeValue using an external parse library
 88 | //
 89 | // Jan 1 2019: 2019-01-01 00:00:00
 90 | //
 91 | // Acceptable DateTime string formats
 92 | /*
 93 |    	"May 8, 2009 5:57:51 PM",
 94 |    	"oct 7, 1970",
 95 |    	"oct 7, '70",
 96 |    	"oct. 7, 1970",
 97 |    	"oct. 7, 70",
 98 |    	"Mon Jan  2 15:04:05 2006",
 99 |    	"Mon Jan  2 15:04:05 MST 2006",
100 |    	"Mon Jan 02 15:04:05 -0700 2006",
101 |    	"Monday, 02-Jan-06 15:04:05 MST",
102 |    	"Mon, 02 Jan 2006 15:04:05 MST",
103 |    	"Tue, 11 Jul 2017 16:28:13 +0200 (CEST)",
104 |    	"Mon, 02 Jan 2006 15:04:05 -0700",
105 |    	"Thu, 4 Jan 2018 17:53:36 +0000",
106 |    	"Mon Aug 10 15:44:11 UTC+0100 2015",
107 |    	"Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time)",
108 |    	"September 17, 2012 10:09am",
109 |    	"September 17, 2012 at 10:09am PST-08",
110 |    	"September 17, 2012, 10:10:09",
111 |    	"October 7, 1970",
112 |    	"October 7th, 1970",
113 |    	"12 Feb 2006, 19:17",
114 |    	"12 Feb 2006 19:17",
115 |    	"7 oct 70",
116 |    	"7 oct 1970",
117 |    	"03 February 2013",
118 |    	"1 July 2013",
119 |    	"2013-Feb-03",
120 |    	//   mm/dd/yy
121 |    	"3/31/2014",
122 |    	"03/31/2014",
123 |    	"08/21/71",
124 |    	"8/1/71",
125 |    	"4/8/2014 22:05",
126 |    	"04/08/2014 22:05",
127 |    	"4/8/14 22:05",
128 |    	"04/2/2014 03:00:51",
129 |    	"8/8/1965 12:00:00 AM",
130 |    	"8/8/1965 01:00:01 PM",
131 |    	"8/8/1965 01:00 PM",
132 |    	"8/8/1965 1:00 PM",
133 |    	"8/8/1965 12:00 AM",
134 |    	"4/02/2014 03:00:51",
135 |    	"03/19/2012 10:11:59",
136 |    	"03/19/2012 10:11:59.3186369",
137 |    	// yyyy/mm/dd
138 |    	"2014/3/31",
139 |    	"2014/03/31",
140 |    	"2014/4/8 22:05",
141 |    	"2014/04/08 22:05",
142 |    	"2014/04/2 03:00:51",
143 |    	"2014/4/02 03:00:51",
144 |    	"2012/03/19 10:11:59",
145 |    	"2012/03/19 10:11:59.3186369",
146 |    	// Chinese
147 |    	"2014年04月08日",
148 |    	//   yyyy-mm-ddThh
149 |    	"2006-01-02T15:04:05+0000",
150 |    	"2009-08-12T22:15:09-07:00",
151 |    	"2009-08-12T22:15:09",
152 |    	"2009-08-12T22:15:09Z",
153 |    	//   yyyy-mm-dd hh:mm:ss
154 |    	"2014-04-26 17:24:37.3186369",
155 |    	"2012-08-03 18:31:59.257000000",
156 |    	"2014-04-26 17:24:37.123",
157 |    	"2013-04-01 22:43",
158 |    	"2013-04-01 22:43:22",
159 |    	"2014-12-16 06:20:00 UTC",
160 |    	"2014-12-16 06:20:00 GMT",
161 |    	"2014-04-26 05:24:37 PM",
162 |    	"2014-04-26 13:13:43 +0800",
163 |    	"2014-04-26 13:13:43 +0800 +08",
164 |    	"2014-04-26 13:13:44 +09:00",
165 |    	"2012-08-03 18:31:59.257000000 +0000 UTC",
166 |    	"2015-09-30 18:48:56.35272715 +0000 UTC",
167 |    	"2015-02-18 00:12:00 +0000 GMT",
168 |    	"2015-02-18 00:12:00 +0000 UTC",
169 |    	"2015-02-08 03:02:00 +0300 MSK m=+0.000000001",
170 |    	"2015-02-08 03:02:00.001 +0300 MSK m=+0.000000001",
171 |    	"2017-07-19 03:21:51+00:00",
172 |    	"2014-04-26",
173 |    	"2014-04",
174 |    	"2014",
175 |    	"2014-05-11 08:20:13,787",
176 |    	// mm.dd.yy
177 |    	"3.31.2014",
178 |    	"03.31.2014",
179 |    	"08.21.71",
180 |    	"2014.03",
181 |    	"2014.03.30",
182 |    	//  yyyymmdd and similar
183 |    	"20140601",
184 |    	"20140722105203",
185 |    	// unix seconds, ms, micro, nano
186 |    	"1332151919",
187 |    	"1384216367189",
188 |    	"1384216367111222",
189 |    	"1384216367111222333",
190 |    }
191 | */
192 | func (val stringValue) toDateTime() dateTimeValue {
193 | 	if val.null {
194 | 		return dateTimeValue{time.Time{}, true}
195 | 	}
196 | 	t, err := dateparse.ParseAny(val.v)
197 | 	if err != nil {
198 | 		return dateTimeValue{time.Time{}, true}
199 | 	}
200 | 	return dateTimeValue{t, false}
201 | }
202 | 
203 | // [END Converters]
204 | 


--------------------------------------------------------------------------------
/dataframe/pivot.go:
--------------------------------------------------------------------------------
  1 | package dataframe
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"strings"
  6 | 
  7 | 	"github.com/ptiger10/pd/internal/index"
  8 | 	"github.com/ptiger10/pd/internal/values"
  9 | 	"github.com/ptiger10/pd/options"
 10 | 	"github.com/ptiger10/pd/series"
 11 | )
 12 | 
 13 | // values:
 14 | // make a [][]interface{} valsMatrix for rows x cols
 15 | // # rows: unique non-stacked labels
 16 | // # cols = unique stacked labels * number of columns
 17 | // isolate first value of the stacked label within each non-stacked label
 18 | // transpose to []interface and feed into interface factory to create []Values.Container
 19 | func (df *DataFrame) stack(level int) (newIdxPositions []int, valsMatrix [][]interface{}, newColLvl []string) {
 20 | 	var unstackedIndexLevels []int
 21 | 	for j := 0; j < df.IndexLevels(); j++ {
 22 | 		if j != level {
 23 | 			unstackedIndexLevels = append(unstackedIndexLevels, j)
 24 | 		}
 25 | 	}
 26 | 	g := df.GroupByIndex(unstackedIndexLevels...)
 27 | 
 28 | 	labelsToStack := df.Index.unique(level)
 29 | 	numRows := g.Len()
 30 | 	numCols := len(labelsToStack) * df.NumCols()
 31 | 	valsMatrix = make([][]interface{}, numRows)
 32 | 	for i := 0; i < numRows; i++ {
 33 | 		valsMatrix[i] = make([]interface{}, numCols)
 34 | 	}
 35 | 
 36 | 	// only extend the labels for the columns-to-be-stacked once
 37 | 	extendColLevel := true
 38 | 	for i, group := range g.Groups() {
 39 | 		newIdxPositions = append(newIdxPositions, g.groups[group].Positions[0])
 40 | 		rows, _ := df.SubsetRows(g.groups[group].Positions)
 41 | 		for labelOffset, label := range labelsToStack {
 42 | 			// log warnings disabled because frequently a label will not exist in an index
 43 | 			archive := options.GetLogWarnings()
 44 | 			options.SetLogWarnings(false)
 45 | 			row := rows.SelectLabels([]string{label}, level)
 46 | 			options.SetLogWarnings(archive)
 47 | 			// log warnings restored
 48 | 			for m := 0; m < df.NumCols(); m++ {
 49 | 				if len(row) > 0 {
 50 | 					valsMatrix[i][m+labelOffset*df.NumCols()] = rows.vals[m].Values.Value(row[0])
 51 | 				}
 52 | 				if extendColLevel {
 53 | 					newColLvl = append(newColLvl, label)
 54 | 				}
 55 | 			}
 56 | 		}
 57 | 		extendColLevel = false
 58 | 	}
 59 | 	return newIdxPositions, valsMatrix, newColLvl
 60 | }
 61 | 
 62 | func (df *DataFrame) stackIndex(level int) *DataFrame {
 63 | 	newIdxPositions, valsMatrix, newColLevel := df.stack(level)
 64 | 	transposedVals := values.TransposeValues(valsMatrix)
 65 | 	var containers []values.Container
 66 | 	for i := 0; i < len(transposedVals); i++ {
 67 | 		container := values.MustCreateValuesFromInterface(transposedVals[i])
 68 | 		containers = append(containers, container)
 69 | 	}
 70 | 
 71 | 	idx := df.index.Copy()
 72 | 	idx.Subset(newIdxPositions)
 73 | 	idx.DropLevel(level)
 74 | 
 75 | 	cols := df.cols.Copy()
 76 | 	for j := 0; j < df.ColLevels(); j++ {
 77 | 		// duplicate each level enough times that it is same length as new column level
 78 | 		cols.Levels[j].Duplicate((len(newColLevel) / df.NumCols()) - 1)
 79 | 	}
 80 | 
 81 | 	// ducks error because input is controlled
 82 | 	cols.InsertLevel(0, newColLevel, df.index.Levels[level].Name)
 83 | 
 84 | 	ret := newFromComponents(containers, idx, cols, df.Name())
 85 | 	if df.dataType() != options.Interface {
 86 | 		ret.InPlace.Convert(df.dataType().String())
 87 | 	}
 88 | 	return ret
 89 | }
 90 | 
 91 | // Pivot transforms data into the desired form and calls aggFunc on the reshaped data.
 92 | func (df *DataFrame) Pivot(index int, values int, columns int, aggFunc string) (*DataFrame, error) {
 93 | 	df = df.Copy()
 94 | 	df.InPlace.SubsetColumns([]int{index, columns, values})
 95 | 	g := df.GroupBy(index, columns)
 96 | 
 97 | 	switch aggFunc {
 98 | 	case "sum":
 99 | 		df = g.Sum()
100 | 	case "mean":
101 | 		df = g.Mean()
102 | 	case "median":
103 | 		df = g.Median()
104 | 	case "min":
105 | 		df = g.Min()
106 | 	case "max":
107 | 		df = g.Max()
108 | 	case "std":
109 | 		df = g.Std()
110 | 	default:
111 | 		return newEmptyDataFrame(), fmt.Errorf("df.Pivot(): aggFunc (%v) does not exist", aggFunc)
112 | 	}
113 | 	df = df.stackIndex(1)
114 | 	df.Columns.DropLevel(1)
115 | 	return df, nil
116 | }
117 | 
118 | // Transpose transforms all rows to columns.
119 | func (df *DataFrame) Transpose() *DataFrame {
120 | 	ret := newEmptyDataFrame()
121 | 	for m := 0; m < df.NumCols(); m++ {
122 | 		row := transposeSeries(df.hydrateSeries(m))
123 | 		ret.InPlace.appendDataFrameRow(row)
124 | 	}
125 | 	return ret
126 | }
127 | 
128 | func transposeSeries(s *series.Series) *DataFrame {
129 | 	// Columns
130 | 	lvls := make([]index.ColLevel, s.NumLevels())
131 | 	cols := index.NewColumns(lvls...)
132 | 	container, idx := s.ToInternalComponents()
133 | 	for j := 0; j < s.NumLevels(); j++ {
134 | 		cols.Levels[j].IsDefault = idx.Levels[j].IsDefault
135 | 		cols.Levels[j].DataType = idx.Levels[j].DataType
136 | 		cols.Levels[j].Name = idx.Levels[j].Name
137 | 		for m := 0; m < s.Len(); m++ {
138 | 			val := idx.Levels[j].Labels.Value(m)
139 | 			// TODO: test null value
140 | 			// if !elem.Null {
141 | 			// 	cols.Levels[j].Labels = append(cols.Levels[j].Labels, fmt.Sprint(elem.Value))
142 | 			// } else {
143 | 			// 	cols.Levels[j].Labels = append(cols.Levels[j].Labels, "")
144 | 			// }
145 | 			cols.Levels[j].Labels = append(cols.Levels[j].Labels, fmt.Sprint(val))
146 | 		}
147 | 	}
148 | 	cols.Refresh()
149 | 
150 | 	// Index
151 | 	names := strings.Split(s.Name(), values.GetMultiColNameSeparator())
152 | 	idxLvls := make([]index.Level, len(names))
153 | 	retIdx := index.New(idxLvls...)
154 | 	for j := 0; j < len(names); j++ {
155 | 		name := names[j]
156 | 		// ducks error because type is known to be supported
157 | 		retIdx.Levels[j], _ = index.NewLevel(values.InterpolateString(name), "")
158 | 	}
159 | 	retIdx.NeedsRefresh = true
160 | 
161 | 	// Values
162 | 	vals := make([]values.Container, s.Len())
163 | 	for m := 0; m < s.Len(); m++ {
164 | 		vals[m].Values = container.Values.Subset([]int{m})
165 | 		vals[m].DataType = container.DataType
166 | 	}
167 | 
168 | 	return newFromComponents(vals, retIdx, cols, "")
169 | }
170 | 


--------------------------------------------------------------------------------
/series/group_test.go:
--------------------------------------------------------------------------------
  1 | package series
  2 | 
  3 | import (
  4 | 	"bytes"
  5 | 	"log"
  6 | 	"os"
  7 | 	"reflect"
  8 | 	"strings"
  9 | 	"testing"
 10 | 
 11 | 	"github.com/ptiger10/pd/options"
 12 | )
 13 | 
 14 | func TestGroup_Copy(t *testing.T) {
 15 | 	s := MustNew([]int{1, 2, 3, 4}, Config{Index: []int{1, 1, 2, 2}})
 16 | 	got := s.GroupByIndex(0).copy().groups
 17 | 	want := map[string]*group{
 18 | 		"1": {Positions: []int{0, 1}, FirstPosition: 0},
 19 | 		"2": {Positions: []int{2, 3}, FirstPosition: 2},
 20 | 	}
 21 | 	if !reflect.DeepEqual(got, want) {
 22 | 		t.Errorf("group.copy() got %v, want %v", got, want)
 23 | 	}
 24 | }
 25 | 
 26 | func TestGrouping_Math(t *testing.T) {
 27 | 	s := MustNew([]int{1, 2, 3, 4}, Config{Index: []int{1, 1, 2, 2}})
 28 | 	tests := []struct {
 29 | 		name  string
 30 | 		input *Series
 31 | 		fn    func(Grouping) *Series
 32 | 		want  *Series
 33 | 	}{
 34 | 		{name: "fail: empty", input: newEmptySeries(), fn: Grouping.Sum,
 35 | 			want: newEmptySeries()},
 36 | 		{"sum", s, Grouping.Sum,
 37 | 			MustNew([]float64{3, 7}, Config{Index: []int{1, 2}})},
 38 | 		{"mean", s, Grouping.Mean,
 39 | 			MustNew([]float64{1.5, 3.5}, Config{Index: []int{1, 2}})},
 40 | 		{"min", s, Grouping.Min,
 41 | 			MustNew([]float64{1, 3}, Config{Index: []int{1, 2}})},
 42 | 		{"max", s, Grouping.Max,
 43 | 			MustNew([]float64{2, 4}, Config{Index: []int{1, 2}})},
 44 | 		{"median", s, Grouping.Median,
 45 | 			MustNew([]float64{1.5, 3.5}, Config{Index: []int{1, 2}})},
 46 | 		{"standard deviation", s, Grouping.Std,
 47 | 			MustNew([]float64{0.5, 0.5}, Config{Index: []int{1, 2}})},
 48 | 	}
 49 | 	for _, tt := range tests {
 50 | 		t.Run(tt.name, func(t *testing.T) {
 51 | 			g := tt.input.GroupByIndex()
 52 | 			// Test Asynchronously
 53 | 			got := tt.fn(g)
 54 | 			if !Equal(got, tt.want) {
 55 | 				t.Errorf("s.GroupByIndex math operation returned %v, want %v", got, tt.want)
 56 | 			}
 57 | 			// Test Synchronously
 58 | 			options.SetAsync(false)
 59 | 			gotSync := tt.fn(g)
 60 | 			if !Equal(gotSync, tt.want) {
 61 | 				t.Errorf("s.GroupByIndex synchronous math operation returned %v, want %v", gotSync, tt.want)
 62 | 			}
 63 | 			options.RestoreDefaults()
 64 | 		})
 65 | 	}
 66 | }
 67 | 
 68 | func TestSeries_GroupByIndex(t *testing.T) {
 69 | 	multi := MustNew([]string{"foo", "bar", "baz"}, Config{MultiIndex: []interface{}{[]int{1, 1, 2}, []int{2, 2, 1}}})
 70 | 	type args struct {
 71 | 		levelPositions []int
 72 | 	}
 73 | 	tests := []struct {
 74 | 		name  string
 75 | 		input *Series
 76 | 		args  args
 77 | 		want  map[string]*group
 78 | 	}{
 79 | 		{name: "single no args",
 80 | 			input: MustNew([]string{"foo", "bar", "baz"}, Config{Index: []int{1, 1, 2}}),
 81 | 			args:  args{[]int{}},
 82 | 			want: map[string]*group{
 83 | 				"1": {Positions: []int{0, 1}, FirstPosition: 0},
 84 | 				"2": {Positions: []int{2}, FirstPosition: 2},
 85 | 			}},
 86 | 		{"multi no args",
 87 | 			multi,
 88 | 			args{[]int{}},
 89 | 			map[string]*group{
 90 | 				"1 | 2": {Positions: []int{0, 1}, FirstPosition: 0},
 91 | 				"2 | 1": {Positions: []int{2}, FirstPosition: 2},
 92 | 			}},
 93 | 		{"multi one level",
 94 | 			multi,
 95 | 			args{[]int{0}},
 96 | 			map[string]*group{
 97 | 				"1": {Positions: []int{0, 1}, FirstPosition: 0},
 98 | 				"2": {Positions: []int{2}, FirstPosition: 2},
 99 | 			}},
100 | 		{"multi two levels reversed",
101 | 			multi,
102 | 			args{[]int{1, 0}},
103 | 			map[string]*group{
104 | 				"2 | 1": {Positions: []int{0, 1}, FirstPosition: 0},
105 | 				"1 | 2": {Positions: []int{2}, FirstPosition: 2},
106 | 			}},
107 | 		{"fail: invalid level",
108 | 			multi,
109 | 			args{[]int{10}},
110 | 			newEmptyGrouping().groups},
111 | 		{"fail: partial invalid level",
112 | 			multi,
113 | 			args{[]int{0, 10}},
114 | 			newEmptyGrouping().groups},
115 | 	}
116 | 	for _, tt := range tests {
117 | 		t.Run(tt.name, func(t *testing.T) {
118 | 			var buf bytes.Buffer
119 | 			log.SetOutput(&buf)
120 | 			defer log.SetOutput(os.Stderr)
121 | 
122 | 			s := tt.input.Copy()
123 | 			got := s.GroupByIndex(tt.args.levelPositions...).groups
124 | 			if !reflect.DeepEqual(got, tt.want) {
125 | 				t.Errorf("Series.GroupByIndex() = %#v, want %#v", got, tt.want)
126 | 			}
127 | 
128 | 			if strings.Contains(tt.name, "fail") {
129 | 				if buf.String() == "" {
130 | 					t.Errorf("Series.GroupByIndex() returned no log message, want log due to fail")
131 | 				}
132 | 			}
133 | 		})
134 | 	}
135 | }
136 | 
137 | func Test_Group(t *testing.T) {
138 | 	type args struct {
139 | 		label string
140 | 	}
141 | 	tests := []struct {
142 | 		name string
143 | 		args args
144 | 		want *Series
145 | 	}{
146 | 		{name: "pass", args: args{"1"}, want: MustNew([]int{1, 2}, Config{Index: []int{1, 1}})},
147 | 		{name: "fail", args: args{"100"}, want: newEmptySeries()},
148 | 	}
149 | 	for _, tt := range tests {
150 | 		t.Run(tt.name, func(t *testing.T) {
151 | 			var buf bytes.Buffer
152 | 			log.SetOutput(&buf)
153 | 			defer log.SetOutput(os.Stderr)
154 | 
155 | 			s := MustNew([]int{1, 2, 3, 4}, Config{Index: []int{1, 1, 2, 2}})
156 | 			g := s.GroupByIndex()
157 | 			got := g.Group(tt.args.label)
158 | 			if !Equal(got, tt.want) {
159 | 				t.Errorf("Grouping.Group() = %v, want %v", got, tt.want)
160 | 			}
161 | 			if strings.Contains(tt.name, "fail") {
162 | 				if buf.String() == "" {
163 | 					t.Errorf("Grouping.Group() returned no log message, want log due to fail")
164 | 				}
165 | 			}
166 | 
167 | 		})
168 | 	}
169 | }
170 | 
171 | func TestGrouping_Nth(t *testing.T) {
172 | 	s := MustNew([]string{"foo", "bar", "baz"}, Config{MultiIndex: []interface{}{[]int{1, 1, 2}, []int{2, 2, 1}}})
173 | 	g := s.GroupByIndex()
174 | 	gotFirst := g.First()
175 | 	wantFirst := MustNew([]string{"foo", "baz"}, Config{MultiIndex: []interface{}{[]int{1, 2}, []int{2, 1}}})
176 | 	if !Equal(gotFirst, wantFirst) {
177 | 		t.Errorf("Grouping.First() = %#v, want %#v", gotFirst, wantFirst)
178 | 	}
179 | 	gotLast := g.Last()
180 | 	wantLast := MustNew([]string{"bar", "baz"}, Config{MultiIndex: []interface{}{[]int{1, 2}, []int{2, 1}}})
181 | 	if !Equal(gotLast, wantLast) {
182 | 		t.Errorf("Grouping.Last() = %#v, want %#v", gotLast, wantLast)
183 | 	}
184 | }
185 | 


--------------------------------------------------------------------------------
/internal/values/shared_template.go:
--------------------------------------------------------------------------------
  1 | package values
  2 | 
  3 | import (
  4 | 	"github.com/cheekybits/genny/generic"
  5 | 	"github.com/ptiger10/pd/options"
  6 | )
  7 | 
  8 | //go:generate genny -in=$GOFILE -out=shared_autogen.go gen "valueType=float64,int64,string,bool,time.Time,interface{}"
  9 | 
 10 | // [START] valueTypeValues
 11 | 
 12 | // valueType is the generic ValueType that will be replaced by specific types on `make generate`
 13 | type valueType generic.Type
 14 | 
 15 | // valueTypeValues is a slice of valueType-typed value/null structs.
 16 | type valueTypeValues []valueTypeValue
 17 | 
 18 | // valueTypeValue is a valueType-typed value/null struct.
 19 | type valueTypeValue struct {
 20 | 	v    valueType
 21 | 	null bool
 22 | }
 23 | 
 24 | // newSlicevalueType converts []valueType -> Container with valueTypeValues
 25 | func newSlicevalueType(vals []valueType) Container {
 26 | 	ret := make(valueTypeValues, len(vals))
 27 | 	for i := 0; i < len(vals); i++ {
 28 | 		ret[i] = newvalueType(vals[i])
 29 | 	}
 30 | 	return Container{&ret, options.PlaceholdervalueType}
 31 | }
 32 | 
 33 | // Len returns the number of value/null structs in the container.
 34 | func (vals *valueTypeValues) Len() int {
 35 | 	return len(*vals)
 36 | }
 37 | 
 38 | func (vals *valueTypeValues) Swap(i, j int) {
 39 | 	(*vals)[i], (*vals)[j] = (*vals)[j], (*vals)[i]
 40 | }
 41 | 
 42 | // Subset returns the values located at specific index positions.
 43 | func (vals *valueTypeValues) Subset(rowPositions []int) Values {
 44 | 	ret := make(valueTypeValues, len(rowPositions))
 45 | 	for i := 0; i < len(rowPositions); i++ {
 46 | 		ret[i] = (*vals)[rowPositions[i]]
 47 | 	}
 48 | 	return &ret
 49 | }
 50 | 
 51 | // Append converts vals2 to valueTypeValues and extends the original valueTypeValues.
 52 | func (vals *valueTypeValues) Append(vals2 Values) {
 53 | 	convertedVals, _ := Convert(vals2, options.PlaceholdervalueType)
 54 | 	newVals := convertedVals.(*valueTypeValues)
 55 | 	*vals = append(*vals, *newVals...)
 56 | }
 57 | 
 58 | // Values returns only the Value fields for the collection of Value/Null structs as an interface slice.
 59 | func (vals *valueTypeValues) Values() []interface{} {
 60 | 	v := *vals
 61 | 	ret := make([]interface{}, len(v))
 62 | 	for i := 0; i < len(v); i++ {
 63 | 		ret[i] = v[i].v
 64 | 	}
 65 | 	return ret
 66 | }
 67 | 
 68 | // Vals returns only the Value fields for the collection of Value/Null structs as an empty interface.
 69 | //
 70 | // Caution: This operation excludes the Null field but retains any null values.
 71 | func (vals *valueTypeValues) Vals() interface{} {
 72 | 	v := *vals
 73 | 	ret := make([]valueType, len(v))
 74 | 	for i := 0; i < len(v); i++ {
 75 | 		ret[i] = v[i].v
 76 | 	}
 77 | 	return ret
 78 | }
 79 | 
 80 | // Value returns the Value field at the specified integer position.
 81 | func (vals *valueTypeValues) Value(position int) interface{} {
 82 | 	return (*vals)[position].v
 83 | }
 84 | 
 85 | // Value returns the Null field at the specified integer position.
 86 | func (vals *valueTypeValues) Null(position int) bool {
 87 | 	return (*vals)[position].null
 88 | }
 89 | 
 90 | // Copy transfers every value from the current valueTypeValues container into a new Values container
 91 | func (vals *valueTypeValues) Copy() Values {
 92 | 	v := *vals
 93 | 	newValues := make(valueTypeValues, len(v))
 94 | 	for i := 0; i < len(v); i++ {
 95 | 		newValues[i] = v[i]
 96 | 	}
 97 | 	return &newValues
 98 | }
 99 | 
100 | // Set overwrites a Value/Null pair at an integer position.
101 | func (vals *valueTypeValues) Set(position int, newVal interface{}) {
102 | 	var v interfaceValue
103 | 	if isNullInterface(newVal) {
104 | 		v = interfaceValue{newVal, true}
105 | 	} else {
106 | 		v = interfaceValue{newVal, false}
107 | 	}
108 | 	(*vals)[position] = v.tovalueType()
109 | }
110 | 
111 | // Drop drops the Value/Null pair at an integer position.
112 | func (vals *valueTypeValues) Drop(pos int) {
113 | 	*vals = append((*vals)[:pos], (*vals)[pos+1:]...)
114 | }
115 | 
116 | // Insert inserts a new Value/Null pair at an integer position.
117 | func (vals *valueTypeValues) Insert(pos int, val interface{}) {
118 | 	v := interfaceValue{val, false}
119 | 	*vals = append((*vals)[:pos], append([]valueTypeValue{v.tovalueType()}, (*vals)[pos:]...)...)
120 | }
121 | 
122 | // ToFloat converts valueTypeValues to floatValues.
123 | func (vals *valueTypeValues) ToFloat64() Values {
124 | 	ret := make(float64Values, len(*vals))
125 | 	for i := 0; i < len(*vals); i++ {
126 | 		ret[i] = (*vals)[i].toFloat64()
127 | 	}
128 | 	return &ret
129 | }
130 | 
131 | // ToInt converts valueTypeValues to intValues.
132 | func (vals *valueTypeValues) ToInt64() Values {
133 | 	ret := make(int64Values, len(*vals))
134 | 	for i := 0; i < len(*vals); i++ {
135 | 		ret[i] = (*vals)[i].toInt64()
136 | 	}
137 | 	return &ret
138 | }
139 | 
140 | // ToString converts valueTypeValues to stringValues.
141 | func (vals *valueTypeValues) ToString() Values {
142 | 	ret := make(stringValues, len(*vals))
143 | 	for i := 0; i < len(*vals); i++ {
144 | 		ret[i] = (*vals)[i].toString()
145 | 	}
146 | 	return &ret
147 | }
148 | 
149 | // ToBool converts valueTypeValues to boolValues.
150 | func (vals *valueTypeValues) ToBool() Values {
151 | 	ret := make(boolValues, len(*vals))
152 | 	for i := 0; i < len(*vals); i++ {
153 | 		ret[i] = (*vals)[i].toBool()
154 | 	}
155 | 	return &ret
156 | }
157 | 
158 | // ToBool converts valueTypeValues to dateTimeValues.
159 | func (vals *valueTypeValues) ToDateTime() Values {
160 | 	ret := make(dateTimeValues, len(*vals))
161 | 	for i := 0; i < len(*vals); i++ {
162 | 		ret[i] = (*vals)[i].toDateTime()
163 | 	}
164 | 	return &ret
165 | }
166 | 
167 | // ToInterface converts valueTypeValues to interfaceValues.
168 | func (vals *valueTypeValues) ToInterface() Values {
169 | 	ret := make(interfaceValues, len(*vals))
170 | 	for i := 0; i < len(*vals); i++ {
171 | 		if (*vals)[i].null {
172 | 			ret[i] = interfaceValue{(*vals)[i].v, true}
173 | 		} else {
174 | 			ret[i] = interfaceValue{(*vals)[i].v, false}
175 | 		}
176 | 	}
177 | 	return &ret
178 | }
179 | 
180 | // [END] valueTypeValues
181 | // ---------------------------------------------------------------------------
182 | var placeholder = true
183 | 
184 | // the placeholder and this comment are overwritten on `make generate`, but are included so that the [END] comment survives
185 | 


--------------------------------------------------------------------------------
/series/group.go:
--------------------------------------------------------------------------------
  1 | package series
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"log"
  6 | 	"sort"
  7 | 	"strings"
  8 | 	"sync"
  9 | 
 10 | 	"github.com/ptiger10/pd/internal/values"
 11 | 	"github.com/ptiger10/pd/options"
 12 | )
 13 | 
 14 | type group struct {
 15 | 	Positions     []int
 16 | 	FirstPosition int
 17 | }
 18 | 
 19 | func (grp *group) copy() *group {
 20 | 	pos := make([]int, len(grp.Positions))
 21 | 	for i, p := range grp.Positions {
 22 | 		pos[i] = p
 23 | 	}
 24 | 	return &group{Positions: pos, FirstPosition: grp.FirstPosition}
 25 | }
 26 | 
 27 | // copy a grouping
 28 | func (g Grouping) copy() Grouping {
 29 | 	grps := make(map[string]*group)
 30 | 	for k, v := range g.groups {
 31 | 		grps[k] = v.copy()
 32 | 	}
 33 | 	return Grouping{
 34 | 		s:      g.s.Copy(),
 35 | 		groups: grps,
 36 | 	}
 37 | }
 38 | 
 39 | func (g Grouping) asyncMath(fn func(*Series) float64) *Series {
 40 | 	var wg sync.WaitGroup
 41 | 	g = g.copy()
 42 | 	if g.Len() == 0 {
 43 | 		return newEmptySeries()
 44 | 	}
 45 | 
 46 | 	// synchronous option
 47 | 	if !options.GetAsync() {
 48 | 		ret := newEmptySeries()
 49 | 		for _, group := range g.Groups() {
 50 | 			s := g.math(group, fn)
 51 | 			ret.InPlace.Join(s)
 52 | 		}
 53 | 		return ret
 54 | 	}
 55 | 
 56 | 	// asynchronous option
 57 | 	ch := make(chan calcReturn, g.Len())
 58 | 	for i, group := range g.Groups() {
 59 | 		wg.Add(1)
 60 | 		go g.awaitMath(ch, i, group, fn, &wg)
 61 | 	}
 62 | 	wg.Wait()
 63 | 	close(ch)
 64 | 	var container []calcReturn
 65 | 	for result := range ch {
 66 | 		container = append(container, result)
 67 | 	}
 68 | 	sort.Slice(container, func(i, j int) bool {
 69 | 		return container[i].n < container[j].n
 70 | 	})
 71 | 
 72 | 	s := newEmptySeries()
 73 | 	for _, result := range container {
 74 | 		s.InPlace.Join(result.s)
 75 | 	}
 76 | 	s.index.NeedsRefresh = true
 77 | 	return s
 78 | }
 79 | 
 80 | type calcReturn struct {
 81 | 	s *Series
 82 | 	n int
 83 | }
 84 | 
 85 | func (g Grouping) awaitMath(ch chan<- calcReturn, n int, group string, fn func(*Series) float64, wg *sync.WaitGroup) {
 86 | 	s := g.math(group, fn)
 87 | 	ret := calcReturn{s: s, n: n}
 88 | 	ch <- ret
 89 | 	wg.Done()
 90 | }
 91 | 
 92 | func (g Grouping) math(group string, fn func(*Series) float64) *Series {
 93 | 	positions := g.groups[group].Positions
 94 | 	rows, _ := g.s.Subset(positions)
 95 | 	calc := fn(rows)
 96 | 	s := MustNew(calc)
 97 | 
 98 | 	// index is the same as the index at the first row position of the group
 99 | 	idxCopy := g.s.index.Copy()
100 | 	idxCopy.Subset([]int{g.groups[group].FirstPosition})
101 | 	s.index = idxCopy
102 | 	return s
103 | }
104 | 
105 | // Groups returns all valid group labels in the Grouping.
106 | func (g Grouping) Groups() []string {
107 | 	var keys []string
108 | 	for k := range g.groups {
109 | 		keys = append(keys, k)
110 | 	}
111 | 	sort.Strings(keys)
112 | 	return keys
113 | }
114 | 
115 | // Len returns the number of groups in the Grouping.
116 | func (g Grouping) Len() int {
117 | 	return len(g.groups)
118 | }
119 | 
120 | // Group returns the Series with the given group label, or an error if that label does not exist.
121 | func (g Grouping) Group(label string) *Series {
122 | 	group, ok := g.groups[label]
123 | 	if !ok {
124 | 		if options.GetLogWarnings() {
125 | 			log.Printf("s.Grouping.Group(): label %v not in g.Groups()", label)
126 | 		}
127 | 		return newEmptySeries()
128 | 	}
129 | 	s, _ := g.s.Subset(group.Positions)
130 | 	return s
131 | }
132 | 
133 | func newEmptyGrouping() Grouping {
134 | 	groups := make(map[string]*group)
135 | 	s := newEmptySeries()
136 | 	return Grouping{s: s, groups: groups}
137 | }
138 | 
139 | // GroupByIndex groups a Series by one or more of its index levels. If no int is provided, all index levels are used.
140 | func (s *Series) GroupByIndex(levelPositions ...int) Grouping {
141 | 	groups := make(map[string]*group)
142 | 	if len(levelPositions) != 0 {
143 | 		var err error
144 | 		s = s.Copy()
145 | 		err = s.Index.SubsetLevels(levelPositions)
146 | 		if err != nil {
147 | 			if options.GetLogWarnings() {
148 | 				log.Printf("s.GroupByIndex() %v\n", err)
149 | 			}
150 | 			return newEmptyGrouping()
151 | 		}
152 | 	}
153 | 
154 | 	for i := 0; i < s.Len(); i++ {
155 | 		labels := s.index.Elements(i).Labels
156 | 		var strLabels []string
157 | 		for _, label := range labels {
158 | 			strLabels = append(strLabels, fmt.Sprint(label))
159 | 		}
160 | 		groupLabel := strings.Join(strLabels, values.GetMultiColNameSeparator())
161 | 
162 | 		if _, ok := groups[groupLabel]; !ok {
163 | 			groups[groupLabel] = &group{FirstPosition: i}
164 | 		}
165 | 		groups[groupLabel].Positions = append(groups[groupLabel].Positions, i)
166 | 	}
167 | 	return Grouping{s: s, groups: groups}
168 | }
169 | 
170 | // First returns the first occurrence of each grouping in the Series.
171 | func (g Grouping) First() *Series {
172 | 	first := func(group string) *Series {
173 | 		position := g.groups[group].Positions[0]
174 | 		s, _ := g.s.Subset([]int{position})
175 | 		return s
176 | 	}
177 | 	ret := newEmptySeries()
178 | 	for _, group := range g.Groups() {
179 | 		s := first(group)
180 | 		ret.InPlace.Join(s)
181 | 	}
182 | 	return ret
183 | }
184 | 
185 | // Last returns the last occurrence of each grouping in the Series.
186 | func (g Grouping) Last() *Series {
187 | 	last := func(group string) *Series {
188 | 		lastIdx := len(g.groups[group].Positions) - 1
189 | 		position := g.groups[group].Positions[lastIdx]
190 | 		s, _ := g.s.Subset([]int{position})
191 | 		return s
192 | 	}
193 | 	ret := newEmptySeries()
194 | 	for _, group := range g.Groups() {
195 | 		s := last(group)
196 | 		ret.InPlace.Join(s)
197 | 	}
198 | 	return ret
199 | }
200 | 
// Sum for each group in the Grouping.
// It delegates to g.asyncMath, passing the method expression (*Series).Sum as the per-group calculation.
func (g Grouping) Sum() *Series {
	return g.asyncMath((*Series).Sum)
}
205 | 
// Mean for each group in the Grouping.
// It delegates to g.asyncMath, passing the method expression (*Series).Mean as the per-group calculation.
func (g Grouping) Mean() *Series {
	return g.asyncMath((*Series).Mean)
}
210 | 
// Min for each group in the Grouping.
// It delegates to g.asyncMath, passing the method expression (*Series).Min as the per-group calculation.
func (g Grouping) Min() *Series {
	return g.asyncMath((*Series).Min)
}
215 | 
// Max for each group in the Grouping.
// It delegates to g.asyncMath, passing the method expression (*Series).Max as the per-group calculation.
func (g Grouping) Max() *Series {
	return g.asyncMath((*Series).Max)
}
220 | 
// Median for each group in the Grouping.
// It delegates to g.asyncMath, passing the method expression (*Series).Median as the per-group calculation.
func (g Grouping) Median() *Series {
	return g.asyncMath((*Series).Median)
}
225 | 
// Std for each group in the Grouping.
// It delegates to g.asyncMath, passing the method expression (*Series).Std as the per-group calculation.
func (g Grouping) Std() *Series {
	return g.asyncMath((*Series).Std)
}
230 | 


--------------------------------------------------------------------------------
/options/settable.go:
--------------------------------------------------------------------------------
  1 | package options
  2 | 
// defaultOptions captures the initial value of every settable option so that
// RestoreDefaults can reapply them later. Each field is initialized from the
// package-level variable of the same name.
var defaultOptions = struct {
	displayMaxWidth         int
	displayMaxRows          int
	displayMaxColumns       int
	displayFloatPrecision   int
	displayRepeatedLabels   bool
	displayStringNullFiller string
	displayTimeFormat       string
	stringNullValues        []string
	logWarnings             bool
	async                   bool
}{
	displayMaxWidth,
	displayMaxRows,
	displayMaxColumns,
	displayFloatPrecision,
	displayRepeatedLabels,
	displayStringNullFiller,
	displayTimeFormat,
	stringNullValues,
	logWarnings,
	async,
}
 26 | 
// RestoreDefaults resets options back to their default setting.
// Each option is reset by calling its setter with the value stored in defaultOptions.
func RestoreDefaults() {
	SetDisplayMaxWidth(defaultOptions.displayMaxWidth)
	SetDisplayMaxRows(defaultOptions.displayMaxRows)
	SetDisplayMaxColumns(defaultOptions.displayMaxColumns)
	SetDisplayFloatPrecision(defaultOptions.displayFloatPrecision)
	SetDisplayRepeatedLabels(defaultOptions.displayRepeatedLabels)
	SetDisplayStringNullFiller(defaultOptions.displayStringNullFiller)
	SetDisplayTimeFormat(defaultOptions.displayTimeFormat)
	SetStringNullValues(defaultOptions.stringNullValues)
	SetLogWarnings(defaultOptions.logWarnings)
	SetAsync(defaultOptions.async)
}
 40 | 
 41 | var displayMaxWidth = 35
 42 | var displayMaxRows = 50
 43 | var displayMaxColumns = 50
 44 | var displayFloatPrecision = 2
 45 | var displayRepeatedLabels = false
 46 | var displayStringNullFiller = "NaN"
 47 | var displayTimeFormat = "1/2/2006T15:04:05"
 48 | var stringNullValues = []string{"NaN", "n/a", "N/A", "", "nil"}
 49 | var logWarnings = true
 50 | var async = true
 51 | 
// SetDisplayMaxWidth sets DisplayMaxWidth to n characters.
// DisplayMaxWidth is an option when printing a Series.
// It is the widest allowable character width for an index label or value.
// If a label is longer than the max, it will be elided at the end.
// The value n is stored as given.
//
// Default width: 35 characters
func SetDisplayMaxWidth(n int) {
	displayMaxWidth = n
}
 61 | 
// GetDisplayMaxWidth returns DisplayMaxWidth, the current maximum character width for a printed index label or value.
func GetDisplayMaxWidth() int {
	return displayMaxWidth
}
 66 | 
// SetDisplayMaxRows sets DisplayMaxRows to n rows.
// DisplayMaxRows is an option when printing a Series.
// It is the max number of rows that will be printed to the screen.
// If the actual number of rows is longer than the max, the first n/2 and last n/2 will be displayed, and the middle will be elided.
// The value n is stored as given.
//
// Default: 50 rows
func SetDisplayMaxRows(n int) {
	displayMaxRows = n
}
 76 | 
// GetDisplayMaxRows returns DisplayMaxRows, the current maximum number of rows printed to the screen.
func GetDisplayMaxRows() int {
	return displayMaxRows
}
 81 | 
// SetDisplayMaxColumns sets DisplayMaxColumns to n columns.
// DisplayMaxColumns is an option when printing a Series.
// It is the max number of columns that will be printed to the screen.
// If the actual number of columns is longer than the max, the first n/2 and last n/2 will be displayed, and the middle will be elided.
// The value n is stored as given.
//
// Default: 50 columns
func SetDisplayMaxColumns(n int) {
	displayMaxColumns = n
}
 91 | 
// GetDisplayMaxColumns returns DisplayMaxColumns, the current maximum number of columns printed to the screen.
func GetDisplayMaxColumns() int {
	return displayMaxColumns
}
 96 | 
// SetDisplayFloatPrecision sets DisplayFloatPrecision to n decimal places.
// DisplayFloatPrecision is an option when printing a Series.
// It is the number of decimal places shown in floating point values and index labels.
// The value n is stored as given.
//
// Default precision: 2 decimal places
func SetDisplayFloatPrecision(n int) {
	displayFloatPrecision = n
}
105 | 
// GetDisplayFloatPrecision returns DisplayFloatPrecision, the current number of decimal places used when printing floats.
func GetDisplayFloatPrecision() int {
	return displayFloatPrecision
}
110 | 
// SetDisplayRepeatedLabels sets DisplayRepeatedLabels to boolean.
// DisplayRepeatedLabels is an option when printing a Series.
// If true, all index labels will be shown, like so:
//
//	A 0    foo
//	B 0    bar
//	C 1    baz
//
// If false, repeated index labels in the same level will be excluded, like so:
//
//	A 0    foo
//	B ...  bar
//	C 1    baz
//
// NB: the ellipsis only marks the omitted label here; it is not included in actual printing.
//
// Default: false
func SetDisplayRepeatedLabels(boolean bool) {
	displayRepeatedLabels = boolean
}
135 | 
// GetDisplayRepeatedLabels returns DisplayRepeatedLabels, which controls whether repeated index labels are printed.
func GetDisplayRepeatedLabels() bool {
	return displayRepeatedLabels
}
140 | 
// SetDisplayStringNullFiller sets DisplayStringNullFiller to the string s.
// DisplayStringNullFiller is an option when printing a Series.
// It is how null string values are represented.
//
// Default: "NaN"
func SetDisplayStringNullFiller(s string) {
	displayStringNullFiller = s
}
149 | 
// GetDisplayStringNullFiller returns DisplayStringNullFiller, the string currently used to represent null string values when printing.
func GetDisplayStringNullFiller() string {
	return displayStringNullFiller
}
154 | 
// SetDisplayTimeFormat sets the layout used to display datetimes, using the reference-time layout syntax of package time.
// The layout s is stored as given.
//
// Default: "1/2/2006T15:04:05"
func SetDisplayTimeFormat(s string) {
	displayTimeFormat = s
}
161 | 
// GetDisplayTimeFormat returns DisplayTimeFormat, the layout string currently used to display datetimes.
func GetDisplayTimeFormat() string {
	return displayTimeFormat
}
166 | 
// SetStringNullValues sets StringNullValues to include only those items contained in nullList.
// StringNullValues is an option when constructing or converting a Series.
// It is the list of string values that are considered null.
// The slice is stored as-is (not copied), so later changes to nullList's elements are visible through the option.
//
// Default: []string{"NaN", "n/a", "N/A", "", "nil"}
func SetStringNullValues(nullList []string) {
	stringNullValues = nullList
}
175 | 
// GetStringNullValues returns StringNullValues, the list of string values currently considered null.
// The stored slice itself is returned, not a copy.
func GetStringNullValues() []string {
	return stringNullValues
}
180 | 
// SetLogWarnings sets LogWarnings to boolean.
// LogWarnings is an option when executing functions within this module.
// If true, non-returned errors are logged to stderr.
// This is relevant for many common exploratory methods, which are often chained together and therefore not designed to return an error value.
//
// Default: true
func SetLogWarnings(boolean bool) {
	logWarnings = boolean
}
190 | 
// GetLogWarnings returns LogWarnings, which reports whether non-returned errors should be logged.
func GetLogWarnings() bool {
	return logWarnings
}
195 | 
// SetAsync sets Async to boolean.
// Async is an option for executing certain operations over multiple groups (e.g., math on Groupings or Columns) as goroutines instead of synchronously.
// If true, eligible operations are split into goroutines and merged back together.
//
// Default: true
func SetAsync(boolean bool) {
	async = boolean
}
204 | 
// GetAsync returns Async, which reports whether eligible operations should run as goroutines.
func GetAsync() bool {
	return async
}
209 | 


--------------------------------------------------------------------------------
/dataframe/select.go:
--------------------------------------------------------------------------------
  1 | package dataframe
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"log"
  6 | 
  7 | 	"github.com/ptiger10/pd/internal/values"
  8 | 	"github.com/ptiger10/pd/options"
  9 | 	"github.com/ptiger10/pd/series"
 10 | )
 11 | 
 12 | // Row returns information about the values and index labels in this row but panics if an out-of-range position is provided.
 13 | func (df *DataFrame) Row(position int) Row {
 14 | 	vals := make([]interface{}, df.NumCols())
 15 | 	nulls := make([]bool, df.NumCols())
 16 | 	types := make([]options.DataType, df.NumCols())
 17 | 	for m := 0; m < df.NumCols(); m++ {
 18 | 		vals[m] = df.vals[m].Values.Value(position)
 19 | 		nulls[m] = df.vals[m].Values.Null(position)
 20 | 		types[m] = df.vals[m].DataType
 21 | 	}
 22 | 	idxElems := df.index.Elements(position)
 23 | 	return Row{Values: vals, Nulls: nulls, ValueTypes: types, Labels: idxElems.Labels, LabelTypes: idxElems.DataTypes}
 24 | }
 25 | 
 26 | // SelectLabel returns the integer location of the first row in index level 0 with the supplied label, or -1 if the label does not exist.
 27 | func (df *DataFrame) SelectLabel(label string) int {
 28 | 	if df.IndexLevels() == 0 {
 29 | 		if options.GetLogWarnings() {
 30 | 			log.Println("DataFrame.SelectLabel(): index has no levels")
 31 | 		}
 32 | 		return -1
 33 | 	}
 34 | 	df.index.Levels[0].UpdateLabelMap()
 35 | 	val, ok := df.index.Levels[0].LabelMap[label]
 36 | 	if !ok {
 37 | 		if options.GetLogWarnings() {
 38 | 			log.Printf("DataFrame.SelectLabel(): %v not in label map\n", label)
 39 | 		}
 40 | 		return -1
 41 | 	}
 42 | 	return val[0]
 43 | }
 44 | 
 45 | // SelectLabels returns the integer locations of all rows with the supplied labels within the supplied level.
 46 | // If an error is encountered, returns a new slice of 0 length.
 47 | func (df *DataFrame) SelectLabels(labels []string, level int) []int {
 48 | 	empty := make([]int, 0)
 49 | 	err := df.ensureIndexLevelPositions([]int{level})
 50 | 	if err != nil {
 51 | 		if options.GetLogWarnings() {
 52 | 			log.Printf("DataFrame.SelectLabels(): %v", err)
 53 | 		}
 54 | 		return empty
 55 | 	}
 56 | 	df.index.Levels[level].UpdateLabelMap()
 57 | 	include := make([]int, 0)
 58 | 	for _, label := range labels {
 59 | 		val, ok := df.index.Levels[level].LabelMap[label]
 60 | 		if !ok {
 61 | 			if options.GetLogWarnings() {
 62 | 				log.Printf("DataFrame.SelectLabels(): %v not in label map", label)
 63 | 			}
 64 | 			return empty
 65 | 		}
 66 | 		include = append(include, val...)
 67 | 	}
 68 | 	return include
 69 | }
 70 | 
 71 | // SelectCol returns the integer location of the first row in index level 0 with the supplied label, or -1 if the label does not exist.
 72 | func (df *DataFrame) SelectCol(label string) int {
 73 | 	if df.ColLevels() == 0 {
 74 | 		if options.GetLogWarnings() {
 75 | 			log.Println("DataFrame.SelectCol(): index has no levels")
 76 | 		}
 77 | 		return -1
 78 | 	}
 79 | 	val, ok := df.cols.Levels[0].LabelMap[label]
 80 | 	if !ok {
 81 | 		if options.GetLogWarnings() {
 82 | 			log.Printf("DataFrame.SelectCol(): %v not in label map\n", label)
 83 | 		}
 84 | 		return -1
 85 | 	}
 86 | 	return val[0]
 87 | }
 88 | 
 89 | // SelectCols returns the integer locations of all columns with the supplied labels within the supplied level.
 90 | // If an error is encountered, returns a new slice of 0 length.
 91 | func (df *DataFrame) SelectCols(labels []string, level int) []int {
 92 | 	empty := make([]int, 0)
 93 | 	err := df.ensureColumnLevelPositions([]int{level})
 94 | 	if err != nil {
 95 | 		if options.GetLogWarnings() {
 96 | 			log.Printf("DataFrame.SelectCols(): %v", err)
 97 | 		}
 98 | 		return empty
 99 | 	}
100 | 	include := make([]int, 0)
101 | 	for _, label := range labels {
102 | 		val, ok := df.cols.Levels[level].LabelMap[label]
103 | 		if !ok {
104 | 			if options.GetLogWarnings() {
105 | 				log.Printf("DataFrame.SelectCols(): %v not in label map", label)
106 | 			}
107 | 			return empty
108 | 		}
109 | 		include = append(include, val...)
110 | 	}
111 | 	return include
112 | }
113 | 
114 | // Col returns the first Series with the specified column label at column level 0.
115 | func (df *DataFrame) Col(label string) *series.Series {
116 | 	colPos, ok := df.cols.Levels[0].LabelMap[label]
117 | 	if !ok {
118 | 		if options.GetLogWarnings() {
119 | 			log.Printf("df.Col(): invalid column label: %v not in labels", label)
120 | 		}
121 | 		s, _ := series.New(nil)
122 | 		return s
123 | 	}
124 | 	return df.hydrateSeries(colPos[0])
125 | }
126 | 
127 | // ColAt returns the Series at the specified column.
128 | func (df *DataFrame) ColAt(col int) *series.Series {
129 | 	if err := df.ensureColumnPositions([]int{col}); err != nil {
130 | 		if options.GetLogWarnings() {
131 | 			log.Printf("df.ColAt(): %v", err)
132 | 		}
133 | 		s, _ := series.New(nil)
134 | 		return s
135 | 	}
136 | 	return df.hydrateSeries(col)
137 | }
138 | 
139 | // subsetRows subsets a DataFrame to include only index items and values at the row positions supplied and modifies the DataFrame in place.
140 | func (ip InPlace) subsetRows(positions []int) {
141 | 	for m := 0; m < ip.df.NumCols(); m++ {
142 | 		ip.df.vals[m].Values = ip.df.vals[m].Values.Subset(positions)
143 | 	}
144 | 
145 | 	ip.df.index.Subset(positions)
146 | }
147 | 
// subsetRows subsets a DataFrame to include only index items and values at the row positions supplied and returns the result.
// The subsetting is applied to the result of df.Copy(), not to the receiver itself.
// For use in internal functions that do not expect an error, such as GroupBy.
func (df *DataFrame) subsetRows(positions []int) *DataFrame {
	df = df.Copy()
	df.InPlace.subsetRows(positions)
	return df
}
155 | 
156 | // SubsetRows subsets a DataFrame to include only the rows at supplied integer positions and modifies the DataFrame in place.
157 | func (ip InPlace) SubsetRows(rowPositions []int) error {
158 | 	if len(rowPositions) == 0 {
159 | 		return fmt.Errorf("dataframe.SubsetRows(): no valid rows provided")
160 | 	}
161 | 	if err := ip.df.ensureRowPositions(rowPositions); err != nil {
162 | 		return fmt.Errorf("dataframe.SubsetRows(): %v", err)
163 | 	}
164 | 
165 | 	ip.subsetRows(rowPositions)
166 | 	return nil
167 | }
168 | 
// SubsetRows subsets a DataFrame to include only the rows at supplied integer positions and returns a new DataFrame.
// The subsetting is applied to the result of df.Copy(). That copy is returned together with any error from
// InPlace.SubsetRows, so on error the returned DataFrame is the unmodified copy.
func (df *DataFrame) SubsetRows(rowPositions []int) (*DataFrame, error) {
	df = df.Copy()
	err := df.InPlace.SubsetRows(rowPositions)
	return df, err
}
175 | 
176 | // SubsetColumns subsets a DataFrame to include only the columns at supplied integer positions and modifies the DataFrame in place.
177 | func (ip InPlace) SubsetColumns(columnPositions []int) error {
178 | 	if len(columnPositions) == 0 {
179 | 		return fmt.Errorf("dataframe.SubsetColumns(): no valid columns provided")
180 | 	}
181 | 
182 | 	if err := ip.df.ensureColumnPositions(columnPositions); err != nil {
183 | 		return fmt.Errorf("dataframe.SubsetColumns(): %v", err)
184 | 	}
185 | 
186 | 	vals := make([]values.Container, len(columnPositions))
187 | 	for i, pos := range columnPositions {
188 | 		vals[i] = ip.df.vals[pos]
189 | 	}
190 | 	ip.df.vals = vals
191 | 	ip.df.cols.Subset(columnPositions)
192 | 
193 | 	return nil
194 | }
195 | 
// SubsetColumns subsets a DataFrame to include only the columns at supplied integer positions and returns a new DataFrame.
// The subsetting is applied to the result of df.Copy(). That copy is returned together with any error from
// InPlace.SubsetColumns, so on error the returned DataFrame is the unmodified copy.
func (df *DataFrame) SubsetColumns(columnPositions []int) (*DataFrame, error) {
	df = df.Copy()
	err := df.InPlace.SubsetColumns(columnPositions)
	return df, err
}
202 | 


--------------------------------------------------------------------------------
/series/filter.go:
--------------------------------------------------------------------------------
  1 | package series
  2 | 
  3 | import (
  4 | 	"strings"
  5 | 	"time"
  6 | 
  7 | 	"github.com/ptiger10/pd/internal/values"
  8 | )
  9 | 
// Apply a callback function to every value in a Series and return a new Series.
// The Apply function iterates over all Series values in interface{} form and applies the callback function to each.
// The final values are then converted to match the datatype of the original Series.
// The callback is applied in place to the result of s.Copy(), which is then returned.
// The caller is responsible for handling the type assertion on the interface, though this step is not necessary if the datatype is known with certainty.
// For example, here are two ways to write an apply function that computes the z-score of every row:
//
// #1 (safer) error check type assertion
//
//	s.Apply(func(val interface{}) interface{} {
//		v, ok := val.(float64)
//		if !ok {
//			return ""
//		}
//		return (v - s.Mean()) / s.Std()
//	})
//
// Input:
// 0    1
// 1    2
// 2    3
//
// Output:
// 0    -1.22...
// 1        0
// 2     1.22...
//
// #2 (riskier) no error check
//
//	s.Apply(func(val interface{}) interface{} {
//		return (val.(float64) - s.Mean()) / s.Std()
//	})
func (s *Series) Apply(fn func(interface{}) interface{}) *Series {
	ret := s.Copy()
	ret.InPlace.Apply(fn)
	return ret
}
 45 | 
 46 | // Apply a callback function to every value in a Series and modify the Series in place.
 47 | func (ip InPlace) Apply(fn func(interface{}) interface{}) {
 48 | 	vals := ip.s.Values()
 49 | 	newVals := make([]interface{}, 0)
 50 | 	for _, val := range vals {
 51 | 		newVal := fn(val)
 52 | 		newVals = append(newVals, newVal)
 53 | 	}
 54 | 	// ducks error because []interface{} as arg in InterfaceFactory cannot trigger unsupported error
 55 | 	container := values.MustCreateValuesFromInterface(newVals)
 56 | 	ret, _ := values.Convert(container.Values, ip.s.datatype)
 57 | 	ip.s.values = ret
 58 | }
 59 | 
 60 | // Filter a Series using a callback function test.
 61 | // The Filter function iterates over all Series values in interface{} form and applies the callback test to each.
 62 | // The return value is a slice of integer positions of all the rows passing the test.
 63 | // The caller is responsible for handling the type assertion on the interface, though this step is not necessary if the datatype is known with certainty.
 64 | // For example, here are two ways to write a filter that returns all rows with the suffix "boo":
 65 | //
 66 | // #1 (safer) error check type assertion
 67 | //
 68 | //  s.Filter(func(val interface{}) bool {
 69 | //		v, ok := val.(string)
 70 | //		if !ok {
 71 | // 			return false
 72 | //		}
 73 | //		if strings.HasSuffix(v, "boo") {
 74 | // 			return true
 75 | // 		}
 76 | // 		return false
 77 | // 	})
 78 | //
 79 | // Input:
 80 | // 0    bamboo
 81 | // 1    leaves
 82 | // 2    taboo
 83 | //
 84 | // Output:
 85 | // []int{0,2}
 86 | //
 87 | // #2 (riskier) no error check
 88 | //
 89 | //  s.Filter(func(val interface{}) bool {
 90 | //		if strings.HasSuffix(val.(string), "boo") {
 91 | // 			return true
 92 | // 		}
 93 | // 		return false
 94 | // 	})
 95 | func (s *Series) Filter(cmp func(interface{}) bool) []int {
 96 | 	vals := s.Values()
 97 | 	include := make([]int, 0)
 98 | 	for i, val := range vals {
 99 | 		if cmp(val) {
100 | 			include = append(include, i)
101 | 		}
102 | 	}
103 | 	return include
104 | }
105 | 
106 | // filterFloat64 converts a Series to float values, applies a filter, and returns the rows where the condition is true.
107 | func (s *Series) filterFloat64(cmp func(float64) bool) []int {
108 | 	include := make([]int, 0)
109 | 	vals := s.ToFloat64().values.Vals().([]float64)
110 | 	for i, val := range vals {
111 | 		if cmp(val) {
112 | 			include = append(include, i)
113 | 		}
114 | 	}
115 | 	return include
116 | }
117 | 
118 | // filterString converts a Series to string values, applies a filter, and returns the rows where the condition is true.
119 | func (s *Series) filterString(cmp func(string) bool) []int {
120 | 	include := make([]int, 0)
121 | 	vals := s.ToString().values.Vals().([]string)
122 | 	for i, val := range vals {
123 | 		if cmp(val) {
124 | 			include = append(include, i)
125 | 		}
126 | 	}
127 | 	return include
128 | }
129 | 
130 | // filterBool converts a Series to bool values, applies a filter, and returns the rows where the condition is true.
131 | func (s *Series) filterBool(cmp func(bool) bool) []int {
132 | 	include := make([]int, 0)
133 | 	vals := s.ToBool().values.Vals().([]bool)
134 | 	for i, val := range vals {
135 | 		if cmp(val) {
136 | 			include = append(include, i)
137 | 		}
138 | 	}
139 | 	return include
140 | }
141 | 
142 | // filterDateTime converts a Series to datetime values, applies a filter, and returns the rows where the condition is true.
143 | func (s *Series) filterDateTime(cmp func(time.Time) bool) []int {
144 | 	include := make([]int, 0)
145 | 	vals := s.ToDateTime().values.Vals().([]time.Time)
146 | 	for i, val := range vals {
147 | 		if cmp(val) {
148 | 			include = append(include, i)
149 | 		}
150 | 	}
151 | 	return include
152 | }
153 | 
// GT filter: Greater Than (numeric).
// It returns the positions of the rows whose value, after the Series is converted to float64, is greater than comparison.
func (s *Series) GT(comparison float64) []int {
	return s.filterFloat64(func(elem float64) bool {
		return elem > comparison
	})
}
160 | 
// GTE filter: Greater Than or Equal To (numeric).
// It returns the positions of the rows whose value, after the Series is converted to float64, is greater than or equal to comparison.
func (s *Series) GTE(comparison float64) []int {
	return s.filterFloat64(func(elem float64) bool {
		return elem >= comparison
	})
}
167 | 
// LT filter: Less Than (numeric).
// It returns the positions of the rows whose value, after the Series is converted to float64, is less than comparison.
func (s *Series) LT(comparison float64) []int {
	return s.filterFloat64(func(elem float64) bool {
		return elem < comparison
	})
}
174 | 
// LTE filter: Less Than or Equal To (numeric).
// It returns the positions of the rows whose value, after the Series is converted to float64, is less than or equal to comparison.
func (s *Series) LTE(comparison float64) []int {
	return s.filterFloat64(func(elem float64) bool {
		return elem <= comparison
	})
}
181 | 
// EQ filter: Equal To (numeric).
// It returns the positions of the rows whose value, after the Series is converted to float64, equals comparison.
// The comparison is exact float64 equality (==); no tolerance is applied.
func (s *Series) EQ(comparison float64) []int {
	return s.filterFloat64(func(elem float64) bool {
		return elem == comparison
	})
}
188 | 
// NEQ filter: Not Equal To (numeric).
// It returns the positions of the rows whose value, after the Series is converted to float64, differs from comparison.
// The comparison is exact float64 inequality (!=); no tolerance is applied.
func (s *Series) NEQ(comparison float64) []int {
	return s.filterFloat64(func(elem float64) bool {
		return elem != comparison
	})
}
195 | 
// Contains filter: value contains substr (string).
// It returns the positions of the rows whose value, after the Series is converted to string, contains substr
// according to strings.Contains.
func (s *Series) Contains(substr string) []int {
	return s.filterString(func(elem string) bool {
		return strings.Contains(elem, substr)
	})
}
202 | 
203 | // InList filter - value is contained within list (string).
204 | func (s *Series) InList(list []string) []int {
205 | 	return s.filterString(func(elem string) bool {
206 | 		for _, s := range list {
207 | 			if elem == s {
208 | 				return true
209 | 			}
210 | 		}
211 | 		return false
212 | 	})
213 | }
214 | 
// True filter: value is true (bool).
// It returns the positions of the rows whose value, after the Series is converted to bool, is true.
func (s *Series) True() []int {
	return s.filterBool(func(elem bool) bool {
		return elem
	})
}
221 | 
// False filter: value is false (bool).
// It returns the positions of the rows whose value, after the Series is converted to bool, is false.
func (s *Series) False() []int {
	return s.filterBool(func(elem bool) bool {
		return !elem
	})
}
228 | 
// Before filter: value is before a specific time (datetime).
// It returns the positions of the rows whose value, after the Series is converted to datetime, is before t
// according to time.Time.Before.
func (s *Series) Before(t time.Time) []int {
	return s.filterDateTime(func(elem time.Time) bool {
		return elem.Before(t)
	})
}
235 | 
// After filter: value is after a specific time (datetime).
// It returns the positions of the rows whose value, after the Series is converted to datetime, is after t
// according to time.Time.After.
func (s *Series) After(t time.Time) []int {
	return s.filterDateTime(func(elem time.Time) bool {
		return elem.After(t)
	})
}
242 | 


--------------------------------------------------------------------------------
/series/math_test.go:
--------------------------------------------------------------------------------
  1 | package series
  2 | 
  3 | import (
  4 | 	"math"
  5 | 	"testing"
  6 | 
  7 | 	"github.com/ptiger10/pd/options"
  8 | )
  9 | 
 10 | func TestSeriesMath(t *testing.T) {
 11 | 	s, _ := New([]int{1, 2, 3})
 12 | 	if sum := s.Sum(); sum != 6 {
 13 | 		t.Errorf("s.Sum() returned %v, want %v", sum, 6)
 14 | 	}
 15 | 	if mean := s.Mean(); mean != 2 {
 16 | 		t.Errorf("s.Mean() returned %v, want %v", mean, 2)
 17 | 	}
 18 | 
 19 | }
 20 | 
 21 | func TestMath_numerics(t *testing.T) {
 22 | 	var tests = []struct {
 23 | 		name       string
 24 | 		s          *Series
 25 | 		wantSum    float64
 26 | 		wantMean   float64
 27 | 		wantMedian float64
 28 | 		wantMin    float64
 29 | 		wantMax    float64
 30 | 		wantQ1     float64
 31 | 		wantQ2     float64
 32 | 		wantQ3     float64
 33 | 		wantStd    float64
 34 | 	}{
 35 | 		{"float with null", MustNew([]float64{math.NaN(), math.NaN(), 2, 3, 1, 4}), 10, 2.5, 2.5, 1, 4, 1.5, 2.5, 3.5, 1.12},
 36 | 		{"float from string with null", MustNew([]string{"", "", "1", "2", "3", "4", "5"}).ToFloat64(), 15, 3, 3, 1, 5, 1.5, 3, 4.5, 1.41},
 37 | 		{"int from string with null", MustNew([]string{"", "", "1", "2", "3", "4", "5"}).ToInt64(), 15, 3, 3, 1, 5, 1.5, 3, 4.5, 1.41},
 38 | 		{"int", MustNew([]int{2, 1, 3, 4, 5, 6, 7, 8, 9}), 45, 5, 5, 1, 9, 2.5, 5, 7.5, 2.58},
 39 | 		{"float", MustNew([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9}), 45, 5, 5, 1, 9, 2.5, 5, 7.5, 2.58},
 40 | 		{"float with negative", MustNew([]float64{2, -1, 4, 3}), 8, 2, 2.5, -1, 4, 0.5, 2.5, 3.5, 1.87},
 41 | 	}
 42 | 	for _, tt := range tests {
 43 | 		t.Run(tt.name, func(t *testing.T) {
 44 | 			gotSum := tt.s.Sum()
 45 | 			if gotSum != tt.wantSum {
 46 | 				t.Errorf("Sum()returned %v, want %v", gotSum, tt.wantSum)
 47 | 			}
 48 | 			gotMean := tt.s.Mean()
 49 | 			if gotMean != tt.wantMean {
 50 | 				t.Errorf("Mean()returned %v, want %v", gotMean, tt.wantMean)
 51 | 			}
 52 | 			gotMedian := tt.s.Median()
 53 | 			if gotMedian != tt.wantMedian {
 54 | 				t.Errorf("Median()returned %v, want %v", gotMedian, tt.wantMedian)
 55 | 			}
 56 | 			gotMin := tt.s.Min()
 57 | 			if gotMin != tt.wantMin {
 58 | 				t.Errorf("Min()returned %v, want %v", gotMin, tt.wantMin)
 59 | 			}
 60 | 			gotMax := tt.s.Max()
 61 | 			if gotMax != tt.wantMax {
 62 | 				t.Errorf("Max()returned %v, want %v", gotMax, tt.wantMax)
 63 | 			}
 64 | 			gotQ1 := tt.s.Quartile(1)
 65 | 			if gotQ1 != tt.wantQ1 {
 66 | 				t.Errorf("Quartile(1)returned %v, want %v", gotQ1, tt.wantQ1)
 67 | 			}
 68 | 			gotQ2 := tt.s.Quartile(2)
 69 | 			if gotQ2 != tt.wantQ2 {
 70 | 				t.Errorf("Quartile(2)returned %v, want %v", gotQ2, tt.wantQ2)
 71 | 			}
 72 | 			gotQ3 := tt.s.Quartile(3)
 73 | 			if gotQ3 != tt.wantQ3 {
 74 | 				t.Errorf("Quartile(3)returned %v, want %v", gotQ3, tt.wantQ3)
 75 | 			}
 76 | 			gotStd := tt.s.Std()
 77 | 			if math.Round(gotStd*100)/100 != math.Round(tt.wantStd*100)/100 {
 78 | 				t.Errorf("Std()returned %v, want %v", gotStd, tt.wantStd)
 79 | 			}
 80 | 		})
 81 | 
 82 | 	}
 83 | }
 84 | 
 85 | func TestMath_numerics_async(t *testing.T) {
 86 | 	var tests = []struct {
 87 | 		name       string
 88 | 		s          *Series
 89 | 		wantSum    float64
 90 | 		wantMean   float64
 91 | 		wantMedian float64
 92 | 		wantMin    float64
 93 | 		wantMax    float64
 94 | 		wantQ1     float64
 95 | 		wantQ2     float64
 96 | 		wantQ3     float64
 97 | 		wantStd    float64
 98 | 	}{
 99 | 		{"float with null", MustNew([]float64{math.NaN(), math.NaN(), 2, 3, 1, 4}), 10, 2.5, 2.5, 1, 4, 1.5, 2.5, 3.5, 1.12},
100 | 	}
101 | 	for _, tt := range tests {
102 | 		options.SetAsync(false)
103 | 		defer options.RestoreDefaults()
104 | 		t.Run(tt.name, func(t *testing.T) {
105 | 			gotSum := tt.s.Sum()
106 | 			if gotSum != tt.wantSum {
107 | 				t.Errorf("Sum()returned %v, want %v", gotSum, tt.wantSum)
108 | 			}
109 | 			gotMean := tt.s.Mean()
110 | 			if gotMean != tt.wantMean {
111 | 				t.Errorf("Mean()returned %v, want %v", gotMean, tt.wantMean)
112 | 			}
113 | 			gotMedian := tt.s.Median()
114 | 			if gotMedian != tt.wantMedian {
115 | 				t.Errorf("Median()returned %v, want %v", gotMedian, tt.wantMedian)
116 | 			}
117 | 			gotMin := tt.s.Min()
118 | 			if gotMin != tt.wantMin {
119 | 				t.Errorf("Min()returned %v, want %v", gotMin, tt.wantMin)
120 | 			}
121 | 			gotMax := tt.s.Max()
122 | 			if gotMax != tt.wantMax {
123 | 				t.Errorf("Max()returned %v, want %v", gotMax, tt.wantMax)
124 | 			}
125 | 			gotQ1 := tt.s.Quartile(1)
126 | 			if gotQ1 != tt.wantQ1 {
127 | 				t.Errorf("Quartile(1)returned %v, want %v", gotQ1, tt.wantQ1)
128 | 			}
129 | 			gotQ2 := tt.s.Quartile(2)
130 | 			if gotQ2 != tt.wantQ2 {
131 | 				t.Errorf("Quartile(2)returned %v, want %v", gotQ2, tt.wantQ2)
132 | 			}
133 | 			gotQ3 := tt.s.Quartile(3)
134 | 			if gotQ3 != tt.wantQ3 {
135 | 				t.Errorf("Quartile(3)returned %v, want %v", gotQ3, tt.wantQ3)
136 | 			}
137 | 			gotStd := tt.s.Std()
138 | 			if math.Round(gotStd*100)/100 != math.Round(tt.wantStd*100)/100 {
139 | 				t.Errorf("Std()returned %v, want %v", gotStd, tt.wantStd)
140 | 			}
141 | 		})
142 | 
143 | 	}
144 | }
145 | 
146 | func TestMath_bool(t *testing.T) {
147 | 	var tests = []struct {
148 | 		s        *Series
149 | 		wantSum  float64
150 | 		wantMean float64
151 | 	}{
152 | 		{MustNew([]string{"", "true"}).ToBool(), 1, 1},
153 | 		{MustNew([]bool{true, false}), 1, .5},
154 | 		{MustNew([]bool{false}), 0, 0},
155 | 	}
156 | 	for _, tt := range tests {
157 | 		gotSum := tt.s.Sum()
158 | 		if gotSum != tt.wantSum {
159 | 			t.Errorf("Sum()returned %v, want %v", gotSum, tt.wantSum)
160 | 		}
161 | 		gotMean := tt.s.Mean()
162 | 		if gotMean != tt.wantMean {
163 | 			t.Errorf("Mean()returned %v, want %v", gotMean, tt.wantMean)
164 | 		}
165 | 	}
166 | }
167 | 
168 | func TestMath_unsupported(t *testing.T) {
169 | 	var tests = []struct {
170 | 		name string
171 | 		s    *Series
172 | 	}{
173 | 		{"string", MustNew([]string{"foo"})},
174 | 		{"null", MustNew([]string{})},
175 | 	}
176 | 	for _, tt := range tests {
177 | 		t.Run(tt.name, func(t *testing.T) {
178 | 			gotSum := tt.s.Sum()
179 | 			if !math.IsNaN(gotSum) {
180 | 				t.Errorf("Sum()returned %v, want NaN", gotSum)
181 | 			}
182 | 			gotMean := tt.s.Mean()
183 | 			if !math.IsNaN(gotMean) {
184 | 				t.Errorf("Mean()returned %v, want NaN", gotMean)
185 | 			}
186 | 			gotMedian := tt.s.Median()
187 | 			if !math.IsNaN(gotMedian) {
188 | 				t.Errorf("Median()returned %v, want NaN", gotMedian)
189 | 			}
190 | 			gotMin := tt.s.Min()
191 | 			if !math.IsNaN(gotMin) {
192 | 				t.Errorf("Min()returned %v, want NaN", gotMin)
193 | 			}
194 | 			gotMax := tt.s.Max()
195 | 			if !math.IsNaN(gotMax) {
196 | 				t.Errorf("Max()returned %v, want NaN", gotMax)
197 | 			}
198 | 			gotQ1 := tt.s.Quartile(1)
199 | 			if !math.IsNaN(gotQ1) {
200 | 				t.Errorf("Quartile(1)returned %v, want NaN", gotQ1)
201 | 			}
202 | 			gotQ2 := tt.s.Quartile(2)
203 | 			if !math.IsNaN(gotQ2) {
204 | 				t.Errorf("Quartile(2)returned %v, want NaN", gotQ2)
205 | 			}
206 | 			gotQ3 := tt.s.Quartile(3)
207 | 			if !math.IsNaN(gotQ3) {
208 | 				t.Errorf("Quartile(3)returned %v, want NaN", gotQ3)
209 | 			}
210 | 			gotStd := tt.s.Std()
211 | 			if !math.IsNaN(gotStd) {
212 | 				t.Errorf("Std()returned %v, want NaN", gotStd)
213 | 			}
214 | 		})
215 | 	}
216 | }
217 | 
218 | func TestMath_unsupported_other(t *testing.T) {
219 | 	s := MustNew([]float64{})
220 | 	got := s.Std()
221 | 	if !math.IsNaN(got) {
222 | 		t.Errorf("Std()returned %v, want NaN", got)
223 | 	}
224 | 	got = s.Median()
225 | 	if !math.IsNaN(got) {
226 | 		t.Errorf("Median()returned %v, want NaN", got)
227 | 	}
228 | 	gotQuartiles := s.quartiles()
229 | 	if gotQuartiles != nil {
230 | 		t.Errorf("Empty quartiles()returned %v, want nil", gotQuartiles)
231 | 	}
232 | 
233 | 	s = MustNew([]float64{1})
234 | 	got = s.Quartile(10)
235 | 	if !math.IsNaN(got) {
236 | 		t.Errorf("Quartile()returned %v, want NaN", got)
237 | 	}
238 | 
239 | 	s = MustNew([]float64{1})
240 | 	gotQuartiles = s.quartiles()
241 | 	if !math.IsNaN(gotQuartiles[0]) || gotQuartiles[1] != 1 || !math.IsNaN(gotQuartiles[2]) {
242 | 		t.Errorf("quartiles() of len < 4 returned %v, want NaN, median, NaN", gotQuartiles)
243 | 	}
244 | }
245 | 


--------------------------------------------------------------------------------
/series/select_test.go:
--------------------------------------------------------------------------------
  1 | package series
  2 | 
  3 | import (
  4 | 	"bytes"
  5 | 	"log"
  6 | 	"os"
  7 | 	"reflect"
  8 | 	"strings"
  9 | 	"testing"
 10 | 
 11 | 	"github.com/ptiger10/pd/options"
 12 | )
 13 | 
 14 | func TestElement(t *testing.T) {
 15 | 	s, err := New([]string{"", "valid"}, Config{MultiIndex: []interface{}{[]string{"A", "B"}, []int{1, 2}}})
 16 | 	if err != nil {
 17 | 		t.Error(err)
 18 | 	}
 19 | 	var tests = []struct {
 20 | 		position int
 21 | 		wantVal  interface{}
 22 | 		wantNull bool
 23 | 		wantIdx  []interface{}
 24 | 	}{
 25 | 		{0, "NaN", true, []interface{}{"A", int64(1)}},
 26 | 		{1, "valid", false, []interface{}{"B", int64(2)}},
 27 | 	}
 28 | 	wantIdxTypes := []options.DataType{options.String, options.Int64}
 29 | 	for _, test := range tests {
 30 | 		got := s.Element(test.position)
 31 | 		if got.Value != test.wantVal {
 32 | 			t.Errorf("Element returned value %v, want %v", got.Value, test.wantVal)
 33 | 		}
 34 | 		if got.Null != test.wantNull {
 35 | 			t.Errorf("Element returned bool %v, want %v", got.Null, test.wantNull)
 36 | 		}
 37 | 		if !reflect.DeepEqual(got.Labels, test.wantIdx) {
 38 | 			t.Errorf("Element returned index %#v, want %#v", got.Labels, test.wantIdx)
 39 | 		}
 40 | 		if !reflect.DeepEqual(got.LabelTypes, wantIdxTypes) {
 41 | 			t.Errorf("Element returned kind %v, want %v", got.LabelTypes, wantIdxTypes)
 42 | 		}
 43 | 	}
 44 | }
 45 | 
 46 | func TestSeries_At(t *testing.T) {
 47 | 	type args struct {
 48 | 		position int
 49 | 	}
 50 | 	tests := []struct {
 51 | 		name  string
 52 | 		input *Series
 53 | 		args  args
 54 | 		want  interface{}
 55 | 	}{
 56 | 		{name: "pass", input: MustNew([]string{"foo", "bar", "baz"}), args: args{1}, want: "bar"},
 57 | 		{name: "nil", input: MustNew([]string{"", "bar", "baz"}), args: args{0}, want: nil},
 58 | 		{"fail: invalid position", MustNew([]string{"foo", "bar", "baz"}), args{10}, nil},
 59 | 	}
 60 | 	for _, tt := range tests {
 61 | 		t.Run(tt.name, func(t *testing.T) {
 62 | 			var buf bytes.Buffer
 63 | 			log.SetOutput(&buf)
 64 | 			defer log.SetOutput(os.Stderr)
 65 | 			// test panic
 66 | 			if strings.Contains(tt.name, "fail") {
 67 | 				defer func() {
 68 | 					if r := recover(); r == nil {
 69 | 						t.Errorf("The code did not panic")
 70 | 					}
 71 | 				}()
 72 | 
 73 | 				tt.input.At(tt.args.position)
 74 | 				if buf.String() == "" {
 75 | 					t.Errorf("Series.At() returned no log message, want log due to fail")
 76 | 				}
 77 | 				return
 78 | 			}
 79 | 			if got := tt.input.At(tt.args.position); !reflect.DeepEqual(got, tt.want) {
 80 | 				t.Errorf("Series.At() = %v, want %v", got, tt.want)
 81 | 			}
 82 | 
 83 | 		})
 84 | 	}
 85 | }
 86 | 
 87 | func TestFrom(t *testing.T) {
 88 | 	s := MustNew([]string{"foo", "bar", "baz"}, Config{Index: []int{0, 1, 2}})
 89 | 	type args struct {
 90 | 		start int
 91 | 		end   int
 92 | 	}
 93 | 	tests := []struct {
 94 | 		name  string
 95 | 		input *Series
 96 | 		args  args
 97 | 		want  *Series
 98 | 	}{
 99 | 		{name: "ascending", input: s, args: args{start: 0, end: 2}, want: MustNew([]string{"foo", "bar", "baz"}, Config{Index: []int{0, 1, 2}})},
100 | 		{"single", s, args{1, 1}, MustNew([]string{"bar"}, Config{Index: []int{1}})},
101 | 		{"partial", s, args{1, 2}, MustNew([]string{"bar", "baz"}, Config{Index: []int{1, 2}})},
102 | 		{"descending", s, args{2, 0}, MustNew([]string{"baz", "bar", "foo"}, Config{Index: []int{2, 1, 0}})},
103 | 		{"fail: partial invalid", s, args{10, 0}, newEmptySeries()},
104 | 	}
105 | 	for _, tt := range tests {
106 | 		t.Run(tt.name, func(t *testing.T) {
107 | 			var buf bytes.Buffer
108 | 			log.SetOutput(&buf)
109 | 			defer log.SetOutput(os.Stderr)
110 | 
111 | 			got := tt.input.From(tt.args.start, tt.args.end)
112 | 			if !Equal(got, tt.want) {
113 | 				t.Errorf("Series.From() got %v, want %v", s, tt.want)
114 | 			}
115 | 
116 | 			if strings.Contains(tt.name, "fail") {
117 | 				if buf.String() == "" {
118 | 					t.Errorf("Series.From() returned no log message, want log due to fail")
119 | 				}
120 | 			}
121 | 		})
122 | 
123 | 	}
124 | }
125 | 
126 | func TestSeries_XS(t *testing.T) {
127 | 	s := MustNew([]string{"foo", "bar", "baz"}, Config{MultiIndex: []interface{}{[]int{1, 2, 3}, []string{"qux", "quux", "quuz"}}})
128 | 	type args struct {
129 | 		rowPositions   []int
130 | 		levelPositions []int
131 | 	}
132 | 	type want struct {
133 | 		series *Series
134 | 		err    bool
135 | 	}
136 | 	tests := []struct {
137 | 		name  string
138 | 		input *Series
139 | 		args  args
140 | 		want  want
141 | 	}{
142 | 		{name: "pass", input: s, args: args{[]int{0, 1}, []int{0}},
143 | 			want: want{series: MustNew([]string{"foo", "bar"}, Config{MultiIndex: []interface{}{[]int{1, 2}}}), err: false}},
144 | 		{"pass reverse", s, args{[]int{1, 0}, []int{1}},
145 | 			want{MustNew([]string{"bar", "foo"}, Config{MultiIndex: []interface{}{[]string{"quux", "qux"}}}), false}},
146 | 		{"pass multi reverse", s, args{[]int{1, 0}, []int{1, 0}},
147 | 			want{MustNew([]string{"bar", "foo"}, Config{MultiIndex: []interface{}{[]string{"quux", "qux"}, []int{2, 1}}}), false}},
148 | 		{"fail: invalid row position", s, args{[]int{10}, []int{0}}, want{newEmptySeries(), true}},
149 | 		{"fail: partial invalid row position", s, args{[]int{0, 10}, []int{0}}, want{newEmptySeries(), true}},
150 | 		{"fail: invalid level position", s, args{[]int{0}, []int{10}}, want{newEmptySeries(), true}},
151 | 		{"fail: partial invalid level position", s, args{[]int{0}, []int{0, 10}}, want{newEmptySeries(), true}},
152 | 	}
153 | 	for _, tt := range tests {
154 | 		t.Run(tt.name, func(t *testing.T) {
155 | 			got, err := tt.input.XS(tt.args.rowPositions, tt.args.levelPositions)
156 | 			if (err != nil) != tt.want.err {
157 | 				t.Errorf("Series.XS() error = %v, want %v", err, tt.want.err)
158 | 			}
159 | 			if !reflect.DeepEqual(got, tt.want.series) {
160 | 				t.Errorf("Series.XS() = %v, want %v", got, tt.want.series)
161 | 			}
162 | 
163 | 		})
164 | 	}
165 | }
166 | 
167 | func TestSeries_SelectLabel(t *testing.T) {
168 | 	s := MustNew([]string{"foo", "bar", "baz"}, Config{Index: []int{1, 1, 1}})
169 | 	type args struct {
170 | 		label string
171 | 	}
172 | 	tests := []struct {
173 | 		name  string
174 | 		input *Series
175 | 		args  args
176 | 		want  int
177 | 	}{
178 | 		{name: "pass", input: s, args: args{label: "1"}, want: 0},
179 | 		{"fail: empty Series", newEmptySeries(), args{label: "1"}, -1},
180 | 		{"fail: label not in Series", s, args{label: "100"}, -1},
181 | 	}
182 | 	for _, tt := range tests {
183 | 		t.Run(tt.name, func(t *testing.T) {
184 | 			var buf bytes.Buffer
185 | 			log.SetOutput(&buf)
186 | 			defer log.SetOutput(os.Stderr)
187 | 			if got := tt.input.SelectLabel(tt.args.label); got != tt.want {
188 | 				t.Errorf("Series.SelectLabel() = %v, want %v", got, tt.want)
189 | 			}
190 | 			if strings.Contains(tt.name, "fail") {
191 | 				if buf.String() == "" {
192 | 					t.Errorf("Series.SelectLabel() returned no log message, want log due to fail")
193 | 				}
194 | 			}
195 | 		})
196 | 	}
197 | }
198 | 
199 | func TestSeries_SelectLabels(t *testing.T) {
200 | 	s := MustNew([]string{"foo", "bar", "baz"}, Config{MultiIndex: []interface{}{[]int{1, 1, 1}, []string{"qux", "quux", "quuz"}}})
201 | 	type args struct {
202 | 		labels []string
203 | 		level  int
204 | 	}
205 | 	tests := []struct {
206 | 		name  string
207 | 		input *Series
208 | 		args  args
209 | 		want  []int
210 | 	}{
211 | 		{name: "pass", input: s, args: args{labels: []string{"1"}, level: 0}, want: []int{0, 1, 2}},
212 | 		{"pass", s, args{[]string{"qux", "quux"}, 1}, []int{0, 1}},
213 | 		{"fail: empty Series", newEmptySeries(), args{[]string{"1"}, 0}, []int{}},
214 | 		{"fail: label not in Series", s, args{[]string{"100"}, 0}, []int{}},
215 | 		{"fail: invalid level", s, args{[]string{"1"}, 100}, []int{}},
216 | 	}
217 | 	for _, tt := range tests {
218 | 		t.Run(tt.name, func(t *testing.T) {
219 | 			var buf bytes.Buffer
220 | 			log.SetOutput(&buf)
221 | 			defer log.SetOutput(os.Stderr)
222 | 			if got := tt.input.SelectLabels(tt.args.labels, tt.args.level); !reflect.DeepEqual(got, tt.want) {
223 | 				t.Errorf("Series.SelectLabels() = %v, want %v", got, tt.want)
224 | 			}
225 | 			if strings.Contains(tt.name, "fail") {
226 | 				if buf.String() == "" {
227 | 					t.Errorf("Series.SelectLabels() returned no log message, want log due to fail")
228 | 				}
229 | 			}
230 | 		})
231 | 	}
232 | }
233 | 


--------------------------------------------------------------------------------
/pd.go:
--------------------------------------------------------------------------------
  1 | // Package pd (aka GoPandas) is a library for cleaning, aggregating, and transforming data.
  2 | // GoPandas combines a flexible API familiar to Python pandas users with the strengths of Go,
  3 | // including type safety, predictable error handling, and concurrent processing.
  4 | package pd
  5 | 
  6 | import (
  7 | 	"bytes"
  8 | 	"encoding/csv"
  9 | 	"fmt"
 10 | 	"io/ioutil"
 11 | 	"log"
 12 | 
 13 | 	"github.com/ptiger10/pd/dataframe"
 14 | 	"github.com/ptiger10/pd/internal/values"
 15 | 	"github.com/ptiger10/pd/options"
 16 | 	"github.com/ptiger10/pd/series"
 17 | )
 18 | 
 19 | // Series constructs a new Series.
 20 | func Series(data interface{}, config ...Config) (*series.Series, error) {
 21 | 	tmp := Config{}
 22 | 	if config != nil {
 23 | 		if len(config) > 1 {
 24 | 			return series.MustNew(nil), fmt.Errorf("pd.Series(): can supply at most one Config (%d > 1)", len(config))
 25 | 		}
 26 | 		tmp = config[0]
 27 | 	}
 28 | 	sConfig := series.Config{
 29 | 		Name: tmp.Name, DataType: tmp.DataType,
 30 | 		Index: tmp.Index, IndexName: tmp.IndexName,
 31 | 		MultiIndex: tmp.MultiIndex, MultiIndexNames: tmp.MultiIndexNames,
 32 | 	}
 33 | 	s, err := series.New(data, sConfig)
 34 | 	if err != nil {
 35 | 		return series.MustNew(nil), fmt.Errorf("pd.Series(): %v", err)
 36 | 	}
 37 | 	return s, nil
 38 | 
 39 | }
 40 | 
 41 | // DataFrame constructs a new DataFrame.
 42 | func DataFrame(data []interface{}, config ...Config) (*dataframe.DataFrame, error) {
 43 | 	tmp := Config{}
 44 | 	if config != nil {
 45 | 		if len(config) > 1 {
 46 | 			return dataframe.MustNew(nil), fmt.Errorf("pd.Series(): can supply at most one Config (%d > 1)", len(config))
 47 | 		}
 48 | 		tmp = config[0]
 49 | 	}
 50 | 	dfConfig := dataframe.Config{
 51 | 		Name: tmp.Name, DataType: tmp.DataType,
 52 | 		Index: tmp.Index, IndexName: tmp.IndexName,
 53 | 		MultiIndex: tmp.MultiIndex, MultiIndexNames: tmp.MultiIndexNames,
 54 | 		Col: tmp.Col, ColName: tmp.ColName,
 55 | 		MultiCol: tmp.MultiCol, MultiColNames: tmp.MultiColNames,
 56 | 	}
 57 | 	df, err := dataframe.New(data, dfConfig)
 58 | 	if err != nil {
 59 | 		return dataframe.MustNew(nil), fmt.Errorf("pd.DataFrame(): %v", err)
 60 | 	}
 61 | 	return df, nil
 62 | }
 63 | 
 64 | // ReadInterface converts [][]interface{}{row1{col1, ...}...} into a DataFrame
 65 | func ReadInterface(input [][]interface{}, config ...ReadOptions) (*dataframe.DataFrame, error) {
 66 | 	if len(input) == 0 {
 67 | 		return dataframe.MustNew(nil), fmt.Errorf("ReadInterface(): Input must contain at least one row")
 68 | 	}
 69 | 	if len(input[0]) == 0 {
 70 | 		return dataframe.MustNew(nil), fmt.Errorf("ReadInterface(): must contain at least one column")
 71 | 	}
 72 | 
 73 | 	data := make([][]interface{}, len(input))
 74 | 	for i := 0; i < len(input); i++ {
 75 | 		data[i] = make([]interface{}, len(input[0]))
 76 | 		for m := 0; m < len(input[0]); m++ {
 77 | 			data[i][m] = input[i][m]
 78 | 		}
 79 | 	}
 80 | 
 81 | 	tmp := ReadOptions{}
 82 | 	if config != nil {
 83 | 		if len(config) > 1 {
 84 | 			return dataframe.MustNew(nil), fmt.Errorf("ReadInterface(): can supply at most one ReadOptions (%d > 1)",
 85 | 				len(config))
 86 | 		}
 87 | 		tmp = config[0]
 88 | 	}
 89 | 
 90 | 	var tmpMultiCol [][]interface{}
 91 | 	if tmp.DropRows > len(data) {
 92 | 		return dataframe.MustNew(nil), fmt.Errorf("ReadInterface(): DropRows cannot exceed the number of rows (%d > %d)",
 93 | 			tmp.DropRows, len(data))
 94 | 	}
 95 | 
 96 | 	data = data[tmp.DropRows:]
 97 | 	// header rows
 98 | 	if tmp.HeaderRows > len(data) {
 99 | 		return dataframe.MustNew(nil), fmt.Errorf("ReadInterface(): HeaderRows cannot exceed the number of rows (%d > %d)",
100 | 			tmp.HeaderRows, len(data))
101 | 	}
102 | 
103 | 	tmpMultiCol = data[:tmp.HeaderRows]
104 | 	for m := 0; m < tmp.HeaderRows; m++ {
105 | 		tmpMultiCol[m] = tmpMultiCol[m][tmp.IndexCols:]
106 | 	}
107 | 
108 | 	data = data[tmp.HeaderRows:]
109 | 
110 | 	if tmp.IndexCols > len(data[0]) {
111 | 		return dataframe.MustNew(nil), fmt.Errorf("ReadInterface(): IndexCols cannot exceed the number of rows (%d > %d)",
112 | 			tmp.IndexCols, len(data))
113 | 	}
114 | 
115 | 	tmpVals := make([][]interface{}, len(data[0])-tmp.IndexCols)
116 | 	tmpMultiIndex := make([][]interface{}, tmp.IndexCols)
117 | 
118 | 	for m := 0; m < len(data[0])-tmp.IndexCols; m++ {
119 | 		tmpVals[m] = make([]interface{}, len(data))
120 | 	}
121 | 	for m := 0; m < tmp.IndexCols; m++ {
122 | 		tmpMultiIndex[m] = make([]interface{}, len(data))
123 | 	}
124 | 
125 | 	// transpose index and values
126 | 	for i := 0; i < len(data); i++ {
127 | 		for m := 0; m < len(data[0]); m++ {
128 | 			if m < tmp.IndexCols {
129 | 				tmpMultiIndex[m][i] = data[i][m]
130 | 			} else {
131 | 				tmpVals[m-tmp.IndexCols][i] = data[i][m]
132 | 			}
133 | 		}
134 | 	}
135 | 	// convert [][]interface{} to []interface{} of []interface for compatibility with DataFrame constructor
136 | 	var (
137 | 		multiIndex []interface{}
138 | 		vals       []interface{}
139 | 	)
140 | 	for _, col := range tmpMultiIndex {
141 | 		multiIndex = append(multiIndex, col)
142 | 	}
143 | 	for _, col := range tmpVals {
144 | 		vals = append(vals, col)
145 | 	}
146 | 	multiCol := make([][]string, len(tmpMultiCol))
147 | 
148 | 	if len(tmpMultiCol) > 0 {
149 | 		for j := 0; j < len(tmpMultiCol); j++ {
150 | 			multiCol[j] = make([]string, len(tmpMultiCol[0]))
151 | 			for m := 0; m < len(tmpMultiCol[0]); m++ {
152 | 				multiCol[j][m] = fmt.Sprint(tmpMultiCol[j][m])
153 | 			}
154 | 		}
155 | 	}
156 | 
157 | 	// ducks error because all []interface{} values are supported and Config properties are controlled
158 | 	df, _ := DataFrame(vals, Config{Manual: tmp.Manual, MultiIndex: multiIndex, MultiCol: multiCol})
159 | 
160 | 	for k, v := range tmp.DataTypes {
161 | 		colInt := df.SelectCol(k)
162 | 		if colInt != -1 {
163 | 			df.InPlace.SetCol(colInt, df.ColAt(colInt).Convert(v))
164 | 		}
165 | 	}
166 | 	for k, v := range tmp.IndexDataTypes {
167 | 		err := df.Index.Convert(v, k)
168 | 		if err != nil {
169 | 			if options.GetLogWarnings() {
170 | 				log.Printf("warning: ReadInterface() converting IndexDataTypes: %v", err)
171 | 			}
172 | 		}
173 | 	}
174 | 	df.RenameCols(tmp.Rename)
175 | 
176 | 	return df, nil
177 | }
178 | 
179 | // ReadCSV converts a CSV file into a DataFrame.
180 | func ReadCSV(path string, config ...ReadOptions) (*dataframe.DataFrame, error) {
181 | 	tmp := ReadOptions{}
182 | 	if config != nil {
183 | 		if len(config) > 1 {
184 | 			return dataframe.MustNew(nil), fmt.Errorf("ReadCSV(): can supply at most one ReadOptions (%d > 1)",
185 | 				len(config))
186 | 		}
187 | 		tmp = config[0]
188 | 	}
189 | 
190 | 	data, err := ioutil.ReadFile(path)
191 | 	if err != nil {
192 | 		return dataframe.MustNew(nil), fmt.Errorf("ReadCSV(): %s", err)
193 | 	}
194 | 	reader := csv.NewReader(bytes.NewReader(data))
195 | 	if tmp.Delimiter != 0 {
196 | 		reader.Comma = tmp.Delimiter
197 | 	}
198 | 
199 | 	records, err := reader.ReadAll()
200 | 	if err != nil {
201 | 		return dataframe.MustNew(nil), fmt.Errorf("ReadCSV(): %v", err)
202 | 	}
203 | 	if len(records) == 0 {
204 | 		return dataframe.MustNew(nil), fmt.Errorf("ReadCSV(): input must contain at least one row")
205 | 	}
206 | 
207 | 	// convert to [][]interface
208 | 	var interfaceRecords [][]interface{}
209 | 	for j := 0; j < len(records); j++ {
210 | 		interfaceRecords = append(interfaceRecords, make([]interface{}, len(records[0])))
211 | 		for m := 0; m < len(records[0]); m++ {
212 | 			// optional interpolation if not in Manual mode
213 | 			if !tmp.Manual {
214 | 				interfaceRecords[j][m] = values.InterpolateString(records[j][m])
215 | 			} else {
216 | 				interfaceRecords[j][m] = records[j][m]
217 | 			}
218 | 		}
219 | 	}
220 | 
221 | 	df, err := ReadInterface(interfaceRecords, tmp)
222 | 	if err != nil {
223 | 		return dataframe.MustNew(nil), fmt.Errorf("ReadCSV(): %v", err)
224 | 	}
225 | 	return df, nil
226 | }
227 | 
// Config customizes the construction of either a DataFrame or Series.
//
// The Series constructor forwards Name, DataType, Index, IndexName,
// MultiIndex, and MultiIndexNames. The DataFrame constructor forwards those
// fields plus Col, ColName, MultiCol, and MultiColNames.
type Config struct {
	Name            string
	DataType        options.DataType
	Index           interface{}
	IndexName       string
	MultiIndex      []interface{}
	MultiIndexNames []string
	// Column fields are forwarded by the DataFrame constructor only.
	Col           []string
	ColName       string
	MultiCol      [][]string
	MultiColNames []string
	// Manual is set by ReadInterface from ReadOptions.Manual.
	// NOTE(review): neither constructor in this file forwards Manual to the
	// underlying series/dataframe Config — confirm whether that is intended.
	Manual bool
}
242 | 
// ReadOptions are options for reading in files from other formats
type ReadOptions struct {
	// DropRows is the number of leading rows ReadInterface discards.
	DropRows int
	// HeaderRows is the number of rows (after dropped rows) used as column labels.
	HeaderRows int
	// IndexCols is the number of leading columns used as index levels.
	IndexCols int
	// Manual, when true, makes ReadCSV keep fields as raw strings instead of
	// passing them through values.InterpolateString.
	Manual bool
	// DataTypes maps a column label (looked up with SelectCol) to the type
	// name handed to Convert for that column.
	DataTypes map[string]string
	// IndexDataTypes maps an index level to the type name handed to Index.Convert.
	IndexDataTypes map[int]string
	// ColumnDataTypes is not read by ReadInterface or ReadCSV in this file.
	ColumnDataTypes map[int]string
	// Rename is passed to RenameCols on the resulting DataFrame.
	Rename map[string]string
	// Delimiter, when non-zero, replaces the comma as the CSV field separator.
	Delimiter rune
}
255 | 


--------------------------------------------------------------------------------
/dataframe/group.go:
--------------------------------------------------------------------------------
  1 | package dataframe
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"log"
  6 | 	"sort"
  7 | 	"strings"
  8 | 	"sync"
  9 | 
 10 | 	"github.com/ptiger10/pd/internal/values"
 11 | 	"github.com/ptiger10/pd/options"
 12 | 	"github.com/ptiger10/pd/series"
 13 | )
 14 | 
 15 | type group struct {
 16 | 	Positions     []int
 17 | 	FirstPosition int
 18 | }
 19 | 
 20 | func (grp *group) copy() *group {
 21 | 	pos := make([]int, len(grp.Positions))
 22 | 	for i, p := range grp.Positions {
 23 | 		pos[i] = p
 24 | 	}
 25 | 	return &group{Positions: pos, FirstPosition: grp.FirstPosition}
 26 | }
 27 | 
 28 | // copy a grouping
 29 | func (g Grouping) copy() Grouping {
 30 | 	grps := make(map[string]*group)
 31 | 	for k, v := range g.groups {
 32 | 		grps[k] = v.copy()
 33 | 	}
 34 | 	return Grouping{
 35 | 		df:     g.df.Copy(),
 36 | 		groups: grps,
 37 | 	}
 38 | }
 39 | 
 40 | // SortedGroups returns all valid group labels in the Grouping, sorted in alphabetical order.
 41 | func (g Grouping) SortedGroups() []string {
 42 | 	var keys []string
 43 | 	for k := range g.groups {
 44 | 		keys = append(keys, k)
 45 | 	}
 46 | 	sort.Strings(keys)
 47 | 	return keys
 48 | }
 49 | 
 50 | // Groups returns all valid group labels in the Grouping, in their original group position.
 51 | func (g Grouping) Groups() []string {
 52 | 	type groupContainer struct {
 53 | 		grp   *group
 54 | 		label string
 55 | 	}
 56 | 	var orderedGroups []groupContainer
 57 | 	for k, v := range g.groups {
 58 | 		orderedGroups = append(orderedGroups, groupContainer{grp: v, label: k})
 59 | 	}
 60 | 	sort.Slice(orderedGroups, func(i, j int) bool {
 61 | 		if orderedGroups[i].grp.FirstPosition < orderedGroups[j].grp.FirstPosition {
 62 | 			return true
 63 | 		}
 64 | 		return false
 65 | 	})
 66 | 	var labels []string
 67 | 	for _, grp := range orderedGroups {
 68 | 		labels = append(labels, grp.label)
 69 | 	}
 70 | 	return labels
 71 | }
 72 | 
// Len returns the number of groups in the Grouping, i.e. the number of
// distinct group labels held in its groups map.
func (g Grouping) Len() int {
	return len(g.groups)
}
 77 | 
 78 | // Group returns the DataFrame with the given group label, or an error if that label does not exist.
 79 | func (g Grouping) Group(label string) *DataFrame {
 80 | 	group, ok := g.groups[label]
 81 | 	if !ok {
 82 | 		if options.GetLogWarnings() {
 83 | 			log.Printf("s.Grouping.Group(): label %v not in g.Groups()", label)
 84 | 		}
 85 | 		return newEmptyDataFrame()
 86 | 	}
 87 | 	s := g.df.subsetRows(group.Positions)
 88 | 	return s
 89 | }
 90 | 
 91 | func newEmptyGrouping() Grouping {
 92 | 	groups := make(map[string]*group)
 93 | 	df := newEmptyDataFrame()
 94 | 	return Grouping{df: df, groups: groups, err: true}
 95 | }
 96 | 
 97 | // GroupByIndex groups a DataFrame by one or more of its index levels. If no level is provided, all index levels are used.
 98 | func (df *DataFrame) GroupByIndex(levelPositions ...int) Grouping {
 99 | 	if len(levelPositions) != 0 {
100 | 		df = df.Copy()
101 | 		err := df.Index.SubsetLevels(levelPositions)
102 | 		if err != nil {
103 | 			if options.GetLogWarnings() {
104 | 				log.Printf("df.GroupByIndex() %v\n", err)
105 | 			}
106 | 			return newEmptyGrouping()
107 | 		}
108 | 	}
109 | 
110 | 	// Default: use all label level positions
111 | 	return df.groupby()
112 | }
113 | 
114 | // GroupBy groups a DataFrame by one or more columns.
115 | // If no column is supplied or an invalid column is supplied, an empty grouping is returned.
116 | func (df *DataFrame) GroupBy(cols ...int) Grouping {
117 | 	if len(cols) == 0 {
118 | 		if options.GetLogWarnings() {
119 | 			log.Print("df.GroupBy(): empty cols, returning empty Grouping\n")
120 | 		}
121 | 		return newEmptyGrouping()
122 | 	}
123 | 	if len(cols) == df.NumCols() {
124 | 		if options.GetLogWarnings() {
125 | 			log.Print("df.GroupBy(): at least one column must be excluded from the Grouping\n")
126 | 		}
127 | 		return newEmptyGrouping()
128 | 	}
129 | 	if err := df.ensureColumnPositions(cols); err != nil {
130 | 		if options.GetLogWarnings() {
131 | 			log.Printf("df.GroupBy(): %v\n", err)
132 | 		}
133 | 		return newEmptyGrouping()
134 | 	}
135 | 	df = df.Copy()
136 | 	df.InPlace.replaceIndex(cols)
137 | 
138 | 	return df.groupby()
139 | }
140 | 
141 | func (ip InPlace) replaceIndex(cols []int) {
142 | 	lengthArchive := ip.df.IndexLevels()
143 | 	// set new levels
144 | 	ip.setIndexes(cols)
145 | 
146 | 	// Drop old levels
147 | 	for j := len(cols); j < len(cols)+lengthArchive; j++ {
148 | 		// use lower-level method to change index in place and duck error because level is certain to be in index
149 | 		ip.df.index.DropLevel(j)
150 | 	}
151 | 	ip.df.index.UpdateNameMap()
152 | }
153 | 
154 | func (df *DataFrame) groupby() Grouping {
155 | 	groups := make(map[string]*group)
156 | 	for i := 0; i < df.Len(); i++ {
157 | 		labels := df.Row(i).Labels
158 | 		var strLabels []string
159 | 		for _, label := range labels {
160 | 			strLabels = append(strLabels, fmt.Sprint(label))
161 | 		}
162 | 		groupLabel := strings.Join(strLabels, values.GetMultiColNameSeparator())
163 | 
164 | 		// create group with groupLabel and index labels if it is not already within groups map
165 | 		if _, ok := groups[groupLabel]; !ok {
166 | 			groups[groupLabel] = &group{FirstPosition: i}
167 | 		}
168 | 		groups[groupLabel].Positions = append(groups[groupLabel].Positions, i)
169 | 	}
170 | 	return Grouping{df: df, groups: groups}
171 | }
172 | 
173 | // First returns the first occurrence of each grouping in the DataFrame.
174 | func (g Grouping) First() *DataFrame {
175 | 	first := func(group string) *DataFrame {
176 | 		position := g.groups[group].Positions[0]
177 | 		df := g.df.subsetRows([]int{position})
178 | 		return df
179 | 	}
180 | 	ret := newEmptyDataFrame()
181 | 	for _, group := range g.Groups() {
182 | 		df := first(group)
183 | 		ret.InPlace.appendDataFrameRow(df)
184 | 	}
185 | 	return ret
186 | }
187 | 
188 | // Last returns the last occurrence of each grouping in the DataFrame.
189 | func (g Grouping) Last() *DataFrame {
190 | 	last := func(group string) *DataFrame {
191 | 		lastIdx := len(g.groups[group].Positions) - 1
192 | 		position := g.groups[group].Positions[lastIdx]
193 | 		df := g.df.subsetRows([]int{position})
194 | 		return df
195 | 	}
196 | 	ret := newEmptyDataFrame()
197 | 	for _, group := range g.Groups() {
198 | 		df := last(group)
199 | 		ret.InPlace.appendDataFrameRow(df)
200 | 	}
201 | 	return ret
202 | }
203 | 
// calcReturn carries the result of one group's calculation through the
// channel used by asyncMath.
type calcReturn struct {
	// df is the single group's result as produced by mathSingleGroup.
	df *DataFrame
	// n is the group's position in Groups(); asyncMath sorts on it to restore order.
	n int
}
208 | 
209 | func (g Grouping) asyncMath(fn func(*DataFrame) *series.Series) *DataFrame {
210 | 	var wg sync.WaitGroup
211 | 	// synchronous option
212 | 	if !options.GetAsync() {
213 | 		ret := newEmptyDataFrame()
214 | 		for _, group := range g.Groups() {
215 | 			calc := g.mathSingleGroup(group, fn)
216 | 			ret.InPlace.appendDataFrameRow(calc)
217 | 		}
218 | 		return ret
219 | 	}
220 | 
221 | 	// asynchronous option
222 | 	ch := make(chan calcReturn, g.Len())
223 | 	for i, group := range g.Groups() {
224 | 		wg.Add(1)
225 | 		go g.awaitMath(ch, i, group, fn, &wg)
226 | 	}
227 | 	wg.Wait()
228 | 	close(ch)
229 | 	var returnedData []calcReturn
230 | 	for result := range ch {
231 | 		returnedData = append(returnedData, result)
232 | 	}
233 | 	sort.Slice(returnedData, func(i, j int) bool {
234 | 		return returnedData[i].n < returnedData[j].n
235 | 	})
236 | 
237 | 	df := newEmptyDataFrame()
238 | 	for _, result := range returnedData {
239 | 		df.InPlace.appendDataFrameRow((result.df))
240 | 	}
241 | 	df.index.NeedsRefresh = true
242 | 	return df
243 | }
244 | 
245 | func (g Grouping) awaitMath(
246 | 	ch chan<- calcReturn, n int, group string,
247 | 	fn func(*DataFrame) *series.Series, wg *sync.WaitGroup,
248 | ) {
249 | 	df := g.mathSingleGroup(group, fn)
250 | 	ret := calcReturn{df: df, n: n}
251 | 	ch <- ret
252 | 	wg.Done()
253 | }
254 | 
255 | func (g Grouping) mathSingleGroup(group string, fn func(*DataFrame) *series.Series) *DataFrame {
256 | 	positions := g.groups[group].Positions
257 | 	rows := g.df.subsetRows(positions)
258 | 	calc := fn(rows)
259 | 	calc.Rename(group)
260 | 	df := transposeSeries(calc)
261 | 	return df
262 | }
263 | 
// Sum for each group in the Grouping.
// It runs (*DataFrame).Sum on every group through asyncMath and returns the
// combined DataFrame.
func (g Grouping) Sum() *DataFrame {
	return g.asyncMath((*DataFrame).Sum)
}
268 | 
// Mean for each group in the Grouping.
// It runs (*DataFrame).Mean on every group through asyncMath and returns the
// combined DataFrame.
func (g Grouping) Mean() *DataFrame {
	return g.asyncMath((*DataFrame).Mean)
}
273 | 
// Min for each group in the Grouping.
// It runs (*DataFrame).Min on every group through asyncMath and returns the
// combined DataFrame.
func (g Grouping) Min() *DataFrame {
	return g.asyncMath((*DataFrame).Min)
}
278 | 
// Max for each group in the Grouping.
// It runs (*DataFrame).Max on every group through asyncMath and returns the
// combined DataFrame.
func (g Grouping) Max() *DataFrame {
	return g.asyncMath((*DataFrame).Max)
}
283 | 
// Median for each group in the Grouping.
// It runs (*DataFrame).Median on every group through asyncMath and returns
// the combined DataFrame.
func (g Grouping) Median() *DataFrame {
	return g.asyncMath((*DataFrame).Median)
}
288 | 
// Std for each group in the Grouping.
// It runs (*DataFrame).Std on every group through asyncMath and returns the
// combined DataFrame.
func (g Grouping) Std() *DataFrame {
	return g.asyncMath((*DataFrame).Std)
}
293 | 


--------------------------------------------------------------------------------
/series/constructor_test.go:
--------------------------------------------------------------------------------
  1 | package series
  2 | 
  3 | import (
  4 | 	"bytes"
  5 | 	"log"
  6 | 	"os"
  7 | 	"reflect"
  8 | 	"strings"
  9 | 	"testing"
 10 | 	"time"
 11 | 
 12 | 	"github.com/ptiger10/pd/internal/index"
 13 | 	"github.com/ptiger10/pd/internal/values"
 14 | 	"github.com/ptiger10/pd/options"
 15 | )
 16 | 
 17 | func TestNew_emptySeries(t *testing.T) {
 18 | 	got := newEmptySeries()
 19 | 	want := &Series{values: values.MustCreateValuesFromInterface(nil).Values, index: index.New(), datatype: options.None}
 20 | 	if !Equal(got, want) {
 21 | 		t.Errorf("New(nil) returned %#v, want %#v", got, want)
 22 | 	}
 23 | 	_ = got.Len()
 24 | 	_ = got.NumLevels()
 25 | 	_ = got.Name()
 26 | }
 27 | 
 28 | func TestNew_nilWithConfig_emptySeries(t *testing.T) {
 29 | 	got, err := New(nil, Config{Index: "foo"})
 30 | 	if err != nil {
 31 | 		t.Errorf("New(): %v", err)
 32 | 	}
 33 | 	want := newEmptySeries()
 34 | 	if !Equal(got, want) {
 35 | 		t.Errorf("New(nil) returned %#v, want %#v", got, want)
 36 | 	}
 37 | }
 38 | 
 39 | func TestNew(t *testing.T) {
 40 | 	testDate := time.Date(2019, 1, 1, 0, 0, 0, 0, time.UTC)
 41 | 	type args struct {
 42 | 		data interface{}
 43 | 	}
 44 | 	type want struct {
 45 | 		values interface{}
 46 | 		dtype  options.DataType
 47 | 	}
 48 | 	tests := []struct {
 49 | 		name string
 50 | 		args args
 51 | 		want want
 52 | 	}{
 53 | 		{"all null", args{""}, want{"", options.String}},
 54 | 		{"float32", args{float32(1)}, want{1.0, options.Float64}},
 55 | 		{"float64", args{float64(1)}, want{1.0, options.Float64}},
 56 | 		{"int", args{int(1)}, want{1, options.Int64}},
 57 | 		{"int8", args{int8(1)}, want{1, options.Int64}},
 58 | 		{"int16", args{int16(1)}, want{1, options.Int64}},
 59 | 		{"int32", args{int32(1)}, want{1, options.Int64}},
 60 | 		{"int64", args{int64(1)}, want{1, options.Int64}},
 61 | 		{"string", args{"foo"}, want{"foo", options.String}},
 62 | 		{"bool", args{true}, want{true, options.Bool}},
 63 | 		{"datetime", args{testDate}, want{testDate, options.DateTime}},
 64 | 
 65 | 		{"float32_slice", args{[]float32{1}}, want{1.0, options.Float64}},
 66 | 		{"float64_slice", args{[]float64{1}}, want{1.0, options.Float64}},
 67 | 		{"int_slice", args{[]int{1}}, want{1, options.Int64}},
 68 | 		{"int8_slice", args{[]int8{1}}, want{1, options.Int64}},
 69 | 		{"int16_slice", args{[]int16{1}}, want{1, options.Int64}},
 70 | 		{"int32_slice", args{[]int32{1}}, want{1, options.Int64}},
 71 | 		{"int64_slice", args{[]int64{1}}, want{1, options.Int64}},
 72 | 		{"string_slice", args{[]string{"foo"}}, want{"foo", options.String}},
 73 | 		{"bool_slice", args{[]bool{true}}, want{true, options.Bool}},
 74 | 		{"datetime_slice", args{[]time.Time{testDate}}, want{testDate, options.DateTime}},
 75 | 	}
 76 | 	for _, tt := range tests {
 77 | 		t.Run(tt.name, func(t *testing.T) {
 78 | 			got, err := New(tt.args.data)
 79 | 			if err != nil {
 80 | 				t.Errorf("New() error = %v, wantErr nil", err)
 81 | 			}
 82 | 			container := values.MustCreateValuesFromInterface(tt.want.values)
 83 | 			wantValues := container.Values
 84 | 			wantIdx := index.NewDefault(1)
 85 | 			want := &Series{values: wantValues, index: wantIdx, datatype: tt.want.dtype}
 86 | 			if !Equal(got, want) {
 87 | 				t.Errorf("New() = %v, want %v", got, want)
 88 | 			}
 89 | 		})
 90 | 	}
 91 | }
 92 | 
 93 | func TestNew_conversion(t *testing.T) {
 94 | 	got, err := New("3.5", Config{DataType: options.Float64})
 95 | 	if err != nil {
 96 | 		t.Errorf("New(): %v", err)
 97 | 	}
 98 | 	values, _ := values.InterfaceFactory(3.5)
 99 | 	index := index.NewDefault(1)
100 | 	want := &Series{values: values.Values, index: index, datatype: options.Float64}
101 | 	if !Equal(got, want) {
102 | 		t.Errorf("New(nil) returned %v, want %v", got, want)
103 | 	}
104 | }
105 | 
106 | func TestNew_Fail(t *testing.T) {
107 | 	type args struct {
108 | 		data   interface{}
109 | 		config Config
110 | 	}
111 | 	tests := []struct {
112 | 		name string
113 | 		args args
114 | 	}{
115 | 		{"unsupported value", args{complex64(1), Config{}}},
116 | 		{"unsupported single index", args{"foo", Config{Index: complex64(1)}}},
117 | 		{"unsupported multiIndex", args{"foo", Config{MultiIndex: []interface{}{complex64(1)}}}},
118 | 		{"unsupported conversion", args{"3.5", Config{DataType: options.Unsupported}}},
119 | 		{"index-multiIndex ambiguity", args{"foo", Config{Index: "foo", MultiIndex: []interface{}{"bar"}}}},
120 | 		{"values-index alignmentV1", args{"foo", Config{Index: []string{"foo", "bar"}}}},
121 | 		{"values-index alignmentV2", args{[]string{"foo"}, Config{Index: []string{"foo", "bar"}}}},
122 | 		{"values-index alignmentV3", args{[]string{"foo", "bar"}, Config{Index: "foo"}}},
123 | 		{"values-index alignmentV4", args{[]string{"foo", "bar"}, Config{Index: []string{"foo"}}}},
124 | 		{"values-multiIndex alignmentV1", args{"foo", Config{MultiIndex: []interface{}{[]string{"foo", "bar"}}}}},
125 | 		{"values-multiIndex alignment2", args{[]string{"foo"}, Config{MultiIndex: []interface{}{[]string{"foo", "bar"}}}}},
126 | 		{"values-multiIndex alignmentV3", args{[]string{"foo", "bar"}, Config{MultiIndex: []interface{}{"foo"}}}},
127 | 		{"values-multiIndex alignmentV4", args{[]string{"foo", "bar"}, Config{MultiIndex: []interface{}{"foo"}}}},
128 | 		{"values-multiIndex alignmentV5", args{[]string{"foo", "bar"}, Config{MultiIndex: []interface{}{"foo", "bar"}}}},
129 | 		{"multiIndex alignment", args{[]string{"foo", "bar"}, Config{
130 | 			MultiIndex: []interface{}{[]string{"foo", "bar"}, []string{"baz"}}}}},
131 | 		{"multiIndex names", args{[]string{"foo", "bar"}, Config{
132 | 			MultiIndex:      []interface{}{[]string{"foo", "bar"}, []string{"baz", "qux"}},
133 | 			MultiIndexNames: []string{"1"},
134 | 		}}},
135 | 	}
136 | 	for _, tt := range tests {
137 | 		t.Run(tt.name, func(t *testing.T) {
138 | 			_, err := New(tt.args.data, tt.args.config)
139 | 			if err == nil {
140 | 				t.Error("New() error = nil, want error")
141 | 				return
142 | 			}
143 | 		})
144 | 	}
145 | }
146 | 
147 | func TestNew_Fail_multipleConfigs(t *testing.T) {
148 | 	_, err := New("foo", Config{}, Config{})
149 | 	if err == nil {
150 | 		t.Error("New() error = nil, want error due to multiple configs")
151 | 	}
152 | }
153 | 
154 | func TestMustNew(t *testing.T) {
155 | 	v, _ := values.InterfaceFactory(1.0)
156 | 	tests := []struct {
157 | 		name string
158 | 		args interface{}
159 | 		want *Series
160 | 	}{
161 | 		{name: "pass", args: 1.0,
162 | 			want: &Series{values: v.Values, index: index.NewDefault(1), datatype: options.Float64}},
163 | 		{name: "fail", args: complex64(1),
164 | 			want: newEmptySeries()},
165 | 	}
166 | 	for _, tt := range tests {
167 | 		t.Run(tt.name, func(t *testing.T) {
168 | 			var buf bytes.Buffer
169 | 			log.SetOutput(&buf)
170 | 			defer log.SetOutput(os.Stderr)
171 | 
172 | 			got := MustNew(tt.args)
173 | 			if !Equal(got, tt.want) {
174 | 				t.Errorf("MustNew() = %v, want %v", got, tt.want)
175 | 			}
176 | 			if strings.Contains(tt.name, "fail") {
177 | 				if buf.String() == "" {
178 | 					t.Errorf("series.MustNew() returned no log message, want log due to fail")
179 | 				}
180 | 			}
181 | 		})
182 | 	}
183 | }
184 | func TestMustNew_fail(t *testing.T) {
185 | 	var buf bytes.Buffer
186 | 	log.SetOutput(&buf)
187 | 	defer log.SetOutput(os.Stderr)
188 | 	MustNew(complex64(1))
189 | 	if buf.String() == "" {
190 | 		t.Errorf("MustNew() returned no log message, want log due to fail")
191 | 	}
192 | }
193 | 
194 | func Test_Copy(t *testing.T) {
195 | 	tests := []struct {
196 | 		name  string
197 | 		input *Series
198 | 		want  *Series
199 | 	}{
200 | 		{name: "pass", input: MustNew("foo"), want: MustNew("foo")},
201 | 	}
202 | 	for _, tt := range tests {
203 | 		t.Run(tt.name, func(t *testing.T) {
204 | 			got := tt.input.Copy()
205 | 			if !Equal(got, tt.want) {
206 | 				t.Errorf("s.Copy() returned %v, want %v", got, tt.want)
207 | 			}
208 | 			if reflect.ValueOf(tt.input).Pointer() == reflect.ValueOf(tt.want).Pointer() {
209 | 				t.Errorf("s.Copy() retained reference to original, want copy")
210 | 			}
211 | 			if reflect.ValueOf(tt.input.values).Pointer() == reflect.ValueOf(tt.want.values).Pointer() {
212 | 				t.Errorf("s.Copy() retained reference to original values, want copy")
213 | 			}
214 | 			if reflect.ValueOf(tt.input.index.Levels).Pointer() == reflect.ValueOf(tt.want.index.Levels).Pointer() {
215 | 				t.Errorf("s.Copy() retained reference to original index, want copy")
216 | 			}
217 | 		})
218 | 	}
219 | }
220 | 
221 | func TestFromInternalComponents(t *testing.T) {
222 | 	vals := values.MustCreateValuesFromInterface("foo")
223 | 	index := index.NewDefault(1)
224 | 	got := FromInternalComponents(vals, index, "bar")
225 | 	want := MustNew("foo", Config{Name: "bar"})
226 | 	if !Equal(got, want) {
227 | 		t.Errorf("FromInternalComponents() returned %v, want %v", got, want)
228 | 	}
229 | 
230 | }
231 | 
232 | func TestToInternalComponents(t *testing.T) {
233 | 	s := MustNew("foo")
234 | 	vals, idx := s.ToInternalComponents()
235 | 	wantVals := values.MustCreateValuesFromInterface("foo")
236 | 	wantIdx := index.NewDefault(1)
237 | 	if !reflect.DeepEqual(vals, wantVals) {
238 | 		t.Errorf("ToInternalComponents() returned %v, want %v", vals, wantVals)
239 | 	}
240 | 	if !reflect.DeepEqual(idx, wantIdx) {
241 | 		t.Errorf("ToInternalComponents() returned %v, want %v", idx, wantIdx)
242 | 	}
243 | 
244 | }
245 | 


--------------------------------------------------------------------------------
/dataframe/describe_test.go:
--------------------------------------------------------------------------------
  1 | package dataframe
  2 | 
import (
	"os"
	"reflect"
	"testing"

	"github.com/ptiger10/pd/options"
	"github.com/ptiger10/pd/series"
)
 10 | 
 11 | func TestDataFrame_Describe(t *testing.T) {
 12 | 	type want struct {
 13 | 		len             int
 14 | 		numCols         int
 15 | 		numIdxLevels    int
 16 | 		numColLevels    int
 17 | 		dataType        options.DataType
 18 | 		dataTypePrinter string
 19 | 		dataTypes       *series.Series
 20 | 	}
 21 | 	tests := []struct {
 22 | 		name  string
 23 | 		input *DataFrame
 24 | 		want  want
 25 | 	}{
 26 | 		{name: "empty",
 27 | 			input: newEmptyDataFrame(),
 28 | 			want: want{
 29 | 				len: 0, numCols: 0, numIdxLevels: 0, numColLevels: 0,
 30 | 				dataType: options.None, dataTypePrinter: "empty", dataTypes: series.MustNew(nil),
 31 | 			}},
 32 | 		{"default index, col",
 33 | 			MustNew([]interface{}{"foo"}),
 34 | 			want{
 35 | 				len: 1, numCols: 1, numIdxLevels: 1, numColLevels: 1,
 36 | 				dataType: options.String, dataTypePrinter: "string", dataTypes: series.MustNew("string", series.Config{Name: "datatypes"}),
 37 | 			}},
 38 | 		{"multi index, single col",
 39 | 			MustNew([]interface{}{"foo"}, Config{MultiIndex: []interface{}{"baz", "qux"}}),
 40 | 			want{
 41 | 				len: 1, numCols: 1, numIdxLevels: 2, numColLevels: 1,
 42 | 				dataType: options.String, dataTypePrinter: "string", dataTypes: series.MustNew("string", series.Config{Name: "datatypes"}),
 43 | 			}},
 44 | 		{"single index, two cols, mixed types",
 45 | 			MustNew([]interface{}{"foo", 5}, Config{Col: []string{"baz", "qux"}}),
 46 | 			want{
 47 | 				len: 1, numCols: 2, numIdxLevels: 1, numColLevels: 1,
 48 | 				dataType: options.Unsupported, dataTypePrinter: "mixed", dataTypes: series.MustNew([]string{"string", "int64"}, series.Config{Name: "datatypes"}),
 49 | 			}},
 50 | 		{"single index, multi col",
 51 | 			MustNew([]interface{}{"foo", "bar"}, Config{MultiCol: [][]string{{"baz", "qux"}, {"corge", "fred"}}}),
 52 | 			want{
 53 | 				len: 1, numCols: 2, numIdxLevels: 1, numColLevels: 2,
 54 | 				dataType: options.String, dataTypePrinter: "string", dataTypes: series.MustNew([]string{"string", "string"}, series.Config{Name: "datatypes"}),
 55 | 			}},
 56 | 	}
 57 | 	for _, tt := range tests {
 58 | 		t.Run(tt.name, func(t *testing.T) {
 59 | 			df := tt.input.Copy()
 60 | 			gotLen := df.Len()
 61 | 			if gotLen != tt.want.len {
 62 | 				t.Errorf("df.Len(): got %v, want %v", gotLen, tt.want.len)
 63 | 			}
 64 | 			gotNumCols := df.NumCols()
 65 | 			if gotNumCols != tt.want.numCols {
 66 | 				t.Errorf("df.NumCols(): got %v, want %v", gotNumCols, tt.want.numCols)
 67 | 			}
 68 | 			gotNumIdxLevels := df.IndexLevels()
 69 | 			if gotNumIdxLevels != tt.want.numIdxLevels {
 70 | 				t.Errorf("df.IndexLevels(): got %v, want %v", gotNumIdxLevels, tt.want.numIdxLevels)
 71 | 			}
 72 | 			gotNumColLevels := df.ColLevels()
 73 | 			if gotNumColLevels != tt.want.numColLevels {
 74 | 				t.Errorf("df.ColLevels(): got %v, want %v", gotNumColLevels, tt.want.numColLevels)
 75 | 			}
 76 | 			gotDataType := df.dataType()
 77 | 			if gotDataType != tt.want.dataType {
 78 | 				t.Errorf("df.gotDataType: got %v, want %v", gotDataType, tt.want.dataType)
 79 | 			}
 80 | 			gotDataTypePrinter := df.dataTypePrinter()
 81 | 			if gotDataTypePrinter != tt.want.dataTypePrinter {
 82 | 				t.Errorf("df.dataTypePrinter: got %v, want %v", gotDataTypePrinter, tt.want.dataTypePrinter)
 83 | 			}
 84 | 			gotDataTypes := df.DataTypes()
 85 | 			if !series.Equal(gotDataTypes, tt.want.dataTypes) {
 86 | 				t.Errorf("df.DataTypes(): got %v, want %v", gotDataTypes, tt.want.dataTypes)
 87 | 			}
 88 | 		})
 89 | 	}
 90 | }
 91 | 
 92 | func TestEqual(t *testing.T) {
 93 | 	df := MustNew([]interface{}{[]string{"foo"}, []string{"bar"}}, Config{Index: "corge", Col: []string{"baz", "qux"}})
 94 | 	type args struct {
 95 | 		df2 *DataFrame
 96 | 	}
 97 | 	tests := []struct {
 98 | 		name  string
 99 | 		input *DataFrame
100 | 		args  args
101 | 		want  bool
102 | 	}{
103 | 		{name: "equal", input: df,
104 | 			args: args{df2: MustNew([]interface{}{[]string{"foo"}, []string{"bar"}}, Config{Index: "corge", Col: []string{"baz", "qux"}})},
105 | 			want: true},
106 | 		{"equal empty", newEmptyDataFrame(), args{newEmptyDataFrame()}, true},
107 | 		{"equal empty copy", newEmptyDataFrame().Copy(), args{newEmptyDataFrame()}, true},
108 | 		{"not equal: values", df,
109 | 			args{MustNew([]interface{}{[]string{"foo"}, []string{"bar"}})}, false},
110 | 		{"not equal: cols", df,
111 | 			args{MustNew([]interface{}{[]string{"foo"}, []string{"bar"}}, Config{Index: "corge", Col: []string{"fred", "qux"}})}, false},
112 | 		{"not equal: name", df,
113 | 			args{MustNew([]interface{}{[]string{"foo"}, []string{"bar"}}, Config{Index: "corge", Col: []string{"baz", "qux"}, Name: "quux"})}, false},
114 | 	}
115 | 	for _, tt := range tests {
116 | 		t.Run(tt.name, func(t *testing.T) {
117 | 			got := Equal(tt.input, tt.args.df2)
118 | 			if got != tt.want {
119 | 				t.Errorf("Equal() got %v, want %v", got, tt.want)
120 | 			}
121 | 		})
122 | 	}
123 | }
124 | 
125 | func TestMaxColWidth(t *testing.T) {
126 | 	type want struct {
127 | 		colWidths       []int
128 | 		exclusionsTable [][]bool
129 | 	}
130 | 	tests := []struct {
131 | 		name  string
132 | 		input *DataFrame
133 | 		want  want
134 | 	}{
135 | 		{name: "empty config", input: MustNew([]interface{}{[]string{"a", "foo"}, []string{"b", "quux"}}),
136 | 			want: want{colWidths: []int{3, 4}, exclusionsTable: [][]bool{{false, false}}}},
137 | 		{"single level",
138 | 			MustNew([]interface{}{[]string{"a", "foo"}, []string{"b", "quux"}},
139 | 				Config{Col: []string{"corge", "bar"}, ColName: "grapply"}),
140 | 			want{[]int{5, 4}, [][]bool{{false, false}}}},
141 | 		{"multi level",
142 | 			MustNew([]interface{}{[]string{"a", "foo"}, []string{"b", "quux"}},
143 | 				Config{MultiCol: [][]string{{"corge", "bar"}, {"qux", "quuz"}}, MultiColNames: []string{"grapply", "grault"}}),
144 | 			want{[]int{5, 4}, [][]bool{{false, false}, {false, false}}}},
145 | 		{"nil: empty colWidths", newEmptyDataFrame(), want{nil, [][]bool{}}},
146 | 	}
147 | 
148 | 	for _, tt := range tests {
149 | 		t.Run(tt.name, func(t *testing.T) {
150 | 			df := tt.input
151 | 			excl := df.makeColumnExclusionsTable()
152 | 			got := df.maxColWidths(excl)
153 | 			if !reflect.DeepEqual(excl, tt.want.exclusionsTable) {
154 | 				t.Errorf("df.makeColumnExclusionsTable() got %v, want %v", excl, tt.want.exclusionsTable)
155 | 			}
156 | 			if !reflect.DeepEqual(got, tt.want.colWidths) {
157 | 				t.Errorf("df.maxColWidths() got %v, want %v", got, tt.want.colWidths)
158 | 			}
159 | 		})
160 | 	}
161 | }
162 | 
163 | func TestMaxColWidthExcludeRepeat(t *testing.T) {
164 | 	df := MustNew(
165 | 		[]interface{}{[]string{"a", "b"}, []string{"c", "quux"}},
166 | 		Config{MultiCol: [][]string{{"waldo", "waldo"}, {"d", "e"}}})
167 | 	excl := [][]bool{{false, true}, {false, false}}
168 | 	got := df.maxColWidths(excl)
169 | 	want := []int{5, 4}
170 | 	if !reflect.DeepEqual(got, want) {
171 | 		t.Errorf("df.maxColWidths() got %v, want %v", got, want)
172 | 	}
173 | }
174 | 
175 | func TestHeadTail(t *testing.T) {
176 | 	df := MustNew([]interface{}{[]string{"foo", "bar", "baz", "qux"}}, Config{Index: []int{0, 1, 2, 3}})
177 | 	type args struct {
178 | 		n int
179 | 	}
180 | 	tests := []struct {
181 | 		name  string
182 | 		input *DataFrame
183 | 		fn    func(*DataFrame, int) *DataFrame
184 | 		args  args
185 | 		want  *DataFrame
186 | 	}{
187 | 		{name: "head", input: df, fn: (*DataFrame).Head, args: args{n: 2},
188 | 			want: MustNew([]interface{}{[]string{"foo", "bar"}}, Config{Index: []int{0, 1}})},
189 | 		{name: "head - max", input: df, fn: (*DataFrame).Head, args: args{n: 10},
190 | 			want: df},
191 | 		{name: "tail", input: df, fn: (*DataFrame).Tail, args: args{n: 2},
192 | 			want: MustNew([]interface{}{[]string{"baz", "qux"}}, Config{Index: []int{2, 3}})},
193 | 		{name: "tail - max", input: df, fn: (*DataFrame).Tail, args: args{n: 10},
194 | 			want: df},
195 | 	}
196 | 	for _, tt := range tests {
197 | 		t.Run(tt.name, func(t *testing.T) {
198 | 			got := tt.fn(tt.input, tt.args.n)
199 | 			if !Equal(got, tt.want) {
200 | 				t.Errorf("df.Head/Tail() got %v, want %v", got, tt.want)
201 | 			}
202 | 		})
203 | 	}
204 | }
205 | 
206 | func TestDataFrame_Export(t *testing.T) {
207 | 	tests := []struct {
208 | 		name  string
209 | 		input *DataFrame
210 | 		want  [][]interface{}
211 | 	}{
212 | 		{name: "pass", input: MustNew([]interface{}{"foo"}, Config{Index: "bar", Col: []string{"baz"}}),
213 | 			want: [][]interface{}{{nil, "baz"}, {"bar", "foo"}}},
214 | 	}
215 | 	for _, tt := range tests {
216 | 		t.Run(tt.name, func(t *testing.T) {
217 | 			got := tt.input.Export()
218 | 			if !reflect.DeepEqual(got, tt.want) {
219 | 				t.Errorf("df.Export() got %v, want %v", got, tt.want)
220 | 			}
221 | 		})
222 | 	}
223 | }
224 | 
225 | func TestDataFrame_ExportToCSV(t *testing.T) {
226 | 	type args struct {
227 | 		filepath string
228 | 	}
229 | 	tests := []struct {
230 | 		name  string
231 | 		input *DataFrame
232 | 		args  args
233 | 		want  bool
234 | 	}{
235 | 		{name: "pass", input: MustNew([]interface{}{"foo"}, Config{Index: "bar", Col: []string{"baz"}}),
236 | 			want: false},
237 | 	}
238 | 	for _, tt := range tests {
239 | 		t.Run(tt.name, func(t *testing.T) {
240 | 			tt.input.ExportToCSV("output_test.csv")
241 | 			//TODO: move ReadCSV to dataframe package to rehydrate output and compare to input
242 | 		})
243 | 	}
244 | }
245 | 


--------------------------------------------------------------------------------