├── .DS_Store ├── assets ├── .DS_Store └── images │ ├── .DS_Store │ ├── Exclude.png │ ├── Import.png │ ├── Length.png │ ├── Unique.png │ ├── AddRecord.png │ ├── Filtered.png │ ├── Concatenated.png │ ├── KeepColumns.png │ ├── ReadAndPrint.png │ ├── FilteredAfter.png │ ├── FilteredBefore.png │ └── FilteredBetween.png ├── TestInnerMergeData.csv ├── TestMergeData.csv ├── TestDataDateFormat.csv ├── Testing.csv ├── test.csv ├── TestData.csv ├── TestDataConcat.csv ├── TestDataCommaSeparatedValue.csv ├── TestDataInnerDuplicate.csv ├── go.mod ├── LICENSE ├── console.go ├── go.sum ├── aws_tooling.go ├── README.md ├── main.go └── main_test.go /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kfultz07/go-dataframe/HEAD/.DS_Store -------------------------------------------------------------------------------- /assets/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kfultz07/go-dataframe/HEAD/assets/.DS_Store -------------------------------------------------------------------------------- /assets/images/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kfultz07/go-dataframe/HEAD/assets/images/.DS_Store -------------------------------------------------------------------------------- /assets/images/Exclude.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kfultz07/go-dataframe/HEAD/assets/images/Exclude.png -------------------------------------------------------------------------------- /assets/images/Import.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kfultz07/go-dataframe/HEAD/assets/images/Import.png -------------------------------------------------------------------------------- /assets/images/Length.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/kfultz07/go-dataframe/HEAD/assets/images/Length.png -------------------------------------------------------------------------------- /assets/images/Unique.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kfultz07/go-dataframe/HEAD/assets/images/Unique.png -------------------------------------------------------------------------------- /assets/images/AddRecord.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kfultz07/go-dataframe/HEAD/assets/images/AddRecord.png -------------------------------------------------------------------------------- /assets/images/Filtered.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kfultz07/go-dataframe/HEAD/assets/images/Filtered.png -------------------------------------------------------------------------------- /assets/images/Concatenated.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kfultz07/go-dataframe/HEAD/assets/images/Concatenated.png -------------------------------------------------------------------------------- /assets/images/KeepColumns.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kfultz07/go-dataframe/HEAD/assets/images/KeepColumns.png -------------------------------------------------------------------------------- /assets/images/ReadAndPrint.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kfultz07/go-dataframe/HEAD/assets/images/ReadAndPrint.png -------------------------------------------------------------------------------- /assets/images/FilteredAfter.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/kfultz07/go-dataframe/HEAD/assets/images/FilteredAfter.png -------------------------------------------------------------------------------- /assets/images/FilteredBefore.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kfultz07/go-dataframe/HEAD/assets/images/FilteredBefore.png -------------------------------------------------------------------------------- /assets/images/FilteredBetween.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kfultz07/go-dataframe/HEAD/assets/images/FilteredBetween.png -------------------------------------------------------------------------------- /TestInnerMergeData.csv: -------------------------------------------------------------------------------- 1 | ID,City,State,Postal Code 2 | 4,VAN BUREN,AR,72956 3 | 5,TAUNTON,MA,2780 4 | 7,GOLDSBORO,NC,27530 5 | 9,PHOENIX,AZ,85024 6 | 10,JEFFERSON CITY,MO,65109 7 | 11,Denver,CO,66616 -------------------------------------------------------------------------------- /TestMergeData.csv: -------------------------------------------------------------------------------- 1 | ID,City,State,Postal Code 2 | 1,APPLETON,WI,54911 3 | 2,RICHLAND,WA,99354 4 | 3,KANSAS CITY,KS,66115 5 | 4,VAN BUREN,AR,72956 6 | 5,TAUNTON,MA,2780 7 | 6,FISHERS,NY,14453 8 | 7,GOLDSBORO,NC,27530 9 | 8,PATERSON,NJ,7503 10 | 9,PHOENIX,AZ,85024 11 | 10,JEFFERSON CITY,MO,65109 -------------------------------------------------------------------------------- /TestDataDateFormat.csv: -------------------------------------------------------------------------------- 1 | ID,Date,Cost,Weight,First Name,Last Name 2 | 1,1/1/22,818,227,Kevin,Fultz 3 | 2,1/2/22,777,259,Beth,Fultz 4 | 3,1/3/22,493,461,Avery,Fultz 5 | 4,1/4/22,121,196,Peter,Wiedmann 6 | 5,1/5/22,774,415,Andy,Wiedmann 7 | 
6,1/6/22,874,436,Nick,Wilfong 8 | 7,1/7/22,995,500,Bryan,Curtis 9 | 8,1/8/22,133,250,Brian,Wenck 10 | 9,1/9/22,939,157,Eric,Petruska 11 | 10,1/10/22,597,475,Carl,Carlson -------------------------------------------------------------------------------- /Testing.csv: -------------------------------------------------------------------------------- 1 | ID,Date,Cost,Weight,First Name,Last Name 2 | 1,2022-01-01,818,227,Kevin,Fultz 3 | 2,2022-01-02,777,259,Beth,Fultz 4 | 3,2022-01-03,493,461,Avery,Fultz 5 | 4,2022-01-04,121,196,Peter,Wiedmann 6 | 5,2022-01-05,774,415,Andy,Wiedmann 7 | 6,2022-01-06,874,436,Nick,Wilfong 8 | 7,2022-01-07,995,500,Bryan,Curtis 9 | 8,2022-01-08,133,250,Brian,Wenck 10 | 9,2022-01-09,939,157,Eric,Petruska 11 | 10,2022-01-10,597,475,Carl,Carlson 12 | -------------------------------------------------------------------------------- /test.csv: -------------------------------------------------------------------------------- 1 | ID,Date,Cost,Weight,First Name,Last Name 2 | 1,2022-01-01,818,227,Kevin,Fultz 3 | 2,2022-01-02,777,259,Beth,Fultz 4 | 3,2022-01-03,493,461,Avery,Fultz 5 | 4,2022-01-04,121,196,Peter,Wiedmann 6 | 5,2022-01-05,774,415,Andy,Wiedmann 7 | 6,2022-01-06,874,436,Nick,Wilfong 8 | 7,2022-01-07,995,500,Bryan,Curtis 9 | 8,2022-01-08,133,250,Brian,Wenck 10 | 9,2022-01-09,939,157,Eric,Petruska 11 | 10,2022-01-10,597,475,Carl,Carlson 12 | -------------------------------------------------------------------------------- /TestData.csv: -------------------------------------------------------------------------------- 1 | ID,Date,Cost,Weight,First Name,Last Name 2 | 1,2022-01-01,818,227,Kevin,Fultz 3 | 2,2022-01-02,777,259,Beth,Fultz 4 | 3,2022-01-03,493,461,Avery,Fultz 5 | 4,2022-01-04,121,196,Peter,Wiedmann 6 | 5,2022-01-05,774,415,Andy,Wiedmann 7 | 6,2022-01-06,874,436,Nick,Wilfong 8 | 7,2022-01-07,995,500,Bryan,Curtis 9 | 8,2022-01-08,133,250,Brian,Wenck 10 | 9,2022-01-09,939,157,Eric,Petruska 11 | 10,2022-01-10,597,475,Carl,Carlson 
-------------------------------------------------------------------------------- /TestDataConcat.csv: -------------------------------------------------------------------------------- 1 | ID,Date,Cost,Weight,First Name,Last Name 2 | 11,2022-01-01,20,34,Ben,Benny 3 | 12,2022-01-02,84,14,Kevin,Kenny 4 | 13,2022-01-03,44,8,Carl,McCarlson 5 | 14,2022-01-04,53,9,Jeff,Jeffery 6 | 15,2022-01-05,97,39,Steve,Stephenson 7 | 16,2022-01-06,95,66,Pat,Patrickman 8 | 17,2022-01-07,0,65,Brian,Briarson 9 | 18,2022-01-08,99,62,Eric,Ericson 10 | 19,2022-01-09,21,88,Ashley,Asherton 11 | 20,2022-01-10,66,60,Heather,Highman -------------------------------------------------------------------------------- /TestDataCommaSeparatedValue.csv: -------------------------------------------------------------------------------- 1 | ID,Date,Cost,Weight,First Name,Last Name 2 | 1,2022-01-01,818,227,Kevin,Fultz 3 | 2,2022-01-02,777,259,Beth,Fultz 4 | 3,2022-01-03,493,461,Avery,Fultz 5 | 4,2022-01-04,121,196,Peter,Wiedmann 6 | 5,2022-01-05,774,415,Andy,Wiedmann 7 | 6,2022-01-06,874,436,Nick,Wilfong 8 | 7,2022-01-07,995,500,Bryan,Curtis 9 | 8,2022-01-08,133,250,Brian,Wenck 10 | 9,2022-01-09,939,157,Eric,Petruska 11 | 10,2022-01-10,597,475,Carl,Carlson -------------------------------------------------------------------------------- /TestDataInnerDuplicate.csv: -------------------------------------------------------------------------------- 1 | ID,Date,Cost,Weight,First Name,Last Name 2 | 1,2022-01-01,818,227,Kevin,Fultz 3 | 2,2022-01-02,777,259,Beth,Fultz 4 | 3,2022-01-03,493,461,Avery,Fultz 5 | 4,2022-01-04,121,196,Peter,Wiedmann 6 | 5,2022-01-05,774,415,Andy,Wiedmann 7 | 6,2022-01-06,874,436,Nick,Wilfong 8 | 7,2022-01-07,995,500,Bryan,Curtis 9 | 8,2022-01-08,133,250,Brian,Wenck 10 | 9,2022-01-09,939,157,Eric,Petruska 11 | 9,2022-01-09,12345,6789,Eric,Petruska 12 | 10,2022-01-10,597,475,Carl,Carlson -------------------------------------------------------------------------------- /go.mod: 
-------------------------------------------------------------------------------- 1 | module github.com/kfultz07/go-dataframe 2 | 3 | go 1.22 4 | 5 | require github.com/aws/aws-sdk-go v1.44.57 6 | 7 | require ( 8 | github.com/jmespath/go-jmespath v0.4.0 // indirect 9 | github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect 10 | github.com/rivo/uniseg v0.4.7 // indirect 11 | github.com/schollz/progressbar/v3 v3.18.0 // indirect 12 | golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63 // indirect 13 | golang.org/x/sys v0.29.0 // indirect 14 | golang.org/x/term v0.28.0 // indirect 15 | ) 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Kevin Fultz 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /console.go: -------------------------------------------------------------------------------- 1 | package dataframe 2 | 3 | import ( 4 | "fmt" 5 | "strconv" 6 | "time" 7 | ) 8 | 9 | func calculateSpaces(val string, maxColumnWidth int) string { 10 | valLength := len(val) 11 | 12 | if len(val)%2 != 0 { 13 | val += " " 14 | } 15 | 16 | if len(val) == maxColumnWidth { 17 | return "|" + val + "| ---> " + strconv.Itoa(valLength) 18 | } 19 | 20 | for len(val) < maxColumnWidth { 21 | val = " " + val + " " 22 | } 23 | 24 | return "|" + val + "| ---> " + strconv.Itoa(valLength) 25 | } 26 | 27 | func calculateMaxColumnWidth(headers []string) int { 28 | maxWidth := len(headers[0]) 29 | 30 | for _, each := range headers { 31 | if len(each) > maxWidth { 32 | maxWidth = len(each) 33 | } 34 | } 35 | return maxWidth 36 | } 37 | 38 | // Dynamically generate the column headers for the table. 39 | func generateTableColumns(headers []string, maxColumnWidth int) string { 40 | var head string 41 | var columnCount int 42 | 43 | for _, h := range headers { 44 | val := calculateSpaces(h, maxColumnWidth) 45 | head += val + "\n" 46 | columnCount++ 47 | } 48 | 49 | head = "\n" + head 50 | 51 | border := " " 52 | 53 | for i := 0; i < maxColumnWidth; i++ { 54 | border += "-" 55 | } 56 | 57 | head = "Column Count: " + strconv.Itoa(columnCount) + "\n" + border + head + border 58 | 59 | return head 60 | } 61 | 62 | // Method to print all columns in a viewable table within the terminal. 63 | func (frame DataFrame) ViewColumns() { 64 | var columns []string 65 | 66 | // Add columns in order from map. 
67 | for i := 0; i < len(frame.Headers); i++ { 68 | for k, v := range frame.Headers { 69 | if v == i { 70 | columns = append(columns, k) 71 | } 72 | } 73 | } 74 | 75 | maxColumnWidth := calculateMaxColumnWidth(columns) 76 | 77 | head := generateTableColumns(columns, maxColumnWidth) 78 | fmt.Println(head) 79 | } 80 | 81 | func loading(quit <-chan bool) { 82 | char := []string{ 83 | "| L", 84 | "/ LO", 85 | "- LOA", 86 | "\\ LOAD", 87 | "| LOADI", 88 | "/ LOADIN", 89 | "- LOADING", 90 | "\\ LOADING.", 91 | "| LOADING..", 92 | "/ LOADING...", 93 | "- ", 94 | } 95 | 96 | for { 97 | select { 98 | case <-quit: 99 | fmt.Printf("\r") 100 | return 101 | default: 102 | for _, c := range char { 103 | fmt.Printf("\r%s", c) 104 | time.Sleep(time.Millisecond * 75) 105 | } 106 | } 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/aws/aws-sdk-go v1.44.57 h1:Dx1QD+cA89LE0fVQWSov22tpnTa0znq2Feyaa/myVjg= 2 | github.com/aws/aws-sdk-go v1.44.57/go.mod h1:y4AeaBuwd2Lk+GepC1E9v0qOiTws0MIWAX4oIKwKHZo= 3 | github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= 4 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 5 | github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= 6 | github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= 7 | github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= 8 | github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= 9 | github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ= 10 | github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw= 11 | 
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 12 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 13 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 14 | github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= 15 | github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= 16 | github.com/schollz/progressbar/v3 v3.18.0 h1:uXdoHABRFmNIjUfte/Ex7WtuyVslrw2wVPQmCN62HpA= 17 | github.com/schollz/progressbar/v3 v3.18.0/go.mod h1:IsO3lpbaGuzh8zIMzgY3+J8l4C8GjO0Y9S69eFvNsec= 18 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 19 | golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63 h1:m64FZMko/V45gv0bNmrNYoDEq8U5YUhetc9cBWKS1TQ= 20 | golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63/go.mod h1:0v4NqG35kSWCMzLaMeX+IQrlSnVE/bqGSyC2cz/9Le8= 21 | golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd h1:O7DYs+zxREGLKzKoMQrtrEacpb0ZVXA5rIwylE2Xchk= 22 | golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= 23 | golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 24 | golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 25 | golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU= 26 | golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 27 | golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= 28 | golang.org/x/term v0.28.0 h1:/Ts8HFuMR2E6IP/jlo7QVLZHggjKQbhu/7H0LJFr3Gg= 29 | golang.org/x/term v0.28.0/go.mod h1:Sw/lC2IAUZ92udQNf3WodGtn4k/XoLyZoh8v/8uiwek= 30 | golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk= 31 | golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= 32 | 
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 33 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 34 | gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10= 35 | gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 36 | -------------------------------------------------------------------------------- /aws_tooling.go: -------------------------------------------------------------------------------- 1 | package dataframe 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "os" 7 | "path/filepath" 8 | "strings" 9 | 10 | "github.com/aws/aws-sdk-go/aws" 11 | "github.com/aws/aws-sdk-go/aws/session" 12 | "github.com/aws/aws-sdk-go/service/s3" 13 | "github.com/aws/aws-sdk-go/service/s3/s3manager" 14 | ) 15 | 16 | func CreateDataFrameFromAwsS3(path, item, bucket, region, awsAccessKey, awsSecretKey string) (DataFrame, error) { 17 | switch { 18 | case !strings.Contains(item, ".csv"): 19 | return DataFrame{}, errors.New("create dataframe from aws s3: only csv files are currently supported") 20 | case len(path) == 0: 21 | return DataFrame{}, errors.New("create dataframe from aws s3: must provide a path") 22 | case len(item) == 0: 23 | return DataFrame{}, errors.New("create dataframe from aws s3: must provide a file name") 24 | case len(bucket) == 0: 25 | return DataFrame{}, errors.New("create dataframe from aws s3: must provide a bucket name") 26 | case len(region) == 0: 27 | return DataFrame{}, errors.New("create dataframe from aws s3: must provide a region") 28 | case len(awsAccessKey) == 0: 29 | return DataFrame{}, errors.New("create dataframe from aws s3: must provide an access key") 30 | case len(awsSecretKey) == 0: 31 | return DataFrame{}, errors.New("create dataframe from aws s3: must provide a secret key") 32 | } 33 | 34 | // Set environment variables. 
35 | os.Setenv("AWS_ACCESS_KEY", awsAccessKey) 36 | os.Setenv("AWS_SECRET_KEY", awsSecretKey) 37 | 38 | // Create path. 39 | filePath, err := filepath.Abs(path + item) 40 | if err != nil { 41 | return DataFrame{}, err 42 | } 43 | 44 | // Create file. 45 | file, err := os.Create(filePath) 46 | if err != nil { 47 | return DataFrame{}, fmt.Errorf("create dataframe from aws s3: error creating the file '%s'", err) 48 | } 49 | defer file.Close() 50 | 51 | // Initialize an AWS session. 52 | sess, err := session.NewSession(&aws.Config{ 53 | Region: aws.String(region)}, 54 | ) 55 | if err != nil { 56 | return DataFrame{}, errors.New("create dataframe from aws s3: error initializing session") 57 | } 58 | 59 | // Download file from AWS 60 | downloader := s3manager.NewDownloader(sess) 61 | 62 | numBytes, err := downloader.Download(file, &s3.GetObjectInput{Bucket: aws.String(bucket), Key: aws.String(item)}) 63 | if err != nil { 64 | return DataFrame{}, fmt.Errorf("create dataframe from aws s3: error downloading file '%s'", err) 65 | } 66 | 67 | fmt.Println("Downloaded", file.Name(), numBytes, "bytes") 68 | 69 | df := CreateDataFrame(path, item) 70 | 71 | return df, nil 72 | } 73 | 74 | func UploadFileToAwsS3(path, filename, bucket, region string) error { 75 | // Check user entries 76 | if path[len(path)-1:] != "/" { 77 | path = path + "/" 78 | } 79 | 80 | // Initialize an AWS session. 81 | sess, err := session.NewSession(&aws.Config{Region: aws.String(region)}) 82 | if err != nil { 83 | return fmt.Errorf("upload file to s3: error initializing session '%s'", err) 84 | } 85 | 86 | // Create an uploader with the session and default options 87 | uploader := s3manager.NewUploader(sess) 88 | 89 | f, err := os.Open(path + filename) 90 | if err != nil { 91 | return errors.New("upload file to s3: failed to open file") 92 | } 93 | 94 | // Upload the file to S3. 
95 | _, err = uploader.Upload(&s3manager.UploadInput{ 96 | Bucket: aws.String(bucket), 97 | Key: aws.String(filename), 98 | Body: f, 99 | }) 100 | if err != nil { 101 | return errors.New("upload file to s3: failed to upload file to aws s3") 102 | } 103 | return nil 104 | } 105 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # go-dataframe 2 | A simple package to abstract away the process of creating usable DataFrames for data analytics. This package is heavily inspired by the amazing Python library, Pandas. 3 | 4 | # Generate DataFrame 5 | Utilize the CreateDataFrame function to create a DataFrame from an existing CSV file or create an empty DataFrame with the CreateNewDataFrame function. The user can then iterate over the DataFrame to perform the intended tasks. All data in the DataFrame is a string by default. There are various methods to provide additional functionality including: converting data types, update values, filter, concatenate, and more. Please use the below examples or explore the code to learn more. 
6 | 7 | # Import Package 8 | ```go 9 | import ( 10 | "fmt" 11 | 12 | dataframe "github.com/kfultz07/go-dataframe" 13 | ) 14 | ``` 15 | 16 | # Load CSV into DataFrame, create a new field, and save 17 | ```go 18 | path := "/Users/Name/Desktop/" 19 | 20 | // Create the DataFrame 21 | df := dataframe.CreateDataFrame(path, "TestData.csv") 22 | 23 | // Create new field 24 | df.NewField("CWT") 25 | 26 | // Iterate over DataFrame 27 | for _, row := range df.FrameRecords { 28 | cost := row.ConvertToFloat("Cost", df.Headers) 29 | weight := row.ConvertToFloat("Weight", df.Headers) 30 | 31 | // Results must be converted back to string 32 | result := fmt.Sprintf("%f", cwt(cost, weight)) 33 | 34 | // Update the row 35 | row.Update("CWT", result, df.Headers) 36 | } 37 | 38 | df.SaveDataFrame(path, "NewFileName") 39 | ``` 40 | 41 | # Bulk Upload to MySQL Database 42 | Bulk insert rows into an MySQL database. The rowsPerBatch indicates the threshold of rows to be inserted in each batch. The tableColumns slice must contain the same columns (in the same order) as are found in the MySQL table being uploaded to. 43 | ```go 44 | rowsPerBatch := 1000 45 | tableColumns := []string{"col_1", "col_2", "col_3"} 46 | 47 | if err := df.BulkUploadMySql(db, rowsPerBatch, tableColumns, "table_name"); err != nil { 48 | return log.Fatal(err) 49 | } 50 | ``` 51 | 52 | # Concurrently load multiple CSV files into DataFrames 53 | Tests performed utilized four files with a total of 5,746,452 records and a varing number of columns. Results indicated an average total load time of 8.81 seconds when loaded sequentially and 4.06 seconds when loaded concurrently utilizing the LoadFrames function. An overall 54% speed improvement. Files must all be in the same directory. Results are returned in a 54 | slice in the same order as provided in the files parameter. 
55 | ```go 56 | filePath := "/Users/Name/Desktop/" 57 | files := []string{ 58 | "One.csv", 59 | "Two.csv", 60 | "Three.csv", 61 | "Four.csv", 62 | "Five.csv", 63 | } 64 | 65 | results, err := LoadFrames(filePath, files) 66 | if err != nil { 67 | log.Fatal(err) 68 | } 69 | 70 | dfOne := results[0] 71 | dfTwo := results[1] 72 | dfThree := results[2] 73 | dfFour := results[3] 74 | dfFive := results[4] 75 | ``` 76 | 77 | # Stream CSV data 78 | Stream rows of data from a csv file to be processed. Streaming data is preferred when dealing with large files and memory usage needs to be considered. Results are streamed via a channel with a StreamingRecord type. A struct with only desired fields could be created and either operated on sequentially or stored in a slice for later use. 79 | ```go 80 | type Product struct { 81 | name string 82 | cost float64 83 | weight float64 84 | } 85 | 86 | func (p Product) CostPerLb() float64 { 87 | if p.weight == 0.0 { 88 | return 0.0 89 | } 90 | return p.cost / p.weight 91 | } 92 | 93 | filePath := "/Users/Name/Desktop/" 94 | 95 | var products []Product 96 | 97 | c := make(chan StreamingRecord) 98 | go Stream(filePath, "TestData.csv", c) 99 | 100 | for row := range c { 101 | prod := Product{ 102 | name: row.Val("Name"), 103 | cost: row.ConvertToFloat("Cost"), 104 | weight: row.ConvertToInt("Weight"), 105 | } 106 | products = append(products, prod) 107 | } 108 | ``` 109 | 110 | # Divide and Conquer 111 | A method that breaks a DataFrame down into smaller sub-frames. This functionality enables the user to process data in the sub-frames concurrently utilizing a worker pool or some other concurrent design pattern. The user provides the number desired sub-frames and the method returns a slice of DataFrames along with an error. 112 | 113 | An average 66% speed improvement was achieved when testing a CSV file with 5M+ rows and four concurrent workers. 
114 | ```go 115 | // Total values in Charge column and sleep for 5 microseconds to simulate expensive processing. 116 | func worker(df dataframe.DataFrame, results chan<- string) { 117 | total := 0.0 118 | for _, row := range df.FrameRecords { 119 | total += row.ConvertToFloat("Charge", df.Headers) 120 | time.Sleep(time.Microsecond * 5) 121 | } 122 | results <- fmt.Sprintf("%f", total) 123 | } 124 | 125 | // Create sub-frames using the DivideAndConquer method. 126 | frames, err := df.DivideAndConquer(5) 127 | if err != nil { 128 | panic(err) 129 | } 130 | 131 | // Spin-up worker pool. 132 | for _, frame := range frames { 133 | go worker(frame, results) 134 | } 135 | 136 | // Print results from channel. 137 | for i := 0; i < numJobs; i++ { 138 | fmt.Println(<-results) 139 | } 140 | ``` 141 | 142 | # AWS S3 Cloud Storage 143 | ```go 144 | // Download a DataFrame from an S3 bucket 145 | path := "/Users/Name/Desktop/" // File path 146 | fileName := "FileName.csv" // File in AWS Bucket must be .csv 147 | bucketName := "BucketName" // Name of the bucket 148 | bucketRegion := "BucketRegion" // Can be found in the Properties tab in the S3 console (ex. 
us-west-1) 149 | awsAccessKey := "AwsAccessKey" // Access keys can be loaded from environment variables within you program 150 | awsSecretKey := "AwsSecretKey" 151 | df, err := CreateDataFrameFromAwsS3(path, fileName, bucketName, bucketRegion, awsAccessKey, awsSecretKey) 152 | if err != nil { 153 | panic(err) 154 | } 155 | 156 | // Upload a file to an S3 bucket 157 | err := UploadFileToAwsS3(path, fileName, bucket, region) 158 | if err != nil { 159 | panic(err) 160 | } 161 | ``` 162 | 163 | # Various methods to filter DataFrames 164 | ```go 165 | // Variadic methods that generate a new DataFrame 166 | dfFil := df.Filtered("Last Name", "McCarlson", "Benison", "Stephenson") 167 | dfFil := df.Exclude("Last Name", "McCarlson", "Benison", "Stephenson") 168 | 169 | // Keep only specific columns 170 | columns := [2]string{"First Name", "Last Name"} 171 | dfFil := df.KeepColumns(columns[:]) 172 | 173 | // Remove multiple columns 174 | dfFil := df.RemoveColumns("ID", "Cost", "First Name") 175 | 176 | // Remove a single column 177 | dfFil := df.RemoveColumns("First Name") 178 | 179 | // Filter before, after, or between specified dates 180 | dfFil := df.FilteredAfter("Date", "2022-12-31") 181 | dfFil := df.FilteredBefore("Date", "2022-12-31") 182 | dfFil := df.FilteredBetween("Date", "2022-01-01", "2022-12-31") 183 | 184 | // Filter a numerical column based on a provided value 185 | df, err := df.GreaterThanOrEqualTo("Cost", float64(value)) 186 | if err != nil { 187 | panic(err) 188 | } 189 | 190 | df, err := df.LessThanOrEqualTo("Weight", float64(value)) 191 | if err != nil { 192 | panic(err) 193 | } 194 | ``` 195 | 196 | # Sort DataFrame 197 | ```go 198 | // Sort specified column in either ascending or descending order. 
199 | err := df.Sort("Cost", true) 200 | if err != nil { 201 | panic("Sort Error: ", err) 202 | } 203 | ``` 204 | 205 | # Add record to DataFrame and later update 206 | ```go 207 | // Add a new record 208 | data := [6]string{"11", "2022-01-01", "123", "456", "Kevin", "Kevison"} 209 | df = df.AddRecord(data[:]) 210 | 211 | // Update a value 212 | for _, row := range df.FrameRecords { 213 | // row.Val() is used to extract the value in a specific column while iterating 214 | if row.Val("Last Name", df.Headers) == "McPoyle" { 215 | row.Update("Last Name", "SchmicMcPoyle", df.Headers) 216 | } 217 | } 218 | ``` 219 | 220 | # Concatenate DataFrames 221 | ```go 222 | // ConcatFrames uses a pointer to the DataFrame being appended. 223 | // Both DataFrames must have the same columns in the same order. 224 | df, err := df.ConcatFrames(&dfFil) 225 | if err != nil { 226 | panic("ConcatFrames Error: ", err) 227 | } 228 | ``` 229 | 230 | # Rename a Column 231 | ```go 232 | // Rename an existing column in a DataFrame 233 | // First parameter provides the original column name to be updated. 234 | // The next parameter is the desired new name. 235 | err := df.Rename("Weight", "Total Weight") 236 | if err != nil { 237 | panic("Rename Column Error: ", err) 238 | } 239 | ``` 240 | 241 | # Merge two DataFrames 242 | ```go 243 | df := CreateDataFrame(path, "TestData.csv") 244 | dfRight := CreateDataFrame(path, "TestDataRight.csv") 245 | 246 | // Merge all columns found in right DataFrame into left DataFrame. 247 | // User provides the lookup column with the unique values that link the two DataFrames. 248 | err := df.Merge(&dfRight, "ID") 249 | if err != nil { 250 | panic(err) 251 | } 252 | 253 | // Merge only specified columns from right DataFrame into left DataFrame. 254 | // User provides columns immediately after the lookup column. 
255 | err := df.Merge(&dfRight, "ID", "City", "State") 256 | if err != nil { 257 | panic(err) 258 | } 259 | 260 | // Inner merge all columns on a specified primary key. 261 | // Results will only include records where the primary key is found in both DataFrames. 262 | df, err := df.InnerMerge(&dfRight, "ID") 263 | if err != nil { 264 | panic(err) 265 | } 266 | ``` 267 | 268 | # Various Tools 269 | ```go 270 | // Total rows 271 | total := df.CountRecords() 272 | 273 | // Returns a slice of all unique values in a specified column 274 | lastNames := df.Unique("Last Name") 275 | 276 | // Print all columns to console 277 | df.ViewColumns() 278 | 279 | // Returns a slice of all columns in order 280 | foundColumns := df.Columns() 281 | 282 | // Generates a decoupled copy of an existing DataFrame. 283 | // Changes made in one DataFrame will not be reflected in the other. 284 | df2 := df.Copy() 285 | ``` 286 | 287 | # Mathematics 288 | ```go 289 | // Sum a numerical column 290 | sum := df.Sum("Cost") 291 | 292 | // Average a numerical column 293 | average := df.Average("Weight") 294 | 295 | // Min or Max of a numerical column 296 | minimum := df.Min("Cost") 297 | maximum := df.Max("Cost") 298 | 299 | // Calculate the standard deviation of a numerical column 300 | stdev, err := df.StandardDeviation("Cost") 301 | if err != nil { 302 | panic(err) 303 | } 304 | ``` -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package dataframe 2 | 3 | import ( 4 | "database/sql" 5 | "encoding/csv" 6 | "errors" 7 | "fmt" 8 | "io" 9 | "log" 10 | "math" 11 | "os" 12 | "path/filepath" 13 | "sort" 14 | "strconv" 15 | "strings" 16 | "time" 17 | 18 | progressbar "github.com/schollz/progressbar/v3" 19 | "golang.org/x/exp/slices" 20 | ) 21 | 22 | type Record struct { 23 | Data []string 24 | } 25 | 26 | type DataFrame struct { 27 | FrameRecords []Record 28 | Headers 
map[string]int
}

type StreamingRecord struct {
	Data    []string
	Headers map[string]int
}

// Val returns the value stored in the specified field.
// Panics if the field is not present in the dataframe.
func (x StreamingRecord) Val(fieldName string) string {
	pos, ok := x.Headers[fieldName]
	if !ok {
		panic(fmt.Errorf("provided field '%s' is not a valid field in the dataframe", fieldName))
	}
	return x.Data[pos]
}

// ConvertToFloat converts the value from a string to float64.
// A value that cannot be parsed is fatal.
func (x StreamingRecord) ConvertToFloat(fieldName string) float64 {
	parsed, err := strconv.ParseFloat(x.Val(fieldName), 64)
	if err != nil {
		log.Fatalf("could not convert to float64: %v", err)
	}
	return parsed
}

// ConvertToInt converts the value from a string to int64.
// A value that cannot be parsed is fatal.
func (x StreamingRecord) ConvertToInt(fieldName string) int64 {
	parsed, err := strconv.ParseInt(x.Val(fieldName), 0, 64)
	if err != nil {
		log.Fatalf("could not convert to int64: %v", err)
	}
	return parsed
}

// CreateNewDataFrame generates a new empty DataFrame with the provided
// headers registered in positional order.
func CreateNewDataFrame(headers []string) DataFrame {
	theHeaders := make(map[string]int, len(headers))
	for pos, name := range headers {
		theHeaders[name] = pos
	}
	return DataFrame{FrameRecords: []Record{}, Headers: theHeaders}
}

// Generate a new DataFrame sourced from a csv file.
78 | func CreateDataFrame(path, fileName string) DataFrame { 79 | if !strings.Contains(fileName, ".csv") && !strings.Contains(fileName, ".CSV") { 80 | fileName = fileName + ".csv" 81 | } 82 | 83 | // Open the CSV file 84 | recordFile, err := os.Open(filepath.Join(path, fileName)) 85 | if err != nil { 86 | log.Fatalf("error opening file: please ensure the path and filename are correct: %v", err) 87 | } 88 | 89 | // Setup the reader 90 | reader := csv.NewReader(recordFile) 91 | 92 | // Read the records 93 | header, err := reader.Read() 94 | if err != nil { 95 | log.Fatalf("error reading the records: %v", err) 96 | } 97 | 98 | // Remove Byte Order Marker for UTF-8 files 99 | for i, each := range header { 100 | byteSlice := []byte(each) 101 | 102 | if len(byteSlice) < 3 { 103 | continue 104 | } 105 | 106 | if byteSlice[0] == 239 && byteSlice[1] == 187 && byteSlice[2] == 191 { 107 | header[i] = each[3:] 108 | } 109 | } 110 | 111 | headers := make(map[string]int) 112 | for i, columnName := range header { 113 | headers[columnName] = i 114 | } 115 | 116 | // Empty slice to store Records 117 | s := []Record{} 118 | 119 | // Loop over the records and create Record objects to be stored 120 | for i := 0; ; i++ { 121 | record, err := reader.Read() 122 | if err == io.EOF { 123 | break 124 | } else if err != nil { 125 | log.Fatalf("error in record loop: %v", err) 126 | } 127 | // Create new Record 128 | x := Record{Data: []string{}} 129 | 130 | // Add to Data field of Record struct 131 | x.Data = append(x.Data, record...) 132 | s = append(s, x) 133 | } 134 | newFrame := DataFrame{FrameRecords: s, Headers: headers} 135 | return newFrame 136 | } 137 | 138 | // Stream rows of data from a csv file to be processed. Streaming data is preferred when dealing with large files 139 | // and memory usage needs to be considered. Results are streamed via a channel with a StreamingRecord type. 
140 | func Stream(path, fileName string, c chan StreamingRecord) { 141 | defer close(c) 142 | 143 | if !strings.Contains(fileName, ".csv") && !strings.Contains(fileName, ".CSV") { 144 | fileName = fileName + ".csv" 145 | } 146 | 147 | // Open the CSV file 148 | recordFile, err := os.Open(filepath.Join(path, fileName)) 149 | if err != nil { 150 | log.Fatalf("error opening the file: please ensure the path and filename are correct: %v", err) 151 | } 152 | 153 | // Setup the reader 154 | reader := csv.NewReader(recordFile) 155 | 156 | // Read the records 157 | header, err := reader.Read() 158 | if err != nil { 159 | log.Fatalf("error reading the records: %v", err) 160 | } 161 | 162 | // Remove Byte Order Marker for UTF-8 files 163 | for i, each := range header { 164 | byteSlice := []byte(each) 165 | 166 | if len(byteSlice) < 3 { 167 | continue 168 | } 169 | 170 | if byteSlice[0] == 239 && byteSlice[1] == 187 && byteSlice[2] == 191 { 171 | header[i] = each[3:] 172 | } 173 | } 174 | 175 | headers := make(map[string]int) 176 | for i, columnName := range header { 177 | headers[columnName] = i 178 | } 179 | 180 | // Loop over the records and create Record objects to be stored 181 | for i := 0; ; i++ { 182 | record, err := reader.Read() 183 | if err == io.EOF { 184 | break 185 | } else if err != nil { 186 | log.Fatalf("error in record loop: %v", err) 187 | } 188 | // Create new Record 189 | x := StreamingRecord{Headers: headers} 190 | 191 | // Loop over records and add to Data field of Record struct 192 | x.Data = append(x.Data, record...) 193 | c <- x 194 | } 195 | } 196 | 197 | func worker(jobs <-chan string, results chan<- DataFrame, resultsNames chan<- string, filePath string) { 198 | for n := range jobs { 199 | df := CreateDataFrame(filePath, n) 200 | results <- df 201 | resultsNames <- n 202 | } 203 | } 204 | 205 | // Concurrently loads multiple csv files into DataFrames within the same directory. 
206 | // Returns a slice with the DataFrames in the same order as provided in the files parameter. 207 | func LoadFrames(filePath string, files []string) ([]DataFrame, error) { 208 | numJobs := len(files) 209 | 210 | if numJobs <= 1 { 211 | return nil, errors.New("LoadFrames requires at least two files") 212 | } 213 | 214 | jobs := make(chan string, numJobs) 215 | results := make(chan DataFrame, numJobs) 216 | resultsNames := make(chan string, numJobs) 217 | 218 | // Generate workers 219 | for i := 0; i < 4; i++ { 220 | go worker(jobs, results, resultsNames, filePath) 221 | } 222 | 223 | // Load up the jobs channel 224 | for i := 0; i < numJobs; i++ { 225 | jobs <- files[i] 226 | } 227 | close(jobs) // Close jobs channel once loaded 228 | 229 | // Map to store results 230 | jobResults := make(map[string]DataFrame) 231 | 232 | // Collect results and store in map 233 | for i := 1; i <= numJobs; i++ { 234 | jobResults[<-resultsNames] = <-results 235 | } 236 | 237 | var orderedResults []DataFrame 238 | for _, f := range files { 239 | val, ok := jobResults[f] 240 | if !ok { 241 | return []DataFrame{}, errors.New("error occurred while looking up returned DataFrame in the LoadFrames function") 242 | } 243 | orderedResults = append(orderedResults, val) 244 | } 245 | return orderedResults, nil 246 | } 247 | 248 | // Calculates number of records to include in each subframe. 249 | func getRowsPerSubframe(rowCount, requestedSubFrames int) (int, error) { 250 | if requestedSubFrames == 0 { 251 | return 0, errors.New("requested Sub Frames in DivideAndConquer cannot be zero") 252 | } 253 | if requestedSubFrames > rowCount { 254 | return 0, errors.New("requested Sub Frames in DivideAndConquer cannot be greater than size of dataframe") 255 | } 256 | if rowCount == 0 { 257 | return 0, errors.New("empty dataframe") 258 | } 259 | return rowCount / requestedSubFrames, nil 260 | } 261 | 262 | // Breaks down a DataFrame into smaller sub-frames to process data concurrently. 
263 | // RequestedSubFrame parameter provided by the user are the number of subframes they would like returned. 264 | func (frame DataFrame) DivideAndConquer(requestedSubFrames int) ([]DataFrame, error) { 265 | frameSize := frame.CountRecords() 266 | 267 | rowsPerSubframe, err := getRowsPerSubframe(frameSize, requestedSubFrames) 268 | if err != nil { 269 | return []DataFrame{}, err 270 | } 271 | 272 | pos := 0 273 | var frames []DataFrame 274 | 275 | // Process each subframe. 276 | for requestedSubFrames > 0 { 277 | dfNew := CreateNewDataFrame(frame.Columns()) 278 | 279 | // When on last subframe. 280 | if requestedSubFrames == 1 { 281 | for pos < frameSize { 282 | dfNew = dfNew.AddRecord(frame.FrameRecords[pos].Data) 283 | pos++ 284 | } 285 | } else { 286 | for i := 0; i < rowsPerSubframe; i++ { 287 | dfNew = dfNew.AddRecord(frame.FrameRecords[pos].Data) 288 | pos++ 289 | } 290 | } 291 | frames = append(frames, dfNew) 292 | requestedSubFrames-- 293 | } 294 | 295 | return frames, nil 296 | } 297 | 298 | func (frame DataFrame) BulkUploadMySql(db *sql.DB, rowsPerBatch int, tableColumns []string, table string) error { 299 | if db == nil { 300 | return errors.New("bulk upload error: database nil pointer") 301 | } 302 | if rowsPerBatch < 1 { 303 | rowsPerBatch = 1000 304 | } 305 | if len(tableColumns) == 0 { 306 | return errors.New("bulk upload error: must provide columns") 307 | } 308 | if len(table) == 0 { 309 | return errors.New("bulk upload error: must provide a table name") 310 | } 311 | 312 | frameColumns := frame.Columns() 313 | 314 | if len(tableColumns) != len(frameColumns) { 315 | return errors.New("bulk upload error: the provided columns do not match dataframe") 316 | } 317 | 318 | var cnt int 319 | bulkData := [][]interface{}{} 320 | bar := progressbar.Default(int64(len(frame.FrameRecords))) 321 | 322 | for _, row := range frame.FrameRecords { 323 | data := []interface{}{} 324 | for _, colData := range frameColumns { 325 | data = append(data, 
row.Val(colData, frame.Headers)) 326 | } 327 | bulkData = append(bulkData, data) 328 | bar.Add(1) 329 | cnt++ 330 | 331 | if cnt == rowsPerBatch { 332 | if err := insertRows(db, bulkData, table, tableColumns); err != nil { 333 | return fmt.Errorf("bulk upload error: inserting records: %v", err) 334 | } 335 | cnt = 0 336 | bulkData = nil 337 | } 338 | } 339 | 340 | // Insert remaining rows that did not hit upload threshold. 341 | if len(bulkData) > 0 { 342 | if err := insertRows(db, bulkData, table, tableColumns); err != nil { 343 | return fmt.Errorf("bulk upload error: inserting records: %v", err) 344 | } 345 | } 346 | 347 | return nil 348 | } 349 | 350 | // Bulk insert rows into a specified table. 351 | func insertRows(db *sql.DB, bulkData [][]interface{}, table string, columns []string) error { 352 | sqlStr := "INSERT INTO `" + table + "`(" 353 | 354 | // Add all columns to the SQL statement. 355 | for _, col := range columns { 356 | sqlStr += "`" + col + "`," 357 | } 358 | // Trim the end to remove comma and add additional SQL. 359 | sqlStr = sqlStr[0:len(sqlStr)-1] + ") VALUES " 360 | 361 | vals := []interface{}{} 362 | 363 | for _, data := range bulkData { 364 | sqlStr += "(" 365 | 366 | // Add "?," for each of the columns. 367 | for i := 0; i < len(columns); i++ { 368 | sqlStr += "?," 369 | } 370 | // Trim comma at the end. 371 | sqlStr = sqlStr[0 : len(sqlStr)-1] 372 | 373 | sqlStr += ")," 374 | vals = append(vals, data...) 375 | } 376 | 377 | // Trim the end to remove comma. 378 | sqlStr = sqlStr[0 : len(sqlStr)-1] 379 | 380 | stmt, err := db.Prepare(sqlStr) 381 | if err != nil { 382 | return err 383 | } 384 | _, err = stmt.Exec(vals...) 
385 | if err != nil { 386 | return err 387 | } 388 | defer stmt.Close() 389 | return nil 390 | } 391 | 392 | // User specifies columns they want to keep from a preexisting DataFrame 393 | func (frame DataFrame) KeepColumns(columns []string) DataFrame { 394 | df := CreateNewDataFrame(columns) 395 | 396 | for _, row := range frame.FrameRecords { 397 | var newData []string 398 | for _, column := range columns { 399 | newData = append(newData, row.Val(column, frame.Headers)) 400 | } 401 | df = df.AddRecord(newData) 402 | } 403 | 404 | return df 405 | } 406 | 407 | // User specifies columns they want to remove from a preexisting DataFrame 408 | func (frame DataFrame) RemoveColumns(columns ...string) DataFrame { 409 | approvedColumns := []string{} 410 | 411 | for _, col := range frame.Columns() { 412 | if !slices.Contains(columns, col) { 413 | approvedColumns = append(approvedColumns, col) 414 | } 415 | } 416 | 417 | return frame.KeepColumns(approvedColumns) 418 | } 419 | 420 | // Rename a specified column in the DataFrame 421 | func (frame *DataFrame) Rename(originalColumnName, newColumnName string) error { 422 | columns := []string{} 423 | var columnLocation int 424 | 425 | for k, v := range frame.Headers { 426 | columns = append(columns, k) 427 | if k == originalColumnName { 428 | columnLocation = v 429 | } 430 | } 431 | 432 | // Check original column name is found in DataFrame 433 | if !slices.Contains(columns, originalColumnName) { 434 | return errors.New("the original column name provided was not found in the DataFrame") 435 | } 436 | 437 | // Check new column name does not already exist 438 | if slices.Contains(columns, newColumnName) { 439 | return errors.New("the provided new column name already exists in the DataFrame and is not allowed") 440 | } 441 | 442 | // Remove original column name 443 | delete(frame.Headers, originalColumnName) 444 | 445 | // Add new column name 446 | frame.Headers[newColumnName] = columnLocation 447 | 448 | return nil 449 | } 450 | 451 
| // Add a new record to the DataFrame 452 | func (frame DataFrame) AddRecord(newData []string) DataFrame { 453 | x := Record{Data: []string{}} 454 | x.Data = append(x.Data, newData...) 455 | frame.FrameRecords = append(frame.FrameRecords, x) 456 | return frame 457 | } 458 | 459 | // Provides a slice of columns in order 460 | func (frame DataFrame) Columns() []string { 461 | var columns []string 462 | 463 | for i := 0; i < len(frame.Headers); i++ { 464 | for k, v := range frame.Headers { 465 | if v == i { 466 | columns = append(columns, k) 467 | } 468 | } 469 | } 470 | return columns 471 | } 472 | 473 | // Generates a decoupled copy of an existing DataFrame. 474 | // Changes made to either the original or new copied frame 475 | // will not be reflected in the other. 476 | func (frame DataFrame) Copy() DataFrame { 477 | headers := []string{} 478 | 479 | for i := 0; i < len(frame.Headers); i++ { 480 | for k, v := range frame.Headers { 481 | if v == i { 482 | headers = append(headers, k) 483 | } 484 | } 485 | } 486 | df := CreateNewDataFrame(headers) 487 | 488 | for i := 0; i < len(frame.FrameRecords); i++ { 489 | df = df.AddRecord(frame.FrameRecords[i].Data) 490 | } 491 | return df 492 | } 493 | 494 | func (frame DataFrame) NumericColumn(fieldName string) bool { 495 | for _, row := range frame.FrameRecords { 496 | _, err := strconv.ParseFloat(row.Val(fieldName, frame.Headers), 64) 497 | if err != nil { 498 | return false 499 | } 500 | } 501 | return true 502 | } 503 | 504 | func (frame *DataFrame) Sort(fieldName string, ascending bool) error { 505 | // Ensure provided column exists. 506 | val, ok := frame.Headers[fieldName] 507 | if !ok { 508 | return errors.New("the provided column to sort does not exist") 509 | } 510 | 511 | // Converts provided value to float64 if column is numeric. 
512 | if frame.NumericColumn(fieldName) { 513 | if ascending { 514 | sort.Slice(frame.FrameRecords, func(i, j int) bool { 515 | iVal, _ := strconv.ParseFloat(frame.FrameRecords[i].Data[val], 64) 516 | jVal, _ := strconv.ParseFloat(frame.FrameRecords[j].Data[val], 64) 517 | return iVal < jVal 518 | }) 519 | return nil 520 | } 521 | sort.Slice(frame.FrameRecords, func(i, j int) bool { 522 | iVal, _ := strconv.ParseFloat(frame.FrameRecords[i].Data[val], 64) 523 | jVal, _ := strconv.ParseFloat(frame.FrameRecords[j].Data[val], 64) 524 | return iVal > jVal 525 | }) 526 | return nil 527 | } 528 | 529 | if ascending { 530 | sort.Slice(frame.FrameRecords, func(i, j int) bool { 531 | return frame.FrameRecords[i].Data[val] < frame.FrameRecords[j].Data[val] 532 | }) 533 | return nil 534 | } 535 | sort.Slice(frame.FrameRecords, func(i, j int) bool { 536 | return frame.FrameRecords[i].Data[val] > frame.FrameRecords[j].Data[val] 537 | }) 538 | return nil 539 | } 540 | 541 | // Generates a new filtered DataFrame. 542 | // New DataFrame will be kept in same order as original. 543 | func (frame DataFrame) Filtered(fieldName string, value ...string) DataFrame { 544 | headers := []string{} 545 | 546 | for i := 0; i < len(frame.Headers); i++ { 547 | for k, v := range frame.Headers { 548 | if v == i { 549 | headers = append(headers, k) 550 | } 551 | } 552 | } 553 | newFrame := CreateNewDataFrame(headers) 554 | 555 | for i := 0; i < len(frame.FrameRecords); i++ { 556 | if slices.Contains(value, frame.FrameRecords[i].Data[frame.Headers[fieldName]]) { 557 | newFrame = newFrame.AddRecord(frame.FrameRecords[i].Data) 558 | } 559 | } 560 | 561 | return newFrame 562 | } 563 | 564 | // Generated a new filtered DataFrame that in which a numerical column is either greater than or equal to 565 | // a provided numerical value. 
566 | func (frame DataFrame) GreaterThanOrEqualTo(fieldName string, value float64) (DataFrame, error) { 567 | headers := []string{} 568 | 569 | for i := 0; i < len(frame.Headers); i++ { 570 | for k, v := range frame.Headers { 571 | if v == i { 572 | headers = append(headers, k) 573 | } 574 | } 575 | } 576 | newFrame := CreateNewDataFrame(headers) 577 | 578 | for i, row := range frame.FrameRecords { 579 | valString := row.Val(fieldName, frame.Headers) 580 | 581 | val, err := strconv.ParseFloat(valString, 64) 582 | if err != nil { 583 | return CreateNewDataFrame([]string{}), err 584 | } 585 | 586 | if val >= value { 587 | newFrame = newFrame.AddRecord(frame.FrameRecords[i].Data) 588 | } 589 | } 590 | return newFrame, nil 591 | } 592 | 593 | // Generated a new filtered DataFrame that in which a numerical column is either less than or equal to 594 | // a provided numerical value. 595 | func (frame DataFrame) LessThanOrEqualTo(fieldName string, value float64) (DataFrame, error) { 596 | headers := []string{} 597 | 598 | for i := 0; i < len(frame.Headers); i++ { 599 | for k, v := range frame.Headers { 600 | if v == i { 601 | headers = append(headers, k) 602 | } 603 | } 604 | } 605 | newFrame := CreateNewDataFrame(headers) 606 | 607 | for i, row := range frame.FrameRecords { 608 | valString := row.Val(fieldName, frame.Headers) 609 | 610 | val, err := strconv.ParseFloat(valString, 64) 611 | if err != nil { 612 | return CreateNewDataFrame([]string{}), err 613 | } 614 | 615 | if val <= value { 616 | newFrame = newFrame.AddRecord(frame.FrameRecords[i].Data) 617 | } 618 | } 619 | return newFrame, nil 620 | } 621 | 622 | // Generates a new DataFrame that excludes specified instances. 623 | // New DataFrame will be kept in same order as original. 
624 | func (frame DataFrame) Exclude(fieldName string, value ...string) DataFrame { 625 | headers := []string{} 626 | 627 | for i := 0; i < len(frame.Headers); i++ { 628 | for k, v := range frame.Headers { 629 | if v == i { 630 | headers = append(headers, k) 631 | } 632 | } 633 | } 634 | newFrame := CreateNewDataFrame(headers) 635 | 636 | for i := 0; i < len(frame.FrameRecords); i++ { 637 | if !slices.Contains(value, frame.FrameRecords[i].Data[frame.Headers[fieldName]]) { 638 | newFrame = newFrame.AddRecord(frame.FrameRecords[i].Data) 639 | } 640 | } 641 | 642 | return newFrame 643 | } 644 | 645 | // Generates a new filtered DataFrame with all records occuring after a specified date provided by the user. 646 | // User must provide the date field as well as the desired date. 647 | // Instances where record dates occur on the same date provided by the user will not be included. 648 | // Records must occur after the specified date. 649 | func (frame DataFrame) FilteredAfter(fieldName, desiredDate string) DataFrame { 650 | headers := []string{} 651 | 652 | for i := 0; i < len(frame.Headers); i++ { 653 | for k, v := range frame.Headers { 654 | if v == i { 655 | headers = append(headers, k) 656 | } 657 | } 658 | } 659 | newFrame := CreateNewDataFrame(headers) 660 | 661 | for i := 0; i < len(frame.FrameRecords); i++ { 662 | recordDate := dateConverter(frame.FrameRecords[i].Data[frame.Headers[fieldName]]) 663 | isAfter := recordDate.After(dateConverter(desiredDate)) 664 | 665 | if isAfter { 666 | newFrame = newFrame.AddRecord(frame.FrameRecords[i].Data) 667 | } 668 | } 669 | return newFrame 670 | } 671 | 672 | // Generates a new filtered DataFrame with all records occuring before a specified date provided by the user. 673 | // User must provide the date field as well as the desired date. 674 | // Instances where record dates occur on the same date provided by the user will not be included. Records must occur 675 | // before the specified date. 
676 | func (frame DataFrame) FilteredBefore(fieldName, desiredDate string) DataFrame { 677 | headers := []string{} 678 | 679 | for i := 0; i < len(frame.Headers); i++ { 680 | for k, v := range frame.Headers { 681 | if v == i { 682 | headers = append(headers, k) 683 | } 684 | } 685 | } 686 | newFrame := CreateNewDataFrame(headers) 687 | 688 | for i := 0; i < len(frame.FrameRecords); i++ { 689 | recordDate := dateConverter(frame.FrameRecords[i].Data[frame.Headers[fieldName]]) 690 | isBefore := recordDate.Before(dateConverter(desiredDate)) 691 | 692 | if isBefore { 693 | newFrame = newFrame.AddRecord(frame.FrameRecords[i].Data) 694 | } 695 | } 696 | 697 | return newFrame 698 | } 699 | 700 | // Generates a new filtered DataFrame with all records occuring between a specified date range provided by the user. 701 | // User must provide the date field as well as the desired date. 702 | // Instances where record dates occur on the same date provided by the user will not be included. Records must occur 703 | // between the specified start and end dates. 704 | func (frame DataFrame) FilteredBetween(fieldName, startDate, endDate string) DataFrame { 705 | headers := []string{} 706 | 707 | for i := 0; i < len(frame.Headers); i++ { 708 | for k, v := range frame.Headers { 709 | if v == i { 710 | headers = append(headers, k) 711 | } 712 | } 713 | } 714 | newFrame := CreateNewDataFrame(headers) 715 | 716 | for i := 0; i < len(frame.FrameRecords); i++ { 717 | recordDate := dateConverter(frame.FrameRecords[i].Data[frame.Headers[fieldName]]) 718 | isAfter := recordDate.After(dateConverter(startDate)) 719 | isBefore := recordDate.Before(dateConverter(endDate)) 720 | 721 | if isAfter && isBefore { 722 | newFrame = newFrame.AddRecord(frame.FrameRecords[i].Data) 723 | } 724 | } 725 | 726 | return newFrame 727 | } 728 | 729 | // Creates a new field and assigns and empty string. 
730 | func (frame *DataFrame) NewField(fieldName string) { 731 | for i, _ := range frame.FrameRecords { 732 | frame.FrameRecords[i].Data = append(frame.FrameRecords[i].Data, "") 733 | } 734 | frame.Headers[fieldName] = len(frame.Headers) 735 | } 736 | 737 | // Return a slice of all unique values found in a specified field. 738 | func (frame *DataFrame) Unique(fieldName string) []string { 739 | var results []string 740 | 741 | for _, row := range frame.FrameRecords { 742 | if !slices.Contains(results, row.Val(fieldName, frame.Headers)) { 743 | results = append(results, row.Val(fieldName, frame.Headers)) 744 | } 745 | } 746 | return results 747 | } 748 | 749 | // Stack two DataFrames with matching headers. 750 | func (frame DataFrame) ConcatFrames(dfNew *DataFrame) (DataFrame, error) { 751 | if dfNew == nil { 752 | return frame, errors.New("nil pointer found in ConcatFrames method") 753 | } 754 | 755 | // Check number of columns in each frame match. 756 | if len(frame.Headers) != len(dfNew.Headers) { 757 | return frame, errors.New("cannot ConcatFrames as columns do not match") 758 | } 759 | 760 | // Check columns in both frames are in the same order. 
761 | originalFrame := []string{} 762 | for i := 0; i <= len(frame.Headers); i++ { 763 | for k, v := range frame.Headers { 764 | if v == i { 765 | originalFrame = append(originalFrame, k) 766 | } 767 | } 768 | } 769 | 770 | newFrame := []string{} 771 | for i := 0; i <= len(dfNew.Headers); i++ { 772 | for k, v := range dfNew.Headers { 773 | if v == i { 774 | newFrame = append(newFrame, k) 775 | } 776 | } 777 | } 778 | 779 | for i, each := range originalFrame { 780 | if each != newFrame[i] { 781 | return frame, errors.New("cannot ConcatFrames as columns are not in the same order") 782 | } 783 | } 784 | 785 | // Iterate over new dataframe in order 786 | for i := 0; i < len(dfNew.FrameRecords); i++ { 787 | frame.FrameRecords = append(frame.FrameRecords, dfNew.FrameRecords[i]) 788 | } 789 | return frame, nil 790 | } 791 | 792 | // Import all columns from right frame into left frame if no columns 793 | // are provided by the user. Process must be done so in order. 794 | func (frame DataFrame) Merge(dfRight *DataFrame, primaryKey string, columns ...string) error { 795 | if dfRight == nil { 796 | return errors.New("nil pointer found in Merge method") 797 | } 798 | 799 | if len(columns) == 0 { 800 | for i := 0; i < len(dfRight.Headers); i++ { 801 | for k, v := range dfRight.Headers { 802 | if v == i { 803 | columns = append(columns, k) 804 | } 805 | } 806 | } 807 | } else { 808 | // Ensure columns user provided are all found in right frame. 809 | for _, col := range columns { 810 | colStatus := false 811 | for k, _ := range dfRight.Headers { 812 | if col == k { 813 | colStatus = true 814 | } 815 | } 816 | // Ensure there are no duplicated columns other than the primary key. 817 | if !colStatus { 818 | return errors.New("merge Error: User provided column not found in right dataframe") 819 | } 820 | } 821 | } 822 | 823 | // Check that no columns are duplicated between the two frames (other than primaryKey). 
824 | for _, col := range columns { 825 | for k, _ := range frame.Headers { 826 | if col == k && col != primaryKey { 827 | return errors.New("the following column is duplicated in both frames and is not the specified primary key which is not allowed: " + col) 828 | } 829 | } 830 | } 831 | 832 | // Load map indicating the location of each lookup value in right frame. 833 | lookup := make(map[string]int) 834 | for i, row := range dfRight.FrameRecords { 835 | lookup[row.Val(primaryKey, dfRight.Headers)] = i 836 | } 837 | 838 | // Create new columns in left frame. 839 | for _, col := range columns { 840 | if col != primaryKey { 841 | frame.NewField(col) 842 | } 843 | } 844 | 845 | // Iterate over left frame and add new data. 846 | for _, row := range frame.FrameRecords { 847 | lookupVal := row.Val(primaryKey, frame.Headers) 848 | 849 | if val, ok := lookup[lookupVal]; ok { 850 | for _, col := range columns { 851 | if col != primaryKey { 852 | valToAdd := dfRight.FrameRecords[val].Data[dfRight.Headers[col]] 853 | row.Update(col, valToAdd, frame.Headers) 854 | } 855 | } 856 | } 857 | } 858 | return nil 859 | } 860 | 861 | // Performs an inner merge where all columns are consolidated between the two frames but only for records 862 | // where the specified primary key is found in both frames. 
863 | func (frame DataFrame) InnerMerge(dfRight *DataFrame, primaryKey string) (DataFrame, error) { 864 | if dfRight == nil { 865 | return frame, errors.New("nil pointer found in InnerMerge method") 866 | } 867 | 868 | var rightFrameColumns []string 869 | 870 | for i := 0; i < len(dfRight.Headers); i++ { 871 | for k, v := range dfRight.Headers { 872 | if v == i { 873 | rightFrameColumns = append(rightFrameColumns, k) 874 | } 875 | } 876 | } 877 | 878 | var leftFrameColumns []string 879 | 880 | for i := 0; i < len(frame.Headers); i++ { 881 | for k, v := range frame.Headers { 882 | if v == i { 883 | leftFrameColumns = append(leftFrameColumns, k) 884 | } 885 | } 886 | } 887 | 888 | // Ensure the specified primary key is found in both frames. 889 | var lStatus bool 890 | var rStatus bool 891 | 892 | for _, col := range leftFrameColumns { 893 | if col == primaryKey { 894 | lStatus = true 895 | } 896 | } 897 | 898 | for _, col := range rightFrameColumns { 899 | if col == primaryKey { 900 | rStatus = true 901 | } 902 | } 903 | 904 | if !lStatus || !rStatus { 905 | return frame, errors.New("the specified primary key was not found in both DataFrames") 906 | } 907 | 908 | // Find position of primary key column in right frame. 909 | var rightFramePrimaryKeyPosition int 910 | for i, col := range rightFrameColumns { 911 | if col == primaryKey { 912 | rightFramePrimaryKeyPosition = i 913 | } 914 | } 915 | 916 | // Check that no columns are duplicated between the two frames (other than primaryKey). 917 | for _, col := range rightFrameColumns { 918 | for k, _ := range frame.Headers { 919 | if col == k && col != primaryKey { 920 | return frame, errors.New("the following column is duplicated in both frames and is not the specified primary key which is not allowed: " + col) 921 | } 922 | } 923 | } 924 | 925 | // Load map indicating the location of each lookup value in right frame. 
926 | rLookup := make(map[string]int) 927 | for i, row := range dfRight.FrameRecords { 928 | // Only add if key hasn't already been added. This ensures the first record found in the right 929 | // frame is what is used instead of the last if duplicates are found. 930 | currentKey := row.Val(primaryKey, dfRight.Headers) 931 | _, ok := rLookup[currentKey] 932 | if !ok { 933 | rLookup[currentKey] = i 934 | } 935 | } 936 | 937 | // New DataFrame to house records found in both frames. 938 | dfNew := CreateNewDataFrame(leftFrameColumns) 939 | 940 | // Add right frame columns to new DataFrame. 941 | for i, col := range rightFrameColumns { 942 | // Skip over primary key column in right frame as it was already included in the left frame. 943 | if i != rightFramePrimaryKeyPosition { 944 | dfNew.NewField(col) 945 | } 946 | } 947 | 948 | var approvedPrimaryKeys []string 949 | 950 | // Create slice of specified ID's found in both frames. 951 | for _, lRow := range frame.FrameRecords { 952 | currentKey := lRow.Val(primaryKey, frame.Headers) 953 | 954 | // Skip blank values as they are not allowed. 955 | if len(currentKey) == 0 || strings.ToLower(currentKey) == "nan" || strings.ToLower(currentKey) == "null" { 956 | continue 957 | } 958 | 959 | for _, rRow := range dfRight.FrameRecords { 960 | currentRightFrameKey := rRow.Val(primaryKey, dfRight.Headers) 961 | // Add primary key to approved list if found in right frame. 962 | if currentRightFrameKey == currentKey { 963 | approvedPrimaryKeys = append(approvedPrimaryKeys, currentKey) 964 | } 965 | } 966 | } 967 | 968 | // Add approved records to new DataFrame. 969 | for i, row := range frame.FrameRecords { 970 | currentKey := row.Val(primaryKey, frame.Headers) 971 | if slices.Contains(approvedPrimaryKeys, currentKey) { 972 | lData := frame.FrameRecords[i].Data 973 | rData := dfRight.FrameRecords[rLookup[currentKey]].Data 974 | 975 | // Add left frame data to variable. 976 | var data []string 977 | data = append(data, lData...) 
978 | 979 | // Add all right frame data while skipping over the primary key column. 980 | // The primary key column is skipped as it has already been added from the left frame. 981 | for i, d := range rData { 982 | if i != rightFramePrimaryKeyPosition { 983 | data = append(data, d) 984 | } 985 | } 986 | 987 | dfNew = dfNew.AddRecord(data) 988 | } 989 | } 990 | return dfNew, nil 991 | } 992 | 993 | func (frame *DataFrame) CountRecords() int { 994 | return len(frame.FrameRecords) 995 | } 996 | 997 | // Return a sum of float64 type of a numerical field. 998 | func (frame *DataFrame) Sum(fieldName string) float64 { 999 | var sum float64 1000 | 1001 | for _, row := range frame.FrameRecords { 1002 | val, err := strconv.ParseFloat(row.Val(fieldName, frame.Headers), 64) 1003 | if err != nil { 1004 | log.Fatalf("could not convert string to float during sum: %v", err) 1005 | } 1006 | sum += val 1007 | } 1008 | return sum 1009 | } 1010 | 1011 | // Return an average of type float64 of a numerical field. 1012 | func (frame *DataFrame) Average(fieldName string) float64 { 1013 | sum := frame.Sum(fieldName) 1014 | count := frame.CountRecords() 1015 | 1016 | if count == 0 { 1017 | return 0.0 1018 | } 1019 | return sum / float64(count) 1020 | } 1021 | 1022 | // Return the maximum value in a numerical field. 1023 | func (frame *DataFrame) Max(fieldName string) float64 { 1024 | maximum := 0.0 1025 | for i, row := range frame.FrameRecords { 1026 | // Set the max to the first value in dataframe. 
1027 | if i == 0 { 1028 | initialMax, err := strconv.ParseFloat(row.Val(fieldName, frame.Headers), 64) 1029 | if err != nil { 1030 | log.Fatalf("could not convert string to float during sum: %v", err) 1031 | } 1032 | maximum = initialMax 1033 | } 1034 | val, err := strconv.ParseFloat(row.Val(fieldName, frame.Headers), 64) 1035 | if err != nil { 1036 | log.Fatalf("could not convert string to float during sum: %v", err) 1037 | } 1038 | 1039 | if val > maximum { 1040 | maximum = val 1041 | } 1042 | } 1043 | return maximum 1044 | } 1045 | 1046 | // Return the minimum value in a numerical field. 1047 | func (frame *DataFrame) Min(fieldName string) float64 { 1048 | min := 0.0 1049 | for i, row := range frame.FrameRecords { 1050 | // Set the max to the first value in dataframe. 1051 | if i == 0 { 1052 | initialMin, err := strconv.ParseFloat(row.Val(fieldName, frame.Headers), 64) 1053 | if err != nil { 1054 | log.Fatalf("could not convert string to float during sum: %v", err) 1055 | } 1056 | min = initialMin 1057 | } 1058 | val, err := strconv.ParseFloat(row.Val(fieldName, frame.Headers), 64) 1059 | if err != nil { 1060 | log.Fatalf("could not convert string to float during sum: %v", err) 1061 | } 1062 | 1063 | if val < min { 1064 | min = val 1065 | } 1066 | } 1067 | return min 1068 | } 1069 | 1070 | func standardDeviation(num []float64) float64 { 1071 | l := float64(len(num)) 1072 | sum := 0.0 1073 | var sd float64 1074 | 1075 | for _, n := range num { 1076 | sum += n 1077 | } 1078 | 1079 | mean := sum / l 1080 | 1081 | for j := 0; j < int(l); j++ { 1082 | // The use of Pow math function func Pow(x, y float64) float64 1083 | sd += math.Pow(num[j]-mean, 2) 1084 | } 1085 | // The use of Sqrt math function func Sqrt(x float64) float64 1086 | sd = math.Sqrt(sd / l) 1087 | 1088 | return sd 1089 | } 1090 | 1091 | // Return the standard deviation of a numerical field. 
1092 | func (frame *DataFrame) StandardDeviation(fieldName string) (float64, error) { 1093 | var nums []float64 1094 | 1095 | for _, row := range frame.FrameRecords { 1096 | num, err := strconv.ParseFloat(row.Val(fieldName, frame.Headers), 64) 1097 | if err != nil { 1098 | return 0.0, errors.New("could not convert string to number in specified column to calculate standard deviation") 1099 | } 1100 | nums = append(nums, num) 1101 | } 1102 | return standardDeviation(nums), nil 1103 | } 1104 | 1105 | func (frame *DataFrame) SaveDataFrame(path, fileName string) bool { 1106 | if !strings.Contains(fileName, ".csv") && !strings.Contains(fileName, ".CSV") { 1107 | fileName = fileName + ".csv" 1108 | } 1109 | 1110 | // Create the csv file 1111 | csvFile, err := os.Create(filepath.Join(path, fileName)) 1112 | if err != nil { 1113 | log.Fatalf("error creating the blank csv file to save the data: %v", err) 1114 | } 1115 | defer csvFile.Close() 1116 | 1117 | w := csv.NewWriter(csvFile) 1118 | defer w.Flush() 1119 | 1120 | var data [][]string 1121 | var row []string 1122 | columnLength := len(frame.Headers) 1123 | 1124 | // Write headers to top of file 1125 | for i := 0; i < columnLength; i++ { 1126 | for k, v := range frame.Headers { 1127 | if v == i { 1128 | row = append(row, k) 1129 | } 1130 | } 1131 | } 1132 | data = append(data, row) 1133 | 1134 | // Add Data 1135 | for i := 0; i < len(frame.FrameRecords); i++ { 1136 | var row []string 1137 | for pos := 0; pos < columnLength; pos++ { 1138 | row = append(row, frame.FrameRecords[i].Data[pos]) 1139 | } 1140 | data = append(data, row) 1141 | } 1142 | 1143 | w.WriteAll(data) 1144 | 1145 | return true 1146 | } 1147 | 1148 | // Return the value of the specified field. 
1149 | func (x Record) Val(fieldName string, headers map[string]int) string { 1150 | if _, ok := headers[fieldName]; !ok { 1151 | panic(fmt.Errorf("the provided field %s is not a valid field in the dataframe", fieldName)) 1152 | } 1153 | return x.Data[headers[fieldName]] 1154 | } 1155 | 1156 | // Update the value in a specified field. 1157 | func (x Record) Update(fieldName, value string, headers map[string]int) { 1158 | if _, ok := headers[fieldName]; !ok { 1159 | panic(fmt.Errorf("the provided field %s is not a valid field in the dataframe", fieldName)) 1160 | } 1161 | x.Data[headers[fieldName]] = value 1162 | } 1163 | 1164 | // Converts the value from a string to float64. 1165 | func (x Record) ConvertToFloat(fieldName string, headers map[string]int) float64 { 1166 | value, err := strconv.ParseFloat(x.Val(fieldName, headers), 64) 1167 | if err != nil { 1168 | log.Fatalf("could not convert to float64: %v", err) 1169 | } 1170 | return value 1171 | } 1172 | 1173 | // Converts the value from a string to int64. 
1174 | func (x Record) ConvertToInt(fieldName string, headers map[string]int) int64 { 1175 | value, err := strconv.ParseInt(x.Val(fieldName, headers), 0, 64) 1176 | if err != nil { 1177 | log.Fatalf("could not convert to int64: %v", err) 1178 | } 1179 | return value 1180 | } 1181 | 1182 | // Converts various date strings into time.Time 1183 | func dateConverter(dateString string) time.Time { 1184 | // Convert date if not in 2006-01-02 format 1185 | if strings.Contains(dateString, "/") { 1186 | dateSlice := strings.Split(dateString, "/") 1187 | 1188 | if len(dateSlice[0]) != 2 { 1189 | dateSlice[0] = "0" + dateSlice[0] 1190 | } 1191 | if len(dateSlice[1]) != 2 { 1192 | dateSlice[1] = "0" + dateSlice[1] 1193 | } 1194 | if len(dateSlice[2]) == 2 { 1195 | dateSlice[2] = "20" + dateSlice[2] 1196 | } 1197 | dateString = dateSlice[2] + "-" + dateSlice[0] + "-" + dateSlice[1] 1198 | } 1199 | 1200 | value, err := time.Parse("2006-01-02", dateString) 1201 | if err != nil { 1202 | log.Fatalf("could not convert to time.Time: %v", err) 1203 | } 1204 | return value 1205 | } 1206 | 1207 | // Converts date from specified field to time.Time 1208 | func (x Record) ConvertToDate(fieldName string, headers map[string]int) time.Time { 1209 | result := dateConverter(x.Val(fieldName, headers)) 1210 | return result 1211 | } 1212 | -------------------------------------------------------------------------------- /main_test.go: -------------------------------------------------------------------------------- 1 | package dataframe 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "math" 7 | "math/rand" 8 | "strconv" 9 | "testing" 10 | "time" 11 | ) 12 | 13 | func TestStream(t *testing.T) { 14 | firstNameAnswers := []string{"Kevin", "Beth", "Avery", "Peter", "Andy", "Nick", "Bryan", "Brian", "Eric", "Carl"} 15 | costAnswers := []string{"818", "777", "493", "121", "774", "874", "995", "133", "939", "597"} 16 | 17 | path := "./" 18 | c := make(chan StreamingRecord) 19 | go Stream(path, "TestData.csv", c) 
20 | 21 | i := 0 22 | for row := range c { 23 | if row.Val("First Name") != firstNameAnswers[i] { 24 | t.Error("First name did not match.") 25 | } 26 | if row.Val("Cost") != costAnswers[i] { 27 | t.Error("Cost did not match.") 28 | } 29 | i++ 30 | } 31 | } 32 | 33 | func TestStreamConvertToInt(t *testing.T) { 34 | costAnswers := []int64{818, 777, 493, 121, 774, 874, 995, 133, 939, 597} 35 | 36 | path := "./" 37 | c := make(chan StreamingRecord) 38 | go Stream(path, "TestData.csv", c) 39 | 40 | i := 0 41 | for row := range c { 42 | val := row.ConvertToInt("Cost") 43 | if val != costAnswers[i] { 44 | t.Error("Could not convert to int64.") 45 | } 46 | i++ 47 | } 48 | } 49 | 50 | func TestStreamConvertToFloat(t *testing.T) { 51 | costAnswers := []float64{818.0, 777.0, 493.0, 121.0, 774.0, 874.0, 995.0, 133.0, 939.0, 597.0} 52 | 53 | path := "./" 54 | c := make(chan StreamingRecord) 55 | go Stream(path, "TestData.csv", c) 56 | 57 | i := 0 58 | for row := range c { 59 | val := row.ConvertToFloat("Cost") 60 | if val != costAnswers[i] { 61 | t.Error("Could not convert to float64.") 62 | } 63 | i++ 64 | } 65 | } 66 | 67 | func TestDynamicMetrics(t *testing.T) { 68 | // Create DataFrame 69 | columns := []string{"Value"} 70 | df := CreateNewDataFrame(columns) 71 | 72 | sum := 0.0 73 | min := 1 74 | max := 100 75 | recordedMax := 0.0 76 | recordedMin := float64(max) + 1.0 77 | totalRecords := 1_000_000 78 | 79 | for i := 0; i < totalRecords; i++ { 80 | // Ensures differing values generated on each run. 
81 | rand.Seed(time.Now().UnixNano()) 82 | v := float64(rand.Intn(max-min)+min) + rand.Float64() 83 | sum = sum + v 84 | 85 | // Add data to DataFrame 86 | data := []string{fmt.Sprintf("%f", v)} 87 | df = df.AddRecord(data) 88 | 89 | if v > recordedMax { 90 | recordedMax = v 91 | } 92 | if v < recordedMin { 93 | recordedMin = v 94 | } 95 | } 96 | 97 | dataFrameValue := df.Sum("Value") 98 | dataFrameAvgValue := math.Round(df.Average("Value")*100) / 100 99 | dataFrameMaxValue := math.Round(df.Max("Value")*100) / 100 100 | dataFrameMinValue := math.Round(df.Min("Value")*100) / 100 101 | avg := math.Round(sum/float64(totalRecords)*100) / 100 102 | recordedMax = math.Round(recordedMax*100) / 100 103 | recordedMin = math.Round(recordedMin*100) / 100 104 | 105 | if math.Abs(dataFrameValue-sum) > 0.001 { 106 | t.Error("Dynamic Metrics: sum float failed", dataFrameValue, sum, math.Abs(dataFrameValue-sum)) 107 | } 108 | if dataFrameAvgValue != avg { 109 | t.Error("Dynamic Metrics: average float failed", dataFrameAvgValue, avg) 110 | } 111 | if dataFrameMaxValue != recordedMax { 112 | t.Error("Dynamic Metrics: max value error", dataFrameMaxValue, recordedMax) 113 | } 114 | if dataFrameMinValue != recordedMin { 115 | t.Error("Dynamic Metrics: min value error", dataFrameMinValue, recordedMin) 116 | } 117 | if df.CountRecords() != totalRecords { 118 | t.Error("Dynamic Metrics: count records error", df.CountRecords(), totalRecords) 119 | } 120 | } 121 | 122 | func TestCreateDataFrameCostFloat(t *testing.T) { 123 | path := "./" 124 | df := CreateDataFrame(path, "TestData.csv") 125 | total := 0.0 126 | 127 | for _, row := range df.FrameRecords { 128 | total += row.ConvertToFloat("Cost", df.Headers) 129 | } 130 | 131 | if total != 6521.0 { 132 | t.Error("Cost sum incorrect.") 133 | } 134 | } 135 | 136 | func TestCreateDataFrameCostInt(t *testing.T) { 137 | path := "./" 138 | df := CreateDataFrame(path, "TestData.csv") 139 | var total int64 140 | 141 | for _, row := range 
df.FrameRecords { 142 | total += row.ConvertToInt("Cost", df.Headers) 143 | } 144 | 145 | if total != 6521 { 146 | t.Error("Cost sum incorrect.") 147 | } 148 | } 149 | 150 | func TestSum(t *testing.T) { 151 | path := "./" 152 | df := CreateDataFrame(path, "TestData.csv") 153 | 154 | if df.Sum("Weight") != 3376.0 || df.Sum("Cost") != 6521.0 { 155 | t.Error("Just sum error...") 156 | } 157 | } 158 | 159 | func TestAverage(t *testing.T) { 160 | path := "./" 161 | df := CreateDataFrame(path, "TestData.csv") 162 | 163 | if df.Average("Weight") != 337.60 || df.Average("Cost") != 652.10 { 164 | t.Error("Not your average error...") 165 | } 166 | } 167 | 168 | func TestMax(t *testing.T) { 169 | path := "./" 170 | df := CreateDataFrame(path, "TestData.csv") 171 | 172 | if df.Max("Weight") != 500.0 || df.Max("Cost") != 995.0 { 173 | t.Error("Error to the max...") 174 | } 175 | } 176 | 177 | func TestMin(t *testing.T) { 178 | path := "./" 179 | df := CreateDataFrame(path, "TestData.csv") 180 | 181 | if df.Min("Weight") != 157.0 || df.Min("Cost") != 121.0 { 182 | t.Error("Error to the min...") 183 | } 184 | } 185 | 186 | func TestStandardDeviationFunction(t *testing.T) { 187 | nums := []float64{4.27, 23.45, 34.43, 54.76, 65.90, 234.45} 188 | stdev := standardDeviation(nums) 189 | expected := 76.42444976721926 190 | variance := stdev - expected 191 | 192 | if stdev != expected { 193 | t.Error(fmt.Printf("Standard Deviation calculation error: Expected: %f Result: %f Variance: %f\n", expected, stdev, variance)) 194 | } 195 | } 196 | 197 | func TestStandardDeviationMethodPass(t *testing.T) { 198 | // Create DataFrame 199 | columns := []string{"ID", "Value"} 200 | df := CreateNewDataFrame(columns) 201 | 202 | for i := 0; i < 1000; i++ { 203 | val := strconv.Itoa(i) 204 | df = df.AddRecord([]string{"ID-" + val, val}) 205 | } 206 | 207 | stdev, err := df.StandardDeviation("Value") 208 | if err != nil { 209 | t.Error("Test should have passed without any string to float conversion 
errors.") 210 | } 211 | 212 | expected := 288.6749902572095 213 | variance := stdev - expected 214 | 215 | if stdev != expected { 216 | t.Error(fmt.Printf("Standard Deviation calculation error: Expected: %f Result: %f Variance: %f\n", expected, stdev, variance)) 217 | } 218 | } 219 | 220 | func TestStandardDeviationMethodFail(t *testing.T) { 221 | // Create DataFrame 222 | columns := []string{"ID", "Value"} 223 | df := CreateNewDataFrame(columns) 224 | 225 | for i := 0; i < 1000; i++ { 226 | // Insert row with value that cannot be converted to float64. 227 | if i == 500 { 228 | df = df.AddRecord([]string{"ID-" + "500", "5x0x0x"}) 229 | } 230 | val := strconv.Itoa(i) 231 | df = df.AddRecord([]string{"ID-" + val, val}) 232 | } 233 | 234 | _, err := df.StandardDeviation("Value") 235 | if err == nil { 236 | t.Error("Test should have failed.") 237 | } 238 | } 239 | 240 | func TestFilteredCount(t *testing.T) { 241 | path := "./" 242 | df := CreateDataFrame(path, "TestData.csv") 243 | dfFil := df.Filtered("Last Name", "Fultz", "Wiedmann") 244 | 245 | if df.CountRecords() != 10 || dfFil.CountRecords() != 5 { 246 | t.Error("Filtered count incorrect.") 247 | } 248 | } 249 | 250 | func TestFilteredCheck(t *testing.T) { 251 | path := "./" 252 | df := CreateDataFrame(path, "TestData.csv") 253 | dfFil := df.Filtered("Last Name", "Fultz", "Wiedmann") 254 | 255 | for _, row := range dfFil.FrameRecords { 256 | if row.Val("Last Name", dfFil.Headers) != "Fultz" && row.Val("Last Name", dfFil.Headers) != "Wiedmann" { 257 | t.Error("Invalid parameter found in Filtered DataFrame.") 258 | } 259 | } 260 | } 261 | 262 | // Ensures changes made in the original dataframe are not also made in a filtered dataframe. 
263 | func TestFilteredChangeToOriginal(t *testing.T) { 264 | path := "./" 265 | df := CreateDataFrame(path, "TestData.csv") 266 | dfFil := df.Filtered("Last Name", "Fultz", "Wiedmann") 267 | 268 | for _, row := range df.FrameRecords { 269 | if row.Val("ID", df.Headers) == "2" { 270 | row.Update("Last Name", "Bethany", df.Headers) 271 | } 272 | if row.Val("ID", df.Headers) == "5" { 273 | row.Update("Last Name", "Andyanne", df.Headers) 274 | } 275 | } 276 | 277 | // Ensure row was actually updated in the original frame. 278 | for _, row := range df.FrameRecords { 279 | if row.Val("ID", df.Headers) == "2" && row.Val("Last Name", df.Headers) != "Bethany" { 280 | t.Error("Row 2 last name not changed in original frame.") 281 | } 282 | if row.Val("ID", df.Headers) == "5" && row.Val("Last Name", df.Headers) != "Andyanne" { 283 | t.Error("Row 5 last name not changed in original frame.") 284 | } 285 | } 286 | 287 | // Check rows in filtered dataframe were not also updated. 288 | for _, row := range dfFil.FrameRecords { 289 | if row.Val("ID", df.Headers) == "2" && row.Val("Last Name", df.Headers) != "Fultz" { 290 | t.Error("Row 2 in filtered dataframe was incorrectly updated with original.") 291 | } 292 | if row.Val("ID", df.Headers) == "5" && row.Val("Last Name", df.Headers) != "Wiedmann" { 293 | t.Error("Row 5 in filtered dataframe was incorrectly updated with original.") 294 | } 295 | } 296 | } 297 | 298 | func TestGreaterThanOrEqualTo(t *testing.T) { 299 | path := "./" 300 | value := float64(597) 301 | df := CreateDataFrame(path, "TestData.csv") 302 | df, err := df.GreaterThanOrEqualTo("Cost", value) 303 | if err != nil { 304 | t.Error("Greater Than Or Equal To: This should not have failed...") 305 | } 306 | 307 | if df.CountRecords() != 7 { 308 | t.Error("Greater Than Or Equal To: Record count is not correct.") 309 | } 310 | 311 | ids := []string{"1", "2", "5", "6", "7", "9", "10"} 312 | foundIds := df.Unique("ID") 313 | 314 | for i, id := range foundIds { 315 | if id 
!= ids[i] { 316 | t.Error("Greater Than Or Equal To: Records do not match.") 317 | } 318 | } 319 | } 320 | 321 | func TestLessThanOrEqualTo(t *testing.T) { 322 | path := "./" 323 | value := float64(436) 324 | df := CreateDataFrame(path, "TestData.csv") 325 | df, err := df.LessThanOrEqualTo("Weight", value) 326 | if err != nil { 327 | t.Error("Less Than Or Equal To: This should not have failed...") 328 | } 329 | 330 | if df.CountRecords() != 7 { 331 | t.Error("Less Than Or Equal To: Record count is not correct.") 332 | } 333 | 334 | ids := []string{"1", "2", "4", "5", "6", "8", "9"} 335 | foundIds := df.Unique("ID") 336 | 337 | for i, id := range foundIds { 338 | if id != ids[i] { 339 | t.Error("Less Than Or Equal To: Records do not match.") 340 | } 341 | } 342 | } 343 | 344 | func TestExcludeCount(t *testing.T) { 345 | path := "./" 346 | df := CreateDataFrame(path, "TestData.csv") 347 | dfExcl := df.Exclude("Last Name", "Fultz", "Wiedmann") 348 | 349 | if df.CountRecords() != 10 || dfExcl.CountRecords() != 5 { 350 | t.Error("Excluded count is incorrect.") 351 | } 352 | } 353 | 354 | func TestExcludeCheck(t *testing.T) { 355 | path := "./" 356 | df := CreateDataFrame(path, "TestData.csv") 357 | dfExcl := df.Exclude("Last Name", "Fultz", "Wiedmann") 358 | 359 | for _, row := range dfExcl.FrameRecords { 360 | if row.Val("Last Name", dfExcl.Headers) == "Fultz" || row.Val("Last Name", dfExcl.Headers) == "Wiedmann" { 361 | t.Error("Excluded parameter found in DataFrame.") 362 | } 363 | } 364 | } 365 | 366 | func TestFilteredAfterCount(t *testing.T) { 367 | path := "./" 368 | df := CreateDataFrame(path, "TestData.csv") 369 | dfFil := df.FilteredAfter("Date", "2022-01-08") 370 | 371 | if df.CountRecords() != 10 || dfFil.CountRecords() != 2 { 372 | t.Error("Filtered After count incorrect.") 373 | } 374 | } 375 | 376 | func TestFilteredAfterCountExcelFormat(t *testing.T) { 377 | path := "./" 378 | df := CreateDataFrame(path, "TestDataDateFormat.csv") 379 | dfFil := 
df.FilteredAfter("Date", "2022-01-08") 380 | 381 | if df.CountRecords() != 10 || dfFil.CountRecords() != 2 { 382 | t.Error("Filtered After Excel Format count incorrect.") 383 | } 384 | } 385 | 386 | func TestFilteredBeforeCount(t *testing.T) { 387 | path := "./" 388 | df := CreateDataFrame(path, "TestData.csv") 389 | dfFil := df.FilteredBefore("Date", "2022-01-08") 390 | 391 | if df.CountRecords() != 10 || dfFil.CountRecords() != 7 { 392 | t.Error("Filtered Before count incorrect.") 393 | } 394 | } 395 | 396 | func TestFilteredBeforeCountExcelFormat(t *testing.T) { 397 | path := "./" 398 | df := CreateDataFrame(path, "TestDataDateFormat.csv") 399 | dfFil := df.FilteredBefore("Date", "2022-01-08") 400 | 401 | if df.CountRecords() != 10 || dfFil.CountRecords() != 7 { 402 | t.Error("Filtered Before Excel Format count incorrect.") 403 | } 404 | } 405 | 406 | func TestFilteredBetweenCount(t *testing.T) { 407 | path := "./" 408 | df := CreateDataFrame(path, "TestData.csv") 409 | dfFil := df.FilteredBetween("Date", "2022-01-02", "2022-01-09") 410 | 411 | if df.CountRecords() != 10 || dfFil.CountRecords() != 6 { 412 | t.Error("Filtered Between count incorrect.") 413 | } 414 | } 415 | 416 | func TestFilteredBetweenExcelFormat(t *testing.T) { 417 | path := "./" 418 | df := CreateDataFrame(path, "TestDataDateFormat.csv") 419 | dfFil := df.FilteredBetween("Date", "2022-01-02", "2022-01-09") 420 | 421 | if df.CountRecords() != 10 || dfFil.CountRecords() != 6 { 422 | t.Error("Filtered Between Excel Format count incorrect.") 423 | } 424 | } 425 | 426 | func TestRecordCheck(t *testing.T) { 427 | path := "./" 428 | df := CreateDataFrame(path, "TestData.csv") 429 | 430 | var id string 431 | var date string 432 | var cost string 433 | var weight string 434 | var firstName string 435 | var lastName string 436 | 437 | for _, row := range df.FrameRecords { 438 | if row.Val("ID", df.Headers) == "5" { 439 | id = row.Val("ID", df.Headers) 440 | date = row.Val("Date", df.Headers) 441 | cost 
= row.Val("Cost", df.Headers) 442 | weight = row.Val("Weight", df.Headers) 443 | firstName = row.Val("First Name", df.Headers) 444 | lastName = row.Val("Last Name", df.Headers) 445 | } 446 | } 447 | 448 | if id != "5" { 449 | t.Error("ID failed") 450 | } else if date != "2022-01-05" { 451 | t.Error("Date failed") 452 | } else if cost != "774" { 453 | t.Error("Cost failed") 454 | } else if weight != "415" { 455 | t.Error("Weight failed") 456 | } else if firstName != "Andy" { 457 | t.Error("First Name failed") 458 | } else if lastName != "Wiedmann" { 459 | t.Error("Last Name failed") 460 | } 461 | } 462 | 463 | func TestRecordCheckPanic(t *testing.T) { 464 | path := "./" 465 | df := CreateDataFrame(path, "TestData.csv") 466 | 467 | for _, row := range df.FrameRecords { 468 | defer func() { recover() }() 469 | 470 | row.Val("Your Name Here", df.Headers) 471 | 472 | // Never reaches here if `OtherFunctionThatPanics` panics. 473 | t.Errorf("The row.Val() method should have panicked.") 474 | } 475 | } 476 | 477 | func TestAddRecord(t *testing.T) { 478 | path := "./" 479 | df := CreateDataFrame(path, "TestData.csv") 480 | newData := [6]string{"11", "2022-06-23", "101", "500", "Ben", "Benison"} 481 | df = df.AddRecord(newData[:]) 482 | 483 | if df.CountRecords() != 11 { 484 | t.Error("Add Record: Count does not match.") 485 | } 486 | 487 | for _, row := range df.FrameRecords { 488 | if row.Val("ID", df.Headers) == "11" { 489 | if row.Val("Date", df.Headers) != "2022-06-23" { 490 | t.Error("Add Record: date failed") 491 | } 492 | if row.Val("Cost", df.Headers) != "101" { 493 | t.Error("Add Record: cost failed") 494 | } 495 | if row.Val("Weight", df.Headers) != "500" { 496 | t.Error("Add Record: weight failed") 497 | } 498 | if row.Val("First Name", df.Headers) != "Ben" { 499 | t.Error("Add Record: first name failed") 500 | } 501 | if row.Val("Last Name", df.Headers) != "Benison" { 502 | t.Error("Add Record: last name failed") 503 | } 504 | } 505 | } 506 | } 507 | 508 | func 
TestByteOrderMark(t *testing.T) { 509 | path := "./" 510 | df := CreateDataFrame(path, "TestDataCommaSeparatedValue.csv") 511 | dfUtf := CreateDataFrame(path, "TestData.csv") 512 | 513 | dfTotal := 0.0 514 | for _, row := range df.FrameRecords { 515 | dfTotal += row.ConvertToFloat("ID", df.Headers) 516 | } 517 | 518 | dfUtfTotal := 0.0 519 | for _, row := range dfUtf.FrameRecords { 520 | dfUtfTotal += row.ConvertToFloat("ID", dfUtf.Headers) 521 | } 522 | 523 | if dfTotal != 55.0 || dfUtfTotal != 55.0 { 524 | t.Error("Byte Order Mark conversion error") 525 | } 526 | } 527 | func TestKeepColumns(t *testing.T) { 528 | path := "./" 529 | df := CreateDataFrame(path, "TestData.csv") 530 | 531 | columns := [3]string{"First Name", "Last Name", "Weight"} 532 | df = df.KeepColumns(columns[:]) 533 | 534 | if df.Headers["First Name"] != 0 || df.Headers["Last Name"] != 1 || df.Headers["Weight"] != 2 || len(df.Headers) > 3 { 535 | t.Error("Keep Columns failed") 536 | } 537 | } 538 | 539 | func TestRemoveColumnsMultiple(t *testing.T) { 540 | path := "./" 541 | df := CreateDataFrame(path, "TestData.csv") 542 | 543 | df = df.RemoveColumns("ID", "Cost", "First Name") 544 | 545 | if df.Headers["Date"] != 0 || df.Headers["Weight"] != 1 || df.Headers["Last Name"] != 2 || len(df.Headers) > 3 { 546 | t.Error("Remove Multiple Columns failed") 547 | } 548 | } 549 | 550 | func TestRemoveColumnsSingle(t *testing.T) { 551 | path := "./" 552 | df := CreateDataFrame(path, "TestData.csv") 553 | 554 | df = df.RemoveColumns("First Name") 555 | 556 | if df.Headers["ID"] != 0 || df.Headers["Date"] != 1 || df.Headers["Cost"] != 2 || df.Headers["Weight"] != 3 || df.Headers["Last Name"] != 4 || len(df.Headers) > 5 { 557 | t.Error("Remove Single Column failed") 558 | } 559 | } 560 | 561 | func TestDateConverterStandardFormat(t *testing.T) { 562 | var s interface{} = dateConverter("2022-01-31") 563 | if _, ok := s.(time.Time); !ok { 564 | t.Error("Date Converter Standard Format Failed") 565 | } 566 | } 
567 | 568 | func TestDateConverterExcelFormatDoubleDigit(t *testing.T) { 569 | var s interface{} = dateConverter("01/31/2022") 570 | if _, ok := s.(time.Time); !ok { 571 | t.Error("Date Converter Excel Format Failed") 572 | } 573 | } 574 | 575 | func TestDateConverterExcelFormatSingleMonthDigit(t *testing.T) { 576 | var s interface{} = dateConverter("1/31/2022") 577 | if _, ok := s.(time.Time); !ok { 578 | t.Error("Date Converter Excel Format Failed") 579 | } 580 | } 581 | 582 | func TestDateConverterExcelFormatSingleDayDigit(t *testing.T) { 583 | var s interface{} = dateConverter("01/1/2022") 584 | if _, ok := s.(time.Time); !ok { 585 | t.Error("Date Converter Excel Format Failed") 586 | } 587 | } 588 | 589 | func TestDateConverterExcelFormatSingleDigit(t *testing.T) { 590 | var s interface{} = dateConverter("1/1/2022") 591 | if _, ok := s.(time.Time); !ok { 592 | t.Error("Date Converter Excel Format Failed") 593 | } 594 | } 595 | 596 | func TestDateConverterExcelFormatDoubleYearDigit(t *testing.T) { 597 | var s interface{} = dateConverter("01/31/22") 598 | if _, ok := s.(time.Time); !ok { 599 | t.Error("Date Converter Excel Format Failed") 600 | } 601 | } 602 | 603 | func TestNewField(t *testing.T) { 604 | path := "./" 605 | df := CreateDataFrame(path, "TestData.csv") 606 | df.NewField("Middle Name") 607 | 608 | if df.Headers["Middle Name"] != 6 { 609 | fmt.Println(df.Headers) 610 | t.Error("New field column not added in proper position.") 611 | } 612 | 613 | for _, row := range df.FrameRecords { 614 | if row.Val("Middle Name", df.Headers) != "" { 615 | t.Error("Value in New Field is not set to nil") 616 | } 617 | } 618 | } 619 | 620 | func TestUnique(t *testing.T) { 621 | path := "./" 622 | df := CreateDataFrame(path, "TestData.csv") 623 | names := df.Unique("Last Name") 624 | 625 | if len(names) != 7 { 626 | t.Error("Unique slice error.") 627 | } 628 | } 629 | 630 | func TestUpdate(t *testing.T) { 631 | path := "./" 632 | df := CreateDataFrame(path, 
"TestData.csv") 633 | 634 | for _, row := range df.FrameRecords { 635 | if row.Val("First Name", df.Headers) == "Avery" && row.Val("Last Name", df.Headers) == "Fultz" { 636 | row.Update("Weight", "30", df.Headers) 637 | } 638 | } 639 | 640 | for _, row := range df.FrameRecords { 641 | if row.Val("First Name", df.Headers) == "Avery" && row.Val("Last Name", df.Headers) == "Fultz" { 642 | if row.Val("Weight", df.Headers) != "30" { 643 | t.Error("Update row failed.") 644 | } 645 | } 646 | } 647 | } 648 | 649 | func TestUpdatePanic(t *testing.T) { 650 | path := "./" 651 | df := CreateDataFrame(path, "TestData.csv") 652 | 653 | for _, row := range df.FrameRecords { 654 | if row.Val("First Name", df.Headers) == "Avery" && row.Val("Last Name", df.Headers) == "Fultz" { 655 | defer func() { recover() }() 656 | 657 | row.Update("Your Name Here", "30", df.Headers) 658 | 659 | t.Errorf("Method should have panicked.") 660 | } 661 | } 662 | } 663 | 664 | func TestMergeFramesAllColumns(t *testing.T) { 665 | path := "./" 666 | 667 | // Prep left frame 668 | df := CreateDataFrame(path, "TestData.csv") 669 | newData := [6]string{"11", "2022-06-27", "5467", "9586", "Cassandra", "SchmaSandra"} 670 | df = df.AddRecord(newData[:]) 671 | 672 | // Prep right frame 673 | dfRight := CreateDataFrame(path, "TestMergeData.csv") 674 | 675 | // Merge 676 | err := df.Merge(&dfRight, "ID") 677 | if err != nil { 678 | t.Error(err) 679 | } 680 | 681 | if df.CountRecords() != 11 { 682 | t.Error("Merge: record count error.") 683 | } 684 | 685 | m := make(map[string][]string) 686 | m["2"] = []string{"RICHLAND", "WA", "99354"} 687 | m["4"] = []string{"VAN BUREN", "AR", "72956"} 688 | m["6"] = []string{"FISHERS", "NY", "14453"} 689 | m["10"] = []string{"JEFFERSON CITY", "MO", "65109"} 690 | m["11"] = []string{"", "", ""} 691 | 692 | for _, row := range df.FrameRecords { 693 | if val, ok := m[row.Val("ID", df.Headers)]; ok { 694 | for i, v := range val { 695 | switch i { 696 | case 0: 697 | if 
row.Val("City", df.Headers) != v { 698 | t.Error("Merge: city error.") 699 | } 700 | case 1: 701 | if row.Val("State", df.Headers) != v { 702 | t.Error("Merge: state error.") 703 | } 704 | case 2: 705 | if row.Val("Postal Code", df.Headers) != v { 706 | t.Error("Merge: postal code error.") 707 | } 708 | } 709 | } 710 | } 711 | } 712 | } 713 | 714 | func TestMergeFramesSpecifiedColumns(t *testing.T) { 715 | path := "./" 716 | 717 | // Prep left frame 718 | df := CreateDataFrame(path, "TestData.csv") 719 | newData := [6]string{"11", "2022-06-27", "5467", "9586", "Cassandra", "SchmaSandra"} 720 | df = df.AddRecord(newData[:]) 721 | 722 | // Prep right frame 723 | dfRight := CreateDataFrame(path, "TestMergeData.csv") 724 | 725 | // Merge 726 | err := df.Merge(&dfRight, "ID", "City", "Postal Code") 727 | if err != nil { 728 | t.Error(err) 729 | } 730 | 731 | if df.CountRecords() != 11 { 732 | t.Error("Merge: record count error.") 733 | } 734 | 735 | m := make(map[string][]string) 736 | m["2"] = []string{"RICHLAND", "99354"} 737 | m["4"] = []string{"VAN BUREN", "72956"} 738 | m["6"] = []string{"FISHERS", "14453"} 739 | m["10"] = []string{"JEFFERSON CITY", "65109"} 740 | m["11"] = []string{"", ""} 741 | 742 | for _, row := range df.FrameRecords { 743 | if val, ok := m[row.Val("ID", df.Headers)]; ok { 744 | for i, v := range val { 745 | switch i { 746 | case 0: 747 | if row.Val("City", df.Headers) != v { 748 | t.Error("Merge: city error.") 749 | } 750 | case 1: 751 | if row.Val("Postal Code", df.Headers) != v { 752 | t.Error("Merge: postal code error.") 753 | } 754 | } 755 | } 756 | } 757 | } 758 | } 759 | 760 | func TestInnerMerge(t *testing.T) { 761 | path := "./" 762 | 763 | // Prep left frame 764 | df := CreateDataFrame(path, "TestData.csv") 765 | 766 | // Prep right frame 767 | dfRight := CreateDataFrame(path, "TestInnerMergeData.csv") 768 | 769 | // Merge 770 | df, err := df.InnerMerge(&dfRight, "ID") 771 | if err != nil { 772 | t.Error(err) 773 | } 774 | 775 | if 
df.CountRecords() != 5 { 776 | t.Error("Inner Merge: record count error.") 777 | } 778 | 779 | columns := []string{"ID", "Date", "Cost", "Weight", "First Name", "Last Name", "City", "State", "Postal Code"} 780 | 781 | data := make([][]string, 5) 782 | data[0] = []string{"4", "2022-01-04", "121", "196", "Peter", "Wiedmann", "VAN BUREN", "AR", "72956"} 783 | data[1] = []string{"5", "2022-01-05", "774", "415", "Andy", "Wiedmann", "TAUNTON", "MA", "2780"} 784 | data[2] = []string{"7", "2022-01-07", "995", "500", "Bryan", "Curtis", "GOLDSBORO", "NC", "27530"} 785 | data[3] = []string{"9", "2022-01-09", "939", "157", "Eric", "Petruska", "PHOENIX", "AZ", "85024"} 786 | data[4] = []string{"10", "2022-01-10", "597", "475", "Carl", "Carlson", "JEFFERSON CITY", "MO", "65109"} 787 | 788 | for i, row := range df.FrameRecords { 789 | if len(row.Data) != len(data[i]) { 790 | t.Error("Inner Merge: Column count does not match.") 791 | } 792 | for i2, col := range columns { 793 | val := row.Val(col, df.Headers) 794 | if val != data[i][i2] { 795 | t.Error("Inner Merge: Data results to not match what is expected.") 796 | } 797 | } 798 | } 799 | } 800 | 801 | func TestInnerMergeLeftFrameDuplicates(t *testing.T) { 802 | path := "./" 803 | 804 | // Prep left frame 805 | df := CreateDataFrame(path, "TestDataInnerDuplicate.csv") 806 | 807 | // Prep right frame 808 | dfRight := CreateDataFrame(path, "TestInnerMergeData.csv") 809 | 810 | // Merge 811 | df, err := df.InnerMerge(&dfRight, "ID") 812 | if err != nil { 813 | t.Error(err) 814 | } 815 | 816 | if df.CountRecords() != 6 { 817 | t.Error("Inner Merge: record count error.") 818 | } 819 | 820 | columns := []string{"ID", "Date", "Cost", "Weight", "First Name", "Last Name", "City", "State", "Postal Code"} 821 | 822 | data := make([][]string, 6) 823 | data[0] = []string{"4", "2022-01-04", "121", "196", "Peter", "Wiedmann", "VAN BUREN", "AR", "72956"} 824 | data[1] = []string{"5", "2022-01-05", "774", "415", "Andy", "Wiedmann", "TAUNTON", 
"MA", "2780"}
	data[2] = []string{"7", "2022-01-07", "995", "500", "Bryan", "Curtis", "GOLDSBORO", "NC", "27530"}
	data[3] = []string{"9", "2022-01-09", "939", "157", "Eric", "Petruska", "PHOENIX", "AZ", "85024"}
	data[4] = []string{"9", "2022-01-09", "12345", "6789", "Eric", "Petruska", "PHOENIX", "AZ", "85024"}
	data[5] = []string{"10", "2022-01-10", "597", "475", "Carl", "Carlson", "JEFFERSON CITY", "MO", "65109"}

	// Every merged row must match the expected data both in column count
	// and in per-column values.
	for i, row := range df.FrameRecords {
		if len(row.Data) != len(data[i]) {
			t.Error("Inner Merge: Column count does not match.")
		}
		for i2, col := range columns {
			val := row.Val(col, df.Headers)
			if val != data[i][i2] {
				t.Error("Inner Merge: Data results do not match what is expected.")
			}
		}
	}
}

// TestConcatFrames verifies that ConcatFrames appends a second frame's rows
// after the first frame's rows, preserving row order, column sums, and count.
func TestConcatFrames(t *testing.T) {
	path := "./"
	dfOne := CreateDataFrame(path, "TestData.csv")
	df := CreateDataFrame(path, "TestDataConcat.csv")

	// Expected "Last Name" values: first the 10 rows of TestData.csv,
	// then the 10 rows of TestDataConcat.csv.
	lastNames := [20]string{
		"Fultz",
		"Fultz",
		"Fultz",
		"Wiedmann",
		"Wiedmann",
		"Wilfong",
		"Curtis",
		"Wenck",
		"Petruska",
		"Carlson",
		"Benny",
		"Kenny",
		"McCarlson",
		"Jeffery",
		"Stephenson",
		"Patrickman",
		"Briarson",
		"Ericson",
		"Asherton",
		"Highman",
	}

	dfOne, err := dfOne.ConcatFrames(&df)
	if err != nil {
		t.Error("Concat Frames: ", err)
	}
	var totalCost int64
	var totalWeight int64

	for i, row := range dfOne.FrameRecords {
		if row.Val("Last Name", dfOne.Headers) != lastNames[i] {
			t.Error("Concat Frames Failed: Last Names")
		}
		totalCost += row.ConvertToInt("Cost", dfOne.Headers)
		totalWeight += row.ConvertToInt("Weight", dfOne.Headers)
	}

	if totalCost != 7100 || totalWeight != 3821 {
		t.Error("Concat Frames Failed: Values")
	}

	if dfOne.CountRecords() != 20 {
		t.Error("Concat Frames Failed: Row Count")
	}
}

// TestConcatFramesAddress checks that ConcatFrames returns a new, decoupled
// frame rather than a reference to either input.
func TestConcatFramesAddress(t *testing.T) {
	path := "./"
	df := CreateDataFrame(path, "TestData.csv")
	df2 := CreateDataFrame(path, "TestDataConcat.csv")

	df3, err := df.ConcatFrames(&df2)
	if err != nil {
		t.Error(err)
	}

	// NOTE(review): df, df2, and df3 are distinct local variables, so these
	// address comparisons can never be true and the check cannot fail.
	// Consider comparing shared backing storage (e.g. record slice identity)
	// instead — needs knowledge of DataFrame internals to do safely.
	if &df == &df3 || &df2 == &df3 {
		t.Error("ConcatFrames did not create a truly decoupled new dataframe")
	}
	if df3.CountRecords() != 20 {
		t.Error("ConcatFrames did not properly append")
	}
}

// TestConcatFramesColumnCount ensures ConcatFrames fails when the second
// frame has a different number of columns.
func TestConcatFramesColumnCount(t *testing.T) {
	path := "./"
	dfOne := CreateDataFrame(path, "TestData.csv")
	columns := []string{"one", "two", "three"}
	dfTwo := CreateNewDataFrame(columns)

	// Result frame is irrelevant here; only the error path is under test.
	_, err := dfOne.ConcatFrames(&dfTwo)
	if err == nil {
		t.Error("Concat Frames Did Not Fail --> ", err)
	}
}

// TestConcatFramesColumnOrder ensures ConcatFrames fails when the second
// frame has the same column count but a different column order.
func TestConcatFramesColumnOrder(t *testing.T) {
	path := "./"
	dfOne := CreateDataFrame(path, "TestData.csv")
	columns := []string{
		"ID",
		"Date",
		"Cost",
		"Weight",
		"Last Name",
		"First Name",
	}
	dfTwo := CreateNewDataFrame(columns)

	// Result frame is irrelevant here; only the error path is under test.
	_, err := dfOne.ConcatFrames(&dfTwo)
	if err == nil {
		t.Error("Concat Frames Did Not Fail --> ", err)
	}
}

// Ensures once a new filtered DataFrame is created, if records are updated in the original
// it will not affect the records in the newly created filtered version.
func TestCopiedFrame(t *testing.T) {
	path := "./"
	df := CreateDataFrame(path, "TestData.csv")

	df2 := df.Filtered("Last Name", "Wiedmann")

	// Update data in original frame.
	for _, row := range df.FrameRecords {
		if row.Val("First Name", df.Headers) == "Peter" && row.Val("Last Name", df.Headers) == "Wiedmann" {
			row.Update("Last Name", "New Last Name", df.Headers)
		}
	}

	// Check value did not change in newly copied frame.
	for _, row := range df2.FrameRecords {
		if row.Val("ID", df2.Headers) == "4" {
			if row.Val("First Name", df2.Headers) != "Peter" || row.Val("Last Name", df2.Headers) != "Wiedmann" {
				t.Error("Copied Frame: name appears to have changed in second frame.")
			}
		}
	}
}

// TestSaveDataFrameWithoutFileType saves a frame when the target name has no
// ".csv" extension; SaveDataFrame is expected to handle that.
func TestSaveDataFrameWithoutFileType(t *testing.T) {
	path := "./"
	df := CreateDataFrame(path, "TestData.csv")

	if !df.SaveDataFrame(path, "Testing") {
		t.Error("Failed to save dataframe.")
	}
}

// TestSaveDataFrameWithFileType saves a frame when the target name already
// carries the ".csv" extension.
func TestSaveDataFrameWithFileType(t *testing.T) {
	path := "./"
	df := CreateDataFrame(path, "TestData.csv")

	if !df.SaveDataFrame(path, "Testing.csv") {
		t.Error("Failed to save dataframe.")
	}
}

// TestAssortment chains several operations (concat, add records, exclude,
// date filter, unique) and checks the counts/values after each stage.
func TestAssortment(t *testing.T) {
	path := "./"

	// Concatenate Frames
	dfOne := CreateDataFrame(path, "TestData.csv")
	df := CreateDataFrame(path, "TestDataConcat.csv")
	df, err := df.ConcatFrames(&dfOne)
	if err != nil {
		// NOTE(review): log.Fatal exits the whole test binary and skips
		// cleanup; t.Fatal is the testing idiom. Left as-is because removing
		// the only uses of "log" could orphan the file-level import.
		log.Fatal("Concat Frames: ", err)
	}

	// Add Records
	newData := [6]string{"21", "2022-01-01", "200", "585", "Tommy", "Thompson"}
	df = df.AddRecord(newData[:])
	newDataTwo := [6]string{"22", "2022-01-31", "687", "948", "Sarah", "McSarahson"}
	df = df.AddRecord(newDataTwo[:])

	if df.CountRecords() != 22 {
		t.Error("Assortment: concat count incorrect.")
	}

	df = df.Exclude("Last Name", "Fultz", "Highman", "Stephenson")

	if df.CountRecords() != 17 {
		t.Error("Assortment: excluded count incorrect.")
	}

	df = df.FilteredAfter("Date", "2022-01-08")

	if df.CountRecords() != 4 {
		t.Error("Assortment: filtered after count incorrect.")
	}

	lastNames := df.Unique("Last Name")
	checkLastNames := [4]string{"Petruska", "Carlson", "Asherton", "McSarahson"}

	if len(lastNames) != 4 {
		t.Error("Assortment: last name count failed")
	}

	// Every unique last name must appear in the expected set.
	for _, name := range lastNames {
		var status bool
		for _, cName := range checkLastNames {
			if name == cName {
				status = true
			}
		}
		if !status {
			t.Error("Assortment: last name not found.")
		}
	}
}

// TestCopy verifies Copy produces an independent frame: edits to the copy
// must appear in the copy and must not leak back into the original.
func TestCopy(t *testing.T) {
	path := "./"
	df := CreateDataFrame(path, "TestData.csv")
	df2 := df.Copy()

	for _, row := range df2.FrameRecords {
		if row.Val("First Name", df2.Headers) == "Bryan" && row.Val("Last Name", df2.Headers) == "Curtis" {
			row.Update("First Name", "Brian", df2.Headers)
		}
		if row.Val("First Name", df2.Headers) == "Carl" && row.Val("Last Name", df2.Headers) == "Carlson" {
			row.Update("First Name", "McCarlson", df2.Headers)
		}
	}

	// Test original frame did not change.
	for _, row := range df.FrameRecords {
		if row.Val("Last Name", df.Headers) == "Curtis" {
			if row.Val("First Name", df.Headers) != "Bryan" {
				t.Error("First Name in original frame is not correct.")
			}
		}
		if row.Val("Last Name", df.Headers) == "Carlson" {
			if row.Val("First Name", df.Headers) != "Carl" {
				t.Error("First Name in original frame is not correct.")
			}
		}
	}

	// Test copied frame contains changes.
	for _, row := range df2.FrameRecords {
		if row.Val("Last Name", df2.Headers) == "Curtis" {
			if row.Val("First Name", df2.Headers) != "Brian" {
				t.Error("First Name in copied frame is not correct.")
			}
		}
		if row.Val("Last Name", df2.Headers) == "Carlson" {
			if row.Val("First Name", df2.Headers) != "McCarlson" {
				t.Error("First Name in copied frame is not correct.")
			}
		}
	}
}

// TestCopyAddress checks that Copy returns a new frame, not the original.
func TestCopyAddress(t *testing.T) {
	path := "./"
	df := CreateDataFrame(path, "TestData.csv")
	df2 := df.Copy()

	// NOTE(review): df and df2 are distinct local variables, so this address
	// comparison can never be true and the check cannot fail. A meaningful
	// decoupling check would compare shared backing storage instead.
	if &df == &df2 {
		t.Error("Copy did not create a truly decoupled copy.")
	}
}

// TestColumns verifies Columns returns all headers in their original order.
func TestColumns(t *testing.T) {
	path := "./"
	requiredColumns := []string{
		"ID",
		"Date",
		"Cost",
		"Weight",
		"First Name",
		"Last Name",
	}
	df := CreateDataFrame(path, "TestData.csv")
	foundColumns := df.Columns()

	if len(foundColumns) != 6 {
		t.Error("Length of found columns does not match")
	}

	for i := 0; i < len(requiredColumns); i++ {
		if foundColumns[i] != requiredColumns[i] {
			t.Error("Order of found columns does not match")
		}
	}
}

// TestAutoCount adds 1,000 generated records and checks the row count.
func TestAutoCount(t *testing.T) {
	columns := []string{"id", "number", "value"}
	df := CreateNewDataFrame(columns)

	for i := 0; i < 1_000; i++ {
		val := float64(i + 1)
		sq := val * val
		data := []string{
			strconv.Itoa(i),
			fmt.Sprintf("%f", val),
			fmt.Sprintf("%f", sq),
		}
		df = df.AddRecord(data)
	}

	if df.CountRecords() != 1_000 {
		// Message previously claimed 1,000,000 while the assertion checks 1,000.
		t.Error("Test Auto: count is not 1,000")
	}
}

// TestAutoSum adds 1,000 squared values and checks Sum("value") equals the
// closed-form sum of squares: 1000*1001*2001/6 = 333,833,500.
func TestAutoSum(t *testing.T) {
	columns := []string{"id", "number", "value"}
	df := CreateNewDataFrame(columns)

	for i := 0; i < 1_000; i++ {
		val := float64(i + 1)
		sq := val * val
		data := []string{
			strconv.Itoa(i),
			fmt.Sprintf("%f", val),
			fmt.Sprintf("%f", sq),
		}
		df = df.AddRecord(data)
	}

	if df.Sum("value") != 333_833_500.0 {
		t.Error("Test Auto: sum is not correct")
	}
}

// TestLoadFrames loads several CSV files at once and spot-checks each
// resulting frame, then verifies the results are decoupled from any shared
// internal storage.
func TestLoadFrames(t *testing.T) {
	filePath := "./"
	files := []string{
		"TestData.csv",
		"TestDataCommaSeparatedValue.csv",
		"TestDataConcat.csv",
		"TestDataDateFormat.csv",
		"TestMergeData.csv",
	}

	results, err := LoadFrames(filePath, files)
	if err != nil {
		// NOTE(review): prefer t.Fatal over log.Fatal in tests; see note in
		// TestAssortment for why this is left unchanged.
		log.Fatal(err)
	}

	dfTd := results[0]
	dfComma := results[1]
	dfConcat := results[2]
	dfDate := results[3]
	dfMerge := results[4]

	if dfTd.CountRecords() != 10 || dfTd.Sum("Weight") != 3376.0 || len(dfTd.Columns()) != 6 {
		t.Error("LoadFrames: TestData.csv is not correct")
	}
	if dfComma.CountRecords() != 10 || dfComma.Sum("Cost") != 6521.0 || len(dfComma.Columns()) != 6 {
		t.Error("LoadFrames: TestDataCommaSeparatedValue.csv is not correct")
	}
	if dfConcat.CountRecords() != 10 || dfConcat.Sum("Weight") != 445.0 || len(dfConcat.Columns()) != 6 {
		t.Error("LoadFrames: TestDataConcat.csv is not correct")
	}
	if dfDate.CountRecords() != 10 || dfDate.Average("Cost") != 652.1 || len(dfDate.Columns()) != 6 {
		t.Error("LoadFrames: TestDataDateFormat.csv is not correct")
	}
	if dfMerge.CountRecords() != 10 || dfMerge.Sum("Postal Code") != 495735.0 || len(dfMerge.Columns()) != 4 {
		t.Error("LoadFrames: TestMergeData.csv is not correct")
	}

	// A filtered copy must diverge from the source frame's row count,
	// proving the loaded frame is not aliasing shared map storage.
	dfFilterTest := dfTd.Filtered("Last Name", "Fultz")
	if dfTd.CountRecords() == dfFilterTest.CountRecords() {
		t.Error("LoadFrame: variable referencing map value")
	}
}

// TestLoadFramesError ensures LoadFrames rejects a single-file list.
func TestLoadFramesError(t *testing.T) {
	filePath := "./"
	files := []string{"TestData.csv"}

	_, err := LoadFrames(filePath, files)
	if err == nil {
		t.Error("LoadFrames did not fail as expected")
	}
}

// TestRename renames a column, then verifies updates through the new name
// work and that the header set has the new column and the same size.
func TestRename(t *testing.T) {
	path := "./"
	df := CreateDataFrame(path, "TestData.csv")

	err := df.Rename("Weight", "Total Weight")
	if err != nil {
		t.Error(err)
	}

	for _, row := range df.FrameRecords {
		if row.Val("First Name", df.Headers) == "Andy" && row.Val("Last Name", df.Headers) == "Wiedmann" {
			row.Update("Total Weight", "1000", df.Headers)
		}
	}

	for _, row := range df.FrameRecords {
		if row.Val("First Name", df.Headers) == "Andy" && row.Val("Last Name", df.Headers) == "Wiedmann" {
			if row.Val("Total Weight", df.Headers) != "1000" {
				t.Error("Value in new column did not update correctly")
			}
		}
	}

	foundColumns := []string{}
	newColumnStatus := false
	for k := range df.Headers {
		foundColumns = append(foundColumns, k)
		if k == "Total Weight" {
			newColumnStatus = true
		}
	}

	if !newColumnStatus {
		t.Error("New column was not found")
	}
	if len(foundColumns) != 6 {
		t.Error("Wrong number of columns found")
	}
}

// TestRenameOriginalNotFound ensures Rename fails when the source column
// does not exist.
func TestRenameOriginalNotFound(t *testing.T) {
	path := "./"
	df := CreateDataFrame(path, "TestData.csv")

	err := df.Rename("The Weight", "Total Weight")
	if err == nil {
		// err is nil here, so report a real message instead of t.Error(err).
		t.Error("Rename did not fail for a missing original column")
	}
}

// TestRenameDuplicate ensures Rename fails when the new name collides with
// an existing column.
func TestRenameDuplicate(t *testing.T) {
	path := "./"
	df := CreateDataFrame(path, "TestData.csv")

	err := df.Rename("Weight", "Cost")
	if err == nil {
		// err is nil here, so report a real message instead of t.Error(err).
		t.Error("Rename did not fail for a duplicate column name")
	}
}

// TestSort exercises ascending/descending sorts on a numeric column and a
// string column, confirms counts and sums survive sorting, and checks the
// error path for an unknown column.
func TestSort(t *testing.T) {
	path := "./"
	df := CreateDataFrame(path, "TestData.csv")

	err := df.Sort("Cost", true)
	if err != nil {
		t.Error("Sort Error Failed")
	}

	answers := []string{
		"121",
		"133",
		"493",
		"597",
		"774",
		"777",
		"818",
		"874",
		"939",
		"995",
	}

	for i, row := range df.FrameRecords {
		if row.Val("Cost", df.Headers) != answers[i] {
			t.Error("Ascending Cost Sort Failed")
		}
	}

	err = df.Sort("Cost", false)
	if err != nil {
		t.Error("Sort Error Failed")
	}

	answers = []string{
		"995",
		"939",
		"874",
		"818",
		"777",
		"774",
		"597",
		"493",
		"133",
		"121",
	}

	for i, row := range df.FrameRecords {
		if row.Val("Cost", df.Headers) != answers[i] {
			t.Error("Descending Cost Sort Failed")
		}
	}

	err = df.Sort("Last Name", true)
	if err != nil {
		t.Error("Sort Error Failed")
	}

	answers = []string{
		"Carlson",
		"Curtis",
		"Fultz",
		"Fultz",
		"Fultz",
		"Petruska",
		"Wenck",
		"Wiedmann",
		"Wiedmann",
		"Wilfong",
	}

	for i, row := range df.FrameRecords {
		if row.Val("Last Name", df.Headers) != answers[i] {
			t.Error("Ascending Name Sort Failed")
		}
	}

	err = df.Sort("Last Name", false)
	if err != nil {
		t.Error("Sort Error Failed")
	}

	answers = []string{
		"Wilfong",
		"Wiedmann",
		"Wiedmann",
		"Wenck",
		"Petruska",
		"Fultz",
		"Fultz",
		"Fultz",
		"Curtis",
		"Carlson",
	}

	for i, row := range df.FrameRecords {
		if row.Val("Last Name", df.Headers) != answers[i] {
			t.Error("Descending Name Sort Failed")
		}
	}

	if df.CountRecords() != 10 {
		t.Error("Sort Row Count Failed")
	}

	if df.Sum("Cost") != 6521.0 {
		t.Error("Sort Sum Failed")
	}

	err = df.Sort("Non Existent Column", true)
	if err == nil {
		t.Error("Sort Error Failed")
	}
}

// TestDivideAndConquerOdd splits 999,833 rows into 5 subframes: the first
// four hold 199,966 rows and the last absorbs the remainder (199,969).
// The subframe sums must add back up to the original total.
func TestDivideAndConquerOdd(t *testing.T) {
	df := CreateNewDataFrame([]string{"One", "Two", "Three"})

	total := 0
	for i := 0; i < 999_833; i++ {
		total += i
		iVal := strconv.Itoa(i)
		df = df.AddRecord([]string{iVal, iVal, iVal})
	}

	if df.CountRecords() != 999_833 {
		t.Error("Divide And Conquer: Count rows are incorrect.")
	}

	frames, err := df.DivideAndConquer(5)
	if err != nil {
		t.Error("Divide And Conquer: Error incorrectly triggered.")
	}

	if len(frames) != 5 {
		t.Errorf("Divide And Conquer: Frame count is '%d' instead of 5.", len(frames))
	}

	dfTotal := 0
	for i, each := range frames {
		if i != len(frames)-1 {
			if each.CountRecords() != 199_966 {
				t.Errorf("Divide And Conquer: Row count on subgroup is incorrect '%d'.", each.CountRecords())
			}
			dfTotal += int(each.Sum("One"))
		} else {
			if each.CountRecords() != 199_969 {
				t.Errorf("Divide And Conquer: Row count on final subgroup is incorrect '%d'.", each.CountRecords())
			}
			dfTotal += int(each.Sum("One"))
		}
	}

	if dfTotal != total {
		t.Errorf("Divide And Conquer: Sum of all rows is incorrect '%d' instead of '%d'.", dfTotal, total)
	}
}

// TestDivideAndConquerEven splits 100,000 rows into 5 equal subframes of
// 20,000 rows each and verifies the subframe sums add back up.
func TestDivideAndConquerEven(t *testing.T) {
	df := CreateNewDataFrame([]string{"One", "Two", "Three"})

	total := 0
	for i := 0; i < 100_000; i++ {
		total += i
		iVal := strconv.Itoa(i)
		df = df.AddRecord([]string{iVal, iVal, iVal})
	}

	if df.CountRecords() != 100_000 {
		t.Error("Divide And Conquer: Count rows are incorrect.")
	}

	frames, err := df.DivideAndConquer(5)
	if err != nil {
		t.Error("Divide And Conquer: Error incorrectly triggered.")
	}

	if len(frames) != 5 {
		t.Errorf("Divide And Conquer: Frame count is '%d' instead of 5.", len(frames))
	}

	dfTotal := 0
	for _, each := range frames {
		if each.CountRecords() != 20_000 {
			t.Errorf("Divide And Conquer: Row count on subgroup is incorrect '%d'.", each.CountRecords())
		}
		dfTotal += int(each.Sum("One"))
	}

	if dfTotal != total {
		t.Errorf("Divide And Conquer: Sum of all rows is incorrect '%d' instead of '%d'.", dfTotal, total)
	}
}

// TestDivideAndConquerZeroSubframes ensures a zero subframe count errors.
func TestDivideAndConquerZeroSubframes(t *testing.T) {
	df := CreateNewDataFrame([]string{"One", "Two", "Three"})

	for i := 0; i < 10; i++ {
		iVal := strconv.Itoa(i)
		df = df.AddRecord([]string{iVal, iVal, iVal})
	}

	_, err := df.DivideAndConquer(0)
	if err == nil {
		t.Error("Divide And Conquer: Zero subframe error should have been triggered.")
	}
}

// TestDivideAndConquerExcessiveSubframes ensures requesting more subframes
// than rows errors.
func TestDivideAndConquerExcessiveSubframes(t *testing.T) {
	df := CreateNewDataFrame([]string{"One", "Two", "Three"})

	for i := 0; i < 10; i++ {
		iVal := strconv.Itoa(i)
		df = df.AddRecord([]string{iVal, iVal, iVal})
	}

	_, err := df.DivideAndConquer(11)
	if err == nil {
		t.Error("Divide And Conquer: Excessive subframe error should have been triggered.")
	}
}

// TestDivideAndConquerEmptyDataFrame ensures splitting an empty frame errors.
func TestDivideAndConquerEmptyDataFrame(t *testing.T) {
	df := CreateNewDataFrame([]string{"One", "Two", "Three"})

	_, err := df.DivideAndConquer(100)
	if err == nil {
		t.Error("Divide And Conquer: Empty dataframe error should have been triggered.")
	}
}