├── .DS_Store ├── assets ├── .DS_Store └── images │ ├── .DS_Store │ ├── Exclude.png │ ├── Import.png │ ├── Length.png │ ├── Unique.png │ ├── AddRecord.png │ ├── Filtered.png │ ├── Concatenated.png │ ├── KeepColumns.png │ ├── ReadAndPrint.png │ ├── FilteredAfter.png │ ├── FilteredBefore.png │ └── FilteredBetween.png ├── TestInnerMergeData.csv ├── TestMergeData.csv ├── TestDataDateFormat.csv ├── Testing.csv ├── test.csv ├── TestData.csv ├── TestDataConcat.csv ├── TestDataCommaSeparatedValue.csv ├── TestDataInnerDuplicate.csv ├── go.mod ├── LICENSE ├── console.go ├── go.sum ├── aws_tooling.go ├── README.md ├── main.go └── main_test.go /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kfultz07/go-dataframe/HEAD/.DS_Store -------------------------------------------------------------------------------- /assets/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kfultz07/go-dataframe/HEAD/assets/.DS_Store -------------------------------------------------------------------------------- /assets/images/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kfultz07/go-dataframe/HEAD/assets/images/.DS_Store -------------------------------------------------------------------------------- /assets/images/Exclude.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kfultz07/go-dataframe/HEAD/assets/images/Exclude.png -------------------------------------------------------------------------------- /assets/images/Import.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kfultz07/go-dataframe/HEAD/assets/images/Import.png -------------------------------------------------------------------------------- /assets/images/Length.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/kfultz07/go-dataframe/HEAD/assets/images/Length.png -------------------------------------------------------------------------------- /assets/images/Unique.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kfultz07/go-dataframe/HEAD/assets/images/Unique.png -------------------------------------------------------------------------------- /assets/images/AddRecord.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kfultz07/go-dataframe/HEAD/assets/images/AddRecord.png -------------------------------------------------------------------------------- /assets/images/Filtered.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kfultz07/go-dataframe/HEAD/assets/images/Filtered.png -------------------------------------------------------------------------------- /assets/images/Concatenated.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kfultz07/go-dataframe/HEAD/assets/images/Concatenated.png -------------------------------------------------------------------------------- /assets/images/KeepColumns.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kfultz07/go-dataframe/HEAD/assets/images/KeepColumns.png -------------------------------------------------------------------------------- /assets/images/ReadAndPrint.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kfultz07/go-dataframe/HEAD/assets/images/ReadAndPrint.png -------------------------------------------------------------------------------- /assets/images/FilteredAfter.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/kfultz07/go-dataframe/HEAD/assets/images/FilteredAfter.png -------------------------------------------------------------------------------- /assets/images/FilteredBefore.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kfultz07/go-dataframe/HEAD/assets/images/FilteredBefore.png -------------------------------------------------------------------------------- /assets/images/FilteredBetween.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kfultz07/go-dataframe/HEAD/assets/images/FilteredBetween.png -------------------------------------------------------------------------------- /TestInnerMergeData.csv: -------------------------------------------------------------------------------- 1 | ID,City,State,Postal Code 2 | 4,VAN BUREN,AR,72956 3 | 5,TAUNTON,MA,2780 4 | 7,GOLDSBORO,NC,27530 5 | 9,PHOENIX,AZ,85024 6 | 10,JEFFERSON CITY,MO,65109 7 | 11,Denver,CO,66616 -------------------------------------------------------------------------------- /TestMergeData.csv: -------------------------------------------------------------------------------- 1 | ID,City,State,Postal Code 2 | 1,APPLETON,WI,54911 3 | 2,RICHLAND,WA,99354 4 | 3,KANSAS CITY,KS,66115 5 | 4,VAN BUREN,AR,72956 6 | 5,TAUNTON,MA,2780 7 | 6,FISHERS,NY,14453 8 | 7,GOLDSBORO,NC,27530 9 | 8,PATERSON,NJ,7503 10 | 9,PHOENIX,AZ,85024 11 | 10,JEFFERSON CITY,MO,65109 -------------------------------------------------------------------------------- /TestDataDateFormat.csv: -------------------------------------------------------------------------------- 1 | ID,Date,Cost,Weight,First Name,Last Name 2 | 1,1/1/22,818,227,Kevin,Fultz 3 | 2,1/2/22,777,259,Beth,Fultz 4 | 3,1/3/22,493,461,Avery,Fultz 5 | 4,1/4/22,121,196,Peter,Wiedmann 6 | 5,1/5/22,774,415,Andy,Wiedmann 7 | 
6,1/6/22,874,436,Nick,Wilfong 8 | 7,1/7/22,995,500,Bryan,Curtis 9 | 8,1/8/22,133,250,Brian,Wenck 10 | 9,1/9/22,939,157,Eric,Petruska 11 | 10,1/10/22,597,475,Carl,Carlson -------------------------------------------------------------------------------- /Testing.csv: -------------------------------------------------------------------------------- 1 | ID,Date,Cost,Weight,First Name,Last Name 2 | 1,2022-01-01,818,227,Kevin,Fultz 3 | 2,2022-01-02,777,259,Beth,Fultz 4 | 3,2022-01-03,493,461,Avery,Fultz 5 | 4,2022-01-04,121,196,Peter,Wiedmann 6 | 5,2022-01-05,774,415,Andy,Wiedmann 7 | 6,2022-01-06,874,436,Nick,Wilfong 8 | 7,2022-01-07,995,500,Bryan,Curtis 9 | 8,2022-01-08,133,250,Brian,Wenck 10 | 9,2022-01-09,939,157,Eric,Petruska 11 | 10,2022-01-10,597,475,Carl,Carlson 12 | -------------------------------------------------------------------------------- /test.csv: -------------------------------------------------------------------------------- 1 | ID,Date,Cost,Weight,First Name,Last Name 2 | 1,2022-01-01,818,227,Kevin,Fultz 3 | 2,2022-01-02,777,259,Beth,Fultz 4 | 3,2022-01-03,493,461,Avery,Fultz 5 | 4,2022-01-04,121,196,Peter,Wiedmann 6 | 5,2022-01-05,774,415,Andy,Wiedmann 7 | 6,2022-01-06,874,436,Nick,Wilfong 8 | 7,2022-01-07,995,500,Bryan,Curtis 9 | 8,2022-01-08,133,250,Brian,Wenck 10 | 9,2022-01-09,939,157,Eric,Petruska 11 | 10,2022-01-10,597,475,Carl,Carlson 12 | -------------------------------------------------------------------------------- /TestData.csv: -------------------------------------------------------------------------------- 1 | ID,Date,Cost,Weight,First Name,Last Name 2 | 1,2022-01-01,818,227,Kevin,Fultz 3 | 2,2022-01-02,777,259,Beth,Fultz 4 | 3,2022-01-03,493,461,Avery,Fultz 5 | 4,2022-01-04,121,196,Peter,Wiedmann 6 | 5,2022-01-05,774,415,Andy,Wiedmann 7 | 6,2022-01-06,874,436,Nick,Wilfong 8 | 7,2022-01-07,995,500,Bryan,Curtis 9 | 8,2022-01-08,133,250,Brian,Wenck 10 | 9,2022-01-09,939,157,Eric,Petruska 11 | 10,2022-01-10,597,475,Carl,Carlson 
-------------------------------------------------------------------------------- /TestDataConcat.csv: -------------------------------------------------------------------------------- 1 | ID,Date,Cost,Weight,First Name,Last Name 2 | 11,2022-01-01,20,34,Ben,Benny 3 | 12,2022-01-02,84,14,Kevin,Kenny 4 | 13,2022-01-03,44,8,Carl,McCarlson 5 | 14,2022-01-04,53,9,Jeff,Jeffery 6 | 15,2022-01-05,97,39,Steve,Stephenson 7 | 16,2022-01-06,95,66,Pat,Patrickman 8 | 17,2022-01-07,0,65,Brian,Briarson 9 | 18,2022-01-08,99,62,Eric,Ericson 10 | 19,2022-01-09,21,88,Ashley,Asherton 11 | 20,2022-01-10,66,60,Heather,Highman -------------------------------------------------------------------------------- /TestDataCommaSeparatedValue.csv: -------------------------------------------------------------------------------- 1 | ID,Date,Cost,Weight,First Name,Last Name 2 | 1,2022-01-01,818,227,Kevin,Fultz 3 | 2,2022-01-02,777,259,Beth,Fultz 4 | 3,2022-01-03,493,461,Avery,Fultz 5 | 4,2022-01-04,121,196,Peter,Wiedmann 6 | 5,2022-01-05,774,415,Andy,Wiedmann 7 | 6,2022-01-06,874,436,Nick,Wilfong 8 | 7,2022-01-07,995,500,Bryan,Curtis 9 | 8,2022-01-08,133,250,Brian,Wenck 10 | 9,2022-01-09,939,157,Eric,Petruska 11 | 10,2022-01-10,597,475,Carl,Carlson -------------------------------------------------------------------------------- /TestDataInnerDuplicate.csv: -------------------------------------------------------------------------------- 1 | ID,Date,Cost,Weight,First Name,Last Name 2 | 1,2022-01-01,818,227,Kevin,Fultz 3 | 2,2022-01-02,777,259,Beth,Fultz 4 | 3,2022-01-03,493,461,Avery,Fultz 5 | 4,2022-01-04,121,196,Peter,Wiedmann 6 | 5,2022-01-05,774,415,Andy,Wiedmann 7 | 6,2022-01-06,874,436,Nick,Wilfong 8 | 7,2022-01-07,995,500,Bryan,Curtis 9 | 8,2022-01-08,133,250,Brian,Wenck 10 | 9,2022-01-09,939,157,Eric,Petruska 11 | 9,2022-01-09,12345,6789,Eric,Petruska 12 | 10,2022-01-10,597,475,Carl,Carlson -------------------------------------------------------------------------------- /go.mod: 
-------------------------------------------------------------------------------- 1 | module github.com/kfultz07/go-dataframe 2 | 3 | go 1.22 4 | 5 | require github.com/aws/aws-sdk-go v1.44.57 6 | 7 | require ( 8 | github.com/jmespath/go-jmespath v0.4.0 // indirect 9 | github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect 10 | github.com/rivo/uniseg v0.4.7 // indirect 11 | github.com/schollz/progressbar/v3 v3.18.0 // indirect 12 | golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63 // indirect 13 | golang.org/x/sys v0.29.0 // indirect 14 | golang.org/x/term v0.28.0 // indirect 15 | ) 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Kevin Fultz 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /console.go: -------------------------------------------------------------------------------- 1 | package dataframe 2 | 3 | import ( 4 | "fmt" 5 | "strconv" 6 | "time" 7 | ) 8 | 9 | func calculateSpaces(val string, maxColumnWidth int) string { 10 | valLength := len(val) 11 | 12 | if len(val)%2 != 0 { 13 | val += " " 14 | } 15 | 16 | if len(val) == maxColumnWidth { 17 | return "|" + val + "| ---> " + strconv.Itoa(valLength) 18 | } 19 | 20 | for len(val) < maxColumnWidth { 21 | val = " " + val + " " 22 | } 23 | 24 | return "|" + val + "| ---> " + strconv.Itoa(valLength) 25 | } 26 | 27 | func calculateMaxColumnWidth(headers []string) int { 28 | maxWidth := len(headers[0]) 29 | 30 | for _, each := range headers { 31 | if len(each) > maxWidth { 32 | maxWidth = len(each) 33 | } 34 | } 35 | return maxWidth 36 | } 37 | 38 | // Dynamically generate the column headers for the table. 39 | func generateTableColumns(headers []string, maxColumnWidth int) string { 40 | var head string 41 | var columnCount int 42 | 43 | for _, h := range headers { 44 | val := calculateSpaces(h, maxColumnWidth) 45 | head += val + "\n" 46 | columnCount++ 47 | } 48 | 49 | head = "\n" + head 50 | 51 | border := " " 52 | 53 | for i := 0; i < maxColumnWidth; i++ { 54 | border += "-" 55 | } 56 | 57 | head = "Column Count: " + strconv.Itoa(columnCount) + "\n" + border + head + border 58 | 59 | return head 60 | } 61 | 62 | // Method to print all columns in a viewable table within the terminal. 63 | func (frame DataFrame) ViewColumns() { 64 | var columns []string 65 | 66 | // Add columns in order from map. 
67 | for i := 0; i < len(frame.Headers); i++ { 68 | for k, v := range frame.Headers { 69 | if v == i { 70 | columns = append(columns, k) 71 | } 72 | } 73 | } 74 | 75 | maxColumnWidth := calculateMaxColumnWidth(columns) 76 | 77 | head := generateTableColumns(columns, maxColumnWidth) 78 | fmt.Println(head) 79 | } 80 | 81 | func loading(quit <-chan bool) { 82 | char := []string{ 83 | "| L", 84 | "/ LO", 85 | "- LOA", 86 | "\\ LOAD", 87 | "| LOADI", 88 | "/ LOADIN", 89 | "- LOADING", 90 | "\\ LOADING.", 91 | "| LOADING..", 92 | "/ LOADING...", 93 | "- ", 94 | } 95 | 96 | for { 97 | select { 98 | case <-quit: 99 | fmt.Printf("\r") 100 | return 101 | default: 102 | for _, c := range char { 103 | fmt.Printf("\r%s", c) 104 | time.Sleep(time.Millisecond * 75) 105 | } 106 | } 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/aws/aws-sdk-go v1.44.57 h1:Dx1QD+cA89LE0fVQWSov22tpnTa0znq2Feyaa/myVjg= 2 | github.com/aws/aws-sdk-go v1.44.57/go.mod h1:y4AeaBuwd2Lk+GepC1E9v0qOiTws0MIWAX4oIKwKHZo= 3 | github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= 4 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 5 | github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= 6 | github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= 7 | github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= 8 | github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= 9 | github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ= 10 | github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw= 11 | 
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 12 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 13 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 14 | github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= 15 | github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= 16 | github.com/schollz/progressbar/v3 v3.18.0 h1:uXdoHABRFmNIjUfte/Ex7WtuyVslrw2wVPQmCN62HpA= 17 | github.com/schollz/progressbar/v3 v3.18.0/go.mod h1:IsO3lpbaGuzh8zIMzgY3+J8l4C8GjO0Y9S69eFvNsec= 18 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 19 | golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63 h1:m64FZMko/V45gv0bNmrNYoDEq8U5YUhetc9cBWKS1TQ= 20 | golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63/go.mod h1:0v4NqG35kSWCMzLaMeX+IQrlSnVE/bqGSyC2cz/9Le8= 21 | golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd h1:O7DYs+zxREGLKzKoMQrtrEacpb0ZVXA5rIwylE2Xchk= 22 | golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= 23 | golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 24 | golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 25 | golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU= 26 | golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 27 | golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= 28 | golang.org/x/term v0.28.0 h1:/Ts8HFuMR2E6IP/jlo7QVLZHggjKQbhu/7H0LJFr3Gg= 29 | golang.org/x/term v0.28.0/go.mod h1:Sw/lC2IAUZ92udQNf3WodGtn4k/XoLyZoh8v/8uiwek= 30 | golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk= 31 | golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= 32 | 
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 33 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 34 | gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10= 35 | gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 36 | -------------------------------------------------------------------------------- /aws_tooling.go: -------------------------------------------------------------------------------- 1 | package dataframe 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "os" 7 | "path/filepath" 8 | "strings" 9 | 10 | "github.com/aws/aws-sdk-go/aws" 11 | "github.com/aws/aws-sdk-go/aws/session" 12 | "github.com/aws/aws-sdk-go/service/s3" 13 | "github.com/aws/aws-sdk-go/service/s3/s3manager" 14 | ) 15 | 16 | func CreateDataFrameFromAwsS3(path, item, bucket, region, awsAccessKey, awsSecretKey string) (DataFrame, error) { 17 | switch { 18 | case !strings.Contains(item, ".csv"): 19 | return DataFrame{}, errors.New("create dataframe from aws s3: only csv files are currently supported") 20 | case len(path) == 0: 21 | return DataFrame{}, errors.New("create dataframe from aws s3: must provide a path") 22 | case len(item) == 0: 23 | return DataFrame{}, errors.New("create dataframe from aws s3: must provide a file name") 24 | case len(bucket) == 0: 25 | return DataFrame{}, errors.New("create dataframe from aws s3: must provide a bucket name") 26 | case len(region) == 0: 27 | return DataFrame{}, errors.New("create dataframe from aws s3: must provide a region") 28 | case len(awsAccessKey) == 0: 29 | return DataFrame{}, errors.New("create dataframe from aws s3: must provide an access key") 30 | case len(awsSecretKey) == 0: 31 | return DataFrame{}, errors.New("create dataframe from aws s3: must provide a secret key") 32 | } 33 | 34 | // Set environment variables. 
35 | os.Setenv("AWS_ACCESS_KEY", awsAccessKey) 36 | os.Setenv("AWS_SECRET_KEY", awsSecretKey) 37 | 38 | // Create path. 39 | filePath, err := filepath.Abs(path + item) 40 | if err != nil { 41 | return DataFrame{}, err 42 | } 43 | 44 | // Create file. 45 | file, err := os.Create(filePath) 46 | if err != nil { 47 | return DataFrame{}, fmt.Errorf("create dataframe from aws s3: error creating the file '%s'", err) 48 | } 49 | defer file.Close() 50 | 51 | // Initialize an AWS session. 52 | sess, err := session.NewSession(&aws.Config{ 53 | Region: aws.String(region)}, 54 | ) 55 | if err != nil { 56 | return DataFrame{}, errors.New("create dataframe from aws s3: error initializing session") 57 | } 58 | 59 | // Download file from AWS 60 | downloader := s3manager.NewDownloader(sess) 61 | 62 | numBytes, err := downloader.Download(file, &s3.GetObjectInput{Bucket: aws.String(bucket), Key: aws.String(item)}) 63 | if err != nil { 64 | return DataFrame{}, fmt.Errorf("create dataframe from aws s3: error downloading file '%s'", err) 65 | } 66 | 67 | fmt.Println("Downloaded", file.Name(), numBytes, "bytes") 68 | 69 | df := CreateDataFrame(path, item) 70 | 71 | return df, nil 72 | } 73 | 74 | func UploadFileToAwsS3(path, filename, bucket, region string) error { 75 | // Check user entries 76 | if path[len(path)-1:] != "/" { 77 | path = path + "/" 78 | } 79 | 80 | // Initialize an AWS session. 81 | sess, err := session.NewSession(&aws.Config{Region: aws.String(region)}) 82 | if err != nil { 83 | return fmt.Errorf("upload file to s3: error initializing session '%s'", err) 84 | } 85 | 86 | // Create an uploader with the session and default options 87 | uploader := s3manager.NewUploader(sess) 88 | 89 | f, err := os.Open(path + filename) 90 | if err != nil { 91 | return errors.New("upload file to s3: failed to open file") 92 | } 93 | 94 | // Upload the file to S3. 
95 | _, err = uploader.Upload(&s3manager.UploadInput{ 96 | Bucket: aws.String(bucket), 97 | Key: aws.String(filename), 98 | Body: f, 99 | }) 100 | if err != nil { 101 | return errors.New("upload file to s3: failed to upload file to aws s3") 102 | } 103 | return nil 104 | } 105 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # go-dataframe 2 | A simple package to abstract away the process of creating usable DataFrames for data analytics. This package is heavily inspired by the amazing Python library, Pandas. 3 | 4 | # Generate DataFrame 5 | Utilize the CreateDataFrame function to create a DataFrame from an existing CSV file or create an empty DataFrame with the CreateNewDataFrame function. The user can then iterate over the DataFrame to perform the intended tasks. All data in the DataFrame is a string by default. There are various methods to provide additional functionality including: converting data types, update values, filter, concatenate, and more. Please use the below examples or explore the code to learn more. 
6 | 7 | # Import Package 8 | ```go 9 | import ( 10 | "fmt" 11 | 12 | dataframe "github.com/kfultz07/go-dataframe" 13 | ) 14 | ``` 15 | 16 | # Load CSV into DataFrame, create a new field, and save 17 | ```go 18 | path := "/Users/Name/Desktop/" 19 | 20 | // Create the DataFrame 21 | df := dataframe.CreateDataFrame(path, "TestData.csv") 22 | 23 | // Create new field 24 | df.NewField("CWT") 25 | 26 | // Iterate over DataFrame 27 | for _, row := range df.FrameRecords { 28 | cost := row.ConvertToFloat("Cost", df.Headers) 29 | weight := row.ConvertToFloat("Weight", df.Headers) 30 | 31 | // Results must be converted back to string 32 | result := fmt.Sprintf("%f", cwt(cost, weight)) 33 | 34 | // Update the row 35 | row.Update("CWT", result, df.Headers) 36 | } 37 | 38 | df.SaveDataFrame(path, "NewFileName") 39 | ``` 40 | 41 | # Bulk Upload to MySQL Database 42 | Bulk insert rows into an MySQL database. The rowsPerBatch indicates the threshold of rows to be inserted in each batch. The tableColumns slice must contain the same columns (in the same order) as are found in the MySQL table being uploaded to. 43 | ```go 44 | rowsPerBatch := 1000 45 | tableColumns := []string{"col_1", "col_2", "col_3"} 46 | 47 | if err := df.BulkUploadMySql(db, rowsPerBatch, tableColumns, "table_name"); err != nil { 48 | return log.Fatal(err) 49 | } 50 | ``` 51 | 52 | # Concurrently load multiple CSV files into DataFrames 53 | Tests performed utilized four files with a total of 5,746,452 records and a varing number of columns. Results indicated an average total load time of 8.81 seconds when loaded sequentially and 4.06 seconds when loaded concurrently utilizing the LoadFrames function. An overall 54% speed improvement. Files must all be in the same directory. Results are returned in a 54 | slice in the same order as provided in the files parameter. 
55 | ```go 56 | filePath := "/Users/Name/Desktop/" 57 | files := []string{ 58 | "One.csv", 59 | "Two.csv", 60 | "Three.csv", 61 | "Four.csv", 62 | "Five.csv", 63 | } 64 | 65 | results, err := LoadFrames(filePath, files) 66 | if err != nil { 67 | log.Fatal(err) 68 | } 69 | 70 | dfOne := results[0] 71 | dfTwo := results[1] 72 | dfThree := results[2] 73 | dfFour := results[3] 74 | dfFive := results[4] 75 | ``` 76 | 77 | # Stream CSV data 78 | Stream rows of data from a csv file to be processed. Streaming data is preferred when dealing with large files and memory usage needs to be considered. Results are streamed via a channel with a StreamingRecord type. A struct with only desired fields could be created and either operated on sequentially or stored in a slice for later use. 79 | ```go 80 | type Product struct { 81 | name string 82 | cost float64 83 | weight float64 84 | } 85 | 86 | func (p Product) CostPerLb() float64 { 87 | if p.weight == 0.0 { 88 | return 0.0 89 | } 90 | return p.cost / p.weight 91 | } 92 | 93 | filePath := "/Users/Name/Desktop/" 94 | 95 | var products []Product 96 | 97 | c := make(chan StreamingRecord) 98 | go Stream(filePath, "TestData.csv", c) 99 | 100 | for row := range c { 101 | prod := Product{ 102 | name: row.Val("Name"), 103 | cost: row.ConvertToFloat("Cost"), 104 | weight: row.ConvertToInt("Weight"), 105 | } 106 | products = append(products, prod) 107 | } 108 | ``` 109 | 110 | # Divide and Conquer 111 | A method that breaks a DataFrame down into smaller sub-frames. This functionality enables the user to process data in the sub-frames concurrently utilizing a worker pool or some other concurrent design pattern. The user provides the number desired sub-frames and the method returns a slice of DataFrames along with an error. 112 | 113 | An average 66% speed improvement was achieved when testing a CSV file with 5M+ rows and four concurrent workers. 
114 | ```go 115 | // Total values in Charge column and sleep for 5 microseconds to simulate expensive processing. 116 | func worker(df dataframe.DataFrame, results chan<- string) { 117 | total := 0.0 118 | for _, row := range df.FrameRecords { 119 | total += row.ConvertToFloat("Charge", df.Headers) 120 | time.Sleep(time.Microsecond * 5) 121 | } 122 | results <- fmt.Sprintf("%f", total) 123 | } 124 | 125 | // Create sub-frames using the DivideAndConquer method. 126 | frames, err := df.DivideAndConquer(5) 127 | if err != nil { 128 | panic(err) 129 | } 130 | 131 | // Spin-up worker pool. 132 | for _, frame := range frames { 133 | go worker(frame, results) 134 | } 135 | 136 | // Print results from channel. 137 | for i := 0; i < numJobs; i++ { 138 | fmt.Println(<-results) 139 | } 140 | ``` 141 | 142 | # AWS S3 Cloud Storage 143 | ```go 144 | // Download a DataFrame from an S3 bucket 145 | path := "/Users/Name/Desktop/" // File path 146 | fileName := "FileName.csv" // File in AWS Bucket must be .csv 147 | bucketName := "BucketName" // Name of the bucket 148 | bucketRegion := "BucketRegion" // Can be found in the Properties tab in the S3 console (ex. 
us-west-1) 149 | awsAccessKey := "AwsAccessKey" // Access keys can be loaded from environment variables within you program 150 | awsSecretKey := "AwsSecretKey" 151 | df, err := CreateDataFrameFromAwsS3(path, fileName, bucketName, bucketRegion, awsAccessKey, awsSecretKey) 152 | if err != nil { 153 | panic(err) 154 | } 155 | 156 | // Upload a file to an S3 bucket 157 | err := UploadFileToAwsS3(path, fileName, bucket, region) 158 | if err != nil { 159 | panic(err) 160 | } 161 | ``` 162 | 163 | # Various methods to filter DataFrames 164 | ```go 165 | // Variadic methods that generate a new DataFrame 166 | dfFil := df.Filtered("Last Name", "McCarlson", "Benison", "Stephenson") 167 | dfFil := df.Exclude("Last Name", "McCarlson", "Benison", "Stephenson") 168 | 169 | // Keep only specific columns 170 | columns := [2]string{"First Name", "Last Name"} 171 | dfFil := df.KeepColumns(columns[:]) 172 | 173 | // Remove multiple columns 174 | dfFil := df.RemoveColumns("ID", "Cost", "First Name") 175 | 176 | // Remove a single column 177 | dfFil := df.RemoveColumns("First Name") 178 | 179 | // Filter before, after, or between specified dates 180 | dfFil := df.FilteredAfter("Date", "2022-12-31") 181 | dfFil := df.FilteredBefore("Date", "2022-12-31") 182 | dfFil := df.FilteredBetween("Date", "2022-01-01", "2022-12-31") 183 | 184 | // Filter a numerical column based on a provided value 185 | df, err := df.GreaterThanOrEqualTo("Cost", float64(value)) 186 | if err != nil { 187 | panic(err) 188 | } 189 | 190 | df, err := df.LessThanOrEqualTo("Weight", float64(value)) 191 | if err != nil { 192 | panic(err) 193 | } 194 | ``` 195 | 196 | # Sort DataFrame 197 | ```go 198 | // Sort specified column in either ascending or descending order. 
199 | err := df.Sort("Cost", true) 200 | if err != nil { 201 | panic("Sort Error: ", err) 202 | } 203 | ``` 204 | 205 | # Add record to DataFrame and later update 206 | ```go 207 | // Add a new record 208 | data := [6]string{"11", "2022-01-01", "123", "456", "Kevin", "Kevison"} 209 | df = df.AddRecord(data[:]) 210 | 211 | // Update a value 212 | for _, row := range df.FrameRecords { 213 | // row.Val() is used to extract the value in a specific column while iterating 214 | if row.Val("Last Name", df.Headers) == "McPoyle" { 215 | row.Update("Last Name", "SchmicMcPoyle", df.Headers) 216 | } 217 | } 218 | ``` 219 | 220 | # Concatenate DataFrames 221 | ```go 222 | // ConcatFrames uses a pointer to the DataFrame being appended. 223 | // Both DataFrames must have the same columns in the same order. 224 | df, err := df.ConcatFrames(&dfFil) 225 | if err != nil { 226 | panic("ConcatFrames Error: ", err) 227 | } 228 | ``` 229 | 230 | # Rename a Column 231 | ```go 232 | // Rename an existing column in a DataFrame 233 | // First parameter provides the original column name to be updated. 234 | // The next parameter is the desired new name. 235 | err := df.Rename("Weight", "Total Weight") 236 | if err != nil { 237 | panic("Rename Column Error: ", err) 238 | } 239 | ``` 240 | 241 | # Merge two DataFrames 242 | ```go 243 | df := CreateDataFrame(path, "TestData.csv") 244 | dfRight := CreateDataFrame(path, "TestDataRight.csv") 245 | 246 | // Merge all columns found in right DataFrame into left DataFrame. 247 | // User provides the lookup column with the unique values that link the two DataFrames. 248 | err := df.Merge(&dfRight, "ID") 249 | if err != nil { 250 | panic(err) 251 | } 252 | 253 | // Merge only specified columns from right DataFrame into left DataFrame. 254 | // User provides columns immediately after the lookup column. 
255 | err := df.Merge(&dfRight, "ID", "City", "State") 256 | if err != nil { 257 | panic(err) 258 | } 259 | 260 | // Inner merge all columns on a specified primary key. 261 | // Results will only include records where the primary key is found in both DataFrames. 262 | df, err := df.InnerMerge(&dfRight, "ID") 263 | if err != nil { 264 | panic(err) 265 | } 266 | ``` 267 | 268 | # Various Tools 269 | ```go 270 | // Total rows 271 | total := df.CountRecords() 272 | 273 | // Returns a slice of all unique values in a specified column 274 | lastNames := df.Unique("Last Name") 275 | 276 | // Print all columns to console 277 | df.ViewColumns() 278 | 279 | // Returns a slice of all columns in order 280 | foundColumns := df.Columns() 281 | 282 | // Generates a decoupled copy of an existing DataFrame. 283 | // Changes made in one DataFrame will not be reflected in the other. 284 | df2 := df.Copy() 285 | ``` 286 | 287 | # Mathematics 288 | ```go 289 | // Sum a numerical column 290 | sum := df.Sum("Cost") 291 | 292 | // Average a numerical column 293 | average := df.Average("Weight") 294 | 295 | // Min or Max of a numerical column 296 | minimum := df.Min("Cost") 297 | maximum := df.Max("Cost") 298 | 299 | // Calculate the standard deviation of a numerical column 300 | stdev, err := df.StandardDeviation("Cost") 301 | if err != nil { 302 | panic(err) 303 | } 304 | ``` -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package dataframe 2 | 3 | import ( 4 | "database/sql" 5 | "encoding/csv" 6 | "errors" 7 | "fmt" 8 | "io" 9 | "log" 10 | "math" 11 | "os" 12 | "path/filepath" 13 | "sort" 14 | "strconv" 15 | "strings" 16 | "time" 17 | 18 | progressbar "github.com/schollz/progressbar/v3" 19 | "golang.org/x/exp/slices" 20 | ) 21 | 22 | type Record struct { 23 | Data []string 24 | } 25 | 26 | type DataFrame struct { 27 | FrameRecords []Record 28 | Headers 
map[string]int
}

type StreamingRecord struct {
	Data    []string
	Headers map[string]int
}

// Val returns the value stored in the specified field.
// Panics if the field is not present in the dataframe.
func (x StreamingRecord) Val(fieldName string) string {
	pos, ok := x.Headers[fieldName]
	if !ok {
		panic(fmt.Errorf("provided field '%s' is not a valid field in the dataframe", fieldName))
	}
	return x.Data[pos]
}

// ConvertToFloat converts the value from a string to float64.
// A value that cannot be parsed is fatal.
func (x StreamingRecord) ConvertToFloat(fieldName string) float64 {
	parsed, err := strconv.ParseFloat(x.Val(fieldName), 64)
	if err != nil {
		log.Fatalf("could not convert to float64: %v", err)
	}
	return parsed
}

// ConvertToInt converts the value from a string to int64.
// A value that cannot be parsed is fatal.
func (x StreamingRecord) ConvertToInt(fieldName string) int64 {
	parsed, err := strconv.ParseInt(x.Val(fieldName), 0, 64)
	if err != nil {
		log.Fatalf("could not convert to int64: %v", err)
	}
	return parsed
}

// CreateNewDataFrame generates a new empty DataFrame with the provided
// headers registered in positional order.
func CreateNewDataFrame(headers []string) DataFrame {
	theHeaders := make(map[string]int, len(headers))
	for pos, name := range headers {
		theHeaders[name] = pos
	}
	return DataFrame{FrameRecords: []Record{}, Headers: theHeaders}
}

// Generate a new DataFrame sourced from a csv file.
78 | func CreateDataFrame(path, fileName string) DataFrame { 79 | if !strings.Contains(fileName, ".csv") && !strings.Contains(fileName, ".CSV") { 80 | fileName = fileName + ".csv" 81 | } 82 | 83 | // Open the CSV file 84 | recordFile, err := os.Open(filepath.Join(path, fileName)) 85 | if err != nil { 86 | log.Fatalf("error opening file: please ensure the path and filename are correct: %v", err) 87 | } 88 | 89 | // Setup the reader 90 | reader := csv.NewReader(recordFile) 91 | 92 | // Read the records 93 | header, err := reader.Read() 94 | if err != nil { 95 | log.Fatalf("error reading the records: %v", err) 96 | } 97 | 98 | // Remove Byte Order Marker for UTF-8 files 99 | for i, each := range header { 100 | byteSlice := []byte(each) 101 | 102 | if len(byteSlice) < 3 { 103 | continue 104 | } 105 | 106 | if byteSlice[0] == 239 && byteSlice[1] == 187 && byteSlice[2] == 191 { 107 | header[i] = each[3:] 108 | } 109 | } 110 | 111 | headers := make(map[string]int) 112 | for i, columnName := range header { 113 | headers[columnName] = i 114 | } 115 | 116 | // Empty slice to store Records 117 | s := []Record{} 118 | 119 | // Loop over the records and create Record objects to be stored 120 | for i := 0; ; i++ { 121 | record, err := reader.Read() 122 | if err == io.EOF { 123 | break 124 | } else if err != nil { 125 | log.Fatalf("error in record loop: %v", err) 126 | } 127 | // Create new Record 128 | x := Record{Data: []string{}} 129 | 130 | // Add to Data field of Record struct 131 | x.Data = append(x.Data, record...) 132 | s = append(s, x) 133 | } 134 | newFrame := DataFrame{FrameRecords: s, Headers: headers} 135 | return newFrame 136 | } 137 | 138 | // Stream rows of data from a csv file to be processed. Streaming data is preferred when dealing with large files 139 | // and memory usage needs to be considered. Results are streamed via a channel with a StreamingRecord type. 
140 | func Stream(path, fileName string, c chan StreamingRecord) { 141 | defer close(c) 142 | 143 | if !strings.Contains(fileName, ".csv") && !strings.Contains(fileName, ".CSV") { 144 | fileName = fileName + ".csv" 145 | } 146 | 147 | // Open the CSV file 148 | recordFile, err := os.Open(filepath.Join(path, fileName)) 149 | if err != nil { 150 | log.Fatalf("error opening the file: please ensure the path and filename are correct: %v", err) 151 | } 152 | 153 | // Setup the reader 154 | reader := csv.NewReader(recordFile) 155 | 156 | // Read the records 157 | header, err := reader.Read() 158 | if err != nil { 159 | log.Fatalf("error reading the records: %v", err) 160 | } 161 | 162 | // Remove Byte Order Marker for UTF-8 files 163 | for i, each := range header { 164 | byteSlice := []byte(each) 165 | 166 | if len(byteSlice) < 3 { 167 | continue 168 | } 169 | 170 | if byteSlice[0] == 239 && byteSlice[1] == 187 && byteSlice[2] == 191 { 171 | header[i] = each[3:] 172 | } 173 | } 174 | 175 | headers := make(map[string]int) 176 | for i, columnName := range header { 177 | headers[columnName] = i 178 | } 179 | 180 | // Loop over the records and create Record objects to be stored 181 | for i := 0; ; i++ { 182 | record, err := reader.Read() 183 | if err == io.EOF { 184 | break 185 | } else if err != nil { 186 | log.Fatalf("error in record loop: %v", err) 187 | } 188 | // Create new Record 189 | x := StreamingRecord{Headers: headers} 190 | 191 | // Loop over records and add to Data field of Record struct 192 | x.Data = append(x.Data, record...) 193 | c <- x 194 | } 195 | } 196 | 197 | func worker(jobs <-chan string, results chan<- DataFrame, resultsNames chan<- string, filePath string) { 198 | for n := range jobs { 199 | df := CreateDataFrame(filePath, n) 200 | results <- df 201 | resultsNames <- n 202 | } 203 | } 204 | 205 | // Concurrently loads multiple csv files into DataFrames within the same directory. 
206 | // Returns a slice with the DataFrames in the same order as provided in the files parameter. 207 | func LoadFrames(filePath string, files []string) ([]DataFrame, error) { 208 | numJobs := len(files) 209 | 210 | if numJobs <= 1 { 211 | return nil, errors.New("LoadFrames requires at least two files") 212 | } 213 | 214 | jobs := make(chan string, numJobs) 215 | results := make(chan DataFrame, numJobs) 216 | resultsNames := make(chan string, numJobs) 217 | 218 | // Generate workers 219 | for i := 0; i < 4; i++ { 220 | go worker(jobs, results, resultsNames, filePath) 221 | } 222 | 223 | // Load up the jobs channel 224 | for i := 0; i < numJobs; i++ { 225 | jobs <- files[i] 226 | } 227 | close(jobs) // Close jobs channel once loaded 228 | 229 | // Map to store results 230 | jobResults := make(map[string]DataFrame) 231 | 232 | // Collect results and store in map 233 | for i := 1; i <= numJobs; i++ { 234 | jobResults[<-resultsNames] = <-results 235 | } 236 | 237 | var orderedResults []DataFrame 238 | for _, f := range files { 239 | val, ok := jobResults[f] 240 | if !ok { 241 | return []DataFrame{}, errors.New("error occurred while looking up returned DataFrame in the LoadFrames function") 242 | } 243 | orderedResults = append(orderedResults, val) 244 | } 245 | return orderedResults, nil 246 | } 247 | 248 | // Calculates number of records to include in each subframe. 249 | func getRowsPerSubframe(rowCount, requestedSubFrames int) (int, error) { 250 | if requestedSubFrames == 0 { 251 | return 0, errors.New("requested Sub Frames in DivideAndConquer cannot be zero") 252 | } 253 | if requestedSubFrames > rowCount { 254 | return 0, errors.New("requested Sub Frames in DivideAndConquer cannot be greater than size of dataframe") 255 | } 256 | if rowCount == 0 { 257 | return 0, errors.New("empty dataframe") 258 | } 259 | return rowCount / requestedSubFrames, nil 260 | } 261 | 262 | // Breaks down a DataFrame into smaller sub-frames to process data concurrently. 
263 | // RequestedSubFrame parameter provided by the user are the number of subframes they would like returned. 264 | func (frame DataFrame) DivideAndConquer(requestedSubFrames int) ([]DataFrame, error) { 265 | frameSize := frame.CountRecords() 266 | 267 | rowsPerSubframe, err := getRowsPerSubframe(frameSize, requestedSubFrames) 268 | if err != nil { 269 | return []DataFrame{}, err 270 | } 271 | 272 | pos := 0 273 | var frames []DataFrame 274 | 275 | // Process each subframe. 276 | for requestedSubFrames > 0 { 277 | dfNew := CreateNewDataFrame(frame.Columns()) 278 | 279 | // When on last subframe. 280 | if requestedSubFrames == 1 { 281 | for pos < frameSize { 282 | dfNew = dfNew.AddRecord(frame.FrameRecords[pos].Data) 283 | pos++ 284 | } 285 | } else { 286 | for i := 0; i < rowsPerSubframe; i++ { 287 | dfNew = dfNew.AddRecord(frame.FrameRecords[pos].Data) 288 | pos++ 289 | } 290 | } 291 | frames = append(frames, dfNew) 292 | requestedSubFrames-- 293 | } 294 | 295 | return frames, nil 296 | } 297 | 298 | func (frame DataFrame) BulkUploadMySql(db *sql.DB, rowsPerBatch int, tableColumns []string, table string) error { 299 | if db == nil { 300 | return errors.New("bulk upload error: database nil pointer") 301 | } 302 | if rowsPerBatch < 1 { 303 | rowsPerBatch = 1000 304 | } 305 | if len(tableColumns) == 0 { 306 | return errors.New("bulk upload error: must provide columns") 307 | } 308 | if len(table) == 0 { 309 | return errors.New("bulk upload error: must provide a table name") 310 | } 311 | 312 | frameColumns := frame.Columns() 313 | 314 | if len(tableColumns) != len(frameColumns) { 315 | return errors.New("bulk upload error: the provided columns do not match dataframe") 316 | } 317 | 318 | var cnt int 319 | bulkData := [][]interface{}{} 320 | bar := progressbar.Default(int64(len(frame.FrameRecords))) 321 | 322 | for _, row := range frame.FrameRecords { 323 | data := []interface{}{} 324 | for _, colData := range frameColumns { 325 | data = append(data, 
row.Val(colData, frame.Headers)) 326 | } 327 | bulkData = append(bulkData, data) 328 | bar.Add(1) 329 | cnt++ 330 | 331 | if cnt == rowsPerBatch { 332 | if err := insertRows(db, bulkData, table, tableColumns); err != nil { 333 | return fmt.Errorf("bulk upload error: inserting records: %v", err) 334 | } 335 | cnt = 0 336 | bulkData = nil 337 | } 338 | } 339 | 340 | // Insert remaining rows that did not hit upload threshold. 341 | if len(bulkData) > 0 { 342 | if err := insertRows(db, bulkData, table, tableColumns); err != nil { 343 | return fmt.Errorf("bulk upload error: inserting records: %v", err) 344 | } 345 | } 346 | 347 | return nil 348 | } 349 | 350 | // Bulk insert rows into a specified table. 351 | func insertRows(db *sql.DB, bulkData [][]interface{}, table string, columns []string) error { 352 | sqlStr := "INSERT INTO `" + table + "`(" 353 | 354 | // Add all columns to the SQL statement. 355 | for _, col := range columns { 356 | sqlStr += "`" + col + "`," 357 | } 358 | // Trim the end to remove comma and add additional SQL. 359 | sqlStr = sqlStr[0:len(sqlStr)-1] + ") VALUES " 360 | 361 | vals := []interface{}{} 362 | 363 | for _, data := range bulkData { 364 | sqlStr += "(" 365 | 366 | // Add "?," for each of the columns. 367 | for i := 0; i < len(columns); i++ { 368 | sqlStr += "?," 369 | } 370 | // Trim comma at the end. 371 | sqlStr = sqlStr[0 : len(sqlStr)-1] 372 | 373 | sqlStr += ")," 374 | vals = append(vals, data...) 375 | } 376 | 377 | // Trim the end to remove comma. 378 | sqlStr = sqlStr[0 : len(sqlStr)-1] 379 | 380 | stmt, err := db.Prepare(sqlStr) 381 | if err != nil { 382 | return err 383 | } 384 | _, err = stmt.Exec(vals...) 
385 | if err != nil { 386 | return err 387 | } 388 | defer stmt.Close() 389 | return nil 390 | } 391 | 392 | // User specifies columns they want to keep from a preexisting DataFrame 393 | func (frame DataFrame) KeepColumns(columns []string) DataFrame { 394 | df := CreateNewDataFrame(columns) 395 | 396 | for _, row := range frame.FrameRecords { 397 | var newData []string 398 | for _, column := range columns { 399 | newData = append(newData, row.Val(column, frame.Headers)) 400 | } 401 | df = df.AddRecord(newData) 402 | } 403 | 404 | return df 405 | } 406 | 407 | // User specifies columns they want to remove from a preexisting DataFrame 408 | func (frame DataFrame) RemoveColumns(columns ...string) DataFrame { 409 | approvedColumns := []string{} 410 | 411 | for _, col := range frame.Columns() { 412 | if !slices.Contains(columns, col) { 413 | approvedColumns = append(approvedColumns, col) 414 | } 415 | } 416 | 417 | return frame.KeepColumns(approvedColumns) 418 | } 419 | 420 | // Rename a specified column in the DataFrame 421 | func (frame *DataFrame) Rename(originalColumnName, newColumnName string) error { 422 | columns := []string{} 423 | var columnLocation int 424 | 425 | for k, v := range frame.Headers { 426 | columns = append(columns, k) 427 | if k == originalColumnName { 428 | columnLocation = v 429 | } 430 | } 431 | 432 | // Check original column name is found in DataFrame 433 | if !slices.Contains(columns, originalColumnName) { 434 | return errors.New("the original column name provided was not found in the DataFrame") 435 | } 436 | 437 | // Check new column name does not already exist 438 | if slices.Contains(columns, newColumnName) { 439 | return errors.New("the provided new column name already exists in the DataFrame and is not allowed") 440 | } 441 | 442 | // Remove original column name 443 | delete(frame.Headers, originalColumnName) 444 | 445 | // Add new column name 446 | frame.Headers[newColumnName] = columnLocation 447 | 448 | return nil 449 | } 450 | 451 
| // Add a new record to the DataFrame 452 | func (frame DataFrame) AddRecord(newData []string) DataFrame { 453 | x := Record{Data: []string{}} 454 | x.Data = append(x.Data, newData...) 455 | frame.FrameRecords = append(frame.FrameRecords, x) 456 | return frame 457 | } 458 | 459 | // Provides a slice of columns in order 460 | func (frame DataFrame) Columns() []string { 461 | var columns []string 462 | 463 | for i := 0; i < len(frame.Headers); i++ { 464 | for k, v := range frame.Headers { 465 | if v == i { 466 | columns = append(columns, k) 467 | } 468 | } 469 | } 470 | return columns 471 | } 472 | 473 | // Generates a decoupled copy of an existing DataFrame. 474 | // Changes made to either the original or new copied frame 475 | // will not be reflected in the other. 476 | func (frame DataFrame) Copy() DataFrame { 477 | headers := []string{} 478 | 479 | for i := 0; i < len(frame.Headers); i++ { 480 | for k, v := range frame.Headers { 481 | if v == i { 482 | headers = append(headers, k) 483 | } 484 | } 485 | } 486 | df := CreateNewDataFrame(headers) 487 | 488 | for i := 0; i < len(frame.FrameRecords); i++ { 489 | df = df.AddRecord(frame.FrameRecords[i].Data) 490 | } 491 | return df 492 | } 493 | 494 | func (frame DataFrame) NumericColumn(fieldName string) bool { 495 | for _, row := range frame.FrameRecords { 496 | _, err := strconv.ParseFloat(row.Val(fieldName, frame.Headers), 64) 497 | if err != nil { 498 | return false 499 | } 500 | } 501 | return true 502 | } 503 | 504 | func (frame *DataFrame) Sort(fieldName string, ascending bool) error { 505 | // Ensure provided column exists. 506 | val, ok := frame.Headers[fieldName] 507 | if !ok { 508 | return errors.New("the provided column to sort does not exist") 509 | } 510 | 511 | // Converts provided value to float64 if column is numeric. 
512 | if frame.NumericColumn(fieldName) { 513 | if ascending { 514 | sort.Slice(frame.FrameRecords, func(i, j int) bool { 515 | iVal, _ := strconv.ParseFloat(frame.FrameRecords[i].Data[val], 64) 516 | jVal, _ := strconv.ParseFloat(frame.FrameRecords[j].Data[val], 64) 517 | return iVal < jVal 518 | }) 519 | return nil 520 | } 521 | sort.Slice(frame.FrameRecords, func(i, j int) bool { 522 | iVal, _ := strconv.ParseFloat(frame.FrameRecords[i].Data[val], 64) 523 | jVal, _ := strconv.ParseFloat(frame.FrameRecords[j].Data[val], 64) 524 | return iVal > jVal 525 | }) 526 | return nil 527 | } 528 | 529 | if ascending { 530 | sort.Slice(frame.FrameRecords, func(i, j int) bool { 531 | return frame.FrameRecords[i].Data[val] < frame.FrameRecords[j].Data[val] 532 | }) 533 | return nil 534 | } 535 | sort.Slice(frame.FrameRecords, func(i, j int) bool { 536 | return frame.FrameRecords[i].Data[val] > frame.FrameRecords[j].Data[val] 537 | }) 538 | return nil 539 | } 540 | 541 | // Generates a new filtered DataFrame. 542 | // New DataFrame will be kept in same order as original. 543 | func (frame DataFrame) Filtered(fieldName string, value ...string) DataFrame { 544 | headers := []string{} 545 | 546 | for i := 0; i < len(frame.Headers); i++ { 547 | for k, v := range frame.Headers { 548 | if v == i { 549 | headers = append(headers, k) 550 | } 551 | } 552 | } 553 | newFrame := CreateNewDataFrame(headers) 554 | 555 | for i := 0; i < len(frame.FrameRecords); i++ { 556 | if slices.Contains(value, frame.FrameRecords[i].Data[frame.Headers[fieldName]]) { 557 | newFrame = newFrame.AddRecord(frame.FrameRecords[i].Data) 558 | } 559 | } 560 | 561 | return newFrame 562 | } 563 | 564 | // Generated a new filtered DataFrame that in which a numerical column is either greater than or equal to 565 | // a provided numerical value. 
566 | func (frame DataFrame) GreaterThanOrEqualTo(fieldName string, value float64) (DataFrame, error) { 567 | headers := []string{} 568 | 569 | for i := 0; i < len(frame.Headers); i++ { 570 | for k, v := range frame.Headers { 571 | if v == i { 572 | headers = append(headers, k) 573 | } 574 | } 575 | } 576 | newFrame := CreateNewDataFrame(headers) 577 | 578 | for i, row := range frame.FrameRecords { 579 | valString := row.Val(fieldName, frame.Headers) 580 | 581 | val, err := strconv.ParseFloat(valString, 64) 582 | if err != nil { 583 | return CreateNewDataFrame([]string{}), err 584 | } 585 | 586 | if val >= value { 587 | newFrame = newFrame.AddRecord(frame.FrameRecords[i].Data) 588 | } 589 | } 590 | return newFrame, nil 591 | } 592 | 593 | // Generated a new filtered DataFrame that in which a numerical column is either less than or equal to 594 | // a provided numerical value. 595 | func (frame DataFrame) LessThanOrEqualTo(fieldName string, value float64) (DataFrame, error) { 596 | headers := []string{} 597 | 598 | for i := 0; i < len(frame.Headers); i++ { 599 | for k, v := range frame.Headers { 600 | if v == i { 601 | headers = append(headers, k) 602 | } 603 | } 604 | } 605 | newFrame := CreateNewDataFrame(headers) 606 | 607 | for i, row := range frame.FrameRecords { 608 | valString := row.Val(fieldName, frame.Headers) 609 | 610 | val, err := strconv.ParseFloat(valString, 64) 611 | if err != nil { 612 | return CreateNewDataFrame([]string{}), err 613 | } 614 | 615 | if val <= value { 616 | newFrame = newFrame.AddRecord(frame.FrameRecords[i].Data) 617 | } 618 | } 619 | return newFrame, nil 620 | } 621 | 622 | // Generates a new DataFrame that excludes specified instances. 623 | // New DataFrame will be kept in same order as original. 
624 | func (frame DataFrame) Exclude(fieldName string, value ...string) DataFrame { 625 | headers := []string{} 626 | 627 | for i := 0; i < len(frame.Headers); i++ { 628 | for k, v := range frame.Headers { 629 | if v == i { 630 | headers = append(headers, k) 631 | } 632 | } 633 | } 634 | newFrame := CreateNewDataFrame(headers) 635 | 636 | for i := 0; i < len(frame.FrameRecords); i++ { 637 | if !slices.Contains(value, frame.FrameRecords[i].Data[frame.Headers[fieldName]]) { 638 | newFrame = newFrame.AddRecord(frame.FrameRecords[i].Data) 639 | } 640 | } 641 | 642 | return newFrame 643 | } 644 | 645 | // Generates a new filtered DataFrame with all records occuring after a specified date provided by the user. 646 | // User must provide the date field as well as the desired date. 647 | // Instances where record dates occur on the same date provided by the user will not be included. 648 | // Records must occur after the specified date. 649 | func (frame DataFrame) FilteredAfter(fieldName, desiredDate string) DataFrame { 650 | headers := []string{} 651 | 652 | for i := 0; i < len(frame.Headers); i++ { 653 | for k, v := range frame.Headers { 654 | if v == i { 655 | headers = append(headers, k) 656 | } 657 | } 658 | } 659 | newFrame := CreateNewDataFrame(headers) 660 | 661 | for i := 0; i < len(frame.FrameRecords); i++ { 662 | recordDate := dateConverter(frame.FrameRecords[i].Data[frame.Headers[fieldName]]) 663 | isAfter := recordDate.After(dateConverter(desiredDate)) 664 | 665 | if isAfter { 666 | newFrame = newFrame.AddRecord(frame.FrameRecords[i].Data) 667 | } 668 | } 669 | return newFrame 670 | } 671 | 672 | // Generates a new filtered DataFrame with all records occuring before a specified date provided by the user. 673 | // User must provide the date field as well as the desired date. 674 | // Instances where record dates occur on the same date provided by the user will not be included. Records must occur 675 | // before the specified date. 
676 | func (frame DataFrame) FilteredBefore(fieldName, desiredDate string) DataFrame { 677 | headers := []string{} 678 | 679 | for i := 0; i < len(frame.Headers); i++ { 680 | for k, v := range frame.Headers { 681 | if v == i { 682 | headers = append(headers, k) 683 | } 684 | } 685 | } 686 | newFrame := CreateNewDataFrame(headers) 687 | 688 | for i := 0; i < len(frame.FrameRecords); i++ { 689 | recordDate := dateConverter(frame.FrameRecords[i].Data[frame.Headers[fieldName]]) 690 | isBefore := recordDate.Before(dateConverter(desiredDate)) 691 | 692 | if isBefore { 693 | newFrame = newFrame.AddRecord(frame.FrameRecords[i].Data) 694 | } 695 | } 696 | 697 | return newFrame 698 | } 699 | 700 | // Generates a new filtered DataFrame with all records occuring between a specified date range provided by the user. 701 | // User must provide the date field as well as the desired date. 702 | // Instances where record dates occur on the same date provided by the user will not be included. Records must occur 703 | // between the specified start and end dates. 704 | func (frame DataFrame) FilteredBetween(fieldName, startDate, endDate string) DataFrame { 705 | headers := []string{} 706 | 707 | for i := 0; i < len(frame.Headers); i++ { 708 | for k, v := range frame.Headers { 709 | if v == i { 710 | headers = append(headers, k) 711 | } 712 | } 713 | } 714 | newFrame := CreateNewDataFrame(headers) 715 | 716 | for i := 0; i < len(frame.FrameRecords); i++ { 717 | recordDate := dateConverter(frame.FrameRecords[i].Data[frame.Headers[fieldName]]) 718 | isAfter := recordDate.After(dateConverter(startDate)) 719 | isBefore := recordDate.Before(dateConverter(endDate)) 720 | 721 | if isAfter && isBefore { 722 | newFrame = newFrame.AddRecord(frame.FrameRecords[i].Data) 723 | } 724 | } 725 | 726 | return newFrame 727 | } 728 | 729 | // Creates a new field and assigns and empty string. 
730 | func (frame *DataFrame) NewField(fieldName string) { 731 | for i, _ := range frame.FrameRecords { 732 | frame.FrameRecords[i].Data = append(frame.FrameRecords[i].Data, "") 733 | } 734 | frame.Headers[fieldName] = len(frame.Headers) 735 | } 736 | 737 | // Return a slice of all unique values found in a specified field. 738 | func (frame *DataFrame) Unique(fieldName string) []string { 739 | var results []string 740 | 741 | for _, row := range frame.FrameRecords { 742 | if !slices.Contains(results, row.Val(fieldName, frame.Headers)) { 743 | results = append(results, row.Val(fieldName, frame.Headers)) 744 | } 745 | } 746 | return results 747 | } 748 | 749 | // Stack two DataFrames with matching headers. 750 | func (frame DataFrame) ConcatFrames(dfNew *DataFrame) (DataFrame, error) { 751 | if dfNew == nil { 752 | return frame, errors.New("nil pointer found in ConcatFrames method") 753 | } 754 | 755 | // Check number of columns in each frame match. 756 | if len(frame.Headers) != len(dfNew.Headers) { 757 | return frame, errors.New("cannot ConcatFrames as columns do not match") 758 | } 759 | 760 | // Check columns in both frames are in the same order. 
761 | originalFrame := []string{} 762 | for i := 0; i <= len(frame.Headers); i++ { 763 | for k, v := range frame.Headers { 764 | if v == i { 765 | originalFrame = append(originalFrame, k) 766 | } 767 | } 768 | } 769 | 770 | newFrame := []string{} 771 | for i := 0; i <= len(dfNew.Headers); i++ { 772 | for k, v := range dfNew.Headers { 773 | if v == i { 774 | newFrame = append(newFrame, k) 775 | } 776 | } 777 | } 778 | 779 | for i, each := range originalFrame { 780 | if each != newFrame[i] { 781 | return frame, errors.New("cannot ConcatFrames as columns are not in the same order") 782 | } 783 | } 784 | 785 | // Iterate over new dataframe in order 786 | for i := 0; i < len(dfNew.FrameRecords); i++ { 787 | frame.FrameRecords = append(frame.FrameRecords, dfNew.FrameRecords[i]) 788 | } 789 | return frame, nil 790 | } 791 | 792 | // Import all columns from right frame into left frame if no columns 793 | // are provided by the user. Process must be done so in order. 794 | func (frame DataFrame) Merge(dfRight *DataFrame, primaryKey string, columns ...string) error { 795 | if dfRight == nil { 796 | return errors.New("nil pointer found in Merge method") 797 | } 798 | 799 | if len(columns) == 0 { 800 | for i := 0; i < len(dfRight.Headers); i++ { 801 | for k, v := range dfRight.Headers { 802 | if v == i { 803 | columns = append(columns, k) 804 | } 805 | } 806 | } 807 | } else { 808 | // Ensure columns user provided are all found in right frame. 809 | for _, col := range columns { 810 | colStatus := false 811 | for k, _ := range dfRight.Headers { 812 | if col == k { 813 | colStatus = true 814 | } 815 | } 816 | // Ensure there are no duplicated columns other than the primary key. 817 | if !colStatus { 818 | return errors.New("merge Error: User provided column not found in right dataframe") 819 | } 820 | } 821 | } 822 | 823 | // Check that no columns are duplicated between the two frames (other than primaryKey). 
824 | for _, col := range columns { 825 | for k, _ := range frame.Headers { 826 | if col == k && col != primaryKey { 827 | return errors.New("the following column is duplicated in both frames and is not the specified primary key which is not allowed: " + col) 828 | } 829 | } 830 | } 831 | 832 | // Load map indicating the location of each lookup value in right frame. 833 | lookup := make(map[string]int) 834 | for i, row := range dfRight.FrameRecords { 835 | lookup[row.Val(primaryKey, dfRight.Headers)] = i 836 | } 837 | 838 | // Create new columns in left frame. 839 | for _, col := range columns { 840 | if col != primaryKey { 841 | frame.NewField(col) 842 | } 843 | } 844 | 845 | // Iterate over left frame and add new data. 846 | for _, row := range frame.FrameRecords { 847 | lookupVal := row.Val(primaryKey, frame.Headers) 848 | 849 | if val, ok := lookup[lookupVal]; ok { 850 | for _, col := range columns { 851 | if col != primaryKey { 852 | valToAdd := dfRight.FrameRecords[val].Data[dfRight.Headers[col]] 853 | row.Update(col, valToAdd, frame.Headers) 854 | } 855 | } 856 | } 857 | } 858 | return nil 859 | } 860 | 861 | // Performs an inner merge where all columns are consolidated between the two frames but only for records 862 | // where the specified primary key is found in both frames. 
863 | func (frame DataFrame) InnerMerge(dfRight *DataFrame, primaryKey string) (DataFrame, error) { 864 | if dfRight == nil { 865 | return frame, errors.New("nil pointer found in InnerMerge method") 866 | } 867 | 868 | var rightFrameColumns []string 869 | 870 | for i := 0; i < len(dfRight.Headers); i++ { 871 | for k, v := range dfRight.Headers { 872 | if v == i { 873 | rightFrameColumns = append(rightFrameColumns, k) 874 | } 875 | } 876 | } 877 | 878 | var leftFrameColumns []string 879 | 880 | for i := 0; i < len(frame.Headers); i++ { 881 | for k, v := range frame.Headers { 882 | if v == i { 883 | leftFrameColumns = append(leftFrameColumns, k) 884 | } 885 | } 886 | } 887 | 888 | // Ensure the specified primary key is found in both frames. 889 | var lStatus bool 890 | var rStatus bool 891 | 892 | for _, col := range leftFrameColumns { 893 | if col == primaryKey { 894 | lStatus = true 895 | } 896 | } 897 | 898 | for _, col := range rightFrameColumns { 899 | if col == primaryKey { 900 | rStatus = true 901 | } 902 | } 903 | 904 | if !lStatus || !rStatus { 905 | return frame, errors.New("the specified primary key was not found in both DataFrames") 906 | } 907 | 908 | // Find position of primary key column in right frame. 909 | var rightFramePrimaryKeyPosition int 910 | for i, col := range rightFrameColumns { 911 | if col == primaryKey { 912 | rightFramePrimaryKeyPosition = i 913 | } 914 | } 915 | 916 | // Check that no columns are duplicated between the two frames (other than primaryKey). 917 | for _, col := range rightFrameColumns { 918 | for k, _ := range frame.Headers { 919 | if col == k && col != primaryKey { 920 | return frame, errors.New("the following column is duplicated in both frames and is not the specified primary key which is not allowed: " + col) 921 | } 922 | } 923 | } 924 | 925 | // Load map indicating the location of each lookup value in right frame. 
926 | rLookup := make(map[string]int) 927 | for i, row := range dfRight.FrameRecords { 928 | // Only add if key hasn't already been added. This ensures the first record found in the right 929 | // frame is what is used instead of the last if duplicates are found. 930 | currentKey := row.Val(primaryKey, dfRight.Headers) 931 | _, ok := rLookup[currentKey] 932 | if !ok { 933 | rLookup[currentKey] = i 934 | } 935 | } 936 | 937 | // New DataFrame to house records found in both frames. 938 | dfNew := CreateNewDataFrame(leftFrameColumns) 939 | 940 | // Add right frame columns to new DataFrame. 941 | for i, col := range rightFrameColumns { 942 | // Skip over primary key column in right frame as it was already included in the left frame. 943 | if i != rightFramePrimaryKeyPosition { 944 | dfNew.NewField(col) 945 | } 946 | } 947 | 948 | var approvedPrimaryKeys []string 949 | 950 | // Create slice of specified ID's found in both frames. 951 | for _, lRow := range frame.FrameRecords { 952 | currentKey := lRow.Val(primaryKey, frame.Headers) 953 | 954 | // Skip blank values as they are not allowed. 955 | if len(currentKey) == 0 || strings.ToLower(currentKey) == "nan" || strings.ToLower(currentKey) == "null" { 956 | continue 957 | } 958 | 959 | for _, rRow := range dfRight.FrameRecords { 960 | currentRightFrameKey := rRow.Val(primaryKey, dfRight.Headers) 961 | // Add primary key to approved list if found in right frame. 962 | if currentRightFrameKey == currentKey { 963 | approvedPrimaryKeys = append(approvedPrimaryKeys, currentKey) 964 | } 965 | } 966 | } 967 | 968 | // Add approved records to new DataFrame. 969 | for i, row := range frame.FrameRecords { 970 | currentKey := row.Val(primaryKey, frame.Headers) 971 | if slices.Contains(approvedPrimaryKeys, currentKey) { 972 | lData := frame.FrameRecords[i].Data 973 | rData := dfRight.FrameRecords[rLookup[currentKey]].Data 974 | 975 | // Add left frame data to variable. 976 | var data []string 977 | data = append(data, lData...) 
978 | 979 | // Add all right frame data while skipping over the primary key column. 980 | // The primary key column is skipped as it has already been added from the left frame. 981 | for i, d := range rData { 982 | if i != rightFramePrimaryKeyPosition { 983 | data = append(data, d) 984 | } 985 | } 986 | 987 | dfNew = dfNew.AddRecord(data) 988 | } 989 | } 990 | return dfNew, nil 991 | } 992 | 993 | func (frame *DataFrame) CountRecords() int { 994 | return len(frame.FrameRecords) 995 | } 996 | 997 | // Return a sum of float64 type of a numerical field. 998 | func (frame *DataFrame) Sum(fieldName string) float64 { 999 | var sum float64 1000 | 1001 | for _, row := range frame.FrameRecords { 1002 | val, err := strconv.ParseFloat(row.Val(fieldName, frame.Headers), 64) 1003 | if err != nil { 1004 | log.Fatalf("could not convert string to float during sum: %v", err) 1005 | } 1006 | sum += val 1007 | } 1008 | return sum 1009 | } 1010 | 1011 | // Return an average of type float64 of a numerical field. 1012 | func (frame *DataFrame) Average(fieldName string) float64 { 1013 | sum := frame.Sum(fieldName) 1014 | count := frame.CountRecords() 1015 | 1016 | if count == 0 { 1017 | return 0.0 1018 | } 1019 | return sum / float64(count) 1020 | } 1021 | 1022 | // Return the maximum value in a numerical field. 1023 | func (frame *DataFrame) Max(fieldName string) float64 { 1024 | maximum := 0.0 1025 | for i, row := range frame.FrameRecords { 1026 | // Set the max to the first value in dataframe. 
1027 | if i == 0 { 1028 | initialMax, err := strconv.ParseFloat(row.Val(fieldName, frame.Headers), 64) 1029 | if err != nil { 1030 | log.Fatalf("could not convert string to float during sum: %v", err) 1031 | } 1032 | maximum = initialMax 1033 | } 1034 | val, err := strconv.ParseFloat(row.Val(fieldName, frame.Headers), 64) 1035 | if err != nil { 1036 | log.Fatalf("could not convert string to float during sum: %v", err) 1037 | } 1038 | 1039 | if val > maximum { 1040 | maximum = val 1041 | } 1042 | } 1043 | return maximum 1044 | } 1045 | 1046 | // Return the minimum value in a numerical field. 1047 | func (frame *DataFrame) Min(fieldName string) float64 { 1048 | min := 0.0 1049 | for i, row := range frame.FrameRecords { 1050 | // Set the max to the first value in dataframe. 1051 | if i == 0 { 1052 | initialMin, err := strconv.ParseFloat(row.Val(fieldName, frame.Headers), 64) 1053 | if err != nil { 1054 | log.Fatalf("could not convert string to float during sum: %v", err) 1055 | } 1056 | min = initialMin 1057 | } 1058 | val, err := strconv.ParseFloat(row.Val(fieldName, frame.Headers), 64) 1059 | if err != nil { 1060 | log.Fatalf("could not convert string to float during sum: %v", err) 1061 | } 1062 | 1063 | if val < min { 1064 | min = val 1065 | } 1066 | } 1067 | return min 1068 | } 1069 | 1070 | func standardDeviation(num []float64) float64 { 1071 | l := float64(len(num)) 1072 | sum := 0.0 1073 | var sd float64 1074 | 1075 | for _, n := range num { 1076 | sum += n 1077 | } 1078 | 1079 | mean := sum / l 1080 | 1081 | for j := 0; j < int(l); j++ { 1082 | // The use of Pow math function func Pow(x, y float64) float64 1083 | sd += math.Pow(num[j]-mean, 2) 1084 | } 1085 | // The use of Sqrt math function func Sqrt(x float64) float64 1086 | sd = math.Sqrt(sd / l) 1087 | 1088 | return sd 1089 | } 1090 | 1091 | // Return the standard deviation of a numerical field. 
1092 | func (frame *DataFrame) StandardDeviation(fieldName string) (float64, error) { 1093 | var nums []float64 1094 | 1095 | for _, row := range frame.FrameRecords { 1096 | num, err := strconv.ParseFloat(row.Val(fieldName, frame.Headers), 64) 1097 | if err != nil { 1098 | return 0.0, errors.New("could not convert string to number in specified column to calculate standard deviation") 1099 | } 1100 | nums = append(nums, num) 1101 | } 1102 | return standardDeviation(nums), nil 1103 | } 1104 | 1105 | func (frame *DataFrame) SaveDataFrame(path, fileName string) bool { 1106 | if !strings.Contains(fileName, ".csv") && !strings.Contains(fileName, ".CSV") { 1107 | fileName = fileName + ".csv" 1108 | } 1109 | 1110 | // Create the csv file 1111 | csvFile, err := os.Create(filepath.Join(path, fileName)) 1112 | if err != nil { 1113 | log.Fatalf("error creating the blank csv file to save the data: %v", err) 1114 | } 1115 | defer csvFile.Close() 1116 | 1117 | w := csv.NewWriter(csvFile) 1118 | defer w.Flush() 1119 | 1120 | var data [][]string 1121 | var row []string 1122 | columnLength := len(frame.Headers) 1123 | 1124 | // Write headers to top of file 1125 | for i := 0; i < columnLength; i++ { 1126 | for k, v := range frame.Headers { 1127 | if v == i { 1128 | row = append(row, k) 1129 | } 1130 | } 1131 | } 1132 | data = append(data, row) 1133 | 1134 | // Add Data 1135 | for i := 0; i < len(frame.FrameRecords); i++ { 1136 | var row []string 1137 | for pos := 0; pos < columnLength; pos++ { 1138 | row = append(row, frame.FrameRecords[i].Data[pos]) 1139 | } 1140 | data = append(data, row) 1141 | } 1142 | 1143 | w.WriteAll(data) 1144 | 1145 | return true 1146 | } 1147 | 1148 | // Return the value of the specified field. 
1149 | func (x Record) Val(fieldName string, headers map[string]int) string { 1150 | if _, ok := headers[fieldName]; !ok { 1151 | panic(fmt.Errorf("the provided field %s is not a valid field in the dataframe", fieldName)) 1152 | } 1153 | return x.Data[headers[fieldName]] 1154 | } 1155 | 1156 | // Update the value in a specified field. 1157 | func (x Record) Update(fieldName, value string, headers map[string]int) { 1158 | if _, ok := headers[fieldName]; !ok { 1159 | panic(fmt.Errorf("the provided field %s is not a valid field in the dataframe", fieldName)) 1160 | } 1161 | x.Data[headers[fieldName]] = value 1162 | } 1163 | 1164 | // Converts the value from a string to float64. 1165 | func (x Record) ConvertToFloat(fieldName string, headers map[string]int) float64 { 1166 | value, err := strconv.ParseFloat(x.Val(fieldName, headers), 64) 1167 | if err != nil { 1168 | log.Fatalf("could not convert to float64: %v", err) 1169 | } 1170 | return value 1171 | } 1172 | 1173 | // Converts the value from a string to int64. 
1174 | func (x Record) ConvertToInt(fieldName string, headers map[string]int) int64 { 1175 | value, err := strconv.ParseInt(x.Val(fieldName, headers), 0, 64) 1176 | if err != nil { 1177 | log.Fatalf("could not convert to int64: %v", err) 1178 | } 1179 | return value 1180 | } 1181 | 1182 | // Converts various date strings into time.Time 1183 | func dateConverter(dateString string) time.Time { 1184 | // Convert date if not in 2006-01-02 format 1185 | if strings.Contains(dateString, "/") { 1186 | dateSlice := strings.Split(dateString, "/") 1187 | 1188 | if len(dateSlice[0]) != 2 { 1189 | dateSlice[0] = "0" + dateSlice[0] 1190 | } 1191 | if len(dateSlice[1]) != 2 { 1192 | dateSlice[1] = "0" + dateSlice[1] 1193 | } 1194 | if len(dateSlice[2]) == 2 { 1195 | dateSlice[2] = "20" + dateSlice[2] 1196 | } 1197 | dateString = dateSlice[2] + "-" + dateSlice[0] + "-" + dateSlice[1] 1198 | } 1199 | 1200 | value, err := time.Parse("2006-01-02", dateString) 1201 | if err != nil { 1202 | log.Fatalf("could not convert to time.Time: %v", err) 1203 | } 1204 | return value 1205 | } 1206 | 1207 | // Converts date from specified field to time.Time 1208 | func (x Record) ConvertToDate(fieldName string, headers map[string]int) time.Time { 1209 | result := dateConverter(x.Val(fieldName, headers)) 1210 | return result 1211 | } 1212 | -------------------------------------------------------------------------------- /main_test.go: -------------------------------------------------------------------------------- 1 | package dataframe 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "math" 7 | "math/rand" 8 | "strconv" 9 | "testing" 10 | "time" 11 | ) 12 | 13 | func TestStream(t *testing.T) { 14 | firstNameAnswers := []string{"Kevin", "Beth", "Avery", "Peter", "Andy", "Nick", "Bryan", "Brian", "Eric", "Carl"} 15 | costAnswers := []string{"818", "777", "493", "121", "774", "874", "995", "133", "939", "597"} 16 | 17 | path := "./" 18 | c := make(chan StreamingRecord) 19 | go Stream(path, "TestData.csv", c) 
20 | 21 | i := 0 22 | for row := range c { 23 | if row.Val("First Name") != firstNameAnswers[i] { 24 | t.Error("First name did not match.") 25 | } 26 | if row.Val("Cost") != costAnswers[i] { 27 | t.Error("Cost did not match.") 28 | } 29 | i++ 30 | } 31 | } 32 | 33 | func TestStreamConvertToInt(t *testing.T) { 34 | costAnswers := []int64{818, 777, 493, 121, 774, 874, 995, 133, 939, 597} 35 | 36 | path := "./" 37 | c := make(chan StreamingRecord) 38 | go Stream(path, "TestData.csv", c) 39 | 40 | i := 0 41 | for row := range c { 42 | val := row.ConvertToInt("Cost") 43 | if val != costAnswers[i] { 44 | t.Error("Could not convert to int64.") 45 | } 46 | i++ 47 | } 48 | } 49 | 50 | func TestStreamConvertToFloat(t *testing.T) { 51 | costAnswers := []float64{818.0, 777.0, 493.0, 121.0, 774.0, 874.0, 995.0, 133.0, 939.0, 597.0} 52 | 53 | path := "./" 54 | c := make(chan StreamingRecord) 55 | go Stream(path, "TestData.csv", c) 56 | 57 | i := 0 58 | for row := range c { 59 | val := row.ConvertToFloat("Cost") 60 | if val != costAnswers[i] { 61 | t.Error("Could not convert to float64.") 62 | } 63 | i++ 64 | } 65 | } 66 | 67 | func TestDynamicMetrics(t *testing.T) { 68 | // Create DataFrame 69 | columns := []string{"Value"} 70 | df := CreateNewDataFrame(columns) 71 | 72 | sum := 0.0 73 | min := 1 74 | max := 100 75 | recordedMax := 0.0 76 | recordedMin := float64(max) + 1.0 77 | totalRecords := 1_000_000 78 | 79 | for i := 0; i < totalRecords; i++ { 80 | // Ensures differing values generated on each run. 
81 | rand.Seed(time.Now().UnixNano()) 82 | v := float64(rand.Intn(max-min)+min) + rand.Float64() 83 | sum = sum + v 84 | 85 | // Add data to DataFrame 86 | data := []string{fmt.Sprintf("%f", v)} 87 | df = df.AddRecord(data) 88 | 89 | if v > recordedMax { 90 | recordedMax = v 91 | } 92 | if v < recordedMin { 93 | recordedMin = v 94 | } 95 | } 96 | 97 | dataFrameValue := df.Sum("Value") 98 | dataFrameAvgValue := math.Round(df.Average("Value")*100) / 100 99 | dataFrameMaxValue := math.Round(df.Max("Value")*100) / 100 100 | dataFrameMinValue := math.Round(df.Min("Value")*100) / 100 101 | avg := math.Round(sum/float64(totalRecords)*100) / 100 102 | recordedMax = math.Round(recordedMax*100) / 100 103 | recordedMin = math.Round(recordedMin*100) / 100 104 | 105 | if math.Abs(dataFrameValue-sum) > 0.001 { 106 | t.Error("Dynamic Metrics: sum float failed", dataFrameValue, sum, math.Abs(dataFrameValue-sum)) 107 | } 108 | if dataFrameAvgValue != avg { 109 | t.Error("Dynamic Metrics: average float failed", dataFrameAvgValue, avg) 110 | } 111 | if dataFrameMaxValue != recordedMax { 112 | t.Error("Dynamic Metrics: max value error", dataFrameMaxValue, recordedMax) 113 | } 114 | if dataFrameMinValue != recordedMin { 115 | t.Error("Dynamic Metrics: min value error", dataFrameMinValue, recordedMin) 116 | } 117 | if df.CountRecords() != totalRecords { 118 | t.Error("Dynamic Metrics: count records error", df.CountRecords(), totalRecords) 119 | } 120 | } 121 | 122 | func TestCreateDataFrameCostFloat(t *testing.T) { 123 | path := "./" 124 | df := CreateDataFrame(path, "TestData.csv") 125 | total := 0.0 126 | 127 | for _, row := range df.FrameRecords { 128 | total += row.ConvertToFloat("Cost", df.Headers) 129 | } 130 | 131 | if total != 6521.0 { 132 | t.Error("Cost sum incorrect.") 133 | } 134 | } 135 | 136 | func TestCreateDataFrameCostInt(t *testing.T) { 137 | path := "./" 138 | df := CreateDataFrame(path, "TestData.csv") 139 | var total int64 140 | 141 | for _, row := range 
df.FrameRecords { 142 | total += row.ConvertToInt("Cost", df.Headers) 143 | } 144 | 145 | if total != 6521 { 146 | t.Error("Cost sum incorrect.") 147 | } 148 | } 149 | 150 | func TestSum(t *testing.T) { 151 | path := "./" 152 | df := CreateDataFrame(path, "TestData.csv") 153 | 154 | if df.Sum("Weight") != 3376.0 || df.Sum("Cost") != 6521.0 { 155 | t.Error("Just sum error...") 156 | } 157 | } 158 | 159 | func TestAverage(t *testing.T) { 160 | path := "./" 161 | df := CreateDataFrame(path, "TestData.csv") 162 | 163 | if df.Average("Weight") != 337.60 || df.Average("Cost") != 652.10 { 164 | t.Error("Not your average error...") 165 | } 166 | } 167 | 168 | func TestMax(t *testing.T) { 169 | path := "./" 170 | df := CreateDataFrame(path, "TestData.csv") 171 | 172 | if df.Max("Weight") != 500.0 || df.Max("Cost") != 995.0 { 173 | t.Error("Error to the max...") 174 | } 175 | } 176 | 177 | func TestMin(t *testing.T) { 178 | path := "./" 179 | df := CreateDataFrame(path, "TestData.csv") 180 | 181 | if df.Min("Weight") != 157.0 || df.Min("Cost") != 121.0 { 182 | t.Error("Error to the min...") 183 | } 184 | } 185 | 186 | func TestStandardDeviationFunction(t *testing.T) { 187 | nums := []float64{4.27, 23.45, 34.43, 54.76, 65.90, 234.45} 188 | stdev := standardDeviation(nums) 189 | expected := 76.42444976721926 190 | variance := stdev - expected 191 | 192 | if stdev != expected { 193 | t.Error(fmt.Printf("Standard Deviation calculation error: Expected: %f Result: %f Variance: %f\n", expected, stdev, variance)) 194 | } 195 | } 196 | 197 | func TestStandardDeviationMethodPass(t *testing.T) { 198 | // Create DataFrame 199 | columns := []string{"ID", "Value"} 200 | df := CreateNewDataFrame(columns) 201 | 202 | for i := 0; i < 1000; i++ { 203 | val := strconv.Itoa(i) 204 | df = df.AddRecord([]string{"ID-" + val, val}) 205 | } 206 | 207 | stdev, err := df.StandardDeviation("Value") 208 | if err != nil { 209 | t.Error("Test should have passed without any string to float conversion 
errors.") 210 | } 211 | 212 | expected := 288.6749902572095 213 | variance := stdev - expected 214 | 215 | if stdev != expected { 216 | t.Error(fmt.Printf("Standard Deviation calculation error: Expected: %f Result: %f Variance: %f\n", expected, stdev, variance)) 217 | } 218 | } 219 | 220 | func TestStandardDeviationMethodFail(t *testing.T) { 221 | // Create DataFrame 222 | columns := []string{"ID", "Value"} 223 | df := CreateNewDataFrame(columns) 224 | 225 | for i := 0; i < 1000; i++ { 226 | // Insert row with value that cannot be converted to float64. 227 | if i == 500 { 228 | df = df.AddRecord([]string{"ID-" + "500", "5x0x0x"}) 229 | } 230 | val := strconv.Itoa(i) 231 | df = df.AddRecord([]string{"ID-" + val, val}) 232 | } 233 | 234 | _, err := df.StandardDeviation("Value") 235 | if err == nil { 236 | t.Error("Test should have failed.") 237 | } 238 | } 239 | 240 | func TestFilteredCount(t *testing.T) { 241 | path := "./" 242 | df := CreateDataFrame(path, "TestData.csv") 243 | dfFil := df.Filtered("Last Name", "Fultz", "Wiedmann") 244 | 245 | if df.CountRecords() != 10 || dfFil.CountRecords() != 5 { 246 | t.Error("Filtered count incorrect.") 247 | } 248 | } 249 | 250 | func TestFilteredCheck(t *testing.T) { 251 | path := "./" 252 | df := CreateDataFrame(path, "TestData.csv") 253 | dfFil := df.Filtered("Last Name", "Fultz", "Wiedmann") 254 | 255 | for _, row := range dfFil.FrameRecords { 256 | if row.Val("Last Name", dfFil.Headers) != "Fultz" && row.Val("Last Name", dfFil.Headers) != "Wiedmann" { 257 | t.Error("Invalid parameter found in Filtered DataFrame.") 258 | } 259 | } 260 | } 261 | 262 | // Ensures changes made in the original dataframe are not also made in a filtered dataframe. 
263 | func TestFilteredChangeToOriginal(t *testing.T) { 264 | path := "./" 265 | df := CreateDataFrame(path, "TestData.csv") 266 | dfFil := df.Filtered("Last Name", "Fultz", "Wiedmann") 267 | 268 | for _, row := range df.FrameRecords { 269 | if row.Val("ID", df.Headers) == "2" { 270 | row.Update("Last Name", "Bethany", df.Headers) 271 | } 272 | if row.Val("ID", df.Headers) == "5" { 273 | row.Update("Last Name", "Andyanne", df.Headers) 274 | } 275 | } 276 | 277 | // Ensure row was actually updated in the original frame. 278 | for _, row := range df.FrameRecords { 279 | if row.Val("ID", df.Headers) == "2" && row.Val("Last Name", df.Headers) != "Bethany" { 280 | t.Error("Row 2 last name not changed in original frame.") 281 | } 282 | if row.Val("ID", df.Headers) == "5" && row.Val("Last Name", df.Headers) != "Andyanne" { 283 | t.Error("Row 5 last name not changed in original frame.") 284 | } 285 | } 286 | 287 | // Check rows in filtered dataframe were not also updated. 288 | for _, row := range dfFil.FrameRecords { 289 | if row.Val("ID", df.Headers) == "2" && row.Val("Last Name", df.Headers) != "Fultz" { 290 | t.Error("Row 2 in filtered dataframe was incorrectly updated with original.") 291 | } 292 | if row.Val("ID", df.Headers) == "5" && row.Val("Last Name", df.Headers) != "Wiedmann" { 293 | t.Error("Row 5 in filtered dataframe was incorrectly updated with original.") 294 | } 295 | } 296 | } 297 | 298 | func TestGreaterThanOrEqualTo(t *testing.T) { 299 | path := "./" 300 | value := float64(597) 301 | df := CreateDataFrame(path, "TestData.csv") 302 | df, err := df.GreaterThanOrEqualTo("Cost", value) 303 | if err != nil { 304 | t.Error("Greater Than Or Equal To: This should not have failed...") 305 | } 306 | 307 | if df.CountRecords() != 7 { 308 | t.Error("Greater Than Or Equal To: Record count is not correct.") 309 | } 310 | 311 | ids := []string{"1", "2", "5", "6", "7", "9", "10"} 312 | foundIds := df.Unique("ID") 313 | 314 | for i, id := range foundIds { 315 | if id 
!= ids[i] { 316 | t.Error("Greater Than Or Equal To: Records do not match.") 317 | } 318 | } 319 | } 320 | 321 | func TestLessThanOrEqualTo(t *testing.T) { 322 | path := "./" 323 | value := float64(436) 324 | df := CreateDataFrame(path, "TestData.csv") 325 | df, err := df.LessThanOrEqualTo("Weight", value) 326 | if err != nil { 327 | t.Error("Less Than Or Equal To: This should not have failed...") 328 | } 329 | 330 | if df.CountRecords() != 7 { 331 | t.Error("Less Than Or Equal To: Record count is not correct.") 332 | } 333 | 334 | ids := []string{"1", "2", "4", "5", "6", "8", "9"} 335 | foundIds := df.Unique("ID") 336 | 337 | for i, id := range foundIds { 338 | if id != ids[i] { 339 | t.Error("Less Than Or Equal To: Records do not match.") 340 | } 341 | } 342 | } 343 | 344 | func TestExcludeCount(t *testing.T) { 345 | path := "./" 346 | df := CreateDataFrame(path, "TestData.csv") 347 | dfExcl := df.Exclude("Last Name", "Fultz", "Wiedmann") 348 | 349 | if df.CountRecords() != 10 || dfExcl.CountRecords() != 5 { 350 | t.Error("Excluded count is incorrect.") 351 | } 352 | } 353 | 354 | func TestExcludeCheck(t *testing.T) { 355 | path := "./" 356 | df := CreateDataFrame(path, "TestData.csv") 357 | dfExcl := df.Exclude("Last Name", "Fultz", "Wiedmann") 358 | 359 | for _, row := range dfExcl.FrameRecords { 360 | if row.Val("Last Name", dfExcl.Headers) == "Fultz" || row.Val("Last Name", dfExcl.Headers) == "Wiedmann" { 361 | t.Error("Excluded parameter found in DataFrame.") 362 | } 363 | } 364 | } 365 | 366 | func TestFilteredAfterCount(t *testing.T) { 367 | path := "./" 368 | df := CreateDataFrame(path, "TestData.csv") 369 | dfFil := df.FilteredAfter("Date", "2022-01-08") 370 | 371 | if df.CountRecords() != 10 || dfFil.CountRecords() != 2 { 372 | t.Error("Filtered After count incorrect.") 373 | } 374 | } 375 | 376 | func TestFilteredAfterCountExcelFormat(t *testing.T) { 377 | path := "./" 378 | df := CreateDataFrame(path, "TestDataDateFormat.csv") 379 | dfFil := 
df.FilteredAfter("Date", "2022-01-08") 380 | 381 | if df.CountRecords() != 10 || dfFil.CountRecords() != 2 { 382 | t.Error("Filtered After Excel Format count incorrect.") 383 | } 384 | } 385 | 386 | func TestFilteredBeforeCount(t *testing.T) { 387 | path := "./" 388 | df := CreateDataFrame(path, "TestData.csv") 389 | dfFil := df.FilteredBefore("Date", "2022-01-08") 390 | 391 | if df.CountRecords() != 10 || dfFil.CountRecords() != 7 { 392 | t.Error("Filtered Before count incorrect.") 393 | } 394 | } 395 | 396 | func TestFilteredBeforeCountExcelFormat(t *testing.T) { 397 | path := "./" 398 | df := CreateDataFrame(path, "TestDataDateFormat.csv") 399 | dfFil := df.FilteredBefore("Date", "2022-01-08") 400 | 401 | if df.CountRecords() != 10 || dfFil.CountRecords() != 7 { 402 | t.Error("Filtered Before Excel Format count incorrect.") 403 | } 404 | } 405 | 406 | func TestFilteredBetweenCount(t *testing.T) { 407 | path := "./" 408 | df := CreateDataFrame(path, "TestData.csv") 409 | dfFil := df.FilteredBetween("Date", "2022-01-02", "2022-01-09") 410 | 411 | if df.CountRecords() != 10 || dfFil.CountRecords() != 6 { 412 | t.Error("Filtered Between count incorrect.") 413 | } 414 | } 415 | 416 | func TestFilteredBetweenExcelFormat(t *testing.T) { 417 | path := "./" 418 | df := CreateDataFrame(path, "TestDataDateFormat.csv") 419 | dfFil := df.FilteredBetween("Date", "2022-01-02", "2022-01-09") 420 | 421 | if df.CountRecords() != 10 || dfFil.CountRecords() != 6 { 422 | t.Error("Filtered Between Excel Format count incorrect.") 423 | } 424 | } 425 | 426 | func TestRecordCheck(t *testing.T) { 427 | path := "./" 428 | df := CreateDataFrame(path, "TestData.csv") 429 | 430 | var id string 431 | var date string 432 | var cost string 433 | var weight string 434 | var firstName string 435 | var lastName string 436 | 437 | for _, row := range df.FrameRecords { 438 | if row.Val("ID", df.Headers) == "5" { 439 | id = row.Val("ID", df.Headers) 440 | date = row.Val("Date", df.Headers) 441 | cost 
= row.Val("Cost", df.Headers) 442 | weight = row.Val("Weight", df.Headers) 443 | firstName = row.Val("First Name", df.Headers) 444 | lastName = row.Val("Last Name", df.Headers) 445 | } 446 | } 447 | 448 | if id != "5" { 449 | t.Error("ID failed") 450 | } else if date != "2022-01-05" { 451 | t.Error("Date failed") 452 | } else if cost != "774" { 453 | t.Error("Cost failed") 454 | } else if weight != "415" { 455 | t.Error("Weight failed") 456 | } else if firstName != "Andy" { 457 | t.Error("First Name failed") 458 | } else if lastName != "Wiedmann" { 459 | t.Error("Last Name failed") 460 | } 461 | } 462 | 463 | func TestRecordCheckPanic(t *testing.T) { 464 | path := "./" 465 | df := CreateDataFrame(path, "TestData.csv") 466 | 467 | for _, row := range df.FrameRecords { 468 | defer func() { recover() }() 469 | 470 | row.Val("Your Name Here", df.Headers) 471 | 472 | // Never reaches here if `OtherFunctionThatPanics` panics. 473 | t.Errorf("The row.Val() method should have panicked.") 474 | } 475 | } 476 | 477 | func TestAddRecord(t *testing.T) { 478 | path := "./" 479 | df := CreateDataFrame(path, "TestData.csv") 480 | newData := [6]string{"11", "2022-06-23", "101", "500", "Ben", "Benison"} 481 | df = df.AddRecord(newData[:]) 482 | 483 | if df.CountRecords() != 11 { 484 | t.Error("Add Record: Count does not match.") 485 | } 486 | 487 | for _, row := range df.FrameRecords { 488 | if row.Val("ID", df.Headers) == "11" { 489 | if row.Val("Date", df.Headers) != "2022-06-23" { 490 | t.Error("Add Record: date failed") 491 | } 492 | if row.Val("Cost", df.Headers) != "101" { 493 | t.Error("Add Record: cost failed") 494 | } 495 | if row.Val("Weight", df.Headers) != "500" { 496 | t.Error("Add Record: weight failed") 497 | } 498 | if row.Val("First Name", df.Headers) != "Ben" { 499 | t.Error("Add Record: first name failed") 500 | } 501 | if row.Val("Last Name", df.Headers) != "Benison" { 502 | t.Error("Add Record: last name failed") 503 | } 504 | } 505 | } 506 | } 507 | 508 | func 
TestByteOrderMark(t *testing.T) { 509 | path := "./" 510 | df := CreateDataFrame(path, "TestDataCommaSeparatedValue.csv") 511 | dfUtf := CreateDataFrame(path, "TestData.csv") 512 | 513 | dfTotal := 0.0 514 | for _, row := range df.FrameRecords { 515 | dfTotal += row.ConvertToFloat("ID", df.Headers) 516 | } 517 | 518 | dfUtfTotal := 0.0 519 | for _, row := range dfUtf.FrameRecords { 520 | dfUtfTotal += row.ConvertToFloat("ID", dfUtf.Headers) 521 | } 522 | 523 | if dfTotal != 55.0 || dfUtfTotal != 55.0 { 524 | t.Error("Byte Order Mark conversion error") 525 | } 526 | } 527 | func TestKeepColumns(t *testing.T) { 528 | path := "./" 529 | df := CreateDataFrame(path, "TestData.csv") 530 | 531 | columns := [3]string{"First Name", "Last Name", "Weight"} 532 | df = df.KeepColumns(columns[:]) 533 | 534 | if df.Headers["First Name"] != 0 || df.Headers["Last Name"] != 1 || df.Headers["Weight"] != 2 || len(df.Headers) > 3 { 535 | t.Error("Keep Columns failed") 536 | } 537 | } 538 | 539 | func TestRemoveColumnsMultiple(t *testing.T) { 540 | path := "./" 541 | df := CreateDataFrame(path, "TestData.csv") 542 | 543 | df = df.RemoveColumns("ID", "Cost", "First Name") 544 | 545 | if df.Headers["Date"] != 0 || df.Headers["Weight"] != 1 || df.Headers["Last Name"] != 2 || len(df.Headers) > 3 { 546 | t.Error("Remove Multiple Columns failed") 547 | } 548 | } 549 | 550 | func TestRemoveColumnsSingle(t *testing.T) { 551 | path := "./" 552 | df := CreateDataFrame(path, "TestData.csv") 553 | 554 | df = df.RemoveColumns("First Name") 555 | 556 | if df.Headers["ID"] != 0 || df.Headers["Date"] != 1 || df.Headers["Cost"] != 2 || df.Headers["Weight"] != 3 || df.Headers["Last Name"] != 4 || len(df.Headers) > 5 { 557 | t.Error("Remove Single Column failed") 558 | } 559 | } 560 | 561 | func TestDateConverterStandardFormat(t *testing.T) { 562 | var s interface{} = dateConverter("2022-01-31") 563 | if _, ok := s.(time.Time); !ok { 564 | t.Error("Date Converter Standard Format Failed") 565 | } 566 | } 
567 | 568 | func TestDateConverterExcelFormatDoubleDigit(t *testing.T) { 569 | var s interface{} = dateConverter("01/31/2022") 570 | if _, ok := s.(time.Time); !ok { 571 | t.Error("Date Converter Excel Format Failed") 572 | } 573 | } 574 | 575 | func TestDateConverterExcelFormatSingleMonthDigit(t *testing.T) { 576 | var s interface{} = dateConverter("1/31/2022") 577 | if _, ok := s.(time.Time); !ok { 578 | t.Error("Date Converter Excel Format Failed") 579 | } 580 | } 581 | 582 | func TestDateConverterExcelFormatSingleDayDigit(t *testing.T) { 583 | var s interface{} = dateConverter("01/1/2022") 584 | if _, ok := s.(time.Time); !ok { 585 | t.Error("Date Converter Excel Format Failed") 586 | } 587 | } 588 | 589 | func TestDateConverterExcelFormatSingleDigit(t *testing.T) { 590 | var s interface{} = dateConverter("1/1/2022") 591 | if _, ok := s.(time.Time); !ok { 592 | t.Error("Date Converter Excel Format Failed") 593 | } 594 | } 595 | 596 | func TestDateConverterExcelFormatDoubleYearDigit(t *testing.T) { 597 | var s interface{} = dateConverter("01/31/22") 598 | if _, ok := s.(time.Time); !ok { 599 | t.Error("Date Converter Excel Format Failed") 600 | } 601 | } 602 | 603 | func TestNewField(t *testing.T) { 604 | path := "./" 605 | df := CreateDataFrame(path, "TestData.csv") 606 | df.NewField("Middle Name") 607 | 608 | if df.Headers["Middle Name"] != 6 { 609 | fmt.Println(df.Headers) 610 | t.Error("New field column not added in proper position.") 611 | } 612 | 613 | for _, row := range df.FrameRecords { 614 | if row.Val("Middle Name", df.Headers) != "" { 615 | t.Error("Value in New Field is not set to nil") 616 | } 617 | } 618 | } 619 | 620 | func TestUnique(t *testing.T) { 621 | path := "./" 622 | df := CreateDataFrame(path, "TestData.csv") 623 | names := df.Unique("Last Name") 624 | 625 | if len(names) != 7 { 626 | t.Error("Unique slice error.") 627 | } 628 | } 629 | 630 | func TestUpdate(t *testing.T) { 631 | path := "./" 632 | df := CreateDataFrame(path, 
"TestData.csv") 633 | 634 | for _, row := range df.FrameRecords { 635 | if row.Val("First Name", df.Headers) == "Avery" && row.Val("Last Name", df.Headers) == "Fultz" { 636 | row.Update("Weight", "30", df.Headers) 637 | } 638 | } 639 | 640 | for _, row := range df.FrameRecords { 641 | if row.Val("First Name", df.Headers) == "Avery" && row.Val("Last Name", df.Headers) == "Fultz" { 642 | if row.Val("Weight", df.Headers) != "30" { 643 | t.Error("Update row failed.") 644 | } 645 | } 646 | } 647 | } 648 | 649 | func TestUpdatePanic(t *testing.T) { 650 | path := "./" 651 | df := CreateDataFrame(path, "TestData.csv") 652 | 653 | for _, row := range df.FrameRecords { 654 | if row.Val("First Name", df.Headers) == "Avery" && row.Val("Last Name", df.Headers) == "Fultz" { 655 | defer func() { recover() }() 656 | 657 | row.Update("Your Name Here", "30", df.Headers) 658 | 659 | t.Errorf("Method should have panicked.") 660 | } 661 | } 662 | } 663 | 664 | func TestMergeFramesAllColumns(t *testing.T) { 665 | path := "./" 666 | 667 | // Prep left frame 668 | df := CreateDataFrame(path, "TestData.csv") 669 | newData := [6]string{"11", "2022-06-27", "5467", "9586", "Cassandra", "SchmaSandra"} 670 | df = df.AddRecord(newData[:]) 671 | 672 | // Prep right frame 673 | dfRight := CreateDataFrame(path, "TestMergeData.csv") 674 | 675 | // Merge 676 | err := df.Merge(&dfRight, "ID") 677 | if err != nil { 678 | t.Error(err) 679 | } 680 | 681 | if df.CountRecords() != 11 { 682 | t.Error("Merge: record count error.") 683 | } 684 | 685 | m := make(map[string][]string) 686 | m["2"] = []string{"RICHLAND", "WA", "99354"} 687 | m["4"] = []string{"VAN BUREN", "AR", "72956"} 688 | m["6"] = []string{"FISHERS", "NY", "14453"} 689 | m["10"] = []string{"JEFFERSON CITY", "MO", "65109"} 690 | m["11"] = []string{"", "", ""} 691 | 692 | for _, row := range df.FrameRecords { 693 | if val, ok := m[row.Val("ID", df.Headers)]; ok { 694 | for i, v := range val { 695 | switch i { 696 | case 0: 697 | if 
row.Val("City", df.Headers) != v { 698 | t.Error("Merge: city error.") 699 | } 700 | case 1: 701 | if row.Val("State", df.Headers) != v { 702 | t.Error("Merge: state error.") 703 | } 704 | case 2: 705 | if row.Val("Postal Code", df.Headers) != v { 706 | t.Error("Merge: postal code error.") 707 | } 708 | } 709 | } 710 | } 711 | } 712 | } 713 | 714 | func TestMergeFramesSpecifiedColumns(t *testing.T) { 715 | path := "./" 716 | 717 | // Prep left frame 718 | df := CreateDataFrame(path, "TestData.csv") 719 | newData := [6]string{"11", "2022-06-27", "5467", "9586", "Cassandra", "SchmaSandra"} 720 | df = df.AddRecord(newData[:]) 721 | 722 | // Prep right frame 723 | dfRight := CreateDataFrame(path, "TestMergeData.csv") 724 | 725 | // Merge 726 | err := df.Merge(&dfRight, "ID", "City", "Postal Code") 727 | if err != nil { 728 | t.Error(err) 729 | } 730 | 731 | if df.CountRecords() != 11 { 732 | t.Error("Merge: record count error.") 733 | } 734 | 735 | m := make(map[string][]string) 736 | m["2"] = []string{"RICHLAND", "99354"} 737 | m["4"] = []string{"VAN BUREN", "72956"} 738 | m["6"] = []string{"FISHERS", "14453"} 739 | m["10"] = []string{"JEFFERSON CITY", "65109"} 740 | m["11"] = []string{"", ""} 741 | 742 | for _, row := range df.FrameRecords { 743 | if val, ok := m[row.Val("ID", df.Headers)]; ok { 744 | for i, v := range val { 745 | switch i { 746 | case 0: 747 | if row.Val("City", df.Headers) != v { 748 | t.Error("Merge: city error.") 749 | } 750 | case 1: 751 | if row.Val("Postal Code", df.Headers) != v { 752 | t.Error("Merge: postal code error.") 753 | } 754 | } 755 | } 756 | } 757 | } 758 | } 759 | 760 | func TestInnerMerge(t *testing.T) { 761 | path := "./" 762 | 763 | // Prep left frame 764 | df := CreateDataFrame(path, "TestData.csv") 765 | 766 | // Prep right frame 767 | dfRight := CreateDataFrame(path, "TestInnerMergeData.csv") 768 | 769 | // Merge 770 | df, err := df.InnerMerge(&dfRight, "ID") 771 | if err != nil { 772 | t.Error(err) 773 | } 774 | 775 | if 
df.CountRecords() != 5 { 776 | t.Error("Inner Merge: record count error.") 777 | } 778 | 779 | columns := []string{"ID", "Date", "Cost", "Weight", "First Name", "Last Name", "City", "State", "Postal Code"} 780 | 781 | data := make([][]string, 5) 782 | data[0] = []string{"4", "2022-01-04", "121", "196", "Peter", "Wiedmann", "VAN BUREN", "AR", "72956"} 783 | data[1] = []string{"5", "2022-01-05", "774", "415", "Andy", "Wiedmann", "TAUNTON", "MA", "2780"} 784 | data[2] = []string{"7", "2022-01-07", "995", "500", "Bryan", "Curtis", "GOLDSBORO", "NC", "27530"} 785 | data[3] = []string{"9", "2022-01-09", "939", "157", "Eric", "Petruska", "PHOENIX", "AZ", "85024"} 786 | data[4] = []string{"10", "2022-01-10", "597", "475", "Carl", "Carlson", "JEFFERSON CITY", "MO", "65109"} 787 | 788 | for i, row := range df.FrameRecords { 789 | if len(row.Data) != len(data[i]) { 790 | t.Error("Inner Merge: Column count does not match.") 791 | } 792 | for i2, col := range columns { 793 | val := row.Val(col, df.Headers) 794 | if val != data[i][i2] { 795 | t.Error("Inner Merge: Data results to not match what is expected.") 796 | } 797 | } 798 | } 799 | } 800 | 801 | func TestInnerMergeLeftFrameDuplicates(t *testing.T) { 802 | path := "./" 803 | 804 | // Prep left frame 805 | df := CreateDataFrame(path, "TestDataInnerDuplicate.csv") 806 | 807 | // Prep right frame 808 | dfRight := CreateDataFrame(path, "TestInnerMergeData.csv") 809 | 810 | // Merge 811 | df, err := df.InnerMerge(&dfRight, "ID") 812 | if err != nil { 813 | t.Error(err) 814 | } 815 | 816 | if df.CountRecords() != 6 { 817 | t.Error("Inner Merge: record count error.") 818 | } 819 | 820 | columns := []string{"ID", "Date", "Cost", "Weight", "First Name", "Last Name", "City", "State", "Postal Code"} 821 | 822 | data := make([][]string, 6) 823 | data[0] = []string{"4", "2022-01-04", "121", "196", "Peter", "Wiedmann", "VAN BUREN", "AR", "72956"} 824 | data[1] = []string{"5", "2022-01-05", "774", "415", "Andy", "Wiedmann", "TAUNTON", 
"MA", "2780"}
	data[2] = []string{"7", "2022-01-07", "995", "500", "Bryan", "Curtis", "GOLDSBORO", "NC", "27530"}
	data[3] = []string{"9", "2022-01-09", "939", "157", "Eric", "Petruska", "PHOENIX", "AZ", "85024"}
	data[4] = []string{"9", "2022-01-09", "12345", "6789", "Eric", "Petruska", "PHOENIX", "AZ", "85024"}
	data[5] = []string{"10", "2022-01-10", "597", "475", "Carl", "Carlson", "JEFFERSON CITY", "MO", "65109"}

	// Every merged row must match the expected data both in column count
	// and in per-column values.
	for i, row := range df.FrameRecords {
		if len(row.Data) != len(data[i]) {
			t.Error("Inner Merge: Column count does not match.")
		}
		for i2, col := range columns {
			val := row.Val(col, df.Headers)
			if val != data[i][i2] {
				t.Error("Inner Merge: Data results do not match what is expected.")
			}
		}
	}
}

// TestConcatFrames verifies that ConcatFrames appends a second frame's rows
// after the first frame's rows, preserving row order, column sums, and count.
func TestConcatFrames(t *testing.T) {
	path := "./"
	dfOne := CreateDataFrame(path, "TestData.csv")
	df := CreateDataFrame(path, "TestDataConcat.csv")

	// Expected "Last Name" values: first the 10 rows of TestData.csv,
	// then the 10 rows of TestDataConcat.csv.
	lastNames := [20]string{
		"Fultz",
		"Fultz",
		"Fultz",
		"Wiedmann",
		"Wiedmann",
		"Wilfong",
		"Curtis",
		"Wenck",
		"Petruska",
		"Carlson",
		"Benny",
		"Kenny",
		"McCarlson",
		"Jeffery",
		"Stephenson",
		"Patrickman",
		"Briarson",
		"Ericson",
		"Asherton",
		"Highman",
	}

	dfOne, err := dfOne.ConcatFrames(&df)
	if err != nil {
		t.Error("Concat Frames: ", err)
	}
	var totalCost int64
	var totalWeight int64

	for i, row := range dfOne.FrameRecords {
		if row.Val("Last Name", dfOne.Headers) != lastNames[i] {
			t.Error("Concat Frames Failed: Last Names")
		}
		totalCost += row.ConvertToInt("Cost", dfOne.Headers)
		totalWeight += row.ConvertToInt("Weight", dfOne.Headers)
	}

	if totalCost != 7100 || totalWeight != 3821 {
		t.Error("Concat Frames Failed: Values")
	}

	if dfOne.CountRecords() != 20 {
		t.Error("Concat Frames Failed: Row Count")
	}
}

// TestConcatFramesAddress checks that ConcatFrames returns a new, decoupled
// frame rather than a reference to either input.
func TestConcatFramesAddress(t *testing.T) {
	path := "./"
	df := CreateDataFrame(path, "TestData.csv")
	df2 := CreateDataFrame(path, "TestDataConcat.csv")

	df3, err := df.ConcatFrames(&df2)
	if err != nil {
		t.Error(err)
	}

	// NOTE(review): df, df2, and df3 are distinct local variables, so these
	// address comparisons can never be true and the check cannot fail.
	// Consider comparing shared backing storage (e.g. record slice identity)
	// instead — needs knowledge of DataFrame internals to do safely.
	if &df == &df3 || &df2 == &df3 {
		t.Error("ConcatFrames did not create a truly decoupled new dataframe")
	}
	if df3.CountRecords() != 20 {
		t.Error("ConcatFrames did not properly append")
	}
}

// TestConcatFramesColumnCount ensures ConcatFrames fails when the second
// frame has a different number of columns.
func TestConcatFramesColumnCount(t *testing.T) {
	path := "./"
	dfOne := CreateDataFrame(path, "TestData.csv")
	columns := []string{"one", "two", "three"}
	dfTwo := CreateNewDataFrame(columns)

	// Result frame is irrelevant here; only the error path is under test.
	_, err := dfOne.ConcatFrames(&dfTwo)
	if err == nil {
		t.Error("Concat Frames Did Not Fail --> ", err)
	}
}

// TestConcatFramesColumnOrder ensures ConcatFrames fails when the second
// frame has the same column count but a different column order.
func TestConcatFramesColumnOrder(t *testing.T) {
	path := "./"
	dfOne := CreateDataFrame(path, "TestData.csv")
	columns := []string{
		"ID",
		"Date",
		"Cost",
		"Weight",
		"Last Name",
		"First Name",
	}
	dfTwo := CreateNewDataFrame(columns)

	// Result frame is irrelevant here; only the error path is under test.
	_, err := dfOne.ConcatFrames(&dfTwo)
	if err == nil {
		t.Error("Concat Frames Did Not Fail --> ", err)
	}
}

// Ensures once a new filtered DataFrame is created, if records are updated in the original
// it will not affect the records in the newly created filtered version.
func TestCopiedFrame(t *testing.T) {
	path := "./"
	df := CreateDataFrame(path, "TestData.csv")

	df2 := df.Filtered("Last Name", "Wiedmann")

	// Update data in original frame.
	for _, row := range df.FrameRecords {
		if row.Val("First Name", df.Headers) == "Peter" && row.Val("Last Name", df.Headers) == "Wiedmann" {
			row.Update("Last Name", "New Last Name", df.Headers)
		}
	}

	// Check value did not change in newly copied frame.
	for _, row := range df2.FrameRecords {
		if row.Val("ID", df2.Headers) == "4" {
			if row.Val("First Name", df2.Headers) != "Peter" || row.Val("Last Name", df2.Headers) != "Wiedmann" {
				t.Error("Copied Frame: name appears to have changed in second frame.")
			}
		}
	}
}

// TestSaveDataFrameWithoutFileType saves a frame when the target name has no
// ".csv" extension; SaveDataFrame is expected to handle that.
func TestSaveDataFrameWithoutFileType(t *testing.T) {
	path := "./"
	df := CreateDataFrame(path, "TestData.csv")

	if !df.SaveDataFrame(path, "Testing") {
		t.Error("Failed to save dataframe.")
	}
}

// TestSaveDataFrameWithFileType saves a frame when the target name already
// carries the ".csv" extension.
func TestSaveDataFrameWithFileType(t *testing.T) {
	path := "./"
	df := CreateDataFrame(path, "TestData.csv")

	if !df.SaveDataFrame(path, "Testing.csv") {
		t.Error("Failed to save dataframe.")
	}
}

// TestAssortment chains several operations (concat, add records, exclude,
// date filter, unique) and checks the counts/values after each stage.
func TestAssortment(t *testing.T) {
	path := "./"

	// Concatenate Frames
	dfOne := CreateDataFrame(path, "TestData.csv")
	df := CreateDataFrame(path, "TestDataConcat.csv")
	df, err := df.ConcatFrames(&dfOne)
	if err != nil {
		// NOTE(review): log.Fatal exits the whole test binary and skips
		// cleanup; t.Fatal is the testing idiom. Left as-is because removing
		// the only uses of "log" could orphan the file-level import.
		log.Fatal("Concat Frames: ", err)
	}

	// Add Records
	newData := [6]string{"21", "2022-01-01", "200", "585", "Tommy", "Thompson"}
	df = df.AddRecord(newData[:])
	newDataTwo := [6]string{"22", "2022-01-31", "687", "948", "Sarah", "McSarahson"}
	df = df.AddRecord(newDataTwo[:])

	if df.CountRecords() != 22 {
		t.Error("Assortment: concat count incorrect.")
	}

	df = df.Exclude("Last Name", "Fultz", "Highman", "Stephenson")

	if df.CountRecords() != 17 {
		t.Error("Assortment: excluded count incorrect.")
	}

	df = df.FilteredAfter("Date", "2022-01-08")

	if df.CountRecords() != 4 {
		t.Error("Assortment: filtered after count incorrect.")
	}

	lastNames := df.Unique("Last Name")
	checkLastNames := [4]string{"Petruska", "Carlson", "Asherton", "McSarahson"}

	if len(lastNames) != 4 {
		t.Error("Assortment: last name count failed")
	}

	// Every unique last name must appear in the expected set.
	for _, name := range lastNames {
		var status bool
		for _, cName := range checkLastNames {
			if name == cName {
				status = true
			}
		}
		if !status {
			t.Error("Assortment: last name not found.")
		}
	}
}

// TestCopy verifies Copy produces an independent frame: edits to the copy
// must appear in the copy and must not leak back into the original.
func TestCopy(t *testing.T) {
	path := "./"
	df := CreateDataFrame(path, "TestData.csv")
	df2 := df.Copy()

	for _, row := range df2.FrameRecords {
		if row.Val("First Name", df2.Headers) == "Bryan" && row.Val("Last Name", df2.Headers) == "Curtis" {
			row.Update("First Name", "Brian", df2.Headers)
		}
		if row.Val("First Name", df2.Headers) == "Carl" && row.Val("Last Name", df2.Headers) == "Carlson" {
			row.Update("First Name", "McCarlson", df2.Headers)
		}
	}

	// Test original frame did not change.
	for _, row := range df.FrameRecords {
		if row.Val("Last Name", df.Headers) == "Curtis" {
			if row.Val("First Name", df.Headers) != "Bryan" {
				t.Error("First Name in original frame is not correct.")
			}
		}
		if row.Val("Last Name", df.Headers) == "Carlson" {
			if row.Val("First Name", df.Headers) != "Carl" {
				t.Error("First Name in original frame is not correct.")
			}
		}
	}

	// Test copied frame contains changes.
	for _, row := range df2.FrameRecords {
		if row.Val("Last Name", df2.Headers) == "Curtis" {
			if row.Val("First Name", df2.Headers) != "Brian" {
				t.Error("First Name in copied frame is not correct.")
			}
		}
		if row.Val("Last Name", df2.Headers) == "Carlson" {
			if row.Val("First Name", df2.Headers) != "McCarlson" {
				t.Error("First Name in copied frame is not correct.")
			}
		}
	}
}

// TestCopyAddress checks that Copy returns a new frame, not the original.
func TestCopyAddress(t *testing.T) {
	path := "./"
	df := CreateDataFrame(path, "TestData.csv")
	df2 := df.Copy()

	// NOTE(review): df and df2 are distinct local variables, so this address
	// comparison can never be true and the check cannot fail. A meaningful
	// decoupling check would compare shared backing storage instead.
	if &df == &df2 {
		t.Error("Copy did not create a truly decoupled copy.")
	}
}

// TestColumns verifies Columns returns all headers in their original order.
func TestColumns(t *testing.T) {
	path := "./"
	requiredColumns := []string{
		"ID",
		"Date",
		"Cost",
		"Weight",
		"First Name",
		"Last Name",
	}
	df := CreateDataFrame(path, "TestData.csv")
	foundColumns := df.Columns()

	if len(foundColumns) != 6 {
		t.Error("Length of found columns does not match")
	}

	for i := 0; i < len(requiredColumns); i++ {
		if foundColumns[i] != requiredColumns[i] {
			t.Error("Order of found columns does not match")
		}
	}
}

// TestAutoCount adds 1,000 generated records and checks the row count.
func TestAutoCount(t *testing.T) {
	columns := []string{"id", "number", "value"}
	df := CreateNewDataFrame(columns)

	for i := 0; i < 1_000; i++ {
		val := float64(i + 1)
		sq := val * val
		data := []string{
			strconv.Itoa(i),
			fmt.Sprintf("%f", val),
			fmt.Sprintf("%f", sq),
		}
		df = df.AddRecord(data)
	}

	if df.CountRecords() != 1_000 {
		// Message previously claimed 1,000,000 while the assertion checks 1,000.
		t.Error("Test Auto: count is not 1,000")
	}
}

// TestAutoSum adds 1,000 squared values and checks Sum("value") equals the
// closed-form sum of squares: 1000*1001*2001/6 = 333,833,500.
func TestAutoSum(t *testing.T) {
	columns := []string{"id", "number", "value"}
	df := CreateNewDataFrame(columns)

	for i := 0; i < 1_000; i++ {
		val := float64(i + 1)
		sq := val * val
		data := []string{
			strconv.Itoa(i),
			fmt.Sprintf("%f", val),
			fmt.Sprintf("%f", sq),
		}
		df = df.AddRecord(data)
	}

	if df.Sum("value") != 333_833_500.0 {
		t.Error("Test Auto: sum is not correct")
	}
}

// TestLoadFrames loads several CSV files at once and spot-checks each
// resulting frame, then verifies the results are decoupled from any shared
// internal storage.
func TestLoadFrames(t *testing.T) {
	filePath := "./"
	files := []string{
		"TestData.csv",
		"TestDataCommaSeparatedValue.csv",
		"TestDataConcat.csv",
		"TestDataDateFormat.csv",
		"TestMergeData.csv",
	}

	results, err := LoadFrames(filePath, files)
	if err != nil {
		// NOTE(review): prefer t.Fatal over log.Fatal in tests; see note in
		// TestAssortment for why this is left unchanged.
		log.Fatal(err)
	}

	dfTd := results[0]
	dfComma := results[1]
	dfConcat := results[2]
	dfDate := results[3]
	dfMerge := results[4]

	if dfTd.CountRecords() != 10 || dfTd.Sum("Weight") != 3376.0 || len(dfTd.Columns()) != 6 {
		t.Error("LoadFrames: TestData.csv is not correct")
	}
	if dfComma.CountRecords() != 10 || dfComma.Sum("Cost") != 6521.0 || len(dfComma.Columns()) != 6 {
		t.Error("LoadFrames: TestDataCommaSeparatedValue.csv is not correct")
	}
	if dfConcat.CountRecords() != 10 || dfConcat.Sum("Weight") != 445.0 || len(dfConcat.Columns()) != 6 {
		t.Error("LoadFrames: TestDataConcat.csv is not correct")
	}
	if dfDate.CountRecords() != 10 || dfDate.Average("Cost") != 652.1 || len(dfDate.Columns()) != 6 {
		t.Error("LoadFrames: TestDataDateFormat.csv is not correct")
	}
	if dfMerge.CountRecords() != 10 || dfMerge.Sum("Postal Code") != 495735.0 || len(dfMerge.Columns()) != 4 {
		t.Error("LoadFrames: TestMergeData.csv is not correct")
	}

	// A filtered copy must diverge from the source frame's row count,
	// proving the loaded frame is not aliasing shared map storage.
	dfFilterTest := dfTd.Filtered("Last Name", "Fultz")
	if dfTd.CountRecords() == dfFilterTest.CountRecords() {
		t.Error("LoadFrame: variable referencing map value")
	}
}

// TestLoadFramesError ensures LoadFrames rejects a single-file list.
func TestLoadFramesError(t *testing.T) {
	filePath := "./"
	files := []string{"TestData.csv"}

	_, err := LoadFrames(filePath, files)
	if err == nil {
		t.Error("LoadFrames did not fail as expected")
	}
}

// TestRename renames a column, then verifies updates through the new name
// work and that the header set has the new column and the same size.
func TestRename(t *testing.T) {
	path := "./"
	df := CreateDataFrame(path, "TestData.csv")

	err := df.Rename("Weight", "Total Weight")
	if err != nil {
		t.Error(err)
	}

	for _, row := range df.FrameRecords {
		if row.Val("First Name", df.Headers) == "Andy" && row.Val("Last Name", df.Headers) == "Wiedmann" {
			row.Update("Total Weight", "1000", df.Headers)
		}
	}

	for _, row := range df.FrameRecords {
		if row.Val("First Name", df.Headers) == "Andy" && row.Val("Last Name", df.Headers) == "Wiedmann" {
			if row.Val("Total Weight", df.Headers) != "1000" {
				t.Error("Value in new column did not update correctly")
			}
		}
	}

	foundColumns := []string{}
	newColumnStatus := false
	for k := range df.Headers {
		foundColumns = append(foundColumns, k)
		if k == "Total Weight" {
			newColumnStatus = true
		}
	}

	if !newColumnStatus {
		t.Error("New column was not found")
	}
	if len(foundColumns) != 6 {
		t.Error("Wrong number of columns found")
	}
}

// TestRenameOriginalNotFound ensures Rename fails when the source column
// does not exist.
func TestRenameOriginalNotFound(t *testing.T) {
	path := "./"
	df := CreateDataFrame(path, "TestData.csv")

	err := df.Rename("The Weight", "Total Weight")
	if err == nil {
		// err is nil here, so report a real message instead of t.Error(err).
		t.Error("Rename did not fail for a missing original column")
	}
}

// TestRenameDuplicate ensures Rename fails when the new name collides with
// an existing column.
func TestRenameDuplicate(t *testing.T) {
	path := "./"
	df := CreateDataFrame(path, "TestData.csv")

	err := df.Rename("Weight", "Cost")
	if err == nil {
		// err is nil here, so report a real message instead of t.Error(err).
		t.Error("Rename did not fail for a duplicate column name")
	}
}

// TestSort exercises ascending/descending sorts on a numeric column and a
// string column, confirms counts and sums survive sorting, and checks the
// error path for an unknown column.
func TestSort(t *testing.T) {
	path := "./"
	df := CreateDataFrame(path, "TestData.csv")

	err := df.Sort("Cost", true)
	if err != nil {
		t.Error("Sort Error Failed")
	}

	answers := []string{
		"121",
		"133",
		"493",
		"597",
		"774",
		"777",
		"818",
		"874",
		"939",
		"995",
	}

	for i, row := range df.FrameRecords {
		if row.Val("Cost", df.Headers) != answers[i] {
			t.Error("Ascending Cost Sort Failed")
		}
	}

	err = df.Sort("Cost", false)
	if err != nil {
		t.Error("Sort Error Failed")
	}

	answers = []string{
		"995",
		"939",
		"874",
		"818",
		"777",
		"774",
		"597",
		"493",
		"133",
		"121",
	}

	for i, row := range df.FrameRecords {
		if row.Val("Cost", df.Headers) != answers[i] {
			t.Error("Descending Cost Sort Failed")
		}
	}

	err = df.Sort("Last Name", true)
	if err != nil {
		t.Error("Sort Error Failed")
	}

	answers = []string{
		"Carlson",
		"Curtis",
		"Fultz",
		"Fultz",
		"Fultz",
		"Petruska",
		"Wenck",
		"Wiedmann",
		"Wiedmann",
		"Wilfong",
	}

	for i, row := range df.FrameRecords {
		if row.Val("Last Name", df.Headers) != answers[i] {
			t.Error("Ascending Name Sort Failed")
		}
	}

	err = df.Sort("Last Name", false)
	if err != nil {
		t.Error("Sort Error Failed")
	}

	answers = []string{
		"Wilfong",
		"Wiedmann",
		"Wiedmann",
		"Wenck",
		"Petruska",
		"Fultz",
		"Fultz",
		"Fultz",
		"Curtis",
		"Carlson",
	}

	for i, row := range df.FrameRecords {
		if row.Val("Last Name", df.Headers) != answers[i] {
			t.Error("Descending Name Sort Failed")
		}
	}

	if df.CountRecords() != 10 {
		t.Error("Sort Row Count Failed")
	}

	if df.Sum("Cost") != 6521.0 {
		t.Error("Sort Sum Failed")
	}

	err = df.Sort("Non Existent Column", true)
	if err == nil {
		t.Error("Sort Error Failed")
	}
}

// TestDivideAndConquerOdd splits 999,833 rows into 5 subframes: the first
// four hold 199,966 rows and the last absorbs the remainder (199,969).
// The subframe sums must add back up to the original total.
func TestDivideAndConquerOdd(t *testing.T) {
	df := CreateNewDataFrame([]string{"One", "Two", "Three"})

	total := 0
	for i := 0; i < 999_833; i++ {
		total += i
		iVal := strconv.Itoa(i)
		df = df.AddRecord([]string{iVal, iVal, iVal})
	}

	if df.CountRecords() != 999_833 {
		t.Error("Divide And Conquer: Count rows are incorrect.")
	}

	frames, err := df.DivideAndConquer(5)
	if err != nil {
		t.Error("Divide And Conquer: Error incorrectly triggered.")
	}

	if len(frames) != 5 {
		t.Errorf("Divide And Conquer: Frame count is '%d' instead of 5.", len(frames))
	}

	dfTotal := 0
	for i, each := range frames {
		if i != len(frames)-1 {
			if each.CountRecords() != 199_966 {
				t.Errorf("Divide And Conquer: Row count on subgroup is incorrect '%d'.", each.CountRecords())
			}
			dfTotal += int(each.Sum("One"))
		} else {
			if each.CountRecords() != 199_969 {
				t.Errorf("Divide And Conquer: Row count on final subgroup is incorrect '%d'.", each.CountRecords())
			}
			dfTotal += int(each.Sum("One"))
		}
	}

	if dfTotal != total {
		t.Errorf("Divide And Conquer: Sum of all rows is incorrect '%d' instead of '%d'.", dfTotal, total)
	}
}

// TestDivideAndConquerEven splits 100,000 rows into 5 equal subframes of
// 20,000 rows each and verifies the subframe sums add back up.
func TestDivideAndConquerEven(t *testing.T) {
	df := CreateNewDataFrame([]string{"One", "Two", "Three"})

	total := 0
	for i := 0; i < 100_000; i++ {
		total += i
		iVal := strconv.Itoa(i)
		df = df.AddRecord([]string{iVal, iVal, iVal})
	}

	if df.CountRecords() != 100_000 {
		t.Error("Divide And Conquer: Count rows are incorrect.")
	}

	frames, err := df.DivideAndConquer(5)
	if err != nil {
		t.Error("Divide And Conquer: Error incorrectly triggered.")
	}

	if len(frames) != 5 {
		t.Errorf("Divide And Conquer: Frame count is '%d' instead of 5.", len(frames))
	}

	dfTotal := 0
	for _, each := range frames {
		if each.CountRecords() != 20_000 {
			t.Errorf("Divide And Conquer: Row count on subgroup is incorrect '%d'.", each.CountRecords())
		}
		dfTotal += int(each.Sum("One"))
	}

	if dfTotal != total {
		t.Errorf("Divide And Conquer: Sum of all rows is incorrect '%d' instead of '%d'.", dfTotal, total)
	}
}

// TestDivideAndConquerZeroSubframes ensures a zero subframe count errors.
func TestDivideAndConquerZeroSubframes(t *testing.T) {
	df := CreateNewDataFrame([]string{"One", "Two", "Three"})

	for i := 0; i < 10; i++ {
		iVal := strconv.Itoa(i)
		df = df.AddRecord([]string{iVal, iVal, iVal})
	}

	_, err := df.DivideAndConquer(0)
	if err == nil {
		t.Error("Divide And Conquer: Zero subframe error should have been triggered.")
	}
}

// TestDivideAndConquerExcessiveSubframes ensures requesting more subframes
// than rows errors.
func TestDivideAndConquerExcessiveSubframes(t *testing.T) {
	df := CreateNewDataFrame([]string{"One", "Two", "Three"})

	for i := 0; i < 10; i++ {
		iVal := strconv.Itoa(i)
		df = df.AddRecord([]string{iVal, iVal, iVal})
	}

	_, err := df.DivideAndConquer(11)
	if err == nil {
		t.Error("Divide And Conquer: Excessive subframe error should have been triggered.")
	}
}

// TestDivideAndConquerEmptyDataFrame ensures splitting an empty frame errors.
func TestDivideAndConquerEmptyDataFrame(t *testing.T) {
	df := CreateNewDataFrame([]string{"One", "Two", "Three"})

	_, err := df.DivideAndConquer(100)
	if err == nil {
		t.Error("Divide And Conquer: Empty dataframe error should have been triggered.")
	}
}