├── TODO
├── CODEOWNERS
├── shallow-explore
├── .gitignore
├── go.mod
├── LICENSE
├── utils
    ├── explore_test.go
    └── explore.go
├── go.sum
├── README.md
└── main.go


/TODO:
--------------------------------------------------------------------------------
1 | - Unit tests
2 | 


--------------------------------------------------------------------------------
/CODEOWNERS:
--------------------------------------------------------------------------------
1 | *       @tmickleydoyle
2 | 


--------------------------------------------------------------------------------
/shallow-explore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tmickleydoyle/shallow-explore/HEAD/shallow-explore


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Binaries for programs and plugins
 2 | *.exe
 3 | *.exe~
 4 | *.dll
 5 | *.so
 6 | *.dylib
 7 | 
 8 | # Test binary, built with `go test -c`
 9 | *.test
10 | 
11 | # Output of the go coverage tool, specifically when used with LiteIDE
12 | *.out
13 | 
14 | # Dependency directories (remove the comment below to include it)
15 | # vendor/
16 | 


--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
 1 | module github.com/tmickleydoyle/shallow-explore
 2 | 
 3 | go 1.17
 4 | 
 5 | require (
 6 | 	github.com/charmbracelet/lipgloss v0.7.1
 7 | 	github.com/guptarohit/asciigraph v0.5.5
 8 | )
 9 | 
10 | require (
11 | 	github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect
12 | 	github.com/lucasb-eyer/go-colorful v1.2.0 // indirect
13 | 	github.com/mattn/go-isatty v0.0.19 // indirect
14 | 	github.com/mattn/go-runewidth v0.0.14 // indirect
15 | 	github.com/muesli/reflow v0.3.0 // indirect
16 | 	github.com/muesli/termenv v0.15.1 // indirect
17 | 	github.com/rivo/uniseg v0.4.4 // indirect
18 | 	golang.org/x/sys v0.9.0 // indirect
19 | )
20 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2022 Thomas Mickley-Doyle
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/utils/explore_test.go:
--------------------------------------------------------------------------------
 1 | package explore
 2 | 
 3 | import (
 4 | 	"testing"
 5 | )
 6 | 
 7 | func TestConvertStringToInt(t *testing.T) {
 8 | 	intArray, origArray := ConvertStringToInt([]string{"1", "2", "3"})
 9 | 
10 | 	if len(intArray) == 0 {
11 | 		t.Errorf("Array could not convert strings to numbers")
12 | 	}
13 | 	if len(origArray) == 0 {
14 | 		t.Errorf("Original array did not return: ")
15 | 	}
16 | 
17 | 	stringArray, _ := ConvertStringToInt([]string{"one", "two", "three"})
18 | 
19 | 	if len(stringArray) != 0 {
20 | 		t.Errorf("Incorrectly converted string to float")
21 | 	}
22 | }
23 | 
24 | func TestMinMaxValues(t *testing.T) {
25 | 	minValue, maxValue := MinMaxValues([]float64{1.0, 2.0, 3.0})
26 | 
27 | 	if minValue != 1 {
28 | 		t.Errorf("Min value is not calculated properly")
29 | 	}
30 | 	if maxValue != 3 {
31 | 		t.Errorf("Min value is not calculated properly")
32 | 	}
33 | }
34 | 
35 | func TestMeanValue(t *testing.T) {
36 | 	meanValue := MeanValue([]float64{1.0, 2.0, 3.0})
37 | 
38 | 	if meanValue != 2 {
39 | 		t.Errorf("Mean value is not calculated properly")
40 | 	}
41 | }
42 | 
43 | func TestMedianValue(t *testing.T) {
44 | 	medianValue := MedianValue([]float64{1.0, 2.0, 3.0})
45 | 
46 | 	if medianValue != 2 {
47 | 		t.Errorf("Mean value is not calculated properly")
48 | 	}
49 | }
50 | 


--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
 1 | github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k=
 2 | github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8=
 3 | github.com/charmbracelet/lipgloss v0.4.0 h1:768h64EFkGUr8V5yAKV7/Ta0NiVceiPaV+PphaW1K9g=
 4 | github.com/charmbracelet/lipgloss v0.4.0/go.mod h1:vmdkHvce7UzX6xkyf4cca8WlwdQ5RQr8fzta+xl7BOM=
 5 | github.com/charmbracelet/lipgloss v0.7.1 h1:17WMwi7N1b1rVWOjMT+rCh7sQkvDU75B2hbZpc5Kc1E=
 6 | github.com/charmbracelet/lipgloss v0.7.1/go.mod h1:yG0k3giv8Qj8edTCbbg6AlQ5e8KNWpFujkNawKNhE2c=
 7 | github.com/guptarohit/asciigraph v0.5.2 h1:aG4kATuuyHQMdTi89KKVIRIcDSIHrsKIozo/UsUE5AM=
 8 | github.com/guptarohit/asciigraph v0.5.2/go.mod h1:dYl5wwK4gNsnFf9Zp+l06rFiDZ5YtXM6x7SRWZ3KGag=
 9 | github.com/guptarohit/asciigraph v0.5.5 h1:ccFnUF8xYIOUPPY3tmdvRyHqmn1MYI9iv1pLKX+/ZkQ=
10 | github.com/guptarohit/asciigraph v0.5.5/go.mod h1:dYl5wwK4gNsnFf9Zp+l06rFiDZ5YtXM6x7SRWZ3KGag=
11 | github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY=
12 | github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
13 | github.com/mattn/go-isatty v0.0.13 h1:qdl+GuBjcsKKDco5BsxPJlId98mSWNKqYA+Co0SC1yA=
14 | github.com/mattn/go-isatty v0.0.13/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU=
15 | github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA=
16 | github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
17 | github.com/mattn/go-runewidth v0.0.10/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk=
18 | github.com/mattn/go-runewidth v0.0.12/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk=
19 | github.com/mattn/go-runewidth v0.0.13 h1:lTGmDsbAYt5DmK6OnoV7EuIF1wEIFAcxld6ypU4OSgU=
20 | github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
21 | github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU=
22 | github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
23 | github.com/muesli/reflow v0.2.1-0.20210115123740-9e1d0d53df68 h1:y1p/ycavWjGT9FnmSjdbWUlLGvcxrY0Rw3ATltrxOhk=
24 | github.com/muesli/reflow v0.2.1-0.20210115123740-9e1d0d53df68/go.mod h1:Xk+z4oIWdQqJzsxyjgl3P22oYZnHdZ8FFTHAQQt5BMQ=
25 | github.com/muesli/reflow v0.3.0 h1:IFsN6K9NfGtjeggFP+68I4chLZV2yIKsXJFNZ+eWh6s=
26 | github.com/muesli/reflow v0.3.0/go.mod h1:pbwTDkVPibjO2kyvBQRBxTWEEGDGq0FlB1BIKtnHY/8=
27 | github.com/muesli/termenv v0.9.0 h1:wnbOaGz+LUR3jNT0zOzinPnyDaCZUQRZj9GxK8eRVl8=
28 | github.com/muesli/termenv v0.9.0/go.mod h1:R/LzAKf+suGs4IsO95y7+7DpFHO0KABgnZqtlyx2mBw=
29 | github.com/muesli/termenv v0.15.1 h1:UzuTb/+hhlBugQz28rpzey4ZuKcZ03MeKsoG7IJZIxs=
30 | github.com/muesli/termenv v0.15.1/go.mod h1:HeAQPTzpfs016yGtA4g00CsdYnVLJvxsS4ANqrZs2sQ=
31 | github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
32 | github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
33 | github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
34 | github.com/rivo/uniseg v0.4.4 h1:8TfxU8dW6PdqD27gjM8MVNuicgxIjxpm4K7x4jp8sis=
35 | github.com/rivo/uniseg v0.4.4/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
36 | golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
37 | golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
38 | golang.org/x/sys v0.1.0 h1:kunALQeHf1/185U1i0GOB/fy1IPRDDpuoOOqRReG57U=
39 | golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
40 | golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
41 | golang.org/x/sys v0.9.0 h1:KS/R3tvhPqvJvwcKfnBHJwwthS11LRhmM5D59eEXa0s=
42 | golang.org/x/sys v0.9.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
43 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # shallow-explore
  2 | From the command line, quickly explore data from a CSV file.
  3 | 
  4 | `shallow-explore` is a [Go](https://go.dev/)-backed command-line tool for iterating over the columns of a CSV file. It is a gut-check tool for confirming that your assumptions about the data hold and that the values fall within the expected range.
  5 | 
  6 | ## How-To
  7 | 
  8 | After installation, run the following command to start analyzing data:
  9 | 
 10 | ```bash
 11 | # Style (default): light mode
 12 | shallow-explore -csv ~/complete/path/to/file/sample.csv
 13 | 
 14 | # Style: dark mode
 15 | shallow-explore -csv ~/complete/path/to/file/sample.csv -style dark
 16 | 
 17 | # Style: light mode
 18 | shallow-explore -csv ~/complete/path/to/file/sample.csv -style light
 19 | ```
 20 | 
 21 | Note: The complete path of the file is required to load the data into the program.
 22 | 
 23 | ### New Features
 24 | 
 25 | #### Data Summary
 26 | Generate a comprehensive summary of your CSV file with statistics about each column:
 27 | 
 28 | ```bash
 29 | shallow-explore -csv ~/path/to/file/sample.csv -summary
 30 | ```
 31 | 
 32 | #### Data Filtering
 33 | Filter data based on column values:
 34 | 
 35 | ```bash
 36 | # Filter rows where the "Age" column equals "30"
 37 | shallow-explore -csv ~/path/to/file/sample.csv -filter true -column "Age" -condition equals -value "30"
 38 | 
 39 | # Available conditions: equals, contains, greater_than, less_than, starts_with, ends_with
 40 | shallow-explore -csv ~/path/to/file/sample.csv -filter true -column "Name" -condition contains -value "Smith"
 41 | ```
 42 | 
 43 | #### Export to JSON
 44 | Export your CSV data to JSON format:
 45 | 
 46 | ```bash
 47 | shallow-explore -csv ~/path/to/file/sample.csv -export-json -export-path "output.json"
 48 | 
 49 | # The export-path is optional. If not provided, a timestamped filename will be generated
 50 | shallow-explore -csv ~/path/to/file/sample.csv -export-json
 51 | ```
 52 | 
 53 | #### Data Correlation
 54 | Calculate the correlation between two numeric columns:
 55 | 
 56 | ```bash
 57 | shallow-explore -csv ~/path/to/file/sample.csv -correlate -col1 "Height" -col2 "Weight"
 58 | ```
 59 | 
 60 | #### Anomaly Detection
 61 | Detect anomalies in numeric columns using Z-score method:
 62 | 
 63 | ```bash
 64 | # Default threshold is 3.0
 65 | shallow-explore -csv ~/path/to/file/sample.csv -anomalies
 66 | 
 67 | # Custom threshold
 68 | shallow-explore -csv ~/path/to/file/sample.csv -anomalies -threshold 2.5
 69 | ```
 70 | 
 71 | ### Output
 72 | 
 73 | `shallow-explore` supports three types of data: integers, floats, and strings.
 74 | 
 75 | The following output is an example of an integer or float column. The column name appears at the top of the frame, followed by a summary line graph of the values and some quick statistics about the data.
 76 | 
 77 | <img width="709" alt="Screen Shot 2022-01-11 at 8 31 11 PM" src="https://user-images.githubusercontent.com/8069675/149228948-2dc71027-858e-406c-b09b-65231c9c04ca.png">
 78 | 
 79 | For string-based data, the column name is still at the top of the output. Below the column name lives a horizontal histogram and a count of unique entities found in the column.
 80 | 
 81 | <img width="769" alt="Screen Shot 2022-01-11 at 8 30 47 PM" src="https://user-images.githubusercontent.com/8069675/149228970-7cebd181-4faa-4369-886d-8e58650fca81.png">
 82 | 
 83 | ## Installation
 84 | 
 85 | If Golang is installed, run the following command:
 86 | 
 87 | ```bash
 88 | go install github.com/tmickleydoyle/shallow-explore@latest
 89 | ```
 90 | 
 91 | ## Instructions for Installing Go
 92 | 
 93 | [Go docs](https://go.dev/)
 94 | 
 95 | ### Installation with Homebrew
 96 | 
 97 | ```bash
 98 | brew install go
 99 | ```
100 | 
101 | ## Why I Built This Tool
102 | 
103 | I find myself running and rerunning the same basic statistical analysis on data to get an understanding of how trends are moving. I figured why not make it easier and share it with everyone else! I hope this speeds up your decision making :heart:
104 | 
105 | ## Feature Overview
106 | 
107 | - **CSV Data Exploration**: Visualize and analyze CSV data with automatic recognition of data types
108 | - **Data Summary**: Get a comprehensive overview of your CSV data with column types and completeness percentages
109 | - **Data Filtering**: Filter CSV data based on various conditions to focus on specific subsets
110 | - **JSON Export**: Export your CSV data to JSON format for use in other applications
111 | - **Data Correlation**: Calculate Pearson correlation coefficients between numeric columns
112 | - **Anomaly Detection**: Find outliers in numeric data using Z-score method
113 | - **Customizable Display**: Choose between light and dark mode for better visibility
114 | 


--------------------------------------------------------------------------------
/main.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
import (
	"flag"
	"fmt"
	"log"
	"math"
	"path/filepath"
	"strings"
	"time"

	"github.com/charmbracelet/lipgloss"
	"github.com/guptarohit/asciigraph"
	explore "github.com/tmickleydoyle/shallow-explore/utils"
)
 15 | 
 16 | var (
 17 | 	file      string
 18 | 	path      string
 19 | 	style     string
 20 | 	filter    string
 21 | 	column    string
 22 | 	condition string
 23 | 	value     string
 24 | 	exportJson bool
 25 | 	exportPath string
 26 | 	correlate  bool
 27 | 	col1       string
 28 | 	col2       string
 29 | 	summary    bool
 30 | 	anomalies  bool
 31 | 	threshold  float64
 32 | )
 33 | 
 34 | var styleDark = lipgloss.NewStyle().
 35 | 	Bold(true).
 36 | 	Foreground(lipgloss.Color("#FAFAFA")).
 37 | 	Background(lipgloss.Color("#808080")).
 38 | 	PaddingTop(1).
 39 | 	PaddingBottom(1).
 40 | 	PaddingLeft(2).
 41 | 	PaddingRight(2).
 42 | 	BorderStyle(lipgloss.NormalBorder()).
 43 | 	BorderForeground(lipgloss.Color("#FAFAFA"))
 44 | 
 45 | var styleLight = lipgloss.NewStyle().
 46 | 	Bold(true).
 47 | 	Foreground(lipgloss.Color("#808080")).
 48 | 	Background(lipgloss.Color("#FAFAFA")).
 49 | 	PaddingTop(1).
 50 | 	PaddingBottom(1).
 51 | 	PaddingLeft(2).
 52 | 	PaddingRight(2).
 53 | 	BorderStyle(lipgloss.NormalBorder()).
 54 | 	BorderForeground(lipgloss.Color("#808080"))
 55 | 
 56 | func main() {
 57 | 	// Basic flags
 58 | 	flag.StringVar(&file, "csv", "", "path to CSV file")
 59 | 	flag.StringVar(&style, "style", "", "output style (dark or light)")
 60 | 	
 61 | 	// Data filtering flags
 62 | 	flag.StringVar(&filter, "filter", "", "enable filtering mode")
 63 | 	flag.StringVar(&column, "column", "", "column to filter on")
 64 | 	flag.StringVar(&condition, "condition", "equals", "filter condition (equals, contains, greater_than, less_than, starts_with, ends_with)")
 65 | 	flag.StringVar(&value, "value", "", "filter value to compare against")
 66 | 	
 67 | 	// JSON export flags
 68 | 	flag.BoolVar(&exportJson, "export-json", false, "export data to JSON")
 69 | 	flag.StringVar(&exportPath, "export-path", "", "path for exported JSON file")
 70 | 	
 71 | 	// Correlation flags
 72 | 	flag.BoolVar(&correlate, "correlate", false, "calculate correlation between two columns")
 73 | 	flag.StringVar(&col1, "col1", "", "first column for correlation")
 74 | 	flag.StringVar(&col2, "col2", "", "second column for correlation")
 75 | 	
 76 | 	// Summary flag
 77 | 	flag.BoolVar(&summary, "summary", false, "generate summary of CSV file")
 78 | 	
 79 | 	// Anomaly detection
 80 | 	flag.BoolVar(&anomalies, "anomalies", false, "detect anomalies in numeric columns")
 81 | 	flag.Float64Var(&threshold, "threshold", 3.0, "z-score threshold for anomaly detection (default: 3.0)")
 82 | 	
 83 | 	flag.Parse()
 84 | 
 85 | 	if file == "" {
 86 | 		log.Fatal("Could not find the path to the CSV file")
 87 | 	}
 88 | 	path = file
 89 | 	records := explore.ReadCSVFile(path)
 90 | 	
 91 | 	// Set default style
 92 | 	var selectedStyle lipgloss.Style
 93 | 	if style == "dark" {
 94 | 		selectedStyle = styleDark
 95 | 	} else {
 96 | 		selectedStyle = styleLight
 97 | 	}
 98 | 	
 99 | 	// Handle summary mode
100 | 	if summary {
101 | 		summaryData := explore.CSVSummary(records)
102 | 		fmt.Println(selectedStyle.Render(fmt.Sprintf("CSV Summary for: %s\n", path)))
103 | 		
104 | 		// Display summary information
105 | 		fmt.Println(selectedStyle.Render(fmt.Sprintf("Total Rows: %d\n", summaryData["total_rows"])))
106 | 		fmt.Println(selectedStyle.Render(fmt.Sprintf("Total Columns: %d\n", summaryData["total_columns"])))
107 | 		
108 | 		// Display column names and types
109 | 		fmt.Println(selectedStyle.Render("Column Information:"))
110 | 		columnTypes := summaryData["column_types"].(map[string]string)
111 | 		completeness := summaryData["completeness"].(map[string]float64)
112 | 		
113 | 		for _, colName := range summaryData["column_names"].([]string) {
114 | 			fmt.Println(selectedStyle.Render(fmt.Sprintf("  %s (Type: %s, Completeness: %.1f%%)", 
115 | 				colName, columnTypes[colName], completeness[colName])))
116 | 		}
117 | 		return
118 | 	}
119 | 	
120 | 	// Handle filtering
121 | 	if filter != "" && column != "" && value != "" {
122 | 		records = explore.FilterCSVData(records, column, condition, value)
123 | 		fmt.Println(selectedStyle.Render(fmt.Sprintf("Filtered data where %s %s %s (Found %d records)\n", 
124 | 			column, condition, value, len(records)-1)))
125 | 	}
126 | 	
127 | 	// Handle JSON export
128 | 	if exportJson {
129 | 		if exportPath == "" {
130 | 			// Generate default filename based on input file if not provided
131 | 			base := filepath.Base(path)
132 | 			baseName := strings.TrimSuffix(base, filepath.Ext(base))
133 | 			exportPath = baseName + "_" + time.Now().Format("20060102_150405") + ".json"
134 | 		}
135 | 		
136 | 		err := explore.ExportToJSON(records, exportPath)
137 | 		if err != nil {
138 | 			log.Fatalf("Failed to export to JSON: %v", err)
139 | 		}
140 | 		
141 | 		fmt.Println(selectedStyle.Render(fmt.Sprintf("Data exported to JSON: %s\n", exportPath)))
142 | 		return
143 | 	}
144 | 	
145 | 	// Handle correlation calculation
146 | 	if correlate && col1 != "" && col2 != "" {
147 | 		correlation, err := explore.CalculateCorrelation(records, col1, col2)
148 | 		if err != nil {
149 | 			log.Fatalf("Failed to calculate correlation: %v", err)
150 | 		}
151 | 		
152 | 		fmt.Println(selectedStyle.Render(fmt.Sprintf("Correlation between '%s' and '%s': %.4f\n", 
153 | 			col1, col2, correlation)))
154 | 		
155 | 		// Interpretation guide
156 | 		var interpretation string
157 | 		corrAbs := math.Abs(correlation)
158 | 		
159 | 		if corrAbs >= 0.9 {
160 | 			interpretation = "Very strong relationship"
161 | 		} else if corrAbs >= 0.7 {
162 | 			interpretation = "Strong relationship"
163 | 		} else if corrAbs >= 0.5 {
164 | 			interpretation = "Moderate relationship"
165 | 		} else if corrAbs >= 0.3 {
166 | 			interpretation = "Weak relationship"
167 | 		} else {
168 | 			interpretation = "Little to no relationship"
169 | 		}
170 | 		
171 | 		if correlation < 0 {
172 | 			interpretation += " (negative/inverse)"
173 | 		} else {
174 | 			interpretation += " (positive/direct)"
175 | 		}
176 | 		
177 | 		fmt.Println(selectedStyle.Render("Interpretation: " + interpretation))
178 | 		return
179 | 	}
180 | 	
181 | 	// Handle anomaly detection
182 | 	if anomalies {
183 | 		anomalousRows := explore.DetectAnomalies(records, threshold)
184 | 		
185 | 		if len(anomalousRows) == 0 {
186 | 			fmt.Println(selectedStyle.Render(fmt.Sprintf("No anomalies detected with threshold %.1f\n", threshold)))
187 | 		} else {
188 | 			fmt.Println(selectedStyle.Render(fmt.Sprintf("Anomalies detected with threshold %.1f:\n", threshold)))
189 | 			
190 | 			for colName, rows := range anomalousRows {
191 | 				fmt.Println(selectedStyle.Render(fmt.Sprintf("Column '%s': %d anomalies found", colName, len(rows))))
192 | 				
193 | 				// Show first 5 anomalies at most
194 | 				displayRows := rows
195 | 				if len(rows) > 5 {
196 | 					displayRows = rows[:5]
197 | 				}
198 | 				
199 | 				// Get column index
200 | 				colIndex := -1
201 | 				for i, col := range records[0] {
202 | 					if col == colName {
203 | 						colIndex = i
204 | 						break
205 | 					}
206 | 				}
207 | 				
208 | 				for _, rowIdx := range displayRows {
209 | 					if colIndex < len(records[rowIdx]) {
210 | 						fmt.Println(selectedStyle.Render(fmt.Sprintf("  Row %d: %s", rowIdx, records[rowIdx][colIndex])))
211 | 					}
212 | 				}
213 | 				
214 | 				if len(rows) > 5 {
215 | 					fmt.Println(selectedStyle.Render(fmt.Sprintf("  ... and %d more anomalies", len(rows)-5)))
216 | 				}
217 | 			}
218 | 		}
219 | 		return
220 | 	}
221 | 	
222 | 	// Default behavior: explore each column
223 | 	for column := range records[0] {
224 | 		colValues := []string{}
225 | 
226 | 		for i := 1; i < len(records); i++ {
227 | 			colValues = append(colValues, records[i][column])
228 | 		}
229 | 
230 | 		transformedArray, _ := explore.ConvertStringToInt(colValues)
231 | 		plotArray, stringValues := explore.ConvertStringToInt(colValues)
232 | 		column := fmt.Sprintf("Column: %s\n\n", records[0][column])
233 | 
234 | 		if len(transformedArray) > 0 {
235 | 			min, max := explore.MinMaxValues(transformedArray)
236 | 			mean := explore.MeanValue(transformedArray)
237 | 			median := explore.MedianValue(transformedArray)
238 | 			statsOutput := explore.FloatOutput(min, max, mean, median)
239 | 			graph := asciigraph.Plot(plotArray, asciigraph.Height(20), asciigraph.Width(90), asciigraph.Caption(statsOutput))
240 | 			fmt.Println(selectedStyle.Render(column + graph))
241 | 		} else {
242 | 			valuesMap := explore.CountValues(stringValues)
243 | 			sortedMap := explore.SortMapByValue(valuesMap)
244 | 			histogram := explore.HistTopTen(sortedMap, column)
245 | 			fmt.Println(selectedStyle.Render(column + histogram))
246 | 		}
247 | 	}
248 | }
249 | 


--------------------------------------------------------------------------------
/utils/explore.go:
--------------------------------------------------------------------------------
  1 | package explore
  2 | 
  3 | import (
  4 | 	"encoding/csv"
  5 | 	"fmt"
  6 | 	"log"
  7 | 	"math"
  8 | 	"sort"
  9 | 	"strconv"
 10 | 	"strings"
 11 | 	"bytes"
 12 | 	"encoding/json"
 13 | 	"io"
 14 | 	"io/ioutil"
 15 | 	"net/http"
 16 | 	"os"
 17 | 	"time"
 18 | )
 19 | 
 20 | type Sorted struct {
 21 | 	Key   string
 22 | 	Value int
 23 | }
 24 | 
 25 | type SortedList []Sorted
 26 | 
 27 | func (p SortedList) Len() int           { return len(p) }
 28 | func (p SortedList) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
 29 | func (p SortedList) Less(i, j int) bool { return p[i].Value > p[j].Value }
 30 | 
 31 | func ReadCSVFile(filePath string) [][]string {
 32 | 	f, err := os.Open(filePath)
 33 | 	if err != nil {
 34 | 		log.Fatal("Unable to read input file "+filePath, err)
 35 | 	}
 36 | 	defer f.Close()
 37 | 
 38 | 	csvReader := csv.NewReader(f)
 39 | 	records, err := csvReader.ReadAll()
 40 | 	if err != nil {
 41 | 		log.Fatal("Unable to parse file as CSV for "+filePath, err)
 42 | 	}
 43 | 
 44 | 	return records
 45 | }
 46 | 
 47 | func ConvertStringToInt(stringArray []string) ([]float64, []string) {
 48 | 	var intArray []float64
 49 | 
 50 | 	for _, i := range stringArray {
 51 | 		if convertNum, err := strconv.ParseFloat(i, 64); err == nil {
 52 | 			intArray = append(intArray, convertNum)
 53 | 		} else if intValue, err := strconv.ParseInt(i, 10, 64); err == nil {
 54 | 			convertNum := float64(intValue)
 55 | 			intArray = append(intArray, convertNum)
 56 | 		}
 57 | 	}
 58 | 
 59 | 	return intArray, stringArray
 60 | }
 61 | 
 62 | func MinMaxValues(intArray []float64) (float64, float64) {
 63 | 	var max float64 = intArray[0]
 64 | 	var min float64 = intArray[0]
 65 | 	for _, value := range intArray {
 66 | 		if max < value {
 67 | 			max = value
 68 | 		}
 69 | 		if min > value {
 70 | 			min = value
 71 | 		}
 72 | 	}
 73 | 	return min, max
 74 | }
 75 | 
 76 | func MeanValue(intArray []float64) float64 {
 77 | 	total := 0.0
 78 | 
 79 | 	for _, v := range intArray {
 80 | 		total += v
 81 | 	}
 82 | 
 83 | 	return math.Round(total / float64(len(intArray)))
 84 | }
 85 | 
 86 | func MedianValue(intArray []float64) float64 {
 87 | 	sort.Float64s(intArray)
 88 | 	mNumber := len(intArray) / 2
 89 | 
 90 | 	if len(intArray)%2 != 0 {
 91 | 		return intArray[mNumber]
 92 | 	}
 93 | 
 94 | 	return (intArray[mNumber-1] + intArray[mNumber]) / 2
 95 | }
 96 | 
 97 | func FloatOutput(min float64, max float64, mean float64, median float64) string {
 98 | 	outputString :=
 99 | 		`(The plot is a general trend of all points) 
100 | 
101 |    Min: %.2f
102 |    Max: %.2f
103 |   Mean: %.2f
104 | Median: %.2f
105 | `
106 | 	finalOutput := fmt.Sprintf(outputString, min, max, mean, median)
107 | 	return finalOutput
108 | }
109 | 
// CountValues tallies how many times each distinct string occurs in
// stringArray and returns the tallies keyed by string.
func CountValues(stringArray []string) map[string]int {
	counts := map[string]int{}

	for _, s := range stringArray {
		// A missing key reads as zero, so incrementing covers first sight too.
		counts[s]++
	}

	return counts
}
124 | 
125 | func SortMapByValue(valueMap map[string]int) SortedList {
126 | 	sortedMap := make(SortedList, len(valueMap))
127 | 
128 | 	i := 0
129 | 	for k, v := range valueMap {
130 | 		sortedMap[i] = Sorted{k, v}
131 | 		i++
132 | 	}
133 | 
134 | 	sort.Sort(sortedMap)
135 | 
136 | 	return sortedMap
137 | }
138 | 
139 | func HistTopTen(sortedList SortedList, column string) string {
140 | 	var key string
141 | 	var barValue int
142 | 	max := float64(sortedList[0].Value)
143 | 	histString := "Horizontal Histogram"
144 | 
145 | 	if len(sortedList) > 10 {
146 | 		histString = histString + " - Top Ten\n\n"
147 | 	} else {
148 | 		histString = histString + "\n\n"
149 | 	}
150 | 
151 | 	i := 0
152 | 	for d := range sortedList {
153 | 		if i < 10 {
154 | 			if max > 75 {
155 | 				barValue = int((float64(sortedList[d].Value) / max) * 75)
156 | 			} else {
157 | 				barValue = sortedList[d].Value
158 | 			}
159 | 			bar := strings.Repeat("☐", barValue)
160 | 			if len(sortedList[d].Key) > 20 {
161 | 				key = sortedList[d].Key[:17] + "..."
162 | 			} else {
163 | 				key = sortedList[d].Key
164 | 			}
165 | 			histString = histString + fmt.Sprintf("%20v: %s (%d)\n", key, bar, sortedList[d].Value)
166 | 		}
167 | 		i++
168 | 	}
169 | 
170 | 	histString = histString + fmt.Sprintf("%20v: %d\n", "Unique Strings", len(sortedList))
171 | 
172 | 	return histString
173 | }
174 | 
// ChatGPTRequest is a JSON payload with a single string field, encoded under
// the key "message".
// NOTE(review): nothing in the visible part of this file uses this type;
// presumably it is the body sent to a chat endpoint — confirm against its caller.
type ChatGPTRequest struct {
	Message string `json:"message"`
}

// ChatGPTResponse is a JSON payload with a single string field, decoded from
// the key "reply".
// NOTE(review): presumably the counterpart of ChatGPTRequest — confirm against
// its caller.
type ChatGPTResponse struct {
	Reply string `json:"reply"`
}
182 | 
183 | // CSVSummary generates a summary of the entire CSV file
184 | func CSVSummary(records [][]string) map[string]interface{} {
185 | 	summary := make(map[string]interface{})
186 | 	
187 | 	// Basic file info
188 | 	summary["total_rows"] = len(records) - 1 // Excluding header
189 | 	summary["total_columns"] = len(records[0])
190 | 	summary["column_names"] = records[0]
191 | 	
192 | 	// Column type inference
193 | 	columnTypes := make(map[string]string)
194 | 	for i, colName := range records[0] {
195 | 		// Check first 10 rows (or fewer if less data) to infer type
196 | 		samples := []string{}
197 | 		for j := 1; j < min(len(records), 11); j++ {
198 | 			if i < len(records[j]) {
199 | 				samples = append(samples, records[j][i])
200 | 			}
201 | 		}
202 | 		columnTypes[colName] = inferColumnType(samples)
203 | 	}
204 | 	summary["column_types"] = columnTypes
205 | 	
206 | 	// Data completeness
207 | 	completeness := make(map[string]float64)
208 | 	for i, colName := range records[0] {
209 | 		filledCount := 0
210 | 		for j := 1; j < len(records); j++ {
211 | 			if i < len(records[j]) && records[j][i] != "" {
212 | 				filledCount++
213 | 			}
214 | 		}
215 | 		completeness[colName] = float64(filledCount) / float64(len(records)-1) * 100
216 | 	}
217 | 	summary["completeness"] = completeness
218 | 	
219 | 	return summary
220 | }
221 | 
// min returns the smaller of two ints; when they are equal, b is returned.
func min(a, b int) int {
	if a >= b {
		return b
	}
	return a
}
229 | 
// inferColumnType classifies a column from sample cell values.
//
// It returns "integer" when every non-empty sample parses as a base-10
// integer, "float" when every non-empty sample parses as a float, and
// "string" otherwise. Empty samples are ignored; when there is no non-empty
// sample at all nothing supports a numeric type, so the result is "string".
func inferColumnType(samples []string) string {
	sawValue := false
	isInt, isFloat := true, true

	for _, sample := range samples {
		if sample == "" {
			continue // Skip empty values for type inference
		}
		sawValue = true

		if _, err := strconv.ParseInt(sample, 10, 64); err != nil {
			isInt = false
		}
		if _, err := strconv.ParseFloat(sample, 64); err != nil {
			isFloat = false
		}

		// Once both numeric types are ruled out the answer cannot change.
		if !isInt && !isFloat {
			break
		}
	}

	switch {
	case !sawValue:
		return "string"
	case isInt:
		return "integer"
	case isFloat:
		return "float"
	default:
		return "string"
	}
}
264 | 
// FilterCSVData filters CSV data based on column and condition.
//
// records[0] is the header row; it is always the first row of the result.
// A data row is kept when its cell in columnName satisfies condition against
// value. Supported conditions are "equals", "contains", "starts_with" and
// "ends_with" (string comparisons) and "greater_than" and "less_than"
// (numeric comparisons, which match only when both the cell and value parse
// as floats). Any other condition matches nothing.
//
// Rows too short to contain the column are skipped. An unknown column yields
// just the header row, and empty records yield an empty result.
func FilterCSVData(records [][]string, columnName string, condition string, value string) [][]string {
	if len(records) == 0 {
		return [][]string{}
	}

	filtered := [][]string{records[0]} // Start with header row

	// Find column index
	colIndex := -1
	for i, col := range records[0] {
		if col == columnName {
			colIndex = i
			break
		}
	}

	if colIndex == -1 {
		return filtered // Column not found
	}

	// The comparison value is the same for every row, so parse it once.
	valueFloat, valueErr := strconv.ParseFloat(value, 64)

	for _, row := range records[1:] {
		if colIndex >= len(row) {
			continue // Skip rows with insufficient columns
		}

		cellValue := row[colIndex]
		includeRow := false

		switch condition {
		case "equals":
			includeRow = cellValue == value
		case "contains":
			includeRow = strings.Contains(cellValue, value)
		case "greater_than":
			if cellFloat, err := strconv.ParseFloat(cellValue, 64); err == nil && valueErr == nil {
				includeRow = cellFloat > valueFloat
			}
		case "less_than":
			if cellFloat, err := strconv.ParseFloat(cellValue, 64); err == nil && valueErr == nil {
				includeRow = cellFloat < valueFloat
			}
		case "starts_with":
			includeRow = strings.HasPrefix(cellValue, value)
		case "ends_with":
			includeRow = strings.HasSuffix(cellValue, value)
		}

		if includeRow {
			filtered = append(filtered, row)
		}
	}

	return filtered
}
324 | 
// ExportToJSON writes the CSV data in records to outputPath as an indented
// JSON array with one object per data row, keyed by the header names in
// records[0].
//
// Rows shorter than the header get an empty string for each missing column;
// cells beyond the header length are dropped. Because every row is built as a
// map, duplicate header names collapse to a single key (the last column with
// that name wins) and encoding/json emits keys in sorted order rather than in
// column order. A header-only input produces an empty JSON array.
//
// It returns an error if records is empty (there is no header row), if the
// data cannot be marshalled, or if the file cannot be written. The file is
// created with mode 0644.
func ExportToJSON(records [][]string, outputPath string) error {
	if len(records) == 0 {
		return fmt.Errorf("no records to export: missing header row")
	}

	headers := records[0]
	rows := records[1:]

	// Keep result non-nil so a header-only input encodes as [] and not null.
	result := make([]map[string]string, 0, len(rows))
	for _, record := range rows {
		row := make(map[string]string, len(headers))
		for j, header := range headers {
			if j < len(record) {
				row[header] = record[j]
			} else {
				row[header] = ""
			}
		}
		result = append(result, row)
	}

	jsonData, err := json.MarshalIndent(result, "", "  ")
	if err != nil {
		return err
	}

	return ioutil.WriteFile(outputPath, jsonData, 0644)
}
352 | 
// CalculateCorrelation computes the Pearson correlation coefficient between
// the columns named column1 and column2, using records[0] as the header row.
//
// Only rows where both cells parse as finite floats contribute; rows that are
// too short, non-numeric, or contain NaN/Inf in either column are skipped so
// a single bad cell cannot turn the whole result into NaN. If a column name
// appears more than once in the header, the last occurrence is used.
//
// It returns an error when either column is missing (including when records
// is empty) or when fewer than two usable pairs remain. When either column
// has no variation the coefficient is undefined and 0 is returned with a nil
// error.
func CalculateCorrelation(records [][]string, column1 string, column2 string) (float64, error) {
	// No header row means neither column can be located.
	if len(records) == 0 {
		return 0, fmt.Errorf("column not found")
	}

	col1Index := -1
	col2Index := -1
	for i, col := range records[0] {
		if col == column1 {
			col1Index = i
		}
		if col == column2 {
			col2Index = i
		}
	}
	if col1Index == -1 || col2Index == -1 {
		return 0, fmt.Errorf("column not found")
	}

	// strconv.ParseFloat accepts "NaN" and "Inf", which would poison the sums.
	isFinite := func(v float64) bool {
		return !math.IsNaN(v) && !math.IsInf(v, 0)
	}

	// Collect paired observations; x[i] and y[i] always come from the same row.
	var x, y []float64
	for _, row := range records[1:] {
		if col1Index >= len(row) || col2Index >= len(row) {
			continue
		}
		val1, err1 := strconv.ParseFloat(row[col1Index], 64)
		val2, err2 := strconv.ParseFloat(row[col2Index], 64)
		if err1 != nil || err2 != nil || !isFinite(val1) || !isFinite(val2) {
			continue
		}
		x = append(x, val1)
		y = append(y, val2)
	}

	if len(x) < 2 {
		return 0, fmt.Errorf("insufficient numeric data for correlation")
	}

	n := float64(len(x))
	xMean := 0.0
	yMean := 0.0
	for i := range x {
		xMean += x[i]
		yMean += y[i]
	}
	xMean /= n
	yMean /= n

	// Accumulate the covariance term and both sums of squared deviations.
	numerator := 0.0
	xDenom := 0.0
	yDenom := 0.0
	for i := range x {
		xDiff := x[i] - xMean
		yDiff := y[i] - yMean
		numerator += xDiff * yDiff
		xDenom += xDiff * xDiff
		yDenom += yDiff * yDiff
	}

	if xDenom == 0 || yDenom == 0 {
		return 0, nil // No variation in at least one variable.
	}

	return numerator / math.Sqrt(xDenom*yDenom), nil
}
420 | 
// DetectAnomalies flags outliers in every numeric column using the Z-score
// method, treating records[0] as the header row.
//
// For each column, the cells that parse as finite floats are collected; NaN
// and Inf cells are ignored so they cannot turn the mean and deviation into
// NaN and hide real outliers. Columns with fewer than five usable values or
// with zero standard deviation are skipped. The standard deviation is the
// population form (divides by the number of values). A value is anomalous
// when |value - mean| / stdDev is strictly greater than threshold.
//
// The result maps a column name to the indices into records (so the first
// data row is 1) of its anomalous rows, in ascending order. Columns without
// anomalies are absent from the map. If two columns share a name, the later
// column's entry replaces the earlier one. Empty records yield an empty map.
func DetectAnomalies(records [][]string, threshold float64) map[string][]int {
	anomalies := make(map[string][]int)
	if len(records) == 0 {
		return anomalies
	}

	for colIndex, colName := range records[0] {
		// values[k] was read from records[valueIndices[k]].
		var values []float64
		var valueIndices []int

		for i := 1; i < len(records); i++ {
			if colIndex >= len(records[i]) {
				continue
			}
			val, err := strconv.ParseFloat(records[i][colIndex], 64)
			if err != nil || math.IsNaN(val) || math.IsInf(val, 0) {
				continue
			}
			values = append(values, val)
			valueIndices = append(valueIndices, i)
		}

		// Need enough data points for meaningful anomaly detection.
		if len(values) < 5 {
			continue
		}

		count := float64(len(values))

		mean := 0.0
		for _, val := range values {
			mean += val
		}
		mean /= count

		sumSquares := 0.0
		for _, val := range values {
			diff := val - mean
			sumSquares += diff * diff
		}
		stdDev := math.Sqrt(sumSquares / count)

		if stdDev == 0 {
			continue // No variation, so every Z-score would be undefined.
		}

		var anomalousRows []int
		for i, val := range values {
			zScore := math.Abs(val-mean) / stdDev
			if zScore > threshold {
				anomalousRows = append(anomalousRows, valueIndices[i])
			}
		}

		if len(anomalousRows) > 0 {
			anomalies[colName] = anomalousRows
		}
	}

	return anomalies
}
478 | 
479 | func CallChatGPTAPI(message string, authToken string) (string, error) {
480 | 	payload := ChatGPTRequest{
481 | 		Message: message,
482 | 	}
483 | 
484 | 	payloadBytes, err := json.Marshal(payload)
485 | 	if err != nil {
486 | 		return "", err
487 | 	}
488 | 
489 | 	client := &http.Client{}
490 | 
491 | 	req, err := http.NewRequest("POST", "https://api.chatgpt.com/v1/chat/completions", bytes.NewBuffer(payloadBytes)) 
492 | 	if err != nil {
493 | 		return "", err
494 | 	}
495 | 
496 | 	req.Header.Set("Content-Type", "application/json")
497 | 	req.Header.Set("Authorization", "Bearer "+authToken)
498 | 
499 | 	resp, err := client.Do(req)
500 | 	if err != nil {
501 | 		return "", err
502 | 	}
503 | 	defer resp.Body.Close()
504 | 
505 | 	respBody, err := ioutil.ReadAll(resp.Body)
506 | 	if err != nil {
507 | 		return "", err
508 | 	}
509 | 
510 | 	var apiResponse ChatGPTResponse
511 | 	err = json.Unmarshal(respBody, &apiResponse)
512 | 	if err != nil {
513 | 		return "", err
514 | 	}
515 | 
516 | 	return apiResponse.Reply, nil
517 | }


--------------------------------------------------------------------------------