├── .gitattributes ├── Chapter09 ├── building_a_scalable_pipeline │ ├── example1 │ │ ├── ~ │ │ │ └── .pachyderm │ │ │ │ └── port-forward.pid │ │ └── myprogram.go │ ├── example2 │ │ ├── 1.json │ │ └── myprogram.go │ ├── example5 │ │ ├── 2.json │ │ └── 3.json │ ├── example3 │ │ └── model.json │ ├── example6 │ │ └── model.json │ ├── example4 │ │ └── prediction.json │ └── example7 │ │ └── prediction.json └── running_model_reliably │ ├── example1 │ ├── Dockerfile │ ├── Makefile │ └── myprogram.go │ ├── example2 │ ├── Dockerfile │ ├── Makefile │ └── myprogram.go │ ├── example3 │ ├── Dockerfile │ ├── Makefile │ └── myprogram.go │ └── example4 │ └── attributes │ ├── 1.json │ ├── 2.json │ └── 3.json ├── Chapter01 ├── handling_data_gopher_style │ ├── example1 │ │ ├── myfile.csv │ │ └── myprogram.py │ └── example2 │ │ ├── myfile.csv │ │ └── myprogram.go ├── csv_files │ ├── example5 │ │ └── myprogram.go │ ├── example1 │ │ └── myprogram.go │ ├── example2 │ │ └── myprogram.go │ ├── example3 │ │ └── myprogram.go │ ├── example6 │ │ └── myprogram.go │ └── example4 │ │ └── myprogram.go ├── caching │ ├── example1 │ │ └── myprogram.go │ └── example2 │ │ └── myprogram.go ├── sql-like_databases │ ├── example1 │ │ └── myprogram.go │ ├── example3 │ │ └── myprogram.go │ └── example2 │ │ └── myprogram.go └── json │ ├── example1 │ └── myprogram.go │ └── example2 │ └── myprogram.go ├── Chapter04 ├── linear_regression │ ├── example2 │ │ ├── .~lock.basketball.csv# │ │ └── myprogram.go │ ├── example1 │ │ ├── myprogram.go │ │ └── Advertising.csv │ ├── example3 │ │ └── myprogram.go │ ├── example5 │ │ └── myprogram.go │ ├── example6 │ │ ├── test.csv │ │ └── myprogram.go │ ├── example4 │ │ └── myprogram.go │ └── example7 │ │ └── myprogram.go ├── multiple_regression │ ├── example1 │ │ └── myprogram.go │ └── example2 │ │ ├── test.csv │ │ └── myprogram.go └── non-linear_regression │ ├── example3 │ ├── test.csv │ └── myprogram.go │ ├── example1 │ └── myprogram.go │ └── example2 │ └── myprogram.go ├── Chapter08 ├── deep_learning │ └── example1 │ │ ├── pug.jpg │ │ ├── gopher.jpg │ │ └── airplane.jpg └── utilizing_our_simple_nn │ └── example1 │ └── test.csv ├── Chapter03 ├── validation │ └── training_test │ │ └── example1 │ │ ├── example1 │ │ └── myprogram.go └── evaluation │ ├── categorical_metrics │ ├── example3 │ │ ├── example3 │ │ └── myprogram.go │ ├── example1 │ │ ├── labeled.csv │ │ └── myprogram.go │ └── example2 │ │ ├── labeled.csv │ │ └── myprogram.go │ └── continuous_metrics │ ├── example2 │ ├── myprogram.go │ └── continuous_data.csv │ └── example1 │ ├── myprogram.go │ └── continuous_data.csv ├── Chapter02 ├── vectors │ ├── example2 │ │ └── myprogram.go │ ├── example1 │ │ └── myprogram.go │ ├── example3 │ │ └── myprogram.go │ └── example4 │ │ └── myprogram.go ├── matrices │ ├── example1 │ │ └── myprogram.go │ ├── example3 │ │ └── myprogram.go │ └── example2 │ │ └── myprogram.go ├── hypothesis_testing │ ├── example1 │ │ └── myprogram.go │ └── example2 │ │ └── myprogram.go ├── statistical_measures │ ├── example1 │ │ └── myprogram.go │ └── example2 │ │ └── myprogram.go └── statistical_visualizations │ ├── example1 │ └── myprogram.go │ └── example2 │ └── myprogram.go ├── Chapter05 ├── logistic_regression │ ├── example1 │ │ └── myprogram.go │ ├── example2 │ │ └── myprogram.go │ ├── example4 │ │ └── myprogram.go │ ├── example5 │ │ └── myprogram.go │ ├── example3 │ │ └── myprogram.go │ ├── example7 │ │ └── myprogram.go │ └── example6 │ │ └── myprogram.go ├── kNN │ └── example1 │ │ └── myprogram.go ├── decision_tree │ ├── 
example1 │ │ └── myprogram.go │ └── example2 │ │ └── myprogram.go └── naive_bayes │ └── example1 │ └── myprogram.go ├── Chapter06 ├── distance │ └── example1 │ │ └── myprogram.go ├── k-means │ ├── example2 │ │ └── myprogram.go │ ├── example3 │ │ └── myprogram.go │ ├── example1 │ │ └── myprogram.go │ ├── example5 │ │ └── myprogram.go │ └── example4 │ │ └── myprogram.go └── evaluating │ ├── example1 │ └── myprogram.go │ └── example2 │ └── myprogram.go ├── Chapter07 ├── representing_time_series │ ├── example1 │ │ ├── myprogram.go │ │ └── AirPassengers.csv │ └── example2 │ │ ├── myprogram.go │ │ └── AirPassengers.csv ├── anomaly_detection │ └── example1 │ │ └── myprogram.go ├── statistics │ ├── example1 │ │ ├── myprogram.go │ │ └── AirPassengers.csv │ ├── example3 │ │ ├── myprogram.go │ │ └── AirPassengers.csv │ ├── example2 │ │ ├── myprogram.go │ │ └── AirPassengers.csv │ └── example4 │ │ ├── myprogram.go │ │ └── AirPassengers.csv └── auto_regressive │ ├── example5 │ └── myprogram.go │ ├── example1 │ ├── myprogram.go │ └── AirPassengers.csv │ ├── example2 │ ├── myprogram.go │ └── AirPassengers.csv │ ├── example3 │ └── myprogram.go │ ├── example4 │ └── myprogram.go │ └── example6 │ ├── AirPassengers.csv │ └── myprogram.go ├── LICENSE └── README.md /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto -------------------------------------------------------------------------------- /Chapter09/building_a_scalable_pipeline/example1/~/.pachyderm/port-forward.pid: -------------------------------------------------------------------------------- 1 | 20672 -------------------------------------------------------------------------------- /Chapter09/running_model_reliably/example1/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM alpine 2 | ADD goregtrain / 3 | -------------------------------------------------------------------------------- /Chapter09/running_model_reliably/example2/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM alpine 2 | ADD goregtrain / 3 | -------------------------------------------------------------------------------- /Chapter09/running_model_reliably/example3/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM alpine 2 | ADD goregpredict / 3 | -------------------------------------------------------------------------------- /Chapter01/handling_data_gopher_style/example1/myfile.csv: -------------------------------------------------------------------------------- 1 | 1,testval1 2 | 2,testval2 3 | 3,testval3 4 | -------------------------------------------------------------------------------- /Chapter01/handling_data_gopher_style/example2/myfile.csv: -------------------------------------------------------------------------------- 1 | 1,testval1 2 | 2,testval2 3 | 3,testval3 4 | -------------------------------------------------------------------------------- /Chapter04/linear_regression/example2/.~lock.basketball.csv#: -------------------------------------------------------------------------------- 1 | ,dwhitena,fermi,10.08.2017 16:07,file:///home/dwhitena/.config/libreoffice/4; -------------------------------------------------------------------------------- /Chapter08/deep_learning/example1/pug.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Machine-Learning-With-Go/HEAD/Chapter08/deep_learning/example1/pug.jpg -------------------------------------------------------------------------------- /Chapter08/deep_learning/example1/gopher.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Machine-Learning-With-Go/HEAD/Chapter08/deep_learning/example1/gopher.jpg -------------------------------------------------------------------------------- /Chapter08/deep_learning/example1/airplane.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Machine-Learning-With-Go/HEAD/Chapter08/deep_learning/example1/airplane.jpg -------------------------------------------------------------------------------- /Chapter03/validation/training_test/example1/example1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Machine-Learning-With-Go/HEAD/Chapter03/validation/training_test/example1/example1 -------------------------------------------------------------------------------- /Chapter03/evaluation/categorical_metrics/example3/example3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Machine-Learning-With-Go/HEAD/Chapter03/evaluation/categorical_metrics/example3/example3 -------------------------------------------------------------------------------- /Chapter02/vectors/example2/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/gonum/matrix/mat64" 7 | ) 8 | 9 | func main() { 10 | 11 | // Create a new vector value. 
12 | myvector := mat64.NewVector(2, []float64{11.0, 5.2}) 13 | 14 | fmt.Println(myvector) 15 | } 16 | -------------------------------------------------------------------------------- /Chapter09/building_a_scalable_pipeline/example2/1.json: -------------------------------------------------------------------------------- 1 | { 2 | "independent_variables": [ 3 | { 4 | "name": "bmi", 5 | "value": 0.0616962065187 6 | }, 7 | { 8 | "name": "ltg", 9 | "value": 0.0199084208763 10 | } 11 | ] 12 | } 13 | -------------------------------------------------------------------------------- /Chapter09/building_a_scalable_pipeline/example5/2.json: -------------------------------------------------------------------------------- 1 | { 2 | "independent_variables": [ 3 | { 4 | "name": "bmi", 5 | "value": 0.0444512133366 6 | }, 7 | { 8 | "name": "ltg", 9 | "value": 0.00286377051894 10 | } 11 | ] 12 | } 13 | -------------------------------------------------------------------------------- /Chapter09/building_a_scalable_pipeline/example5/3.json: -------------------------------------------------------------------------------- 1 | { 2 | "independent_variables": [ 3 | { 4 | "name": "bmi", 5 | "value": -0.0115950145052 6 | }, 7 | { 8 | "name": "ltg", 9 | "value": 0.0226920225667 10 | } 11 | ] 12 | } 13 | -------------------------------------------------------------------------------- /Chapter09/running_model_reliably/example4/attributes/1.json: -------------------------------------------------------------------------------- 1 | { 2 | "independent_variables": [ 3 | { 4 | "name": "bmi", 5 | "value": 0.0616962065187 6 | }, 7 | { 8 | "name": "ltg", 9 | "value": 0.0199084208763 10 | } 11 | ] 12 | } 13 | -------------------------------------------------------------------------------- /Chapter09/running_model_reliably/example4/attributes/2.json: -------------------------------------------------------------------------------- 1 | { 2 | "independent_variables": [ 3 | { 4 | "name": "bmi", 5 | "value": 0.0444512133366 6 | }, 7 | { 8 | "name": "ltg", 9 | "value": 0.00286377051894 10 | } 11 | ] 12 | } 13 | -------------------------------------------------------------------------------- /Chapter09/running_model_reliably/example4/attributes/3.json: -------------------------------------------------------------------------------- 1 | { 2 | "independent_variables": [ 3 | { 4 | "name": "bmi", 5 | "value": -0.0115950145052 6 | }, 7 | { 8 | "name": "ltg", 9 | "value": 0.0226920225667 10 | } 11 | ] 12 | } 13 | -------------------------------------------------------------------------------- /Chapter02/vectors/example1/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "fmt" 4 | 5 | func main() { 6 | 7 | // Initialize a "vector" via a slice. 8 | var myvector []float64 9 | 10 | // Add a couple of components to the vector. 11 | myvector = append(myvector, 11.0) 12 | myvector = append(myvector, 5.2) 13 | 14 | fmt.Println(myvector) 15 | } 16 | -------------------------------------------------------------------------------- /Chapter09/running_model_reliably/example2/Makefile: -------------------------------------------------------------------------------- 1 | all: compile docker push clean 2 | 3 | compile: 4 | GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build -o goregtrain 5 | 6 | docker: 7 | sudo docker build --force-rm=true -t dwhitena/goregtrain:multi . 
8 | 9 | push: 10 | sudo docker push dwhitena/goregtrain:multi 11 | 12 | clean: 13 | rm goregtrain 14 | -------------------------------------------------------------------------------- /Chapter09/running_model_reliably/example3/Makefile: -------------------------------------------------------------------------------- 1 | all: compile docker push clean 2 | 3 | compile: 4 | GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build -o goregpredict 5 | 6 | docker: 7 | sudo docker build --force-rm=true -t dwhitena/goregpredict . 8 | 9 | push: 10 | sudo docker push dwhitena/goregpredict 11 | 12 | clean: 13 | rm goregpredict 14 | -------------------------------------------------------------------------------- /Chapter09/running_model_reliably/example1/Makefile: -------------------------------------------------------------------------------- 1 | all: compile docker push clean 2 | 3 | compile: 4 | GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build -o goregtrain 5 | 6 | docker: 7 | sudo docker build --force-rm=true -t dwhitena/goregtrain:single . 8 | 9 | push: 10 | sudo docker push dwhitena/goregtrain:single 11 | 12 | clean: 13 | rm goregtrain 14 | -------------------------------------------------------------------------------- /Chapter05/logistic_regression/example1/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "math" 6 | ) 7 | 8 | func main() { 9 | 10 | fmt.Println(logistic(1.0)) 11 | } 12 | 13 | // logistic implements the logistic function, which 14 | // is used in logistic regression. 15 | func logistic(x float64) float64 { 16 | return 1 / (1 + math.Exp(-x)) 17 | } 18 | -------------------------------------------------------------------------------- /Chapter01/handling_data_gopher_style/example1/myprogram.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | # Define column names. 4 | cols = [ 5 | 'integercolumn', 6 | 'stringcolumn' 7 | ] 8 | 9 | # Read in the CSV with pandas. 10 | data = pd.read_csv('myfile.csv', names=cols) 11 | 12 | # Print out the maximum value in the integer column. 13 | print(data['integercolumn'].max()) 14 | -------------------------------------------------------------------------------- /Chapter06/distance/example1/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/gonum/floats" 7 | ) 8 | 9 | func main() { 10 | 11 | // Calculate the Euclidean distance, specified here via 12 | // the last argument in the Distance function. 13 | distance := floats.Distance([]float64{1, 2}, []float64{3, 4}, 2) 14 | 15 | fmt.Printf("\nDistance: %0.2f\n\n", distance) 16 | } 17 | -------------------------------------------------------------------------------- /Chapter02/matrices/example1/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/gonum/matrix/mat64" 7 | ) 8 | 9 | func main() { 10 | 11 | // Create a flat representation of our matrix. 12 | data := []float64{1.2, -5.7, -2.4, 7.3} 13 | 14 | // Form our matrix. 15 | a := mat64.NewDense(2, 2, data) 16 | 17 | // As a sanity check, output the matrix to standard out. 
18 | fa := mat64.Formatted(a, mat64.Prefix(" ")) 19 | fmt.Printf("A = %v\n\n", fa) 20 | } 21 | -------------------------------------------------------------------------------- /Chapter09/building_a_scalable_pipeline/example3/model.json: -------------------------------------------------------------------------------- 1 | { 2 | "pipeline": { 3 | "name": "model" 4 | }, 5 | "transform": { 6 | "image": "dwhitena/goregtrain:single", 7 | "cmd": [ 8 | "/goregtrain", 9 | "-inDir=/pfs/training", 10 | "-outDir=/pfs/out" 11 | ] 12 | }, 13 | "parallelism_spec": { 14 | "constant": "1" 15 | }, 16 | "input": { 17 | "atom": { 18 | "repo": "training", 19 | "glob": "/" 20 | } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /Chapter09/building_a_scalable_pipeline/example6/model.json: -------------------------------------------------------------------------------- 1 | { 2 | "pipeline": { 3 | "name": "model" 4 | }, 5 | "transform": { 6 | "image": "dwhitena/goregtrain:multi", 7 | "cmd": [ 8 | "/goregtrain", 9 | "-inDir=/pfs/training", 10 | "-outDir=/pfs/out" 11 | ] 12 | }, 13 | "parallelism_spec": { 14 | "constant": "1" 15 | }, 16 | "input": { 17 | "atom": { 18 | "repo": "training", 19 | "glob": "/" 20 | } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /Chapter02/hypothesis_testing/example1/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/gonum/stat" 7 | ) 8 | 9 | func main() { 10 | 11 | // Define observed and expected values. Most 12 | // of the time these will come from your 13 | // data (website visits, etc.). 14 | observed := []float64{48, 52} 15 | expected := []float64{50, 50} 16 | 17 | // Calculate the ChiSquare test statistic. 18 | chiSquare := stat.ChiSquare(observed, expected) 19 | 20 | fmt.Println(chiSquare) 21 | } 22 | -------------------------------------------------------------------------------- /Chapter07/representing_time_series/example1/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "os" 7 | 8 | "github.com/kniren/gota/dataframe" 9 | ) 10 | 11 | func main() { 12 | 13 | // Open the CSV file. 14 | passengersFile, err := os.Open("AirPassengers.csv") 15 | if err != nil { 16 | log.Fatal(err) 17 | } 18 | defer passengersFile.Close() 19 | 20 | // Create a dataframe from the CSV file. 21 | passengersDF := dataframe.ReadCSV(passengersFile) 22 | 23 | // As a sanity check, display the records to stdout. 24 | // Gota will format the dataframe for pretty printing. 25 | fmt.Println(passengersDF) 26 | } 27 | -------------------------------------------------------------------------------- /Chapter01/csv_files/example5/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "os" 7 | 8 | "github.com/kniren/gota/dataframe" 9 | ) 10 | 11 | func main() { 12 | 13 | // Open the CSV file. 14 | irisFile, err := os.Open("../data/iris_labeled.csv") 15 | if err != nil { 16 | log.Fatal(err) 17 | } 18 | defer irisFile.Close() 19 | 20 | // Create a dataframe from the CSV file. 21 | // The types of the columns will be inferred. 22 | irisDF := dataframe.ReadCSV(irisFile) 23 | 24 | // As a sanity check, display the records to stdout. 25 | // Gota will format the dataframe for pretty printing. 
26 | fmt.Println(irisDF) 27 | } 28 | -------------------------------------------------------------------------------- /Chapter01/caching/example1/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "time" 6 | 7 | cache "github.com/patrickmn/go-cache" 8 | ) 9 | 10 | func main() { 11 | 12 | // Create a cache with a default expiration time of 5 minutes, and which 13 | // purges expired items every 30 seconds 14 | c := cache.New(5*time.Minute, 30*time.Second) 15 | 16 | // Put a key and value into the cache. 17 | c.Set("mykey", "myvalue", cache.DefaultExpiration) 18 | 19 | // For a sanity check. Output the key and value in the cache 20 | // to standard out. 21 | v, found := c.Get("mykey") 22 | if found { 23 | fmt.Printf("key: mykey, value: %s\n", v) 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /Chapter04/linear_regression/example1/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "os" 7 | 8 | "github.com/kniren/gota/dataframe" 9 | ) 10 | 11 | func main() { 12 | 13 | // Open the CSV file. 14 | advertFile, err := os.Open("Advertising.csv") 15 | if err != nil { 16 | log.Fatal(err) 17 | } 18 | defer advertFile.Close() 19 | 20 | // Create a dataframe from the CSV file. 21 | advertDF := dataframe.ReadCSV(advertFile) 22 | 23 | // Use the Describe method to calculate summary statistics 24 | // for all of the columns in one shot. 25 | advertSummary := advertDF.Describe() 26 | 27 | // Output the summary statistics to stdout. 28 | fmt.Println(advertSummary) 29 | } 30 | -------------------------------------------------------------------------------- /Chapter09/building_a_scalable_pipeline/example4/prediction.json: -------------------------------------------------------------------------------- 1 | { 2 | "pipeline": { 3 | "name": "prediction" 4 | }, 5 | "transform": { 6 | "image": "dwhitena/goregpredict", 7 | "cmd": [ 8 | "/goregpredict", 9 | "-inModelDir=/pfs/model", 10 | "-inVarDir=/pfs/attributes", 11 | "-outDir=/pfs/out" 12 | ] 13 | }, 14 | "parallelism_spec": { 15 | "constant": "1" 16 | }, 17 | "input": { 18 | "cross": [ 19 | { 20 | "atom": { 21 | "repo": "attributes", 22 | "glob": "/*" 23 | } 24 | }, 25 | { 26 | "atom": { 27 | "repo": "model", 28 | "glob": "/" 29 | } 30 | } 31 | ] 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /Chapter09/building_a_scalable_pipeline/example7/prediction.json: -------------------------------------------------------------------------------- 1 | { 2 | "pipeline": { 3 | "name": "prediction" 4 | }, 5 | "transform": { 6 | "image": "dwhitena/goregpredict", 7 | "cmd": [ 8 | "/goregpredict", 9 | "-inModelDir=/pfs/model", 10 | "-inVarDir=/pfs/attributes", 11 | "-outDir=/pfs/out" 12 | ] 13 | }, 14 | "parallelism_spec": { 15 | "constant": "10" 16 | }, 17 | "input": { 18 | "cross": [ 19 | { 20 | "atom": { 21 | "repo": "attributes", 22 | "glob": "/*" 23 | } 24 | }, 25 | { 26 | "atom": { 27 | "repo": "model", 28 | "glob": "/" 29 | } 30 | } 31 | ] 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /Chapter03/evaluation/categorical_metrics/example3/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/gonum/stat" 7 | 
"gonum.org/v1/gonum/integrate" 8 | ) 9 | 10 | func main() { 11 | 12 | // Define our scores and classes. 13 | scores := []float64{0.1, 0.35, 0.4, 0.8} 14 | classes := []bool{true, false, true, false} 15 | 16 | // Calculate the true positive rates (recalls) and 17 | // false positive rates. 18 | tpr, fpr := stat.ROC(0, scores, classes, nil) 19 | 20 | // Compute the Area Under Curve. 21 | auc := integrate.Trapezoidal(fpr, tpr) 22 | 23 | // Output the results to standard out. 24 | fmt.Printf("true positive rate: %v\n", tpr) 25 | fmt.Printf("false positive rate: %v\n", fpr) 26 | fmt.Printf("auc: %v\n", auc) 27 | } 28 | -------------------------------------------------------------------------------- /Chapter01/csv_files/example1/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/csv" 5 | "fmt" 6 | "log" 7 | "os" 8 | ) 9 | 10 | func main() { 11 | 12 | // Open the iris dataset file. 13 | f, err := os.Open("../data/iris.csv") 14 | if err != nil { 15 | log.Fatal(err) 16 | } 17 | defer f.Close() 18 | 19 | // Create a new CSV reader reading from the opened file. 20 | reader := csv.NewReader(f) 21 | 22 | // Assume we don't know the number of fields per line. By setting 23 | // FieldsPerRecord negative, each row may have a variable 24 | // number of fields. 25 | reader.FieldsPerRecord = -1 26 | 27 | // Read in all of the CSV records. 28 | rawCSVData, err := reader.ReadAll() 29 | if err != nil { 30 | log.Fatal(err) 31 | } 32 | 33 | fmt.Println(rawCSVData) 34 | } 35 | -------------------------------------------------------------------------------- /Chapter02/vectors/example3/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/gonum/floats" 7 | ) 8 | 9 | func main() { 10 | 11 | // Initialize a couple of "vectors" represented as slices. 12 | vectorA := []float64{11.0, 5.2, -1.3} 13 | vectorB := []float64{-7.2, 4.2, 5.1} 14 | 15 | // Compute the dot product of A and B 16 | // (https://en.wikipedia.org/wiki/Dot_product). 17 | dotProduct := floats.Dot(vectorA, vectorB) 18 | fmt.Printf("The dot product of A and B is: %0.2f\n", dotProduct) 19 | 20 | // Scale each element of A by 1.5. 21 | floats.Scale(1.5, vectorA) 22 | fmt.Printf("Scaling A by 1.5 gives: %v\n", vectorA) 23 | 24 | // Compute the norm/length of B. 25 | normB := floats.Norm(vectorB, 2) 26 | fmt.Printf("The norm/length of B is: %0.2f\n", normB) 27 | } 28 | -------------------------------------------------------------------------------- /Chapter02/matrices/example3/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | 7 | "github.com/gonum/matrix/mat64" 8 | ) 9 | 10 | func main() { 11 | 12 | // Create a new matrix a. 13 | a := mat64.NewDense(3, 3, []float64{1, 2, 3, 0, 4, 5, 0, 0, 6}) 14 | 15 | // Compute and output the transpose of the matrix. 16 | ft := mat64.Formatted(a.T(), mat64.Prefix(" ")) 17 | fmt.Printf("a^T = %v\n\n", ft) 18 | 19 | // Compute and output the determinant of a. 20 | deta := mat64.Det(a) 21 | fmt.Printf("det(a) = %.2f\n\n", deta) 22 | 23 | // Compute and output the inverse of a. 
24 | aInverse := mat64.NewDense(0, 0, nil) 25 | if err := aInverse.Inverse(a); err != nil { 26 | log.Fatal(err) 27 | } 28 | fi := mat64.Formatted(aInverse, mat64.Prefix(" ")) 29 | fmt.Printf("a^-1 = %v\n\n", fi) 30 | } 31 | -------------------------------------------------------------------------------- /Chapter02/vectors/example4/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/gonum/blas/blas64" 7 | "github.com/gonum/matrix/mat64" 8 | ) 9 | 10 | func main() { 11 | 12 | // Initialize a couple of "vectors" represented as slices. 13 | vectorA := mat64.NewVector(3, []float64{11.0, 5.2, -1.3}) 14 | vectorB := mat64.NewVector(3, []float64{-7.2, 4.2, 5.1}) 15 | 16 | // Compute the dot product of A and B 17 | // (https://en.wikipedia.org/wiki/Dot_product). 18 | dotProduct := mat64.Dot(vectorA, vectorB) 19 | fmt.Printf("The dot product of A and B is: %0.2f\n", dotProduct) 20 | 21 | // Scale each element of A by 1.5. 22 | vectorA.ScaleVec(1.5, vectorA) 23 | fmt.Printf("Scaling A by 1.5 gives: %v\n", vectorA) 24 | 25 | // Compute the norm/length of B. 26 | normB := blas64.Nrm2(3, vectorB.RawVector()) 27 | fmt.Printf("The norm/length of B is: %0.2f\n", normB) 28 | } 29 | -------------------------------------------------------------------------------- /Chapter01/csv_files/example2/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/csv" 5 | "fmt" 6 | "io" 7 | "log" 8 | "os" 9 | ) 10 | 11 | func main() { 12 | 13 | // Open the iris dataset file. 14 | f, err := os.Open("../data/iris.csv") 15 | if err != nil { 16 | log.Fatal(err) 17 | } 18 | defer f.Close() 19 | 20 | // Create a new CSV reader reading from the opened file. 21 | reader := csv.NewReader(f) 22 | reader.FieldsPerRecord = -1 23 | 24 | // rawCSVData will hold our successfully parsed rows. 25 | var rawCSVData [][]string 26 | 27 | // Read in the records one by one. 28 | for { 29 | 30 | // Read in a row. Check if we are at the end of the file. 31 | record, err := reader.Read() 32 | if err == io.EOF { 33 | break 34 | } 35 | 36 | // Append the record to our data set. 37 | rawCSVData = append(rawCSVData, record) 38 | } 39 | 40 | fmt.Println(rawCSVData) 41 | } 42 | -------------------------------------------------------------------------------- /Chapter01/handling_data_gopher_style/example2/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/csv" 5 | "fmt" 6 | "log" 7 | "os" 8 | "strconv" 9 | ) 10 | 11 | func main() { 12 | 13 | // Open the CSV. 14 | f, err := os.Open("myfile.csv") 15 | if err != nil { 16 | log.Fatal(err) 17 | } 18 | 19 | // Read in the CSV records. 20 | r := csv.NewReader(f) 21 | records, err := r.ReadAll() 22 | if err != nil { 23 | log.Fatal(err) 24 | } 25 | 26 | // Get the maximum value in the integer column. 27 | var intMax int 28 | for _, record := range records { 29 | 30 | // Parse the integer value. 31 | intVal, err := strconv.Atoi(record[0]) 32 | if err != nil { 33 | log.Fatal(err) 34 | } 35 | 36 | // Replace the maximum value if appropriate. 37 | if intVal > intMax { 38 | intMax = intVal 39 | } 40 | } 41 | 42 | // Print the maximum value. 
43 | fmt.Println(intMax) 44 | } 45 | -------------------------------------------------------------------------------- /Chapter01/sql-like_databases/example1/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "database/sql" 5 | "log" 6 | "os" 7 | 8 | // pq is the library that allows us to connect 9 | // to postgres with databases/sql. 10 | _ "github.com/lib/pq" 11 | ) 12 | 13 | func main() { 14 | 15 | // Get the postgres connection URL. I have it stored in 16 | // an environment variable. 17 | pgURL := os.Getenv("PGURL") 18 | if pgURL == "" { 19 | log.Fatal("PGURL empty") 20 | } 21 | 22 | // Open a database value. Specify the postgres driver 23 | // for databases/sql. 24 | db, err := sql.Open("postgres", pgURL) 25 | if err != nil { 26 | log.Fatal(err) 27 | } 28 | defer db.Close() 29 | 30 | // sql.Open() does not establish any connections to the 31 | // database. It just prepares the database connection value 32 | // for later use. To make sure the database is available and 33 | // accessible, we will use db.Ping(). 34 | if err := db.Ping(); err != nil { 35 | log.Fatal(err) 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /Chapter01/sql-like_databases/example3/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "database/sql" 5 | "log" 6 | "os" 7 | 8 | // pq is the library that allows us to connect 9 | // to postgres with databases/sql. 10 | _ "github.com/lib/pq" 11 | ) 12 | 13 | func main() { 14 | 15 | // Get my postgres connection URL. I have it stored in 16 | // an environment variable. 17 | pgURL := os.Getenv("PGURL") 18 | if pgURL == "" { 19 | log.Fatal("PGURL empty") 20 | } 21 | 22 | // Open a database value. Specify the postgres driver 23 | // for databases/sql. 24 | db, err := sql.Open("postgres", pgURL) 25 | if err != nil { 26 | log.Fatal(err) 27 | } 28 | defer db.Close() 29 | 30 | // Update some values. 31 | res, err := db.Exec("UPDATE iris SET species = 'setosa' WHERE species = 'Iris-setosa'") 32 | if err != nil { 33 | log.Fatal(err) 34 | } 35 | 36 | // See how many rows were updated. 37 | rowCount, err := res.RowsAffected() 38 | if err != nil { 39 | log.Fatal(err) 40 | } 41 | 42 | // Output the number of rows to standard out. 43 | log.Printf("affected = %d\n", rowCount) 44 | } 45 | -------------------------------------------------------------------------------- /Chapter02/matrices/example2/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/gonum/matrix/mat64" 7 | ) 8 | 9 | func main() { 10 | 11 | // Create a flat representation of our matrix. 12 | data := []float64{1.2, -5.7, -2.4, 7.3} 13 | 14 | // Form our matrix. 15 | a := mat64.NewDense(2, 2, data) 16 | 17 | // Get a single value from the matrix. 18 | val := a.At(0, 1) 19 | fmt.Printf("The value of a at (0,1) is: %.2f\n\n", val) 20 | 21 | // Get the values in a specific column. 22 | col := mat64.Col(nil, 0, a) 23 | fmt.Printf("The values in the 1st column are: %v\n\n", col) 24 | 25 | // Get the values in a specific row. 26 | row := mat64.Row(nil, 1, a) 27 | fmt.Printf("The values in the 2nd row are: %v\n\n", row) 28 | 29 | // Modify a single element. 30 | a.Set(0, 1, 11.2) 31 | 32 | // Modify an entire row. 33 | a.SetRow(0, []float64{14.3, -4.2}) 34 | 35 | // Modify an entire column. 
36 | a.SetCol(0, []float64{1.7, -0.3}) 37 | 38 | // As a sanity check, output the matrix to standard out. 39 | fa := mat64.Formatted(a, mat64.Prefix(" ")) 40 | fmt.Printf("A = %v\n\n", fa) 41 | } 42 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 sagarsawant69 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /Chapter05/kNN/example1/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "math" 7 | 8 | "github.com/sjwhitworth/golearn/base" 9 | "github.com/sjwhitworth/golearn/evaluation" 10 | "github.com/sjwhitworth/golearn/knn" 11 | ) 12 | 13 | func main() { 14 | 15 | // Read in the iris data set into golearn "instances". 16 | irisData, err := base.ParseCSVToInstances("iris.csv", true) 17 | if err != nil { 18 | log.Fatal(err) 19 | } 20 | 21 | // Initialize a new KNN classifier. We will use a simple 22 | // Euclidean distance measure and k=2. 23 | knn := knn.NewKnnClassifier("euclidean", "linear", 2) 24 | 25 | // Use cross-fold validation to successively train and evaluate the model 26 | // on 5 folds of the data set. 27 | cv, err := evaluation.GenerateCrossFoldValidationConfusionMatrices(irisData, knn, 5) 28 | if err != nil { 29 | log.Fatal(err) 30 | } 31 | 32 | // Get the mean, variance and standard deviation of the accuracy for the 33 | // cross validation. 34 | mean, variance := evaluation.GetCrossValidatedMetric(cv, evaluation.GetAccuracy) 35 | stdev := math.Sqrt(variance) 36 | 37 | // Output the cross metrics to standard out. 38 | fmt.Printf("\nAccuracy\n%.2f (+/- %.2f)\n\n", mean, stdev*2) 39 | } 40 | -------------------------------------------------------------------------------- /Chapter01/csv_files/example3/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/csv" 5 | "fmt" 6 | "io" 7 | "log" 8 | "os" 9 | ) 10 | 11 | func main() { 12 | 13 | // Open the iris dataset file. 14 | f, err := os.Open("../data/iris_unexpected_fields.csv") 15 | if err != nil { 16 | log.Fatal(err) 17 | } 18 | defer f.Close() 19 | 20 | // Create a new CSV reader reading from the opened file. 
21 | reader := csv.NewReader(f) 22 | 23 | // We should have 5 fields per line. By setting 24 | // FieldsPerRecord to 5, we can validate that each of the 25 | // rows in our CSV has the correct number of fields. 26 | reader.FieldsPerRecord = 5 27 | 28 | // rawCSVData will hold our successfully parsed rows. 29 | var rawCSVData [][]string 30 | 31 | // Read in the records one by one. 32 | for { 33 | 34 | // Read in a row. Check if we are at the end of the file. 35 | record, err := reader.Read() 36 | if err == io.EOF { 37 | break 38 | } 39 | 40 | // If we had a parsing error, log the error and move on. 41 | if err != nil { 42 | log.Println(err) 43 | continue 44 | } 45 | 46 | // Append the record to our data set, if it has the expected 47 | // number of fields. 48 | rawCSVData = append(rawCSVData, record) 49 | } 50 | 51 | fmt.Printf("parsed %d lines successfully\n", len(rawCSVData)) 52 | } 53 | -------------------------------------------------------------------------------- /Chapter05/logistic_regression/example2/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "image/color" 5 | "log" 6 | "math" 7 | 8 | "github.com/gonum/plot" 9 | "github.com/gonum/plot/plotter" 10 | "github.com/gonum/plot/vg" 11 | ) 12 | 13 | func main() { 14 | 15 | // Create a new plot. 16 | p, err := plot.New() 17 | if err != nil { 18 | log.Fatal(err) 19 | } 20 | p.Title.Text = "Logistic Function" 21 | p.X.Label.Text = "x" 22 | p.Y.Label.Text = "f(x)" 23 | 24 | // Create the plotter function. 25 | logisticPlotter := plotter.NewFunction(func(x float64) float64 { return logistic(x) }) 26 | logisticPlotter.Color = color.RGBA{B: 255, A: 255} 27 | 28 | // Add the plotter function to the plot. 29 | p.Add(logisticPlotter) 30 | 31 | // Set the axis ranges. Unlike other data sets, 32 | // functions don't set the axis ranges automatically 33 | // since functions don't necessarily have a 34 | // finite range of x and y values. 35 | p.X.Min = -10 36 | p.X.Max = 10 37 | p.Y.Min = -0.1 38 | p.Y.Max = 1.1 39 | 40 | // Save the plot to a PNG file. 41 | if err := p.Save(4*vg.Inch, 4*vg.Inch, "logistic.png"); err != nil { 42 | log.Fatal(err) 43 | } 44 | } 45 | 46 | // logistic implements the logistic function, which 47 | // is used in logistic regression. 48 | func logistic(x float64) float64 { 49 | return 1 / (1 + math.Exp(-x)) 50 | } 51 | -------------------------------------------------------------------------------- /Chapter02/statistical_measures/example1/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "os" 7 | 8 | "github.com/gonum/stat" 9 | "github.com/kniren/gota/dataframe" 10 | "github.com/montanaflynn/stats" 11 | ) 12 | 13 | func main() { 14 | 15 | // Open the CSV file. 16 | irisFile, err := os.Open("../data/iris.csv") 17 | if err != nil { 18 | log.Fatal(err) 19 | } 20 | defer irisFile.Close() 21 | 22 | // Create a dataframe from the CSV file. 23 | irisDF := dataframe.ReadCSV(irisFile) 24 | 25 | // Get the float values from the "sepal_length" column as 26 | // we will be looking at the measures for this variable. 27 | sepalLength := irisDF.Col("petal_length").Float() 28 | 29 | // Calculate the Mean of the variable. 30 | meanVal := stat.Mean(sepalLength, nil) 31 | 32 | // Calculate the Mode of the variable. 33 | modeVal, modeCount := stat.Mode(sepalLength, nil) 34 | 35 | // Calculate the Median of the variable. 
36 | medianVal, err := stats.Median(sepalLength) 37 | if err != nil { 38 | log.Fatal(err) 39 | } 40 | 41 | // Output the results to standard out. 42 | fmt.Printf("\nSepal Length Summary Statistics:\n") 43 | fmt.Printf("Mean value: %0.2f\n", meanVal) 44 | fmt.Printf("Mode value: %0.2f\n", modeVal) 45 | fmt.Printf("Mode count: %d\n", int(modeCount)) 46 | fmt.Printf("Median value: %0.2f\n\n", medianVal) 47 | } 48 | -------------------------------------------------------------------------------- /Chapter01/caching/example2/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | 7 | "github.com/boltdb/bolt" 8 | ) 9 | 10 | func main() { 11 | 12 | // Open an embedded.db data file in your current directory. 13 | // It will be created if it doesn't exist. 14 | db, err := bolt.Open("embedded.db", 0600, nil) 15 | if err != nil { 16 | log.Fatal(err) 17 | } 18 | defer db.Close() 19 | 20 | // Create a "bucket" in the boltdb file for our data. 21 | if err := db.Update(func(tx *bolt.Tx) error { 22 | _, err := tx.CreateBucket([]byte("MyBucket")) 23 | if err != nil { 24 | return fmt.Errorf("create bucket: %s", err) 25 | } 26 | return nil 27 | }); err != nil { 28 | log.Fatal(err) 29 | } 30 | 31 | // Put the map keys and values into the BoltDB file. 32 | if err := db.Update(func(tx *bolt.Tx) error { 33 | b := tx.Bucket([]byte("MyBucket")) 34 | err := b.Put([]byte("mykey"), []byte("myvalue")) 35 | return err 36 | }); err != nil { 37 | log.Fatal(err) 38 | } 39 | 40 | // Output the keys and values in the embedded 41 | // BoltDB file to standard out. 42 | if err := db.View(func(tx *bolt.Tx) error { 43 | b := tx.Bucket([]byte("MyBucket")) 44 | c := b.Cursor() 45 | for k, v := c.First(); k != nil; k, v = c.Next() { 46 | fmt.Printf("key: %s, value: %s\n", k, v) 47 | } 48 | return nil 49 | }); err != nil { 50 | log.Fatal(err) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /Chapter05/decision_tree/example1/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "math" 7 | "math/rand" 8 | 9 | "github.com/sjwhitworth/golearn/base" 10 | "github.com/sjwhitworth/golearn/evaluation" 11 | "github.com/sjwhitworth/golearn/trees" 12 | ) 13 | 14 | func main() { 15 | 16 | // Read in the iris data set into golearn "instances". 17 | irisData, err := base.ParseCSVToInstances("iris.csv", true) 18 | if err != nil { 19 | log.Fatal(err) 20 | } 21 | 22 | // This is to seed the random processes involved in building the 23 | // decision tree. 24 | rand.Seed(44111342) 25 | 26 | // We will use the ID3 algorithm to build our decision tree. Also, we 27 | // will start with a parameter of 0.6 that controls the train-prune split. 28 | tree := trees.NewID3DecisionTree(0.6) 29 | 30 | // Use cross-fold validation to successively train and evaluate the model 31 | // on 5 folds of the data set. 32 | cv, err := evaluation.GenerateCrossFoldValidationConfusionMatrices(irisData, tree, 5) 33 | if err != nil { 34 | log.Fatal(err) 35 | } 36 | 37 | // Get the mean, variance and standard deviation of the accuracy for the 38 | // cross validation. 39 | mean, variance := evaluation.GetCrossValidatedMetric(cv, evaluation.GetAccuracy) 40 | stdev := math.Sqrt(variance) 41 | 42 | // Output the cross metrics to standard out. 
43 | fmt.Printf("\nAccuracy\n%.2f (+/- %.2f)\n\n", mean, stdev*2) 44 | } 45 | -------------------------------------------------------------------------------- /Chapter01/csv_files/example6/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "os" 7 | 8 | "github.com/kniren/gota/dataframe" 9 | ) 10 | 11 | func main() { 12 | 13 | // Pull in the CSV file. 14 | irisFile, err := os.Open("../data/iris_labeled.csv") 15 | if err != nil { 16 | log.Fatal(err) 17 | } 18 | defer irisFile.Close() 19 | 20 | // Create a dataframe from the CSV file. 21 | // The types of the columns will be inferred. 22 | irisDF := dataframe.ReadCSV(irisFile) 23 | 24 | // Create a filter for the dataframe. 25 | filter := dataframe.F{ 26 | Colname: "species", 27 | Comparator: "==", 28 | Comparando: "Iris-versicolor", 29 | } 30 | 31 | // Filter the dataframe to see only the rows where 32 | // the iris species is "Iris-versicolor". 33 | versicolorDF := irisDF.Filter(filter) 34 | if versicolorDF.Err != nil { 35 | log.Fatal(versicolorDF.Err) 36 | } 37 | 38 | // Output the results to standard out. 39 | fmt.Println(versicolorDF) 40 | 41 | // Filter the dataframe again, but only select out the 42 | // sepal_width and species columns. 43 | versicolorDF = irisDF.Filter(filter).Select([]string{"sepal_width", "species"}) 44 | fmt.Println(versicolorDF) 45 | 46 | // Filter and select the dataframe again, but only display 47 | // the first three results. 48 | versicolorDF = irisDF.Filter(filter).Select([]string{"sepal_width", "species"}).Subset([]int{0, 1, 2}) 49 | fmt.Println(versicolorDF) 50 | 51 | } 52 | -------------------------------------------------------------------------------- /Chapter05/decision_tree/example2/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "math" 7 | "math/rand" 8 | 9 | "github.com/sjwhitworth/golearn/base" 10 | "github.com/sjwhitworth/golearn/ensemble" 11 | "github.com/sjwhitworth/golearn/evaluation" 12 | ) 13 | 14 | func main() { 15 | 16 | // Read in the iris data set into golearn "instances". 17 | irisData, err := base.ParseCSVToInstances("iris.csv", true) 18 | if err != nil { 19 | log.Fatal(err) 20 | } 21 | 22 | // This is to seed the random processes involved in building the 23 | // decision trees. 24 | rand.Seed(44111342) 25 | 26 | // Assemble a random forest with 10 trees and 2 features per tree, 27 | // which is a sane default (number of features per tree is normally set 28 | // to sqrt(number of features)). 29 | rf := ensemble.NewRandomForest(10, 2) 30 | 31 | // Use cross-fold validation to successively train and evaluate the model 32 | // on 5 folds of the data set. 33 | cv, err := evaluation.GenerateCrossFoldValidationConfusionMatrices(irisData, rf, 5) 34 | if err != nil { 35 | log.Fatal(err) 36 | } 37 | 38 | // Get the mean, variance and standard deviation of the accuracy for the 39 | // cross validation. 40 | mean, variance := evaluation.GetCrossValidatedMetric(cv, evaluation.GetAccuracy) 41 | stdev := math.Sqrt(variance) 42 | 43 | // Output the cross metrics to standard out. 
44 | fmt.Printf("\nAccuracy\n%.2f (+/- %.2f)\n\n", mean, stdev*2) 45 | } 46 | -------------------------------------------------------------------------------- /Chapter06/k-means/example2/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "image/color" 5 | "log" 6 | "os" 7 | 8 | "github.com/gonum/plot" 9 | "github.com/gonum/plot/plotter" 10 | "github.com/gonum/plot/vg" 11 | "github.com/kniren/gota/dataframe" 12 | ) 13 | 14 | func main() { 15 | 16 | // Open the driver dataset file. 17 | f, err := os.Open("fleet_data.csv") 18 | if err != nil { 19 | log.Fatal(err) 20 | } 21 | defer f.Close() 22 | 23 | // Create a dataframe from the CSV file. 24 | driverDF := dataframe.ReadCSV(f) 25 | 26 | // Extract the distance column. 27 | yVals := driverDF.Col("Distance_Feature").Float() 28 | 29 | // pts will hold the values for plotting 30 | pts := make(plotter.XYs, driverDF.Nrow()) 31 | 32 | // Fill pts with data. 33 | for i, floatVal := range driverDF.Col("Speeding_Feature").Float() { 34 | pts[i].X = floatVal 35 | pts[i].Y = yVals[i] 36 | } 37 | 38 | // Create the plot. 39 | p, err := plot.New() 40 | if err != nil { 41 | log.Fatal(err) 42 | } 43 | p.X.Label.Text = "Speeding" 44 | p.Y.Label.Text = "Distance" 45 | p.Add(plotter.NewGrid()) 46 | 47 | s, err := plotter.NewScatter(pts) 48 | if err != nil { 49 | log.Fatal(err) 50 | } 51 | s.GlyphStyle.Color = color.RGBA{R: 255, B: 128, A: 255} 52 | s.GlyphStyle.Radius = vg.Points(3) 53 | 54 | // Save the plot to a PNG file. 55 | p.Add(s) 56 | if err := p.Save(4*vg.Inch, 4*vg.Inch, "fleet_data_scatter.png"); err != nil { 57 | log.Fatal(err) 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /Chapter07/anomaly_detection/example1/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | 7 | "github.com/lytics/anomalyzer" 8 | ) 9 | 10 | func main() { 11 | 12 | // Initialize an AnomalyzerConf value with 13 | // configurations such as which anomaly detection 14 | // methods we want to use. 15 | conf := &anomalyzer.AnomalyzerConf{ 16 | Sensitivity: 0.1, 17 | UpperBound: 5, 18 | LowerBound: anomalyzer.NA, // ignore the lower bound 19 | ActiveSize: 1, 20 | NSeasons: 4, 21 | Methods: []string{"diff", "fence", "highrank", "lowrank", "magnitude"}, 22 | } 23 | 24 | // Create a time series of periodic observations 25 | // as a slice of floats. This could come from a 26 | // database or file, as utilized in earlier examples. 27 | ts := []float64{0.1, 0.2, 0.5, 0.12, 0.38, 0.9, 0.74} 28 | 29 | // Create a new anomalyzer based on the existing 30 | // time series values and configuration. 31 | anom, err := anomalyzer.NewAnomalyzer(conf, ts) 32 | if err != nil { 33 | log.Fatal(err) 34 | } 35 | 36 | // Supply a new observed value to the Anomalyzer. 37 | // The Anomalyzer will analyze the value in reference 38 | // to pre-existing values in the series and output 39 | // a probability of the value being anomalous. 
40 | prob := anom.Push(15.2) 41 | fmt.Printf("Probability of 15.2 being anomalous: %0.2f\n", prob) 42 | 43 | prob = anom.Push(0.43) 44 | fmt.Printf("Probability of 0.43 being anomalous: %0.2f\n", prob) 45 | } 46 | -------------------------------------------------------------------------------- /Chapter02/hypothesis_testing/example2/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/gonum/stat" 7 | "github.com/gonum/stat/distuv" 8 | ) 9 | 10 | func main() { 11 | 12 | // Define the observed frequencies. 13 | observed := []float64{ 14 | 260.0, // This number is the number of observed with no regular exercise. 15 | 135.0, // This number is the number of observed with sporadic exercise. 16 | 105.0, // This number is the number of observed with regular exercise. 17 | } 18 | 19 | // Define the total observed. 20 | totalObserved := 500.0 21 | 22 | // Calculate the expected frequencies (again assuming the null hypothesis). 23 | expected := []float64{ 24 | totalObserved * 0.60, 25 | totalObserved * 0.25, 26 | totalObserved * 0.15, 27 | } 28 | 29 | // Calculate the ChiSquare test statistic. 30 | chiSquare := stat.ChiSquare(observed, expected) 31 | 32 | // Output the test statistic to standard out. 33 | fmt.Printf("\nChi-square: %0.2f\n", chiSquare) 34 | 35 | // Create a Chi-squared distribution with K degrees of freedom. 36 | // In this case we have K=3-1=2, because the degrees of freedom 37 | // for a Chi-squared distribution is the number of possible 38 | // categories minus one. 39 | chiDist := distuv.ChiSquared{ 40 | K: 2.0, 41 | Src: nil, 42 | } 43 | 44 | // Calculate the p-value for our specific test statistic. 45 | pValue := chiDist.Prob(chiSquare) 46 | 47 | // Output the p-value to standard out. 48 | fmt.Printf("p-value: %0.4f\n\n", pValue) 49 | } 50 | -------------------------------------------------------------------------------- /Chapter09/building_a_scalable_pipeline/example1/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "log" 5 | 6 | "github.com/pachyderm/pachyderm/src/client" 7 | ) 8 | 9 | func main() { 10 | 11 | // Connect to Pachyderm using the IP of our 12 | // Kubernetes cluster. Here we will use localhost 13 | // to mimic the scenario when you have k8s running 14 | // locally and/or you are forwarding the Pachyderm 15 | // port to your localhost. By default 16 | // Pachyderm will be exposed on port 30650. 17 | c, err := client.NewFromAddress("0.0.0.0:30650") 18 | if err != nil { 19 | log.Fatal(err) 20 | } 21 | defer c.Close() 22 | 23 | // Create a data repository called "training." 24 | if err := c.CreateRepo("training"); err != nil { 25 | log.Fatal(err) 26 | } 27 | 28 | // Create a data repository called "attributes." 29 | if err := c.CreateRepo("attributes"); err != nil { 30 | log.Fatal(err) 31 | } 32 | 33 | // Now, we will list all the current data repositories 34 | // on the Pachyderm cluster as a sanity check. We 35 | // should now have two data repositories. 36 | repos, err := c.ListRepo(nil) 37 | if err != nil { 38 | log.Fatal(err) 39 | } 40 | 41 | // Check that the number of repos is what we expect. 42 | if len(repos) != 2 { 43 | log.Fatal("Unexpected number of data repositories") 44 | } 45 | 46 | // Check that the name of the repo is what we expect. 
47 | if repos[0].Repo.Name != "attributes" || repos[1].Repo.Name != "training" { 48 | log.Fatal("Unexpected data repository name") 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /Chapter03/evaluation/categorical_metrics/example1/labeled.csv: -------------------------------------------------------------------------------- 1 | observed,predicted 2 | 0,0 3 | 0,0 4 | 0,0 5 | 0,0 6 | 0,0 7 | 0,0 8 | 0,0 9 | 0,0 10 | 0,0 11 | 0,0 12 | 0,0 13 | 0,0 14 | 0,0 15 | 0,0 16 | 0,0 17 | 0,0 18 | 0,0 19 | 0,0 20 | 0,0 21 | 0,0 22 | 0,0 23 | 0,0 24 | 0,0 25 | 0,0 26 | 0,0 27 | 0,0 28 | 0,0 29 | 0,0 30 | 0,0 31 | 0,0 32 | 0,0 33 | 0,0 34 | 0,0 35 | 0,0 36 | 0,0 37 | 0,0 38 | 0,0 39 | 0,0 40 | 0,0 41 | 0,0 42 | 0,0 43 | 0,0 44 | 0,0 45 | 0,0 46 | 0,0 47 | 0,0 48 | 0,0 49 | 0,0 50 | 0,0 51 | 0,0 52 | 1,1 53 | 1,1 54 | 1,1 55 | 1,1 56 | 1,1 57 | 1,1 58 | 1,1 59 | 1,1 60 | 1,1 61 | 1,1 62 | 1,1 63 | 1,1 64 | 1,1 65 | 1,1 66 | 1,1 67 | 1,1 68 | 1,1 69 | 1,1 70 | 1,1 71 | 1,1 72 | 1,2 73 | 1,1 74 | 1,2 75 | 1,1 76 | 1,1 77 | 1,1 78 | 1,1 79 | 1,1 80 | 1,1 81 | 1,1 82 | 1,1 83 | 1,1 84 | 1,1 85 | 1,2 86 | 1,1 87 | 1,1 88 | 1,1 89 | 1,1 90 | 1,1 91 | 1,1 92 | 1,1 93 | 1,1 94 | 1,1 95 | 1,1 96 | 1,1 97 | 1,1 98 | 1,1 99 | 1,1 100 | 1,1 101 | 1,1 102 | 2,2 103 | 2,2 104 | 2,2 105 | 2,2 106 | 2,2 107 | 2,2 108 | 2,1 109 | 2,2 110 | 2,2 111 | 2,2 112 | 2,2 113 | 2,2 114 | 2,2 115 | 2,2 116 | 2,2 117 | 2,2 118 | 2,2 119 | 2,2 120 | 2,2 121 | 2,1 122 | 2,2 123 | 2,2 124 | 2,2 125 | 2,2 126 | 2,2 127 | 2,2 128 | 2,2 129 | 2,2 130 | 2,2 131 | 2,2 132 | 2,2 133 | 2,2 134 | 2,2 135 | 2,2 136 | 2,2 137 | 2,2 138 | 2,2 139 | 2,2 140 | 2,2 141 | 2,2 142 | 2,2 143 | 2,2 144 | 2,2 145 | 2,2 146 | 2,2 147 | 2,2 148 | 2,2 149 | 2,2 150 | 2,2 151 | 2,2 152 | -------------------------------------------------------------------------------- /Chapter03/evaluation/categorical_metrics/example2/labeled.csv: -------------------------------------------------------------------------------- 1 | observed,predicted 2 | 0,0 3 | 0,0 4 | 0,0 5 | 0,0 6 | 0,0 7 | 0,0 8 | 0,0 9 | 0,0 10 | 0,0 11 | 0,0 12 | 0,0 13 | 0,0 14 | 0,0 15 | 0,0 16 | 0,0 17 | 0,0 18 | 0,0 19 | 0,0 20 | 0,0 21 | 0,0 22 | 0,0 23 | 0,0 24 | 0,0 25 | 0,0 26 | 0,0 27 | 0,0 28 | 0,0 29 | 0,0 30 | 0,0 31 | 0,0 32 | 0,0 33 | 0,0 34 | 0,0 35 | 0,0 36 | 0,0 37 | 0,0 38 | 0,0 39 | 0,0 40 | 0,0 41 | 0,0 42 | 0,0 43 | 0,0 44 | 0,0 45 | 0,0 46 | 0,0 47 | 0,0 48 | 0,0 49 | 0,0 50 | 0,0 51 | 0,0 52 | 1,1 53 | 1,1 54 | 1,1 55 | 1,1 56 | 1,1 57 | 1,1 58 | 1,1 59 | 1,1 60 | 1,1 61 | 1,1 62 | 1,1 63 | 1,1 64 | 1,1 65 | 1,1 66 | 1,1 67 | 1,1 68 | 1,1 69 | 1,1 70 | 1,1 71 | 1,1 72 | 1,2 73 | 1,1 74 | 1,2 75 | 1,1 76 | 1,1 77 | 1,1 78 | 1,1 79 | 1,1 80 | 1,1 81 | 1,1 82 | 1,1 83 | 1,1 84 | 1,1 85 | 1,2 86 | 1,1 87 | 1,1 88 | 1,1 89 | 1,1 90 | 1,1 91 | 1,1 92 | 1,1 93 | 1,1 94 | 1,1 95 | 1,1 96 | 1,1 97 | 1,1 98 | 1,1 99 | 1,1 100 | 1,1 101 | 1,1 102 | 2,2 103 | 2,2 104 | 2,2 105 | 2,2 106 | 2,2 107 | 2,2 108 | 2,1 109 | 2,2 110 | 2,2 111 | 2,2 112 | 2,2 113 | 2,2 114 | 2,2 115 | 2,2 116 | 2,2 117 | 2,2 118 | 2,2 119 | 2,2 120 | 2,2 121 | 2,1 122 | 2,2 123 | 2,2 124 | 2,2 125 | 2,2 126 | 2,2 127 | 2,2 128 | 2,2 129 | 2,2 130 | 2,2 131 | 2,2 132 | 2,2 133 | 2,2 134 | 2,2 135 | 2,2 136 | 2,2 137 | 2,2 138 | 2,2 139 | 2,2 140 | 2,2 141 | 2,2 142 | 2,2 143 | 2,2 144 | 2,2 145 | 2,2 146 | 2,2 147 | 2,2 148 | 2,2 149 | 2,2 150 | 2,2 151 | 2,2 152 | -------------------------------------------------------------------------------- 
/Chapter07/representing_time_series/example2/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "image/color" 5 | "log" 6 | "os" 7 | 8 | "github.com/gonum/plot" 9 | "github.com/gonum/plot/plotter" 10 | "github.com/gonum/plot/vg" 11 | "github.com/kniren/gota/dataframe" 12 | ) 13 | 14 | func main() { 15 | 16 | // Open the CSV file. 17 | passengersFile, err := os.Open("AirPassengers.csv") 18 | if err != nil { 19 | log.Fatal(err) 20 | } 21 | defer passengersFile.Close() 22 | 23 | // Create a dataframe from the CSV file. 24 | passengersDF := dataframe.ReadCSV(passengersFile) 25 | 26 | // Extract the number of passengers column. 27 | yVals := passengersDF.Col("AirPassengers").Float() 28 | 29 | // pts will hold the values for plotting. 30 | pts := make(plotter.XYs, passengersDF.Nrow()) 31 | 32 | // Fill pts with data. 33 | for i, floatVal := range passengersDF.Col("time").Float() { 34 | pts[i].X = floatVal 35 | pts[i].Y = yVals[i] 36 | } 37 | 38 | // Create the plot. 39 | p, err := plot.New() 40 | if err != nil { 41 | log.Fatal(err) 42 | } 43 | p.X.Label.Text = "time" 44 | p.Y.Label.Text = "passengers" 45 | p.Add(plotter.NewGrid()) 46 | 47 | // Add the line plot points for the time series. 48 | l, err := plotter.NewLine(pts) 49 | if err != nil { 50 | log.Fatal(err) 51 | } 52 | l.LineStyle.Width = vg.Points(1) 53 | l.LineStyle.Color = color.RGBA{B: 255, A: 255} 54 | 55 | // Save the plot to a PNG file. 56 | p.Add(l) 57 | if err := p.Save(10*vg.Inch, 4*vg.Inch, "passengers_ts.png"); err != nil { 58 | log.Fatal(err) 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /Chapter01/sql-like_databases/example2/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "database/sql" 5 | "fmt" 6 | "log" 7 | "os" 8 | 9 | // pq is the library that allows us to connect 10 | // to postgres with databases/sql. 11 | _ "github.com/lib/pq" 12 | ) 13 | 14 | func main() { 15 | 16 | // Get my postgres connection URL. I have it stored in 17 | // an environment variable. 18 | pgURL := os.Getenv("PGURL") 19 | if pgURL == "" { 20 | log.Fatal("PGURL empty") 21 | } 22 | 23 | // Open a database value. Specify the postgres driver 24 | // for databases/sql. 25 | db, err := sql.Open("postgres", pgURL) 26 | if err != nil { 27 | log.Fatal(err) 28 | } 29 | defer db.Close() 30 | 31 | // Query the database. 32 | rows, err := db.Query(` 33 | SELECT 34 | sepal_length as sLength, 35 | sepal_width as sWidth, 36 | petal_length as pLength, 37 | petal_width as pWidth 38 | FROM iris 39 | WHERE species = $1`, "Iris-setosa") 40 | if err != nil { 41 | log.Fatal(err) 42 | } 43 | defer rows.Close() 44 | 45 | // Iterate over the rows, sending the results to 46 | // standard out. 47 | for rows.Next() { 48 | 49 | var ( 50 | sLength float64 51 | sWidth float64 52 | pLength float64 53 | pWidth float64 54 | ) 55 | 56 | if err := rows.Scan(&sLength, &sWidth, &pLength, &pWidth); err != nil { 57 | log.Fatal(err) 58 | } 59 | 60 | fmt.Printf("%.2f, %.2f, %.2f, %.2f\n", sLength, sWidth, pLength, pWidth) 61 | } 62 | 63 | // Check for errors after we are done iterating over rows. 
64 | if err := rows.Err(); err != nil { 65 | log.Fatal(err) 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /Chapter04/linear_regression/example2/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "os" 7 | 8 | "github.com/gonum/plot" 9 | "github.com/gonum/plot/plotter" 10 | "github.com/gonum/plot/vg" 11 | "github.com/kniren/gota/dataframe" 12 | ) 13 | 14 | func main() { 15 | 16 | // Open the advertising dataset file. 17 | f, err := os.Open("Advertising.csv") 18 | if err != nil { 19 | log.Fatal(err) 20 | } 21 | defer f.Close() 22 | 23 | // Create a dataframe from the CSV file. 24 | advertDF := dataframe.ReadCSV(f) 25 | 26 | // Create a histogram for each of the columns in the dataset. 27 | for _, colName := range advertDF.Names() { 28 | 29 | // Create a plotter.Values value and fill it with the 30 | // values from the respective column of the dataframe. 31 | plotVals := make(plotter.Values, advertDF.Nrow()) 32 | for i, floatVal := range advertDF.Col(colName).Float() { 33 | plotVals[i] = floatVal 34 | } 35 | 36 | // Make a plot and set its title. 37 | p, err := plot.New() 38 | if err != nil { 39 | log.Fatal(err) 40 | } 41 | p.Title.Text = fmt.Sprintf("Histogram of a %s", colName) 42 | 43 | // Create a histogram of our values drawn 44 | // from the standard normal. 45 | h, err := plotter.NewHist(plotVals, 16) 46 | if err != nil { 47 | log.Fatal(err) 48 | } 49 | 50 | // Normalize the histogram. 51 | h.Normalize(1) 52 | 53 | // Add the histogram to the plot. 54 | p.Add(h) 55 | 56 | // Save the plot to a PNG file. 57 | if err := p.Save(4*vg.Inch, 4*vg.Inch, colName+"_hist.png"); err != nil { 58 | log.Fatal(err) 59 | } 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /Chapter04/linear_regression/example3/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "image/color" 5 | "log" 6 | "os" 7 | 8 | "github.com/gonum/plot" 9 | "github.com/gonum/plot/plotter" 10 | "github.com/gonum/plot/vg" 11 | "github.com/kniren/gota/dataframe" 12 | ) 13 | 14 | func main() { 15 | 16 | // Open the advertising dataset file. 17 | f, err := os.Open("Advertising.csv") 18 | if err != nil { 19 | log.Fatal(err) 20 | } 21 | defer f.Close() 22 | 23 | // Create a dataframe from the CSV file. 24 | advertDF := dataframe.ReadCSV(f) 25 | 26 | // Extract the target column. 27 | yVals := advertDF.Col("Sales").Float() 28 | 29 | // Create a scatter plot for each of the features in the dataset. 30 | for _, colName := range advertDF.Names() { 31 | 32 | // pts will hold the values for plotting 33 | pts := make(plotter.XYs, advertDF.Nrow()) 34 | 35 | // Fill pts with data. 36 | for i, floatVal := range advertDF.Col(colName).Float() { 37 | pts[i].X = floatVal 38 | pts[i].Y = yVals[i] 39 | } 40 | 41 | // Create the plot. 42 | p, err := plot.New() 43 | if err != nil { 44 | log.Fatal(err) 45 | } 46 | p.X.Label.Text = colName 47 | p.Y.Label.Text = "y" 48 | p.Add(plotter.NewGrid()) 49 | 50 | s, err := plotter.NewScatter(pts) 51 | if err != nil { 52 | log.Fatal(err) 53 | } 54 | s.GlyphStyle.Color = color.RGBA{R: 255, B: 128, A: 255} 55 | s.GlyphStyle.Radius = vg.Points(3) 56 | 57 | // Save the plot to a PNG file. 
58 | p.Add(s) 59 | if err := p.Save(4*vg.Inch, 4*vg.Inch, colName+"_scatter.png"); err != nil { 60 | log.Fatal(err) 61 | } 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /Chapter05/naive_bayes/example1/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | 7 | "github.com/sjwhitworth/golearn/base" 8 | "github.com/sjwhitworth/golearn/evaluation" 9 | "github.com/sjwhitworth/golearn/filters" 10 | "github.com/sjwhitworth/golearn/naive" 11 | ) 12 | 13 | func main() { 14 | 15 | // Read in the loan training data set into golearn "instances". 16 | trainingData, err := base.ParseCSVToInstances("training.csv", true) 17 | if err != nil { 18 | log.Fatal(err) 19 | } 20 | 21 | // Initialize a new Naive Bayes classifier. 22 | nb := naive.NewBernoulliNBClassifier() 23 | 24 | // Fit the Naive Bayes classifier. 25 | nb.Fit(convertToBinary(trainingData)) 26 | 27 | // Read in the loan test data set into golearn "instances". 28 | testData, err := base.ParseCSVToInstances("test.csv", true) 29 | if err != nil { 30 | log.Fatal(err) 31 | } 32 | 33 | // Make our predictions. 34 | predictions := nb.Predict(convertToBinary(testData)) 35 | 36 | // Generate a Confusion Matrix. 37 | cm, err := evaluation.GetConfusionMatrix(testData, predictions) 38 | if err != nil { 39 | log.Fatal(err) 40 | } 41 | 42 | // Retrieve the accuracy. 43 | accuracy := evaluation.GetAccuracy(cm) 44 | fmt.Printf("\nAccuracy: %0.2f\n\n", accuracy) 45 | } 46 | 47 | // convertToBinary utilizes built in golearn functionality to 48 | // convert our labels to a binary label format. 49 | func convertToBinary(src base.FixedDataGrid) base.FixedDataGrid { 50 | b := filters.NewBinaryConvertFilter() 51 | attrs := base.NonClassAttributes(src) 52 | for _, a := range attrs { 53 | b.AddAttribute(a) 54 | } 55 | b.Train() 56 | ret := base.NewLazilyFilteredInstances(src, b) 57 | return ret 58 | } 59 | -------------------------------------------------------------------------------- /Chapter09/building_a_scalable_pipeline/example2/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "log" 5 | "os" 6 | 7 | "github.com/pachyderm/pachyderm/src/client" 8 | ) 9 | 10 | func main() { 11 | 12 | // Connect to Pachyderm on our localhost. By default 13 | // Pachyderm will be exposed on port 30650. 14 | c, err := client.NewFromAddress("0.0.0.0:30650") 15 | if err != nil { 16 | log.Fatal(err) 17 | } 18 | defer c.Close() 19 | 20 | // Start a commit in our "attributes" data repo on the "master" branch. 21 | commit, err := c.StartCommit("attributes", "master") 22 | if err != nil { 23 | log.Fatal(err) 24 | } 25 | 26 | // Open one of the attributes JSON files. 27 | f, err := os.Open("1.json") 28 | if err != nil { 29 | log.Fatal(err) 30 | } 31 | 32 | // Put a file containing the attributes into the data repository. 33 | if _, err := c.PutFile("attributes", commit.ID, "1.json", f); err != nil { 34 | log.Fatal(err) 35 | } 36 | 37 | // Finish the commit. 38 | if err := c.FinishCommit("attributes", commit.ID); err != nil { 39 | log.Fatal(err) 40 | } 41 | 42 | // Start a commit in our "training" data repo on the "master" branch. 43 | commit, err = c.StartCommit("training", "master") 44 | if err != nil { 45 | log.Fatal(err) 46 | } 47 | 48 | // Open up the training data set. 
49 | f, err = os.Open("diabetes.csv") 50 | if err != nil { 51 | log.Fatal(err) 52 | } 53 | 54 | // Put a file containing the training data set into the data repository. 55 | if _, err := c.PutFile("training", commit.ID, "diabetes.csv", f); err != nil { 56 | log.Fatal(err) 57 | } 58 | 59 | // Finish the commit. 60 | if err := c.FinishCommit("training", commit.ID); err != nil { 61 | log.Fatal(err) 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /Chapter07/statistics/example1/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "math" 7 | "os" 8 | 9 | "github.com/gonum/stat" 10 | "github.com/kniren/gota/dataframe" 11 | ) 12 | 13 | func main() { 14 | 15 | // Open the CSV file. 16 | passengersFile, err := os.Open("AirPassengers.csv") 17 | if err != nil { 18 | log.Fatal(err) 19 | } 20 | defer passengersFile.Close() 21 | 22 | // Create a dataframe from the CSV file. 23 | passengersDF := dataframe.ReadCSV(passengersFile) 24 | 25 | // Get the time and passengers as a slice of floats. 26 | passengers := passengersDF.Col("AirPassengers").Float() 27 | 28 | // Loop over various values of lag in the series. 29 | fmt.Println("Autocorrelation:") 30 | for i := 1; i < 11; i++ { 31 | 32 | // Calculate the autocorrelation. 33 | ac := acf(passengers, i) 34 | fmt.Printf("Lag %d period: %0.2f\n", i, ac) 35 | } 36 | } 37 | 38 | // acf calculates the autocorrelation for a series 39 | // at the given lag. 40 | func acf(x []float64, lag int) float64 { 41 | 42 | // Shift the series. 43 | xAdj := x[lag:len(x)] 44 | xLag := x[0 : len(x)-lag] 45 | 46 | // numerator will hold our accumulated numerator, and 47 | // denominator will hold our accumulated denominator. 48 | var numerator float64 49 | var denominator float64 50 | 51 | // Calculate the mean of our x values, which will be used 52 | // in each term of the autocorrelation. 53 | xBar := stat.Mean(x, nil) 54 | 55 | // Calculate the numerator. 56 | for idx, xVal := range xAdj { 57 | numerator += ((xVal - xBar) * (xLag[idx] - xBar)) 58 | } 59 | 60 | // Calculate the denominator. 61 | for _, xVal := range x { 62 | denominator += math.Pow(xVal-xBar, 2) 63 | } 64 | 65 | return numerator / denominator 66 | } 67 | -------------------------------------------------------------------------------- /Chapter04/linear_regression/example5/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/csv" 5 | "fmt" 6 | "log" 7 | "os" 8 | "strconv" 9 | 10 | "github.com/sajari/regression" 11 | ) 12 | 13 | func main() { 14 | 15 | // Open the training dataset file. 16 | f, err := os.Open("training.csv") 17 | if err != nil { 18 | log.Fatal(err) 19 | } 20 | defer f.Close() 21 | 22 | // Create a new CSV reader reading from the opened file. 23 | reader := csv.NewReader(f) 24 | 25 | // Read in all of the CSV records 26 | reader.FieldsPerRecord = 4 27 | trainingData, err := reader.ReadAll() 28 | if err != nil { 29 | log.Fatal(err) 30 | } 31 | 32 | // In this case we are going to try and model our Sales (y) 33 | // by the TV feature plus an intercept. As such, let's create 34 | // the struct needed to train a model using github.com/sajari/regression. 35 | var r regression.Regression 36 | r.SetObserved("Sales") 37 | r.SetVar(0, "TV") 38 | 39 | // Loop of records in the CSV, adding the training data to the regression value. 
40 | for i, record := range trainingData { 41 | 42 | // Skip the header. 43 | if i == 0 { 44 | continue 45 | } 46 | 47 | // Parse the Sales rogression measure, or "y". 48 | yVal, err := strconv.ParseFloat(record[3], 64) 49 | if err != nil { 50 | log.Fatal(err) 51 | } 52 | 53 | // Parse the TV value. 54 | tvVal, err := strconv.ParseFloat(record[0], 64) 55 | if err != nil { 56 | log.Fatal(err) 57 | } 58 | 59 | // Add these points to the regression value. 60 | r.Train(regression.DataPoint(yVal, []float64{tvVal})) 61 | } 62 | 63 | // Train/fit the regression model. 64 | r.Run() 65 | 66 | // Output the trained model parameters. 67 | fmt.Printf("\nRegression Formula:\n%v\n\n", r.Formula) 68 | } 69 | -------------------------------------------------------------------------------- /Chapter02/statistical_visualizations/example1/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "os" 7 | 8 | "github.com/gonum/plot" 9 | "github.com/gonum/plot/plotter" 10 | "github.com/gonum/plot/vg" 11 | "github.com/kniren/gota/dataframe" 12 | ) 13 | 14 | func main() { 15 | 16 | // Open the CSV file. 17 | irisFile, err := os.Open("../data/iris.csv") 18 | if err != nil { 19 | log.Fatal(err) 20 | } 21 | defer irisFile.Close() 22 | 23 | // Create a dataframe from the CSV file. 24 | irisDF := dataframe.ReadCSV(irisFile) 25 | 26 | // Create a histogram for each of the feature columns in the dataset. 27 | for _, colName := range irisDF.Names() { 28 | 29 | // If the column is one of the feature columns, let's create 30 | // a histogram of the values. 31 | if colName != "species" { 32 | 33 | // Create a plotter.Values value and fill it with the 34 | // values from the respective column of the dataframe. 35 | v := make(plotter.Values, irisDF.Nrow()) 36 | for i, floatVal := range irisDF.Col(colName).Float() { 37 | v[i] = floatVal 38 | } 39 | 40 | // Make a plot and set its title. 41 | p, err := plot.New() 42 | if err != nil { 43 | log.Fatal(err) 44 | } 45 | p.Title.Text = fmt.Sprintf("Histogram of a %s", colName) 46 | 47 | // Create a histogram of our values drawn 48 | // from the standard normal. 49 | h, err := plotter.NewHist(v, 16) 50 | if err != nil { 51 | log.Fatal(err) 52 | } 53 | 54 | // Normalize the histogram. 55 | h.Normalize(1) 56 | 57 | // Add the histogram to the plot. 58 | p.Add(h) 59 | 60 | // Save the plot to a PNG file. 61 | if err := p.Save(4*vg.Inch, 4*vg.Inch, colName+"_hist.png"); err != nil { 62 | log.Fatal(err) 63 | } 64 | } 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /Chapter06/k-means/example3/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/csv" 5 | "fmt" 6 | "io" 7 | "log" 8 | "os" 9 | "strconv" 10 | 11 | "github.com/mash/gokmeans" 12 | ) 13 | 14 | func main() { 15 | 16 | // Open the driver dataset file. 17 | f, err := os.Open("fleet_data.csv") 18 | if err != nil { 19 | log.Fatal(err) 20 | } 21 | defer f.Close() 22 | 23 | // Create a new CSV reader. 24 | r := csv.NewReader(f) 25 | r.FieldsPerRecord = 3 26 | 27 | // Initialize a slice of gokmeans.Node's to 28 | // hold our input data. 29 | var data []gokmeans.Node 30 | 31 | // Loop over the records creating our slice of 32 | // gokmeans.Node's. 33 | for { 34 | 35 | // Read in our record and check for errors. 
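// io.EOF simply signals the end of the input file and ends the loop; any other
// read error is treated as fatal.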
36 | record, err := r.Read() 37 | if err == io.EOF { 38 | break 39 | } 40 | if err != nil { 41 | log.Fatal(err) 42 | } 43 | 44 | // Skip the header. 45 | if record[0] == "Driver_ID" { 46 | continue 47 | } 48 | 49 | // Initialize a point. 50 | var point []float64 51 | 52 | // Fill in our point. 53 | for i := 1; i < 3; i++ { 54 | 55 | // Parse the float value. 56 | val, err := strconv.ParseFloat(record[i], 64) 57 | if err != nil { 58 | log.Fatal(err) 59 | } 60 | 61 | // Append this value to our point. 62 | point = append(point, val) 63 | } 64 | 65 | // Append our point to the data. 66 | data = append(data, gokmeans.Node{point[0], point[1]}) 67 | } 68 | 69 | // Generate our clusters with k-means. 70 | success, centroids := gokmeans.Train(data, 2, 50) 71 | if !success { 72 | log.Fatal("Could not generate clusters") 73 | } 74 | 75 | // Output the centroids to stdout. 76 | fmt.Println("The centroids for our clusters are:") 77 | for _, centroid := range centroids { 78 | fmt.Println(centroid) 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /Chapter04/multiple_regression/example1/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/csv" 5 | "fmt" 6 | "log" 7 | "os" 8 | "strconv" 9 | 10 | "github.com/sajari/regression" 11 | ) 12 | 13 | func main() { 14 | 15 | // Open the training dataset file. 16 | f, err := os.Open("training.csv") 17 | if err != nil { 18 | log.Fatal(err) 19 | } 20 | defer f.Close() 21 | 22 | // Create a new CSV reader reading from the opened file. 23 | reader := csv.NewReader(f) 24 | 25 | // Read in all of the CSV records 26 | reader.FieldsPerRecord = 4 27 | trainingData, err := reader.ReadAll() 28 | if err != nil { 29 | log.Fatal(err) 30 | } 31 | 32 | // In this case we are going to try and model our Sales 33 | // by the TV and Radio features plus an intercept. 34 | var r regression.Regression 35 | r.SetObserved("Sales") 36 | r.SetVar(0, "TV") 37 | r.SetVar(1, "Radio") 38 | 39 | // Loop over the CSV records adding the training data. 40 | for i, record := range trainingData { 41 | 42 | // Skip the header. 43 | if i == 0 { 44 | continue 45 | } 46 | 47 | // Parse the Sales. 48 | yVal, err := strconv.ParseFloat(record[3], 64) 49 | if err != nil { 50 | log.Fatal(err) 51 | } 52 | 53 | // Parse the TV value. 54 | tvVal, err := strconv.ParseFloat(record[0], 64) 55 | if err != nil { 56 | log.Fatal(err) 57 | } 58 | 59 | // Parse the Radio value. 60 | radioVal, err := strconv.ParseFloat(record[1], 64) 61 | if err != nil { 62 | log.Fatal(err) 63 | } 64 | 65 | // Add these points to the regression value. 66 | r.Train(regression.DataPoint(yVal, []float64{tvVal, radioVal})) 67 | } 68 | 69 | // Train/fit the regression model. 70 | r.Run() 71 | 72 | // Output the trained model parameters. 
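// r.Formula is a human-readable rendering of the fitted model, that is, the intercept
// plus the TV and Radio coefficients found by r.Run above.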
73 | fmt.Printf("\nRegression Formula:\n%v\n\n", r.Formula) 74 | } 75 | -------------------------------------------------------------------------------- /Chapter04/linear_regression/example6/test.csv: -------------------------------------------------------------------------------- 1 | TV,Radio,Newspaper,Sales 2 | 172.500000,18.100000,30.700000,14.400000 3 | 85.700000,35.800000,49.300000,13.300000 4 | 188.400000,18.100000,25.600000,14.900000 5 | 163.500000,36.800000,7.400000,18.000000 6 | 117.200000,14.700000,5.400000,11.900000 7 | 234.500000,3.400000,84.800000,11.900000 8 | 17.900000,37.600000,21.600000,8.000000 9 | 206.800000,5.200000,19.400000,12.200000 10 | 215.400000,23.600000,57.600000,17.100000 11 | 284.300000,10.600000,6.400000,15.000000 12 | 50.000000,11.600000,18.400000,8.400000 13 | 164.500000,20.900000,47.400000,14.500000 14 | 19.600000,20.100000,17.000000,7.600000 15 | 168.400000,7.100000,12.800000,11.700000 16 | 222.400000,3.400000,13.100000,11.500000 17 | 276.900000,48.900000,41.800000,27.000000 18 | 248.400000,30.200000,20.300000,20.200000 19 | 170.200000,7.800000,35.200000,11.700000 20 | 276.700000,2.300000,23.700000,11.800000 21 | 165.600000,10.000000,17.600000,12.600000 22 | 156.600000,2.600000,8.300000,10.500000 23 | 218.500000,5.400000,27.400000,12.200000 24 | 56.200000,5.700000,29.700000,8.700000 25 | 287.600000,43.000000,71.800000,26.200000 26 | 253.800000,21.300000,30.000000,17.600000 27 | 205.000000,45.100000,19.600000,22.600000 28 | 139.500000,2.100000,26.600000,10.300000 29 | 191.100000,28.700000,18.200000,17.300000 30 | 286.000000,13.900000,3.700000,15.900000 31 | 18.700000,12.100000,23.400000,6.700000 32 | 39.500000,41.100000,5.800000,10.800000 33 | 75.500000,10.800000,6.000000,9.900000 34 | 17.200000,4.100000,31.600000,5.900000 35 | 166.800000,42.000000,3.600000,19.600000 36 | 149.700000,35.600000,6.000000,17.300000 37 | 38.200000,3.700000,13.800000,7.600000 38 | 94.200000,4.900000,8.100000,9.700000 39 | 177.000000,9.300000,6.400000,12.800000 40 | 283.600000,42.000000,66.200000,25.500000 41 | 232.100000,8.600000,8.700000,13.400000 42 | -------------------------------------------------------------------------------- /Chapter04/multiple_regression/example2/test.csv: -------------------------------------------------------------------------------- 1 | TV,Radio,Newspaper,Sales 2 | 172.500000,18.100000,30.700000,14.400000 3 | 85.700000,35.800000,49.300000,13.300000 4 | 188.400000,18.100000,25.600000,14.900000 5 | 163.500000,36.800000,7.400000,18.000000 6 | 117.200000,14.700000,5.400000,11.900000 7 | 234.500000,3.400000,84.800000,11.900000 8 | 17.900000,37.600000,21.600000,8.000000 9 | 206.800000,5.200000,19.400000,12.200000 10 | 215.400000,23.600000,57.600000,17.100000 11 | 284.300000,10.600000,6.400000,15.000000 12 | 50.000000,11.600000,18.400000,8.400000 13 | 164.500000,20.900000,47.400000,14.500000 14 | 19.600000,20.100000,17.000000,7.600000 15 | 168.400000,7.100000,12.800000,11.700000 16 | 222.400000,3.400000,13.100000,11.500000 17 | 276.900000,48.900000,41.800000,27.000000 18 | 248.400000,30.200000,20.300000,20.200000 19 | 170.200000,7.800000,35.200000,11.700000 20 | 276.700000,2.300000,23.700000,11.800000 21 | 165.600000,10.000000,17.600000,12.600000 22 | 156.600000,2.600000,8.300000,10.500000 23 | 218.500000,5.400000,27.400000,12.200000 24 | 56.200000,5.700000,29.700000,8.700000 25 | 287.600000,43.000000,71.800000,26.200000 26 | 253.800000,21.300000,30.000000,17.600000 27 | 205.000000,45.100000,19.600000,22.600000 28 | 139.500000,2.100000,26.600000,10.300000 
29 | 191.100000,28.700000,18.200000,17.300000 30 | 286.000000,13.900000,3.700000,15.900000 31 | 18.700000,12.100000,23.400000,6.700000 32 | 39.500000,41.100000,5.800000,10.800000 33 | 75.500000,10.800000,6.000000,9.900000 34 | 17.200000,4.100000,31.600000,5.900000 35 | 166.800000,42.000000,3.600000,19.600000 36 | 149.700000,35.600000,6.000000,17.300000 37 | 38.200000,3.700000,13.800000,7.600000 38 | 94.200000,4.900000,8.100000,9.700000 39 | 177.000000,9.300000,6.400000,12.800000 40 | 283.600000,42.000000,66.200000,25.500000 41 | 232.100000,8.600000,8.700000,13.400000 42 | -------------------------------------------------------------------------------- /Chapter04/non-linear_regression/example3/test.csv: -------------------------------------------------------------------------------- 1 | TV,Radio,Newspaper,Sales 2 | 172.500000,18.100000,30.700000,14.400000 3 | 85.700000,35.800000,49.300000,13.300000 4 | 188.400000,18.100000,25.600000,14.900000 5 | 163.500000,36.800000,7.400000,18.000000 6 | 117.200000,14.700000,5.400000,11.900000 7 | 234.500000,3.400000,84.800000,11.900000 8 | 17.900000,37.600000,21.600000,8.000000 9 | 206.800000,5.200000,19.400000,12.200000 10 | 215.400000,23.600000,57.600000,17.100000 11 | 284.300000,10.600000,6.400000,15.000000 12 | 50.000000,11.600000,18.400000,8.400000 13 | 164.500000,20.900000,47.400000,14.500000 14 | 19.600000,20.100000,17.000000,7.600000 15 | 168.400000,7.100000,12.800000,11.700000 16 | 222.400000,3.400000,13.100000,11.500000 17 | 276.900000,48.900000,41.800000,27.000000 18 | 248.400000,30.200000,20.300000,20.200000 19 | 170.200000,7.800000,35.200000,11.700000 20 | 276.700000,2.300000,23.700000,11.800000 21 | 165.600000,10.000000,17.600000,12.600000 22 | 156.600000,2.600000,8.300000,10.500000 23 | 218.500000,5.400000,27.400000,12.200000 24 | 56.200000,5.700000,29.700000,8.700000 25 | 287.600000,43.000000,71.800000,26.200000 26 | 253.800000,21.300000,30.000000,17.600000 27 | 205.000000,45.100000,19.600000,22.600000 28 | 139.500000,2.100000,26.600000,10.300000 29 | 191.100000,28.700000,18.200000,17.300000 30 | 286.000000,13.900000,3.700000,15.900000 31 | 18.700000,12.100000,23.400000,6.700000 32 | 39.500000,41.100000,5.800000,10.800000 33 | 75.500000,10.800000,6.000000,9.900000 34 | 17.200000,4.100000,31.600000,5.900000 35 | 166.800000,42.000000,3.600000,19.600000 36 | 149.700000,35.600000,6.000000,17.300000 37 | 38.200000,3.700000,13.800000,7.600000 38 | 94.200000,4.900000,8.100000,9.700000 39 | 177.000000,9.300000,6.400000,12.800000 40 | 283.600000,42.000000,66.200000,25.500000 41 | 232.100000,8.600000,8.700000,13.400000 42 | -------------------------------------------------------------------------------- /Chapter05/logistic_regression/example4/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "os" 7 | 8 | "github.com/gonum/plot" 9 | "github.com/gonum/plot/plotter" 10 | "github.com/gonum/plot/vg" 11 | "github.com/kniren/gota/dataframe" 12 | ) 13 | 14 | func main() { 15 | 16 | // Open the CSV file. 17 | loanDataFile, err := os.Open("clean_loan_data.csv") 18 | if err != nil { 19 | log.Fatal(err) 20 | } 21 | defer loanDataFile.Close() 22 | 23 | // Create a dataframe from the CSV file. 24 | loanDF := dataframe.ReadCSV(loanDataFile) 25 | 26 | // Use the Describe method to calculate summary statistics 27 | // for all of the columns in one shot. 28 | loanSummary := loanDF.Describe() 29 | 30 | // Output the summary statistics to stdout. 
31 | fmt.Println(loanSummary) 32 | 33 | // Create a histogram for each of the columns in the dataset. 34 | for _, colName := range loanDF.Names() { 35 | 36 | // Create a plotter.Values value and fill it with the 37 | // values from the respective column of the dataframe. 38 | plotVals := make(plotter.Values, loanDF.Nrow()) 39 | for i, floatVal := range loanDF.Col(colName).Float() { 40 | plotVals[i] = floatVal 41 | } 42 | 43 | // Make a plot and set its title. 44 | p, err := plot.New() 45 | if err != nil { 46 | log.Fatal(err) 47 | } 48 | p.Title.Text = fmt.Sprintf("Histogram of %s", colName) 49 | 50 | // Create a histogram of our values. 51 | h, err := plotter.NewHist(plotVals, 16) 52 | if err != nil { 53 | log.Fatal(err) 54 | } 55 | 56 | // Normalize the histogram. 57 | h.Normalize(1) 58 | 59 | // Add the histogram to the plot. 60 | p.Add(h) 61 | 62 | // Save the plot to a PNG file. 63 | if err := p.Save(4*vg.Inch, 4*vg.Inch, colName+"_hist.png"); err != nil { 64 | log.Fatal(err) 65 | } 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /Chapter06/evaluating/example1/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "os" 7 | 8 | "github.com/kniren/gota/dataframe" 9 | ) 10 | 11 | type centroid []float64 12 | 13 | func main() { 14 | 15 | // Pull in the CSV file. 16 | irisFile, err := os.Open("iris.csv") 17 | if err != nil { 18 | log.Fatal(err) 19 | } 20 | defer irisFile.Close() 21 | 22 | // Create a dataframe from the CSV file. 23 | irisDF := dataframe.ReadCSV(irisFile) 24 | 25 | // Define the names of the three separate species contained in the CSV file. 26 | speciesNames := []string{ 27 | "Iris-setosa", 28 | "Iris-versicolor", 29 | "Iris-virginica", 30 | } 31 | 32 | // Create a map to hold our centroid information. 33 | centroids := make(map[string]centroid) 34 | 35 | // Filter the dataset into three separate dataframes, 36 | // each corresponding to one of the Iris species. 37 | for _, species := range speciesNames { 38 | 39 | // Filer the original dataset. 40 | filter := dataframe.F{ 41 | Colname: "species", 42 | Comparator: "==", 43 | Comparando: species, 44 | } 45 | filtered := irisDF.Filter(filter) 46 | 47 | // Calculate the mean of features. 48 | summaryDF := filtered.Describe() 49 | 50 | // Put each dimension's mean into the corresponding centroid. 51 | var c centroid 52 | for _, feature := range summaryDF.Names() { 53 | 54 | // Skip the irrelevant columns. 55 | if feature == "column" || feature == "species" { 56 | continue 57 | } 58 | c = append(c, summaryDF.Col(feature).Float()[0]) 59 | } 60 | 61 | // Add this centroid to our map. 62 | centroids[species] = c 63 | } 64 | 65 | // As a sanity check, output our centroids. 66 | for _, species := range speciesNames { 67 | fmt.Printf("%s centroid: %v\n", species, centroids[species]) 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /Chapter06/k-means/example1/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "os" 7 | 8 | "github.com/gonum/plot" 9 | "github.com/gonum/plot/plotter" 10 | "github.com/gonum/plot/vg" 11 | "github.com/kniren/gota/dataframe" 12 | ) 13 | 14 | func main() { 15 | 16 | // Open the CSV file. 
17 | driverDataFile, err := os.Open("fleet_data.csv") 18 | if err != nil { 19 | log.Fatal(err) 20 | } 21 | defer driverDataFile.Close() 22 | 23 | // Create a dataframe from the CSV file. 24 | driverDF := dataframe.ReadCSV(driverDataFile) 25 | 26 | // Use the Describe method to calculate summary statistics 27 | // for all of the columns in one shot. 28 | driverSummary := driverDF.Describe() 29 | 30 | // Output the summary statistics to stdout. 31 | fmt.Println(driverSummary) 32 | 33 | // Create a histogram for each of the columns in the dataset. 34 | for _, colName := range driverDF.Names() { 35 | 36 | // Create a plotter.Values value and fill it with the 37 | // values from the respective column of the dataframe. 38 | plotVals := make(plotter.Values, driverDF.Nrow()) 39 | for i, floatVal := range driverDF.Col(colName).Float() { 40 | plotVals[i] = floatVal 41 | } 42 | 43 | // Make a plot and set its title. 44 | p, err := plot.New() 45 | if err != nil { 46 | log.Fatal(err) 47 | } 48 | p.Title.Text = fmt.Sprintf("Histogram of %s", colName) 49 | 50 | // Create a histogram of our values. 51 | h, err := plotter.NewHist(plotVals, 16) 52 | if err != nil { 53 | log.Fatal(err) 54 | } 55 | 56 | // Normalize the histogram. 57 | h.Normalize(1) 58 | 59 | // Add the histogram to the plot. 60 | p.Add(h) 61 | 62 | // Save the plot to a PNG file. 63 | if err := p.Save(4*vg.Inch, 4*vg.Inch, colName+"_hist.png"); err != nil { 64 | log.Fatal(err) 65 | } 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /Chapter02/statistical_visualizations/example2/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "log" 5 | "os" 6 | 7 | "github.com/gonum/plot" 8 | "github.com/gonum/plot/plotter" 9 | "github.com/gonum/plot/vg" 10 | "github.com/kniren/gota/dataframe" 11 | ) 12 | 13 | func main() { 14 | 15 | // Open the CSV file. 16 | irisFile, err := os.Open("../data/iris.csv") 17 | if err != nil { 18 | log.Fatal(err) 19 | } 20 | defer irisFile.Close() 21 | 22 | // Create a dataframe from the CSV file. 23 | irisDF := dataframe.ReadCSV(irisFile) 24 | 25 | // Create the plot and set its title and axis label. 26 | p, err := plot.New() 27 | if err != nil { 28 | log.Fatal(err) 29 | } 30 | 31 | p.Title.Text = "Box plots" 32 | p.Y.Label.Text = "Values" 33 | 34 | // Create the box for our data. 35 | w := vg.Points(50) 36 | 37 | // Create a box plot for each of the feature columns in the dataset. 38 | for idx, colName := range irisDF.Names() { 39 | 40 | // If the column is one of the feature columns, let's create 41 | // a histogram of the values. 42 | if colName != "species" { 43 | 44 | // Create a plotter.Values value and fill it with the 45 | // values from the respective column of the dataframe. 46 | v := make(plotter.Values, irisDF.Nrow()) 47 | for i, floatVal := range irisDF.Col(colName).Float() { 48 | v[i] = floatVal 49 | } 50 | 51 | // Add the data to the plot. 52 | b, err := plotter.NewBoxPlot(w, float64(idx), v) 53 | if err != nil { 54 | log.Fatal(err) 55 | } 56 | p.Add(b) 57 | } 58 | } 59 | 60 | // Set the X axis of the plot to nominal with 61 | // the given names for x=0, x=1, etc. 
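// NominalX swaps the numeric tick marks for these category names, so the box plots
// drawn at x = 0, 1, 2, 3 above are labeled with their feature names.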
62 | p.NominalX("sepal_length", "sepal_width", "petal_length", "petal_width") 63 | 64 | if err := p.Save(6*vg.Inch, 8*vg.Inch, "boxplots.png"); err != nil { 65 | log.Fatal(err) 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /Chapter05/logistic_regression/example5/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "log" 6 | "os" 7 | 8 | "github.com/kniren/gota/dataframe" 9 | ) 10 | 11 | func main() { 12 | 13 | // Open the clean loan dataset file. 14 | f, err := os.Open("clean_loan_data.csv") 15 | if err != nil { 16 | log.Fatal(err) 17 | } 18 | defer f.Close() 19 | 20 | // Create a dataframe from the CSV file. 21 | // The types of the columns will be inferred. 22 | loanDF := dataframe.ReadCSV(f) 23 | 24 | // Calculate the number of elements in each set. 25 | trainingNum := (4 * loanDF.Nrow()) / 5 26 | testNum := loanDF.Nrow() / 5 27 | if trainingNum+testNum < loanDF.Nrow() { 28 | trainingNum++ 29 | } 30 | 31 | // Create the subset indices. 32 | trainingIdx := make([]int, trainingNum) 33 | testIdx := make([]int, testNum) 34 | 35 | // Enumerate the training indices. 36 | for i := 0; i < trainingNum; i++ { 37 | trainingIdx[i] = i 38 | } 39 | 40 | // Enumerate the test indices. 41 | for i := 0; i < testNum; i++ { 42 | testIdx[i] = trainingNum + i 43 | } 44 | 45 | // Create the subset dataframes. 46 | trainingDF := loanDF.Subset(trainingIdx) 47 | testDF := loanDF.Subset(testIdx) 48 | 49 | // Create a map that will be used in writing the data 50 | // to files. 51 | setMap := map[int]dataframe.DataFrame{ 52 | 0: trainingDF, 53 | 1: testDF, 54 | } 55 | 56 | // Create the respective files. 57 | for idx, setName := range []string{"training.csv", "test.csv"} { 58 | 59 | // Save the filtered dataset file. 60 | f, err := os.Create(setName) 61 | if err != nil { 62 | log.Fatal(err) 63 | } 64 | 65 | // Create a buffered writer. 66 | w := bufio.NewWriter(f) 67 | 68 | // Write the dataframe out as a CSV. 69 | if err := setMap[idx].WriteCSV(w); err != nil { 70 | log.Fatal(err) 71 | } 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /Chapter04/linear_regression/example4/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "log" 6 | "os" 7 | 8 | "github.com/kniren/gota/dataframe" 9 | ) 10 | 11 | func main() { 12 | 13 | // Open the advertising dataset file. 14 | f, err := os.Open("Advertising.csv") 15 | if err != nil { 16 | log.Fatal(err) 17 | } 18 | defer f.Close() 19 | 20 | // Create a dataframe from the CSV file. 21 | // The types of the columns will be inferred. 22 | advertDF := dataframe.ReadCSV(f) 23 | 24 | // Calculate the number of elements in each set. 25 | trainingNum := (4 * advertDF.Nrow()) / 5 26 | testNum := advertDF.Nrow() / 5 27 | if trainingNum+testNum < advertDF.Nrow() { 28 | trainingNum++ 29 | } 30 | 31 | // Create the subset indices. 32 | trainingIdx := make([]int, trainingNum) 33 | testIdx := make([]int, testNum) 34 | 35 | // Enumerate the training indices. 36 | for i := 0; i < trainingNum; i++ { 37 | trainingIdx[i] = i 38 | } 39 | 40 | // Enumerate the test indices. 41 | for i := 0; i < testNum; i++ { 42 | testIdx[i] = trainingNum + i 43 | } 44 | 45 | // Create the subset dataframes. 
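// Note that Subset picks rows by index, so this is an ordered 80/20 split of the
// dataframe rather than a shuffled one.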
46 | trainingDF := advertDF.Subset(trainingIdx) 47 | testDF := advertDF.Subset(testIdx) 48 | 49 | // Create a map that will be used in writing the data 50 | // to files. 51 | setMap := map[int]dataframe.DataFrame{ 52 | 0: trainingDF, 53 | 1: testDF, 54 | } 55 | 56 | // Create the respective files. 57 | for idx, setName := range []string{"training.csv", "test.csv"} { 58 | 59 | // Save the filtered dataset file. 60 | f, err := os.Create(setName) 61 | if err != nil { 62 | log.Fatal(err) 63 | } 64 | 65 | // Create a buffered writer. 66 | w := bufio.NewWriter(f) 67 | 68 | // Write the dataframe out as a CSV. 69 | if err := setMap[idx].WriteCSV(w); err != nil { 70 | log.Fatal(err) 71 | } 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /Chapter03/validation/training_test/example1/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "log" 6 | "os" 7 | 8 | "github.com/kniren/gota/dataframe" 9 | ) 10 | 11 | func main() { 12 | 13 | // Open the diabetes dataset file. 14 | f, err := os.Open("diabetes.csv") 15 | if err != nil { 16 | log.Fatal(err) 17 | } 18 | defer f.Close() 19 | 20 | // Create a dataframe from the CSV file. 21 | // The types of the columns will be inferred. 22 | diabetesDF := dataframe.ReadCSV(f) 23 | 24 | // Calculate the number of elements in each set. 25 | trainingNum := (4 * diabetesDF.Nrow()) / 5 26 | testNum := diabetesDF.Nrow() / 5 27 | if trainingNum+testNum < diabetesDF.Nrow() { 28 | trainingNum++ 29 | } 30 | 31 | // Create the subset indices. 32 | trainingIdx := make([]int, trainingNum) 33 | testIdx := make([]int, testNum) 34 | 35 | // Enumerate the training indices. 36 | for i := 0; i < trainingNum; i++ { 37 | trainingIdx[i] = i 38 | } 39 | 40 | // Enumerate the test indices. 41 | for i := 0; i < testNum; i++ { 42 | testIdx[i] = trainingNum + i 43 | } 44 | 45 | // Create the subset dataframes. 46 | trainingDF := diabetesDF.Subset(trainingIdx) 47 | testDF := diabetesDF.Subset(testIdx) 48 | 49 | // Create a map that will be used in writing the data 50 | // to files. 51 | setMap := map[int]dataframe.DataFrame{ 52 | 0: trainingDF, 53 | 1: testDF, 54 | } 55 | 56 | // Create the respective files. 57 | for idx, setName := range []string{"training.csv", "test.csv"} { 58 | 59 | // Save the filtered dataset file. 60 | f, err := os.Create(setName) 61 | if err != nil { 62 | log.Fatal(err) 63 | } 64 | 65 | // Create a buffered writer. 66 | w := bufio.NewWriter(f) 67 | 68 | // Write the dataframe out as a CSV. 69 | if err := setMap[idx].WriteCSV(w); err != nil { 70 | log.Fatal(err) 71 | } 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /Chapter03/evaluation/continuous_metrics/example2/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/csv" 5 | "fmt" 6 | "io" 7 | "log" 8 | "os" 9 | "strconv" 10 | 11 | "github.com/gonum/stat" 12 | ) 13 | 14 | func main() { 15 | 16 | // Open the continuous observations and predictions. 17 | f, err := os.Open("continuous_data.csv") 18 | if err != nil { 19 | log.Fatal(err) 20 | } 21 | defer f.Close() 22 | 23 | // Create a new CSV reader reading from the opened file. 24 | reader := csv.NewReader(f) 25 | 26 | // observed and predicted will hold the parsed observed and predicted values 27 | // form the continous data file. 
28 | var observed []float64 29 | var predicted []float64 30 | 31 | // line will track row numbers for logging. 32 | line := 1 33 | 34 | // Read in the records looking for unexpected types in the columns. 35 | for { 36 | 37 | // Read in a row. Check if we are at the end of the file. 38 | record, err := reader.Read() 39 | if err == io.EOF { 40 | break 41 | } 42 | 43 | // Skip the header. 44 | if line == 1 { 45 | line++ 46 | continue 47 | } 48 | 49 | // Read in the observed and predicted values. 50 | observedVal, err := strconv.ParseFloat(record[0], 64) 51 | if err != nil { 52 | log.Printf("Parsing line %d failed, unexpected type\n", line) 53 | continue 54 | } 55 | 56 | predictedVal, err := strconv.ParseFloat(record[1], 64) 57 | if err != nil { 58 | log.Printf("Parsing line %d failed, unexpected type\n", line) 59 | continue 60 | } 61 | 62 | // Append the record to our slice, if it has the expected type. 63 | observed = append(observed, observedVal) 64 | predicted = append(predicted, predictedVal) 65 | line++ 66 | } 67 | 68 | // Calculate the R^2 value. 69 | rSquared := stat.RSquaredFrom(observed, predicted, nil) 70 | 71 | // Output the R^2 value to standard out. 72 | fmt.Printf("\nR^2 = %0.2f\n\n", rSquared) 73 | } 74 | -------------------------------------------------------------------------------- /Chapter06/k-means/example5/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "os" 7 | 8 | "github.com/gonum/floats" 9 | "github.com/kniren/gota/dataframe" 10 | ) 11 | 12 | func main() { 13 | 14 | // Open the driver dataset file. 15 | f, err := os.Open("fleet_data.csv") 16 | if err != nil { 17 | log.Fatal(err) 18 | } 19 | defer f.Close() 20 | 21 | // Create a dataframe from the CSV file. 22 | driverDF := dataframe.ReadCSV(f) 23 | 24 | // Extract the distance column. 25 | distances := driverDF.Col("Distance_Feature").Float() 26 | 27 | // clusterOne and clusterTwo will hold the values for plotting. 28 | var clusterOne [][]float64 29 | var clusterTwo [][]float64 30 | 31 | // Fill the clusters with data. 32 | for i, speed := range driverDF.Col("Speeding_Feature").Float() { 33 | distanceOne := floats.Distance([]float64{distances[i], speed}, []float64{50.05, 8.83}, 2) 34 | distanceTwo := floats.Distance([]float64{distances[i], speed}, []float64{180.02, 18.29}, 2) 35 | if distanceOne < distanceTwo { 36 | clusterOne = append(clusterOne, []float64{distances[i], speed}) 37 | continue 38 | } 39 | clusterTwo = append(clusterTwo, []float64{distances[i], speed}) 40 | } 41 | 42 | // Output our within cluster metrics. 43 | fmt.Printf("\nCluster 1 Metric: %0.2f\n", withinClusterMean(clusterOne, []float64{50.05, 8.83})) 44 | fmt.Printf("\nCluster 2 Metric: %0.2f\n", withinClusterMean(clusterTwo, []float64{180.02, 18.29})) 45 | } 46 | 47 | // withinClusterMean calculates the mean distance between 48 | // points in a cluster and the centroid of the cluster. 49 | func withinClusterMean(cluster [][]float64, centroid []float64) float64 { 50 | 51 | // meanDistance will hold our result. 52 | var meanDistance float64 53 | 54 | // Loop over the points in the cluster. 
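// Each distance is divided by the cluster size as it is accumulated, so the loop
// yields the mean distance directly.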
55 | for _, point := range cluster { 56 | meanDistance += floats.Distance(point, centroid, 2) / float64(len(cluster)) 57 | } 58 | 59 | return meanDistance 60 | } 61 | -------------------------------------------------------------------------------- /Chapter01/json/example1/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "io/ioutil" 7 | "log" 8 | "net/http" 9 | ) 10 | 11 | // citiBikeURL provides the station statuses of CitiBike bike sharing stations. 12 | const citiBikeURL = "https://gbfs.citibikenyc.com/gbfs/en/station_status.json" 13 | 14 | // stationData is used to unmarshal the JSON document returned form citiBikeURL. 15 | type stationData struct { 16 | LastUpdated int `json:"last_updated"` 17 | TTL int `json:"ttl"` 18 | Data struct { 19 | Stations []station `json:"stations"` 20 | } `json:"data"` 21 | } 22 | 23 | // station is used to unmarshal each of the station documents in stationData. 24 | type station struct { 25 | ID string `json:"station_id"` 26 | NumBikesAvailable int `json:"num_bikes_available"` 27 | NumBikesDisabled int `json:"num_bike_disabled"` 28 | NumDocksAvailable int `json:"num_docks_available"` 29 | NumDocksDisabled int `json:"num_docks_disabled"` 30 | IsInstalled int `json:"is_installed"` 31 | IsRenting int `json:"is_renting"` 32 | IsReturning int `json:"is_returning"` 33 | LastReported int `json:"last_reported"` 34 | HasAvailableKeys bool `json:"eightd_has_available_keys"` 35 | } 36 | 37 | func main() { 38 | 39 | // Get the JSON response from the URL. 40 | response, err := http.Get(citiBikeURL) 41 | if err != nil { 42 | log.Fatal(err) 43 | } 44 | 45 | // Defer closing the response body. 46 | defer response.Body.Close() 47 | 48 | // Read the body of the response into []byte. 49 | body, err := ioutil.ReadAll(response.Body) 50 | if err != nil { 51 | log.Fatal(err) 52 | } 53 | 54 | // Declare a variable of type stationData. 55 | var sd stationData 56 | 57 | // Unmarshal the JSON data into the variable. 58 | if err := json.Unmarshal(body, &sd); err != nil { 59 | log.Fatal(err) 60 | return 61 | } 62 | 63 | // Print the first station. 64 | fmt.Printf("%+v\n\n", sd.Data.Stations[0]) 65 | } 66 | -------------------------------------------------------------------------------- /Chapter02/statistical_measures/example2/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "os" 7 | 8 | "github.com/gonum/floats" 9 | "github.com/gonum/stat" 10 | "github.com/kniren/gota/dataframe" 11 | ) 12 | 13 | func main() { 14 | 15 | // Open the CSV file. 16 | irisFile, err := os.Open("../data/iris.csv") 17 | if err != nil { 18 | log.Fatal(err) 19 | } 20 | defer irisFile.Close() 21 | 22 | // Create a dataframe from the CSV file. 23 | irisDF := dataframe.ReadCSV(irisFile) 24 | 25 | // Get the float values from the "sepal_length" column as 26 | // we will be looking at the measures for this variable. 27 | sepalLength := irisDF.Col("petal_length").Float() 28 | 29 | // Calculate the Max of the variable. 30 | minVal := floats.Min(sepalLength) 31 | 32 | // Calculate the Max of the variable. 33 | maxVal := floats.Max(sepalLength) 34 | 35 | // Calculate the Median of the variable. 36 | rangeVal := maxVal - minVal 37 | 38 | // Calculate the variance of the variable. 39 | varianceVal := stat.Variance(sepalLength, nil) 40 | 41 | // Calculate the standard deviation of the variable. 
42 | stdDevVal := stat.StdDev(sepalLength, nil) 43 | 44 | // Sort the values. 45 | inds := make([]int, len(sepalLength)) 46 | floats.Argsort(sepalLength, inds) 47 | 48 | // Get the Quantiles. 49 | quant25 := stat.Quantile(0.25, stat.Empirical, sepalLength, nil) 50 | quant50 := stat.Quantile(0.50, stat.Empirical, sepalLength, nil) 51 | quant75 := stat.Quantile(0.75, stat.Empirical, sepalLength, nil) 52 | 53 | // Output the results to standard out. 54 | fmt.Printf("\nSepal Length Summary Statistics:\n") 55 | fmt.Printf("Max value: %0.2f\n", maxVal) 56 | fmt.Printf("Min value: %0.2f\n", minVal) 57 | fmt.Printf("Range value: %0.2f\n", rangeVal) 58 | fmt.Printf("Variance value: %0.2f\n", varianceVal) 59 | fmt.Printf("Std Dev value: %0.2f\n", stdDevVal) 60 | fmt.Printf("25 Quantile: %0.2f\n", quant25) 61 | fmt.Printf("50 Quantile: %0.2f\n", quant50) 62 | fmt.Printf("75 Quantile: %0.2f\n\n", quant75) 63 | } 64 | -------------------------------------------------------------------------------- /Chapter04/non-linear_regression/example3/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/csv" 5 | "fmt" 6 | "log" 7 | "math" 8 | "os" 9 | "strconv" 10 | ) 11 | 12 | func main() { 13 | 14 | // Open the test dataset file. 15 | f, err := os.Open("test.csv") 16 | if err != nil { 17 | log.Fatal(err) 18 | } 19 | defer f.Close() 20 | 21 | // Create a new CSV reader reading from the opened file. 22 | reader := csv.NewReader(f) 23 | 24 | // Read in all of the CSV records 25 | reader.FieldsPerRecord = 4 26 | testData, err := reader.ReadAll() 27 | if err != nil { 28 | log.Fatal(err) 29 | } 30 | 31 | // Loop over the holdout data predicting y and evaluating the prediction 32 | // with the mean absolute error. 33 | var mAE float64 34 | for i, record := range testData { 35 | 36 | // Skip the header. 37 | if i == 0 { 38 | continue 39 | } 40 | 41 | // Parse the Sales. 42 | yObserved, err := strconv.ParseFloat(record[3], 64) 43 | if err != nil { 44 | log.Fatal(err) 45 | } 46 | 47 | // Parse the TV value. 48 | tvVal, err := strconv.ParseFloat(record[0], 64) 49 | if err != nil { 50 | log.Fatal(err) 51 | } 52 | 53 | // Parse the Radio value. 54 | radioVal, err := strconv.ParseFloat(record[1], 64) 55 | if err != nil { 56 | log.Fatal(err) 57 | } 58 | 59 | // Parse the Newspaper value. 60 | newspaperVal, err := strconv.ParseFloat(record[2], 64) 61 | if err != nil { 62 | log.Fatal(err) 63 | } 64 | 65 | // Predict y with our trained model. 66 | yPredicted := predict(tvVal, radioVal, newspaperVal) 67 | 68 | // Add the to the mean absolute error. 69 | mAE += math.Abs(yObserved-yPredicted) / float64(len(testData)) 70 | } 71 | 72 | // Output the MAE to standard out. 73 | fmt.Printf("\nMAE = %0.2f\n\n", mAE) 74 | } 75 | 76 | // predict uses our trained regression model to made a prediction based on a 77 | // TV, Radio, and Newspaper value. 78 | func predict(tv, radio, newspaper float64) float64 { 79 | return 3.038 + tv*0.047 + 0.177*radio + 0.001*newspaper 80 | } 81 | -------------------------------------------------------------------------------- /Chapter07/statistics/example3/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "os" 7 | "strconv" 8 | 9 | "github.com/kniren/gota/dataframe" 10 | "github.com/sajari/regression" 11 | ) 12 | 13 | func main() { 14 | 15 | // Open the CSV file. 
16 | passengersFile, err := os.Open("AirPassengers.csv") 17 | if err != nil { 18 | log.Fatal(err) 19 | } 20 | defer passengersFile.Close() 21 | 22 | // Create a dataframe from the CSV file. 23 | passengersDF := dataframe.ReadCSV(passengersFile) 24 | 25 | // Get the time and passengers as a slice of floats. 26 | passengers := passengersDF.Col("AirPassengers").Float() 27 | 28 | // Loop over various values of lag in the series. 29 | fmt.Println("Partial Autocorrelation:") 30 | for i := 1; i < 11; i++ { 31 | 32 | // Calculate the partial autocorrelation. 33 | pac := pacf(passengers, i) 34 | fmt.Printf("Lag %d period: %0.2f\n", i, pac) 35 | } 36 | } 37 | 38 | // pacf calculates the partial autocorrelation for a series 39 | // at the given lag. 40 | func pacf(x []float64, lag int) float64 { 41 | 42 | // Create a regresssion.Regression value needed to train 43 | // a model using github.com/sajari/regression. 44 | var r regression.Regression 45 | r.SetObserved("x") 46 | 47 | // Define the current lag and all of the intermediate lags. 48 | for i := 0; i < lag; i++ { 49 | r.SetVar(i, "x"+strconv.Itoa(i)) 50 | } 51 | 52 | // Shift the series. 53 | xAdj := x[lag:len(x)] 54 | 55 | // Loop over the series creating the data set 56 | // for the regression. 57 | for i, xVal := range xAdj { 58 | 59 | // Loop over the intermediate lags to build up 60 | // our independent variables. 61 | laggedVariables := make([]float64, lag) 62 | for idx := 1; idx <= lag; idx++ { 63 | 64 | // Get the lagged series variables. 65 | laggedVariables[idx-1] = x[lag+i-idx] 66 | } 67 | 68 | // Add these points to the regression value. 69 | r.Train(regression.DataPoint(xVal, laggedVariables)) 70 | } 71 | 72 | // Fit the regression. 73 | r.Run() 74 | 75 | return r.Coeff(lag) 76 | } 77 | -------------------------------------------------------------------------------- /Chapter03/evaluation/continuous_metrics/example1/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/csv" 5 | "fmt" 6 | "io" 7 | "log" 8 | "math" 9 | "os" 10 | "strconv" 11 | ) 12 | 13 | func main() { 14 | 15 | // Open the continuous observations and predictions. 16 | f, err := os.Open("continuous_data.csv") 17 | if err != nil { 18 | log.Fatal(err) 19 | } 20 | defer f.Close() 21 | 22 | // Create a new CSV reader reading from the opened file. 23 | reader := csv.NewReader(f) 24 | 25 | // observed and predicted will hold the parsed observed and predicted values 26 | // form the continous data file. 27 | var observed []float64 28 | var predicted []float64 29 | 30 | // line will track row numbers for logging. 31 | line := 1 32 | 33 | // Read in the records looking for unexpected types in the columns. 34 | for { 35 | 36 | // Read in a row. Check if we are at the end of the file. 37 | record, err := reader.Read() 38 | if err == io.EOF { 39 | break 40 | } 41 | 42 | // Skip the header. 43 | if line == 1 { 44 | line++ 45 | continue 46 | } 47 | 48 | // Read in the observed and predicted values. 49 | observedVal, err := strconv.ParseFloat(record[0], 64) 50 | if err != nil { 51 | log.Printf("Parsing line %d failed, unexpected type\n", line) 52 | continue 53 | } 54 | 55 | predictedVal, err := strconv.ParseFloat(record[1], 64) 56 | if err != nil { 57 | log.Printf("Parsing line %d failed, unexpected type\n", line) 58 | continue 59 | } 60 | 61 | // Append the record to our slice, if it has the expected type. 
62 | observed = append(observed, observedVal) 63 | predicted = append(predicted, predictedVal) 64 | line++ 65 | } 66 | 67 | // Calculate the mean absolute error and mean squared error. 68 | var mAE float64 69 | var mSE float64 70 | for idx, oVal := range observed { 71 | mAE += math.Abs(oVal-predicted[idx]) / float64(len(observed)) 72 | mSE += math.Pow(oVal-predicted[idx], 2) / float64(len(observed)) 73 | } 74 | 75 | // Output the MAE and MSE value to standard out. 76 | fmt.Printf("\nMAE = %0.2f\n", mAE) 77 | fmt.Printf("\nMSE = %0.2f\n\n", mSE) 78 | 79 | } 80 | -------------------------------------------------------------------------------- /Chapter08/utilizing_our_simple_nn/example1/test.csv: -------------------------------------------------------------------------------- 1 | sepal_length,sepal_width,petal_length,petal_width,setosa,virginica,versicolor 2 | 0.583333333333,0.291666666667,0.728813559322,0.75,0.0,1.0,0.0 3 | 0.0833333333333,0.458333333333,0.0847457627119,0.0416666666667,1.0,0.0,0.0 4 | 0.194444444444,0.583333333333,0.101694915254,0.125,1.0,0.0,0.0 5 | 0.916666666667,0.416666666667,0.949152542373,0.833333333333,0.0,1.0,0.0 6 | 0.611111111111,0.416666666667,0.813559322034,0.875,0.0,1.0,0.0 7 | 0.694444444444,0.333333333333,0.64406779661,0.541666666667,0.0,0.0,1.0 8 | 0.166666666667,0.458333333333,0.0847457627119,0.0,1.0,0.0,0.0 9 | 0.138888888889,0.416666666667,0.0677966101695,0.0,1.0,0.0,0.0 10 | 0.222222222222,0.625,0.0677966101695,0.0833333333333,1.0,0.0,0.0 11 | 0.5,0.25,0.779661016949,0.541666666667,0.0,1.0,0.0 12 | 0.555555555556,0.208333333333,0.661016949153,0.583333333333,0.0,0.0,1.0 13 | 0.472222222222,0.583333333333,0.593220338983,0.625,0.0,0.0,1.0 14 | 0.222222222222,0.75,0.0847457627119,0.0833333333333,1.0,0.0,0.0 15 | 0.194444444444,0.0,0.423728813559,0.375,0.0,0.0,1.0 16 | 0.416666666667,0.25,0.508474576271,0.458333333333,0.0,0.0,1.0 17 | 0.444444444444,0.416666666667,0.542372881356,0.583333333333,0.0,0.0,1.0 18 | 0.722222222222,0.458333333333,0.661016949153,0.583333333333,0.0,0.0,1.0 19 | 0.805555555556,0.666666666667,0.864406779661,1.0,0.0,1.0,0.0 20 | 0.5,0.333333333333,0.508474576271,0.5,0.0,0.0,1.0 21 | 0.138888888889,0.458333333333,0.101694915254,0.0416666666667,1.0,0.0,0.0 22 | 0.416666666667,0.833333333333,0.0338983050847,0.0416666666667,1.0,0.0,0.0 23 | 0.944444444444,0.25,1.0,0.916666666667,0.0,1.0,0.0 24 | 0.222222222222,0.583333333333,0.0847457627119,0.0416666666667,1.0,0.0,0.0 25 | 0.527777777778,0.375,0.559322033898,0.5,0.0,0.0,1.0 26 | 0.638888888889,0.416666666667,0.576271186441,0.541666666667,0.0,0.0,1.0 27 | 0.0277777777778,0.416666666667,0.0508474576271,0.0416666666667,1.0,0.0,0.0 28 | 0.0,0.416666666667,0.0169491525424,0.0,1.0,0.0,0.0 29 | 0.555555555556,0.583333333333,0.779661016949,0.958333333333,0.0,1.0,0.0 30 | 0.361111111111,0.291666666667,0.542372881356,0.5,0.0,0.0,1.0 31 | 0.194444444444,0.125,0.389830508475,0.375,0.0,0.0,1.0 32 | -------------------------------------------------------------------------------- /Chapter01/json/example2/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | "io/ioutil" 6 | "log" 7 | "net/http" 8 | ) 9 | 10 | // citiBikeURL provides the station statuses of CitiBike bike sharing stations. 11 | const citiBikeURL = "https://gbfs.citibikenyc.com/gbfs/en/station_status.json" 12 | 13 | // stationData is used to unmarshal the JSON document returned form citiBikeURL. 
14 | type stationData struct { 15 | LastUpdated int `json:"last_updated"` 16 | TTL int `json:"ttl"` 17 | Data struct { 18 | Stations []station `json:"stations"` 19 | } `json:"data"` 20 | } 21 | 22 | // station is used to unmarshal each of the station documents in stationData. 23 | type station struct { 24 | ID string `json:"station_id"` 25 | NumBikesAvailable int `json:"num_bikes_available"` 26 | NumBikesDisabled int `json:"num_bike_disabled"` 27 | NumDocksAvailable int `json:"num_docks_available"` 28 | NumDocksDisabled int `json:"num_docks_disabled"` 29 | IsInstalled int `json:"is_installed"` 30 | IsRenting int `json:"is_renting"` 31 | IsReturning int `json:"is_returning"` 32 | LastReported int `json:"last_reported"` 33 | HasAvailableKeys bool `json:"eightd_has_available_keys"` 34 | } 35 | 36 | func main() { 37 | 38 | // Get the JSON response from the URL. 39 | response, err := http.Get(citiBikeURL) 40 | if err != nil { 41 | log.Fatal(err) 42 | } 43 | 44 | // Defer closing the response body. 45 | defer response.Body.Close() 46 | 47 | // Read the body of the response into []byte. 48 | body, err := ioutil.ReadAll(response.Body) 49 | if err != nil { 50 | log.Fatal(err) 51 | } 52 | 53 | // Unmarshal the JSON data into the variable. 54 | var sd stationData 55 | if err := json.Unmarshal(body, &sd); err != nil { 56 | log.Fatal(err) 57 | return 58 | } 59 | 60 | // Marshal the data. 61 | outputData, err := json.Marshal(sd) 62 | if err != nil { 63 | log.Fatal(err) 64 | } 65 | 66 | // Save the marshalled data to a file. 67 | if err := ioutil.WriteFile("citibike.json", outputData, 0644); err != nil { 68 | log.Fatal(err) 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /Chapter04/linear_regression/example7/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "image/color" 5 | "log" 6 | "os" 7 | 8 | "github.com/gonum/plot" 9 | "github.com/gonum/plot/plotter" 10 | "github.com/gonum/plot/vg" 11 | "github.com/kniren/gota/dataframe" 12 | ) 13 | 14 | func main() { 15 | 16 | // Open the advertising dataset file. 17 | f, err := os.Open("Advertising.csv") 18 | if err != nil { 19 | log.Fatal(err) 20 | } 21 | defer f.Close() 22 | 23 | // Create a dataframe from the CSV file. 24 | advertDF := dataframe.ReadCSV(f) 25 | 26 | // Extract the target column. 27 | yVals := advertDF.Col("Sales").Float() 28 | 29 | // pts will hold the values for plotting. 30 | pts := make(plotter.XYs, advertDF.Nrow()) 31 | 32 | // ptsPred will hold the predicted values for plotting. 33 | ptsPred := make(plotter.XYs, advertDF.Nrow()) 34 | 35 | // Fill pts with data. 36 | for i, floatVal := range advertDF.Col("TV").Float() { 37 | pts[i].X = floatVal 38 | pts[i].Y = yVals[i] 39 | ptsPred[i].X = floatVal 40 | ptsPred[i].Y = predict(floatVal) 41 | } 42 | 43 | // Create the plot. 44 | p, err := plot.New() 45 | if err != nil { 46 | log.Fatal(err) 47 | } 48 | p.X.Label.Text = "TV" 49 | p.Y.Label.Text = "Sales" 50 | p.Add(plotter.NewGrid()) 51 | 52 | // Add the scatter plot points for the observations. 53 | s, err := plotter.NewScatter(pts) 54 | if err != nil { 55 | log.Fatal(err) 56 | } 57 | s.GlyphStyle.Color = color.RGBA{R: 255, B: 128, A: 255} 58 | s.GlyphStyle.Radius = vg.Points(3) 59 | 60 | // Add the line plot points for the predictions. 
61 | l, err := plotter.NewLine(ptsPred) 62 | if err != nil { 63 | log.Fatal(err) 64 | } 65 | l.LineStyle.Width = vg.Points(1) 66 | l.LineStyle.Dashes = []vg.Length{vg.Points(5), vg.Points(5)} 67 | l.LineStyle.Color = color.RGBA{B: 255, A: 255} 68 | 69 | // Save the plot to a PNG file. 70 | p.Add(s, l) 71 | if err := p.Save(4*vg.Inch, 4*vg.Inch, "regression_line.png"); err != nil { 72 | log.Fatal(err) 73 | } 74 | } 75 | 76 | // predict uses our trained regression model to make a prediction. 77 | func predict(tv float64) float64 { 78 | return 7.07 + tv*0.05 79 | } 80 | -------------------------------------------------------------------------------- /Chapter03/evaluation/categorical_metrics/example1/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/csv" 5 | "fmt" 6 | "io" 7 | "log" 8 | "os" 9 | "strconv" 10 | ) 11 | 12 | func main() { 13 | 14 | // Open the binary observations and predictions. 15 | f, err := os.Open("labeled.csv") 16 | if err != nil { 17 | log.Fatal(err) 18 | } 19 | defer f.Close() 20 | 21 | // Create a new CSV reader reading from the opened file. 22 | reader := csv.NewReader(f) 23 | 24 | // observed and predicted will hold the parsed observed and predicted values 25 | // from the labeled data file. 26 | var observed []int 27 | var predicted []int 28 | 29 | // line will track row numbers for logging. 30 | line := 1 31 | 32 | // Read in the records looking for unexpected types in the columns. 33 | for { 34 | 35 | // Read in a row. Check if we are at the end of the file. 36 | record, err := reader.Read() 37 | if err == io.EOF { 38 | break 39 | } 40 | 41 | // Skip the header. 42 | if line == 1 { 43 | line++ 44 | continue 45 | } 46 | 47 | // Read in the observed and predicted values. 48 | observedVal, err := strconv.Atoi(record[0]) 49 | if err != nil { 50 | log.Printf("Parsing line %d failed, unexpected type\n", line) 51 | continue 52 | } 53 | 54 | predictedVal, err := strconv.Atoi(record[1]) 55 | if err != nil { 56 | log.Printf("Parsing line %d failed, unexpected type\n", line) 57 | continue 58 | } 59 | 60 | // Append the record to our slice, if it has the expected type. 61 | observed = append(observed, observedVal) 62 | predicted = append(predicted, predictedVal) 63 | line++ 64 | } 65 | 66 | // This variable will hold our count of true positive and 67 | // true negative values. 68 | var truePosNeg int 69 | 70 | // Accumulate the true positive/negative count. 71 | for idx, oVal := range observed { 72 | if oVal == predicted[idx] { 73 | truePosNeg++ 74 | } 75 | } 76 | 77 | // Calculate the accuracy (subset accuracy). 78 | accuracy := float64(truePosNeg) / float64(len(observed)) 79 | 80 | // Output the Accuracy value to standard out. 81 | fmt.Printf("\nAccuracy = %0.2f\n\n", accuracy) 82 | } 83 | -------------------------------------------------------------------------------- /Chapter04/non-linear_regression/example1/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/csv" 5 | "fmt" 6 | "log" 7 | "os" 8 | "strconv" 9 | 10 | "github.com/gonum/matrix/mat64" 11 | ) 12 | 13 | func main() { 14 | 15 | // Open the training dataset file. 16 | f, err := os.Open("training.csv") 17 | if err != nil { 18 | log.Fatal(err) 19 | } 20 | defer f.Close() 21 | 22 | // Create a new CSV reader reading from the opened file.
23 | reader := csv.NewReader(f) 24 | reader.FieldsPerRecord = 4 25 | 26 | // Read in all of the CSV records 27 | rawCSVData, err := reader.ReadAll() 28 | if err != nil { 29 | log.Fatal(err) 30 | } 31 | 32 | // featureData will hold all the float values that will eventually be 33 | // used to form our matrix of features. 34 | featureData := make([]float64, 4*len(rawCSVData)) 35 | yData := make([]float64, len(rawCSVData)) 36 | 37 | // featureIndex and yIndex will track the current index of the matrix values. 38 | var featureIndex int 39 | var yIndex int 40 | 41 | // Sequentially move the rows into a slice of floats. 42 | for idx, record := range rawCSVData { 43 | 44 | // Skip the header row. 45 | if idx == 0 { 46 | continue 47 | } 48 | 49 | // Loop over the float columns. 50 | for i, val := range record { 51 | 52 | // Convert the value to a float. 53 | valParsed, err := strconv.ParseFloat(val, 64) 54 | if err != nil { 55 | log.Fatal("Could not parse float value") 56 | } 57 | 58 | if i < 3 { 59 | 60 | // Add an intercept to the model. 61 | if i == 0 { 62 | featureData[featureIndex] = 1 63 | featureIndex++ 64 | } 65 | 66 | // Add the float value to the slice of feature floats. 67 | featureData[featureIndex] = valParsed 68 | featureIndex++ 69 | } 70 | 71 | if i == 3 { 72 | 73 | // Add the float value to the slice of y floats. 74 | yData[yIndex] = valParsed 75 | yIndex++ 76 | } 77 | 78 | } 79 | } 80 | 81 | // Form the matrices that will be input to our regression. 82 | features := mat64.NewDense(len(rawCSVData), 4, featureData) 83 | y := mat64.NewVector(len(rawCSVData), yData) 84 | 85 | if features != nil && y != nil { 86 | fmt.Println("Matrices formed for ridge regression") 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /Chapter07/auto_regressive/example5/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "os" 7 | "strconv" 8 | 9 | "github.com/kniren/gota/dataframe" 10 | "github.com/sajari/regression" 11 | ) 12 | 13 | func main() { 14 | 15 | // Open the CSV file. 16 | passengersFile, err := os.Open("log_diff_series.csv") 17 | if err != nil { 18 | log.Fatal(err) 19 | } 20 | defer passengersFile.Close() 21 | 22 | // Create a dataframe from the CSV file. 23 | passengersDF := dataframe.ReadCSV(passengersFile) 24 | 25 | // Get the time and passengers as a slice of floats. 26 | passengers := passengersDF.Col("log_differenced_passengers").Float() 27 | 28 | // Calculate the coefficients for lag 1 and 2 and 29 | // our error. 30 | coeffs, intercept := autoregressive(passengers, 2) 31 | 32 | // Output the AR(2) model to stdout. 33 | fmt.Printf("\nlog(x(t)) - log(x(t-1)) = %0.6f + lag1*%0.6f + lag2*%0.6f\n\n", intercept, coeffs[0], coeffs[1]) 34 | } 35 | 36 | // autoregressive calculates an AR model for a series 37 | // at a given order. 38 | func autoregressive(x []float64, lag int) ([]float64, float64) { 39 | 40 | // Create a regression.Regression value needed to train 41 | // a model using github.com/sajari/regression. 42 | var r regression.Regression 43 | r.SetObserved("x") 44 | 45 | // Define the current lag and all of the intermediate lags. 46 | for i := 0; i < lag; i++ { 47 | r.SetVar(i, "x"+strconv.Itoa(i)) 48 | } 49 | 50 | // Shift the series. 51 | xAdj := x[lag:len(x)] 52 | 53 | // Loop over the series creating the data set 54 | // for the regression.
55 | for i, xVal := range xAdj { 56 | 57 | // Loop over the intermediate lags to build up 58 | // our independent variables. 59 | laggedVariables := make([]float64, lag) 60 | for idx := 1; idx <= lag; idx++ { 61 | 62 | // Get the lagged series variables. 63 | laggedVariables[idx-1] = x[lag+i-idx] 64 | } 65 | 66 | // Add these points to the regression value. 67 | r.Train(regression.DataPoint(xVal, laggedVariables)) 68 | } 69 | 70 | // Fit the regression. 71 | r.Run() 72 | 73 | // coeff holds the coefficients for our lags. 74 | var coeff []float64 75 | for i := 1; i <= lag; i++ { 76 | coeff = append(coeff, r.Coeff(i)) 77 | } 78 | 79 | return coeff, r.Coeff(0) 80 | } 81 | -------------------------------------------------------------------------------- /Chapter05/logistic_regression/example3/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/csv" 5 | "log" 6 | "os" 7 | "strconv" 8 | "strings" 9 | ) 10 | 11 | func main() { 12 | 13 | // Open the loan dataset file. 14 | f, err := os.Open("loan_data.csv") 15 | if err != nil { 16 | log.Fatal(err) 17 | } 18 | defer f.Close() 19 | 20 | // Create a new CSV reader reading from the opened file. 21 | reader := csv.NewReader(f) 22 | reader.FieldsPerRecord = 2 23 | 24 | // Read in all of the CSV records 25 | rawCSVData, err := reader.ReadAll() 26 | if err != nil { 27 | log.Fatal(err) 28 | } 29 | 30 | // Create the output file. 31 | f, err = os.Create("clean_loan_data.csv") 32 | if err != nil { 33 | log.Fatal(err) 34 | } 35 | defer f.Close() 36 | 37 | // Create a CSV writer. 38 | w := csv.NewWriter(f) 39 | 40 | // Sequentially move the rows writing out the parsed values. 41 | for idx, record := range rawCSVData { 42 | 43 | // Skip the header row. 44 | if idx == 0 { 45 | 46 | // Write the header to the output file. 47 | if err := w.Write([]string{"FICO_score", "class"}); err != nil { 48 | log.Fatal(err) 49 | } 50 | continue 51 | } 52 | 53 | // Initialize a slice to hold our parsed values. 54 | outRecord := make([]string, 2) 55 | 56 | // Parse and normalize the FICO score. 57 | score, err := strconv.ParseFloat(strings.Split(record[0], "-")[0], 64) 58 | if err != nil { 59 | log.Fatal(err) 60 | } 61 | 62 | outRecord[0] = strconv.FormatFloat((score-640.0)/(830.0-640.0), 'f', 4, 64) 63 | 64 | // Parse the Interest rate class. 65 | rate, err := strconv.ParseFloat(strings.TrimSuffix(record[1], "%"), 64) 66 | if err != nil { 67 | log.Fatal(err) 68 | } 69 | 70 | if rate <= 12.0 { 71 | outRecord[1] = "1.0" 72 | 73 | // Write the record to the output file. 74 | if err := w.Write(outRecord); err != nil { 75 | log.Fatal(err) 76 | } 77 | continue 78 | } 79 | 80 | outRecord[1] = "0.0" 81 | 82 | // Write the record to the output file. 83 | if err := w.Write(outRecord); err != nil { 84 | log.Fatal(err) 85 | } 86 | } 87 | 88 | // Write any buffered data to the underlying writer (the output file).
89 | w.Flush() 90 | 91 | if err := w.Error(); err != nil { 92 | log.Fatal(err) 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /Chapter07/auto_regressive/example1/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/csv" 5 | "image/color" 6 | "log" 7 | "os" 8 | "strconv" 9 | 10 | "github.com/gonum/plot" 11 | "github.com/gonum/plot/plotter" 12 | "github.com/gonum/plot/vg" 13 | "github.com/kniren/gota/dataframe" 14 | ) 15 | 16 | func main() { 17 | 18 | // Open the CSV file. 19 | passengersFile, err := os.Open("AirPassengers.csv") 20 | if err != nil { 21 | log.Fatal(err) 22 | } 23 | defer passengersFile.Close() 24 | 25 | // Create a dataframe from the CSV file. 26 | passengersDF := dataframe.ReadCSV(passengersFile) 27 | 28 | // Extract the number of passengers and time columns 29 | // as slices of floats. 30 | passengerVals := passengersDF.Col("AirPassengers").Float() 31 | timeVals := passengersDF.Col("time").Float() 32 | 33 | // pts will hold the values for plotting. 34 | pts := make(plotter.XYs, passengersDF.Nrow()-1) 35 | 36 | // differenced will hold our differenced values 37 | // that will be output to a new CSV file. 38 | var differenced [][]string 39 | differenced = append(differenced, []string{"time", "differenced_passengers"}) 40 | 41 | // Fill pts with data. 42 | for i := 1; i < len(passengerVals); i++ { 43 | pts[i-1].X = timeVals[i] 44 | pts[i-1].Y = passengerVals[i] - passengerVals[i-1] 45 | differenced = append(differenced, []string{ 46 | strconv.FormatFloat(timeVals[i], 'f', -1, 64), 47 | strconv.FormatFloat(passengerVals[i]-passengerVals[i-1], 'f', -1, 64), 48 | }) 49 | } 50 | 51 | // Create the plot. 52 | p, err := plot.New() 53 | if err != nil { 54 | log.Fatal(err) 55 | } 56 | p.X.Label.Text = "time" 57 | p.Y.Label.Text = "differenced passengers" 58 | p.Add(plotter.NewGrid()) 59 | 60 | // Add the line plot points for the time series. 61 | l, err := plotter.NewLine(pts) 62 | if err != nil { 63 | log.Fatal(err) 64 | } 65 | l.LineStyle.Width = vg.Points(1) 66 | l.LineStyle.Color = color.RGBA{B: 255, A: 255} 67 | 68 | // Save the plot to a PNG file. 69 | p.Add(l) 70 | if err := p.Save(10*vg.Inch, 4*vg.Inch, "diff_passengers_ts.png"); err != nil { 71 | log.Fatal(err) 72 | } 73 | 74 | // Save the differenced data out to a new CSV. 75 | f, err := os.Create("diff_series.csv") 76 | if err != nil { 77 | log.Fatal(err) 78 | } 79 | defer f.Close() 80 | 81 | w := csv.NewWriter(f) 82 | w.WriteAll(differenced) 83 | 84 | if err := w.Error(); err != nil { 85 | log.Fatal(err) 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /Chapter06/k-means/example4/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "image/color" 5 | "log" 6 | "os" 7 | 8 | "github.com/gonum/floats" 9 | "github.com/gonum/plot" 10 | "github.com/gonum/plot/plotter" 11 | "github.com/gonum/plot/vg" 12 | "github.com/kniren/gota/dataframe" 13 | ) 14 | 15 | func main() { 16 | 17 | // Open the driver dataset file. 18 | f, err := os.Open("fleet_data.csv") 19 | if err != nil { 20 | log.Fatal(err) 21 | } 22 | defer f.Close() 23 | 24 | // Create a dataframe from the CSV file. 25 | driverDF := dataframe.ReadCSV(f) 26 | 27 | // Extract the distance column. 
28 | yVals := driverDF.Col("Distance_Feature").Float() 29 | 30 | // clusterOne and clusterTwo will hold the values for plotting. 31 | var clusterOne [][]float64 32 | var clusterTwo [][]float64 33 | 34 | // Fill the clusters with data. 35 | for i, xVal := range driverDF.Col("Speeding_Feature").Float() { 36 | distanceOne := floats.Distance([]float64{yVals[i], xVal}, []float64{50.05, 8.83}, 2) 37 | distanceTwo := floats.Distance([]float64{yVals[i], xVal}, []float64{180.02, 18.29}, 2) 38 | if distanceOne < distanceTwo { 39 | clusterOne = append(clusterOne, []float64{xVal, yVals[i]}) 40 | continue 41 | } 42 | clusterTwo = append(clusterTwo, []float64{xVal, yVals[i]}) 43 | } 44 | 45 | // pts* will hold the values for plotting 46 | ptsOne := make(plotter.XYs, len(clusterOne)) 47 | ptsTwo := make(plotter.XYs, len(clusterTwo)) 48 | 49 | // Fill pts with data. 50 | for i, point := range clusterOne { 51 | ptsOne[i].X = point[0] 52 | ptsOne[i].Y = point[1] 53 | } 54 | 55 | for i, point := range clusterTwo { 56 | ptsTwo[i].X = point[0] 57 | ptsTwo[i].Y = point[1] 58 | } 59 | 60 | // Create the plot. 61 | p, err := plot.New() 62 | if err != nil { 63 | log.Fatal(err) 64 | } 65 | p.X.Label.Text = "Speeding" 66 | p.Y.Label.Text = "Distance" 67 | p.Add(plotter.NewGrid()) 68 | 69 | sOne, err := plotter.NewScatter(ptsOne) 70 | if err != nil { 71 | log.Fatal(err) 72 | } 73 | sOne.GlyphStyle.Color = color.RGBA{R: 255, B: 128, A: 255} 74 | sOne.GlyphStyle.Radius = vg.Points(3) 75 | 76 | sTwo, err := plotter.NewScatter(ptsTwo) 77 | if err != nil { 78 | log.Fatal(err) 79 | } 80 | sTwo.GlyphStyle.Color = color.RGBA{B: 255, A: 255} 81 | sTwo.GlyphStyle.Radius = vg.Points(3) 82 | 83 | // Save the plot to a PNG file. 84 | p.Add(sOne, sTwo) 85 | if err := p.Save(4*vg.Inch, 4*vg.Inch, "fleet_data_clusters.png"); err != nil { 86 | log.Fatal(err) 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /Chapter07/auto_regressive/example2/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/csv" 5 | "image/color" 6 | "log" 7 | "math" 8 | "os" 9 | "strconv" 10 | 11 | "github.com/gonum/plot" 12 | "github.com/gonum/plot/plotter" 13 | "github.com/gonum/plot/vg" 14 | "github.com/kniren/gota/dataframe" 15 | ) 16 | 17 | func main() { 18 | 19 | // Open the CSV file. 20 | passengersFile, err := os.Open("AirPassengers.csv") 21 | if err != nil { 22 | log.Fatal(err) 23 | } 24 | defer passengersFile.Close() 25 | 26 | // Create a dataframe from the CSV file. 27 | passengersDF := dataframe.ReadCSV(passengersFile) 28 | 29 | // Extract the number of passengers and time columns 30 | // as slices of floats. 31 | passengerVals := passengersDF.Col("AirPassengers").Float() 32 | timeVals := passengersDF.Col("time").Float() 33 | 34 | // pts will hold the values for plotting. 35 | pts := make(plotter.XYs, passengersDF.Nrow()-1) 36 | 37 | // differenced will hold our differenced values 38 | // that will be output to a new CSV file. 39 | var differenced [][]string 40 | differenced = append(differenced, []string{"time", "log_differenced_passengers"}) 41 | 42 | // Fill pts with data. 
43 | for i := 1; i < len(passengerVals); i++ { 44 | pts[i-1].X = timeVals[i] 45 | pts[i-1].Y = math.Log(passengerVals[i]) - math.Log(passengerVals[i-1]) 46 | differenced = append(differenced, []string{ 47 | strconv.FormatFloat(timeVals[i], 'f', -1, 64), 48 | strconv.FormatFloat(math.Log(passengerVals[i])-math.Log(passengerVals[i-1]), 'f', -1, 64), 49 | }) 50 | } 51 | 52 | // Create the plot. 53 | p, err := plot.New() 54 | if err != nil { 55 | log.Fatal(err) 56 | } 57 | p.X.Label.Text = "time" 58 | p.Y.Label.Text = "log(differenced passengers)" 59 | p.Add(plotter.NewGrid()) 60 | 61 | // Add the line plot points for the time series. 62 | l, err := plotter.NewLine(pts) 63 | if err != nil { 64 | log.Fatal(err) 65 | } 66 | l.LineStyle.Width = vg.Points(1) 67 | l.LineStyle.Color = color.RGBA{B: 255, A: 255} 68 | 69 | // Save the plot to a PNG file. 70 | p.Add(l) 71 | if err := p.Save(10*vg.Inch, 4*vg.Inch, "log_diff_passengers_ts.png"); err != nil { 72 | log.Fatal(err) 73 | } 74 | 75 | // Save the differenced data out to a new CSV. 76 | f, err := os.Create("log_diff_series.csv") 77 | if err != nil { 78 | log.Fatal(err) 79 | } 80 | defer f.Close() 81 | 82 | w := csv.NewWriter(f) 83 | w.WriteAll(differenced) 84 | 85 | if err := w.Error(); err != nil { 86 | log.Fatal(err) 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /Chapter05/logistic_regression/example7/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/csv" 5 | "fmt" 6 | "io" 7 | "log" 8 | "math" 9 | "os" 10 | "strconv" 11 | ) 12 | 13 | func main() { 14 | 15 | // Open the test examples. 16 | f, err := os.Open("test.csv") 17 | if err != nil { 18 | log.Fatal(err) 19 | } 20 | defer f.Close() 21 | 22 | // Create a new CSV reader reading from the opened file. 23 | reader := csv.NewReader(f) 24 | 25 | // observed and predicted will hold the parsed observed and predicted values 26 | // from the labeled data file. 27 | var observed []float64 28 | var predicted []float64 29 | 30 | // line will track row numbers for logging. 31 | line := 1 32 | 33 | // Read in the records looking for unexpected types in the columns. 34 | for { 35 | 36 | // Read in a row. Check if we are at the end of the file. 37 | record, err := reader.Read() 38 | if err == io.EOF { 39 | break 40 | } 41 | 42 | // Skip the header. 43 | if line == 1 { 44 | line++ 45 | continue 46 | } 47 | 48 | // Read in the observed value. 49 | observedVal, err := strconv.ParseFloat(record[1], 64) 50 | if err != nil { 51 | log.Printf("Parsing line %d failed, unexpected type\n", line) 52 | continue 53 | } 54 | 55 | // Make the corresponding prediction. 56 | score, err := strconv.ParseFloat(record[0], 64) 57 | if err != nil { 58 | log.Printf("Parsing line %d failed, unexpected type\n", line) 59 | continue 60 | } 61 | 62 | predictedVal := predict(score) 63 | 64 | // Append the record to our slice, if it has the expected type. 65 | observed = append(observed, observedVal) 66 | predicted = append(predicted, predictedVal) 67 | line++ 68 | } 69 | 70 | // This variable will hold our count of true positive and 71 | // true negative values. 72 | var truePosNeg int 73 | 74 | // Accumulate the true positive/negative count. 75 | for idx, oVal := range observed { 76 | if oVal == predicted[idx] { 77 | truePosNeg++ 78 | } 79 | } 80 | 81 | // Calculate the accuracy (subset accuracy).
82 | accuracy := float64(truePosNeg) / float64(len(observed)) 83 | 84 | // Output the Accuracy value to standard out. 85 | fmt.Printf("\nAccuracy = %0.2f\n\n", accuracy) 86 | } 87 | 88 | // predict makes a prediction based on our 89 | // trained logistic regression model. 90 | func predict(score float64) float64 { 91 | 92 | // Calculate the predicted probability. 93 | p := 1 / (1 + math.Exp(-13.65*score+4.89)) 94 | 95 | // Output the corresponding class. 96 | if p >= 0.5 { 97 | return 1.0 98 | } 99 | 100 | return 0.0 101 | } 102 | -------------------------------------------------------------------------------- /Chapter07/statistics/example2/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "log" 5 | "math" 6 | "os" 7 | 8 | "github.com/gonum/plot" 9 | "github.com/gonum/plot/plotter" 10 | "github.com/gonum/plot/plotutil" 11 | "github.com/gonum/plot/vg" 12 | "github.com/gonum/stat" 13 | "github.com/kniren/gota/dataframe" 14 | ) 15 | 16 | func main() { 17 | 18 | // Open the CSV file. 19 | passengersFile, err := os.Open("AirPassengers.csv") 20 | if err != nil { 21 | log.Fatal(err) 22 | } 23 | defer passengersFile.Close() 24 | 25 | // Create a dataframe from the CSV file. 26 | passengersDF := dataframe.ReadCSV(passengersFile) 27 | 28 | // Get the time and passengers as a slice of floats. 29 | passengers := passengersDF.Col("AirPassengers").Float() 30 | 31 | // Create a new plot, to plot our autocorrelations. 32 | p, err := plot.New() 33 | if err != nil { 34 | log.Fatal(err) 35 | } 36 | 37 | p.Title.Text = "Autocorrelations for AirPassengers" 38 | p.X.Label.Text = "Lag" 39 | p.Y.Label.Text = "ACF" 40 | p.Y.Min = 0 41 | p.Y.Max = 1 42 | 43 | w := vg.Points(3) 44 | 45 | // Create the points for plotting. 46 | numLags := 20 47 | pts := make(plotter.Values, numLags) 48 | 49 | // Loop over various values of lag in the series. 50 | for i := 1; i <= numLags; i++ { 51 | 52 | // Calculate the autocorrelation. 53 | pts[i-1] = acf(passengers, i) 54 | } 55 | 56 | // Add the points to the plot. 57 | bars, err := plotter.NewBarChart(pts, w) 58 | if err != nil { 59 | log.Fatal(err) 60 | } 61 | bars.LineStyle.Width = vg.Length(0) 62 | bars.Color = plotutil.Color(1) 63 | 64 | // Save the plot to a PNG file. 65 | p.Add(bars) 66 | if err := p.Save(8*vg.Inch, 4*vg.Inch, "acf.png"); err != nil { 67 | log.Fatal(err) 68 | } 69 | } 70 | 71 | // acf calculates the autocorrelation for a series 72 | // at the given lag. 73 | func acf(x []float64, lag int) float64 { 74 | 75 | // Shift the series. 76 | xAdj := x[lag:len(x)] 77 | xLag := x[0 : len(x)-lag] 78 | 79 | // numerator will hold our accumulated numerator, and 80 | // denominator will hold our accumulated denominator. 81 | var numerator float64 82 | var denominator float64 83 | 84 | // Calculate the mean of our x values, which will be used 85 | // in each term of the autocorrelation. 86 | xBar := stat.Mean(x, nil) 87 | 88 | // Calculate the numerator. 89 | for idx, xVal := range xAdj { 90 | numerator += ((xVal - xBar) * (xLag[idx] - xBar)) 91 | } 92 | 93 | // Calculate the denominator. 
94 | for _, xVal := range x { 95 | denominator += math.Pow(xVal-xBar, 2) 96 | } 97 | 98 | return numerator / denominator 99 | } 100 | -------------------------------------------------------------------------------- /Chapter07/auto_regressive/example3/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "log" 5 | "math" 6 | "os" 7 | 8 | "github.com/gonum/plot" 9 | "github.com/gonum/plot/plotter" 10 | "github.com/gonum/plot/plotutil" 11 | "github.com/gonum/plot/vg" 12 | "github.com/gonum/stat" 13 | "github.com/kniren/gota/dataframe" 14 | ) 15 | 16 | func main() { 17 | 18 | // Open the CSV file. 19 | passengersFile, err := os.Open("log_diff_series.csv") 20 | if err != nil { 21 | log.Fatal(err) 22 | } 23 | defer passengersFile.Close() 24 | 25 | // Create a dataframe from the CSV file. 26 | passengersDF := dataframe.ReadCSV(passengersFile) 27 | 28 | // Get the time and passengers as a slice of floats. 29 | passengers := passengersDF.Col("log_differenced_passengers").Float() 30 | 31 | // Create a new plot, to plot our autocorrelations. 32 | p, err := plot.New() 33 | if err != nil { 34 | log.Fatal(err) 35 | } 36 | 37 | p.Title.Text = "Autocorrelations for log(differenced passengers)" 38 | p.X.Label.Text = "Lag" 39 | p.Y.Label.Text = "ACF" 40 | p.Y.Min = 0 41 | p.Y.Max = 1 42 | 43 | w := vg.Points(3) 44 | 45 | // Create the points for plotting. 46 | numLags := 20 47 | pts := make(plotter.Values, numLags) 48 | 49 | // Loop over various values of lag in the series. 50 | for i := 1; i <= numLags; i++ { 51 | 52 | // Calculate the autocorrelation. 53 | pts[i-1] = acf(passengers, i) 54 | } 55 | 56 | // Add the points to the plot. 57 | bars, err := plotter.NewBarChart(pts, w) 58 | if err != nil { 59 | log.Fatal(err) 60 | } 61 | bars.LineStyle.Width = vg.Length(0) 62 | bars.Color = plotutil.Color(1) 63 | 64 | // Save the plot to a PNG file. 65 | p.Add(bars) 66 | if err := p.Save(8*vg.Inch, 4*vg.Inch, "acf.png"); err != nil { 67 | log.Fatal(err) 68 | } 69 | } 70 | 71 | // acf calculates the autocorrelation for a series 72 | // at the given lag. 73 | func acf(x []float64, lag int) float64 { 74 | 75 | // Shift the series. 76 | xAdj := x[lag:len(x)] 77 | xLag := x[0 : len(x)-lag] 78 | 79 | // numerator will hold our accumulated numerator, and 80 | // denominator will hold our accumulated denominator. 81 | var numerator float64 82 | var denominator float64 83 | 84 | // Calculate the mean of our x values, which will be used 85 | // in each term of the autocorrelation. 86 | xBar := stat.Mean(x, nil) 87 | 88 | // Calculate the numerator. 89 | for idx, xVal := range xAdj { 90 | numerator += ((xVal - xBar) * (xLag[idx] - xBar)) 91 | } 92 | 93 | // Calculate the denominator. 94 | for _, xVal := range x { 95 | denominator += math.Pow(xVal-xBar, 2) 96 | } 97 | 98 | return numerator / denominator 99 | } 100 | -------------------------------------------------------------------------------- /Chapter04/non-linear_regression/example2/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/csv" 5 | "fmt" 6 | "log" 7 | "os" 8 | "strconv" 9 | 10 | "github.com/berkmancenter/ridge" 11 | "github.com/gonum/matrix/mat64" 12 | ) 13 | 14 | func main() { 15 | 16 | // Open the training dataset file. 17 | f, err := os.Open("training.csv") 18 | if err != nil { 19 | log.Fatal(err) 20 | } 21 | defer f.Close() 22 | 23 | // Create a new CSV reader reading from the opened file. 
24 | reader := csv.NewReader(f) 25 | reader.FieldsPerRecord = 4 26 | 27 | // Read in all of the CSV records 28 | rawCSVData, err := reader.ReadAll() 29 | if err != nil { 30 | log.Fatal(err) 31 | } 32 | 33 | // featureData will hold all the float values that will eventually be 34 | // used to form our matrix of features. 35 | featureData := make([]float64, 4*len(rawCSVData)) 36 | yData := make([]float64, len(rawCSVData)) 37 | 38 | // featureIndex and yIndex will track the current index of the matrix values. 39 | var featureIndex int 40 | var yIndex int 41 | 42 | // Sequentially move the rows into a slice of floats. 43 | for idx, record := range rawCSVData { 44 | 45 | // Skip the header row. 46 | if idx == 0 { 47 | continue 48 | } 49 | 50 | // Loop over the float columns. 51 | for i, val := range record { 52 | 53 | // Convert the value to a float. 54 | valParsed, err := strconv.ParseFloat(val, 64) 55 | if err != nil { 56 | log.Fatal("Could not parse float value") 57 | } 58 | 59 | if i < 3 { 60 | 61 | // Add an intercept to the model. 62 | if i == 0 { 63 | featureData[featureIndex] = 1 64 | featureIndex++ 65 | } 66 | 67 | // Add the float value to the slice of feature floats. 68 | featureData[featureIndex] = valParsed 69 | featureIndex++ 70 | } 71 | 72 | if i == 3 { 73 | 74 | // Add the float value to the slice of y floats. 75 | yData[yIndex] = valParsed 76 | yIndex++ 77 | } 78 | 79 | } 80 | } 81 | 82 | // Form the matrices that will be input to our regression. 83 | features := mat64.NewDense(len(rawCSVData), 4, featureData) 84 | y := mat64.NewVector(len(rawCSVData), yData) 85 | 86 | // Create a new RidgeRegression value, where 1.0 is the 87 | // penalty value. 88 | r := ridge.New(features, y, 1.0) 89 | 90 | // Train our regression model. 91 | r.Regress() 92 | 93 | // Print our regression formula. 94 | c1 := r.Coefficients.At(0, 0) 95 | c2 := r.Coefficients.At(1, 0) 96 | c3 := r.Coefficients.At(2, 0) 97 | c4 := r.Coefficients.At(3, 0) 98 | fmt.Printf("\nRegression formula:\n") 99 | fmt.Printf("y = %0.3f + %0.3f TV + %0.3f Radio + %0.3f Newspaper\n\n", c1, c2, c3, c4) 100 | } 101 | -------------------------------------------------------------------------------- /Chapter07/statistics/example4/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "log" 5 | "os" 6 | "strconv" 7 | 8 | "github.com/gonum/plot" 9 | "github.com/gonum/plot/plotter" 10 | "github.com/gonum/plot/plotutil" 11 | "github.com/gonum/plot/vg" 12 | "github.com/kniren/gota/dataframe" 13 | "github.com/sajari/regression" 14 | ) 15 | 16 | func main() { 17 | 18 | // Open the CSV file. 19 | passengersFile, err := os.Open("AirPassengers.csv") 20 | if err != nil { 21 | log.Fatal(err) 22 | } 23 | defer passengersFile.Close() 24 | 25 | // Create a dataframe from the CSV file. 26 | passengersDF := dataframe.ReadCSV(passengersFile) 27 | 28 | // Get the time and passengers as a slice of floats. 29 | passengers := passengersDF.Col("AirPassengers").Float() 30 | 31 | // Create a new plot, to plot our autocorrelations. 32 | p, err := plot.New() 33 | if err != nil { 34 | log.Fatal(err) 35 | } 36 | 37 | p.Title.Text = "Partial Autocorrelations for AirPassengers" 38 | p.X.Label.Text = "Lag" 39 | p.Y.Label.Text = "PACF" 40 | p.Y.Min = -1 41 | p.Y.Max = 1.5 42 | 43 | w := vg.Points(3) 44 | 45 | // Create the points for plotting. 46 | numLags := 20 47 | pts := make(plotter.Values, numLags) 48 | 49 | // Loop over various values of lag in the series.
50 | for i := 1; i <= numLags; i++ { 51 | 52 | // Calculate the partial autocorrelation. 53 | pts[i-1] = pacf(passengers, i) 54 | } 55 | 56 | // Add the points to the plot. 57 | bars, err := plotter.NewBarChart(pts, w) 58 | if err != nil { 59 | log.Fatal(err) 60 | } 61 | bars.LineStyle.Width = vg.Length(0) 62 | bars.Color = plotutil.Color(1) 63 | 64 | // Save the plot to a PNG file. 65 | p.Add(bars) 66 | if err := p.Save(8*vg.Inch, 4*vg.Inch, "pacf.png"); err != nil { 67 | log.Fatal(err) 68 | } 69 | } 70 | 71 | // pacf calculates the partial autocorrelation for a series 72 | // at the given lag. 73 | func pacf(x []float64, lag int) float64 { 74 | 75 | // Create a regression.Regression value needed to train 76 | // a model using github.com/sajari/regression. 77 | var r regression.Regression 78 | r.SetObserved("x") 79 | 80 | // Define the current lag and all of the intermediate lags. 81 | for i := 0; i < lag; i++ { 82 | r.SetVar(i, "x"+strconv.Itoa(i)) 83 | } 84 | 85 | // Shift the series. 86 | xAdj := x[lag:len(x)] 87 | 88 | // Loop over the series creating the data set 89 | // for the regression. 90 | for i, xVal := range xAdj { 91 | 92 | // Loop over the intermediate lags to build up 93 | // our independent variables. 94 | laggedVariables := make([]float64, lag) 95 | for idx := 1; idx <= lag; idx++ { 96 | 97 | // Get the lagged series variables. 98 | laggedVariables[idx-1] = x[lag+i-idx] 99 | } 100 | 101 | // Add these points to the regression value. 102 | r.Train(regression.DataPoint(xVal, laggedVariables)) 103 | } 104 | 105 | // Fit the regression. 106 | r.Run() 107 | 108 | return r.Coeff(lag) 109 | } 110 | -------------------------------------------------------------------------------- /Chapter07/auto_regressive/example4/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "log" 5 | "os" 6 | "strconv" 7 | 8 | "github.com/gonum/plot" 9 | "github.com/gonum/plot/plotter" 10 | "github.com/gonum/plot/plotutil" 11 | "github.com/gonum/plot/vg" 12 | "github.com/kniren/gota/dataframe" 13 | "github.com/sajari/regression" 14 | ) 15 | 16 | func main() { 17 | 18 | // Open the CSV file. 19 | passengersFile, err := os.Open("log_diff_series.csv") 20 | if err != nil { 21 | log.Fatal(err) 22 | } 23 | defer passengersFile.Close() 24 | 25 | // Create a dataframe from the CSV file. 26 | passengersDF := dataframe.ReadCSV(passengersFile) 27 | 28 | // Get the time and passengers as a slice of floats. 29 | passengers := passengersDF.Col("log_differenced_passengers").Float() 30 | 31 | // Create a new plot, to plot our autocorrelations. 32 | p, err := plot.New() 33 | if err != nil { 34 | log.Fatal(err) 35 | } 36 | 37 | p.Title.Text = "Partial Autocorrelations for log(differenced passengers)" 38 | p.X.Label.Text = "Lag" 39 | p.Y.Label.Text = "PACF" 40 | p.Y.Min = -1 41 | p.Y.Max = 1.5 42 | 43 | w := vg.Points(3) 44 | 45 | // Create the points for plotting. 46 | numLags := 20 47 | pts := make(plotter.Values, numLags) 48 | 49 | // Loop over various values of lag in the series. 50 | for i := 1; i <= numLags; i++ { 51 | 52 | // Calculate the partial autocorrelation. 53 | pts[i-1] = pacf(passengers, i) 54 | } 55 | 56 | // Add the points to the plot. 57 | bars, err := plotter.NewBarChart(pts, w) 58 | if err != nil { 59 | log.Fatal(err) 60 | } 61 | bars.LineStyle.Width = vg.Length(0) 62 | bars.Color = plotutil.Color(1) 63 | 64 | // Save the plot to a PNG file.
65 | p.Add(bars) 66 | if err := p.Save(8*vg.Inch, 4*vg.Inch, "pacf.png"); err != nil { 67 | log.Fatal(err) 68 | } 69 | } 70 | 71 | // pacf calculates the partial autocorrelation for a series 72 | // at the given lag. 73 | func pacf(x []float64, lag int) float64 { 74 | 75 | // Create a regresssion.Regression value needed to train 76 | // a model using github.com/sajari/regression. 77 | var r regression.Regression 78 | r.SetObserved("x") 79 | 80 | // Define the current lag and all of the intermediate lags. 81 | for i := 0; i < lag; i++ { 82 | r.SetVar(i, "x"+strconv.Itoa(i)) 83 | } 84 | 85 | // Shift the series. 86 | xAdj := x[lag:len(x)] 87 | 88 | // Loop over the series creating the data set 89 | // for the regression. 90 | for i, xVal := range xAdj { 91 | 92 | // Loop over the intermediate lags to build up 93 | // our independent variables. 94 | laggedVariables := make([]float64, lag) 95 | for idx := 1; idx <= lag; idx++ { 96 | 97 | // Get the lagged series variables. 98 | laggedVariables[idx-1] = x[lag+i-idx] 99 | } 100 | 101 | // Add these points to the regression value. 102 | r.Train(regression.DataPoint(xVal, laggedVariables)) 103 | } 104 | 105 | // Fit the regression. 106 | r.Run() 107 | 108 | return r.Coeff(lag) 109 | } 110 | -------------------------------------------------------------------------------- /Chapter07/statistics/example1/AirPassengers.csv: -------------------------------------------------------------------------------- 1 | time,AirPassengers 2 | 1949.0,112 3 | 1949.08333333,118 4 | 1949.16666667,132 5 | 1949.25,129 6 | 1949.33333333,121 7 | 1949.41666667,135 8 | 1949.5,148 9 | 1949.58333333,148 10 | 1949.66666667,136 11 | 1949.75,119 12 | 1949.83333333,104 13 | 1949.91666667,118 14 | 1950.0,115 15 | 1950.08333333,126 16 | 1950.16666667,141 17 | 1950.25,135 18 | 1950.33333333,125 19 | 1950.41666667,149 20 | 1950.5,170 21 | 1950.58333333,170 22 | 1950.66666667,158 23 | 1950.75,133 24 | 1950.83333333,114 25 | 1950.91666667,140 26 | 1951.0,145 27 | 1951.08333333,150 28 | 1951.16666667,178 29 | 1951.25,163 30 | 1951.33333333,172 31 | 1951.41666667,178 32 | 1951.5,199 33 | 1951.58333333,199 34 | 1951.66666667,184 35 | 1951.75,162 36 | 1951.83333333,146 37 | 1951.91666667,166 38 | 1952.0,171 39 | 1952.08333333,180 40 | 1952.16666667,193 41 | 1952.25,181 42 | 1952.33333333,183 43 | 1952.41666667,218 44 | 1952.5,230 45 | 1952.58333333,242 46 | 1952.66666667,209 47 | 1952.75,191 48 | 1952.83333333,172 49 | 1952.91666667,194 50 | 1953.0,196 51 | 1953.08333333,196 52 | 1953.16666667,236 53 | 1953.25,235 54 | 1953.33333333,229 55 | 1953.41666667,243 56 | 1953.5,264 57 | 1953.58333333,272 58 | 1953.66666667,237 59 | 1953.75,211 60 | 1953.83333333,180 61 | 1953.91666667,201 62 | 1954.0,204 63 | 1954.08333333,188 64 | 1954.16666667,235 65 | 1954.25,227 66 | 1954.33333333,234 67 | 1954.41666667,264 68 | 1954.5,302 69 | 1954.58333333,293 70 | 1954.66666667,259 71 | 1954.75,229 72 | 1954.83333333,203 73 | 1954.91666667,229 74 | 1955.0,242 75 | 1955.08333333,233 76 | 1955.16666667,267 77 | 1955.25,269 78 | 1955.33333333,270 79 | 1955.41666667,315 80 | 1955.5,364 81 | 1955.58333333,347 82 | 1955.66666667,312 83 | 1955.75,274 84 | 1955.83333333,237 85 | 1955.91666667,278 86 | 1956.0,284 87 | 1956.08333333,277 88 | 1956.16666667,317 89 | 1956.25,313 90 | 1956.33333333,318 91 | 1956.41666667,374 92 | 1956.5,413 93 | 1956.58333333,405 94 | 1956.66666667,355 95 | 1956.75,306 96 | 1956.83333333,271 97 | 1956.91666667,306 98 | 1957.0,315 99 | 1957.08333333,301 100 | 1957.16666667,356 101 | 
1957.25,348 102 | 1957.33333333,355 103 | 1957.41666667,422 104 | 1957.5,465 105 | 1957.58333333,467 106 | 1957.66666667,404 107 | 1957.75,347 108 | 1957.83333333,305 109 | 1957.91666667,336 110 | 1958.0,340 111 | 1958.08333333,318 112 | 1958.16666667,362 113 | 1958.25,348 114 | 1958.33333333,363 115 | 1958.41666667,435 116 | 1958.5,491 117 | 1958.58333333,505 118 | 1958.66666667,404 119 | 1958.75,359 120 | 1958.83333333,310 121 | 1958.91666667,337 122 | 1959.0,360 123 | 1959.08333333,342 124 | 1959.16666667,406 125 | 1959.25,396 126 | 1959.33333333,420 127 | 1959.41666667,472 128 | 1959.5,548 129 | 1959.58333333,559 130 | 1959.66666667,463 131 | 1959.75,407 132 | 1959.83333333,362 133 | 1959.91666667,405 134 | 1960.0,417 135 | 1960.08333333,391 136 | 1960.16666667,419 137 | 1960.25,461 138 | 1960.33333333,472 139 | 1960.41666667,535 140 | 1960.5,622 141 | 1960.58333333,606 142 | 1960.66666667,508 143 | 1960.75,461 144 | 1960.83333333,390 145 | 1960.91666667,432 146 | -------------------------------------------------------------------------------- /Chapter07/statistics/example2/AirPassengers.csv: -------------------------------------------------------------------------------- 1 | time,AirPassengers 2 | 1949.0,112 3 | 1949.08333333,118 4 | 1949.16666667,132 5 | 1949.25,129 6 | 1949.33333333,121 7 | 1949.41666667,135 8 | 1949.5,148 9 | 1949.58333333,148 10 | 1949.66666667,136 11 | 1949.75,119 12 | 1949.83333333,104 13 | 1949.91666667,118 14 | 1950.0,115 15 | 1950.08333333,126 16 | 1950.16666667,141 17 | 1950.25,135 18 | 1950.33333333,125 19 | 1950.41666667,149 20 | 1950.5,170 21 | 1950.58333333,170 22 | 1950.66666667,158 23 | 1950.75,133 24 | 1950.83333333,114 25 | 1950.91666667,140 26 | 1951.0,145 27 | 1951.08333333,150 28 | 1951.16666667,178 29 | 1951.25,163 30 | 1951.33333333,172 31 | 1951.41666667,178 32 | 1951.5,199 33 | 1951.58333333,199 34 | 1951.66666667,184 35 | 1951.75,162 36 | 1951.83333333,146 37 | 1951.91666667,166 38 | 1952.0,171 39 | 1952.08333333,180 40 | 1952.16666667,193 41 | 1952.25,181 42 | 1952.33333333,183 43 | 1952.41666667,218 44 | 1952.5,230 45 | 1952.58333333,242 46 | 1952.66666667,209 47 | 1952.75,191 48 | 1952.83333333,172 49 | 1952.91666667,194 50 | 1953.0,196 51 | 1953.08333333,196 52 | 1953.16666667,236 53 | 1953.25,235 54 | 1953.33333333,229 55 | 1953.41666667,243 56 | 1953.5,264 57 | 1953.58333333,272 58 | 1953.66666667,237 59 | 1953.75,211 60 | 1953.83333333,180 61 | 1953.91666667,201 62 | 1954.0,204 63 | 1954.08333333,188 64 | 1954.16666667,235 65 | 1954.25,227 66 | 1954.33333333,234 67 | 1954.41666667,264 68 | 1954.5,302 69 | 1954.58333333,293 70 | 1954.66666667,259 71 | 1954.75,229 72 | 1954.83333333,203 73 | 1954.91666667,229 74 | 1955.0,242 75 | 1955.08333333,233 76 | 1955.16666667,267 77 | 1955.25,269 78 | 1955.33333333,270 79 | 1955.41666667,315 80 | 1955.5,364 81 | 1955.58333333,347 82 | 1955.66666667,312 83 | 1955.75,274 84 | 1955.83333333,237 85 | 1955.91666667,278 86 | 1956.0,284 87 | 1956.08333333,277 88 | 1956.16666667,317 89 | 1956.25,313 90 | 1956.33333333,318 91 | 1956.41666667,374 92 | 1956.5,413 93 | 1956.58333333,405 94 | 1956.66666667,355 95 | 1956.75,306 96 | 1956.83333333,271 97 | 1956.91666667,306 98 | 1957.0,315 99 | 1957.08333333,301 100 | 1957.16666667,356 101 | 1957.25,348 102 | 1957.33333333,355 103 | 1957.41666667,422 104 | 1957.5,465 105 | 1957.58333333,467 106 | 1957.66666667,404 107 | 1957.75,347 108 | 1957.83333333,305 109 | 1957.91666667,336 110 | 1958.0,340 111 | 1958.08333333,318 112 | 1958.16666667,362 113 | 1958.25,348 
114 | 1958.33333333,363 115 | 1958.41666667,435 116 | 1958.5,491 117 | 1958.58333333,505 118 | 1958.66666667,404 119 | 1958.75,359 120 | 1958.83333333,310 121 | 1958.91666667,337 122 | 1959.0,360 123 | 1959.08333333,342 124 | 1959.16666667,406 125 | 1959.25,396 126 | 1959.33333333,420 127 | 1959.41666667,472 128 | 1959.5,548 129 | 1959.58333333,559 130 | 1959.66666667,463 131 | 1959.75,407 132 | 1959.83333333,362 133 | 1959.91666667,405 134 | 1960.0,417 135 | 1960.08333333,391 136 | 1960.16666667,419 137 | 1960.25,461 138 | 1960.33333333,472 139 | 1960.41666667,535 140 | 1960.5,622 141 | 1960.58333333,606 142 | 1960.66666667,508 143 | 1960.75,461 144 | 1960.83333333,390 145 | 1960.91666667,432 146 | -------------------------------------------------------------------------------- /Chapter07/statistics/example3/AirPassengers.csv: -------------------------------------------------------------------------------- 1 | time,AirPassengers 2 | 1949.0,112 3 | 1949.08333333,118 4 | 1949.16666667,132 5 | 1949.25,129 6 | 1949.33333333,121 7 | 1949.41666667,135 8 | 1949.5,148 9 | 1949.58333333,148 10 | 1949.66666667,136 11 | 1949.75,119 12 | 1949.83333333,104 13 | 1949.91666667,118 14 | 1950.0,115 15 | 1950.08333333,126 16 | 1950.16666667,141 17 | 1950.25,135 18 | 1950.33333333,125 19 | 1950.41666667,149 20 | 1950.5,170 21 | 1950.58333333,170 22 | 1950.66666667,158 23 | 1950.75,133 24 | 1950.83333333,114 25 | 1950.91666667,140 26 | 1951.0,145 27 | 1951.08333333,150 28 | 1951.16666667,178 29 | 1951.25,163 30 | 1951.33333333,172 31 | 1951.41666667,178 32 | 1951.5,199 33 | 1951.58333333,199 34 | 1951.66666667,184 35 | 1951.75,162 36 | 1951.83333333,146 37 | 1951.91666667,166 38 | 1952.0,171 39 | 1952.08333333,180 40 | 1952.16666667,193 41 | 1952.25,181 42 | 1952.33333333,183 43 | 1952.41666667,218 44 | 1952.5,230 45 | 1952.58333333,242 46 | 1952.66666667,209 47 | 1952.75,191 48 | 1952.83333333,172 49 | 1952.91666667,194 50 | 1953.0,196 51 | 1953.08333333,196 52 | 1953.16666667,236 53 | 1953.25,235 54 | 1953.33333333,229 55 | 1953.41666667,243 56 | 1953.5,264 57 | 1953.58333333,272 58 | 1953.66666667,237 59 | 1953.75,211 60 | 1953.83333333,180 61 | 1953.91666667,201 62 | 1954.0,204 63 | 1954.08333333,188 64 | 1954.16666667,235 65 | 1954.25,227 66 | 1954.33333333,234 67 | 1954.41666667,264 68 | 1954.5,302 69 | 1954.58333333,293 70 | 1954.66666667,259 71 | 1954.75,229 72 | 1954.83333333,203 73 | 1954.91666667,229 74 | 1955.0,242 75 | 1955.08333333,233 76 | 1955.16666667,267 77 | 1955.25,269 78 | 1955.33333333,270 79 | 1955.41666667,315 80 | 1955.5,364 81 | 1955.58333333,347 82 | 1955.66666667,312 83 | 1955.75,274 84 | 1955.83333333,237 85 | 1955.91666667,278 86 | 1956.0,284 87 | 1956.08333333,277 88 | 1956.16666667,317 89 | 1956.25,313 90 | 1956.33333333,318 91 | 1956.41666667,374 92 | 1956.5,413 93 | 1956.58333333,405 94 | 1956.66666667,355 95 | 1956.75,306 96 | 1956.83333333,271 97 | 1956.91666667,306 98 | 1957.0,315 99 | 1957.08333333,301 100 | 1957.16666667,356 101 | 1957.25,348 102 | 1957.33333333,355 103 | 1957.41666667,422 104 | 1957.5,465 105 | 1957.58333333,467 106 | 1957.66666667,404 107 | 1957.75,347 108 | 1957.83333333,305 109 | 1957.91666667,336 110 | 1958.0,340 111 | 1958.08333333,318 112 | 1958.16666667,362 113 | 1958.25,348 114 | 1958.33333333,363 115 | 1958.41666667,435 116 | 1958.5,491 117 | 1958.58333333,505 118 | 1958.66666667,404 119 | 1958.75,359 120 | 1958.83333333,310 121 | 1958.91666667,337 122 | 1959.0,360 123 | 1959.08333333,342 124 | 1959.16666667,406 125 | 1959.25,396 126 | 
1959.33333333,420 127 | 1959.41666667,472 128 | 1959.5,548 129 | 1959.58333333,559 130 | 1959.66666667,463 131 | 1959.75,407 132 | 1959.83333333,362 133 | 1959.91666667,405 134 | 1960.0,417 135 | 1960.08333333,391 136 | 1960.16666667,419 137 | 1960.25,461 138 | 1960.33333333,472 139 | 1960.41666667,535 140 | 1960.5,622 141 | 1960.58333333,606 142 | 1960.66666667,508 143 | 1960.75,461 144 | 1960.83333333,390 145 | 1960.91666667,432 146 | -------------------------------------------------------------------------------- /Chapter07/statistics/example4/AirPassengers.csv: -------------------------------------------------------------------------------- 1 | time,AirPassengers 2 | 1949.0,112 3 | 1949.08333333,118 4 | 1949.16666667,132 5 | 1949.25,129 6 | 1949.33333333,121 7 | 1949.41666667,135 8 | 1949.5,148 9 | 1949.58333333,148 10 | 1949.66666667,136 11 | 1949.75,119 12 | 1949.83333333,104 13 | 1949.91666667,118 14 | 1950.0,115 15 | 1950.08333333,126 16 | 1950.16666667,141 17 | 1950.25,135 18 | 1950.33333333,125 19 | 1950.41666667,149 20 | 1950.5,170 21 | 1950.58333333,170 22 | 1950.66666667,158 23 | 1950.75,133 24 | 1950.83333333,114 25 | 1950.91666667,140 26 | 1951.0,145 27 | 1951.08333333,150 28 | 1951.16666667,178 29 | 1951.25,163 30 | 1951.33333333,172 31 | 1951.41666667,178 32 | 1951.5,199 33 | 1951.58333333,199 34 | 1951.66666667,184 35 | 1951.75,162 36 | 1951.83333333,146 37 | 1951.91666667,166 38 | 1952.0,171 39 | 1952.08333333,180 40 | 1952.16666667,193 41 | 1952.25,181 42 | 1952.33333333,183 43 | 1952.41666667,218 44 | 1952.5,230 45 | 1952.58333333,242 46 | 1952.66666667,209 47 | 1952.75,191 48 | 1952.83333333,172 49 | 1952.91666667,194 50 | 1953.0,196 51 | 1953.08333333,196 52 | 1953.16666667,236 53 | 1953.25,235 54 | 1953.33333333,229 55 | 1953.41666667,243 56 | 1953.5,264 57 | 1953.58333333,272 58 | 1953.66666667,237 59 | 1953.75,211 60 | 1953.83333333,180 61 | 1953.91666667,201 62 | 1954.0,204 63 | 1954.08333333,188 64 | 1954.16666667,235 65 | 1954.25,227 66 | 1954.33333333,234 67 | 1954.41666667,264 68 | 1954.5,302 69 | 1954.58333333,293 70 | 1954.66666667,259 71 | 1954.75,229 72 | 1954.83333333,203 73 | 1954.91666667,229 74 | 1955.0,242 75 | 1955.08333333,233 76 | 1955.16666667,267 77 | 1955.25,269 78 | 1955.33333333,270 79 | 1955.41666667,315 80 | 1955.5,364 81 | 1955.58333333,347 82 | 1955.66666667,312 83 | 1955.75,274 84 | 1955.83333333,237 85 | 1955.91666667,278 86 | 1956.0,284 87 | 1956.08333333,277 88 | 1956.16666667,317 89 | 1956.25,313 90 | 1956.33333333,318 91 | 1956.41666667,374 92 | 1956.5,413 93 | 1956.58333333,405 94 | 1956.66666667,355 95 | 1956.75,306 96 | 1956.83333333,271 97 | 1956.91666667,306 98 | 1957.0,315 99 | 1957.08333333,301 100 | 1957.16666667,356 101 | 1957.25,348 102 | 1957.33333333,355 103 | 1957.41666667,422 104 | 1957.5,465 105 | 1957.58333333,467 106 | 1957.66666667,404 107 | 1957.75,347 108 | 1957.83333333,305 109 | 1957.91666667,336 110 | 1958.0,340 111 | 1958.08333333,318 112 | 1958.16666667,362 113 | 1958.25,348 114 | 1958.33333333,363 115 | 1958.41666667,435 116 | 1958.5,491 117 | 1958.58333333,505 118 | 1958.66666667,404 119 | 1958.75,359 120 | 1958.83333333,310 121 | 1958.91666667,337 122 | 1959.0,360 123 | 1959.08333333,342 124 | 1959.16666667,406 125 | 1959.25,396 126 | 1959.33333333,420 127 | 1959.41666667,472 128 | 1959.5,548 129 | 1959.58333333,559 130 | 1959.66666667,463 131 | 1959.75,407 132 | 1959.83333333,362 133 | 1959.91666667,405 134 | 1960.0,417 135 | 1960.08333333,391 136 | 1960.16666667,419 137 | 1960.25,461 138 | 
1960.33333333,472 139 | 1960.41666667,535 140 | 1960.5,622 141 | 1960.58333333,606 142 | 1960.66666667,508 143 | 1960.75,461 144 | 1960.83333333,390 145 | 1960.91666667,432 146 | -------------------------------------------------------------------------------- /Chapter07/auto_regressive/example1/AirPassengers.csv: -------------------------------------------------------------------------------- 1 | time,AirPassengers 2 | 1949.0,112 3 | 1949.08333333,118 4 | 1949.16666667,132 5 | 1949.25,129 6 | 1949.33333333,121 7 | 1949.41666667,135 8 | 1949.5,148 9 | 1949.58333333,148 10 | 1949.66666667,136 11 | 1949.75,119 12 | 1949.83333333,104 13 | 1949.91666667,118 14 | 1950.0,115 15 | 1950.08333333,126 16 | 1950.16666667,141 17 | 1950.25,135 18 | 1950.33333333,125 19 | 1950.41666667,149 20 | 1950.5,170 21 | 1950.58333333,170 22 | 1950.66666667,158 23 | 1950.75,133 24 | 1950.83333333,114 25 | 1950.91666667,140 26 | 1951.0,145 27 | 1951.08333333,150 28 | 1951.16666667,178 29 | 1951.25,163 30 | 1951.33333333,172 31 | 1951.41666667,178 32 | 1951.5,199 33 | 1951.58333333,199 34 | 1951.66666667,184 35 | 1951.75,162 36 | 1951.83333333,146 37 | 1951.91666667,166 38 | 1952.0,171 39 | 1952.08333333,180 40 | 1952.16666667,193 41 | 1952.25,181 42 | 1952.33333333,183 43 | 1952.41666667,218 44 | 1952.5,230 45 | 1952.58333333,242 46 | 1952.66666667,209 47 | 1952.75,191 48 | 1952.83333333,172 49 | 1952.91666667,194 50 | 1953.0,196 51 | 1953.08333333,196 52 | 1953.16666667,236 53 | 1953.25,235 54 | 1953.33333333,229 55 | 1953.41666667,243 56 | 1953.5,264 57 | 1953.58333333,272 58 | 1953.66666667,237 59 | 1953.75,211 60 | 1953.83333333,180 61 | 1953.91666667,201 62 | 1954.0,204 63 | 1954.08333333,188 64 | 1954.16666667,235 65 | 1954.25,227 66 | 1954.33333333,234 67 | 1954.41666667,264 68 | 1954.5,302 69 | 1954.58333333,293 70 | 1954.66666667,259 71 | 1954.75,229 72 | 1954.83333333,203 73 | 1954.91666667,229 74 | 1955.0,242 75 | 1955.08333333,233 76 | 1955.16666667,267 77 | 1955.25,269 78 | 1955.33333333,270 79 | 1955.41666667,315 80 | 1955.5,364 81 | 1955.58333333,347 82 | 1955.66666667,312 83 | 1955.75,274 84 | 1955.83333333,237 85 | 1955.91666667,278 86 | 1956.0,284 87 | 1956.08333333,277 88 | 1956.16666667,317 89 | 1956.25,313 90 | 1956.33333333,318 91 | 1956.41666667,374 92 | 1956.5,413 93 | 1956.58333333,405 94 | 1956.66666667,355 95 | 1956.75,306 96 | 1956.83333333,271 97 | 1956.91666667,306 98 | 1957.0,315 99 | 1957.08333333,301 100 | 1957.16666667,356 101 | 1957.25,348 102 | 1957.33333333,355 103 | 1957.41666667,422 104 | 1957.5,465 105 | 1957.58333333,467 106 | 1957.66666667,404 107 | 1957.75,347 108 | 1957.83333333,305 109 | 1957.91666667,336 110 | 1958.0,340 111 | 1958.08333333,318 112 | 1958.16666667,362 113 | 1958.25,348 114 | 1958.33333333,363 115 | 1958.41666667,435 116 | 1958.5,491 117 | 1958.58333333,505 118 | 1958.66666667,404 119 | 1958.75,359 120 | 1958.83333333,310 121 | 1958.91666667,337 122 | 1959.0,360 123 | 1959.08333333,342 124 | 1959.16666667,406 125 | 1959.25,396 126 | 1959.33333333,420 127 | 1959.41666667,472 128 | 1959.5,548 129 | 1959.58333333,559 130 | 1959.66666667,463 131 | 1959.75,407 132 | 1959.83333333,362 133 | 1959.91666667,405 134 | 1960.0,417 135 | 1960.08333333,391 136 | 1960.16666667,419 137 | 1960.25,461 138 | 1960.33333333,472 139 | 1960.41666667,535 140 | 1960.5,622 141 | 1960.58333333,606 142 | 1960.66666667,508 143 | 1960.75,461 144 | 1960.83333333,390 145 | 1960.91666667,432 146 | -------------------------------------------------------------------------------- 
/Chapter07/auto_regressive/example2/AirPassengers.csv: -------------------------------------------------------------------------------- 1 | time,AirPassengers 2 | 1949.0,112 3 | 1949.08333333,118 4 | 1949.16666667,132 5 | 1949.25,129 6 | 1949.33333333,121 7 | 1949.41666667,135 8 | 1949.5,148 9 | 1949.58333333,148 10 | 1949.66666667,136 11 | 1949.75,119 12 | 1949.83333333,104 13 | 1949.91666667,118 14 | 1950.0,115 15 | 1950.08333333,126 16 | 1950.16666667,141 17 | 1950.25,135 18 | 1950.33333333,125 19 | 1950.41666667,149 20 | 1950.5,170 21 | 1950.58333333,170 22 | 1950.66666667,158 23 | 1950.75,133 24 | 1950.83333333,114 25 | 1950.91666667,140 26 | 1951.0,145 27 | 1951.08333333,150 28 | 1951.16666667,178 29 | 1951.25,163 30 | 1951.33333333,172 31 | 1951.41666667,178 32 | 1951.5,199 33 | 1951.58333333,199 34 | 1951.66666667,184 35 | 1951.75,162 36 | 1951.83333333,146 37 | 1951.91666667,166 38 | 1952.0,171 39 | 1952.08333333,180 40 | 1952.16666667,193 41 | 1952.25,181 42 | 1952.33333333,183 43 | 1952.41666667,218 44 | 1952.5,230 45 | 1952.58333333,242 46 | 1952.66666667,209 47 | 1952.75,191 48 | 1952.83333333,172 49 | 1952.91666667,194 50 | 1953.0,196 51 | 1953.08333333,196 52 | 1953.16666667,236 53 | 1953.25,235 54 | 1953.33333333,229 55 | 1953.41666667,243 56 | 1953.5,264 57 | 1953.58333333,272 58 | 1953.66666667,237 59 | 1953.75,211 60 | 1953.83333333,180 61 | 1953.91666667,201 62 | 1954.0,204 63 | 1954.08333333,188 64 | 1954.16666667,235 65 | 1954.25,227 66 | 1954.33333333,234 67 | 1954.41666667,264 68 | 1954.5,302 69 | 1954.58333333,293 70 | 1954.66666667,259 71 | 1954.75,229 72 | 1954.83333333,203 73 | 1954.91666667,229 74 | 1955.0,242 75 | 1955.08333333,233 76 | 1955.16666667,267 77 | 1955.25,269 78 | 1955.33333333,270 79 | 1955.41666667,315 80 | 1955.5,364 81 | 1955.58333333,347 82 | 1955.66666667,312 83 | 1955.75,274 84 | 1955.83333333,237 85 | 1955.91666667,278 86 | 1956.0,284 87 | 1956.08333333,277 88 | 1956.16666667,317 89 | 1956.25,313 90 | 1956.33333333,318 91 | 1956.41666667,374 92 | 1956.5,413 93 | 1956.58333333,405 94 | 1956.66666667,355 95 | 1956.75,306 96 | 1956.83333333,271 97 | 1956.91666667,306 98 | 1957.0,315 99 | 1957.08333333,301 100 | 1957.16666667,356 101 | 1957.25,348 102 | 1957.33333333,355 103 | 1957.41666667,422 104 | 1957.5,465 105 | 1957.58333333,467 106 | 1957.66666667,404 107 | 1957.75,347 108 | 1957.83333333,305 109 | 1957.91666667,336 110 | 1958.0,340 111 | 1958.08333333,318 112 | 1958.16666667,362 113 | 1958.25,348 114 | 1958.33333333,363 115 | 1958.41666667,435 116 | 1958.5,491 117 | 1958.58333333,505 118 | 1958.66666667,404 119 | 1958.75,359 120 | 1958.83333333,310 121 | 1958.91666667,337 122 | 1959.0,360 123 | 1959.08333333,342 124 | 1959.16666667,406 125 | 1959.25,396 126 | 1959.33333333,420 127 | 1959.41666667,472 128 | 1959.5,548 129 | 1959.58333333,559 130 | 1959.66666667,463 131 | 1959.75,407 132 | 1959.83333333,362 133 | 1959.91666667,405 134 | 1960.0,417 135 | 1960.08333333,391 136 | 1960.16666667,419 137 | 1960.25,461 138 | 1960.33333333,472 139 | 1960.41666667,535 140 | 1960.5,622 141 | 1960.58333333,606 142 | 1960.66666667,508 143 | 1960.75,461 144 | 1960.83333333,390 145 | 1960.91666667,432 146 | -------------------------------------------------------------------------------- /Chapter07/auto_regressive/example6/AirPassengers.csv: -------------------------------------------------------------------------------- 1 | time,AirPassengers 2 | 1949.0,112 3 | 1949.08333333,118 4 | 1949.16666667,132 5 | 1949.25,129 6 | 1949.33333333,121 7 | 
1949.41666667,135 8 | 1949.5,148 9 | 1949.58333333,148 10 | 1949.66666667,136 11 | 1949.75,119 12 | 1949.83333333,104 13 | 1949.91666667,118 14 | 1950.0,115 15 | 1950.08333333,126 16 | 1950.16666667,141 17 | 1950.25,135 18 | 1950.33333333,125 19 | 1950.41666667,149 20 | 1950.5,170 21 | 1950.58333333,170 22 | 1950.66666667,158 23 | 1950.75,133 24 | 1950.83333333,114 25 | 1950.91666667,140 26 | 1951.0,145 27 | 1951.08333333,150 28 | 1951.16666667,178 29 | 1951.25,163 30 | 1951.33333333,172 31 | 1951.41666667,178 32 | 1951.5,199 33 | 1951.58333333,199 34 | 1951.66666667,184 35 | 1951.75,162 36 | 1951.83333333,146 37 | 1951.91666667,166 38 | 1952.0,171 39 | 1952.08333333,180 40 | 1952.16666667,193 41 | 1952.25,181 42 | 1952.33333333,183 43 | 1952.41666667,218 44 | 1952.5,230 45 | 1952.58333333,242 46 | 1952.66666667,209 47 | 1952.75,191 48 | 1952.83333333,172 49 | 1952.91666667,194 50 | 1953.0,196 51 | 1953.08333333,196 52 | 1953.16666667,236 53 | 1953.25,235 54 | 1953.33333333,229 55 | 1953.41666667,243 56 | 1953.5,264 57 | 1953.58333333,272 58 | 1953.66666667,237 59 | 1953.75,211 60 | 1953.83333333,180 61 | 1953.91666667,201 62 | 1954.0,204 63 | 1954.08333333,188 64 | 1954.16666667,235 65 | 1954.25,227 66 | 1954.33333333,234 67 | 1954.41666667,264 68 | 1954.5,302 69 | 1954.58333333,293 70 | 1954.66666667,259 71 | 1954.75,229 72 | 1954.83333333,203 73 | 1954.91666667,229 74 | 1955.0,242 75 | 1955.08333333,233 76 | 1955.16666667,267 77 | 1955.25,269 78 | 1955.33333333,270 79 | 1955.41666667,315 80 | 1955.5,364 81 | 1955.58333333,347 82 | 1955.66666667,312 83 | 1955.75,274 84 | 1955.83333333,237 85 | 1955.91666667,278 86 | 1956.0,284 87 | 1956.08333333,277 88 | 1956.16666667,317 89 | 1956.25,313 90 | 1956.33333333,318 91 | 1956.41666667,374 92 | 1956.5,413 93 | 1956.58333333,405 94 | 1956.66666667,355 95 | 1956.75,306 96 | 1956.83333333,271 97 | 1956.91666667,306 98 | 1957.0,315 99 | 1957.08333333,301 100 | 1957.16666667,356 101 | 1957.25,348 102 | 1957.33333333,355 103 | 1957.41666667,422 104 | 1957.5,465 105 | 1957.58333333,467 106 | 1957.66666667,404 107 | 1957.75,347 108 | 1957.83333333,305 109 | 1957.91666667,336 110 | 1958.0,340 111 | 1958.08333333,318 112 | 1958.16666667,362 113 | 1958.25,348 114 | 1958.33333333,363 115 | 1958.41666667,435 116 | 1958.5,491 117 | 1958.58333333,505 118 | 1958.66666667,404 119 | 1958.75,359 120 | 1958.83333333,310 121 | 1958.91666667,337 122 | 1959.0,360 123 | 1959.08333333,342 124 | 1959.16666667,406 125 | 1959.25,396 126 | 1959.33333333,420 127 | 1959.41666667,472 128 | 1959.5,548 129 | 1959.58333333,559 130 | 1959.66666667,463 131 | 1959.75,407 132 | 1959.83333333,362 133 | 1959.91666667,405 134 | 1960.0,417 135 | 1960.08333333,391 136 | 1960.16666667,419 137 | 1960.25,461 138 | 1960.33333333,472 139 | 1960.41666667,535 140 | 1960.5,622 141 | 1960.58333333,606 142 | 1960.66666667,508 143 | 1960.75,461 144 | 1960.83333333,390 145 | 1960.91666667,432 146 | -------------------------------------------------------------------------------- /Chapter03/evaluation/categorical_metrics/example2/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/csv" 5 | "fmt" 6 | "io" 7 | "log" 8 | "os" 9 | "strconv" 10 | ) 11 | 12 | func main() { 13 | 14 | // Open the labeled observations and predictions. 15 | f, err := os.Open("labeled.csv") 16 | if err != nil { 17 | log.Fatal(err) 18 | } 19 | defer f.Close() 20 | 21 | // Create a new CSV reader reading from the opened file. 
22 | reader := csv.NewReader(f) 23 | 24 | // observed and predicted will hold the parsed observed and predicted values 25 | // form the labeled data file. 26 | var observed []int 27 | var predicted []int 28 | 29 | // line will track row numbers for logging. 30 | line := 1 31 | 32 | // Read in the records looking for unexpected types in the columns. 33 | for { 34 | 35 | // Read in a row. Check if we are at the end of the file. 36 | record, err := reader.Read() 37 | if err == io.EOF { 38 | break 39 | } 40 | 41 | // Skip the header. 42 | if line == 1 { 43 | line++ 44 | continue 45 | } 46 | 47 | // Read in the observed and predicted values. 48 | observedVal, err := strconv.Atoi(record[0]) 49 | if err != nil { 50 | log.Printf("Parsing line %d failed, unexpected type\n", line) 51 | continue 52 | } 53 | 54 | predictedVal, err := strconv.Atoi(record[1]) 55 | if err != nil { 56 | log.Printf("Parsing line %d failed, unexpected type\n", line) 57 | continue 58 | } 59 | 60 | // Append the record to our slice, if it has the expected type. 61 | observed = append(observed, observedVal) 62 | predicted = append(predicted, predictedVal) 63 | line++ 64 | } 65 | 66 | // classes contains the three possible classes in the labeled data. 67 | classes := []int{0, 1, 2} 68 | 69 | // Loop over each class. 70 | for _, class := range classes { 71 | 72 | // These variables will hold our count of true positives and 73 | // our count of false positives. 74 | var truePos int 75 | var falsePos int 76 | var falseNeg int 77 | 78 | // Accumulate the true positive and false positive counts. 79 | for idx, oVal := range observed { 80 | 81 | switch oVal { 82 | 83 | // If the observed value is the relevant class, we should check to 84 | // see if we predicted that class. 85 | case class: 86 | if predicted[idx] == class { 87 | truePos++ 88 | continue 89 | } 90 | 91 | falseNeg++ 92 | 93 | // If the observed value is a different class, we should check to see 94 | // if we predicted a false positive. 95 | default: 96 | if predicted[idx] == class { 97 | falsePos++ 98 | } 99 | } 100 | } 101 | 102 | // Calculate the precision. 103 | precision := float64(truePos) / float64(truePos+falsePos) 104 | 105 | // Calculate the recall. 106 | recall := float64(truePos) / float64(truePos+falseNeg) 107 | 108 | // Output the precision value to standard out. 
109 | fmt.Printf("\nPrecision (class %d) = %0.2f", class, precision) 110 | fmt.Printf("\nRecall (class %d) = %0.2f\n\n", class, recall) 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /Chapter07/representing_time_series/example1/AirPassengers.csv: -------------------------------------------------------------------------------- 1 | time,AirPassengers 2 | 1949.0,112 3 | 1949.08333333,118 4 | 1949.16666667,132 5 | 1949.25,129 6 | 1949.33333333,121 7 | 1949.41666667,135 8 | 1949.5,148 9 | 1949.58333333,148 10 | 1949.66666667,136 11 | 1949.75,119 12 | 1949.83333333,104 13 | 1949.91666667,118 14 | 1950.0,115 15 | 1950.08333333,126 16 | 1950.16666667,141 17 | 1950.25,135 18 | 1950.33333333,125 19 | 1950.41666667,149 20 | 1950.5,170 21 | 1950.58333333,170 22 | 1950.66666667,158 23 | 1950.75,133 24 | 1950.83333333,114 25 | 1950.91666667,140 26 | 1951.0,145 27 | 1951.08333333,150 28 | 1951.16666667,178 29 | 1951.25,163 30 | 1951.33333333,172 31 | 1951.41666667,178 32 | 1951.5,199 33 | 1951.58333333,199 34 | 1951.66666667,184 35 | 1951.75,162 36 | 1951.83333333,146 37 | 1951.91666667,166 38 | 1952.0,171 39 | 1952.08333333,180 40 | 1952.16666667,193 41 | 1952.25,181 42 | 1952.33333333,183 43 | 1952.41666667,218 44 | 1952.5,230 45 | 1952.58333333,242 46 | 1952.66666667,209 47 | 1952.75,191 48 | 1952.83333333,172 49 | 1952.91666667,194 50 | 1953.0,196 51 | 1953.08333333,196 52 | 1953.16666667,236 53 | 1953.25,235 54 | 1953.33333333,229 55 | 1953.41666667,243 56 | 1953.5,264 57 | 1953.58333333,272 58 | 1953.66666667,237 59 | 1953.75,211 60 | 1953.83333333,180 61 | 1953.91666667,201 62 | 1954.0,204 63 | 1954.08333333,188 64 | 1954.16666667,235 65 | 1954.25,227 66 | 1954.33333333,234 67 | 1954.41666667,264 68 | 1954.5,302 69 | 1954.58333333,293 70 | 1954.66666667,259 71 | 1954.75,229 72 | 1954.83333333,203 73 | 1954.91666667,229 74 | 1955.0,242 75 | 1955.08333333,233 76 | 1955.16666667,267 77 | 1955.25,269 78 | 1955.33333333,270 79 | 1955.41666667,315 80 | 1955.5,364 81 | 1955.58333333,347 82 | 1955.66666667,312 83 | 1955.75,274 84 | 1955.83333333,237 85 | 1955.91666667,278 86 | 1956.0,284 87 | 1956.08333333,277 88 | 1956.16666667,317 89 | 1956.25,313 90 | 1956.33333333,318 91 | 1956.41666667,374 92 | 1956.5,413 93 | 1956.58333333,405 94 | 1956.66666667,355 95 | 1956.75,306 96 | 1956.83333333,271 97 | 1956.91666667,306 98 | 1957.0,315 99 | 1957.08333333,301 100 | 1957.16666667,356 101 | 1957.25,348 102 | 1957.33333333,355 103 | 1957.41666667,422 104 | 1957.5,465 105 | 1957.58333333,467 106 | 1957.66666667,404 107 | 1957.75,347 108 | 1957.83333333,305 109 | 1957.91666667,336 110 | 1958.0,340 111 | 1958.08333333,318 112 | 1958.16666667,362 113 | 1958.25,348 114 | 1958.33333333,363 115 | 1958.41666667,435 116 | 1958.5,491 117 | 1958.58333333,505 118 | 1958.66666667,404 119 | 1958.75,359 120 | 1958.83333333,310 121 | 1958.91666667,337 122 | 1959.0,360 123 | 1959.08333333,342 124 | 1959.16666667,406 125 | 1959.25,396 126 | 1959.33333333,420 127 | 1959.41666667,472 128 | 1959.5,548 129 | 1959.58333333,559 130 | 1959.66666667,463 131 | 1959.75,407 132 | 1959.83333333,362 133 | 1959.91666667,405 134 | 1960.0,417 135 | 1960.08333333,391 136 | 1960.16666667,419 137 | 1960.25,461 138 | 1960.33333333,472 139 | 1960.41666667,535 140 | 1960.5,622 141 | 1960.58333333,606 142 | 1960.66666667,508 143 | 1960.75,461 144 | 1960.83333333,390 145 | 1960.91666667,432 146 | -------------------------------------------------------------------------------- 
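The Chapter03 categorical-metrics program shown earlier (Chapter03/evaluation/categorical_metrics/example2/myprogram.go) reports per-class precision and recall from labeled.csv. A minimal companion sketch for the overall accuracy over the same two-column, integer-labeled layout could look like the following; the labeled.csv file name and column layout are taken from that example, while the variable names and printed output here are purely illustrative:

```
package main

import (
	"encoding/csv"
	"fmt"
	"io"
	"log"
	"os"
	"strconv"
)

func main() {

	// Open the labeled observations and predictions (same two-column,
	// header-first layout as labeled.csv in the example above).
	f, err := os.Open("labeled.csv")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	// Create a new CSV reader reading from the opened file.
	reader := csv.NewReader(f)

	// correct counts rows where the prediction matches the observation,
	// and total counts all successfully parsed rows.
	var correct, total int

	// line tracks row numbers for logging.
	line := 1

	for {

		// Read in a row. Check if we are at the end of the file.
		record, err := reader.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			log.Fatal(err)
		}

		// Skip the header.
		if line == 1 {
			line++
			continue
		}

		// Parse the observed and predicted class labels.
		observed, err := strconv.Atoi(record[0])
		if err != nil {
			log.Printf("Parsing line %d failed, unexpected type\n", line)
			line++
			continue
		}

		predicted, err := strconv.Atoi(record[1])
		if err != nil {
			log.Printf("Parsing line %d failed, unexpected type\n", line)
			line++
			continue
		}

		// Count the row, and count it as correct if the labels agree.
		if observed == predicted {
			correct++
		}
		total++
		line++
	}

	// Accuracy is the fraction of predictions that match the observations.
	fmt.Printf("\nAccuracy = %0.2f\n\n", float64(correct)/float64(total))
}
```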
/Chapter07/representing_time_series/example2/AirPassengers.csv: -------------------------------------------------------------------------------- 1 | time,AirPassengers 2 | 1949.0,112 3 | 1949.08333333,118 4 | 1949.16666667,132 5 | 1949.25,129 6 | 1949.33333333,121 7 | 1949.41666667,135 8 | 1949.5,148 9 | 1949.58333333,148 10 | 1949.66666667,136 11 | 1949.75,119 12 | 1949.83333333,104 13 | 1949.91666667,118 14 | 1950.0,115 15 | 1950.08333333,126 16 | 1950.16666667,141 17 | 1950.25,135 18 | 1950.33333333,125 19 | 1950.41666667,149 20 | 1950.5,170 21 | 1950.58333333,170 22 | 1950.66666667,158 23 | 1950.75,133 24 | 1950.83333333,114 25 | 1950.91666667,140 26 | 1951.0,145 27 | 1951.08333333,150 28 | 1951.16666667,178 29 | 1951.25,163 30 | 1951.33333333,172 31 | 1951.41666667,178 32 | 1951.5,199 33 | 1951.58333333,199 34 | 1951.66666667,184 35 | 1951.75,162 36 | 1951.83333333,146 37 | 1951.91666667,166 38 | 1952.0,171 39 | 1952.08333333,180 40 | 1952.16666667,193 41 | 1952.25,181 42 | 1952.33333333,183 43 | 1952.41666667,218 44 | 1952.5,230 45 | 1952.58333333,242 46 | 1952.66666667,209 47 | 1952.75,191 48 | 1952.83333333,172 49 | 1952.91666667,194 50 | 1953.0,196 51 | 1953.08333333,196 52 | 1953.16666667,236 53 | 1953.25,235 54 | 1953.33333333,229 55 | 1953.41666667,243 56 | 1953.5,264 57 | 1953.58333333,272 58 | 1953.66666667,237 59 | 1953.75,211 60 | 1953.83333333,180 61 | 1953.91666667,201 62 | 1954.0,204 63 | 1954.08333333,188 64 | 1954.16666667,235 65 | 1954.25,227 66 | 1954.33333333,234 67 | 1954.41666667,264 68 | 1954.5,302 69 | 1954.58333333,293 70 | 1954.66666667,259 71 | 1954.75,229 72 | 1954.83333333,203 73 | 1954.91666667,229 74 | 1955.0,242 75 | 1955.08333333,233 76 | 1955.16666667,267 77 | 1955.25,269 78 | 1955.33333333,270 79 | 1955.41666667,315 80 | 1955.5,364 81 | 1955.58333333,347 82 | 1955.66666667,312 83 | 1955.75,274 84 | 1955.83333333,237 85 | 1955.91666667,278 86 | 1956.0,284 87 | 1956.08333333,277 88 | 1956.16666667,317 89 | 1956.25,313 90 | 1956.33333333,318 91 | 1956.41666667,374 92 | 1956.5,413 93 | 1956.58333333,405 94 | 1956.66666667,355 95 | 1956.75,306 96 | 1956.83333333,271 97 | 1956.91666667,306 98 | 1957.0,315 99 | 1957.08333333,301 100 | 1957.16666667,356 101 | 1957.25,348 102 | 1957.33333333,355 103 | 1957.41666667,422 104 | 1957.5,465 105 | 1957.58333333,467 106 | 1957.66666667,404 107 | 1957.75,347 108 | 1957.83333333,305 109 | 1957.91666667,336 110 | 1958.0,340 111 | 1958.08333333,318 112 | 1958.16666667,362 113 | 1958.25,348 114 | 1958.33333333,363 115 | 1958.41666667,435 116 | 1958.5,491 117 | 1958.58333333,505 118 | 1958.66666667,404 119 | 1958.75,359 120 | 1958.83333333,310 121 | 1958.91666667,337 122 | 1959.0,360 123 | 1959.08333333,342 124 | 1959.16666667,406 125 | 1959.25,396 126 | 1959.33333333,420 127 | 1959.41666667,472 128 | 1959.5,548 129 | 1959.58333333,559 130 | 1959.66666667,463 131 | 1959.75,407 132 | 1959.83333333,362 133 | 1959.91666667,405 134 | 1960.0,417 135 | 1960.08333333,391 136 | 1960.16666667,419 137 | 1960.25,461 138 | 1960.33333333,472 139 | 1960.41666667,535 140 | 1960.5,622 141 | 1960.58333333,606 142 | 1960.66666667,508 143 | 1960.75,461 144 | 1960.83333333,390 145 | 1960.91666667,432 146 | -------------------------------------------------------------------------------- /Chapter04/linear_regression/example6/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/csv" 5 | "fmt" 6 | "log" 7 | "math" 8 | "os" 9 | "strconv" 10 | 11 | 
"github.com/sajari/regression" 12 | ) 13 | 14 | func main() { 15 | 16 | // Open the training dataset file. 17 | f, err := os.Open("training.csv") 18 | if err != nil { 19 | log.Fatal(err) 20 | } 21 | defer f.Close() 22 | 23 | // Create a new CSV reader reading from the opened file. 24 | reader := csv.NewReader(f) 25 | 26 | // Read in all of the CSV records 27 | reader.FieldsPerRecord = 4 28 | trainingData, err := reader.ReadAll() 29 | if err != nil { 30 | log.Fatal(err) 31 | } 32 | 33 | // In this case we are going to try and model our Sales (y) 34 | // by the TV feature plus an intercept. As such, let's create 35 | // the struct needed to train a model using github.com/sajari/regression. 36 | var r regression.Regression 37 | r.SetObserved("Sales") 38 | r.SetVar(0, "TV") 39 | 40 | // Loop of records in the CSV, adding the training data to the regression value. 41 | for i, record := range trainingData { 42 | 43 | // Skip the header. 44 | if i == 0 { 45 | continue 46 | } 47 | 48 | // Parse the Sales rogression measure, or "y". 49 | yVal, err := strconv.ParseFloat(record[3], 64) 50 | if err != nil { 51 | log.Fatal(err) 52 | } 53 | 54 | // Parse the TV value. 55 | tvVal, err := strconv.ParseFloat(record[0], 64) 56 | if err != nil { 57 | log.Fatal(err) 58 | } 59 | 60 | // Add these points to the regression value. 61 | r.Train(regression.DataPoint(yVal, []float64{tvVal})) 62 | } 63 | 64 | // Train/fit the regression model. 65 | r.Run() 66 | 67 | // Output the trained model parameters. 68 | fmt.Printf("\nRegression Formula:\n%v\n\n", r.Formula) 69 | 70 | // Open the test dataset file. 71 | f, err = os.Open("test.csv") 72 | if err != nil { 73 | log.Fatal(err) 74 | } 75 | defer f.Close() 76 | 77 | // Create a CSV reader reading from the opened file. 78 | reader = csv.NewReader(f) 79 | 80 | // Read in all of the CSV records 81 | reader.FieldsPerRecord = 4 82 | testData, err := reader.ReadAll() 83 | if err != nil { 84 | log.Fatal(err) 85 | } 86 | 87 | // Loop over the test data predicting y and evaluating the prediction 88 | // with the mean absolute error. 89 | var mAE float64 90 | for i, record := range testData { 91 | 92 | // Skip the header. 93 | if i == 0 { 94 | continue 95 | } 96 | 97 | // Parse the observed diabetes progression measure, or "y". 98 | yObserved, err := strconv.ParseFloat(record[3], 64) 99 | if err != nil { 100 | log.Fatal(err) 101 | } 102 | 103 | // Parse the bmi value. 104 | tvVal, err := strconv.ParseFloat(record[0], 64) 105 | if err != nil { 106 | log.Fatal(err) 107 | } 108 | 109 | // Predict y with our trained model. 110 | yPredicted, err := r.Predict([]float64{tvVal}) 111 | 112 | // Add the to the mean absolute error. 113 | mAE += math.Abs(yObserved-yPredicted) / float64(len(testData)) 114 | } 115 | 116 | // Output the MAE to standard out. 117 | fmt.Printf("MAE = %0.2f\n\n", mAE) 118 | } 119 | -------------------------------------------------------------------------------- /Chapter01/csv_files/example4/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/csv" 5 | "fmt" 6 | "io" 7 | "log" 8 | "os" 9 | "strconv" 10 | ) 11 | 12 | // CSVRecord contains a sucessfully parsed row of the CSV file. 13 | type CSVRecord struct { 14 | SepalLength float64 15 | SepalWidth float64 16 | PetalLength float64 17 | PetalWidth float64 18 | Species string 19 | ParseError error 20 | } 21 | 22 | func main() { 23 | 24 | // Open the iris dataset file. 
25 | f, err := os.Open("../data/iris_mixed_types.csv") 26 | if err != nil { 27 | log.Fatal(err) 28 | } 29 | defer f.Close() 30 | 31 | // Create a new CSV reader reading from the opened file. 32 | reader := csv.NewReader(f) 33 | 34 | // Create a slice value that will hold all of the successfully parsed 35 | // records from the CSV. 36 | var csvData []CSVRecord 37 | 38 | // line will help us keep track of line number for logging. 39 | line := 1 40 | 41 | // Read in the records looking for unexpected types. 42 | for { 43 | 44 | // Read in a row. Check if we are at the end of the file. 45 | record, err := reader.Read() 46 | if err == io.EOF { 47 | break 48 | } 49 | 50 | // Create a CSVRecord value for the row. 51 | var csvRecord CSVRecord 52 | 53 | // Parse each of the values in the record based on an expected type. 54 | for idx, value := range record { 55 | 56 | // Parse the value in the record as a string for the string column. 57 | if idx == 4 { 58 | 59 | // Validate that the value is not an empty string. If the 60 | // value is an empty string break the parsing loop. 61 | if value == "" { 62 | log.Printf("Parsing line %d failed, unexpected type in column %d\n", line, idx) 63 | csvRecord.ParseError = fmt.Errorf("Empty string value") 64 | break 65 | } 66 | 67 | // Add the string value to the CSVRecord. 68 | csvRecord.Species = value 69 | continue 70 | } 71 | 72 | // Otherwise, parse the value in the record as a float64. 73 | // floatValue will hold the parsed float value of the record 74 | // for the numeric columns. 75 | var floatValue float64 76 | 77 | // If the value can not be parsed as a float, log and break the 78 | // parsing loop. 79 | if floatValue, err = strconv.ParseFloat(value, 64); err != nil { 80 | log.Printf("Parsing line %d failed, unexpected type in column %d\n", line, idx) 81 | csvRecord.ParseError = fmt.Errorf("Could not parse float") 82 | break 83 | } 84 | 85 | // Add the float value to the respective field in the CSVRecord. 86 | switch idx { 87 | case 0: 88 | csvRecord.SepalLength = floatValue 89 | case 1: 90 | csvRecord.SepalWidth = floatValue 91 | case 2: 92 | csvRecord.PetalLength = floatValue 93 | case 3: 94 | csvRecord.PetalWidth = floatValue 95 | } 96 | } 97 | 98 | // Append successfully parsed records to the slice defined above. 99 | if csvRecord.ParseError == nil { 100 | csvData = append(csvData, csvRecord) 101 | } 102 | 103 | // Increment the line counter. 104 | line++ 105 | } 106 | 107 | fmt.Printf("successfully parsed %d lines\n", len(csvData)) 108 | } 109 | -------------------------------------------------------------------------------- /Chapter04/multiple_regression/example2/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/csv" 5 | "fmt" 6 | "log" 7 | "math" 8 | "os" 9 | "strconv" 10 | 11 | "github.com/sajari/regression" 12 | ) 13 | 14 | func main() { 15 | 16 | // Open the training dataset file. 17 | f, err := os.Open("training.csv") 18 | if err != nil { 19 | log.Fatal(err) 20 | } 21 | defer f.Close() 22 | 23 | // Create a new CSV reader reading from the opened file. 24 | reader := csv.NewReader(f) 25 | 26 | // Read in all of the CSV records 27 | reader.FieldsPerRecord = 4 28 | trainingData, err := reader.ReadAll() 29 | if err != nil { 30 | log.Fatal(err) 31 | } 32 | 33 | // In this case we are going to try and model our Sales 34 | // by the TV and Radio features plus an intercept. 
35 | var r regression.Regression 36 | r.SetObserved("Sales") 37 | r.SetVar(0, "TV") 38 | r.SetVar(1, "Radio") 39 | 40 | // Loop over the CSV records adding the training data. 41 | for i, record := range trainingData { 42 | 43 | // Skip the header. 44 | if i == 0 { 45 | continue 46 | } 47 | 48 | // Parse the Sales. 49 | yVal, err := strconv.ParseFloat(record[3], 64) 50 | if err != nil { 51 | log.Fatal(err) 52 | } 53 | 54 | // Parse the TV value. 55 | tvVal, err := strconv.ParseFloat(record[0], 64) 56 | if err != nil { 57 | log.Fatal(err) 58 | } 59 | 60 | // Parse the Radio value. 61 | radioVal, err := strconv.ParseFloat(record[1], 64) 62 | if err != nil { 63 | log.Fatal(err) 64 | } 65 | 66 | // Add these points to the regression value. 67 | r.Train(regression.DataPoint(yVal, []float64{tvVal, radioVal})) 68 | } 69 | 70 | // Train/fit the regression model. 71 | r.Run() 72 | 73 | // Output the trained model parameters. 74 | fmt.Printf("\nRegression Formula:\n%v\n\n", r.Formula) 75 | 76 | // Open the test dataset file. 77 | f, err = os.Open("test.csv") 78 | if err != nil { 79 | log.Fatal(err) 80 | } 81 | defer f.Close() 82 | 83 | // Create a CSV reader reading from the opened file. 84 | reader = csv.NewReader(f) 85 | 86 | // Read in all of the CSV records 87 | reader.FieldsPerRecord = 4 88 | testData, err := reader.ReadAll() 89 | if err != nil { 90 | log.Fatal(err) 91 | } 92 | 93 | // Loop over the test data predicting y and evaluating the prediction 94 | // with the mean absolute error. 95 | var mAE float64 96 | for i, record := range testData { 97 | 98 | // Skip the header. 99 | if i == 0 { 100 | continue 101 | } 102 | 103 | // Parse the Sales. 104 | yObserved, err := strconv.ParseFloat(record[3], 64) 105 | if err != nil { 106 | log.Fatal(err) 107 | } 108 | 109 | // Parse the TV value. 110 | tvVal, err := strconv.ParseFloat(record[0], 64) 111 | if err != nil { 112 | log.Fatal(err) 113 | } 114 | 115 | // Parse the Radio value. 116 | radioVal, err := strconv.ParseFloat(record[1], 64) 117 | if err != nil { 118 | log.Fatal(err) 119 | } 120 | 121 | // Predict y with our trained model. 122 | yPredicted, err := r.Predict([]float64{tvVal, radioVal}) 123 | 124 | // Add the to the mean absolute error. 125 | mAE += math.Abs(yObserved-yPredicted) / float64(len(testData)) 126 | } 127 | 128 | // Output the MAE to standard out. 129 | fmt.Printf("MAE = %0.2f\n\n", mAE) 130 | } 131 | -------------------------------------------------------------------------------- /Chapter09/running_model_reliably/example1/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/csv" 5 | "encoding/json" 6 | "flag" 7 | "fmt" 8 | "io/ioutil" 9 | "log" 10 | "os" 11 | "path/filepath" 12 | "strconv" 13 | 14 | "github.com/sajari/regression" 15 | ) 16 | 17 | // ModelInfo includes the information about the 18 | // model that is output from the training. 19 | type ModelInfo struct { 20 | Intercept float64 `json:"intercept"` 21 | Coefficients []CoefficientInfo `json:"coefficients"` 22 | } 23 | 24 | // CoefficientInfo include information about a 25 | // particular model coefficient. 26 | type CoefficientInfo struct { 27 | Name string `json:"name"` 28 | Coefficient float64 `json:"coefficient"` 29 | } 30 | 31 | func main() { 32 | 33 | // Declare the input and output directory flags. 
34 | inDirPtr := flag.String("inDir", "", "The directory containing the training data") 35 | outDirPtr := flag.String("outDir", "", "The output directory") 36 | 37 | // Parse the command line flags. 38 | flag.Parse() 39 | 40 | // Open the training dataset file. 41 | f, err := os.Open(filepath.Join(*inDirPtr, "diabetes.csv")) 42 | if err != nil { 43 | log.Fatal(err) 44 | } 45 | defer f.Close() 46 | 47 | // Create a new CSV reader reading from the opened file. 48 | reader := csv.NewReader(f) 49 | 50 | // Read in all of the CSV records 51 | reader.FieldsPerRecord = 11 52 | trainingData, err := reader.ReadAll() 53 | if err != nil { 54 | log.Fatal(err) 55 | } 56 | 57 | // In this case we are going to try and model our disease measure 58 | // y by the bmi feature plust an intercept. As such, let's create 59 | // the struct needed to train a model using github.com/sajari/regression. 60 | var r regression.Regression 61 | r.SetObserved("diabetes progression") 62 | r.SetVar(0, "bmi") 63 | 64 | // Loop of records in the CSV, adding the training data to the regression value. 65 | for i, record := range trainingData { 66 | 67 | // Skip the header. 68 | if i == 0 { 69 | continue 70 | } 71 | 72 | // Parse the diabetes progression measure, or "y". 73 | yVal, err := strconv.ParseFloat(record[10], 64) 74 | if err != nil { 75 | log.Fatal(err) 76 | } 77 | 78 | // Parse the bmi value. 79 | bmiVal, err := strconv.ParseFloat(record[2], 64) 80 | if err != nil { 81 | log.Fatal(err) 82 | } 83 | 84 | // Add these points to the regression value. 85 | r.Train(regression.DataPoint(yVal, []float64{bmiVal})) 86 | } 87 | 88 | // Train/fit the regression model. 89 | r.Run() 90 | 91 | // Output the trained model parameters to stdout. 92 | fmt.Printf("\nRegression Formula:\n%v\n\n", r.Formula) 93 | 94 | // Fill in the model information. 95 | modelInfo := ModelInfo{ 96 | Intercept: r.Coeff(0), 97 | Coefficients: []CoefficientInfo{ 98 | CoefficientInfo{ 99 | Name: "bmi", 100 | Coefficient: r.Coeff(1), 101 | }, 102 | }, 103 | } 104 | 105 | // Marshal the model information. 106 | outputData, err := json.MarshalIndent(modelInfo, "", " ") 107 | if err != nil { 108 | log.Fatal(err) 109 | } 110 | 111 | // Save the marshalled output to a file. 112 | if err := ioutil.WriteFile(filepath.Join(*outDirPtr, "model.json"), outputData, 0644); err != nil { 113 | log.Fatal(err) 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /Chapter09/running_model_reliably/example2/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/csv" 5 | "encoding/json" 6 | "flag" 7 | "fmt" 8 | "io/ioutil" 9 | "log" 10 | "os" 11 | "path/filepath" 12 | "strconv" 13 | 14 | "github.com/sajari/regression" 15 | ) 16 | 17 | // ModelInfo includes the information about the 18 | // model that is output from the training. 19 | type ModelInfo struct { 20 | Intercept float64 `json:"intercept"` 21 | Coefficients []CoefficientInfo `json:"coefficients"` 22 | } 23 | 24 | // CoefficientInfo include information about a 25 | // particular model coefficient. 26 | type CoefficientInfo struct { 27 | Name string `json:"name"` 28 | Coefficient float64 `json:"coefficient"` 29 | } 30 | 31 | func main() { 32 | 33 | // Declare the input and output directory flags. 
34 | inDirPtr := flag.String("inDir", "", "The directory containing the training data") 35 | outDirPtr := flag.String("outDir", "", "The output directory") 36 | 37 | // Parse the command line flags. 38 | flag.Parse() 39 | 40 | // Open the training dataset file. 41 | f, err := os.Open(filepath.Join(*inDirPtr, "diabetes.csv")) 42 | if err != nil { 43 | log.Fatal(err) 44 | } 45 | defer f.Close() 46 | 47 | // Create a new CSV reader reading from the opened file. 48 | reader := csv.NewReader(f) 49 | 50 | // Read in all of the CSV records 51 | reader.FieldsPerRecord = 11 52 | trainingData, err := reader.ReadAll() 53 | if err != nil { 54 | log.Fatal(err) 55 | } 56 | 57 | // In this case we are going to try and model our disease measure 58 | // y by the bmi feature plust an intercept. As such, let's create 59 | // the struct needed to train a model using github.com/sajari/regression. 60 | var r regression.Regression 61 | r.SetObserved("diabetes progression") 62 | r.SetVar(0, "bmi") 63 | r.SetVar(1, "ltg") 64 | 65 | // Loop of records in the CSV, adding the training data to the regression value. 66 | for i, record := range trainingData { 67 | 68 | // Skip the header. 69 | if i == 0 { 70 | continue 71 | } 72 | 73 | // Parse the diabetes progression measure, or "y". 74 | yVal, err := strconv.ParseFloat(record[10], 64) 75 | if err != nil { 76 | log.Fatal(err) 77 | } 78 | 79 | // Parse the bmi value. 80 | bmiVal, err := strconv.ParseFloat(record[2], 64) 81 | if err != nil { 82 | log.Fatal(err) 83 | } 84 | 85 | // Parse the ltg value. 86 | ltgVal, err := strconv.ParseFloat(record[8], 64) 87 | if err != nil { 88 | log.Fatal(err) 89 | } 90 | 91 | // Add these points to the regression value. 92 | r.Train(regression.DataPoint(yVal, []float64{bmiVal, ltgVal})) 93 | } 94 | 95 | // Train/fit the regression model. 96 | r.Run() 97 | 98 | // Output the trained model parameters to stdout. 99 | fmt.Printf("\nRegression Formula:\n%v\n\n", r.Formula) 100 | 101 | // Fill in the model information. 102 | modelInfo := ModelInfo{ 103 | Intercept: r.Coeff(0), 104 | Coefficients: []CoefficientInfo{ 105 | CoefficientInfo{ 106 | Name: "bmi", 107 | Coefficient: r.Coeff(1), 108 | }, 109 | CoefficientInfo{ 110 | Name: "ltg", 111 | Coefficient: r.Coeff(2), 112 | }, 113 | }, 114 | } 115 | 116 | // Marshal the model information. 117 | outputData, err := json.MarshalIndent(modelInfo, "", " ") 118 | if err != nil { 119 | log.Fatal(err) 120 | } 121 | 122 | // Save the marshalled output to a file. 123 | if err := ioutil.WriteFile(filepath.Join(*outDirPtr, "model.json"), outputData, 0644); err != nil { 124 | log.Fatal(err) 125 | } 126 | } 127 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # Machine Learning With Go 5 | This is the code repository for [Machine Learning With Go](https://www.packtpub.com/big-data-and-business-intelligence/machine-learning-go?utm_source=github&utm_medium=repository&utm_campaign=9781785882104), published by [Packt](https://www.packtpub.com/?utm_source=github). It contains all the supporting project files necessary to work through the book from start to finish. 6 | ## About the Book 7 | The mission of this book is to turn readers into productive, innovative data analysts who leverage Go to build robust and valuable applications. 
To this end, the book clearly introduces the technical aspects of building predictive models in Go, but it also helps the reader understand how machine learning workflows are being applied in real-world scenarios. 8 | 9 | ## Instructions and Navigation 10 | All of the code is organized into folders, one per chapter. For example, Chapter02. 11 | 12 | 13 | 14 | The code will look like the following: 15 | ``` 16 | // Create a new matrix a. 17 | a := mat.NewDense(3, 3, []float64{1, 2, 3, 0, 4, 5, 0, 0, 6}) 18 | // Compute and output the transpose of the matrix. 19 | ft := mat.Formatted(a.T(), mat.Prefix(" ")) 20 | fmt.Printf("a^T = %v\n\n", ft) 21 | // Compute and output the determinant of a. 22 | deta := mat.Det(a) 23 | fmt.Printf("det(a) = %.2f\n\n", deta) 24 | // Compute and output the inverse of a. 25 | aInverse := mat.NewDense(0, 0, nil) 26 | if err := aInverse.Inverse(a); err != nil { 27 | log.Fatal(err) 28 | } 29 | fi := mat.Formatted(aInverse, mat.Prefix(" ")) 30 | fmt.Printf("a^-1 = %v\n\n", fi) 31 | ``` 32 | 33 | To run the examples and experiment with the techniques covered in the book, 34 | you will generally need the following: 35 | Access to a bash-like shell. 36 | A complete Go environment including Go, an editor, and related default or 37 | custom environment variables defined. You can, for example, follow this guide 38 | at https://www.goinggo.net/2016/05/installing-go-and-your-workspace.html. 39 | 40 | Various Go dependencies. These can be obtained as they are needed via go get ... 41 | 42 | Then, to run the examples related to some of the advanced topics, such as data pipelining 43 | and deep learning, you will need a few additional things: 44 | An installation or deployment of Pachyderm. You can follow these docs to get 45 | Pachyderm up and running locally or in the 46 | cloud, http://pachyderm.readthedocs.io/en/latest/. 47 | A working Docker installation 48 | (https://www.docker.com/community-edition#/download). 49 | An installation of TensorFlow. To install TensorFlow locally, you can follow this 50 | guide at https://www.tensorflow.org/install/. 51 | 52 | ## Related Products 53 | * [Mastering Machine Learning with scikit-learn - Second Edition](https://www.packtpub.com/big-data-and-business-intelligence/mastering-machine-learning-scikit-learn-second-edition?utm_source=github&utm_medium=repository&utm_campaign=9781788299879) 54 | 55 | * [Mastering Machine Learning with scikit-learn](https://www.packtpub.com/big-data-and-business-intelligence/mastering-machine-learning-scikit-learn?utm_source=github&utm_medium=repository&utm_campaign=9781783988365) 56 | 57 | * [Machine Learning with JavaScript](https://www.packtpub.com/big-data-and-business-intelligence/machine-learning-javascript?utm_source=github&utm_medium=repository&utm_campaign=9781787280199) 58 | 59 | ### Download a free PDF 60 | 61 | If you have already purchased a print or Kindle version of this book, you can get a DRM-free PDF version at no cost.
Simply click on the link to claim your free PDF.
62 | https://packt.link/free-ebook/9781785882104

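Each example in this repository is a standalone myprogram.go (a `package main` program) that reads its data files from its own folder. As an illustrative sketch only, and not part of any particular chapter, most of the examples follow roughly this shape, so you know what to expect before opening one:

```
package main

import (
	"encoding/csv"
	"log"
	"os"
)

func main() {

	// Open the dataset file that sits next to myprogram.go.
	// mydata.csv is a placeholder; each example names its own
	// input file (Advertising.csv, AirPassengers.csv, labeled.csv, ...).
	f, err := os.Open("mydata.csv")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	// Read in all of the CSV records.
	reader := csv.NewReader(f)
	records, err := reader.ReadAll()
	if err != nil {
		log.Fatal(err)
	}

	// The chapter-specific analysis (regression, classification,
	// clustering, time series, and so on) starts from records.
	log.Printf("read %d records", len(records))
}
```

The dependencies each example needs (gonum, github.com/sajari/regression, github.com/kniren/gota, and so on) are visible in its import block and can be fetched with go get as noted above.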
-------------------------------------------------------------------------------- /Chapter05/logistic_regression/example6/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/csv" 5 | "fmt" 6 | "log" 7 | "math" 8 | "math/rand" 9 | "os" 10 | "strconv" 11 | "time" 12 | 13 | "github.com/gonum/matrix/mat64" 14 | ) 15 | 16 | func main() { 17 | 18 | // Open the training dataset file. 19 | f, err := os.Open("training.csv") 20 | if err != nil { 21 | log.Fatal(err) 22 | } 23 | defer f.Close() 24 | 25 | // Create a new CSV reader reading from the opened file. 26 | reader := csv.NewReader(f) 27 | reader.FieldsPerRecord = 2 28 | 29 | // Read in all of the CSV records 30 | rawCSVData, err := reader.ReadAll() 31 | if err != nil { 32 | log.Fatal(err) 33 | } 34 | 35 | // featureData and labels will hold all the float values that 36 | // will eventually be used in our training. 37 | featureData := make([]float64, 2*(len(rawCSVData)-1)) 38 | labels := make([]float64, len(rawCSVData)-1) 39 | 40 | // featureIndex will track the current index of the features 41 | // matrix values. 42 | var featureIndex int 43 | 44 | // Sequentially move the rows into the slices of floats. 45 | for idx, record := range rawCSVData { 46 | 47 | // Skip the header row. 48 | if idx == 0 { 49 | continue 50 | } 51 | 52 | // Add the FICO score feature. 53 | featureVal, err := strconv.ParseFloat(record[0], 64) 54 | if err != nil { 55 | log.Fatal(err) 56 | } 57 | 58 | featureData[featureIndex] = featureVal 59 | 60 | // Add an intercept. 61 | featureData[featureIndex+1] = 1.0 62 | 63 | // Increment our feature row. 64 | featureIndex += 2 65 | 66 | // Add the class label. 67 | labelVal, err := strconv.ParseFloat(record[1], 64) 68 | if err != nil { 69 | log.Fatal(err) 70 | } 71 | 72 | labels[idx-1] = labelVal 73 | } 74 | 75 | // Form a matrix from the features. 76 | features := mat64.NewDense(len(rawCSVData)-1, 2, featureData) 77 | 78 | // Train the logistic regression model. 79 | weights := logisticRegression(features, labels, 1000, 0.3) 80 | 81 | // Output the Logistic Regression model formula to stdout. 82 | formula := "p = 1 / ( 1 + exp(- m1 * FICO.score - m2) )" 83 | fmt.Printf("\n%s\n\nm1 = %0.2f\nm2 = %0.2f\n\n", formula, weights[0], weights[1]) 84 | } 85 | 86 | // logistic implements the logistic function, which 87 | // is used in logistic regression. 88 | func logistic(x float64) float64 { 89 | return 1.0 / (1.0 + math.Exp(-x)) 90 | } 91 | 92 | // logisticRegression fits a logistic regression model 93 | // for the given data. 94 | func logisticRegression(features *mat64.Dense, labels []float64, numSteps int, learningRate float64) []float64 { 95 | 96 | // Initialize random weights. 97 | _, numWeights := features.Dims() 98 | weights := make([]float64, numWeights) 99 | 100 | s := rand.NewSource(time.Now().UnixNano()) 101 | r := rand.New(s) 102 | 103 | for idx, _ := range weights { 104 | weights[idx] = r.Float64() 105 | } 106 | 107 | // Iteratively optimize the weights. 108 | for i := 0; i < numSteps; i++ { 109 | 110 | // Initialize a variable to accumulate error for this iteration. 111 | var sumError float64 112 | 113 | // Make predictions for each label and accumlate error. 114 | for idx, label := range labels { 115 | 116 | // Get the features corresponding to this label. 117 | featureRow := mat64.Row(nil, idx, features) 118 | 119 | // Calculate the error for this iteration's weights. 
120 | pred := logistic(featureRow[0]*weights[0] + featureRow[1]*weights[1]) 121 | predError := label - pred 122 | sumError += math.Pow(predError, 2) 123 | 124 | // Update the feature weights. 125 | for j := 0; j < len(featureRow); j++ { 126 | weights[j] += learningRate * predError * pred * (1 - pred) * featureRow[j] 127 | } 128 | } 129 | } 130 | 131 | return weights 132 | } 133 | -------------------------------------------------------------------------------- /Chapter07/auto_regressive/example6/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/csv" 5 | "fmt" 6 | "log" 7 | "math" 8 | "os" 9 | "strconv" 10 | 11 | "github.com/gonum/plot" 12 | "github.com/gonum/plot/plotter" 13 | "github.com/gonum/plot/vg" 14 | ) 15 | 16 | func main() { 17 | 18 | // Open the log differenced dataset file. 19 | transFile, err := os.Open("log_diff_series.csv") 20 | if err != nil { 21 | log.Fatal(err) 22 | } 23 | defer transFile.Close() 24 | 25 | // Create a CSV reader reading from the opened file. 26 | transReader := csv.NewReader(transFile) 27 | 28 | // Read in all of the CSV records 29 | transReader.FieldsPerRecord = 2 30 | transData, err := transReader.ReadAll() 31 | if err != nil { 32 | log.Fatal(err) 33 | } 34 | 35 | // Loop over the data predicting the transformed 36 | // observations. 37 | var transPredictions []float64 38 | for i, _ := range transData { 39 | 40 | // Skip the header and the first two observations 41 | // (because we need two lags to make a prediction). 42 | if i == 0 || i == 1 || i == 2 { 43 | continue 44 | } 45 | 46 | // Parse the first lag. 47 | lagOne, err := strconv.ParseFloat(transData[i-1][1], 64) 48 | if err != nil { 49 | log.Fatal(err) 50 | } 51 | 52 | // Parse the second lag. 53 | lagTwo, err := strconv.ParseFloat(transData[i-2][1], 64) 54 | if err != nil { 55 | log.Fatal(err) 56 | } 57 | 58 | // Predict the transformed variable with our trained AR model. 59 | transPredictions = append(transPredictions, 0.008159+0.234953*lagOne-0.173682*lagTwo) 60 | } 61 | 62 | // Open the original dataset file. 63 | origFile, err := os.Open("AirPassengers.csv") 64 | if err != nil { 65 | log.Fatal(err) 66 | } 67 | defer origFile.Close() 68 | 69 | // Create a CSV reader reading from the opened file. 70 | origReader := csv.NewReader(origFile) 71 | 72 | // Read in all of the CSV records 73 | origReader.FieldsPerRecord = 2 74 | origData, err := origReader.ReadAll() 75 | if err != nil { 76 | log.Fatal(err) 77 | } 78 | 79 | // pts* will hold the values for plotting. 80 | ptsObs := make(plotter.XYs, len(transPredictions)) 81 | ptsPred := make(plotter.XYs, len(transPredictions)) 82 | 83 | // Reverse the transformation and calculate the MAE. 84 | var mAE float64 85 | var cumSum float64 86 | for i := 4; i <= len(origData)-1; i++ { 87 | 88 | // Parse the original observation. 89 | observed, err := strconv.ParseFloat(origData[i][1], 64) 90 | if err != nil { 91 | log.Fatal(err) 92 | } 93 | 94 | // Parse the original date. 95 | date, err := strconv.ParseFloat(origData[i][0], 64) 96 | if err != nil { 97 | log.Fatal(err) 98 | } 99 | 100 | // Get the cumulative sum up to the index in 101 | // the transformed predictions. 102 | cumSum += transPredictions[i-4] 103 | 104 | // Calculate the reverse transformed prediction. 105 | predicted := math.Exp(math.Log(observed) + cumSum) 106 | 107 | // Accumulate the MAE. 
108 | mAE += math.Abs(observed-predicted) / float64(len(transPredictions)) 109 | 110 | // Fill in the points for plotting. 111 | ptsObs[i-4].X = date 112 | ptsPred[i-4].X = date 113 | ptsObs[i-4].Y = observed 114 | ptsPred[i-4].Y = predicted 115 | } 116 | 117 | // Output the MAE to standard out. 118 | fmt.Printf("\nMAE = %0.2f\n\n", mAE) 119 | 120 | // Create the plot. 121 | p, err := plot.New() 122 | if err != nil { 123 | log.Fatal(err) 124 | } 125 | p.X.Label.Text = "time" 126 | p.Y.Label.Text = "passengers" 127 | p.Add(plotter.NewGrid()) 128 | 129 | // Add the line plot points for the time series. 130 | lObs, err := plotter.NewLine(ptsObs) 131 | if err != nil { 132 | log.Fatal(err) 133 | } 134 | lObs.LineStyle.Width = vg.Points(1) 135 | 136 | lPred, err := plotter.NewLine(ptsPred) 137 | if err != nil { 138 | log.Fatal(err) 139 | } 140 | lPred.LineStyle.Width = vg.Points(1) 141 | lPred.LineStyle.Dashes = []vg.Length{vg.Points(5), vg.Points(5)} 142 | 143 | // Save the plot to a PNG file. 144 | p.Add(lObs, lPred) 145 | p.Legend.Add("Observed", lObs) 146 | p.Legend.Add("Predicted", lPred) 147 | if err := p.Save(10*vg.Inch, 4*vg.Inch, "passengers_ts.png"); err != nil { 148 | log.Fatal(err) 149 | } 150 | } 151 | -------------------------------------------------------------------------------- /Chapter09/running_model_reliably/example3/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | "flag" 6 | "fmt" 7 | "io/ioutil" 8 | "log" 9 | "os" 10 | "path/filepath" 11 | ) 12 | 13 | // ModelInfo includes the information about the 14 | // model that is output from the training. 15 | type ModelInfo struct { 16 | Intercept float64 `json:"intercept"` 17 | Coefficients []CoefficientInfo `json:"coefficients"` 18 | } 19 | 20 | // CoefficientInfo include information about a 21 | // particular model coefficient. 22 | type CoefficientInfo struct { 23 | Name string `json:"name"` 24 | Coefficient float64 `json:"coefficient"` 25 | } 26 | 27 | // PredictionData includes the data necessary to make 28 | // a prediction and encodes the output prediction. 29 | type PredictionData struct { 30 | Prediction float64 `json:"predicted_diabetes_progression"` 31 | IndependentVars []IndependentVar `json:"independent_variables"` 32 | } 33 | 34 | // IndependentVar include information about and a 35 | // value for an independent variable. 36 | type IndependentVar struct { 37 | Name string `json:"name"` 38 | Value float64 `json:"value"` 39 | } 40 | 41 | func main() { 42 | 43 | // Declare the input and output directory flags. 44 | inModelDirPtr := flag.String("inModelDir", "", "The directory containing the model.") 45 | inVarDirPtr := flag.String("inVarDir", "", "The directory containing the input attributes.") 46 | outDirPtr := flag.String("outDir", "", "The output directory") 47 | 48 | // Parse the command line flags. 49 | flag.Parse() 50 | 51 | // Load the model file. 52 | f, err := ioutil.ReadFile(filepath.Join(*inModelDirPtr, "model.json")) 53 | if err != nil { 54 | log.Fatal(err) 55 | } 56 | 57 | // Unmarshal the model information. 58 | var modelInfo ModelInfo 59 | if err := json.Unmarshal(f, &modelInfo); err != nil { 60 | log.Fatal(err) 61 | } 62 | 63 | // Walk over files in the input. 64 | if err := filepath.Walk(*inVarDirPtr, func(path string, info os.FileInfo, err error) error { 65 | 66 | // Skip any directories. 67 | if info.IsDir() { 68 | return nil 69 | } 70 | 71 | // Open any files. 
72 | f, err := ioutil.ReadFile(filepath.Join(*inVarDirPtr, info.Name())) 73 | if err != nil { 74 | return err 75 | } 76 | 77 | // Unmarshal the independent variables. 78 | var predictionData PredictionData 79 | if err := json.Unmarshal(f, &predictionData); err != nil { 80 | return err 81 | } 82 | 83 | // Make the prediction. 84 | if err := Predict(&modelInfo, &predictionData); err != nil { 85 | return err 86 | } 87 | 88 | // Marshal the prediction data. 89 | outputData, err := json.MarshalIndent(predictionData, "", " ") 90 | if err != nil { 91 | log.Fatal(err) 92 | } 93 | 94 | // Save the marshalled output to a file. 95 | if err := ioutil.WriteFile(filepath.Join(*outDirPtr, info.Name()), outputData, 0644); err != nil { 96 | log.Fatal(err) 97 | } 98 | 99 | return nil 100 | }); err != nil { 101 | log.Fatal(err) 102 | } 103 | } 104 | 105 | // Predict makes a prediction based on input JSON. 106 | func Predict(modelInfo *ModelInfo, predictionData *PredictionData) error { 107 | 108 | // Initialize the prediction value 109 | // to the intercept. 110 | prediction := modelInfo.Intercept 111 | 112 | // Create a map of independent variable coefficients. 113 | coeffs := make(map[string]float64) 114 | varNames := make([]string, len(modelInfo.Coefficients)) 115 | for idx, coeff := range modelInfo.Coefficients { 116 | coeffs[coeff.Name] = coeff.Coefficient 117 | varNames[idx] = coeff.Name 118 | } 119 | 120 | // Create a map of the independent variable values. 121 | varVals := make(map[string]float64) 122 | for _, indVar := range predictionData.IndependentVars { 123 | varVals[indVar.Name] = indVar.Value 124 | } 125 | 126 | // Loop over the independent variables. 127 | for _, varName := range varNames { 128 | 129 | // Get the coefficient. 130 | coeff, ok := coeffs[varName] 131 | if !ok { 132 | return fmt.Errorf("Could not find model coefficient %s", varName) 133 | } 134 | 135 | // Get the variable value. 136 | val, ok := varVals[varName] 137 | if !ok { 138 | return fmt.Errorf("Expected a value for variable %s", varName) 139 | } 140 | 141 | // Add to the prediction. 142 | prediction = prediction + coeff*val 143 | } 144 | 145 | // Add the prediction to the prediction data. 146 | predictionData.Prediction = prediction 147 | 148 | return nil 149 | } 150 | -------------------------------------------------------------------------------- /Chapter06/evaluating/example2/myprogram.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "os" 7 | 8 | "github.com/gonum/floats" 9 | "github.com/kniren/gota/dataframe" 10 | ) 11 | 12 | type centroid []float64 13 | 14 | func main() { 15 | 16 | // Pull in the CSV file. 17 | irisFile, err := os.Open("iris.csv") 18 | if err != nil { 19 | log.Fatal(err) 20 | } 21 | defer irisFile.Close() 22 | 23 | // Create a dataframe from the CSV file. 24 | irisDF := dataframe.ReadCSV(irisFile) 25 | 26 | // Define the names of the three separate species contained in the CSV file. 27 | speciesNames := []string{ 28 | "Iris-setosa", 29 | "Iris-versicolor", 30 | "Iris-virginica", 31 | } 32 | 33 | // Create a map to hold our centroid information. 34 | centroids := make(map[string]centroid) 35 | 36 | // Create a map to hold the filtered dataframe for each cluster. 37 | clusters := make(map[string]dataframe.DataFrame) 38 | 39 | // Filter the dataset into three separate dataframes, 40 | // each corresponding to one of the Iris species. 
41 | for _, species := range speciesNames { 42 | 43 | // Filter the original dataset. 44 | filter := dataframe.F{ 45 | Colname: "species", 46 | Comparator: "==", 47 | Comparando: species, 48 | } 49 | filtered := irisDF.Filter(filter) 50 | 51 | // Add the filtered dataframe to the map of clusters. 52 | clusters[species] = filtered 53 | 54 | // Calculate the mean of features. 55 | summaryDF := filtered.Describe() 56 | 57 | // Put each dimension's mean into the corresponding centroid. 58 | var c centroid 59 | for _, feature := range summaryDF.Names() { 60 | 61 | // Skip the irrelevant columns. 62 | if feature == "column" || feature == "species" { 63 | continue 64 | } 65 | c = append(c, summaryDF.Col(feature).Float()[0]) 66 | } 67 | 68 | // Add this centroid to our map. 69 | centroids[species] = c 70 | } 71 | 72 | // Convert our labels into a slice of strings and create a slice 73 | // of float column names for convenience. 74 | labels := irisDF.Col("species").Records() 75 | floatColumns := []string{ 76 | "sepal_length", 77 | "sepal_width", 78 | "petal_length", 79 | "petal_width", 80 | } 81 | 82 | // Loop over the records accumulating the average silhouette coefficient. 83 | var silhouette float64 84 | 85 | for idx, label := range labels { 86 | 87 | // a will store our accumulated value for a. 88 | var a float64 89 | 90 | // Loop over the data points in the same cluster. 91 | for i := 0; i < clusters[label].Nrow(); i++ { 92 | 93 | // Get the data point for comparison. 94 | current := dfFloatRow(irisDF, floatColumns, idx) 95 | other := dfFloatRow(clusters[label], floatColumns, i) 96 | 97 | // Add to a. 98 | a += floats.Distance(current, other, 2) / float64(clusters[label].Nrow()) 99 | } 100 | 101 | // Determine the nearest other cluster. 102 | var otherCluster string 103 | var distanceToCluster float64 104 | for _, species := range speciesNames { 105 | 106 | // Skip the cluster containing the data point. 107 | if species == label { 108 | continue 109 | } 110 | 111 | // Calculate the distance to the cluster from the current cluster. 112 | distanceForThisCluster := floats.Distance(centroids[label], centroids[species], 2) 113 | 114 | // Replace the current cluster if relevant. 115 | if distanceToCluster == 0.0 || distanceForThisCluster < distanceToCluster { 116 | otherCluster = species 117 | distanceToCluster = distanceForThisCluster 118 | } 119 | } 120 | 121 | // b will store our accumulated value for b. 122 | var b float64 123 | 124 | // Loop over the data points in the nearest other cluster. 125 | for i := 0; i < clusters[otherCluster].Nrow(); i++ { 126 | 127 | // Get the data point for comparison. 128 | current := dfFloatRow(irisDF, floatColumns, idx) 129 | other := dfFloatRow(clusters[otherCluster], floatColumns, i) 130 | 131 | // Add to b. 132 | b += floats.Distance(current, other, 2) / float64(clusters[otherCluster].Nrow()) 133 | } 134 | 135 | // Add this point's silhouette, (b - a) / max(a, b), to the average. 136 | if a > b { 137 | silhouette += ((b - a) / a) / float64(len(labels)) 138 | } else { 139 | silhouette += ((b - a) / b) / float64(len(labels)) 140 | } 141 | } 142 | // Output the final average silhouette coefficient to stdout. 143 | fmt.Printf("\nAverage Silhouette Coefficient: %0.2f\n\n", silhouette) 144 | } 145 | 146 | // dfFloatRow retrieves a slice of float values from a DataFrame 147 | // at the given index and for the given column names.
148 | func dfFloatRow(df dataframe.DataFrame, names []string, idx int) []float64 { 149 | var row []float64 150 | for _, name := range names { 151 | row = append(row, df.Col(name).Float()[idx]) 152 | } 153 | return row 154 | } 155 | -------------------------------------------------------------------------------- /Chapter03/evaluation/continuous_metrics/example1/continuous_data.csv: -------------------------------------------------------------------------------- 1 | observation,prediction 2 | 22.1,17.9707745128 3 | 10.4,9.1479740484 4 | 9.3,7.85022376458 5 | 18.5,14.2343945747 6 | 12.9,15.6272181394 7 | 7.2,7.44616232089 8 | 11.8,9.76595037403 9 | 13.2,12.7464977292 10 | 4.8,7.44140865685 11 | 10.6,16.5304143076 12 | 8.6,10.1747654818 13 | 17.4,17.2387102501 14 | 9.2,8.16396559143 15 | 9.7,11.6674159913 16 | 19.0,16.7348218615 17 | 22.4,16.3212530897 18 | 12.5,10.2555777705 19 | 24.4,20.409404167 20 | 11.3,10.3221290671 21 | 14.6,14.0347406849 22 | 18.0,17.4145958197 23 | 12.5,18.3177919879 24 | 5.6,7.66007720284 25 | 15.5,17.88520856 26 | 9.7,9.9941262481 27 | 12.0,19.529976319 28 | 15.0,13.825579467 29 | 15.9,18.4461409171 30 | 18.9,18.8597096889 31 | 10.5,10.3886803637 32 | 21.4,20.956075532 33 | 11.9,12.399480254 34 | 9.6,11.6531549992 35 | 17.4,19.6583252481 36 | 9.5,11.5818500386 37 | 12.8,20.851494923 38 | 25.4,19.7201228807 39 | 14.7,10.5835805895 40 | 10.1,9.08142275179 41 | 21.5,17.8709475679 42 | 16.6,16.6587632368 43 | 17.1,15.4465789058 44 | 20.7,20.9893511803 45 | 12.9,16.8679244547 46 | 8.5,8.225763224 47 | 14.9,15.3562592889 48 | 10.6,11.296630196 49 | 23.2,18.436633589 50 | 14.8,17.8329182555 51 | 9.7,10.2127947941 52 | 11.4,16.5304143076 53 | 10.7,11.8052722486 54 | 22.6,17.3195225388 55 | 21.2,15.7127840922 56 | 20.2,19.5204689909 57 | 23.7,16.4876313313 58 | 5.5,7.37961102429 59 | 13.2,13.5070839761 60 | 23.8,17.0533173524 61 | 18.4,17.0485636884 62 | 8.1,9.57580381229 63 | 24.2,19.4539176943 64 | 15.7,18.4081116047 65 | 14.0,11.9146065216 66 | 18.0,13.2646471099 67 | 9.3,10.312621739 68 | 9.5,8.52999772277 69 | 13.4,13.6544475614 70 | 18.9,18.3177919879 71 | 22.3,17.338537195 72 | 18.3,16.4971386593 73 | 12.4,12.2521166687 74 | 8.8,8.30657551273 75 | 11.0,13.1838348212 76 | 17.0,17.1769126175 77 | 8.7,7.83596277245 78 | 6.9,8.33985116104 79 | 14.2,12.7607587213 80 | 5.3,7.28929140747 81 | 11.0,12.5468438394 82 | 11.8,10.6643928782 83 | 12.3,18.431879925 84 | 11.3,10.6121025737 85 | 13.6,10.2840997547 86 | 21.7,17.1816662816 87 | 15.2,16.2166724808 88 | 12.0,10.6596392142 89 | 16.0,12.2948996451 90 | 12.9,11.2300788994 91 | 16.7,12.2521166687 92 | 11.2,13.4167643593 93 | 7.3,8.39214146551 94 | 19.4,17.3813201714 95 | 22.2,18.9595366338 96 | 11.5,12.1380287316 97 | 16.9,14.7953269318 98 | 11.7,16.4258336987 99 | 15.5,15.8221183652 100 | 25.4,20.8039582826 101 | 17.2,13.4595473357 102 | 11.7,17.6047423814 103 | 23.8,21.1224537735 104 | 14.8,20.3523601985 105 | 14.7,15.9647282865 106 | 20.7,18.3558213003 107 | 19.2,13.5878962648 108 | 7.2,8.22100955995 109 | 8.7,11.3299058443 110 | 5.3,7.6553235388 111 | 19.8,19.1734515157 112 | 13.4,17.7663669589 113 | 21.8,18.5221995418 114 | 14.1,15.3847812732 115 | 15.9,16.9962733839 116 | 14.6,10.749958831 117 | 12.6,10.6025952456 118 | 12.2,13.6496938974 119 | 9.4,10.6643928782 120 | 15.9,13.0079492516 121 | 6.6,7.95480437353 122 | 15.5,13.7495208423 123 | 7.0,7.92628238927 124 | 11.6,17.6808010061 125 | 15.2,12.8843539864 126 | 19.7,17.9422525285 127 | 10.6,11.1777885949 128 | 6.6,7.40337934451 129 | 8.8,10.8450321119 130 | 
24.7,17.5049154365 131 | 9.7,9.86577731894 132 | 1.6,7.06586919743 133 | 12.7,19.639310592 134 | 5.7,7.43190132877 135 | 19.6,17.4811471163 136 | 10.8,8.78669558111 137 | 11.6,9.32861328204 138 | 9.5,8.24953154421 139 | 20.8,20.0433720356 140 | 9.6,9.07666908775 141 | 20.7,15.8221183652 142 | 10.9,10.5217829569 143 | 19.2,16.240440801 144 | 20.1,17.5144227646 145 | 10.4,12.0049261384 146 | 11.4,11.6056183588 147 | 10.3,13.7019842019 148 | 13.2,18.4461409171 149 | 25.4,18.5935045024 150 | 10.9,8.83898588558 151 | 10.1,9.15748137648 152 | 16.1,20.3761285187 153 | 11.6,12.7845270415 154 | 16.6,16.4258336987 155 | 19.0,15.1756200553 156 | 15.6,15.9599746224 157 | 3.2,7.2274937749 158 | 15.3,11.4962840858 159 | 10.1,14.153582286 160 | 7.3,7.58877224219 161 | 12.9,13.2931690942 162 | 14.4,15.2326640238 163 | 13.3,11.1064836342 164 | 14.9,15.9884966067 165 | 18.0,14.8048342599 166 | 11.9,12.6038878079 167 | 11.9,18.1799357307 168 | 8.0,7.88349941288 169 | 12.2,16.8631707907 170 | 17.1,17.2719858984 171 | 15.0,20.5472604242 172 | 8.4,9.40942557078 173 | 14.5,14.8523709004 174 | 7.6,7.96431170161 175 | 11.7,15.037763798 176 | 11.5,17.6047423814 177 | 27.0,20.195489285 178 | 20.2,18.8406950327 179 | 11.7,15.1233297508 180 | 11.8,20.1859819569 181 | 12.6,14.9046612048 182 | 10.5,14.4768314409 183 | 12.2,17.4193494837 184 | 8.7,9.70415274146 185 | 26.2,20.7041313377 186 | 17.6,19.097392891 187 | 22.6,16.7776048379 188 | 10.3,13.6639548895 189 | 17.3,16.1168455359 190 | 15.9,20.628072713 191 | 6.7,7.92152872523 192 | 10.8,8.91029084623 193 | 9.9,10.6216099018 194 | 5.9,7.85022376458 195 | 19.6,14.9617051734 196 | 17.3,14.148828622 197 | 7.6,8.84849321367 198 | 9.7,11.5105450779 199 | 12.8,15.4465789058 200 | 25.5,20.5139847759 201 | 13.4,18.0658477936 202 | -------------------------------------------------------------------------------- /Chapter03/evaluation/continuous_metrics/example2/continuous_data.csv: -------------------------------------------------------------------------------- 1 | observation,prediction 2 | 22.1,17.9707745128 3 | 10.4,9.1479740484 4 | 9.3,7.85022376458 5 | 18.5,14.2343945747 6 | 12.9,15.6272181394 7 | 7.2,7.44616232089 8 | 11.8,9.76595037403 9 | 13.2,12.7464977292 10 | 4.8,7.44140865685 11 | 10.6,16.5304143076 12 | 8.6,10.1747654818 13 | 17.4,17.2387102501 14 | 9.2,8.16396559143 15 | 9.7,11.6674159913 16 | 19.0,16.7348218615 17 | 22.4,16.3212530897 18 | 12.5,10.2555777705 19 | 24.4,20.409404167 20 | 11.3,10.3221290671 21 | 14.6,14.0347406849 22 | 18.0,17.4145958197 23 | 12.5,18.3177919879 24 | 5.6,7.66007720284 25 | 15.5,17.88520856 26 | 9.7,9.9941262481 27 | 12.0,19.529976319 28 | 15.0,13.825579467 29 | 15.9,18.4461409171 30 | 18.9,18.8597096889 31 | 10.5,10.3886803637 32 | 21.4,20.956075532 33 | 11.9,12.399480254 34 | 9.6,11.6531549992 35 | 17.4,19.6583252481 36 | 9.5,11.5818500386 37 | 12.8,20.851494923 38 | 25.4,19.7201228807 39 | 14.7,10.5835805895 40 | 10.1,9.08142275179 41 | 21.5,17.8709475679 42 | 16.6,16.6587632368 43 | 17.1,15.4465789058 44 | 20.7,20.9893511803 45 | 12.9,16.8679244547 46 | 8.5,8.225763224 47 | 14.9,15.3562592889 48 | 10.6,11.296630196 49 | 23.2,18.436633589 50 | 14.8,17.8329182555 51 | 9.7,10.2127947941 52 | 11.4,16.5304143076 53 | 10.7,11.8052722486 54 | 22.6,17.3195225388 55 | 21.2,15.7127840922 56 | 20.2,19.5204689909 57 | 23.7,16.4876313313 58 | 5.5,7.37961102429 59 | 13.2,13.5070839761 60 | 23.8,17.0533173524 61 | 18.4,17.0485636884 62 | 8.1,9.57580381229 63 | 24.2,19.4539176943 64 | 15.7,18.4081116047 65 | 14.0,11.9146065216 66 | 
18.0,13.2646471099 67 | 9.3,10.312621739 68 | 9.5,8.52999772277 69 | 13.4,13.6544475614 70 | 18.9,18.3177919879 71 | 22.3,17.338537195 72 | 18.3,16.4971386593 73 | 12.4,12.2521166687 74 | 8.8,8.30657551273 75 | 11.0,13.1838348212 76 | 17.0,17.1769126175 77 | 8.7,7.83596277245 78 | 6.9,8.33985116104 79 | 14.2,12.7607587213 80 | 5.3,7.28929140747 81 | 11.0,12.5468438394 82 | 11.8,10.6643928782 83 | 12.3,18.431879925 84 | 11.3,10.6121025737 85 | 13.6,10.2840997547 86 | 21.7,17.1816662816 87 | 15.2,16.2166724808 88 | 12.0,10.6596392142 89 | 16.0,12.2948996451 90 | 12.9,11.2300788994 91 | 16.7,12.2521166687 92 | 11.2,13.4167643593 93 | 7.3,8.39214146551 94 | 19.4,17.3813201714 95 | 22.2,18.9595366338 96 | 11.5,12.1380287316 97 | 16.9,14.7953269318 98 | 11.7,16.4258336987 99 | 15.5,15.8221183652 100 | 25.4,20.8039582826 101 | 17.2,13.4595473357 102 | 11.7,17.6047423814 103 | 23.8,21.1224537735 104 | 14.8,20.3523601985 105 | 14.7,15.9647282865 106 | 20.7,18.3558213003 107 | 19.2,13.5878962648 108 | 7.2,8.22100955995 109 | 8.7,11.3299058443 110 | 5.3,7.6553235388 111 | 19.8,19.1734515157 112 | 13.4,17.7663669589 113 | 21.8,18.5221995418 114 | 14.1,15.3847812732 115 | 15.9,16.9962733839 116 | 14.6,10.749958831 117 | 12.6,10.6025952456 118 | 12.2,13.6496938974 119 | 9.4,10.6643928782 120 | 15.9,13.0079492516 121 | 6.6,7.95480437353 122 | 15.5,13.7495208423 123 | 7.0,7.92628238927 124 | 11.6,17.6808010061 125 | 15.2,12.8843539864 126 | 19.7,17.9422525285 127 | 10.6,11.1777885949 128 | 6.6,7.40337934451 129 | 8.8,10.8450321119 130 | 24.7,17.5049154365 131 | 9.7,9.86577731894 132 | 1.6,7.06586919743 133 | 12.7,19.639310592 134 | 5.7,7.43190132877 135 | 19.6,17.4811471163 136 | 10.8,8.78669558111 137 | 11.6,9.32861328204 138 | 9.5,8.24953154421 139 | 20.8,20.0433720356 140 | 9.6,9.07666908775 141 | 20.7,15.8221183652 142 | 10.9,10.5217829569 143 | 19.2,16.240440801 144 | 20.1,17.5144227646 145 | 10.4,12.0049261384 146 | 11.4,11.6056183588 147 | 10.3,13.7019842019 148 | 13.2,18.4461409171 149 | 25.4,18.5935045024 150 | 10.9,8.83898588558 151 | 10.1,9.15748137648 152 | 16.1,20.3761285187 153 | 11.6,12.7845270415 154 | 16.6,16.4258336987 155 | 19.0,15.1756200553 156 | 15.6,15.9599746224 157 | 3.2,7.2274937749 158 | 15.3,11.4962840858 159 | 10.1,14.153582286 160 | 7.3,7.58877224219 161 | 12.9,13.2931690942 162 | 14.4,15.2326640238 163 | 13.3,11.1064836342 164 | 14.9,15.9884966067 165 | 18.0,14.8048342599 166 | 11.9,12.6038878079 167 | 11.9,18.1799357307 168 | 8.0,7.88349941288 169 | 12.2,16.8631707907 170 | 17.1,17.2719858984 171 | 15.0,20.5472604242 172 | 8.4,9.40942557078 173 | 14.5,14.8523709004 174 | 7.6,7.96431170161 175 | 11.7,15.037763798 176 | 11.5,17.6047423814 177 | 27.0,20.195489285 178 | 20.2,18.8406950327 179 | 11.7,15.1233297508 180 | 11.8,20.1859819569 181 | 12.6,14.9046612048 182 | 10.5,14.4768314409 183 | 12.2,17.4193494837 184 | 8.7,9.70415274146 185 | 26.2,20.7041313377 186 | 17.6,19.097392891 187 | 22.6,16.7776048379 188 | 10.3,13.6639548895 189 | 17.3,16.1168455359 190 | 15.9,20.628072713 191 | 6.7,7.92152872523 192 | 10.8,8.91029084623 193 | 9.9,10.6216099018 194 | 5.9,7.85022376458 195 | 19.6,14.9617051734 196 | 17.3,14.148828622 197 | 7.6,8.84849321367 198 | 9.7,11.5105450779 199 | 12.8,15.4465789058 200 | 25.5,20.5139847759 201 | 13.4,18.0658477936 202 | -------------------------------------------------------------------------------- /Chapter04/linear_regression/example1/Advertising.csv: -------------------------------------------------------------------------------- 1 | 
TV,Radio,Newspaper,Sales 2 | 230.1,37.8,69.2,22.1 3 | 44.5,39.3,45.1,10.4 4 | 17.2,45.9,69.3,9.3 5 | 151.5,41.3,58.5,18.5 6 | 180.8,10.8,58.4,12.9 7 | 8.7,48.9,75,7.2 8 | 57.5,32.8,23.5,11.8 9 | 120.2,19.6,11.6,13.2 10 | 8.6,2.1,1,4.8 11 | 199.8,2.6,21.2,10.6 12 | 66.1,5.8,24.2,8.6 13 | 214.7,24,4,17.4 14 | 23.8,35.1,65.9,9.2 15 | 97.5,7.6,7.2,9.7 16 | 204.1,32.9,46,19 17 | 195.4,47.7,52.9,22.4 18 | 67.8,36.6,114,12.5 19 | 281.4,39.6,55.8,24.4 20 | 69.2,20.5,18.3,11.3 21 | 147.3,23.9,19.1,14.6 22 | 218.4,27.7,53.4,18 23 | 237.4,5.1,23.5,12.5 24 | 13.2,15.9,49.6,5.6 25 | 228.3,16.9,26.2,15.5 26 | 62.3,12.6,18.3,9.7 27 | 262.9,3.5,19.5,12 28 | 142.9,29.3,12.6,15 29 | 240.1,16.7,22.9,15.9 30 | 248.8,27.1,22.9,18.9 31 | 70.6,16,40.8,10.5 32 | 292.9,28.3,43.2,21.4 33 | 112.9,17.4,38.6,11.9 34 | 97.2,1.5,30,9.6 35 | 265.6,20,0.3,17.4 36 | 95.7,1.4,7.4,9.5 37 | 290.7,4.1,8.5,12.8 38 | 266.9,43.8,5,25.4 39 | 74.7,49.4,45.7,14.7 40 | 43.1,26.7,35.1,10.1 41 | 228,37.7,32,21.5 42 | 202.5,22.3,31.6,16.6 43 | 177,33.4,38.7,17.1 44 | 293.6,27.7,1.8,20.7 45 | 206.9,8.4,26.4,12.9 46 | 25.1,25.7,43.3,8.5 47 | 175.1,22.5,31.5,14.9 48 | 89.7,9.9,35.7,10.6 49 | 239.9,41.5,18.5,23.2 50 | 227.2,15.8,49.9,14.8 51 | 66.9,11.7,36.8,9.7 52 | 199.8,3.1,34.6,11.4 53 | 100.4,9.6,3.6,10.7 54 | 216.4,41.7,39.6,22.6 55 | 182.6,46.2,58.7,21.2 56 | 262.7,28.8,15.9,20.2 57 | 198.9,49.4,60,23.7 58 | 7.3,28.1,41.4,5.5 59 | 136.2,19.2,16.6,13.2 60 | 210.8,49.6,37.7,23.8 61 | 210.7,29.5,9.3,18.4 62 | 53.5,2,21.4,8.1 63 | 261.3,42.7,54.7,24.2 64 | 239.3,15.5,27.3,15.7 65 | 102.7,29.6,8.4,14 66 | 131.1,42.8,28.9,18 67 | 69,9.3,0.9,9.3 68 | 31.5,24.6,2.2,9.5 69 | 139.3,14.5,10.2,13.4 70 | 237.4,27.5,11,18.9 71 | 216.8,43.9,27.2,22.3 72 | 199.1,30.6,38.7,18.3 73 | 109.8,14.3,31.7,12.4 74 | 26.8,33,19.3,8.8 75 | 129.4,5.7,31.3,11 76 | 213.4,24.6,13.1,17 77 | 16.9,43.7,89.4,8.7 78 | 27.5,1.6,20.7,6.9 79 | 120.5,28.5,14.2,14.2 80 | 5.4,29.9,9.4,5.3 81 | 116,7.7,23.1,11 82 | 76.4,26.7,22.3,11.8 83 | 239.8,4.1,36.9,12.3 84 | 75.3,20.3,32.5,11.3 85 | 68.4,44.5,35.6,13.6 86 | 213.5,43,33.8,21.7 87 | 193.2,18.4,65.7,15.2 88 | 76.3,27.5,16,12 89 | 110.7,40.6,63.2,16 90 | 88.3,25.5,73.4,12.9 91 | 109.8,47.8,51.4,16.7 92 | 134.3,4.9,9.3,11.2 93 | 28.6,1.5,33,7.3 94 | 217.7,33.5,59,19.4 95 | 250.9,36.5,72.3,22.2 96 | 107.4,14,10.9,11.5 97 | 163.3,31.6,52.9,16.9 98 | 197.6,3.5,5.9,11.7 99 | 184.9,21,22,15.5 100 | 289.7,42.3,51.2,25.4 101 | 135.2,41.7,45.9,17.2 102 | 222.4,4.3,49.8,11.7 103 | 296.4,36.3,100.9,23.8 104 | 280.2,10.1,21.4,14.8 105 | 187.9,17.2,17.9,14.7 106 | 238.2,34.3,5.3,20.7 107 | 137.9,46.4,59,19.2 108 | 25,11,29.7,7.2 109 | 90.4,0.3,23.2,8.7 110 | 13.1,0.4,25.6,5.3 111 | 255.4,26.9,5.5,19.8 112 | 225.8,8.2,56.5,13.4 113 | 241.7,38,23.2,21.8 114 | 175.7,15.4,2.4,14.1 115 | 209.6,20.6,10.7,15.9 116 | 78.2,46.8,34.5,14.6 117 | 75.1,35,52.7,12.6 118 | 139.2,14.3,25.6,12.2 119 | 76.4,0.8,14.8,9.4 120 | 125.7,36.9,79.2,15.9 121 | 19.4,16,22.3,6.6 122 | 141.3,26.8,46.2,15.5 123 | 18.8,21.7,50.4,7 124 | 224,2.4,15.6,11.6 125 | 123.1,34.6,12.4,15.2 126 | 229.5,32.3,74.2,19.7 127 | 87.2,11.8,25.9,10.6 128 | 7.8,38.9,50.6,6.6 129 | 80.2,0,9.2,8.8 130 | 220.3,49,3.2,24.7 131 | 59.6,12,43.1,9.7 132 | 0.7,39.6,8.7,1.6 133 | 265.2,2.9,43,12.7 134 | 8.4,27.2,2.1,5.7 135 | 219.8,33.5,45.1,19.6 136 | 36.9,38.6,65.6,10.8 137 | 48.3,47,8.5,11.6 138 | 25.6,39,9.3,9.5 139 | 273.7,28.9,59.7,20.8 140 | 43,25.9,20.5,9.6 141 | 184.9,43.9,1.7,20.7 142 | 73.4,17,12.9,10.9 143 | 193.7,35.4,75.6,19.2 144 | 220.5,33.2,37.9,20.1 145 | 104.6,5.7,34.4,10.4 
146 | 96.2,14.8,38.9,11.4 147 | 140.3,1.9,9,10.3 148 | 240.1,7.3,8.7,13.2 149 | 243.2,49,44.3,25.4 150 | 38,40.3,11.9,10.9 151 | 44.7,25.8,20.6,10.1 152 | 280.7,13.9,37,16.1 153 | 121,8.4,48.7,11.6 154 | 197.6,23.3,14.2,16.6 155 | 171.3,39.7,37.7,19 156 | 187.8,21.1,9.5,15.6 157 | 4.1,11.6,5.7,3.2 158 | 93.9,43.5,50.5,15.3 159 | 149.8,1.3,24.3,10.1 160 | 11.7,36.9,45.2,7.3 161 | 131.7,18.4,34.6,12.9 162 | 172.5,18.1,30.7,14.4 163 | 85.7,35.8,49.3,13.3 164 | 188.4,18.1,25.6,14.9 165 | 163.5,36.8,7.4,18 166 | 117.2,14.7,5.4,11.9 167 | 234.5,3.4,84.8,11.9 168 | 17.9,37.6,21.6,8 169 | 206.8,5.2,19.4,12.2 170 | 215.4,23.6,57.6,17.1 171 | 284.3,10.6,6.4,15 172 | 50,11.6,18.4,8.4 173 | 164.5,20.9,47.4,14.5 174 | 19.6,20.1,17,7.6 175 | 168.4,7.1,12.8,11.7 176 | 222.4,3.4,13.1,11.5 177 | 276.9,48.9,41.8,27 178 | 248.4,30.2,20.3,20.2 179 | 170.2,7.8,35.2,11.7 180 | 276.7,2.3,23.7,11.8 181 | 165.6,10,17.6,12.6 182 | 156.6,2.6,8.3,10.5 183 | 218.5,5.4,27.4,12.2 184 | 56.2,5.7,29.7,8.7 185 | 287.6,43,71.8,26.2 186 | 253.8,21.3,30,17.6 187 | 205,45.1,19.6,22.6 188 | 139.5,2.1,26.6,10.3 189 | 191.1,28.7,18.2,17.3 190 | 286,13.9,3.7,15.9 191 | 18.7,12.1,23.4,6.7 192 | 39.5,41.1,5.8,10.8 193 | 75.5,10.8,6,9.9 194 | 17.2,4.1,31.6,5.9 195 | 166.8,42,3.6,19.6 196 | 149.7,35.6,6,17.3 197 | 38.2,3.7,13.8,7.6 198 | 94.2,4.9,8.1,9.7 199 | 177,9.3,6.4,12.8 200 | 283.6,42,66.2,25.5 201 | 232.1,8.6,8.7,13.4 202 | --------------------------------------------------------------------------------
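
The continuous_data.csv files above pair an observation column with a prediction column, which is exactly the input needed for continuous evaluation metrics such as MAE and RMSE. The following is a minimal, hypothetical sketch (it is not one of the repository's myprogram.go files) showing how such a file might be read with encoding/csv and scored in Go; the file path and output formatting are assumptions for illustration only.

// Hypothetical sketch: compute MAE and RMSE for observation,prediction pairs
// in a CSV laid out like Chapter03/evaluation/continuous_metrics/example2/continuous_data.csv.
package main

import (
	"encoding/csv"
	"fmt"
	"log"
	"math"
	"os"
	"strconv"
)

func main() {
	// Open the data file; the path here is assumed for illustration.
	f, err := os.Open("continuous_data.csv")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	// Read every record, including the "observation,prediction" header row.
	records, err := csv.NewReader(f).ReadAll()
	if err != nil {
		log.Fatal(err)
	}

	var sumAbs, sumSq float64
	n := 0

	// Skip the header and accumulate absolute and squared errors.
	for _, rec := range records[1:] {
		obs, err := strconv.ParseFloat(rec[0], 64)
		if err != nil {
			log.Fatal(err)
		}
		pred, err := strconv.ParseFloat(rec[1], 64)
		if err != nil {
			log.Fatal(err)
		}
		diff := obs - pred
		sumAbs += math.Abs(diff)
		sumSq += diff * diff
		n++
	}

	// Report mean absolute error and root mean squared error.
	fmt.Printf("MAE:  %0.2f\n", sumAbs/float64(n))
	fmt.Printf("RMSE: %0.2f\n", math.Sqrt(sumSq/float64(n)))
}

Run from a directory containing one of the continuous_data.csv files (for example, go run main.go after copying the sketch there); the same reading pattern also applies to Advertising.csv, whose Sales column supplies the observed values used in these evaluation examples.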