├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── cmd ├── max │ └── max.go ├── mean │ └── mean.go ├── min │ └── min.go ├── stats │ └── stats.go ├── std │ └── std.go ├── sum │ └── sum.go └── var │ └── var.go ├── stats.go ├── test ├── example1.dat ├── example2.dat └── example3.dat └── tool.go /.gitignore: -------------------------------------------------------------------------------- 1 | *.tar.gz 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2011 Jonhnny Weslley 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PROGRAM=stats-tools 2 | VERSION=0.0.1 3 | LDFLAGS="-X stats.programVersion=$(VERSION)" 4 | 5 | all: test 6 | 7 | deps: 8 | go get ./... 9 | 10 | install: deps 11 | go install -a -v -ldflags $(LDFLAGS) ./cmd/... 12 | 13 | test: deps 14 | go test -v ./... 15 | 16 | qa: 17 | go vet 18 | golint 19 | go test -coverprofile=.cover~ 20 | go tool cover -html=.cover~ 21 | 22 | dist: 23 | @for os in linux darwin; do \ 24 | for arch in 386 amd64; do \ 25 | target=$(PROGRAM)-$$os-$$arch-$(VERSION); \ 26 | echo Building $$target; \ 27 | mkdir $$target; \ 28 | cp ./README.md ./LICENSE $$target; \ 29 | for tool in $$(ls ./cmd); do \ 30 | GOOS=$$os GOARCH=$$arch go build -ldflags $(LDFLAGS) -o $$target/$$tool ./cmd/$$tool ; \ 31 | done; \ 32 | tar -zcf $$target.tar.gz $$target; \ 33 | rm -rf $$target; \ 34 | done \ 35 | done 36 | 37 | clean: 38 | rm -rf *.tar.gz 39 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # stats-tools 2 | 3 | A set of command-line statistics tools. 4 | 5 | 6 | ## Installation 7 | 8 | [Download](https://github.com/jweslley/stats-tools/releases) the binaries and put them somewhere in your path. 9 | 10 | 11 | ### From source 12 | 13 | git clone git://github.com/jweslley/stats-tools.git 14 | cd stats-tools 15 | make 16 | 17 | 18 | ## Utilities 19 | 20 | * **min** - Calculate the minimum of a number sequence 21 | * **max** - Calculate the maximum of a number sequence 22 | * **mean** - Calculate the mean of a number sequence 23 | * **std** - Calculate the standard deviation of a number sequence 24 | * **var** - Calculate the variance of a number sequence 25 | * **sum** - Calculate the sum of a number sequence 26 | * **stats** - Output a summary table including mean, median, mininum, maximum, standard deviation, variance and number count of a number sequence 27 | 28 | 29 | ## Usage 30 | 31 | All utilities take as input a file in table format to perform some calculation based on it. A tipical input file is shown below: 32 | 33 | 1 2 4 34 | 3 5 4 35 | 6 4 6 36 | 4 5 6 37 | 9 12 16 38 | 39 | Considering this input file, let's call it `example1.dat`, you can calculate some statistics like: 40 | 41 | The `max` value on the first column: 42 | 43 | max example1.dat 44 | 45 | The `min` value on the second column: 46 | 47 | min -c 2 example1.dat 48 | 49 | 50 | If the input file's columns are separated by another character instead of whitespace characters, like CSV files, you can use the `-s` option to denote this. The next example outputs a statistical `summary` about the second column of the following file (`example2.dat`): 51 | 52 | "A",10,12 53 | "A",11,14 54 | "B",5,8 55 | "B",6,10 56 | "A",10.5,13 57 | "B",7,11 58 | 59 | Calculating the summary: 60 | 61 | stats -c 2 -s , example2.dat 62 | 63 | Commonly, data files may contain a head, i.e., the first line describes the columns, something like the `example3.dat` file showed below: 64 | 65 | Year,Make,Model,Description,Price 66 | 1997,Ford,E350,"ac abs moon",3000.00 67 | 1999,Chevy,"Venture ""Extended Edition""","",4900.00 68 | 1999,Chevy,"Venture ""Extended Edition, Very Large""","",5000.00 69 | 1996,Jeep,Grand Cherokee,"MUST SELL!air, moon roof, loaded",4799.00 70 | 71 | The `-b` option remove the first line from calculations. In this case, the mean price of the cars is given by: 72 | 73 | mean -b -s, -c5 test/example3.dat 74 | 75 | 76 | ### Piping data 77 | 78 | All `stats-tools` read data from standard input if no file is passed to them. The following command calculates the max value on the second column containing the word `bar` in the file `foo.dat`: 79 | 80 | grep bar foo.dat | max -c 2 81 | 82 | 83 | ## Bugs and Feedback 84 | 85 | If you discover any bugs or have some idea, feel free to create an issue on GitHub: 86 | 87 | http://github.com/jweslley/stats-tools/issues 88 | 89 | 90 | ## License 91 | 92 | MIT license. Copyright (c) 2011 Jonhnny Weslley 93 | 94 | See the LICENSE file provided with the source distribution for full details. 95 | -------------------------------------------------------------------------------- /cmd/max/max.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "github.com/jweslley/stats-tools" 6 | ) 7 | 8 | func main() { 9 | stats.Tool("Calculate the maximum of a number sequence", func(s *stats.Stats) { 10 | fmt.Println(s.Max()) 11 | }) 12 | } 13 | -------------------------------------------------------------------------------- /cmd/mean/mean.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "github.com/jweslley/stats-tools" 6 | ) 7 | 8 | func main() { 9 | stats.Tool("Calculate the mean of a number sequence", func(s *stats.Stats) { 10 | fmt.Println(s.Mean()) 11 | }) 12 | } 13 | -------------------------------------------------------------------------------- /cmd/min/min.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "github.com/jweslley/stats-tools" 6 | ) 7 | 8 | func main() { 9 | stats.Tool("Calculate the minimum of a number sequence", func(s *stats.Stats) { 10 | fmt.Println(s.Min()) 11 | }) 12 | } 13 | -------------------------------------------------------------------------------- /cmd/stats/stats.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "github.com/jweslley/stats-tools" 6 | ) 7 | 8 | func main() { 9 | stats.Tool("Output a summary table including mean, median, mininum, maximum, standard deviation, variance and number count of a number sequence", func(s *stats.Stats) { 10 | fmt.Printf("Min: %.6f\nMean: %.6f\nMax: %.6f\nStdDev: %.6f\nVar: %.6f\nCount: %d\n", 11 | s.Min(), s.Mean(), s.Max(), s.StdDev(), s.Variance(), s.Count()) 12 | }) 13 | } 14 | -------------------------------------------------------------------------------- /cmd/std/std.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "github.com/jweslley/stats-tools" 6 | ) 7 | 8 | func main() { 9 | stats.Tool("Calculate the standard deviation of a number sequence", func(s *stats.Stats) { 10 | fmt.Println(s.StdDev()) 11 | }) 12 | } 13 | -------------------------------------------------------------------------------- /cmd/sum/sum.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "github.com/jweslley/stats-tools" 6 | ) 7 | 8 | func main() { 9 | stats.Tool("Calculate the sum of a number sequence", func(s *stats.Stats) { 10 | fmt.Println(s.Sum()) 11 | }) 12 | } 13 | -------------------------------------------------------------------------------- /cmd/var/var.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "github.com/jweslley/stats-tools" 6 | ) 7 | 8 | func main() { 9 | stats.Tool("Calculate the variance of a number sequence", func(s *stats.Stats) { 10 | fmt.Println(s.Variance()) 11 | }) 12 | } 13 | -------------------------------------------------------------------------------- /stats.go: -------------------------------------------------------------------------------- 1 | package stats 2 | 3 | import ( 4 | "math" 5 | ) 6 | 7 | func NewStats() *Stats { 8 | return &Stats{ 9 | max: -math.MaxFloat64, 10 | min: math.MaxFloat64, 11 | variance: [2]float64{-1, 0}, 12 | } 13 | } 14 | 15 | type Stats struct { 16 | count int64 17 | sum, min, max float64 18 | variance [2]float64 19 | } 20 | 21 | func (s *Stats) Count() int64 { 22 | return s.count 23 | } 24 | 25 | func (s *Stats) Sum() float64 { 26 | return s.sum 27 | } 28 | 29 | func (s *Stats) Mean() float64 { 30 | if s.count == 0 { 31 | return 0 32 | } 33 | return s.sum / float64(s.count) 34 | } 35 | 36 | func (s *Stats) Min() float64 { 37 | if s.count == 0 { 38 | return 0 39 | } 40 | return s.min 41 | } 42 | 43 | func (s *Stats) Max() float64 { 44 | if s.count == 0 { 45 | return 0 46 | } 47 | return s.max 48 | } 49 | 50 | func (s *Stats) StdDev() float64 { 51 | return math.Sqrt(s.Variance()) 52 | } 53 | 54 | func (s *Stats) Variance() float64 { 55 | if s.count <= 1 { 56 | return 0 57 | } 58 | return s.variance[1] / float64(s.count-1) 59 | } 60 | 61 | func (s *Stats) Update(v float64) { 62 | s.count++ 63 | s.sum += v 64 | if v < s.min { 65 | s.min = v 66 | } 67 | if v > s.max { 68 | s.max = v 69 | } 70 | if s.variance[0] == -1 { 71 | s.variance[0] = v 72 | s.variance[1] = 0.0 73 | } else { 74 | v0 := s.variance[0] 75 | v1 := s.variance[1] 76 | s.variance[0] = v0 + (v-v0)/float64(s.count) 77 | s.variance[1] = v1 + (v-v0)*(v-s.variance[0]) 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /test/example1.dat: -------------------------------------------------------------------------------- 1 | 1 2 4 2 | 3 5 4 3 | 6 4 6 4 | 4 5 6 5 | 9 12 16 6 | -------------------------------------------------------------------------------- /test/example2.dat: -------------------------------------------------------------------------------- 1 | "A",10,12 2 | "A",11,14 3 | "B",5,8 4 | "B",6,10 5 | "A",10.5,13 6 | "B",7,11 7 | -------------------------------------------------------------------------------- /test/example3.dat: -------------------------------------------------------------------------------- 1 | Year,Make,Model,Description,Price 2 | 1997,Ford,E350,"ac abs moon",3000.00 3 | 1999,Chevy,"Venture ""Extended Edition""","",4900.00 4 | 1999,Chevy,"Venture ""Extended Edition, Very Large""","",5000.00 5 | 1996,Jeep,Grand Cherokee,"MUST SELL!air, moon roof, loaded",4799.00 6 | -------------------------------------------------------------------------------- /tool.go: -------------------------------------------------------------------------------- 1 | package stats 2 | 3 | import ( 4 | "encoding/csv" 5 | "fmt" 6 | "io" 7 | "os" 8 | "strconv" 9 | "unicode/utf8" 10 | 11 | flag "github.com/ogier/pflag" 12 | ) 13 | 14 | const programVersion = "0.0.1" 15 | 16 | var ( 17 | ignore = flag.BoolP("ignore", "i", false, "ignore invalid numbers") 18 | behead = flag.BoolP("behead", "b", false, 19 | "remove the first line (head) from calculations. Useful to ignore column names") 20 | separator = flag.StringP("separator", "s", " ", 21 | "define the SEPARATOR to use instead of whitespace for column separator") 22 | column = flag.IntP("column", "c", 1, "calculate stats based on the specified COLUMN") 23 | version = flag.BoolP("version", "v", false, "print version information and exit") 24 | ) 25 | 26 | func fail(format string, v ...interface{}) { 27 | fmt.Fprintf(os.Stderr, format, v...) 28 | os.Exit(1) 29 | } 30 | 31 | func calculate(s *Stats) { 32 | if len(flag.Args()) == 0 { 33 | parse("", os.Stdin, s) 34 | } 35 | 36 | for _, filename := range flag.Args() { 37 | if filename == "-" { 38 | parse("", os.Stdin, s) 39 | continue 40 | } 41 | file, err := os.Open(filename) 42 | if err != nil { 43 | fail("%s\n", err.Error()) 44 | } 45 | parse(filename, file, s) 46 | } 47 | } 48 | 49 | func parse(filename string, input *os.File, s *Stats) { 50 | r := csv.NewReader(input) 51 | sep, _ := utf8.DecodeRuneInString(*separator) 52 | r.Comma = sep 53 | var line int64 54 | for { 55 | line += 1 56 | record, err := r.Read() 57 | if err == io.EOF { 58 | return 59 | } 60 | if err != nil { 61 | fail("An error occurred while reading the file %s: %+v\n", filename, err) 62 | } 63 | if line == 1 && *behead { 64 | continue 65 | } 66 | if *column > len(record) { 67 | fail("Invalid column number: %d", *column) 68 | } 69 | value, err := strconv.ParseFloat(record[*column-1], 64) 70 | if err != nil { 71 | if *ignore { 72 | continue 73 | } else { 74 | fail("Invalid number found in file %s at line %d: %s\n", filename, line, record[*column-1]) 75 | } 76 | } 77 | s.Update(value) 78 | } 79 | } 80 | 81 | func Tool(desc string, outputter func(s *Stats)) { 82 | flag.Usage = func() { 83 | fmt.Fprintf(os.Stderr, "Usage: %s [OPTION]... [FILE]...\n", os.Args[0]) 84 | fmt.Fprintf(os.Stderr, "%s\n\n", desc) 85 | flag.PrintDefaults() 86 | fmt.Fprintln(os.Stderr, "\nWith no FILE, or when FILE is -, read standard input.") 87 | } 88 | flag.Parse() 89 | 90 | if *version { 91 | fmt.Printf("%s %s\n", os.Args[0], programVersion) 92 | return 93 | } 94 | 95 | s := NewStats() 96 | calculate(s) 97 | outputter(s) 98 | } 99 | --------------------------------------------------------------------------------