├── .travis.yml ├── LICENSE.md ├── README.md ├── assoc.go ├── assoc_test.go ├── doc.go ├── example_test.go ├── go.mod ├── script.go ├── script_test.go ├── value.go └── value_test.go /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | 3 | language: go 4 | 5 | go: 6 | - 1.x 7 | - master 8 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright © 2016, Scott Pakin 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 5 | 6 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 7 | 8 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 9 | 10 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 11 | 12 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | awk 2 | === 3 | 4 | [![Go Report Card](https://goreportcard.com/badge/github.com/spakin/awk)](https://goreportcard.com/report/github.com/spakin/awk) [![Build Status](https://travis-ci.com/spakin/awk.svg?branch=master)](https://travis-ci.com/spakin/awk) [![Go project version](https://badge.fury.io/go/github.com%2Fspakin%2Fawk.svg)](https://badge.fury.io/go/github.com%2Fspakin%2Fawk) [![GoDoc](https://godoc.org/github.com/spakin/awk?status.svg)](https://godoc.org/github.com/spakin/awk) 5 | 6 | Description 7 | ----------- 8 | 9 | `awk` is a package for the [Go programming language](https://golang.org/) that provides an [AWK](http://pubs.opengroup.org/onlinepubs/9699919799/utilities/awk.html)-style text processing capability. The package facilitates splitting an input stream into records (default: newline-separated lines) and fields (default: whitespace-separated columns) then applying a sequence of statements of the form "if 〈_pattern_〉 then 〈_action_〉" to each record in turn. 
For example, the following is a complete Go program that adds up the first two columns of a [CSV](https://en.wikipedia.org/wiki/Comma-separated_values) file to produce a third column: 10 | ```Go 11 | package main 12 | 13 | import ( 14 | "github.com/spakin/awk" 15 | "os" 16 | ) 17 | 18 | func main() { 19 | s := awk.NewScript() 20 | s.Begin = func(s *awk.Script) { 21 | s.SetFS(",") 22 | s.SetOFS(",") 23 | } 24 | s.AppendStmt(nil, func(s *awk.Script) { 25 | s.SetF(3, s.NewValue(s.F(1).Int()+s.F(2).Int())) 26 | s.Println() 27 | }) 28 | s.Run(os.Stdin) 29 | } 30 | ``` 31 | 32 | In the above, the `awk` package handles all the mundane details such as reading lines from the file, checking for EOF, splitting lines into columns, handling errors, and other such things. With the help of `awk`, Go easily can be applied to the sorts of text-processing tasks that one would normally implement in a scripting language but without sacrificing Go's speed, safety, or flexibility. 33 | 34 | Installation 35 | ------------ 36 | 37 | The `awk` package has opted into the [Go module system](https://blog.golang.org/using-go-modules) so installation is in fact unnecessary if your program or package has done likewise. Otherwise, a traditional 38 | ```bash 39 | go get github.com/spakin/awk 40 | ``` 41 | will install the package. 42 | 43 | Documentation 44 | ------------- 45 | 46 | Descriptions and examples of the `awk` API can be found online in the [GoDoc documentation of package `awk`](https://godoc.org/github.com/spakin/awk). 47 | 48 | Author 49 | ------ 50 | 51 | [Scott Pakin](http://www.pakin.org/~scott/), *scott+awk@pakin.org* 52 | -------------------------------------------------------------------------------- /assoc.go: -------------------------------------------------------------------------------- 1 | // This file defines an AWK-like associative array, ValueArray. 2 | 3 | package awk 4 | 5 | import ( 6 | "strings" 7 | ) 8 | 9 | // A ValueArray maps Values to Values. 
10 | type ValueArray struct { 11 | script *Script // Pointer to the script that created this array (see NewValueArray) 12 | data map[string]*Value // The associative array proper, keyed by the string form of each index 13 | } 14 | 15 | // NewValueArray creates and returns an empty associative array of Values that is tied to script s. 16 | func (s *Script) NewValueArray() *ValueArray { 17 | return &ValueArray{ 18 | script: s, 19 | data: make(map[string]*Value), 20 | } 21 | } 22 | 23 | // Set (index, value) assigns a Value to an index of a ValueArray. Multiple 24 | // indexes can be specified to simulate multidimensional arrays. (In fact, the 25 | // indexes are concatenated into a single string with intervening Script.SubSep 26 | // characters.) The final argument is always the value to assign. Arguments 27 | // can be provided either as Values or as any types that can be converted to 28 | // Values. Set panics if it is given fewer than two arguments. 29 | func (va *ValueArray) Set(args ...interface{}) { 30 | // Ensure we were given at least one index and a value. 31 | if len(args) < 2 { 32 | panic("ValueArray.Set requires at least one index and one value") 33 | } 34 | 35 | // Convert each argument to a Value. 36 | argVals := make([]*Value, len(args)) 37 | for i, arg := range args { 38 | v, ok := arg.(*Value) // A *Value argument is used as is; anything else goes through Script.NewValue 39 | if !ok { 40 | v = va.script.NewValue(arg) 41 | } 42 | argVals[i] = v 43 | } 44 | 45 | // Handle the most common case: one index and one value. 46 | if len(args) == 2 { 47 | va.data[argVals[0].String()] = argVals[1] 48 | return 49 | } 50 | 51 | // Merge the indexes into a single string. 52 | idxStrs := make([]string, len(argVals)-1) 53 | for i, v := range argVals[:len(argVals)-1] { 54 | idxStrs[i] = v.String() 55 | } 56 | idx := strings.Join(idxStrs, va.script.SubSep) 57 | 58 | // Associate the final argument with the index string. 59 | va.data[idx] = argVals[len(argVals)-1] 60 | } 61 | 62 | // Get returns the Value associated with a given index into a ValueArray. 63 | // Multiple indexes can be specified to simulate multidimensional arrays.
(In 64 | // fact, the indexes are concatenated into a single string with intervening 65 | // Script.SubSep characters.) The arguments can be provided either as Values 66 | // or as any types that can be converted to Values. If the index doesn't 67 | // appear in the array, a zero value (a new Value created from the empty string) is returned and the array is left unmodified. Get panics if no index is given. 68 | func (va *ValueArray) Get(args ...interface{}) *Value { 69 | // Ensure we were given at least one index. 70 | if len(args) < 1 { 71 | panic("ValueArray.Get requires at least one index") 72 | } 73 | 74 | // Convert each argument to a Value. 75 | argVals := make([]*Value, len(args)) 76 | for i, arg := range args { 77 | v, ok := arg.(*Value) 78 | if !ok { 79 | v = va.script.NewValue(arg) 80 | } 81 | argVals[i] = v 82 | } 83 | 84 | // Handle the most common case: a single index. 85 | if len(args) == 1 { 86 | vv, found := va.data[argVals[0].String()] 87 | if !found { 88 | return va.script.NewValue("") 89 | } 90 | return vv 91 | } 92 | 93 | // Merge the indexes into a single string. 94 | idxStrs := make([]string, len(argVals)) 95 | for i, v := range argVals { 96 | idxStrs[i] = v.String() 97 | } 98 | idx := strings.Join(idxStrs, va.script.SubSep) 99 | 100 | // Look up the index in the associative array. 101 | vv, found := va.data[idx] 102 | if !found { 103 | return va.script.NewValue("") 104 | } 105 | return vv 106 | } 107 | 108 | // Delete deletes a key and associated value from a ValueArray. Multiple 109 | // indexes can be specified to simulate multidimensional arrays. (In fact, the 110 | // indexes are concatenated into a single string with intervening Script.SubSep 111 | // characters.) The arguments can be provided either as Values or as any types 112 | // that can be converted to Values. If no argument is provided, the entire 113 | // ValueArray is emptied; this includes a call that expands an empty slice, as in Delete(idxs...). 114 | func (va *ValueArray) Delete(args ...interface{}) { 115 | // If we were given no arguments, delete the entire array. Check the length rather than comparing against nil so that an empty but non-nil argument slice is handled the same way.
116 | if len(args) == 0 { 117 | va.data = make(map[string]*Value) 118 | return 119 | } 120 | 121 | // Convert each argument to a Value. 122 | argVals := make([]*Value, len(args)) 123 | for i, arg := range args { 124 | v, ok := arg.(*Value) 125 | if !ok { 126 | v = va.script.NewValue(arg) 127 | } 128 | argVals[i] = v 129 | } 130 | 131 | // Handle the most common case: a single index. 132 | if len(args) == 1 { 133 | delete(va.data, argVals[0].String()) // Deleting an absent key is a no-op 134 | return 135 | } 136 | 137 | // Merge the indexes into a single string. 138 | idxStrs := make([]string, len(argVals)) 139 | for i, v := range argVals { 140 | idxStrs[i] = v.String() 141 | } 142 | idx := strings.Join(idxStrs, va.script.SubSep) 143 | 144 | // Delete the index from the associative array. 145 | delete(va.data, idx) 146 | } 147 | 148 | // Keys returns all keys in the associative array in undefined order. Each key is a new Value created from the stored index string, so a multidimensional key is a single string containing Script.SubSep. 149 | func (va *ValueArray) Keys() []*Value { 150 | keys := make([]*Value, 0, len(va.data)) 151 | for kstr := range va.data { 152 | keys = append(keys, va.script.NewValue(kstr)) 153 | } 154 | return keys 155 | } 156 | 157 | // Values returns all values in the associative array in undefined order. Each element is passed through Script.NewValue before being returned. 158 | func (va *ValueArray) Values() []*Value { 159 | vals := make([]*Value, 0, len(va.data)) 160 | for _, v := range va.data { 161 | vals = append(vals, va.script.NewValue(v)) 162 | } 163 | return vals 164 | } 165 | -------------------------------------------------------------------------------- /assoc_test.go: -------------------------------------------------------------------------------- 1 | // This file tests operations on associative arrays 2 | 3 | package awk 4 | 5 | import ( 6 | "testing" 7 | ) 8 | 9 | // TestIntIntArray tests Get/Set operations on an associative array that 10 | // maps integers to integers.
11 | func TestIntIntArray(t *testing.T) { 12 | scr := NewScript() 13 | a := scr.NewValueArray() 14 | for i := 0; i < 10; i++ { 15 | a.Set(i, i*10) 16 | } 17 | for i := 9; i >= 0; i-- { // Read back in the reverse of insertion order 18 | got := a.Get(i).Int() 19 | if got != i*10 { 20 | t.Fatalf("Expected %d but received %d", i*10, got) 21 | } 22 | } 23 | } 24 | 25 | // TestValueValueArray tests Get/Set operations on an 26 | // associative array that maps Values to Values. Both the indexes and the stored elements are passed to Set and Get as *Value rather than as plain ints. 27 | func TestValueValueArray(t *testing.T) { 28 | scr := NewScript() 29 | a := scr.NewValueArray() 30 | for i := 0; i < 10; i++ { 31 | a.Set(scr.NewValue(i), scr.NewValue(i*10)) 32 | } 33 | for i := 9; i >= 0; i-- { 34 | got := a.Get(scr.NewValue(i)).Int() 35 | if got != i*10 { 36 | t.Fatalf("Expected %d but received %d", i*10, got) 37 | } 38 | } 39 | } 40 | 41 | // TestStringStringArray tests Get/Set operations on an associative array that 42 | // maps strings to strings. 43 | func TestStringStringArray(t *testing.T) { 44 | scr := NewScript() 45 | a := scr.NewValueArray() 46 | keys := []string{"The", "tree", "has", "entered", "my", "hands"} 47 | values := []string{"The", "sap", "has", "ascended", "my", "arms"} 48 | for i, k := range keys { 49 | a.Set(k, values[i]) 50 | } 51 | for i, k := range keys { 52 | want := values[i] 53 | got := a.Get(k).String() 54 | if got != want { 55 | t.Fatalf("Expected %q but received %q", want, got) 56 | } 57 | } 58 | } 59 | 60 | // TestMultiDimArray tests Get/Set operations on a "multidimensional" 61 | // associative array. Element (i, j) is set to i*10+j, so every index pair has a distinct expected value. 62 | func TestMultiDimArray(t *testing.T) { 63 | scr := NewScript() 64 | a := scr.NewValueArray() 65 | for i := 9; i >= 0; i-- { 66 | for j := 9; j >= 0; j-- { 67 | a.Set(i, j, i*10+j) 68 | } 69 | } 70 | for i := 0; i < 10; i++ { 71 | for j := 9; j >= 0; j-- { 72 | got := a.Get(i, j).Int() 73 | if got != i*10+j { 74 | t.Fatalf("Expected %d but received %d", i*10+j, got) 75 | } 76 | } 77 | } 78 | } 79 | 80 | // TestArrayKeys tests the Keys operation on an associative array.
81 | func TestArrayKeys(t *testing.T) { 82 | scr := NewScript() 83 | a := scr.NewValueArray() 84 | for i := 10; i <= 100; i += 10 { 85 | a.Set(i, i*2) 86 | } 87 | ksum := 0 88 | for _, k := range a.Keys() { 89 | ksum += k.Int() 90 | } 91 | if ksum != 550 { // 10 + 20 + ... + 100 92 | t.Fatalf("Expected 550 but received %d", ksum) 93 | } 94 | } 95 | 96 | // TestArrayValues tests the Values operation on an associative array. 97 | func TestArrayValues(t *testing.T) { 98 | scr := NewScript() 99 | a := scr.NewValueArray() 100 | for i := 10; i <= 100; i += 10 { 101 | a.Set(i, i*2) 102 | } 103 | vsum := 0 104 | for _, v := range a.Values() { 105 | vsum += v.Int() 106 | } 107 | if vsum != 1100 { // 2 * (10 + 20 + ... + 100) 108 | t.Fatalf("Expected 1100 but received %d", vsum) 109 | } 110 | } 111 | 112 | // TestArrayDelete tests deleting an element from an associative array. 113 | func TestArrayDelete(t *testing.T) { 114 | // Create an array of values, then delete every other element. The odd indexes are deleted, leaving each even index i with the value i/2. 115 | scr := NewScript() 116 | a := scr.NewValueArray() 117 | for i := 0; i <= 100; i++ { 118 | a.Set(i, i/2) 119 | } 120 | for i := 1; i <= 100; i += 2 { 121 | a.Delete(i) 122 | } 123 | vsum := 0 124 | for i := 0; i <= 100; i++ { 125 | vsum += a.Get(i).Int() 126 | } 127 | if vsum != 1275 { // 0 + 1 + ... + 50; deleted indexes are expected to read back as 0 128 | t.Fatalf("Expected 1275 but received %d", vsum) 129 | } 130 | 131 | // Empty the array and try again. 132 | a.Delete() 133 | vsum = 0 134 | for i := 0; i <= 100; i++ { 135 | vsum += a.Get(i).Int() 136 | } 137 | if vsum != 0 { 138 | t.Fatalf("Expected 0 but received %d", vsum) 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /doc.go: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Package awk implements AWK-style processing of input streams. 4 | 5 | 6 | Introduction 7 | 8 | The awk package can be considered a shallow EDSL (embedded domain-specific 9 | language) for Go that facilitates text processing.
It aims to implement 10 | the core semantics provided by 11 | AWK, a pattern scanning and processing language defined as part of the POSIX 12 | 1003.1 standard 13 | (http://pubs.opengroup.org/onlinepubs/9699919799/utilities/awk.html) and 14 | therefore part of all standard Linux/Unix distributions. 15 | 16 | AWK's forte is simple transformations of tabular data. For example, the 17 | following is a complete AWK program that reads an entire file from the standard 18 | input device, splits each line into whitespace-separated columns, and outputs 19 | all lines in which the fifth column is an odd number: 20 | 21 | $5 % 2 == 1 22 | 23 | Here's a typical Go analogue of that one-line AWK program: 24 | 25 | package main 26 | 27 | import ( 28 | "bufio" 29 | "fmt" 30 | "io" 31 | "os" 32 | "strconv" 33 | "strings" 34 | ) 35 | 36 | func main() { 37 | input := bufio.NewReader(os.Stdin) 38 | for { 39 | line, err := input.ReadString('\n') 40 | if err != nil { 41 | if err != io.EOF { 42 | panic(err) 43 | } 44 | break 45 | } 46 | scanner := bufio.NewScanner(strings.NewReader(line)) 47 | scanner.Split(bufio.ScanWords) 48 | cols := make([]string, 0, 10) 49 | for scanner.Scan() { 50 | cols = append(cols, scanner.Text()) 51 | } 52 | if err := scanner.Err(); err != nil { 53 | panic(err) 54 | } 55 | if len(cols) < 5 { 56 | continue 57 | } 58 | num, err := strconv.Atoi(cols[4]) 59 | if num%2 == 1 { 60 | fmt.Print(line) 61 | } 62 | } 63 | } 64 | 65 | The goal of the awk package is to emulate AWK's simplicity while simultaneously 66 | taking advantage of Go's speed, safety, and flexibility.
With the awk package, 67 | the preceding code reduces to the following: 68 | 69 | package main 70 | 71 | import ( 72 | "github.com/spakin/awk" 73 | "os" 74 | ) 75 | 76 | func main() { 77 | s := awk.NewScript() 78 | s.AppendStmt(func(s *awk.Script) bool { return s.F(5).Int()%2 == 1 }, nil) 79 | if err := s.Run(os.Stdin); err != nil { 80 | panic(err) 81 | } 82 | } 83 | 84 | While not a one-liner like the original AWK program, the above is conceptually 85 | close to it. The AppendStmt method defines a script in terms of patterns and 86 | actions exactly as in the AWK program. The Run method then runs the script on 87 | an input stream, which can be any io.Reader. 88 | 89 | 90 | Usage 91 | 92 | For those programmers unfamiliar with AWK, an AWK program consists of a 93 | sequence of pattern/action pairs. Each pattern that matches a given line 94 | causes the corresponding action to be performed. AWK programs tend to be terse 95 | because AWK implicitly reads the input file, splits it into records (default: 96 | newline-terminated lines), and splits each record into fields (default: 97 | whitespace-separated columns), saving the programmer from having to express 98 | such operations explicitly. Furthermore, AWK provides a default pattern, which 99 | matches every record, and a default action, which outputs a record unmodified. 100 | 101 | The awk package attempts to mimic those semantics in Go. Basic usage consists 102 | of three steps: 103 | 104 | 1. Script allocation (awk.NewScript) 105 | 106 | 2. Script definition (Script.AppendStmt) 107 | 108 | 3. Script execution (Script.Run) 109 | 110 | In Step 2, AppendStmt is called once for each pattern/action pair that is to be 111 | appended to the script. The same script can be applied to multiple input 112 | streams by re-executing Step 3. Actions to be executed on every run of Step 3 113 | can be supplied by assigning the script's Begin and End fields. 
The Begin 114 | action is typically used to initialize script state by calling methods such as 115 | SetRS and SetFS and assigning user-defined data to the script's State field 116 | (what would be global variables in AWK). The End action is typically used to 117 | store or report final results. 118 | 119 | To mimic AWK's dynamic type system, the awk package provides the Value and 120 | ValueArray types. Value represents a scalar that can be coerced without error 121 | to a string, an int, or a float64. ValueArray represents a—possibly 122 | multidimensional—associative array of Values. 123 | 124 | Both patterns and actions can access the current record's fields via the 125 | script's F method, which takes a 1-based index and returns the corresponding 126 | field as a Value. An index of 0 returns the entire record as a Value. 127 | 128 | 129 | Features 130 | 131 | The following AWK features and GNU AWK extensions are currently supported by 132 | the awk package: 133 | 134 | • the basic pattern/action structure of an AWK script, including BEGIN and END 135 | rules and range patterns 136 | 137 | • control over record separation (RS), including regular expressions and null 138 | strings (implying blank lines as separators) 139 | 140 | • control over field separation (FS), including regular expressions and null 141 | strings (implying single-character fields) 142 | 143 | • fixed-width fields (FIELDWIDTHS) 144 | 145 | • fields defined by a regular expression (FPAT) 146 | 147 | • control over case-sensitive vs.
case-insensitive comparisons (IGNORECASE) 148 | 149 | • control over the number conversion format (CONVFMT) 150 | 151 | • automatic enumeration of records (NR) and fields (NF) 152 | 153 | • "weak typing" 154 | 155 | • multidimensional associative arrays 156 | 157 | • premature termination of record processing (next) and script processing (exit) 158 | 159 | • explicit record reading (getline) from either the current stream or 160 | a specified stream 161 | 162 | • maintenance of regular-expression status variables (RT, RSTART, and RLENGTH) 163 | 164 | For more information about AWK and its features, see the awk(1) manual page on 165 | any Linux/Unix system (available online from, e.g., 166 | http://linux.die.net/man/1/awk) or read the book, "The AWK Programming 167 | Language" by Aho, Kernighan, and Weinberger. 168 | 169 | 170 | Examples 171 | 172 | A number of examples ported from the POSIX 1003.1 standard document 173 | (http://pubs.opengroup.org/onlinepubs/9699919799/utilities/awk.html) are 174 | presented below. 175 | 176 | */ 177 | package awk 178 | -------------------------------------------------------------------------------- /example_test.go: -------------------------------------------------------------------------------- 1 | // This file presents some examples of awk package usage. 2 | 3 | package awk_test 4 | 5 | import ( 6 | "fmt" 7 | "github.com/spakin/awk" 8 | "os" 9 | "sort" 10 | ) 11 | 12 | var s *awk.Script 13 | 14 | // Write to the standard output all input lines for which field 3 is 15 | // greater than 5 (AWK: $3 > 5). 16 | func Example_01() { 17 | s.AppendStmt(func(s *awk.Script) bool { return s.F(3).Int() > 5 }, nil) 18 | } 19 | 20 | // Write every tenth line (AWK: (NR % 10) == 0).
21 | func Example_02() { 22 | s.AppendStmt(func(s *awk.Script) bool { return s.NR%10 == 0 }, nil) 23 | } 24 | 25 | // Write any line with a substring containing a 'G' or 'D', followed by a 26 | // sequence of digits and characters (AWK: 27 | // /(G|D)([[:digit:][:alpha:]]*)/). This example uses character classes digit 28 | // and alpha to match language-independent digit and alphabetic characters 29 | // respectively. 30 | func Example_04() { 31 | s.AppendStmt(func(s *awk.Script) bool { return s.F(0).Match("(G|D)([[:digit:][:alpha:]]*)") }, nil) 32 | } 33 | 34 | // Write any line in which the second field matches the regular expression 35 | // "xyz" and the fourth field does not (AWK: $2 ~ /xyz/ && $4 !~ /xyz/). 36 | func Example_05() { 37 | s.AppendStmt(func(s *awk.Script) bool { 38 | return s.F(2).Match("xyz") && !s.F(4).Match("xyz") 39 | }, nil) 40 | } 41 | 42 | // Write any line in which the second field contains a backslash (AWK: $2 ~ 43 | // /\\/). The Go raw string `\\` is the regular expression that matches one literal backslash. 44 | func Example_06() { 45 | s.AppendStmt(func(s *awk.Script) bool { return s.F(2).Match(`\\`) }, nil) 46 | } 47 | 48 | // Write the second to the last and the last field in each line. Separate the 49 | // fields by a colon (AWK: {OFS=":"; print $(NF-1), $NF}). The Go version writes the colon directly with fmt.Printf rather than setting an output field separator. 50 | func Example_08() { 51 | s.AppendStmt(nil, func(s *awk.Script) { fmt.Printf("%v:%v\n", s.F(s.NF-1), s.F(s.NF)) }) 52 | } 53 | 54 | // Write the line number and number of fields in each line (AWK: {print NR ":" 55 | // NF}). The three strings representing the line number, the colon, and the 56 | // number of fields are concatenated and that string is written to standard 57 | // output. 58 | func Example_09() { 59 | s.AppendStmt(nil, func(s *awk.Script) { fmt.Printf("%d:%d\n", s.NR, s.NF) }) 60 | } 61 | 62 | // Write lines longer than 72 characters (AWK: length($0) > 72).
63 | func Example_10() { 64 | s.AppendStmt(func(s *awk.Script) bool { return len(s.F(0).String()) > 72 }, nil) // NOTE: len counts bytes, so a multi-byte UTF-8 character counts more than once 65 | } 66 | 67 | // Write the first two fields in opposite order (AWK: {print $2, $1}). 68 | func Example_11() { 69 | s.AppendStmt(nil, func(s *awk.Script) { s.Println(s.F(2), s.F(1)) }) 70 | } 71 | 72 | // Do the same as Example 11, with input fields separated by a comma, space and 73 | // tab characters, or both (AWK: 74 | // 75 | // BEGIN { FS = ",[ \t]*|[ \t]+" } 76 | // { print $2, $1 } 77 | // 78 | // ). 79 | func Example_12() { 80 | s.Begin = func(s *awk.Script) { s.SetFS(",[ \t]*|[ \t]+") } 81 | s.AppendStmt(nil, func(s *awk.Script) { s.Println(s.F(2), s.F(1)) }) 82 | } 83 | 84 | // Add up the first column and print the sum and average (AWK: 85 | // 86 | // {s += $1 } 87 | // END {print "sum is", s, "average is", s/NR} 88 | // 89 | // ). 90 | func Example_13() { 91 | s.Begin = func(s *awk.Script) { s.State = 0.0 } // State holds the running sum as a float64 92 | s.AppendStmt(nil, func(s *awk.Script) { s.State = s.State.(float64) + s.F(1).Float64() }) 93 | s.End = func(s *awk.Script) { 94 | sum := s.State.(float64) 95 | s.Println("sum is", sum, "average is", sum/float64(s.NR)) // NOTE(review): presumably NR is 0 when no records were read, which makes the average NaN; confirm 96 | } 97 | } 98 | 99 | // Write fields in reverse order, one per line (many lines out for each line 100 | // in). AWK: {for (i = NF; i > 0; --i) print $i}. 101 | func Example_14() { 102 | s.AppendStmt(nil, func(s *awk.Script) { 103 | for i := s.NF; i > 0; i-- { 104 | s.Println(s.F(i)) 105 | } 106 | }) 107 | } 108 | 109 | // Write all lines between occurrences of the strings "start" and "stop" (AWK: 110 | // /start/, /stop/). This version of the Go code uses awk.Range to combine 111 | // begin and end functions into a match range.
112 | func Example_15a() { 113 | s.AppendStmt(awk.Range(func(s *awk.Script) bool { return s.F(0).Match("start") }, 114 | func(s *awk.Script) bool { return s.F(0).Match("stop") }), 115 | nil) // F(0) is the whole record, which is what AWK's /start/, /stop/ tests 116 | } 117 | 118 | // Write all lines between occurrences of the strings "start" and "stop" (AWK: 119 | // /start/, /stop/). This version of the Go code uses awk.Auto to define the 120 | // begin and end conditions as simple regular-expression matches. 121 | func Example_15b() { 122 | s.AppendStmt(awk.Auto("start", "stop"), nil) 123 | } 124 | 125 | // Write all lines whose first field is different from the previous line's 126 | // first field (AWK: $1 != prev {print; prev = $1}). 127 | func Example_16() { 128 | s.State = s.NewValue("") 129 | s.AppendStmt(func(s *awk.Script) bool { return !s.F(1).StrEqual(s.State) }, 130 | func(s *awk.Script) { 131 | s.Println() 132 | s.State = s.F(1) 133 | }) 134 | } 135 | 136 | // For all rows of the form "Total: <number>", accumulate <number>. Once all 137 | // rows have been read, output the grand total. 138 | func ExampleScript_AppendStmt() { 139 | s := awk.NewScript() 140 | s.State = 0.0 141 | s.AppendStmt(func(s *awk.Script) bool { return s.NF == 2 && s.F(1).StrEqual("Total:") }, 142 | func(s *awk.Script) { s.State = s.State.(float64) + s.F(2).Float64() }) 143 | s.End = func(s *awk.Script) { fmt.Printf("The grand total is %.2f\n", s.State.(float64)) } 144 | s.Run(os.Stdin) 145 | } 146 | 147 | // Output each line preceded by its line number. 148 | func ExampleScript_AppendStmt_nilPattern() { 149 | s := awk.NewScript() 150 | s.AppendStmt(nil, func(s *awk.Script) { fmt.Printf("%4d %v\n", s.NR, s.F(0)) }) 151 | s.Run(os.Stdin) 152 | } 153 | 154 | // Output only rows in which the first column contains a larger number than the 155 | // second column.
156 | func ExampleScript_AppendStmt_nilAction() { 157 | s := awk.NewScript() 158 | s.AppendStmt(func(s *awk.Script) bool { return s.F(1).Int() > s.F(2).Int() }, nil) 159 | s.Run(os.Stdin) 160 | } 161 | 162 | // Output all input lines that appear between "BEGIN" and "END" inclusive. 163 | func ExampleRange() { 164 | s := awk.NewScript() 165 | s.AppendStmt(awk.Range(func(s *awk.Script) bool { return s.F(1).StrEqual("BEGIN") }, 166 | func(s *awk.Script) bool { return s.F(1).StrEqual("END") }), 167 | nil) 168 | s.Run(os.Stdin) 169 | } 170 | 171 | // Extract the first column of the input into a slice of strings. The Begin action sets the field separator to a comma and resets the slice. 172 | func ExampleBegin() { 173 | var data []string 174 | s := awk.NewScript() 175 | s.Begin = func(s *awk.Script) { 176 | s.SetFS(",") 177 | data = make([]string, 0) 178 | } 179 | s.AppendStmt(nil, func(s *awk.Script) { data = append(data, s.F(1).String()) }) 180 | s.Run(os.Stdin) 181 | } 182 | 183 | // Output each line with its columns in reverse order. A record with no fields (NF == 0) produces no output at all, not even a newline. 184 | func ExampleScript_F() { 185 | s := awk.NewScript() 186 | s.AppendStmt(nil, func(s *awk.Script) { 187 | for i := s.NF; i > 0; i-- { 188 | if i > 1 { 189 | fmt.Printf("%v ", s.F(i)) 190 | } else { 191 | fmt.Printf("%v\n", s.F(i)) 192 | } 193 | } 194 | }) 195 | s.Run(os.Stdin) 196 | } 197 | 198 | // Allocate and populate a 2-D array. The diagonal is made up of strings while 199 | // the rest of the array consists of float64 values. Off-diagonal element (i, j) holds (i*8+j)/63. 200 | func ExampleValueArray_Set() { 201 | va := s.NewValueArray() 202 | diag := []string{"Dasher", "Dancer", "Prancer", "Vixen", "Comet", "Cupid", "Dunder", "Blixem"} 203 | for i := 0; i < 8; i++ { 204 | for j := 0; j < 8; j++ { 205 | if i == j { 206 | va.Set(i, j, diag[i]) 207 | } else { 208 | va.Set(i, j, float64(i*8+j)/63.0) 209 | } 210 | } 211 | } 212 | } 213 | 214 | // Sort each line's columns, which are assumed to be floating-point numbers.
215 | func ExampleScript_FFloat64s() { 216 | s := awk.NewScript() 217 | s.AppendStmt(nil, func(s *awk.Script) { 218 | nums := s.FFloat64s() 219 | sort.Float64s(nums) 220 | // Print a separator before every number but the first so that a 221 | // record with no fields yields an empty line instead of a panic. 222 | for i, n := range nums { 223 | if i > 0 { 224 | fmt.Print(" ") 225 | } 226 | fmt.Printf("%.5g", n) 227 | } 228 | fmt.Println() 229 | }) 230 | s.Run(os.Stdin) 231 | } 232 | 233 | // Delete the fifth line of the input stream but output all other lines. 234 | func ExampleAuto_int() { 235 | s := awk.NewScript() 236 | s.AppendStmt(awk.Auto(5), func(s *awk.Script) { s.Next() }) 237 | s.AppendStmt(nil, nil) 238 | s.Run(os.Stdin) 239 | } 240 | 241 | // Output only those lines containing the string, "fnord". 242 | func ExampleAuto_string() { 243 | s := awk.NewScript() 244 | s.AppendStmt(awk.Auto("fnord"), nil) 245 | s.Run(os.Stdin) 246 | } 247 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/spakin/awk 2 | 3 | go 1.14 4 | -------------------------------------------------------------------------------- /script.go: -------------------------------------------------------------------------------- 1 | // This file lets users define and execute AWK-like scripts within Go. 2 | 3 | package awk 4 | 5 | import ( 6 | "bufio" 7 | "errors" 8 | "fmt" 9 | "io" 10 | "os" 11 | "regexp" 12 | "strings" 13 | "unicode/utf8" 14 | ) 15 | 16 | // A scriptAborter is an error that causes the current script to abort but lets 17 | // the rest of the program run. 18 | type scriptAborter struct{ error } 19 | 20 | // A recordStopper is thrown when a script wants to continue immediately with 21 | // the next record. 22 | type recordStopper struct{ error } 23 | 24 | // A parseState indicates where we are in our parsing state. 25 | type parseState int 26 | 27 | // The following are the possibilities for a parseState.
28 | const ( 29 | notRunning parseState = iota // Before/after Run was called 30 | atBegin // Before any records are read 31 | inMiddle // While records are being read 32 | atEnd // After all records are read 33 | ) 34 | 35 | // A stopState describes premature stop conditions. 36 | type stopState int 37 | 38 | // The following are possibilities for a stopState. 39 | const ( 40 | dontStop stopState = iota // Normal execution 41 | stopRec // Abort the current record 42 | stopScript // Abort the entire script 43 | ) 44 | 45 | // Choose arbitrary initial sizes for record and field buffers. 46 | const ( 47 | initialFieldSize = 4096 48 | initialRecordSize = 4096 49 | ) 50 | 51 | // A Script encapsulates all of the internal state for an AWK-like script. 52 | type Script struct { 53 | State interface{} // Arbitrary, user-supplied data 54 | Output io.Writer // Output stream (defaults to os.Stdout) 55 | Begin ActionFunc // Action to perform before any input is read 56 | End ActionFunc // Action to perform after all input is read 57 | ConvFmt string // Conversion format for numbers, "%.6g" by default 58 | SubSep string // Separator for simulated multidimensional arrays, "\034" by default 59 | NR int // Number of input records seen so far 60 | NF int // Number of fields in the current input record 61 | RT string // Actual string terminating the current record 62 | RStart int // 1-based index of the previous regexp match (Value.Match) 63 | RLength int // Length of the previous regexp match (Value.Match) 64 | MaxRecordSize int // Maximum number of characters allowed in each record, bufio.MaxScanTokenSize by default 65 | MaxFieldSize int // Maximum number of characters allowed in each field, bufio.MaxScanTokenSize by default 66 | 67 | nf0 int // Value of NF for which F(0) was computed 68 | rs string // Input record separator, newline by default 69 | fs string // Input field separator, space by default 70 | fieldWidths []int // Fixed-width column sizes 71 | fPat string // Input field regular expression 72 | ors string // Output record separator, newline by default 73
| ofs string // Output field separator, space by default 74 | ignCase bool // true: REs are case-insensitive; false: case-sensitive 75 | rules []statement // List of pattern-action pairs to execute 76 | fields []*Value // Fields in the current record; fields[0] is the entire record 77 | regexps map[string]*regexp.Regexp // Map from a regular-expression string to a compiled regular expression 78 | getlineState map[io.Reader]*Script // Parsing state needed to invoke GetLine repeatedly on a given io.Reader 79 | rsScanner *bufio.Scanner // Scanner associated with RS 80 | input io.Reader // Script input stream 81 | state parseState // What we're currently parsing 82 | stop stopState // What we should stop doing 83 | } 84 | 85 | // NewScript initializes a new Script with default values. The State, Begin, and End fields are left unset (nil), and the script starts in the notRunning state. 86 | func NewScript() *Script { 87 | return &Script{ 88 | Output: os.Stdout, 89 | ConvFmt: "%.6g", 90 | SubSep: "\034", 91 | NR: 0, 92 | NF: 0, 93 | MaxRecordSize: bufio.MaxScanTokenSize, 94 | MaxFieldSize: bufio.MaxScanTokenSize, 95 | nf0: 0, 96 | rs: "\n", 97 | fs: " ", 98 | ors: "\n", 99 | ofs: " ", 100 | ignCase: false, 101 | rules: make([]statement, 0, 10), 102 | fields: make([]*Value, 0), 103 | regexps: make(map[string]*regexp.Regexp, 10), 104 | getlineState: make(map[io.Reader]*Script), 105 | state: notRunning, 106 | } 107 | } 108 | 109 | // abortScript aborts the current script with a formatted error message. It sets the stop state to stopScript and then panics with a scriptAborter wrapping the formatted error, so it never returns normally. 110 | func (s *Script) abortScript(format string, a ...interface{}) { 111 | s.stop = stopScript 112 | panic(scriptAborter{fmt.Errorf(format, a...)}) 113 | } 114 | 115 | // Copy returns a copy of a Script. The rules, fieldWidths, fields, regexps, and getlineState containers are duplicated, but their elements and all remaining fields (State, Output, the input reader, the record scanner, and so on) are shared with the original.
116 | func (s *Script) Copy() *Script { 117 | sc := *s 118 | sc.rules = make([]statement, len(s.rules)) 119 | copy(sc.rules, s.rules) 120 | sc.fieldWidths = make([]int, len(s.fieldWidths)) 121 | copy(sc.fieldWidths, s.fieldWidths) 122 | sc.fields = make([]*Value, len(s.fields)) 123 | copy(sc.fields, s.fields) 124 | sc.regexps = make(map[string]*regexp.Regexp, len(s.regexps)) 125 | for k, v := range s.regexps { 126 | sc.regexps[k] = v 127 | } 128 | sc.getlineState = make(map[io.Reader]*Script, len(s.getlineState)) 129 | for k, v := range s.getlineState { 130 | sc.getlineState[k] = v 131 | } 132 | return &sc 133 | } 134 | 135 | // SetRS sets the input record separator (really, a record terminator). It is 136 | // invalid to call SetRS after the first record is read. (It is acceptable to 137 | // call SetRS from a Begin action, though.) As in AWK, if the record separator 138 | // is a single character, that character is used to separate records; if the 139 | // record separator is multiple characters, it's treated as a regular 140 | // expression (subject to the current setting of Script.IgnoreCase); and if the 141 | // record separator is an empty string, records are separated by blank lines. 142 | // That last case implicitly causes newlines to be accepted as a field 143 | // separator in addition to whatever was specified by SetFS. 144 | func (s *Script) SetRS(rs string) { 145 | if s.state == inMiddle { 146 | s.abortScript("SetRS was called from a running script") 147 | } 148 | s.rs = rs 149 | } 150 | 151 | // SetFS sets the input field separator. 
As in AWK, if the field separator is 152 | // a single space (the default), fields are separated by runs of whitespace; if 153 | // the field separator is any other single character, that character is used to 154 | // separate fields; if the field separator is an empty string, each individual 155 | // character becomes a separate field; and if the field separator is multiple 156 | // characters, it's treated as a regular expression (subject to the current 157 | // setting of Script.IgnoreCase). 158 | func (s *Script) SetFS(fs string) { 159 | s.fs = fs 160 | s.fieldWidths = nil 161 | s.fPat = "" 162 | } 163 | 164 | // SetFieldWidths indicates that each record is composed of fixed-width columns 165 | // and specifies the width in characters of each column. It is invalid to pass 166 | // SetFieldWidths a nil argument or a non-positive field width. 167 | func (s *Script) SetFieldWidths(fw []int) { 168 | // Sanity-check the argument. 169 | if fw == nil { 170 | s.abortScript("SetFieldWidths was passed a nil slice") 171 | } 172 | for _, w := range fw { 173 | if w <= 0 { 174 | s.abortScript(fmt.Sprintf("SetFieldWidths was passed an invalid field width (%d)", w)) 175 | } 176 | } 177 | 178 | // Assign the field widths and reset the field separator and field 179 | // matcher (not strictly but consistent with the SetFS method). 180 | s.fs = " " 181 | s.fieldWidths = fw 182 | s.fPat = "" 183 | } 184 | 185 | // SetFPat defines a "field pattern", a regular expression that matches fields. 186 | // This lies in contrast to providing a regular expression to SetFS, which 187 | // matches the separation between fields, not the fields themselves. 188 | func (s *Script) SetFPat(fp string) { 189 | s.fs = " " 190 | s.fieldWidths = nil 191 | s.fPat = fp 192 | } 193 | 194 | // recomputeF0 recomputes F(0) by concatenating F(1)...F(NF) with OFS. 
195 | func (s *Script) recomputeF0() { 196 | if len(s.fields) >= 1 { 197 | s.fields[0] = s.NewValue(strings.Join(s.FStrings(), s.ofs)) 198 | } 199 | s.nf0 = s.NF 200 | } 201 | 202 | // SetORS sets the output record separator. 203 | func (s *Script) SetORS(ors string) { s.ors = ors } 204 | 205 | // SetOFS sets the output field separator. 206 | func (s *Script) SetOFS(ofs string) { 207 | s.ofs = ofs 208 | s.recomputeF0() 209 | } 210 | 211 | // F returns a specified field of the current record. Field numbers are 212 | // 1-based. Field 0 refers to the entire record. Requesting a field greater 213 | // than NF returns a zero value. Requesting a negative field number panics 214 | // with an out-of-bounds error. 215 | func (s *Script) F(i int) *Value { 216 | if i == 0 && s.NF != s.nf0 { 217 | s.recomputeF0() 218 | } 219 | if i < len(s.fields) { 220 | return s.fields[i] 221 | } 222 | return s.NewValue("") 223 | } 224 | 225 | // SetF sets a field of the current record to the given Value. Field numbers 226 | // are 1-based. Field 0 refers to the entire record. Setting it causes the 227 | // entire line to be reparsed (and NF recomputed). Setting a field numbered 228 | // larger than NF extends NF to that value. Setting a negative field number 229 | // panics with an out-of-bounds error. 230 | func (s *Script) SetF(i int, v *Value) { 231 | // Zero index: Assign and reparse the entire record. 232 | if i == 0 { 233 | s.splitRecord(v.String()) 234 | return 235 | } 236 | 237 | // Index larger than NF: extend NF and try again. 238 | if i >= len(s.fields) { 239 | for i >= len(s.fields) { 240 | s.fields = append(s.fields, s.NewValue("")) 241 | } 242 | s.NF = len(s.fields) - 1 243 | } 244 | 245 | // Index not larger than (the possibly modified) NF: write the field. 246 | s.fields[i] = v 247 | 248 | // Force F(0) to be recomputed the next time it's accessed. 249 | s.nf0 = -1 250 | } 251 | 252 | // FStrings returns all fields in the current record as a []string of length 253 | // NF. 
254 | func (s *Script) FStrings() []string { 255 | a := make([]string, s.NF) 256 | for i := 0; i < s.NF; i++ { 257 | a[i] = s.F(i + 1).String() 258 | } 259 | return a 260 | } 261 | 262 | // FInts returns all fields in the current record as a []int of length NF. 263 | func (s *Script) FInts() []int { 264 | a := make([]int, s.NF) 265 | for i := 0; i < s.NF; i++ { 266 | a[i] = s.F(i + 1).Int() 267 | } 268 | return a 269 | } 270 | 271 | // FFloat64s returns all fields in the current record as a []float64 of length 272 | // NF. 273 | func (s *Script) FFloat64s() []float64 { 274 | a := make([]float64, s.NF) 275 | for i := 0; i < s.NF; i++ { 276 | a[i] = s.F(i + 1).Float64() 277 | } 278 | return a 279 | } 280 | 281 | // IgnoreCase specifies whether regular-expression and string comparisons 282 | // should be performed in a case-insensitive manner. 283 | func (s *Script) IgnoreCase(ign bool) { 284 | s.ignCase = ign 285 | } 286 | 287 | // Println is like fmt.Println but honors the current output stream, output 288 | // field separator, and output record separator. If called with no arguments, 289 | // Println outputs all fields in the current record. 290 | func (s *Script) Println(args ...interface{}) { 291 | // No arguments: Output all fields of the current record. 292 | if args == nil { 293 | for i := 1; i <= s.NF; i++ { 294 | fmt.Fprintf(s.Output, "%v", s.F(i)) 295 | if i == s.NF { 296 | fmt.Fprintf(s.Output, "%s", s.ors) 297 | } else { 298 | fmt.Fprintf(s.Output, "%s", s.ofs) 299 | } 300 | } 301 | return 302 | } 303 | 304 | // One or more arguments: Output them. 305 | for i, arg := range args { 306 | fmt.Fprintf(s.Output, "%v", arg) 307 | if i == len(args)-1 { 308 | fmt.Fprintf(s.Output, "%s", s.ors) 309 | } else { 310 | fmt.Fprintf(s.Output, "%s", s.ofs) 311 | } 312 | } 313 | } 314 | 315 | // A PatternFunc represents a pattern to match against. It is expected to 316 | // examine the state of the given Script then return either true or false. 
If 317 | // it returns true, the corresponding ActionFunc is executed. Otherwise, the 318 | // corresponding ActionFunc is not executed. 319 | type PatternFunc func(*Script) bool 320 | 321 | // An ActionFunc represents an action to perform when the corresponding 322 | // PatternFunc returns true. 323 | type ActionFunc func(*Script) 324 | 325 | // A statement represents a single pattern-action pair. 326 | type statement struct { 327 | Pattern PatternFunc 328 | Action ActionFunc 329 | } 330 | 331 | // The matchAny pattern is true only in the middle of a script, when a record 332 | // is available for parsing. 333 | func matchAny(s *Script) bool { 334 | return s.state == inMiddle 335 | } 336 | 337 | // The printRecord statement outputs the current record verbatim to the current 338 | // output stream. 339 | func printRecord(s *Script) { 340 | fmt.Fprintf(s.Output, "%v%s", s.fields[0], s.ors) 341 | } 342 | 343 | // Next stops processing the current record and proceeds with the next record. 344 | func (s *Script) Next() { 345 | if s.stop == dontStop { 346 | s.stop = stopRec 347 | } 348 | panic(recordStopper{errors.New("Unexpected Next invocation")}) // Unexpected if we don't catch it 349 | } 350 | 351 | // Exit stops processing the entire script, causing the Run method to return. 352 | func (s *Script) Exit() { 353 | if s.stop == dontStop { 354 | s.stop = stopScript 355 | } 356 | } 357 | 358 | // Range combines two patterns into a single pattern that statefully returns 359 | // true between the time the first and second pattern become true (both 360 | // inclusively). 361 | func Range(p1, p2 PatternFunc) PatternFunc { 362 | inRange := false 363 | return func(s *Script) bool { 364 | if inRange { 365 | inRange = !p2(s) 366 | return true 367 | } 368 | inRange = p1(s) 369 | return inRange 370 | } 371 | } 372 | 373 | // Auto provides a simplified mechanism for creating various common-case 374 | // PatternFunc functions. 
It accepts zero, one, or an even number of 375 | // arguments. If given no arguments, it matches every record. If given a 376 | // single argument, its behavior depends on that argument's type: 377 | // 378 | // • A Script.PatternFunc is returned as is. 379 | // 380 | // • A *regexp.Regexp returns a function that matches that regular expression 381 | // against the entire record. 382 | // 383 | // • A string is treated as a regular expression and behaves likewise. 384 | // 385 | // • An int returns a function that matches that int against NR. 386 | // 387 | // • Any other type causes a run-time panic. 388 | // 389 | // If given an even number of arguments, pairs of arguments are treated as 390 | // ranges (cf. the Range function). The PatternFunc returns true if the record 391 | // lies within any of the ranges. 392 | func Auto(v ...interface{}) PatternFunc { 393 | if len(v) == 0 { 394 | // No arguments: Match anything. 395 | return matchAny 396 | } 397 | if len(v)%2 == 0 { 398 | // Even number of arguments other than 0: Return a disjunction 399 | // of ranges. 400 | fList := make([]PatternFunc, len(v)/2) 401 | for i := 0; i < len(v); i += 2 { 402 | f1 := Auto(v[i]) 403 | f2 := Auto(v[i+1]) 404 | fList[i/2] = Range(f1, f2) 405 | } 406 | return func(s *Script) bool { 407 | // Return true iff any range is true. Note that we 408 | // always evaluate every range to avoid confusing 409 | // results because of statefulness. 410 | m := false 411 | for _, f := range fList { 412 | if f(s) { 413 | m = true 414 | } 415 | } 416 | return m 417 | } 418 | } 419 | if len(v)%2 == 1 { 420 | // Single argument: Decide what to do based on its type. 421 | switch x := v[0].(type) { 422 | case PatternFunc: 423 | // Already a PatternFunc: Return it unmodified. 424 | return x 425 | case string: 426 | // String: Treat as a regular expression that matches 427 | // against F[0]. 
428 | return func(s *Script) bool { 429 | r, err := s.compileRegexp(x) 430 | if err != nil { 431 | s.abortScript(err.Error()) 432 | } 433 | return r.MatchString(s.F(0).String()) 434 | } 435 | case int: 436 | // Integer: Match against NR. 437 | return func(s *Script) bool { 438 | return s.NR == x 439 | } 440 | case *regexp.Regexp: 441 | // Regular expression: Convert to a string then, 442 | // dynamically, back to a regular expression. This 443 | // enables dynamic toggling of case sensitivity. 444 | xs := x.String() 445 | return func(s *Script) bool { 446 | r, err := s.compileRegexp(xs) 447 | if err != nil { 448 | s.abortScript(err.Error()) 449 | } 450 | return r.MatchString(s.F(0).String()) 451 | } 452 | default: 453 | panic(fmt.Sprintf("Auto does not accept arguments of type %T", x)) 454 | } 455 | } 456 | panic("Auto expects 0, 1, or an even number of arguments") 457 | } 458 | 459 | // AppendStmt appends a pattern-action pair to a Script. If the pattern 460 | // function is nil, the action will be performed on every record. If the 461 | // action function is nil, the record will be output verbatim to the standard 462 | // output device. It is invalid to call AppendStmt from a running script. 463 | func (s *Script) AppendStmt(p PatternFunc, a ActionFunc) { 464 | // Panic if we were called on a running script. 465 | if s.state != notRunning { 466 | s.abortScript("AppendStmt was called from a running script") 467 | } 468 | 469 | // Append a statement to the list of rules. 470 | stmt := statement{ 471 | Pattern: p, 472 | Action: a, 473 | } 474 | if p == nil { 475 | stmt.Pattern = matchAny 476 | } 477 | if a == nil { 478 | stmt.Action = printRecord 479 | } 480 | s.rules = append(s.rules, stmt) 481 | } 482 | 483 | // compileRegexp caches and returns the result of regexp.Compile. It 484 | // automatically prepends "(?i)" to the expression if the script is currently 485 | // set to perform case-insensitive regular-expression matching. 
486 | func (s *Script) compileRegexp(expr string) (*regexp.Regexp, error) { 487 | if s.ignCase { 488 | expr = "(?i)" + expr 489 | } 490 | re, found := s.regexps[expr] 491 | if found { 492 | return re, nil 493 | } 494 | var err error 495 | re, err = regexp.Compile(expr) 496 | if err != nil { 497 | return nil, err 498 | } 499 | s.regexps[expr] = re 500 | return re, nil 501 | } 502 | 503 | // makeSingleCharFieldSplitter returns a splitter that returns the next field 504 | // by splitting on a single character (except for space, which is a special 505 | // case handled elsewhere). 506 | func (s *Script) makeSingleCharFieldSplitter() func([]byte, bool) (int, []byte, error) { 507 | // Ensure the separator character is valid. 508 | firstRune, _ := utf8.DecodeRuneInString(s.fs) 509 | if firstRune == utf8.RuneError { 510 | return func(data []byte, atEOF bool) (int, []byte, error) { 511 | return 0, nil, errors.New("Invalid rune in separator") 512 | } 513 | } 514 | 515 | // The separator is valid. Return a splitter customized to that 516 | // separator. 517 | returnedFinalToken := false // true=already returned a final, non-terminated token; false=didn't 518 | return func(data []byte, atEOF bool) (advance int, token []byte, err error) { 519 | // Scan until we see a separator or run out of data. 520 | for width, i := 0, 0; i < len(data); i += width { 521 | var r rune 522 | r, width = utf8.DecodeRune(data[i:]) 523 | if r == utf8.RuneError && i+width >= len(data) && !atEOF { 524 | // Invalid rune at the end of the data. 525 | // Request more data and try again. 526 | return 0, nil, nil 527 | } 528 | if r == firstRune { 529 | return i + width, data[:i], nil 530 | } 531 | } 532 | 533 | // We didn't see a separator. If we're at EOF, we have 534 | // a final, non-terminated token. Return it (unless we 535 | // already did). 536 | if atEOF && !returnedFinalToken { 537 | returnedFinalToken = true 538 | return len(data), data, nil 539 | } 540 | 541 | // Request more data. 
542 | return 0, nil, nil 543 | } 544 | } 545 | 546 | // makeREFieldSplitter returns a splitter that returns the next field by 547 | // splitting on a regular expression. 548 | func (s *Script) makeREFieldSplitter() func([]byte, bool) (int, []byte, error) { 549 | // Ensure that the regular expression is valid. 550 | var sepRegexp *regexp.Regexp 551 | var err error 552 | if s.rs == "" { 553 | // A special case in AWK is that if the record terminator is 554 | // empty (implying a blank line) then newlines are accepted as 555 | // a field separator in addition to whatever is specified for 556 | // FS. 557 | sepRegexp, err = s.compileRegexp(`(` + s.fs + `)|(\r?\n)`) 558 | } else { 559 | sepRegexp, err = s.compileRegexp(s.fs) 560 | } 561 | if err != nil { 562 | return func(data []byte, atEOF bool) (int, []byte, error) { 563 | return 0, nil, err 564 | } 565 | } 566 | 567 | // The regular expression is valid. Return a splitter customized to 568 | // that regular expression. 569 | returnedFinalToken := false // true=already returned a final, non-terminated token; false=didn't 570 | return func(data []byte, atEOF bool) (advance int, token []byte, err error) { 571 | // If we match the regular expression, return everything up to 572 | // the match. 573 | loc := sepRegexp.FindIndex(data) 574 | if loc != nil { 575 | return loc[1], data[:loc[0]], nil 576 | } 577 | 578 | // We didn't see a separator. If we're at EOF, we have a 579 | // final, non-terminated token. Return it (unless we already 580 | // did). 581 | if atEOF && !returnedFinalToken { 582 | returnedFinalToken = true 583 | return len(data), data, nil 584 | } 585 | 586 | // Request more data. 587 | return 0, nil, nil 588 | } 589 | } 590 | 591 | // makeFixedFieldSplitter returns a splitter than returns the next field by 592 | // splitting a record into fixed-size chunks. 
593 | func (s *Script) makeFixedFieldSplitter() func([]byte, bool) (int, []byte, error) { 594 | f := 0 // Index into s.fieldWidths 595 | returnedFinalToken := false // true=already returned a final, non-terminated token; false=didn't 596 | return func(data []byte, atEOF bool) (advance int, token []byte, err error) { 597 | // If we've exhausted s.fieldWidths, return empty-handed. 598 | if f >= len(s.fieldWidths) { 599 | return 0, nil, nil 600 | } 601 | 602 | // If we have enough characters for the current field, return a 603 | // token and advance to the next field. 604 | fw := s.fieldWidths[f] 605 | if len(data) >= fw { 606 | f++ 607 | return fw, data[:fw], nil 608 | } 609 | 610 | // If we don't have enough characters for the current field but 611 | // we're at EOF, return whatever we have (unless we already 612 | // did). 613 | if atEOF && !returnedFinalToken { 614 | returnedFinalToken = true 615 | return len(data), data, nil 616 | } 617 | 618 | // If we don't have enough characters for the current field and 619 | // we're not at EOF, request more data. 620 | return 0, nil, nil 621 | } 622 | } 623 | 624 | // makeREFieldMatcher returns a splitter that returns the next field by 625 | // matching against a regular expression. 626 | func (s *Script) makeREFieldMatcher() func([]byte, bool) (int, []byte, error) { 627 | // Ensure that the regular expression is valid. 628 | sepRegexp, err := s.compileRegexp(s.fPat) 629 | if err != nil { 630 | return func(data []byte, atEOF bool) (int, []byte, error) { 631 | return 0, nil, err 632 | } 633 | } 634 | 635 | // The regular expression is valid. Return a splitter customized to 636 | // that regular expression. 637 | return func(data []byte, atEOF bool) (advance int, token []byte, err error) { 638 | // If we match the regular expression, return the match. 639 | // Otherwise, request more data. 
640 | loc := sepRegexp.FindIndex(data) 641 | if loc == nil { 642 | return 0, nil, nil 643 | } 644 | return loc[1], data[loc[0]:loc[1]], nil 645 | } 646 | } 647 | 648 | // makeFieldSplitter returns a splitter that returns the next field. 649 | func (s *Script) makeFieldSplitter() func([]byte, bool) (int, []byte, error) { 650 | // If we were given fixed field widths, use them. 651 | if s.fieldWidths != nil { 652 | return s.makeFixedFieldSplitter() 653 | } 654 | 655 | // If were given a field-matching regular expression, use it. 656 | if s.fPat != "" { 657 | return s.makeREFieldMatcher() 658 | } 659 | 660 | // If the separator is empty, each rune is a separate field. 661 | if s.fs == "" { 662 | return bufio.ScanRunes 663 | } 664 | 665 | // If the separator is a single space, return the next word as the 666 | // field. 667 | if s.fs == " " { 668 | return bufio.ScanWords 669 | } 670 | 671 | // If the separator is a single character and the record terminator is 672 | // not empty (a special case in AWK), split based on that. This code 673 | // is derived from the bufio.ScanWords source. 674 | if utf8.RuneCountInString(s.fs) == 1 && s.rs != "" { 675 | return s.makeSingleCharFieldSplitter() 676 | } 677 | 678 | // If the separator is multiple characters (or the record terminator is 679 | // empty), treat it as a regular expression, and scan based on that. 680 | return s.makeREFieldSplitter() 681 | } 682 | 683 | // makeRecordSplitter returns a splitter that returns the next record. 684 | // Although all the AWK documentation I've read define RS as a record 685 | // separator, as far as I can tell, AWK in fact treats it as a record 686 | // *terminator* so we do, too. 687 | func (s *Script) makeRecordSplitter() func([]byte, bool) (int, []byte, error) { 688 | // If the terminator is a single character, scan based on that. This 689 | // code is derived from the bufio.ScanWords source. 
690 | if utf8.RuneCountInString(s.rs) == 1 { 691 | // Ensure the terminator character is valid. 692 | firstRune, _ := utf8.DecodeRuneInString(s.rs) 693 | if firstRune == utf8.RuneError { 694 | return func(data []byte, atEOF bool) (int, []byte, error) { 695 | return 0, nil, errors.New("Invalid rune in terminator") 696 | } 697 | } 698 | 699 | // The terminator is valid. Return a splitter customized to 700 | // that terminator. 701 | return func(data []byte, atEOF bool) (advance int, token []byte, err error) { 702 | // Scan until we see a terminator or run out of data. 703 | s.RT = string(firstRune) 704 | for width, i := 0, 0; i < len(data); i += width { 705 | var r rune 706 | r, width = utf8.DecodeRune(data[i:]) 707 | if r == utf8.RuneError && i+width >= len(data) && !atEOF { 708 | // Invalid rune at the end of the data. 709 | // Request more data and try again. 710 | return 0, nil, nil 711 | } 712 | if r == firstRune { 713 | return i + width, data[:i], nil 714 | } 715 | } 716 | 717 | // We didn't see a terminator. If we're at EOF, we 718 | // have a final, non-terminated token. Return it if 719 | // it's nonempty. 720 | if atEOF && len(data) > 0 { 721 | return len(data), data, nil 722 | } 723 | 724 | // Request more data. 725 | return 0, nil, nil 726 | } 727 | } 728 | 729 | // If the terminator is multiple characters, treat it as a regular 730 | // expression, and scan based on that. Or, as a special case, if the 731 | // terminator is empty, we treat it as a regular expression 732 | // representing one or more blank lines. 733 | return func(data []byte, atEOF bool) (advance int, token []byte, err error) { 734 | // Generate a regular expression based on the current RS and 735 | // IgnoreCase. 
736 | var termRegexp *regexp.Regexp 737 | if s.rs == "" { 738 | termRegexp, err = s.compileRegexp(`\r?\n(\r?\n)+`) 739 | } else { 740 | termRegexp, err = s.compileRegexp(s.rs) 741 | } 742 | if err != nil { 743 | return 0, nil, err 744 | } 745 | 746 | // If we match the regular expression, return everything up to 747 | // the match. 748 | loc := termRegexp.FindIndex(data) 749 | if loc != nil { 750 | s.RT = string(data[loc[0]:loc[1]]) 751 | return loc[1], data[:loc[0]], nil 752 | } 753 | 754 | // We didn't see a terminator. If we're at EOF, we have a 755 | // final, non-terminated token. Return it if it's nonempty. 756 | if atEOF && len(data) > 0 { 757 | s.RT = "" 758 | return len(data), data, nil 759 | } 760 | 761 | // Request more data. 762 | return 0, nil, nil 763 | } 764 | } 765 | 766 | // Read the next record from a stream and return it. 767 | func (s *Script) readRecord() (string, error) { 768 | // Return the next record. 769 | if s.rsScanner.Scan() { 770 | return s.rsScanner.Text(), nil 771 | } 772 | if err := s.rsScanner.Err(); err != nil { 773 | return "", err 774 | } 775 | return "", io.EOF 776 | } 777 | 778 | // splitRecord splits a record into fields. It stores the fields in the Script 779 | // struct's F field and update NF. As in real AWK, field 0 is the entire 780 | // record. 781 | func (s *Script) splitRecord(rec string) error { 782 | fsScanner := bufio.NewScanner(strings.NewReader(rec)) 783 | fsScanner.Buffer(make([]byte, initialFieldSize), s.MaxFieldSize) 784 | fsScanner.Split(s.makeFieldSplitter()) 785 | fields := make([]*Value, 0, 100) 786 | fields = append(fields, s.NewValue(rec)) 787 | for fsScanner.Scan() { 788 | fields = append(fields, s.NewValue(fsScanner.Text())) 789 | } 790 | if err := fsScanner.Err(); err != nil { 791 | return err 792 | } 793 | s.fields = fields 794 | s.NF = len(fields) - 1 795 | s.nf0 = s.NF 796 | return nil 797 | } 798 | 799 | // GetLine reads the next record from an input stream and returns it. 
If the 800 | // argument to GetLine is nil, GetLine reads from the current input stream and 801 | // increments NR. Otherwise, it reads from the given io.Reader and does not 802 | // increment NR. Call SetF(0, ...) on the Value returned by GetLine to perform 803 | // the equivalent of AWK's getline with no variable argument. 804 | func (s *Script) GetLine(r io.Reader) (*Value, error) { 805 | // Handle the simpler case of a nil argument (to read from the current 806 | // input stream). 807 | if r == nil { 808 | rec, err := s.readRecord() 809 | if err != nil { 810 | return nil, err 811 | } 812 | s.NR++ 813 | return s.NewValue(rec), nil 814 | } 815 | 816 | // If we've seen this io.Reader before, reuse its parsing state. 817 | // Otherwise, create a new Script for storing state. 818 | sc := s.getlineState[r] 819 | if sc == nil { 820 | // Copy the given script so we don't alter any of the original 821 | // script's state. 822 | sc = s.Copy() 823 | s.getlineState[r] = sc 824 | 825 | // Create (and store) a new scanner based on the record 826 | // terminator. 827 | sc.input = r 828 | sc.rsScanner = bufio.NewScanner(sc.input) 829 | sc.rsScanner.Buffer(make([]byte, initialRecordSize), sc.MaxRecordSize) 830 | sc.rsScanner.Split(sc.makeRecordSplitter()) 831 | } 832 | 833 | // Read a record from the given reader. 834 | rec, err := sc.readRecord() 835 | if err != nil { 836 | return nil, err 837 | } 838 | return sc.NewValue(rec), nil 839 | } 840 | 841 | // Run executes a script against a given input stream. It is perfectly valid 842 | // to run the same script on multiple input streams. 843 | func (s *Script) Run(r io.Reader) (err error) { 844 | // Catch scriptAborter panics and return them as errors. Re-throw all 845 | // other panics. 846 | defer func() { 847 | if r := recover(); r != nil { 848 | if e, ok := r.(scriptAborter); ok { 849 | err = e 850 | } else { 851 | panic(r) 852 | } 853 | } 854 | }() 855 | 856 | // Reinitialize most of our state. 
857 | s.input = r 858 | s.ConvFmt = "%.6g" 859 | s.NF = 0 860 | s.NR = 0 861 | 862 | // Process the Begin action, if any. 863 | if s.Begin != nil { 864 | s.state = atBegin 865 | s.Begin(s) 866 | } 867 | 868 | // Create (and store) a new scanner based on the record terminator. 869 | s.rsScanner = bufio.NewScanner(s.input) 870 | s.rsScanner.Buffer(make([]byte, initialRecordSize), s.MaxRecordSize) 871 | s.rsScanner.Split(s.makeRecordSplitter()) 872 | 873 | // Process each record in turn. 874 | s.state = inMiddle 875 | for { 876 | // Read a record. 877 | s.stop = dontStop 878 | rec, err := s.readRecord() 879 | if err != nil { 880 | if err == io.EOF { 881 | break 882 | } 883 | return err 884 | } 885 | s.NR++ 886 | 887 | // Split the record into its constituent fields. 888 | err = s.splitRecord(rec) 889 | if err != nil { 890 | return err 891 | } 892 | 893 | // Process all applicable actions. 894 | func() { 895 | // An action is able to break out of the 896 | // action-processing loop by calling Next, which throws 897 | // a recordStopper. We catch that and continue 898 | // with the next record. 899 | defer func() { 900 | if r := recover(); r != nil { 901 | if _, ok := r.(recordStopper); !ok { 902 | panic(r) 903 | } 904 | } 905 | }() 906 | 907 | // Perform each action whose pattern matches the 908 | // current record. 909 | for _, rule := range s.rules { 910 | if rule.Pattern(s) { 911 | rule.Action(s) 912 | if s.stop != dontStop { 913 | break 914 | } 915 | } 916 | } 917 | }() 918 | 919 | // Stop the script if an error occurred or an action calls Exit. 920 | if s.stop == stopScript { 921 | return nil 922 | } 923 | } 924 | 925 | // Process the End action, if any. 926 | if s.End != nil { 927 | s.state = atEnd 928 | s.End(s) 929 | } 930 | s.state = notRunning 931 | return nil 932 | } 933 | 934 | // RunPipeline chains together a set of scripts into a pipeline, with each 935 | // script sending its output to the next. 
(Implication: Script.Output will be 936 | // overwritten in all but the last script.) If any script in the pipeline 937 | // fails, a non-nil error will be returned. 938 | func RunPipeline(r io.Reader, ss ...*Script) error { 939 | // Spawn scripts in reverse order so they begin blocked on input. 940 | eChan := make(chan error, len(ss)) 941 | for i := len(ss) - 1; i > 0; i-- { 942 | s := ss[i] 943 | pr, pw := io.Pipe() 944 | ss[i-1].Output = pw 945 | go func(i int, pr *io.PipeReader) { 946 | eChan <- s.Run(pr) 947 | if i < len(ss)-1 { 948 | ss[i].Output.(*io.PipeWriter).Close() 949 | } 950 | }(i, pr) 951 | } 952 | 953 | // Spawn the first script to enable the rest to begin. 954 | go func() { 955 | eChan <- ss[0].Run(r) 956 | if len(ss) > 1 { 957 | ss[0].Output.(*io.PipeWriter).Close() 958 | } 959 | }() 960 | 961 | // Wait for all scripts to finish. 962 | for range ss { 963 | err := <-eChan 964 | if err != nil { 965 | // Error -- close all output pipes then return. 966 | for j := 0; j < len(ss)-1; j++ { 967 | ss[j].Output.(*io.PipeWriter).Close() 968 | } 969 | return err 970 | } 971 | } 972 | return nil 973 | } 974 | -------------------------------------------------------------------------------- /script_test.go: -------------------------------------------------------------------------------- 1 | // This file tests script primitives. 2 | 3 | package awk 4 | 5 | import ( 6 | "bufio" 7 | "bytes" 8 | "fmt" 9 | "io" 10 | "regexp" 11 | "sort" 12 | "strings" 13 | "testing" 14 | ) 15 | 16 | // TestReadRecordNewline tests reading newline-separated records. 17 | func TestReadRecordNewline(t *testing.T) { 18 | // Define the basic test we plan to repeat. 
19 | allRecords := []string{"X", "Word", "More than one word", "", "More text"} 20 | allRecordsStr := strings.Join(allRecords, "\n") 21 | scr := NewScript() 22 | doTest := func() { 23 | scr.input = bufio.NewReader(strings.NewReader(allRecordsStr)) 24 | scr.SetRS("\n") 25 | scr.rsScanner = bufio.NewScanner(scr.input) 26 | scr.rsScanner.Split(scr.makeRecordSplitter()) 27 | for _, oneRecord := range allRecords { 28 | rec, err := scr.readRecord() 29 | if err != nil { 30 | t.Fatal(err) 31 | } 32 | if rec != oneRecord { 33 | t.Fatalf("Expected %q but received %q", oneRecord, rec) 34 | } 35 | } 36 | } 37 | 38 | // Test with no trailing newline. 39 | doTest() 40 | 41 | // Test with a trailing newline. 42 | allRecordsStr += "\n" 43 | doTest() 44 | } 45 | 46 | // TestReadRecordWhitespace tests reading whitespace-separated records. 47 | func TestReadRecordWhitespace(t *testing.T) { 48 | allRecordsStr := " banana banana banana banana banana banana\tbanana banana\nbanana banana" 49 | want := []string{ 50 | "", 51 | "", 52 | "banana", 53 | "banana", 54 | "banana", 55 | "", 56 | "banana", 57 | "", 58 | "", 59 | "banana", 60 | "banana\tbanana", 61 | "banana\nbanana", 62 | "banana", 63 | } 64 | scr := NewScript() 65 | scr.input = bufio.NewReader(strings.NewReader(allRecordsStr)) 66 | scr.SetRS(" ") 67 | scr.rsScanner = bufio.NewScanner(scr.input) 68 | scr.rsScanner.Split(scr.makeRecordSplitter()) 69 | for _, str := range want { 70 | rec, err := scr.readRecord() 71 | if err != nil { 72 | t.Fatal(err) 73 | } 74 | if rec != str { 75 | t.Fatalf("Expected %q but received %q", str, rec) 76 | } 77 | } 78 | } 79 | 80 | // TestReadRecordRE tests reading regular-expression-separated records. 
81 | func TestReadRecordRE(t *testing.T) { 82 | allRecordsStr := "hellohowdyhelloyellowhellogoodbye" 83 | scr := NewScript() 84 | scr.input = bufio.NewReader(strings.NewReader(allRecordsStr)) 85 | scr.SetRS(`<[^>]+>[^<]*<[^>]+>`) 86 | scr.rsScanner = bufio.NewScanner(scr.input) 87 | scr.rsScanner.Split(scr.makeRecordSplitter()) 88 | for i := 0; i < 3; i++ { 89 | rec, err := scr.readRecord() 90 | if err != nil { 91 | t.Fatal(err) 92 | } 93 | if rec != "hello" { 94 | t.Fatalf("Expected %q but received %q", "hello", rec) 95 | } 96 | } 97 | } 98 | 99 | // TestSplitRecordWhitespace tests splitting a record into whitespace-separated 100 | // fields. 101 | func TestSplitRecordWhitespace(t *testing.T) { 102 | recordStr := "The woods are lovely, dark and deep," 103 | fields := regexp.MustCompile(`\s+`).Split(recordStr, -1) 104 | scr := NewScript() 105 | scr.splitRecord(recordStr) 106 | for i, f := range fields { 107 | if scr.F(i+1).String() != f { 108 | t.Fatalf("Expected %q but received %q", f, scr.F(i+1)) 109 | } 110 | } 111 | } 112 | 113 | // TestSplitRecordComma tests splitting a record into comma-separated fields. 114 | func TestSplitRecordComma(t *testing.T) { 115 | recordStr := "The woods are lovely, dark and deep," 116 | fields := strings.Split(recordStr, ",") 117 | scr := NewScript() 118 | scr.SetFS(",") 119 | scr.splitRecord(recordStr) 120 | for i, f := range fields { 121 | if scr.F(i+1).String() != f { 122 | t.Fatalf("Expected %q but received %q", f, scr.F(i+1)) 123 | } 124 | } 125 | } 126 | 127 | // TestSplitFieldRE tests splitting a field based on a regular expression. 128 | func TestSplitFieldRE(t *testing.T) { 129 | // Determine what we want to provide and see in return. 130 | recordStr := "foo-bar---baz------------quux--corge-grault---garply-" 131 | re, err := regexp.Compile(`\w+`) 132 | if err != nil { 133 | t.Fatal(err) 134 | } 135 | words := re.FindAllString(recordStr, -1) 136 | words = append(words, "") 137 | 138 | // Split the record. 
139 | scr := NewScript() 140 | scr.SetFS("-+") 141 | scr.splitRecord(recordStr) 142 | 143 | // Check the result. 144 | for i := 1; i <= scr.NF; i++ { 145 | f := scr.F(i).String() 146 | if f != words[i-1] { 147 | t.Fatalf("Expected %q for field %d but received %q", words[i-1], i, f) 148 | } 149 | } 150 | } 151 | 152 | // TestSplitFieldREIgnCase tests splitting a field based on a case-insensitive 153 | // regular expression. 154 | func TestSplitFieldREIgnCase(t *testing.T) { 155 | // Determine what we want to provide and see in return. 156 | recordStr := "fooxbarXxxbazxxXXxxxXxxXxquucksxXcorgexgraultxxxgarplyx" 157 | re, err := regexp.Compile(`[fobarzqucksgeltpy]+`) 158 | if err != nil { 159 | t.Fatal(err) 160 | } 161 | words := re.FindAllString(recordStr, -1) 162 | words = append(words, "") 163 | 164 | // Split the record. 165 | scr := NewScript() 166 | scr.SetFS("x+") 167 | scr.IgnoreCase(true) 168 | err = scr.splitRecord(recordStr) 169 | if err != nil { 170 | t.Fatal(err) 171 | } 172 | 173 | // Check the result. 174 | for i := 1; i <= scr.NF; i++ { 175 | f := scr.F(i).String() 176 | if f != words[i-1] { 177 | t.Fatalf("Expected %q for field %d but received %q", words[i-1], i, f) 178 | } 179 | } 180 | } 181 | 182 | // TestSplitFieldFixed tests splitting a field based on fixed-width columns. 183 | func TestSplitFieldFixed(t *testing.T) { 184 | // Determine what we want to provide and see in return. 185 | inputStr := "CeterumcenseoCarthaginemessedelendam." 186 | desiredOutput := []string{"Ceterum", "censeo", "Carthaginem", "esse", "delendam."} 187 | 188 | // Split the record. 189 | scr := NewScript() 190 | scr.SetFieldWidths([]int{7, 6, 11, 4, 123}) 191 | err := scr.splitRecord(inputStr) 192 | if err != nil { 193 | t.Fatal(err) 194 | } 195 | 196 | // Check the result. 
197 | for i := 1; i <= scr.NF; i++ { 198 | f := scr.F(i).String() 199 | if f != desiredOutput[i-1] { 200 | t.Fatalf("Expected %q for field %d but received %q", desiredOutput[i-1], i, f) 201 | } 202 | } 203 | } 204 | 205 | // TestSplitFieldREPat tests splitting a field based on a field-matching 206 | // regular expression. 207 | func TestSplitFieldREPat(t *testing.T) { 208 | // Determine what we want to provide and see in return. 209 | inputStr := "23 Skidoo. 3-2-1 blast off! 99 red balloons." 210 | desiredOutput := 122 211 | 212 | // Split the record. 213 | scr := NewScript() 214 | scr.SetFPat(`-?\d+`) 215 | err := scr.splitRecord(inputStr) 216 | if err != nil { 217 | t.Fatal(err) 218 | } 219 | 220 | // Check the result. 221 | output := 0 222 | for i := 1; i <= scr.NF; i++ { 223 | t.Log(scr.F(i)) 224 | output += scr.F(i).Int() 225 | } 226 | if output != desiredOutput { 227 | t.Fatalf("Expected %d but received %d", desiredOutput, output) 228 | } 229 | } 230 | 231 | // TestBeginEnd tests creating and running a script that contains a BEGIN 232 | // action and an END action. 233 | func TestBeginEnd(t *testing.T) { 234 | scr := NewScript() 235 | val := 123 236 | scr.Begin = func(s *Script) { val *= 10 } 237 | scr.End = func(s *Script) { val += 4 } 238 | err := scr.Run(strings.NewReader("dummy data")) 239 | if err != nil { 240 | t.Fatal(err) 241 | } 242 | if val != 1234 { 243 | t.Fatalf("Expected 1234 but received %d", val) 244 | } 245 | } 246 | 247 | // TestSimpleSum tests adding up a column of numbers. 248 | func TestSimpleSum(t *testing.T) { 249 | scr := NewScript() 250 | sum := 0 251 | scr.AppendStmt(nil, func(s *Script) { sum += s.F(1).Int() }) 252 | err := scr.Run(strings.NewReader("2\n4\n6\n8\n")) 253 | if err != nil { 254 | t.Fatal(err) 255 | } 256 | if sum != 20 { 257 | t.Fatalf("Expected 20 but received %d", sum) 258 | } 259 | } 260 | 261 | // TestRunTwice tests running the same script twice. 262 | func TestRunTwice(t *testing.T) { 263 | // Run once. 
264 | scr := NewScript() 265 | sum := 0 266 | scr.AppendStmt(nil, func(s *Script) { sum += s.F(1).Int() * s.NR }) 267 | err := scr.Run(strings.NewReader("1\n3\n5\n7\n")) 268 | if err != nil { 269 | t.Fatal(err) 270 | } 271 | if sum != 50 { 272 | t.Fatalf("Expected 50 but received %d on the first trial", sum) 273 | } 274 | 275 | // Run again. 276 | sum = 0 277 | err = scr.Run(strings.NewReader("1\n3\n5\n7\n")) 278 | if err != nil { 279 | t.Fatal(err) 280 | } 281 | if sum != 50 { 282 | t.Fatalf("Expected 50 but received %d on the second trial", sum) 283 | } 284 | } 285 | 286 | // TestFieldCreation tests creating ("autovivifying" in Perl-speak) new fields. 287 | func TestFieldCreation(t *testing.T) { 288 | scr := NewScript() 289 | sum := 0 290 | scr.AppendStmt(nil, func(s *Script) { sum += 1 << uint(s.F(2).Int()) }) 291 | err := scr.Run(strings.NewReader("x 3\ny 2\n\nz 1\n")) 292 | if err != nil { 293 | t.Fatal(err) 294 | } 295 | if sum != 15 { 296 | t.Fatalf("Expected 15 but received %d", sum) 297 | } 298 | } 299 | 300 | // TestRecordReplacement tests overwriting field 0 with a new record. 301 | func TestRecordReplacement(t *testing.T) { 302 | scr := NewScript() 303 | sum := 0 304 | scr.AppendStmt(nil, func(s *Script) { 305 | sum += s.F(2).Int() 306 | s.SetF(0, s.NewValue("10 20 30 40 50")) 307 | sum += s.F(5).Int() 308 | }) 309 | err := scr.Run(strings.NewReader("x 3\ny 2\n\nz 1\n")) 310 | if err != nil { 311 | t.Fatal(err) 312 | } 313 | if sum != 206 { 314 | t.Fatalf("Expected 206 but received %d", sum) 315 | } 316 | } 317 | 318 | // TestRecordChangeCase tests changing IgnoreCase during the execution of a 319 | // script. 
320 | func TestRecordChangeCase(t *testing.T) { 321 | scr := NewScript() 322 | sum := 0 323 | scr.AppendStmt(func(s *Script) bool { return s.F(1).Int()%2 == 0 }, 324 | func(s *Script) { sum += s.F(1).Int() }) 325 | scr.AppendStmt(func(s *Script) bool { return s.NR == 3 }, 326 | func(s *Script) { s.IgnoreCase(true) }) 327 | scr.SetRS("EOL") 328 | err := scr.Run(strings.NewReader("1EOL2EOL3EOL4Eol5eol6eoL")) 329 | if err != nil { 330 | t.Fatal(err) 331 | } 332 | if sum != 12 { 333 | t.Fatalf("Expected 12 but received %d", sum) 334 | } 335 | } 336 | 337 | // TestRecordBlankLines tests the AWK special case of blank-line-separated 338 | // records. 339 | func TestRecordBlankLines(t *testing.T) { 340 | recordStr := "uno\ndos\n\ntres\ncuatro\n\ncinco,seis,siete\nocho\n\nnueve,diez\n\n" 341 | expected := regexp.MustCompile(`[\n,]+`).Split(recordStr, -1) 342 | expected = expected[:len(expected)-1] // Skip empty final record. 343 | actual := make([]string, 0, 10) 344 | scr := NewScript() 345 | scr.SetRS("") 346 | scr.SetFS(",") 347 | scr.AppendStmt(nil, func(s *Script) { 348 | for i := 1; i <= s.NF; i++ { 349 | actual = append(actual, s.F(i).String()) 350 | } 351 | }) 352 | err := scr.Run(strings.NewReader(recordStr)) 353 | if err != nil { 354 | t.Fatal(err) 355 | } 356 | for i, s1 := range expected { 357 | s2 := actual[i] 358 | if s1 != s2 { 359 | t.Fatalf("Expected %v but received %v", expected, actual) 360 | } 361 | } 362 | } 363 | 364 | // TestExit tests premature script termination. 
365 | func TestExit(t *testing.T) { 366 | scr := NewScript() 367 | sum := 0 368 | scr.Begin = func(s *Script) { s.IgnoreCase(true) } 369 | scr.AppendStmt(nil, func(s *Script) { sum += s.F(1).Int() }) 370 | scr.AppendStmt(func(s *Script) bool { return s.F(1).StrEqual("stop") }, 371 | func(s *Script) { s.Exit() }) 372 | err := scr.Run(strings.NewReader("111\n222\n333\n444\nSTOP\n555\n666\n")) 373 | if err != nil { 374 | t.Fatal(err) 375 | } 376 | if sum != 1110 { 377 | t.Fatalf("Expected 1110 but received %d", sum) 378 | } 379 | } 380 | 381 | // TestRecordRange tests range patterns. 382 | func TestRecordRange(t *testing.T) { 383 | scr := NewScript() 384 | all := []string{ 385 | "bad", 386 | "terrible", 387 | "BEGIN", 388 | "good", 389 | "great", 390 | "fantastic", 391 | "END", 392 | "awful", 393 | "dreadful", 394 | } 395 | want := []string{ 396 | "BEGIN", 397 | "good", 398 | "great", 399 | "fantastic", 400 | "END", 401 | } 402 | got := make([]string, 0, 10) 403 | scr.AppendStmt(Range(func(s *Script) bool { return s.F(1).Match("BEGIN") }, 404 | func(s *Script) bool { return s.F(1).Match("END") }), 405 | func(s *Script) { got = append(got, s.F(1).String()) }) 406 | err := scr.Run(strings.NewReader(strings.Join(all, "\n"))) 407 | if err != nil { 408 | t.Fatal(err) 409 | } 410 | for i, s1 := range want { 411 | s2 := got[i] 412 | if s1 != s2 { 413 | t.Fatalf("Expected %q but received %q", s1, s2) 414 | } 415 | } 416 | } 417 | 418 | // TestSplitRecordRE tests splitting the input string into regexp-separated 419 | // records. 
420 | func TestSplitRecordRE(t *testing.T) { 421 | scr := NewScript() 422 | pluses := 0 423 | scr.Begin = func(s *Script) { s.SetRS(`\++`) } 424 | scr.AppendStmt(nil, func(s *Script) { pluses += len(s.RT) }) 425 | err := scr.Run(strings.NewReader("a++++++a++a++++a+++a+++++a+")) 426 | if err != nil { 427 | t.Fatal(err) 428 | } 429 | if pluses != 21 { 430 | t.Fatalf("Expected 21 but received %d", pluses) 431 | } 432 | } 433 | 434 | // TestDefaultAction tests the default printing action. 435 | func TestDefaultAction(t *testing.T) { 436 | // Define a script and some test input. 437 | scr := NewScript() 438 | scr.Output = new(bytes.Buffer) 439 | scr.IgnoreCase(true) 440 | scr.AppendStmt(func(s *Script) bool { return s.F(1).StrEqual("Duck") }, nil) 441 | inputStr := `Duck 1 442 | duck 2 443 | duck 3 444 | duck 4 445 | Goose! 5 446 | Duck 6 447 | duck 7 448 | DUCK 8 449 | duck 9 450 | Goose! 451 | ` 452 | 453 | // Test with the default record separator. 454 | err := scr.Run(strings.NewReader(inputStr)) 455 | if err != nil { 456 | t.Fatal(err) 457 | } 458 | outputStr := string(scr.Output.(*bytes.Buffer).Bytes()) 459 | desiredOutputStr := `Duck 1 460 | duck 2 461 | duck 3 462 | duck 4 463 | Duck 6 464 | duck 7 465 | DUCK 8 466 | duck 9 467 | ` 468 | if outputStr != desiredOutputStr { 469 | t.Fatalf("Expected %#v but received %#v", desiredOutputStr, outputStr) 470 | } 471 | 472 | // Test with a modified record separator. 473 | scr.Output.(*bytes.Buffer).Reset() 474 | scr.SetORS("|") 475 | err = scr.Run(strings.NewReader(inputStr)) 476 | if err != nil { 477 | t.Fatal(err) 478 | } 479 | outputStr = string(scr.Output.(*bytes.Buffer).Bytes()) 480 | desiredOutputStr = `Duck 1|duck 2|duck 3|duck 4|Duck 6|duck 7|DUCK 8|duck 9|` 481 | if outputStr != desiredOutputStr { 482 | t.Fatalf("Expected %#v but received %#v", desiredOutputStr, outputStr) 483 | } 484 | } 485 | 486 | // TestFInts tests the bulk conversion of fields to ints. 
487 | func TestFInts(t *testing.T) { 488 | // Define a script and some test inputs and outputs. 489 | scr := NewScript() 490 | inputStr := "8675309" 491 | desiredOutput := []int{0, 3, 5, 6, 7, 8, 9} 492 | var output []int 493 | scr.SetFS("") 494 | scr.AppendStmt(nil, func(s *Script) { 495 | iList := s.FInts() 496 | sort.Ints(iList) 497 | output = iList 498 | }) 499 | 500 | // Run the script. 501 | err := scr.Run(strings.NewReader(inputStr)) 502 | if err != nil { 503 | t.Fatal(err) 504 | } 505 | 506 | // Validate the output. 507 | for i, val := range desiredOutput { 508 | if val != output[i] { 509 | t.Fatalf("Expected %v but received %v", desiredOutput, output) 510 | } 511 | } 512 | } 513 | 514 | // TestFieldCreation0 ensures that field creation updates F(0). 515 | func TestFieldCreation0(t *testing.T) { 516 | // Define a script and some test inputs and outputs. 517 | input := "spam egg spam spam bacon spam" 518 | desiredOutput := "spam,egg,spam,spam,bacon,spam,,,,,sausage" 519 | var output string 520 | scr := NewScript() 521 | scr.Begin = func(s *Script) { scr.SetOFS(",") } 522 | scr.AppendStmt(nil, func(s *Script) { 523 | scr.SetF(scr.NF+5, scr.NewValue("sausage")) 524 | output = scr.F(0).String() 525 | }) 526 | 527 | // Run the script and validate the output. 528 | err := scr.Run(strings.NewReader(input)) 529 | if err != nil { 530 | t.Fatal(err) 531 | } 532 | if output != desiredOutput { 533 | t.Fatalf("Expected %q but received %q", desiredOutput, output) 534 | } 535 | } 536 | 537 | // TestFieldModification0 ensures that field modification updates F(0). 538 | func TestFieldModification0(t *testing.T) { 539 | // Define a script and some test inputs and outputs. 
540 | input := "spam egg spam spam bacon spam" 541 | desiredOutput := "spam,egg,sausage,spam,bacon,spam" 542 | var output string 543 | scr := NewScript() 544 | scr.Begin = func(s *Script) { scr.SetOFS(",") } 545 | scr.AppendStmt(nil, func(s *Script) { 546 | scr.SetF(3, scr.NewValue("sausage")) 547 | output = scr.F(0).String() 548 | }) 549 | 550 | // Run the script and validate the output. 551 | err := scr.Run(strings.NewReader(input)) 552 | if err != nil { 553 | t.Fatal(err) 554 | } 555 | if output != desiredOutput { 556 | t.Fatalf("Expected %q but received %q", desiredOutput, output) 557 | } 558 | } 559 | 560 | // TestNFModification0 ensures that modifying NF updates F(0). 561 | func TestNFModification0(t *testing.T) { 562 | // Define a script and some test inputs and outputs. 563 | input := "spam egg spam spam bacon spam" 564 | desiredOutput := "spam egg spam" 565 | var output string 566 | scr := NewScript() 567 | scr.AppendStmt(nil, func(s *Script) { 568 | scr.NF = 3 569 | output = scr.F(0).String() 570 | }) 571 | 572 | // Run the script and validate the output. 573 | err := scr.Run(strings.NewReader(input)) 574 | if err != nil { 575 | t.Fatal(err) 576 | } 577 | if output != desiredOutput { 578 | t.Fatalf("Expected %q but received %q", desiredOutput, output) 579 | } 580 | } 581 | 582 | // TestAutoInt tests the Auto function with an int argument. 583 | func TestAutoInt(t *testing.T) { 584 | // Define a script and some test inputs and outputs. 585 | input := strings.Replace("It does not matter how slowly you go as long as you do not stop.", " ", "\n", -1) 586 | var output string 587 | desiredOutput := "go" 588 | scr := NewScript() 589 | scr.AppendStmt(Auto(8), func(s *Script) { output = s.F(1).String() }) 590 | 591 | // Run the script and validate the output. 
592 | err := scr.Run(strings.NewReader(input)) 593 | if err != nil { 594 | t.Fatal(err) 595 | } 596 | if output != desiredOutput { 597 | t.Fatalf("Expected %q but received %q", desiredOutput, output) 598 | } 599 | } 600 | 601 | // TestAutoRegexp tests the Auto function with a Regexp argument. 602 | func TestAutoRegexp(t *testing.T) { 603 | // Define a script and some test inputs and outputs. 604 | input := strings.Replace("It does not matter how slowly you go as long as you do not stop.", " ", "\n", -1) 605 | var output string 606 | desiredOutput := "go" 607 | scr := NewScript() 608 | re := regexp.MustCompile("Go") 609 | scr.Begin = func(s *Script) { scr.IgnoreCase(true) } 610 | scr.AppendStmt(Auto(re), func(s *Script) { output = s.F(1).String() }) 611 | 612 | // Run the script and validate the output. 613 | err := scr.Run(strings.NewReader(input)) 614 | if err != nil { 615 | t.Fatal(err) 616 | } 617 | if output != desiredOutput { 618 | t.Fatalf("Expected %q but received %q", desiredOutput, output) 619 | } 620 | } 621 | 622 | // TestAutoString tests the Auto function with a string argument. 623 | func TestAutoString(t *testing.T) { 624 | // Define a script and some test inputs and outputs. 625 | input := strings.Replace("It does not matter how slowly you go as long as you do not stop.", " ", "\n", -1) 626 | var output string 627 | desiredOutput := "go" 628 | scr := NewScript() 629 | scr.Begin = func(s *Script) { scr.IgnoreCase(true) } 630 | scr.AppendStmt(Auto("Go"), func(s *Script) { output = s.F(1).String() }) 631 | 632 | // Run the script and validate the output. 633 | err := scr.Run(strings.NewReader(input)) 634 | if err != nil { 635 | t.Fatal(err) 636 | } 637 | if output != desiredOutput { 638 | t.Fatalf("Expected %q but received %q", desiredOutput, output) 639 | } 640 | } 641 | 642 | // TestAutoIntRange tests the Auto function with a range of int arguments. 643 | func TestAutoIntRange(t *testing.T) { 644 | // Define a script and some test inputs and outputs. 
645 | input := strings.Replace("10 20 30 40 50 60 70 80 90 100", " ", "\n", -1) 646 | var output int 647 | desiredOutput := 150 648 | scr := NewScript() 649 | scr.AppendStmt(Auto(4, 6), func(s *Script) { output += s.F(1).Int() }) 650 | 651 | // Run the script and validate the output. 652 | err := scr.Run(strings.NewReader(input)) 653 | if err != nil { 654 | t.Fatal(err) 655 | } 656 | if output != desiredOutput { 657 | t.Fatalf("Expected %d but received %d", desiredOutput, output) 658 | } 659 | } 660 | 661 | // TestAutoIntRanges tests the Auto function with multiple ranges of int 662 | // arguments. 663 | func TestAutoIntRanges(t *testing.T) { 664 | // Define a script and some test inputs and outputs. 665 | input := strings.Replace("Don't be afraid to give up the good to go for the great.", " ", "\n", -1) 666 | output := make([]string, 0, 15) 667 | desiredOutput := strings.Split("Don't be afraid to go", " ") 668 | scr := NewScript() 669 | scr.Begin = func(s *Script) { scr.IgnoreCase(true) } 670 | scr.AppendStmt(Auto(1, 3, 9, 10), func(s *Script) { output = append(output, s.F(1).String()) }) 671 | 672 | // Run the script and validate the output. 673 | err := scr.Run(strings.NewReader(input)) 674 | if err != nil { 675 | t.Fatal(err) 676 | } 677 | if len(output) != len(desiredOutput) { 678 | t.Fatalf("Expected %v but received %v", desiredOutput, output) 679 | } 680 | for i, o := range desiredOutput { 681 | if output[i] != o { 682 | t.Fatalf("Expected %v but received %v", desiredOutput, output) 683 | } 684 | } 685 | } 686 | 687 | // TestCatchSetRSError tests that we properly catch invalid uses of SetRS. 688 | func TestCatchSetRSError(t *testing.T) { 689 | // Define a script. 690 | scr := NewScript() 691 | scr.Begin = func(s *Script) { scr.IgnoreCase(true) } 692 | scr.AppendStmt(nil, func(s *Script) { s.SetRS("/") }) 693 | expected := "SetRS was called from a running script" 694 | 695 | // Run the script and ensure it threw the expected error. 
696 | err := scr.Run(strings.NewReader("The progress of rivers to the ocean is not so rapid as that of man to error.")) 697 | if err == nil { 698 | t.Fatalf("Expected error %q, but no error was returned", expected) 699 | } 700 | if err.Error() != expected { 701 | t.Fatalf("Expected error %q, but received error %q", expected, err.Error()) 702 | } 703 | } 704 | 705 | // TestNext tests that Next immediately stops the current action and 706 | // immediately continues with the next record. 707 | func TestNext(t *testing.T) { 708 | // Define a script. 709 | var output []string 710 | scr := NewScript() 711 | scr.Begin = func(s *Script) { output = make([]string, 0, 3) } 712 | scr.AppendStmt(nil, func(s *Script) { 713 | output = append(output, s.F(0).String()) 714 | s.Next() 715 | t.Fatal("Next did not immediately exit the current action") 716 | }) 717 | scr.AppendStmt(nil, func(s *Script) { 718 | t.Fatal("Next did not immediately go to the next record") 719 | }) 720 | 721 | // Define our input and desired output. 722 | input := []string{ 723 | "追いかけ", // Oikake 724 | "待ち伏せ", // Machibuse 725 | "気まぐれ", // Kimagure 726 | "お惚け", // Otoboke 727 | } 728 | desiredOutput := strings.Join(input, " ") 729 | 730 | // Run the script and validate the output. 731 | err := scr.Run(strings.NewReader(strings.Join(input, "\n"))) 732 | if err != nil { 733 | t.Fatal(err) 734 | } 735 | outputStr := strings.Join(output, " ") 736 | if outputStr != desiredOutput { 737 | t.Fatalf("Expected %q but received %q", desiredOutput, outputStr) 738 | } 739 | } 740 | 741 | // TestGetLineSelf tests that GetLine can read the next record from the current 742 | // input stream. 743 | func TestGetLineSelf(t *testing.T) { 744 | // Define a script. 
745 | var output []string 746 | scr := NewScript() 747 | scr.Begin = func(s *Script) { output = nil } 748 | scr.AppendStmt(Auto("skip"), func(s *Script) { 749 | nSkip := s.F(2).Int() 750 | for i := 0; i < nSkip; i++ { 751 | _, err := s.GetLine(nil) 752 | if err != nil && err != io.EOF { 753 | t.Fatal(err) 754 | } 755 | } 756 | s.Next() 757 | }) 758 | scr.AppendStmt(nil, func(s *Script) { 759 | output = append(output, s.F(0).String()) 760 | }) 761 | 762 | // Define our input and desired output. 763 | input := []string{ 764 | "apple", 765 | "boy", 766 | "skip 1", 767 | "cat", 768 | "skip 1", 769 | "dog", 770 | "east", 771 | "five", 772 | "skip 2", 773 | "goat", 774 | "house", 775 | "skip 1", 776 | "ice cream", 777 | "July", 778 | "skip 1", 779 | "skip 1", 780 | "king", 781 | "lemon", 782 | } 783 | desiredOutput := []string{ 784 | "apple", 785 | "boy", 786 | "east", 787 | "five", 788 | "July", 789 | "king", 790 | "lemon", 791 | } 792 | 793 | // Run the script and validate the output. 794 | err := scr.Run(strings.NewReader(strings.Join(input, "\n"))) 795 | if err != nil { 796 | t.Fatal(err) 797 | } 798 | if len(output) != len(desiredOutput) { 799 | t.Fatalf("Expected %v (length %d) but received %v (length %d)", desiredOutput, len(desiredOutput), output, len(output)) 800 | } 801 | for i, o := range desiredOutput { 802 | if output[i] != o { 803 | t.Fatalf("Expected %v but received %v", desiredOutput, output) 804 | } 805 | } 806 | 807 | // Repeat the test, but attempt to skip past the end of the file. The 808 | // error check after the GetLine call is supposed to ignore EOF, not 809 | // fail. 
810 | input = append(input, "skip 5") 811 | err = scr.Run(strings.NewReader(strings.Join(input, "\n"))) 812 | if err != nil { 813 | t.Fatal(err) 814 | } 815 | if len(output) != len(desiredOutput) { 816 | t.Fatalf("Expected %v (length %d) but received %v (length %d)", desiredOutput, len(desiredOutput), output, len(output)) 817 | } 818 | for i, o := range desiredOutput { 819 | if output[i] != o { 820 | t.Fatalf("Expected %v but received %v", desiredOutput, output) 821 | } 822 | } 823 | } 824 | 825 | // TestGetLineOther tests that GetLine can read the next record from an 826 | // alternative input stream. 827 | func TestGetLineOther(t *testing.T) { 828 | // Define our inputs and desired output. 829 | input := []string{ 830 | "INSERT", 831 | "Boston", 832 | "Chicago", 833 | "Denver", 834 | "INSERT", 835 | "Frank", 836 | "INSERT", 837 | "INSERT", 838 | "Ida", 839 | "John", 840 | "King", 841 | "INSERT", 842 | } 843 | inserts := []string{ 844 | "Adams", 845 | "Easy", 846 | "George", 847 | "Henry", 848 | "Lincoln", 849 | } 850 | desiredOutput := []string{ 851 | "Adams", 852 | "Boston", 853 | "Chicago", 854 | "Denver", 855 | "Easy", 856 | "Frank", 857 | "George", 858 | "Henry", 859 | "Ida", 860 | "John", 861 | "King", 862 | "Lincoln", 863 | } 864 | 865 | // Define a script. 866 | var output []string 867 | insertsStrm := strings.NewReader(strings.Join(inserts, "\n")) 868 | scr := NewScript() 869 | scr.Begin = func(s *Script) { output = nil } 870 | scr.AppendStmt(Auto("INSERT"), func(s *Script) { 871 | ins, err := s.GetLine(insertsStrm) 872 | if err != nil { 873 | t.Fatal(err) 874 | } 875 | output = append(output, ins.String()) 876 | s.Next() 877 | }) 878 | scr.AppendStmt(nil, func(s *Script) { 879 | output = append(output, s.F(0).String()) 880 | }) 881 | 882 | // Run the script and validate the output. 
883 | err := scr.Run(strings.NewReader(strings.Join(input, "\n"))) 884 | if err != nil { 885 | t.Fatal(err) 886 | } 887 | if len(output) != len(desiredOutput) { 888 | t.Fatalf("Expected %v (length %d) but received %v (length %d)", desiredOutput, len(desiredOutput), output, len(output)) 889 | } 890 | for i, o := range desiredOutput { 891 | if output[i] != o { 892 | t.Fatalf("Expected %v but received %v", desiredOutput, output) 893 | } 894 | } 895 | } 896 | 897 | // TestGetLineSetF tests that GetLine + SetF can replace the current input line. 898 | func TestGetLineSetF(t *testing.T) { 899 | // Define a script. 900 | scr := NewScript() 901 | scr.AppendStmt(nil, func(s *Script) { 902 | // Validate the current line. 903 | for i := 1; i <= 3; i++ { 904 | if s.F(i).Int() != (s.NR-1)*3+i { 905 | t.Fatalf("Expected %d but received %d", (s.NR-1)*3+i, s.F(i).Int()) 906 | } 907 | } 908 | 909 | // Read and validate the next line. 910 | line, err := s.GetLine(nil) 911 | if err != nil { 912 | t.Fatal(err) 913 | } 914 | s.SetF(0, line) 915 | for i := 1; i <= 3; i++ { 916 | if s.F(i).Int() != (s.NR-1)*3+i { 917 | t.Fatalf("Expected %d but received %d", (s.NR-1)*3+i, s.F(i).Int()) 918 | } 919 | } 920 | }) 921 | 922 | // Run the script and validate the output. 923 | input := []string{ 924 | " 1 2 3", 925 | " 4 5 6", 926 | " 7 8 9", 927 | "10 11 12", 928 | } 929 | err := scr.Run(strings.NewReader(strings.Join(input, "\n"))) 930 | if err != nil { 931 | t.Fatal(err) 932 | } 933 | } 934 | 935 | // TestBigLongLine tests splitting a very long record into whitespace-separated 936 | // fields 937 | func TestBigLongLine(t *testing.T) { 938 | // Specify the word to appear in each field. 939 | word := "pneumonoultramicroscopicsilicovolcanoconiosis" 940 | 941 | // Define a script that simply verifies that each field is 942 | // correct. 943 | scr := NewScript() 944 | scr.AppendStmt(nil, func(s *Script) { 945 | // Validate the current line. 
946 | for i := 1; i <= s.NF; i++ { 947 | if s.F(i).String() != word { 948 | t.Fatalf("Expected %q but received %q", word, s.F(i).String()) 949 | } 950 | } 951 | }) 952 | 953 | // Define a function to test a record with a given number of fields. 954 | testBigRecord := func(numFields int) error { 955 | // Create a very long string. 956 | recordStr := word 957 | for i := 0; i < numFields-1; i++ { 958 | recordStr += " " + word 959 | } 960 | 961 | // Run the script and return its error value. 962 | input := strings.NewReader(recordStr) 963 | return scr.Run(input) 964 | } 965 | 966 | // Try increasingly large records until we exhaust the default maximum 967 | // record size. 968 | var err error 969 | var numFields int 970 | for numFields = 100; numFields <= 100000000; numFields *= 10 { 971 | err = testBigRecord(numFields) 972 | if err != nil { 973 | break 974 | } 975 | } 976 | if err == nil { 977 | // We never managed to exhaust the default maximum record size. 978 | // Assume it's big enough for all practical purposes. 979 | return 980 | } 981 | 982 | // Set the buffer size and try again. There should be no error this 983 | // time. 984 | scr.MaxRecordSize = (len(word) + 1) * numFields 985 | err = testBigRecord(numFields) 986 | if err != nil { 987 | t.Fatal(err) 988 | } 989 | } 990 | 991 | // TestRunPipeline1 tests that RunPipeline can implement a pipeline of a single 992 | // operation. 993 | func TestRunPipeline1(t *testing.T) { 994 | // Define a script that repeats the first word of each line 995 | rep := NewScript() 996 | rep.AppendStmt(nil, func(s *Script) { 997 | s.Println(s.F(1), s.F(1)) 998 | }) 999 | 1000 | // Pipe inputs into the pipeline we're about to run and from the 1001 | // pipeline into a memory buffer. 1002 | pr, pw := io.Pipe() 1003 | rep.Output = bytes.NewBuffer(make([]byte, 0, 10000)) 1004 | 1005 | // Write numbers into the pipe in the background. 
1006 | go func() { 1007 | for i := 1; i <= 100; i++ { 1008 | fmt.Fprintf(pw, "%3d\n", i) 1009 | } 1010 | pw.Close() 1011 | }() 1012 | 1013 | // Execute a pipeline in the foreground. 1014 | err := RunPipeline(pr, rep) 1015 | if err != nil { 1016 | t.Fatal(err) 1017 | } 1018 | 1019 | // Ensure we received the expected output. 1020 | exp := bytes.NewBuffer(make([]byte, 0, 10000)) 1021 | for i := 1; i <= 100; i++ { 1022 | fmt.Fprintf(exp, "%d %d\n", i, i) 1023 | } 1024 | got := rep.Output.(*bytes.Buffer).String() 1025 | if exp.String() != got { 1026 | t.Fatalf("Incorrect output %q", got) 1027 | } 1028 | } 1029 | 1030 | // TestRunPipeline2 tests that RunPipeline can implement a pipeline of two 1031 | // operations. 1032 | func TestRunPipeline2(t *testing.T) { 1033 | // Define a script that repeats the first word of each line 1034 | rep := NewScript() 1035 | rep.AppendStmt(nil, func(s *Script) { 1036 | s.Println(s.F(1), s.F(1)) 1037 | }) 1038 | 1039 | // Define a script that replaces the second word of each line 1040 | // with twice its value. 1041 | dbl := NewScript() 1042 | dbl.AppendStmt(nil, func(s *Script) { 1043 | s.Println(s.F(1), s.F(2).Int()*2) 1044 | }) 1045 | 1046 | // Pipe inputs into the pipeline we're about to run and from the 1047 | // pipeline into a memory buffer. 1048 | pr, pw := io.Pipe() 1049 | dbl.Output = bytes.NewBuffer(make([]byte, 0, 10000)) 1050 | 1051 | // Write numbers into the pipe in the background. 1052 | go func() { 1053 | for i := 1; i <= 100; i++ { 1054 | fmt.Fprintf(pw, "%3d\n", i) 1055 | } 1056 | pw.Close() 1057 | }() 1058 | 1059 | // Execute a pipeline in the foreground. 1060 | err := RunPipeline(pr, rep, dbl) 1061 | if err != nil { 1062 | t.Fatal(err) 1063 | } 1064 | 1065 | // Ensure we received the expected output. 
1066 | exp := bytes.NewBuffer(make([]byte, 0, 10000)) 1067 | for i := 1; i <= 100; i++ { 1068 | fmt.Fprintf(exp, "%d %d\n", i, i*2) 1069 | } 1070 | got := dbl.Output.(*bytes.Buffer).String() 1071 | if exp.String() != got { 1072 | t.Fatalf("Incorrect output %q", got) 1073 | } 1074 | } 1075 | 1076 | // TestRunPipeline5 tests that RunPipeline can implement a pipeline of five 1077 | // operations. 1078 | func TestRunPipeline5(t *testing.T) { 1079 | // Define a script that repeats the first word of each line 1080 | rep := NewScript() 1081 | rep.AppendStmt(nil, func(s *Script) { 1082 | s.Println(s.F(1), s.F(1)) 1083 | }) 1084 | 1085 | // Define a script that replaces the second number in a line with 1086 | // "fizz" if the first number is a multiple of 3. 1087 | fizz := NewScript() 1088 | fizz.AppendStmt(nil, func(s *Script) { 1089 | if s.F(1).Int()%3 == 0 { 1090 | s.Println(s.F(1), "fizz") 1091 | } else { 1092 | s.Println() 1093 | } 1094 | }) 1095 | 1096 | // Define a script that replaces the second number in a line with 1097 | // "buzz" if the first number is a multiple of 5. 1098 | buzz := NewScript() 1099 | buzz.AppendStmt(nil, func(s *Script) { 1100 | if s.F(1).Int()%5 == 0 { 1101 | s.Println(s.F(1), "buzz") 1102 | } else { 1103 | s.Println() 1104 | } 1105 | }) 1106 | 1107 | // Define a script that replaces the second number in a line with 1108 | // "fizzbuzz" if the first number is a multiple of 15. 1109 | fizzbuzz := NewScript() 1110 | fizzbuzz.AppendStmt(nil, func(s *Script) { 1111 | if s.F(1).Int()%15 == 0 { 1112 | s.Println(s.F(1), "fizzbuzz") 1113 | } else { 1114 | s.Println() 1115 | } 1116 | }) 1117 | 1118 | // Define a script that outputs only the second field. 1119 | strip := NewScript() 1120 | strip.AppendStmt(nil, func(s *Script) { 1121 | s.Println(s.F(2)) 1122 | }) 1123 | 1124 | // Pipe inputs into the pipeline we're about to run and from the 1125 | // pipeline into a memory buffer. 
1126 | pr, pw := io.Pipe() 1127 | strip.Output = bytes.NewBuffer(make([]byte, 0, 10000)) 1128 | 1129 | // Write numbers into the pipe in the background. 1130 | go func() { 1131 | for i := 1; i <= 100; i++ { 1132 | fmt.Fprintf(pw, "%3d\n", i) 1133 | } 1134 | pw.Close() 1135 | }() 1136 | 1137 | // Execute a pipeline in the foreground. 1138 | err := RunPipeline(pr, rep, fizz, buzz, fizzbuzz, strip) 1139 | if err != nil { 1140 | t.Fatal(err) 1141 | } 1142 | 1143 | // Ensure we received the expected output. 1144 | exp := bytes.NewBuffer(make([]byte, 0, 10000)) 1145 | for i := 1; i <= 100; i++ { 1146 | switch { 1147 | case i%15 == 0: 1148 | fmt.Fprintln(exp, "fizzbuzz") 1149 | case i%5 == 0: 1150 | fmt.Fprintln(exp, "buzz") 1151 | case i%3 == 0: 1152 | fmt.Fprintln(exp, "fizz") 1153 | default: 1154 | fmt.Fprintf(exp, "%d\n", i) 1155 | } 1156 | } 1157 | got := strip.Output.(*bytes.Buffer).String() 1158 | if exp.String() != got { 1159 | t.Fatalf("Incorrect output %q", got) 1160 | } 1161 | } 1162 | -------------------------------------------------------------------------------- /value.go: -------------------------------------------------------------------------------- 1 | // This file defines an AWK-like data type, Value, that can easily be converted 2 | // to different Go data types. 3 | 4 | package awk 5 | 6 | import ( 7 | "fmt" 8 | "regexp" 9 | "strconv" 10 | "strings" 11 | ) 12 | 13 | const convFmt = "%.6g" 14 | 15 | // A Value represents an immutable datum that can be converted to an int, 16 | // float64, or string in best-effort fashion (i.e., never returning an error). 
17 | type Value struct { 18 | ival int // Value converted to an int 19 | fval float64 // Value converted to a float64 20 | sval string // Value converted to a string 21 | 22 | ivalOk bool // true: ival is valid; false: invalid 23 | fvalOk bool // true: fval is valid; false: invalid 24 | svalOk bool // true: sval is valid; false: invalid 25 | 26 | script *Script // Pointer to the script that produced this value 27 | } 28 | 29 | // NewValue creates a Value from an arbitrary Go data type. Data types that do 30 | // not map straightforwardly to one of {int, float64, string} are represented 31 | // by a zero value. 32 | func (s *Script) NewValue(v interface{}) *Value { 33 | val := &Value{} 34 | switch v := v.(type) { 35 | case uint: 36 | val.ival = int(v) 37 | val.ivalOk = true 38 | case uint8: 39 | val.ival = int(v) 40 | val.ivalOk = true 41 | case uint16: 42 | val.ival = int(v) 43 | val.ivalOk = true 44 | case uint32: 45 | val.ival = int(v) 46 | val.ivalOk = true 47 | case uint64: 48 | val.ival = int(v) 49 | val.ivalOk = true 50 | case uintptr: 51 | val.ival = int(v) 52 | val.ivalOk = true 53 | 54 | case int: 55 | val.ival = int(v) 56 | val.ivalOk = true 57 | case int8: 58 | val.ival = int(v) 59 | val.ivalOk = true 60 | case int16: 61 | val.ival = int(v) 62 | val.ivalOk = true 63 | case int32: 64 | val.ival = int(v) 65 | val.ivalOk = true 66 | case int64: 67 | val.ival = int(v) 68 | val.ivalOk = true 69 | 70 | case bool: 71 | if v { 72 | val.ival = 1 73 | } 74 | val.ivalOk = true 75 | 76 | case float32: 77 | val.fval = float64(v) 78 | val.fvalOk = true 79 | case float64: 80 | val.fval = float64(v) 81 | val.fvalOk = true 82 | 83 | case complex64: 84 | val.fval = float64(real(v)) 85 | val.fvalOk = true 86 | case complex128: 87 | val.fval = float64(real(v)) 88 | val.fvalOk = true 89 | 90 | case string: 91 | val.sval = v 92 | val.svalOk = true 93 | 94 | case *Value: 95 | *val = *v 96 | 97 | default: 98 | val.svalOk = true 99 | } 100 | val.script = s 101 | return val 102 | 
} 103 | 104 | // matchInt matches a base-ten integer. 105 | var matchInt = regexp.MustCompile(`^\s*([-+]?\d+)`) 106 | 107 | // Int converts a Value to an int. 108 | func (v *Value) Int() int { 109 | switch { 110 | case v.ivalOk: 111 | case v.fvalOk: 112 | v.ival = int(v.fval) 113 | v.ivalOk = true 114 | case v.svalOk: 115 | // Perform a best-effort conversion from string to int. 116 | strs := matchInt.FindStringSubmatch(v.sval) 117 | var i64 int64 118 | if len(strs) >= 2 { 119 | i64, _ = strconv.ParseInt(strs[1], 10, 0) 120 | } 121 | v.ival = int(i64) 122 | v.ivalOk = true 123 | } 124 | return v.ival 125 | } 126 | 127 | // matchFloat matches a base-ten floating-point number. 128 | var matchFloat = regexp.MustCompile(`^\s*([-+]?(?:\d+(?:\.\d*)?|\.\d+)(?:[Ee][-+]?\d+)?)`) 129 | 130 | // Float64 converts a Value to a float64. 131 | func (v *Value) Float64() float64 { 132 | switch { 133 | case v.fvalOk: 134 | case v.ivalOk: 135 | v.fval = float64(v.ival) 136 | v.fvalOk = true 137 | case v.svalOk: 138 | // Perform a best-effort conversion from string to float64. 139 | v.fval = 0.0 140 | strs := matchFloat.FindStringSubmatch(v.sval) 141 | if len(strs) >= 2 { 142 | v.fval, _ = strconv.ParseFloat(strs[1], 64) 143 | } 144 | v.fvalOk = true 145 | } 146 | return v.fval 147 | } 148 | 149 | // String converts a Value to a string. 150 | func (v *Value) String() string { 151 | switch { 152 | case v.svalOk: 153 | case v.ivalOk: 154 | v.sval = strconv.FormatInt(int64(v.ival), 10) 155 | v.svalOk = true 156 | case v.fvalOk: 157 | v.sval = fmt.Sprintf(v.script.ConvFmt, v.fval) 158 | v.svalOk = true 159 | } 160 | return v.sval 161 | } 162 | 163 | // Match says whether a given regular expression, provided as a string, matches 164 | // the Value. If the associated script set IgnoreCase(true), the match is 165 | // tested in a case-insensitive manner. 166 | func (v *Value) Match(expr string) bool { 167 | // Compile the regular expression. 
168 | re, err := v.script.compileRegexp(expr) 169 | if err != nil { 170 | return false // Fail silently 171 | } 172 | 173 | // Return true if the expression matches the value, interpreted as a 174 | // string. 175 | loc := re.FindStringIndex(v.String()) 176 | if loc == nil { 177 | v.script.RStart = 0 178 | v.script.RLength = -1 179 | return false 180 | } 181 | v.script.RStart = loc[0] + 1 182 | v.script.RLength = loc[1] - loc[0] 183 | return true 184 | } 185 | 186 | // StrEqual says whether a Value, treated as a string, has the same contents as 187 | // a given Value, which can be provided either as a Value or as any type that 188 | // can be converted to a Value. If the associated script called 189 | // IgnoreCase(true), the comparison is performed in a case-insensitive manner. 190 | func (v *Value) StrEqual(v2 interface{}) bool { 191 | switch v2 := v2.(type) { 192 | case *Value: 193 | if v.script.ignCase { 194 | return strings.EqualFold(v.String(), v2.String()) 195 | } 196 | return v.String() == v2.String() 197 | case string: 198 | if v.script.ignCase { 199 | return strings.EqualFold(v.String(), v2) 200 | } 201 | return v.String() == v2 202 | default: 203 | v2Val := v.script.NewValue(v2) 204 | if v.script.ignCase { 205 | return strings.EqualFold(v.String(), v2Val.String()) 206 | } 207 | return v.String() == v2Val.String() 208 | } 209 | } 210 | -------------------------------------------------------------------------------- /value_test.go: -------------------------------------------------------------------------------- 1 | // This file tests conversions from each data type to every other data type. 2 | 3 | package awk 4 | 5 | import ( 6 | "math" 7 | "testing" 8 | ) 9 | 10 | // TestIntToInt converts various ints to Values then back to ints. 
11 | func TestIntToInt(t *testing.T) { 12 | scr := NewScript() 13 | for _, n := range []int{0, -123, 123, -456, 456, math.MaxInt32, math.MinInt32, 123} { 14 | v := scr.NewValue(n) 15 | i := v.Int() 16 | if i != n { 17 | t.Fatalf("Expected %d but received %d", n, i) 18 | } 19 | } 20 | } 21 | 22 | // TestIntToInt converts various ints to Values then to float64s. 23 | func TestIntToFloat64(t *testing.T) { 24 | scr := NewScript() 25 | for _, n := range []int{0, -123, 123, -456, 456, math.MaxInt32, math.MinInt32, 123} { 26 | v := scr.NewValue(n) 27 | f := v.Float64() 28 | if f != float64(n) { 29 | t.Fatalf("Expected %.4g but received %.4g", float64(n), f) 30 | } 31 | } 32 | } 33 | 34 | // TestIntToString converts various ints to Values then to strings. 35 | func TestIntToString(t *testing.T) { 36 | scr := NewScript() 37 | in := []int{0, -123, 123, -456, 456, math.MaxInt32, math.MinInt32, 123} 38 | out := []string{"0", "-123", "123", "-456", "456", "2147483647", "-2147483648", "123"} 39 | for idx, n := range in { 40 | v := scr.NewValue(n) 41 | s := v.String() 42 | if s != out[idx] { 43 | t.Fatalf("Expected %q but received %q", out[idx], s) 44 | } 45 | } 46 | } 47 | 48 | // TestFloat64ToInt converts various float64s to Values then to ints. 49 | func TestFloat64ToInt(t *testing.T) { 50 | scr := NewScript() 51 | in := []float64{0.0, -123.0, 123.0, -456.7, 456.7, 123.0, -456.4, 456.4} 52 | out := []int{0, -123, 123, -456, 456, 123, -456, 456} 53 | for idx, n := range in { 54 | v := scr.NewValue(n) 55 | i := v.Int() 56 | if i != out[idx] { 57 | t.Fatalf("Expected %d but received %d", out[idx], i) 58 | } 59 | } 60 | } 61 | 62 | // TestFloat64ToFloat64 converts various float64s to Values then back to 63 | // float64s. 
64 | func TestFloat64ToFloat64(t *testing.T) { 65 | scr := NewScript() 66 | for _, n := range []float64{0.0, -123.0, 123.0, -456.7, 456.7, math.MaxFloat64, -math.MaxFloat64, 123.0, -456.4, 456.4} { 67 | v := scr.NewValue(n) 68 | f := v.Float64() 69 | if f != n { 70 | t.Fatalf("Expected %.4g but received %.4g", n, f) 71 | } 72 | } 73 | } 74 | 75 | // TestFloat64ToString converts various float64s to Values then to strings. 76 | func TestFloat64ToString(t *testing.T) { 77 | scr := NewScript() 78 | in := []float64{0.0, -123.0, 123.0, -456.7, 456.7, math.MaxFloat64, -math.MaxFloat64, 123.0, -456.4, 456.4} 79 | out := []string{"0", "-123", "123", "-456.7", "456.7", "1.79769e+308", "-1.79769e+308", "123", "-456.4", "456.4"} 80 | for idx, n := range in { 81 | v := scr.NewValue(n) 82 | s := v.String() 83 | if s != out[idx] { 84 | t.Fatalf("Expected %q but received %q", out[idx], s) 85 | } 86 | } 87 | } 88 | 89 | // TestStringToInt converts various strings to Values then to ints. 90 | func TestStringToInt(t *testing.T) { 91 | scr := NewScript() 92 | in := []string{"0", "-123", "123", "-456", "456", "9223372036854775807", "-9223372036854775808", "123", "Text999", "321_go", " 789 ", "0x111", "0222"} 93 | out := []int{0, -123, 123, -456, 456, 9223372036854775807, -9223372036854775808, 123, 0, 321, 789, 0, 222} 94 | for idx, n := range in { 95 | v := scr.NewValue(n) 96 | i := v.Int() 97 | if i != out[idx] { 98 | t.Fatalf("Expected %d for %q but received %d", out[idx], n, i) 99 | } 100 | } 101 | } 102 | 103 | // TestStringToFloat64 converts various strings to Values then to float64s. 
104 | func TestStringToFloat64(t *testing.T) { 105 | scr := NewScript() 106 | in := []string{"0", "-123", "123", "-456.7", "456.7", "17.9769e+307", "-17.9769e+307", "123", "-456.4", "456.4", "Text99.99", "99.99e+1000", " 111.111 ", "0x222", "0333", "321_go", "1.2e3e4"} 107 | out := []float64{0, -123, 123, -456.7, 456.7, 1.79769e+308, -1.79769e+308, 123, -456.4, 456.4, 0, math.Inf(1), 111.111, 0.0, 333.0, 321.0, 1.2e3} 108 | for idx, n := range in { 109 | v := scr.NewValue(n) 110 | f := v.Float64() 111 | if f != out[idx] { 112 | t.Fatalf("Expected %.4g for %q but received %.4g", out[idx], n, f) 113 | } 114 | } 115 | } 116 | 117 | // TestStringToString converts various strings to Values then back to strings. 118 | func TestStringToString(t *testing.T) { 119 | scr := NewScript() 120 | for _, n := range []string{"0", "-123", "123", "-456.7", "456.7", "17.9769e+307", "-17.9769e+307", "123", "-456.4", "456.4", "Text99.99", "99.99e+1000"} { 121 | v := scr.NewValue(n) 122 | s := v.String() 123 | if s != n { 124 | t.Fatalf("Expected %q but received %q", n, s) 125 | } 126 | } 127 | } 128 | 129 | // TestMatch tests if regular-expression matching works. 130 | func TestMatch(t *testing.T) { 131 | // We run the test twice to confirm that regexp caching works. 132 | scr := NewScript() 133 | v := scr.NewValue("Mississippi") 134 | in := []string{"p*", "[is]+", "Miss", "hippie", "ippi"} 135 | out := []bool{true, true, true, false, true} 136 | for range [2]struct{}{} { 137 | for idx, n := range in { 138 | m := v.Match(n) 139 | if m != out[idx] { 140 | t.Fatalf("Expected %v but received %v\n", out[idx], m) 141 | } 142 | } 143 | } 144 | 145 | // Test if RStart and RLength are maintained properly. 
146 | if !v.Match("[is]+") { 147 | t.Fatalf("Failed to match %v against %q", v, "[is]+") 148 | } 149 | if scr.RStart != 2 || scr.RLength != 7 { 150 | t.Fatalf("Expected {2, 7} but received {%d, %d}", scr.RStart, scr.RLength) 151 | } 152 | if v.Match("[xy]+") { 153 | t.Fatalf("Incorrectly matched %v against %q", v, "[xy]+") 154 | } 155 | if scr.RStart != 0 || scr.RLength != -1 { 156 | t.Fatalf("Expected {0, -1} but received {%d, %d}", scr.RStart, scr.RLength) 157 | } 158 | } 159 | 160 | // TestStrEqual tests if string comparisons work. 161 | func TestStrEqual(t *testing.T) { 162 | // Test case-sensitive comparisons. 163 | scr := NewScript() 164 | v := scr.NewValue("good") 165 | for _, bad := range []string{"bad", "goody", "Good", "good "} { 166 | if v.StrEqual(scr.NewValue(bad)) { 167 | t.Fatalf("Incorrectly matched %q = %q", "good", bad) 168 | } 169 | } 170 | if !v.StrEqual(scr.NewValue("good")) { 171 | t.Fatalf("Failed to match %q", "good") 172 | } 173 | 174 | // Test case-insensitive comparisons. 175 | scr.IgnoreCase(true) 176 | for _, bad := range []string{"bad", "goody", "good "} { 177 | if v.StrEqual(scr.NewValue(bad)) { 178 | t.Fatalf("Incorrectly matched %q = %q", "good", bad) 179 | } 180 | } 181 | if !v.StrEqual(scr.NewValue("good")) { 182 | t.Fatalf("Failed to match %q", "good") 183 | } 184 | if !v.StrEqual(scr.NewValue("GooD")) { 185 | t.Fatalf("Failed to match %q = %q", "good", "GooD") 186 | } 187 | } 188 | --------------------------------------------------------------------------------