├── .travis.yml
├── LICENSE.md
├── README.md
├── assoc.go
├── assoc_test.go
├── doc.go
├── example_test.go
├── go.mod
├── script.go
├── script_test.go
├── value.go
└── value_test.go


/.travis.yml:
--------------------------------------------------------------------------------
1 | sudo: false
2 | 
3 | language: go
4 | 
5 | go:
6 |   - 1.x
7 |   - master
8 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | Copyright © 2016, Scott Pakin
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
 5 | 
 6 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
 7 | 
 8 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
 9 | 
10 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
11 | 
12 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
13 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | awk
 2 | ===
 3 | 
 4 | [![Go Report Card](https://goreportcard.com/badge/github.com/spakin/awk)](https://goreportcard.com/report/github.com/spakin/awk) [![Build Status](https://travis-ci.com/spakin/awk.svg?branch=master)](https://travis-ci.com/spakin/awk) [![Go project version](https://badge.fury.io/go/github.com%2Fspakin%2Fawk.svg)](https://badge.fury.io/go/github.com%2Fspakin%2Fawk) [![GoDoc](https://godoc.org/github.com/spakin/awk?status.svg)](https://godoc.org/github.com/spakin/awk)
 5 | 
 6 | Description
 7 | -----------
 8 | 
 9 | `awk` is a package for the [Go programming language](https://golang.org/) that provides an [AWK](http://pubs.opengroup.org/onlinepubs/9699919799/utilities/awk.html)-style text processing capability.  The package facilitates splitting an input stream into records (default: newline-separated lines) and fields (default: whitespace-separated columns) then applying a sequence of statements of the form "if 〈_pattern_〉 then 〈_action_〉" to each record in turn.  For example, the following is a complete Go program that adds up the first two columns of a [CSV](https://en.wikipedia.org/wiki/Comma-separated_values) file to produce a third column:
10 | ```Go
11 | package main
12 | 
13 | import (
14 |     "github.com/spakin/awk"
15 |     "os"
16 | )
17 | 
18 | func main() {
19 |     s := awk.NewScript()
20 |     s.Begin = func(s *awk.Script) {
21 |         s.SetFS(",")
22 |         s.SetOFS(",")
23 |     }
24 |     s.AppendStmt(nil, func(s *awk.Script) {
25 |         s.SetF(3, s.NewValue(s.F(1).Int()+s.F(2).Int()))
26 |         s.Println()
27 |     })
28 |     s.Run(os.Stdin)
29 | }
30 | ```
31 | 
32 | In the above, the `awk` package handles all the mundane details such as reading lines from the file, checking for EOF, splitting lines into columns, handling errors, and other such things.  With the help of `awk`, Go can easily be applied to the sorts of text-processing tasks that one would normally implement in a scripting language but without sacrificing Go's speed, safety, or flexibility.
33 | 
34 | Installation
35 | ------------
36 | 
37 | The `awk` package has opted into the [Go module system](https://blog.golang.org/using-go-modules) so installation is in fact unnecessary if your program or package has done likewise.  Otherwise, a traditional
38 | ```bash
39 | go get github.com/spakin/awk
40 | ```
41 | will install the package.
42 | 
43 | Documentation
44 | -------------
45 | 
46 | Descriptions and examples of the `awk` API can be found online in the [GoDoc documentation of package `awk`](https://godoc.org/github.com/spakin/awk).
47 | 
48 | Author
49 | ------
50 | 
51 | [Scott Pakin](http://www.pakin.org/~scott/), *scott+awk@pakin.org*
52 | 


--------------------------------------------------------------------------------
/assoc.go:
--------------------------------------------------------------------------------
  1 | // This file defines an AWK-like associative array, ValueArray.
  2 | 
  3 | package awk
  4 | 
  5 | import (
  6 | 	"strings"
  7 | )
  8 | 
// A ValueArray maps Values to Values.  Elements are stored under the string
// form of their index, so indexes with the same string representation refer
// to the same element.
type ValueArray struct {
	script *Script           // Pointer to the script that produced this array
	data   map[string]*Value // The associative array proper, keyed by index string
}
 14 | 
 15 | // NewValueArray creates and returns an associative array of Values.
 16 | func (s *Script) NewValueArray() *ValueArray {
 17 | 	return &ValueArray{
 18 | 		script: s,
 19 | 		data:   make(map[string]*Value),
 20 | 	}
 21 | }
 22 | 
 23 | // Set (index, value) assigns a Value to an index of a ValueArray.  Multiple
 24 | // indexes can be specified to simulate multidimensional arrays.  (In fact, the
 25 | // indexes are concatenated into a single string with intervening Script.SubSep
 26 | // characters.)  The final argument is always the value to assign.  Arguments
 27 | // can be provided either as Values or as any types that can be converted to
 28 | // Values.
 29 | func (va *ValueArray) Set(args ...interface{}) {
 30 | 	// Ensure we were given at least one index and a value.
 31 | 	if len(args) < 2 {
 32 | 		panic("ValueArray.Set requires at least one index and one value")
 33 | 	}
 34 | 
 35 | 	// Convert each argument to a Value.
 36 | 	argVals := make([]*Value, len(args))
 37 | 	for i, arg := range args {
 38 | 		v, ok := arg.(*Value)
 39 | 		if !ok {
 40 | 			v = va.script.NewValue(arg)
 41 | 		}
 42 | 		argVals[i] = v
 43 | 	}
 44 | 
 45 | 	// Handle the most common case: one index and one value.
 46 | 	if len(args) == 2 {
 47 | 		va.data[argVals[0].String()] = argVals[1]
 48 | 		return
 49 | 	}
 50 | 
 51 | 	// Merge the indexes into a single string.
 52 | 	idxStrs := make([]string, len(argVals)-1)
 53 | 	for i, v := range argVals[:len(argVals)-1] {
 54 | 		idxStrs[i] = v.String()
 55 | 	}
 56 | 	idx := strings.Join(idxStrs, va.script.SubSep)
 57 | 
 58 | 	// Associate the final argument with the index string.
 59 | 	va.data[idx] = argVals[len(argVals)-1]
 60 | }
 61 | 
 62 | // Get returns the Value associated with a given index into a ValueArray.
 63 | // Multiple indexes can be specified to simulate multidimensional arrays.  (In
 64 | // fact, the indexes are concatenated into a single string with intervening
 65 | // Script.SubSep characters.)  The arguments can be provided either as Values
 66 | // or as any types that can be converted to Values.  If the index doesn't
 67 | // appear in the array, a zero value is returned.
 68 | func (va *ValueArray) Get(args ...interface{}) *Value {
 69 | 	// Ensure we were given at least one index.
 70 | 	if len(args) < 1 {
 71 | 		panic("ValueArray.Get requires at least one index")
 72 | 	}
 73 | 
 74 | 	// Convert each argument to a Value.
 75 | 	argVals := make([]*Value, len(args))
 76 | 	for i, arg := range args {
 77 | 		v, ok := arg.(*Value)
 78 | 		if !ok {
 79 | 			v = va.script.NewValue(arg)
 80 | 		}
 81 | 		argVals[i] = v
 82 | 	}
 83 | 
 84 | 	// Handle the most common case: a single index.
 85 | 	if len(args) == 1 {
 86 | 		vv, found := va.data[argVals[0].String()]
 87 | 		if !found {
 88 | 			return va.script.NewValue("")
 89 | 		}
 90 | 		return vv
 91 | 	}
 92 | 
 93 | 	// Merge the indexes into a single string.
 94 | 	idxStrs := make([]string, len(argVals))
 95 | 	for i, v := range argVals {
 96 | 		idxStrs[i] = v.String()
 97 | 	}
 98 | 	idx := strings.Join(idxStrs, va.script.SubSep)
 99 | 
100 | 	// Look up the index in the associative array.
101 | 	vv, found := va.data[idx]
102 | 	if !found {
103 | 		return va.script.NewValue("")
104 | 	}
105 | 	return vv
106 | }
107 | 
108 | // Delete deletes a key and associated value from a ValueArray.  Multiple
109 | // indexes can be specified to simulate multidimensional arrays.  (In fact, the
110 | // indexes are concatenated into a single string with intervening Script.SubSep
111 | // characters.)  The arguments can be provided either as Values or as any types
112 | // that can be converted to Values.  If no argument is provided, the entire
113 | // ValueArray is emptied.
114 | func (va *ValueArray) Delete(args ...interface{}) {
115 | 	// If we were given no arguments, delete the entire array.
116 | 	if args == nil {
117 | 		va.data = make(map[string]*Value)
118 | 		return
119 | 	}
120 | 
121 | 	// Convert each argument to a Value.
122 | 	argVals := make([]*Value, len(args))
123 | 	for i, arg := range args {
124 | 		v, ok := arg.(*Value)
125 | 		if !ok {
126 | 			v = va.script.NewValue(arg)
127 | 		}
128 | 		argVals[i] = v
129 | 	}
130 | 
131 | 	// Handle the most common case: a single index.
132 | 	if len(args) == 1 {
133 | 		delete(va.data, argVals[0].String())
134 | 		return
135 | 	}
136 | 
137 | 	// Merge the indexes into a single string.
138 | 	idxStrs := make([]string, len(argVals))
139 | 	for i, v := range argVals {
140 | 		idxStrs[i] = v.String()
141 | 	}
142 | 	idx := strings.Join(idxStrs, va.script.SubSep)
143 | 
144 | 	// Delete the index from the associative array.
145 | 	delete(va.data, idx)
146 | }
147 | 
148 | // Keys returns all keys in the associative array in undefined order.
149 | func (va *ValueArray) Keys() []*Value {
150 | 	keys := make([]*Value, 0, len(va.data))
151 | 	for kstr := range va.data {
152 | 		keys = append(keys, va.script.NewValue(kstr))
153 | 	}
154 | 	return keys
155 | }
156 | 
157 | // Values returns all values in the associative array in undefined order.
158 | func (va *ValueArray) Values() []*Value {
159 | 	vals := make([]*Value, 0, len(va.data))
160 | 	for _, v := range va.data {
161 | 		vals = append(vals, va.script.NewValue(v))
162 | 	}
163 | 	return vals
164 | }
165 | 


--------------------------------------------------------------------------------
/assoc_test.go:
--------------------------------------------------------------------------------
  1 | // This file tests operations on associative arrays
  2 | 
  3 | package awk
  4 | 
  5 | import (
  6 | 	"testing"
  7 | )
  8 | 
  9 | // TestIntIntArray tests Get/Set operations on an associative array that
 10 | // maps integers to integers.
 11 | func TestIntIntArray(t *testing.T) {
 12 | 	scr := NewScript()
 13 | 	a := scr.NewValueArray()
 14 | 	for i := 0; i < 10; i++ {
 15 | 		a.Set(i, i*10)
 16 | 	}
 17 | 	for i := 9; i >= 0; i-- {
 18 | 		got := a.Get(i).Int()
 19 | 		if got != i*10 {
 20 | 			t.Fatalf("Expected %d but received %d", i*10, got)
 21 | 		}
 22 | 	}
 23 | }
 24 | 
 25 | // TestValueValueArray tests Get/Set operations on an
 26 | // associative array that maps Values to Values.
 27 | func TestValueValueArray(t *testing.T) {
 28 | 	scr := NewScript()
 29 | 	a := scr.NewValueArray()
 30 | 	for i := 0; i < 10; i++ {
 31 | 		a.Set(scr.NewValue(i), scr.NewValue(i*10))
 32 | 	}
 33 | 	for i := 9; i >= 0; i-- {
 34 | 		got := a.Get(scr.NewValue(i)).Int()
 35 | 		if got != i*10 {
 36 | 			t.Fatalf("Expected %d but received %d", i*10, got)
 37 | 		}
 38 | 	}
 39 | }
 40 | 
 41 | // TestStringStringArray tests Get/Set operations on an associative array that
 42 | // maps strings to strings.
 43 | func TestStringStringArray(t *testing.T) {
 44 | 	scr := NewScript()
 45 | 	a := scr.NewValueArray()
 46 | 	keys := []string{"The", "tree", "has", "entered", "my", "hands"}
 47 | 	values := []string{"The", "sap", "has", "ascended", "my", "arms"}
 48 | 	for i, k := range keys {
 49 | 		a.Set(k, values[i])
 50 | 	}
 51 | 	for i, k := range keys {
 52 | 		want := values[i]
 53 | 		got := a.Get(k).String()
 54 | 		if got != want {
 55 | 			t.Fatalf("Expected %q but received %q", want, got)
 56 | 		}
 57 | 	}
 58 | }
 59 | 
 60 | // TestMultiDimArray tests Get/Set operations on a "multidimensional"
 61 | // associative array.
 62 | func TestMultiDimArray(t *testing.T) {
 63 | 	scr := NewScript()
 64 | 	a := scr.NewValueArray()
 65 | 	for i := 9; i >= 0; i-- {
 66 | 		for j := 9; j >= 0; j-- {
 67 | 			a.Set(i, j, i*10+j)
 68 | 		}
 69 | 	}
 70 | 	for i := 0; i < 10; i++ {
 71 | 		for j := 9; j >= 0; j-- {
 72 | 			got := a.Get(i, j).Int()
 73 | 			if got != i*10+j {
 74 | 				t.Fatalf("Expected %d but received %d", i*10+j, got)
 75 | 			}
 76 | 		}
 77 | 	}
 78 | }
 79 | 
 80 | // TestArrayKeys tests the Keys operation on an associative array.
 81 | func TestArrayKeys(t *testing.T) {
 82 | 	scr := NewScript()
 83 | 	a := scr.NewValueArray()
 84 | 	for i := 10; i <= 100; i += 10 {
 85 | 		a.Set(i, i*2)
 86 | 	}
 87 | 	ksum := 0
 88 | 	for _, k := range a.Keys() {
 89 | 		ksum += k.Int()
 90 | 	}
 91 | 	if ksum != 550 {
 92 | 		t.Fatalf("Expected 550 but received %d", ksum)
 93 | 	}
 94 | }
 95 | 
 96 | // TestArrayValues tests the Values operation on an associative array.
 97 | func TestArrayValues(t *testing.T) {
 98 | 	scr := NewScript()
 99 | 	a := scr.NewValueArray()
100 | 	for i := 10; i <= 100; i += 10 {
101 | 		a.Set(i, i*2)
102 | 	}
103 | 	vsum := 0
104 | 	for _, v := range a.Values() {
105 | 		vsum += v.Int()
106 | 	}
107 | 	if vsum != 1100 {
108 | 		t.Fatalf("Expected 1100 but received %d", vsum)
109 | 	}
110 | }
111 | 
112 | // TestArrayDelete tests deleting an element from an associative array.
113 | func TestArrayDelete(t *testing.T) {
114 | 	// Create an array of values, then delete every other element.
115 | 	scr := NewScript()
116 | 	a := scr.NewValueArray()
117 | 	for i := 0; i <= 100; i++ {
118 | 		a.Set(i, i/2)
119 | 	}
120 | 	for i := 1; i <= 100; i += 2 {
121 | 		a.Delete(i)
122 | 	}
123 | 	vsum := 0
124 | 	for i := 0; i <= 100; i++ {
125 | 		vsum += a.Get(i).Int()
126 | 	}
127 | 	if vsum != 1275 {
128 | 		t.Fatalf("Expected 1275 but received %d", vsum)
129 | 	}
130 | 
131 | 	// Empty the array and try again.
132 | 	a.Delete()
133 | 	vsum = 0
134 | 	for i := 0; i <= 100; i++ {
135 | 		vsum += a.Get(i).Int()
136 | 	}
137 | 	if vsum != 0 {
138 | 		t.Fatalf("Expected 0 but received %d", vsum)
139 | 	}
140 | }
141 | 


--------------------------------------------------------------------------------
/doc.go:
--------------------------------------------------------------------------------
  1 | /*
  2 | 
  3 | Package awk implements AWK-style processing of input streams.
  4 | 
  5 | 
  6 | Introduction
  7 | 
  8 | The awk package can be considered a shallow EDSL (embedded domain-specific
  9 | language) for Go that facilitates text processing.  It aims to implement
 10 | the core semantics provided by
 11 | AWK, a pattern scanning and processing language defined as part of the POSIX
 12 | 1003.1 standard
 13 | (http://pubs.opengroup.org/onlinepubs/9699919799/utilities/awk.html) and
 14 | therefore part of all standard Linux/Unix distributions.
 15 | 
 16 | AWK's forte is simple transformations of tabular data.  For example, the
 17 | following is a complete AWK program that reads an entire file from the standard
 18 | input device, splits each line into whitespace-separated columns, and outputs
 19 | all lines in which the fifth column is an odd number:
 20 | 
 21 |     $5 % 2 == 1
 22 | 
 23 | Here's a typical Go analogue of that one-line AWK program:
 24 | 
 25 |     package main
 26 | 
 27 |     import (
 28 |             "bufio"
 29 |             "fmt"
 30 |             "io"
 31 |             "os"
 32 |             "strconv"
 33 |             "strings"
 34 |     )
 35 | 
 36 |     func main() {
 37 |             input := bufio.NewReader(os.Stdin)
 38 |             for {
 39 |                     line, err := input.ReadString('\n')
 40 |                     if err != nil {
 41 |                             if err != io.EOF {
 42 |                                     panic(err)
 43 |                             }
 44 |                             break
 45 |                     }
 46 |                     scanner := bufio.NewScanner(strings.NewReader(line))
 47 |                     scanner.Split(bufio.ScanWords)
 48 |                     cols := make([]string, 0, 10)
 49 |                     for scanner.Scan() {
 50 |                             cols = append(cols, scanner.Text())
 51 |                     }
 52 |                     if err := scanner.Err(); err != nil {
 53 |                             panic(err)
 54 |                     }
 55 |                     if len(cols) < 5 {
 56 |                             continue
 57 |                     }
 58 |                     num, err := strconv.Atoi(cols[4])
 59 |                     if num%2 == 1 {
 60 |                             fmt.Print(line)
 61 |                     }
 62 |             }
 63 |     }
 64 | 
 65 | The goal of the awk package is to emulate AWK's simplicity while simultaneously
 66 | taking advantage of Go's speed, safety, and flexibility.  With the awk package,
 67 | the preceding code reduces to the following:
 68 | 
 69 |     package main
 70 | 
 71 |     import (
 72 | 	    "github.com/spakin/awk"
 73 | 	    "os"
 74 |     )
 75 | 
 76 |     func main() {
 77 | 	    s := awk.NewScript()
 78 | 	    s.AppendStmt(func(s *awk.Script) bool { return s.F(5).Int()%2 == 1 }, nil)
 79 | 	    if err := s.Run(os.Stdin); err != nil {
 80 | 		    panic(err)
 81 | 	    }
 82 |     }
 83 | 
 84 | While not a one-liner like the original AWK program, the above is conceptually
 85 | close to it.  The AppendStmt method defines a script in terms of patterns and
 86 | actions exactly as in the AWK program.  The Run method then runs the script on
 87 | an input stream, which can be any io.Reader.
 88 | 
 89 | 
 90 | Usage
 91 | 
 92 | For those programmers unfamiliar with AWK, an AWK program consists of a
 93 | sequence of pattern/action pairs.  Each pattern that matches a given line
 94 | causes the corresponding action to be performed.  AWK programs tend to be terse
 95 | because AWK implicitly reads the input file, splits it into records (default:
 96 | newline-terminated lines), and splits each record into fields (default:
 97 | whitespace-separated columns), saving the programmer from having to express
 98 | such operations explicitly.  Furthermore, AWK provides a default pattern, which
 99 | matches every record, and a default action, which outputs a record unmodified.
100 | 
101 | The awk package attempts to mimic those semantics in Go.  Basic usage consists
102 | of three steps:
103 | 
104 | 1. Script allocation (awk.NewScript)
105 | 
106 | 2. Script definition (Script.AppendStmt)
107 | 
108 | 3. Script execution (Script.Run)
109 | 
110 | In Step 2, AppendStmt is called once for each pattern/action pair that is to be
111 | appended to the script.  The same script can be applied to multiple input
112 | streams by re-executing Step 3.  Actions to be executed on every run of Step 3
113 | can be supplied by assigning the script's Begin and End fields.  The Begin
114 | action is typically used to initialize script state by calling methods such as
115 | SetRS and SetFS and assigning user-defined data to the script's State field
116 | (what would be global variables in AWK).  The End action is typically used to
117 | store or report final results.
118 | 
119 | To mimic AWK's dynamic type system, the awk package provides the Value and
120 | ValueArray types.  Value represents a scalar that can be coerced without error
121 | to a string, an int, or a float64.  ValueArray represents a—possibly
122 | multidimensional—associative array of Values.
123 | 
124 | Both patterns and actions can access the current record's fields via the
125 | script's F method, which takes a 1-based index and returns the corresponding
126 | field as a Value.  An index of 0 returns the entire record as a Value.
127 | 
128 | 
129 | Features
130 | 
131 | The following AWK features and GNU AWK extensions are currently supported by
132 | the awk package:
133 | 
134 | • the basic pattern/action structure of an AWK script, including BEGIN and END
135 | rules and range patterns
136 | 
137 | • control over record separation (RS), including regular expressions and null
138 | strings (implying blank lines as separators)
139 | 
140 | • control over field separation (FS), including regular expressions and null
141 | strings (implying single-character fields)
142 | 
143 | • fixed-width fields (FIELDWIDTHS)
144 | 
145 | • fields defined by a regular expression (FPAT)
146 | 
147 | • control over case-sensitive vs. case-insensitive comparisons (IGNORECASE)
148 | 
149 | • control over the number conversion format (CONVFMT)
150 | 
151 | • automatic enumeration of records (NR) and fields (NF)
152 | 
153 | • "weak typing"
154 | 
155 | • multidimensional associative arrays
156 | 
157 | • premature termination of record processing (next) and script processing (exit)
158 | 
159 | • explicit record reading (getline) from either the current stream or
160 | a specified stream
161 | 
162 | • maintenance of regular-expression status variables (RT, RSTART, and RLENGTH)
163 | 
164 | For more information about AWK and its features, see the awk(1) manual page on
165 | any Linux/Unix system (available online from, e.g.,
166 | http://linux.die.net/man/1/awk) or read the book, "The AWK Programming
167 | Language" by Aho, Kernighan, and Weinberger.
168 | 
169 | 
170 | Examples
171 | 
172 | A number of examples ported from the POSIX 1003.1 standard document
173 | (http://pubs.opengroup.org/onlinepubs/9699919799/utilities/awk.html) are
174 | presented below.
175 | 
176 | */
177 | package awk
178 | 


--------------------------------------------------------------------------------
/example_test.go:
--------------------------------------------------------------------------------
  1 | // This file presents some examples of awk package usage.
  2 | 
  3 | package awk_test
  4 | 
  5 | import (
  6 | 	"fmt"
  7 | 	"github.com/spakin/awk"
  8 | 	"os"
  9 | 	"sort"
 10 | )
 11 | 
// s is the script shared by the numbered Example_NN functions below.  It is
// declared but never assigned in this file.
var s *awk.Script
 13 | 
 14 | // Write to the standard output all input lines for which field 3 is
 15 | // greater than 5 (AWK: $3 > 5).
 16 | func Example_01() {
 17 | 	s.AppendStmt(func(s *awk.Script) bool { return s.F(3).Int() > 5 }, nil)
 18 | }
 19 | 
 20 | // Write every tenth line (AWK: (NR % 10) == 0).
 21 | func Example_02() {
 22 | 	s.AppendStmt(func(s *awk.Script) bool { return s.NR%10 == 0 }, nil)
 23 | }
 24 | 
 25 | // Write any line with a substring containing a 'G' or 'D', followed by a
 26 | // sequence of digits and characters (AWK:
 27 | // /(G|D)([[:digit:][:alpha:]]*)/). This example uses character classes digit
 28 | // and alpha to match language-independent digit and alphabetic characters
 29 | // respectively.
 30 | func Example_04() {
 31 | 	s.AppendStmt(func(s *awk.Script) bool { return s.F(0).Match("(G|D)([[:digit:][:alpha:]]*)") }, nil)
 32 | }
 33 | 
 34 | // Write any line in which the second field matches the regular expression
 35 | // "xyz" and the fourth field does not (AWK: $2 ~ /xyz/ && $4 !~ /xyz/).
 36 | func Example_05() {
 37 | 	s.AppendStmt(func(s *awk.Script) bool {
 38 | 		return s.F(2).Match("xyz") && !s.F(4).Match("xyz")
 39 | 	}, nil)
 40 | }
 41 | 
 42 | // Write any line in which the second field contains a backslash (AWK: $2 ~
 43 | // /\\/).
 44 | func Example_06() {
 45 | 	s.AppendStmt(func(s *awk.Script) bool { return s.F(2).Match(`\\`) }, nil)
 46 | }
 47 | 
 48 | // Write the second to the last and the last field in each line. Separate the
 49 | // fields by a colon (AWK: {OFS=":"; print $(NF-1), $NF}).
 50 | func Example_08() {
 51 | 	s.AppendStmt(nil, func(s *awk.Script) { fmt.Printf("%v:%v\n", s.F(s.NF-1), s.F(s.NF)) })
 52 | }
 53 | 
 54 | // Write the line number and number of fields in each line (AWK: {print NR ":"
 55 | // NF}). The three strings representing the line number, the colon, and the
 56 | // number of fields are concatenated and that string is written to standard
 57 | // output.
 58 | func Example_09() {
 59 | 	s.AppendStmt(nil, func(s *awk.Script) { fmt.Printf("%d:%d\n", s.NR, s.NF) })
 60 | }
 61 | 
 62 | // Write lines longer than 72 characters (AWK: length($0) > 72).
 63 | func Example_10() {
 64 | 	s.AppendStmt(func(s *awk.Script) bool { return len(s.F(0).String()) > 72 }, nil)
 65 | }
 66 | 
 67 | // Write the first two fields in opposite order (AWK: {print $2, $1}).
 68 | func Example_11() {
 69 | 	s.AppendStmt(nil, func(s *awk.Script) { s.Println(s.F(2), s.F(1)) })
 70 | }
 71 | 
 72 | // Do the same as Example 11, with input fields separated by a comma, space and
 73 | // tab characters, or both (AWK:
 74 | //
 75 | //     BEGIN { FS = ",[ \t]*|[ \t]+" }
 76 | //           { print $2, $1 }
 77 | //
 78 | // ).
 79 | func Example_12() {
 80 | 	s.Begin = func(s *awk.Script) { s.SetFS(",[ \t]*|[ \t]+") }
 81 | 	s.AppendStmt(nil, func(s *awk.Script) { s.Println(s.F(2), s.F(1)) })
 82 | }
 83 | 
 84 | // Add up the first column and print the sum and average (AWK:
 85 | //
 86 | //         {s += $1 }
 87 | //     END {print "sum is", s, "average is", s/NR}
 88 | //
 89 | // ).
 90 | func Example_13() {
 91 | 	s.Begin = func(s *awk.Script) { s.State = 0.0 }
 92 | 	s.AppendStmt(nil, func(s *awk.Script) { s.State = s.State.(float64) + s.F(1).Float64() })
 93 | 	s.End = func(s *awk.Script) {
 94 | 		sum := s.State.(float64)
 95 | 		s.Println("sum is", sum, "average is", sum/float64(s.NR))
 96 | 	}
 97 | }
 98 | 
 99 | // Write fields in reverse order, one per line (many lines out for each line
100 | // in).  AWK: {for (i = NF; i > 0; --i) print $i}.
101 | func Example_14() {
102 | 	s.AppendStmt(nil, func(s *awk.Script) {
103 | 		for i := s.NF; i > 0; i-- {
104 | 			s.Println(s.F(i))
105 | 		}
106 | 	})
107 | }
108 | 
109 | // Write all lines between occurrences of the strings "start" and "stop" (AWK:
110 | // /start/, /stop/).  This version of the Go code uses awk.Range to combine
111 | // begin and end functions into a match range.
112 | func Example_15a() {
113 | 	s.AppendStmt(awk.Range(func(s *awk.Script) bool { return s.F(1).Match("start") },
114 | 		func(s *awk.Script) bool { return s.F(1).Match("stop") }),
115 | 		nil)
116 | }
117 | 
118 | // Write all lines between occurrences of the strings "start" and "stop" (AWK:
119 | // /start/, /stop/).  This version of the Go code uses awk.Auto to define the
120 | // begin and end conditions as simple regular-expression matches.
121 | func Example_15b() {
122 | 	s.AppendStmt(awk.Auto("start", "stop"), nil)
123 | }
124 | 
125 | // Write all lines whose first field is different from the previous line's
126 | // first field (AWK: $1 != prev {print; prev = $1}).
127 | func Example_16() {
128 | 	s.State = s.NewValue("")
129 | 	s.AppendStmt(func(s *awk.Script) bool { return !s.F(1).StrEqual(s.State) },
130 | 		func(s *awk.Script) {
131 | 			s.Println()
132 | 			s.State = s.F(1)
133 | 		})
134 | }
135 | 
136 | // For all rows of the form "Total: <number>", accumulate <number>.  Once all
137 | // rows have been read, output the grand total.
138 | func ExampleScript_AppendStmt() {
139 | 	s := awk.NewScript()
140 | 	s.State = 0.0
141 | 	s.AppendStmt(func(s *awk.Script) bool { return s.NF == 2 && s.F(1).StrEqual("Total:") },
142 | 		func(s *awk.Script) { s.State = s.State.(float64) + s.F(2).Float64() })
143 | 	s.End = func(s *awk.Script) { fmt.Printf("The grand total is %.2f\n", s.State.(float64)) }
144 | 	s.Run(os.Stdin)
145 | }
146 | 
147 | // Output each line preceded by its line number.
148 | func ExampleScript_AppendStmt_nilPattern() {
149 | 	s := awk.NewScript()
150 | 	s.AppendStmt(nil, func(s *awk.Script) { fmt.Printf("%4d %v\n", s.NR, s.F(0)) })
151 | 	s.Run(os.Stdin)
152 | }
153 | 
154 | // Output only rows in which the first column contains a larger number than the
155 | // second column.
156 | func ExampleScript_AppendStmt_nilAction() {
157 | 	s := awk.NewScript()
158 | 	s.AppendStmt(func(s *awk.Script) bool { return s.F(1).Int() > s.F(2).Int() }, nil)
159 | 	s.Run(os.Stdin)
160 | }
161 | 
162 | // Output all input lines that appear between "BEGIN" and "END" inclusive.
163 | func ExampleRange() {
164 | 	s := awk.NewScript()
165 | 	s.AppendStmt(awk.Range(func(s *awk.Script) bool { return s.F(1).StrEqual("BEGIN") },
166 | 		func(s *awk.Script) bool { return s.F(1).StrEqual("END") }),
167 | 		nil)
168 | 	s.Run(os.Stdin)
169 | }
170 | 
171 | // Extract the first column of the input into a slice of strings.
172 | func ExampleBegin() {
173 | 	var data []string
174 | 	s := awk.NewScript()
175 | 	s.Begin = func(s *awk.Script) {
176 | 		s.SetFS(",")
177 | 		data = make([]string, 0)
178 | 	}
179 | 	s.AppendStmt(nil, func(s *awk.Script) { data = append(data, s.F(1).String()) })
180 | 	s.Run(os.Stdin)
181 | }
182 | 
183 | // Output each line with its columns in reverse order.
184 | func ExampleScript_F() {
185 | 	s := awk.NewScript()
186 | 	s.AppendStmt(nil, func(s *awk.Script) {
187 | 		for i := s.NF; i > 0; i-- {
188 | 			if i > 1 {
189 | 				fmt.Printf("%v ", s.F(i))
190 | 			} else {
191 | 				fmt.Printf("%v\n", s.F(i))
192 | 			}
193 | 		}
194 | 	})
195 | 	s.Run(os.Stdin)
196 | }
197 | 
198 | // Allocate and populate a 2-D array.  The diagonal is made up of strings while
199 | // the rest of the array consists of float64 values.
200 | func ExampleValueArray_Set() {
201 | 	va := s.NewValueArray()
202 | 	diag := []string{"Dasher", "Dancer", "Prancer", "Vixen", "Comet", "Cupid", "Dunder", "Blixem"}
203 | 	for i := 0; i < 8; i++ {
204 | 		for j := 0; j < 8; j++ {
205 | 			if i == j {
206 | 				va.Set(i, j, diag[i])
207 | 			} else {
208 | 				va.Set(i, j, float64(i*8+j)/63.0)
209 | 			}
210 | 		}
211 | 	}
212 | }
213 | 
214 | // Sort each line's columns, which are assumed to be floating-point numbers.
215 | func ExampleScript_FFloat64s() {
216 | 	s := awk.NewScript()
217 | 	s.AppendStmt(nil, func(s *awk.Script) {
218 | 		nums := s.FFloat64s()
219 | 		sort.Float64s(nums)
220 | 		for _, n := range nums[:len(nums)-1] {
221 | 			fmt.Printf("%.5g ", n)
222 | 		}
223 | 		fmt.Printf("%.5g\n", nums[len(nums)-1])
224 | 	})
225 | 	s.Run(os.Stdin)
226 | }
227 | 
228 | // Delete the fifth line of the input stream but output all other lines.
229 | func ExampleAuto_int() {
230 | 	s := awk.NewScript()
231 | 	s.AppendStmt(awk.Auto(5), func(s *awk.Script) { s.Next() })
232 | 	s.AppendStmt(nil, nil)
233 | 	s.Run(os.Stdin)
234 | }
235 | 
236 | // Output only those lines containing the string, "fnord".
237 | func ExampleAuto_string() {
238 | 	s := awk.NewScript()
239 | 	s.AppendStmt(awk.Auto("fnord"), nil)
240 | 	s.Run(os.Stdin)
241 | }
242 | 


--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/spakin/awk
2 | 
3 | go 1.14
4 | 


--------------------------------------------------------------------------------
/script.go:
--------------------------------------------------------------------------------
  1 | // This file lets users define and execute AWK-like scripts within Go.
  2 | 
  3 | package awk
  4 | 
  5 | import (
  6 | 	"bufio"
  7 | 	"errors"
  8 | 	"fmt"
  9 | 	"io"
 10 | 	"os"
 11 | 	"regexp"
 12 | 	"strings"
 13 | 	"unicode/utf8"
 14 | )
 15 | 
// A scriptAborter is an error that causes the current script to abort but lets
// the rest of the program run.  abortScript panics with a scriptAborter; Run
// recovers it and hands it back to the caller as an ordinary error value.
type scriptAborter struct{ error }
 19 | 
// A recordStopper is thrown when a script wants to continue immediately with
// the next record.  Next panics with a recordStopper, and the per-record
// action loop in Run recovers it.
type recordStopper struct{ error }
 23 | 
// A parseState indicates where we are in our parsing state.  It is stored in
// Script.state and consulted by methods that are valid only in some phases
// (e.g., SetRS and AppendStmt).
type parseState int

// The following are the possibilities for a parseState.  notRunning is the
// zero value, so a freshly allocated Script is not running.
const (
	notRunning parseState = iota // Before/after Run was called
	atBegin                      // Before any records are read
	inMiddle                     // While records are being read
	atEnd                        // After all records are read
)
 34 | 
// A stopState describes premature stop conditions.  It is stored in
// Script.stop and set by Next, Exit, and abortScript.
type stopState int

// The following are possibilities for a stopState.  dontStop is the zero
// value.
const (
	dontStop   stopState = iota // Normal execution
	stopRec                     // Abort the current record
	stopScript                  // Abort the entire script
)
 44 | 
// Choose arbitrary initial sizes for record and field buffers.  These are the
// starting lengths of the byte slices handed to bufio.Scanner.Buffer; the
// corresponding upper bounds are Script.MaxFieldSize and Script.MaxRecordSize.
const (
	initialFieldSize  = 4096
	initialRecordSize = 4096
)
 50 | 
// A Script encapsulates all of the internal state for an AWK-like script.
// The exported fields mirror AWK's built-in variables and user-tunable limits
// and may be read or assigned directly.  The unexported fields hold the
// separators (changed through the Set* methods), the list of pattern-action
// rules, and the scanning state for the input currently being processed.
// Use NewScript to obtain a Script with its defaults and maps initialized.
type Script struct {
	State         interface{} // Arbitrary, user-supplied data
	Output        io.Writer   // Output stream (defaults to os.Stdout)
	Begin         ActionFunc  // Action to perform before any input is read
	End           ActionFunc  // Action to perform after all input is read
	ConvFmt       string      // Conversion format for numbers, "%.6g" by default
	SubSep        string      // Separator for simulated multidimensional arrays
	NR            int         // Number of input records seen so far
	NF            int         // Number of fields in the current input record
	RT            string      // Actual string terminating the current record
	RStart        int         // 1-based index of the previous regexp match (Value.Match)
	RLength       int         // Length of the previous regexp match (Value.Match)
	MaxRecordSize int         // Maximum number of characters allowed in each record
	MaxFieldSize  int         // Maximum number of characters allowed in each field

	nf0          int                       // Value of NF for which F(0) was computed
	rs           string                    // Input record separator, newline by default
	fs           string                    // Input field separator, space by default
	fieldWidths  []int                     // Fixed-width column sizes
	fPat         string                    // Input field regular expression
	ors          string                    // Output record separator, newline by default
	ofs          string                    // Output field separator, space by default
	ignCase      bool                      // true: REs are case-insensitive; false: case-sensitive
	rules        []statement               // List of pattern-action pairs to execute
	fields       []*Value                  // Fields in the current record; fields[0] is the entire record
	regexps      map[string]*regexp.Regexp // Map from a regular-expression string to a compiled regular expression
	getlineState map[io.Reader]*Script     // Parsing state needed to invoke GetLine repeatedly on a given io.Reader
	rsScanner    *bufio.Scanner            // Scanner associated with RS
	input        io.Reader                 // Script input stream
	state        parseState                // What we're currently parsing
	stop         stopState                 // What we should stop doing
}
 84 | 
 85 | // NewScript initializes a new Script with default values.
 86 | func NewScript() *Script {
 87 | 	return &Script{
 88 | 		Output:        os.Stdout,
 89 | 		ConvFmt:       "%.6g",
 90 | 		SubSep:        "\034",
 91 | 		NR:            0,
 92 | 		NF:            0,
 93 | 		MaxRecordSize: bufio.MaxScanTokenSize,
 94 | 		MaxFieldSize:  bufio.MaxScanTokenSize,
 95 | 		nf0:           0,
 96 | 		rs:            "\n",
 97 | 		fs:            " ",
 98 | 		ors:           "\n",
 99 | 		ofs:           " ",
100 | 		ignCase:       false,
101 | 		rules:         make([]statement, 0, 10),
102 | 		fields:        make([]*Value, 0),
103 | 		regexps:       make(map[string]*regexp.Regexp, 10),
104 | 		getlineState:  make(map[io.Reader]*Script),
105 | 		state:         notRunning,
106 | 	}
107 | }
108 | 
109 | // abortScript aborts the current script with a formatted error message.
110 | func (s *Script) abortScript(format string, a ...interface{}) {
111 | 	s.stop = stopScript
112 | 	panic(scriptAborter{fmt.Errorf(format, a...)})
113 | }
114 | 
115 | // Copy returns a copy of a Script.
116 | func (s *Script) Copy() *Script {
117 | 	sc := *s
118 | 	sc.rules = make([]statement, len(s.rules))
119 | 	copy(sc.rules, s.rules)
120 | 	sc.fieldWidths = make([]int, len(s.fieldWidths))
121 | 	copy(sc.fieldWidths, s.fieldWidths)
122 | 	sc.fields = make([]*Value, len(s.fields))
123 | 	copy(sc.fields, s.fields)
124 | 	sc.regexps = make(map[string]*regexp.Regexp, len(s.regexps))
125 | 	for k, v := range s.regexps {
126 | 		sc.regexps[k] = v
127 | 	}
128 | 	sc.getlineState = make(map[io.Reader]*Script, len(s.getlineState))
129 | 	for k, v := range s.getlineState {
130 | 		sc.getlineState[k] = v
131 | 	}
132 | 	return &sc
133 | }
134 | 
135 | // SetRS sets the input record separator (really, a record terminator).  It is
136 | // invalid to call SetRS after the first record is read.  (It is acceptable to
137 | // call SetRS from a Begin action, though.)  As in AWK, if the record separator
138 | // is a single character, that character is used to separate records; if the
139 | // record separator is multiple characters, it's treated as a regular
140 | // expression (subject to the current setting of Script.IgnoreCase); and if the
141 | // record separator is an empty string, records are separated by blank lines.
142 | // That last case implicitly causes newlines to be accepted as a field
143 | // separator in addition to whatever was specified by SetFS.
144 | func (s *Script) SetRS(rs string) {
145 | 	if s.state == inMiddle {
146 | 		s.abortScript("SetRS was called from a running script")
147 | 	}
148 | 	s.rs = rs
149 | }
150 | 
151 | // SetFS sets the input field separator.  As in AWK, if the field separator is
152 | // a single space (the default), fields are separated by runs of whitespace; if
153 | // the field separator is any other single character, that character is used to
154 | // separate fields; if the field separator is an empty string, each individual
155 | // character becomes a separate field; and if the field separator is multiple
156 | // characters, it's treated as a regular expression (subject to the current
157 | // setting of Script.IgnoreCase).
158 | func (s *Script) SetFS(fs string) {
159 | 	s.fs = fs
160 | 	s.fieldWidths = nil
161 | 	s.fPat = ""
162 | }
163 | 
164 | // SetFieldWidths indicates that each record is composed of fixed-width columns
165 | // and specifies the width in characters of each column.  It is invalid to pass
166 | // SetFieldWidths a nil argument or a non-positive field width.
167 | func (s *Script) SetFieldWidths(fw []int) {
168 | 	// Sanity-check the argument.
169 | 	if fw == nil {
170 | 		s.abortScript("SetFieldWidths was passed a nil slice")
171 | 	}
172 | 	for _, w := range fw {
173 | 		if w <= 0 {
174 | 			s.abortScript(fmt.Sprintf("SetFieldWidths was passed an invalid field width (%d)", w))
175 | 		}
176 | 	}
177 | 
178 | 	// Assign the field widths and reset the field separator and field
179 | 	// matcher (not strictly but consistent with the SetFS method).
180 | 	s.fs = " "
181 | 	s.fieldWidths = fw
182 | 	s.fPat = ""
183 | }
184 | 
185 | // SetFPat defines a "field pattern", a regular expression that matches fields.
186 | // This lies in contrast to providing a regular expression to SetFS, which
187 | // matches the separation between fields, not the fields themselves.
188 | func (s *Script) SetFPat(fp string) {
189 | 	s.fs = " "
190 | 	s.fieldWidths = nil
191 | 	s.fPat = fp
192 | }
193 | 
194 | // recomputeF0 recomputes F(0) by concatenating F(1)...F(NF) with OFS.
195 | func (s *Script) recomputeF0() {
196 | 	if len(s.fields) >= 1 {
197 | 		s.fields[0] = s.NewValue(strings.Join(s.FStrings(), s.ofs))
198 | 	}
199 | 	s.nf0 = s.NF
200 | }
201 | 
202 | // SetORS sets the output record separator.
203 | func (s *Script) SetORS(ors string) { s.ors = ors }
204 | 
205 | // SetOFS sets the output field separator.
206 | func (s *Script) SetOFS(ofs string) {
207 | 	s.ofs = ofs
208 | 	s.recomputeF0()
209 | }
210 | 
211 | // F returns a specified field of the current record.  Field numbers are
212 | // 1-based.  Field 0 refers to the entire record.  Requesting a field greater
213 | // than NF returns a zero value.  Requesting a negative field number panics
214 | // with an out-of-bounds error.
215 | func (s *Script) F(i int) *Value {
216 | 	if i == 0 && s.NF != s.nf0 {
217 | 		s.recomputeF0()
218 | 	}
219 | 	if i < len(s.fields) {
220 | 		return s.fields[i]
221 | 	}
222 | 	return s.NewValue("")
223 | }
224 | 
225 | // SetF sets a field of the current record to the given Value.  Field numbers
226 | // are 1-based.  Field 0 refers to the entire record.  Setting it causes the
227 | // entire line to be reparsed (and NF recomputed).  Setting a field numbered
228 | // larger than NF extends NF to that value.  Setting a negative field number
229 | // panics with an out-of-bounds error.
230 | func (s *Script) SetF(i int, v *Value) {
231 | 	// Zero index: Assign and reparse the entire record.
232 | 	if i == 0 {
233 | 		s.splitRecord(v.String())
234 | 		return
235 | 	}
236 | 
237 | 	// Index larger than NF: extend NF and try again.
238 | 	if i >= len(s.fields) {
239 | 		for i >= len(s.fields) {
240 | 			s.fields = append(s.fields, s.NewValue(""))
241 | 		}
242 | 		s.NF = len(s.fields) - 1
243 | 	}
244 | 
245 | 	// Index not larger than (the possibly modified) NF: write the field.
246 | 	s.fields[i] = v
247 | 
248 | 	// Force F(0) to be recomputed the next time it's accessed.
249 | 	s.nf0 = -1
250 | }
251 | 
252 | // FStrings returns all fields in the current record as a []string of length
253 | // NF.
254 | func (s *Script) FStrings() []string {
255 | 	a := make([]string, s.NF)
256 | 	for i := 0; i < s.NF; i++ {
257 | 		a[i] = s.F(i + 1).String()
258 | 	}
259 | 	return a
260 | }
261 | 
262 | // FInts returns all fields in the current record as a []int of length NF.
263 | func (s *Script) FInts() []int {
264 | 	a := make([]int, s.NF)
265 | 	for i := 0; i < s.NF; i++ {
266 | 		a[i] = s.F(i + 1).Int()
267 | 	}
268 | 	return a
269 | }
270 | 
271 | // FFloat64s returns all fields in the current record as a []float64 of length
272 | // NF.
273 | func (s *Script) FFloat64s() []float64 {
274 | 	a := make([]float64, s.NF)
275 | 	for i := 0; i < s.NF; i++ {
276 | 		a[i] = s.F(i + 1).Float64()
277 | 	}
278 | 	return a
279 | }
280 | 
281 | // IgnoreCase specifies whether regular-expression and string comparisons
282 | // should be performed in a case-insensitive manner.
283 | func (s *Script) IgnoreCase(ign bool) {
284 | 	s.ignCase = ign
285 | }
286 | 
287 | // Println is like fmt.Println but honors the current output stream, output
288 | // field separator, and output record separator.  If called with no arguments,
289 | // Println outputs all fields in the current record.
290 | func (s *Script) Println(args ...interface{}) {
291 | 	// No arguments: Output all fields of the current record.
292 | 	if args == nil {
293 | 		for i := 1; i <= s.NF; i++ {
294 | 			fmt.Fprintf(s.Output, "%v", s.F(i))
295 | 			if i == s.NF {
296 | 				fmt.Fprintf(s.Output, "%s", s.ors)
297 | 			} else {
298 | 				fmt.Fprintf(s.Output, "%s", s.ofs)
299 | 			}
300 | 		}
301 | 		return
302 | 	}
303 | 
304 | 	// One or more arguments: Output them.
305 | 	for i, arg := range args {
306 | 		fmt.Fprintf(s.Output, "%v", arg)
307 | 		if i == len(args)-1 {
308 | 			fmt.Fprintf(s.Output, "%s", s.ors)
309 | 		} else {
310 | 			fmt.Fprintf(s.Output, "%s", s.ofs)
311 | 		}
312 | 	}
313 | }
314 | 
// A PatternFunc represents a pattern to match against.  It is expected to
// examine the state of the given Script then return either true or false.  If
// it returns true, the corresponding ActionFunc is executed.  Otherwise, the
// corresponding ActionFunc is not executed.  Auto and Range construct
// PatternFunc values for common cases.
type PatternFunc func(*Script) bool
320 | 
// An ActionFunc represents an action to perform when the corresponding
// PatternFunc returns true.  It receives the Script being run.  The Begin and
// End fields of a Script are also ActionFunc values.
type ActionFunc func(*Script)
324 | 
// A statement represents a single pattern-action pair.  AppendStmt creates
// statements and substitutes matchAny and printRecord for a nil Pattern and a
// nil Action, respectively.
type statement struct {
	Pattern PatternFunc
	Action  ActionFunc
}
330 | 
331 | // The matchAny pattern is true only in the middle of a script, when a record
332 | // is available for parsing.
333 | func matchAny(s *Script) bool {
334 | 	return s.state == inMiddle
335 | }
336 | 
337 | // The printRecord statement outputs the current record verbatim to the current
338 | // output stream.
339 | func printRecord(s *Script) {
340 | 	fmt.Fprintf(s.Output, "%v%s", s.fields[0], s.ors)
341 | }
342 | 
343 | // Next stops processing the current record and proceeds with the next record.
344 | func (s *Script) Next() {
345 | 	if s.stop == dontStop {
346 | 		s.stop = stopRec
347 | 	}
348 | 	panic(recordStopper{errors.New("Unexpected Next invocation")}) // Unexpected if we don't catch it
349 | }
350 | 
351 | // Exit stops processing the entire script, causing the Run method to return.
352 | func (s *Script) Exit() {
353 | 	if s.stop == dontStop {
354 | 		s.stop = stopScript
355 | 	}
356 | }
357 | 
358 | // Range combines two patterns into a single pattern that statefully returns
359 | // true between the time the first and second pattern become true (both
360 | // inclusively).
361 | func Range(p1, p2 PatternFunc) PatternFunc {
362 | 	inRange := false
363 | 	return func(s *Script) bool {
364 | 		if inRange {
365 | 			inRange = !p2(s)
366 | 			return true
367 | 		}
368 | 		inRange = p1(s)
369 | 		return inRange
370 | 	}
371 | }
372 | 
373 | // Auto provides a simplified mechanism for creating various common-case
374 | // PatternFunc functions.  It accepts zero, one, or an even number of
375 | // arguments.  If given no arguments, it matches every record.  If given a
376 | // single argument, its behavior depends on that argument's type:
377 | //
378 | // • A Script.PatternFunc is returned as is.
379 | //
380 | // • A *regexp.Regexp returns a function that matches that regular expression
381 | // against the entire record.
382 | //
383 | // • A string is treated as a regular expression and behaves likewise.
384 | //
385 | // • An int returns a function that matches that int against NR.
386 | //
387 | // • Any other type causes a run-time panic.
388 | //
389 | // If given an even number of arguments, pairs of arguments are treated as
390 | // ranges (cf. the Range function).  The PatternFunc returns true if the record
391 | // lies within any of the ranges.
392 | func Auto(v ...interface{}) PatternFunc {
393 | 	if len(v) == 0 {
394 | 		// No arguments: Match anything.
395 | 		return matchAny
396 | 	}
397 | 	if len(v)%2 == 0 {
398 | 		// Even number of arguments other than 0: Return a disjunction
399 | 		// of ranges.
400 | 		fList := make([]PatternFunc, len(v)/2)
401 | 		for i := 0; i < len(v); i += 2 {
402 | 			f1 := Auto(v[i])
403 | 			f2 := Auto(v[i+1])
404 | 			fList[i/2] = Range(f1, f2)
405 | 		}
406 | 		return func(s *Script) bool {
407 | 			// Return true iff any range is true.  Note that we
408 | 			// always evaluate every range to avoid confusing
409 | 			// results because of statefulness.
410 | 			m := false
411 | 			for _, f := range fList {
412 | 				if f(s) {
413 | 					m = true
414 | 				}
415 | 			}
416 | 			return m
417 | 		}
418 | 	}
419 | 	if len(v)%2 == 1 {
420 | 		// Single argument: Decide what to do based on its type.
421 | 		switch x := v[0].(type) {
422 | 		case PatternFunc:
423 | 			// Already a PatternFunc: Return it unmodified.
424 | 			return x
425 | 		case string:
426 | 			// String: Treat as a regular expression that matches
427 | 			// against F[0].
428 | 			return func(s *Script) bool {
429 | 				r, err := s.compileRegexp(x)
430 | 				if err != nil {
431 | 					s.abortScript(err.Error())
432 | 				}
433 | 				return r.MatchString(s.F(0).String())
434 | 			}
435 | 		case int:
436 | 			// Integer: Match against NR.
437 | 			return func(s *Script) bool {
438 | 				return s.NR == x
439 | 			}
440 | 		case *regexp.Regexp:
441 | 			// Regular expression: Convert to a string then,
442 | 			// dynamically, back to a regular expression.  This
443 | 			// enables dynamic toggling of case sensitivity.
444 | 			xs := x.String()
445 | 			return func(s *Script) bool {
446 | 				r, err := s.compileRegexp(xs)
447 | 				if err != nil {
448 | 					s.abortScript(err.Error())
449 | 				}
450 | 				return r.MatchString(s.F(0).String())
451 | 			}
452 | 		default:
453 | 			panic(fmt.Sprintf("Auto does not accept arguments of type %T", x))
454 | 		}
455 | 	}
456 | 	panic("Auto expects 0, 1, or an even number of arguments")
457 | }
458 | 
459 | // AppendStmt appends a pattern-action pair to a Script.  If the pattern
460 | // function is nil, the action will be performed on every record.  If the
461 | // action function is nil, the record will be output verbatim to the standard
462 | // output device.  It is invalid to call AppendStmt from a running script.
463 | func (s *Script) AppendStmt(p PatternFunc, a ActionFunc) {
464 | 	// Panic if we were called on a running script.
465 | 	if s.state != notRunning {
466 | 		s.abortScript("AppendStmt was called from a running script")
467 | 	}
468 | 
469 | 	// Append a statement to the list of rules.
470 | 	stmt := statement{
471 | 		Pattern: p,
472 | 		Action:  a,
473 | 	}
474 | 	if p == nil {
475 | 		stmt.Pattern = matchAny
476 | 	}
477 | 	if a == nil {
478 | 		stmt.Action = printRecord
479 | 	}
480 | 	s.rules = append(s.rules, stmt)
481 | }
482 | 
483 | // compileRegexp caches and returns the result of regexp.Compile.  It
484 | // automatically prepends "(?i)" to the expression if the script is currently
485 | // set to perform case-insensitive regular-expression matching.
486 | func (s *Script) compileRegexp(expr string) (*regexp.Regexp, error) {
487 | 	if s.ignCase {
488 | 		expr = "(?i)" + expr
489 | 	}
490 | 	re, found := s.regexps[expr]
491 | 	if found {
492 | 		return re, nil
493 | 	}
494 | 	var err error
495 | 	re, err = regexp.Compile(expr)
496 | 	if err != nil {
497 | 		return nil, err
498 | 	}
499 | 	s.regexps[expr] = re
500 | 	return re, nil
501 | }
502 | 
503 | // makeSingleCharFieldSplitter returns a splitter that returns the next field
504 | // by splitting on a single character (except for space, which is a special
505 | // case handled elsewhere).
506 | func (s *Script) makeSingleCharFieldSplitter() func([]byte, bool) (int, []byte, error) {
507 | 	// Ensure the separator character is valid.
508 | 	firstRune, _ := utf8.DecodeRuneInString(s.fs)
509 | 	if firstRune == utf8.RuneError {
510 | 		return func(data []byte, atEOF bool) (int, []byte, error) {
511 | 			return 0, nil, errors.New("Invalid rune in separator")
512 | 		}
513 | 	}
514 | 
515 | 	// The separator is valid.  Return a splitter customized to that
516 | 	// separator.
517 | 	returnedFinalToken := false // true=already returned a final, non-terminated token; false=didn't
518 | 	return func(data []byte, atEOF bool) (advance int, token []byte, err error) {
519 | 		// Scan until we see a separator or run out of data.
520 | 		for width, i := 0, 0; i < len(data); i += width {
521 | 			var r rune
522 | 			r, width = utf8.DecodeRune(data[i:])
523 | 			if r == utf8.RuneError && i+width >= len(data) && !atEOF {
524 | 				// Invalid rune at the end of the data.
525 | 				// Request more data and try again.
526 | 				return 0, nil, nil
527 | 			}
528 | 			if r == firstRune {
529 | 				return i + width, data[:i], nil
530 | 			}
531 | 		}
532 | 
533 | 		// We didn't see a separator.  If we're at EOF, we have
534 | 		// a final, non-terminated token.  Return it (unless we
535 | 		// already did).
536 | 		if atEOF && !returnedFinalToken {
537 | 			returnedFinalToken = true
538 | 			return len(data), data, nil
539 | 		}
540 | 
541 | 		// Request more data.
542 | 		return 0, nil, nil
543 | 	}
544 | }
545 | 
546 | // makeREFieldSplitter returns a splitter that returns the next field by
547 | // splitting on a regular expression.
548 | func (s *Script) makeREFieldSplitter() func([]byte, bool) (int, []byte, error) {
549 | 	// Ensure that the regular expression is valid.
550 | 	var sepRegexp *regexp.Regexp
551 | 	var err error
552 | 	if s.rs == "" {
553 | 		// A special case in AWK is that if the record terminator is
554 | 		// empty (implying a blank line) then newlines are accepted as
555 | 		// a field separator in addition to whatever is specified for
556 | 		// FS.
557 | 		sepRegexp, err = s.compileRegexp(`(` + s.fs + `)|(\r?\n)`)
558 | 	} else {
559 | 		sepRegexp, err = s.compileRegexp(s.fs)
560 | 	}
561 | 	if err != nil {
562 | 		return func(data []byte, atEOF bool) (int, []byte, error) {
563 | 			return 0, nil, err
564 | 		}
565 | 	}
566 | 
567 | 	// The regular expression is valid.  Return a splitter customized to
568 | 	// that regular expression.
569 | 	returnedFinalToken := false // true=already returned a final, non-terminated token; false=didn't
570 | 	return func(data []byte, atEOF bool) (advance int, token []byte, err error) {
571 | 		// If we match the regular expression, return everything up to
572 | 		// the match.
573 | 		loc := sepRegexp.FindIndex(data)
574 | 		if loc != nil {
575 | 			return loc[1], data[:loc[0]], nil
576 | 		}
577 | 
578 | 		// We didn't see a separator.  If we're at EOF, we have a
579 | 		// final, non-terminated token.  Return it (unless we already
580 | 		// did).
581 | 		if atEOF && !returnedFinalToken {
582 | 			returnedFinalToken = true
583 | 			return len(data), data, nil
584 | 		}
585 | 
586 | 		// Request more data.
587 | 		return 0, nil, nil
588 | 	}
589 | }
590 | 
591 | // makeFixedFieldSplitter returns a splitter than returns the next field by
592 | // splitting a record into fixed-size chunks.
593 | func (s *Script) makeFixedFieldSplitter() func([]byte, bool) (int, []byte, error) {
594 | 	f := 0                      // Index into s.fieldWidths
595 | 	returnedFinalToken := false // true=already returned a final, non-terminated token; false=didn't
596 | 	return func(data []byte, atEOF bool) (advance int, token []byte, err error) {
597 | 		// If we've exhausted s.fieldWidths, return empty-handed.
598 | 		if f >= len(s.fieldWidths) {
599 | 			return 0, nil, nil
600 | 		}
601 | 
602 | 		// If we have enough characters for the current field, return a
603 | 		// token and advance to the next field.
604 | 		fw := s.fieldWidths[f]
605 | 		if len(data) >= fw {
606 | 			f++
607 | 			return fw, data[:fw], nil
608 | 		}
609 | 
610 | 		// If we don't have enough characters for the current field but
611 | 		// we're at EOF, return whatever we have (unless we already
612 | 		// did).
613 | 		if atEOF && !returnedFinalToken {
614 | 			returnedFinalToken = true
615 | 			return len(data), data, nil
616 | 		}
617 | 
618 | 		// If we don't have enough characters for the current field and
619 | 		// we're not at EOF, request more data.
620 | 		return 0, nil, nil
621 | 	}
622 | }
623 | 
624 | // makeREFieldMatcher returns a splitter that returns the next field by
625 | // matching against a regular expression.
626 | func (s *Script) makeREFieldMatcher() func([]byte, bool) (int, []byte, error) {
627 | 	// Ensure that the regular expression is valid.
628 | 	sepRegexp, err := s.compileRegexp(s.fPat)
629 | 	if err != nil {
630 | 		return func(data []byte, atEOF bool) (int, []byte, error) {
631 | 			return 0, nil, err
632 | 		}
633 | 	}
634 | 
635 | 	// The regular expression is valid.  Return a splitter customized to
636 | 	// that regular expression.
637 | 	return func(data []byte, atEOF bool) (advance int, token []byte, err error) {
638 | 		// If we match the regular expression, return the match.
639 | 		// Otherwise, request more data.
640 | 		loc := sepRegexp.FindIndex(data)
641 | 		if loc == nil {
642 | 			return 0, nil, nil
643 | 		}
644 | 		return loc[1], data[loc[0]:loc[1]], nil
645 | 	}
646 | }
647 | 
648 | // makeFieldSplitter returns a splitter that returns the next field.
649 | func (s *Script) makeFieldSplitter() func([]byte, bool) (int, []byte, error) {
650 | 	// If we were given fixed field widths, use them.
651 | 	if s.fieldWidths != nil {
652 | 		return s.makeFixedFieldSplitter()
653 | 	}
654 | 
655 | 	// If were given a field-matching regular expression, use it.
656 | 	if s.fPat != "" {
657 | 		return s.makeREFieldMatcher()
658 | 	}
659 | 
660 | 	// If the separator is empty, each rune is a separate field.
661 | 	if s.fs == "" {
662 | 		return bufio.ScanRunes
663 | 	}
664 | 
665 | 	// If the separator is a single space, return the next word as the
666 | 	// field.
667 | 	if s.fs == " " {
668 | 		return bufio.ScanWords
669 | 	}
670 | 
671 | 	// If the separator is a single character and the record terminator is
672 | 	// not empty (a special case in AWK), split based on that.  This code
673 | 	// is derived from the bufio.ScanWords source.
674 | 	if utf8.RuneCountInString(s.fs) == 1 && s.rs != "" {
675 | 		return s.makeSingleCharFieldSplitter()
676 | 	}
677 | 
678 | 	// If the separator is multiple characters (or the record terminator is
679 | 	// empty), treat it as a regular expression, and scan based on that.
680 | 	return s.makeREFieldSplitter()
681 | }
682 | 
683 | // makeRecordSplitter returns a splitter that returns the next record.
684 | // Although all the AWK documentation I've read define RS as a record
685 | // separator, as far as I can tell, AWK in fact treats it as a record
686 | // *terminator* so we do, too.
687 | func (s *Script) makeRecordSplitter() func([]byte, bool) (int, []byte, error) {
688 | 	// If the terminator is a single character, scan based on that.  This
689 | 	// code is derived from the bufio.ScanWords source.
690 | 	if utf8.RuneCountInString(s.rs) == 1 {
691 | 		// Ensure the terminator character is valid.
692 | 		firstRune, _ := utf8.DecodeRuneInString(s.rs)
693 | 		if firstRune == utf8.RuneError {
694 | 			return func(data []byte, atEOF bool) (int, []byte, error) {
695 | 				return 0, nil, errors.New("Invalid rune in terminator")
696 | 			}
697 | 		}
698 | 
699 | 		// The terminator is valid.  Return a splitter customized to
700 | 		// that terminator.
701 | 		return func(data []byte, atEOF bool) (advance int, token []byte, err error) {
702 | 			// Scan until we see a terminator or run out of data.
703 | 			s.RT = string(firstRune)
704 | 			for width, i := 0, 0; i < len(data); i += width {
705 | 				var r rune
706 | 				r, width = utf8.DecodeRune(data[i:])
707 | 				if r == utf8.RuneError && i+width >= len(data) && !atEOF {
708 | 					// Invalid rune at the end of the data.
709 | 					// Request more data and try again.
710 | 					return 0, nil, nil
711 | 				}
712 | 				if r == firstRune {
713 | 					return i + width, data[:i], nil
714 | 				}
715 | 			}
716 | 
717 | 			// We didn't see a terminator.  If we're at EOF, we
718 | 			// have a final, non-terminated token.  Return it if
719 | 			// it's nonempty.
720 | 			if atEOF && len(data) > 0 {
721 | 				return len(data), data, nil
722 | 			}
723 | 
724 | 			// Request more data.
725 | 			return 0, nil, nil
726 | 		}
727 | 	}
728 | 
729 | 	// If the terminator is multiple characters, treat it as a regular
730 | 	// expression, and scan based on that.  Or, as a special case, if the
731 | 	// terminator is empty, we treat it as a regular expression
732 | 	// representing one or more blank lines.
733 | 	return func(data []byte, atEOF bool) (advance int, token []byte, err error) {
734 | 		// Generate a regular expression based on the current RS and
735 | 		// IgnoreCase.
736 | 		var termRegexp *regexp.Regexp
737 | 		if s.rs == "" {
738 | 			termRegexp, err = s.compileRegexp(`\r?\n(\r?\n)+`)
739 | 		} else {
740 | 			termRegexp, err = s.compileRegexp(s.rs)
741 | 		}
742 | 		if err != nil {
743 | 			return 0, nil, err
744 | 		}
745 | 
746 | 		// If we match the regular expression, return everything up to
747 | 		// the match.
748 | 		loc := termRegexp.FindIndex(data)
749 | 		if loc != nil {
750 | 			s.RT = string(data[loc[0]:loc[1]])
751 | 			return loc[1], data[:loc[0]], nil
752 | 		}
753 | 
754 | 		// We didn't see a terminator.  If we're at EOF, we have a
755 | 		// final, non-terminated token.  Return it if it's nonempty.
756 | 		if atEOF && len(data) > 0 {
757 | 			s.RT = ""
758 | 			return len(data), data, nil
759 | 		}
760 | 
761 | 		// Request more data.
762 | 		return 0, nil, nil
763 | 	}
764 | }
765 | 
766 | // Read the next record from a stream and return it.
767 | func (s *Script) readRecord() (string, error) {
768 | 	// Return the next record.
769 | 	if s.rsScanner.Scan() {
770 | 		return s.rsScanner.Text(), nil
771 | 	}
772 | 	if err := s.rsScanner.Err(); err != nil {
773 | 		return "", err
774 | 	}
775 | 	return "", io.EOF
776 | }
777 | 
778 | // splitRecord splits a record into fields.  It stores the fields in the Script
779 | // struct's F field and update NF.  As in real AWK, field 0 is the entire
780 | // record.
781 | func (s *Script) splitRecord(rec string) error {
782 | 	fsScanner := bufio.NewScanner(strings.NewReader(rec))
783 | 	fsScanner.Buffer(make([]byte, initialFieldSize), s.MaxFieldSize)
784 | 	fsScanner.Split(s.makeFieldSplitter())
785 | 	fields := make([]*Value, 0, 100)
786 | 	fields = append(fields, s.NewValue(rec))
787 | 	for fsScanner.Scan() {
788 | 		fields = append(fields, s.NewValue(fsScanner.Text()))
789 | 	}
790 | 	if err := fsScanner.Err(); err != nil {
791 | 		return err
792 | 	}
793 | 	s.fields = fields
794 | 	s.NF = len(fields) - 1
795 | 	s.nf0 = s.NF
796 | 	return nil
797 | }
798 | 
799 | // GetLine reads the next record from an input stream and returns it.  If the
800 | // argument to GetLine is nil, GetLine reads from the current input stream and
801 | // increments NR.  Otherwise, it reads from the given io.Reader and does not
802 | // increment NR.  Call SetF(0, ...) on the Value returned by GetLine to perform
803 | // the equivalent of AWK's getline with no variable argument.
804 | func (s *Script) GetLine(r io.Reader) (*Value, error) {
805 | 	// Handle the simpler case of a nil argument (to read from the current
806 | 	// input stream).
807 | 	if r == nil {
808 | 		rec, err := s.readRecord()
809 | 		if err != nil {
810 | 			return nil, err
811 | 		}
812 | 		s.NR++
813 | 		return s.NewValue(rec), nil
814 | 	}
815 | 
816 | 	// If we've seen this io.Reader before, reuse its parsing state.
817 | 	// Otherwise, create a new Script for storing state.
818 | 	sc := s.getlineState[r]
819 | 	if sc == nil {
820 | 		// Copy the given script so we don't alter any of the original
821 | 		// script's state.
822 | 		sc = s.Copy()
823 | 		s.getlineState[r] = sc
824 | 
825 | 		// Create (and store) a new scanner based on the record
826 | 		// terminator.
827 | 		sc.input = r
828 | 		sc.rsScanner = bufio.NewScanner(sc.input)
829 | 		sc.rsScanner.Buffer(make([]byte, initialRecordSize), sc.MaxRecordSize)
830 | 		sc.rsScanner.Split(sc.makeRecordSplitter())
831 | 	}
832 | 
833 | 	// Read a record from the given reader.
834 | 	rec, err := sc.readRecord()
835 | 	if err != nil {
836 | 		return nil, err
837 | 	}
838 | 	return sc.NewValue(rec), nil
839 | }
840 | 
841 | // Run executes a script against a given input stream.  It is perfectly valid
842 | // to run the same script on multiple input streams.
843 | func (s *Script) Run(r io.Reader) (err error) {
844 | 	// Catch scriptAborter panics and return them as errors.  Re-throw all
845 | 	// other panics.
846 | 	defer func() {
847 | 		if r := recover(); r != nil {
848 | 			if e, ok := r.(scriptAborter); ok {
849 | 				err = e
850 | 			} else {
851 | 				panic(r)
852 | 			}
853 | 		}
854 | 	}()
855 | 
856 | 	// Reinitialize most of our state.
857 | 	s.input = r
858 | 	s.ConvFmt = "%.6g"
859 | 	s.NF = 0
860 | 	s.NR = 0
861 | 
862 | 	// Process the Begin action, if any.
863 | 	if s.Begin != nil {
864 | 		s.state = atBegin
865 | 		s.Begin(s)
866 | 	}
867 | 
868 | 	// Create (and store) a new scanner based on the record terminator.
869 | 	s.rsScanner = bufio.NewScanner(s.input)
870 | 	s.rsScanner.Buffer(make([]byte, initialRecordSize), s.MaxRecordSize)
871 | 	s.rsScanner.Split(s.makeRecordSplitter())
872 | 
873 | 	// Process each record in turn.
874 | 	s.state = inMiddle
875 | 	for {
876 | 		// Read a record.
877 | 		s.stop = dontStop
878 | 		rec, err := s.readRecord()
879 | 		if err != nil {
880 | 			if err == io.EOF {
881 | 				break
882 | 			}
883 | 			return err
884 | 		}
885 | 		s.NR++
886 | 
887 | 		// Split the record into its constituent fields.
888 | 		err = s.splitRecord(rec)
889 | 		if err != nil {
890 | 			return err
891 | 		}
892 | 
893 | 		// Process all applicable actions.
894 | 		func() {
895 | 			// An action is able to break out of the
896 | 			// action-processing loop by calling Next, which throws
897 | 			// a recordStopper.  We catch that and continue
898 | 			// with the next record.
899 | 			defer func() {
900 | 				if r := recover(); r != nil {
901 | 					if _, ok := r.(recordStopper); !ok {
902 | 						panic(r)
903 | 					}
904 | 				}
905 | 			}()
906 | 
907 | 			// Perform each action whose pattern matches the
908 | 			// current record.
909 | 			for _, rule := range s.rules {
910 | 				if rule.Pattern(s) {
911 | 					rule.Action(s)
912 | 					if s.stop != dontStop {
913 | 						break
914 | 					}
915 | 				}
916 | 			}
917 | 		}()
918 | 
919 | 		// Stop the script if an error occurred or an action calls  Exit.
920 | 		if s.stop == stopScript {
921 | 			return nil
922 | 		}
923 | 	}
924 | 
925 | 	// Process the End action, if any.
926 | 	if s.End != nil {
927 | 		s.state = atEnd
928 | 		s.End(s)
929 | 	}
930 | 	s.state = notRunning
931 | 	return nil
932 | }
933 | 
934 | // RunPipeline chains together a set of scripts into a pipeline, with each
935 | // script sending its output to the next.  (Implication: Script.Output will be
936 | // overwritten in all but the last script.)  If any script in the pipeline
937 | // fails, a non-nil error will be returned.
938 | func RunPipeline(r io.Reader, ss ...*Script) error {
939 | 	// Spawn scripts in reverse order so they begin blocked on input.
940 | 	eChan := make(chan error, len(ss))
941 | 	for i := len(ss) - 1; i > 0; i-- {
942 | 		s := ss[i]
943 | 		pr, pw := io.Pipe()
944 | 		ss[i-1].Output = pw
945 | 		go func(i int, pr *io.PipeReader) {
946 | 			eChan <- s.Run(pr)
947 | 			if i < len(ss)-1 {
948 | 				ss[i].Output.(*io.PipeWriter).Close()
949 | 			}
950 | 		}(i, pr)
951 | 	}
952 | 
953 | 	// Spawn the first script to enable the rest to begin.
954 | 	go func() {
955 | 		eChan <- ss[0].Run(r)
956 | 		if len(ss) > 1 {
957 | 			ss[0].Output.(*io.PipeWriter).Close()
958 | 		}
959 | 	}()
960 | 
961 | 	// Wait for all scripts to finish.
962 | 	for range ss {
963 | 		err := <-eChan
964 | 		if err != nil {
965 | 			// Error -- close all output pipes then return.
966 | 			for j := 0; j < len(ss)-1; j++ {
967 | 				ss[j].Output.(*io.PipeWriter).Close()
968 | 			}
969 | 			return err
970 | 		}
971 | 	}
972 | 	return nil
973 | }
974 | 


--------------------------------------------------------------------------------
/script_test.go:
--------------------------------------------------------------------------------
   1 | // This file tests script primitives.
   2 | 
   3 | package awk
   4 | 
   5 | import (
   6 | 	"bufio"
   7 | 	"bytes"
   8 | 	"fmt"
   9 | 	"io"
  10 | 	"regexp"
  11 | 	"sort"
  12 | 	"strings"
  13 | 	"testing"
  14 | )
  15 | 
  16 | // TestReadRecordNewline tests reading newline-separated records.
  17 | func TestReadRecordNewline(t *testing.T) {
  18 | 	// Define the basic test we plan to repeat.
  19 | 	allRecords := []string{"X", "Word", "More than one word", "", "More text"}
  20 | 	allRecordsStr := strings.Join(allRecords, "\n")
  21 | 	scr := NewScript()
  22 | 	doTest := func() {
  23 | 		scr.input = bufio.NewReader(strings.NewReader(allRecordsStr))
  24 | 		scr.SetRS("\n")
  25 | 		scr.rsScanner = bufio.NewScanner(scr.input)
  26 | 		scr.rsScanner.Split(scr.makeRecordSplitter())
  27 | 		for _, oneRecord := range allRecords {
  28 | 			rec, err := scr.readRecord()
  29 | 			if err != nil {
  30 | 				t.Fatal(err)
  31 | 			}
  32 | 			if rec != oneRecord {
  33 | 				t.Fatalf("Expected %q but received %q", oneRecord, rec)
  34 | 			}
  35 | 		}
  36 | 	}
  37 | 
  38 | 	// Test with no trailing newline.
  39 | 	doTest()
  40 | 
  41 | 	// Test with a trailing newline.
  42 | 	allRecordsStr += "\n"
  43 | 	doTest()
  44 | }
  45 | 
  46 | // TestReadRecordWhitespace tests reading whitespace-separated records.
  47 | func TestReadRecordWhitespace(t *testing.T) {
  48 | 	allRecordsStr := "  banana banana banana  banana   banana banana\tbanana banana\nbanana banana"
  49 | 	want := []string{
  50 | 		"",
  51 | 		"",
  52 | 		"banana",
  53 | 		"banana",
  54 | 		"banana",
  55 | 		"",
  56 | 		"banana",
  57 | 		"",
  58 | 		"",
  59 | 		"banana",
  60 | 		"banana\tbanana",
  61 | 		"banana\nbanana",
  62 | 		"banana",
  63 | 	}
  64 | 	scr := NewScript()
  65 | 	scr.input = bufio.NewReader(strings.NewReader(allRecordsStr))
  66 | 	scr.SetRS(" ")
  67 | 	scr.rsScanner = bufio.NewScanner(scr.input)
  68 | 	scr.rsScanner.Split(scr.makeRecordSplitter())
  69 | 	for _, str := range want {
  70 | 		rec, err := scr.readRecord()
  71 | 		if err != nil {
  72 | 			t.Fatal(err)
  73 | 		}
  74 | 		if rec != str {
  75 | 			t.Fatalf("Expected %q but received %q", str, rec)
  76 | 		}
  77 | 	}
  78 | }
  79 | 
  80 | // TestReadRecordRE tests reading regular-expression-separated records.
  81 | func TestReadRecordRE(t *testing.T) {
  82 | 	allRecordsStr := "hello<foo>howdy</foo>hello<bar>yellow</bar>hello<baz>goodbye</baz>"
  83 | 	scr := NewScript()
  84 | 	scr.input = bufio.NewReader(strings.NewReader(allRecordsStr))
  85 | 	scr.SetRS(`<[^>]+>[^<]*<[^>]+>`)
  86 | 	scr.rsScanner = bufio.NewScanner(scr.input)
  87 | 	scr.rsScanner.Split(scr.makeRecordSplitter())
  88 | 	for i := 0; i < 3; i++ {
  89 | 		rec, err := scr.readRecord()
  90 | 		if err != nil {
  91 | 			t.Fatal(err)
  92 | 		}
  93 | 		if rec != "hello" {
  94 | 			t.Fatalf("Expected %q but received %q", "hello", rec)
  95 | 		}
  96 | 	}
  97 | }
  98 | 
  99 | // TestSplitRecordWhitespace tests splitting a record into whitespace-separated
 100 | // fields.
 101 | func TestSplitRecordWhitespace(t *testing.T) {
 102 | 	recordStr := "The woods are lovely,  dark and    deep,"
 103 | 	fields := regexp.MustCompile(`\s+`).Split(recordStr, -1)
 104 | 	scr := NewScript()
 105 | 	scr.splitRecord(recordStr)
 106 | 	for i, f := range fields {
 107 | 		if scr.F(i+1).String() != f {
 108 | 			t.Fatalf("Expected %q but received %q", f, scr.F(i+1))
 109 | 		}
 110 | 	}
 111 | }
 112 | 
 113 | // TestSplitRecordComma tests splitting a record into comma-separated fields.
 114 | func TestSplitRecordComma(t *testing.T) {
 115 | 	recordStr := "The woods are lovely,  dark and    deep,"
 116 | 	fields := strings.Split(recordStr, ",")
 117 | 	scr := NewScript()
 118 | 	scr.SetFS(",")
 119 | 	scr.splitRecord(recordStr)
 120 | 	for i, f := range fields {
 121 | 		if scr.F(i+1).String() != f {
 122 | 			t.Fatalf("Expected %q but received %q", f, scr.F(i+1))
 123 | 		}
 124 | 	}
 125 | }
 126 | 
 127 | // TestSplitFieldRE tests splitting a field based on a regular expression.
 128 | func TestSplitFieldRE(t *testing.T) {
 129 | 	// Determine what we want to provide and see in return.
 130 | 	recordStr := "foo-bar---baz------------quux--corge-grault---garply-"
 131 | 	re, err := regexp.Compile(`\w+`)
 132 | 	if err != nil {
 133 | 		t.Fatal(err)
 134 | 	}
 135 | 	words := re.FindAllString(recordStr, -1)
 136 | 	words = append(words, "")
 137 | 
 138 | 	// Split the record.
 139 | 	scr := NewScript()
 140 | 	scr.SetFS("-+")
 141 | 	scr.splitRecord(recordStr)
 142 | 
 143 | 	// Check the result.
 144 | 	for i := 1; i <= scr.NF; i++ {
 145 | 		f := scr.F(i).String()
 146 | 		if f != words[i-1] {
 147 | 			t.Fatalf("Expected %q for field %d but received %q", words[i-1], i, f)
 148 | 		}
 149 | 	}
 150 | }
 151 | 
 152 | // TestSplitFieldREIgnCase tests splitting a field based on a case-insensitive
 153 | // regular expression.
 154 | func TestSplitFieldREIgnCase(t *testing.T) {
 155 | 	// Determine what we want to provide and see in return.
 156 | 	recordStr := "fooxbarXxxbazxxXXxxxXxxXxquucksxXcorgexgraultxxxgarplyx"
 157 | 	re, err := regexp.Compile(`[fobarzqucksgeltpy]+`)
 158 | 	if err != nil {
 159 | 		t.Fatal(err)
 160 | 	}
 161 | 	words := re.FindAllString(recordStr, -1)
 162 | 	words = append(words, "")
 163 | 
 164 | 	// Split the record.
 165 | 	scr := NewScript()
 166 | 	scr.SetFS("x+")
 167 | 	scr.IgnoreCase(true)
 168 | 	err = scr.splitRecord(recordStr)
 169 | 	if err != nil {
 170 | 		t.Fatal(err)
 171 | 	}
 172 | 
 173 | 	// Check the result.
 174 | 	for i := 1; i <= scr.NF; i++ {
 175 | 		f := scr.F(i).String()
 176 | 		if f != words[i-1] {
 177 | 			t.Fatalf("Expected %q for field %d but received %q", words[i-1], i, f)
 178 | 		}
 179 | 	}
 180 | }
 181 | 
 182 | // TestSplitFieldFixed tests splitting a field based on fixed-width columns.
 183 | func TestSplitFieldFixed(t *testing.T) {
 184 | 	// Determine what we want to provide and see in return.
 185 | 	inputStr := "CeterumcenseoCarthaginemessedelendam."
 186 | 	desiredOutput := []string{"Ceterum", "censeo", "Carthaginem", "esse", "delendam."}
 187 | 
 188 | 	// Split the record.
 189 | 	scr := NewScript()
 190 | 	scr.SetFieldWidths([]int{7, 6, 11, 4, 123})
 191 | 	err := scr.splitRecord(inputStr)
 192 | 	if err != nil {
 193 | 		t.Fatal(err)
 194 | 	}
 195 | 
 196 | 	// Check the result.
 197 | 	for i := 1; i <= scr.NF; i++ {
 198 | 		f := scr.F(i).String()
 199 | 		if f != desiredOutput[i-1] {
 200 | 			t.Fatalf("Expected %q for field %d but received %q", desiredOutput[i-1], i, f)
 201 | 		}
 202 | 	}
 203 | }
 204 | 
 205 | // TestSplitFieldREPat tests splitting a field based on a field-matching
 206 | // regular expression.
 207 | func TestSplitFieldREPat(t *testing.T) {
 208 | 	// Determine what we want to provide and see in return.
 209 | 	inputStr := "23 Skidoo.  3-2-1 blast off!  99 red balloons."
 210 | 	desiredOutput := 122
 211 | 
 212 | 	// Split the record.
 213 | 	scr := NewScript()
 214 | 	scr.SetFPat(`-?\d+`)
 215 | 	err := scr.splitRecord(inputStr)
 216 | 	if err != nil {
 217 | 		t.Fatal(err)
 218 | 	}
 219 | 
 220 | 	// Check the result.
 221 | 	output := 0
 222 | 	for i := 1; i <= scr.NF; i++ {
 223 | 		t.Log(scr.F(i))
 224 | 		output += scr.F(i).Int()
 225 | 	}
 226 | 	if output != desiredOutput {
 227 | 		t.Fatalf("Expected %d but received %d", desiredOutput, output)
 228 | 	}
 229 | }
 230 | 
 231 | // TestBeginEnd tests creating and running a script that contains a BEGIN
 232 | // action and an END action.
 233 | func TestBeginEnd(t *testing.T) {
 234 | 	scr := NewScript()
 235 | 	val := 123
 236 | 	scr.Begin = func(s *Script) { val *= 10 }
 237 | 	scr.End = func(s *Script) { val += 4 }
 238 | 	err := scr.Run(strings.NewReader("dummy data"))
 239 | 	if err != nil {
 240 | 		t.Fatal(err)
 241 | 	}
 242 | 	if val != 1234 {
 243 | 		t.Fatalf("Expected 1234 but received %d", val)
 244 | 	}
 245 | }
 246 | 
 247 | // TestSimpleSum tests adding up a column of numbers.
 248 | func TestSimpleSum(t *testing.T) {
 249 | 	scr := NewScript()
 250 | 	sum := 0
 251 | 	scr.AppendStmt(nil, func(s *Script) { sum += s.F(1).Int() })
 252 | 	err := scr.Run(strings.NewReader("2\n4\n6\n8\n"))
 253 | 	if err != nil {
 254 | 		t.Fatal(err)
 255 | 	}
 256 | 	if sum != 20 {
 257 | 		t.Fatalf("Expected 20 but received %d", sum)
 258 | 	}
 259 | }
 260 | 
 261 | // TestRunTwice tests running the same script twice.
 262 | func TestRunTwice(t *testing.T) {
 263 | 	// Run once.
 264 | 	scr := NewScript()
 265 | 	sum := 0
 266 | 	scr.AppendStmt(nil, func(s *Script) { sum += s.F(1).Int() * s.NR })
 267 | 	err := scr.Run(strings.NewReader("1\n3\n5\n7\n"))
 268 | 	if err != nil {
 269 | 		t.Fatal(err)
 270 | 	}
 271 | 	if sum != 50 {
 272 | 		t.Fatalf("Expected 50 but received %d on the first trial", sum)
 273 | 	}
 274 | 
 275 | 	// Run again.
 276 | 	sum = 0
 277 | 	err = scr.Run(strings.NewReader("1\n3\n5\n7\n"))
 278 | 	if err != nil {
 279 | 		t.Fatal(err)
 280 | 	}
 281 | 	if sum != 50 {
 282 | 		t.Fatalf("Expected 50 but received %d on the second trial", sum)
 283 | 	}
 284 | }
 285 | 
 286 | // TestFieldCreation tests creating ("autovivifying" in Perl-speak) new fields.
 287 | func TestFieldCreation(t *testing.T) {
 288 | 	scr := NewScript()
 289 | 	sum := 0
 290 | 	scr.AppendStmt(nil, func(s *Script) { sum += 1 << uint(s.F(2).Int()) })
 291 | 	err := scr.Run(strings.NewReader("x 3\ny 2\n\nz 1\n"))
 292 | 	if err != nil {
 293 | 		t.Fatal(err)
 294 | 	}
 295 | 	if sum != 15 {
 296 | 		t.Fatalf("Expected 15 but received %d", sum)
 297 | 	}
 298 | }
 299 | 
 300 | // TestRecordReplacement tests overwriting field 0 with a new record.
 301 | func TestRecordReplacement(t *testing.T) {
 302 | 	scr := NewScript()
 303 | 	sum := 0
 304 | 	scr.AppendStmt(nil, func(s *Script) {
 305 | 		sum += s.F(2).Int()
 306 | 		s.SetF(0, s.NewValue("10 20 30 40 50"))
 307 | 		sum += s.F(5).Int()
 308 | 	})
 309 | 	err := scr.Run(strings.NewReader("x 3\ny 2\n\nz 1\n"))
 310 | 	if err != nil {
 311 | 		t.Fatal(err)
 312 | 	}
 313 | 	if sum != 206 {
 314 | 		t.Fatalf("Expected 206 but received %d", sum)
 315 | 	}
 316 | }
 317 | 
 318 | // TestRecordChangeCase tests changing IgnoreCase during the execution of a
 319 | // script.
 320 | func TestRecordChangeCase(t *testing.T) {
 321 | 	scr := NewScript()
 322 | 	sum := 0
 323 | 	scr.AppendStmt(func(s *Script) bool { return s.F(1).Int()%2 == 0 },
 324 | 		func(s *Script) { sum += s.F(1).Int() })
 325 | 	scr.AppendStmt(func(s *Script) bool { return s.NR == 3 },
 326 | 		func(s *Script) { s.IgnoreCase(true) })
 327 | 	scr.SetRS("EOL")
 328 | 	err := scr.Run(strings.NewReader("1EOL2EOL3EOL4Eol5eol6eoL"))
 329 | 	if err != nil {
 330 | 		t.Fatal(err)
 331 | 	}
 332 | 	if sum != 12 {
 333 | 		t.Fatalf("Expected 12 but received %d", sum)
 334 | 	}
 335 | }
 336 | 
 337 | // TestRecordBlankLines tests the AWK special case of blank-line-separated
 338 | // records.
 339 | func TestRecordBlankLines(t *testing.T) {
 340 | 	recordStr := "uno\ndos\n\ntres\ncuatro\n\ncinco,seis,siete\nocho\n\nnueve,diez\n\n"
 341 | 	expected := regexp.MustCompile(`[\n,]+`).Split(recordStr, -1)
 342 | 	expected = expected[:len(expected)-1] // Skip empty final record.
 343 | 	actual := make([]string, 0, 10)
 344 | 	scr := NewScript()
 345 | 	scr.SetRS("")
 346 | 	scr.SetFS(",")
 347 | 	scr.AppendStmt(nil, func(s *Script) {
 348 | 		for i := 1; i <= s.NF; i++ {
 349 | 			actual = append(actual, s.F(i).String())
 350 | 		}
 351 | 	})
 352 | 	err := scr.Run(strings.NewReader(recordStr))
 353 | 	if err != nil {
 354 | 		t.Fatal(err)
 355 | 	}
 356 | 	for i, s1 := range expected {
 357 | 		s2 := actual[i]
 358 | 		if s1 != s2 {
 359 | 			t.Fatalf("Expected %v but received %v", expected, actual)
 360 | 		}
 361 | 	}
 362 | }
 363 | 
 364 | // TestExit tests premature script termination.
 365 | func TestExit(t *testing.T) {
 366 | 	scr := NewScript()
 367 | 	sum := 0
 368 | 	scr.Begin = func(s *Script) { s.IgnoreCase(true) }
 369 | 	scr.AppendStmt(nil, func(s *Script) { sum += s.F(1).Int() })
 370 | 	scr.AppendStmt(func(s *Script) bool { return s.F(1).StrEqual("stop") },
 371 | 		func(s *Script) { s.Exit() })
 372 | 	err := scr.Run(strings.NewReader("111\n222\n333\n444\nSTOP\n555\n666\n"))
 373 | 	if err != nil {
 374 | 		t.Fatal(err)
 375 | 	}
 376 | 	if sum != 1110 {
 377 | 		t.Fatalf("Expected 1110 but received %d", sum)
 378 | 	}
 379 | }
 380 | 
 381 | // TestRecordRange tests range patterns.
 382 | func TestRecordRange(t *testing.T) {
 383 | 	scr := NewScript()
 384 | 	all := []string{
 385 | 		"bad",
 386 | 		"terrible",
 387 | 		"BEGIN",
 388 | 		"good",
 389 | 		"great",
 390 | 		"fantastic",
 391 | 		"END",
 392 | 		"awful",
 393 | 		"dreadful",
 394 | 	}
 395 | 	want := []string{
 396 | 		"BEGIN",
 397 | 		"good",
 398 | 		"great",
 399 | 		"fantastic",
 400 | 		"END",
 401 | 	}
 402 | 	got := make([]string, 0, 10)
 403 | 	scr.AppendStmt(Range(func(s *Script) bool { return s.F(1).Match("BEGIN") },
 404 | 		func(s *Script) bool { return s.F(1).Match("END") }),
 405 | 		func(s *Script) { got = append(got, s.F(1).String()) })
 406 | 	err := scr.Run(strings.NewReader(strings.Join(all, "\n")))
 407 | 	if err != nil {
 408 | 		t.Fatal(err)
 409 | 	}
 410 | 	for i, s1 := range want {
 411 | 		s2 := got[i]
 412 | 		if s1 != s2 {
 413 | 			t.Fatalf("Expected %q but received %q", s1, s2)
 414 | 		}
 415 | 	}
 416 | }
 417 | 
 418 | // TestSplitRecordRE tests splitting the input string into regexp-separated
 419 | // records.
 420 | func TestSplitRecordRE(t *testing.T) {
 421 | 	scr := NewScript()
 422 | 	pluses := 0
 423 | 	scr.Begin = func(s *Script) { s.SetRS(`\++`) }
 424 | 	scr.AppendStmt(nil, func(s *Script) { pluses += len(s.RT) })
 425 | 	err := scr.Run(strings.NewReader("a++++++a++a++++a+++a+++++a+"))
 426 | 	if err != nil {
 427 | 		t.Fatal(err)
 428 | 	}
 429 | 	if pluses != 21 {
 430 | 		t.Fatalf("Expected 21 but received %d", pluses)
 431 | 	}
 432 | }
 433 | 
 434 | // TestDefaultAction tests the default printing action.
 435 | func TestDefaultAction(t *testing.T) {
 436 | 	// Define a script and some test input.
 437 | 	scr := NewScript()
 438 | 	scr.Output = new(bytes.Buffer)
 439 | 	scr.IgnoreCase(true)
 440 | 	scr.AppendStmt(func(s *Script) bool { return s.F(1).StrEqual("Duck") }, nil)
 441 | 	inputStr := `Duck 1
 442 | duck 2
 443 | duck 3
 444 | duck 4
 445 | Goose! 5
 446 | Duck 6
 447 | duck 7
 448 | DUCK 8
 449 | duck 9
 450 | Goose!
 451 | `
 452 | 
 453 | 	// Test with the default record separator.
 454 | 	err := scr.Run(strings.NewReader(inputStr))
 455 | 	if err != nil {
 456 | 		t.Fatal(err)
 457 | 	}
 458 | 	outputStr := string(scr.Output.(*bytes.Buffer).Bytes())
 459 | 	desiredOutputStr := `Duck 1
 460 | duck 2
 461 | duck 3
 462 | duck 4
 463 | Duck 6
 464 | duck 7
 465 | DUCK 8
 466 | duck 9
 467 | `
 468 | 	if outputStr != desiredOutputStr {
 469 | 		t.Fatalf("Expected %#v but received %#v", desiredOutputStr, outputStr)
 470 | 	}
 471 | 
 472 | 	// Test with a modified record separator.
 473 | 	scr.Output.(*bytes.Buffer).Reset()
 474 | 	scr.SetORS("|")
 475 | 	err = scr.Run(strings.NewReader(inputStr))
 476 | 	if err != nil {
 477 | 		t.Fatal(err)
 478 | 	}
 479 | 	outputStr = string(scr.Output.(*bytes.Buffer).Bytes())
 480 | 	desiredOutputStr = `Duck 1|duck 2|duck 3|duck 4|Duck 6|duck 7|DUCK 8|duck 9|`
 481 | 	if outputStr != desiredOutputStr {
 482 | 		t.Fatalf("Expected %#v but received %#v", desiredOutputStr, outputStr)
 483 | 	}
 484 | }
 485 | 
 486 | // TestFInts tests the bulk conversion of fields to ints.
 487 | func TestFInts(t *testing.T) {
 488 | 	// Define a script and some test inputs and outputs.
 489 | 	scr := NewScript()
 490 | 	inputStr := "8675309"
 491 | 	desiredOutput := []int{0, 3, 5, 6, 7, 8, 9}
 492 | 	var output []int
 493 | 	scr.SetFS("")
 494 | 	scr.AppendStmt(nil, func(s *Script) {
 495 | 		iList := s.FInts()
 496 | 		sort.Ints(iList)
 497 | 		output = iList
 498 | 	})
 499 | 
 500 | 	// Run the script.
 501 | 	err := scr.Run(strings.NewReader(inputStr))
 502 | 	if err != nil {
 503 | 		t.Fatal(err)
 504 | 	}
 505 | 
 506 | 	// Validate the output.
 507 | 	for i, val := range desiredOutput {
 508 | 		if val != output[i] {
 509 | 			t.Fatalf("Expected %v but received %v", desiredOutput, output)
 510 | 		}
 511 | 	}
 512 | }
 513 | 
 514 | // TestFieldCreation0 ensures that field creation updates F(0).
 515 | func TestFieldCreation0(t *testing.T) {
 516 | 	// Define a script and some test inputs and outputs.
 517 | 	input := "spam egg spam spam bacon spam"
 518 | 	desiredOutput := "spam,egg,spam,spam,bacon,spam,,,,,sausage"
 519 | 	var output string
 520 | 	scr := NewScript()
 521 | 	scr.Begin = func(s *Script) { scr.SetOFS(",") }
 522 | 	scr.AppendStmt(nil, func(s *Script) {
 523 | 		scr.SetF(scr.NF+5, scr.NewValue("sausage"))
 524 | 		output = scr.F(0).String()
 525 | 	})
 526 | 
 527 | 	// Run the script and validate the output.
 528 | 	err := scr.Run(strings.NewReader(input))
 529 | 	if err != nil {
 530 | 		t.Fatal(err)
 531 | 	}
 532 | 	if output != desiredOutput {
 533 | 		t.Fatalf("Expected %q but received %q", desiredOutput, output)
 534 | 	}
 535 | }
 536 | 
 537 | // TestFieldModification0 ensures that field modification updates F(0).
 538 | func TestFieldModification0(t *testing.T) {
 539 | 	// Define a script and some test inputs and outputs.
 540 | 	input := "spam egg spam spam bacon spam"
 541 | 	desiredOutput := "spam,egg,sausage,spam,bacon,spam"
 542 | 	var output string
 543 | 	scr := NewScript()
 544 | 	scr.Begin = func(s *Script) { scr.SetOFS(",") }
 545 | 	scr.AppendStmt(nil, func(s *Script) {
 546 | 		scr.SetF(3, scr.NewValue("sausage"))
 547 | 		output = scr.F(0).String()
 548 | 	})
 549 | 
 550 | 	// Run the script and validate the output.
 551 | 	err := scr.Run(strings.NewReader(input))
 552 | 	if err != nil {
 553 | 		t.Fatal(err)
 554 | 	}
 555 | 	if output != desiredOutput {
 556 | 		t.Fatalf("Expected %q but received %q", desiredOutput, output)
 557 | 	}
 558 | }
 559 | 
 560 | // TestNFModification0 ensures that modifying NF updates F(0).
 561 | func TestNFModification0(t *testing.T) {
 562 | 	// Define a script and some test inputs and outputs.
 563 | 	input := "spam egg spam spam bacon spam"
 564 | 	desiredOutput := "spam egg spam"
 565 | 	var output string
 566 | 	scr := NewScript()
 567 | 	scr.AppendStmt(nil, func(s *Script) {
 568 | 		scr.NF = 3
 569 | 		output = scr.F(0).String()
 570 | 	})
 571 | 
 572 | 	// Run the script and validate the output.
 573 | 	err := scr.Run(strings.NewReader(input))
 574 | 	if err != nil {
 575 | 		t.Fatal(err)
 576 | 	}
 577 | 	if output != desiredOutput {
 578 | 		t.Fatalf("Expected %q but received %q", desiredOutput, output)
 579 | 	}
 580 | }
 581 | 
 582 | // TestAutoInt tests the Auto function with an int argument.
 583 | func TestAutoInt(t *testing.T) {
 584 | 	// Define a script and some test inputs and outputs.
 585 | 	input := strings.Replace("It does not matter how slowly you go as long as you do not stop.", " ", "\n", -1)
 586 | 	var output string
 587 | 	desiredOutput := "go"
 588 | 	scr := NewScript()
 589 | 	scr.AppendStmt(Auto(8), func(s *Script) { output = s.F(1).String() })
 590 | 
 591 | 	// Run the script and validate the output.
 592 | 	err := scr.Run(strings.NewReader(input))
 593 | 	if err != nil {
 594 | 		t.Fatal(err)
 595 | 	}
 596 | 	if output != desiredOutput {
 597 | 		t.Fatalf("Expected %q but received %q", desiredOutput, output)
 598 | 	}
 599 | }
 600 | 
 601 | // TestAutoRegexp tests the Auto function with a Regexp argument.
 602 | func TestAutoRegexp(t *testing.T) {
 603 | 	// Define a script and some test inputs and outputs.
 604 | 	input := strings.Replace("It does not matter how slowly you go as long as you do not stop.", " ", "\n", -1)
 605 | 	var output string
 606 | 	desiredOutput := "go"
 607 | 	scr := NewScript()
 608 | 	re := regexp.MustCompile("Go")
 609 | 	scr.Begin = func(s *Script) { scr.IgnoreCase(true) }
 610 | 	scr.AppendStmt(Auto(re), func(s *Script) { output = s.F(1).String() })
 611 | 
 612 | 	// Run the script and validate the output.
 613 | 	err := scr.Run(strings.NewReader(input))
 614 | 	if err != nil {
 615 | 		t.Fatal(err)
 616 | 	}
 617 | 	if output != desiredOutput {
 618 | 		t.Fatalf("Expected %q but received %q", desiredOutput, output)
 619 | 	}
 620 | }
 621 | 
 622 | // TestAutoString tests the Auto function with a string argument.
 623 | func TestAutoString(t *testing.T) {
 624 | 	// Define a script and some test inputs and outputs.
 625 | 	input := strings.Replace("It does not matter how slowly you go as long as you do not stop.", " ", "\n", -1)
 626 | 	var output string
 627 | 	desiredOutput := "go"
 628 | 	scr := NewScript()
 629 | 	scr.Begin = func(s *Script) { scr.IgnoreCase(true) }
 630 | 	scr.AppendStmt(Auto("Go"), func(s *Script) { output = s.F(1).String() })
 631 | 
 632 | 	// Run the script and validate the output.
 633 | 	err := scr.Run(strings.NewReader(input))
 634 | 	if err != nil {
 635 | 		t.Fatal(err)
 636 | 	}
 637 | 	if output != desiredOutput {
 638 | 		t.Fatalf("Expected %q but received %q", desiredOutput, output)
 639 | 	}
 640 | }
 641 | 
 642 | // TestAutoIntRange tests the Auto function with a range of int arguments.
 643 | func TestAutoIntRange(t *testing.T) {
 644 | 	// Define a script and some test inputs and outputs.
 645 | 	input := strings.Replace("10 20 30 40 50 60 70 80 90 100", " ", "\n", -1)
 646 | 	var output int
 647 | 	desiredOutput := 150
 648 | 	scr := NewScript()
 649 | 	scr.AppendStmt(Auto(4, 6), func(s *Script) { output += s.F(1).Int() })
 650 | 
 651 | 	// Run the script and validate the output.
 652 | 	err := scr.Run(strings.NewReader(input))
 653 | 	if err != nil {
 654 | 		t.Fatal(err)
 655 | 	}
 656 | 	if output != desiredOutput {
 657 | 		t.Fatalf("Expected %d but received %d", desiredOutput, output)
 658 | 	}
 659 | }
 660 | 
 661 | // TestAutoIntRanges tests the Auto function with multiple ranges of int
 662 | // arguments.
 663 | func TestAutoIntRanges(t *testing.T) {
 664 | 	// Define a script and some test inputs and outputs.
 665 | 	input := strings.Replace("Don't be afraid to give up the good to go for the great.", " ", "\n", -1)
 666 | 	output := make([]string, 0, 15)
 667 | 	desiredOutput := strings.Split("Don't be afraid to go", " ")
 668 | 	scr := NewScript()
 669 | 	scr.Begin = func(s *Script) { scr.IgnoreCase(true) }
 670 | 	scr.AppendStmt(Auto(1, 3, 9, 10), func(s *Script) { output = append(output, s.F(1).String()) })
 671 | 
 672 | 	// Run the script and validate the output.
 673 | 	err := scr.Run(strings.NewReader(input))
 674 | 	if err != nil {
 675 | 		t.Fatal(err)
 676 | 	}
 677 | 	if len(output) != len(desiredOutput) {
 678 | 		t.Fatalf("Expected %v but received %v", desiredOutput, output)
 679 | 	}
 680 | 	for i, o := range desiredOutput {
 681 | 		if output[i] != o {
 682 | 			t.Fatalf("Expected %v but received %v", desiredOutput, output)
 683 | 		}
 684 | 	}
 685 | }
 686 | 
 687 | // TestCatchSetRSError tests that we properly catch invalid uses of SetRS.
 688 | func TestCatchSetRSError(t *testing.T) {
 689 | 	// Define a script.
 690 | 	scr := NewScript()
 691 | 	scr.Begin = func(s *Script) { scr.IgnoreCase(true) }
 692 | 	scr.AppendStmt(nil, func(s *Script) { s.SetRS("/") })
 693 | 	expected := "SetRS was called from a running script"
 694 | 
 695 | 	// Run the script and ensure it threw the expected error.
 696 | 	err := scr.Run(strings.NewReader("The progress of rivers to the ocean is not so rapid as that of man to error."))
 697 | 	if err == nil {
 698 | 		t.Fatalf("Expected error %q, but no error was returned", expected)
 699 | 	}
 700 | 	if err.Error() != expected {
 701 | 		t.Fatalf("Expected error %q, but received error %q", expected, err.Error())
 702 | 	}
 703 | }
 704 | 
 705 | // TestNext tests that Next immediately stops the current action and
 706 | // immediately continues with the next record.
 707 | func TestNext(t *testing.T) {
 708 | 	// Define a script.
 709 | 	var output []string
 710 | 	scr := NewScript()
 711 | 	scr.Begin = func(s *Script) { output = make([]string, 0, 3) }
 712 | 	scr.AppendStmt(nil, func(s *Script) {
 713 | 		output = append(output, s.F(0).String())
 714 | 		s.Next()
 715 | 		t.Fatal("Next did not immediately exit the current action")
 716 | 	})
 717 | 	scr.AppendStmt(nil, func(s *Script) {
 718 | 		t.Fatal("Next did not immediately go to the next record")
 719 | 	})
 720 | 
 721 | 	// Define our input and desired output.
 722 | 	input := []string{
 723 | 		"追いかけ", // Oikake
 724 | 		"待ち伏せ", // Machibuse
 725 | 		"気まぐれ", // Kimagure
 726 | 		"お惚け",  // Otoboke
 727 | 	}
 728 | 	desiredOutput := strings.Join(input, " ")
 729 | 
 730 | 	// Run the script and validate the output.
 731 | 	err := scr.Run(strings.NewReader(strings.Join(input, "\n")))
 732 | 	if err != nil {
 733 | 		t.Fatal(err)
 734 | 	}
 735 | 	outputStr := strings.Join(output, " ")
 736 | 	if outputStr != desiredOutput {
 737 | 		t.Fatalf("Expected %q but received %q", desiredOutput, outputStr)
 738 | 	}
 739 | }
 740 | 
 741 | // TestGetLineSelf tests that GetLine can read the next record from the current
 742 | // input stream.
 743 | func TestGetLineSelf(t *testing.T) {
 744 | 	// Define a script.
 745 | 	var output []string
 746 | 	scr := NewScript()
 747 | 	scr.Begin = func(s *Script) { output = nil }
 748 | 	scr.AppendStmt(Auto("skip"), func(s *Script) {
 749 | 		nSkip := s.F(2).Int()
 750 | 		for i := 0; i < nSkip; i++ {
 751 | 			_, err := s.GetLine(nil)
 752 | 			if err != nil && err != io.EOF {
 753 | 				t.Fatal(err)
 754 | 			}
 755 | 		}
 756 | 		s.Next()
 757 | 	})
 758 | 	scr.AppendStmt(nil, func(s *Script) {
 759 | 		output = append(output, s.F(0).String())
 760 | 	})
 761 | 
 762 | 	// Define our input and desired output.
 763 | 	input := []string{
 764 | 		"apple",
 765 | 		"boy",
 766 | 		"skip 1",
 767 | 		"cat",
 768 | 		"skip 1",
 769 | 		"dog",
 770 | 		"east",
 771 | 		"five",
 772 | 		"skip 2",
 773 | 		"goat",
 774 | 		"house",
 775 | 		"skip 1",
 776 | 		"ice cream",
 777 | 		"July",
 778 | 		"skip 1",
 779 | 		"skip 1",
 780 | 		"king",
 781 | 		"lemon",
 782 | 	}
 783 | 	desiredOutput := []string{
 784 | 		"apple",
 785 | 		"boy",
 786 | 		"east",
 787 | 		"five",
 788 | 		"July",
 789 | 		"king",
 790 | 		"lemon",
 791 | 	}
 792 | 
 793 | 	// Run the script and validate the output.
 794 | 	err := scr.Run(strings.NewReader(strings.Join(input, "\n")))
 795 | 	if err != nil {
 796 | 		t.Fatal(err)
 797 | 	}
 798 | 	if len(output) != len(desiredOutput) {
 799 | 		t.Fatalf("Expected %v (length %d) but received %v (length %d)", desiredOutput, len(desiredOutput), output, len(output))
 800 | 	}
 801 | 	for i, o := range desiredOutput {
 802 | 		if output[i] != o {
 803 | 			t.Fatalf("Expected %v but received %v", desiredOutput, output)
 804 | 		}
 805 | 	}
 806 | 
 807 | 	// Repeat the test, but attempt to skip past the end of the file.  The
 808 | 	// error check after the GetLine call is supposed to ignore EOF, not
 809 | 	// fail.
 810 | 	input = append(input, "skip 5")
 811 | 	err = scr.Run(strings.NewReader(strings.Join(input, "\n")))
 812 | 	if err != nil {
 813 | 		t.Fatal(err)
 814 | 	}
 815 | 	if len(output) != len(desiredOutput) {
 816 | 		t.Fatalf("Expected %v (length %d) but received %v (length %d)", desiredOutput, len(desiredOutput), output, len(output))
 817 | 	}
 818 | 	for i, o := range desiredOutput {
 819 | 		if output[i] != o {
 820 | 			t.Fatalf("Expected %v but received %v", desiredOutput, output)
 821 | 		}
 822 | 	}
 823 | }
 824 | 
 825 | // TestGetLineOther tests that GetLine can read the next record from an
 826 | // alternative input stream.
 827 | func TestGetLineOther(t *testing.T) {
 828 | 	// Define our inputs and desired output.
 829 | 	input := []string{
 830 | 		"INSERT",
 831 | 		"Boston",
 832 | 		"Chicago",
 833 | 		"Denver",
 834 | 		"INSERT",
 835 | 		"Frank",
 836 | 		"INSERT",
 837 | 		"INSERT",
 838 | 		"Ida",
 839 | 		"John",
 840 | 		"King",
 841 | 		"INSERT",
 842 | 	}
 843 | 	inserts := []string{
 844 | 		"Adams",
 845 | 		"Easy",
 846 | 		"George",
 847 | 		"Henry",
 848 | 		"Lincoln",
 849 | 	}
 850 | 	desiredOutput := []string{
 851 | 		"Adams",
 852 | 		"Boston",
 853 | 		"Chicago",
 854 | 		"Denver",
 855 | 		"Easy",
 856 | 		"Frank",
 857 | 		"George",
 858 | 		"Henry",
 859 | 		"Ida",
 860 | 		"John",
 861 | 		"King",
 862 | 		"Lincoln",
 863 | 	}
 864 | 
 865 | 	// Define a script.
 866 | 	var output []string
 867 | 	insertsStrm := strings.NewReader(strings.Join(inserts, "\n"))
 868 | 	scr := NewScript()
 869 | 	scr.Begin = func(s *Script) { output = nil }
 870 | 	scr.AppendStmt(Auto("INSERT"), func(s *Script) {
 871 | 		ins, err := s.GetLine(insertsStrm)
 872 | 		if err != nil {
 873 | 			t.Fatal(err)
 874 | 		}
 875 | 		output = append(output, ins.String())
 876 | 		s.Next()
 877 | 	})
 878 | 	scr.AppendStmt(nil, func(s *Script) {
 879 | 		output = append(output, s.F(0).String())
 880 | 	})
 881 | 
 882 | 	// Run the script and validate the output.
 883 | 	err := scr.Run(strings.NewReader(strings.Join(input, "\n")))
 884 | 	if err != nil {
 885 | 		t.Fatal(err)
 886 | 	}
 887 | 	if len(output) != len(desiredOutput) {
 888 | 		t.Fatalf("Expected %v (length %d) but received %v (length %d)", desiredOutput, len(desiredOutput), output, len(output))
 889 | 	}
 890 | 	for i, o := range desiredOutput {
 891 | 		if output[i] != o {
 892 | 			t.Fatalf("Expected %v but received %v", desiredOutput, output)
 893 | 		}
 894 | 	}
 895 | }
 896 | 
 897 | // TestGetLineSetF tests that GetLine + SetF can replace the current input line.
 898 | func TestGetLineSetF(t *testing.T) {
 899 | 	// Define a script.
 900 | 	scr := NewScript()
 901 | 	scr.AppendStmt(nil, func(s *Script) {
 902 | 		// Validate the current line.
 903 | 		for i := 1; i <= 3; i++ {
 904 | 			if s.F(i).Int() != (s.NR-1)*3+i {
 905 | 				t.Fatalf("Expected %d but received %d", (s.NR-1)*3+i, s.F(i).Int())
 906 | 			}
 907 | 		}
 908 | 
 909 | 		// Read and validate the next line.
 910 | 		line, err := s.GetLine(nil)
 911 | 		if err != nil {
 912 | 			t.Fatal(err)
 913 | 		}
 914 | 		s.SetF(0, line)
 915 | 		for i := 1; i <= 3; i++ {
 916 | 			if s.F(i).Int() != (s.NR-1)*3+i {
 917 | 				t.Fatalf("Expected %d but received %d", (s.NR-1)*3+i, s.F(i).Int())
 918 | 			}
 919 | 		}
 920 | 	})
 921 | 
 922 | 	// Run the script and validate the output.
 923 | 	input := []string{
 924 | 		" 1  2  3",
 925 | 		" 4  5  6",
 926 | 		" 7  8  9",
 927 | 		"10 11 12",
 928 | 	}
 929 | 	err := scr.Run(strings.NewReader(strings.Join(input, "\n")))
 930 | 	if err != nil {
 931 | 		t.Fatal(err)
 932 | 	}
 933 | }
 934 | 
 935 | // TestBigLongLine tests splitting a very long record into whitespace-separated
 936 | // fields
 937 | func TestBigLongLine(t *testing.T) {
 938 | 	// Specify the word to appear in each field.
 939 | 	word := "pneumonoultramicroscopicsilicovolcanoconiosis"
 940 | 
 941 | 	// Define a script that simply verifies that each field is
 942 | 	// correct.
 943 | 	scr := NewScript()
 944 | 	scr.AppendStmt(nil, func(s *Script) {
 945 | 		// Validate the current line.
 946 | 		for i := 1; i <= s.NF; i++ {
 947 | 			if s.F(i).String() != word {
 948 | 				t.Fatalf("Expected %q but received %q", word, s.F(i).String())
 949 | 			}
 950 | 		}
 951 | 	})
 952 | 
 953 | 	// Define a function to test a record with a given number of fields.
 954 | 	testBigRecord := func(numFields int) error {
 955 | 		// Create a very long string.
 956 | 		recordStr := word
 957 | 		for i := 0; i < numFields-1; i++ {
 958 | 			recordStr += " " + word
 959 | 		}
 960 | 
 961 | 		// Run the script and return its error value.
 962 | 		input := strings.NewReader(recordStr)
 963 | 		return scr.Run(input)
 964 | 	}
 965 | 
 966 | 	// Try increasingly large records until we exhaust the default maximum
 967 | 	// record size.
 968 | 	var err error
 969 | 	var numFields int
 970 | 	for numFields = 100; numFields <= 100000000; numFields *= 10 {
 971 | 		err = testBigRecord(numFields)
 972 | 		if err != nil {
 973 | 			break
 974 | 		}
 975 | 	}
 976 | 	if err == nil {
 977 | 		// We never managed to exhaust the default maximum record size.
 978 | 		// Assume it's big enough for all practical purposes.
 979 | 		return
 980 | 	}
 981 | 
 982 | 	// Set the buffer size and try again.  There should be no error this
 983 | 	// time.
 984 | 	scr.MaxRecordSize = (len(word) + 1) * numFields
 985 | 	err = testBigRecord(numFields)
 986 | 	if err != nil {
 987 | 		t.Fatal(err)
 988 | 	}
 989 | }
 990 | 
 991 | // TestRunPipeline1 tests that RunPipeline can implement a pipeline of a single
 992 | // operation.
 993 | func TestRunPipeline1(t *testing.T) {
 994 | 	// Define a script that repeats the first word of each line
 995 | 	rep := NewScript()
 996 | 	rep.AppendStmt(nil, func(s *Script) {
 997 | 		s.Println(s.F(1), s.F(1))
 998 | 	})
 999 | 
1000 | 	// Pipe inputs into the pipeline we're about to run and from the
1001 | 	// pipeline into a memory buffer.
1002 | 	pr, pw := io.Pipe()
1003 | 	rep.Output = bytes.NewBuffer(make([]byte, 0, 10000))
1004 | 
1005 | 	// Write numbers into the pipe in the background.
1006 | 	go func() {
1007 | 		for i := 1; i <= 100; i++ {
1008 | 			fmt.Fprintf(pw, "%3d\n", i)
1009 | 		}
1010 | 		pw.Close()
1011 | 	}()
1012 | 
1013 | 	// Execute a pipeline in the foreground.
1014 | 	err := RunPipeline(pr, rep)
1015 | 	if err != nil {
1016 | 		t.Fatal(err)
1017 | 	}
1018 | 
1019 | 	// Ensure we received the expected output.
1020 | 	exp := bytes.NewBuffer(make([]byte, 0, 10000))
1021 | 	for i := 1; i <= 100; i++ {
1022 | 		fmt.Fprintf(exp, "%d %d\n", i, i)
1023 | 	}
1024 | 	got := rep.Output.(*bytes.Buffer).String()
1025 | 	if exp.String() != got {
1026 | 		t.Fatalf("Incorrect output %q", got)
1027 | 	}
1028 | }
1029 | 
1030 | // TestRunPipeline2 tests that RunPipeline can implement a pipeline of two
1031 | // operations.
1032 | func TestRunPipeline2(t *testing.T) {
1033 | 	// Define a script that repeats the first word of each line
1034 | 	rep := NewScript()
1035 | 	rep.AppendStmt(nil, func(s *Script) {
1036 | 		s.Println(s.F(1), s.F(1))
1037 | 	})
1038 | 
1039 | 	// Define a script that replaces the second word of each line
1040 | 	// with twice its value.
1041 | 	dbl := NewScript()
1042 | 	dbl.AppendStmt(nil, func(s *Script) {
1043 | 		s.Println(s.F(1), s.F(2).Int()*2)
1044 | 	})
1045 | 
1046 | 	// Pipe inputs into the pipeline we're about to run and from the
1047 | 	// pipeline into a memory buffer.
1048 | 	pr, pw := io.Pipe()
1049 | 	dbl.Output = bytes.NewBuffer(make([]byte, 0, 10000))
1050 | 
1051 | 	// Write numbers into the pipe in the background.
1052 | 	go func() {
1053 | 		for i := 1; i <= 100; i++ {
1054 | 			fmt.Fprintf(pw, "%3d\n", i)
1055 | 		}
1056 | 		pw.Close()
1057 | 	}()
1058 | 
1059 | 	// Execute a pipeline in the foreground.
1060 | 	err := RunPipeline(pr, rep, dbl)
1061 | 	if err != nil {
1062 | 		t.Fatal(err)
1063 | 	}
1064 | 
1065 | 	// Ensure we received the expected output.
1066 | 	exp := bytes.NewBuffer(make([]byte, 0, 10000))
1067 | 	for i := 1; i <= 100; i++ {
1068 | 		fmt.Fprintf(exp, "%d %d\n", i, i*2)
1069 | 	}
1070 | 	got := dbl.Output.(*bytes.Buffer).String()
1071 | 	if exp.String() != got {
1072 | 		t.Fatalf("Incorrect output %q", got)
1073 | 	}
1074 | }
1075 | 
1076 | // TestRunPipeline5 tests that RunPipeline can implement a pipeline of five
1077 | // operations.
1078 | func TestRunPipeline5(t *testing.T) {
1079 | 	// Define a script that repeats the first word of each line
1080 | 	rep := NewScript()
1081 | 	rep.AppendStmt(nil, func(s *Script) {
1082 | 		s.Println(s.F(1), s.F(1))
1083 | 	})
1084 | 
1085 | 	// Define a script that replaces the second number in a line with
1086 | 	// "fizz" if the first number is a multiple of 3.
1087 | 	fizz := NewScript()
1088 | 	fizz.AppendStmt(nil, func(s *Script) {
1089 | 		if s.F(1).Int()%3 == 0 {
1090 | 			s.Println(s.F(1), "fizz")
1091 | 		} else {
1092 | 			s.Println()
1093 | 		}
1094 | 	})
1095 | 
1096 | 	// Define a script that replaces the second number in a line with
1097 | 	// "buzz" if the first number is a multiple of 5.
1098 | 	buzz := NewScript()
1099 | 	buzz.AppendStmt(nil, func(s *Script) {
1100 | 		if s.F(1).Int()%5 == 0 {
1101 | 			s.Println(s.F(1), "buzz")
1102 | 		} else {
1103 | 			s.Println()
1104 | 		}
1105 | 	})
1106 | 
1107 | 	// Define a script that replaces the second number in a line with
1108 | 	// "fizzbuzz" if the first number is a multiple of 15.
1109 | 	fizzbuzz := NewScript()
1110 | 	fizzbuzz.AppendStmt(nil, func(s *Script) {
1111 | 		if s.F(1).Int()%15 == 0 {
1112 | 			s.Println(s.F(1), "fizzbuzz")
1113 | 		} else {
1114 | 			s.Println()
1115 | 		}
1116 | 	})
1117 | 
1118 | 	// Define a script that outputs only the second field.
1119 | 	strip := NewScript()
1120 | 	strip.AppendStmt(nil, func(s *Script) {
1121 | 		s.Println(s.F(2))
1122 | 	})
1123 | 
1124 | 	// Pipe inputs into the pipeline we're about to run and from the
1125 | 	// pipeline into a memory buffer.
1126 | 	pr, pw := io.Pipe()
1127 | 	strip.Output = bytes.NewBuffer(make([]byte, 0, 10000))
1128 | 
1129 | 	// Write numbers into the pipe in the background.
1130 | 	go func() {
1131 | 		for i := 1; i <= 100; i++ {
1132 | 			fmt.Fprintf(pw, "%3d\n", i)
1133 | 		}
1134 | 		pw.Close()
1135 | 	}()
1136 | 
1137 | 	// Execute a pipeline in the foreground.
1138 | 	err := RunPipeline(pr, rep, fizz, buzz, fizzbuzz, strip)
1139 | 	if err != nil {
1140 | 		t.Fatal(err)
1141 | 	}
1142 | 
1143 | 	// Ensure we received the expected output.
1144 | 	exp := bytes.NewBuffer(make([]byte, 0, 10000))
1145 | 	for i := 1; i <= 100; i++ {
1146 | 		switch {
1147 | 		case i%15 == 0:
1148 | 			fmt.Fprintln(exp, "fizzbuzz")
1149 | 		case i%5 == 0:
1150 | 			fmt.Fprintln(exp, "buzz")
1151 | 		case i%3 == 0:
1152 | 			fmt.Fprintln(exp, "fizz")
1153 | 		default:
1154 | 			fmt.Fprintf(exp, "%d\n", i)
1155 | 		}
1156 | 	}
1157 | 	got := strip.Output.(*bytes.Buffer).String()
1158 | 	if exp.String() != got {
1159 | 		t.Fatalf("Incorrect output %q", got)
1160 | 	}
1161 | }
1162 | 


--------------------------------------------------------------------------------
/value.go:
--------------------------------------------------------------------------------
  1 | // This file defines an AWK-like data type, Value, that can easily be converted
  2 | // to different Go data types.
  3 | 
  4 | package awk
  5 | 
  6 | import (
  7 | 	"fmt"
  8 | 	"regexp"
  9 | 	"strconv"
 10 | 	"strings"
 11 | )
 12 | 
 13 | const convFmt = "%.6g" // Number-to-string format; presumably the default for Script.ConvFmt, which Value.String applies to floats (TODO confirm where it is assigned)
 14 | 
 15 | // A Value represents a datum that can be converted to an int, float64, or string in
 16 | // best-effort fashion (i.e., never returning an error).  It is immutable from the caller's point of view, but Int, Float64, and String cache each conversion in the struct the first time it is needed.
 17 | type Value struct {
 18 | 	ival int     // Value converted to an int; meaningful only when ivalOk is true
 19 | 	fval float64 // Value converted to a float64; meaningful only when fvalOk is true
 20 | 	sval string  // Value converted to a string; meaningful only when svalOk is true
 21 | 
 22 | 	ivalOk bool // true: ival is valid (original or cached conversion); false: invalid
 23 | 	fvalOk bool // true: fval is valid (original or cached conversion); false: invalid
 24 | 	svalOk bool // true: sval is valid (original or cached conversion); false: invalid
 25 | 
 26 | 	script *Script // Pointer to the script that produced this value; consulted for ConvFmt, regexp compilation, and case-insensitivity
 27 | }
 28 | 
 29 | // NewValue creates a Value from an arbitrary Go data type.  Data types that do
 30 | // not map straightforwardly to one of {int, float64, string} are represented
 31 | // by a zero value.
 32 | func (s *Script) NewValue(v interface{}) *Value {
 33 | 	val := &Value{}
 34 | 	switch v := v.(type) {
 35 | 	case uint:
 36 | 		val.ival = int(v)
 37 | 		val.ivalOk = true
 38 | 	case uint8:
 39 | 		val.ival = int(v)
 40 | 		val.ivalOk = true
 41 | 	case uint16:
 42 | 		val.ival = int(v)
 43 | 		val.ivalOk = true
 44 | 	case uint32:
 45 | 		val.ival = int(v)
 46 | 		val.ivalOk = true
 47 | 	case uint64:
 48 | 		val.ival = int(v)
 49 | 		val.ivalOk = true
 50 | 	case uintptr:
 51 | 		val.ival = int(v)
 52 | 		val.ivalOk = true
 53 | 
 54 | 	case int:
 55 | 		val.ival = int(v)
 56 | 		val.ivalOk = true
 57 | 	case int8:
 58 | 		val.ival = int(v)
 59 | 		val.ivalOk = true
 60 | 	case int16:
 61 | 		val.ival = int(v)
 62 | 		val.ivalOk = true
 63 | 	case int32:
 64 | 		val.ival = int(v)
 65 | 		val.ivalOk = true
 66 | 	case int64:
 67 | 		val.ival = int(v)
 68 | 		val.ivalOk = true
 69 | 
 70 | 	case bool:
 71 | 		if v {
 72 | 			val.ival = 1
 73 | 		}
 74 | 		val.ivalOk = true
 75 | 
 76 | 	case float32:
 77 | 		val.fval = float64(v)
 78 | 		val.fvalOk = true
 79 | 	case float64:
 80 | 		val.fval = float64(v)
 81 | 		val.fvalOk = true
 82 | 
 83 | 	case complex64:
 84 | 		val.fval = float64(real(v))
 85 | 		val.fvalOk = true
 86 | 	case complex128:
 87 | 		val.fval = float64(real(v))
 88 | 		val.fvalOk = true
 89 | 
 90 | 	case string:
 91 | 		val.sval = v
 92 | 		val.svalOk = true
 93 | 
 94 | 	case *Value:
 95 | 		*val = *v
 96 | 
 97 | 	default:
 98 | 		val.svalOk = true
 99 | 	}
100 | 	val.script = s
101 | 	return val
102 | }
103 | 
104 | // matchInt matches a base-ten integer.
105 | var matchInt = regexp.MustCompile(`^\s*([-+]?\d+)`)
106 | 
107 | // Int converts a Value to an int.
108 | func (v *Value) Int() int {
109 | 	switch {
110 | 	case v.ivalOk:
111 | 	case v.fvalOk:
112 | 		v.ival = int(v.fval)
113 | 		v.ivalOk = true
114 | 	case v.svalOk:
115 | 		// Perform a best-effort conversion from string to int.
116 | 		strs := matchInt.FindStringSubmatch(v.sval)
117 | 		var i64 int64
118 | 		if len(strs) >= 2 {
119 | 			i64, _ = strconv.ParseInt(strs[1], 10, 0)
120 | 		}
121 | 		v.ival = int(i64)
122 | 		v.ivalOk = true
123 | 	}
124 | 	return v.ival
125 | }
126 | 
127 | // matchFloat matches a base-ten floating-point number.
128 | var matchFloat = regexp.MustCompile(`^\s*([-+]?(?:\d+(?:\.\d*)?|\.\d+)(?:[Ee][-+]?\d+)?)`)
129 | 
130 | // Float64 converts a Value to a float64.
131 | func (v *Value) Float64() float64 {
132 | 	switch {
133 | 	case v.fvalOk:
134 | 	case v.ivalOk:
135 | 		v.fval = float64(v.ival)
136 | 		v.fvalOk = true
137 | 	case v.svalOk:
138 | 		// Perform a best-effort conversion from string to float64.
139 | 		v.fval = 0.0
140 | 		strs := matchFloat.FindStringSubmatch(v.sval)
141 | 		if len(strs) >= 2 {
142 | 			v.fval, _ = strconv.ParseFloat(strs[1], 64)
143 | 		}
144 | 		v.fvalOk = true
145 | 	}
146 | 	return v.fval
147 | }
148 | 
149 | // String converts a Value to a string.
150 | func (v *Value) String() string {
151 | 	switch {
152 | 	case v.svalOk:
153 | 	case v.ivalOk:
154 | 		v.sval = strconv.FormatInt(int64(v.ival), 10)
155 | 		v.svalOk = true
156 | 	case v.fvalOk:
157 | 		v.sval = fmt.Sprintf(v.script.ConvFmt, v.fval)
158 | 		v.svalOk = true
159 | 	}
160 | 	return v.sval
161 | }
162 | 
163 | // Match says whether a given regular expression, provided as a string, matches
164 | // the Value.  If the associated script set IgnoreCase(true), the match is
165 | // tested in a case-insensitive manner.
166 | func (v *Value) Match(expr string) bool {
167 | 	// Compile the regular expression.
168 | 	re, err := v.script.compileRegexp(expr)
169 | 	if err != nil {
170 | 		return false // Fail silently
171 | 	}
172 | 
173 | 	// Return true if the expression matches the value, interpreted as a
174 | 	// string.
175 | 	loc := re.FindStringIndex(v.String())
176 | 	if loc == nil {
177 | 		v.script.RStart = 0
178 | 		v.script.RLength = -1
179 | 		return false
180 | 	}
181 | 	v.script.RStart = loc[0] + 1
182 | 	v.script.RLength = loc[1] - loc[0]
183 | 	return true
184 | }
185 | 
186 | // StrEqual says whether a Value, treated as a string, has the same contents as
187 | // a given Value, which can be provided either as a Value or as any type that
188 | // can be converted to a Value.  If the associated script called
189 | // IgnoreCase(true), the comparison is performed in a case-insensitive manner.
190 | func (v *Value) StrEqual(v2 interface{}) bool {
191 | 	switch v2 := v2.(type) {
192 | 	case *Value:
193 | 		if v.script.ignCase {
194 | 			return strings.EqualFold(v.String(), v2.String())
195 | 		}
196 | 		return v.String() == v2.String()
197 | 	case string:
198 | 		if v.script.ignCase {
199 | 			return strings.EqualFold(v.String(), v2)
200 | 		}
201 | 		return v.String() == v2
202 | 	default:
203 | 		v2Val := v.script.NewValue(v2)
204 | 		if v.script.ignCase {
205 | 			return strings.EqualFold(v.String(), v2Val.String())
206 | 		}
207 | 		return v.String() == v2Val.String()
208 | 	}
209 | }
210 | 


--------------------------------------------------------------------------------
/value_test.go:
--------------------------------------------------------------------------------
  1 | // This file tests conversions from each data type to every other data type.
  2 | 
  3 | package awk
  4 | 
  5 | import (
  6 | 	"math"
  7 | 	"testing"
  8 | )
  9 | 
 10 | // TestIntToInt converts various ints to Values then back to ints.
 11 | func TestIntToInt(t *testing.T) {
 12 | 	scr := NewScript()
 13 | 	for _, n := range []int{0, -123, 123, -456, 456, math.MaxInt32, math.MinInt32, 123} {
 14 | 		v := scr.NewValue(n)
 15 | 		i := v.Int()
 16 | 		if i != n {
 17 | 			t.Fatalf("Expected %d but received %d", n, i)
 18 | 		}
 19 | 	}
 20 | }
 21 | 
 22 | // TestIntToInt converts various ints to Values then to float64s.
 23 | func TestIntToFloat64(t *testing.T) {
 24 | 	scr := NewScript()
 25 | 	for _, n := range []int{0, -123, 123, -456, 456, math.MaxInt32, math.MinInt32, 123} {
 26 | 		v := scr.NewValue(n)
 27 | 		f := v.Float64()
 28 | 		if f != float64(n) {
 29 | 			t.Fatalf("Expected %.4g but received %.4g", float64(n), f)
 30 | 		}
 31 | 	}
 32 | }
 33 | 
 34 | // TestIntToString converts various ints to Values then to strings.
 35 | func TestIntToString(t *testing.T) {
 36 | 	scr := NewScript()
 37 | 	in := []int{0, -123, 123, -456, 456, math.MaxInt32, math.MinInt32, 123}
 38 | 	out := []string{"0", "-123", "123", "-456", "456", "2147483647", "-2147483648", "123"}
 39 | 	for idx, n := range in {
 40 | 		v := scr.NewValue(n)
 41 | 		s := v.String()
 42 | 		if s != out[idx] {
 43 | 			t.Fatalf("Expected %q but received %q", out[idx], s)
 44 | 		}
 45 | 	}
 46 | }
 47 | 
 48 | // TestFloat64ToInt converts various float64s to Values then to ints.
 49 | func TestFloat64ToInt(t *testing.T) {
 50 | 	scr := NewScript()
 51 | 	in := []float64{0.0, -123.0, 123.0, -456.7, 456.7, 123.0, -456.4, 456.4}
 52 | 	out := []int{0, -123, 123, -456, 456, 123, -456, 456}
 53 | 	for idx, n := range in {
 54 | 		v := scr.NewValue(n)
 55 | 		i := v.Int()
 56 | 		if i != out[idx] {
 57 | 			t.Fatalf("Expected %d but received %d", out[idx], i)
 58 | 		}
 59 | 	}
 60 | }
 61 | 
 62 | // TestFloat64ToFloat64 converts various float64s to Values then back to
 63 | // float64s.
 64 | func TestFloat64ToFloat64(t *testing.T) {
 65 | 	scr := NewScript()
 66 | 	for _, n := range []float64{0.0, -123.0, 123.0, -456.7, 456.7, math.MaxFloat64, -math.MaxFloat64, 123.0, -456.4, 456.4} {
 67 | 		v := scr.NewValue(n)
 68 | 		f := v.Float64()
 69 | 		if f != n {
 70 | 			t.Fatalf("Expected %.4g but received %.4g", n, f)
 71 | 		}
 72 | 	}
 73 | }
 74 | 
 75 | // TestFloat64ToString converts various float64s to Values then to strings.
 76 | func TestFloat64ToString(t *testing.T) {
 77 | 	scr := NewScript()
 78 | 	in := []float64{0.0, -123.0, 123.0, -456.7, 456.7, math.MaxFloat64, -math.MaxFloat64, 123.0, -456.4, 456.4}
 79 | 	out := []string{"0", "-123", "123", "-456.7", "456.7", "1.79769e+308", "-1.79769e+308", "123", "-456.4", "456.4"}
 80 | 	for idx, n := range in {
 81 | 		v := scr.NewValue(n)
 82 | 		s := v.String()
 83 | 		if s != out[idx] {
 84 | 			t.Fatalf("Expected %q but received %q", out[idx], s)
 85 | 		}
 86 | 	}
 87 | }
 88 | 
 89 | // TestStringToInt converts various strings to Values then to ints.
 90 | func TestStringToInt(t *testing.T) {
 91 | 	scr := NewScript()
 92 | 	in := []string{"0", "-123", "123", "-456", "456", "9223372036854775807", "-9223372036854775808", "123", "Text999", "321_go", "  789  ", "0x111", "0222"}
 93 | 	out := []int{0, -123, 123, -456, 456, 9223372036854775807, -9223372036854775808, 123, 0, 321, 789, 0, 222}
 94 | 	for idx, n := range in {
 95 | 		v := scr.NewValue(n)
 96 | 		i := v.Int()
 97 | 		if i != out[idx] {
 98 | 			t.Fatalf("Expected %d for %q but received %d", out[idx], n, i)
 99 | 		}
100 | 	}
101 | }
102 | 
103 | // TestStringToFloat64 converts various strings to Values then to float64s.
104 | func TestStringToFloat64(t *testing.T) {
105 | 	scr := NewScript()
106 | 	in := []string{"0", "-123", "123", "-456.7", "456.7", "17.9769e+307", "-17.9769e+307", "123", "-456.4", "456.4", "Text99.99", "99.99e+1000", "  111.111  ", "0x222", "0333", "321_go", "1.2e3e4"}
107 | 	out := []float64{0, -123, 123, -456.7, 456.7, 1.79769e+308, -1.79769e+308, 123, -456.4, 456.4, 0, math.Inf(1), 111.111, 0.0, 333.0, 321.0, 1.2e3}
108 | 	for idx, n := range in {
109 | 		v := scr.NewValue(n)
110 | 		f := v.Float64()
111 | 		if f != out[idx] {
112 | 			t.Fatalf("Expected %.4g for %q but received %.4g", out[idx], n, f)
113 | 		}
114 | 	}
115 | }
116 | 
117 | // TestStringToString converts various strings to Values then back to strings.
118 | func TestStringToString(t *testing.T) {
119 | 	scr := NewScript()
120 | 	for _, n := range []string{"0", "-123", "123", "-456.7", "456.7", "17.9769e+307", "-17.9769e+307", "123", "-456.4", "456.4", "Text99.99", "99.99e+1000"} {
121 | 		v := scr.NewValue(n)
122 | 		s := v.String()
123 | 		if s != n {
124 | 			t.Fatalf("Expected %q but received %q", n, s)
125 | 		}
126 | 	}
127 | }
128 | 
129 | // TestMatch tests if regular-expression matching works.
130 | func TestMatch(t *testing.T) {
131 | 	// We run the test twice to confirm that regexp caching works.
132 | 	scr := NewScript()
133 | 	v := scr.NewValue("Mississippi")
134 | 	in := []string{"p*", "[is]+", "Miss", "hippie", "ippi"}
135 | 	out := []bool{true, true, true, false, true}
136 | 	for range [2]struct{}{} {
137 | 		for idx, n := range in {
138 | 			m := v.Match(n)
139 | 			if m != out[idx] {
140 | 				t.Fatalf("Expected %v but received %v\n", out[idx], m)
141 | 			}
142 | 		}
143 | 	}
144 | 
145 | 	// Test if RStart and RLength are maintained properly.
146 | 	if !v.Match("[is]+") {
147 | 		t.Fatalf("Failed to match %v against %q", v, "[is]+")
148 | 	}
149 | 	if scr.RStart != 2 || scr.RLength != 7 {
150 | 		t.Fatalf("Expected {2, 7} but received {%d, %d}", scr.RStart, scr.RLength)
151 | 	}
152 | 	if v.Match("[xy]+") {
153 | 		t.Fatalf("Incorrectly matched %v against %q", v, "[xy]+")
154 | 	}
155 | 	if scr.RStart != 0 || scr.RLength != -1 {
156 | 		t.Fatalf("Expected {0, -1} but received {%d, %d}", scr.RStart, scr.RLength)
157 | 	}
158 | }
159 | 
160 | // TestStrEqual tests if string comparisons work.
161 | func TestStrEqual(t *testing.T) {
162 | 	// Test case-sensitive comparisons.
163 | 	scr := NewScript()
164 | 	v := scr.NewValue("good")
165 | 	for _, bad := range []string{"bad", "goody", "Good", "good "} {
166 | 		if v.StrEqual(scr.NewValue(bad)) {
167 | 			t.Fatalf("Incorrectly matched %q = %q", "good", bad)
168 | 		}
169 | 	}
170 | 	if !v.StrEqual(scr.NewValue("good")) {
171 | 		t.Fatalf("Failed to match %q", "good")
172 | 	}
173 | 
174 | 	// Test case-insensitive comparisons.
175 | 	scr.IgnoreCase(true)
176 | 	for _, bad := range []string{"bad", "goody", "good "} {
177 | 		if v.StrEqual(scr.NewValue(bad)) {
178 | 			t.Fatalf("Incorrectly matched %q = %q", "good", bad)
179 | 		}
180 | 	}
181 | 	if !v.StrEqual(scr.NewValue("good")) {
182 | 		t.Fatalf("Failed to match %q", "good")
183 | 	}
184 | 	if !v.StrEqual(scr.NewValue("GooD")) {
185 | 		t.Fatalf("Failed to match %q = %q", "good", "GooD")
186 | 	}
187 | }
188 | 


--------------------------------------------------------------------------------