├── .gitignore
├── gg.pdf
├── testdata
    ├── source.cpio
    ├── source.zip
    └── source.tar
├── test.go
├── main_test.go
├── LICENSE
├── utils
    └── compareOutputs.sh
├── README.md
├── multi_reader.go
├── multi_reader_test.go
├── gg.1
├── main.go
├── scan_test.go
└── scan.go


/.gitignore:
--------------------------------------------------------------------------------
1 | log
2 | 


--------------------------------------------------------------------------------
/gg.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MichaelTJones/gg/HEAD/gg.pdf


--------------------------------------------------------------------------------
/testdata/source.cpio:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MichaelTJones/gg/HEAD/testdata/source.cpio


--------------------------------------------------------------------------------
/testdata/source.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MichaelTJones/gg/HEAD/testdata/source.zip


--------------------------------------------------------------------------------
/test.go:
--------------------------------------------------------------------------------
 1 | // 1: gg -summary -n -g '(?s:.)' test.go
 2 | // 2: gg -summary -n aV '(?s:.)' test.go
 3 | 
 4 | // 4
 5 | // 5
 6 | 
 7 | /*
 8 | 8
 9 | 9
10 | */
11 | 
12 | package main // 12
13 | 
14 | func unused() { // 14
15 | 	_ = `line 15...
16 | ...and line16`
17 | } /* 17*/
18 | 
19 | // 19
20 | 


--------------------------------------------------------------------------------
/main_test.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import (
 4 | 	"reflect"
 5 | 	"runtime"
 6 | 	"testing"
 7 | )
 8 | 
 9 | func Test_getMaxCPU(t *testing.T) {
10 | 	actualProcs := runtime.NumCPU()
11 | 	tests := []struct {
12 | 		name    string
13 | 		rcvdVal int
14 | 		want1   int
15 | 	}{
16 | 		{
17 | 			name:    "0 should use all CPUs",
18 | 			rcvdVal: 0,
19 | 			want1:   actualProcs,
20 | 		},
21 | 
22 | 		{
23 | 			name:    "negative number should use all but x CPUs",
24 | 			rcvdVal: -2,
25 | 			want1:   actualProcs - 2,
26 | 		},
27 | 
28 | 		{
29 | 			name:    "should use at least 1 CPU",
30 | 			rcvdVal: -1 * (actualProcs + 2),
31 | 			want1:   2,
32 | 		},
33 | 
34 | 		{
35 | 			name:    "should use 2 workers even if only 1 CPU requested",
36 | 			rcvdVal: 1,
37 | 			want1:   2,
38 | 		},
39 | 	}
40 | 
41 | 	for _, tt := range tests {
42 | 		t.Run(tt.name, func(t *testing.T) {
43 | 			*flagCPUs = tt.rcvdVal
44 | 			got1 := getMaxCPU()
45 | 
46 | 			if !reflect.DeepEqual(got1, tt.want1) {
47 | 				t.Errorf("getMaxCPU got1 = %v, want1: %v", got1, tt.want1)
48 | 			}
49 | 		})
50 | 	}
51 | }
52 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 Michael T Jones
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/utils/compareOutputs.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | go build -o gg *.go
 4 | 
 5 | options="a aC aD aI aK aN aO aP aR aS aV"
 6 | queries="grep for test 2 true -42 5.25 -5.25"
 7 | sources=". testdata/*"
 8 | 
 9 | err=false
10 | for o in $options; do
11 |     for q in $queries; do
12 |         for s in $sources; do
13 |             ./gg -cpu=1 -summary=false -log=a $o $q $s > ./new
14 |             gg -cpu=1 -summary=false -log=b $o $q $s > ./old
15 |             CHANGES=$(diff ./new ./old | wc -l)
16 | 
17 |             if [ $CHANGES -eq 0 ]; then
18 |                 rm -rf ./new ./old ./a ./b
19 |             else
20 |                 echo "Outputs don't match for 'gg $o $q $s'"
21 |                 diff ./new ./old
22 |                 err=true
23 |                 break
24 |             fi
25 |             if [ $err = true ]; then
26 |                 break
27 |             fi
28 |         done
29 |         if [ $err = true ]; then
30 |             break
31 |         fi
32 |     done
33 |     if [ $err = true ]; then
34 |         break
35 |     fi
36 | done
37 | 
38 | if [ $err = false ]; then
39 |     echo "Nice, everything is still working!"
40 | fi
41 | rm -rf ./gg ./a ./b
42 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # gg is grep for Go-language source code
 3 | 
 4 | It restricts the search to
 5 | designated Go token classes, such as identifiers, package names, numbers, comments, keywords, and
 6 | the rest. Because gg understands what it is searching for, it can make smart matches. For
 7 | example:
 8 | 
 9 | * Searching for numbers by _value_ rather than regular expression: find 255
10 | expressed as 0b1111_1111, 0377, 255, or 0xff with "gg v 255 *.go". Note: this is a value
11 | ("v") search
 12 | as opposed to a number ("n") search. Values must be valid Go integer or floating point
13 | literals (22, 0xface, 6.02214076e23, 0o644).
14 | 
15 | * Searching for "if" in Go keywords, but not in comments or strings, is "gg k if ." for _keywords_ matching "if" in all the ".go" files in the current directory.
16 | 
17 | * Searching a file hierarchy recursively for _comments_ containing "case" (ignoring
18 |   switch statements), is "gg -r c case ."
19 | 
20 | * gg has a grep mode, "-g" which omits the Go grammar tokenization. This mode is generally
21 | twice as fast as standard gg, and even faster compared to classic grep. Related is "-go=false" to allow scanning of non-Go files.
22 | 
23 | ## Documentation
24 | 
25 | gg does much more. Please see the [man
26 | page](https://github.com/MichaelTJones/gg/blob/master/gg.pdf) for details.
27 | 
28 | ## Installation
29 | 
 30 | ```sh
31 | go get github.com/MichaelTJones/gg
32 | cd gg
33 | go install
34 | ```
35 | 


--------------------------------------------------------------------------------
/multi_reader.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"archive/tar"
  5 | 	"archive/zip"
  6 | 	"errors"
  7 | 	"io"
  8 | 
  9 | 	"github.com/cavaliercoder/go-cpio"
 10 | )
 11 | 
 12 | // these are the allowed extensions in the multiReader
 13 | const (
 14 | 	eCPIO = iota
 15 | 	eTAR
 16 | 	eZIP
 17 | )
 18 | 
 19 | // multiReader is a struct to allow us to treat all files
 20 | // the same way. It implements the ReadNexter interface.
 21 | // Every multiReader can have a single implementation
 22 | // inside, a zip multiReader cannot be used to read tar files.
 23 | type multiReader struct {
 24 | 	ext   int
 25 | 	rCPIO *cpio.Reader
 26 | 	rTAR  *tar.Reader
 27 | 
 28 | 	rZIP      *zip.ReadCloser
 29 | 	zipReader io.Reader
 30 | 	// zipIndex needs to start the value -1, otherwise
 31 | 	// our logic to determine wich file we are reading
 32 | 	// will not work
 33 | 	zipIndex int
 34 | }
 35 | 
 36 | func (r *multiReader) Read(p []byte) (int, error) {
 37 | 	switch r.ext {
 38 | 	case eCPIO:
 39 | 		return r.rCPIO.Read(p)
 40 | 	case eTAR:
 41 | 		return r.rTAR.Read(p)
 42 | 	case eZIP:
 43 | 		n, e := r.zipReader.Read(p)
 44 | 		return n, e
 45 | 	}
 46 | 	return 0, errors.New("internal reader not found")
 47 | }
 48 | 
 49 | func (r *multiReader) Next() (string, error) {
 50 | 	switch r.ext {
 51 | 	case eCPIO:
 52 | 		header, err := r.rCPIO.Next()
 53 | 		n := ""
 54 | 		if err == nil {
 55 | 			n = header.Name
 56 | 		}
 57 | 		return n, err
 58 | 	case eTAR:
 59 | 		header, err := r.rTAR.Next()
 60 | 		n := ""
 61 | 		if err == nil {
 62 | 			n = header.Name
 63 | 		}
 64 | 		return n, err
 65 | 	case eZIP:
 66 | 		r.zipIndex++
 67 | 		if r.zipIndex >= len(r.rZIP.Reader.File) {
 68 | 			r.rZIP.Close()
 69 | 			return "", io.EOF
 70 | 		}
 71 | 
 72 | 		file := r.rZIP.Reader.File[r.zipIndex]
 73 | 		reader, err := file.Open()
 74 | 		if err != nil {
 75 | 			return "", err
 76 | 		}
 77 | 		r.zipReader = reader
 78 | 		f := file.FileHeader.Name
 79 | 
 80 | 		return f, nil
 81 | 	}
 82 | 	return "", errors.New("internal reader not found")
 83 | }
 84 | 
 85 | func newMultiReader(r io.Reader, ext string, name string) *multiReader {
 86 | 	switch ext {
 87 | 	case ".cpio":
 88 | 		final := cpio.NewReader(r)
 89 | 		return &multiReader{ext: eCPIO, rCPIO: final}
 90 | 	case ".tar":
 91 | 		tr := tar.NewReader(r)
 92 | 		return &multiReader{ext: eTAR, rTAR: tr}
 93 | 	case ".zip":
 94 | 		z, err := zip.OpenReader(name)
 95 | 		if err != nil {
 96 | 			println(err)
 97 | 			return &multiReader{}
 98 | 		}
 99 | 		return &multiReader{ext: eZIP, rZIP: z, zipIndex: -1}
100 | 	}
101 | 	return &multiReader{}
102 | }
103 | 


--------------------------------------------------------------------------------
/multi_reader_test.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"bytes"
  5 | 	"io"
  6 | 	"reflect"
  7 | 	"testing"
  8 | )
  9 | 
 10 | func sameMultiReader(a, b *multiReader) bool {
 11 | 	if a.ext != b.ext {
 12 | 		return false
 13 | 	}
 14 | 	if a.zipIndex != b.zipIndex {
 15 | 		return false
 16 | 	}
 17 | 	return true
 18 | }
 19 | 
 20 | func Test_newMultiReader(t *testing.T) {
 21 | 	type args struct {
 22 | 		r    io.Reader
 23 | 		ext  string
 24 | 		name string
 25 | 	}
 26 | 	tests := []struct {
 27 | 		name string
 28 | 		args func(t *testing.T) args
 29 | 
 30 | 		want1 *multiReader
 31 | 	}{
 32 | 		{
 33 | 			name: "wrong extension should yeld empty multiReader",
 34 | 			args: func(*testing.T) args {
 35 | 				var r *bytes.Buffer
 36 | 				return args{
 37 | 					r:    r,
 38 | 					ext:  "asd",
 39 | 					name: "",
 40 | 				}
 41 | 			},
 42 | 			want1: &multiReader{},
 43 | 		},
 44 | 
 45 | 		{
 46 | 			name: "cpio extension should create a cpio multiReader",
 47 | 			args: func(*testing.T) args {
 48 | 				var r *bytes.Buffer
 49 | 				return args{
 50 | 					r:    r,
 51 | 					ext:  ".cpio",
 52 | 					name: "",
 53 | 				}
 54 | 			},
 55 | 			want1: &multiReader{ext: eCPIO},
 56 | 		},
 57 | 
 58 | 		{
 59 | 			name: "tar extension should create a tar multiReader",
 60 | 			args: func(*testing.T) args {
 61 | 				var r *bytes.Buffer
 62 | 				return args{
 63 | 					r:    r,
 64 | 					ext:  ".tar",
 65 | 					name: "",
 66 | 				}
 67 | 			},
 68 | 			want1: &multiReader{ext: eTAR},
 69 | 		},
 70 | 
 71 | 		{
 72 | 			name: "zip extension should create a zip multiReader",
 73 | 			args: func(*testing.T) args {
 74 | 				var r *bytes.Buffer
 75 | 				return args{
 76 | 					r:    r,
 77 | 					ext:  ".zip",
 78 | 					name: "testdata/source.zip",
 79 | 				}
 80 | 			},
 81 | 			want1: &multiReader{ext: eZIP, zipIndex: -1},
 82 | 		},
 83 | 
 84 | 		{
 85 | 			name: "zip should return empty mutiReader if file doesn't exists",
 86 | 			args: func(*testing.T) args {
 87 | 				var r *bytes.Buffer
 88 | 				return args{
 89 | 					r:    r,
 90 | 					ext:  ".zip",
 91 | 					name: "invalid.zip",
 92 | 				}
 93 | 			},
 94 | 			want1: &multiReader{},
 95 | 		},
 96 | 	}
 97 | 
 98 | 	for _, tt := range tests {
 99 | 		t.Run(tt.name, func(t *testing.T) {
100 | 			tArgs := tt.args(t)
101 | 
102 | 			got1 := newMultiReader(tArgs.r, tArgs.ext, tArgs.name)
103 | 
104 | 			if !sameMultiReader(got1, tt.want1) {
105 | 				t.Errorf("newMultiReader got1 = %v, want1: %v", got1, tt.want1)
106 | 			}
107 | 		})
108 | 	}
109 | }
110 | 
111 | func Test_multiReader_Next(t *testing.T) {
112 | 	zipMR := newMultiReader(&bytes.Buffer{}, ".zip", "testdata/source.zip")
113 | 	tests := []struct {
114 | 		name    string
115 | 		init    func(t *testing.T) *multiReader
116 | 		inspect func(r *multiReader, t *testing.T)
117 | 
118 | 		want1      string
119 | 		wantErr    bool
120 | 		inspectErr func(err error, t *testing.T)
121 | 	}{
122 | 		{
123 | 			name:    "we should find our files in the zip",
124 | 			init:    func(*testing.T) *multiReader { return zipMR },
125 | 			want1:   "main.go",
126 | 			wantErr: false,
127 | 		},
128 | 
129 | 		{
130 | 			name:    "we should find our files in the zip",
131 | 			init:    func(*testing.T) *multiReader { return zipMR },
132 | 			want1:   "main_test.go",
133 | 			wantErr: false,
134 | 		},
135 | 
136 | 		{
137 | 			name:    "we should find our files in the zip",
138 | 			init:    func(*testing.T) *multiReader { return zipMR },
139 | 			want1:   "scan.go",
140 | 			wantErr: false,
141 | 		},
142 | 
143 | 		{
144 | 			name:    "we should find our files in the zip",
145 | 			init:    func(*testing.T) *multiReader { return zipMR },
146 | 			want1:   "scan_test.go",
147 | 			wantErr: false,
148 | 		},
149 | 
150 | 		{
151 | 			name:    "at the end we should get an io.EOF",
152 | 			init:    func(*testing.T) *multiReader { return zipMR },
153 | 			want1:   "",
154 | 			wantErr: true,
155 | 			inspectErr: func(err error, t *testing.T) {
156 | 				if err != io.EOF {
157 | 					t.Errorf("expected io.EOF err, got: %v", err)
158 | 				}
159 | 			},
160 | 		},
161 | 	}
162 | 
163 | 	for _, tt := range tests {
164 | 		t.Run(tt.name, func(t *testing.T) {
165 | 			receiver := tt.init(t)
166 | 			got1, err := receiver.Next()
167 | 
168 | 			if tt.inspect != nil {
169 | 				tt.inspect(receiver, t)
170 | 			}
171 | 
172 | 			if !reflect.DeepEqual(got1, tt.want1) {
173 | 				t.Errorf("multiReader.Next got1 = %v, want1: %v", got1, tt.want1)
174 | 			}
175 | 
176 | 			if (err != nil) != tt.wantErr {
177 | 				t.Fatalf("multiReader.Next error = %v, wantErr: %t", err, tt.wantErr)
178 | 			}
179 | 
180 | 			if tt.inspectErr != nil {
181 | 				tt.inspectErr(err, t)
182 | 			}
183 | 		})
184 | 	}
185 | }
186 | 


--------------------------------------------------------------------------------
/gg.1:
--------------------------------------------------------------------------------
  1 | \# gg manpage
  2 | .do xflag 3
  3 | .minss 9
  4 | .letadj 95 98 18 105 102
  5 | .padj
  6 | .ft R
  7 | .fp 1 R MinionPro-Regular otf
  8 | .fp 2 C MetaOT-Book otf
  9 | .fzoom 2 0.88
 10 | \# nroff -man gg.1
 11 | \# groff -t -e -mandoc -Tps gg.1 > gg.ps
 12 | \# troff -t -mandoc gg.1 | dpost > gg.ps
 13 | \# heirloom troff
 14 | \# tbl -g gg.1 | troff -t -mandoc | dpost > gg.ps
 15 | \# tbl -g gg.1 | troff -t -mandoc | dpost | ps2pdf -dCompatibilityLevel=1.3 - - > gg.pdf
 16 | .TH GG 1
 17 | .SH NAME
 18 | gg \- grep Go-language source code
 19 | .SH SYNOPSIS
 20 | gg [\fIoptions\fR] \fIacdiknoprstvg\fR \fIregexp\fR [\fIfile ...\fR]
 21 | .SH DESCRIPTION
 22 | gg is classic grep (g/RE/p) with Go token flags to limit the search to
 23 | package names, numbers, identifiers, comments, keywords, and more.
 24 | The token flags are "acdiknoprstvg" in any order or combination:
 25 | .PP
 26 | .RS
 27 | .TS
 28 | c l.
 29 | a	search in All of the following
 30 | c	search in Comments (//... or /*...*/)
 31 | d	search in Defined non-types (iota, nil, new, true, ...)
 32 | i	search in Identifiers ([alphabetic][alphabetic | numeric]*)
 33 | k	search in Keywords (if, for, func, go, ...)
 34 | n	search in Numbers (regex "255" matches 255, 0.255, 1e255)
 35 | o	search in Operators (\|,\|+\|-\|*\|/\|[\|]\|{\|}\|(\|)\|>>\|)
 36 | p	search in Package names
 37 | r	search in Rune literals ('a', '\\U00101234')
 38 | s	search in Strings (quoted or raw)
 39 | t	search in Types (bool, int, float64, map, ...)
 40 | v	search in Values (number 255 == 0b11111111, 0377, 0o377, 255, 0xff)
 41 | g	search as grep, perform line-by-line matches in each file
 42 | .TE
 43 | .RE
 44 | .PP
 45 | gg combines lexical analysis and Go-native pattern matching to extend
 46 | .BR grep (1)
 47 | for Go developers.
 48 | The search is restricted, seeking matches only in chosen token classes.
 49 | A search in number literals finds equal \fIvalues\fR, "v 255" matches the number 255
 50 | in source code as 0b1111_1111, 0377, 0o377, 255, and 0xff.
 51 | Go's linear-time regular expression engine is Unicode-aware and supports
 52 | many Perl extensions: numbers in identifiers are found with
 53 | "\f2gg i [0-9]\f1"
 54 | or
 55 | "\f2gg i [\\d]\f1",
 56 | comments with math symbols by
 57 | "\f2gg c \\p{Sm}\f1",
 58 | and Greek in strings via
 59 | "\f2gg s \\p{Greek}\f1"
 60 | each with appropriate shell escaping.
 61 | .PP
 62 | gg searches files named on the command line or in a file of filenames provided by
 63 | the "-list" argument.
 64 | If neither of these is present, gg reads filenames from the standard input, which is useful
 65 | in shell pipelines such as
 66 | "\f2find . -name '*.go' | gg k fallthrough\f1"
 67 | .PP
 68 | Files are Go source code files or directories.
 69 | Source files include typical ".go"
 70 | files; compressed ".go" files named ".go.bz2", ".go.gz", or ".go.zst" for Bzip2, Gzip,
 71 | and ZStandard compression formats; archives of any such files in the formats "a.cpio",
 72 | "a.tar", or "a.zip"; or, finally, compressed archives as in "a.cpio.bz2" and "a.tar.gz".
 73 | If a named file is a directory then Go source files in that directory are scanned
 74 | without visiting subdirectories.
 75 | With the "-r" flag enabled, named directories are processed recursively, scanning
 76 | each Go source file or archive in that directory's hierarchy.
 77 | .SH OPTIONS
 78 | .TP
 79 | .BR \-cpu =\fIn\fR
 80 | Set the number of CPUs to use. Negative n means "all but n."
 81 | Default is all.
 82 | .TP
 83 | .BR \-go =\fIbool\fR
 84 | Limit search to ".go" files.
 85 | Default is true.
 86 | .TP
 87 | .BR \-h =\fIbool\fR
 88 | Display file names ("headers") on matches.
 89 | Default is false for single-file searches and true otherwise.
 90 | .TP
 91 | .BR \-list =\fIfile\fR
 92 | Search files listed one per line in the named file.
 93 | .TP
 94 | .BR \-log =\fIfile\fR
 95 | Write a log of execution details to a named file.
 96 | The special file names "[stdout]" and "[stderr]" refer to the stdout and stderr streams.
 97 | (Last line of log details efficiency.)
 98 | .TP
 99 | .BR \-n =\fIbool\fR
100 | Display line numbers following each match. Numbers count from one per file.
101 | Default is false.
102 | .TP
103 | .BR \-output =\fIfile\fR
104 | gg output is normally to stdout but may be directed to a named file.
105 | The special names "[stdout]" and "[stderr]" refer to the stdout and stderr streams.
106 | .TP
107 | .BR \-r =\fIbool\fR
108 | Search directories recursively.
109 | Default is false.
110 | .TP
111 | .BR \-visible =\fIbool\fR
112 | Restrict search to visible files, those with names that do not start with "." (in the shell tradition).
113 | Default is true.
114 | .TP
115 | .BR \fIacdiknoprstvCDIKNOPRSTVg\fR
116 | The Go token class flags have an upper case negative form to disable the indicated class.
117 | Used with "a" for "all", "aCS" means "search All tokens except Comments and Strings."
118 | Flag "g" means bypass Go lexical analysis and search files as the
119 | .BR grep (1)
120 | command, matching whole lines.
121 | .SH EXAMPLES
122 | To search for comments containing "case" (ignoring switch statements) in every
123 | ".go" file in the current working directory, use the command:
124 | .PP
125 | .nf
126 | .RS
127 | \f2gg c case .\f1
128 | .RE
129 | .fi
130 | .PP
131 | To find number literals containing the digits 42 (or any regexp) in ".go" files located anywhere in the current
132 | directory's hierarchy, use the command:
133 | .PP
134 | .nf
135 | .RS
136 | \f2gg -r n 42 .\f1
137 | .RE
138 | .fi
139 | .PP
140 | Find numbers with values equal to 255 in any Go form (0b1111_1111, 0377,
141 | 0o377, 255, 0x00ff) in ".go" files in the gzipped
142 | .BR tar (1)
143 | archive omega with the command:
144 | .PP
145 | .nf
146 | .RS
147 | \f2gg v 255 omega.tar.gz\f1
148 | .RE
149 | .fi
150 | .SH AUTHOR
151 | Michael T. Jones (https://github.com/MichaelTJones)
152 | .SH SEE ALSO
153 | .nf
154 | https://golang.org/pkg/regexp/syntax/
155 | https://en.wikipedia.org/wiki/Unicode_character_property
156 | Further advanced features and options are described in main.go
157 | .fi
158 | 


--------------------------------------------------------------------------------
/main.go:
--------------------------------------------------------------------------------
  1 | /*
  2 |    gg is classic grep (g/RE/p) with Go knowledge to search package names,
  3 |    numbers, identifiers, comments, keywords, and other language tokens.
  4 | */
  5 | 
  6 | package main
  7 | 
  8 | import (
  9 | 	"flag"
 10 | 	"fmt"
 11 | 	"log"
 12 | 	"os"
 13 | 	"runtime"
 14 | 	"runtime/pprof"
 15 | 	"strings"
 16 | 	"time"
 17 | )
 18 | 
 19 | // common flags
 20 | var flagCPUs = flag.Int("cpu", -1, "number of CPUs to use (0 for all)")
 21 | var flagGo = flag.Bool("go", true, `limit grep to Go files ("main.go")`)
 22 | var flagList = flag.String("list", "", "list of filenames to grep")
 23 | var flagLog = flag.String("log", "", `write log to named file (or "[stdout]" or "[stderr]")`)
 24 | var flagOutput = flag.String("output", "", `write output to named file (or "[stdout]" or "[stderr]")`)
 25 | var flagRecursive = flag.Bool("r", false, "grep directories recursively")
 26 | var flagVisible = flag.Bool("visible", true, `limit grep to visible files (skip ".hidden.go")`)
 27 | 
 28 | // grep-compatibility flags
 29 | var flagActLikeGrep = flag.Bool("g", false, "act like grep")
 30 | var flagFileName = flag.Bool("h", false, `disply file name ("header") for each match`)
 31 | var flagLineNumber = flag.Bool("n", false, "disply line number for each match")
 32 | 
 33 | // secret developer flags
 34 | var flagBufferSize = flag.Int("bufferSize", 64*1024, "output buffer size")
 35 | var flagBufferWrites = flag.Bool("bufferWrites", true, "buffer output writes")
 36 | var flagDigits = flag.Bool("digits", true, "format numbers into digit groups")
 37 | var flagMap = flag.Bool("map", true, "use mmap for plain files")
 38 | var flagProfileCPU = flag.String("cpuprofile", "", "write cpu profile to file")
 39 | var flagProfileMem = flag.String("memprofile", "", "write memory profile to file")
 40 | var flagSummary = flag.Bool("summary", false, "print performance summary")
 41 | var flagUnordered = flag.Bool("unordered", true, "disregard file traversal order")
 42 | 
 43 | // var flagTrim = flag.Bool("trim", false, "trim matched strings")
 44 | 
 45 | // usage string is the whole man page
 46 | var usage = `NAME
 47 |     gg - grep Go-language source code
 48 | 
 49 | SYNOPSIS
 50 |     gg [options] acdiknoprstvg regexp [file ...]
 51 | 
 52 | DESCRIPTION
 53 |     gg is classic grep (g/RE/p) with flag-directed Go token focus to search
 54 |     in package names, numbers, identifiers, comments, keywords, and more.
 55 |     Token flags are "acdiknoprstvg" in any order or combination:
 56 | 
 57 |        a   search in All of the following
 58 |        c   search in Comments (//... or /*...*/)
 59 |        d   search in Defined non-types (iota, nil, new, true,...)
 60 |        i   search in Identifiers ([alphabetic][alphabetic | numeric]*)
 61 |        k   search in Keywords (if, for, func, go, ...)
 62 |        n   search in Numbers ("255" matches 255, 0.255, 1e255)
 63 |        o   search in Operators (,  +  -  *  /  [  ] {  }  ( )  >>...)
 64 |        p   search in Package names
 65 |        r   search in Rune literals ('a', '\U00101234')
 66 |        s   search in Strings (quoted or raw)
 67 |        t   search in Types (bool, int, float64, map, ...)
 68 |        v   search in Values (255 is 0b11111111, 0377, 255, 0xff)
 69 |        g   search as grep, perform simple line-by-line matches in file
 70 | 
 71 |     gg combines lexical analysis and Go-native pattern matching to extend
 72 |     grep(1) for Go developers.  The search is restricted, seeking matches
 73 |     only in chosen token classes.  A search in number literals can match
 74 |     values, "v 255" matches the numeric value 255 in source code as
 75 |     0b1111_1111, 0377, 0o377, 255, 0xff, etc.  Go's linear-time regular
 76 |     expression engine is Unicode-aware and supports many Perl extensions:
 77 |     numbers in identifiers are found with "gg i [0-9]" or "gg i [\d]",
 78 |     comments with math symbols by "gg c \p{Sm}", and Greek in strings via
 79 |     "gg s \p{Greek}" each with appropriate shell escaping.
 80 | 
 81 |     gg searches files names listed on the command line or in a file of
 82 |     filenames provided the "-list" argument.  If neither of these is
 83 |     present, gg reads file names from the standard input which is useful in
 84 |     shell pipelines such as "find . -name "*.go" | gg k fallthrough"
 85 | 
 86 |     Files are Go source code files or directories.  Source files include
 87 |     typical ".go" files; compressed ".go" files named ".go.bz2", ".go.gz",
 88 |     or ".go.zst" for Bzip2, Gzip, and ZStandard compression formats;
 89 |     archives of any such files in the formats "a.cpio", "a.tar", or
 90 |     "a.zip"; or, finally, compressed archives as in "a.cpio.bz2" and
 91 |     "a.tar.gz".  If a named file is a directory then all Go source files
 92 |     in that directory are scanned without visiting subdirectories.  With
 93 |     the "-r" flag enabled, named directories are processed recursively,
 94 |     scanning each Go source file or archive in that directory's hierarchy.
 95 | 
 96 | OPTIONS
 97 |     -cpu=n
 98 |         Set the number of CPUs to use. Negative n means "all but n."
 99 |         Default is all.
100 | 
101 |     -go=bool
102 |         Limit search to ".go" files.  Default is true.
103 | 
104 |     -h=bool
105 |         Display file names ("headers") on matches.  Default is false for
106 |         single-file searches and true otherwise.
107 | 
108 |     -list=file
109 |         Search files listed one per line in the named file.
110 | 
111 |     -log=file
112 |         Write a log of execution details to a named file.  The special
113 |         file names "[stdout]" and "[stderr]" refer to the stdout and
114 |         stderr streams.  (Last line of log details efficiency.)
115 | 
116 |     -n=bool
117 |         Display line numbers following each match. Numbers count from
118 |         one per file.  Default is false.
119 | 
120 |     -output=file
121 |         gg output is normally to stdout but may be directed to a named
122 |         file.  The special names "[stdout]" and "[stderr]" refer to the
123 |         stdout and stderr streams.
124 | 
125 |     -r=bool
126 |         Search directories recursively.  Default is false.
127 | 
128 |     -visible=bool
129 |         Restrict search to visible files, those with names that do not
130 |         start with "." (in the shell tradition).  Default is true.
131 | 
132 |     acdiknoprstvCDIKNOPRSTVg
133 |         The Go token class flags have an upper case negative form to
134 |         disable the indicated class.  Used with "a" for "all", "aCS"
135 |         means "search All tokens except Comments and Strings."  Flag "g"
136 |         means search as if the grep command, ignore Go lexical analysis
137 |         and match lines.
138 | 
139 | EXAMPLES
140 |     To search for comments containing "case" (ignoring switch statements)
141 |     in every ".go" file in the current working directory, use the command:
142 | 
143 |         gg c case .
144 | 
145 |     To find number literals containing the digits 42 in ".go" files located
146 |     anywhere in the current directory's hierarchy, use the command:
147 | 
148 |         gg -r n 42 .
149 | 
150 |     Find numbers with values of 255 (0b1111_1111, 0377, 0o377, 255, 0xff)
151 |     in ".go" files in the gzipped tar(1) archive omega with the command:
152 | 
153 |         gg v 255 omega.tar.gz
154 | 
155 | AUTHOR
156 |     Michael T. Jones (https://github.com/MichaelTJones)
157 | 
158 | SEE ALSO
159 |     https://golang.org/pkg/regexp/syntax/
160 |     https://github.com/google/re2/wiki/Syntax
161 |     https://en.wikipedia.org/wiki/Unicode_character_property
162 | `
163 | 
164 | func main() {
165 | 	// parse command line to allow access to profiling options in doProfile()
166 | 	flag.Usage = func() {
167 | 		fmt.Fprintf(flag.CommandLine.Output(), "\n%s", usage)
168 | 	}
169 | 	flag.Parse()
170 | 
171 | 	// launch program
172 | 	programStatus := doProfile()
173 | 
174 | 	// return program status to shell
175 | 	os.Exit(programStatus)
176 | }
177 | 
178 | // profile the program's execution
179 | func doProfile() int {
180 | 	if *flagProfileCPU != "" {
181 | 		f, err := os.Create(*flagProfileCPU)
182 | 		if err != nil {
183 | 			fmt.Fprintf(os.Stderr, "could not create CPU profile: %v\n", err)
184 | 			return 2 // grep-compatible code for program error
185 | 		}
186 | 		defer func() {
187 | 			f.Close()
188 | 			fmt.Fprintf(os.Stderr, "cpu profile recorded in %s\n", *flagProfileCPU)
189 | 		}()
190 | 		if err := pprof.StartCPUProfile(f); err != nil {
191 | 			fmt.Fprintf(os.Stderr, "could not start CPU profile: %v\n", err)
192 | 			return 2 // grep-compatible code for program error
193 | 		}
194 | 		defer pprof.StopCPUProfile()
195 | 	}
196 | 
197 | 	// execute the program
198 | 	programStatus := doMain()
199 | 
200 | 	if *flagProfileMem != "" {
201 | 		f, err := os.Create(*flagProfileMem)
202 | 		if err != nil {
203 | 			fmt.Fprintf(os.Stderr, "could not create memory profile: %v\n", err)
204 | 			return 2 // grep-compatible code for program error
205 | 		}
206 | 		defer f.Close()
207 | 		defer func() {
208 | 			f.Close()
209 | 			fmt.Fprintf(os.Stderr, "memory profile recorded in %s\n", *flagProfileMem)
210 | 		}()
211 | 		runtime.GC() // get up-to-date statistics
212 | 		if err := pprof.WriteHeapProfile(f); err != nil {
213 | 			fmt.Fprintf(os.Stderr, "could not write memory profile: %v\n", err)
214 | 			return 2 // grep-compatible code for program error
215 | 		}
216 | 	}
217 | 
218 | 	// trigger completion of profiling and return status
219 | 	return programStatus
220 | }
221 | 
222 | func doMain() int {
223 | 	// set logging format and destination before first log event
224 | 	log.SetFlags(log.LstdFlags | log.Lmicroseconds)
225 | 	switch strings.ToLower(*flagLog) {
226 | 	case "":
227 | 		// no logging
228 | 	case "[stdout]":
229 | 		log.SetOutput(os.Stdout)
230 | 	case "[stderr]":
231 | 		log.SetOutput(os.Stderr)
232 | 	default:
233 | 		file, err := os.Create(*flagLog)
234 | 		if err != nil {
235 | 			log.Print(err)
236 | 			return 2
237 | 		}
238 | 		log.SetOutput(file)
239 | 	}
240 | 
241 | 	// control concurrency (for testing and tuning)
242 | 	*flagCPUs = getMaxCPU()
243 | 
244 | 	// bonus feature:
245 | 	// If you make a symbolic link to the executable or otherwise rename it from "gg" then it
246 | 	// will automatically run in "be like grep" mode without needing the "g" or any other flag.
247 | 	// if !strings.HasSuffix(os.Args[0], "gg") {
248 | 	// 	*flagActLikeGrep = true // if user's made a symlink or renamed, become grep
249 | 	// }
250 | 
251 | 	if flag.NArg() < 1 {
252 | 		fmt.Fprintf(os.Stderr, "usage: gg [flags] acdiknoprstvg regexp [file ...]\n")
253 | 		fmt.Fprintf(os.Stderr, "    gg -help for details\n")
254 | 		return 2 // failure: (like grep: return 2 instead of 1)
255 | 	}
256 | 
257 | 	if *flagRecursive {
258 | 		*flagFileName = true
259 | 	}
260 | 
261 | 	// perform actual work
262 | 	start := time.Now()
263 | 	s, err := doScan()
264 | 	elapsed := time.Since(start).Seconds()
265 | 	user, system, _ := getResourceUsage()
266 | 
267 | 	// print performance summary
268 | 	if *flagLog != "" {
269 | 		s.print(elapsed, user, system, printf) // print to log
270 | 	}
271 | 	if *flagSummary {
272 | 		s.print(elapsed, user, system, func(f string, v ...interface{}) {
273 | 			_, _ = fmt.Printf(f, v...) // print to stdout
274 | 		})
275 | 	}
276 | 
277 | 	// return grep-compatible program status
278 | 	programStatus := 0
279 | 	switch {
280 | 	case err != nil:
281 | 		printf("error: %v", err)
282 | 		programStatus = 2 // program failure: (like grep)
283 | 	case s.matches <= 0:
284 | 		programStatus = 1 // search unsuccessful: no match; handy in shell "&&" constructs
285 | 	default: // err==nil && s.matches>=1
286 | 		programStatus = 0 // search successful: 1 or more matches
287 | 	}
288 | 	return programStatus
289 | }
290 | 
291 | func getMaxCPU() int {
292 | 	// honor cpu option flag...
293 | 	cpus := runtime.NumCPU() // default is all CPUs
294 | 	switch {
295 | 	case *flagCPUs > 0:
296 | 		cpus = *flagCPUs // claim N CPUs (+2 means "use 2 CPUs")
297 | 	case *flagCPUs < 0:
298 | 		cpus = *flagCPUs + cpus // spare N CPUs (-2 means "use all but 2 CPUs")
299 | 	}
300 | 	// ...but allow at least 2 scan worker goroutines
301 | 	if cpus < 2 {
302 | 		cpus = 2
303 | 	}
304 | 	return cpus
305 | }
306 | 


--------------------------------------------------------------------------------
/scan_test.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"reflect"
  5 | 	"regexp"
  6 | 	"testing"
  7 | )
  8 | 
  9 | func Test_visibleWithFlagSet(t *testing.T) {
 10 | 	*flagVisible = true
 11 | 	type args struct {
 12 | 		name string
 13 | 	}
 14 | 	tests := []struct {
 15 | 		name string
 16 | 		args func(t *testing.T) args
 17 | 
 18 | 		want1 bool
 19 | 	}{
 20 | 		{
 21 | 			name: "hidden file",
 22 | 			args: func(*testing.T) args {
 23 | 				return args{name: ".test"}
 24 | 			},
 25 | 			want1: false,
 26 | 		},
 27 | 
 28 | 		{
 29 | 			name: "normal file in hidden folder should not be visible",
 30 | 			args: func(*testing.T) args {
 31 | 				return args{name: "/home/user/.config/test.go"}
 32 | 			},
 33 | 			want1: false,
 34 | 		},
 35 | 
 36 | 		{
 37 | 			name: "normal file",
 38 | 			args: func(*testing.T) args {
 39 | 				return args{name: "test"}
 40 | 			},
 41 | 			want1: true,
 42 | 		},
 43 | 
 44 | 		{
 45 | 			name: "go source file",
 46 | 			args: func(*testing.T) args {
 47 | 				return args{name: "test.go"}
 48 | 			},
 49 | 			want1: true,
 50 | 		},
 51 | 	}
 52 | 
 53 | 	for _, tt := range tests {
 54 | 		t.Run(tt.name, func(t *testing.T) {
 55 | 			tArgs := tt.args(t)
 56 | 
 57 | 			got1 := isVisible(tArgs.name)
 58 | 
 59 | 			if !reflect.DeepEqual(got1, tt.want1) {
 60 | 				t.Errorf("visible got1 = %v, want1: %v", got1, tt.want1)
 61 | 			}
 62 | 		})
 63 | 	}
 64 | }
 65 | 
 66 | func Test_visibleWithoutFlagSet(t *testing.T) {
 67 | 	// flagVisible = false means that we will show results for hidden files
 68 | 	*flagVisible = false
 69 | 	type args struct {
 70 | 		name string
 71 | 	}
 72 | 	tests := []struct {
 73 | 		name string
 74 | 		args func(t *testing.T) args
 75 | 
 76 | 		want1 bool
 77 | 	}{
 78 | 		{
 79 | 			name: "hidden file",
 80 | 			args: func(*testing.T) args {
 81 | 				return args{name: ".test"}
 82 | 			},
 83 | 			want1: true,
 84 | 		},
 85 | 
 86 | 		{
 87 | 			name: "normal file in hidden folder should be visible",
 88 | 			args: func(*testing.T) args {
 89 | 				return args{name: "/home/user/.config/test.go"}
 90 | 			},
 91 | 			want1: true,
 92 | 		},
 93 | 
 94 | 		{
 95 | 			name: "normal file",
 96 | 			args: func(*testing.T) args {
 97 | 				return args{name: "test"}
 98 | 			},
 99 | 			want1: true,
100 | 		},
101 | 
102 | 		{
103 | 			name: "go source file",
104 | 			args: func(*testing.T) args {
105 | 				return args{name: "test.go"}
106 | 			},
107 | 			want1: true,
108 | 		},
109 | 	}
110 | 
111 | 	for _, tt := range tests {
112 | 		t.Run(tt.name, func(t *testing.T) {
113 | 			tArgs := tt.args(t)
114 | 
115 | 			got1 := isVisible(tArgs.name)
116 | 
117 | 			if !reflect.DeepEqual(got1, tt.want1) {
118 | 				t.Errorf("visible got1 = %v, want1: %v", got1, tt.want1)
119 | 			}
120 | 		})
121 | 	}
122 | }
123 | 
124 | func Test_isCompressed(t *testing.T) {
125 | 	type args struct {
126 | 		name string
127 | 	}
128 | 	tests := []struct {
129 | 		name string
130 | 		args func(t *testing.T) args
131 | 
132 | 		want1 bool
133 | 	}{
134 | 		{
135 | 			name: ".bz2 is a valid compression",
136 | 			args: func(*testing.T) args {
137 | 				return args{name: "test.bz2"}
138 | 			},
139 | 			want1: true,
140 | 		},
141 | 
142 | 		{
143 | 			name: ".gz is a valid compression",
144 | 			args: func(*testing.T) args {
145 | 				return args{name: "test.gz"}
146 | 			},
147 | 			want1: true,
148 | 		},
149 | 
150 | 		{
151 | 			name: ".zst is a valid compression",
152 | 			args: func(*testing.T) args {
153 | 				return args{name: "test.zst"}
154 | 			},
155 | 			want1: true,
156 | 		},
157 | 
158 | 		{
159 | 			name: ".go isn't a valid compression",
160 | 			args: func(*testing.T) args {
161 | 				return args{name: "test.go"}
162 | 			},
163 | 			want1: false,
164 | 		},
165 | 	}
166 | 
167 | 	for _, tt := range tests {
168 | 		t.Run(tt.name, func(t *testing.T) {
169 | 			tArgs := tt.args(t)
170 | 
171 | 			got1 := isCompressed(tArgs.name)
172 | 
173 | 			if !reflect.DeepEqual(got1, tt.want1) {
174 | 				t.Errorf("isCompressed got1 = %v, want1: %v", got1, tt.want1)
175 | 			}
176 | 		})
177 | 	}
178 | }
179 | 
180 | func Test_isGoWithFlagSet(t *testing.T) {
181 | 	*flagGo = true
182 | 	type args struct {
183 | 		name string
184 | 	}
185 | 	tests := []struct {
186 | 		name string
187 | 		args func(t *testing.T) args
188 | 
189 | 		want1 bool
190 | 	}{
191 | 		{
192 | 			name: "go files should pass",
193 | 			args: func(*testing.T) args {
194 | 				return args{name: "test.go"}
195 | 			},
196 | 			want1: true,
197 | 		},
198 | 
199 | 		{
200 | 			name: "zip files should not pass",
201 | 			args: func(*testing.T) args {
202 | 				return args{name: "test.go.zip"}
203 | 			},
204 | 			// is this assertion right ?
205 | 			want1: false,
206 | 		},
207 | 
208 | 		{
209 | 			name: "gz files should pass",
210 | 			args: func(*testing.T) args {
211 | 				return args{name: "test.go.gz"}
212 | 			},
213 | 			want1: true,
214 | 		},
215 | 
216 | 		{
217 | 			name: "bz2 files should pass",
218 | 			args: func(*testing.T) args {
219 | 				return args{name: "test.go.bz2"}
220 | 			},
221 | 			want1: true,
222 | 		},
223 | 
224 | 		{
225 | 			name: "zst files should pass",
226 | 			args: func(*testing.T) args {
227 | 				return args{name: "test.go.zst"}
228 | 			},
229 | 			want1: true,
230 | 		},
231 | 	}
232 | 
233 | 	for _, tt := range tests {
234 | 		t.Run(tt.name, func(t *testing.T) {
235 | 			tArgs := tt.args(t)
236 | 
237 | 			got1 := isGo(tArgs.name)
238 | 
239 | 			if !reflect.DeepEqual(got1, tt.want1) {
240 | 				t.Errorf("isGo got1 = %v, want1: %v", got1, tt.want1)
241 | 			}
242 | 		})
243 | 	}
244 | }
245 | 
246 | func Test_isGoWithoutFlagSet(t *testing.T) {
247 | 	// with this flag set to false our search isn't limited to .go files
248 | 	*flagGo = false
249 | 	type args struct {
250 | 		name string
251 | 	}
252 | 	tests := []struct {
253 | 		name string
254 | 		args func(t *testing.T) args
255 | 
256 | 		want1 bool
257 | 	}{
258 | 		{
259 | 			name: "go files should pass",
260 | 			args: func(*testing.T) args {
261 | 				return args{name: "test.go"}
262 | 			},
263 | 			want1: true,
264 | 		},
265 | 
266 | 		{
267 | 			name: "zipped go files should pass",
268 | 			args: func(*testing.T) args {
269 | 				return args{name: "test.go.zip"}
270 | 			},
271 | 			want1: true,
272 | 		},
273 | 
274 | 		{
275 | 			name: "anything should pass when flagGo = false",
276 | 			args: func(*testing.T) args {
277 | 				return args{name: "test.zip.exe"}
278 | 			},
279 | 			want1: true,
280 | 		},
281 | 	}
282 | 
283 | 	for _, tt := range tests {
284 | 		t.Run(tt.name, func(t *testing.T) {
285 | 			tArgs := tt.args(t)
286 | 
287 | 			got1 := isGo(tArgs.name)
288 | 
289 | 			if !reflect.DeepEqual(got1, tt.want1) {
290 | 				t.Errorf("isGo got1 = %v, want1: %v", got1, tt.want1)
291 | 			}
292 | 		})
293 | 	}
294 | }
295 | 
296 | func Test_isArchive(t *testing.T) {
297 | 	type args struct {
298 | 		name string
299 | 	}
300 | 	tests := []struct {
301 | 		name string
302 | 		args func(t *testing.T) args
303 | 
304 | 		want1 bool
305 | 	}{
306 | 		{
307 | 			name: "tar is a valid archive format",
308 | 			args: func(*testing.T) args {
309 | 				return args{name: "test.tar"}
310 | 			},
311 | 			want1: true,
312 | 		},
313 | 
314 | 		{
315 | 			name: "zip is a valid archive format",
316 | 			args: func(*testing.T) args {
317 | 				return args{name: "test.zip"}
318 | 			},
319 | 			want1: true,
320 | 		},
321 | 
322 | 		{
323 | 			name: "cpio is a valid archive format",
324 | 			args: func(*testing.T) args {
325 | 				return args{name: "test.cpio"}
326 | 			},
327 | 			want1: true,
328 | 		},
329 | 
330 | 		{
331 | 			name: "cpio.bz2 is a valid archive format",
332 | 			args: func(*testing.T) args {
333 | 				return args{name: "test.cpio.bz2"}
334 | 			},
335 | 			want1: true,
336 | 		},
337 | 
338 | 		{
339 | 			name: "cpio.exe isn't a valid archive format",
340 | 			args: func(*testing.T) args {
341 | 				return args{name: "test.cpio.exe"}
342 | 			},
343 | 			want1: false,
344 | 		},
345 | 	}
346 | 
347 | 	for _, tt := range tests {
348 | 		t.Run(tt.name, func(t *testing.T) {
349 | 			tArgs := tt.args(t)
350 | 
351 | 			got1 := isArchive(tArgs.name)
352 | 
353 | 			if !reflect.DeepEqual(got1, tt.want1) {
354 | 				t.Errorf("isArchive got1 = %v, want1: %v", got1, tt.want1)
355 | 			}
356 | 		})
357 | 	}
358 | }
359 | 
360 | func Test_parseFirstArg(t *testing.T) {
361 | 	type args struct {
362 | 		input string
363 | 	}
364 | 	tests := []struct {
365 | 		name string
366 | 		args func(t *testing.T) args
367 | 
368 | 		want1 searchMode
369 | 	}{
370 | 		{
371 | 			name: "'a' should include all",
372 | 			args: func(*testing.T) args {
373 | 				return args{input: "a"}
374 | 			},
375 | 			want1: searchMode{
376 | 				C: true,
377 | 				D: true,
378 | 				I: true,
379 | 				K: true,
380 | 				N: true,
381 | 				O: true,
382 | 				P: true,
383 | 				R: true,
384 | 				S: true,
385 | 				T: true,
386 | 				V: true,
387 | 			},
388 | 		},
389 | 
390 | 		{
391 | 			name: "'c' should include only comments",
392 | 			args: func(*testing.T) args {
393 | 				return args{input: "c"}
394 | 			},
395 | 			want1: searchMode{
396 | 				C: true,
397 | 			},
398 | 		},
399 | 
400 | 		{
401 | 			name: "'aC' should only exclude comments",
402 | 			args: func(*testing.T) args {
403 | 				return args{input: "aC"}
404 | 			},
405 | 			want1: searchMode{
406 | 				C: false,
407 | 				D: true,
408 | 				I: true,
409 | 				K: true,
410 | 				N: true,
411 | 				O: true,
412 | 				P: true,
413 | 				R: true,
414 | 				S: true,
415 | 				T: true,
416 | 				V: true,
417 | 			},
418 | 		},
419 | 
420 | 		{
421 | 			name: "'d' should include only defined non-types",
422 | 			args: func(*testing.T) args {
423 | 				return args{input: "d"}
424 | 			},
425 | 			want1: searchMode{
426 | 				D: true,
427 | 			},
428 | 		},
429 | 
430 | 		{
431 | 			name: "'aD' should only exclude defined non-types",
432 | 			args: func(*testing.T) args {
433 | 				return args{input: "aD"}
434 | 			},
435 | 			want1: searchMode{
436 | 				C: true,
437 | 				D: false,
438 | 				I: true,
439 | 				K: true,
440 | 				N: true,
441 | 				O: true,
442 | 				P: true,
443 | 				R: true,
444 | 				S: true,
445 | 				T: true,
446 | 				V: true,
447 | 			},
448 | 		},
449 | 
450 | 		{
451 | 			name: "'i' should include only identifiers",
452 | 			args: func(*testing.T) args {
453 | 				return args{input: "i"}
454 | 			},
455 | 			want1: searchMode{
456 | 				I: true,
457 | 			},
458 | 		},
459 | 
460 | 		{
461 | 			name: "'aI' should only exclude identifiers",
462 | 			args: func(*testing.T) args {
463 | 				return args{input: "aI"}
464 | 			},
465 | 			want1: searchMode{
466 | 				C: true,
467 | 				D: true,
468 | 				I: false,
469 | 				K: true,
470 | 				N: true,
471 | 				O: true,
472 | 				P: true,
473 | 				R: true,
474 | 				S: true,
475 | 				T: true,
476 | 				V: true,
477 | 			},
478 | 		},
479 | 
480 | 		{
481 | 			name: "'k' should include only keywords",
482 | 			args: func(*testing.T) args {
483 | 				return args{input: "k"}
484 | 			},
485 | 			want1: searchMode{
486 | 				K: true,
487 | 			},
488 | 		},
489 | 
490 | 		{
491 | 			name: "'aK' should only exclude keywords",
492 | 			args: func(*testing.T) args {
493 | 				return args{input: "aK"}
494 | 			},
495 | 			want1: searchMode{
496 | 				C: true,
497 | 				D: true,
498 | 				I: true,
499 | 				K: false,
500 | 				N: true,
501 | 				O: true,
502 | 				P: true,
503 | 				R: true,
504 | 				S: true,
505 | 				T: true,
506 | 				V: true,
507 | 			},
508 | 		},
509 | 
510 | 		{
511 | 			name: "'n' should include only numbers",
512 | 			args: func(*testing.T) args {
513 | 				return args{input: "n"}
514 | 			},
515 | 			want1: searchMode{
516 | 				N: true,
517 | 			},
518 | 		},
519 | 
520 | 		{
521 | 			name: "'aN' should only exclude numbers",
522 | 			args: func(*testing.T) args {
523 | 				return args{input: "aN"}
524 | 			},
525 | 			want1: searchMode{
526 | 				C: true,
527 | 				D: true,
528 | 				I: true,
529 | 				K: true,
530 | 				N: false,
531 | 				O: true,
532 | 				P: true,
533 | 				R: true,
534 | 				S: true,
535 | 				T: true,
536 | 				V: true,
537 | 			},
538 | 		},
539 | 
540 | 		{
541 | 			name: "'o' should include only operators",
542 | 			args: func(*testing.T) args {
543 | 				return args{input: "o"}
544 | 			},
545 | 			want1: searchMode{
546 | 				O: true,
547 | 			},
548 | 		},
549 | 
550 | 		{
551 | 			name: "'aO' should only exclude operators",
552 | 			args: func(*testing.T) args {
553 | 				return args{input: "aO"}
554 | 			},
555 | 			want1: searchMode{
556 | 				C: true,
557 | 				D: true,
558 | 				I: true,
559 | 				K: true,
560 | 				N: true,
561 | 				O: false,
562 | 				P: true,
563 | 				R: true,
564 | 				S: true,
565 | 				T: true,
566 | 				V: true,
567 | 			},
568 | 		},
569 | 
570 | 		{
571 | 			name: "'p' should include only package names",
572 | 			args: func(*testing.T) args {
573 | 				return args{input: "p"}
574 | 			},
575 | 			want1: searchMode{
576 | 				P: true,
577 | 			},
578 | 		},
579 | 
580 | 		{
581 | 			name: "'aP' should only exclude package names",
582 | 			args: func(*testing.T) args {
583 | 				return args{input: "aP"}
584 | 			},
585 | 			want1: searchMode{
586 | 				C: true,
587 | 				D: true,
588 | 				I: true,
589 | 				K: true,
590 | 				N: true,
591 | 				O: true,
592 | 				P: false,
593 | 				R: true,
594 | 				S: true,
595 | 				T: true,
596 | 				V: true,
597 | 			},
598 | 		},
599 | 
600 | 		{
601 | 			name: "'r' should include only rune literals",
602 | 			args: func(*testing.T) args {
603 | 				return args{input: "r"}
604 | 			},
605 | 			want1: searchMode{
606 | 				R: true,
607 | 			},
608 | 		},
609 | 
610 | 		{
611 | 			name: "'aR' should only exclude rune literals",
612 | 			args: func(*testing.T) args {
613 | 				return args{input: "aR"}
614 | 			},
615 | 			want1: searchMode{
616 | 				C: true,
617 | 				D: true,
618 | 				I: true,
619 | 				K: true,
620 | 				N: true,
621 | 				O: true,
622 | 				P: true,
623 | 				R: false,
624 | 				S: true,
625 | 				T: true,
626 | 				V: true,
627 | 			},
628 | 		},
629 | 
630 | 		{
631 | 			name: "'s' should include only strings",
632 | 			args: func(*testing.T) args {
633 | 				return args{input: "s"}
634 | 			},
635 | 			want1: searchMode{
636 | 				S: true,
637 | 			},
638 | 		},
639 | 
640 | 		{
641 | 			name: "'aS' should only exclude strings",
642 | 			args: func(*testing.T) args {
643 | 				return args{input: "aS"}
644 | 			},
645 | 			want1: searchMode{
646 | 				C: true,
647 | 				D: true,
648 | 				I: true,
649 | 				K: true,
650 | 				N: true,
651 | 				O: true,
652 | 				P: true,
653 | 				R: true,
654 | 				S: false,
655 | 				T: true,
656 | 				V: true,
657 | 			},
658 | 		},
659 | 
660 | 		{
661 | 			name: "'t' should include only types",
662 | 			args: func(*testing.T) args {
663 | 				return args{input: "t"}
664 | 			},
665 | 			want1: searchMode{
666 | 				T: true,
667 | 			},
668 | 		},
669 | 
670 | 		{
671 | 			name: "'aT' should only exclude types",
672 | 			args: func(*testing.T) args {
673 | 				return args{input: "aT"}
674 | 			},
675 | 			want1: searchMode{
676 | 				C: true,
677 | 				D: true,
678 | 				I: true,
679 | 				K: true,
680 | 				N: true,
681 | 				O: true,
682 | 				P: true,
683 | 				R: true,
684 | 				S: true,
685 | 				T: false,
686 | 				V: true,
687 | 			},
688 | 		},
689 | 
690 | 		{
691 | 			name: "'v' should include only numeric values",
692 | 			args: func(*testing.T) args {
693 | 				return args{input: "v"}
694 | 			},
695 | 			want1: searchMode{
696 | 				V: true,
697 | 			},
698 | 		},
699 | 
700 | 		{
701 | 			name: "'aV' should only exclude numeric values",
702 | 			args: func(*testing.T) args {
703 | 				return args{input: "aV"}
704 | 			},
705 | 			want1: searchMode{
706 | 				C: true,
707 | 				D: true,
708 | 				I: true,
709 | 				K: true,
710 | 				N: true,
711 | 				O: true,
712 | 				P: true,
713 | 				R: true,
714 | 				S: true,
715 | 				T: true,
716 | 				V: false,
717 | 			},
718 | 		},
719 | 
720 | 		{
721 | 			name: "'g' should be grep mode",
722 | 			args: func(*testing.T) args {
723 | 				return args{input: "g"}
724 | 			},
725 | 			want1: searchMode{
726 | 				G: true,
727 | 			},
728 | 		},
729 | 	}
730 | 
731 | 	for _, tt := range tests {
732 | 		t.Run(tt.name, func(t *testing.T) {
733 | 			tArgs := tt.args(t)
734 | 
735 | 			got1 := parseFirstArg(tArgs.input)
736 | 
737 | 			if !reflect.DeepEqual(got1, tt.want1) {
738 | 				t.Errorf("parseFirstArg got1 = %v, want1: %v", got1, tt.want1)
739 | 			}
740 | 		})
741 | 	}
742 | }
743 | 
744 | func Test_setupModeGG(t *testing.T) {
745 | 	type args struct {
746 | 		args []string
747 | 	}
748 | 	tests := []struct {
749 | 		name string
750 | 		args func(t *testing.T) args
751 | 
752 | 		want1 searchMode
753 | 	}{
754 | 		{
755 | 			name: "empty args should not set anything",
756 | 			args: func(*testing.T) args {
757 | 				return args{args: []string{}}
758 | 			},
759 | 			want1: searchMode{},
760 | 		},
761 | 
762 | 		{
763 | 			name: "empty args should not set anything",
764 | 			args: func(*testing.T) args {
765 | 				return args{args: []string{""}}
766 | 			},
767 | 			want1: searchMode{},
768 | 		},
769 | 
770 | 		{
771 | 			name: "value matcher should work for ints",
772 | 			args: func(*testing.T) args {
773 | 				return args{args: []string{"v", "11"}}
774 | 			},
775 | 			want1: searchMode{V: true, vInt: 11, vIsInt: true},
776 | 		},
777 | 
778 | 		{
779 | 			name: "value matcher should work for negative ints",
780 | 			args: func(*testing.T) args {
781 | 				return args{args: []string{"v", "-42"}}
782 | 			},
783 | 			want1: searchMode{V: true, vInt: 42, vIsInt: true},
784 | 		},
785 | 
786 | 		{
787 | 			name: "value matcher should work for floats",
788 | 			args: func(*testing.T) args {
789 | 				return args{args: []string{"v", "8.93"}}
790 | 			},
791 | 			want1: searchMode{V: true, vFloat: 8.93, vIsInt: false},
792 | 		},
793 | 
794 | 		{
795 | 			name: "value matcher should work for negative floats",
796 | 			args: func(*testing.T) args {
797 | 				return args{args: []string{"v", "-8.93"}}
798 | 			},
799 | 			want1: searchMode{V: true, vFloat: 8.93, vIsInt: false},
800 | 		},
801 | 
802 | 		{
803 | 			name: "value matcher should not work for random strings",
804 | 			args: func(*testing.T) args {
805 | 				return args{args: []string{"v", "asdf"}}
806 | 			},
807 | 			want1: searchMode{},
808 | 		},
809 | 	}
810 | 
811 | 	for _, tt := range tests {
812 | 		t.Run(tt.name, func(t *testing.T) {
813 | 			tArgs := tt.args(t)
814 | 
815 | 			got1 := setupModeGG(tArgs.args)
816 | 
817 | 			if !reflect.DeepEqual(got1, tt.want1) {
818 | 				t.Errorf("setupModeGG got1 = %v, want1: %v", got1, tt.want1)
819 | 			}
820 | 		})
821 | 	}
822 | }
823 | 
824 | func Test_getRegexp(t *testing.T) {
825 | 	re, _ := regexp.Compile("[0-9]test?")
826 | 	reErr, _ := regexp.Compile("*")
827 | 	type args struct {
828 | 		input string
829 | 	}
830 | 	tests := []struct {
831 | 		name string
832 | 		args func(t *testing.T) args
833 | 
834 | 		want1      *regexp.Regexp
835 | 		wantErr    bool
836 | 		inspectErr func(err error, t *testing.T)
837 | 	}{
838 | 		{
839 | 			name: "valid regexp should work",
840 | 			args: func(*testing.T) args {
841 | 				return args{input: "[0-9]test?"}
842 | 			},
843 | 			want1:   re,
844 | 			wantErr: false,
845 | 			inspectErr: func(error, *testing.T) {
846 | 			},
847 | 		},
848 | 
849 | 		{
850 | 			name: "invalid regexp should not work",
851 | 			args: func(*testing.T) args {
852 | 				return args{input: "*"}
853 | 			},
854 | 			want1:   reErr,
855 | 			wantErr: true,
856 | 			inspectErr: func(error, *testing.T) {
857 | 			},
858 | 		},
859 | 	}
860 | 
861 | 	for _, tt := range tests {
862 | 		t.Run(tt.name, func(t *testing.T) {
863 | 			tArgs := tt.args(t)
864 | 
865 | 			got1, err := getRegexp(tArgs.input)
866 | 
867 | 			if !reflect.DeepEqual(got1, tt.want1) {
868 | 				t.Errorf("getRegexp got1 = %v, want1: %v", got1, tt.want1)
869 | 			}
870 | 
871 | 			if (err != nil) != tt.wantErr {
872 | 				t.Fatalf("getRegexp error = %v, wantErr: %t", err, tt.wantErr)
873 | 			}
874 | 
875 | 			if tt.inspectErr != nil {
876 | 				tt.inspectErr(err, t)
877 | 			}
878 | 		})
879 | 	}
880 | }
881 | 


--------------------------------------------------------------------------------
/scan.go:
--------------------------------------------------------------------------------
   1 | package main
   2 | 
   3 | import (
   4 | 	"bufio"
   5 | 	"bytes"
   6 | 	"compress/bzip2"
   7 | 	"compress/gzip"
   8 | 	"errors"
   9 | 	"flag"
  10 | 	"fmt"
  11 | 	"io"
  12 | 	"io/ioutil"
  13 | 	"log"
  14 | 	"os"
  15 | 	"path/filepath"
  16 | 	"regexp"
  17 | 	"strconv"
  18 | 	"strings"
  19 | 	"syscall"
  20 | 
  21 | 	"launchpad.net/gommap"
  22 | 
  23 | 	"github.com/MichaelTJones/lex"
  24 | 	// "github.com/MichaelTJones/walk"
  25 | 	"github.com/klauspost/compress/zstd"
  26 | 	// "github.com/mirtchovski/walk"
  27 | )
  28 | 
/*
Go-Grep: scan any number of Go source code files, where scanning means passing each
through Go-language lexical analysis and reporting lines where selected classes of
tokens match a search pattern defined by a regular expression.
*/

// token class inclusion
// a: search all of the following
// c: search Comments ("//..." or "/*...*/")
// d: search Defined non-types (iota, nil, new, true,...)
// i: search Identifiers ([a-zA-Z][a-zA-Z0-9]*)
// k: search Keywords (if, for, func, go, ...)
// n: search Numbers as strings (255 as 255, 0.255, 1e255)
// o: search Operators (,+-*/[]{}()>>...)
// p: search Package names
// r: search Rune literals ('a', '\U00101234')
// s: search Strings ("quoted" or `raw`)
// t: search Types (bool, int, float64, map, ...)
// v: search numeric Values (255 as 0b1111_1111, 0377, 255, 0xff)
//
// These package-level flags are filled in by doScan from the parsed search
// mode. G is copied from the mode in the same way but has no entry in the
// list above.
var G, C, D, I, K, N, O, P, R, S, T, V bool

// dispatch holds pointers to some of the class flags above, with nil in the
// remaining slots.
// NOTE(review): presumably indexed by the lexer's token type; confirm the
// slot order against the lex package before editing.
var dispatch = []*bool{nil, nil, &C, &I, &K, &O, &R, nil, &S, &T, &D, &N, nil}

// matching
var regex *regexp.Regexp // pattern

// warning: do not use negative numbers in value matches. the code here is fine and ready,
// but the lexer does not (can not) decide when a "-" is a prefix negative sign vs when
// it is a subtraction operator. That's the job of the parser. we can add a mini-parser
// for this, but for now, just don't enter negative values on the command line.
var sign int       // literal sign
var vIsInt bool    // is number literal an int or floating point
var vInt uint64    // literal value
var vFloat float64 // literal value
  63 | 
// Scan is the state of one scanning run. doScan creates a single Scan, feeds
// it file names through List and File, and calls Complete to obtain the
// resulting Summary.
// NOTE(review): the roles of regex, path, complete, total and report are not
// visible in this part of the file; confirm against the Scan/Complete methods.
type Scan struct {
	regex    *regexp.Regexp
	path     []byte
	Summary  // accumulator: bytes, tokens, lines, matches
	complete bool
	total    Summary
	report   []byte
}
  72 | 
  73 | func NewScan() *Scan {
  74 | 	return &Scan{}
  75 | }
  76 | 
  77 | func doScan() (Summary, error) {
  78 | 	s := NewScan()
  79 | 	fixedArgs := 2
  80 | 	if *flagActLikeGrep {
  81 | 		fixedArgs = 1
  82 | 	}
  83 | 
  84 | 	if flag.NArg() < fixedArgs {
  85 | 		return Summary{}, errors.New("not enough arguments: missing keywords and pattern")
  86 | 	}
  87 | 
  88 | 	// initialize regular expression matcher
  89 | 	var err error
  90 | 	regex, err = getRegexp(flag.Arg(fixedArgs - 1))
  91 | 	if err != nil {
  92 | 		return Summary{}, err
  93 | 	}
  94 | 
  95 | 	// gg mode
  96 | 	mode := setupModeGG(flag.Args())
  97 | 	C = mode.C
  98 | 	D = mode.D
  99 | 	G = mode.G
 100 | 	I = mode.I
 101 | 	K = mode.K
 102 | 	N = mode.N
 103 | 	O = mode.O
 104 | 	P = mode.P
 105 | 	R = mode.R
 106 | 	S = mode.S
 107 | 	T = mode.T
 108 | 	V = mode.V
 109 | 	vIsInt = mode.vIsInt
 110 | 	vInt = mode.vInt
 111 | 	vFloat = mode.vFloat
 112 | 
 113 | 	println("scan begins")
 114 | 	scanned := false
 115 | 
 116 | 	// scan files in the file of filenames indicated by the "-list" option.
 117 | 	if *flagList != "" {
 118 | 		println("processing files listed in the -list option")
 119 | 		*flagFileName = true // presume multiple files...print names
 120 | 		s.List(*flagList)
 121 | 		scanned = true
 122 | 	}
 123 | 
 124 | 	// scan files named on command line.
 125 | 	if flag.NArg() > fixedArgs {
 126 | 		println("processing files listed on command line")
 127 | 		if flag.NArg() > fixedArgs+1 {
 128 | 			*flagFileName = true // multiple files...print names
 129 | 		}
 130 | 		for _, v := range flag.Args()[fixedArgs:] {
 131 | 			s.File(v)
 132 | 		}
 133 | 		scanned = true
 134 | 	}
 135 | 
 136 | 	// scan files named in standard input if nothing scanned yet.
 137 | 	if !scanned {
 138 | 		println("processing files listed in standard input")
 139 | 		*flagFileName = true // multiple files...print names
 140 | 		scanner := bufio.NewScanner(os.Stdin)
 141 | 		for scanner.Scan() {
 142 | 			s.File(scanner.Text())
 143 | 		}
 144 | 	}
 145 | 	summary := s.Complete() // parallel rendevousz here...waits for completion
 146 | 	println("scan ends")
 147 | 	return summary, nil
 148 | }
 149 | 
 150 | func isVisible(name string) bool {
 151 | 	if *flagVisible {
 152 | 		for _, s := range strings.Split(name, string(os.PathSeparator)) {
 153 | 			if s != "" && s != "." && s != ".." && s[0] == '.' {
 154 | 				return false
 155 | 			}
 156 | 		}
 157 | 	}
 158 | 	return true
 159 | }
 160 | 
 161 | func isGo(name string) bool {
 162 | 	if !*flagGo {
 163 | 		return true
 164 | 	}
 165 | 	if isCompressed(name) {
 166 | 		ext := filepath.Ext(name)
 167 | 		name = strings.TrimSuffix(name, ext) // unwrap the compression suffix
 168 | 	}
 169 | 	return filepath.Ext(name) == ".go"
 170 | }
 171 | 
 172 | func isArchive(name string) bool {
 173 | 	if isCompressed(name) {
 174 | 		ext := filepath.Ext(name)
 175 | 		name = strings.TrimSuffix(name, ext) // unwrap the compression suffix
 176 | 	}
 177 | 	ext := filepath.Ext(name)
 178 | 	return ext == ".cpio" || ext == ".tar" || ext == ".zip"
 179 | }
 180 | 
 181 | func isBinary(source []byte) bool {
 182 | 	const byteLimit = 2 * 1024
 183 | 	const nonPrintLimit = 8 + 1 // one Unicode byte order mark is forgiven
 184 | 	nonPrint := 0
 185 | 	for i, c := range source {
 186 | 		if i > byteLimit {
 187 | 			break
 188 | 		}
 189 | 		if c < 32 && c != ' ' && c != '\n' && c != '\t' {
 190 | 			nonPrint++
 191 | 		}
 192 | 		if nonPrint > nonPrintLimit {
 193 | 			return true
 194 | 		}
 195 | 	}
 196 | 	return false
 197 | }
 198 | 
 199 | func isCompressed(name string) bool {
 200 | 	ext := filepath.Ext(name)
 201 | 	return ext == ".bz2" || ext == ".gz" || ext == ".zst"
 202 | }
 203 | 
 204 | func decompress(oldName string, oldData []byte) (newName string, newData []byte, mapped bool, err error) {
 205 | 	ext := filepath.Ext(oldName)
 206 | 	if (ext == ".go" && len(oldData) > 0) || (ext == ".zip") {
 207 | 		return oldName, oldData, false, nil // nothing to do
 208 | 	}
 209 | 	if *flagMap && ext == ".go" {
 210 | 		file, err := os.Open(oldName)
 211 | 		if err == nil {
 212 | 			mmap, err := gommap.Map(file.Fd(), gommap.PROT_READ, gommap.MAP_PRIVATE)
 213 | 			if err == nil {
 214 | 				err = mmap.Advise(gommap.MADV_SEQUENTIAL | gommap.MADV_WILLNEED)
 215 | 				// fmt.Printf("mmaped: %q len=%d head=%q\n", oldName, len(mmap), mmap[:32])
 216 | 				file.Close()
 217 | 				return oldName, []byte(mmap), true, err
 218 | 			}
 219 | 		}
 220 | 	}
 221 | 
 222 | 	var oldSize int64
 223 | 	var encoded, decoder io.Reader
 224 | 
 225 | 	// Select source of encoded data
 226 | 	switch {
 227 | 	case len(oldData) == 0:
 228 | 		// Read from named file
 229 | 		file, err := os.Open(oldName)
 230 | 		if err != nil {
 231 | 			println(err)
 232 | 			return oldName, nil, false, err
 233 | 		}
 234 | 		defer file.Close()
 235 | 		info, err := file.Stat()
 236 | 		if err != nil {
 237 | 			println(err)
 238 | 			return oldName, nil, false, err
 239 | 		}
 240 | 		oldSize = info.Size()
 241 | 		encoded = file
 242 | 	default:
 243 | 		// Use provided data (likely reading from an archive)
 244 | 		oldSize = int64(len(oldData))
 245 | 		encoded = bytes.NewReader(oldData)
 246 | 	}
 247 | 
 248 | 	// Select decompression algorithm based on file extension
 249 | 	decompressed := false
 250 | 	switch {
 251 | 	case ext == ".bz2":
 252 | 		decoder, err = bzip2.NewReader(encoded), nil
 253 | 		decompressed = true
 254 | 	case ext == ".gz":
 255 | 		decoder, err = gzip.NewReader(encoded)
 256 | 		decompressed = true
 257 | 	case ext == ".zst":
 258 | 		decoder, err = zstd.NewReader(encoded)
 259 | 		decompressed = true
 260 | 	default:
 261 | 		decoder, err = encoded, nil // "just reading" is minimal compression
 262 | 	}
 263 | 	if err != nil {
 264 | 		println(err) // error creating the decoder
 265 | 		return oldName, nil, false, err
 266 | 	}
 267 | 
 268 | 	// Decompress the data
 269 | 	if newData, err = ioutil.ReadAll(decoder); err != nil {
 270 | 		println(err) // error using the decoder
 271 | 		return oldName, nil, false, err
 272 | 	}
 273 | 	if decompressed {
 274 | 		// Decompress the name ("sample.go.zst" → "sample.go")
 275 | 		newName = strings.TrimSuffix(oldName, ext)
 276 | 		printf("  %8d → %8d bytes (%6.3f×)  decompress and scan %s",
 277 | 			oldSize, len(newData), float64(len(newData))/float64(oldSize), oldName)
 278 | 	} else {
 279 | 		newName = oldName
 280 | 		printf("  %8d bytes  scan %s", len(newData), oldName)
 281 | 	}
 282 | 
 283 | 	return newName, newData, false, nil
 284 | }
 285 | 
 286 | func (s *Scan) List(name string) {
 287 | 	file, err := os.Open(name)
 288 | 	if err != nil {
 289 | 		println(err)
 290 | 		return
 291 | 	}
 292 | 
 293 | 	println("scanning list of files:", name)
 294 | 	scanner := bufio.NewScanner(file)
 295 | 	for scanner.Scan() {
 296 | 		s.File(scanner.Text())
 297 | 	}
 298 | 	file.Close()
 299 | }
 300 | 
// cap is a package-level integer. Declaring it shadows Go's builtin cap
// function for all code in this package.
// NOTE(review): no use of this variable is visible in this part of the
// file; confirm it is still needed.
var cap int
 302 | 
 303 | func (s *Scan) File(name string) {
 304 | 	if !isVisible(name) {
 305 | 		return
 306 | 	}
 307 | 
 308 | 	info, err := os.Lstat(name)
 309 | 	if err != nil {
 310 | 		println(err)
 311 | 		return
 312 | 	}
 313 | 
 314 | 	// process plain files
 315 | 	if info.Mode().IsRegular() {
 316 | 		processRegularFile(name, s)
 317 | 	} else if info.Mode().IsDir() { // process directories
 318 | 		switch *flagRecursive {
 319 | 		case false:
 320 | 			// process files in this directory
 321 | 			println("processing Go files in directory", name)
 322 | 
 323 | 			bases, err := ioutil.ReadDir(name)
 324 | 			if err != nil {
 325 | 				println(err)
 326 | 				return
 327 | 			}
 328 | 
 329 | 			// user request: honor .gitignore blacklist
 330 | 			var skip map[string]bool
 331 | 
 332 | 			foundGitIgnore := false
 333 | 			for _, base := range bases {
 334 | 				if base.Name() == ".gitignore" {
 335 | 					foundGitIgnore = true
 336 | 					break
 337 | 				}
 338 | 			}
 339 | 			if foundGitIgnore {
 340 | 				gi, err := os.Open(".gitignore")
 341 | 				if err == nil {
 342 | 					skip = make(map[string]bool)
 343 | 					skip[".gitignore"] = true
 344 | 					scanner := bufio.NewScanner(gi)
 345 | 					for scanner.Scan() {
 346 | 						skip[scanner.Text()] = true
 347 | 					}
 348 | 					gi.Close()
 349 | 				}
 350 | 			}
 351 | 
 352 | 			for _, base := range bases {
 353 | 				if skip != nil && skip[base.Name()] {
 354 | 					printf("  skipping .gitignored file %q", base.Name())
 355 | 					continue
 356 | 				}
 357 | 				fullName := filepath.Join(name, base.Name())
 358 | 				if isVisible(fullName) && isGo(fullName) {
 359 | 					s.Scan(fullName, nil)
 360 | 				}
 361 | 			}
 362 | 		case true:
 363 | 			// process files in this directory hierarchy
 364 | 			println("processing Go files in and under directory", name)
 365 | 
 366 | 			walker := func(path string, info os.FileInfo, err error) error {
 367 | 				if err != nil {
 368 | 					println(err)
 369 | 					return err
 370 | 				}
 371 | 				name := info.Name()
 372 | 
 373 | 				// user request: honor .gitignore blacklist
 374 | 				var skip map[string]bool
 375 | 
 376 | 				gi, err := os.Open(".gitignore")
 377 | 				if err == nil {
 378 | 					skip = make(map[string]bool)
 379 | 					skip[".gitignore"] = true
 380 | 					scanner := bufio.NewScanner(gi)
 381 | 					for scanner.Scan() {
 382 | 						skip[scanner.Text()] = true
 383 | 					}
 384 | 					gi.Close()
 385 | 				}
 386 | 
 387 | 				if info.IsDir() {
 388 | 					if !isVisible(name) {
 389 | 						println("skipping hidden directory", name)
 390 | 						return filepath.SkipDir
 391 | 					}
 392 | 				} else {
 393 | 					if skip != nil && skip[name] {
 394 | 						printf("  skipping .gitignored file %q", name)
 395 | 					} else if isVisible(path) && isGo(path) {
 396 | 						s.Scan(path, nil)
 397 | 					}
 398 | 				}
 399 | 				return nil
 400 | 			}
 401 | 
 402 | 			err = filepath.Walk(name, walker) // standard library walker
 403 | 			// err = walk.Walk(name, walker) // mtj concurrent walker
 404 | 			// err = Walk(name, walker) // standard library walker
 405 | 			if err != nil {
 406 | 				println(err)
 407 | 			}
 408 | 		}
 409 | 	}
 410 | }
 411 | 
// Work is one scan request queued for a worker goroutine.
type Work struct {
	// name is the file name handed to Scan.scan.
	name   string
	// source is the content handed to Scan.scan; it is nil when the request
	// was enqueued without content (Scan.Scan called with a nil source).
	source []byte
}
 416 | 
// Summary holds the totals accumulated by the reporter over all scanned
// files; they are printed by the print method.
type Summary struct {
	bytes   int // sum of the per-file byte counts
	tokens  int // sum of the per-file token counts
	matches int // sum of the per-file match counts
	lines   int // sum of the per-file newline counts
	files   int // number of per-file results gathered
}
 424 | 
 425 | func (s *Summary) print(elapsed, user, system float64, printer func(string, ...interface{})) {
 426 | 	printer("performance\n")
 427 | 	if s.matches == 1 {
 428 | 		printer("  grep  %s match\n", formatInt(s.matches))
 429 | 	} else {
 430 | 		printer("  grep  %s matches\n", formatInt(s.matches))
 431 | 	}
 432 | 	printer("  work  %s byte%s, %s token%s, %s line%s, %s file%s\n",
 433 | 		formatInt(s.bytes), plural(s.bytes, ""),
 434 | 		formatInt(s.tokens), plural(s.tokens, ""),
 435 | 		formatInt(s.lines), plural(s.lines, ""),
 436 | 		formatInt(s.files), plural(s.files, ""))
 437 | 	printer("  time  %.6f sec elapsed, %.6f sec user + %.6f system\n", elapsed, user, system)
 438 | 	if elapsed > 0 {
 439 | 		printer("  rate  %s bytes/sec, %s tokens/sec, %s lines/sec, %s files/sec\n",
 440 | 			formatInt(int(float64(s.bytes)/elapsed)),
 441 | 			formatInt(int(float64(s.tokens)/elapsed)),
 442 | 			formatInt(int(float64(s.lines)/elapsed)),
 443 | 			formatInt(int(float64(s.files)/elapsed)))
 444 | 		printer("  cpus  %d worker%s (parallel speedup = %.2fx)\n",
 445 | 			*flagCPUs, plural(*flagCPUs, ""), (user+system)/elapsed)
 446 | 	}
 447 | }
 448 | 
 449 | func formatInt(n int) (s string) {
 450 | 	if *flagDigits {
 451 | 		// const separator = ","      // comma
 452 | 		const separator = "\u202f" // Narrow No-Break Space (NNBSP)
 453 | 
 454 | 		sign := ""
 455 | 		if n < 0 {
 456 | 			sign = "-"
 457 | 			n = -n
 458 | 		}
 459 | 
 460 | 		s = strconv.Itoa(n)
 461 | 
 462 | 		n := ""
 463 | 		sep := ""
 464 | 		for len(s) > 3 {
 465 | 			n = s[len(s)-3:] + sep + n
 466 | 			sep = separator
 467 | 			s = s[:len(s)-3]
 468 | 		}
 469 | 		if len(s) > 0 {
 470 | 			n = s + sep + n
 471 | 		}
 472 | 
 473 | 		s = sign + n
 474 | 	} else {
 475 | 		s = strconv.Itoa(n)
 476 | 	}
 477 | 
 478 | 	return s
 479 | }
 480 | 
 481 | var first = true
 482 | var workers int
 483 | var scattered int
 484 | var work []chan Work
 485 | var result []chan *Scan
 486 | var done chan Summary
 487 | 
 488 | func worker(wIn chan Work, sOut chan *Scan) {
 489 | 	for w := range wIn {
 490 | 		s := NewScan()
 491 | 		s.regex = regex.Copy()
 492 | 		s.scan(w.name, w.source)
 493 | 		sOut <- s
 494 | 	}
 495 | 	sOut <- &Scan{complete: true} // signal that this worker is done
 496 | }
 497 | 
 498 | func (s *Scan) Scan(name string, source []byte) {
 499 | 	if first {
 500 | 		workers = *flagCPUs
 501 | 		switch *flagUnordered {
 502 | 		case true:
 503 | 			const workQueue = 1024
 504 | 			work = make([]chan Work, 1)
 505 | 			result = make([]chan *Scan, 1)
 506 | 			work[0] = make(chan Work, workQueue)
 507 | 			result[0] = make(chan *Scan, workQueue)
 508 | 			for i := 0; i < workers; i++ {
 509 | 				go worker(work[0], result[0])
 510 | 			}
 511 | 		case false:
 512 | 			work = make([]chan Work, workers)
 513 | 			result = make([]chan *Scan, workers)
 514 | 			for i := 0; i < workers; i++ {
 515 | 				const balanceQueue = 512
 516 | 				work[i] = make(chan Work, balanceQueue)
 517 | 				result[i] = make(chan *Scan, balanceQueue)
 518 | 				go worker(work[i], result[i])
 519 | 			}
 520 | 		}
 521 | 		done = make(chan Summary)
 522 | 		go reporter() // wait for and gather results
 523 | 		first = false
 524 | 	}
 525 | 
 526 | 	switch {
 527 | 	case name == "": // end of scan
 528 | 		switch *flagUnordered {
 529 | 		case true:
 530 | 			close(work[0]) // signal completion to workers
 531 | 		case false:
 532 | 			for i := range work {
 533 | 				close(work[i]) // signal completion to workers
 534 | 			}
 535 | 		}
 536 | 	default: // another file to scan
 537 | 		switch *flagUnordered {
 538 | 		case true:
 539 | 			work[0] <- Work{name: name, source: source} // enqueue scan request
 540 | 		case false:
 541 | 			work[scattered%workers] <- Work{name: name, source: source} // enqueue scan request
 542 | 		}
 543 | 		scattered++
 544 | 	}
 545 | }
 546 | 
 547 | func formatMatch(b *bytes.Buffer, path, match []byte, line int) {
 548 | 	// expand buffer with single allocation
 549 | 	grow := (len(path) + 1) + (len(match) + 1)
 550 | 	n := ""
 551 | 	if *flagLineNumber {
 552 | 		n = strconv.Itoa(line)
 553 | 		grow += len(n) + 1 // n + ':'
 554 | 	}
 555 | 	b.Grow(grow)
 556 | 
 557 | 	// format is "path;match\n" or "path:line:match\n"
 558 | 	b.Write(path)
 559 | 	b.WriteByte(':')
 560 | 	if *flagLineNumber {
 561 | 		b.WriteString(n)
 562 | 		b.WriteByte(':')
 563 | 	}
 564 | 	b.Write(match)
 565 | 	b.WriteByte('\n')
 566 | }
 567 | 
 568 | type Liner struct {
 569 | 	b []byte
 570 | 	t []byte
 571 | }
 572 | 
 573 | func newLiner(b []byte) *Liner {
 574 | 	return &Liner{b: b}
 575 | }
 576 | 
 577 | func (liner *Liner) scan() bool {
 578 | 	if len(liner.b) == 0 {
 579 | 		return false
 580 | 	}
 581 | 	index := bytes.IndexByte(liner.b, '\n')
 582 | 	if index < 0 {
 583 | 		liner.t, liner.b = liner.b, nil
 584 | 	} else {
 585 | 		liner.t, liner.b = liner.b[:index+1], liner.b[index+1:]
 586 | 	}
 587 | 	return true
 588 | }
 589 | 
 590 | func (liner *Liner) text() []byte {
 591 | 	return liner.t
 592 | }
 593 | 
 594 | func (liner *Liner) trim() []byte {
 595 | 	n := len(liner.t)
 596 | 	if n > 0 && liner.t[n-1] == '\n' {
 597 | 		return liner.t[:n-1]
 598 | 	}
 599 | 	return liner.t
 600 | }
 601 | 
 602 | func tokenHandler(flag bool, lexer *lex.Lexer, text []byte, s *Scan, printLine int, buf *bytes.Buffer) int {
 603 | 	if flag { //&& printLine < lexer.Line {
 604 | 		if lexer.Type == lex.String && lexer.Subtype == lex.Raw && bytes.Count(text, []byte{'\n'}) > 0 {
 605 | 			// match each line of the raw string individually
 606 | 			lineInString := 0
 607 | 			liner := newLiner(text)
 608 | 			for liner.scan() {
 609 | 				if s.regex.Match(liner.text()) {
 610 | 					s.matches++
 611 | 					line := lexer.Line + lineInString
 612 | 					if printLine < line {
 613 | 						formatMatch(buf, s.path, liner.trim(), line)
 614 | 						printLine = line
 615 | 					}
 616 | 				}
 617 | 				lineInString++
 618 | 			}
 619 | 		} else if lexer.Type == lex.Comment && lexer.Subtype == lex.Block && bytes.Count(text, []byte{'\n'}) > 0 {
 620 | 			// match each line of the block comment individually
 621 | 			lineInString := 0
 622 | 			liner := newLiner(text)
 623 | 			for liner.scan() {
 624 | 				if s.regex.Match(liner.text()) {
 625 | 					s.matches++
 626 | 					line := lexer.Line + lineInString
 627 | 					if printLine < line {
 628 | 						formatMatch(buf, s.path, liner.trim(), line)
 629 | 						printLine = line
 630 | 					}
 631 | 				}
 632 | 				lineInString++
 633 | 			}
 634 | 		} else if printLine < lexer.Line && s.regex.Match(text) {
 635 | 			// match the token but print the line that contains it
 636 | 			s.matches++
 637 | 			formatMatch(buf, s.path, lexer.GetLine(), lexer.Line)
 638 | 			printLine = lexer.Line
 639 | 		}
 640 | 	}
 641 | 	return printLine
 642 | }
 643 | 
// scan searches one file and records the outcome in s: the reported path,
// byte/line/token/match counts and, in s.report, the formatted match lines.
//
// name and source are first passed through decompress, which returns the
// name to report, the content to search and whether that content is memory
// mapped. If decompress fails, scan returns with s unchanged. Whenever
// *flagMap is set and the content is mapped, it is unmapped before scan
// returns on any path after that point.
//
// In grep mode (*flagActLikeGrep or G) every line of the file is matched
// against s.regex. Otherwise the content is tokenized as Go and only tokens
// of the enabled classes are matched; printLine tracks the highest line
// already reported so a source line is reported at most once.
func (s *Scan) scan(name string, source []byte) {
	var err error
	var newName string
	var mapped bool
	newName, source, mapped, err = decompress(name, source)
	if err != nil {
		return
	}

	// binary content is skipped only when the search is not limited to Go files
	if !*flagGo && isBinary(source) {
		// enable printf if desired. makes log cluttered:
		// printf("skipping binary file %s", newName)
		if *flagMap && mapped {
			// finished using []byte source so unmap file to free the file descriptor
			gommap.MMap(source).UnsafeUnmap()
		}
		return
	}

	s.path = []byte(newName)
	s.bytes = len(source)
	s.lines = bytes.Count(source, []byte{'\n'})
	s.files = 1

	// highest line number already written to the report
	printLine := 0

	// handle grep mode
	if *flagActLikeGrep || G {
		fileLine := 0
		liner := newLiner(source)
		buf := new(bytes.Buffer)
		for liner.scan() {
			fileLine++ // line numbers count from one
			if s.regex.Match(liner.text()) {
				s.matches++
				formatMatch(buf, s.path, liner.trim(), fileLine)
			}
		}
		s.report = buf.Bytes()
		if *flagMap && mapped {
			// finished using []byte source so unmap file to free the file descriptor
			gommap.MMap(source).UnsafeUnmap()
		}
		return
	}

	// Perform the scan by tabulating token types, subtypes, and values
	// lexer := &lex.Lexer{Input: source, Mode: lex.ScanGo} // | lex.SkipSpace}
	lexer := lex.NewLexer(source, lex.ScanGo)
	expectPackageName := false
	buf := new(bytes.Buffer)
	for tok, text := lexer.Scan(); tok != lex.EOF; tok, text = lexer.Scan() {
		s.tokens++

		// go mini-parser: expect package name after "package" keyword
		if expectPackageName && tok == lex.Identifier {
			if P && s.regex.Match(text) {
				s.matches++
				if printLine < lexer.Line {
					formatMatch(buf, s.path, text, lexer.Line)
					printLine = lexer.Line
				}
			}
			expectPackageName = false
		} else if tok == lex.Keyword && bytes.Equal(text, []byte("package")) {
			expectPackageName = true // set expectations
		}

		// NOTE(review): presumably the lexer's token-class codes are negative,
		// so -tok indexes the per-class enable flags in dispatch; confirm
		// against the lex package.
		if tok < 0 {
			if f := dispatch[-tok]; f != nil && *f {
				// inlined copy of the logic in tokenHandler:
				// printLine = tokenHandler(*f, lexer, text, s, printLine, buf)
				if lexer.Type == lex.String && lexer.Subtype == lex.Raw && bytes.Count(text, []byte{'\n'}) > 0 {
					// match each line of the raw string individually
					lineInString := 0
					liner := newLiner(text)
					for liner.scan() {
						if s.regex.Match(liner.text()) {
							s.matches++
							line := lexer.Line + lineInString
							if printLine < line {
								formatMatch(buf, s.path, liner.trim(), line)
								printLine = line
							}
						}
						lineInString++
					}
				} else if lexer.Type == lex.Comment && lexer.Subtype == lex.Block && bytes.Count(text, []byte{'\n'}) > 0 {
					// match each line of the block comment individually
					lineInString := 0
					liner := newLiner(text)
					for liner.scan() {
						if s.regex.Match(liner.text()) {
							s.matches++
							line := lexer.Line + lineInString
							if printLine < line {
								formatMatch(buf, s.path, liner.trim(), line)
								printLine = line
							}
						}
						lineInString++
					}
				} else if printLine < lexer.Line && s.regex.Match(text) {
					// match the token but print the line that contains it
					s.matches++
					formatMatch(buf, s.path, lexer.GetLine(), lexer.Line)
					printLine = lexer.Line
				}
			}
			// numeric value search: compare the literal's parsed value, not its text.
			// NOTE(review): value matches are reported but not added to
			// s.matches; confirm that this is intended.
			if tok == lex.Number && V && printLine < lexer.Line {
				n := text
				var nS int
				if n[0] == '-' { // never used, but someday...
					nS = -1
					n = n[1:]
				}
				switch vIsInt {
				case true:
					var nI uint64
					nI, err = strconv.ParseUint(string(n), 0, 64)
					if err == nil && nS == sign && nI == vInt {
						// match the token but print the line
						formatMatch(buf, s.path, lexer.GetLine(), lexer.Line)
						printLine = lexer.Line
					}
				case false:
					var nF float64
					nF, err = strconv.ParseFloat(string(n), 64)
					if err == nil && nS == sign && nF == vFloat {
						// match the token but print the line
						formatMatch(buf, s.path, lexer.GetLine(), lexer.Line)
						printLine = lexer.Line
					}
				}
			}
		}

	}
	s.report = buf.Bytes()
	if *flagMap && mapped {
		// finished using []byte source so unmap file to free the file descriptor
		gommap.MMap(source).UnsafeUnmap()
	}
}
 787 | 
 788 | // Complete a scan
 789 | func (s *Scan) Complete() Summary {
 790 | 	if !s.complete {
 791 | 		s.Scan("", nil)  // Signal end of additional files...
 792 | 		s.total = <-done // ...and await completion.of scanning & reporting
 793 | 		for i := range result {
 794 | 			close(result[i])
 795 | 		}
 796 | 		s.complete = true // Record completion
 797 | 	}
 798 | 	return s.total
 799 | }
 800 | 
 801 | func reporter() {
 802 | 	var w io.Writer
 803 | 	var b *bufio.Writer
 804 | 
 805 | 	switch lower := strings.ToLower(*flagOutput); {
 806 | 	case lower == "" || lower == "[stdout]":
 807 | 		file := os.Stdout
 808 | 		if *flagBufferWrites {
 809 | 			b = bufio.NewWriterSize(file, *flagBufferSize) // ensure buffered writes
 810 | 			w = b
 811 | 		} else {
 812 | 			w = file
 813 | 		}
 814 | 	case lower == "[stderr]":
 815 | 		file := os.Stderr
 816 | 		if *flagBufferWrites {
 817 | 			b = bufio.NewWriterSize(file, *flagBufferSize) // ensure buffered writes
 818 | 			w = b
 819 | 		} else {
 820 | 			w = file
 821 | 		}
 822 | 	case lower != "":
 823 | 		var err error
 824 | 		file, err := os.Create(*flagOutput)
 825 | 		if err != nil {
 826 | 			println(err)
 827 | 			return
 828 | 		}
 829 | 		defer file.Close()
 830 | 		w = file
 831 | 	}
 832 | 
 833 | 	// summary statistics
 834 | 	total := Summary{}
 835 | 
 836 | 	// report results per file
 837 | 	gathered := 0
 838 | 	completed := 0
 839 | 	for {
 840 | 		// get next result in search order
 841 | 		var s *Scan
 842 | 		switch *flagUnordered {
 843 | 		case false:
 844 | 			s = <-result[gathered%workers]
 845 | 		case true:
 846 | 			s = <-result[0]
 847 | 		}
 848 | 		gathered++
 849 | 
 850 | 		// handle completion events
 851 | 		if s.complete {
 852 | 			completed++ // one more worker has finished
 853 | 			if completed == workers {
 854 | 				break // all workers have now finished
 855 | 			}
 856 | 			continue
 857 | 		}
 858 | 
 859 | 		// report all matching lines in file
 860 | 		w.Write(s.report)
 861 | 
 862 | 		total.bytes += s.bytes
 863 | 		total.tokens += s.tokens
 864 | 		total.matches += s.matches
 865 | 		total.lines += s.lines
 866 | 		total.files++
 867 | 	}
 868 | 	if b != nil {
 869 | 		b.Flush() // bug fix: must defer flush until return
 870 | 	}
 871 | 
 872 | 	// signal completion to main program
 873 | 	done <- total // scanning complete, here are totals
 874 | }
 875 | 
 876 | func println(v ...interface{}) {
 877 | 	if *flagLog != "" {
 878 | 		log.Println(v...)
 879 | 	}
 880 | }
 881 | 
 882 | func printf(f string, v ...interface{}) {
 883 | 	if *flagLog != "" {
 884 | 		log.Printf(f, v...)
 885 | 	}
 886 | }
 887 | 
 888 | func plural(n int, fill string) string {
 889 | 	if n == 1 {
 890 | 		return fill
 891 | 	}
 892 | 	return "s"
 893 | }
 894 | 
// searchMode records which token classes a search covers, as selected by
// the token-class argument (see parseFirstArg), plus the parsed target of a
// numeric value search (see setupModeGG).
type searchMode struct {
	// c: search Comments ("//..." or "/*...*/")
	C bool
	// d: search Defined non-types (iota, nil, new, true,...)
	D bool
	// g: grep mode, set by the 'g' class; it has no upper-case negative form
	G bool
	// i: search Identifiers ([a-zA-Z][a-zA-Z0-9]*)
	I bool
	// k: search Keywords (if, for, func, go, ...)
	K bool
	// n: search Numbers as strings (255 as 255, 0.255, 1e255)
	N bool
	// o: search Operators (,+-*/[]{}()>>...)
	O bool
	// p: search Package names
	P bool
	// r: search Rune literals ('a', '\U00101234')
	R bool
	// s: search Strings ("quoted" or `raw`)
	S bool
	// t: search Types (bool, int, float64, map, ...)
	T bool
	// v: search numeric Values (255 as 0b1111_1111, 0377, 255, 0xff).
	// The fields after V hold the parsed search target: vIsInt reports
	// whether it parsed as an unsigned integer (kept in vInt); otherwise a
	// successfully parsed floating-point target is kept in vFloat.
	V      bool
	vIsInt bool
	vInt   uint64
	vFloat float64
}
 924 | 
 925 | func parseFirstArg(input string) searchMode {
 926 | 	result := searchMode{}
 927 | 	// a: search all of the following
 928 | 	if strings.Contains(input, "a") {
 929 | 		result.C = true
 930 | 		result.D = true
 931 | 		result.I = true
 932 | 		result.K = true
 933 | 		result.N = true
 934 | 		result.O = true
 935 | 		result.P = true
 936 | 		result.R = true
 937 | 		result.S = true
 938 | 		result.T = true
 939 | 		result.V = true
 940 | 	}
 941 | 
 942 | 	// initialize token class inclusion flags
 943 | 	for _, class := range input {
 944 | 		switch class {
 945 | 		case 'a':
 946 | 			// already noted
 947 | 		case 'c':
 948 | 			result.C = true
 949 | 		case 'C':
 950 | 			result.C = false
 951 | 		case 'd':
 952 | 			result.D = true
 953 | 		case 'D':
 954 | 			result.D = false
 955 | 		case 'g':
 956 | 			result.G = true
 957 | 		case 'i':
 958 | 			result.I = true
 959 | 		case 'I':
 960 | 			result.I = false
 961 | 		case 'k':
 962 | 			result.K = true
 963 | 		case 'K':
 964 | 			result.K = false
 965 | 		case 'n':
 966 | 			result.N = true
 967 | 		case 'N':
 968 | 			result.N = false
 969 | 		case 'o':
 970 | 			result.O = true
 971 | 		case 'O':
 972 | 			result.O = false
 973 | 		case 'p':
 974 | 			result.P = true
 975 | 		case 'P':
 976 | 			result.P = false
 977 | 		case 'r':
 978 | 			result.R = true
 979 | 		case 'R':
 980 | 			result.R = false
 981 | 		case 's':
 982 | 			result.S = true
 983 | 		case 'S':
 984 | 			result.S = false
 985 | 		case 't':
 986 | 			result.T = true
 987 | 		case 'T':
 988 | 			result.T = false
 989 | 		case 'v':
 990 | 			result.V = true
 991 | 		case 'V':
 992 | 			result.V = false
 993 | 		default:
 994 | 			fmt.Fprintf(os.Stderr, "error: unrecognized token class '%c'\n", class)
 995 | 		}
 996 | 	}
 997 | 	return result
 998 | }
 999 | 
1000 | func setupModeGG(args []string) searchMode {
1001 | 	res := searchMode{}
1002 | 	if !*flagActLikeGrep {
1003 | 		if len(args) < 2 {
1004 | 			// not enough args received, complete args with empty strings
1005 | 			for i := len(args); i < 2; i++ {
1006 | 				args = append(args, "")
1007 | 			}
1008 | 		}
1009 | 		// handle "all" flag first before subsequent upper-case anti-flags
1010 | 		res = parseFirstArg(args[0])
1011 | 
1012 | 		// initialize numeric value matcher
1013 | 		if res.V && len(args[1]) > 0 {
1014 | 			n := args[1]
1015 | 			if n[0] == '-' {
1016 | 				sign = -1
1017 | 				n = n[1:]
1018 | 			}
1019 | 			var err error
1020 | 			res.vInt, err = strconv.ParseUint(n, 0, 64)
1021 | 			res.vIsInt = true
1022 | 			if err != nil {
1023 | 				res.vIsInt = false
1024 | 				// we did not consume all the input...maybe it is a float.
1025 | 				res.vFloat, err = strconv.ParseFloat(n, 64)
1026 | 				// _ = res.vFloat + -5.25
1027 | 				if err != nil {
1028 | 					res.V = false
1029 | 					fmt.Fprintf(os.Stderr, "error: %v\n", err)
1030 | 				}
1031 | 			}
1032 | 		}
1033 | 	}
1034 | 	return res
1035 | }
1036 | 
// getRegexp compiles input as a regular expression. On failure the error is
// also written to standard error and the returned expression is nil.
func getRegexp(input string) (*regexp.Regexp, error) {
	re, err := regexp.Compile(input)
	if err == nil {
		return re, nil
	}
	fmt.Fprintf(os.Stderr, "error: %v\n", err)
	return re, err
}
1044 | 
// Scanner is an interface created to allow us to write tests: it is the
// part of *Scan that processRegularFile and scanFile rely on.
type Scanner interface {
	// Scan submits the named file for scanning. source carries the file's
	// content when it is already in memory and is nil otherwise.
	Scan(name string, source []byte)
}
1049 | 
// ReadNexter iterates over the members of an archive, as consumed by
// scanFile.
type ReadNexter interface {
	// Read reads content of the current member; scanFile reads until Read
	// stops supplying data.
	Read(p []byte) (n int, err error)
	// Next advances to the next member and returns its name. It returns
	// io.EOF when the archive has no more members.
	Next() (string, error)
}
1054 | 
1055 | func processRegularFile(name string, s Scanner) {
1056 | 	var err error
1057 | 	var data []byte
1058 | 	if isArchive(name) && isCompressed(name) {
1059 | 		name, data, _, err = decompress(name, nil)
1060 | 		if err != nil {
1061 | 			println(err)
1062 | 			return
1063 | 		}
1064 | 	}
1065 | 
1066 | 	var archive io.Reader
1067 | 	switch {
1068 | 	case len(data) == 0:
1069 | 		f, err := os.Open(name)
1070 | 		if err != nil {
1071 | 			println(err)
1072 | 			return
1073 | 		}
1074 | 		defer f.Close()
1075 | 		archive = f
1076 | 	default:
1077 | 		archive = bytes.NewReader(data)
1078 | 	}
1079 | 
1080 | 	ext := strings.ToLower(filepath.Ext(name))
1081 | 	switch {
1082 | 	case ext == ".cpio":
1083 | 		println("processing cpio archive", name)
1084 | 		r := newMultiReader(archive, ext, "")
1085 | 		scanFile(name, r, s)
1086 | 	case ext == ".tar":
1087 | 		println("processing tar archive", name)
1088 | 		r := newMultiReader(archive, ext, "")
1089 | 		scanFile(name, r, s)
1090 | 	case ext == ".zip":
1091 | 		println("processing zip archive:", name)
1092 | 		mr := newMultiReader(nil, ext, name)
1093 | 		scanFile(name, mr, s)
1094 | 	case isGo(name):
1095 | 		s.Scan(name, nil)
1096 | 	default:
1097 | 		println("skipping file with unrecognized extension:", name)
1098 | 	}
1099 | }
1100 | 
1101 | func scanFile(fileName string, r ReadNexter, s Scanner) {
1102 | 	for {
1103 | 		name, err := r.Next()
1104 | 		if err == io.EOF {
1105 | 			break // End of archive
1106 | 		}
1107 | 		if err != nil {
1108 | 			println(err)
1109 | 			return
1110 | 		}
1111 | 
1112 | 		memberName := fileName + "::" + name // "archive.cpio::file.go"
1113 | 		if !isGo(name) {
1114 | 			println("skipping file with unrecognized extension:", memberName)
1115 | 			continue
1116 | 		}
1117 | 		var buf bytes.Buffer
1118 | 		buf.ReadFrom(r)
1119 | 		bytes := buf.Bytes()
1120 | 		if err != nil {
1121 | 			println(err)
1122 | 			return
1123 | 		}
1124 | 		s.Scan(memberName, bytes)
1125 | 	}
1126 | }
1127 | 
// getResourceUsage returns the resource usage of this process as reported by
// getrusage(RUSAGE_SELF): user and system CPU time in seconds, and the
// maximum resident set size truncated to its low 32 bits. If the call fails
// the failure is logged and the values are computed from a zeroed record.
func getResourceUsage() (user, system float64, size uint64) {
	var usage syscall.Rusage
	err := syscall.Getrusage(syscall.RUSAGE_SELF, &usage)
	if err != nil {
		println("Error: unable to gather resource usage data:", err)
	}

	// seconds converts a seconds+microseconds pair to fractional seconds.
	seconds := func(tv syscall.Timeval) float64 {
		return float64(tv.Sec) + float64(tv.Usec)/1e6
	}

	user = seconds(usage.Utime)     // work by this process
	system = seconds(usage.Stime)   // work by OS on behalf of this process (reading files)
	size = uint64(uint32(usage.Maxrss))
	return user, system, size
}
1138 | 


--------------------------------------------------------------------------------
/testdata/source.tar:
--------------------------------------------------------------------------------
   1 | main.go                                                                                             0000644 0001750 0001750 00000021764 13510124317 012202  0                                                                                                    ustar   nicolas                         nicolas                                                                                                                                                                                                                package main
   2 | 
   3 | import (
   4 | 	"flag"
   5 | 	"fmt"
   6 | 	"log"
   7 | 	"os"
   8 | 	"runtime"
   9 | 	"strings"
  10 | 	"time"
  11 | )
  12 | 
  13 | var flagCPUs = flag.Int("cpu", 0, "number of CPUs to use (0 for all)")
  14 | var flagGo = flag.Bool("go", true, `limit grep to Go files ("main.go")`)
  15 | var flagList = flag.String("list", "", "list of filenames to grep")
  16 | var flagLog = flag.String("log", "", `write log to named file (or "[stdout]" or "[stderr]")`)
  17 | var flagOutput = flag.String("output", "", `write output to named file (or "[stdout]" or "[stderr]")`)
  18 | var flagRecursive = flag.Bool("r", false, "grep directories recursively")
  19 | var flagVisible = flag.Bool("visible", true, `limit grep to visible files (skip ".hidden.go")`)
  20 | 
  21 | // grep-compatibility flags
  22 | var flagActLikeGrep = flag.Bool("g", false, "act like grep")
  23 | var flagFileName = flag.Bool("h", false, `disply file name ("header") for each match`)
  24 | var flagLineNumber = flag.Bool("n", false, "disply line number for each match")
  25 | 
  26 | // secret developer flags
  27 | var flagSummary = flag.Bool("summary", true, "print performance summary")
  28 | var flagBufferWrites = flag.Bool("bufferWrites", true, "buffer output writes")
  29 | var flagBufferSize = flag.Int("bufferSize", 64*1024, "output buffer size")
  30 | var flagTrim = flag.Bool("trim", false, "trim matched strings")
  31 | 
  32 | var usage = `NAME
  33 |     gg - grep Go-language source code
  34 | 
  35 | SYNOPSIS
  36 |     gg [options] acdiknoprstvg regexp [file ...]
  37 | 
  38 | DESCRIPTION
  39 |     gg is classic grep (g/RE/p) with flag-directed Go token focus to search
  40 |     in package names, numbers, identifiers, comments, keywords, and more.
  41 |     Token flags are "acdiknoprstvg" in any order or combination:
  42 | 
  43 |        a   search in All of the following
  44 |        c   search in Comments (//... or /*...*/)
  45 |        d   search in Defined non-types (iota, nil, new, true,...)
  46 |        i   search in Identifiers ([alphabetic][alphabetic | numeric]*)
  47 |        k   search in Keywords (if, for, func, go, ...)
  48 |        n   search in Numbers ("255" matches 255, 0.255, 1e255)
  49 |        o   search in Operators (,  +  -  *  /  [  ] {  }  ( )  >>...)
  50 |        p   search in Package names
  51 |        r   search in Rune literals ('a', '\U00101234')
  52 |        s   search in Strings (quoted or raw)
  53 |        t   search in Types (bool, int, float64, map, ...)
  54 |        v   search in Values (255 is 0b11111111, 0377, 255, 0xff)
  55 |        g   search as grep, perform simple line-by-line matches in file
  56 | 
  57 |     gg combines lexical analysis and Go-native pattern matching to extend
  58 |     grep(1) for Go developers.  The search is restricted, seeking matches
  59 |     only in chosen token classes.  A search in number literals can match
  60 |     values, "v 255" matches the numeric value 255 in source code as
  61 |     0b1111_1111, 0377, 0o377, 255, 0xff, etc.  Go's linear-time regular
  62 |     expression engine is Unicode-aware and supports many Perl extensions:
  63 |     numbers in identifiers are found with "gg i [0-9]" or "gg i [\d]",
  64 |     comments with math symbols by "gg c \p{Sm}", and Greek in strings via
  65 |     "gg s \p{Greek}" each with appropriate shell escaping.
  66 | 
  67 |     gg searches files names listed on the command line or in a file of
  68 |     filenames provided the "-list" argument.  If neither of these is
  69 |     present, gg reads file names from the standard input which is useful in
  70 |     shell pipelines such as "find . -name "*.go" | gg k fallthrough"
  71 | 
  72 |     Files are Go source code files or directories.  Source files include
  73 |     typical ".go" files; compressed ".go" files named ".go.bz2", ".go.gz",
  74 |     or ".go.zst" for Bzip2, Gzip, and ZStandard compression formats;
  75 |     archives of any such files in the formats "a.cpio", "a.tar", or
  76 |     "a.zip"; or, finally, compressed archives as in "a.cpio.bz2" and
  77 |     "a.tar.gz".  If a named file is a directory then all Go source files
  78 |     in that directory are scanned without visiting subdirectories.  With
  79 |     the "-r" flag enabled, named directories are processed recursively,
  80 |     scanning each Go source file or archive in that directory's hierarchy.
  81 | 
  82 | OPTIONS
  83 |     -cpu=n
  84 |         Set the number of CPUs to use. Negative n means "all but n."
  85 |         Default is all.
  86 | 
  87 |     -go=bool
  88 |         Limit search to ".go" files.  Default is true.
  89 | 
  90 |     -h=bool
  91 |         Display file names ("headers") on matches.  Default is false for
  92 |         single-file searches and true otherwise.
  93 | 
  94 |     -list=file
  95 |         Search files listed one per line in the named file.
  96 | 
  97 |     -log=file
  98 |         Write a log of execution details to a named file.  The special
  99 |         file names "[stdout]" and "[stderr]" refer to the stdout and
 100 |         stderr streams.  (Last line of log details efficiency.)
 101 | 
 102 |     -n=bool
 103 |         Display line numbers following each match. Numbers count from
 104 |         one per file.  Default is false.
 105 | 
 106 |     -output=file
 107 |         gg output is normally to stdout but may be directed to a named
 108 |         file.  The special names "[stdout]" and "[stderr]" refer to the
 109 |         stdout and stderr streams.
 110 | 
 111 |     -r=bool
 112 |         Search directories recursively.  Default is false.
 113 | 
 114 |     -visible=bool
 115 |         Restrict search to visible files, those with names that do not
 116 |         start with "." (in the shell tradition).  Default is true.
 117 | 
 118 |     acdiknoprstvCDIKNOPRSTVg
 119 |         The Go token class flags have an upper case negative form to
 120 |         disable the indicated class.  Used with "a" for "all", "aCS"
 121 |         means "search All tokens except Comments and Strings."  Flag "g"
 122 |         means search as if the grep command, ignore Go lexical analysis
 123 |         and match lines.
 124 | 
 125 | EXAMPLES
 126 |     To search for comments containing "case" (ignoring switch statements)
 127 |     in every ".go" file in the current working directory, use the command:
 128 | 
 129 |         gg c case .
 130 | 
 131 |     To find number literals containing the digits 42 in ".go" files located
 132 |     anywhere in the current directory's hierarchy, use the command:
 133 | 
 134 |         gg -r n 42 .
 135 | 
 136 |     Find numbers with values of 255 (0b1111_1111, 0377, 0o377, 255, 0xff)
 137 |     in ".go" files in the gzipped tar(1) archive omega with the command:
 138 | 
 139 |         gg v 255 omega.tar.gz
 140 | 
 141 | AUTHOR
 142 |     Michael T. Jones (https://github.com/MichaelTJones)
 143 | 
 144 | SEE ALSO
 145 |     https://golang.org/pkg/regexp/syntax/
 146 |     https://github.com/google/re2/wiki/Syntax
 147 |     https://en.wikipedia.org/wiki/Unicode_character_property
 148 | `
 149 | 
 150 | func main() {
 151 | 	// parse command line before configuring logging (to allow "-log xyz.txt")
 152 | 	flag.Usage = func() {
 153 | 		fmt.Fprintf(flag.CommandLine.Output(), "\n%s", usage)
 154 | 	}
 155 | 	flag.Parse()
 156 | 
 157 | 	// set logging format and destination before first log event
 158 | 	log.SetFlags(log.LstdFlags | log.Lmicroseconds)
 159 | 	switch strings.ToLower(*flagLog) {
 160 | 	case "":
 161 | 		// no logging
 162 | 	case "[stdout]":
 163 | 		log.SetOutput(os.Stdout)
 164 | 	case "[stderr]":
 165 | 		log.SetOutput(os.Stderr)
 166 | 	default:
 167 | 		file, err := os.Create(*flagLog)
 168 | 		if err != nil {
 169 | 			log.Fatal(err)
 170 | 		}
 171 | 		log.SetOutput(file)
 172 | 	}
 173 | 
 174 | 	// control concurrency for testing (no disadvantage for maximal concurrrency)
 175 | 	*flagCPUs = getMaxCPU()
 176 | 
 177 | 	// bonus feature
 178 | 	// If you make a symbolic link to the executable or otherwise rename it from "gg" then it
 179 | 	// will automatically run in "be like grep" mode.
 180 | 	if !strings.HasSuffix(os.Args[0], "gg") {
 181 | 		*flagActLikeGrep = true // if user's made a symlink or renamed, become grep
 182 | 	}
 183 | 
 184 | 	if flag.NArg() < 2 {
 185 | 		fmt.Fprintf(os.Stderr, "usage: gg [flags] acdiknoprstvg regexp [file ...]\n")
 186 | 		fmt.Fprintf(os.Stderr, "    try gg -help for more\n")
 187 | 		os.Exit(1)
 188 | 	}
 189 | 
 190 | 	if *flagRecursive {
 191 | 		*flagFileName = true
 192 | 	}
 193 | 
 194 | 	// perform actual work
 195 | 	start := time.Now()
 196 | 	s := doScan()
 197 | 	elapsed := time.Since(start).Seconds()
 198 | 	user, system, _ := getResourceUsage()
 199 | 
 200 | 	// log performance summary
 201 | 	if *flagLog != "" {
 202 | 		printf("performance")
 203 | 		printf("  grep  %d matches\n", s.matches)
 204 | 		printf("  work  %d bytes, %d tokens, %d lines, %d files\n",
 205 | 			s.bytes, s.tokens, s.lines, s.files)
 206 | 		printf("  time  %.6f sec elapsed, %.6f sec user + %.6f system\n", elapsed, user, system)
 207 | 		if elapsed > 0 {
 208 | 			printf("  rate  %.0f bytes/sec, %.0f tokens/sec, %.0f lines/sec, %.0f files/sec\n",
 209 | 				float64(s.bytes)/elapsed,
 210 | 				float64(s.tokens)/elapsed,
 211 | 				float64(s.lines)/elapsed,
 212 | 				float64(s.files)/elapsed)
 213 | 			printf("  scale %d worker%s (parallel speedup = %.2fx)\n",
 214 | 				*flagCPUs, plural(*flagCPUs, ""), (user+system)/elapsed)
 215 | 		}
 216 | 	}
 217 | 	// print performance summary
 218 | 	if *flagSummary {
 219 | 		fmt.Printf("performance")
 220 | 		fmt.Printf("  grep  %d matches\n", s.matches)
 221 | 		fmt.Printf("  work  %d bytes, %d tokens, %d lines, %d files\n",
 222 | 			s.bytes, s.tokens, s.lines, s.files)
 223 | 		fmt.Printf("  time  %.6f sec elapsed, %.6f sec user + %.6f system\n", elapsed, user, system)
 224 | 		if elapsed > 0 {
 225 | 			fmt.Printf("  rate  %.0f bytes/sec, %.0f tokens/sec, %.0f lines/sec, %.0f files/sec\n",
 226 | 				float64(s.bytes)/elapsed,
 227 | 				float64(s.tokens)/elapsed,
 228 | 				float64(s.lines)/elapsed,
 229 | 				float64(s.files)/elapsed)
 230 | 			fmt.Printf("  scale %d worker%s (parallel speedup = %.2fx)\n",
 231 | 				*flagCPUs, plural(*flagCPUs, ""), (user+system)/elapsed)
 232 | 		}
 233 | 	}
 234 | 
 235 | 	// exit with grep-compatible codes
 236 | 	if s.matches == 0 {
 237 | 		os.Exit(1)
 238 | 	}
 239 | 	os.Exit(0)
 240 | }
 241 | 
 242 | func getMaxCPU() int {
 243 | 	if *flagCPUs != 1 {
 244 | 		if *flagCPUs == 0 { // claim CPUs
 245 | 			return runtime.NumCPU()
 246 | 		} else if *flagCPUs < 0 { // spare CPUs
 247 | 			res := *flagCPUs + runtime.NumCPU() // "-cpu -2" ==> "all but 2 CPUs"
 248 | 			if res < 1 {
 249 | 				return 1
 250 | 			}
 251 | 			return res
 252 | 		}
 253 | 	}
 254 | 	return 1
 255 | }
 256 |             main_test.go                                                                                        0000644 0001750 0001750 00000001460 13510124317 013230  0                                                                                                    ustar   nicolas                         nicolas                                                                                                                                                                                                                package main
 257 | 
 258 | import (
 259 | 	"reflect"
 260 | 	"runtime"
 261 | 	"testing"
 262 | )
 263 | 
 264 | func Test_getMaxCPU(t *testing.T) {
 265 | 	actualProcs := runtime.NumCPU()
 266 | 	tests := []struct {
 267 | 		name    string
 268 | 		rcvdVal int
 269 | 		want1   int
 270 | 	}{
 271 | 		{
 272 | 			name:    "0 should use all CPUs",
 273 | 			rcvdVal: 0,
 274 | 			want1:   actualProcs,
 275 | 		},
 276 | 
 277 | 		{
 278 | 			name:    "negative number should use all but x CPUs",
 279 | 			rcvdVal: -2,
 280 | 			want1:   actualProcs - 2,
 281 | 		},
 282 | 
 283 | 		{
 284 | 			name:    "should use at least 1 CPU",
 285 | 			rcvdVal: -1 * (actualProcs + 2),
 286 | 			want1:   1,
 287 | 		},
 288 | 
 289 | 		{
 290 | 			name:    "should use only 1 CPU",
 291 | 			rcvdVal: 1,
 292 | 			want1:   1,
 293 | 		},
 294 | 	}
 295 | 
 296 | 	for _, tt := range tests {
 297 | 		t.Run(tt.name, func(t *testing.T) {
 298 | 			*flagCPUs = tt.rcvdVal
 299 | 			got1 := getMaxCPU()
 300 | 
 301 | 			if !reflect.DeepEqual(got1, tt.want1) {
 302 | 				t.Errorf("getMaxCPU got1 = %v, want1: %v", got1, tt.want1)
 303 | 			}
 304 | 		})
 305 | 	}
 306 | }
 307 |                                                                                                                                                                                                                 scan.go                                                                                             0000644 0001750 0001750 00000047562 13510155213 012205  0                                                                                                    ustar   nicolas                         nicolas                                                                                                                                                                                                                package main
 308 | 
 309 | import (
 310 | 	"archive/tar"
 311 | 	"archive/zip"
 312 | 	"bufio"
 313 | 	"bytes"
 314 | 	"compress/bzip2"
 315 | 	"compress/gzip"
 316 | 	"flag"
 317 | 	"fmt"
 318 | 	"io"
 319 | 	"io/ioutil"
 320 | 	"log"
 321 | 	"os"
 322 | 	"path/filepath"
 323 | 	"regexp"
 324 | 	"strconv"
 325 | 	"strings"
 326 | 	"syscall"
 327 | 
 328 | 	"github.com/MichaelTJones/lex"
 329 | 	"github.com/cavaliercoder/go-cpio"
 330 | 	"github.com/klauspost/compress/zstd"
 331 | )
 332 | 
 333 | /*
 334 | Go-Grep: scan any number of Go source code files, where scanning means passing each
 335 | through Go-language lexical analysis and reporting lines where selected classes of
tokens match a search pattern defined by a regular expression.
 337 | */
 338 | 
 339 | // token class inclusion
 340 | // a: search all of the following
 341 | // c: search Comments ("//..." or "/*...*/")
 342 | // d: search Defined non-types (iota, nil, new, true,...)
 343 | // i: search Identifiers ([a-zA-Z][a-zA-Z0-9]*)
 344 | // k: search Keywords (if, for, func, go, ...)
 345 | // n: search Numbers as strings (255 as 255, 0.255, 1e255)
 346 | // o: search Operators (,+-*/[]{}()>>...)
 347 | // p: search Package names
 348 | // r: search Rune literals ('a', '\U00101234')
 349 | // s: search Strings ("quoted" or `raw`)
 350 | // t: search Types (bool, int, float64, map, ...)
 351 | // v: search numeric Values (255 as 0b1111_1111, 0377, 255, 0xff)
 352 | var G, C, D, I, K, N, O, P, R, S, T, V bool
 353 | 
 354 | // matching
 355 | var regex *regexp.Regexp // pattern
 356 | 
 357 | var sign int // literal sign
 358 | var vIsInt bool
 359 | var vInt uint64    // literal value
 360 | var vFloat float64 // literal value
 361 | 
 362 | func doScan() Summary {
 363 | 	s := NewScan()
 364 | 	fixedArgs := 2
 365 | 	if *flagActLikeGrep {
 366 | 		fixedArgs = 1
 367 | 	}
 368 | 
 369 | 	if flag.NArg() < fixedArgs {
 370 | 		return Summary{}
 371 | 	}
 372 | 
 373 | 	// initialize regular expression matcher
 374 | 	var err error
 375 | 	regex, err = getRegexp(flag.Arg(fixedArgs - 1))
 376 | 	if err != nil {
 377 | 		return Summary{}
 378 | 	}
 379 | 
 380 | 	// gg mode
 381 | 	mode := setupModeGG(flag.Args())
 382 | 	C = mode.C
 383 | 	D = mode.D
 384 | 	G = mode.G
 385 | 	I = mode.I
 386 | 	K = mode.K
 387 | 	N = mode.N
 388 | 	O = mode.O
 389 | 	P = mode.P
 390 | 	R = mode.R
 391 | 	S = mode.S
 392 | 	T = mode.T
 393 | 	V = mode.V
 394 | 	vIsInt = mode.vIsInt
 395 | 	vInt = mode.vInt
 396 | 	vFloat = mode.vFloat
 397 | 
 398 | 	println("scan begins")
 399 | 	scanned := false
 400 | 
 401 | 	// scan files in the file of filenames indicated by the "-list" option.
 402 | 	if *flagList != "" {
 403 | 		println("processing files listed in the -list option")
 404 | 		*flagFileName = true // presume multiple files...print names
 405 | 		s.List(*flagList)
 406 | 		scanned = true
 407 | 	}
 408 | 
 409 | 	// scan files named on command line.
 410 | 	if flag.NArg() > fixedArgs {
 411 | 		println("processing files listed on command line")
 412 | 		if flag.NArg() > fixedArgs+1 {
 413 | 			*flagFileName = true // multiple files...print names
 414 | 		}
 415 | 		for _, v := range flag.Args()[fixedArgs:] {
 416 | 			s.File(v)
 417 | 		}
 418 | 		scanned = true
 419 | 	}
 420 | 
 421 | 	// scan files named in standard input if nothing scanned yet.
 422 | 	if !scanned {
 423 | 		println("processing files listed in standard input")
 424 | 		*flagFileName = true // multiple files...print names
 425 | 		scanner := bufio.NewScanner(os.Stdin)
 426 | 		for scanner.Scan() {
 427 | 			s.File(scanner.Text())
 428 | 		}
 429 | 	}
 430 | 	summary := s.Complete() // parallel rendevousz here...will wait
 431 | 	println("scan ends")
 432 | 	return summary
 433 | }
 434 | 
// Scan holds the results of scanning one file. The top-level Scan created by
// doScan additionally carries the grand totals once Complete has run.
type Scan struct {
	path  string   // name reported for the file (after any decompression renaming)
	line  []uint32 // line number of each match; only filled when "-n" is set
	match []string // text printed for each match, in order of discovery

	bytes   int // size of the scanned source
	tokens  int // number of lexical tokens seen (not counted in grep mode)
	lines   int // number of lines seen
	matches int // number of matches recorded

	complete bool    // worker "I am done" marker, or "Complete already ran" on the top-level Scan
	total    Summary // totals received from the reporter; set by Complete
}
 448 | 
 449 | func NewScan() *Scan {
 450 | 	return &Scan{}
 451 | }
 452 | 
 453 | func visible(name string) bool {
 454 | 	if *flagVisible {
 455 | 		for _, s := range strings.Split(name, string(os.PathSeparator)) {
 456 | 			if s != "" && s != "." && s != ".." && s[0] == '.' {
 457 | 				return false
 458 | 			}
 459 | 		}
 460 | 	}
 461 | 	return true
 462 | }
 463 | 
 464 | func isCompressed(name string) bool {
 465 | 	ext := filepath.Ext(name)
 466 | 	return ext == ".bz2" || ext == ".gz" || ext == ".zst"
 467 | }
 468 | 
 469 | func decompress(oldName string, oldData []byte) (newName string, newData []byte, err error) {
 470 | 	ext := filepath.Ext(oldName)
 471 | 	if (ext == ".go" && len(oldData) > 0) || (ext == ".zip") {
 472 | 		return oldName, oldData, nil // nothing to do
 473 | 	}
 474 | 
 475 | 	var oldSize int64
 476 | 	var encoded, decoder io.Reader
 477 | 
 478 | 	// Select source of encoded data
 479 | 	switch {
 480 | 	case len(oldData) == 0:
 481 | 		// Read from named file
 482 | 		file, err := os.Open(oldName)
 483 | 		if err != nil {
 484 | 			println(err)
 485 | 			return oldName, nil, err
 486 | 		}
 487 | 		defer file.Close()
 488 | 		info, err := file.Stat()
 489 | 		if err != nil {
 490 | 			println(err)
 491 | 			return oldName, nil, err
 492 | 		}
 493 | 		oldSize = info.Size()
 494 | 		encoded = file
 495 | 	default:
 496 | 		// Use provided data (likely reading from an archive)
 497 | 		oldSize = int64(len(oldData))
 498 | 		encoded = bytes.NewReader(oldData)
 499 | 	}
 500 | 
 501 | 	// Select decompression algorithm based on file extension
 502 | 	switch {
 503 | 	case ext == ".bz2":
 504 | 		decoder, err = bzip2.NewReader(encoded), nil
 505 | 	case ext == ".gz":
 506 | 		decoder, err = gzip.NewReader(encoded)
 507 | 	case ext == ".zst":
 508 | 		decoder, err = zstd.NewReader(encoded)
 509 | 	default:
 510 | 		decoder, err = encoded, nil // "just reading" is minimal compression
 511 | 	}
 512 | 	if err != nil {
 513 | 		println(err) // error creating the decoder
 514 | 		return oldName, nil, err
 515 | 	}
 516 | 
 517 | 	// Decompress the data
 518 | 	if newData, err = ioutil.ReadAll(decoder); err != nil {
 519 | 		println(err) // error using the decoder
 520 | 		return oldName, nil, err
 521 | 	}
 522 | 	if ext != ".go" {
 523 | 		// Decompress the name ("sample.go.zst" → "sample.go")
 524 | 		newName = strings.TrimSuffix(oldName, ext)
 525 | 		printf("  %8d → %8d bytes (%6.3f×)  decompress and scan %s",
 526 | 			oldSize, len(newData), float64(len(newData))/float64(oldSize), oldName)
 527 | 	} else {
 528 | 		newName = oldName
 529 | 		printf("  %8d bytes  scan %s", len(newData), oldName)
 530 | 	}
 531 | 
 532 | 	return newName, newData, nil
 533 | }
 534 | 
 535 | func isArchive(name string) bool {
 536 | 	if isCompressed(name) {
 537 | 		ext := filepath.Ext(name)
 538 | 		name = strings.TrimSuffix(name, ext) // unwrap the compression suffix
 539 | 	}
 540 | 	ext := filepath.Ext(name)
 541 | 	return ext == ".cpio" || ext == ".tar" || ext == ".zip"
 542 | }
 543 | 
 544 | func isGo(name string) bool {
 545 | 	if !*flagGo {
 546 | 		return true
 547 | 	}
 548 | 	if isCompressed(name) {
 549 | 		ext := filepath.Ext(name)
 550 | 		name = strings.TrimSuffix(name, ext) // unwrap the compression suffix
 551 | 	}
 552 | 	return filepath.Ext(name) == ".go"
 553 | }
 554 | 
 555 | func (s *Scan) List(name string) {
 556 | 	file, err := os.Open(name)
 557 | 	if err != nil {
 558 | 		println(err)
 559 | 		return
 560 | 	}
 561 | 
 562 | 	println("scanning list of files:", name)
 563 | 	scanner := bufio.NewScanner(file)
 564 | 	for scanner.Scan() {
 565 | 		s.File(scanner.Text())
 566 | 	}
 567 | 	file.Close()
 568 | }
 569 | 
 570 | func (s *Scan) File(name string) {
 571 | 	if !visible(name) {
 572 | 		return
 573 | 	}
 574 | 
 575 | 	info, err := os.Lstat(name)
 576 | 	if err != nil {
 577 | 		println(err)
 578 | 		return
 579 | 	}
 580 | 
 581 | 	// process plain files
 582 | 	if info.Mode().IsRegular() {
 583 | 		processRegularFile(name, s)
 584 | 	} else if info.Mode().IsDir() { // process directories
 585 | 		switch *flagRecursive {
 586 | 		case false:
 587 | 			// process files in this directory
 588 | 			println("processing Go files in directory", name)
 589 | 
 590 | 			bases, err := ioutil.ReadDir(name)
 591 | 			if err != nil {
 592 | 				println(err)
 593 | 				return
 594 | 			}
 595 | 			for _, base := range bases {
 596 | 				fullName := filepath.Join(name, base.Name())
 597 | 				if visible(fullName) && isGo(fullName) {
 598 | 					s.Scan(fullName, nil)
 599 | 				}
 600 | 
 601 | 			}
 602 | 		case true:
 603 | 			// process files in this directory hierarchy
 604 | 			println("processing Go files in and under directory", name)
 605 | 
 606 | 			walker := func(path string, info os.FileInfo, err error) error {
 607 | 				if err != nil {
 608 | 					println(err)
 609 | 					return err
 610 | 				}
 611 | 				name := info.Name()
 612 | 				if info.IsDir() {
 613 | 					if !visible(name) {
 614 | 						println("skipping hidden directory", name)
 615 | 						return filepath.SkipDir
 616 | 					}
 617 | 				} else {
 618 | 					if visible(path) && isGo(path) {
 619 | 						s.Scan(path, nil)
 620 | 					}
 621 | 				}
 622 | 				return nil
 623 | 			}
 624 | 
 625 | 			err = filepath.Walk(name, walker) // standard library walker
 626 | 			if err != nil {
 627 | 				println(err)
 628 | 			}
 629 | 		}
 630 | 	}
 631 | }
 632 | 
// Work is one scan request handed to a worker.
type Work struct {
	name   string // file name, or "archive::member" for archive contents
	source []byte // file contents when already in memory; nil means read the named file
}
// Summary holds the totals accumulated by the reporter over all scanned files.
type Summary struct {
	bytes   int // total bytes scanned
	tokens  int // total tokens scanned
	matches int // total matches found
	lines   int // total lines scanned
	files   int // number of per-file results gathered
}
 644 | 
 645 | var first = true
 646 | var workers int
 647 | var scattered int
 648 | var work []chan Work
 649 | var result []chan *Scan
 650 | var done chan Summary
 651 | 
 652 | func worker(index int) {
 653 | 	for w := range work[index] {
 654 | 		s := NewScan()
 655 | 		s.scan(w.name, w.source)
 656 | 		result[index] <- s
 657 | 	}
 658 | 	result[index] <- &Scan{complete: true} // signal that this worker is done
 659 | }
 660 | 
 661 | func (s *Scan) Scan(name string, source []byte) {
 662 | 	if first {
 663 | 		workers = *flagCPUs
 664 | 		work = make([]chan Work, workers)
 665 | 		result = make([]chan *Scan, workers)
 666 | 		for i := 0; i < workers; i++ {
 667 | 			work[i] = make(chan Work, 512)
 668 | 			result[i] = make(chan *Scan, 512)
 669 | 			go worker(i)
 670 | 		}
 671 | 		done = make(chan Summary)
 672 | 		go reporter() // wait for and gather results
 673 | 		first = false
 674 | 	}
 675 | 
 676 | 	switch {
 677 | 	case name != "": // another file to scan
 678 | 		work[scattered%workers] <- Work{name: name, source: source} // enqueue scan request
 679 | 		scattered++
 680 | 	case name == "": // end of scan
 681 | 		for i := range work {
 682 | 			close(work[i]) // signal completion to workers
 683 | 		}
 684 | 	}
 685 | }
 686 | 
 687 | func (s *Scan) scan(name string, source []byte) {
 688 | 	var err error
 689 | 	var newName string
 690 | 	newName, source, err = decompress(name, source)
 691 | 	if err != nil {
 692 | 		return
 693 | 	}
 694 | 	s.path = newName
 695 | 	s.bytes += len(source)
 696 | 
 697 | 	// handle grep mode
 698 | 	if *flagActLikeGrep || G {
 699 | 		scanner := bufio.NewScanner(bytes.NewReader(source))
 700 | 		line := uint32(1)
 701 | 		for scanner.Scan() {
 702 | 			s.lines++
 703 | 			if regex.MatchString(scanner.Text()) {
 704 | 				s.match = append(s.match, scanner.Text()+"\n")
 705 | 				s.matches++
 706 | 				if *flagLineNumber {
 707 | 					s.line = append(s.line, line)
 708 | 				}
 709 | 			}
 710 | 			line++
 711 | 		}
 712 | 		return
 713 | 	}
 714 | 
 715 | 	// Perform the scan by tabulating token types, subtypes, and values
 716 | 	line := -1
 717 | 	lexer := &lex.Lexer{Input: string(source), Mode: lex.ScanGo} // | lex.SkipSpace}
 718 | 
 719 | 	expectPackageName := false
 720 | 	skip := false
 721 | 	// theWholeLine := ""
 722 | 	for tok, text := lexer.Scan(); tok != lex.EOF; tok, text = lexer.Scan() {
 723 | 		s.tokens++
 724 | 
 725 | 		// if !skip {
 726 | 		// 	theWholeLine = lexer.GetLine()
 727 | 		// 	if !regex.MatchString(theWholeLine) {
 728 | 		// 		skip = true
 729 | 		// 	}
 730 | 		// }
 731 | 
 732 | 		// go mini-parser: expect package name after "package" keyword
 733 | 		if expectPackageName && tok == lex.Identifier {
 734 | 			if P && regex.MatchString(text) {
 735 | 				s.match = append(s.match, lexer.GetLine())
 736 | 				// s.match = append(s.match, theWholeLine)
 737 | 				s.matches++
 738 | 				if *flagLineNumber {
 739 | 					s.line = append(s.line, uint32(lexer.Line))
 740 | 				}
 741 | 			}
 742 | 			expectPackageName = false
 743 | 		} else if tok == lex.Keyword && text == "package" {
 744 | 			expectPackageName = true // set expectations
 745 | 		}
 746 | 
 747 | 		handle := func(flag bool) {
 748 | 			// if !skip {
 749 | 			if true || !skip {
 750 | 				if flag && lexer.Line > line {
 751 | 					if lexer.Type == lex.String && lexer.Subtype == lex.Raw {
 752 | 						// match each line of the raw string individually
 753 | 						scanner := bufio.NewScanner(strings.NewReader(text))
 754 | 						lineInString := 0
 755 | 						for scanner.Scan() {
 756 | 							if regex.MatchString(scanner.Text()) {
 757 | 								s.match = append(s.match, scanner.Text()+"\n")
 758 | 								s.matches++
 759 | 								line = lexer.Line + lineInString
 760 | 								lineInString++
 761 | 								if *flagLineNumber {
 762 | 									s.line = append(s.line, uint32(line+1))
 763 | 								}
 764 | 							}
 765 | 						}
 766 | 					} else if regex.MatchString(text) {
 767 | 						// match the token but print the line that contains it
 768 | 						s.match = append(s.match, lexer.GetLine())
 769 | 						// s.match = append(s.match, theWholeLine)
 770 | 						s.matches++
 771 | 						line = lexer.Line
 772 | 						if *flagLineNumber {
 773 | 							s.line = append(s.line, uint32(line+1))
 774 | 						}
 775 | 					}
 776 | 				}
 777 | 			}
 778 | 		}
 779 | 
 780 | 		switch tok {
 781 | 		case lex.Space:
 782 | 			if text == "\n" {
 783 | 				skip = false
 784 | 				s.lines++
 785 | 			}
 786 | 		case lex.Comment:
 787 | 			handle(C)
 788 | 		case lex.String:
 789 | 			handle(S)
 790 | 		case lex.Operator:
 791 | 			handle(O)
 792 | 		case lex.Rune:
 793 | 			handle(R)
 794 | 		case lex.Identifier:
 795 | 			handle(I)
 796 | 		case lex.Number:
 797 | 			handle(N) // literal match
 798 | 			// introducing... the value match
 799 | 			if V && lexer.Line > line {
 800 | 				n := text
 801 | 				var nS int
 802 | 				if n[0] == '-' { // never used, but someday...
 803 | 					nS = -1
 804 | 					n = n[1:]
 805 | 				}
 806 | 				switch vIsInt {
 807 | 				case true:
 808 | 					var nI uint64
 809 | 					nI, err = strconv.ParseUint(n, 0, 64)
 810 | 					if err == nil && nS == sign && nI == vInt {
 811 | 						s.match = append(s.match, lexer.GetLine()) // match the token but print the line
 812 | 						line = lexer.Line
 813 | 					}
 814 | 				case false:
 815 | 					var nF float64
 816 | 					nF, err = strconv.ParseFloat(n, 64)
 817 | 					if err == nil && nS == sign && nF == vFloat {
 818 | 						s.match = append(s.match, lexer.GetLine()) // match the token but print the line
 819 | 						line = lexer.Line
 820 | 					}
 821 | 				}
 822 | 			}
 823 | 		case lex.Keyword:
 824 | 			handle(K)
 825 | 		case lex.Type:
 826 | 			handle(T)
 827 | 		case lex.Other:
 828 | 			handle(D)
 829 | 		case lex.Character:
 830 | 			// seems maningless match unexpected illegal characters, maybe "."?
 831 | 		}
 832 | 	}
 833 | }
 834 | 
 835 | // Complete a scan
 836 | func (s *Scan) Complete() Summary {
 837 | 	if !s.complete {
 838 | 		s.Scan("", nil)  // Signal end of additional files...
 839 | 		s.total = <-done // ...and await completion.of scanning
 840 | 
 841 | 		for i := range result {
 842 | 			close(result[i])
 843 | 		}
 844 | 
 845 | 		s.complete = true // Record completion
 846 | 	}
 847 | 	return s.total
 848 | }
 849 | 
 850 | func reporter() {
 851 | 	var w io.Writer
 852 | 
 853 | 	switch lower := strings.ToLower(*flagOutput); {
 854 | 	case lower == "" || lower == "[stdout]":
 855 | 		file := os.Stdout
 856 | 		if *flagBufferWrites {
 857 | 			b := bufio.NewWriterSize(file, *flagBufferSize) // ensure buffered writes
 858 | 			defer b.Flush()
 859 | 			w = b
 860 | 		} else {
 861 | 			w = file
 862 | 		}
 863 | 	case lower == "[stderr]":
 864 | 		file := os.Stderr
 865 | 		if *flagBufferWrites {
 866 | 			b := bufio.NewWriterSize(file, *flagBufferSize) // ensure buffered writes
 867 | 			defer b.Flush()
 868 | 			w = b
 869 | 		} else {
 870 | 			w = file
 871 | 		}
 872 | 	case lower != "":
 873 | 		var err error
 874 | 		file, err := os.Create(*flagOutput)
 875 | 		if err != nil {
 876 | 			println(err)
 877 | 			return
 878 | 		}
 879 | 		defer file.Close()
 880 | 		w = file
 881 | 	}
 882 | 
 883 | 	// summary statistics
 884 | 	total := Summary{}
 885 | 
 886 | 	// report results per file
 887 | 	gathered := 0
 888 | 	completed := 0
 889 | 	for {
 890 | 		// get next result in search order
 891 | 		s := <-result[gathered%workers]
 892 | 		gathered++
 893 | 
 894 | 		// handle completion events
 895 | 		if s.complete {
 896 | 			completed++ // one more worker has finished
 897 | 			if completed == workers {
 898 | 				break // all workers have now finished
 899 | 			}
 900 | 			continue
 901 | 		}
 902 | 
 903 | 		// report this file's matching lines
 904 | 		for i, m := range s.match {
 905 | 			// first the filename, from "-h"
 906 | 			if *flagFileName {
 907 | 				fmt.Fprintf(w, "%s:", s.path)
 908 | 			}
 909 | 
 910 | 			// second the line number, from "-n"
 911 | 			if *flagLineNumber {
 912 | 				fmt.Fprintf(w, "%d:", s.line[i])
 913 | 			}
 914 | 
 915 | 			// finally, the match itself
 916 | 			start := 0
 917 | 			if *flagTrim {
 918 | 				for start < len(m) {
 919 | 					ch := m[start]
 920 | 					if ch == ' ' || ch == '\t' {
 921 | 						start++
 922 | 					} else {
 923 | 						break
 924 | 					}
 925 | 				}
 926 | 				if start < len(m) {
 927 | 					m = m[start:]
 928 | 				}
 929 | 			}
 930 | 			fmt.Fprintf(w, "%s", m)
 931 | 		}
 932 | 
 933 | 		total.bytes += s.bytes
 934 | 		total.tokens += s.tokens
 935 | 		total.matches += s.matches
 936 | 		total.lines += s.lines
 937 | 		total.files++
 938 | 	}
 939 | 
 940 | 	// signal completion to main program
 941 | 	done <- total // scanning complete, here are totals
 942 | }
 943 | 
 944 | func println(v ...interface{}) {
 945 | 	if *flagLog != "" {
 946 | 		log.Println(v...)
 947 | 	}
 948 | }
 949 | 
 950 | func printf(f string, v ...interface{}) {
 951 | 	if *flagLog != "" {
 952 | 		log.Printf(f, v...)
 953 | 	}
 954 | }
 955 | 
 956 | func plural(n int, fill string) string {
 957 | 	if n == 1 {
 958 | 		return fill
 959 | 	}
 960 | 	return "s"
 961 | }
 962 | 
// searchMode is the parsed form of the token-class argument (and, for value
// searches, of the search text). parseFirstArg fills the class switches and
// setupModeGG adds the numeric value fields.
type searchMode struct {
	// c: search Comments ("//..." or "/*...*/")
	C bool
	// d: search Defined non-types (iota, nil, new, true,...)
	D bool
	// g: grep mode, set by class letter "g"
	G bool
	// i: search Identifiers ([a-zA-Z][a-zA-Z0-9]*)
	I bool
	// k: search Keywords (if, for, func, go, ...)
	K bool
	// n: search Numbers as strings (255 as 255, 0.255, 1e255)
	N bool
	// o: search Operators (,+-*/[]{}()>>...)
	O bool
	// p: search Package names
	P bool
	// r: search Rune literals ('a', '\U00101234')
	R bool
	// s: search Strings ("quoted" or `raw`)
	S bool
	// t: search Types (bool, int, float64, map, ...)
	T bool
	// v: search numeric Values (255 as 0b1111_1111, 0377, 255, 0xff)
	V      bool
	vIsInt bool    // true when the search text parsed as an unsigned integer
	vInt   uint64  // the search text parsed as an unsigned integer
	vFloat float64 // the search text parsed as a float, when it is not an integer
}
 992 | 
 993 | func parseFirstArg(input string) searchMode {
 994 | 	result := searchMode{}
 995 | 	// a: search all of the following
 996 | 	if strings.Contains(input, "a") {
 997 | 		result.C = true
 998 | 		result.D = true
 999 | 		result.I = true
1000 | 		result.K = true
1001 | 		result.N = true
1002 | 		result.O = true
1003 | 		result.P = true
1004 | 		result.R = true
1005 | 		result.S = true
1006 | 		result.T = true
1007 | 		result.V = true
1008 | 	}
1009 | 
1010 | 	// initialize token class inclusion flags
1011 | 	for _, class := range input {
1012 | 		switch class {
1013 | 		case 'a':
1014 | 			// already noted
1015 | 		case 'c':
1016 | 			result.C = true
1017 | 		case 'C':
1018 | 			result.C = false
1019 | 		case 'd':
1020 | 			result.D = true
1021 | 		case 'D':
1022 | 			result.D = false
1023 | 		case 'g':
1024 | 			result.G = true
1025 | 		case 'i':
1026 | 			result.I = true
1027 | 		case 'I':
1028 | 			result.I = false
1029 | 		case 'k':
1030 | 			result.K = true
1031 | 		case 'K':
1032 | 			result.K = false
1033 | 		case 'n':
1034 | 			result.N = true
1035 | 		case 'N':
1036 | 			result.N = false
1037 | 		case 'o':
1038 | 			result.O = true
1039 | 		case 'O':
1040 | 			result.O = false
1041 | 		case 'p':
1042 | 			result.P = true
1043 | 		case 'P':
1044 | 			result.P = false
1045 | 		case 'r':
1046 | 			result.R = true
1047 | 		case 'R':
1048 | 			result.R = false
1049 | 		case 's':
1050 | 			result.S = true
1051 | 		case 'S':
1052 | 			result.S = false
1053 | 		case 't':
1054 | 			result.T = true
1055 | 		case 'T':
1056 | 			result.T = false
1057 | 		case 'v':
1058 | 			result.V = true
1059 | 		case 'V':
1060 | 			result.V = false
1061 | 		default:
1062 | 			fmt.Fprintf(os.Stderr, "error: unrecognized token class '%c'\n", class)
1063 | 		}
1064 | 	}
1065 | 	return result
1066 | }
1067 | 
1068 | func setupModeGG(args []string) searchMode {
1069 | 	res := searchMode{}
1070 | 	if !*flagActLikeGrep {
1071 | 		if len(args) < 2 {
1072 | 			// not enough args received, complete args with empty strings
1073 | 			for i := len(args); i < 2; i++ {
1074 | 				args = append(args, "")
1075 | 			}
1076 | 		}
1077 | 		// handle "all" flag first before subsequent upper-case anti-flags
1078 | 		res = parseFirstArg(args[0])
1079 | 
1080 | 		// initialize numeric value matcher
1081 | 		if res.V && len(args[1]) > 0 {
1082 | 			n := args[1]
1083 | 			if n[0] == '-' {
1084 | 				sign = -1
1085 | 				n = n[1:]
1086 | 			}
1087 | 			var err error
1088 | 			res.vInt, err = strconv.ParseUint(n, 0, 64)
1089 | 			res.vIsInt = true
1090 | 			if err != nil {
1091 | 				res.vIsInt = false
1092 | 				// we did not consume all the input...maybe it is a float.
1093 | 				res.vFloat, err = strconv.ParseFloat(n, 64)
1094 | 				_ = res.vFloat + -5.25
1095 | 				if err != nil {
1096 | 					res.V = false
1097 | 					fmt.Fprintf(os.Stderr, "error: %v\n", err)
1098 | 				}
1099 | 			}
1100 | 		}
1101 | 	}
1102 | 	return res
1103 | }
1104 | 
// getRegexp compiles input as a regular expression. A pattern that does not
// compile is reported on standard error and the compile error is returned
// along with a nil expression.
func getRegexp(input string) (*regexp.Regexp, error) {
	compiled, err := regexp.Compile(input)
	if err == nil {
		return compiled, nil
	}
	fmt.Fprintf(os.Stderr, "error: %v\n", err)
	return compiled, err
}
1112 | 
// Scanner is an interface created to allow us to write tests for
// processRegularFile: anything that can accept a named file, with its
// contents when they are already in memory, can stand in for a *Scan.
type Scanner interface {
	// Scan queues the named source for scanning. A nil source means the
	// contents are to be read from the file called name.
	Scan(name string, source []byte)
}
1117 | 
1118 | func processRegularFile(name string, s Scanner) {
1119 | 	var err error
1120 | 	var data []byte
1121 | 	if isArchive(name) && isCompressed(name) {
1122 | 		name, data, err = decompress(name, nil)
1123 | 		if err != nil {
1124 | 			println(err)
1125 | 			return
1126 | 		}
1127 | 	}
1128 | 
1129 | 	var archive io.Reader
1130 | 	switch {
1131 | 	case len(data) == 0:
1132 | 		f, err := os.Open(name)
1133 | 		if err != nil {
1134 | 			println(err)
1135 | 			return
1136 | 		}
1137 | 		defer f.Close()
1138 | 		archive = f
1139 | 	default:
1140 | 		archive = bytes.NewReader(data)
1141 | 	}
1142 | 
1143 | 	ext := strings.ToLower(filepath.Ext(name))
1144 | 	switch {
1145 | 	case ext == ".cpio":
1146 | 		println("processing cpio archive", name)
1147 | 		r := cpio.NewReader(archive)
1148 | 		for {
1149 | 			hdr, err := r.Next()
1150 | 			if err == io.EOF {
1151 | 				break // End of archive
1152 | 			}
1153 | 			if err != nil {
1154 | 				println(err)
1155 | 				return
1156 | 			}
1157 | 			memberName := name + "::" + hdr.Name // "archive.cpio::file.go"
1158 | 			if !isGo(hdr.Name) {
1159 | 				println("skipping file with unrecognized extension:", memberName)
1160 | 				continue
1161 | 			}
1162 | 			bytes, err := ioutil.ReadAll(r)
1163 | 			if err != nil {
1164 | 				println(err)
1165 | 				return
1166 | 			}
1167 | 			s.Scan(memberName, bytes)
1168 | 		}
1169 | 	case ext == ".tar":
1170 | 		println("processing tar archive", name)
1171 | 		tr := tar.NewReader(archive)
1172 | 		for {
1173 | 			hdr, err := tr.Next()
1174 | 			if err == io.EOF {
1175 | 				break // End of archive
1176 | 			}
1177 | 			if err != nil {
1178 | 				println(err)
1179 | 				return
1180 | 			}
1181 | 			memberName := name + "::" + hdr.Name // "archive.tar::file.go"
1182 | 			if !isGo(hdr.Name) {
1183 | 				println("skipping file with unrecognized extension:", memberName)
1184 | 				continue
1185 | 			}
1186 | 			bytes, err := ioutil.ReadAll(tr)
1187 | 			if err != nil {
1188 | 				println(err)
1189 | 				return
1190 | 			}
1191 | 			s.Scan(memberName, bytes)
1192 | 		}
1193 | 	case ext == ".zip":
1194 | 		println("processing zip archive:", name)
1195 | 		r, err := zip.OpenReader(name)
1196 | 		if err != nil {
1197 | 			println(err)
1198 | 			return
1199 | 		}
1200 | 		defer r.Close()
1201 | 
1202 | 		for _, f := range r.File {
1203 | 			fullName := name + "::" + f.Name // "archive.zip::file.go"
1204 | 			if !isGo(f.Name) {
1205 | 				println("skipping file with unrecognized extension:", fullName)
1206 | 				continue
1207 | 			}
1208 | 			rc, err := f.Open()
1209 | 			if err != nil {
1210 | 				println(err)
1211 | 				return
1212 | 			}
1213 | 			bytes, err := ioutil.ReadAll(rc)
1214 | 			rc.Close()
1215 | 			if err != nil {
1216 | 				println(err)
1217 | 				return
1218 | 			}
1219 | 			s.Scan(fullName, bytes)
1220 | 		}
1221 | 	case isGo(name):
1222 | 		s.Scan(name, nil)
1223 | 	default:
1224 | 		println("skipping file with unrecognized extension:", name)
1225 | 	}
1226 | }
1227 | 
// getResourceUsage reports the resources consumed so far by this process as
// returned by getrusage(RUSAGE_SELF).
//
// user and system are CPU times in seconds. size is the maximum resident set
// size exactly as the kernel reports it; its unit is platform-defined
// (kilobytes on Linux, bytes on macOS according to getrusage(2)).
//
// If the system call fails a message is written to standard error and all
// three results are zero.
func getResourceUsage() (user, system float64, size uint64) {
	var usage syscall.Rusage
	if err := syscall.Getrusage(syscall.RUSAGE_SELF, &usage); err != nil {
		// fmt is used because the builtin println would print the error as
		// raw interface words instead of its message.
		fmt.Fprintln(os.Stderr, "Error: unable to gather resource usage data:", err)
	}
	user = float64(usage.Utime.Sec) + float64(usage.Utime.Usec)/1e6   // work by this process
	system = float64(usage.Stime.Sec) + float64(usage.Stime.Usec)/1e6 // work by OS on behalf of this process (reading files)
	// Convert without narrowing to 32 bits so large values are not wrapped;
	// the sign check keeps a (never expected) negative value from turning
	// into a huge unsigned one.
	if usage.Maxrss > 0 {
		size = uint64(usage.Maxrss)
	}
	return user, system, size
}
1238 |                                                                                                                                               scan_test.go                                                                                        0000644 0001750 0001750 00000035327 13510153456 013247  0                                                                                                    ustar   nicolas                         nicolas                                                                                                                                                                                                                package main
1239 | 
1240 | import (
1241 | 	"reflect"
1242 | 	"regexp"
1243 | 	"testing"
1244 | )
1245 | 
1246 | func Test_visibleWithFlagSet(t *testing.T) {
1247 | 	*flagVisible = true
1248 | 	type args struct {
1249 | 		name string
1250 | 	}
1251 | 	tests := []struct {
1252 | 		name string
1253 | 		args func(t *testing.T) args
1254 | 
1255 | 		want1 bool
1256 | 	}{
1257 | 		{
1258 | 			name: "hidden file",
1259 | 			args: func(*testing.T) args {
1260 | 				return args{name: ".test"}
1261 | 			},
1262 | 			want1: false,
1263 | 		},
1264 | 
1265 | 		{
1266 | 			name: "normal file in hidden folder should not be visible",
1267 | 			args: func(*testing.T) args {
1268 | 				return args{name: "/home/user/.config/test.go"}
1269 | 			},
1270 | 			want1: false,
1271 | 		},
1272 | 
1273 | 		{
1274 | 			name: "normal file",
1275 | 			args: func(*testing.T) args {
1276 | 				return args{name: "test"}
1277 | 			},
1278 | 			want1: true,
1279 | 		},
1280 | 
1281 | 		{
1282 | 			name: "go source file",
1283 | 			args: func(*testing.T) args {
1284 | 				return args{name: "test.go"}
1285 | 			},
1286 | 			want1: true,
1287 | 		},
1288 | 	}
1289 | 
1290 | 	for _, tt := range tests {
1291 | 		t.Run(tt.name, func(t *testing.T) {
1292 | 			tArgs := tt.args(t)
1293 | 
1294 | 			got1 := visible(tArgs.name)
1295 | 
1296 | 			if !reflect.DeepEqual(got1, tt.want1) {
1297 | 				t.Errorf("visible got1 = %v, want1: %v", got1, tt.want1)
1298 | 			}
1299 | 		})
1300 | 	}
1301 | }
1302 | 
1303 | func Test_visibleWithoutFlagSet(t *testing.T) {
1304 | 	// flagVisible = false means that we will show results for hidden files
1305 | 	*flagVisible = false
1306 | 	type args struct {
1307 | 		name string
1308 | 	}
1309 | 	tests := []struct {
1310 | 		name string
1311 | 		args func(t *testing.T) args
1312 | 
1313 | 		want1 bool
1314 | 	}{
1315 | 		{
1316 | 			name: "hidden file",
1317 | 			args: func(*testing.T) args {
1318 | 				return args{name: ".test"}
1319 | 			},
1320 | 			want1: true,
1321 | 		},
1322 | 
1323 | 		{
1324 | 			name: "normal file in hidden folder should be visible",
1325 | 			args: func(*testing.T) args {
1326 | 				return args{name: "/home/user/.config/test.go"}
1327 | 			},
1328 | 			want1: true,
1329 | 		},
1330 | 
1331 | 		{
1332 | 			name: "normal file",
1333 | 			args: func(*testing.T) args {
1334 | 				return args{name: "test"}
1335 | 			},
1336 | 			want1: true,
1337 | 		},
1338 | 
1339 | 		{
1340 | 			name: "go source file",
1341 | 			args: func(*testing.T) args {
1342 | 				return args{name: "test.go"}
1343 | 			},
1344 | 			want1: true,
1345 | 		},
1346 | 	}
1347 | 
1348 | 	for _, tt := range tests {
1349 | 		t.Run(tt.name, func(t *testing.T) {
1350 | 			tArgs := tt.args(t)
1351 | 
1352 | 			got1 := visible(tArgs.name)
1353 | 
1354 | 			if !reflect.DeepEqual(got1, tt.want1) {
1355 | 				t.Errorf("visible got1 = %v, want1: %v", got1, tt.want1)
1356 | 			}
1357 | 		})
1358 | 	}
1359 | }
1360 | 
1361 | func Test_isCompressed(t *testing.T) {
1362 | 	type args struct {
1363 | 		name string
1364 | 	}
1365 | 	tests := []struct {
1366 | 		name string
1367 | 		args func(t *testing.T) args
1368 | 
1369 | 		want1 bool
1370 | 	}{
1371 | 		{
1372 | 			name: ".bz2 is a valid compression",
1373 | 			args: func(*testing.T) args {
1374 | 				return args{name: "test.bz2"}
1375 | 			},
1376 | 			want1: true,
1377 | 		},
1378 | 
1379 | 		{
1380 | 			name: ".gz is a valid compression",
1381 | 			args: func(*testing.T) args {
1382 | 				return args{name: "test.gz"}
1383 | 			},
1384 | 			want1: true,
1385 | 		},
1386 | 
1387 | 		{
1388 | 			name: ".zst is a valid compression",
1389 | 			args: func(*testing.T) args {
1390 | 				return args{name: "test.zst"}
1391 | 			},
1392 | 			want1: true,
1393 | 		},
1394 | 
1395 | 		{
1396 | 			name: ".go isn't a valid compression",
1397 | 			args: func(*testing.T) args {
1398 | 				return args{name: "test.go"}
1399 | 			},
1400 | 			want1: false,
1401 | 		},
1402 | 	}
1403 | 
1404 | 	for _, tt := range tests {
1405 | 		t.Run(tt.name, func(t *testing.T) {
1406 | 			tArgs := tt.args(t)
1407 | 
1408 | 			got1 := isCompressed(tArgs.name)
1409 | 
1410 | 			if !reflect.DeepEqual(got1, tt.want1) {
1411 | 				t.Errorf("isCompressed got1 = %v, want1: %v", got1, tt.want1)
1412 | 			}
1413 | 		})
1414 | 	}
1415 | }
1416 | 
1417 | func Test_isGoWithFlagSet(t *testing.T) {
1418 | 	*flagGo = true
1419 | 	type args struct {
1420 | 		name string
1421 | 	}
1422 | 	tests := []struct {
1423 | 		name string
1424 | 		args func(t *testing.T) args
1425 | 
1426 | 		want1 bool
1427 | 	}{
1428 | 		{
1429 | 			name: "go files should pass",
1430 | 			args: func(*testing.T) args {
1431 | 				return args{name: "test.go"}
1432 | 			},
1433 | 			want1: true,
1434 | 		},
1435 | 
1436 | 		{
1437 | 			name: "zip files should not pass",
1438 | 			args: func(*testing.T) args {
1439 | 				return args{name: "test.go.zip"}
1440 | 			},
1441 | 			// is this assertion right ?
1442 | 			want1: false,
1443 | 		},
1444 | 
1445 | 		{
1446 | 			name: "gz files should pass",
1447 | 			args: func(*testing.T) args {
1448 | 				return args{name: "test.go.gz"}
1449 | 			},
1450 | 			want1: true,
1451 | 		},
1452 | 
1453 | 		{
1454 | 			name: "bz2 files should pass",
1455 | 			args: func(*testing.T) args {
1456 | 				return args{name: "test.go.bz2"}
1457 | 			},
1458 | 			want1: true,
1459 | 		},
1460 | 
1461 | 		{
1462 | 			name: "zst files should pass",
1463 | 			args: func(*testing.T) args {
1464 | 				return args{name: "test.go.zst"}
1465 | 			},
1466 | 			want1: true,
1467 | 		},
1468 | 	}
1469 | 
1470 | 	for _, tt := range tests {
1471 | 		t.Run(tt.name, func(t *testing.T) {
1472 | 			tArgs := tt.args(t)
1473 | 
1474 | 			got1 := isGo(tArgs.name)
1475 | 
1476 | 			if !reflect.DeepEqual(got1, tt.want1) {
1477 | 				t.Errorf("isGo got1 = %v, want1: %v", got1, tt.want1)
1478 | 			}
1479 | 		})
1480 | 	}
1481 | }
1482 | 
1483 | func Test_isGoWithoutFlagSet(t *testing.T) {
1484 | 	// with this flag set to false our search isn't limited to .go files
1485 | 	*flagGo = false
1486 | 	type args struct {
1487 | 		name string
1488 | 	}
1489 | 	tests := []struct {
1490 | 		name string
1491 | 		args func(t *testing.T) args
1492 | 
1493 | 		want1 bool
1494 | 	}{
1495 | 		{
1496 | 			name: "go files should pass",
1497 | 			args: func(*testing.T) args {
1498 | 				return args{name: "test.go"}
1499 | 			},
1500 | 			want1: true,
1501 | 		},
1502 | 
1503 | 		{
1504 | 			name: "zipped go files should pass",
1505 | 			args: func(*testing.T) args {
1506 | 				return args{name: "test.go.zip"}
1507 | 			},
1508 | 			want1: true,
1509 | 		},
1510 | 
1511 | 		{
1512 | 			name: "anything should pass when flagGo = false",
1513 | 			args: func(*testing.T) args {
1514 | 				return args{name: "test.zip.exe"}
1515 | 			},
1516 | 			want1: true,
1517 | 		},
1518 | 	}
1519 | 
1520 | 	for _, tt := range tests {
1521 | 		t.Run(tt.name, func(t *testing.T) {
1522 | 			tArgs := tt.args(t)
1523 | 
1524 | 			got1 := isGo(tArgs.name)
1525 | 
1526 | 			if !reflect.DeepEqual(got1, tt.want1) {
1527 | 				t.Errorf("isGo got1 = %v, want1: %v", got1, tt.want1)
1528 | 			}
1529 | 		})
1530 | 	}
1531 | }
1532 | 
1533 | func Test_isArchive(t *testing.T) {
1534 | 	type args struct {
1535 | 		name string
1536 | 	}
1537 | 	tests := []struct {
1538 | 		name string
1539 | 		args func(t *testing.T) args
1540 | 
1541 | 		want1 bool
1542 | 	}{
1543 | 		{
1544 | 			name: "tar is a valid archive format",
1545 | 			args: func(*testing.T) args {
1546 | 				return args{name: "test.tar"}
1547 | 			},
1548 | 			want1: true,
1549 | 		},
1550 | 
1551 | 		{
1552 | 			name: "zip is a valid archive format",
1553 | 			args: func(*testing.T) args {
1554 | 				return args{name: "test.zip"}
1555 | 			},
1556 | 			want1: true,
1557 | 		},
1558 | 
1559 | 		{
1560 | 			name: "cpio is a valid archive format",
1561 | 			args: func(*testing.T) args {
1562 | 				return args{name: "test.cpio"}
1563 | 			},
1564 | 			want1: true,
1565 | 		},
1566 | 
1567 | 		{
1568 | 			name: "cpio.bz2 is a valid archive format",
1569 | 			args: func(*testing.T) args {
1570 | 				return args{name: "test.cpio.bz2"}
1571 | 			},
1572 | 			want1: true,
1573 | 		},
1574 | 
1575 | 		{
1576 | 			name: "cpio.exe isn't a valid archive format",
1577 | 			args: func(*testing.T) args {
1578 | 				return args{name: "test.cpio.exe"}
1579 | 			},
1580 | 			want1: false,
1581 | 		},
1582 | 	}
1583 | 
1584 | 	for _, tt := range tests {
1585 | 		t.Run(tt.name, func(t *testing.T) {
1586 | 			tArgs := tt.args(t)
1587 | 
1588 | 			got1 := isArchive(tArgs.name)
1589 | 
1590 | 			if !reflect.DeepEqual(got1, tt.want1) {
1591 | 				t.Errorf("isArchive got1 = %v, want1: %v", got1, tt.want1)
1592 | 			}
1593 | 		})
1594 | 	}
1595 | }
1596 | 
1597 | func Test_parseFirstArg(t *testing.T) {
1598 | 	type args struct {
1599 | 		input string
1600 | 	}
1601 | 	tests := []struct {
1602 | 		name string
1603 | 		args func(t *testing.T) args
1604 | 
1605 | 		want1 searchMode
1606 | 	}{
1607 | 		{
1608 | 			name: "'a' should include all",
1609 | 			args: func(*testing.T) args {
1610 | 				return args{input: "a"}
1611 | 			},
1612 | 			want1: searchMode{
1613 | 				C: true,
1614 | 				D: true,
1615 | 				I: true,
1616 | 				K: true,
1617 | 				N: true,
1618 | 				O: true,
1619 | 				P: true,
1620 | 				R: true,
1621 | 				S: true,
1622 | 				T: true,
1623 | 				V: true,
1624 | 			},
1625 | 		},
1626 | 
1627 | 		{
1628 | 			name: "'c' should include only comments",
1629 | 			args: func(*testing.T) args {
1630 | 				return args{input: "c"}
1631 | 			},
1632 | 			want1: searchMode{
1633 | 				C: true,
1634 | 			},
1635 | 		},
1636 | 
1637 | 		{
1638 | 			name: "'aC' should only exclude comments",
1639 | 			args: func(*testing.T) args {
1640 | 				return args{input: "aC"}
1641 | 			},
1642 | 			want1: searchMode{
1643 | 				C: false,
1644 | 				D: true,
1645 | 				I: true,
1646 | 				K: true,
1647 | 				N: true,
1648 | 				O: true,
1649 | 				P: true,
1650 | 				R: true,
1651 | 				S: true,
1652 | 				T: true,
1653 | 				V: true,
1654 | 			},
1655 | 		},
1656 | 
1657 | 		{
1658 | 			name: "'d' should include only defined non-types",
1659 | 			args: func(*testing.T) args {
1660 | 				return args{input: "d"}
1661 | 			},
1662 | 			want1: searchMode{
1663 | 				D: true,
1664 | 			},
1665 | 		},
1666 | 
1667 | 		{
1668 | 			name: "'aD' should only exclude defined non-types",
1669 | 			args: func(*testing.T) args {
1670 | 				return args{input: "aD"}
1671 | 			},
1672 | 			want1: searchMode{
1673 | 				C: true,
1674 | 				D: false,
1675 | 				I: true,
1676 | 				K: true,
1677 | 				N: true,
1678 | 				O: true,
1679 | 				P: true,
1680 | 				R: true,
1681 | 				S: true,
1682 | 				T: true,
1683 | 				V: true,
1684 | 			},
1685 | 		},
1686 | 
1687 | 		{
1688 | 			name: "'i' should include only identifiers",
1689 | 			args: func(*testing.T) args {
1690 | 				return args{input: "i"}
1691 | 			},
1692 | 			want1: searchMode{
1693 | 				I: true,
1694 | 			},
1695 | 		},
1696 | 
1697 | 		{
1698 | 			name: "'aI' should only exclude identifiers",
1699 | 			args: func(*testing.T) args {
1700 | 				return args{input: "aI"}
1701 | 			},
1702 | 			want1: searchMode{
1703 | 				C: true,
1704 | 				D: true,
1705 | 				I: false,
1706 | 				K: true,
1707 | 				N: true,
1708 | 				O: true,
1709 | 				P: true,
1710 | 				R: true,
1711 | 				S: true,
1712 | 				T: true,
1713 | 				V: true,
1714 | 			},
1715 | 		},
1716 | 
1717 | 		{
1718 | 			name: "'k' should include only keywords",
1719 | 			args: func(*testing.T) args {
1720 | 				return args{input: "k"}
1721 | 			},
1722 | 			want1: searchMode{
1723 | 				K: true,
1724 | 			},
1725 | 		},
1726 | 
1727 | 		{
1728 | 			name: "'aK' should only exclude keywords",
1729 | 			args: func(*testing.T) args {
1730 | 				return args{input: "aK"}
1731 | 			},
1732 | 			want1: searchMode{
1733 | 				C: true,
1734 | 				D: true,
1735 | 				I: true,
1736 | 				K: false,
1737 | 				N: true,
1738 | 				O: true,
1739 | 				P: true,
1740 | 				R: true,
1741 | 				S: true,
1742 | 				T: true,
1743 | 				V: true,
1744 | 			},
1745 | 		},
1746 | 
1747 | 		{
1748 | 			name: "'n' should include only numbers",
1749 | 			args: func(*testing.T) args {
1750 | 				return args{input: "n"}
1751 | 			},
1752 | 			want1: searchMode{
1753 | 				N: true,
1754 | 			},
1755 | 		},
1756 | 
1757 | 		{
1758 | 			name: "'aN' should only exclude numbers",
1759 | 			args: func(*testing.T) args {
1760 | 				return args{input: "aN"}
1761 | 			},
1762 | 			want1: searchMode{
1763 | 				C: true,
1764 | 				D: true,
1765 | 				I: true,
1766 | 				K: true,
1767 | 				N: false,
1768 | 				O: true,
1769 | 				P: true,
1770 | 				R: true,
1771 | 				S: true,
1772 | 				T: true,
1773 | 				V: true,
1774 | 			},
1775 | 		},
1776 | 
1777 | 		{
1778 | 			name: "'o' should include only operators",
1779 | 			args: func(*testing.T) args {
1780 | 				return args{input: "o"}
1781 | 			},
1782 | 			want1: searchMode{
1783 | 				O: true,
1784 | 			},
1785 | 		},
1786 | 
1787 | 		{
1788 | 			name: "'aO' should only exclude operators",
1789 | 			args: func(*testing.T) args {
1790 | 				return args{input: "aO"}
1791 | 			},
1792 | 			want1: searchMode{
1793 | 				C: true,
1794 | 				D: true,
1795 | 				I: true,
1796 | 				K: true,
1797 | 				N: true,
1798 | 				O: false,
1799 | 				P: true,
1800 | 				R: true,
1801 | 				S: true,
1802 | 				T: true,
1803 | 				V: true,
1804 | 			},
1805 | 		},
1806 | 
1807 | 		{
1808 | 			name: "'p' should include only package names",
1809 | 			args: func(*testing.T) args {
1810 | 				return args{input: "p"}
1811 | 			},
1812 | 			want1: searchMode{
1813 | 				P: true,
1814 | 			},
1815 | 		},
1816 | 
1817 | 		{
1818 | 			name: "'aP' should only exclude package names",
1819 | 			args: func(*testing.T) args {
1820 | 				return args{input: "aP"}
1821 | 			},
1822 | 			want1: searchMode{
1823 | 				C: true,
1824 | 				D: true,
1825 | 				I: true,
1826 | 				K: true,
1827 | 				N: true,
1828 | 				O: true,
1829 | 				P: false,
1830 | 				R: true,
1831 | 				S: true,
1832 | 				T: true,
1833 | 				V: true,
1834 | 			},
1835 | 		},
1836 | 
1837 | 		{
1838 | 			name: "'r' should include only rune literals",
1839 | 			args: func(*testing.T) args {
1840 | 				return args{input: "r"}
1841 | 			},
1842 | 			want1: searchMode{
1843 | 				R: true,
1844 | 			},
1845 | 		},
1846 | 
1847 | 		{
1848 | 			name: "'aR' should only exclude rune literals",
1849 | 			args: func(*testing.T) args {
1850 | 				return args{input: "aR"}
1851 | 			},
1852 | 			want1: searchMode{
1853 | 				C: true,
1854 | 				D: true,
1855 | 				I: true,
1856 | 				K: true,
1857 | 				N: true,
1858 | 				O: true,
1859 | 				P: true,
1860 | 				R: false,
1861 | 				S: true,
1862 | 				T: true,
1863 | 				V: true,
1864 | 			},
1865 | 		},
1866 | 
1867 | 		{
1868 | 			name: "'s' should include only strings",
1869 | 			args: func(*testing.T) args {
1870 | 				return args{input: "s"}
1871 | 			},
1872 | 			want1: searchMode{
1873 | 				S: true,
1874 | 			},
1875 | 		},
1876 | 
1877 | 		{
1878 | 			name: "'aS' should only exclude strings",
1879 | 			args: func(*testing.T) args {
1880 | 				return args{input: "aS"}
1881 | 			},
1882 | 			want1: searchMode{
1883 | 				C: true,
1884 | 				D: true,
1885 | 				I: true,
1886 | 				K: true,
1887 | 				N: true,
1888 | 				O: true,
1889 | 				P: true,
1890 | 				R: true,
1891 | 				S: false,
1892 | 				T: true,
1893 | 				V: true,
1894 | 			},
1895 | 		},
1896 | 
1897 | 		{
1898 | 			name: "'t' should include only types",
1899 | 			args: func(*testing.T) args {
1900 | 				return args{input: "t"}
1901 | 			},
1902 | 			want1: searchMode{
1903 | 				T: true,
1904 | 			},
1905 | 		},
1906 | 
1907 | 		{
1908 | 			name: "'aT' should only exclude types",
1909 | 			args: func(*testing.T) args {
1910 | 				return args{input: "aT"}
1911 | 			},
1912 | 			want1: searchMode{
1913 | 				C: true,
1914 | 				D: true,
1915 | 				I: true,
1916 | 				K: true,
1917 | 				N: true,
1918 | 				O: true,
1919 | 				P: true,
1920 | 				R: true,
1921 | 				S: true,
1922 | 				T: false,
1923 | 				V: true,
1924 | 			},
1925 | 		},
1926 | 
1927 | 		{
1928 | 			name: "'v' should include only numeric values",
1929 | 			args: func(*testing.T) args {
1930 | 				return args{input: "v"}
1931 | 			},
1932 | 			want1: searchMode{
1933 | 				V: true,
1934 | 			},
1935 | 		},
1936 | 
1937 | 		{
1938 | 			name: "'aV' should only exclude numeric values",
1939 | 			args: func(*testing.T) args {
1940 | 				return args{input: "aV"}
1941 | 			},
1942 | 			want1: searchMode{
1943 | 				C: true,
1944 | 				D: true,
1945 | 				I: true,
1946 | 				K: true,
1947 | 				N: true,
1948 | 				O: true,
1949 | 				P: true,
1950 | 				R: true,
1951 | 				S: true,
1952 | 				T: true,
1953 | 				V: false,
1954 | 			},
1955 | 		},
1956 | 
1957 | 		{
1958 | 			name: "'g' should be grep mode",
1959 | 			args: func(*testing.T) args {
1960 | 				return args{input: "g"}
1961 | 			},
1962 | 			want1: searchMode{
1963 | 				G: true,
1964 | 			},
1965 | 		},
1966 | 	}
1967 | 
1968 | 	for _, tt := range tests {
1969 | 		t.Run(tt.name, func(t *testing.T) {
1970 | 			tArgs := tt.args(t)
1971 | 
1972 | 			got1 := parseFirstArg(tArgs.input)
1973 | 
1974 | 			if !reflect.DeepEqual(got1, tt.want1) {
1975 | 				t.Errorf("parseFirstArg got1 = %v, want1: %v", got1, tt.want1)
1976 | 			}
1977 | 		})
1978 | 	}
1979 | }
1980 | 
1981 | func Test_setupModeGG(t *testing.T) {
1982 | 	type args struct {
1983 | 		args []string
1984 | 	}
1985 | 	tests := []struct {
1986 | 		name string
1987 | 		args func(t *testing.T) args
1988 | 
1989 | 		want1 searchMode
1990 | 	}{
1991 | 		{
1992 | 			name: "empty args should not set anything",
1993 | 			args: func(*testing.T) args {
1994 | 				return args{args: []string{}}
1995 | 			},
1996 | 			want1: searchMode{},
1997 | 		},
1998 | 
1999 | 		{
2000 | 			name: "empty args should not set anything",
2001 | 			args: func(*testing.T) args {
2002 | 				return args{args: []string{""}}
2003 | 			},
2004 | 			want1: searchMode{},
2005 | 		},
2006 | 
2007 | 		{
2008 | 			name: "value matcher should work for ints",
2009 | 			args: func(*testing.T) args {
2010 | 				return args{args: []string{"v", "11"}}
2011 | 			},
2012 | 			want1: searchMode{V: true, vInt: 11, vIsInt: true},
2013 | 		},
2014 | 
2015 | 		{
2016 | 			name: "value matcher should work for negative ints",
2017 | 			args: func(*testing.T) args {
2018 | 				return args{args: []string{"v", "-42"}}
2019 | 			},
2020 | 			want1: searchMode{V: true, vInt: 42, vIsInt: true},
2021 | 		},
2022 | 
2023 | 		{
2024 | 			name: "value matcher should work for floats",
2025 | 			args: func(*testing.T) args {
2026 | 				return args{args: []string{"v", "8.93"}}
2027 | 			},
2028 | 			want1: searchMode{V: true, vFloat: 8.93, vIsInt: false},
2029 | 		},
2030 | 
2031 | 		{
2032 | 			name: "value matcher should work for negative floats",
2033 | 			args: func(*testing.T) args {
2034 | 				return args{args: []string{"v", "-8.93"}}
2035 | 			},
2036 | 			want1: searchMode{V: true, vFloat: 8.93, vIsInt: false},
2037 | 		},
2038 | 
2039 | 		{
2040 | 			name: "value matcher should not work for random strings",
2041 | 			args: func(*testing.T) args {
2042 | 				return args{args: []string{"v", "asdf"}}
2043 | 			},
2044 | 			want1: searchMode{},
2045 | 		},
2046 | 	}
2047 | 
2048 | 	for _, tt := range tests {
2049 | 		t.Run(tt.name, func(t *testing.T) {
2050 | 			tArgs := tt.args(t)
2051 | 
2052 | 			got1 := setupModeGG(tArgs.args)
2053 | 
2054 | 			if !reflect.DeepEqual(got1, tt.want1) {
2055 | 				t.Errorf("setupModeGG got1 = %v, want1: %v", got1, tt.want1)
2056 | 			}
2057 | 		})
2058 | 	}
2059 | }
2060 | 
2061 | func Test_getRegexp(t *testing.T) {
2062 | 	re, _ := regexp.Compile("[0-9]test?")
2063 | 	reErr, _ := regexp.Compile("*")
2064 | 	type args struct {
2065 | 		input string
2066 | 	}
2067 | 	tests := []struct {
2068 | 		name string
2069 | 		args func(t *testing.T) args
2070 | 
2071 | 		want1      *regexp.Regexp
2072 | 		wantErr    bool
2073 | 		inspectErr func(err error, t *testing.T)
2074 | 	}{
2075 | 		{
2076 | 			name: "valid regexp should work",
2077 | 			args: func(*testing.T) args {
2078 | 				return args{input: "[0-9]test?"}
2079 | 			},
2080 | 			want1:   re,
2081 | 			wantErr: false,
2082 | 			inspectErr: func(error, *testing.T) {
2083 | 			},
2084 | 		},
2085 | 
2086 | 		{
2087 | 			name: "invalid regexp should not work",
2088 | 			args: func(*testing.T) args {
2089 | 				return args{input: "*"}
2090 | 			},
2091 | 			want1:   reErr,
2092 | 			wantErr: true,
2093 | 			inspectErr: func(error, *testing.T) {
2094 | 			},
2095 | 		},
2096 | 	}
2097 | 
2098 | 	for _, tt := range tests {
2099 | 		t.Run(tt.name, func(t *testing.T) {
2100 | 			tArgs := tt.args(t)
2101 | 
2102 | 			got1, err := getRegexp(tArgs.input)
2103 | 
2104 | 			if !reflect.DeepEqual(got1, tt.want1) {
2105 | 				t.Errorf("getRegexp got1 = %v, want1: %v", got1, tt.want1)
2106 | 			}
2107 | 
2108 | 			if (err != nil) != tt.wantErr {
2109 | 				t.Fatalf("getRegexp error = %v, wantErr: %t", err, tt.wantErr)
2110 | 			}
2111 | 
2112 | 			if tt.inspectErr != nil {
2113 | 				tt.inspectErr(err, t)
2114 | 			}
2115 | 		})
2116 | 	}
2117 | }
2118 |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                


--------------------------------------------------------------------------------