├── LICENSE ├── README.md ├── exec_test.go ├── go.mod ├── strit.go └── strit_test.go /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017,2018,2019 Maxim Konakov 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, 5 | are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, 8 | this list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 3. Neither the name of the copyright holder nor the names of its contributors 13 | may be used to endorse or promote products derived from this software without 14 | specific prior written permission. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 17 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 20 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 21 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 24 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 25 | EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # strit 2 | 3 | [![GoDoc](https://godoc.org/github.com/maxim2266/strit?status.svg)](https://pkg.go.dev/github.com/maxim2266/strit) 4 | [![Go Report Card](https://goreportcard.com/badge/github.com/maxim2266/strit)](https://goreportcard.com/report/github.com/maxim2266/strit) 5 | [![License: BSD 3 Clause](https://img.shields.io/badge/License-BSD_3--Clause-yellow.svg)](https://opensource.org/licenses/BSD-3-Clause) 6 | 7 | Package `strit` (STRing ITerator) assists in development of string processing pipelines by providing a simple 8 | iteration model that allows for easy composition of processing stages. 9 | 10 | ### Motivation 11 | Suppose we want to develop a function that reads a file line by line, removes leading and trailing 12 | whitespace from each line, selects only non-empty lines that also do not start with the `#` symbol, and 13 | stores those lines in a slice of strings. Using the Go standard library one possible implementation 14 | of the function may look like this: 15 | ```Go 16 | func ReadConfig(fileName string) ([]string, error) { 17 | file, err := os.Open(fileName) 18 | 19 | if err != nil { 20 | return nil, err 21 | } 22 | 23 | defer file.Close() 24 | 25 | var res []string 26 | src := bufio.NewScanner(file) 27 | 28 | for src.Scan() { 29 | line := bytes.TrimSpace(src.Bytes()) 30 | 31 | if len(line) > 0 && line[0] != '#' { 32 | res = append(res, string(line)) 33 | } 34 | } 35 | 36 | if err = src.Err(); err != nil { 37 | return nil, err 38 | } 39 | 40 | return res, nil 41 | } 42 | ``` 43 | Using `strit` package the implementation can be simplified down to: 44 | ```Go 45 | func ReadConfig(fileName string) ([]string, error) { 46 | return strit.FromFile(fileName). 47 | Map(bytes.TrimSpace). 48 | Filter(strit.Not(strit.Empty).AndNot(strit.StartsWith("#"))). 
		Strings()
}
```
### Features
* A number of iterator constructors for reading text from a variety of sources:
  * `io.Reader`:
    [`FromReader`](https://godoc.org/github.com/maxim2266/strit#FromReader)
    [`FromReaderSF`](https://godoc.org/github.com/maxim2266/strit#FromReaderSF)
  * `io.ReadCloser`:
    [`FromReadCloser`](https://godoc.org/github.com/maxim2266/strit#FromReadCloser)
    [`FromReadCloserSF`](https://godoc.org/github.com/maxim2266/strit#FromReadCloserSF)
  * `[]byte`:
    [`FromBytes`](https://godoc.org/github.com/maxim2266/strit#FromBytes)
    [`FromBytesSF`](https://godoc.org/github.com/maxim2266/strit#FromBytesSF)
  * `string`:
    [`FromString`](https://godoc.org/github.com/maxim2266/strit#FromString)
    [`FromStringSF`](https://godoc.org/github.com/maxim2266/strit#FromStringSF)
  * `[]string`:
    [`FromStrings`](https://godoc.org/github.com/maxim2266/strit#FromStrings)
  * Disk file:
    [`FromFile`](https://godoc.org/github.com/maxim2266/strit#FromFile)
    [`FromFileSF`](https://godoc.org/github.com/maxim2266/strit#FromFileSF)
  * Directory listing:
    [`FromDir`](https://godoc.org/github.com/maxim2266/strit#FromDir)
  * Recursive directory listing:
    [`FromDirWalk`](https://godoc.org/github.com/maxim2266/strit#FromDirWalk)
  * External command output:
    [`FromCommand`](https://godoc.org/github.com/maxim2266/strit#FromCommand)
    [`FromCommandSF`](https://godoc.org/github.com/maxim2266/strit#FromCommandSF)
* Mapping and filtering primitives:
    [`Filter`](https://godoc.org/github.com/maxim2266/strit#Iter.Filter)
    [`GenMap`](https://godoc.org/github.com/maxim2266/strit#Iter.GenMap)
    [`Map`](https://godoc.org/github.com/maxim2266/strit#Iter.Map)
* Sequence limiting functions:
    [`Skip`](https://godoc.org/github.com/maxim2266/strit#Iter.Skip)
    [`SkipWhile`](https://godoc.org/github.com/maxim2266/strit#Iter.SkipWhile)
[`Take`](https://godoc.org/github.com/maxim2266/strit#Iter.Take) 85 | [`TakeWhile`](https://godoc.org/github.com/maxim2266/strit#Iter.TakeWhile) 86 | * Search function: 87 | [`FirstNonEmpty`](https://godoc.org/github.com/maxim2266/strit#Iter.FirstNonEmpty) 88 | * Piping iterator output through an external command: 89 | [`Pipe`](https://godoc.org/github.com/maxim2266/strit#Iter.Pipe) 90 | [`PipeSF`](https://godoc.org/github.com/maxim2266/strit#Iter.PipeSF) 91 | * Iterator chaining (sequential combination): 92 | [`Chain`](https://godoc.org/github.com/maxim2266/strit#Chain) 93 | * Iterator merging (parallel combination): 94 | [`Merge`](https://godoc.org/github.com/maxim2266/strit#Merge) 95 | * Output collectors that invoke the given iterator and write the result to various destinations: 96 | * `string`: 97 | [`String`](https://godoc.org/github.com/maxim2266/strit#Iter.String) 98 | [`Join`](https://godoc.org/github.com/maxim2266/strit#Iter.Join) 99 | * `[]string`: 100 | [`Strings`](https://godoc.org/github.com/maxim2266/strit#Iter.Strings) 101 | * `[]byte`: 102 | [`Bytes`](https://godoc.org/github.com/maxim2266/strit#Iter.Bytes) 103 | [`JoinBytes`](https://godoc.org/github.com/maxim2266/strit#Iter.JoinBytes) 104 | * `io.Writer`: 105 | [`WriteTo`](https://godoc.org/github.com/maxim2266/strit#Iter.WriteTo) 106 | [`WriteSepTo`](https://godoc.org/github.com/maxim2266/strit#Iter.WriteSepTo) 107 | * Disk file: 108 | [`WriteToFile`](https://godoc.org/github.com/maxim2266/strit#Iter.WriteToFile) 109 | [`WriteSepToFile`](https://godoc.org/github.com/maxim2266/strit#Iter.WriteSepToFile) 110 | * Predicates and predicate combinators for use with `Filter`: 111 | [`Empty`](https://godoc.org/github.com/maxim2266/strit#Empty) 112 | [`StartsWith`](https://godoc.org/github.com/maxim2266/strit#StartsWith) 113 | [`EndsWith`](https://godoc.org/github.com/maxim2266/strit#EndsWith) 114 | [`Not`](https://godoc.org/github.com/maxim2266/strit#Not) 115 | 
    [`And`](https://godoc.org/github.com/maxim2266/strit#Pred.And)
    [`AndNot`](https://godoc.org/github.com/maxim2266/strit#Pred.AndNot)
    [`Or`](https://godoc.org/github.com/maxim2266/strit#Pred.Or)
    [`OrNot`](https://godoc.org/github.com/maxim2266/strit#Pred.OrNot)
* Basic parsing supported via [`Parse`](https://godoc.org/github.com/maxim2266/strit#Iter.Parse) function.

### More examples:
* Naïve `grep`:
```Go
func main() {
	_, err := strit.FromReader(os.Stdin).
		Filter(regexp.MustCompile(os.Args[1]).Match).
		WriteSepTo(os.Stdout, "\n")

	if err != nil {
		os.Stderr.WriteString(err.Error() + "\n")
		os.Exit(1)
	}
}
```
* Recursively find all the filesystem entries matching the given regular expression:
```Go
func selectEntries(root string, re *regexp.Regexp) ([]string, error) {
	return strit.FromDirWalk(root, nil).Filter(re.Match).Strings()
}
```
* Build a list of `.flac` files in the given directory, annotating each name with its corresponding
track number from FLAC metadata:
```Go
func namesWithTrackNumbers(dir string) ([]string, error) {
	return strit.FromDir(dir, func(info os.FileInfo) bool { return info.Mode().IsRegular() }).
		Filter(strit.EndsWith(".flac")).
		GenMap(prependTrackNo).
		Strings()
}

func prependTrackNo(file []byte) ([]byte, error) {
	name := string(file)

	no, err := strit.FromCommand(exec.Command("metaflac", "--list", "--block-type=VORBIS_COMMENT", name)).
		FirstNonEmpty(func(s []byte) []byte {
			if m := match(s); len(m) == 2 {
				return m[1]
			}

			return nil
		}).
162 | String() 163 | 164 | if err != nil { 165 | return nil, err 166 | } 167 | 168 | if len(no) == 0 { 169 | return []byte("???: " + filepath.Base(name)), nil 170 | } 171 | 172 | return []byte(no + ": " + filepath.Base(name)), nil 173 | } 174 | 175 | var match = regexp.MustCompile(`tracknumber=([[:digit:]]+)$`).FindSubmatch 176 | ``` 177 | 178 | ### Project status 179 | The project is in a beta state. Tested on Linux Mint 19.1, with Go version 1.12. 180 | Should also work on other platforms supported by Go runtime, but currently this is not very well tested. 181 | 182 | ##### License: BSD 183 | -------------------------------------------------------------------------------- /exec_test.go: -------------------------------------------------------------------------------- 1 | //+build linux darwin 2 | 3 | /* 4 | Copyright (c) 2017,2018,2019 Maxim Konakov 5 | All rights reserved. 6 | 7 | Redistribution and use in source and binary forms, with or without modification, 8 | are permitted provided that the following conditions are met: 9 | 10 | 1. Redistributions of source code must retain the above copyright notice, 11 | this list of conditions and the following disclaimer. 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 3. Neither the name of the copyright holder nor the names of its contributors 16 | may be used to endorse or promote products derived from this software without 17 | specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 20 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 21 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
22 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 23 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 24 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 26 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 27 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 28 | EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | */ 30 | 31 | package strit 32 | 33 | import ( 34 | "bytes" 35 | "errors" 36 | "fmt" 37 | "io" 38 | "os/exec" 39 | "sort" 40 | "testing" 41 | ) 42 | 43 | func TestLimitedWriter(t *testing.T) { 44 | type testCase struct{ a, b []byte } 45 | 46 | cases := []testCase{ 47 | {[]byte("a"), []byte("a")}, 48 | {[]byte("ab"), []byte("ab")}, 49 | {[]byte("abcdef"), []byte("abcde")}, 50 | {[]byte("abcdefghijklmopqrtuvwxyz"), []byte("abcde")}, 51 | } 52 | 53 | for i, c := range cases { 54 | var s limitedWriter 55 | 56 | s.limit = 5 57 | n, err := s.Write(c.a) 58 | 59 | switch { 60 | case n != len(c.a): 61 | t.Errorf("(%d) Unexpected write length: %d insted of %d", i, n, len(c.a)) 62 | return 63 | case err != nil: 64 | t.Errorf("(%d) Unexpected error: %s", i, err) 65 | return 66 | case bytes.Compare(s.buff, c.b) != 0: 67 | t.Errorf("(%d) Unexpected result: %q instead of %q", i, string(s.buff), string(c.b)) 68 | return 69 | } 70 | } 71 | } 72 | 73 | func TestLimitedWriter2(t *testing.T) { 74 | type testCase struct{ a, b, exp []byte } 75 | 76 | cases := []testCase{ 77 | {[]byte("a"), []byte("b"), []byte("ab")}, 78 | {[]byte("abcdef"), nil, []byte("abcde")}, 79 | {[]byte("abcd"), []byte("e"), []byte("abcde")}, 80 | {[]byte("abcd"), []byte("efg"), []byte("abcde")}, 81 | {[]byte("abcd"), []byte("efghijklmopqrtuvwxyz"), []byte("abcde")}, 82 | {[]byte("abcde"), []byte("fghijklmopqrtuvwxyz"), []byte("abcde")}, 83 | {[]byte("abcdef"), 
[]byte("ghijklmopqrtuvwxyz"), []byte("abcde")}, 84 | } 85 | 86 | for i, c := range cases { 87 | var s limitedWriter 88 | 89 | s.limit = 5 90 | 91 | s.Write(c.a) 92 | s.Write(c.b) 93 | 94 | switch { 95 | case len(s.buff) != len(c.exp): 96 | t.Errorf("(%d) Unexpected write length: %d insted of %d", i, len(s.buff), len(c.exp)) 97 | return 98 | case bytes.Compare(s.buff, c.exp) != 0: 99 | t.Errorf("(%d) Unexpected result: %q instead of %q", i, string(s.buff), string(c.exp)) 100 | return 101 | } 102 | } 103 | } 104 | 105 | func TestEcho(t *testing.T) { 106 | type testCase struct { 107 | in string 108 | exp []string 109 | } 110 | 111 | cases := []testCase{ 112 | {`echo "ZZZ"`, []string{"ZZZ"}}, 113 | {`echo "ZZZ" ; echo "zzz"`, []string{"ZZZ", "zzz"}}, 114 | } 115 | 116 | for i, c := range cases { 117 | var buff []string 118 | 119 | ret, err := invokeCmd(exec.Command("sh", "-c", c.in), func(s []byte) error { 120 | buff = append(buff, string(s)) 121 | return nil 122 | }) 123 | 124 | switch { 125 | case err != nil: 126 | t.Errorf("(%d) Unexpected error: [%d] %s", i, ret, err) 127 | return 128 | case ret != 0: 129 | t.Errorf("(%d) Unexpected exit code %d without an error", i, ret) 130 | return 131 | case len(buff) != len(c.exp): 132 | t.Errorf("(%d) Unexpected number of lines: %d instead of %d", i, len(buff), len(c.exp)) 133 | return 134 | } 135 | 136 | for i := 0; i < len(buff); i++ { 137 | if buff[i] != c.exp[i] { 138 | t.Errorf("(%d) Unexpected line: %q instead of %q", i, buff[i], c.exp[i]) 139 | return 140 | } 141 | } 142 | } 143 | } 144 | 145 | func TestErrors(t *testing.T) { 146 | type testCase struct { 147 | cmd *exec.Cmd 148 | check checkFunc 149 | } 150 | 151 | cases := []testCase{ 152 | { 153 | exec.Command("find", ".", "-type", "f", "-name", "*.go"), 154 | checkOutput("./exec_test.go", "./strit.go", "./strit_test.go"), 155 | }, 156 | { 157 | exec.Command("findZZZ", ".", "-type", "f", "-name", "*.go"), 158 | hasError(t), 159 | }, 160 | { 161 | exec.Command("find", 
"./this-does-not-exist", "-type", "f", "-name", "*.go"), 162 | hasError(t), 163 | }, 164 | } 165 | 166 | for i, c := range cases { 167 | var buff []string 168 | 169 | ret, err := invokeCmd(c.cmd, func(s []byte) error { 170 | buff = append(buff, string(s)) 171 | return nil 172 | }) 173 | 174 | if msg := c.check(ret, err, buff); len(msg) > 0 { 175 | t.Errorf("(%d) %s", i, msg) 176 | return 177 | } 178 | } 179 | } 180 | 181 | func TestBreak(t *testing.T) { 182 | var count int 183 | var res []string 184 | 185 | ret, err := invokeCmd(exec.Command("sh", "-c", `echo "AAA" ; echo "BBB" ; echo "CCC"`), func(s []byte) error { 186 | if count++; count > 2 { 187 | return io.EOF 188 | } 189 | 190 | res = append(res, string(s)) 191 | return nil 192 | }) 193 | 194 | switch { 195 | case err != nil: 196 | t.Errorf("Unexpected error: [%d] %s", ret, err) 197 | return 198 | case ret != 0: 199 | t.Errorf("Unexpected exit code %d without an error", ret) 200 | return 201 | case len(res) != 2: 202 | t.Errorf("Unexpected number of lines: %d instead of 3", len(res)) 203 | return 204 | } 205 | 206 | for i, s := range []string{"AAA", "BBB"} { 207 | if s != res[i] { 208 | t.Errorf("(%d) unexpected string: %q instead of %q", i, res[i], s) 209 | return 210 | } 211 | } 212 | } 213 | 214 | func TestPipeTermination(t *testing.T) { 215 | const msg = "Just an error" 216 | 217 | // termination at the end of the pipe 218 | err := FromString("aaa\nbbb\nccc").Pipe(exec.Command("cat"))(func(s []byte) error { 219 | if bytes.Compare(s, []byte("aaa")) != 0 { 220 | return fmt.Errorf("Invalid string in callback: %q", string(s)) 221 | } 222 | 223 | return errors.New(msg) 224 | }) 225 | 226 | if err == nil { 227 | t.Error("Missing error") 228 | return 229 | } 230 | 231 | if err.Error() != msg { 232 | t.Errorf("Unexpected error: %q instead of %q", err.Error(), msg) 233 | return 234 | } 235 | 236 | // termination before pipe 237 | iter := Iter(func(fn Func) error { 238 | if err := fn([]byte("aaa")); err != nil { 
239 | return err 240 | } 241 | 242 | return errors.New(msg) 243 | }) 244 | 245 | err = iter.Pipe(exec.Command("cat"))(func(s []byte) error { 246 | if bytes.Compare(s, []byte("aaa")) != 0 { 247 | return fmt.Errorf("Invalid string in callback: %q", string(s)) 248 | } 249 | 250 | return nil 251 | }) 252 | 253 | if err == nil { 254 | t.Error("Missing error") 255 | return 256 | } 257 | 258 | if err.Error() != msg { 259 | t.Errorf("Unexpected error: %q instead of %q", err.Error(), msg) 260 | return 261 | } 262 | } 263 | 264 | // helpers 265 | type checkFunc = func(int, error, []string) string 266 | 267 | func checkOutput(exp ...string) checkFunc { 268 | return func(ret int, err error, out []string) string { 269 | switch { 270 | case err != nil: 271 | return fmt.Sprintf("Unexpected error message: %q", err) 272 | case ret != 0: 273 | return fmt.Sprintf("Unexpected exit code %d without error", ret) 274 | case len(out) != len(exp): 275 | return fmt.Sprintf("Unexpected number of lines: %d instead of %d", len(out), len(exp)) 276 | } 277 | 278 | sort.Strings(out) 279 | 280 | for i := 0; i < len(out); i++ { 281 | if out[i] != exp[i] { 282 | return fmt.Sprintf("Unexpected line: %q instead of %q", out[i], exp[i]) 283 | } 284 | } 285 | 286 | return "" 287 | } 288 | } 289 | 290 | func hasError(t *testing.T) checkFunc { 291 | return func(ret int, err error, out []string) string { 292 | switch { 293 | case ret == 0: 294 | return "Unexpected exit code: 0" 295 | case err == nil: 296 | return "Missing error message" 297 | } 298 | 299 | t.Logf("exit code: %d, error message: %q", ret, err) 300 | return "" 301 | } 302 | } 303 | 304 | func invokeCmd(cmd *exec.Cmd, fn Func) (ret int, err error) { 305 | if err = FromCommand(cmd)(fn); err != nil { 306 | if e, ok := err.(*ExitError); ok { 307 | ret = e.ExitCode 308 | } else { 309 | ret = -1 310 | } 311 | } 312 | 313 | return 314 | } 315 | -------------------------------------------------------------------------------- /go.mod: 
-------------------------------------------------------------------------------- 1 | module github.com/maxim2266/strit 2 | 3 | go 1.13 4 | -------------------------------------------------------------------------------- /strit.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2017,2018,2019 Maxim Konakov 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without modification, 6 | are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, 9 | this list of conditions and the following disclaimer. 10 | 2. Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 3. Neither the name of the copyright holder nor the names of its contributors 14 | may be used to endorse or promote products derived from this software without 15 | specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 21 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 22 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 25 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 26 | EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 | */ 28 | 29 | /* 30 | Package strit introduces a string iterator of "push" type, as well as a number of 31 | iterator constructors and wrappers. 32 | 33 | Typically, iteration over some source of data involves creating an object of some 34 | reader type, then repeatedly calling a method on the object to get the data elements, 35 | and finally calling some method to release the resources allocated inside the reader object. 36 | And in the languages without exceptions (like Go) there must be an error check on each step. 37 | In other words, there is an interface and some protocol all the clients must follow, 38 | which usually results in certain amount of boilerplate code to write. 39 | 40 | The above example describes the so called "pull" model of iteration where the client has to call a method 41 | on the reader to "pull" the next data element out of the reader object. Alternative to that 42 | is the "push" model where client gives the iterator a callback function to be invoked once 43 | per each element of input data. 44 | 45 | Another aspect of the iteration model is about dealing with errors. In the absence of exceptions, 46 | like in Go language, the "pull" model implies that the result of each step of the 47 | iteration is either a data item, or an error. The same is true for pure "push" model where 48 | each invocation of the callback has to receive two parameters: a data element and an error. Or there 49 | maybe two callbacks, one for data and another for errors, but this is conceptually the same. 50 | 51 | This library is an experiment in combining both "push" and "pull" models: it utilises "push" 52 | model for data elements, and "pull" model for error propagation. The data elements here are strings only, 53 | as currently the Go language does not offer any generic programming mechanism. The iterator type itself is 54 | a function that invokes the supplied callback once per each input string, and it returns either an error or nil. 
This arrangement reduces the amount of boilerplate code, at least in the most
common scenarios, plus it allows for easy iterator combination, especially useful where dynamic creation
of string processing pipelines is required.

The string iterator type used throughout this library is simply a function of the type func(Func) error,
where Func is the type of the callback, func([]byte) error. Strings are represented as byte slices
for performance reasons, to allow for in-place modifications where possible, like in bytes.TrimSpace() function.
Iteration is simply an invocation of the iterator function with a callback to receive strings. The
iterator function returns whatever error may have occurred during the iteration. The callback
function itself may also return an error which will stop the iteration and will be returned from
the iterator function. The special error value of io.EOF is treated as a request to stop the iteration and
will not be propagated up to the caller.

The library offers a number of iterator constructors making iterators
from different sources like strings, byte slices, byte readers, files and shell command outputs.
There is also a number of mapping and filtering functions, as well as writers for strings,
byte slices and files. All the above is composable using fluent API. For example, reading a file
line by line, removing leading and trailing whitespace from each line, selecting only non-empty
lines that also do not start with the character '#', and storing the result in a slice of strings,
may look like this:

	lines, err := strit.FromFile("some-file").
		Map(bytes.TrimSpace).
		Filter(strit.Not(strit.Empty).AndNot(strit.StartsWith("#"))).
79 | Strings() 80 | 81 | Here strit.FromFile is an iterator constructor, Map and Filter operations are iterator wrappers each 82 | creating a new iterator from the existing one, and Strings() is the iterator invocation. Also notice the 83 | use of the provided predicate combinators in the parameter to the Filter() function. 84 | 85 | Given that each iterator is itself a function, other iterators can be derived from the existing ones. 86 | For example, the following function takes an iterator and creates a new iterator that prepends the line 87 | number to each line from the original iterator: 88 | 89 | func Numbered(iter Iter) Iter { 90 | return func(fn Func) error { // this is the new iterator function 91 | i := 0 92 | 93 | // this is the invocation of the original iterator function 94 | return iter(func(line []byte) error { 95 | i++ 96 | line = []byte(fmt.Sprintf("%d %s", i, string(line))) 97 | return fn(line) // invocation of the callback 98 | }) 99 | } 100 | } 101 | 102 | Iterators in this library are lazy, meaning that the actual iteration happens only when the iterator function is 103 | called, not when it is created. Some of the provided iterators are reusable, meaning that such iterator may be 104 | called more than once, but in general this property cannot be guaranteed for those sources that cannot be 105 | read multiple times (like io.Reader), so in general it is advised that in the absence of any specific 106 | knowledge every iterator should be treated as not reusable. All the iterator implementations have O(n) 107 | complexity in time and O(1) in space. 108 | 109 | One disadvantage of the suggested model is the complex implementation of parallel composition of iterators, 110 | see the comment to the Merge() function for more details. 
Also, there is a certain cost of abstraction incurred, 111 | in other words, a straight-forward implementation of the above example could be somewhat more efficient 112 | in terms of CPU cycles and memory consumption, though the difference would mostly come from the extra 113 | function calls and as such would probably be not so substantial, especially for i/o-bound sources. 114 | The actual benchmaking of some simple processing scenarios shows that the performance difference is truly minor. 115 | */ 116 | package strit 117 | 118 | import ( 119 | "bufio" 120 | "bytes" 121 | "errors" 122 | "fmt" 123 | "io" 124 | "io/ioutil" 125 | "os" 126 | "os/exec" 127 | "path/filepath" 128 | "strings" 129 | ) 130 | 131 | // Func is the type of callback function used by Iter. 132 | type Func func([]byte) error 133 | 134 | // Iter is the iterator type. 135 | type Iter func(Func) error 136 | 137 | // Filter makes a new iterator that produces only the strings for which the supplied predicate returns 'true'. 138 | func (iter Iter) Filter(pred Pred) Iter { 139 | return func(fn Func) error { 140 | return iter(func(line []byte) (err error) { 141 | if pred(line) { 142 | err = fn(line) 143 | } 144 | 145 | return 146 | }) 147 | } 148 | } 149 | 150 | // Map makes a new iterator that applies the specified function to every input string. 151 | func (iter Iter) Map(mapper func([]byte) []byte) Iter { 152 | return func(fn Func) error { 153 | return iter(func(line []byte) error { 154 | return fn(mapper(line)) 155 | }) 156 | } 157 | } 158 | 159 | // ErrSkip is a special error type to skip an item in GenMap() function. 160 | var ErrSkip = errors.New("Item skipped") 161 | 162 | // GenMap makes a new iterator that applies the supplied function to every input string. 163 | // The function may return a non-nil error, in which case the iteration will stop and 164 | // the error will be propagated back to the source. 
A special error value of ErrSkip instructs 165 | // the iterator to simply skip the current string. 166 | func (iter Iter) GenMap(mapper func([]byte) ([]byte, error)) Iter { 167 | return func(fn Func) error { 168 | return iter(func(line []byte) error { 169 | switch s, err := mapper(line); err { 170 | case nil: 171 | return fn(s) 172 | case ErrSkip: 173 | return nil 174 | default: 175 | return err 176 | } 177 | }) 178 | } 179 | } 180 | 181 | // FirstNonEmpty makes a new iterator that skips everything until the supplied mapper function returns 182 | // a non-empty slice, which gets passed down the pipeline, and then the iteration stops. This is 183 | // essentially a search for the first non-empty string returned from the mapper. 184 | func (iter Iter) FirstNonEmpty(mapper func([]byte) []byte) Iter { 185 | return func(fn Func) error { 186 | return iter(func(line []byte) (err error) { 187 | if line = mapper(line); len(line) > 0 { 188 | if err = fn(line); err == nil { 189 | err = io.EOF 190 | } 191 | } 192 | 193 | return 194 | }) 195 | } 196 | } 197 | 198 | // TakeWhile makes a new iterator that produces its output while the specified predicate returns 'true'. 199 | func (iter Iter) TakeWhile(pred Pred) Iter { 200 | return func(fn Func) error { 201 | return iter(func(line []byte) error { 202 | if pred(line) { 203 | return fn(line) 204 | } 205 | 206 | return io.EOF 207 | }) 208 | } 209 | } 210 | 211 | // Take makes a new iterator that produces no more than the specified number of output strings. 212 | func (iter Iter) Take(numLines uint64) Iter { 213 | return func(fn Func) error { 214 | count := numLines 215 | 216 | return iter(func(line []byte) error { 217 | if count == 0 { 218 | return io.EOF 219 | } 220 | 221 | count-- 222 | return fn(line) 223 | }) 224 | } 225 | } 226 | 227 | // SkipWhile makes a new iterator that skips input strings until the supplied predicate returns 'false' 228 | // for the first time and passes the remaining strings to the callback function. 
229 | func (iter Iter) SkipWhile(pred Pred) Iter { 230 | return func(fn Func) error { 231 | skip := true 232 | 233 | return iter(func(line []byte) (err error) { 234 | if skip { 235 | skip = pred(line) 236 | } 237 | 238 | if !skip { 239 | err = fn(line) 240 | } 241 | 242 | return 243 | }) 244 | } 245 | } 246 | 247 | // Skip makes a new iterator that skips the specified number of input strings and passes 248 | // all the remaining strings to the callback function. 249 | func (iter Iter) Skip(numLines uint64) Iter { 250 | return func(fn Func) error { 251 | count := numLines 252 | 253 | return iter(func(line []byte) error { 254 | if count == 0 { 255 | return fn(line) 256 | } 257 | 258 | count-- 259 | return nil 260 | }) 261 | } 262 | } 263 | 264 | // Chain implements sequential composition of its input iterators. The returned iterator 265 | // invokes all the input iterators one after another, from left to the right. 266 | func Chain(its ...Iter) Iter { 267 | switch len(its) { 268 | case 0: 269 | panic("No iterators in Chain() function") 270 | case 1: 271 | return its[0] 272 | } 273 | 274 | return func(fn Func) error { 275 | for _, iter := range its { 276 | if err := iter(fn); err != nil { 277 | return err 278 | } 279 | } 280 | 281 | return nil 282 | } 283 | } 284 | 285 | // Merge implements parallel composition of its input iterators. The returned iterator 286 | // on each step invokes all its inputs in parallel, taking one string from each input iterator, 287 | // and then joins those strings around the specified separator to produce one output string. 288 | // The iteration stops when any input iterator gets exhausted. 289 | // It should be noted that the iterator type used in this library provides for easy 290 | // sequential composition of iterators, but the parallel composition is 291 | // substantially more complex. 
In the current implementation each input iterator 292 | // except the first one runs in a dedicated goroutine which pipes its output back through a channel. 293 | // That complexity is usually not a problem if at least one of the input iterators is i/o-bound, 294 | // but it is likely to become a performance bottleneck if all the iterators read from the memory. 295 | // Please use this function responsibly. 296 | func Merge(sep string, its ...Iter) Iter { 297 | // check the number of input iterators 298 | switch len(its) { 299 | case 0: 300 | panic("No iterators in Merge() function") 301 | case 1: 302 | return its[0] 303 | } 304 | 305 | // data structure for channels 306 | type cdata struct { 307 | str string 308 | err error 309 | } 310 | 311 | // the iterator 312 | return func(fn Func) error { 313 | // cancellation flag 314 | done := make(chan int) 315 | 316 | defer close(done) 317 | 318 | // number of goroutines 319 | n := len(its) - 1 320 | 321 | // channels for iterators 322 | chs := make([]chan cdata, n) 323 | 324 | // create channels and start goroutines 325 | for i := 0; i < n; i++ { 326 | chs[i] = make(chan cdata, 10) // not sure about the size... 
327 | 328 | // start goroutine 329 | go func(iter Iter, ch chan<- cdata) { 330 | defer close(ch) 331 | 332 | // iterate and feed the channel 333 | err := iter(func(line []byte) error { 334 | select { 335 | case ch <- cdata{str: string(line)}: 336 | return nil 337 | case <-done: 338 | return errors.New("Interrupted") 339 | } 340 | }) 341 | 342 | // post error, if any 343 | if err != nil { 344 | select { 345 | case ch <- cdata{err: err}: 346 | case <-done: 347 | } 348 | } 349 | }(its[i+1], chs[i]) 350 | } 351 | 352 | // buffer for strings 353 | buff := make([]string, len(its)) 354 | 355 | // invoke the first iterator 356 | return its[0](func(line []byte) error { 357 | // take the string from this iterator 358 | buff[0] = string(line) 359 | 360 | // take one item from every channel 361 | for i, ch := range chs { 362 | item, ok := <-ch 363 | 364 | if !ok { 365 | // iterator has finished 366 | return io.EOF 367 | } 368 | 369 | if item.err != nil { 370 | // iterator has failed 371 | return item.err 372 | } 373 | 374 | // store the string 375 | buff[i+1] = item.str 376 | } 377 | 378 | // merge strings and invoke the callback 379 | return fn([]byte(strings.Join(buff, sep))) 380 | }) 381 | } 382 | } 383 | 384 | // String invokes the iterator and concatenates its output into one string. 385 | func (iter Iter) String() (res string, err error) { 386 | var buff bytes.Buffer 387 | 388 | if _, err = iter.WriteSepTo(&buff, ""); err == nil { 389 | res = buff.String() 390 | } 391 | 392 | return 393 | } 394 | 395 | // Bytes invokes the iterator and concatenates its output into one byte slice. 396 | func (iter Iter) Bytes() (res []byte, err error) { 397 | var buff bytes.Buffer 398 | 399 | if _, err = iter.WriteSepTo(&buff, ""); err == nil { 400 | res = buff.Bytes() 401 | } 402 | 403 | return 404 | } 405 | 406 | // Strings invokes the iterator and collects its output into a slice of strings. 
407 | func (iter Iter) Strings() (res []string, err error) { 408 | if err = iter(func(line []byte) error { 409 | res = append(res, string(line)) 410 | return nil 411 | }); err != nil { 412 | res = nil 413 | } 414 | 415 | return 416 | } 417 | 418 | // Join invokes the iterator and collects its output into one string, delimited 419 | // by the specified separator. 420 | func (iter Iter) Join(sep string) (res string, err error) { 421 | var buff bytes.Buffer 422 | 423 | if _, err = iter.WriteSepTo(&buff, sep); err == nil { 424 | res = buff.String() 425 | } 426 | 427 | return 428 | } 429 | 430 | // JoinBytes invokes the iterator and collects its output into one byte slice, delimited 431 | // by the specified separator. 432 | func (iter Iter) JoinBytes(sep string) (res []byte, err error) { 433 | var buff bytes.Buffer 434 | 435 | if _, err = iter.WriteSepTo(&buff, sep); err == nil { 436 | res = buff.Bytes() 437 | } 438 | 439 | return 440 | } 441 | 442 | // WriteSepTo invokes the iterator and writes its output strings, delimited by the specified separator, 443 | // into the specified Writer. The method returns the total number of bytes written, or an error. 
444 | func (iter Iter) WriteSepTo(dest io.Writer, sep string) (n int64, err error) { 445 | if len(sep) == 0 { 446 | // optimised version for empty separator 447 | err = iter(func(line []byte) error { 448 | num, e := dest.Write(line) 449 | n += int64(num) 450 | return e 451 | }) 452 | 453 | return 454 | } 455 | 456 | // full version 457 | delim := []byte(sep) 458 | 459 | var lines int64 460 | 461 | err = iter(func(line []byte) (e error) { 462 | var num int 463 | 464 | // lines is only used for detecting the first write 465 | if lines++; lines != 1 { 466 | num, e = dest.Write(delim) 467 | n += int64(num) 468 | 469 | if e != nil { 470 | return 471 | } 472 | } 473 | 474 | num, e = dest.Write(line) 475 | n += int64(num) 476 | return 477 | }) 478 | 479 | return 480 | } 481 | 482 | // WriteTo invokes the iterator and writes its output strings, delimited by new line character, 483 | // into the specified Writer. The method returns the total number of bytes written, or an error. 484 | func (iter Iter) WriteTo(dest io.Writer) (int64, error) { 485 | return iter.WriteSepTo(dest, "\n") 486 | } 487 | 488 | // WriteSepToFile creates a file with the specified name, and then invokes the iterator and writes 489 | // its output strings, delimited by the specified separator, into the file. The method returns 490 | // the total number of bytes written, or an error, in which case the resulting file gets deleted. 
491 | func (iter Iter) WriteSepToFile(name, sep string) (n int64, err error) { 492 | var file *os.File 493 | 494 | if file, err = os.Create(name); err != nil { 495 | return 496 | } 497 | 498 | defer func() { 499 | // close and delete the file if panicking 500 | if p := recover(); p != nil { 501 | file.Close() 502 | os.Remove(name) 503 | panic(p) 504 | } 505 | 506 | // close and set error 507 | if e := file.Close(); e != nil && err == nil { 508 | err = e 509 | } 510 | 511 | // delete the file on error 512 | if err != nil { 513 | os.Remove(name) 514 | } 515 | }() 516 | 517 | w := bufio.NewWriter(file) 518 | 519 | if n, err = iter.WriteSepTo(w, sep); err == nil { 520 | err = w.Flush() 521 | } 522 | 523 | return 524 | } 525 | 526 | // WriteToFile creates a file with the specified name, and then invokes the iterator and writes 527 | // its output strings, delimited by new line character, into the file. The method returns 528 | // the total number of bytes written, or an error, in which case the resulting file gets deleted. 529 | func (iter Iter) WriteToFile(name string) (int64, error) { 530 | return iter.WriteSepToFile(name, "\n") 531 | } 532 | 533 | // ParserFunc represents class of functions implementing parser state machines. 534 | type ParserFunc func([]byte) (ParserFunc, error) 535 | 536 | // Parser is the interface of a line parser. 537 | type Parser interface { 538 | // Enter is the entry point of the parser and gets called on the first iteration. 539 | // The returned function usually refers to another method of the same class, thus implementing 540 | // the parser state machine. Any non-nil error stops the iteration. 541 | Enter([]byte) (ParserFunc, error) 542 | 543 | // Done is called after the iteration is complete or an error has occurred. The error 544 | // it returns (if any) becomes the return value of the iterator. 545 | Done(error) error 546 | } 547 | 548 | // Parse feeds the supplied parser from the given iterator. 
549 | func (iter Iter) Parse(p Parser) error { 550 | fn := p.Enter 551 | 552 | return p.Done(iter(func(s []byte) (err error) { 553 | fn, err = fn(s) 554 | return 555 | })) 556 | } 557 | 558 | // FromReaderSF constructs a new iterator that reads its input byte stream from the specified Reader and 559 | // breaks the stream into tokens using the supplied split function. If the function is set to nil then 560 | // the iterator breaks the input into lines with line terminators stripped. Internally the iterator 561 | // is implemented using bufio.Scanner, please refer to its documentation for more details on split functions. 562 | func FromReaderSF(sf bufio.SplitFunc, input io.Reader) Iter { 563 | return func(fn Func) (err error) { 564 | if err = iterate(input, sf, fn); err == io.EOF { 565 | err = nil // io.EOF indicates early stop 566 | } 567 | 568 | return 569 | } 570 | } 571 | 572 | // FromReader constructs a new iterator that reads its input byte stream from the specified Reader and 573 | // breaks the input into lines with line termination stripped. 574 | func FromReader(input io.Reader) Iter { 575 | return FromReaderSF(nil, input) 576 | } 577 | 578 | // FromReadCloserSF is a wrapper around FromReaderSF with exactly the same functionality that also 579 | // closes the input Reader at the end of the iteration. 580 | func FromReadCloserSF(sf bufio.SplitFunc, input io.ReadCloser) Iter { 581 | return func(fn Func) error { 582 | defer input.Close() 583 | return FromReaderSF(sf, input)(fn) 584 | } 585 | } 586 | 587 | // FromReadCloser constructs a new iterator that reads its input byte stream from the specified Reader and 588 | // breaks the input into lines with line termination stripped. The input Reader gets closed at the end 589 | // of the iteration. 
590 | func FromReadCloser(input io.ReadCloser) Iter { 591 | return FromReadCloserSF(nil, input) 592 | } 593 | 594 | // FromFileSF constructs a new iterator that reads its input byte stream from the specified file and 595 | // breaks the stream into tokens using the supplied split function. If the function is set to nil then 596 | // the iterator breaks the input into lines with line termination stripped. Internally the iterator 597 | // is implemented using bufio.Scanner, please refer to its documentation for more details on split functions. 598 | func FromFileSF(sf bufio.SplitFunc, name string) Iter { 599 | return func(fn Func) error { 600 | file, err := os.Open(name) 601 | 602 | if err != nil { 603 | return err 604 | } 605 | 606 | return FromReadCloserSF(sf, file)(fn) 607 | } 608 | } 609 | 610 | // FromFile constructs a new iterator that reads its input byte stream from the specified file and 611 | // breaks the stream into lines with line termination stripped. 612 | func FromFile(name string) Iter { 613 | return FromFileSF(nil, name) 614 | } 615 | 616 | // FromDir creates a new iterator that reads all entries from the specified directory and produces 617 | // the names of those entries for which the supplied predicate returns 'true'. If the predicate is 618 | // set to nil then the iterator produces only regular files, directories and symlinks to files. 619 | func FromDir(name string, pred func(os.FileInfo) bool) Iter { 620 | if pred == nil { 621 | pred = defaultFilePredicate 622 | } 623 | 624 | return func(fn Func) error { 625 | return readDir(fn, name, pred) 626 | } 627 | } 628 | 629 | // FromDirWalk creates a new iterator that wraps filepath.Walk recursive directory traversal function. 630 | // The iterator produces names of filesystem entries in accordance with the supplied WalkFunc, 631 | // please refer to the filepath.Walk documentation for more details. 
If the WalkFunc is set to nil 632 | // then the default function will be used which accepts all the filesystem entries. 633 | func FromDirWalk(root string, wf filepath.WalkFunc) Iter { 634 | if wf == nil { 635 | wf = defaultDirWalkFunc 636 | } 637 | 638 | return func(fn Func) (err error) { 639 | return filepath.Walk(root, func(path string, info os.FileInfo, err error) error { 640 | if err = wf(path, info, err); err == nil { 641 | err = fn([]byte(path)) 642 | } 643 | 644 | return err 645 | }) 646 | } 647 | } 648 | 649 | // FromBytesSF constructs a new iterator that reads the specified byte slice and 650 | // breaks it into tokens using the supplied split function. If the function is set to nil then 651 | // the iterator breaks the input into lines with line termination stripped. Internally the iterator 652 | // is implemented using bufio.Scanner, please refer to its documentation for more details on split functions. 653 | func FromBytesSF(sf bufio.SplitFunc, src []byte) Iter { 654 | return func(fn Func) (err error) { 655 | if err = iterate(bytes.NewBuffer(src), sf, fn); err == io.EOF { 656 | err = nil // io.EOF is not an error 657 | } 658 | 659 | return 660 | } 661 | } 662 | 663 | // FromBytes constructs a new iterator that reads the specified byte slice and 664 | // breaks it into lines with line termination stripped. 665 | func FromBytes(src []byte) Iter { 666 | return FromBytesSF(nil, src) 667 | } 668 | 669 | // FromStringSF constructs a new iterator that reads the specified string and 670 | // breaks it into tokens using the supplied split function. If the function is set to nil then 671 | // the iterator breaks the input into lines with line termination stripped. Internally the iterator 672 | // is implemented using bufio.Scanner, please refer to its documentation for more details on split functions. 
673 | func FromStringSF(sf bufio.SplitFunc, src string) Iter { 674 | return func(fn Func) (err error) { 675 | if err = iterate(bytes.NewBufferString(src), sf, fn); err == io.EOF { 676 | err = nil // io.EOF is not an error 677 | } 678 | 679 | return 680 | } 681 | } 682 | 683 | // FromString constructs a new iterator that reads the specified string and 684 | // breaks it into lines with line termination stripped. 685 | func FromString(src string) Iter { 686 | return FromStringSF(nil, src) 687 | } 688 | 689 | // FromStrings constructs a new iterator that reads the specified slice of strings, one string at a time. 690 | func FromStrings(src []string) Iter { 691 | return func(fn Func) (err error) { 692 | for _, s := range src { 693 | if err = fn([]byte(s)); err != nil { 694 | break 695 | } 696 | } 697 | 698 | if err == io.EOF { // io.EOF is not an error 699 | err = nil 700 | } 701 | 702 | return 703 | } 704 | } 705 | 706 | // FromCommandSF constructs a new iterator that invokes the specified command, 707 | // reads the command output (stdout) and breaks it into tokens using the supplied split function. 708 | // If the split function is set to nil then the iterator breaks the command output into lines with line termination 709 | // stripped. Internally the iterator is implemented using bufio.Scanner, please refer to its 710 | // documentation for more details on split functions. 
711 | func FromCommandSF(cmd *exec.Cmd, sf bufio.SplitFunc) Iter { 712 | return func(fn Func) (err error) { 713 | // stdout 714 | var stdout io.ReadCloser 715 | 716 | if stdout, err = cmd.StdoutPipe(); err != nil { 717 | if cmd.Stdin != nil { 718 | if s, ok := cmd.Stdin.(io.ReadCloser); ok { 719 | s.Close() 720 | } 721 | } 722 | 723 | return 724 | } 725 | 726 | // stderr 727 | var stderr limitedWriter 728 | 729 | stderr.limit = 4 * 1024 // accept only up to 4K 730 | cmd.Stderr = &stderr 731 | 732 | // start the command 733 | if err = cmd.Start(); err != nil { 734 | return 735 | } 736 | 737 | // stdout reader 738 | src := bufio.NewScanner(bufio.NewReader(stdout)) 739 | 740 | if sf != nil { 741 | src.Split(sf) 742 | } 743 | 744 | // iterate 745 | scanner: 746 | for src.Scan() { 747 | s := src.Bytes() 748 | 749 | switch err = fn(s[:len(s):len(s)]); err { 750 | case nil: 751 | // ok 752 | case io.EOF: // not an error 753 | _, err = io.Copy(ioutil.Discard, stdout) 754 | break scanner 755 | default: 756 | io.Copy(ioutil.Discard, stdout) 757 | break scanner 758 | } 759 | } 760 | 761 | if err == nil { 762 | err = src.Err() 763 | } 764 | 765 | // error check 766 | if err != nil { 767 | cmd.Wait() 768 | } else if err = cmd.Wait(); err != nil { 769 | // replace error message with stderr, if any 770 | if e, ok := err.(*exec.ExitError); ok { 771 | err = &ExitError{ 772 | ExitCode: e.ExitCode(), 773 | Stderr: string(bytes.TrimSpace(stderr.buff)), 774 | } 775 | } 776 | } 777 | 778 | // all done 779 | return 780 | } 781 | } 782 | 783 | // limited writer: discards everything beyond the specified number of bytes 784 | type limitedWriter struct { 785 | buff []byte 786 | limit int 787 | } 788 | 789 | func (w *limitedWriter) Write(s []byte) (int, error) { 790 | if n := min(len(s), w.limit-len(w.buff)); n > 0 { 791 | w.buff = append(w.buff, s[:n]...) 
792 | } 793 | 794 | return len(s), nil 795 | } 796 | 797 | // ExitError is the error type used for delivering command exit code and 'stderr' output. 798 | type ExitError struct { 799 | ExitCode int 800 | Stderr string 801 | } 802 | 803 | // Error formats error message from ExitError type. 804 | func (e *ExitError) Error() string { 805 | msg := fmt.Sprintf("exit code %d", e.ExitCode) 806 | 807 | if len(e.Stderr) > 0 { 808 | msg += ": " + e.Stderr 809 | } 810 | 811 | return msg 812 | } 813 | 814 | // FromCommand constructs a new iterator that invokes the specified command, 815 | // reads the command output (stdout) and breaks it into lines with line termination stripped. 816 | func FromCommand(cmd *exec.Cmd) Iter { 817 | return FromCommandSF(cmd, nil) 818 | } 819 | 820 | // PipeSF makes an iterator that pumps the data from its parent through the specified command 821 | // and iterates over the command's stdout, using the given splitter to separate strings. 822 | func (iter Iter) PipeSF(cmd *exec.Cmd, sf bufio.SplitFunc) Iter { 823 | return func(fn Func) error { 824 | stdin, err := cmd.StdinPipe() 825 | 826 | if err != nil { 827 | return err 828 | } 829 | 830 | errch := feed(iter, stdin) 831 | 832 | if err = FromCommandSF(cmd, sf)(fn); err == nil { 833 | err = <-errch 834 | } 835 | 836 | return err 837 | } 838 | } 839 | 840 | // Pipe makes an iterator that pumps the data from its parent through the specified command 841 | // and iterates over the command's stdout. 842 | func (iter Iter) Pipe(cmd *exec.Cmd) Iter { 843 | return iter.PipeSF(cmd, nil) 844 | } 845 | 846 | func feed(iter Iter, out io.WriteCloser) (errch chan error) { 847 | errch = make(chan error, 1) 848 | 849 | go func() { 850 | defer func() { 851 | out.Close() 852 | close(errch) 853 | }() 854 | 855 | if _, e := iter.WriteTo(out); e != nil { 856 | errch <- e 857 | } 858 | }() 859 | 860 | return 861 | } 862 | 863 | // ScanNullTerminatedLines is a split function that splits input on null bytes. 
// Useful mostly
// with FromCommandSF function, in cases where the invoked command generates null-terminated
// strings, like 'find ... -print0'.
func ScanNullTerminatedLines(data []byte, atEOF bool) (advance int, token []byte, err error) {
	end := bytes.IndexByte(data, 0)

	switch {
	case end >= 0:
		// got the string; the terminator is consumed, but not included in the token
		return end + 1, data[:end], nil
	case atEOF && len(data) > 0:
		// no more input is coming and the remainder has no terminator
		return 0, nil, errors.New("last string is not null-terminated")
	default:
		// either more data is needed, or all the input has been consumed
		return 0, nil, nil
	}
}

// Pred is the type of string predicate. Predicates can be combined using the methods
// of this type, which allows for convenient composition, for example:
//	strit.Not(strit.Empty).AndNot(strit.StartsWith("#"))
// or
//	strit.StartsWith("xyz").Or(strit.StartsWith("abc"))
type Pred func([]byte) bool

// And is a predicate combinator. The resulting predicate returns 'true' only if both the original
// and the supplied predicates do; the supplied one is not invoked if the original returns 'false'.
func (orig Pred) And(other Pred) Pred {
	return func(line []byte) bool {
		if !orig(line) {
			return false
		}

		return other(line)
	}
}

// AndNot is a predicate combinator. The resulting predicate returns 'true' only if the original
// returns 'true' and the supplied predicate returns 'false'; the supplied one is not invoked
// if the original returns 'false'.
func (orig Pred) AndNot(next Pred) Pred {
	return func(line []byte) bool {
		if !orig(line) {
			return false
		}

		return !next(line)
	}
}

// Or is a predicate combinator. The resulting predicate returns 'true' if either the original
// or the supplied predicate does; the supplied one is not invoked if the original returns 'true'.
func (orig Pred) Or(other Pred) Pred {
	return func(line []byte) bool {
		if orig(line) {
			return true
		}

		return other(line)
	}
}

// OrNot is a predicate combinator. It creates a new predicate that returns 'true' if
// either the original returns 'true' or the other predicate returns 'false'.
913 | func (orig Pred) OrNot(other Pred) Pred { 914 | return func(line []byte) bool { 915 | return orig(line) || !other(line) 916 | } 917 | } 918 | 919 | // Not is a predicate wrapper that returns a new predicate negating the result of the supplied predicate. 920 | func Not(pred Pred) Pred { 921 | return func(line []byte) bool { 922 | return !pred(line) 923 | } 924 | } 925 | 926 | // Empty is a predicate that returns 'true' only if the input string is empty. 927 | func Empty(line []byte) bool { 928 | return len(line) == 0 929 | } 930 | 931 | // StartsWith is a predicate that returns 'true' if the input string has the specified prefix. 932 | func StartsWith(prefix string) Pred { 933 | return func(line []byte) bool { 934 | return bytes.HasPrefix(line, []byte(prefix)) 935 | } 936 | } 937 | 938 | // EndsWith is a predicate that returns 'true' if the input string has the specified suffix. 939 | func EndsWith(prefix string) Pred { 940 | return func(line []byte) bool { 941 | return bytes.HasSuffix(line, []byte(prefix)) 942 | } 943 | } 944 | 945 | // helper functions ------------------------------------------------------------------------------- 946 | // iterator core 947 | func iterate(input io.Reader, sf bufio.SplitFunc, fn Func) error { 948 | src := bufio.NewScanner(input) 949 | 950 | if sf != nil { 951 | src.Split(sf) 952 | } 953 | 954 | for src.Scan() { 955 | s := src.Bytes() 956 | 957 | if err := fn(s[:len(s):len(s)]); err != nil { 958 | return err // returns io.EOF when stopped early 959 | } 960 | } 961 | 962 | return src.Err() // returns nil on EOF 963 | } 964 | 965 | // directory read implementation 966 | func readDir(fn Func, dirName string, pred func(os.FileInfo) bool) error { 967 | // open directory 968 | dir, err := os.Open(dirName) 969 | 970 | if err != nil { 971 | return err 972 | } 973 | 974 | defer dir.Close() 975 | 976 | // get list of all items 977 | items, err := dir.Readdir(0) 978 | 979 | if err != nil { 980 | return err 981 | } 982 | 983 | // select 
items and invoke callback 984 | for _, item := range items { 985 | if pred(item) { 986 | if err := fn([]byte(filepath.Join(dirName, item.Name()))); err != nil { 987 | return err 988 | } 989 | } 990 | } 991 | 992 | return nil 993 | } 994 | 995 | // default FromDir() predicate selects only regular files, directories and symlinks 996 | func defaultFilePredicate(info os.FileInfo) bool { 997 | return info.IsDir() || info.Mode().IsRegular() || info.Mode()&os.ModeType == os.ModeSymlink 998 | } 999 | 1000 | // defaultDirWalkFunc is the default function for FromDirWalk() constructor. It accepts all the entries. 1001 | func defaultDirWalkFunc(_ string, _ os.FileInfo, _ error) error { return nil } 1002 | 1003 | // min 1004 | func min(a, b int) int { 1005 | if a < b { 1006 | return a 1007 | } 1008 | 1009 | return b 1010 | } 1011 | -------------------------------------------------------------------------------- /strit_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2017,2018,2019 Maxim Konakov 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without modification, 6 | are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, 9 | this list of conditions and the following disclaimer. 10 | 2. Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 3. Neither the name of the copyright holder nor the names of its contributors 14 | may be used to endorse or promote products derived from this software without 15 | specific prior written permission. 

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

package strit

import (
	"bufio"
	"bytes"
	"errors"
	"fmt"
	"io"
	"io/ioutil"
	"math/rand"
	"os"
	"os/exec"
	"path/filepath"
	"runtime"
	"strconv"
	"strings"
	"testing"
	"time"
	"unicode"
)

// TestSimpleStrings checks that FromStrings passes the strings of the slice
// to the callback unchanged and in order.
func TestSimpleStrings(t *testing.T) {
	ss := []string{"aaa", "bbb", "ccc"}
	fn := FromStrings(ss)
	i := 0

	if err := fn(func(line []byte) error {
		if string(line) != ss[i] {
			return fmt.Errorf("String mismatch at %d: %q instead of %q", i, string(line), ss[i])
		}

		i++
		return nil
	}); err != nil {
		t.Error(err)
	}
}

// TestOverwrite appends to the slice received on the first step and then checks
// that the string received on the second step is intact.
func TestOverwrite(t *testing.T) {
	i := 0
	iter := FromBytes([]byte(overwriteSrc))

	err := iter(func(s []byte) error {
		switch i {
		case 0:
			s = append(s, " zzz"...)
		case 1:
			if bytes.Compare(s, []byte("long long long long string")) != 0 {
				return fmt.Errorf("Unexpected string: %q", string(s))
			}
		}

		i++
		return nil
	})

	if err != nil {
		t.Error(err)
	}
}

// input for TestOverwrite: a short line followed by a longer one
const overwriteSrc = `short string
long long long long string`

// TestSimpleMapFilter runs the trim/filter/upper-case pipeline of testABC
// over line-split and word-split input.
func TestSimpleMapFilter(t *testing.T) {
	if err := testABC("\n aaa \n \n\n bbb\nccc ", nil); err != nil {
		t.Error(err)
		return
	}

	if err := testABC("aaa bbb ccc ", bufio.ScanWords); err != nil {
		t.Error(err)
		return
	}
}

// testABC splits the source with the given split function, trims the tokens, drops the empty ones,
// converts the rest to upper case, and expects the result to be "AAA, BBB, CCC".
func testABC(src string, sf bufio.SplitFunc) error {
	iter := FromStringSF(sf, src).Map(bytes.TrimSpace).Filter(Not(Empty)).Map(bytes.ToUpper)

	// try multiple times to test the iterator reusability
	for i := 0; i < 3; i++ {
		s, err := iter.Join(", ")

		if err != nil {
			return err
		}

		const expect = "AAA, BBB, CCC"

		if s != expect {
			return fmt.Errorf("Unexpected string at %d: %q instead of %q", i, s, expect)
		}
	}

	return nil
}

// TestIterators is a table-driven test of iterator combinators and predicates: every iterator
// in the table is joined with ", " and the result is compared with the expected string.
func TestIterators(t *testing.T) {
	fileName, err := createFileOfInts("FileOfInts", 10)

	if err != nil {
		t.Error(err)
		return
	}

	defer os.Remove(fileName)

	fileIter := FromFile(fileName)

	tests := []struct {
		iter   Iter
		expect string
	}{
		{FromString("aaa \n bbb \n ccc ").Map(bytes.TrimSpace).Map(bytes.ToUpper),
			"AAA, BBB, CCC"},
		{FromString(" aaa \n bbb \n ccc ").Map(bytes.TrimSpace).GenMap(stopAtCcc),
			"aaa, bbb"},
		{FromString(" aaa \n bbb \n ccc ").Map(bytes.TrimSpace).GenMap(skipBbb),
			"aaa, ccc"},
		{FromString(" aaa \n bbb \n ccc ").Map(bytes.TrimSpace).TakeWhile(func(line []byte) bool {
			return bytes.ContainsAny(line, "ab")
		}),
			"aaa, bbb"},
		{FromString(" aaa \n bbb \n ccc ").Take(2).Map(bytes.TrimSpace),
			"aaa, bbb"},
		{FromString(" aaa \n bbb \n ccc ").Take(5).Map(bytes.TrimSpace),
			"aaa, bbb, ccc"},
		{FromString(" aaa \n bbb \n ccc ").Skip(2).Map(bytes.TrimSpace),
			"ccc"},
		{FromString(" aaa \n bbb \n ccc ").Skip(5).Map(bytes.TrimSpace),
			""},
		{FromString(" aaa \n bbb \n ccc ").Map(bytes.TrimSpace).SkipWhile(func(line []byte) bool {
			return bytes.ContainsAny(line, "ab")
		}),
			"ccc"},
		{Chain(FromString("aaa\nbbb\nccc"), FromString("xxx\nyyy\nzzz").Take(2)),
			"aaa, bbb, ccc, xxx, yyy"},
		{Chain(fileIter, fileIter.Take(2)), // test Chain() with early stop on file
			"0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1"},
		{FromStrings([]string{"aaa", "bbb", "###ccc", "ddd###"}).
			Filter(Not(StartsWith("###"))).
			Filter(Not(EndsWith("###"))).
			Filter(Not(StartsWith("@@@"))),
			"aaa, bbb"},
		{fileIter.Take(3),
			"0, 1, 2"},
		{FromStrings([]string{"aaa", "bbb", "ccc"}).Take(2), // test for EOF in FromStrings()
			"aaa, bbb"},
		// predicates
		{FromString("aaa\nbbb\nccc").Filter(StartsWith("aa").Or(StartsWith("bb"))),
			"aaa, bbb"},
		{FromString("aaa\nbbb\nccc").Filter(StartsWith("aa").And(EndsWith("aa"))),
			"aaa"},
		{FromString("aaa\naab\naac").Filter(StartsWith("aa").AndNot(EndsWith("c"))),
			"aaa, aab"},
		{FromString("aaa\nbbb\nccc").Filter(StartsWith("aa").OrNot(EndsWith("c"))),
			"aaa, bbb"},
		{FromString("aaa\nbbb\nccc").Filter(Pred(Empty).OrNot(EndsWith("c"))),
			"aaa, bbb"},
		{FromString(" aaa \n bbb \n ccc ").Map(bytes.TrimSpace).FirstNonEmpty(func(s []byte) []byte {
			if bytes.ContainsAny(s, "bc") {
				return bytes.ToUpper(s)
			}

			return nil
		}), "BBB"},
	}

	for i, test := range tests {
		res, err := test.iter.Join(", ")

		if err != nil {
			t.Error(err)
			return
		}

		if res != test.expect {
			t.Errorf("Unexpected result (%d): %q instead of %q", i, res, test.expect)
			return
		}
	}
}

// TestCatFileOfInts reads a generated file of integers through the external command
// returned by cat() and checks that the numbers arrive in sequence, starting from 0.
func TestCatFileOfInts(t *testing.T) {
	name, err := createFileOfInts("FileOfInts", 200000)

	if err != nil {
		t.Error(err)
		return
	}

	defer os.Remove(name)

	var i int64

	if err = FromCommand(exec.Command(cat(), name))(func(line []byte) error {
		val, e := strconv.ParseInt(string(line), 10, 64)

		if e != nil {
			return e
		}

		if val != i {
			return fmt.Errorf("Value mismatch: %d instead of %d", val, i)
		}

		i++
		return nil

	}); err != nil {
		t.Error(err)
	}
}

// TestCatSearchFileOfInts is like TestCatFileOfInts, but stops the iteration early (at value 100)
// by returning io.EOF, and then checks that the number of goroutines is back to what it was.
func TestCatSearchFileOfInts(t *testing.T) {
	n := runtime.NumGoroutine() // the number of goroutines for later checks

	name, err := createFileOfInts("FileOfInts", 200000)

	if err != nil {
		t.Error(err)
		return
	}

	defer os.Remove(name)

	var i int64

	if err = FromCommand(exec.Command(cat(), name))(func(line []byte) error {
		val, e := strconv.ParseInt(string(line), 10, 64)

		if e != nil {
			return e
		}

		if val != i {
			return fmt.Errorf("Value mismatch: %d instead of %d", val, i)
		}

		if val == 100 { // early stop
			return io.EOF
		}

		i++
		return nil

	}); err != nil {
		t.Error(err)
		return
	}

	// wait for a short period of time to let other goroutines finish
	time.Sleep(100 * time.Millisecond)

	// see if the termination goroutine is still running
	if m := runtime.NumGoroutine(); m != n {
		t.Errorf("Number goroutines: before %d, after %d", n, m)
		return
	}
}

// TestCommandError checks that a command given a nonexistent file makes the iterator
// return an error (other than io.EOF) without invoking the callback.
func TestCommandError(t *testing.T) {
	err := FromCommand(exec.Command(cat(), "nonexistent-file"))(func(_ []byte) error {
		t.Error("Unexpected callback invocation")
		return io.EOF
	})

	if err == nil || err == io.EOF {
		t.Errorf("The command %q should have produced an error", cat())
		return
	}

	// println(err.Error())
}

// TestCommandTermination (Linux only) checks that an error returned from the callback
// comes back from the iterator with its message unchanged.
func TestCommandTermination(t *testing.T) {
	if runtime.GOOS != "linux" {
		return
	}
	const msg = "Just an error"

	err := FromCommand(exec.Command("find", ".", "-type", "f"))(func(s []byte) error {
		//println("Callback invoked with text: " + string(s))
		return errors.New(msg)
	})

	if err == nil {
		t.Error("Missing error")
		return
	}

	if s := err.Error(); s != msg {
		t.Errorf("Unexpected error message: %q instead of %q", s, msg)
		return
	}
}

// TestPipe (Linux only) pushes a string through three chained 'cat' commands
// and expects it to come out unchanged.
func TestPipe(t *testing.T) {
	if runtime.GOOS != "linux" {
		return
	}

	const str = "aaa\nbbb\nccc"

	res, err := FromString(str).Pipe(exec.Command("cat")).Pipe(exec.Command("cat")).Pipe(exec.Command("cat")).Join("\n")

	if err != nil {
		t.Error(err)
		return
	}

	if res != str {
		t.Errorf("Unexpected result: %q instead of %q", res, str)
		return
	}
}

// TestNullTerminatedLines splits a string of null-terminated tokens with ScanNullTerminatedLines
// and checks the concatenation of the tokens.
func TestNullTerminatedLines(t *testing.T) {
	s, err := FromStringSF(ScanNullTerminatedLines, "aaa\000bbb\000ccc\000").String()

	if err != nil {
		t.Error(err)
		return
	}

	if s != "aaabbbccc" {
		t.Errorf("Unexpected string: %q instead of \"aaabbbccc\"", s)
		return
	}
}

// TestWriteFile writes trimmed lines to a temporary file with WriteToFile,
// reads them back with FromFile and compares the result with the expected lines.
func TestWriteFile(t *testing.T) {
	name, err := tempFileName("xxx")

	if err != nil {
		t.Error(err)
		return
	}

	defer os.Remove(name)

	iter := FromString(" aaa \n bbb \n ccc ").Map(bytes.TrimSpace)

	// write file
	if _, err = iter.WriteToFile(name); err != nil {
		t.Error(err)
		return
	}

	// read file
	lines, err := FromFile(name).Strings() // just to test Strings() method as well

	if err != nil {
		t.Error(err)
		return
	}

	// compare
	expect := []string{"aaa", "bbb", "ccc"}

	if len(lines) != len(expect) {
		t.Errorf("Result size mismatch: %d instead of %d", len(lines), len(expect))
		return
	}

	for i := 0; i < len(lines); i++ {
		if lines[i] != expect[i] {
			t.Errorf("String mismatch at %d: %q instead of %q", i, lines[i], expect[i])
			return
		}
	}
}

// TestMerge is a table-driven test of Merge(): the output of every merged iterator
// is joined with "|" and compared with the expected string.
func TestMerge(t *testing.T) {
	fileName, err := createFileOfInts("FileOfInts", 10)

	if err != nil {
		t.Error(err)
		return
	}

	defer os.Remove(fileName)

	tests := []struct {
		iter   Iter
		expect string
	}{
		{Merge(",", FromString("aaa\nbbb\nccc"), FromString("xxx\nyyy\nzzz"), FromString("111\n222\n333")),
			"aaa,xxx,111|bbb,yyy,222|ccc,zzz,333"},
		{Merge(",", FromString("aaa\nbbb\nccc"), FromString("xxx\nyyy\nzzz"), FromString("111\n222")),
			"aaa,xxx,111|bbb,yyy,222"},
		{Merge(",", FromString("aaa"), FromString("xxx\nyyy\nzzz"), FromString("111\n222\n333")),
			"aaa,xxx,111"},
		{Merge(",", FromString("aaa\nbbb\nccc")),
			"aaa|bbb|ccc"},
		{Merge(",", FromString("aaa\nbbb\nccc"), FromString("xxx\nyyy\nzzz").Take(2)),
			"aaa,xxx|bbb,yyy"},
		{Merge("", FromFile(fileName).Skip(5), FromFile(fileName).Take(5)),
			"50|61|72|83|94"},
	}

	for i, test := range tests {
		s, err := test.iter.Join("|")

		if err != nil {
			t.Error(i, err)
			return
		}

		if s != test.expect {
			t.Errorf("String mismatch [%d]: %q instead of %q", i, s, test.expect)
			return
		}
	}
}

// TestFromDir lists the current directory with the default predicate and checks
// that all the files of the package have been reported.
func TestFromDir(t *testing.T) {
	expect := map[string]int{
		"strit.go":      0,
		"strit_test.go": 0,
		"LICENSE":       0,
		"README.md":     0,
	}

	if err := FromDir(".", nil)(func(line []byte) error {
		delete(expect, string(line))
		// t.Log("#", string(line))
		return nil
	}); err != nil {
		t.Error(err)
		return
	}

	if len(expect) > 0 {
		var names []string

		for name := range expect {
			names = append(names, name)
		}

		t.Errorf("Skipped entries: %s", strings.Join(names, ", "))
		return
	}
}

func TestFromDirWalk(t *testing.T) {
	// temporary directory
	dir, err := ioutil.TempDir("", "DirWalk")

	if err != nil {
		t.Error(err)
		return
	}

	defer os.RemoveAll(dir)

	// a few files
	fileNames := [...]string{"aaa", "bbb", "ccc"}

	for _, file := range fileNames[:] {
		if err = makeFile(dir, file); err != nil {
			t.Error(err)
			return
		}
	}

	// sub-directory
	const subDir = "subdir"

	if err = os.Mkdir(filepath.Join(dir, subDir), 0777); err != nil {
		t.Error(err)
		return
	}

	// a few files in the sub-directory
	for _, file := range fileNames[:] {
		if err = makeFile(dir, subDir, file); err != nil {
			t.Error(err)
			return
		}
	}

	// tests
	tests := []struct {
		wf     filepath.WalkFunc
		expect map[string]int
	}{
		{
			func(_ string, _ os.FileInfo, _ error) error { return nil }, // accept everything
			map[string]int{
				dir:                               0,
				filepath.Join(dir, "aaa"):         0,
				filepath.Join(dir, "bbb"):         0,
				filepath.Join(dir, "ccc"):         0,
				filepath.Join(dir, subDir):        0,
				filepath.Join(dir, subDir, "aaa"): 0,
				filepath.Join(dir, subDir, "bbb"): 0,
				filepath.Join(dir, subDir, "ccc"): 0,
			},
		},
		{
			func(_ string, info os.FileInfo, _ error) error {
				if info.Name() == subDir {
					return filepath.SkipDir
				}
534 | 535 | return nil 536 | }, 537 | map[string]int{ 538 | dir: 0, 539 | filepath.Join(dir, "aaa"): 0, 540 | filepath.Join(dir, "bbb"): 0, 541 | filepath.Join(dir, "ccc"): 0, 542 | }, 543 | }, 544 | { 545 | func(_ string, info os.FileInfo, _ error) error { 546 | if info.Name() == "bbb" { 547 | return filepath.SkipDir 548 | } 549 | 550 | return nil 551 | }, 552 | map[string]int{ 553 | dir: 0, 554 | filepath.Join(dir, "aaa"): 0, 555 | }, 556 | }, 557 | } 558 | 559 | for i, test := range tests { 560 | if err = FromDirWalk(dir, test.wf)(func(line []byte) error { 561 | // t.Log(string(line)) 562 | path := string(line) 563 | _, ok := test.expect[path] 564 | 565 | if !ok { 566 | return fmt.Errorf("Unexpected path (%d): %q", i, path) 567 | } 568 | 569 | delete(test.expect, path) 570 | return nil 571 | }); err != nil { 572 | t.Error(err) 573 | return 574 | } 575 | 576 | if len(test.expect) > 0 { 577 | var remaining []string 578 | 579 | for path := range test.expect { 580 | remaining = append(remaining, path) 581 | } 582 | 583 | t.Errorf("Remaining items (%d): %s", i, strings.Join(remaining, ", ")) 584 | return 585 | } 586 | } 587 | } 588 | 589 | func TestStringFromBytes(t *testing.T) { 590 | res, err := FromBytes([]byte("aaa\nbbb\nccc")).String() 591 | 592 | if err != nil { 593 | t.Error(err) 594 | return 595 | } 596 | 597 | if res != "aaabbbccc" { 598 | t.Errorf("Unexpected result: %q", res) 599 | return 600 | } 601 | } 602 | 603 | func TestBytesFromBytes(t *testing.T) { 604 | res, err := FromBytes([]byte("aaa\nbbb\nccc")).Bytes() 605 | 606 | if err != nil { 607 | t.Error(err) 608 | return 609 | } 610 | 611 | if bytes.Compare(res, []byte("aaabbbccc")) != 0 { 612 | t.Errorf("Unexpected result: %q", string(res)) 613 | return 614 | } 615 | } 616 | 617 | func TestJoinBytes(t *testing.T) { 618 | res, err := FromString("aaa\nbbb\nccc").JoinBytes(" ") 619 | 620 | if err != nil { 621 | t.Error(err) 622 | return 623 | } 624 | 625 | if bytes.Compare(res, []byte("aaa bbb ccc")) != 0 
{ 626 | t.Errorf("Unexpected result: %q", string(res)) 627 | return 628 | } 629 | } 630 | 631 | // parser test 632 | type dataItem struct { 633 | A, B, C int 634 | } 635 | 636 | func TestParser(t *testing.T) { 637 | const input = ` 638 | A 1 639 | B 2 640 | C 3 641 | 642 | A 4 643 | B 5 644 | C 6 645 | 646 | A 7 647 | B 8 648 | C 9` 649 | 650 | i := 1 651 | 652 | fn := func(item *dataItem) error { 653 | if item.A != i || item.B != i+1 || item.C != i+2 { 654 | return fmt.Errorf("Unexpected value: A = %d, B = %d, C = %d", item.A, item.B, item.C) 655 | } 656 | 657 | i += 3 658 | return nil 659 | } 660 | 661 | err := FromString(input).Map(bytes.TrimSpace).Filter(Not(Empty)).Parse(&dataItemParser{fn: fn}) 662 | 663 | if err != nil { 664 | t.Error(err) 665 | return 666 | } 667 | 668 | if i != 10 { 669 | t.Errorf("Unexpected value of counter: %d instead of 10", i) 670 | return 671 | } 672 | } 673 | 674 | type dataItemParser struct { 675 | fn func(*dataItem) error 676 | item *dataItem 677 | } 678 | 679 | func (p *dataItemParser) Done(err error) error { 680 | return err 681 | } 682 | 683 | func (p *dataItemParser) Enter(s []byte) (ParserFunc, error) { 684 | var err error 685 | 686 | p.item = new(dataItem) 687 | p.item.A, err = getField("A", s) 688 | return p.getB, err 689 | } 690 | 691 | func (p *dataItemParser) getB(s []byte) (ParserFunc, error) { 692 | var err error 693 | 694 | p.item.B, err = getField("B", s) 695 | return p.getC, err 696 | } 697 | 698 | func (p *dataItemParser) getC(s []byte) (ParserFunc, error) { 699 | var err error 700 | 701 | if p.item.C, err = getField("C", s); err != nil { 702 | return nil, err 703 | } 704 | 705 | return p.Enter, p.fn(p.item) 706 | } 707 | 708 | func getField(name string, s []byte) (int, error) { 709 | fields := bytes.FieldsFunc(s, unicode.IsSpace) 710 | 711 | if len(fields) != 2 { 712 | return 0, errors.New("Invalid number of fields: " + string(s)) 713 | } 714 | 715 | k, v := string(fields[0]), string(fields[1]) 716 | 717 | if k != 
name { 718 | return 0, errors.New("Invalid field name: " + k) 719 | } 720 | 721 | return strconv.Atoi(v) 722 | } 723 | 724 | // benchmarks 725 | var benchData string 726 | var benchDataSum int 727 | 728 | func init() { 729 | var buff bytes.Buffer 730 | 731 | for i := 0; i < 1000; i++ { 732 | val := rand.Int() 733 | 734 | benchDataSum += val 735 | buff.WriteString(" " + strconv.Itoa(val) + " \n") 736 | 737 | switch rand.Int() % 4 { 738 | case 0: 739 | buff.WriteString(" zzz \n") 740 | case 1: 741 | buff.WriteString(" \n") 742 | } 743 | } 744 | 745 | benchData = buff.String() 746 | } 747 | 748 | func BenchmarkScanner(b *testing.B) { 749 | for i := 0; i < b.N; i++ { 750 | var sum int 751 | 752 | src := bufio.NewScanner(bytes.NewBufferString(benchData)) 753 | 754 | for src.Scan() { 755 | line := bytes.TrimSpace(src.Bytes()) 756 | 757 | if len(line) > 0 { 758 | if val, err := strconv.Atoi(string(line)); err == nil { 759 | sum += val 760 | } 761 | } 762 | } 763 | 764 | if err := src.Err(); err != nil { 765 | b.Error(err) 766 | return 767 | } 768 | 769 | if sum != benchDataSum { 770 | b.Errorf("Invalid sum: %d instead of %d", sum, benchDataSum) 771 | return 772 | } 773 | } 774 | } 775 | 776 | func BenchmarkFromString(b *testing.B) { 777 | for i := 0; i < b.N; i++ { 778 | var sum int 779 | 780 | if err := FromString(benchData).Map(bytes.TrimSpace).Filter(Not(Empty))(func(line []byte) error { 781 | if val, e := strconv.Atoi(string(line)); e == nil { 782 | sum += val 783 | } 784 | 785 | return nil 786 | }); err != nil { 787 | b.Error(err) 788 | return 789 | } 790 | 791 | if sum != benchDataSum { 792 | b.Errorf("Invalid sum: %d instead of %d", sum, benchDataSum) 793 | return 794 | } 795 | } 796 | } 797 | 798 | // helper functions 799 | func createFileOfInts(prefix string, N int64) (string, error) { 800 | return withTempFileWriter(prefix, func(file io.Writer) (err error) { 801 | for i := int64(0); i < N && err == nil; i++ { 802 | _, err = fmt.Fprintf(file, "%d\n", i) 803 | } 
804 | 805 | return 806 | }) 807 | } 808 | 809 | func withTempFileWriter(prefix string, fn func(io.Writer) error) (fname string, err error) { 810 | var file *os.File 811 | 812 | if file, err = ioutil.TempFile("", prefix); err != nil { 813 | return 814 | } 815 | 816 | fname = file.Name() 817 | 818 | defer func() { 819 | if e := file.Close(); e != nil && err == nil { 820 | err = e 821 | } 822 | 823 | if err != nil { 824 | os.Remove(fname) 825 | } 826 | }() 827 | 828 | err = fn(file) 829 | return 830 | } 831 | 832 | func tempFileName(prefix string) (name string, err error) { 833 | var file *os.File 834 | 835 | if file, err = ioutil.TempFile("", prefix); err != nil { 836 | return 837 | } 838 | 839 | defer file.Close() 840 | 841 | name = file.Name() 842 | return 843 | } 844 | 845 | func cat() string { 846 | switch runtime.GOOS { 847 | case "windows": 848 | return "type" 849 | default: 850 | return "cat" 851 | } 852 | } 853 | 854 | func stopAtCcc(line []byte) ([]byte, error) { 855 | if bytes.Compare(line, []byte("ccc")) == 0 { 856 | return nil, io.EOF 857 | } 858 | 859 | return line, nil 860 | } 861 | 862 | func skipBbb(line []byte) ([]byte, error) { 863 | if bytes.Compare(line, []byte("bbb")) == 0 { 864 | return nil, ErrSkip 865 | } 866 | 867 | return line, nil 868 | } 869 | 870 | func makeFile(elem ...string) error { 871 | return ioutil.WriteFile(filepath.Join(elem...), []byte(elem[len(elem)-1]), 0666) 872 | } 873 | --------------------------------------------------------------------------------