├── .gitignore
├── LICENSE
├── README.md
├── doc.go
├── walker.go
└── walker_test.go


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Compiled Object files, Static and Dynamic libs (Shared Objects)
 2 | *.o
 3 | *.a
 4 | *.so
 5 | 
 6 | # Folders
 7 | _obj
 8 | _test
 9 | 
10 | # Architecture specific extensions/prefixes
11 | *.[568vq]
12 | [568vq].out
13 | 
14 | *.cgo1.go
15 | *.cgo2.c
16 | _cgo_defun.c
17 | _cgo_gotypes.go
18 | _cgo_export.*
19 | 
20 | _testmain.go
21 | 
22 | *.exe
23 | *.test
24 | 
25 | test_files/
26 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2014 Stretchr, Inc.
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Powerwalk
 2 | 
 3 | Go package for walking files and concurrently calling user code to handle each file.  This package walks the file system in the same way `filepath.Walk` does, except instead of calling the `walkFn` inline, it uses goroutines to allow the files to be handled concurrently.
 4 | 
 5 | Powerwalk functions by walking concurrently over many files. In order to realize any benefits from this approach, you must tell the runtime to use multiple CPUs. For example:
 6 | 
 7 | ```
 8 | runtime.GOMAXPROCS(runtime.NumCPU())
 9 | ```
10 | 
11 | ## Usage
12 | 
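13 | Powerwalk is a drop-in replacement for the `filepath.Walk` function ([read about that for more details](http://golang.org/pkg/path/filepath/#Walk)), and so has the same signature, even using the `filepath.WalkFunc` type too.  The two differences are that the `walkFn` is run concurrently and that `filepath.SkipDir` is not supported.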
14 | 
15 | ```
16 | powerwalk.Walk(root string, walkFn filepath.WalkFunc) error
17 | ```
18 | 
19 | By default, Powerwalk will call the `walkFn` for `powerwalk.DefaultConcurrentWalks` (currently `100`) files at a time.  To be specific about the number of concurrent files to walk, use the `WalkLimit` alternative.
20 | 
21 | ```
22 | powerwalk.WalkLimit(root string, walkFn filepath.WalkFunc, limit int) error
23 | ```
24 | 
25 | The `WalkLimit` function does the same as `Walk`, except that it lets you specify the number of files to walk concurrently using the `limit` argument.  The `limit` argument must be one or higher (i.e. `>0`); `WalkLimit` panics otherwise.  Specifying a limit that's too high causes unnecessary overhead, so sensible numbers are encouraged but not enforced.
26 | 
27 | See the [godoc documentation](http://godoc.org/github.com/stretchr/powerwalk) for more information.
28 | 
29 | 


--------------------------------------------------------------------------------
/doc.go:
--------------------------------------------------------------------------------
1 | // Package powerwalk concurrently walks file trees.
2 | // Aside from SkipDir functionality not working and the fact that the
3 | // WalkFunc gets run concurrently, this is a drop-in replacement
4 | // for filepath.Walk.
5 | package powerwalk
6 | 


--------------------------------------------------------------------------------
/walker.go:
--------------------------------------------------------------------------------
  1 | package powerwalk
  2 | 
  3 | import (
  4 | 	"errors"
  5 | 	"os"
  6 | 	"path/filepath"
  7 | 	"sync"
  8 | )
  9 | 
// DefaultConcurrentWalks is the concurrency limit that Walk passes to
// WalkLimit, i.e. the maximum number of walkFn calls that Walk runs at the
// same time.
// To use a value other than this one, call WalkLimit directly.
const DefaultConcurrentWalks int = 100
 14 | 
 15 | // Walk walks the file tree rooted at root, calling walkFn for each file or
 16 | // directory in the tree, including root. All errors that arise visiting files
 17 | // and directories are filtered by walkFn. The output is non-deterministic.
 18 | // WalkLimit does not follow symbolic links.
 19 | //
 20 | // For each file and directory encountered, Walk will trigger a new Go routine
 21 | // allowing you to handle each item concurrently.  A maximum of DefaultConcurrentWalks
 22 | // walkFns will be called at any one time.
 23 | func Walk(root string, walkFn filepath.WalkFunc) error {
 24 | 	return WalkLimit(root, walkFn, DefaultConcurrentWalks)
 25 | }
 26 | 
 27 | // WalkLimit walks the file tree rooted at root, calling walkFn for each file or
 28 | // directory in the tree, including root. All errors that arise visiting files
 29 | // and directories are filtered by walkFn. The output is non-deterministic.
 30 | // WalkLimit does not follow symbolic links.
 31 | //
 32 | // For each file and directory encountered, Walk will trigger a new Go routine
 33 | // allowing you to handle each item concurrently.  A maximum of limit walkFns will
 34 | // be called at any one time.
 35 | func WalkLimit(root string, walkFn filepath.WalkFunc, limit int) error {
 36 | 
 37 | 	// make sure limit is sensible
 38 | 	if limit < 1 {
 39 | 		panic("powerwalk: limit must be greater than zero.")
 40 | 	}
 41 | 
 42 | 	// filesMg is a wait group that waits for all files to
 43 | 	// be processed before finishing.
 44 | 	var filesWg sync.WaitGroup
 45 | 
 46 | 	// files is a channel that receives lists of channels
 47 | 	files := make(chan *walkArgs)
 48 | 	kill := make(chan struct{})
 49 | 	errs := make(chan error)
 50 | 
 51 | 	for i := 0; i < limit; i++ {
 52 | 		go func(i int) {
 53 | 			for {
 54 | 				select {
 55 | 				case file, ok := <-files:
 56 | 					if !ok {
 57 | 						continue
 58 | 					}
 59 | 					if err := walkFn(file.path, file.info, file.err); err != nil {
 60 | 						errs <- err
 61 | 					}
 62 | 					filesWg.Done()
 63 | 				case <-kill:
 64 | 					return
 65 | 				}
 66 | 			}
 67 | 		}(i)
 68 | 	}
 69 | 
 70 | 	var walkErr error
 71 | 
 72 | 	// check for errors
 73 | 	go func() {
 74 | 		select {
 75 | 		case walkErr = <-errs:
 76 | 			close(kill)
 77 | 		case <-kill:
 78 | 			return
 79 | 		}
 80 | 	}()
 81 | 
 82 | 	// setup a waitgroup and wait for everything to
 83 | 	// be done
 84 | 	var walkerWg sync.WaitGroup
 85 | 	walkerWg.Add(1)
 86 | 
 87 | 	go func() {
 88 | 
 89 | 		filepath.Walk(root, func(p string, info os.FileInfo, err error) error {
 90 | 			select {
 91 | 			case <-kill:
 92 | 				close(files)
 93 | 				return errors.New("kill received while walking")
 94 | 			default:
 95 | 				filesWg.Add(1)
 96 | 				select {
 97 | 				case files <- &walkArgs{path: p, info: info, err: err}:
 98 | 				}
 99 | 				return nil
100 | 			}
101 | 		})
102 | 
103 | 		// everything is done
104 | 		walkerWg.Done()
105 | 
106 | 	}()
107 | 
108 | 	// wait for all walker calls
109 | 	walkerWg.Wait()
110 | 
111 | 	if walkErr == nil {
112 | 		filesWg.Wait()
113 | 		close(kill)
114 | 	}
115 | 
116 | 	return walkErr
117 | }
118 | 
119 | // walkArgs holds the arguments that were passed to the Walk or WalkLimit
120 | // functions.
121 | type walkArgs struct {
122 | 	path string
123 | 	info os.FileInfo
124 | 	err  error
125 | }
126 | 


--------------------------------------------------------------------------------
/walker_test.go:
--------------------------------------------------------------------------------
  1 | package powerwalk
  2 | 
  3 | import (
  4 | 	"errors"
  5 | 	"fmt"
  6 | 	"io/ioutil"
  7 | 	"os"
  8 | 	"path"
  9 | 	"path/filepath"
 10 | 	"runtime"
 11 | 	"sync"
 12 | 	"testing"
 13 | 	"time"
 14 | 
 15 | 	"github.com/stretchr/testify/assert"
 16 | )
 17 | 
 18 | const testFiles string = "./test_files"
 19 | 
 20 | func makeTestFiles(dirs, files int) {
 21 | 	var counter int
 22 | 	for i := 1; i < dirs+1; i++ {
 23 | 		dir := fmt.Sprintf("%s/dir_%02d", testFiles, i)
 24 | 		if err := os.MkdirAll(dir, 0777); err == nil {
 25 | 			for j := 1; j < files+1; j++ {
 26 | 				counter++
 27 | 				filename := fmt.Sprintf("%s/file-%03d", dir, counter)
 28 | 				ioutil.WriteFile(filename, []byte(fmt.Sprintf("This is file %d", counter)), 0777)
 29 | 			}
 30 | 		} else {
 31 | 			panic(fmt.Sprintf("%s", err))
 32 | 		}
 33 | 	}
 34 | }
 35 | func deleteTestFiles() {
 36 | 	os.RemoveAll("./test_files")
 37 | }
 38 | 
 39 | // BenchFilepathWalk uses the default Go implementation of filepath.Walk
 40 | func BenchmarkWalkFilepath(b *testing.B) {
 41 | 
 42 | 	// max concurrency out
 43 | 	runtime.GOMAXPROCS(runtime.NumCPU())
 44 | 
 45 | 	b.StopTimer()
 46 | 	makeTestFiles(10, 20)
 47 | 
 48 | 	walkFunc := func(p string, info os.FileInfo, err error) error {
 49 | 		time.Sleep(10 * time.Millisecond)
 50 | 		return nil
 51 | 	}
 52 | 
 53 | 	b.StartTimer()
 54 | 
 55 | 	for i := 0; i < b.N; i++ {
 56 | 		filepath.Walk(testFiles, walkFunc)
 57 | 	}
 58 | 
 59 | 	b.StopTimer()
 60 | 	deleteTestFiles()
 61 | 
 62 | }
 63 | 
 64 | // BenchmarkPowerwalk uses the power walker.
 65 | func BenchmarkPowerwalk(b *testing.B) {
 66 | 
 67 | 	// max concurrency out
 68 | 	runtime.GOMAXPROCS(runtime.NumCPU())
 69 | 
 70 | 	b.StopTimer()
 71 | 	makeTestFiles(10, 20)
 72 | 
 73 | 	walkFunc := func(p string, info os.FileInfo, err error) error {
 74 | 		time.Sleep(10 * time.Millisecond)
 75 | 		return nil
 76 | 	}
 77 | 
 78 | 	b.StartTimer()
 79 | 
 80 | 	for i := 0; i < b.N; i++ {
 81 | 		Walk(testFiles, walkFunc)
 82 | 	}
 83 | 
 84 | 	b.StopTimer()
 85 | 	deleteTestFiles()
 86 | 
 87 | }
 88 | 
 89 | func TestWalkFilepath(t *testing.T) {
 90 | 
 91 | 	// max concurrency out
 92 | 	runtime.GOMAXPROCS(runtime.NumCPU())
 93 | 
 94 | 	makeTestFiles(10, 20)
 95 | 	defer deleteTestFiles()
 96 | 
 97 | 	seen := make(map[string]bool)
 98 | 	walkFunc := func(p string, info os.FileInfo, err error) error {
 99 | 		if !info.IsDir() {
100 | 			filename := path.Base(p)
101 | 			seen[filename] = true
102 | 		}
103 | 		return nil
104 | 	}
105 | 
106 | 	assert.NoError(t, filepath.Walk(testFiles, walkFunc))
107 | 
108 | 	// make sure everything was seen
109 | 	if assert.NotEqual(t, len(seen), 0, "Walker should visit at least one file.") {
110 | 		for k, v := range seen {
111 | 			assert.True(t, v, k)
112 | 		}
113 | 	}
114 | 
115 | }
116 | 
117 | func TestPowerWalk(t *testing.T) {
118 | 
119 | 	// max concurrency out
120 | 	runtime.GOMAXPROCS(runtime.NumCPU())
121 | 
122 | 	makeTestFiles(10, 20)
123 | 	defer deleteTestFiles()
124 | 
125 | 	var seenLock sync.Mutex
126 | 	seen := make(map[string]bool)
127 | 	walkFunc := func(p string, info os.FileInfo, err error) error {
128 | 		if !info.IsDir() {
129 | 			filename := path.Base(p)
130 | 			seenLock.Lock()
131 | 			defer seenLock.Unlock()
132 | 			seen[filename] = true
133 | 		}
134 | 		return nil
135 | 	}
136 | 
137 | 	assert.NoError(t, Walk(testFiles, walkFunc))
138 | 
139 | 	// make sure everything was seen
140 | 	if assert.NotEqual(t, len(seen), 0, "Walker should visit at least one file.") {
141 | 		for k, v := range seen {
142 | 			assert.True(t, v, k)
143 | 		}
144 | 	}
145 | 
146 | }
147 | 
148 | /*
149 | // This test is commented out as it takes an extremely long time.
150 | func TestPowerWalkMassive(t *testing.T) {
151 | 
152 | 	// max concurrency out
153 | 	runtime.GOMAXPROCS(runtime.NumCPU())
154 | 
155 | 	rand.Seed(time.Now().UnixNano())
156 | 
157 | 	makeTestFiles(200, 100)
158 | 	defer deleteTestFiles()
159 | 
160 | 	count := 0
161 | 	total := 200 * 100
162 | 
163 | 	var seenLock sync.Mutex
164 | 	seen := make(map[string]bool)
165 | 	walkFunc := func(p string, info os.FileInfo, err error) error {
166 | 		if !info.IsDir() {
167 | 			filename := path.Base(p)
168 | 			seenLock.Lock()
169 | 			seen[filename] = true
170 | 			count++
171 | 			seenLock.Unlock()
172 | 
173 | 			// simulate some processing
174 | 			time.Sleep(time.Duration(rand.Int31n(1000)) * time.Millisecond)
175 | 			os.Stdout.Sync()
176 | 		}
177 | 		return nil
178 | 	}
179 | 
180 | 	assert.NoError(t, Walk(testFiles, walkFunc))
181 | 
182 | 	// make sure everything was seen
183 | 	if assert.NotEqual(t, len(seen), 0, "Walker should visit at least one file.") {
184 | 		for k, v := range seen {
185 | 			assert.True(t, v, k)
186 | 		}
187 | 	}
188 | 
189 | }
190 | */
191 | 
192 | func TestPowerWalkLimit(t *testing.T) {
193 | 
194 | 	// max concurrency out
195 | 	runtime.GOMAXPROCS(runtime.NumCPU())
196 | 
197 | 	makeTestFiles(10, 20)
198 | 	defer deleteTestFiles()
199 | 
200 | 	var seenLock sync.Mutex
201 | 	seen := make(map[string]bool)
202 | 	walkFunc := func(p string, info os.FileInfo, err error) error {
203 | 		if !info.IsDir() {
204 | 			filename := path.Base(p)
205 | 			seenLock.Lock()
206 | 			defer seenLock.Unlock()
207 | 			seen[filename] = true
208 | 		}
209 | 		return nil
210 | 	}
211 | 
212 | 	assert.NoError(t, WalkLimit(testFiles, walkFunc, 1))
213 | 
214 | 	// make sure everything was seen
215 | 	if assert.NotEqual(t, len(seen), 0, "Walker should visit at least one file.") {
216 | 		for k, v := range seen {
217 | 			assert.True(t, v, k)
218 | 		}
219 | 	}
220 | 
221 | }
222 | 
223 | func TestPowerWalkLimitInvalidArgs(t *testing.T) {
224 | 
225 | 	makeTestFiles(10, 20)
226 | 	defer deleteTestFiles()
227 | 
228 | 	walkFunc := func(p string, info os.FileInfo, err error) error {
229 | 		return nil
230 | 	}
231 | 	assert.Panics(t, func() {
232 | 		WalkLimit(testFiles, walkFunc, 0)
233 | 	})
234 | 
235 | }
236 | 
237 | func TestPowerWalkLimitUselessThreadsDontBlock(t *testing.T) {
238 | 
239 | 	makeTestFiles(10, 20)
240 | 	defer deleteTestFiles()
241 | 
242 | 	walkFunc := func(p string, info os.FileInfo, err error) error {
243 | 		return nil
244 | 	}
245 | 	assert.NoError(t, WalkLimit(testFiles, walkFunc, 500))
246 | 
247 | }
248 | 
249 | func TestPowerWalkError(t *testing.T) {
250 | 
251 | 	// max concurrency out
252 | 	runtime.GOMAXPROCS(runtime.NumCPU())
253 | 
254 | 	makeTestFiles(10, 20)
255 | 	defer deleteTestFiles()
256 | 
257 | 	theErr := errors.New("kaboom")
258 | 	var seenLock sync.Mutex
259 | 	seen := make(map[string]bool)
260 | 	walkFunc := func(p string, info os.FileInfo, err error) error {
261 | 		if !info.IsDir() {
262 | 			filename := path.Base(p)
263 | 			seenLock.Lock()
264 | 			defer seenLock.Unlock()
265 | 			if len(seen) > 20 {
266 | 				return theErr
267 | 			}
268 | 			seen[filename] = true
269 | 		}
270 | 		return nil
271 | 	}
272 | 
273 | 	assert.Equal(t, Walk(testFiles, walkFunc), theErr)
274 | 
275 | 	// make sure everything was seen
276 | 	if assert.NotEqual(t, len(seen), 0, "Walker should visit at least one file.") {
277 | 		for k, v := range seen {
278 | 			assert.True(t, v, k)
279 | 		}
280 | 	}
281 | 
282 | }
283 | 


--------------------------------------------------------------------------------