├── .github
│   ├── CODEOWNERS
│   ├── FUNDING.yml
│   └── workflows
│       └── ci.yml
├── .gitignore
├── .golangci.yml
├── LICENSE
├── README.md
├── bench_test.go
├── collector.go
├── collector_test.go
├── doc.go
├── examples
│   ├── README.md
│   ├── collector_errors
│   │   ├── README.md
│   │   ├── go.mod
│   │   ├── go.sum
│   │   └── main.go
│   ├── collectors_chain
│   │   ├── README.md
│   │   ├── go.mod
│   │   ├── go.sum
│   │   └── main.go
│   ├── direct_chain
│   │   ├── README.md
│   │   ├── go.mod
│   │   ├── go.sum
│   │   └── main.go
│   ├── middleware
│   │   ├── README.md
│   │   ├── go.mod
│   │   ├── go.sum
│   │   └── main.go
│   ├── parallel_files
│   │   ├── README.md
│   │   ├── go.mod
│   │   ├── go.sum
│   │   └── main.go
│   ├── tokenizer_stateful
│   │   ├── README.md
│   │   ├── go.mod
│   │   ├── go.sum
│   │   └── main.go
│   └── tokenizer_stateless
│       ├── README.md
│       ├── go.mod
│       ├── go.sum
│       └── main.go
├── examples_test.go
├── go.mod
├── go.sum
├── metrics
│   ├── metrics.go
│   └── metrics_test.go
├── middleware
│   ├── middleware.go
│   └── middleware_test.go
├── pool.go
└── pool_test.go

/.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # These owners will be the default owners for everything in the repo. 2 | # Unless a later match takes precedence, @umputun will be requested for 3 | # review when someone opens a pull request. 4 | 5 | * @umputun 6 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: [umputun] 2 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: build 2 | 3 | on: 4 | push: 5 | branches: 6 | tags: 7 | pull_request: 8 | 9 | jobs: 10 | build: 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - name: set up go 1.24 15 | uses: actions/setup-go@v3 16 | with: 17 | go-version: "1.24" 18 | id: go 19 | 20 | - name: checkout 21 | uses: actions/checkout@v3 22 | 23 | - name: build and test 24 | run: | 25 | go get -v 26 | # run tests for all packages except examples 27 | go test $(go list ./... 
| grep -v /examples/) -timeout=60s -race -covermode=atomic -coverprofile=$GITHUB_WORKSPACE/profile.cov_tmp 28 | cat $GITHUB_WORKSPACE/profile.cov_tmp | grep -v "_mock.go" > $GITHUB_WORKSPACE/profile.cov 29 | go build -race 30 | 31 | - name: golangci-lint 32 | uses: golangci/golangci-lint-action@v7 33 | with: 34 | version: v2.1.6 35 | skip-pkg-cache: true 36 | 37 | - name: install goveralls 38 | run: | 39 | go install github.com/mattn/goveralls@latest 40 | 41 | - name: submit coverage 42 | run: $(go env GOPATH)/bin/goveralls -service="github" -coverprofile=$GITHUB_WORKSPACE/profile.cov 43 | env: 44 | COVERALLS_TOKEN: ${{ secrets.GITHUB_TOKEN }} -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.prof 2 | pool.test -------------------------------------------------------------------------------- /.golangci.yml: -------------------------------------------------------------------------------- 1 | version: "2" 2 | run: 3 | concurrency: 4 4 | linters: 5 | default: none 6 | enable: 7 | - contextcheck 8 | - copyloopvar 9 | - decorder 10 | - errorlint 11 | - exptostd 12 | - gochecknoglobals 13 | - gochecknoinits 14 | - gocritic 15 | - gosec 16 | - govet 17 | - ineffassign 18 | - intrange 19 | - nakedret 20 | - nilerr 21 | - prealloc 22 | - predeclared 23 | - revive 24 | - staticcheck 25 | - testifylint 26 | - thelper 27 | - unconvert 28 | - unparam 29 | - unused 30 | - nestif 31 | settings: 32 | goconst: 33 | min-len: 2 34 | min-occurrences: 2 35 | gocritic: 36 | disabled-checks: 37 | - wrapperFunc 38 | enabled-tags: 39 | - performance 40 | - style 41 | - experimental 42 | gocyclo: 43 | min-complexity: 15 44 | govet: 45 | enable: 46 | - shadow 47 | lll: 48 | line-length: 140 49 | misspell: 50 | locale: US 51 | exclusions: 52 | generated: lax 53 | rules: 54 | - linters: 55 | - gosec 56 | text: 'G114: Use of net/http serve function that has no support for setting timeouts' 57 | - linters: 58 | - revive 59 | - unparam 60 | path: _test\.go$ 61 | text: unused-parameter 62 | - linters: 63 | - prealloc 64 | path: _test\.go$ 65 | text: Consider pre-allocating 66 | - linters: 67 | - gosec 68 | - intrange 69 | path: _test\.go$ 70 | paths: 71 | - third_party$ 72 | - builtin$ 73 | - examples$ 74 | formatters: 75 | enable: 76 | - gofmt 77 | - goimports 78 | exclusions: 79 | generated: lax 80 | paths: 81 | - third_party$ 82 | - builtin$ 83 | - examples$ 84 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Umputun 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

# pool [![Build Status](https://github.com/go-pkgz/pool/workflows/build/badge.svg)](https://github.com/go-pkgz/pool/actions) [![Coverage Status](https://coveralls.io/repos/github/go-pkgz/pool/badge.svg?branch=master)](https://coveralls.io/github/go-pkgz/pool?branch=master) [![godoc](https://godoc.org/github.com/go-pkgz/pool?status.svg)](https://godoc.org/github.com/go-pkgz/pool)

`pool` is a Go package that provides a generic, efficient worker pool implementation for parallel task processing. Built for Go 1.21+, it offers a flexible API with features like batching, work distribution strategies, and comprehensive metrics collection.

## Features

- Generic implementation supporting any data type
- Configurable number of parallel workers
- Support for both stateless shared workers and per-worker instances
- Batching capability for processing multiple items at once
- Customizable work distribution through chunk functions
- Built-in metrics collection (processing times, counts, etc.)
- Error handling with continue/stop options
- Context-based cancellation and timeouts
- Optional completion callbacks
- Extensible middleware system for custom functionality
- Built-in middlewares for common tasks
- No external dependencies except for the testing framework

## Quick Start

Here's a practical example showing how to process a list of URLs in parallel:

```go
func main() {
	// create a worker that fetches URLs
	worker := pool.WorkerFunc[string](func(ctx context.Context, url string) error {
		resp, err := http.Get(url)
		if err != nil {
			return fmt.Errorf("failed to fetch %s: %w", url, err)
		}
		defer resp.Body.Close()

		if resp.StatusCode != http.StatusOK {
			return fmt.Errorf("bad status code from %s: %d", url, resp.StatusCode)
		}
		return nil
	})

	// create a pool with 5 workers
	p := pool.New[string](5, worker).WithContinueOnError() // don't stop on errors

	// start the pool
	if err := p.Go(context.Background()); err != nil {
		log.Fatal(err)
	}

	// submit URLs for processing
	urls := []string{
		"https://example.com",
		"https://example.org",
		"https://example.net",
	}

	go func() {
		// submit URLs and signal when done
		defer p.Close(context.Background())
		for _, url := range urls {
			p.Submit(url)
		}
	}()

	// wait for all URLs to be processed
	if err := p.Wait(context.Background()); err != nil {
		log.Printf("some URLs failed: %v", err)
	}

	// get metrics
	metrics := p.Metrics()
	stats := metrics.GetStats()
	fmt.Printf("Processed: %d, Errors: %d, Time taken: %v\n",
		stats.Processed, stats.Errors, stats.TotalTime)
}
```

_For more examples, see the [examples](https://github.com/go-pkgz/pool/tree/master/examples) directory._
## Motivation

While Go provides excellent primitives for concurrent programming with goroutines, channels, and sync primitives, building production-ready concurrent data processing systems often requires more sophisticated patterns. This package emerged from real-world needs encountered in various projects where basic concurrency primitives weren't enough.

Common challenges this package addresses:

1. **Stateful Processing**
   - Need to maintain worker-specific state (counters, caches, connections)
   - Each worker requires its own resources (database connections, file handles)
   - State needs to be isolated to avoid synchronization

2. **Controlled Work Distribution**
   - Ensuring related items are processed by the same worker
   - Maintaining processing order for specific groups of items
   - Optimizing cache usage by routing similar items together

3. **Resource Management**
   - Limiting number of goroutines in large-scale processing
   - Managing cleanup of worker resources
   - Handling graceful shutdown

4. **Performance Optimization**
   - Batching items to reduce channel communication overhead
   - Balancing worker load with different distribution strategies
   - Buffering to handle uneven processing speeds

5. **Operational Visibility**
   - Need for detailed metrics about processing
   - Understanding bottlenecks and performance issues
   - Monitoring system health

## Core Concepts

### Worker Types

The pool supports three ways to implement and manage workers:

1. **Core Interface**:
```go
// Worker is the interface that wraps the Do method
type Worker[T any] interface {
	Do(ctx context.Context, v T) error
}

// WorkerFunc is an adapter to allow using ordinary functions as Workers
type WorkerFunc[T any] func(ctx context.Context, v T) error

func (f WorkerFunc[T]) Do(ctx context.Context, v T) error { return f(ctx, v) }
```

2. **Stateless Shared Workers**:
```go
// single worker instance shared between all goroutines
worker := pool.WorkerFunc[string](func(ctx context.Context, v string) error {
	// process v
	return nil
})

p := pool.New[string](5, worker)
```
- One worker instance serves all goroutines
- Good for stateless operations
- More memory efficient

3. **Per-Worker Instances**:
```go
type dbWorker struct {
	conn      *sql.DB
	processed int
}

func (w *dbWorker) Do(ctx context.Context, v string) error {
	w.processed++
	_, err := w.conn.ExecContext(ctx, "INSERT INTO items (value) VALUES (?)", v)
	return err
}

// create new instance for each goroutine
maker := func() pool.Worker[string] {
	w := &dbWorker{
		conn: openConnection(), // each worker gets own connection
	}
	return w
}

p := pool.NewStateful[string](5, maker)
```

### Batch Processing

Batching reduces channel communication overhead by processing multiple items at once:

```go
// worker receives items one by one
worker := pool.WorkerFunc[string](func(ctx context.Context, v string) error {
	// v is one item from the batch
	return nil
})

// process items in batches of 10
p := pool.New[string](2, worker).WithBatchSize(10)
```

How batching works:
1. Pool accumulates submitted items internally until batch size is reached
2. Full batch is sent to worker as a single channel operation
3. Worker processes each item in the batch sequentially
4. Last batch may be smaller if items don't divide evenly

When to use batching:
- High-volume processing where channel operations are a bottleneck
- When processing overhead per item is low compared to channel communication
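A minimal end-to-end sketch of the above, using only the API shown in this README (the 1,000 items and batch size of 50 are arbitrary). The point to notice is that the worker body is identical with or without batching, since batches are unpacked before the worker sees them:

```go
package main

import (
	"context"
	"fmt"
	"sync/atomic"

	"github.com/go-pkgz/pool"
)

func main() {
	var processed int64
	// worker is written as usual - it still receives one item at a time
	worker := pool.WorkerFunc[int](func(ctx context.Context, v int) error {
		atomic.AddInt64(&processed, 1)
		return nil
	})

	// items are accumulated into batches of 50 before hitting the channel
	p := pool.New[int](4, worker).WithBatchSize(50)
	if err := p.Go(context.Background()); err != nil {
		panic(err)
	}

	go func() {
		defer p.Close(context.Background()) // signal no more data
		for i := 0; i < 1000; i++ {
			p.Submit(i)
		}
	}()

	if err := p.Wait(context.Background()); err != nil {
		panic(err)
	}
	fmt.Println("processed:", atomic.LoadInt64(&processed)) // prints 1000
}
```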
### Work Distribution

Control how work is distributed among workers using chunk functions:

```go
// distribute by first character of string
p := pool.New[string](3, worker).WithChunkFn(func(v string) string {
	return v[:1] // same first char goes to same worker
})

// distribute by user ID to ensure user's tasks go to same worker
p := pool.New[Task](3, worker).WithChunkFn(func(t Task) string {
	return strconv.Itoa(t.UserID)
})
```

How distribution works:
1. Without chunk function:
   - Items are distributed randomly among workers
   - Good for independent tasks

2. With chunk function:
   - Function returns string key for each item
   - Items with the same key always go to the same worker
   - Uses consistent hashing to map keys to workers

When to use custom distribution:
- Maintain ordering for related items
- Optimize cache usage by worker
- Ensure exclusive access to resources
- Process data consistently
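To make the key-to-worker mapping concrete, here is a standalone illustration of the idea behind key hashing. This is not the pool's actual internal implementation (its hash function may differ); it only demonstrates the property the pool relies on, namely that the same key always lands on the same worker slot:

```go
package main

import (
	"fmt"
	"hash/fnv"
)

// workerIndex maps a chunk key to a worker slot deterministically;
// illustration only - the pool does the equivalent internally
func workerIndex(key string, nWorkers int) int {
	h := fnv.New32a()
	_, _ = h.Write([]byte(key)) // fnv hashing never returns an error
	return int(h.Sum32()) % nWorkers
}

func main() {
	for _, key := range []string{"user-42", "user-97", "user-42"} {
		fmt.Printf("%s -> worker %d\n", key, workerIndex(key, 3))
	}
	// "user-42" prints the same worker index both times
}
```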
## Middleware Support

The package supports middleware pattern similar to HTTP middleware in Go. Middleware can be used to add cross-cutting concerns like:
- Retries with backoff
- Timeouts
- Panic recovery
- Rate limiting
- Metrics and logging
- Error handling

Built-in middleware:
```go
// Add retry with exponential backoff
p.Use(middleware.Retry[string](3, time.Second))

// Add timeout per operation
p.Use(middleware.Timeout[string](5 * time.Second))

// Add panic recovery
p.Use(middleware.Recovery[string](func(p interface{}) {
	log.Printf("recovered from panic: %v", p)
}))

// Add validation before processing
p.Use(middleware.Validator[string](validator))

// Add rate limiting
p.Use(middleware.RateLimiter[string](10, 5)) // 10 requests/sec with burst of 5
```

Custom middleware:
```go
logging := func(next pool.Worker[string]) pool.Worker[string] {
	return pool.WorkerFunc[string](func(ctx context.Context, v string) error {
		log.Printf("processing: %v", v)
		err := next.Do(ctx, v)
		log.Printf("completed: %v, err: %v", v, err)
		return err
	})
}

p.Use(logging)
```

Multiple middleware execute in the same order as provided:
```go
p.Use(logging, metrics, retry) // order: logging -> metrics -> retry -> worker
```

## Install and update

```bash
go get -u github.com/go-pkgz/pool
```

## Usage Examples

### Basic Example

```go
func main() {
	// create a worker function processing strings
	worker := pool.WorkerFunc[string](func(ctx context.Context, v string) error {
		fmt.Printf("processing: %s\n", v)
		return nil
	})

	// create a pool with 2 workers
	p := pool.New[string](2, worker)

	// start the pool
	if err := p.Go(context.Background()); err != nil {
		log.Fatal(err)
	}

	// submit work
	p.Submit("task1")
	p.Submit("task2")
	p.Submit("task3")

	// close the pool and wait for completion
	if err := p.Close(context.Background()); err != nil {
		log.Fatal(err)
	}
}
```

### Error Handling

```go
worker := pool.WorkerFunc[string](func(ctx context.Context, v string) error {
	if strings.Contains(v, "error") {
		return fmt.Errorf("failed to process %s", v)
	}
	return nil
})

// continue processing on errors
p := pool.New[string](2, worker).WithContinueOnError()
```

### Collecting Results

```go
// create a collector for results
collector := pool.NewCollector[Result](ctx, 10)

// worker that produces results
worker := pool.WorkerFunc[Input](func(ctx context.Context, v Input) error {
	result := process(v)
	collector.Submit(result)
	return nil
})

p := pool.New[Input](2, worker)

// get results through iteration
for v, err := range collector.Iter() {
	if err != nil {
		return err
	}
	// use v
}

// or collect all at once
results, err := collector.All()
```

### Metrics and Monitoring

```go
// create worker with metrics tracking
worker := pool.WorkerFunc[string](func(ctx context.Context, v string) error {
	m := metrics.Get(ctx)
	if strings.HasPrefix(v, "important") {
		m.Inc("important-tasks")
	}
	return process(v)
})

// create and run pool
p := pool.New[string](2, worker)
p.Go(context.Background())

// process work
p.Submit("task1")
p.Submit("important-task2")
p.Close(context.Background())

// get metrics
metrics := p.Metrics()
stats := metrics.GetStats()
fmt.Printf("Processed: %d\n", stats.Processed)
fmt.Printf("Errors: %d\n", stats.Errors)
fmt.Printf("Processing time: %v\n", stats.ProcessingTime)
fmt.Printf("Wait time: %v\n", stats.WaitTime)
fmt.Printf("Total time: %v\n", stats.TotalTime)

// get custom metrics
fmt.Printf("Important tasks: %d\n", metrics.Get("important-tasks"))
```

## Flow Control

The package provides several methods for flow control and completion:

```go
// Submit adds items to the pool. Not safe for concurrent use.
// Used by the producer (sender) of data.
p.Submit(item)

// Send safely adds items to the pool from multiple goroutines.
// Used when submitting from worker to another pool, or when multiple goroutines send data.
p.Send(item)

// Close tells workers no more data will be submitted.
// Used by the producer (sender) of data.
p.Close(ctx)

// Wait blocks until all processing is done.
// Used by the consumer (receiver) of results.
p.Wait(ctx)
```
Common usage patterns:

```go
// 1. Single producer submitting items
go func() {
	defer p.Close(ctx) // signal no more data
	for _, task := range tasks {
		p.Submit(task) // Submit is safe here - single goroutine
	}
}()

// 2. Workers submitting to next stage
p1 := pool.New[int](5, pool.WorkerFunc[int](func(ctx context.Context, v int) error {
	result := process(v)
	p2.Send(result) // Send is safe for concurrent calls from workers
	return nil
}))

// 3. Consumer waiting for completion
if err := p.Wait(ctx); err != nil {
	// handle error
}
```

Pool completion callback allows executing code when all workers are done:
```go
p := pool.New[string](5, worker).
	WithPoolCompleteFn(func(ctx context.Context) error {
		// called once after all workers complete
		log.Println("all workers finished")
		return nil
	})
```

The completion callback executes once all workers have completed processing, including runs where errors occurred but the pool continued (`WithContinueOnError()`). It does not execute on context cancellation.

Important notes:
- Use `Submit` when sending items from a single goroutine
- Use `Send` when workers need to submit items to another pool
- Pool completion callback helps coordinate multi-stage processing
- Errors in completion callback are included in pool's error result
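Putting these notes together, here is a sketch of a two-stage pipeline composed from the calls shown above: stage-1 workers use `Send` (they submit concurrently), and stage 1's completion callback closes stage 2 so it knows its input is exhausted.

```go
func main() {
	ctx := context.Background()

	// stage 2: consumes ints produced by stage 1
	p2 := pool.New[int](2, pool.WorkerFunc[int](func(ctx context.Context, v int) error {
		fmt.Println("stage2 got:", v)
		return nil
	}))

	// stage 1: transforms strings and forwards results to stage 2
	p1 := pool.New[string](4, pool.WorkerFunc[string](func(ctx context.Context, v string) error {
		p2.Send(len(v)) // Send, not Submit: called from concurrent workers
		return nil
	})).WithPoolCompleteFn(func(ctx context.Context) error {
		return p2.Close(ctx) // stage 1 done - no more input for stage 2
	})

	if err := p2.Go(ctx); err != nil {
		log.Fatal(err)
	}
	if err := p1.Go(ctx); err != nil {
		log.Fatal(err)
	}

	go func() {
		defer p1.Close(ctx)
		for _, s := range []string{"a", "bb", "ccc"} {
			p1.Submit(s)
		}
	}()

	if err := p1.Wait(ctx); err != nil {
		log.Fatal(err)
	}
	if err := p2.Wait(ctx); err != nil {
		log.Fatal(err)
	}
}
```

Under the semantics described above, stage 1's completion callback runs before its `Wait` returns, so by the time `p2.Wait` is called stage 2 has already been closed and sees the complete stream.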
## Optional parameters

Configure pool behavior using With methods:

```go
p := pool.New[string](2, worker).    // pool with 2 workers
	WithBatchSize(10).               // process items in batches
	WithWorkerChanSize(5).           // set worker channel buffer size
	WithChunkFn(chunkFn).            // control work distribution
	WithContinueOnError().           // don't stop on errors
	WithWorkerCompleteFn(completeFn) // called when worker finishes
```

Available options:
- `WithBatchSize(size int)` - enables batch processing, accumulating items before sending to workers (default: 10)
- `WithWorkerChanSize(size int)` - sets buffer size for worker channels (default: 1)
- `WithChunkFn(fn func(T) string)` - controls work distribution by key (default: none, random distribution)
- `WithContinueOnError()` - continues processing on errors (default: false)
- `WithWorkerCompleteFn(fn func(ctx, id, worker))` - called on worker completion (default: none)
- `WithPoolCompleteFn(fn func(ctx))` - called on pool completion, i.e., when all workers have completed (default: none)

## Collector

The Collector helps manage asynchronous results from pool workers in a synchronous way. It's particularly useful when you need to gather and process results produced by workers. The Collector uses Go generics and is compatible with any result type.

### Features
- Generic implementation supporting any result type
- Context awareness with graceful cancellation
- Buffered collection with configurable size
- Built-in iterator pattern
- Ability to collect all results at once

### Example Usage

```go
// create a collector for results with buffer of 10
collector := pool.NewCollector[string](ctx, 10)

// worker submits results to collector
worker := pool.WorkerFunc[int](func(ctx context.Context, v int) error {
	result := process(v)
	collector.Submit(result)
	return nil
})

// create and run pool
p := pool.New[int](5, worker)
if err := p.Go(ctx); err != nil {
	return err
}

// submit items
for i := 0; i < 100; i++ {
	p.Submit(i)
}
p.Close(ctx)

// Option 1: process results as they arrive with iterator
for result, err := range collector.Iter() {
	if err != nil {
		return err // context cancelled or other error
	}
	// process result
}

// Option 2: get all results at once
results, err := collector.All()
if err != nil {
	return err
}
// use results slice
```

### API Reference

```go
// create new collector
collector := pool.NewCollector[ResultType](ctx, bufferSize)

// submit result to collector
collector.Submit(result)

// close collector when done submitting
collector.Close()

// iterate over results
for result, err := range collector.Iter() {
	// process result
}

// get all results
results, err := collector.All()
```

### Best Practices

1. **Buffer Size**: Choose based on expected throughput and memory constraints
   - Too small: may block workers
   - Too large: may use excessive memory

2. **Error Handling**: Always check error from iterator
   ```go
   for result, err := range collector.Iter() {
       if err != nil {
           // handle context cancellation
           return err
       }
   }
   ```

3. **Context Usage**: Pass context that matches pool's lifecycle
   ```go
   collector := pool.NewCollector[Result](poolCtx, size)
   ```

4. **Cleanup**: Close collector when done submitting
   ```go
   defer collector.Close()
   ```

## Performance

The pool package is designed for high performance and efficiency. Benchmarks show that it consistently outperforms both the standard `errgroup`-based approach and traditional goroutine patterns with shared channels.

### Benchmark Results

Tests running 1,000,000 tasks with 8 workers on Apple M4 Max:

```
errgroup:                             1.878s
pool (default):                       1.213s (~35% faster)
pool (chan size=100):                 1.199s
pool (chan size=100, batch size=100): 1.105s (~41% faster)
pool (with chunking):                 1.113s
```

Detailed benchmark comparison (lower is better):
```
errgroup:                          18.56ms/op
pool (default):                    12.29ms/op
pool (chan size=100):              12.35ms/op
pool (batch size=100):             11.22ms/op
pool (with batching and chunking): 11.43ms/op
```
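These figures are machine-dependent; to get numbers for your own hardware, the comparison benchmark in this repo's `bench_test.go` can be run with the standard Go tooling (the flags below are plain `go test` options):

```bash
go test -run='^$' -bench=BenchmarkPoolCompare -benchmem
```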
### Why Pool is Faster

1. **Efficient Channel Usage**
   - The pool uses dedicated channels per worker when chunking is enabled
   - Default channel buffer size is optimized for common use cases
   - Minimizes channel contention compared to shared channel approaches

2. **Smart Batching**
   - Reduces channel communication overhead by processing multiple items at once
   - Default batch size of 10 provides good balance between latency and throughput
   - Accumulators pre-allocated with capacity to minimize memory allocations

3. **Work Distribution**
   - Optional chunking ensures related tasks go to the same worker
   - Improves cache locality and reduces cross-worker coordination
   - Hash-based distribution provides good load balancing

4. **Resource Management**
   - Workers are pre-initialized and reused
   - No per-task goroutine creation overhead
   - Efficient cleanup and resource handling

### Configuration Impact

- **Default Settings**: Out of the box, the pool is ~35% faster than errgroup
- **Channel Buffering**: Increasing channel size can help with bursty workloads
- **Batching**: Adding batching improves performance by another ~6%
- **Chunking**: Optional chunking has minimal overhead when enabled

### When to Use What

1. **Default Settings** - Good for most use cases
   ```go
   p := pool.New[string](5, worker)
   ```

2. **High-Throughput** - For heavy workloads with many items
   ```go
   p := pool.New[string](5, worker).
       WithWorkerChanSize(100).
       WithBatchSize(100)
   ```

3. **Related Items** - When items need to be processed by the same worker
   ```go
   p := pool.New[string](5, worker).
       WithChunkFn(func(v string) string {
           return v[:1] // group by first character
       })
   ```

### Alternative pool implementations

- [pond](https://github.com/alitto/pond) - pond is a minimalistic and high-performance Go library designed to elegantly manage concurrent tasks.
- [goworker](https://github.com/benmanns/goworker) - goworker is a Resque-compatible, Go-based background worker. It allows you to push jobs into a queue using an expressive language like Ruby while harnessing the efficiency and concurrency of Go to minimize job latency and cost.
- [gowp](https://github.com/xxjwxc/gowp) - golang worker pool
- [conc](https://github.com/sourcegraph/conc) - better structured concurrency for go
- for more see [awesome-go goroutines](https://awesome-go.com/goroutines/) list

## Contributing

Contributions to `pool` are welcome! Please submit a pull request or open an issue for any bugs or feature requests.

## License

`pool` is available under the MIT license. See the [LICENSE](LICENSE) file for more info.
-------------------------------------------------------------------------------- /bench_test.go: -------------------------------------------------------------------------------- 1 | package pool 2 | 3 | import ( 4 | "context" 5 | "os" 6 | "runtime/pprof" 7 | "strconv" 8 | "sync/atomic" 9 | "testing" 10 | "time" 11 | 12 | "github.com/stretchr/testify/assert" 13 | "github.com/stretchr/testify/require" 14 | "golang.org/x/sync/errgroup" 15 | ) 16 | 17 | // benchTask is a somewhat realistic task that combines CPU work with memory allocation 18 | func benchTask(size int) []int { //nolint:unparam // size is used in the benchmark 19 | task := func(n int) int { // simulate some CPU work 20 | sum := 0 21 | for i := 0; i < n; i++ { 22 | sum += i 23 | } 24 | return sum 25 | } 26 | res := make([]int, 0, size) 27 | for i := 0; i < size; i++ { 28 | res = append(res, task(1)) 29 | } 30 | return res 31 | } 32 | 33 | func TestPoolPerf(t *testing.T) { 34 | n := 1000 35 | ctx := context.Background() 36 | 37 | var egDuration time.Duration 38 | t.Run("errgroup", func(t *testing.T) { 39 | var count2 int32 40 | st := time.Now() 41 | defer func() { 42 | egDuration = time.Since(st) 43 | t.Logf("elapsed errgroup: %v", time.Since(st)) 44 | }() 45 | g, _ := errgroup.WithContext(ctx) 46 | g.SetLimit(8) 47 | for i := 0; i < 1000000; i++ { 48 | g.Go(func() error { 49 | benchTask(n) 50 | atomic.AddInt32(&count2, 1) 51 | return nil 52 | }) 53 | } 54 | require.NoError(t, g.Wait()) 55 | assert.Equal(t, int32(1000000), atomic.LoadInt32(&count2)) 56 | }) 57 | 58 | t.Run("pool default", func(t *testing.T) { 59 | // pool with 8 workers 60 | var count1 int32 61 | worker := WorkerFunc[int](func(context.Context, int) error { 62 | benchTask(n) 63 | atomic.AddInt32(&count1, 1) 64 | return nil 65 | }) 66 | 67 | st := time.Now() 68 | p := New[int](8, worker) 69 | require.NoError(t, p.Go(ctx)) 70 | go func() { 71 | for i := 0; i < 1000000; i++ { 72 | p.Submit(i) 73 | } 74 | assert.NoError(t, p.Close(ctx)) 75 | }() 76 | require.NoError(t, p.Wait(ctx)) 77 | assert.Equal(t, int32(1000000), atomic.LoadInt32(&count1)) 78 | t.Logf("elapsed pool: %v", time.Since(st)) 79 | assert.Less(t, time.Since(st), egDuration) 80 | }) 81 | 82 | t.Run("pool with 100 chan size", func(t *testing.T) { 83 | // pool with 8 workers 84 | var count1 int32 85 | worker := WorkerFunc[int](func(context.Context, int) error { 86 | benchTask(n) 87 | atomic.AddInt32(&count1, 1) 88 | return nil 89 | }) 90 | 91 | st := time.Now() 92 | p := New[int](8, worker).WithWorkerChanSize(100) 93 | require.NoError(t, p.Go(ctx)) 94 | go func() { 95 | for i := 0; i < 1000000; i++ { 96 | p.Submit(i) 97 | } 98 | assert.NoError(t, p.Close(ctx)) 99 | }() 100 | require.NoError(t, p.Wait(ctx)) 101 | assert.Equal(t, int32(1000000), atomic.LoadInt32(&count1)) 102 | t.Logf("elapsed pool: %v", time.Since(st)) 103 | assert.Less(t, time.Since(st), egDuration) 104 | }) 105 | 106 | t.Run("pool with 100 chan size and 100 batch size", func(t *testing.T) { 107 | // pool with 8 workers 108 | var count1 int32 109 | worker := WorkerFunc[int](func(context.Context, int) error { 110 | benchTask(n) 111 | atomic.AddInt32(&count1, 1) 112 | return nil 113 | }) 114 | 115 | st := time.Now() 116 | p := New[int](8, worker).WithWorkerChanSize(100).WithBatchSize(100) 117 | require.NoError(t, p.Go(ctx)) 118 | go func() { 119 | for i := 0; i < 1000000; i++ { 120 | p.Submit(i) 121 | } 122 | assert.NoError(t, p.Close(ctx)) 123 | }() 124 | require.NoError(t, p.Wait(ctx)) 125 | assert.Equal(t, int32(1000000), 
atomic.LoadInt32(&count1)) 126 | t.Logf("elapsed pool: %v", time.Since(st)) 127 | assert.Less(t, time.Since(st), egDuration) 128 | }) 129 | 130 | t.Run("pool with 100 chan size and 100 batch size and chunking", func(t *testing.T) { 131 | // pool with 8 workers 132 | var count1 int32 133 | worker := WorkerFunc[int](func(context.Context, int) error { 134 | benchTask(n) 135 | atomic.AddInt32(&count1, 1) 136 | return nil 137 | }) 138 | 139 | st := time.Now() 140 | p := New[int](8, worker).WithWorkerChanSize(100).WithBatchSize(100).WithChunkFn(func(v int) string { 141 | return strconv.Itoa(v % 8) // distribute by modulo 142 | }) 143 | require.NoError(t, p.Go(ctx)) 144 | go func() { 145 | for i := 0; i < 1000000; i++ { 146 | p.Submit(i) 147 | } 148 | assert.NoError(t, p.Close(ctx)) 149 | }() 150 | require.NoError(t, p.Wait(ctx)) 151 | assert.Equal(t, int32(1000000), atomic.LoadInt32(&count1)) 152 | t.Logf("elapsed pool: %v", time.Since(st)) 153 | assert.Less(t, time.Since(st), egDuration) 154 | }) 155 | 156 | } 157 | 158 | func BenchmarkPoolCompare(b *testing.B) { 159 | ctx := context.Background() 160 | iterations := 10000 161 | workers := 8 162 | n := 1000 163 | 164 | b.Run("errgroup", func(b *testing.B) { 165 | b.ResetTimer() 166 | for i := 0; i < b.N; i++ { 167 | var count int32 168 | g, _ := errgroup.WithContext(ctx) 169 | g.SetLimit(workers) 170 | 171 | for j := 0; j < iterations; j++ { 172 | g.Go(func() error { 173 | benchTask(n) 174 | atomic.AddInt32(&count, 1) 175 | return nil 176 | }) 177 | } 178 | require.NoError(b, g.Wait()) 179 | require.Equal(b, int32(iterations), atomic.LoadInt32(&count)) 180 | } 181 | }) 182 | 183 | b.Run("pool default", func(b *testing.B) { 184 | b.ResetTimer() 185 | for i := 0; i < b.N; i++ { 186 | var count int32 187 | p := New[int](workers, WorkerFunc[int](func(context.Context, int) error { 188 | benchTask(n) 189 | atomic.AddInt32(&count, 1) 190 | return nil 191 | })) 192 | 193 | require.NoError(b, p.Go(ctx)) 194 | go func() { 195 | for j := 0; j < iterations; j++ { 196 | p.Submit(j) 197 | } 198 | p.Close(ctx) 199 | }() 200 | require.NoError(b, p.Wait(ctx)) 201 | require.Equal(b, int32(iterations), atomic.LoadInt32(&count)) 202 | } 203 | }) 204 | 205 | b.Run("pool with chan=100", func(b *testing.B) { 206 | b.ResetTimer() 207 | for i := 0; i < b.N; i++ { 208 | var count int32 209 | p := New[int](workers, WorkerFunc[int](func(context.Context, int) error { 210 | benchTask(n) 211 | atomic.AddInt32(&count, 1) 212 | return nil 213 | })).WithWorkerChanSize(100) 214 | 215 | require.NoError(b, p.Go(ctx)) 216 | go func() { 217 | for j := 0; j < iterations; j++ { 218 | p.Submit(j) 219 | } 220 | p.Close(ctx) 221 | }() 222 | require.NoError(b, p.Wait(ctx)) 223 | require.Equal(b, int32(iterations), atomic.LoadInt32(&count)) 224 | } 225 | }) 226 | 227 | b.Run("pool with batching", func(b *testing.B) { 228 | b.ResetTimer() 229 | for i := 0; i < b.N; i++ { 230 | var count int32 231 | p := New[int](workers, WorkerFunc[int](func(context.Context, int) error { 232 | benchTask(n) 233 | atomic.AddInt32(&count, 1) 234 | return nil 235 | })).WithWorkerChanSize(100).WithBatchSize(100) 236 | 237 | require.NoError(b, p.Go(ctx)) 238 | go func() { 239 | for j := 0; j < iterations; j++ { 240 | p.Submit(j) 241 | } 242 | p.Close(ctx) 243 | }() 244 | require.NoError(b, p.Wait(ctx)) 245 | require.Equal(b, int32(iterations), atomic.LoadInt32(&count)) 246 | } 247 | }) 248 | 249 | b.Run("pool with batching and chunking", func(b *testing.B) { 250 | b.ResetTimer() 251 | for i := 0; i < b.N; i++ { 
252 | var count int32 253 | p := New[int](workers, WorkerFunc[int](func(context.Context, int) error { 254 | benchTask(n) 255 | atomic.AddInt32(&count, 1) 256 | return nil 257 | })).WithWorkerChanSize(100).WithBatchSize(100).WithChunkFn(func(v int) string { 258 | return strconv.Itoa(v % workers) 259 | }) 260 | 261 | require.NoError(b, p.Go(ctx)) 262 | go func() { 263 | for j := 0; j < iterations; j++ { 264 | p.Submit(j) 265 | } 266 | p.Close(ctx) 267 | }() 268 | require.NoError(b, p.Wait(ctx)) 269 | require.Equal(b, int32(iterations), atomic.LoadInt32(&count)) 270 | } 271 | }) 272 | } 273 | 274 | func TestPoolWithProfiling(t *testing.T) { 275 | // run only if env PROFILING is set 276 | if os.Getenv("PROFILING") == "" { 277 | t.Skip("skipping profiling test; set PROFILING to run") 278 | } 279 | 280 | // start CPU profile 281 | cpuFile, err := os.Create("cpu.prof") 282 | require.NoError(t, err) 283 | defer cpuFile.Close() 284 | require.NoError(t, pprof.StartCPUProfile(cpuFile)) 285 | defer pprof.StopCPUProfile() 286 | 287 | // create memory profile 288 | memFile, err := os.Create("mem.prof") 289 | require.NoError(t, err) 290 | defer memFile.Close() 291 | 292 | // run pool test 293 | iterations := 100000 294 | ctx := context.Background() 295 | worker := WorkerFunc[int](func(context.Context, int) error { 296 | benchTask(30000) 297 | return nil 298 | }) 299 | 300 | // test pool implementation 301 | p := New[int](4, worker).WithWorkerChanSize(100) 302 | require.NoError(t, p.Go(ctx)) 303 | 304 | done := make(chan struct{}) 305 | go func() { 306 | for i := 0; i < iterations; i++ { 307 | p.Submit(i) 308 | } 309 | p.Close(ctx) 310 | close(done) 311 | }() 312 | 313 | select { 314 | case <-done: 315 | case <-time.After(5 * time.Second): 316 | t.Fatal("timeout") 317 | } 318 | 319 | // create memory profile after test 320 | require.NoError(t, pprof.WriteHeapProfile(memFile)) 321 | } 322 | -------------------------------------------------------------------------------- /collector.go: -------------------------------------------------------------------------------- 1 | package pool 2 | 3 | import ( 4 | "context" 5 | "iter" 6 | ) 7 | 8 | // Collector provides synchronous access to async data from pool's response channel 9 | type Collector[V any] struct { 10 | ch chan V 11 | ctx context.Context 12 | } 13 | 14 | // NewCollector creates a new collector with a given context and buffer size for the channel 15 | func NewCollector[V any](ctx context.Context, size int) *Collector[V] { 16 | return &Collector[V]{ 17 | ch: make(chan V, size), 18 | ctx: ctx, 19 | } 20 | } 21 | 22 | // Submit sends a value to the collector 23 | func (c *Collector[V]) Submit(v V) { 24 | c.ch <- v 25 | } 26 | 27 | // Close closes the collector 28 | func (c *Collector[V]) Close() { 29 | close(c.ch) 30 | } 31 | 32 | // Iter returns an iterator over collector values 33 | func (c *Collector[V]) Iter() iter.Seq2[V, error] { 34 | return func(yield func(V, error) bool) { 35 | for { 36 | select { 37 | case v, ok := <-c.ch: 38 | if !ok { 39 | return 40 | } 41 | if !yield(v, nil) { 42 | return 43 | } 44 | case <-c.ctx.Done(): 45 | var zero V 46 | yield(zero, c.ctx.Err()) 47 | return 48 | } 49 | } 50 | } 51 | } 52 | 53 | // All gets all data from the collector 54 | func (c *Collector[V]) All() (res []V, err error) { 55 | for v, err := range c.Iter() { 56 | if err != nil { 57 | return res, err 58 | } 59 | res = append(res, v) 60 | } 61 | return res, nil 62 | } 63 | -------------------------------------------------------------------------------- 
/collector_test.go: -------------------------------------------------------------------------------- 1 | package pool 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | "time" 7 | 8 | "github.com/stretchr/testify/require" 9 | ) 10 | 11 | func TestCollector_Basic(t *testing.T) { 12 | ctx := context.Background() 13 | c := NewCollector[string](ctx, 5) 14 | 15 | go func() { 16 | c.Submit("test1") 17 | c.Submit("test2") 18 | c.Submit("test3") 19 | c.Close() 20 | }() 21 | 22 | var values []string 23 | var lastErr error 24 | for v, err := range c.Iter() { 25 | if err != nil { 26 | lastErr = err 27 | break 28 | } 29 | values = append(values, v) 30 | } 31 | 32 | require.NoError(t, lastErr) 33 | require.Equal(t, []string{"test1", "test2", "test3"}, values) 34 | } 35 | 36 | func TestCollector_ContextCancellation(t *testing.T) { 37 | ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*100) 38 | defer cancel() 39 | 40 | c := NewCollector[int](ctx, 5) 41 | 42 | go func() { 43 | c.Submit(1) 44 | time.Sleep(time.Second) // simulate slow producer 45 | c.Submit(2) 46 | c.Close() 47 | }() 48 | 49 | var values []int 50 | var lastErr error 51 | for v, err := range c.Iter() { 52 | if err != nil { 53 | lastErr = err 54 | break 55 | } 56 | values = append(values, v) 57 | } 58 | 59 | require.ErrorIs(t, lastErr, context.DeadlineExceeded) 60 | require.Equal(t, []int{1}, values) 61 | } 62 | 63 | func TestCollector_All(t *testing.T) { 64 | ctx := context.Background() 65 | c := NewCollector[int](ctx, 5) 66 | 67 | go func() { 68 | for i := range 3 { 69 | c.Submit(i) 70 | } 71 | c.Close() 72 | }() 73 | 74 | values, err := c.All() 75 | require.NoError(t, err) 76 | require.Equal(t, []int{0, 1, 2}, values) 77 | } 78 | 79 | func TestCollector_All_WithError(t *testing.T) { 80 | ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*100) 81 | defer cancel() 82 | 83 | c := NewCollector[int](ctx, 5) 84 | 85 | go func() { 86 | c.Submit(1) 87 | time.Sleep(time.Second) 88 | c.Submit(2) 89 | c.Close() 90 | }() 91 | 92 | values, err := c.All() 93 | require.ErrorIs(t, err, context.DeadlineExceeded) 94 | require.Equal(t, []int{1}, values) 95 | } 96 | 97 | func TestCollector_Multiple(t *testing.T) { 98 | ctx := context.Background() 99 | c1 := NewCollector[string](ctx, 5) 100 | c2 := NewCollector[string](ctx, 5) 101 | 102 | go func() { 103 | c1.Submit("c1-1") 104 | c1.Submit("c1-2") 105 | c1.Close() 106 | }() 107 | 108 | go func() { 109 | c2.Submit("c2-1") 110 | c2.Submit("c2-2") 111 | c2.Close() 112 | }() 113 | 114 | v1, err := c1.All() 115 | require.NoError(t, err) 116 | require.Equal(t, []string{"c1-1", "c1-2"}, v1) 117 | 118 | v2, err := c2.All() 119 | require.NoError(t, err) 120 | require.Equal(t, []string{"c2-1", "c2-2"}, v2) 121 | } 122 | -------------------------------------------------------------------------------- /doc.go: -------------------------------------------------------------------------------- 1 | // Package pool provides a simple worker pool implementation with a single stage only. 2 | // It allows submitting tasks to be processed in parallel by a number of workers. 
//
// The package supports both stateless and stateful workers through two distinct constructors:
//   - New - for pools with a single shared worker instance
//   - NewStateful - for pools where each goroutine gets its own worker instance
//
// Worker Types:
//
// The package provides a simple Worker interface that can be implemented in two ways:
//
//	type Worker[T any] interface {
//		Do(ctx context.Context, v T) error
//	}
//
// 1. Direct implementation for complex stateful workers:
//
//	type dbWorker struct {
//		conn *sql.DB
//	}
//
//	func (w *dbWorker) Do(ctx context.Context, v string) error {
//		_, err := w.conn.ExecContext(ctx, "INSERT INTO items (value) VALUES (?)", v)
//		return err
//	}
//
// 2. Function adapter for simple stateless workers:
//
//	worker := pool.WorkerFunc[string](func(ctx context.Context, v string) error {
//		// process the value
//		return nil
//	})
//
// Basic Usage:
//
// For stateless operations (like HTTP requests, parsing operations, etc.):
//
//	worker := pool.WorkerFunc[string](func(ctx context.Context, v string) error {
//		resp, err := http.Get(v)
//		if err != nil {
//			return err
//		}
//		defer resp.Body.Close()
//		return nil
//	})
//
//	p := pool.New[string](2, worker)
//	if err := p.Go(context.Background()); err != nil {
//		return err
//	}
//
//	// submit work
//	p.Submit("task1")
//	p.Submit("task2")
//
//	if err := p.Close(context.Background()); err != nil {
//		return err
//	}
//
// For stateful operations (like database connections, file handles, etc.):
//
//	maker := func() pool.Worker[string] {
//		return &dbWorker{
//			conn: openConnection(),
//		}
//	}
//	p := pool.NewStateful[string](2, maker)
//
// Features:
//
//   - Generic worker pool implementation supporting any data type
//   - Configurable number of workers running in parallel
//   - Support for both stateless shared workers and per-worker instances
//   - Batching capability for processing multiple items at once
//   - Customizable work distribution through chunk functions
//   - Built-in metrics collection including processing times and counts
//   - Error handling with options to continue or stop on errors
//   - Context-based cancellation and timeouts
//   - Optional completion callbacks
//
// Advanced Features:
//
// Batching:
//
//	p := New[string](2, worker).WithBatchSize(10)
//
// Chunked distribution:
//
//	p := New[string](2, worker).WithChunkFn(func(v string) string {
//		return v // items with same hash go to same worker
//	})
//
// Error handling:
//
//	p := New[string](2, worker).WithContinueOnError()
//
// Metrics:
//
// The pool automatically tracks standard stats metrics (processed counts, errors, timings).
// Workers can also record additional custom metrics:
//
//	m := metrics.Get(ctx)
//	m.Inc("custom-counter")
//
// Access metrics:
//
//	metrics := p.Metrics()
//	value := metrics.Get("custom-counter")
//
// Statistical metrics include:
//
//   - Number of processed items
//   - Number of errors
//   - Number of dropped items
//   - Processing time
//   - Wait time
//   - Initialization time
//   - Total time
//
// Access stats:
//
//	metrics := p.Metrics()
//	stats := metrics.GetStats()
//	fmt.Printf("processed: %d, errors: %d", stats.Processed, stats.Errors)
//
// Data Collection:
//
// For collecting results from workers, use the Collector:
//
//	collector := pool.NewCollector[Result](ctx, 10)
//	worker := pool.WorkerFunc[Input](func(ctx context.Context, v Input) error {
//		result := process(v)
//		collector.Submit(result)
//		return nil
//	})
//
// Results can be retrieved either through iteration:
//
//	for v, err := range collector.Iter() {
//		if err != nil {
//			return err
//		}
//		// use v
//	}
//
// Or by collecting all at once:
//
//	results, err := collector.All()
//
// Middleware Support:
//
// The pool supports middleware pattern similar to HTTP middleware in Go. Middleware can be used
// to add functionality like retries, timeouts, metrics, or error handling:
//
//	// retry middleware
//	retryMiddleware := func(next Worker[string]) Worker[string] {
//		return WorkerFunc[string](func(ctx context.Context, v string) error {
//			var lastErr error
//			for i := 0; i < 3; i++ {
//				if err := next.Do(ctx, v); err == nil {
//					return nil
//				} else {
//					lastErr = err
//				}
//				time.Sleep(time.Second * time.Duration(i))
//			}
//			return fmt.Errorf("failed after 3 attempts: %w", lastErr)
//		})
//	}
//
//	p := New[string](2, worker).Use(retryMiddleware)
//
// Multiple middleware can be chained, and they execute in the same order as provided:
//
//	p.Use(logging, metrics, retry) // executes: logging -> metrics -> retry -> worker
package pool

--------------------------------------------------------------------------------
/examples/README.md:
--------------------------------------------------------------------------------

1 | # Examples 2 | 3 | This directory contains examples demonstrating various aspects of the [go-pkgz/pool](https://github.com/go-pkgz/pool) package. 4 | 5 | **Important Note:** These examples are intentionally minimalistic and somewhat artificial. They may not represent how one would solve similar problems in real-life applications. Instead, they focus on clearly demonstrating specific features and usage patterns of the pool package. 6 | 7 | ## Available Examples 8 | 9 | ### [tokenizer_stateful](./tokenizer_stateful) 10 | Shows how to use stateful workers where each worker maintains its own independent state (word frequency counters). Demonstrates: 11 | - Worker state isolation 12 | - Result collection through completion callbacks 13 | - Performance statistics tracking 14 | 15 | ### [tokenizer_stateless](./tokenizer_stateless) 16 | Implements the same text processing but using stateless workers with a shared collector. 
Demonstrates: 17 | - Simple worker functions 18 | - Shared result collection 19 | - Batch processing 20 | 21 | ### [parallel_files](./parallel_files) 22 | Shows how to process multiple files in parallel using chunks. Demonstrates: 23 | - Chunk-based file processing 24 | - Custom metrics collection 25 | - Work distribution across workers 26 | 27 | ### [middleware](./middleware) 28 | Shows how to use middleware to add cross-cutting functionality to pool processing. Demonstrates: 29 | - Built-in and custom middleware 30 | - Error handling with retries 31 | - Input validation 32 | - Structured logging 33 | - Recovery from panics 34 | 35 | ### [direct_chain](./direct_chain) 36 | Shows how to chain multiple worker pools by having workers directly submit to the next pool. Demonstrates: 37 | - Multi-stage processing pipeline 38 | - Direct pool submission between stages 39 | - Type transformation 40 | - Pool coordination 41 | 42 | ### [collectors_chain](./collectors_chain) 43 | Shows how to chain multiple worker pools using collectors. Demonstrates: 44 | - Multi-stage processing pipeline 45 | - Type-safe data transformation 46 | - Automatic coordination via iterators 47 | - Independent pool scaling 48 | 49 | ### [collector_errors](./collector_errors) 50 | Shows how to handle and categorize errors in parallel processing. Demonstrates: 51 | - Error collection pattern 52 | - Error categorization and grouping 53 | - Timing information tracking 54 | - Statistical reporting on errors 55 | 56 | ## Running Examples 57 | 58 | Each example can be run from its directory: 59 | ```bash 60 | cd tokenizer_stateful 61 | go run main.go -file input.txt 62 | 63 | cd ../tokenizer_stateless 64 | go run main.go -file input.txt 65 | 66 | cd ../parallel_files 67 | go run main.go -pattern "*.txt" 68 | 69 | cd ../middleware 70 | go run main.go -workers 4 -retries 3 71 | 72 | cd ../direct_chain 73 | go run main.go 74 | 75 | cd ../collectors_chain 76 | go run main.go 77 | 78 | cd ../collector_errors 79 | go run main.go -workers 8 -jobs 100 -error-rate 0.3 80 | ``` 81 | 82 | ## Common Patterns 83 | 84 | While the examples are simplified, they showcase important pool package features: 85 | - Worker state management (stateful vs stateless) 86 | - Result collection strategies 87 | - Error handling approaches 88 | - Metrics and monitoring 89 | - Work distribution patterns 90 | - Middleware integration 91 | - Multi-stage processing pipelines -------------------------------------------------------------------------------- /examples/collector_errors/README.md: -------------------------------------------------------------------------------- 1 | # Error Collection and Handling Example 2 | 3 | This example demonstrates how to effectively handle and categorize errors in parallel processing using the [go-pkgz/pool](https://github.com/go-pkgz/pool) package. It shows a pattern for collecting, tracking, and analyzing errors that occur during concurrent task execution. 4 | 5 | ## What Makes it Special? 6 | 7 | 1. Error collection pattern: 8 | - Collects both successes and failures 9 | - Preserves error context and timing information 10 | - Allows post-processing analysis of error patterns 11 | - Continues processing despite errors 12 | 13 | 2. Error classification: 14 | - Groups errors by type for easier analysis 15 | - Tracks when and where errors occurred 16 | - Maintains job context with each error 17 | - Provides statistical insights on error distribution 18 | 19 | 3. 
Result aggregation: 20 | - Separates successes from failures 21 | - Calculates performance metrics by result type 22 | - Shows error distribution patterns 23 | - Provides comprehensive error reporting 24 | 25 | ## Features 26 | 27 | - Parallel job processing with configurable worker count 28 | - Configurable error rate for testing failure scenarios 29 | - Detailed error categorization and reporting 30 | - Timing information for both successful and failed jobs 31 | - Comprehensive statistics on processing performance 32 | - Graceful handling of context cancellation 33 | 34 | ## Installation 35 | 36 | ```bash 37 | go build 38 | ``` 39 | 40 | ## Usage 41 | 42 | ```bash 43 | go run main.go [options] 44 | ``` 45 | 46 | Options: 47 | - `-workers` - number of worker goroutines (default: 4) 48 | - `-jobs` - number of jobs to process (default: 20) 49 | - `-error-rate` - probability of job failure from 0 to 1 (default: 0.3) 50 | - `-timeout` - timeout for the entire operation (default: 10s) 51 | - `-verbose` - enable detailed logging (default: false) 52 | 53 | Example: 54 | ```bash 55 | go run main.go -workers 8 -jobs 100 -error-rate 0.4 -timeout 30s 56 | ``` 57 | 58 | ## Implementation Details 59 | 60 | The example demonstrates several key patterns: 61 | 62 | 1. Result type definition: 63 | ```go 64 | type Result struct { 65 | JobID string // ID of the job 66 | Success bool // whether the job succeeded 67 | Error error // error if job failed 68 | Timestamp time.Time // when the job completed 69 | Duration time.Duration // how long the job took 70 | } 71 | ``` 72 | 73 | 2. Error collection in workers: 74 | ```go 75 | if rand.Float64() < errorRate { 76 | err := errors.New("operation failed") 77 | collector.Submit(Result{ 78 | JobID: jobID, 79 | Success: false, 80 | Error: err, 81 | Timestamp: time.Now(), 82 | Duration: duration, 83 | }) 84 | return err // return error so pool metrics track it 85 | } 86 | ``` 87 | 88 | 3. Error categorization: 89 | ```go 90 | // group errors by type 91 | errorsByType := make(map[string][]Result) 92 | for _, result := range failures { 93 | errType := errorTypeString(result.Error) 94 | errorsByType[errType] = append(errorsByType[errType], result) 95 | } 96 | ``` 97 | 98 | ## Output Example 99 | 100 | ``` 101 | Processing summary: 102 | Total jobs: 100 103 | Results collected: 100 104 | Successful jobs: 61 105 | Failed jobs: 39 106 | Processing time: 218ms 107 | Total time: 223ms 108 | Avg success time: 104ms 109 | Avg failure time: 106ms 110 | 111 | Error details: 112 | • database connection failed (12 occurrences): 113 | - job-005 (at 15:04:02.123, took 115ms) 114 | - job-012 (at 15:04:02.247, took 98ms) 115 | - ... 116 | 117 | • validation failed (14 occurrences): 118 | - job-003 (at 15:04:02.089, took 103ms) 119 | - job-007 (at 15:04:02.187, took 112ms) 120 | - ... 121 | 122 | • timeout exceeded (13 occurrences): 123 | - job-001 (at 15:04:02.042, took 95ms) 124 | - job-014 (at 15:04:02.301, took 106ms) 125 | - ... 
126 | ``` 127 | 128 | ## Architecture 129 | 130 | The program follows this architecture: 131 | 132 | ``` 133 | Job Submission → Worker Pool → Result Collector → Error Analyzer 134 | (main goroutine) (N workers) (buffer channel) (main goroutine) 135 | submits jobs processes jobs collects results categorizes errors 136 | with random from workers generates reports 137 | success/failure calculates statistics 138 | ``` 139 | 140 | Key components: 141 | - Pool with configurable worker count 142 | - Collector for gathering both successes and failures 143 | - Type-safe error collection through `Result` type 144 | - Error categorization by error message 145 | - Statistical processing of successes vs failures 146 | 147 | ## Real-World Applications 148 | 149 | This pattern is useful for: 150 | - ETL processes that need to track failed records 151 | - API clients that need to analyze error patterns 152 | - Batch processing systems that need error reporting 153 | - Monitoring systems that track error rates 154 | - System health checks with error categorization 155 | - Performance testing with error simulation 156 | 157 | ## Notes 158 | 159 | - The example uses simulated random errors with configurable rate 160 | - Error types are categorized by message prefix for simplicity 161 | - In real applications, you might want to use typed errors or error codes 162 | - The pattern works well with both stateless and stateful workers 163 | - This approach provides much richer error information than simple error counts -------------------------------------------------------------------------------- /examples/collector_errors/go.mod: -------------------------------------------------------------------------------- 1 | module examples/collector_errors 2 | 3 | go 1.24 4 | 5 | require github.com/go-pkgz/pool v0.5.0 6 | 7 | require golang.org/x/sync v0.11.0 // indirect 8 | 9 | replace github.com/go-pkgz/pool => ../.. 
10 | -------------------------------------------------------------------------------- /examples/collector_errors/go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 4 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 5 | github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= 6 | github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 7 | golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w= 8 | golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= 9 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 10 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 11 | -------------------------------------------------------------------------------- /examples/collector_errors/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "flag" 7 | "fmt" 8 | "math/rand" 9 | "os" 10 | "sort" 11 | "strings" 12 | "time" 13 | 14 | "github.com/go-pkgz/pool" 15 | ) 16 | 17 | // Result represents either a success or an error from processing 18 | type Result struct { 19 | JobID string // ID of the job 20 | Success bool // whether the job succeeded 21 | Error error // error if job failed 22 | Timestamp time.Time // when the job completed 23 | Duration time.Duration // how long the job took 24 | } 25 | 26 | func main() { 27 | // parse command line arguments 28 | workers := flag.Int("workers", 4, "number of workers") 29 | jobs := flag.Int("jobs", 20, "number of jobs to process") 30 | errorRate := flag.Float64("error-rate", 0.3, "probability of job failure (0-1)") 31 | timeout := flag.Duration("timeout", 10*time.Second, "timeout for the entire operation") 32 | verbose := flag.Bool("verbose", false, "verbose output") 33 | flag.Parse() 34 | 35 | // create context with timeout 36 | ctx, cancel := context.WithTimeout(context.Background(), *timeout) 37 | defer cancel() 38 | 39 | // create a collector for results, both successes and errors 40 | collector := pool.NewCollector[Result](ctx, 100) 41 | 42 | workerFunc := worker(workerParam{collector: collector, verbose: *verbose, errorRate: *errorRate}) 43 | p := pool.New[string](*workers, pool.WorkerFunc[string](workerFunc)).WithContinueOnError().WithBatchSize(5) 44 | 45 | // start the pool 46 | if err := p.Go(ctx); err != nil { 47 | fmt.Printf("Failed to start pool: %v\n", err) 48 | os.Exit(1) 49 | } 50 | 51 | // submit jobs in the background, this is usually done in a separate goroutine 52 | go func() { 53 | for i := 0; i < *jobs; i++ { 54 | jobID := fmt.Sprintf("job-%03d", i+1) 55 | p.Submit(jobID) 56 | } 57 | // close pool to signal all jobs have been submitted 58 | if err := p.Close(ctx); err != nil && *verbose { 59 | fmt.Printf("Pool closed with error: %v\n", err) 60 | } 61 | }() 62 | 63 | go func() { 64 | // wait for all jobs to finish 65 | err := p.Wait(ctx) // wait for all jobs to finish 66 | if err != nil && *verbose { 67 | fmt.Printf("Pool wait error: %v\n", err) 68 | } 69 | collector.Close() // close collector to signal all results has been submitted 70 | }() 71 | 72 | 
// collect results using the collector's Iter method 73 | var results []Result 74 | for result, err := range collector.Iter() { 75 | if err != nil { 76 | fmt.Printf("Collector error: %v\n", err) 77 | break 78 | } 79 | results = append(results, result) 80 | } 81 | 82 | // separate successes and errors 83 | var successes, failures []Result 84 | for _, result := range results { 85 | if result.Success { 86 | successes = append(successes, result) 87 | } else { 88 | failures = append(failures, result) 89 | } 90 | } 91 | 92 | // print processing summary 93 | stats := p.Metrics().GetStats() 94 | fmt.Printf("\nProcessing summary:\n") 95 | fmt.Printf("Total jobs: %d\n", *jobs) 96 | fmt.Printf("Results collected: %d\n", len(results)) 97 | fmt.Printf("Successful jobs: %d\n", len(successes)) 98 | fmt.Printf("Failed jobs: %d\n", len(failures)) 99 | fmt.Printf("Processing time: %v\n", stats.ProcessingTime.Round(time.Millisecond)) 100 | fmt.Printf("Total time: %v\n", stats.TotalTime.Round(time.Millisecond)) 101 | 102 | // calculate average duration for successes and failures 103 | var totalSuccessDuration, totalFailureDuration time.Duration 104 | for _, s := range successes { 105 | totalSuccessDuration += s.Duration 106 | } 107 | for _, f := range failures { 108 | totalFailureDuration += f.Duration 109 | } 110 | 111 | if len(successes) > 0 { 112 | fmt.Printf("Avg success time: %v\n", (totalSuccessDuration / time.Duration(len(successes))).Round(time.Millisecond)) 113 | } 114 | if len(failures) > 0 { 115 | fmt.Printf("Avg failure time: %v\n", (totalFailureDuration / time.Duration(len(failures))).Round(time.Millisecond)) 116 | } 117 | 118 | if len(failures) > 0 { 119 | fmt.Printf("\nError details:\n") 120 | 121 | // group errors by type 122 | errorsByType := make(map[string][]Result) 123 | for _, result := range failures { 124 | errType := errorTypeString(result.Error) 125 | errorsByType[errType] = append(errorsByType[errType], result) 126 | } 127 | 128 | // print grouped errors 129 | errorTypes := make([]string, 0, len(errorsByType)) 130 | for errType := range errorsByType { 131 | errorTypes = append(errorTypes, errType) 132 | } 133 | sort.Strings(errorTypes) 134 | 135 | for _, errType := range errorTypes { 136 | results := errorsByType[errType] 137 | fmt.Printf("\n• %s (%d occurrences):\n", errType, len(results)) 138 | 139 | // sort results by timestamp 140 | sort.Slice(results, func(i, j int) bool { 141 | return results[i].Timestamp.Before(results[j].Timestamp) 142 | }) 143 | 144 | for _, result := range results { 145 | fmt.Printf(" - %s (at %s, took %v)\n", 146 | result.JobID, 147 | result.Timestamp.Format("15:04:05.000"), 148 | result.Duration.Round(time.Millisecond)) 149 | } 150 | } 151 | } 152 | } 153 | 154 | type workerParam struct { 155 | verbose bool 156 | errorRate float64 157 | collector *pool.Collector[Result] 158 | } 159 | 160 | func worker(p workerParam) func(ctx context.Context, jobID string) error { 161 | return func(ctx context.Context, jobID string) error { 162 | start := time.Now() 163 | 164 | // simulate processing time 165 | processingTime := time.Duration(50+rand.Intn(150)) * time.Millisecond 166 | 167 | if p.verbose { 168 | fmt.Printf("Processing %s (will take %v)...\n", jobID, processingTime) 169 | } 170 | 171 | // simulate work 172 | select { 173 | case <-time.After(processingTime): 174 | duration := time.Since(start) 175 | 176 | // randomly generate error based on error rate 177 | if rand.Float64() < p.errorRate { 178 | // choose a random error type 179 | var err error 180 | switch 
rand.Intn(3) { 181 | case 0: 182 | err = errors.New("validation failed") 183 | case 1: 184 | err = errors.New("database connection failed") 185 | case 2: 186 | err = errors.New("timeout exceeded") 187 | } 188 | 189 | if p.verbose { 190 | fmt.Printf("❌ %s failed: %v\n", jobID, err) 191 | } 192 | 193 | // submit error result to collector 194 | p.collector.Submit(Result{ 195 | JobID: jobID, 196 | Success: false, 197 | Error: err, 198 | Timestamp: time.Now(), 199 | Duration: duration, 200 | }) 201 | 202 | // return error so pool metrics track it correctly 203 | return err 204 | } 205 | 206 | if p.verbose { 207 | fmt.Printf("✅ %s completed successfully\n", jobID) 208 | } 209 | 210 | // submit success result to collector 211 | p.collector.Submit(Result{ 212 | JobID: jobID, 213 | Success: true, 214 | Timestamp: time.Now(), 215 | Duration: duration, 216 | }) 217 | 218 | return nil 219 | 220 | case <-ctx.Done(): 221 | p.collector.Submit(Result{ 222 | JobID: jobID, 223 | Success: false, 224 | Error: ctx.Err(), 225 | Timestamp: time.Now(), 226 | Duration: time.Since(start), 227 | }) 228 | return ctx.Err() 229 | } 230 | } 231 | } 232 | 233 | // errorTypeString extracts a consistent string representation of error type 234 | func errorTypeString(err error) string { 235 | if err == nil { 236 | return "nil error" 237 | } 238 | 239 | msg := err.Error() 240 | // extract the main error type without variable parts 241 | if idx := strings.IndexByte(msg, ':'); idx > 0 { 242 | return msg[:idx] 243 | } 244 | return msg 245 | } 246 | -------------------------------------------------------------------------------- /examples/collectors_chain/README.md: -------------------------------------------------------------------------------- 1 | # Pool Chain Processing (with collectors) - Example 2 | 3 | This example demonstrates how to chain multiple worker pools using [go-pkgz/pool](https://github.com/go-pkgz/pool) package to create a concurrent processing pipeline. It shows how to transform data through multiple processing stages while maintaining type safety and proper coordination between pools. 4 | 5 | ## What Makes it Special? 6 | 7 | 1. Pool Chaining: 8 | - Multiple pools connected via collectors 9 | - Each stage processes independently 10 | - Type-safe data transformation between stages 11 | - Automatic coordination via iterators 12 | 13 | 2. Concurrent Processing: 14 | - Each pool runs its own workers 15 | - Non-blocking data flow between pools 16 | - Independent scaling of each stage 17 | - Automatic backpressure handling 18 | 19 | 3. Data Flow Patterns: 20 | - Type transformation between stages 21 | - Filtering capability (skip items) 22 | - Progress tracking with timestamps 23 | - Performance metrics collection 24 | 25 | ## Features 26 | 27 | - Multi-stage processing pipeline 28 | - Independent worker pools for each stage 29 | - Type-safe data transformation 30 | - Concurrent processing across all stages 31 | - Automatic cleanup and resource management 32 | - Built-in metrics collection 33 | - Processing time tracking 34 | - Optional data filtering between stages 35 | 36 | ## Implementation Details 37 | 38 | The implementation demonstrates several key concepts: 39 | 40 | 1. Pool Type Definition: 41 | ```go 42 | type counterPool struct { 43 | *pool.WorkerGroup[stringData] // processes input type 44 | collector *pool.Collector[countData] // produces output type 45 | } 46 | ``` 47 | 48 | 2. 
Pool Construction:
49 | ```go
50 | func newCounterPool(ctx context.Context, workers int) *counterPool {
51 | collector := pool.NewCollector[countData](ctx, workers)
52 | p := pool.New[stringData](workers, pool.WorkerFunc[stringData](
53 | func(ctx context.Context, n stringData) error {
54 | // process data and submit to collector
55 | return nil
56 | }))
57 | return &counterPool{WorkerGroup: p, collector: collector}
58 | }
59 | ```
60 | 
61 | 3. Pool Chaining:
62 | ```go
63 | counter := newCounterPool(ctx, 2)
64 | multiplier := newMultiplierPool(ctx, 4)
65 | squares := newSquarePool(ctx, 4)
66 | // pipe data between pools
67 | go func() {
68 | for v := range counter.collector.Iter() {
69 | multiplier.Submit(v)
70 | }
71 | multiplier.Close(ctx)
72 | multiplier.collector.Close() // the next stage's Iter ends only after this
73 | }()
74 | ```
75 | 
76 | ## Architecture
77 | 
78 | The pipeline consists of three stages:
79 | 
80 | ```
81 | Input Strings
82 | │
83 | ▼
84 | Counter Pool (2 workers)
85 | │ counts 'a' chars
86 | │ filters count > 2
87 | │
88 | ▼
89 | Multiplier Pool (4 workers)
90 | │ multiplies by 10
91 | │
92 | ▼
93 | Square Pool (4 workers)
94 | │ squares the value
95 | │
96 | ▼
97 | Final Results
98 | ```
99 | 
100 | Each stage:
101 | - Runs independently
102 | - Has its own worker pool
103 | - Processes items as they arrive
104 | - Transforms data to the next type
105 | - Reports processing metrics
106 | 
107 | ## Data Flow Types
108 | 
109 | The pipeline uses distinct types for each stage:
110 | 
111 | ```go
112 | stringData → countData → multipliedData → finalData
113 | {idx, ts} {idx, count} {idx, value} {idx, result}
114 | ```
115 | 
116 | - Each type carries minimal necessary data
117 | - Index maintains reference to original input
118 | - Timestamp tracks processing duration
119 | 
120 | ## Example Output
121 | 
122 | ```
123 | submitting: "alabama"
124 | counted 'a' in "alabama" -> 4, duration: 123ms
125 | multiplied: 4 -> 40 (src: "alabama", processing time: 234ms)
126 | squared: 40 -> 1600 (src: "alabama", processing time: 345ms)
127 | 
128 | metrics:
129 | counter: processed:11, errors:0, workers:2
130 | multiplier: processed:6, errors:0, workers:4
131 | squares: processed:6, errors:0, workers:4
132 | ```
133 | 
134 | ## Notes
135 | 
136 | - Each pool can scale independently via worker count
137 | - Collector's Iter() handles backpressure automatically
138 | - Close() must be called on both pool and collector after submission is done
139 | - Metrics track processing stats for each stage
140 | - Type safety is maintained throughout the pipeline
141 | - Data filtering can happen at any stage
142 | 
143 | The example demonstrates a practical approach to building concurrent processing pipelines with proper resource management and type safety. -------------------------------------------------------------------------------- /examples/collectors_chain/go.mod: -------------------------------------------------------------------------------- 1 | module examples/collectors_chain
2 | 
3 | go 1.24
4 | 
5 | require github.com/go-pkgz/pool v0.7.0
6 | 
7 | require golang.org/x/sync v0.11.0 // indirect
8 | 
9 | replace github.com/go-pkgz/pool => ../..
10 | -------------------------------------------------------------------------------- /examples/collectors_chain/go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/go-pkgz/pool v0.5.0 h1:fP0WpEGMAcFEBQ7l7aAZsh7RBkzx34FVgufJoVvDTYY= 4 | github.com/go-pkgz/pool v0.5.0/go.mod h1:e1qn5EYmXshPcOk2buL2ZC20w7RTAWUgbug+L2SyH7I= 5 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 6 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 7 | github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= 8 | github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 9 | golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w= 10 | golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= 11 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 12 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 13 | -------------------------------------------------------------------------------- /examples/collectors_chain/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "math/rand" 7 | "strings" 8 | "time" 9 | 10 | "github.com/go-pkgz/pool" 11 | ) 12 | 13 | // data types for each stage of processing pipeline. 14 | // each pool transforms data from its input type to output type. 15 | type stringData struct { 16 | idx int // index in the input array 17 | ts time.Time // timestamp to track processing duration 18 | } 19 | 20 | type countData struct { 21 | idx int 22 | count int 23 | ts time.Time 24 | } 25 | 26 | type multipliedData struct { 27 | idx int 28 | value int 29 | ts time.Time 30 | } 31 | 32 | type finalData struct { 33 | idx int 34 | result int 35 | } 36 | 37 | // counterPool demonstrates the first stage of processing. 38 | // each pool type embeds WorkerGroup to handle concurrent processing and Collector to gather results. 39 | type counterPool struct { 40 | *pool.WorkerGroup[stringData] // worker group processes stringData 41 | *pool.Collector[countData] // collector gathers countData 42 | } 43 | 44 | // newCounterPool creates a pool that counts 'a' chars in strings. 45 | // demonstrates pool construction pattern: collector -> worker -> pool. 
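// Note the filtering contract below: strings with two or fewer 'a's are dropped at this
// stage, so downstream pools receive a reduced stream rather than one item per input.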
46 | func newCounterPool(ctx context.Context, workers int) *counterPool { 47 | collector := pool.NewCollector[countData](ctx, workers) // collector to gather results, buffer size == workers 48 | p := pool.New[stringData](workers, pool.WorkerFunc[stringData](func(_ context.Context, n stringData) error { 49 | time.Sleep(time.Duration(rand.Intn(5)) * time.Millisecond) // simulate heavy work 50 | count := strings.Count(inputStrings[n.idx], "a") // use global var for logging only 51 | if count > 2 { 52 | // demonstrates filtering: only strings with >2 'a's passed to the next stage 53 | collector.Submit(countData{idx: n.idx, count: count, ts: n.ts}) 54 | } 55 | fmt.Printf("counted 'a' in %q -> %d, duration: %v\n", inputStrings[n.idx], count, time.Since(n.ts)) 56 | return nil 57 | })) 58 | return &counterPool{WorkerGroup: p.WithBatchSize(3), Collector: collector} 59 | } 60 | 61 | type multiplierPool struct { 62 | *pool.WorkerGroup[countData] 63 | *pool.Collector[multipliedData] 64 | } 65 | 66 | func newMultiplierPool(ctx context.Context, workers int) *multiplierPool { 67 | collector := pool.NewCollector[multipliedData](ctx, workers) 68 | p := pool.New[countData](workers, pool.WorkerFunc[countData](func(_ context.Context, n countData) error { 69 | time.Sleep(time.Duration(rand.Intn(5)) * time.Millisecond) 70 | multiplied := n.count * 10 // transform data: multiply by 10 71 | fmt.Printf("multiplied: %d -> %d (src: %q, processing time: %v)\n", 72 | n.count, multiplied, inputStrings[n.idx], time.Since(n.ts)) 73 | collector.Submit(multipliedData{idx: n.idx, value: multiplied, ts: n.ts}) 74 | return nil 75 | })) 76 | return &multiplierPool{WorkerGroup: p.WithBatchSize(3), Collector: collector} 77 | } 78 | 79 | type squarePool struct { 80 | *pool.WorkerGroup[multipliedData] 81 | *pool.Collector[finalData] 82 | } 83 | 84 | func newSquarePool(ctx context.Context, workers int) *squarePool { 85 | collector := pool.NewCollector[finalData](ctx, workers) 86 | p := pool.New[multipliedData](workers, pool.WorkerFunc[multipliedData](func(_ context.Context, n multipliedData) error { 87 | squared := n.value * n.value 88 | fmt.Printf("squared: %d -> %d (src: %q, processing time: %v)\n", 89 | n.value, squared, inputStrings[n.idx], time.Since(n.ts)) 90 | time.Sleep(time.Duration(rand.Intn(5)) * time.Millisecond) 91 | collector.Submit(finalData{idx: n.idx, result: squared}) 92 | return nil 93 | })) 94 | return &squarePool{WorkerGroup: p.WithBatchSize(3), Collector: collector} 95 | } 96 | 97 | // ProcessStrings demonstrates chaining multiple pools together to create a processing pipeline. 98 | // Each pool runs concurrently and processes items as they become available from the previous stage. 
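// Shutdown is driven by the iterators rather than explicit waits: the submitter closes the
// first pool and its collector, each piping goroutine closes the next stage once the upstream
// Iter drains, and the final Iter over the squares collector returns only when the whole
// chain has completed.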
99 | func ProcessStrings(ctx context.Context, strings []string) ([]finalData, error) { 100 | // create all pools before starting any processing 101 | counter := newCounterPool(ctx, 2) 102 | multiplier := newMultiplierPool(ctx, 4) 103 | squares := newSquarePool(ctx, 4) 104 | 105 | // start all pools' workers 106 | // this is non-blocking operation, workers will start processing as soon as items are submitted 107 | counter.Go(ctx) 108 | multiplier.Go(ctx) 109 | squares.Go(ctx) 110 | 111 | // first goroutine feeds input data into the pipeline 112 | // we use a goroutine to simulate a real-world scenario where data is coming from an external source 113 | go func() { 114 | for i := range strings { 115 | fmt.Printf("submitting: %q\n", strings[i]) 116 | counter.WorkerGroup.Submit(stringData{idx: i, ts: time.Now()}) 117 | time.Sleep(time.Duration(rand.Intn(3)) * time.Millisecond) 118 | } 119 | // close pool and collector when all inputs are submitted 120 | counter.WorkerGroup.Close(ctx) 121 | counter.Collector.Close() 122 | }() 123 | 124 | // organize pipes between pools 125 | // we use goroutines to communicate between pools in a non-blocking way 126 | go func() { 127 | // pipe from counter to multiplier using collector's iterator 128 | for v := range counter.Iter() { // iter will stop on completion of counter pool 129 | multiplier.WorkerGroup.Submit(v) 130 | } 131 | multiplier.WorkerGroup.Close(ctx) 132 | multiplier.Collector.Close() 133 | }() 134 | 135 | go func() { 136 | // pipe from multiplier to squares 137 | for v := range multiplier.Iter() { // iter will stop on completion of multiplier pool 138 | squares.WorkerGroup.Submit(v) 139 | } 140 | squares.WorkerGroup.Close(ctx) 141 | squares.Collector.Close() 142 | }() 143 | 144 | // collect final results until all work is done 145 | var results []finalData 146 | // iter will stop on completion of squares pool which is the last in the chain 147 | // this is a blocking operation and will return when all pools are done 148 | // we don't need to wait for each pool to finish explicitly, the iter handles it 149 | for v := range squares.Iter() { 150 | results = append(results, v) 151 | } 152 | 153 | // print metrics showing how each pool performed 154 | fmt.Printf("\nmetrics:\ncounter: %s\nmultiplier: %s\nsquares: %s\n", 155 | counter.Metrics().GetStats(), multiplier.Metrics().GetStats(), squares.Metrics().GetStats()) 156 | return results, nil 157 | } 158 | 159 | // store input array in a global for logging purposes only 160 | var inputStrings []string 161 | 162 | func main() { 163 | inputStrings = []string{ 164 | "banana", 165 | "alabama", 166 | "california", 167 | "canada", 168 | "australia", 169 | "alaska", 170 | "arkansas", 171 | "arizona", 172 | "abracadabra", 173 | "bandanna", 174 | "barbarian", 175 | "antarctica", 176 | "arctic", 177 | "baccarat", 178 | } 179 | 180 | res, err := ProcessStrings(context.Background(), inputStrings) 181 | if err != nil { 182 | panic(err) 183 | } 184 | fmt.Println("\nFinal results:") 185 | for _, v := range res { 186 | fmt.Printf("src: %q, squared a-count: %d\n", inputStrings[v.idx], v.result) 187 | } 188 | fmt.Printf("\nTotal: %d", len(res)) 189 | } 190 | -------------------------------------------------------------------------------- /examples/direct_chain/README.md: -------------------------------------------------------------------------------- 1 | # Pool Chain Processing (direct) - Example 2 | 3 | This example demonstrates how to chain multiple worker pools using [go-pkgz/pool](https://github.com/go-pkgz/pool) 
package to create a concurrent processing pipeline. Pools directly submit data to the next stage, with a collector only at the final stage to gather results. 4 | 5 | ## Key Concepts 6 | 7 | 1. Pool Chaining: 8 | - Pools directly reference and send to the next pool 9 | - Single collector at the end of chain 10 | - Each stage processes independently 11 | - Type-safe data transformation between stages 12 | 13 | 2. Data Flow: 14 | - Input strings -> count 'a's -> multiply by 10 -> square 15 | - Each stage has its own worker pool 16 | - Final collector gathers results 17 | - Processing time tracked at each stage 18 | 19 | ## Implementation Details 20 | 21 | The example shows three key patterns: 22 | 23 | 1. Pool Declaration and Cross-References: 24 | ```go 25 | var pCounter *pool.WorkerGroup[stringData] 26 | var pMulti *pool.WorkerGroup[countData] 27 | var pSquares *pool.WorkerGroup[multipliedData] 28 | collector := pool.NewCollector[finalData](ctx, 10) 29 | ``` 30 | 31 | 2. Direct Pool Submission: 32 | ```go 33 | pCounter = pool.New[stringData](2, pool.WorkerFunc[stringData]( 34 | func(_ context.Context, d stringData) error { 35 | count := strings.Count(d.data, "a") 36 | if count > 2 { 37 | pMulti.Send(countData{...}) // direct submission to next pool, thread safe version of Submit 38 | } 39 | return nil 40 | })) 41 | ``` 42 | 43 | 3. Pipeline Coordination: 44 | ```go 45 | go func() { 46 | pCounter.Wait(ctx) // wait for first pool 47 | pMulti.Close(ctx) // close second pool 48 | pSquares.Close(ctx) // close final pool 49 | collector.Close() // close collector 50 | }() 51 | ``` 52 | 53 | ## Data Flow Types 54 | 55 | ```go 56 | stringData { countData { multipliedData { finalData { 57 | idx int idx int idx int idx int 58 | data string count int value int result int 59 | ts time.Time ts time.Time ts time.Time 60 | } } } } 61 | ``` 62 | 63 | ## Features 64 | 65 | - Batch processing (size=3) in each pool 66 | - Filtering capabilities (count > 2) 67 | - Processing time tracking 68 | - Independent worker counts per stage 69 | - Built-in metrics collection 70 | - Simulated processing delays 71 | 72 | ## Example Output 73 | 74 | ``` 75 | submitting: "alabama" 76 | counted 'a' in "alabama" -> 4, duration: 123ms 77 | multiplied: 4 -> 40 (src: "alabama", processing time: 234ms) 78 | squared: 40 -> 1600 (src: "alabama", processing time: 345ms) 79 | 80 | metrics: 81 | counter: processed:11, errors:0, workers:2 82 | multiplier: processed:6, errors:0, workers:4 83 | squares: processed:6, errors:0, workers:4 84 | ``` 85 | 86 | ## Usage 87 | 88 | ```go 89 | res, err := ProcessStrings(context.Background(), []string{ 90 | "alabama", "california", "canada", "australia", 91 | }) 92 | ``` 93 | 94 | ## Important Notes 95 | 96 | - Pools must be declared before creation to allow cross-references 97 | - Each stage can filter data (skip items) 98 | - Send can be done directly from workers 99 | - Close() propagates through the chain 100 | - Single collector simplifies result gathering 101 | - Batch size optimizes throughput 102 | - Processing time tracked through pipeline 103 | 104 | This simplified version demonstrates essential patterns for building concurrent processing pipelines while maintaining clean and efficient code structure. 
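
## Close Propagation in main.go

The "Pipeline Coordination" snippet above shows shutdown driven by explicit `Wait`/`Close` calls. The bundled `main.go` wires the same cascade through `WithPoolCompleteFn`: each pool registers a callback that closes the next stage once all of its own workers finish. A condensed sketch of that wiring follows; `counterWorker`, `multiplierWorker`, and `squareWorker` are placeholders standing in for the `WorkerFunc` closures defined in `main.go`:

```go
// counterWorker, multiplierWorker, squareWorker are placeholders for the
// actual worker functions from main.go
pCounter = pool.New[stringData](8, counterWorker).WithBatchSize(3).
	WithPoolCompleteFn(func(ctx context.Context) error {
		return pMulti.Close(ctx) // counter finished -> close multiplier
	})

pMulti = pool.New[countData](10, multiplierWorker).WithBatchSize(3).
	WithPoolCompleteFn(func(ctx context.Context) error {
		return pSquares.Close(ctx) // multiplier finished -> close squares
	})

pSquares = pool.New[multipliedData](10, squareWorker).WithBatchSize(3).
	WithPoolCompleteFn(func(ctx context.Context) error {
		collector.Close() // squares finished -> stop collector.Iter()
		return nil
	})
```

With this wiring only the first pool needs an explicit `Close(ctx)` after submission; completion then propagates down the chain and the final `collector.Iter()` loop terminates on its own.
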
-------------------------------------------------------------------------------- /examples/direct_chain/go.mod: -------------------------------------------------------------------------------- 1 | module examples/direct_chain 2 | 3 | go 1.24 4 | 5 | require github.com/go-pkgz/pool v0.7.0 6 | 7 | require golang.org/x/sync v0.11.0 // indirect 8 | 9 | replace github.com/go-pkgz/pool => ../.. 10 | -------------------------------------------------------------------------------- /examples/direct_chain/go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/go-pkgz/pool v0.5.0 h1:fP0WpEGMAcFEBQ7l7aAZsh7RBkzx34FVgufJoVvDTYY= 4 | github.com/go-pkgz/pool v0.5.0/go.mod h1:e1qn5EYmXshPcOk2buL2ZC20w7RTAWUgbug+L2SyH7I= 5 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 6 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 7 | github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= 8 | github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 9 | golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w= 10 | golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= 11 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 12 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 13 | -------------------------------------------------------------------------------- /examples/direct_chain/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "math/rand" 7 | "strings" 8 | "sync/atomic" 9 | "time" 10 | 11 | "github.com/go-pkgz/pool" 12 | ) 13 | 14 | // data types for each stage of processing pipeline. 15 | // each pool transforms data from its input type to output type. 16 | type stringData struct { 17 | idx int // index in the input array 18 | data string // input data 19 | ts time.Time // timestamp to track processing duration 20 | } 21 | 22 | type countData struct { 23 | idx int 24 | count int 25 | ts time.Time 26 | } 27 | 28 | type multipliedData struct { 29 | idx int 30 | value int 31 | ts time.Time 32 | } 33 | 34 | type finalData struct { 35 | idx int 36 | result int 37 | } 38 | 39 | func ProcessStrings(ctx context.Context, input []string) ([]finalData, error) { 40 | // declare pools and counters for debugging 41 | var pCounter *pool.WorkerGroup[stringData] 42 | var pMulti *pool.WorkerGroup[countData] 43 | var pSquares *pool.WorkerGroup[multipliedData] 44 | var submitted, filtered, multiplied, squared atomic.Int64 45 | 46 | collector := pool.NewCollector[finalData](ctx, 10) 47 | 48 | pCounter = pool.New[stringData](8, pool.WorkerFunc[stringData](func(_ context.Context, d stringData) error { 49 | submitted.Add(1) 50 | time.Sleep(time.Duration(rand.Intn(1)) * time.Millisecond) 51 | count := strings.Count(d.data, "a") 52 | if count > 2 { 53 | filtered.Add(1) 54 | // important: we use Send instead of Submit, because we run inside multiple workers 55 | // and Submit is not thread-safe. 
Send does the same, just in thread-safe way 56 | pMulti.Send(countData{idx: d.idx, count: count, ts: d.ts}) 57 | } 58 | fmt.Printf("counted 'a' in %q -> %d, duration: %v\n", inputStrings[d.idx], count, time.Since(d.ts)) 59 | return nil 60 | })).WithBatchSize(3).WithPoolCompleteFn(func(ctx context.Context) error { 61 | return pMulti.Close(ctx) 62 | }) 63 | 64 | pMulti = pool.New[countData](10, pool.WorkerFunc[countData](func(_ context.Context, d countData) error { 65 | multiplied.Add(1) 66 | time.Sleep(time.Duration(rand.Intn(10)) * time.Millisecond) 67 | val := d.count * 10 68 | fmt.Printf("multiplied: %d -> %d (src: %q, processing time: %v)\n", 69 | d.count, val, inputStrings[d.idx], time.Since(d.ts)) 70 | pSquares.Send(multipliedData{idx: d.idx, value: val, ts: d.ts}) 71 | return nil 72 | })).WithBatchSize(3).WithPoolCompleteFn(func(ctx context.Context) error { 73 | return pSquares.Close(ctx) 74 | }) 75 | 76 | pSquares = pool.New[multipliedData](10, pool.WorkerFunc[multipliedData](func(_ context.Context, d multipliedData) error { 77 | squared.Add(1) 78 | val := d.value * d.value 79 | fmt.Printf("squared: %d -> %d (src: %q, processing time: %v)\n", 80 | d.value, val, inputStrings[d.idx], time.Since(d.ts)) 81 | time.Sleep(time.Duration(rand.Intn(10)) * time.Millisecond) 82 | collector.Submit(finalData{idx: d.idx, result: val}) 83 | return nil 84 | })).WithBatchSize(3).WithPoolCompleteFn(func(ctx context.Context) error { 85 | collector.Close() 86 | return nil 87 | }) 88 | 89 | pCounter.Go(ctx) 90 | pMulti.Go(ctx) 91 | pSquares.Go(ctx) 92 | 93 | go func() { 94 | for i := range input { 95 | for range 100 { 96 | pCounter.Submit(stringData{idx: i, data: input[i], ts: time.Now()}) 97 | time.Sleep(time.Duration(rand.Intn(1)) * time.Millisecond) 98 | } 99 | } 100 | pCounter.Close(ctx) 101 | }() 102 | 103 | var results []finalData 104 | for v := range collector.Iter() { 105 | results = append(results, v) 106 | } 107 | 108 | // print debug statistics 109 | fmt.Printf("\nProcessing statistics:\n") 110 | fmt.Printf("Total items submitted: %d\n", submitted.Load()) 111 | fmt.Printf("Items passed filter (>2 'a's): %d\n", filtered.Load()) 112 | fmt.Printf("Items multiplied: %d\n", multiplied.Load()) 113 | fmt.Printf("Items squared: %d\n", squared.Load()) 114 | fmt.Printf("Results collected: %d\n", len(results)) 115 | 116 | fmt.Printf("\nPool metrics:\ncounter: %s\nmultiplier: %s\nsquares: %s\n", 117 | pCounter.Metrics().GetStats(), pMulti.Metrics().GetStats(), pSquares.Metrics().GetStats()) 118 | return results, nil 119 | } 120 | 121 | // store input array in a global for logging purposes only 122 | var inputStrings []string 123 | 124 | func main() { 125 | inputStrings = []string{ 126 | "banana", 127 | "alabama", 128 | "california", 129 | "canada", 130 | "australia", 131 | "alaska", 132 | "arkansas", 133 | "arizona", 134 | "abracadabra", 135 | "bandanna", 136 | "barbarian", 137 | "antarctica", 138 | "arctic", 139 | "baccarat", 140 | } 141 | 142 | res, err := ProcessStrings(context.Background(), inputStrings) 143 | if err != nil { 144 | panic(err) 145 | } 146 | fmt.Printf("\nFinal results:\n") 147 | for i, v := range res { 148 | fmt.Printf(" %d src: %q, squared a-count: %d\n", i, inputStrings[v.idx], v.result) 149 | } 150 | fmt.Printf("Total: %d\n", len(res)) 151 | } 152 | -------------------------------------------------------------------------------- /examples/middleware/README.md: -------------------------------------------------------------------------------- 1 | # Task Processor with Middleware - 
Example
2 | 
3 | This example demonstrates how to use middleware in the [go-pkgz/pool](https://github.com/go-pkgz/pool) package to build a robust task processing system. It shows both built-in middleware usage and custom middleware creation, emphasizing how middleware can add cross-cutting functionality without modifying the core processing logic.
4 | 
5 | ## What Makes it Special?
6 | 
7 | 1. Middleware composition:
8 | - Shows how multiple middleware work together
9 | - Demonstrates middleware execution order
10 | - Combines both built-in and custom middleware
11 | 
12 | 2. Cross-cutting concerns:
13 | - Input validation before processing
14 | - Automatic retries for failed tasks
15 | - Panic recovery for robustness
16 | - Rate limiting for flow control
17 | - Structured logging for observability
18 | 
19 | 3. Real-world patterns:
20 | - Configuration management
21 | - Error handling
22 | - Metrics collection
23 | - Structured logging with slog
24 | 
25 | ## Features
26 | 
27 | - Task validation before processing
28 | - Automatic retries with exponential backoff
29 | - Panic recovery with custom handler
30 | - Rate limiting with token bucket algorithm
31 | - Structured JSON logging
32 | - Performance metrics collection
33 | - Configurable worker count and retry attempts
34 | 
35 | ## Installation
36 | 
37 | ```bash
38 | go build
39 | ```
40 | 
41 | ## Usage
42 | 
43 | ```bash
44 | go run main.go [options]
45 | ```
46 | 
47 | Options:
48 | - `-workers` - number of worker goroutines (default: 2)
49 | - `-retries` - number of retries for failed tasks (default: 3)
50 | 
51 | Example:
52 | ```bash
53 | go run main.go -workers 4 -retries 5
54 | ```
55 | 
56 | ## Implementation Details
57 | 
58 | The implementation demonstrates several key concepts:
59 | 
60 | 1. Middleware creation:
61 | ```go
62 | func makeStructuredLogger(logger *slog.Logger) pool.Middleware[Task] {
63 | return func(next pool.Worker[Task]) pool.Worker[Task] {
64 | return pool.WorkerFunc[Task](func(ctx context.Context, task Task) error {
65 | // pre-processing logging
66 | err := next.Do(ctx, task)
67 | // post-processing logging
68 | return err
69 | })
70 | }
71 | }
72 | ```
73 | 
74 | 2. Middleware composition:
75 | ```go
76 | pool.New[Task](workers, makeWorker()).Use(
77 | middleware.Validator(validator), // validate first
78 | middleware.Retry[Task](retries, time.Second), // then retry on failure
79 | middleware.Recovery[Task](handler), // recover from panics
80 | middleware.RateLimiter[Task](5, 3), // rate limit to 5/sec, burst of 3
81 | customLogger, // log everything
82 | )
83 | ```
84 | 
85 | 3.
Task processing: 86 | ```go 87 | type Task struct { 88 | ID string `json:"id"` 89 | Priority int `json:"priority"` 90 | Payload string `json:"payload"` 91 | } 92 | ``` 93 | 94 | ## Output Example 95 | 96 | ```json 97 | { 98 | "time": "2025-02-12T10:00:00Z", 99 | "level": "DEBUG", 100 | "msg": "processing task", 101 | "task_id": "1", 102 | "priority": 1, 103 | "payload": {"id":"1","priority":1,"payload":"normal task"} 104 | } 105 | { 106 | "time": "2025-02-12T10:00:00Z", 107 | "level": "INFO", 108 | "msg": "task completed", 109 | "task_id": "1", 110 | "duration_ms": 100 111 | } 112 | { 113 | "time": "2025-02-12T10:00:00Z", 114 | "level": "ERROR", 115 | "msg": "task failed", 116 | "task_id": "2", 117 | "duration_ms": 100, 118 | "error": "failed to process task 2" 119 | } 120 | { 121 | "time": "2025-02-12T10:00:00Z", 122 | "level": "INFO", 123 | "msg": "submitting rate-limited tasks" 124 | } 125 | { 126 | "time": "2025-02-12T10:00:00Z", 127 | "level": "INFO", 128 | "msg": "pool finished", 129 | "processed": 14, 130 | "errors": 2, 131 | "total_time": "3.2s", 132 | "duration": "2.1s" 133 | } 134 | ``` 135 | 136 | ## Architecture 137 | 138 | The program is structured in several logical components: 139 | 140 | ``` 141 | main 142 | ├── setupConfig - configuration and logger setup 143 | ├── makeWorker - core worker implementation 144 | ├── makeValidator - input validation rules 145 | ├── makePool - pool creation with middleware 146 | └── runPool - execution and task submission 147 | ``` 148 | 149 | Each component is isolated and has a single responsibility, making the code easy to maintain and test. 150 | 151 | ## Notes 152 | 153 | - Middleware executes in the order it's added to Use() 154 | - The first middleware wraps the outermost layer 155 | - Built-in middleware handles common patterns 156 | - Custom middleware can add any functionality 157 | - Rate limiting is shared across all workers in the pool 158 | - Structured logging as an example of cross-cutting concern -------------------------------------------------------------------------------- /examples/middleware/go.mod: -------------------------------------------------------------------------------- 1 | module examples/middleware 2 | 3 | go 1.24 4 | 5 | require github.com/go-pkgz/pool v0.7.0 6 | 7 | require ( 8 | golang.org/x/sync v0.14.0 // indirect 9 | golang.org/x/time v0.11.0 // indirect 10 | ) 11 | 12 | replace github.com/go-pkgz/pool => ../.. 
13 | -------------------------------------------------------------------------------- /examples/middleware/go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 4 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 5 | github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= 6 | github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 7 | golang.org/x/sync v0.14.0 h1:woo0S4Yywslg6hp4eUFjTVOyKt0RookbpAHG4c1HmhQ= 8 | golang.org/x/sync v0.14.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= 9 | golang.org/x/time v0.11.0 h1:/bpjEDfN9tkoN/ryeYHnv5hcMlc8ncjMcM4XBk5NWV0= 10 | golang.org/x/time v0.11.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= 11 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 12 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 13 | -------------------------------------------------------------------------------- /examples/middleware/main.go: -------------------------------------------------------------------------------- 1 | // file: examples/middleware/main.go 2 | package main 3 | 4 | import ( 5 | "context" 6 | "encoding/json" 7 | "flag" 8 | "fmt" 9 | "log/slog" 10 | "os" 11 | "strings" 12 | "time" 13 | 14 | "github.com/go-pkgz/pool" 15 | "github.com/go-pkgz/pool/middleware" 16 | ) 17 | 18 | // Task represents a job to be processed 19 | type Task struct { 20 | ID string `json:"id"` 21 | Priority int `json:"priority"` 22 | Payload string `json:"payload"` 23 | } 24 | 25 | // config holds application configuration 26 | type config struct { 27 | workers int 28 | retries int 29 | logger *slog.Logger 30 | } 31 | 32 | func main() { 33 | // parse config and setup logger 34 | cfg := setupConfig() 35 | 36 | // create worker pool 37 | p := makePool(cfg) 38 | 39 | // start pool and process tasks 40 | if err := runPool(context.Background(), p, cfg); err != nil { 41 | cfg.logger.Error("pool finished with error", "error", err) 42 | os.Exit(1) 43 | } 44 | } 45 | 46 | func setupConfig() config { 47 | // parse flags 48 | workers := flag.Int("workers", 2, "number of workers") 49 | retries := flag.Int("retries", 3, "number of retries") 50 | flag.Parse() 51 | 52 | // setup structured logger 53 | logger := slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{ 54 | Level: slog.LevelDebug, 55 | })) 56 | 57 | return config{ 58 | workers: *workers, 59 | retries: *retries, 60 | logger: logger, 61 | } 62 | } 63 | 64 | func runPool(ctx context.Context, p *pool.WorkerGroup[Task], cfg config) error { 65 | // start the pool 66 | if err := p.Go(ctx); err != nil { 67 | return fmt.Errorf("failed to start pool: %w", err) 68 | } 69 | 70 | // submit test tasks 71 | tasks := []Task{ 72 | {ID: "1", Priority: 1, Payload: "normal task"}, 73 | {ID: "2", Priority: 5, Payload: "fail me"}, // this will fail and retry 74 | {ID: "3", Priority: 2, Payload: "normal task"}, 75 | {ID: "", Priority: 11, Payload: "invalid"}, // this will fail validation 76 | } 77 | 78 | for _, task := range tasks { 79 | p.Submit(task) 80 | } 81 | 82 | // demonstrate rate limiting 83 | cfg.logger.Info("submitting rate-limited tasks") 84 | start := time.Now() 85 | for 
i := 0; i < 10; i++ { 86 | p.Submit(Task{ID: fmt.Sprintf("rate-%d", i), Priority: 3, Payload: "rate limited task"}) 87 | } 88 | 89 | // close pool and wait for completion 90 | if err := p.Close(ctx); err != nil { 91 | return err 92 | } 93 | 94 | // print final metrics 95 | metrics := p.Metrics().GetStats() 96 | cfg.logger.Info("pool finished", "processed", metrics.Processed, "errors", metrics.Errors, 97 | "total_time", metrics.TotalTime.String(), "duration", time.Since(start).String()) 98 | 99 | return nil 100 | } 101 | 102 | func makePool(cfg config) *pool.WorkerGroup[Task] { 103 | return pool.New[Task](cfg.workers, makeWorker()).Use( 104 | middleware.Validator(makeValidator()), // validate tasks 105 | middleware.Retry[Task](cfg.retries, time.Second), // retry failed tasks 106 | middleware.Recovery[Task](func(p interface{}) { // recover from panics 107 | cfg.logger.Error("panic recovered", "error", fmt.Sprint(p)) 108 | }), 109 | middleware.RateLimiter[Task](5, 3), // rate limit: 5 tasks/second with burst of 3 110 | makeStructuredLogger(cfg.logger), // custom structured logging 111 | ) 112 | } 113 | 114 | func makeWorker() pool.Worker[Task] { 115 | return pool.WorkerFunc[Task](func(ctx context.Context, task Task) error { 116 | // simulate some work with random failures 117 | if strings.Contains(task.Payload, "fail") { 118 | return fmt.Errorf("failed to process task %s", task.ID) 119 | } 120 | time.Sleep(100 * time.Millisecond) 121 | return nil 122 | }) 123 | } 124 | 125 | func makeValidator() func(Task) error { 126 | return func(task Task) error { 127 | if task.ID == "" { 128 | return fmt.Errorf("empty task ID") 129 | } 130 | if task.Priority < 0 || task.Priority > 10 { 131 | return fmt.Errorf("invalid priority %d, must be between 0 and 10", task.Priority) 132 | } 133 | return nil 134 | } 135 | } 136 | 137 | func makeStructuredLogger(logger *slog.Logger) pool.Middleware[Task] { 138 | return func(next pool.Worker[Task]) pool.Worker[Task] { 139 | return pool.WorkerFunc[Task](func(ctx context.Context, task Task) error { 140 | start := time.Now() 141 | taskJSON, _ := json.Marshal(task) 142 | 143 | logger.Debug("processing task", "task_id", task.ID, "priority", task.Priority, "payload", string(taskJSON)) 144 | 145 | err := next.Do(ctx, task) 146 | duration := time.Since(start) 147 | 148 | if err != nil { 149 | logger.Error("task failed", "task_id", task.ID, "duration_ms", duration.Milliseconds(), "error", err.Error()) 150 | return err 151 | } 152 | 153 | logger.Info("task completed", "task_id", task.ID, "duration_ms", duration.Milliseconds()) 154 | return nil 155 | }) 156 | } 157 | } 158 | -------------------------------------------------------------------------------- /examples/parallel_files/README.md: -------------------------------------------------------------------------------- 1 | # Simple Text Processor - Parallel Files Example 2 | 3 | This example demonstrates how to use parallel processing with [go-pkgz/pool](https://github.com/go-pkgz/pool) package for efficient file analysis. It reads multiple files in chunks and counts word frequencies using multiple workers. 4 | 5 | ## What Makes it Special? 6 | 7 | 1. File chunking: 8 | - Files read in 32KB chunks for memory efficiency 9 | - Each chunk processed independently 10 | - Allows parallel processing of large files 11 | 12 | 2. Independent worker state: 13 | - Each worker has its own word frequency map 14 | - No synchronization needed between workers 15 | - Results merged only on completion 16 | 17 | 3. 
Built-in metrics:
18 | - Shows processing rates and latencies
19 | - Tracks word length distribution
20 | - Demonstrates metrics collection API
21 | 
22 | ## Features
23 | 
24 | - Process multiple files in parallel
25 | - Pattern-based file selection
26 | - Word frequency analysis
27 | - Performance metrics tracking
28 | - Configurable worker count
29 | 
30 | ## Installation
31 | 
32 | ```bash
33 | go build
34 | ```
35 | 
36 | ## Usage
37 | 
38 | ```bash
39 | go run main.go [options]
40 | ```
41 | 
42 | Options:
43 | - `-dir` - directory to process (default: ".")
44 | - `-pattern` - file pattern to match (default: "*.txt")
45 | - `-workers` - number of worker goroutines (default: 4)
46 | - `-top` - number of top words to show (default: 10)
47 | 
48 | Example:
49 | ```bash
50 | go run main.go -pattern "*.go" -workers 8
51 | ```
52 | 
53 | ## Implementation Details
54 | 
55 | The key components are:
56 | 
57 | 1. Chunk-based file reading:
58 | ```go
59 | buffer := make([]byte, 32*1024)
60 | for {
61 | n, err := file.Read(buffer)
62 | if err == io.EOF {
63 | break
64 | }
65 | p.Submit(chunk{data: append([]byte(nil), buffer[:n]...)}) // copy the chunk: the shared buffer is reused
66 | }
67 | ```
68 | 
69 | 2. Stateful worker processing:
70 | ```go
71 | type fileWorker struct {
72 | words map[string]int
73 | byteCount int64
74 | }
75 | ```
76 | 
77 | 3. Metrics tracking:
78 | ```go
79 | m := metrics.Get(ctx)
80 | if len(word) > 3 {
81 | m.Inc("long words")
82 | } else {
83 | m.Inc("short words")
84 | }
85 | ```
86 | 
87 | ## Output Example
88 | 
89 | ```
90 | Processing statistics: [processed:3, rate:5603.1/s, avg_latency:0s, proc:0s, total:1ms]
91 | Total bytes: 11522
92 | Unique words: 302
93 | Short words: 647
94 | Long words: 829
95 | 
96 | Top 10 words:
97 | 1. "words": 29 times
98 | 2. "return": 22 times
99 | 3. "word": 18 times
100 | ...
101 | ```
102 | 
103 | ## Architecture
104 | 
105 | The program flows through these stages:
106 | 1. Read files in chunks (32KB)
107 | 2. Distribute chunks to worker pool
108 | 3. Process chunks in parallel
109 | 4. Collect results through collector
110 | 5. Merge and present statistics
111 | 
112 | ## Notes
113 | 
114 | - Memory efficient due to chunk-based processing
115 | - No locks needed in worker implementation
116 | - Scales well with additional workers -------------------------------------------------------------------------------- /examples/parallel_files/go.mod: -------------------------------------------------------------------------------- 1 | module examples/parallel_files
2 | 
3 | go 1.24
4 | 
5 | require github.com/go-pkgz/pool v0.7.0
6 | 
7 | require golang.org/x/sync v0.11.0 // indirect
8 | 
9 | replace github.com/go-pkgz/pool => ../..
10 | -------------------------------------------------------------------------------- /examples/parallel_files/go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 4 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 5 | github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= 6 | github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 7 | golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w= 8 | golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= 9 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 10 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 11 | -------------------------------------------------------------------------------- /examples/parallel_files/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "context" 6 | "flag" 7 | "fmt" 8 | "io" 9 | "log" 10 | "os" 11 | "path/filepath" 12 | "sort" 13 | "strings" 14 | "time" 15 | 16 | "github.com/go-pkgz/pool" 17 | "github.com/go-pkgz/pool/metrics" 18 | ) 19 | 20 | // chunk represents a piece of file to process 21 | type chunk struct { 22 | data []byte 23 | } 24 | 25 | // fileWorker counts words in chunks 26 | type fileWorker struct { 27 | words map[string]int 28 | byteCount int64 29 | } 30 | 31 | // Do implements pool.Worker interface 32 | func (w *fileWorker) Do(ctx context.Context, c chunk) error { 33 | scanner := bufio.NewScanner(strings.NewReader(string(c.data))) 34 | scanner.Split(bufio.ScanWords) 35 | m := metrics.Get(ctx) 36 | for scanner.Scan() { 37 | word := strings.ToLower(strings.Trim(scanner.Text(), ".,!?()[]{}\"';:")) 38 | if len(word) > 3 { 39 | w.words[word]++ 40 | m.Inc("long words") 41 | } else { 42 | m.Inc("short words") 43 | } 44 | } 45 | w.byteCount += int64(len(c.data)) 46 | return scanner.Err() 47 | } 48 | 49 | func main() { 50 | var ( 51 | dir = flag.String("dir", ".", "directory to process") 52 | workers = flag.Int("workers", 4, "number of workers") 53 | pattern = flag.String("pattern", "*.txt", "file pattern to match") 54 | topWords = flag.Int("top", 10, "number of top words to show") 55 | ) 56 | flag.Parse() 57 | 58 | ctx, cancel := context.WithTimeout(context.Background(), time.Minute) 59 | defer cancel() 60 | 61 | collector := pool.NewCollector[fileWorker](ctx, *workers) 62 | 63 | p := pool.NewStateful[chunk](*workers, func() pool.Worker[chunk] { 64 | return &fileWorker{words: make(map[string]int)} // create new worker with empty words map 65 | }) 66 | 67 | // set batch size and complete function 68 | p = p.WithBatchSize(100).WithWorkerCompleteFn(func(_ context.Context, _ int, w pool.Worker[chunk]) error { 69 | collector.Submit(*w.(*fileWorker)) 70 | return nil 71 | }) 72 | 73 | // start pool processing 74 | if err := p.Go(ctx); err != nil { 75 | log.Fatal(err) 76 | } 77 | 78 | // process files 79 | err := filepath.Walk(*dir, func(path string, info os.FileInfo, err error) error { 80 | if err != nil || info.IsDir() { 81 | return err 82 | } 83 | if matched, err := filepath.Match(*pattern, filepath.Base(path)); err != nil 
|| !matched { 84 | return err 85 | } 86 | 87 | file, err := os.Open(path) 88 | if err != nil { 89 | return fmt.Errorf("failed to open %s: %w", path, err) 90 | } 91 | defer file.Close() 92 | 93 | buffer := make([]byte, 32*1024) 94 | for { 95 | n, err := file.Read(buffer) 96 | if err == io.EOF { 97 | break 98 | } 99 | if err != nil { 100 | return fmt.Errorf("error reading %s: %w", path, err) 101 | } 102 | 103 | data := make([]byte, n) 104 | copy(data, buffer[:n]) 105 | p.Submit(chunk{data: data}) // submit chunk to pool 106 | } 107 | return nil 108 | }) 109 | if err != nil { 110 | log.Printf("error walking files: %v", err) 111 | } 112 | 113 | // close pool and collector, initiate all data sent and no more data expected 114 | if err := p.Close(ctx); err != nil { 115 | log.Printf("pool close error: %v", err) 116 | } 117 | collector.Close() 118 | 119 | // merge and print results 120 | totalWords := make(map[string]int) 121 | var totalBytes int64 122 | 123 | // iterate over collector results, merge words and count bytes 124 | for worker := range collector.Iter() { 125 | for word, count := range worker.words { 126 | totalWords[word] += count 127 | } 128 | totalBytes += worker.byteCount 129 | } 130 | 131 | fmt.Printf("\nProcessing statistics: %+v\n", p.Metrics().GetStats()) 132 | fmt.Printf("Total bytes: %d\n", totalBytes) 133 | fmt.Printf("Unique words: %d\n", len(totalWords)) 134 | fmt.Printf("Short words: %d\n", p.Metrics().Get("short words")) 135 | fmt.Printf("Long words: %d\n", p.Metrics().Get("long words")) 136 | 137 | // prepare sorted list of words 138 | type wordCount struct { 139 | word string 140 | count int 141 | } 142 | counts := make([]wordCount, 0, len(totalWords)) 143 | for word, count := range totalWords { 144 | counts = append(counts, wordCount{word, count}) 145 | } 146 | 147 | sort.Slice(counts, func(i, j int) bool { 148 | return counts[i].count > counts[j].count 149 | }) 150 | 151 | fmt.Printf("\nTop %d words:\n", *topWords) 152 | for i := 0; i < *topWords && i < len(counts); i++ { 153 | fmt.Printf("%d. %q: %d times\n", i+1, counts[i].word, counts[i].count) 154 | } 155 | } 156 | -------------------------------------------------------------------------------- /examples/tokenizer_stateful/README.md: -------------------------------------------------------------------------------- 1 | # Simple Text Tokenizer - Stateful Workers Example 2 | 3 | This example demonstrates how to use stateful workers with [go-pkgz/pool](https://github.com/go-pkgz/pool) package. It implements a parallel text tokenizer that counts word frequencies, where each worker maintains its own independent state. 4 | 5 | ## What Makes it Stateful? 6 | 7 | Stateful workers are useful when each worker needs to maintain its own independent data during processing. In this example: 8 | 9 | 1. Each worker keeps its own word frequency map: 10 | - No shared maps or mutexes needed 11 | - No coordination between workers required 12 | - Each worker counts words it sees independently 13 | 14 | 2. Results are combined only at the end: 15 | - Workers don't communicate during processing 16 | - Final results are merged after all processing is done 17 | - Shows how to handle independent worker results 18 | 19 | 3. 
Real-world analogy: 20 | - Like having multiple people count words in different parts of a book 21 | - Each person keeps their own tally 22 | - At the end, all tallies are added together 23 | 24 | This pattern is particularly useful for: 25 | - Processing that can be partitioned (like our text analysis) 26 | - When sharing state would create contention 27 | - When workers need different initialization 28 | - When tracking per-worker statistics 29 | 30 | ## Stateful vs Non-Stateful Approaches 31 | 32 | To understand why this example uses stateful workers, let's compare two approaches: 33 | 34 | ### Non-Stateful (Wrong Way) 35 | ```go 36 | // Shared state between all workers - requires synchronization 37 | sharedCounts := sync.Map{} 38 | 39 | worker := pool.WorkerFunc[string](func(ctx context.Context, line string) error { 40 | for _, word := range strings.Fields(line) { 41 | // Need to synchronize access to shared map 42 | v, _ := sharedCounts.LoadOrStore(word, 0) 43 | sharedCounts.Store(word, v.(int) + 1) 44 | } 45 | return nil 46 | }) 47 | ``` 48 | 49 | ### Stateful (Our Approach) 50 | ```go 51 | // Each worker has its own state 52 | type TokenizingWorker struct { 53 | counts map[string]int // private to this worker 54 | } 55 | 56 | func (w *TokenizingWorker) Do(ctx context.Context, line string) error { 57 | for _, word := range strings.Fields(line) { 58 | w.counts[word]++ // no synchronization needed 59 | } 60 | return nil 61 | } 62 | ``` 63 | 64 | The stateful approach is better because: 65 | - No synchronization overhead 66 | - Better performance due to no lock contention 67 | - Cleaner code without mutex handling 68 | - Easier to maintain and debug 69 | 70 | ## Features 71 | 72 | - Demonstration of stateful worker pattern 73 | - Parallel processing of text files using configurable number of workers 74 | - Batch processing support for better performance 75 | - Word frequency counting 76 | - Processing statistics including timing and error counts 77 | - Word cleanup (lowercase conversion, punctuation removal) 78 | 79 | ## Install 80 | 81 | ```bash 82 | # assuming you are in go-pkgz/pool/examples/tokenizer 83 | go build 84 | ``` 85 | 86 | ## Usage 87 | 88 | ```bash 89 | go run main.go [options] -file=input.txt 90 | ``` 91 | 92 | Options: 93 | - `-file` - input file to process (required) 94 | - `-workers` - number of worker goroutines (default: 4) 95 | - `-batch` - batch size for processing (default: 100) 96 | 97 | Example: 98 | ```bash 99 | go run main.go -file main.go -workers 8 100 | ``` 101 | 102 | ## Output Example 103 | 104 | ``` 105 | Processing stats: 106 | Processed lines: 192 107 | Total words processed: 321 108 | Errors: 0 109 | Processing time: 171.214µs 110 | Total time: 265.541µs 111 | 112 | Per-worker stats: 113 | Worker 0 processed 79 words 114 | Worker 1 processed 63 words 115 | Worker 2 processed 88 words 116 | Worker 3 processed 91 words 117 | 118 | Top 10 most common words: 119 | 1. "counts": 10 times 120 | 2. "return": 8 times 121 | 3. "words": 7 times 122 | 4. "range": 7 times 123 | 5. "processed": 7 times 124 | 6. "word": 6 times 125 | 7. "type": 6 times 126 | 8. "count": 5 times 127 | 9. "line": 5 times 128 | 10. "worker": 5 times 129 | ``` 130 | 131 | ## Implementation Details 132 | 133 | The example demonstrates true stateful worker usage in go-pkgz/pool: 134 | 135 | 1. 
Stateful worker implementation: 136 | ```go 137 | type TokenizingWorker struct { 138 | counts map[string]int // each worker maintains its own counts 139 | processed int 140 | } 141 | ``` 142 | 143 | 2. Worker creation with independent state: 144 | ```go 145 | p := pool.NewStateful[string](workers, func() pool.Worker[string] { 146 | return &TokenizingWorker{ 147 | counts: make(map[string]int), 148 | } 149 | }) 150 | ``` 151 | 152 | 3. Result collection using completion callback: 153 | ```go 154 | WithWorkerCompleteFn(func(ctx context.Context, id int, w pool.Worker[string]) error { 155 | tw := w.(*TokenizingWorker) 156 | collector.Submit(Result{ 157 | workerID: id, 158 | counts: tw.counts, 159 | processed: tw.processed, 160 | }) 161 | return nil 162 | }) 163 | ``` 164 | 165 | 4. Final results merging: 166 | ```go 167 | totalCounts := make(map[string]int) 168 | for result := range collector.Iter() { 169 | for word, count := range result.counts { 170 | totalCounts[word] += count 171 | } 172 | } 173 | ``` 174 | 175 | ## Architecture 176 | 177 | ``` 178 | File Reader Worker Pool Collector Results Merger 179 | (main goroutine) → (N workers) → (buffer channel) → (main goroutine) 180 | reads lines counts words collects final merges counts 181 | submits to pool in own state results from workers prints statistics 182 | ``` 183 | 184 | The program demonstrates true parallel processing where: 185 | - Each worker maintains independent word counts 186 | - No state is shared between workers during processing 187 | - Workers submit their final counts when done 188 | - Main goroutine merges results and calculates totals 189 | - Per-worker statistics show work distribution 190 | 191 | ## Notes 192 | 193 | - The example can process any text file but works best with plain text 194 | - Processing is done in parallel, but results maintain correct counts 195 | - No word order or position information is preserved -------------------------------------------------------------------------------- /examples/tokenizer_stateful/go.mod: -------------------------------------------------------------------------------- 1 | module examples/tokenizer_stateful 2 | 3 | go 1.24 4 | 5 | require github.com/go-pkgz/pool v0.7.0 6 | 7 | require golang.org/x/sync v0.11.0 // indirect 8 | 9 | replace github.com/go-pkgz/pool => ../.. 
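// note: the replace directive above points the build at the local checkout two
// directories up, so the example always compiles against the in-repo pool
// sources rather than the published v0.7.0 release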
10 | -------------------------------------------------------------------------------- /examples/tokenizer_stateful/go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/go-pkgz/pool v0.3.0 h1:aN5/ZhBbMPGXj+naZ6De2KNqg0D2Svpc7U1cYEue9t8= 4 | github.com/go-pkgz/pool v0.3.0/go.mod h1:e1qn5EYmXshPcOk2buL2ZC20w7RTAWUgbug+L2SyH7I= 5 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 6 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 7 | github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= 8 | github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 9 | golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w= 10 | golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= 11 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 12 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 13 | -------------------------------------------------------------------------------- /examples/tokenizer_stateful/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "context" 6 | "flag" 7 | "fmt" 8 | "log" 9 | "os" 10 | "sort" 11 | "strings" 12 | "time" 13 | 14 | "github.com/go-pkgz/pool" 15 | ) 16 | 17 | // TokenizingWorker maintains its own state - counts of words it has processed 18 | type TokenizingWorker struct { 19 | counts map[string]int 20 | processed int 21 | } 22 | 23 | // Result represents final counts from a single worker 24 | type Result struct { 25 | workerID int 26 | counts map[string]int 27 | processed int 28 | } 29 | 30 | // Do implements pool.Worker interface 31 | func (w *TokenizingWorker) Do(ctx context.Context, line string) error { 32 | select { 33 | case <-ctx.Done(): 34 | return ctx.Err() 35 | default: 36 | } 37 | 38 | // split line into words and clean them up 39 | words := strings.Fields(line) 40 | for _, word := range words { 41 | select { 42 | case <-ctx.Done(): 43 | return ctx.Err() 44 | default: 45 | } 46 | 47 | // clean up the word - remove punctuation, convert to lower case 48 | word = strings.ToLower(strings.Trim(word, ".,!?()[]{}\"';:")) 49 | if len(word) <= 3 { // skip short words 50 | continue 51 | } 52 | 53 | w.counts[word]++ 54 | w.processed++ 55 | } 56 | return nil 57 | } 58 | 59 | func main() { 60 | // command line flags 61 | var ( 62 | workers = flag.Int("workers", 4, "number of workers") 63 | batchSize = flag.Int("batch", 100, "batch size") 64 | file = flag.String("file", "", "input file to process") 65 | ) 66 | flag.Parse() 67 | 68 | if *file == "" { 69 | log.Fatal("file parameter is required") 70 | } 71 | 72 | // create context with timeout 73 | ctx, cancel := context.WithTimeout(context.Background(), time.Minute) 74 | defer cancel() 75 | 76 | // create collector for results from workers 77 | collector := pool.NewCollector[Result](ctx, *workers) 78 | 79 | // create pool with worker maker function 80 | p := pool.NewStateful[string](*workers, func() pool.Worker[string] { 81 | return &TokenizingWorker{ 82 | counts: make(map[string]int), 83 | } 84 | }).WithBatchSize(*batchSize). 85 | WithContinueOnError(). 
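// the worker-complete callback below fires once per worker, after that worker
// has processed all of its input; it is the safe hand-off point for the
// worker's private counts map, since the worker no longer mutates it afterwards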
86 | WithWorkerCompleteFn(func(ctx context.Context, id int, w pool.Worker[string]) error { 87 | // type assert to get our concrete worker type 88 | tw, ok := w.(*TokenizingWorker) 89 | if !ok { 90 | return fmt.Errorf("unexpected worker type") 91 | } 92 | // submit worker's results 93 | collector.Submit(Result{ 94 | workerID: id, 95 | counts: tw.counts, 96 | processed: tw.processed, 97 | }) 98 | return nil 99 | }) 100 | 101 | // start the pool 102 | if err := p.Go(ctx); err != nil { 103 | log.Fatal(err) 104 | } 105 | 106 | // read file line by line and submit to pool 107 | go func() { 108 | defer p.Close(ctx) 109 | 110 | f, err := os.Open(*file) 111 | if err != nil { 112 | log.Printf("failed to open file: %v", err) 113 | return 114 | } 115 | defer f.Close() 116 | 117 | scanner := bufio.NewScanner(f) 118 | for scanner.Scan() { 119 | p.Submit(scanner.Text()) 120 | } 121 | 122 | if err := scanner.Err(); err != nil { 123 | log.Printf("error reading file: %v", err) 124 | } 125 | }() 126 | 127 | // wait for pool to finish and then close collector 128 | if err := p.Wait(ctx); err != nil { 129 | log.Printf("pool error: %v", err) 130 | } 131 | collector.Close() 132 | 133 | // merge results from all workers 134 | totalCounts := make(map[string]int) 135 | totalProcessed := 0 136 | workerResults := make(map[int]int) // worker ID -> words processed 137 | 138 | for result, err := range collector.Iter() { 139 | if err != nil { 140 | log.Printf("error collecting result: %v", err) 141 | continue 142 | } 143 | // merge counts 144 | for word, count := range result.counts { 145 | totalCounts[word] += count 146 | } 147 | totalProcessed += result.processed 148 | workerResults[result.workerID] = result.processed 149 | } 150 | 151 | // get pool metrics 152 | stats := p.Metrics().GetStats() 153 | fmt.Printf("\nProcessing stats:\n") 154 | fmt.Printf("Processed lines: %d\n", stats.Processed) 155 | fmt.Printf("Total words processed: %d\n", totalProcessed) 156 | fmt.Printf("Errors: %d\n", stats.Errors) 157 | fmt.Printf("Processing time: %v\n", stats.ProcessingTime) 158 | fmt.Printf("Total time: %v\n", stats.TotalTime) 159 | 160 | // print per-worker stats 161 | fmt.Printf("\nPer-worker stats:\n") 162 | workerIDs := make([]int, 0, len(workerResults)) 163 | for id := range workerResults { 164 | workerIDs = append(workerIDs, id) 165 | } 166 | sort.Ints(workerIDs) 167 | for _, id := range workerIDs { 168 | fmt.Printf("Worker %d processed %d words\n", id, workerResults[id]) 169 | } 170 | 171 | // print top N most common tokens 172 | const topN = 10 173 | type wordCount struct { 174 | word string 175 | count int 176 | } 177 | counts := make([]wordCount, 0, len(totalCounts)) 178 | for word, count := range totalCounts { 179 | counts = append(counts, wordCount{word, count}) 180 | } 181 | sort.Slice(counts, func(i, j int) bool { 182 | return counts[i].count > counts[j].count 183 | }) 184 | 185 | fmt.Printf("\nTop %d most common words:\n", topN) 186 | for i, wc := range counts { 187 | if i >= topN { 188 | break 189 | } 190 | fmt.Printf("%d. %q: %d times\n", i+1, wc.word, wc.count) 191 | } 192 | } 193 | -------------------------------------------------------------------------------- /examples/tokenizer_stateless/README.md: -------------------------------------------------------------------------------- 1 | # Simple Text Tokenizer - Stateless Example 2 | 3 | This example demonstrates how to use WorkerFunc with the [go-pkgz/pool](https://github.com/go-pkgz/pool) package for simple stateless parallel processing. 
It implements a text tokenizer that counts word frequencies using a shared collector. 4 | 5 | ## What Makes it Stateless? 6 | 7 | This example uses a stateless approach where: 8 | 1. Workers are simple functions (WorkerFunc) without any state 9 | 2. All workers share a common collector for results 10 | 3. Word counting is done at the end in the main goroutine 11 | 12 | This is simpler than the stateful approach when: 13 | - Workers don't need to maintain state 14 | - Single shared collection point is sufficient 15 | - No need for per-worker initialization or cleanup 16 | 17 | ## Installation 18 | 19 | ```bash 20 | # assuming you are in go-pkgz/pool/examples/tokenizer_stateless 21 | go build 22 | ``` 23 | 24 | ## Usage 25 | 26 | ```bash 27 | go run main.go [options] -file=input.txt 28 | ``` 29 | 30 | Options: 31 | - `-file` - input file to process (required) 32 | - `-workers` - number of worker goroutines (default: 4) 33 | - `-batch` - batch size for processing (default: 100) 34 | 35 | Example: 36 | ```bash 37 | go run main.go -file main.go -workers 8 38 | ``` 39 | 40 | ## Implementation Details 41 | 42 | The key components are: 43 | 44 | 1. Simple worker function: 45 | ```go 46 | worker := pool.WorkerFunc[string](func(ctx context.Context, line string) error { 47 | for _, word := range strings.Fields(line) { 48 | word = strings.ToLower(strings.Trim(word, ".,!?()[]{}\"';:")) 49 | if len(word) <= 3 { // skip short words, matching main.go 50 | continue 51 | } 52 | collector.Submit(word) 53 | } 54 | return nil 55 | }) 56 | ``` 57 | 58 | 2. Pool creation with shared worker: 59 | ```go 60 | p := pool.New[string](workers, worker). 61 | WithBatchSize(batchSize). 62 | WithContinueOnError() 63 | ``` 64 | 65 | 3. Result collection: 66 | ```go 67 | wordCounts := make(map[string]int) 68 | for word := range collector.Iter() { 69 | wordCounts[word]++ 70 | } 71 | ``` 72 | 73 | ## Architecture 74 | 75 | ``` 76 | File Reader Worker Pool Collector Word Counter 77 | (main goroutine) → (N workers) → (shared channel) → (main goroutine) 78 | reads lines tokenize text buffers words counts frequencies 79 | ``` 80 | 81 | The program flow: 82 | 1. Main goroutine reads file line by line 83 | 2. Pool distributes lines to worker functions 84 | 3. Workers break lines into words and submit to shared collector 85 | 4. Main goroutine counts word frequencies from collector 86 | 87 | ## Output Example 88 | 89 | ``` 90 | Processing stats: 91 | Processed lines: 146 92 | Total words: 238 93 | Unique words: 152 94 | Errors: 0 95 | Processing time: 77.707µs 96 | Total time: 245.417µs 97 | 98 | Top 10 most common words: 99 | 1. "words": 9 times 100 | 2. "word": 8 times 101 | 3. "line": 6 times 102 | 4. "pool": 5 times 103 | 5. "return": 5 times 104 | 6. "file": 5 times 105 | 7. "context": 4 times 106 | 8. "worker": 4 times 107 | 9. "%d\\n": 4 times 108 | 10. "count": 4 times 109 | ``` 110 | 111 | ## Why Use This Approach? 
112 | 113 | The stateless approach is better when: 114 | - Processing is simple and doesn't require state 115 | - Shared collection is more efficient than per-worker state 116 | - Code simplicity is more important than perfect parallelism 117 | - Memory usage needs to be minimized (no per-worker state) -------------------------------------------------------------------------------- /examples/tokenizer_stateless/go.mod: -------------------------------------------------------------------------------- 1 | module examples/tokenizer_stateless 2 | 3 | go 1.24 4 | 5 | require github.com/go-pkgz/pool v0.7.0 6 | 7 | require golang.org/x/sync v0.11.0 // indirect 8 | 9 | replace github.com/go-pkgz/pool => ../.. 10 | -------------------------------------------------------------------------------- /examples/tokenizer_stateless/go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/go-pkgz/pool v0.3.0 h1:aN5/ZhBbMPGXj+naZ6De2KNqg0D2Svpc7U1cYEue9t8= 4 | github.com/go-pkgz/pool v0.3.0/go.mod h1:e1qn5EYmXshPcOk2buL2ZC20w7RTAWUgbug+L2SyH7I= 5 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 6 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 7 | github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= 8 | github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 9 | golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w= 10 | golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= 11 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 12 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 13 | -------------------------------------------------------------------------------- /examples/tokenizer_stateless/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "context" 6 | "flag" 7 | "fmt" 8 | "log" 9 | "os" 10 | "sort" 11 | "strings" 12 | "time" 13 | 14 | "github.com/go-pkgz/pool" 15 | ) 16 | 17 | func main() { 18 | // command line flags 19 | var ( 20 | workers = flag.Int("workers", 4, "number of workers") 21 | batchSize = flag.Int("batch", 100, "batch size") 22 | file = flag.String("file", "", "input file to process") 23 | ) 24 | flag.Parse() 25 | 26 | if *file == "" { 27 | log.Fatal("file parameter is required") 28 | } 29 | 30 | // create context with timeout 31 | ctx, cancel := context.WithTimeout(context.Background(), time.Minute) 32 | defer cancel() 33 | 34 | // create collector for words 35 | collector := pool.NewCollector[string](ctx, 1000) 36 | 37 | // create worker function that splits line into words 38 | worker := pool.WorkerFunc[string](func(ctx context.Context, line string) error { 39 | // check context before processing 40 | select { 41 | case <-ctx.Done(): 42 | return ctx.Err() 43 | default: 44 | } 45 | 46 | // split line into words and submit each word 47 | words := strings.Fields(line) 48 | for _, word := range words { 49 | // check context between words 50 | select { 51 | case <-ctx.Done(): 52 | return ctx.Err() 53 | default: 54 | } 55 | 56 | // clean up the word - remove punctuation, convert to lower case 57 | word = strings.ToLower(strings.Trim(word, 
".,!?()[]{}\"';:")) 58 | if len(word) <= 3 { 59 | continue 60 | } 61 | collector.Submit(word) 62 | } 63 | time.Sleep(10 * time.Millisecond) // simulate slow processing time 64 | return nil 65 | }) 66 | 67 | // create pool with worker function 68 | p := pool.New[string](*workers, worker). 69 | WithBatchSize(*batchSize). 70 | WithContinueOnError() 71 | 72 | // start the pool 73 | if err := p.Go(ctx); err != nil { 74 | log.Fatal(err) 75 | } 76 | 77 | // read file line by line and submit to pool 78 | go func() { 79 | defer p.Close(ctx) 80 | 81 | f, err := os.Open(*file) 82 | if err != nil { 83 | log.Printf("failed to open file: %v", err) 84 | return 85 | } 86 | defer f.Close() 87 | 88 | scanner := bufio.NewScanner(f) 89 | for scanner.Scan() { 90 | p.Submit(scanner.Text()) 91 | } 92 | 93 | if err := scanner.Err(); err != nil { 94 | log.Printf("error reading file: %v", err) 95 | } 96 | }() 97 | 98 | // wait for pool to finish 99 | if err := p.Wait(ctx); err != nil { 100 | log.Printf("pool error: %v", err) 101 | } 102 | collector.Close() 103 | 104 | // count words from collector 105 | wordCounts := make(map[string]int) 106 | totalWords := 0 107 | for word, err := range collector.Iter() { 108 | if err != nil { 109 | log.Printf("error collecting word: %v", err) 110 | continue 111 | } 112 | wordCounts[word]++ 113 | totalWords++ 114 | } 115 | 116 | // get pool metrics 117 | stats := p.Metrics().GetStats() 118 | fmt.Printf("\nProcessing stats:\n") 119 | fmt.Printf("Processed lines: %d\n", stats.Processed) 120 | fmt.Printf("Total words: %d\n", totalWords) 121 | fmt.Printf("Unique words: %d\n", len(wordCounts)) 122 | fmt.Printf("Errors: %d\n", stats.Errors) 123 | fmt.Printf("Processing time: %v\n", stats.ProcessingTime) 124 | fmt.Printf("Total time: %v\n\n", stats.TotalTime) 125 | fmt.Printf("all stats: %s\n", stats) 126 | 127 | // print top N most common tokens 128 | const topN = 10 129 | type wordCount struct { 130 | word string 131 | count int 132 | } 133 | counts := make([]wordCount, 0, len(wordCounts)) 134 | for word, count := range wordCounts { 135 | counts = append(counts, wordCount{word, count}) 136 | } 137 | sort.Slice(counts, func(i, j int) bool { 138 | return counts[i].count > counts[j].count 139 | }) 140 | 141 | fmt.Printf("\nTop %d most common words:\n", topN) 142 | for i, wc := range counts { 143 | if i >= topN { 144 | break 145 | } 146 | fmt.Printf("%d. 
%q: %d times\n", i+1, wc.word, wc.count) 147 | } 148 | } 149 | -------------------------------------------------------------------------------- /examples_test.go: -------------------------------------------------------------------------------- 1 | package pool 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "fmt" 7 | "sort" 8 | "sync" 9 | 10 | "github.com/go-pkgz/pool/metrics" 11 | ) 12 | 13 | func Example_basic() { 14 | // collect output 15 | var out []string 16 | var mu sync.Mutex 17 | 18 | worker := WorkerFunc[int](func(_ context.Context, v int) error { 19 | mu.Lock() 20 | out = append(out, fmt.Sprintf("processed: %d", v)) 21 | mu.Unlock() 22 | return nil 23 | }) 24 | 25 | p := New[int](2, worker) 26 | if err := p.Go(context.Background()); err != nil { 27 | panic(err) // handle error, don't panic in real code 28 | } 29 | 30 | // submit work 31 | p.Submit(1) 32 | p.Submit(2) 33 | p.Submit(3) 34 | 35 | _ = p.Close(context.Background()) 36 | 37 | // print collected output in sorted order 38 | sort.Strings(out) 39 | for _, s := range out { 40 | fmt.Println(s) 41 | } 42 | 43 | // Output: 44 | // processed: 1 45 | // processed: 2 46 | // processed: 3 47 | } 48 | 49 | func Example_withRouting() { 50 | // collect output with sync.Map for thread safety 51 | var out sync.Map 52 | 53 | worker := WorkerFunc[int](func(ctx context.Context, v int) error { 54 | out.Store(v, fmt.Sprintf("worker %d got %d", metrics.WorkerID(ctx), v)) 55 | return nil 56 | }) 57 | 58 | // create pool with chunk function that routes based on even/odd 59 | p := New[int](2, worker).WithChunkFn(func(v int) string { 60 | if v%2 == 0 { 61 | return "even" 62 | } 63 | return "odd" 64 | }, 65 | ) 66 | p.Go(context.Background()) 67 | 68 | // submit all numbers 69 | for i := 1; i <= 4; i++ { 70 | p.Submit(i) 71 | } 72 | 73 | p.Close(context.Background()) 74 | 75 | // print in order to ensure deterministic output 76 | for i := 1; i <= 4; i++ { 77 | if v, ok := out.Load(i); ok { 78 | fmt.Println(v) 79 | } 80 | } 81 | 82 | // Output: 83 | // worker 0 got 1 84 | // worker 1 got 2 85 | // worker 0 got 3 86 | // worker 1 got 4 87 | } 88 | 89 | func Example_withError() { 90 | // collect output to ensure deterministic order 91 | var out []string 92 | var mu sync.Mutex 93 | 94 | worker := WorkerFunc[int](func(_ context.Context, v int) error { 95 | if v == 0 { 96 | return fmt.Errorf("zero value not allowed") 97 | } 98 | mu.Lock() 99 | out = append(out, fmt.Sprintf("processed: %d", v)) 100 | mu.Unlock() 101 | return nil 102 | }) 103 | 104 | p := New[int](1, worker).WithContinueOnError() // don't stop on errors 105 | p.Go(context.Background()) 106 | 107 | p.Submit(1) 108 | p.Submit(0) // this will fail but processing continues 109 | p.Submit(2) 110 | 111 | err := p.Close(context.Background()) 112 | if err != nil { 113 | mu.Lock() 114 | out = append(out, fmt.Sprintf("finished with error: %v", err)) 115 | mu.Unlock() 116 | } 117 | 118 | // print collected output in sorted order 119 | sort.Strings(out) 120 | for _, s := range out { 121 | fmt.Println(s) 122 | } 123 | 124 | // Output: 125 | // finished with error: total errors: 1, last error: worker 0 failed: zero value not allowed 126 | // processed: 1 127 | // processed: 2 128 | } 129 | 130 | func Example_withContext() { 131 | started := make(chan struct{}) 132 | ctx, cancel := context.WithCancel(context.Background()) 133 | defer cancel() 134 | 135 | worker := WorkerFunc[int](func(ctx context.Context, v int) error { 136 | close(started) // signal that worker started 137 | <-ctx.Done() // wait for 
cancellation 138 | return ctx.Err() 139 | }) 140 | 141 | p := New[int](1, worker).WithBatchSize(0) // disable batching 142 | p.Go(ctx) 143 | p.Submit(1) 144 | 145 | <-started // wait for worker to start 146 | cancel() // cancel context 147 | err := p.Close(context.Background()) 148 | fmt.Printf("got error: %v\n", err != nil) 149 | 150 | // Output: 151 | // got error: true 152 | } 153 | 154 | func Example_withCollector() { 155 | type Item struct { 156 | val int 157 | label string 158 | } 159 | 160 | // create collector for results with buffer size 10 161 | collector := NewCollector[Item](context.Background(), 10) 162 | 163 | // create worker that processes numbers and sends results to collector 164 | worker := WorkerFunc[int](func(_ context.Context, v int) error { 165 | result := Item{ 166 | val: v * 2, // double the value 167 | label: "proc", // add label 168 | } 169 | collector.Submit(result) 170 | return nil 171 | }) 172 | 173 | // create and start pool 174 | p := New[int](2, worker) 175 | p.Go(context.Background()) 176 | 177 | // submit items asynchronously 178 | go func() { 179 | for i := 1; i <= 3; i++ { 180 | p.Submit(i) 181 | } 182 | p.Close(context.Background()) 183 | collector.Close() // close collector after pool is done 184 | }() 185 | 186 | // collect results and sort them for deterministic output 187 | results, _ := collector.All() 188 | sort.Slice(results, func(i, j int) bool { 189 | return results[i].val < results[j].val 190 | }) 191 | 192 | // print sorted results 193 | for _, res := range results { 194 | fmt.Printf("got result: %d (%s)\n", res.val, res.label) 195 | } 196 | 197 | // Output: 198 | // got result: 2 (proc) 199 | // got result: 4 (proc) 200 | // got result: 6 (proc) 201 | } 202 | 203 | func Example_withCollectorIterator() { 204 | collector := NewCollector[string](context.Background(), 5) 205 | 206 | worker := WorkerFunc[int](func(_ context.Context, v int) error { 207 | collector.Submit(fmt.Sprintf("value %d", v)) 208 | return nil 209 | }) 210 | 211 | p := New[int](2, worker) 212 | p.Go(context.Background()) 213 | 214 | // submit items asynchronously 215 | go func() { 216 | for i := 1; i <= 3; i++ { 217 | p.Submit(i) 218 | } 219 | p.Close(context.Background()) 220 | collector.Close() 221 | }() 222 | 223 | // collect all values first 224 | var values []string 225 | for val, err := range collector.Iter() { 226 | if err != nil { 227 | fmt.Printf("error: %v\n", err) 228 | continue 229 | } 230 | values = append(values, val) 231 | } 232 | 233 | // sort and print values for deterministic output 234 | sort.Strings(values) 235 | for _, val := range values { 236 | fmt.Printf("processed: %s\n", val) 237 | } 238 | 239 | // Output: 240 | // processed: value 1 241 | // processed: value 2 242 | // processed: value 3 243 | } 244 | 245 | func Example_fibCalculator() { 246 | // FibResult type to store both input and calculated Fibonacci number 247 | type FibResult struct { 248 | n int 249 | fib uint64 250 | } 251 | 252 | // create collector for results 253 | collector := NewCollector[FibResult](context.Background(), 10) 254 | 255 | // worker calculating fibonacci numbers 256 | worker := WorkerFunc[int](func(_ context.Context, n int) error { 257 | if n <= 0 { 258 | return fmt.Errorf("invalid input: %d", n) 259 | } 260 | 261 | // calculate fibonacci number 262 | var a, b uint64 = 0, 1 263 | for i := 0; i < n; i++ { 264 | a, b = b, a+b 265 | } 266 | 267 | collector.Submit(FibResult{n: n, fib: a}) 268 | return nil 269 | }) 270 | 271 | // create pool with 3 workers 272 | p := 
New[int](3, worker) 273 | p.Go(context.Background()) 274 | 275 | // submit numbers to calculate asynchronously 276 | go func() { 277 | numbers := []int{5, 7, 10, 3, 8} 278 | for _, n := range numbers { 279 | p.Submit(n) 280 | } 281 | p.Close(context.Background()) 282 | collector.Close() 283 | }() 284 | 285 | // collect results and sort them by input number for consistent output 286 | results, _ := collector.All() 287 | sort.Slice(results, func(i, j int) bool { 288 | return results[i].n < results[j].n 289 | }) 290 | 291 | // print results 292 | for _, res := range results { 293 | fmt.Printf("fib(%d) = %d\n", res.n, res.fib) 294 | } 295 | 296 | // Output: 297 | // fib(3) = 2 298 | // fib(5) = 5 299 | // fib(7) = 13 300 | // fib(8) = 21 301 | // fib(10) = 55 302 | } 303 | 304 | func Example_chainedCalculation() { 305 | // stage 1: calculate fibonacci numbers in parallel 306 | type FibResult struct { 307 | n int 308 | fib uint64 309 | } 310 | stage1Collector := NewCollector[FibResult](context.Background(), 10) 311 | 312 | fibWorker := WorkerFunc[int](func(_ context.Context, n int) error { 313 | var a, b uint64 = 0, 1 314 | for i := 0; i < n; i++ { 315 | a, b = b, a+b 316 | } 317 | stage1Collector.Submit(FibResult{n: n, fib: a}) 318 | return nil 319 | }) 320 | 321 | // stage 2: calculate factors for each fibonacci number 322 | type FactorsResult struct { 323 | n uint64 324 | factors []uint64 325 | } 326 | stage2Collector := NewCollector[FactorsResult](context.Background(), 10) 327 | 328 | factorsWorker := WorkerFunc[FibResult](func(_ context.Context, res FibResult) error { 329 | if res.fib <= 1 { 330 | stage2Collector.Submit(FactorsResult{n: res.fib, factors: []uint64{res.fib}}) 331 | return nil 332 | } 333 | 334 | var factors []uint64 335 | n := res.fib 336 | for i := uint64(2); i*i <= n; i++ { 337 | for n%i == 0 { 338 | factors = append(factors, i) 339 | n /= i 340 | } 341 | } 342 | if n > 1 { 343 | factors = append(factors, n) 344 | } 345 | 346 | stage2Collector.Submit(FactorsResult{n: res.fib, factors: factors}) 347 | return nil 348 | }) 349 | 350 | // create and start both pools 351 | pool1 := New[int](3, fibWorker) 352 | pool1.Go(context.Background()) 353 | 354 | pool2 := NewStateful[FibResult](2, func() Worker[FibResult] { 355 | return factorsWorker 356 | }) 357 | pool2.Go(context.Background()) 358 | 359 | // submit numbers to calculate 360 | numbers := []int{5, 7, 10} 361 | for _, n := range numbers { 362 | pool1.Submit(n) 363 | } 364 | 365 | // close pools and collectors in order 366 | pool1.Close(context.Background()) 367 | stage1Collector.Close() 368 | 369 | // process stage 1 results in stage 2 370 | for fibRes, err := range stage1Collector.Iter() { 371 | if err != nil { 372 | fmt.Printf("stage 1 error: %v\n", err) 373 | continue 374 | } 375 | pool2.Submit(fibRes) 376 | } 377 | 378 | pool2.Close(context.Background()) 379 | stage2Collector.Close() 380 | 381 | // collect and sort final results to ensure deterministic output order 382 | results, _ := stage2Collector.All() 383 | sort.Slice(results, func(i, j int) bool { 384 | return results[i].n < results[j].n 385 | }) 386 | 387 | // print results in sorted order 388 | for _, res := range results { 389 | fmt.Printf("number %d has factors %v\n", res.n, res.factors) 390 | } 391 | 392 | // Output: 393 | // number 5 has factors [5] 394 | // number 13 has factors [13] 395 | // number 55 has factors [5 11] 396 | } 397 | 398 | // processingWorker implements Worker interface 399 | type processingWorker struct{} 400 | 401 | func (w 
*processingWorker) Do(_ context.Context, v string) error { 402 | fmt.Printf("processed: %s\n", v) 403 | return nil 404 | } 405 | 406 | func Example_workerTypes() { 407 | // these two workers are functionally equivalent: 408 | // 1. Implementing Worker interface explicitly 409 | // 2. Using WorkerFunc adapter - same thing, just shorter 410 | workerFn := WorkerFunc[string](func(_ context.Context, v string) error { 411 | fmt.Printf("processed: %s\n", v) 412 | return nil 413 | }) 414 | 415 | // run first pool to completion 416 | p1 := New[string](1, &processingWorker{}) 417 | p1.Go(context.Background()) 418 | p1.Submit("task1") 419 | p1.Close(context.Background()) 420 | 421 | // then run second pool 422 | p2 := New[string](1, workerFn) 423 | p2.Go(context.Background()) 424 | p2.Submit("task2") 425 | p2.Close(context.Background()) 426 | 427 | // Output: 428 | // processed: task1 429 | // processed: task2 430 | } 431 | 432 | func Example_middleware() { 433 | // create a worker that sometimes fails 434 | worker := WorkerFunc[string](func(_ context.Context, v string) error { 435 | if v == "fail" { 436 | return errors.New("simulated failure") 437 | } 438 | fmt.Printf("processed: %s\n", v) 439 | return nil 440 | }) 441 | 442 | // create logging middleware 443 | logging := func(next Worker[string]) Worker[string] { 444 | return WorkerFunc[string](func(ctx context.Context, v string) error { 445 | fmt.Printf("starting: %s\n", v) 446 | err := next.Do(ctx, v) 447 | fmt.Printf("completed: %s, err: %v\n", v, err) 448 | return err 449 | }) 450 | } 451 | 452 | // create retry middleware 453 | retry := func(attempts int) Middleware[string] { 454 | return func(next Worker[string]) Worker[string] { 455 | return WorkerFunc[string](func(ctx context.Context, v string) error { 456 | var lastErr error 457 | for i := 0; i < attempts; i++ { 458 | var err error 459 | if err = next.Do(ctx, v); err == nil { 460 | return nil 461 | } 462 | lastErr = err 463 | fmt.Printf("attempt %d failed: %v\n", i+1, err) 464 | } 465 | return fmt.Errorf("failed after %d attempts: %w", attempts, lastErr) 466 | }) 467 | } 468 | } 469 | 470 | // create pool with both middleware - retry is listed first, making it the outermost wrapper, so logging runs inside it and traces every attempt 471 | p := New[string](1, worker).Use(retry(2), logging) 472 | p.Go(context.Background()) 473 | 474 | // process items 475 | p.Submit("ok") // should succeed first time 476 | p.Submit("fail") // should fail after retries 477 | p.Close(context.Background()) 478 | 479 | // Output: 480 | // starting: ok 481 | // processed: ok 482 | // completed: ok, err: 483 | // starting: fail 484 | // completed: fail, err: simulated failure 485 | // attempt 1 failed: simulated failure 486 | // starting: fail 487 | // completed: fail, err: simulated failure 488 | // attempt 2 failed: simulated failure 489 | } 490 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/go-pkgz/pool 2 | 3 | go 1.24 4 | 5 | require ( 6 | github.com/stretchr/testify v1.10.0 7 | golang.org/x/sync v0.14.0 8 | golang.org/x/time v0.11.0 9 | ) 10 | 11 | require ( 12 | github.com/davecgh/go-spew v1.1.1 // indirect 13 | github.com/pmezard/go-difflib v1.0.0 // indirect 14 | gopkg.in/yaml.v3 v3.0.1 // indirect 15 | ) 16 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.1 
h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 4 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 5 | github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= 6 | github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 7 | golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w= 8 | golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= 9 | golang.org/x/sync v0.14.0 h1:woo0S4Yywslg6hp4eUFjTVOyKt0RookbpAHG4c1HmhQ= 10 | golang.org/x/sync v0.14.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= 11 | golang.org/x/time v0.11.0 h1:/bpjEDfN9tkoN/ryeYHnv5hcMlc8ncjMcM4XBk5NWV0= 12 | golang.org/x/time v0.11.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= 13 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 14 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 15 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 16 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 17 | -------------------------------------------------------------------------------- /metrics/metrics.go: -------------------------------------------------------------------------------- 1 | // Package metrics provides a way to collect metrics in a thread-safe way 2 | package metrics 3 | 4 | import ( 5 | "context" 6 | "fmt" 7 | "sort" 8 | "strings" 9 | "sync" 10 | "time" 11 | ) 12 | 13 | type contextKey string 14 | 15 | // TimerType is a type of timer to measure 16 | type TimerType int 17 | 18 | const ( 19 | metricsContextKey contextKey = "metrics" 20 | widContextKey contextKey = "worker-id" 21 | ) 22 | 23 | // Timer types 24 | const ( 25 | TimerProc TimerType = iota // processing time 26 | TimerWait // wait time 27 | TimerInit // initialization time 28 | TimerWrap // wrap-up time 29 | ) 30 | 31 | // Value holds both per-worker stats and shared user stats 32 | type Value struct { 33 | startTime time.Time 34 | 35 | // per worker stats, no lock needed as each worker uses its own stats 36 | workerStats []Stats 37 | 38 | // shared user stats protected by mutex 39 | mu sync.RWMutex 40 | userData map[string]int 41 | } 42 | 43 | // Stats represents worker-specific metrics with derived values 44 | type Stats struct { 45 | // raw counters 46 | Processed int 47 | Errors int 48 | Dropped int 49 | 50 | // timing 51 | ProcessingTime time.Duration 52 | WaitTime time.Duration 53 | InitTime time.Duration 54 | WrapTime time.Duration 55 | TotalTime time.Duration 56 | 57 | // derived stats, calculated on GetStats 58 | RatePerSec float64 // items processed per second 59 | AvgLatency time.Duration // average processing time per item 60 | ErrorRate float64 // portion of errors 61 | DroppedRate float64 // portion of dropped items 62 | Utilization float64 // portion of time spent processing vs waiting 63 | } 64 | 65 | // String returns stats info formatted as string 67 | func (s Stats) String() string { 68 | var metrics []string 69 | 70 | if s.Processed > 0 { 71 | metrics = append(metrics, fmt.Sprintf("processed:%d", s.Processed)) 72 | // only add rate and latency if they are non-zero 73 | if s.RatePerSec 
> 0 { 74 | metrics = append(metrics, fmt.Sprintf("rate:%.1f/s", s.RatePerSec)) 75 | } 76 | if s.AvgLatency > 0 { 77 | metrics = append(metrics, fmt.Sprintf("avg_latency:%v", s.AvgLatency.Round(time.Millisecond))) 78 | } 79 | } 80 | if s.Errors > 0 { 81 | if s.ErrorRate > 0 { 82 | metrics = append(metrics, fmt.Sprintf("errors:%d (%.1f%%)", s.Errors, s.ErrorRate*100)) //nolint:mnd // 100 is not magic number 83 | } else { 84 | metrics = append(metrics, fmt.Sprintf("errors:%d", s.Errors)) 85 | } 86 | } 87 | if s.Dropped > 0 { 88 | if s.DroppedRate > 0 { 89 | metrics = append(metrics, fmt.Sprintf("dropped:%d (%.1f%%)", s.Dropped, s.DroppedRate*100)) //nolint:mnd // 100 is not magic 90 | } else { 91 | metrics = append(metrics, fmt.Sprintf("dropped:%d", s.Dropped)) 92 | } 93 | } 94 | if s.ProcessingTime > 0 { 95 | metrics = append(metrics, fmt.Sprintf("proc:%v", s.ProcessingTime.Round(time.Millisecond))) 96 | } 97 | if s.WaitTime > 0 { 98 | metrics = append(metrics, fmt.Sprintf("wait:%v", s.WaitTime.Round(time.Millisecond))) 99 | } 100 | if s.InitTime > 0 { 101 | metrics = append(metrics, fmt.Sprintf("init:%v", s.InitTime.Round(time.Millisecond))) 102 | } 103 | if s.WrapTime > 0 { 104 | metrics = append(metrics, fmt.Sprintf("wrap:%v", s.WrapTime.Round(time.Millisecond))) 105 | } 106 | if s.TotalTime > 0 { 107 | metrics = append(metrics, fmt.Sprintf("total:%v", s.TotalTime.Round(time.Millisecond))) 108 | if s.Utilization > 0 { 109 | metrics = append(metrics, fmt.Sprintf("utilization:%.1f%%", s.Utilization*100)) //nolint:mnd // 100 is not magic number 110 | } 111 | } 112 | 113 | if len(metrics) > 0 { 114 | return fmt.Sprintf("[%s]", strings.Join(metrics, ", ")) 115 | } 116 | return "" 117 | } 118 | 119 | // New makes thread-safe metrics collector with specified number of workers 120 | func New(workers int) *Value { 121 | return &Value{ 122 | startTime: time.Now(), 123 | workerStats: make([]Stats, workers), 124 | userData: make(map[string]int), 125 | } 126 | } 127 | 128 | // Add increments value for a given key and returns new value 129 | func (m *Value) Add(key string, delta int) int { 130 | m.mu.Lock() 131 | defer m.mu.Unlock() 132 | m.userData[key] += delta 133 | return m.userData[key] 134 | } 135 | 136 | // Inc increments value for given key by one 137 | func (m *Value) Inc(key string) int { 138 | return m.Add(key, 1) 139 | } 140 | 141 | // Get returns value for given key from shared stats 142 | func (m *Value) Get(key string) int { 143 | m.mu.RLock() 144 | defer m.mu.RUnlock() 145 | return m.userData[key] 146 | } 147 | 148 | // StartTimer returns a function that when called will record the duration in worker stats 149 | func (m *Value) StartTimer(wid int, t TimerType) func() { 150 | start := time.Now() 151 | stats := &m.workerStats[wid] 152 | 153 | return func() { 154 | duration := time.Since(start) 155 | switch t { 156 | case TimerProc: 157 | stats.ProcessingTime += duration 158 | case TimerWait: 159 | stats.WaitTime += duration 160 | case TimerInit: 161 | stats.InitTime += duration 162 | case TimerWrap: 163 | stats.WrapTime += duration 164 | } 165 | } 166 | } 167 | 168 | // AddWaitTime adds wait time directly to worker stats 169 | func (m *Value) AddWaitTime(wid int, d time.Duration) { 170 | m.workerStats[wid].WaitTime += d 171 | } 172 | 173 | // IncProcessed increments processed count for worker 174 | func (m *Value) IncProcessed(wid int) { 175 | m.workerStats[wid].Processed++ 176 | } 177 | 178 | // IncErrors increments errors count for worker 179 | func (m *Value) IncErrors(wid int) { 180 | 
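// no locking needed here: each worker increments only its own slot of
// workerStats, as noted on the Value type above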
m.workerStats[wid].Errors++ 181 | } 182 | 183 | // IncDropped increments dropped count for worker 184 | func (m *Value) IncDropped(wid int) { 185 | m.workerStats[wid].Dropped++ 186 | } 187 | 188 | // GetStats returns combined stats from all workers 189 | func (m *Value) GetStats() Stats { 190 | var result Stats 191 | 192 | // sum up stats from all workers 193 | for i := range m.workerStats { 194 | result.Processed += m.workerStats[i].Processed 195 | result.Errors += m.workerStats[i].Errors 196 | result.Dropped += m.workerStats[i].Dropped 197 | 198 | // sum wait time - represents total idle time across all workers 199 | result.WaitTime += m.workerStats[i].WaitTime 200 | 201 | // for processing time we take max since workers run in parallel 202 | result.ProcessingTime = max(result.ProcessingTime, m.workerStats[i].ProcessingTime) 203 | 204 | // sum initialization and wrap times as they are sequential 205 | result.InitTime += m.workerStats[i].InitTime 206 | result.WrapTime += m.workerStats[i].WrapTime 207 | } 208 | 209 | result.TotalTime = time.Since(m.startTime) 210 | 211 | // calculate derived stats 212 | if result.TotalTime > 0 { 213 | result.RatePerSec = float64(result.Processed) / result.TotalTime.Seconds() 214 | } 215 | if result.Processed > 0 { 216 | // for average latency we use max processing time divided by total processed 217 | result.AvgLatency = result.ProcessingTime / time.Duration(result.Processed) 218 | } 219 | totalAttempted := result.Processed + result.Errors + result.Dropped 220 | if totalAttempted > 0 { 221 | result.ErrorRate = float64(result.Errors) / float64(totalAttempted) 222 | result.DroppedRate = float64(result.Dropped) / float64(totalAttempted) 223 | } 224 | totalWorkTime := result.ProcessingTime + result.WaitTime 225 | if totalWorkTime > 0 { 226 | result.Utilization = float64(result.ProcessingTime) / float64(totalWorkTime) 227 | } 228 | 229 | return result 230 | } 231 | 232 | // String returns sorted key:vals string representation of user-defined metrics 233 | func (m *Value) String() string { 234 | m.mu.RLock() 235 | defer m.mu.RUnlock() 236 | 237 | keys := make([]string, 0, len(m.userData)) 238 | for k := range m.userData { 239 | keys = append(keys, k) 240 | } 241 | sort.Strings(keys) 242 | 243 | metrics := make([]string, 0, len(keys)) 244 | for _, k := range keys { 245 | metrics = append(metrics, fmt.Sprintf("%s:%d", k, m.userData[k])) 246 | } 247 | 248 | if len(metrics) > 0 { 249 | return fmt.Sprintf("[%s]", strings.Join(metrics, ", ")) 250 | } 251 | return "" 252 | } 253 | 254 | // WorkerID returns worker ID from the context 255 | func WorkerID(ctx context.Context) int { 256 | cid, ok := ctx.Value(widContextKey).(int) 257 | if !ok { 258 | return 0 259 | } 260 | return cid 261 | } 262 | 263 | // WithWorkerID sets worker ID in the context 264 | func WithWorkerID(ctx context.Context, id int) context.Context { 265 | return context.WithValue(ctx, widContextKey, id) 266 | } 267 | 268 | // Get metrics from context. If not found, creates new instance with same worker count as stored in context. 
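// The lookup order is: a Value stored in the context, then a fresh instance
// sized from the worker ID found in the context, then a single-worker instance
// as the final fallback.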
269 | func Get(ctx context.Context) *Value { 270 | if v, ok := ctx.Value(metricsContextKey).(*Value); ok { 271 | return v 272 | } 273 | if n, ok := ctx.Value(widContextKey).(int); ok { 274 | return New(n + 1) // n is max worker id, need size = n+1 275 | } 276 | return New(1) // fallback to single worker 277 | } 278 | 279 | // Make context with metrics 280 | func Make(ctx context.Context, workers int) context.Context { 281 | return context.WithValue(ctx, metricsContextKey, New(workers)) 282 | } 283 | -------------------------------------------------------------------------------- /metrics/metrics_test.go: -------------------------------------------------------------------------------- 1 | package metrics 2 | 3 | import ( 4 | "context" 5 | "sync" 6 | "testing" 7 | "time" 8 | 9 | "github.com/stretchr/testify/assert" 10 | "github.com/stretchr/testify/require" 11 | ) 12 | 13 | func TestMetrics_UserDefined(t *testing.T) { 14 | m := New(1) // single worker is enough for user stats testing 15 | 16 | t.Run("basic operations", func(t *testing.T) { 17 | m.Add("k1", 100) 18 | m.Inc("k1") 19 | m.Inc("k2") 20 | 21 | assert.Equal(t, 101, m.Get("k1")) 22 | assert.Equal(t, 1, m.Get("k2")) 23 | assert.Equal(t, 0, m.Get("k3")) 24 | 25 | str := m.String() 26 | assert.Contains(t, str, "k1:101") 27 | assert.Contains(t, str, "k2:1") 28 | }) 29 | 30 | t.Run("string formatting", func(t *testing.T) { 31 | m := New(10) 32 | assert.Empty(t, m.String(), "empty metrics should return empty string") 33 | 34 | m.Inc("test") 35 | assert.Equal(t, "[test:1]", m.String()) 36 | 37 | m.Add("another", 5) 38 | str := m.String() 39 | assert.Contains(t, str, "test:1") 40 | assert.Contains(t, str, "another:5") 41 | }) 42 | } 43 | 44 | func TestMetrics_WorkerStats(t *testing.T) { 45 | t.Run("worker timers", func(t *testing.T) { 46 | m := New(2) // create metrics for 2 workers 47 | 48 | // worker 1 operations 49 | end := m.StartTimer(0, TimerProc) 50 | time.Sleep(20 * time.Millisecond) 51 | end() 52 | 53 | end = m.StartTimer(0, TimerWait) 54 | time.Sleep(20 * time.Millisecond) 55 | end() 56 | 57 | // worker 2 operations 58 | end = m.StartTimer(1, TimerProc) 59 | time.Sleep(20 * time.Millisecond) 60 | end() 61 | 62 | stats := m.GetStats() 63 | assert.GreaterOrEqual(t, stats.ProcessingTime.Milliseconds(), int64(20), 64 | "processing time should be at least 20ms") 65 | assert.GreaterOrEqual(t, stats.WaitTime.Milliseconds(), int64(20), 66 | "wait time should be at least 20ms") 67 | assert.Greater(t, stats.TotalTime, stats.WaitTime, 68 | "total time should be greater than wait time") 69 | assert.Greater(t, stats.TotalTime, stats.ProcessingTime, 70 | "total time should be greater than processing time") 71 | }) 72 | 73 | t.Run("worker counters", func(t *testing.T) { 74 | // worker 1 increments 75 | m := New(2) 76 | m.IncProcessed(0) 77 | m.IncProcessed(0) 78 | m.IncErrors(0) 79 | 80 | // worker 2 increments 81 | m.IncProcessed(1) 82 | m.IncDropped(1) 83 | 84 | stats := m.GetStats() 85 | assert.Equal(t, 3, stats.Processed) 86 | assert.Equal(t, 1, stats.Errors) 87 | assert.Equal(t, 1, stats.Dropped) 88 | }) 89 | 90 | t.Run("stats string", func(t *testing.T) { 91 | m := New(1) 92 | m.IncProcessed(0) 93 | m.IncErrors(0) 94 | end := m.StartTimer(0, TimerProc) 95 | time.Sleep(10 * time.Millisecond) 96 | end() 97 | 98 | stats := m.GetStats() 99 | str := stats.String() 100 | assert.Contains(t, str, "processed:1") 101 | assert.Contains(t, str, "errors:1") 102 | assert.Contains(t, str, "proc:") 103 | assert.Contains(t, str, "total:") 104 | }) 105 | } 
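// the tests below exercise the context helpers: WorkerID/WithWorkerID
// round-trips, Make/Get plumbing, per-context isolation, and sizing a new
// Value from a worker ID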
106 | 107 | func TestMetrics_Context(t *testing.T) { 108 | t.Run("worker id", func(t *testing.T) { 109 | ctx := WithWorkerID(context.Background(), 123) 110 | assert.Equal(t, 123, WorkerID(ctx)) 111 | 112 | ctx = context.Background() 113 | assert.Equal(t, 0, WorkerID(ctx)) 114 | 115 | ctx = context.WithValue(context.Background(), widContextKey, "not an int") 116 | assert.Equal(t, 0, WorkerID(ctx)) 117 | }) 118 | 119 | t.Run("metrics from context", func(t *testing.T) { 120 | ctx := Make(context.Background(), 2) 121 | m := Get(ctx) 122 | require.NotNil(t, m) 123 | 124 | // verify metrics working 125 | m.Inc("test") 126 | assert.Equal(t, 1, m.Get("test")) 127 | 128 | // verify worker stats 129 | m.IncProcessed(0) 130 | stats := m.GetStats() 131 | assert.Equal(t, 1, stats.Processed) 132 | }) 133 | 134 | t.Run("metrics isolation", func(t *testing.T) { 135 | ctx1 := Make(context.Background(), 1) 136 | ctx2 := Make(context.Background(), 1) 137 | 138 | m1 := Get(ctx1) 139 | m2 := Get(ctx2) 140 | 141 | m1.Inc("test") 142 | assert.Equal(t, 1, m1.Get("test")) 143 | assert.Equal(t, 0, m2.Get("test")) 144 | }) 145 | 146 | t.Run("metrics creation from worker id", func(t *testing.T) { 147 | ctx := WithWorkerID(context.Background(), 5) 148 | m := Get(ctx) 149 | require.NotNil(t, m) 150 | 151 | // should be able to use worker id 5 152 | m.IncProcessed(5) 153 | stats := m.GetStats() 154 | assert.Equal(t, 1, stats.Processed) 155 | }) 156 | } 157 | 158 | func TestMetrics_Concurrent(t *testing.T) { 159 | t.Run("concurrent user stats access", func(t *testing.T) { 160 | m := New(1) 161 | const goroutines = 10 162 | const iterations = 1000 163 | 164 | var wg sync.WaitGroup 165 | wg.Add(goroutines) 166 | 167 | for i := 0; i < goroutines; i++ { 168 | go func() { 169 | defer wg.Done() 170 | for j := 0; j < iterations; j++ { 171 | m.Inc("counter") 172 | val := m.Get("counter") 173 | assert.Positive(t, val) 174 | m.Add("sum", 2) 175 | } 176 | }() 177 | } 178 | wg.Wait() 179 | 180 | assert.Equal(t, goroutines*iterations, m.Get("counter")) 181 | assert.Equal(t, goroutines*iterations*2, m.Get("sum")) 182 | }) 183 | 184 | t.Run("per worker stats", func(t *testing.T) { 185 | const workers = 4 186 | m := New(workers) 187 | var wg sync.WaitGroup 188 | wg.Add(workers) 189 | 190 | // each worker operates on its own stats 191 | for wid := 0; wid < workers; wid++ { 192 | go func(id int) { 193 | defer wg.Done() 194 | const iterations = 1000 195 | 196 | for j := 0; j < iterations; j++ { 197 | m.IncProcessed(id) 198 | end := m.StartTimer(id, TimerProc) 199 | time.Sleep(time.Microsecond) 200 | end() 201 | } 202 | }(wid) 203 | } 204 | wg.Wait() 205 | 206 | stats := m.GetStats() 207 | assert.Equal(t, workers*1000, stats.Processed) 208 | assert.Greater(t, stats.ProcessingTime, time.Duration(0)) 209 | 210 | // verify each worker's stats are accurate 211 | for wid := 0; wid < workers; wid++ { 212 | assert.Equal(t, 1000, m.workerStats[wid].Processed) 213 | assert.Greater(t, m.workerStats[wid].ProcessingTime, time.Duration(0)) 214 | } 215 | }) 216 | } 217 | 218 | func TestMetrics_AllTimerTypes(t *testing.T) { 219 | m := New(1) 220 | 221 | // record each timer type 222 | end := m.StartTimer(0, TimerProc) 223 | time.Sleep(time.Millisecond) 224 | end() 225 | 226 | end = m.StartTimer(0, TimerWait) 227 | time.Sleep(time.Millisecond) 228 | end() 229 | 230 | end = m.StartTimer(0, TimerInit) 231 | time.Sleep(time.Millisecond) 232 | end() 233 | 234 | end = m.StartTimer(0, TimerWrap) 235 | time.Sleep(time.Millisecond) 236 | end() 237 | 238 | // 
verify each timer recorded something 239 | stats := m.workerStats[0] 240 | assert.Greater(t, stats.ProcessingTime, time.Duration(0), "ProcessingTime should be recorded") 241 | assert.Greater(t, stats.WaitTime, time.Duration(0), "WaitTime should be recorded") 242 | assert.Greater(t, stats.InitTime, time.Duration(0), "InitTime should be recorded") 243 | assert.Greater(t, stats.WrapTime, time.Duration(0), "WrapTime should be recorded") 244 | 245 | // test unknown timer type 246 | end = m.StartTimer(0, TimerType(99)) 247 | time.Sleep(time.Millisecond) 248 | end() 249 | // stats should remain unchanged 250 | newStats := m.workerStats[0] 251 | assert.Equal(t, stats, newStats, "unknown timer type should not affect stats") 252 | } 253 | 254 | func TestStats_String(t *testing.T) { 255 | tests := []struct { 256 | name string 257 | stats Stats 258 | expected string 259 | }{ 260 | { 261 | name: "empty stats", 262 | stats: Stats{}, 263 | expected: "", 264 | }, 265 | { 266 | name: "only counters", 267 | stats: Stats{ 268 | Processed: 10, 269 | Errors: 2, 270 | Dropped: 3, 271 | }, 272 | expected: "[processed:10, errors:2, dropped:3]", 273 | }, 274 | { 275 | name: "only timers", 276 | stats: Stats{ 277 | ProcessingTime: time.Second, 278 | WaitTime: 2 * time.Second, 279 | InitTime: 3 * time.Second, 280 | WrapTime: 4 * time.Second, 281 | TotalTime: 10 * time.Second, 282 | }, 283 | expected: "[proc:1s, wait:2s, init:3s, wrap:4s, total:10s]", 284 | }, 285 | { 286 | name: "all fields", 287 | stats: Stats{ 288 | Processed: 10, 289 | Errors: 2, 290 | Dropped: 3, 291 | ProcessingTime: time.Second, 292 | WaitTime: 2 * time.Second, 293 | InitTime: 3 * time.Second, 294 | WrapTime: 4 * time.Second, 295 | TotalTime: 10 * time.Second, 296 | }, 297 | expected: "[processed:10, errors:2, dropped:3, proc:1s, wait:2s, init:3s, wrap:4s, total:10s]", 298 | }, 299 | { 300 | name: "some fields zero", 301 | stats: Stats{ 302 | Processed: 10, 303 | ProcessingTime: time.Second, 304 | TotalTime: 10 * time.Second, 305 | }, 306 | expected: "[processed:10, proc:1s, total:10s]", 307 | }, 308 | { 309 | name: "with derived stats", 310 | stats: Stats{ 311 | Processed: 100, 312 | Errors: 10, 313 | ProcessingTime: time.Second, 314 | WaitTime: time.Second, 315 | TotalTime: 2 * time.Second, 316 | RatePerSec: 50.0, 317 | AvgLatency: 10 * time.Millisecond, 318 | ErrorRate: 0.1, 319 | Utilization: 0.5, 320 | }, 321 | expected: "[processed:100, rate:50.0/s, avg_latency:10ms, errors:10 (10.0%), proc:1s, wait:1s, total:2s, utilization:50.0%]", 322 | }, 323 | } 324 | 325 | for _, tt := range tests { 326 | t.Run(tt.name, func(t *testing.T) { 327 | assert.Equal(t, tt.expected, tt.stats.String()) 328 | }) 329 | } 330 | } 331 | 332 | func TestMetrics_AddWaitTime(t *testing.T) { 333 | t.Run("basic wait time tracking", func(t *testing.T) { 334 | m := New(2) // two workers 335 | 336 | // add some wait time to worker 0 337 | m.AddWaitTime(0, 100*time.Millisecond) 338 | m.AddWaitTime(0, 50*time.Millisecond) 339 | 340 | // add different wait time to worker 1 341 | m.AddWaitTime(1, 75*time.Millisecond) 342 | 343 | stats := m.GetStats() 344 | assert.Equal(t, 225*time.Millisecond, stats.WaitTime, 345 | "total wait time should be sum of all workers' wait times") 346 | }) 347 | 348 | t.Run("accumulation with existing timers", func(t *testing.T) { 349 | m := New(1) 350 | 351 | // start a regular wait timer 352 | end := m.StartTimer(0, TimerWait) 353 | time.Sleep(10 * time.Millisecond) 354 | end() 355 | 356 | // add explicit wait time 357 | m.AddWaitTime(0, 
20*time.Millisecond) 358 | 359 | stats := m.GetStats() 360 | assert.Greater(t, stats.WaitTime, 30*time.Millisecond, 361 | "wait time should include both timer and added wait time") 362 | }) 363 | 364 | t.Run("multiple workers tracking", func(t *testing.T) { 365 | m := New(3) 366 | 367 | // simulate different wait patterns for each worker 368 | m.AddWaitTime(0, 10*time.Millisecond) 369 | m.AddWaitTime(1, 20*time.Millisecond) 370 | m.AddWaitTime(2, 30*time.Millisecond) 371 | 372 | // add more wait time to first worker 373 | m.AddWaitTime(0, 15*time.Millisecond) 374 | 375 | stats := m.GetStats() 376 | assert.Equal(t, 75*time.Millisecond, stats.WaitTime, 377 | "total wait time should be sum across all workers") 378 | assert.Equal(t, 25*time.Millisecond, m.workerStats[0].WaitTime, 379 | "individual worker should track its own wait time") 380 | }) 381 | } 382 | 383 | func TestStats_DerivedValues(t *testing.T) { 384 | t.Run("derived stats calculation", func(t *testing.T) { 385 | m := New(1) 386 | w := m.workerStats[0] 387 | w.Processed = 100 388 | w.Errors = 10 389 | w.Dropped = 5 390 | w.ProcessingTime = 2 * time.Second 391 | w.WaitTime = 1 * time.Second 392 | m.workerStats[0] = w 393 | m.startTime = time.Now().Add(-4 * time.Second) // simulate 4 seconds total time 394 | 395 | stats := m.GetStats() 396 | 397 | assert.InDelta(t, 25.0, stats.RatePerSec, 0.1, "should calculate rate per second") 398 | assert.Equal(t, 20*time.Millisecond, stats.AvgLatency, "should calculate average latency") 399 | assert.InDelta(t, 0.087, stats.ErrorRate, 0.01, "should calculate error rate") 400 | assert.InDelta(t, 0.043, stats.DroppedRate, 0.01, "should calculate dropped rate") 401 | assert.InDelta(t, 0.67, stats.Utilization, 0.01, "should calculate utilization") 402 | }) 403 | 404 | t.Run("string format with derived stats", func(t *testing.T) { 405 | stats := Stats{ 406 | Processed: 100, 407 | Errors: 10, 408 | Dropped: 5, 409 | ProcessingTime: 2 * time.Second, 410 | WaitTime: 1 * time.Second, 411 | TotalTime: 4 * time.Second, 412 | RatePerSec: 25.0, 413 | AvgLatency: 20 * time.Millisecond, 414 | ErrorRate: 0.087, 415 | DroppedRate: 0.043, 416 | Utilization: 0.67, 417 | } 418 | 419 | str := stats.String() 420 | t.Log("Stats string:", str) 421 | assert.Contains(t, str, "rate:25.0/s") 422 | assert.Contains(t, str, "avg_latency:20ms") 423 | assert.Contains(t, str, "errors:10 (8.7%)") 424 | assert.Contains(t, str, "dropped:5 (4.3%)") 425 | assert.Contains(t, str, "utilization:67.0%") 426 | }) 427 | 428 | t.Run("handles zero values", func(t *testing.T) { 429 | m := New(1) 430 | stats := m.GetStats() 431 | 432 | assert.Zero(t, stats.RatePerSec) 433 | assert.Zero(t, stats.AvgLatency) 434 | assert.Zero(t, stats.ErrorRate) 435 | assert.Zero(t, stats.DroppedRate) 436 | assert.Zero(t, stats.Utilization) 437 | }) 438 | } 439 | 440 | func TestMetrics_ParallelProcessing(t *testing.T) { 441 | m := New(2) // two workers 442 | 443 | // simulate two workers processing in parallel 444 | // worker 1: processes for 100ms 445 | m.workerStats[0].ProcessingTime = 100 * time.Millisecond 446 | m.workerStats[0].Processed = 50 447 | 448 | // worker 2: processes for 150ms 449 | m.workerStats[1].ProcessingTime = 150 * time.Millisecond 450 | m.workerStats[1].Processed = 75 451 | 452 | // set start time to simulate 200ms total elapsed time 453 | m.startTime = time.Now().Add(-200 * time.Millisecond) 454 | 455 | stats := m.GetStats() 456 | 457 | // processing time should be max of workers, not sum 458 | assert.Equal(t, 150*time.Millisecond, 
stats.ProcessingTime, 459 | "processing time should be max across workers") 460 | 461 | // total time should be elapsed wall time 462 | assert.InDelta(t, 200, stats.TotalTime.Milliseconds(), 50, 463 | "total time should be actual elapsed time") 464 | 465 | // rate should be total processed divided by total time 466 | expectedRate := float64(stats.Processed) / stats.TotalTime.Seconds() 467 | assert.InDelta(t, expectedRate, stats.RatePerSec, 1, 468 | "rate should be based on total processed items and elapsed time") 469 | 470 | // average latency should use max processing time 471 | expectedLatency := stats.ProcessingTime / time.Duration(stats.Processed) 472 | assert.Equal(t, expectedLatency, stats.AvgLatency, 473 | "average latency should be based on max processing time") 474 | } 475 | -------------------------------------------------------------------------------- /middleware/middleware.go: -------------------------------------------------------------------------------- 1 | // Package middleware provides common middleware implementations for the pool package. 2 | package middleware 3 | 4 | import ( 5 | "context" 6 | "fmt" 7 | "math/rand" 8 | "time" 9 | 10 | "golang.org/x/time/rate" 11 | 12 | "github.com/go-pkgz/pool" 13 | ) 14 | 15 | // Retry returns a middleware that retries failed operations up to maxAttempts times 16 | // with exponential backoff between retries. 17 | // baseDelay is used as the initial delay between retries, and each subsequent retry 18 | // increases the delay exponentially (baseDelay * 2^attempt) with some random jitter. 19 | func Retry[T any](maxAttempts int, baseDelay time.Duration) pool.Middleware[T] { 20 | if maxAttempts <= 0 { 21 | maxAttempts = 3 // default to 3 attempts 22 | } 23 | if baseDelay <= 0 { 24 | baseDelay = time.Second // default to 1 second 25 | } 26 | 27 | return func(next pool.Worker[T]) pool.Worker[T] { 28 | return pool.WorkerFunc[T](func(ctx context.Context, v T) error { 29 | var lastErr error 30 | for attempt := range maxAttempts { 31 | var err error 32 | if err = next.Do(ctx, v); err == nil { 33 | return nil 34 | } 35 | lastErr = err 36 | 37 | // don't sleep after last attempt 38 | if attempt < maxAttempts-1 { 39 | // exponential backoff with jitter 40 | delay := baseDelay * time.Duration(1<= 3 { 159 | return nil 160 | } 161 | return errors.New("string too short") 162 | } 163 | 164 | var processed []string 165 | worker := pool.WorkerFunc[string](func(_ context.Context, v string) error { 166 | processed = append(processed, v) 167 | return nil 168 | }) 169 | 170 | p := pool.New[string](1, worker).Use(Validator(validator)) 171 | require.NoError(t, p.Go(context.Background())) 172 | 173 | p.Submit("test") 174 | require.NoError(t, p.Close(context.Background())) 175 | 176 | assert.Equal(t, []string{"test"}, processed) 177 | }) 178 | 179 | t.Run("invalid input blocked", func(t *testing.T) { 180 | validator := func(s string) error { 181 | if len(s) >= 3 { 182 | return nil 183 | } 184 | return errors.New("string too short") 185 | } 186 | 187 | var processed []string 188 | worker := pool.WorkerFunc[string](func(_ context.Context, v string) error { 189 | processed = append(processed, v) 190 | return nil 191 | }) 192 | 193 | p := pool.New[string](1, worker).Use(Validator(validator)) 194 | require.NoError(t, p.Go(context.Background())) 195 | 196 | p.Submit("ok") 197 | err := p.Close(context.Background()) 198 | require.Error(t, err) 199 | assert.Contains(t, err.Error(), "validation failed") 200 | assert.Empty(t, processed) 201 | }) 202 | } 203 | 204 | func 
TestRateLimiter(t *testing.T) { 205 | t.Run("allows tasks within rate limit", func(t *testing.T) { 206 | var processed atomic.Int32 207 | worker := pool.WorkerFunc[string](func(_ context.Context, v string) error { 208 | processed.Add(1) 209 | return nil 210 | }) 211 | 212 | // 10 tasks per second with burst of 5 213 | p := pool.New[string](2, worker).Use(RateLimiter[string](10, 5)) 214 | require.NoError(t, p.Go(context.Background())) 215 | 216 | // submit 5 tasks - should all process immediately due to burst 217 | for i := 0; i < 5; i++ { 218 | p.Submit("task") 219 | } 220 | 221 | // wait a bit for processing 222 | time.Sleep(50 * time.Millisecond) 223 | require.NoError(t, p.Close(context.Background())) 224 | 225 | assert.Equal(t, int32(5), processed.Load(), "all tasks within burst should process") 226 | }) 227 | 228 | t.Run("blocks tasks exceeding rate limit", func(t *testing.T) { 229 | var timestamps []time.Time 230 | var mu sync.Mutex 231 | worker := pool.WorkerFunc[string](func(_ context.Context, v string) error { 232 | mu.Lock() 233 | timestamps = append(timestamps, time.Now()) 234 | mu.Unlock() 235 | return nil 236 | }) 237 | 238 | // 2 tasks per second with burst of 1 239 | p := pool.New[string](1, worker).Use(RateLimiter[string](2, 1)) 240 | require.NoError(t, p.Go(context.Background())) 241 | 242 | // submit 3 tasks 243 | for i := 0; i < 3; i++ { 244 | p.Submit(fmt.Sprintf("task-%d", i)) 245 | } 246 | 247 | require.NoError(t, p.Close(context.Background())) 248 | 249 | // verify timing 250 | require.Len(t, timestamps, 3) 251 | // first task should process immediately 252 | // second task should wait ~500ms (rate of 2/sec) 253 | // third task should wait another ~500ms 254 | gap1 := timestamps[1].Sub(timestamps[0]) 255 | gap2 := timestamps[2].Sub(timestamps[1]) 256 | 257 | assert.Greater(t, gap1, 350*time.Millisecond, "second task should wait for rate limit") 258 | assert.Less(t, gap1, 650*time.Millisecond, "second task shouldn't wait too long") 259 | assert.Greater(t, gap2, 350*time.Millisecond, "third task should wait for rate limit") 260 | assert.Less(t, gap2, 650*time.Millisecond, "third task shouldn't wait too long") 261 | }) 262 | 263 | t.Run("respects context cancellation while waiting", func(t *testing.T) { 264 | ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond) 265 | defer cancel() 266 | 267 | var processed atomic.Int32 268 | worker := pool.WorkerFunc[string](func(_ context.Context, v string) error { 269 | processed.Add(1) 270 | time.Sleep(10 * time.Millisecond) // simulate work 271 | return nil 272 | }) 273 | 274 | // very low rate to force waiting 275 | p := pool.New[string](1, worker).Use(RateLimiter[string](1, 1)) 276 | require.NoError(t, p.Go(ctx)) 277 | 278 | // submit multiple tasks 279 | for i := 0; i < 5; i++ { 280 | p.Submit("task") 281 | } 282 | 283 | err := p.Close(context.Background()) 284 | require.Error(t, err) 285 | assert.Contains(t, err.Error(), "would exceed context deadline") 286 | // should process 1-2 tasks before context cancellation 287 | assert.Less(t, processed.Load(), int32(3), "should not process all tasks") 288 | }) 289 | 290 | t.Run("handles default values", func(t *testing.T) { 291 | var processed atomic.Int32 292 | worker := pool.WorkerFunc[string](func(_ context.Context, v string) error { 293 | processed.Add(1) 294 | return nil 295 | }) 296 | 297 | // test with invalid rate and burst 298 | p := pool.New[string](1, worker).Use(RateLimiter[string](-1, 0)) 299 | require.NoError(t, p.Go(context.Background())) 300 | 
301 | 		p.Submit("task")
302 | 		require.NoError(t, p.Close(context.Background()))
303 | 
304 | 		assert.Equal(t, int32(1), processed.Load(), "should process with default values")
305 | 	})
306 | 
307 | 	t.Run("multiple workers share rate limit", func(t *testing.T) {
308 | 		var timestamps []time.Time
309 | 		var mu sync.Mutex
310 | 		worker := pool.WorkerFunc[string](func(_ context.Context, v string) error {
311 | 			mu.Lock()
312 | 			timestamps = append(timestamps, time.Now())
313 | 			mu.Unlock()
314 | 			time.Sleep(10 * time.Millisecond) // simulate work
315 | 			return nil
316 | 		})
317 | 
318 | 		// 2 tasks per second with burst of 2, but 4 workers
319 | 		p := pool.New[string](4, worker).Use(RateLimiter[string](2, 2))
320 | 		require.NoError(t, p.Go(context.Background()))
321 | 
322 | 		// submit 4 tasks
323 | 		for i := 0; i < 4; i++ {
324 | 			p.Submit(fmt.Sprintf("task-%d", i))
325 | 		}
326 | 
327 | 		require.NoError(t, p.Close(context.Background()))
328 | 
329 | 		// verify timing - even with 4 workers, rate limit is shared
330 | 		require.Len(t, timestamps, 4)
331 | 		// first 2 tasks should process immediately (burst)
332 | 		gap1 := timestamps[1].Sub(timestamps[0])
333 | 		assert.Less(t, gap1, 50*time.Millisecond, "first two tasks should process immediately")
334 | 
335 | 		// next 2 tasks should wait for rate limit
336 | 		gap2 := timestamps[2].Sub(timestamps[1])
337 | 		gap3 := timestamps[3].Sub(timestamps[2])
338 | 		assert.Greater(t, gap2, 350*time.Millisecond, "third task should wait for rate limit")
339 | 		assert.Greater(t, gap3, 350*time.Millisecond, "fourth task should wait for rate limit")
340 | 	})
341 | }
342 | 
--------------------------------------------------------------------------------
/pool.go:
--------------------------------------------------------------------------------
1 | package pool
2 | 
3 | import (
4 | 	"context"
5 | 	"errors"
6 | 	"fmt"
7 | 	"hash/fnv"
8 | 	"math/rand"
9 | 	"sync"
10 | 	"sync/atomic"
11 | 	"time"
12 | 
13 | 	"golang.org/x/sync/errgroup"
14 | 
15 | 	"github.com/go-pkgz/pool/metrics"
16 | )
17 | 
18 | // WorkerGroup represents a pool of workers processing items in parallel.
19 | // Supports both direct item processing and batching modes.
20 | type WorkerGroup[T any] struct {
21 | 	poolSize         int                 // number of workers (goroutines)
22 | 	workerChanSize   int                 // size of worker channels
23 | 	workerCompleteFn WorkerCompleteFn[T] // completion callback function, called by each worker on completion
24 | 	poolCompleteFn   GroupCompleteFn[T]  // pool-level completion callback, called once when all workers are done
25 | 	continueOnError  bool                // don't terminate on first error
26 | 	chunkFn          func(T) string      // worker selector function
27 | 	worker           Worker[T]           // worker function
28 | 	workerMaker      WorkerMaker[T]      // worker maker function
29 | 
30 | 	metrics *metrics.Value // shared metrics
31 | 
32 | 	workersCh     []chan T     // workers input channels
33 | 	sharedCh      chan T       // shared input channel for all workers
34 | 	activeWorkers atomic.Int32 // track number of active workers
35 | 
36 | 	// batching support
37 | 	batchSize     int        // if > 0, accumulate items up to this size
38 | 	accumulators  [][]T      // per-worker accumulators for batching
39 | 	workerBatchCh []chan []T // per-worker batch channels
40 | 	sharedBatchCh chan []T   // shared batch channel
41 | 
42 | 	eg        *errgroup.Group
43 | 	activated bool
44 | 	ctx       context.Context
45 | 
46 | 	sendMu sync.Mutex
47 | }
48 | 
49 | // Worker is the interface that wraps the Do method.
50 | type Worker[T any] interface {
51 | 	Do(ctx context.Context, v T) error
52 | }
53 | 
54 | // WorkerFunc is an adapter to allow the use of ordinary functions as Workers.
55 | type WorkerFunc[T any] func(ctx context.Context, v T) error
56 | 
57 | // Do calls f(ctx, v).
58 | func (f WorkerFunc[T]) Do(ctx context.Context, v T) error { return f(ctx, v) }
59 | 
60 | // WorkerMaker is a function that returns a new Worker
61 | type WorkerMaker[T any] func() Worker[T]
62 | 
63 | // WorkerCompleteFn called on worker completion
64 | type WorkerCompleteFn[T any] func(ctx context.Context, id int, worker Worker[T]) error
65 | 
66 | // GroupCompleteFn called once when all workers are done
67 | type GroupCompleteFn[T any] func(ctx context.Context) error
68 | 
69 | // Send func called by worker code to publish results
70 | type Send[T any] func(val T) error
71 | 
72 | // New creates a worker pool with a shared worker instance.
73 | // All goroutines share the same worker, suitable for stateless processing.
74 | func New[T any](size int, worker Worker[T]) *WorkerGroup[T] {
75 | 	if size < 1 {
76 | 		size = 1
77 | 	}
78 | 
79 | 	res := &WorkerGroup[T]{
80 | 		poolSize:       size,
81 | 		worker:         worker,
82 | 		workerChanSize: 1,
83 | 		batchSize:      10, // default batch size
84 | 
85 | 		// initialize channels
86 | 		workersCh:     make([]chan T, size),
87 | 		sharedCh:      make(chan T, size),
88 | 		workerBatchCh: make([]chan []T, size),
89 | 		sharedBatchCh: make(chan []T, size),
90 | 		accumulators:  make([][]T, size),
91 | 	}
92 | 
93 | 	// initialize workers' channels
94 | 	for i := range size {
95 | 		res.workersCh[i] = make(chan T, res.workerChanSize)
96 | 		res.workerBatchCh[i] = make(chan []T, res.workerChanSize)
97 | 	}
98 | 
99 | 	return res
100 | }
101 | 
102 | // NewStateful creates a worker pool where each goroutine gets its own worker instance.
103 | // Suitable for operations requiring state (e.g., database connections).
104 | func NewStateful[T any](size int, maker func() Worker[T]) *WorkerGroup[T] {
105 | 	if size < 1 {
106 | 		size = 1
107 | 	}
108 | 
109 | 	res := &WorkerGroup[T]{
110 | 		poolSize:       size,
111 | 		workerMaker:    maker,
112 | 		workerChanSize: 1,
113 | 		batchSize:      10, // default batch size
114 | 		ctx:            context.Background(),
115 | 
116 | 		// initialize channels
117 | 		workersCh:     make([]chan T, size),
118 | 		sharedCh:      make(chan T, size),
119 | 		workerBatchCh: make([]chan []T, size),
120 | 		sharedBatchCh: make(chan []T, size),
121 | 		accumulators:  make([][]T, size),
122 | 	}
123 | 
124 | 	// initialize workers' channels
125 | 	for i := range size {
126 | 		res.workersCh[i] = make(chan T, res.workerChanSize)
127 | 		res.workerBatchCh[i] = make(chan []T, res.workerChanSize)
128 | 	}
129 | 
130 | 	return res
131 | }
132 | 
133 | // WithWorkerChanSize sets channel buffer size for each worker.
134 | // Larger sizes can help with bursty workloads but increase memory usage.
135 | // Default: 1
136 | func (p *WorkerGroup[T]) WithWorkerChanSize(size int) *WorkerGroup[T] {
137 | 	p.workerChanSize = size
138 | 	if size < 1 {
139 | 		p.workerChanSize = 1
140 | 	}
141 | 	return p
142 | }
143 | 
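// Illustrative sketch (not part of this file): minimal usage of NewStateful
// above. The counterWorker type is hypothetical; because the maker returns a
// fresh instance per goroutine, mutating its state inside Do needs no locking.
//
//	type counterWorker struct{ seen int }
//
//	func (w *counterWorker) Do(context.Context, string) error { w.seen++; return nil }
//
//	p := NewStateful[string](2, func() Worker[string] { return &counterWorker{} })
//	// each of the 2 goroutines now owns its own counterWorker instance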
144 | // WithWorkerCompleteFn sets callback executed on worker completion.
145 | // Useful for cleanup or finalization of worker resources.
146 | // Default: none (disabled)
147 | func (p *WorkerGroup[T]) WithWorkerCompleteFn(fn WorkerCompleteFn[T]) *WorkerGroup[T] {
148 | 	p.workerCompleteFn = fn
149 | 	return p
150 | }
151 | 
152 | // WithPoolCompleteFn sets callback executed once when all workers are done.
153 | func (p *WorkerGroup[T]) WithPoolCompleteFn(fn GroupCompleteFn[T]) *WorkerGroup[T] {
154 | 	p.poolCompleteFn = fn
155 | 	return p
156 | }
157 | 
158 | // WithChunkFn enables predictable item distribution.
159 | // Items with the same key (returned by fn) are processed by the same worker.
160 | // Useful for maintaining order within groups of related items.
161 | // Default: none (random distribution)
162 | func (p *WorkerGroup[T]) WithChunkFn(fn func(T) string) *WorkerGroup[T] {
163 | 	p.chunkFn = fn
164 | 	return p
165 | }
166 | 
167 | // WithContinueOnError sets whether the pool should continue on error.
168 | // Default: false
169 | func (p *WorkerGroup[T]) WithContinueOnError() *WorkerGroup[T] {
170 | 	p.continueOnError = true
171 | 	return p
172 | }
173 | 
174 | // WithBatchSize enables item batching with specified size.
175 | // Items are accumulated until the batch is full before processing.
176 | // Set to 0 to disable batching.
177 | // Default: 10
178 | func (p *WorkerGroup[T]) WithBatchSize(size int) *WorkerGroup[T] {
179 | 	p.batchSize = size
180 | 	if size > 0 {
181 | 		// initialize accumulators with capacity
182 | 		for i := range p.poolSize {
183 | 			p.accumulators[i] = make([]T, 0, size)
184 | 		}
185 | 	}
186 | 	return p
187 | }
188 | 
189 | // Submit adds an item to the pool for processing. May block if worker channels are full.
190 | // Not thread-safe, intended for use by the main thread or a single producer's thread.
191 | func (p *WorkerGroup[T]) Submit(v T) {
192 | 	// check context early
193 | 	select {
194 | 	case <-p.ctx.Done():
195 | 		return // don't submit if context is cancelled
196 | 	default:
197 | 	}
198 | 
199 | 	if p.batchSize == 0 {
200 | 		// direct submission mode
201 | 		if p.chunkFn == nil {
202 | 			p.sharedCh <- v
203 | 			return
204 | 		}
205 | 		h := fnv.New32a()
206 | 		_, _ = h.Write([]byte(p.chunkFn(v)))
207 | 		id := int(h.Sum32()) % p.poolSize
208 | 		p.workersCh[id] <- v
209 | 		return
210 | 	}
211 | 
212 | 	// batching mode
213 | 	var id int
214 | 	if p.chunkFn != nil {
215 | 		h := fnv.New32a()
216 | 		_, _ = h.Write([]byte(p.chunkFn(v)))
217 | 		id = int(h.Sum32()) % p.poolSize
218 | 	} else {
219 | 		id = rand.Intn(p.poolSize) //nolint:gosec // no need for secure random here
220 | 	}
221 | 
222 | 	// add to accumulator
223 | 	p.accumulators[id] = append(p.accumulators[id], v)
224 | 
225 | 	// check if we should flush
226 | 	var shouldFlush bool
227 | 	select {
228 | 	case <-p.ctx.Done():
229 | 		shouldFlush = true // always flush on context cancellation
230 | 	default:
231 | 		// in normal case, flush only when batch is full
232 | 		shouldFlush = len(p.accumulators[id]) >= p.batchSize
233 | 	}
234 | 
235 | 	if shouldFlush && len(p.accumulators[id]) > 0 {
236 | 		if p.chunkFn == nil {
237 | 			select {
238 | 			case p.sharedBatchCh <- p.accumulators[id]:
239 | 			case <-p.ctx.Done(): // handle case where channel send would block
240 | 				return
241 | 			}
242 | 		} else {
243 | 			select {
244 | 			case p.workerBatchCh[id] <- p.accumulators[id]:
245 | 			case <-p.ctx.Done():
246 | 				return
247 | 			}
248 | 		}
249 | 		p.accumulators[id] = make([]T, 0, p.batchSize)
250 | 	}
251 | }
252 | 
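// Illustrative sketch (not part of this file): routing related items to the
// same worker via WithChunkFn above. The event type and its userID field are
// made up for the example; keys are hashed with fnv exactly as in Submit.
//
//	type event struct{ userID, action string }
//
//	p := New[event](4, worker).
//		WithChunkFn(func(e event) string { return e.userID }). // same user -> same worker
//		WithBatchSize(0)                                       // direct mode, no batching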
253 | // Send adds an item to the pool for processing.
254 | // Safe for concurrent use, intended for worker-to-pool submissions or for use by multiple concurrent producers.
255 | func (p *WorkerGroup[T]) Send(v T) {
256 | 	p.sendMu.Lock()
257 | 	defer p.sendMu.Unlock()
258 | 	p.Submit(v)
259 | }
260 | 
261 | // Go activates the pool and starts worker goroutines.
262 | // Must be called before submitting items.
263 | func (p *WorkerGroup[T]) Go(ctx context.Context) error {
264 | 	if p.activated {
265 | 		return fmt.Errorf("workers pool already activated")
266 | 	}
267 | 	defer func() { p.activated = true }()
268 | 
269 | 	var egCtx context.Context
270 | 	p.eg, egCtx = errgroup.WithContext(ctx)
271 | 	p.ctx = egCtx
272 | 
273 | 	// create metrics context for all workers
274 | 	metricsCtx := metrics.Make(egCtx, p.poolSize)
275 | 	p.metrics = metrics.Get(metricsCtx)
276 | 
277 | 	// set initial count
278 | 	p.activeWorkers.Store(int32(p.poolSize)) //nolint:gosec // no risk of overflow
279 | 
280 | 	// start all goroutines (workers)
281 | 	for i := range p.poolSize {
282 | 		withWorkerIDctx := metrics.WithWorkerID(metricsCtx, i)
283 | 		workerCh := p.sharedCh
284 | 		batchCh := p.sharedBatchCh
285 | 		if p.chunkFn != nil {
286 | 			workerCh = p.workersCh[i]
287 | 			batchCh = p.workerBatchCh[i]
288 | 		}
289 | 		r := workerRequest[T]{inCh: workerCh, batchCh: batchCh, m: p.metrics, id: i}
290 | 		p.eg.Go(p.workerProc(withWorkerIDctx, r))
291 | 	}
292 | 
293 | 	return nil
294 | }
295 | 
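// Illustrative sketch (not part of this file): because Send above is
// mutex-protected while Submit is not, multiple producer goroutines may feed
// one pool. Producer count and payloads are made up for the example.
//
//	var wg sync.WaitGroup
//	for i := range 3 {
//		wg.Add(1)
//		go func() {
//			defer wg.Done()
//			p.Send(fmt.Sprintf("item from producer %d", i)) // safe for concurrent use
//		}()
//	}
//	wg.Wait()
//	_ = p.Close(context.Background()) // close only after all producers finished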
296 | // workerRequest is a request to a worker goroutine containing all necessary data
297 | type workerRequest[T any] struct {
298 | 	inCh    <-chan T
299 | 	batchCh <-chan []T
300 | 	m       *metrics.Value
301 | 	id      int
302 | }
303 | 
304 | // workerProc is a worker goroutine function; it reads from the input channel and processes records
305 | func (p *WorkerGroup[T]) workerProc(wCtx context.Context, r workerRequest[T]) func() error {
306 | 	return func() error {
307 | 		var lastErr error
308 | 		var totalErrs int
309 | 
310 | 		initEndTmr := r.m.StartTimer(r.id, metrics.TimerInit)
311 | 		worker := p.worker
312 | 		if p.workerMaker != nil {
313 | 			worker = p.workerMaker()
314 | 		}
315 | 		initEndTmr()
316 | 
317 | 		lastActivity := time.Now()
318 | 
319 | 		// processItem handles a single item with metrics
320 | 		processItem := func(v T) error {
321 | 			waitTime := time.Since(lastActivity)
322 | 			r.m.AddWaitTime(r.id, waitTime)
323 | 			lastActivity = time.Now()
324 | 
325 | 			procEndTmr := r.m.StartTimer(r.id, metrics.TimerProc)
326 | 			defer procEndTmr()
327 | 
328 | 			if err := worker.Do(wCtx, v); err != nil {
329 | 				r.m.IncErrors(r.id)
330 | 				totalErrs++
331 | 				if !p.continueOnError {
332 | 					return fmt.Errorf("worker %d failed: %w", r.id, err)
333 | 				}
334 | 				lastErr = fmt.Errorf("worker %d failed: %w", r.id, err)
335 | 				return nil // continue on error
336 | 			}
337 | 			r.m.IncProcessed(r.id)
338 | 			return nil
339 | 		}
340 | 
341 | 		// processBatch handles batch of items
342 | 		processBatch := func(items []T) error {
343 | 			waitTime := time.Since(lastActivity)
344 | 			r.m.AddWaitTime(r.id, waitTime)
345 | 			lastActivity = time.Now()
346 | 
347 | 			procEndTmr := r.m.StartTimer(r.id, metrics.TimerProc)
348 | 			defer procEndTmr()
349 | 
350 | 			for _, v := range items {
351 | 				if err := worker.Do(wCtx, v); err != nil {
352 | 					r.m.IncErrors(r.id)
353 | 					totalErrs++
354 | 					if !p.continueOnError {
355 | 						return fmt.Errorf("worker %d failed: %w", r.id, err)
356 | 					}
357 | 					lastErr = fmt.Errorf("worker %d failed: %w", r.id, err)
358 | 					continue
359 | 				}
360 | 				r.m.IncProcessed(r.id)
361 | 			}
362 | 			return nil
363 | 		}
364 | 
365 | 		// track if channels are closed
366 | 		normalClosed := false
367 | 		batchClosed := false
368 | 
369 | 		// main processing loop
370 | 		for {
371 | 			if normalClosed && batchClosed {
372 | 				return p.finishWorker(wCtx, r.id, worker, lastErr, totalErrs)
373 | 			}
374 | 
375 | 			select {
376 | 			case <-wCtx.Done():
377 | 				return p.finishWorker(wCtx, r.id, worker, wCtx.Err(), totalErrs)
378 | 
379 | 			case v, ok := <-r.inCh:
380 | 				if !ok {
381 | 					normalClosed = true
382 | 					continue
383 | 				}
384 | 				if err := processItem(v); err != nil {
385 | 					return p.finishWorker(wCtx, r.id, worker, err, totalErrs)
386 | 				}
387 | 
388 | 			case batch, ok := <-r.batchCh:
389 | 				if !ok {
390 | 					batchClosed = true
391 | 					continue
392 | 				}
393 | 				if err := processBatch(batch); err != nil {
394 | 					return p.finishWorker(wCtx, r.id, worker, err, totalErrs)
395 | 				}
396 | 			}
397 | 		}
398 | 	}
399 | }
400 | 
401 | // finishWorker handles worker completion logic
402 | func (p *WorkerGroup[T]) finishWorker(ctx context.Context, id int, worker Worker[T], lastErr error, totalErrs int) error {
403 | 	// worker completion should be called only if we are continuing on error or no error
404 | 	if p.workerCompleteFn != nil && (lastErr == nil || p.continueOnError) {
405 | 		wrapFinTmr := p.metrics.StartTimer(id, metrics.TimerWrap)
406 | 		if e := p.workerCompleteFn(ctx, id, worker); e != nil {
407 | 			if lastErr == nil {
408 | 				lastErr = fmt.Errorf("complete worker func for %d failed: %w", id, e)
409 | 			}
410 | 		}
411 | 		wrapFinTmr()
412 | 	}
413 | 
414 | 	activeWorkers := p.activeWorkers.Add(-1)
415 | 
416 | 	// pool completion should be called when this is the last worker
417 | 	// regardless of error state, except for context cancellation
418 | 	if activeWorkers == 0 && p.poolCompleteFn != nil && !errors.Is(lastErr, context.Canceled) {
419 | 		if e := p.poolCompleteFn(ctx); e != nil {
420 | 			if lastErr == nil {
421 | 				lastErr = fmt.Errorf("complete pool func for %d failed: %w", id, e)
422 | 			}
423 | 		}
424 | 	}
425 | 
426 | 	if lastErr != nil {
427 | 		return fmt.Errorf("total errors: %d, last error: %w", totalErrs, lastErr)
428 | 	}
429 | 	return nil
430 | }
431 | 
432 | // Close pool. Has to be called by the consumer as the indication of "all records submitted".
433 | // The call blocks till all processing is completed by workers. After this call the pool can't be reused.
434 | // Returns an error if any happened during the run
435 | func (p *WorkerGroup[T]) Close(ctx context.Context) error {
436 | 	// if context canceled, return immediately
437 | 	switch {
438 | 	case ctx.Err() != nil:
439 | 		return ctx.Err()
440 | 	default:
441 | 	}
442 | 
443 | 	// flush any remaining items in accumulators
444 | 	if p.batchSize > 0 {
445 | 		for i, acc := range p.accumulators {
446 | 			if len(acc) > 0 {
447 | 				// ensure we flush any non-empty accumulator, regardless of size
448 | 				if p.chunkFn == nil {
449 | 					p.sharedBatchCh <- acc
450 | 				} else {
451 | 					p.workerBatchCh[i] <- acc
452 | 				}
453 | 				p.accumulators[i] = nil // help GC
454 | 			}
455 | 		}
456 | 	}
457 | 
458 | 	close(p.sharedCh)
459 | 	close(p.sharedBatchCh)
460 | 	for i := range p.poolSize {
461 | 		close(p.workersCh[i])
462 | 		close(p.workerBatchCh[i])
463 | 	}
464 | 	return p.eg.Wait()
465 | }
466 | 
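// Illustrative sketch (not part of this file): typical division of labor
// between Close above and Wait below, assuming a hypothetical items slice and
// a producer goroutine.
//
//	go func() {
//		for _, v := range items {
//			p.Submit(v)
//		}
//		_ = p.Close(context.Background()) // signals "all records submitted"
//	}()
//
//	if err := p.Wait(context.Background()); err != nil {
//		// all workers done; err wraps the last worker failure, if any
//	}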
467 | // Wait till all workers have completed and the result channel is closed.
468 | func (p *WorkerGroup[T]) Wait(ctx context.Context) error {
469 | 	// if context canceled, return immediately
470 | 	switch {
471 | 	case ctx.Err() != nil:
472 | 		return ctx.Err()
473 | 	default:
474 | 	}
475 | 	return p.eg.Wait()
476 | }
477 | 
478 | // Metrics returns combined metrics from all workers
479 | func (p *WorkerGroup[T]) Metrics() *metrics.Value {
480 | 	return p.metrics
481 | }
482 | 
483 | // Middleware wraps worker and adds functionality
484 | type Middleware[T any] func(Worker[T]) Worker[T]
485 | 
486 | // Use applies middlewares to the worker group's worker. Middlewares are applied
487 | // in the same order as they are provided, matching the HTTP middleware pattern in Go.
488 | // The first middleware is the outermost wrapper, and the last middleware is the
489 | // innermost wrapper closest to the original worker.
490 | func (p *WorkerGroup[T]) Use(middlewares ...Middleware[T]) *WorkerGroup[T] {
491 | 	if len(middlewares) == 0 {
492 | 		return p
493 | 	}
494 | 
495 | 	// if we have a worker maker (stateful), wrap it
496 | 	if p.workerMaker != nil {
497 | 		originalMaker := p.workerMaker
498 | 		p.workerMaker = func() Worker[T] {
499 | 			worker := originalMaker()
500 | 			// apply middlewares in order from last to first
501 | 			// this makes first middleware outermost
502 | 			wrapped := worker
503 | 			for i := len(middlewares) - 1; i >= 0; i-- {
504 | 				prev := wrapped
505 | 				wrapped = middlewares[i](prev)
506 | 			}
507 | 			return wrapped
508 | 		}
509 | 		return p
510 | 	}
511 | 
512 | 	// for stateless worker, just wrap it directly
513 | 	wrapped := p.worker
514 | 	for i := len(middlewares) - 1; i >= 0; i-- {
515 | 		prev := wrapped
516 | 		wrapped = middlewares[i](prev)
517 | 	}
518 | 	p.worker = wrapped
519 | 	return p
520 | }
521 | 
--------------------------------------------------------------------------------
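As an illustration of the ordering rule documented in Use, the sketch below chains the middleware package's Retry and RateLimiter (both shown earlier in this document); the worker body and parameters are placeholders, and callFlakyService is hypothetical. Because the first middleware is the outermost wrapper, every retry attempt passes through the rate limiter again:

worker := pool.WorkerFunc[string](func(ctx context.Context, v string) error {
	return callFlakyService(ctx, v) // hypothetical downstream call
})

p := pool.New[string](4, worker).Use(
	middleware.Retry[string](3, 100*time.Millisecond), // outermost: wraps the limiter
	middleware.RateLimiter[string](10, 5),             // innermost: closest to the worker
)
// effective chain per item: Retry -> RateLimiter -> worker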