├── .github
│   ├── CODEOWNERS
│   ├── FUNDING.yml
│   └── workflows
│       └── ci.yml
├── .gitignore
├── .golangci.yml
├── LICENSE
├── README.md
├── bench_test.go
├── collector.go
├── collector_test.go
├── doc.go
├── examples
│   ├── README.md
│   ├── collector_errors
│   │   ├── README.md
│   │   ├── go.mod
│   │   ├── go.sum
│   │   └── main.go
│   ├── collectors_chain
│   │   ├── README.md
│   │   ├── go.mod
│   │   ├── go.sum
│   │   └── main.go
│   ├── direct_chain
│   │   ├── README.md
│   │   ├── go.mod
│   │   ├── go.sum
│   │   └── main.go
│   ├── middleware
│   │   ├── README.md
│   │   ├── go.mod
│   │   ├── go.sum
│   │   └── main.go
│   ├── parallel_files
│   │   ├── README.md
│   │   ├── go.mod
│   │   ├── go.sum
│   │   └── main.go
│   ├── tokenizer_stateful
│   │   ├── README.md
│   │   ├── go.mod
│   │   ├── go.sum
│   │   └── main.go
│   └── tokenizer_stateless
│       ├── README.md
│       ├── go.mod
│       ├── go.sum
│       └── main.go
├── examples_test.go
├── go.mod
├── go.sum
├── metrics
│   ├── metrics.go
│   └── metrics_test.go
├── middleware
│   ├── middleware.go
│   └── middleware_test.go
├── pool.go
└── pool_test.go

/.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # These owners will be the default owners for everything in the repo. 2 | # Unless a later match takes precedence, @umputun will be requested for 3 | # review when someone opens a pull request. 4 | 5 | * @umputun 6 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: [umputun] 2 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: build 2 | 3 | on: 4 | push: 5 | branches: 6 | tags: 7 | pull_request: 8 | 9 | jobs: 10 | build: 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - name: set up go 1.24 15 | uses: actions/setup-go@v3 16 | with: 17 | go-version: "1.24" 18 | id: go 19 | 20 | - name: checkout 21 | uses: actions/checkout@v3 22 | 23 | - name: build and test 24 | run: | 25 | go get -v 26 | # run tests for all packages except examples 27 | go test $(go list ./... 
| grep -v /examples/) -timeout=60s -race -covermode=atomic -coverprofile=$GITHUB_WORKSPACE/profile.cov_tmp 28 | cat $GITHUB_WORKSPACE/profile.cov_tmp | grep -v "_mock.go" > $GITHUB_WORKSPACE/profile.cov 29 | go build -race 30 | 31 | - name: golangci-lint 32 | uses: golangci/golangci-lint-action@v7 33 | with: 34 | version: v2.1.6 35 | skip-pkg-cache: true 36 | 37 | - name: install goveralls 38 | run: | 39 | go install github.com/mattn/goveralls@latest 40 | 41 | - name: submit coverage 42 | run: $(go env GOPATH)/bin/goveralls -service="github" -coverprofile=$GITHUB_WORKSPACE/profile.cov 43 | env: 44 | COVERALLS_TOKEN: ${{ secrets.GITHUB_TOKEN }} -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.prof 2 | pool.test -------------------------------------------------------------------------------- /.golangci.yml: -------------------------------------------------------------------------------- 1 | version: "2" 2 | run: 3 | concurrency: 4 4 | linters: 5 | default: none 6 | enable: 7 | - contextcheck 8 | - copyloopvar 9 | - decorder 10 | - errorlint 11 | - exptostd 12 | - gochecknoglobals 13 | - gochecknoinits 14 | - gocritic 15 | - gosec 16 | - govet 17 | - ineffassign 18 | - intrange 19 | - nakedret 20 | - nilerr 21 | - prealloc 22 | - predeclared 23 | - revive 24 | - staticcheck 25 | - testifylint 26 | - thelper 27 | - unconvert 28 | - unparam 29 | - unused 30 | - nestif 31 | settings: 32 | goconst: 33 | min-len: 2 34 | min-occurrences: 2 35 | gocritic: 36 | disabled-checks: 37 | - wrapperFunc 38 | enabled-tags: 39 | - performance 40 | - style 41 | - experimental 42 | gocyclo: 43 | min-complexity: 15 44 | govet: 45 | enable: 46 | - shadow 47 | lll: 48 | line-length: 140 49 | misspell: 50 | locale: US 51 | exclusions: 52 | generated: lax 53 | rules: 54 | - linters: 55 | - gosec 56 | text: 'G114: Use of net/http serve function that has no support for setting timeouts' 57 | - linters: 58 | - revive 59 | - unparam 60 | path: _test\.go$ 61 | text: unused-parameter 62 | - linters: 63 | - prealloc 64 | path: _test\.go$ 65 | text: Consider pre-allocating 66 | - linters: 67 | - gosec 68 | - intrange 69 | path: _test\.go$ 70 | paths: 71 | - third_party$ 72 | - builtin$ 73 | - examples$ 74 | formatters: 75 | enable: 76 | - gofmt 77 | - goimports 78 | exclusions: 79 | generated: lax 80 | paths: 81 | - third_party$ 82 | - builtin$ 83 | - examples$ 84 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Umputun 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

# pool [![Build Status](https://github.com/go-pkgz/pool/workflows/build/badge.svg)](https://github.com/go-pkgz/pool/actions) [![Coverage Status](https://coveralls.io/repos/github/go-pkgz/pool/badge.svg?branch=master)](https://coveralls.io/github/go-pkgz/pool?branch=master) [![godoc](https://godoc.org/github.com/go-pkgz/pool?status.svg)](https://godoc.org/github.com/go-pkgz/pool)

`pool` is a Go package that provides a generic, efficient worker pool implementation for parallel task processing. Built for Go 1.21+, it offers a flexible API with features like batching, work distribution strategies, and comprehensive metrics collection.

## Features

- Generic implementation supporting any data type
- Configurable number of parallel workers
- Support for both stateless shared workers and per-worker instances
- Batching capability for processing multiple items at once
- Customizable work distribution through chunk functions
- Built-in metrics collection (processing times, counts, etc.)
- Error handling with continue/stop options
- Context-based cancellation and timeouts
- Optional completion callbacks
- Extensible middleware system for custom functionality
- Built-in middlewares for common tasks
- No external dependencies except for the testing framework

## Quick Start

Here's a practical example showing how to process a list of URLs in parallel:

```go
func main() {
	// create a worker that fetches URLs
	worker := pool.WorkerFunc[string](func(ctx context.Context, url string) error {
		resp, err := http.Get(url)
		if err != nil {
			return fmt.Errorf("failed to fetch %s: %w", url, err)
		}
		defer resp.Body.Close()

		if resp.StatusCode != http.StatusOK {
			return fmt.Errorf("bad status code from %s: %d", url, resp.StatusCode)
		}
		return nil
	})

	// create a pool with 5 workers
	p := pool.New[string](5, worker).WithContinueOnError() // don't stop on errors

	// start the pool
	if err := p.Go(context.Background()); err != nil {
		log.Fatal(err)
	}

	// submit URLs for processing
	urls := []string{
		"https://example.com",
		"https://example.org",
		"https://example.net",
	}

	go func() {
		// submit URLs and signal when done
		defer p.Close(context.Background())
		for _, url := range urls {
			p.Submit(url)
		}
	}()

	// wait for all URLs to be processed
	if err := p.Wait(context.Background()); err != nil {
		log.Printf("some URLs failed: %v", err)
	}

	// get metrics
	metrics := p.Metrics()
	stats := metrics.GetStats()
	fmt.Printf("Processed: %d, Errors: %d, Time taken: %v\n",
		stats.Processed, stats.Errors, stats.TotalTime)
}
```

_For more examples, see the [examples](https://github.com/go-pkgz/pool/tree/master/examples) directory._
## Motivation

While Go provides excellent primitives for concurrent programming with goroutines, channels, and sync primitives, building production-ready concurrent data processing systems often requires more sophisticated patterns. This package emerged from real-world needs encountered in various projects where basic concurrency primitives weren't enough.

Common challenges this package addresses:

1. **Stateful Processing**
   - Need to maintain worker-specific state (counters, caches, connections)
   - Each worker requires its own resources (database connections, file handles)
   - State needs to be isolated to avoid synchronization

2. **Controlled Work Distribution**
   - Ensuring related items are processed by the same worker
   - Maintaining processing order for specific groups of items
   - Optimizing cache usage by routing similar items together

3. **Resource Management**
   - Limiting number of goroutines in large-scale processing
   - Managing cleanup of worker resources
   - Handling graceful shutdown

4. **Performance Optimization**
   - Batching items to reduce channel communication overhead
   - Balancing worker load with different distribution strategies
   - Buffering to handle uneven processing speeds

5. **Operational Visibility**
   - Need for detailed metrics about processing
   - Understanding bottlenecks and performance issues
   - Monitoring system health

## Core Concepts

### Worker Types

The pool supports three ways to implement and manage workers:

1. **Core Interface**:
```go
// Worker is the interface that wraps the Do method
type Worker[T any] interface {
	Do(ctx context.Context, v T) error
}

// WorkerFunc is an adapter to allow using ordinary functions as Workers
type WorkerFunc[T any] func(ctx context.Context, v T) error

func (f WorkerFunc[T]) Do(ctx context.Context, v T) error { return f(ctx, v) }
```

2. **Stateless Shared Workers**:
```go
// single worker instance shared between all goroutines
worker := pool.WorkerFunc[string](func(ctx context.Context, v string) error {
	// process v
	return nil
})

p := pool.New[string](5, worker)
```
- One worker instance serves all goroutines
- Good for stateless operations
- More memory efficient

3. **Per-Worker Instances**:
```go
type dbWorker struct {
	conn      *sql.DB
	processed int
}

func (w *dbWorker) Do(ctx context.Context, v string) error {
	w.processed++
	_, err := w.conn.ExecContext(ctx, "INSERT INTO items (value) VALUES (?)", v)
	return err
}

// create new instance for each goroutine
maker := func() pool.Worker[string] {
	w := &dbWorker{
		conn: openConnection(), // each worker gets own connection
	}
	return w
}

p := pool.NewStateful[string](5, maker)
```

### Batch Processing

Batching reduces channel communication overhead by processing multiple items at once:

```go
// worker receives items one by one
worker := pool.WorkerFunc[string](func(ctx context.Context, v string) error {
	// v is one item from the batch
	return nil
})

// process items in batches of 10
p := pool.New[string](2, worker).WithBatchSize(10)
```

How batching works:
1. Pool accumulates submitted items internally until batch size is reached
2. Full batch is sent to worker as a single channel operation
3. Worker processes each item in the batch sequentially
4. Last batch may be smaller if items don't divide evenly

When to use batching:
- High-volume processing where channel operations are a bottleneck
- When processing overhead per item is low compared to channel communication
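A minimal end-to-end sketch of the above, using only the API shown in this README (the 1,000 items and batch size of 50 are arbitrary). The point to notice is that the worker body is identical with or without batching, since batches are unpacked before the worker sees them:

```go
package main

import (
	"context"
	"fmt"
	"sync/atomic"

	"github.com/go-pkgz/pool"
)

func main() {
	var processed int64
	// worker is written as usual - it still receives one item at a time
	worker := pool.WorkerFunc[int](func(ctx context.Context, v int) error {
		atomic.AddInt64(&processed, 1)
		return nil
	})

	// items are accumulated into batches of 50 before hitting the channel
	p := pool.New[int](4, worker).WithBatchSize(50)
	if err := p.Go(context.Background()); err != nil {
		panic(err)
	}

	go func() {
		defer p.Close(context.Background()) // signal no more data
		for i := 0; i < 1000; i++ {
			p.Submit(i)
		}
	}()

	if err := p.Wait(context.Background()); err != nil {
		panic(err)
	}
	fmt.Println("processed:", atomic.LoadInt64(&processed)) // prints 1000
}
```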
### Work Distribution

Control how work is distributed among workers using chunk functions:

```go
// distribute by first character of string
p := pool.New[string](3, worker).WithChunkFn(func(v string) string {
	return v[:1] // same first char goes to same worker
})

// distribute by user ID to ensure user's tasks go to same worker
p := pool.New[Task](3, worker).WithChunkFn(func(t Task) string {
	return strconv.Itoa(t.UserID)
})
```

How distribution works:
1. Without chunk function:
   - Items are distributed randomly among workers
   - Good for independent tasks

2. With chunk function:
   - Function returns string key for each item
   - Items with the same key always go to the same worker
   - Uses consistent hashing to map keys to workers

When to use custom distribution:
- Maintain ordering for related items
- Optimize cache usage by worker
- Ensure exclusive access to resources
- Process data consistently
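To make the key-to-worker mapping concrete, here is a standalone illustration of the idea behind key hashing. This is not the pool's actual internal implementation (its hash function may differ); it only demonstrates the property the pool relies on, namely that the same key always lands on the same worker slot:

```go
package main

import (
	"fmt"
	"hash/fnv"
)

// workerIndex maps a chunk key to a worker slot deterministically;
// illustration only - the pool does the equivalent internally
func workerIndex(key string, nWorkers int) int {
	h := fnv.New32a()
	_, _ = h.Write([]byte(key)) // fnv hashing never returns an error
	return int(h.Sum32()) % nWorkers
}

func main() {
	for _, key := range []string{"user-42", "user-97", "user-42"} {
		fmt.Printf("%s -> worker %d\n", key, workerIndex(key, 3))
	}
	// "user-42" prints the same worker index both times
}
```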
## Middleware Support

The package supports middleware pattern similar to HTTP middleware in Go. Middleware can be used to add cross-cutting concerns like:
- Retries with backoff
- Timeouts
- Panic recovery
- Rate limiting
- Metrics and logging
- Error handling

Built-in middleware:
```go
// Add retry with exponential backoff
p.Use(middleware.Retry[string](3, time.Second))

// Add timeout per operation
p.Use(middleware.Timeout[string](5 * time.Second))

// Add panic recovery
p.Use(middleware.Recovery[string](func(p interface{}) {
	log.Printf("recovered from panic: %v", p)
}))

// Add validation before processing
p.Use(middleware.Validator[string](validator))

// Add rate limiting
p.Use(middleware.RateLimiter[string](10, 5)) // 10 requests/sec with burst of 5
```

Custom middleware:
```go
logging := func(next pool.Worker[string]) pool.Worker[string] {
	return pool.WorkerFunc[string](func(ctx context.Context, v string) error {
		log.Printf("processing: %v", v)
		err := next.Do(ctx, v)
		log.Printf("completed: %v, err: %v", v, err)
		return err
	})
}

p.Use(logging)
```

Multiple middleware execute in the same order as provided:
```go
p.Use(logging, metrics, retry) // order: logging -> metrics -> retry -> worker
```

## Install and update

```bash
go get -u github.com/go-pkgz/pool
```

## Usage Examples

### Basic Example

```go
func main() {
	// create a worker function processing strings
	worker := pool.WorkerFunc[string](func(ctx context.Context, v string) error {
		fmt.Printf("processing: %s\n", v)
		return nil
	})

	// create a pool with 2 workers
	p := pool.New[string](2, worker)

	// start the pool
	if err := p.Go(context.Background()); err != nil {
		log.Fatal(err)
	}

	// submit work
	p.Submit("task1")
	p.Submit("task2")
	p.Submit("task3")

	// close the pool and wait for completion
	if err := p.Close(context.Background()); err != nil {
		log.Fatal(err)
	}
}
```

### Error Handling

```go
worker := pool.WorkerFunc[string](func(ctx context.Context, v string) error {
	if strings.Contains(v, "error") {
		return fmt.Errorf("failed to process %s", v)
	}
	return nil
})

// continue processing on errors
p := pool.New[string](2, worker).WithContinueOnError()
```

### Collecting Results

```go
// create a collector for results
collector := pool.NewCollector[Result](ctx, 10)

// worker that produces results
worker := pool.WorkerFunc[Input](func(ctx context.Context, v Input) error {
	result := process(v)
	collector.Submit(result)
	return nil
})

p := pool.New[Input](2, worker)

// get results through iteration
for v, err := range collector.Iter() {
	if err != nil {
		return err
	}
	// use v
}

// or collect all at once
results, err := collector.All()
```

### Metrics and Monitoring

```go
// create worker with metrics tracking
worker := pool.WorkerFunc[string](func(ctx context.Context, v string) error {
	m := metrics.Get(ctx)
	if strings.HasPrefix(v, "important") {
		m.Inc("important-tasks")
	}
	return process(v)
})

// create and run pool
p := pool.New[string](2, worker)
p.Go(context.Background())

// process work
p.Submit("task1")
p.Submit("important-task2")
p.Close(context.Background())

// get metrics
metrics := p.Metrics()
stats := metrics.GetStats()
fmt.Printf("Processed: %d\n", stats.Processed)
fmt.Printf("Errors: %d\n", stats.Errors)
fmt.Printf("Processing time: %v\n", stats.ProcessingTime)
fmt.Printf("Wait time: %v\n", stats.WaitTime)
fmt.Printf("Total time: %v\n", stats.TotalTime)

// get custom metrics
fmt.Printf("Important tasks: %d\n", metrics.Get("important-tasks"))
```

## Flow Control

The package provides several methods for flow control and completion:

```go
// Submit adds items to the pool. Not safe for concurrent use.
// Used by the producer (sender) of data.
p.Submit(item)

// Send safely adds items to the pool from multiple goroutines.
// Used when submitting from worker to another pool, or when multiple goroutines send data.
p.Send(item)

// Close tells workers no more data will be submitted.
// Used by the producer (sender) of data.
p.Close(ctx)

// Wait blocks until all processing is done.
// Used by the consumer (receiver) of results.
p.Wait(ctx)
```
Common usage patterns:

```go
// 1. Single producer submitting items
go func() {
	defer p.Close(ctx) // signal no more data
	for _, task := range tasks {
		p.Submit(task) // Submit is safe here - single goroutine
	}
}()

// 2. Workers submitting to next stage
p1 := pool.New[int](5, pool.WorkerFunc[int](func(ctx context.Context, v int) error {
	result := process(v)
	p2.Send(result) // Send is safe for concurrent calls from workers
	return nil
}))

// 3. Consumer waiting for completion
if err := p.Wait(ctx); err != nil {
	// handle error
}
```

Pool completion callback allows executing code when all workers are done:
```go
p := pool.New[string](5, worker).
	WithPoolCompleteFn(func(ctx context.Context) error {
		// called once after all workers complete
		log.Println("all workers finished")
		return nil
	})
```

The completion callback executes once all workers have completed processing, including runs where errors occurred but the pool continued (`WithContinueOnError()`). It does not execute on context cancellation.

Important notes:
- Use `Submit` when sending items from a single goroutine
- Use `Send` when workers need to submit items to another pool
- Pool completion callback helps coordinate multi-stage processing
- Errors in completion callback are included in pool's error result
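Putting these notes together, here is a sketch of a two-stage pipeline composed from the calls shown above: stage-1 workers use `Send` (they submit concurrently), and stage 1's completion callback closes stage 2 so it knows its input is exhausted.

```go
func main() {
	ctx := context.Background()

	// stage 2: consumes ints produced by stage 1
	p2 := pool.New[int](2, pool.WorkerFunc[int](func(ctx context.Context, v int) error {
		fmt.Println("stage2 got:", v)
		return nil
	}))

	// stage 1: transforms strings and forwards results to stage 2
	p1 := pool.New[string](4, pool.WorkerFunc[string](func(ctx context.Context, v string) error {
		p2.Send(len(v)) // Send, not Submit: called from concurrent workers
		return nil
	})).WithPoolCompleteFn(func(ctx context.Context) error {
		return p2.Close(ctx) // stage 1 done - no more input for stage 2
	})

	if err := p2.Go(ctx); err != nil {
		log.Fatal(err)
	}
	if err := p1.Go(ctx); err != nil {
		log.Fatal(err)
	}

	go func() {
		defer p1.Close(ctx)
		for _, s := range []string{"a", "bb", "ccc"} {
			p1.Submit(s)
		}
	}()

	if err := p1.Wait(ctx); err != nil {
		log.Fatal(err)
	}
	if err := p2.Wait(ctx); err != nil {
		log.Fatal(err)
	}
}
```

Under the semantics described above, stage 1's completion callback runs before its `Wait` returns, so by the time `p2.Wait` is called stage 2 has already been closed and sees the complete stream.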
## Optional parameters

Configure pool behavior using With methods:

```go
p := pool.New[string](2, worker).    // pool with 2 workers
	WithBatchSize(10).               // process items in batches
	WithWorkerChanSize(5).           // set worker channel buffer size
	WithChunkFn(chunkFn).            // control work distribution
	WithContinueOnError().           // don't stop on errors
	WithWorkerCompleteFn(completeFn) // called when worker finishes
```

Available options:
- `WithBatchSize(size int)` - enables batch processing, accumulating items before sending to workers (default: 10)
- `WithWorkerChanSize(size int)` - sets buffer size for worker channels (default: 1)
- `WithChunkFn(fn func(T) string)` - controls work distribution by key (default: none, random distribution)
- `WithContinueOnError()` - continues processing on errors (default: false)
- `WithWorkerCompleteFn(fn func(ctx, id, worker))` - called on worker completion (default: none)
- `WithPoolCompleteFn(fn func(ctx))` - called on pool completion, i.e., when all workers have completed (default: none)

## Collector

The Collector helps manage asynchronous results from pool workers in a synchronous way. It's particularly useful when you need to gather and process results produced by workers. The Collector uses Go generics and is compatible with any result type.

### Features
- Generic implementation supporting any result type
- Context awareness with graceful cancellation
- Buffered collection with configurable size
- Built-in iterator pattern
- Ability to collect all results at once

### Example Usage

```go
// create a collector for results with buffer of 10
collector := pool.NewCollector[string](ctx, 10)

// worker submits results to collector
worker := pool.WorkerFunc[int](func(ctx context.Context, v int) error {
	result := process(v)
	collector.Submit(result)
	return nil
})

// create and run pool
p := pool.New[int](5, worker)
if err := p.Go(ctx); err != nil {
	return err
}

// submit items
for i := 0; i < 100; i++ {
	p.Submit(i)
}
p.Close(ctx)

// Option 1: process results as they arrive with iterator
for result, err := range collector.Iter() {
	if err != nil {
		return err // context cancelled or other error
	}
	// process result
}

// Option 2: get all results at once
results, err := collector.All()
if err != nil {
	return err
}
// use results slice
```

### API Reference

```go
// create new collector
collector := pool.NewCollector[ResultType](ctx, bufferSize)

// submit result to collector
collector.Submit(result)

// close collector when done submitting
collector.Close()

// iterate over results
for result, err := range collector.Iter() {
	// process result
}

// get all results
results, err := collector.All()
```

### Best Practices

1. **Buffer Size**: Choose based on expected throughput and memory constraints
   - Too small: may block workers
   - Too large: may use excessive memory

2. **Error Handling**: Always check error from iterator
   ```go
   for result, err := range collector.Iter() {
       if err != nil {
           // handle context cancellation
           return err
       }
   }
   ```

3. **Context Usage**: Pass context that matches pool's lifecycle
   ```go
   collector := pool.NewCollector[Result](poolCtx, size)
   ```

4. **Cleanup**: Close collector when done submitting
   ```go
   defer collector.Close()
   ```

## Performance

The pool package is designed for high performance and efficiency. Benchmarks show that it consistently outperforms both the standard `errgroup`-based approach and traditional goroutine patterns with shared channels.

### Benchmark Results

Tests running 1,000,000 tasks with 8 workers on Apple M4 Max:

```
errgroup:                             1.878s
pool (default):                       1.213s (~35% faster)
pool (chan size=100):                 1.199s
pool (chan size=100, batch size=100): 1.105s (~41% faster)
pool (with chunking):                 1.113s
```

Detailed benchmark comparison (lower is better):
```
errgroup:                          18.56ms/op
pool (default):                    12.29ms/op
pool (chan size=100):              12.35ms/op
pool (batch size=100):             11.22ms/op
pool (with batching and chunking): 11.43ms/op
```
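These figures are machine-dependent; to get numbers for your own hardware, the comparison benchmark in this repo's `bench_test.go` can be run with the standard Go tooling (the flags below are plain `go test` options):

```bash
go test -run='^$' -bench=BenchmarkPoolCompare -benchmem
```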
### Why Pool is Faster

1. **Efficient Channel Usage**
   - The pool uses dedicated channels per worker when chunking is enabled
   - Default channel buffer size is optimized for common use cases
   - Minimizes channel contention compared to shared channel approaches

2. **Smart Batching**
   - Reduces channel communication overhead by processing multiple items at once
   - Default batch size of 10 provides good balance between latency and throughput
   - Accumulators pre-allocated with capacity to minimize memory allocations

3. **Work Distribution**
   - Optional chunking ensures related tasks go to the same worker
   - Improves cache locality and reduces cross-worker coordination
   - Hash-based distribution provides good load balancing

4. **Resource Management**
   - Workers are pre-initialized and reused
   - No per-task goroutine creation overhead
   - Efficient cleanup and resource handling

### Configuration Impact

- **Default Settings**: Out of the box, the pool is ~35% faster than errgroup
- **Channel Buffering**: Increasing channel size can help with bursty workloads
- **Batching**: Adding batching improves performance by another ~6%
- **Chunking**: Optional chunking has minimal overhead when enabled

### When to Use What

1. **Default Settings** - Good for most use cases
   ```go
   p := pool.New[string](5, worker)
   ```

2. **High-Throughput** - For heavy workloads with many items
   ```go
   p := pool.New[string](5, worker).
       WithWorkerChanSize(100).
       WithBatchSize(100)
   ```

3. **Related Items** - When items need to be processed by the same worker
   ```go
   p := pool.New[string](5, worker).
       WithChunkFn(func(v string) string {
           return v[:1] // group by first character
       })
   ```

### Alternative pool implementations

- [pond](https://github.com/alitto/pond) - pond is a minimalistic and high-performance Go library designed to elegantly manage concurrent tasks.
- [goworker](https://github.com/benmanns/goworker) - goworker is a Resque-compatible, Go-based background worker. It allows you to push jobs into a queue using an expressive language like Ruby while harnessing the efficiency and concurrency of Go to minimize job latency and cost.
- [gowp](https://github.com/xxjwxc/gowp) - golang worker pool
- [conc](https://github.com/sourcegraph/conc) - better structured concurrency for go
- for more see [awesome-go goroutines](https://awesome-go.com/goroutines/) list

## Contributing

Contributions to `pool` are welcome! Please submit a pull request or open an issue for any bugs or feature requests.

## License

`pool` is available under the MIT license. See the [LICENSE](LICENSE) file for more info.
-------------------------------------------------------------------------------- /bench_test.go: -------------------------------------------------------------------------------- 1 | package pool 2 | 3 | import ( 4 | "context" 5 | "os" 6 | "runtime/pprof" 7 | "strconv" 8 | "sync/atomic" 9 | "testing" 10 | "time" 11 | 12 | "github.com/stretchr/testify/assert" 13 | "github.com/stretchr/testify/require" 14 | "golang.org/x/sync/errgroup" 15 | ) 16 | 17 | // benchTask is a somewhat realistic task that combines CPU work with memory allocation 18 | func benchTask(size int) []int { //nolint:unparam // size is used in the benchmark 19 | task := func(n int) int { // simulate some CPU work 20 | sum := 0 21 | for i := 0; i < n; i++ { 22 | sum += i 23 | } 24 | return sum 25 | } 26 | res := make([]int, 0, size) 27 | for i := 0; i < size; i++ { 28 | res = append(res, task(1)) 29 | } 30 | return res 31 | } 32 | 33 | func TestPoolPerf(t *testing.T) { 34 | n := 1000 35 | ctx := context.Background() 36 | 37 | var egDuration time.Duration 38 | t.Run("errgroup", func(t *testing.T) { 39 | var count2 int32 40 | st := time.Now() 41 | defer func() { 42 | egDuration = time.Since(st) 43 | t.Logf("elapsed errgroup: %v", time.Since(st)) 44 | }() 45 | g, _ := errgroup.WithContext(ctx) 46 | g.SetLimit(8) 47 | for i := 0; i < 1000000; i++ { 48 | g.Go(func() error { 49 | benchTask(n) 50 | atomic.AddInt32(&count2, 1) 51 | return nil 52 | }) 53 | } 54 | require.NoError(t, g.Wait()) 55 | assert.Equal(t, int32(1000000), atomic.LoadInt32(&count2)) 56 | }) 57 | 58 | t.Run("pool default", func(t *testing.T) { 59 | // pool with 8 workers 60 | var count1 int32 61 | worker := WorkerFunc[int](func(context.Context, int) error { 62 | benchTask(n) 63 | atomic.AddInt32(&count1, 1) 64 | return nil 65 | }) 66 | 67 | st := time.Now() 68 | p := New[int](8, worker) 69 | require.NoError(t, p.Go(ctx)) 70 | go func() { 71 | for i := 0; i < 1000000; i++ { 72 | p.Submit(i) 73 | } 74 | assert.NoError(t, p.Close(ctx)) 75 | }() 76 | require.NoError(t, p.Wait(ctx)) 77 | assert.Equal(t, int32(1000000), atomic.LoadInt32(&count1)) 78 | t.Logf("elapsed pool: %v", time.Since(st)) 79 | assert.Less(t, time.Since(st), egDuration) 80 | }) 81 | 82 | t.Run("pool with 100 chan size", func(t *testing.T) { 83 | // pool with 8 workers 84 | var count1 int32 85 | worker := WorkerFunc[int](func(context.Context, int) error { 86 | benchTask(n) 87 | atomic.AddInt32(&count1, 1) 88 | return nil 89 | }) 90 | 91 | st := time.Now() 92 | p := New[int](8, worker).WithWorkerChanSize(100) 93 | require.NoError(t, p.Go(ctx)) 94 | go func() { 95 | for i := 0; i < 1000000; i++ { 96 | p.Submit(i) 97 | } 98 | assert.NoError(t, p.Close(ctx)) 99 | }() 100 | require.NoError(t, p.Wait(ctx)) 101 | assert.Equal(t, int32(1000000), atomic.LoadInt32(&count1)) 102 | t.Logf("elapsed pool: %v", time.Since(st)) 103 | assert.Less(t, time.Since(st), egDuration) 104 | }) 105 | 106 | t.Run("pool with 100 chan size and 100 batch size", func(t *testing.T) { 107 | // pool with 8 workers 108 | var count1 int32 109 | worker := WorkerFunc[int](func(context.Context, int) error { 110 | benchTask(n) 111 | atomic.AddInt32(&count1, 1) 112 | return nil 113 | }) 114 | 115 | st := time.Now() 116 | p := New[int](8, worker).WithWorkerChanSize(100).WithBatchSize(100) 117 | require.NoError(t, p.Go(ctx)) 118 | go func() { 119 | for i := 0; i < 1000000; i++ { 120 | p.Submit(i) 121 | } 122 | assert.NoError(t, p.Close(ctx)) 123 | }() 124 | require.NoError(t, p.Wait(ctx)) 125 | assert.Equal(t, int32(1000000), 
atomic.LoadInt32(&count1)) 126 | t.Logf("elapsed pool: %v", time.Since(st)) 127 | assert.Less(t, time.Since(st), egDuration) 128 | }) 129 | 130 | t.Run("pool with 100 chan size and 100 batch size and chunking", func(t *testing.T) { 131 | // pool with 8 workers 132 | var count1 int32 133 | worker := WorkerFunc[int](func(context.Context, int) error { 134 | benchTask(n) 135 | atomic.AddInt32(&count1, 1) 136 | return nil 137 | }) 138 | 139 | st := time.Now() 140 | p := New[int](8, worker).WithWorkerChanSize(100).WithBatchSize(100).WithChunkFn(func(v int) string { 141 | return strconv.Itoa(v % 8) // distribute by modulo 142 | }) 143 | require.NoError(t, p.Go(ctx)) 144 | go func() { 145 | for i := 0; i < 1000000; i++ { 146 | p.Submit(i) 147 | } 148 | assert.NoError(t, p.Close(ctx)) 149 | }() 150 | require.NoError(t, p.Wait(ctx)) 151 | assert.Equal(t, int32(1000000), atomic.LoadInt32(&count1)) 152 | t.Logf("elapsed pool: %v", time.Since(st)) 153 | assert.Less(t, time.Since(st), egDuration) 154 | }) 155 | 156 | } 157 | 158 | func BenchmarkPoolCompare(b *testing.B) { 159 | ctx := context.Background() 160 | iterations := 10000 161 | workers := 8 162 | n := 1000 163 | 164 | b.Run("errgroup", func(b *testing.B) { 165 | b.ResetTimer() 166 | for i := 0; i < b.N; i++ { 167 | var count int32 168 | g, _ := errgroup.WithContext(ctx) 169 | g.SetLimit(workers) 170 | 171 | for j := 0; j < iterations; j++ { 172 | g.Go(func() error { 173 | benchTask(n) 174 | atomic.AddInt32(&count, 1) 175 | return nil 176 | }) 177 | } 178 | require.NoError(b, g.Wait()) 179 | require.Equal(b, int32(iterations), atomic.LoadInt32(&count)) 180 | } 181 | }) 182 | 183 | b.Run("pool default", func(b *testing.B) { 184 | b.ResetTimer() 185 | for i := 0; i < b.N; i++ { 186 | var count int32 187 | p := New[int](workers, WorkerFunc[int](func(context.Context, int) error { 188 | benchTask(n) 189 | atomic.AddInt32(&count, 1) 190 | return nil 191 | })) 192 | 193 | require.NoError(b, p.Go(ctx)) 194 | go func() { 195 | for j := 0; j < iterations; j++ { 196 | p.Submit(j) 197 | } 198 | p.Close(ctx) 199 | }() 200 | require.NoError(b, p.Wait(ctx)) 201 | require.Equal(b, int32(iterations), atomic.LoadInt32(&count)) 202 | } 203 | }) 204 | 205 | b.Run("pool with chan=100", func(b *testing.B) { 206 | b.ResetTimer() 207 | for i := 0; i < b.N; i++ { 208 | var count int32 209 | p := New[int](workers, WorkerFunc[int](func(context.Context, int) error { 210 | benchTask(n) 211 | atomic.AddInt32(&count, 1) 212 | return nil 213 | })).WithWorkerChanSize(100) 214 | 215 | require.NoError(b, p.Go(ctx)) 216 | go func() { 217 | for j := 0; j < iterations; j++ { 218 | p.Submit(j) 219 | } 220 | p.Close(ctx) 221 | }() 222 | require.NoError(b, p.Wait(ctx)) 223 | require.Equal(b, int32(iterations), atomic.LoadInt32(&count)) 224 | } 225 | }) 226 | 227 | b.Run("pool with batching", func(b *testing.B) { 228 | b.ResetTimer() 229 | for i := 0; i < b.N; i++ { 230 | var count int32 231 | p := New[int](workers, WorkerFunc[int](func(context.Context, int) error { 232 | benchTask(n) 233 | atomic.AddInt32(&count, 1) 234 | return nil 235 | })).WithWorkerChanSize(100).WithBatchSize(100) 236 | 237 | require.NoError(b, p.Go(ctx)) 238 | go func() { 239 | for j := 0; j < iterations; j++ { 240 | p.Submit(j) 241 | } 242 | p.Close(ctx) 243 | }() 244 | require.NoError(b, p.Wait(ctx)) 245 | require.Equal(b, int32(iterations), atomic.LoadInt32(&count)) 246 | } 247 | }) 248 | 249 | b.Run("pool with batching and chunking", func(b *testing.B) { 250 | b.ResetTimer() 251 | for i := 0; i < b.N; i++ { 
252 | var count int32 253 | p := New[int](workers, WorkerFunc[int](func(context.Context, int) error { 254 | benchTask(n) 255 | atomic.AddInt32(&count, 1) 256 | return nil 257 | })).WithWorkerChanSize(100).WithBatchSize(100).WithChunkFn(func(v int) string { 258 | return strconv.Itoa(v % workers) 259 | }) 260 | 261 | require.NoError(b, p.Go(ctx)) 262 | go func() { 263 | for j := 0; j < iterations; j++ { 264 | p.Submit(j) 265 | } 266 | p.Close(ctx) 267 | }() 268 | require.NoError(b, p.Wait(ctx)) 269 | require.Equal(b, int32(iterations), atomic.LoadInt32(&count)) 270 | } 271 | }) 272 | } 273 | 274 | func TestPoolWithProfiling(t *testing.T) { 275 | // run only if env PROFILING is set 276 | if os.Getenv("PROFILING") == "" { 277 | t.Skip("skipping profiling test; set PROFILING to run") 278 | } 279 | 280 | // start CPU profile 281 | cpuFile, err := os.Create("cpu.prof") 282 | require.NoError(t, err) 283 | defer cpuFile.Close() 284 | require.NoError(t, pprof.StartCPUProfile(cpuFile)) 285 | defer pprof.StopCPUProfile() 286 | 287 | // create memory profile 288 | memFile, err := os.Create("mem.prof") 289 | require.NoError(t, err) 290 | defer memFile.Close() 291 | 292 | // run pool test 293 | iterations := 100000 294 | ctx := context.Background() 295 | worker := WorkerFunc[int](func(context.Context, int) error { 296 | benchTask(30000) 297 | return nil 298 | }) 299 | 300 | // test pool implementation 301 | p := New[int](4, worker).WithWorkerChanSize(100) 302 | require.NoError(t, p.Go(ctx)) 303 | 304 | done := make(chan struct{}) 305 | go func() { 306 | for i := 0; i < iterations; i++ { 307 | p.Submit(i) 308 | } 309 | p.Close(ctx) 310 | close(done) 311 | }() 312 | 313 | select { 314 | case <-done: 315 | case <-time.After(5 * time.Second): 316 | t.Fatal("timeout") 317 | } 318 | 319 | // create memory profile after test 320 | require.NoError(t, pprof.WriteHeapProfile(memFile)) 321 | } 322 | -------------------------------------------------------------------------------- /collector.go: -------------------------------------------------------------------------------- 1 | package pool 2 | 3 | import ( 4 | "context" 5 | "iter" 6 | ) 7 | 8 | // Collector provides synchronous access to async data from pool's response channel 9 | type Collector[V any] struct { 10 | ch chan V 11 | ctx context.Context 12 | } 13 | 14 | // NewCollector creates a new collector with a given context and buffer size for the channel 15 | func NewCollector[V any](ctx context.Context, size int) *Collector[V] { 16 | return &Collector[V]{ 17 | ch: make(chan V, size), 18 | ctx: ctx, 19 | } 20 | } 21 | 22 | // Submit sends a value to the collector 23 | func (c *Collector[V]) Submit(v V) { 24 | c.ch <- v 25 | } 26 | 27 | // Close closes the collector 28 | func (c *Collector[V]) Close() { 29 | close(c.ch) 30 | } 31 | 32 | // Iter returns an iterator over collector values 33 | func (c *Collector[V]) Iter() iter.Seq2[V, error] { 34 | return func(yield func(V, error) bool) { 35 | for { 36 | select { 37 | case v, ok := <-c.ch: 38 | if !ok { 39 | return 40 | } 41 | if !yield(v, nil) { 42 | return 43 | } 44 | case <-c.ctx.Done(): 45 | var zero V 46 | yield(zero, c.ctx.Err()) 47 | return 48 | } 49 | } 50 | } 51 | } 52 | 53 | // All gets all data from the collector 54 | func (c *Collector[V]) All() (res []V, err error) { 55 | for v, err := range c.Iter() { 56 | if err != nil { 57 | return res, err 58 | } 59 | res = append(res, v) 60 | } 61 | return res, nil 62 | } 63 | -------------------------------------------------------------------------------- 
/collector_test.go: -------------------------------------------------------------------------------- 1 | package pool 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | "time" 7 | 8 | "github.com/stretchr/testify/require" 9 | ) 10 | 11 | func TestCollector_Basic(t *testing.T) { 12 | ctx := context.Background() 13 | c := NewCollector[string](ctx, 5) 14 | 15 | go func() { 16 | c.Submit("test1") 17 | c.Submit("test2") 18 | c.Submit("test3") 19 | c.Close() 20 | }() 21 | 22 | var values []string 23 | var lastErr error 24 | for v, err := range c.Iter() { 25 | if err != nil { 26 | lastErr = err 27 | break 28 | } 29 | values = append(values, v) 30 | } 31 | 32 | require.NoError(t, lastErr) 33 | require.Equal(t, []string{"test1", "test2", "test3"}, values) 34 | } 35 | 36 | func TestCollector_ContextCancellation(t *testing.T) { 37 | ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*100) 38 | defer cancel() 39 | 40 | c := NewCollector[int](ctx, 5) 41 | 42 | go func() { 43 | c.Submit(1) 44 | time.Sleep(time.Second) // simulate slow producer 45 | c.Submit(2) 46 | c.Close() 47 | }() 48 | 49 | var values []int 50 | var lastErr error 51 | for v, err := range c.Iter() { 52 | if err != nil { 53 | lastErr = err 54 | break 55 | } 56 | values = append(values, v) 57 | } 58 | 59 | require.ErrorIs(t, lastErr, context.DeadlineExceeded) 60 | require.Equal(t, []int{1}, values) 61 | } 62 | 63 | func TestCollector_All(t *testing.T) { 64 | ctx := context.Background() 65 | c := NewCollector[int](ctx, 5) 66 | 67 | go func() { 68 | for i := range 3 { 69 | c.Submit(i) 70 | } 71 | c.Close() 72 | }() 73 | 74 | values, err := c.All() 75 | require.NoError(t, err) 76 | require.Equal(t, []int{0, 1, 2}, values) 77 | } 78 | 79 | func TestCollector_All_WithError(t *testing.T) { 80 | ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*100) 81 | defer cancel() 82 | 83 | c := NewCollector[int](ctx, 5) 84 | 85 | go func() { 86 | c.Submit(1) 87 | time.Sleep(time.Second) 88 | c.Submit(2) 89 | c.Close() 90 | }() 91 | 92 | values, err := c.All() 93 | require.ErrorIs(t, err, context.DeadlineExceeded) 94 | require.Equal(t, []int{1}, values) 95 | } 96 | 97 | func TestCollector_Multiple(t *testing.T) { 98 | ctx := context.Background() 99 | c1 := NewCollector[string](ctx, 5) 100 | c2 := NewCollector[string](ctx, 5) 101 | 102 | go func() { 103 | c1.Submit("c1-1") 104 | c1.Submit("c1-2") 105 | c1.Close() 106 | }() 107 | 108 | go func() { 109 | c2.Submit("c2-1") 110 | c2.Submit("c2-2") 111 | c2.Close() 112 | }() 113 | 114 | v1, err := c1.All() 115 | require.NoError(t, err) 116 | require.Equal(t, []string{"c1-1", "c1-2"}, v1) 117 | 118 | v2, err := c2.All() 119 | require.NoError(t, err) 120 | require.Equal(t, []string{"c2-1", "c2-2"}, v2) 121 | } 122 | -------------------------------------------------------------------------------- /doc.go: -------------------------------------------------------------------------------- 1 | // Package pool provides a simple worker pool implementation with a single stage only. 2 | // It allows submitting tasks to be processed in parallel by a number of workers. 
//
// The package supports both stateless and stateful workers through two distinct constructors:
//   - New - for pools with a single shared worker instance
//   - NewStateful - for pools where each goroutine gets its own worker instance
//
// Worker Types:
//
// The package provides a simple Worker interface that can be implemented in two ways:
//
//	type Worker[T any] interface {
//		Do(ctx context.Context, v T) error
//	}
//
// 1. Direct implementation for complex stateful workers:
//
//	type dbWorker struct {
//		conn *sql.DB
//	}
//
//	func (w *dbWorker) Do(ctx context.Context, v string) error {
//		_, err := w.conn.ExecContext(ctx, "INSERT INTO items (value) VALUES (?)", v)
//		return err
//	}
//
// 2. Function adapter for simple stateless workers:
//
//	worker := pool.WorkerFunc[string](func(ctx context.Context, v string) error {
//		// process the value
//		return nil
//	})
//
// Basic Usage:
//
// For stateless operations (like HTTP requests, parsing operations, etc.):
//
//	worker := pool.WorkerFunc[string](func(ctx context.Context, v string) error {
//		resp, err := http.Get(v)
//		if err != nil {
//			return err
//		}
//		defer resp.Body.Close()
//		return nil
//	})
//
//	p := pool.New[string](2, worker)
//	if err := p.Go(context.Background()); err != nil {
//		return err
//	}
//
//	// submit work
//	p.Submit("task1")
//	p.Submit("task2")
//
//	if err := p.Close(context.Background()); err != nil {
//		return err
//	}
//
// For stateful operations (like database connections, file handles, etc.):
//
//	maker := func() pool.Worker[string] {
//		return &dbWorker{
//			conn: openConnection(),
//		}
//	}
//	p := pool.NewStateful[string](2, maker)
//
// Features:
//
//   - Generic worker pool implementation supporting any data type
//   - Configurable number of workers running in parallel
//   - Support for both stateless shared workers and per-worker instances
//   - Batching capability for processing multiple items at once
//   - Customizable work distribution through chunk functions
//   - Built-in metrics collection including processing times and counts
//   - Error handling with options to continue or stop on errors
//   - Context-based cancellation and timeouts
//   - Optional completion callbacks
//
// Advanced Features:
//
// Batching:
//
//	p := New[string](2, worker).WithBatchSize(10)
//
// Chunked distribution:
//
//	p := New[string](2, worker).WithChunkFn(func(v string) string {
//		return v // items with same hash go to same worker
//	})
//
// Error handling:
//
//	p := New[string](2, worker).WithContinueOnError()
//
// Metrics:
//
// The pool automatically tracks standard stats metrics (processed counts, errors, timings).
// Workers can also record additional custom metrics:
//
//	m := metrics.Get(ctx)
//	m.Inc("custom-counter")
//
// Access metrics:
//
//	metrics := p.Metrics()
//	value := metrics.Get("custom-counter")
//
// Statistical metrics include:
//
//   - Number of processed items
//   - Number of errors
//   - Number of dropped items
//   - Processing time
//   - Wait time
//   - Initialization time
//   - Total time
//
// Access stats:
//
//	metrics := p.Metrics()
//	stats := metrics.GetStats()
//	fmt.Printf("processed: %d, errors: %d", stats.Processed, stats.Errors)
//
// Data Collection:
//
// For collecting results from workers, use the Collector:
//
//	collector := pool.NewCollector[Result](ctx, 10)
//	worker := pool.WorkerFunc[Input](func(ctx context.Context, v Input) error {
//		result := process(v)
//		collector.Submit(result)
//		return nil
//	})
//
// Results can be retrieved either through iteration:
//
//	for v, err := range collector.Iter() {
//		if err != nil {
//			return err
//		}
//		// use v
//	}
//
// Or by collecting all at once:
//
//	results, err := collector.All()
//
// Middleware Support:
//
// The pool supports middleware pattern similar to HTTP middleware in Go. Middleware can be used
// to add functionality like retries, timeouts, metrics, or error handling:
//
//	// retry middleware
//	retryMiddleware := func(next Worker[string]) Worker[string] {
//		return WorkerFunc[string](func(ctx context.Context, v string) error {
//			var lastErr error
//			for i := 0; i < 3; i++ {
//				if err := next.Do(ctx, v); err == nil {
//					return nil
//				} else {
//					lastErr = err
//				}
//				time.Sleep(time.Second * time.Duration(i))
//			}
//			return fmt.Errorf("failed after 3 attempts: %w", lastErr)
//		})
//	}
//
//	p := New[string](2, worker).Use(retryMiddleware)
//
// Multiple middleware can be chained, and they execute in the same order as provided:
//
//	p.Use(logging, metrics, retry) // executes: logging -> metrics -> retry -> worker
package pool

--------------------------------------------------------------------------------
/examples/README.md:
--------------------------------------------------------------------------------

1 | # Examples 2 | 3 | This directory contains examples demonstrating various aspects of the [go-pkgz/pool](https://github.com/go-pkgz/pool) package. 4 | 5 | **Important Note:** These examples are intentionally minimalistic and somewhat artificial. They may not represent how one would solve similar problems in real-life applications. Instead, they focus on clearly demonstrating specific features and usage patterns of the pool package. 6 | 7 | ## Available Examples 8 | 9 | ### [tokenizer_stateful](./tokenizer_stateful) 10 | Shows how to use stateful workers where each worker maintains its own independent state (word frequency counters). Demonstrates: 11 | - Worker state isolation 12 | - Result collection through completion callbacks 13 | - Performance statistics tracking 14 | 15 | ### [tokenizer_stateless](./tokenizer_stateless) 16 | Implements the same text processing but using stateless workers with a shared collector. 
Demonstrates: 17 | - Simple worker functions 18 | - Shared result collection 19 | - Batch processing 20 | 21 | ### [parallel_files](./parallel_files) 22 | Shows how to process multiple files in parallel using chunks. Demonstrates: 23 | - Chunk-based file processing 24 | - Custom metrics collection 25 | - Work distribution across workers 26 | 27 | ### [middleware](./middleware) 28 | Shows how to use middleware to add cross-cutting functionality to pool processing. Demonstrates: 29 | - Built-in and custom middleware 30 | - Error handling with retries 31 | - Input validation 32 | - Structured logging 33 | - Recovery from panics 34 | 35 | ### [direct_chain](./direct_chain) 36 | Shows how to chain multiple worker pools by having workers directly submit to the next pool. Demonstrates: 37 | - Multi-stage processing pipeline 38 | - Direct pool submission between stages 39 | - Type transformation 40 | - Pool coordination 41 | 42 | ### [collectors_chain](./collectors_chain) 43 | Shows how to chain multiple worker pools using collectors. Demonstrates: 44 | - Multi-stage processing pipeline 45 | - Type-safe data transformation 46 | - Automatic coordination via iterators 47 | - Independent pool scaling 48 | 49 | ### [collector_errors](./collector_errors) 50 | Shows how to handle and categorize errors in parallel processing. Demonstrates: 51 | - Error collection pattern 52 | - Error categorization and grouping 53 | - Timing information tracking 54 | - Statistical reporting on errors 55 | 56 | ## Running Examples 57 | 58 | Each example can be run from its directory: 59 | ```bash 60 | cd tokenizer_stateful 61 | go run main.go -file input.txt 62 | 63 | cd ../tokenizer_stateless 64 | go run main.go -file input.txt 65 | 66 | cd ../parallel_files 67 | go run main.go -pattern "*.txt" 68 | 69 | cd ../middleware 70 | go run main.go -workers 4 -retries 3 71 | 72 | cd ../direct_chain 73 | go run main.go 74 | 75 | cd ../collectors_chain 76 | go run main.go 77 | 78 | cd ../collector_errors 79 | go run main.go -workers 8 -jobs 100 -error-rate 0.3 80 | ``` 81 | 82 | ## Common Patterns 83 | 84 | While the examples are simplified, they showcase important pool package features: 85 | - Worker state management (stateful vs stateless) 86 | - Result collection strategies 87 | - Error handling approaches 88 | - Metrics and monitoring 89 | - Work distribution patterns 90 | - Middleware integration 91 | - Multi-stage processing pipelines -------------------------------------------------------------------------------- /examples/collector_errors/README.md: -------------------------------------------------------------------------------- 1 | # Error Collection and Handling Example 2 | 3 | This example demonstrates how to effectively handle and categorize errors in parallel processing using the [go-pkgz/pool](https://github.com/go-pkgz/pool) package. It shows a pattern for collecting, tracking, and analyzing errors that occur during concurrent task execution. 4 | 5 | ## What Makes it Special? 6 | 7 | 1. Error collection pattern: 8 | - Collects both successes and failures 9 | - Preserves error context and timing information 10 | - Allows post-processing analysis of error patterns 11 | - Continues processing despite errors 12 | 13 | 2. Error classification: 14 | - Groups errors by type for easier analysis 15 | - Tracks when and where errors occurred 16 | - Maintains job context with each error 17 | - Provides statistical insights on error distribution 18 | 19 | 3. 
Result aggregation: 20 | - Separates successes from failures 21 | - Calculates performance metrics by result type 22 | - Shows error distribution patterns 23 | - Provides comprehensive error reporting 24 | 25 | ## Features 26 | 27 | - Parallel job processing with configurable worker count 28 | - Configurable error rate for testing failure scenarios 29 | - Detailed error categorization and reporting 30 | - Timing information for both successful and failed jobs 31 | - Comprehensive statistics on processing performance 32 | - Graceful handling of context cancellation 33 | 34 | ## Installation 35 | 36 | ```bash 37 | go build 38 | ``` 39 | 40 | ## Usage 41 | 42 | ```bash 43 | go run main.go [options] 44 | ``` 45 | 46 | Options: 47 | - `-workers` - number of worker goroutines (default: 4) 48 | - `-jobs` - number of jobs to process (default: 20) 49 | - `-error-rate` - probability of job failure from 0 to 1 (default: 0.3) 50 | - `-timeout` - timeout for the entire operation (default: 10s) 51 | - `-verbose` - enable detailed logging (default: false) 52 | 53 | Example: 54 | ```bash 55 | go run main.go -workers 8 -jobs 100 -error-rate 0.4 -timeout 30s 56 | ``` 57 | 58 | ## Implementation Details 59 | 60 | The example demonstrates several key patterns: 61 | 62 | 1. Result type definition: 63 | ```go 64 | type Result struct { 65 | JobID string // ID of the job 66 | Success bool // whether the job succeeded 67 | Error error // error if job failed 68 | Timestamp time.Time // when the job completed 69 | Duration time.Duration // how long the job took 70 | } 71 | ``` 72 | 73 | 2. Error collection in workers: 74 | ```go 75 | if rand.Float64() < errorRate { 76 | err := errors.New("operation failed") 77 | collector.Submit(Result{ 78 | JobID: jobID, 79 | Success: false, 80 | Error: err, 81 | Timestamp: time.Now(), 82 | Duration: duration, 83 | }) 84 | return err // return error so pool metrics track it 85 | } 86 | ``` 87 | 88 | 3. Error categorization: 89 | ```go 90 | // group errors by type 91 | errorsByType := make(map[string][]Result) 92 | for _, result := range failures { 93 | errType := errorTypeString(result.Error) 94 | errorsByType[errType] = append(errorsByType[errType], result) 95 | } 96 | ``` 97 | 98 | ## Output Example 99 | 100 | ``` 101 | Processing summary: 102 | Total jobs: 100 103 | Results collected: 100 104 | Successful jobs: 61 105 | Failed jobs: 39 106 | Processing time: 218ms 107 | Total time: 223ms 108 | Avg success time: 104ms 109 | Avg failure time: 106ms 110 | 111 | Error details: 112 | • database connection failed (12 occurrences): 113 | - job-005 (at 15:04:02.123, took 115ms) 114 | - job-012 (at 15:04:02.247, took 98ms) 115 | - ... 116 | 117 | • validation failed (14 occurrences): 118 | - job-003 (at 15:04:02.089, took 103ms) 119 | - job-007 (at 15:04:02.187, took 112ms) 120 | - ... 121 | 122 | • timeout exceeded (13 occurrences): 123 | - job-001 (at 15:04:02.042, took 95ms) 124 | - job-014 (at 15:04:02.301, took 106ms) 125 | - ... 
126 | ``` 127 | 128 | ## Architecture 129 | 130 | The program follows this architecture: 131 | 132 | ``` 133 | Job Submission → Worker Pool → Result Collector → Error Analyzer 134 | (main goroutine) (N workers) (buffer channel) (main goroutine) 135 | submits jobs processes jobs collects results categorizes errors 136 | with random from workers generates reports 137 | success/failure calculates statistics 138 | ``` 139 | 140 | Key components: 141 | - Pool with configurable worker count 142 | - Collector for gathering both successes and failures 143 | - Type-safe error collection through `Result` type 144 | - Error categorization by error message 145 | - Statistical processing of successes vs failures 146 | 147 | ## Real-World Applications 148 | 149 | This pattern is useful for: 150 | - ETL processes that need to track failed records 151 | - API clients that need to analyze error patterns 152 | - Batch processing systems that need error reporting 153 | - Monitoring systems that track error rates 154 | - System health checks with error categorization 155 | - Performance testing with error simulation 156 | 157 | ## Notes 158 | 159 | - The example uses simulated random errors with configurable rate 160 | - Error types are categorized by message prefix for simplicity 161 | - In real applications, you might want to use typed errors or error codes 162 | - The pattern works well with both stateless and stateful workers 163 | - This approach provides much richer error information than simple error counts -------------------------------------------------------------------------------- /examples/collector_errors/go.mod: -------------------------------------------------------------------------------- 1 | module examples/collector_errors 2 | 3 | go 1.24 4 | 5 | require github.com/go-pkgz/pool v0.5.0 6 | 7 | require golang.org/x/sync v0.11.0 // indirect 8 | 9 | replace github.com/go-pkgz/pool => ../.. 
10 | -------------------------------------------------------------------------------- /examples/collector_errors/go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 4 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 5 | github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= 6 | github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 7 | golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w= 8 | golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= 9 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 10 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 11 | -------------------------------------------------------------------------------- /examples/collector_errors/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "flag" 7 | "fmt" 8 | "math/rand" 9 | "os" 10 | "sort" 11 | "strings" 12 | "time" 13 | 14 | "github.com/go-pkgz/pool" 15 | ) 16 | 17 | // Result represents either a success or an error from processing 18 | type Result struct { 19 | JobID string // ID of the job 20 | Success bool // whether the job succeeded 21 | Error error // error if job failed 22 | Timestamp time.Time // when the job completed 23 | Duration time.Duration // how long the job took 24 | } 25 | 26 | func main() { 27 | // parse command line arguments 28 | workers := flag.Int("workers", 4, "number of workers") 29 | jobs := flag.Int("jobs", 20, "number of jobs to process") 30 | errorRate := flag.Float64("error-rate", 0.3, "probability of job failure (0-1)") 31 | timeout := flag.Duration("timeout", 10*time.Second, "timeout for the entire operation") 32 | verbose := flag.Bool("verbose", false, "verbose output") 33 | flag.Parse() 34 | 35 | // create context with timeout 36 | ctx, cancel := context.WithTimeout(context.Background(), *timeout) 37 | defer cancel() 38 | 39 | // create a collector for results, both successes and errors 40 | collector := pool.NewCollector[Result](ctx, 100) 41 | 42 | workerFunc := worker(workerParam{collector: collector, verbose: *verbose, errorRate: *errorRate}) 43 | p := pool.New[string](*workers, pool.WorkerFunc[string](workerFunc)).WithContinueOnError().WithBatchSize(5) 44 | 45 | // start the pool 46 | if err := p.Go(ctx); err != nil { 47 | fmt.Printf("Failed to start pool: %v\n", err) 48 | os.Exit(1) 49 | } 50 | 51 | // submit jobs in the background, this is usually done in a separate goroutine 52 | go func() { 53 | for i := 0; i < *jobs; i++ { 54 | jobID := fmt.Sprintf("job-%03d", i+1) 55 | p.Submit(jobID) 56 | } 57 | // close pool to signal all jobs have been submitted 58 | if err := p.Close(ctx); err != nil && *verbose { 59 | fmt.Printf("Pool closed with error: %v\n", err) 60 | } 61 | }() 62 | 63 | go func() { 64 | // wait for all jobs to finish 65 | err := p.Wait(ctx) // wait for all jobs to finish 66 | if err != nil && *verbose { 67 | fmt.Printf("Pool wait error: %v\n", err) 68 | } 69 | collector.Close() // close collector to signal all results has been submitted 70 | }() 71 | 72 | 
// collect results using the collector's Iter method 73 | var results []Result 74 | for result, err := range collector.Iter() { 75 | if err != nil { 76 | fmt.Printf("Collector error: %v\n", err) 77 | break 78 | } 79 | results = append(results, result) 80 | } 81 | 82 | // separate successes and errors 83 | var successes, failures []Result 84 | for _, result := range results { 85 | if result.Success { 86 | successes = append(successes, result) 87 | } else { 88 | failures = append(failures, result) 89 | } 90 | } 91 | 92 | // print processing summary 93 | stats := p.Metrics().GetStats() 94 | fmt.Printf("\nProcessing summary:\n") 95 | fmt.Printf("Total jobs: %d\n", *jobs) 96 | fmt.Printf("Results collected: %d\n", len(results)) 97 | fmt.Printf("Successful jobs: %d\n", len(successes)) 98 | fmt.Printf("Failed jobs: %d\n", len(failures)) 99 | fmt.Printf("Processing time: %v\n", stats.ProcessingTime.Round(time.Millisecond)) 100 | fmt.Printf("Total time: %v\n", stats.TotalTime.Round(time.Millisecond)) 101 | 102 | // calculate average duration for successes and failures 103 | var totalSuccessDuration, totalFailureDuration time.Duration 104 | for _, s := range successes { 105 | totalSuccessDuration += s.Duration 106 | } 107 | for _, f := range failures { 108 | totalFailureDuration += f.Duration 109 | } 110 | 111 | if len(successes) > 0 { 112 | fmt.Printf("Avg success time: %v\n", (totalSuccessDuration / time.Duration(len(successes))).Round(time.Millisecond)) 113 | } 114 | if len(failures) > 0 { 115 | fmt.Printf("Avg failure time: %v\n", (totalFailureDuration / time.Duration(len(failures))).Round(time.Millisecond)) 116 | } 117 | 118 | if len(failures) > 0 { 119 | fmt.Printf("\nError details:\n") 120 | 121 | // group errors by type 122 | errorsByType := make(map[string][]Result) 123 | for _, result := range failures { 124 | errType := errorTypeString(result.Error) 125 | errorsByType[errType] = append(errorsByType[errType], result) 126 | } 127 | 128 | // print grouped errors 129 | errorTypes := make([]string, 0, len(errorsByType)) 130 | for errType := range errorsByType { 131 | errorTypes = append(errorTypes, errType) 132 | } 133 | sort.Strings(errorTypes) 134 | 135 | for _, errType := range errorTypes { 136 | results := errorsByType[errType] 137 | fmt.Printf("\n• %s (%d occurrences):\n", errType, len(results)) 138 | 139 | // sort results by timestamp 140 | sort.Slice(results, func(i, j int) bool { 141 | return results[i].Timestamp.Before(results[j].Timestamp) 142 | }) 143 | 144 | for _, result := range results { 145 | fmt.Printf(" - %s (at %s, took %v)\n", 146 | result.JobID, 147 | result.Timestamp.Format("15:04:05.000"), 148 | result.Duration.Round(time.Millisecond)) 149 | } 150 | } 151 | } 152 | } 153 | 154 | type workerParam struct { 155 | verbose bool 156 | errorRate float64 157 | collector *pool.Collector[Result] 158 | } 159 | 160 | func worker(p workerParam) func(ctx context.Context, jobID string) error { 161 | return func(ctx context.Context, jobID string) error { 162 | start := time.Now() 163 | 164 | // simulate processing time 165 | processingTime := time.Duration(50+rand.Intn(150)) * time.Millisecond 166 | 167 | if p.verbose { 168 | fmt.Printf("Processing %s (will take %v)...\n", jobID, processingTime) 169 | } 170 | 171 | // simulate work 172 | select { 173 | case <-time.After(processingTime): 174 | duration := time.Since(start) 175 | 176 | // randomly generate error based on error rate 177 | if rand.Float64() < p.errorRate { 178 | // choose a random error type 179 | var err error 180 | switch 
rand.Intn(3) { 181 | case 0: 182 | err = errors.New("validation failed") 183 | case 1: 184 | err = errors.New("database connection failed") 185 | case 2: 186 | err = errors.New("timeout exceeded") 187 | } 188 | 189 | if p.verbose { 190 | fmt.Printf("❌ %s failed: %v\n", jobID, err) 191 | } 192 | 193 | // submit error result to collector 194 | p.collector.Submit(Result{ 195 | JobID: jobID, 196 | Success: false, 197 | Error: err, 198 | Timestamp: time.Now(), 199 | Duration: duration, 200 | }) 201 | 202 | // return error so pool metrics track it correctly 203 | return err 204 | } 205 | 206 | if p.verbose { 207 | fmt.Printf("✅ %s completed successfully\n", jobID) 208 | } 209 | 210 | // submit success result to collector 211 | p.collector.Submit(Result{ 212 | JobID: jobID, 213 | Success: true, 214 | Timestamp: time.Now(), 215 | Duration: duration, 216 | }) 217 | 218 | return nil 219 | 220 | case <-ctx.Done(): 221 | p.collector.Submit(Result{ 222 | JobID: jobID, 223 | Success: false, 224 | Error: ctx.Err(), 225 | Timestamp: time.Now(), 226 | Duration: time.Since(start), 227 | }) 228 | return ctx.Err() 229 | } 230 | } 231 | } 232 | 233 | // errorTypeString extracts a consistent string representation of error type 234 | func errorTypeString(err error) string { 235 | if err == nil { 236 | return "nil error" 237 | } 238 | 239 | msg := err.Error() 240 | // extract the main error type without variable parts 241 | if idx := strings.IndexByte(msg, ':'); idx > 0 { 242 | return msg[:idx] 243 | } 244 | return msg 245 | } 246 | -------------------------------------------------------------------------------- /examples/collectors_chain/README.md: -------------------------------------------------------------------------------- 1 | # Pool Chain Processing (with collectors) - Example 2 | 3 | This example demonstrates how to chain multiple worker pools using [go-pkgz/pool](https://github.com/go-pkgz/pool) package to create a concurrent processing pipeline. It shows how to transform data through multiple processing stages while maintaining type safety and proper coordination between pools. 4 | 5 | ## What Makes it Special? 6 | 7 | 1. Pool Chaining: 8 | - Multiple pools connected via collectors 9 | - Each stage processes independently 10 | - Type-safe data transformation between stages 11 | - Automatic coordination via iterators 12 | 13 | 2. Concurrent Processing: 14 | - Each pool runs its own workers 15 | - Non-blocking data flow between pools 16 | - Independent scaling of each stage 17 | - Automatic backpressure handling 18 | 19 | 3. Data Flow Patterns: 20 | - Type transformation between stages 21 | - Filtering capability (skip items) 22 | - Progress tracking with timestamps 23 | - Performance metrics collection 24 | 25 | ## Features 26 | 27 | - Multi-stage processing pipeline 28 | - Independent worker pools for each stage 29 | - Type-safe data transformation 30 | - Concurrent processing across all stages 31 | - Automatic cleanup and resource management 32 | - Built-in metrics collection 33 | - Processing time tracking 34 | - Optional data filtering between stages 35 | 36 | ## Implementation Details 37 | 38 | The implementation demonstrates several key concepts: 39 | 40 | 1. Pool Type Definition: 41 | ```go 42 | type counterPool struct { 43 | *pool.WorkerGroup[stringData] // processes input type 44 | collector *pool.Collector[countData] // produces output type 45 | } 46 | ``` 47 | 48 | 2. 
Pool Construction:
49 | ```go
50 | func newCounterPool(ctx context.Context, workers int) *counterPool {
51 | collector := pool.NewCollector[countData](ctx, workers)
52 | p := pool.New[stringData](workers, pool.WorkerFunc[stringData](
53 | func(ctx context.Context, n stringData) error {
54 | // process data and submit to collector
55 | return nil
56 | }))
57 | return &counterPool{WorkerGroup: p, collector: collector}
58 | }
59 | ```
60 | 
61 | 3. Pool Chaining:
62 | ```go
63 | counter := newCounterPool(ctx, 2)
64 | multiplier := newMultiplierPool(ctx, 4)
65 | squares := newSquarePool(ctx, 4)
66 | // pipe data between pools
67 | go func() {
68 | for v := range counter.collector.Iter() {
69 | multiplier.Submit(v)
70 | }
71 | multiplier.Close(ctx)
72 | multiplier.collector.Close() // the next stage's Iter ends only after this
73 | }()
74 | ```
75 | 
76 | ## Architecture
77 | 
78 | The pipeline consists of three stages:
79 | 
80 | ```
81 | Input Strings
82 | │
83 | ▼
84 | Counter Pool (2 workers)
85 | │ counts 'a' chars
86 | │ filters count > 2
87 | │
88 | ▼
89 | Multiplier Pool (4 workers)
90 | │ multiplies by 10
91 | │
92 | ▼
93 | Square Pool (4 workers)
94 | │ squares the value
95 | │
96 | ▼
97 | Final Results
98 | ```
99 | 
100 | Each stage:
101 | - Runs independently
102 | - Has its own worker pool
103 | - Processes items as they arrive
104 | - Transforms data to the next type
105 | - Reports processing metrics
106 | 
107 | ## Data Flow Types
108 | 
109 | The pipeline uses distinct types for each stage:
110 | 
111 | ```go
112 | stringData → countData → multipliedData → finalData
113 | {idx, ts} {idx, count} {idx, value} {idx, result}
114 | ```
115 | 
116 | - Each type carries minimal necessary data
117 | - Index maintains reference to original input
118 | - Timestamp tracks processing duration
119 | 
120 | ## Example Output
121 | 
122 | ```
123 | submitting: "alabama"
124 | counted 'a' in "alabama" -> 4, duration: 123ms
125 | multiplied: 4 -> 40 (src: "alabama", processing time: 234ms)
126 | squared: 40 -> 1600 (src: "alabama", processing time: 345ms)
127 | 
128 | metrics:
129 | counter: processed:11, errors:0, workers:2
130 | multiplier: processed:6, errors:0, workers:4
131 | squares: processed:6, errors:0, workers:4
132 | ```
133 | 
134 | ## Notes
135 | 
136 | - Each pool can scale independently via worker count
137 | - Collector's Iter() handles backpressure automatically
138 | - Close() must be called on both pool and collector after submission is done
139 | - Metrics track processing stats for each stage
140 | - Type safety is maintained throughout the pipeline
141 | - Data filtering can happen at any stage
142 | 
143 | The example demonstrates a practical approach to building concurrent processing pipelines with proper resource management and type safety. -------------------------------------------------------------------------------- /examples/collectors_chain/go.mod: -------------------------------------------------------------------------------- 1 | module examples/collectors_chain
2 | 
3 | go 1.24
4 | 
5 | require github.com/go-pkgz/pool v0.7.0
6 | 
7 | require golang.org/x/sync v0.11.0 // indirect
8 | 
9 | replace github.com/go-pkgz/pool => ../..
10 | -------------------------------------------------------------------------------- /examples/collectors_chain/go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/go-pkgz/pool v0.5.0 h1:fP0WpEGMAcFEBQ7l7aAZsh7RBkzx34FVgufJoVvDTYY= 4 | github.com/go-pkgz/pool v0.5.0/go.mod h1:e1qn5EYmXshPcOk2buL2ZC20w7RTAWUgbug+L2SyH7I= 5 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 6 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 7 | github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= 8 | github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 9 | golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w= 10 | golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= 11 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 12 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 13 | -------------------------------------------------------------------------------- /examples/collectors_chain/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "math/rand" 7 | "strings" 8 | "time" 9 | 10 | "github.com/go-pkgz/pool" 11 | ) 12 | 13 | // data types for each stage of processing pipeline. 14 | // each pool transforms data from its input type to output type. 15 | type stringData struct { 16 | idx int // index in the input array 17 | ts time.Time // timestamp to track processing duration 18 | } 19 | 20 | type countData struct { 21 | idx int 22 | count int 23 | ts time.Time 24 | } 25 | 26 | type multipliedData struct { 27 | idx int 28 | value int 29 | ts time.Time 30 | } 31 | 32 | type finalData struct { 33 | idx int 34 | result int 35 | } 36 | 37 | // counterPool demonstrates the first stage of processing. 38 | // each pool type embeds WorkerGroup to handle concurrent processing and Collector to gather results. 39 | type counterPool struct { 40 | *pool.WorkerGroup[stringData] // worker group processes stringData 41 | *pool.Collector[countData] // collector gathers countData 42 | } 43 | 44 | // newCounterPool creates a pool that counts 'a' chars in strings. 45 | // demonstrates pool construction pattern: collector -> worker -> pool. 
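// Note the filtering contract below: strings with two or fewer 'a's are dropped at this
// stage, so downstream pools receive a reduced stream rather than one item per input.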
46 | func newCounterPool(ctx context.Context, workers int) *counterPool { 47 | collector := pool.NewCollector[countData](ctx, workers) // collector to gather results, buffer size == workers 48 | p := pool.New[stringData](workers, pool.WorkerFunc[stringData](func(_ context.Context, n stringData) error { 49 | time.Sleep(time.Duration(rand.Intn(5)) * time.Millisecond) // simulate heavy work 50 | count := strings.Count(inputStrings[n.idx], "a") // use global var for logging only 51 | if count > 2 { 52 | // demonstrates filtering: only strings with >2 'a's passed to the next stage 53 | collector.Submit(countData{idx: n.idx, count: count, ts: n.ts}) 54 | } 55 | fmt.Printf("counted 'a' in %q -> %d, duration: %v\n", inputStrings[n.idx], count, time.Since(n.ts)) 56 | return nil 57 | })) 58 | return &counterPool{WorkerGroup: p.WithBatchSize(3), Collector: collector} 59 | } 60 | 61 | type multiplierPool struct { 62 | *pool.WorkerGroup[countData] 63 | *pool.Collector[multipliedData] 64 | } 65 | 66 | func newMultiplierPool(ctx context.Context, workers int) *multiplierPool { 67 | collector := pool.NewCollector[multipliedData](ctx, workers) 68 | p := pool.New[countData](workers, pool.WorkerFunc[countData](func(_ context.Context, n countData) error { 69 | time.Sleep(time.Duration(rand.Intn(5)) * time.Millisecond) 70 | multiplied := n.count * 10 // transform data: multiply by 10 71 | fmt.Printf("multiplied: %d -> %d (src: %q, processing time: %v)\n", 72 | n.count, multiplied, inputStrings[n.idx], time.Since(n.ts)) 73 | collector.Submit(multipliedData{idx: n.idx, value: multiplied, ts: n.ts}) 74 | return nil 75 | })) 76 | return &multiplierPool{WorkerGroup: p.WithBatchSize(3), Collector: collector} 77 | } 78 | 79 | type squarePool struct { 80 | *pool.WorkerGroup[multipliedData] 81 | *pool.Collector[finalData] 82 | } 83 | 84 | func newSquarePool(ctx context.Context, workers int) *squarePool { 85 | collector := pool.NewCollector[finalData](ctx, workers) 86 | p := pool.New[multipliedData](workers, pool.WorkerFunc[multipliedData](func(_ context.Context, n multipliedData) error { 87 | squared := n.value * n.value 88 | fmt.Printf("squared: %d -> %d (src: %q, processing time: %v)\n", 89 | n.value, squared, inputStrings[n.idx], time.Since(n.ts)) 90 | time.Sleep(time.Duration(rand.Intn(5)) * time.Millisecond) 91 | collector.Submit(finalData{idx: n.idx, result: squared}) 92 | return nil 93 | })) 94 | return &squarePool{WorkerGroup: p.WithBatchSize(3), Collector: collector} 95 | } 96 | 97 | // ProcessStrings demonstrates chaining multiple pools together to create a processing pipeline. 98 | // Each pool runs concurrently and processes items as they become available from the previous stage. 
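// Shutdown is driven by the iterators rather than explicit waits: the submitter closes the
// first pool and its collector, each piping goroutine closes the next stage once the upstream
// Iter drains, and the final Iter over the squares collector returns only when the whole
// chain has completed.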
99 | func ProcessStrings(ctx context.Context, strings []string) ([]finalData, error) { 100 | // create all pools before starting any processing 101 | counter := newCounterPool(ctx, 2) 102 | multiplier := newMultiplierPool(ctx, 4) 103 | squares := newSquarePool(ctx, 4) 104 | 105 | // start all pools' workers 106 | // this is non-blocking operation, workers will start processing as soon as items are submitted 107 | counter.Go(ctx) 108 | multiplier.Go(ctx) 109 | squares.Go(ctx) 110 | 111 | // first goroutine feeds input data into the pipeline 112 | // we use a goroutine to simulate a real-world scenario where data is coming from an external source 113 | go func() { 114 | for i := range strings { 115 | fmt.Printf("submitting: %q\n", strings[i]) 116 | counter.WorkerGroup.Submit(stringData{idx: i, ts: time.Now()}) 117 | time.Sleep(time.Duration(rand.Intn(3)) * time.Millisecond) 118 | } 119 | // close pool and collector when all inputs are submitted 120 | counter.WorkerGroup.Close(ctx) 121 | counter.Collector.Close() 122 | }() 123 | 124 | // organize pipes between pools 125 | // we use goroutines to communicate between pools in a non-blocking way 126 | go func() { 127 | // pipe from counter to multiplier using collector's iterator 128 | for v := range counter.Iter() { // iter will stop on completion of counter pool 129 | multiplier.WorkerGroup.Submit(v) 130 | } 131 | multiplier.WorkerGroup.Close(ctx) 132 | multiplier.Collector.Close() 133 | }() 134 | 135 | go func() { 136 | // pipe from multiplier to squares 137 | for v := range multiplier.Iter() { // iter will stop on completion of multiplier pool 138 | squares.WorkerGroup.Submit(v) 139 | } 140 | squares.WorkerGroup.Close(ctx) 141 | squares.Collector.Close() 142 | }() 143 | 144 | // collect final results until all work is done 145 | var results []finalData 146 | // iter will stop on completion of squares pool which is the last in the chain 147 | // this is a blocking operation and will return when all pools are done 148 | // we don't need to wait for each pool to finish explicitly, the iter handles it 149 | for v := range squares.Iter() { 150 | results = append(results, v) 151 | } 152 | 153 | // print metrics showing how each pool performed 154 | fmt.Printf("\nmetrics:\ncounter: %s\nmultiplier: %s\nsquares: %s\n", 155 | counter.Metrics().GetStats(), multiplier.Metrics().GetStats(), squares.Metrics().GetStats()) 156 | return results, nil 157 | } 158 | 159 | // store input array in a global for logging purposes only 160 | var inputStrings []string 161 | 162 | func main() { 163 | inputStrings = []string{ 164 | "banana", 165 | "alabama", 166 | "california", 167 | "canada", 168 | "australia", 169 | "alaska", 170 | "arkansas", 171 | "arizona", 172 | "abracadabra", 173 | "bandanna", 174 | "barbarian", 175 | "antarctica", 176 | "arctic", 177 | "baccarat", 178 | } 179 | 180 | res, err := ProcessStrings(context.Background(), inputStrings) 181 | if err != nil { 182 | panic(err) 183 | } 184 | fmt.Println("\nFinal results:") 185 | for _, v := range res { 186 | fmt.Printf("src: %q, squared a-count: %d\n", inputStrings[v.idx], v.result) 187 | } 188 | fmt.Printf("\nTotal: %d", len(res)) 189 | } 190 | -------------------------------------------------------------------------------- /examples/direct_chain/README.md: -------------------------------------------------------------------------------- 1 | # Pool Chain Processing (direct) - Example 2 | 3 | This example demonstrates how to chain multiple worker pools using [go-pkgz/pool](https://github.com/go-pkgz/pool) 
package to create a concurrent processing pipeline. Pools directly submit data to the next stage, with a collector only at the final stage to gather results. 4 | 5 | ## Key Concepts 6 | 7 | 1. Pool Chaining: 8 | - Pools directly reference and send to the next pool 9 | - Single collector at the end of chain 10 | - Each stage processes independently 11 | - Type-safe data transformation between stages 12 | 13 | 2. Data Flow: 14 | - Input strings -> count 'a's -> multiply by 10 -> square 15 | - Each stage has its own worker pool 16 | - Final collector gathers results 17 | - Processing time tracked at each stage 18 | 19 | ## Implementation Details 20 | 21 | The example shows three key patterns: 22 | 23 | 1. Pool Declaration and Cross-References: 24 | ```go 25 | var pCounter *pool.WorkerGroup[stringData] 26 | var pMulti *pool.WorkerGroup[countData] 27 | var pSquares *pool.WorkerGroup[multipliedData] 28 | collector := pool.NewCollector[finalData](ctx, 10) 29 | ``` 30 | 31 | 2. Direct Pool Submission: 32 | ```go 33 | pCounter = pool.New[stringData](2, pool.WorkerFunc[stringData]( 34 | func(_ context.Context, d stringData) error { 35 | count := strings.Count(d.data, "a") 36 | if count > 2 { 37 | pMulti.Send(countData{...}) // direct submission to next pool, thread safe version of Submit 38 | } 39 | return nil 40 | })) 41 | ``` 42 | 43 | 3. Pipeline Coordination: 44 | ```go 45 | go func() { 46 | pCounter.Wait(ctx) // wait for first pool 47 | pMulti.Close(ctx) // close second pool 48 | pSquares.Close(ctx) // close final pool 49 | collector.Close() // close collector 50 | }() 51 | ``` 52 | 53 | ## Data Flow Types 54 | 55 | ```go 56 | stringData { countData { multipliedData { finalData { 57 | idx int idx int idx int idx int 58 | data string count int value int result int 59 | ts time.Time ts time.Time ts time.Time 60 | } } } } 61 | ``` 62 | 63 | ## Features 64 | 65 | - Batch processing (size=3) in each pool 66 | - Filtering capabilities (count > 2) 67 | - Processing time tracking 68 | - Independent worker counts per stage 69 | - Built-in metrics collection 70 | - Simulated processing delays 71 | 72 | ## Example Output 73 | 74 | ``` 75 | submitting: "alabama" 76 | counted 'a' in "alabama" -> 4, duration: 123ms 77 | multiplied: 4 -> 40 (src: "alabama", processing time: 234ms) 78 | squared: 40 -> 1600 (src: "alabama", processing time: 345ms) 79 | 80 | metrics: 81 | counter: processed:11, errors:0, workers:2 82 | multiplier: processed:6, errors:0, workers:4 83 | squares: processed:6, errors:0, workers:4 84 | ``` 85 | 86 | ## Usage 87 | 88 | ```go 89 | res, err := ProcessStrings(context.Background(), []string{ 90 | "alabama", "california", "canada", "australia", 91 | }) 92 | ``` 93 | 94 | ## Important Notes 95 | 96 | - Pools must be declared before creation to allow cross-references 97 | - Each stage can filter data (skip items) 98 | - Send can be done directly from workers 99 | - Close() propagates through the chain 100 | - Single collector simplifies result gathering 101 | - Batch size optimizes throughput 102 | - Processing time tracked through pipeline 103 | 104 | This simplified version demonstrates essential patterns for building concurrent processing pipelines while maintaining clean and efficient code structure. 
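
## Close Propagation in main.go

The "Pipeline Coordination" snippet above shows shutdown driven by explicit `Wait`/`Close` calls. The bundled `main.go` wires the same cascade through `WithPoolCompleteFn`: each pool registers a callback that closes the next stage once all of its own workers finish. A condensed sketch of that wiring follows; `counterWorker`, `multiplierWorker`, and `squareWorker` are placeholders standing in for the `WorkerFunc` closures defined in `main.go`:

```go
// counterWorker, multiplierWorker, squareWorker are placeholders for the
// actual worker functions from main.go
pCounter = pool.New[stringData](8, counterWorker).WithBatchSize(3).
	WithPoolCompleteFn(func(ctx context.Context) error {
		return pMulti.Close(ctx) // counter finished -> close multiplier
	})

pMulti = pool.New[countData](10, multiplierWorker).WithBatchSize(3).
	WithPoolCompleteFn(func(ctx context.Context) error {
		return pSquares.Close(ctx) // multiplier finished -> close squares
	})

pSquares = pool.New[multipliedData](10, squareWorker).WithBatchSize(3).
	WithPoolCompleteFn(func(ctx context.Context) error {
		collector.Close() // squares finished -> stop collector.Iter()
		return nil
	})
```

With this wiring only the first pool needs an explicit `Close(ctx)` after submission; completion then propagates down the chain and the final `collector.Iter()` loop terminates on its own.
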
-------------------------------------------------------------------------------- /examples/direct_chain/go.mod: -------------------------------------------------------------------------------- 1 | module examples/direct_chain 2 | 3 | go 1.24 4 | 5 | require github.com/go-pkgz/pool v0.7.0 6 | 7 | require golang.org/x/sync v0.11.0 // indirect 8 | 9 | replace github.com/go-pkgz/pool => ../.. 10 | -------------------------------------------------------------------------------- /examples/direct_chain/go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/go-pkgz/pool v0.5.0 h1:fP0WpEGMAcFEBQ7l7aAZsh7RBkzx34FVgufJoVvDTYY= 4 | github.com/go-pkgz/pool v0.5.0/go.mod h1:e1qn5EYmXshPcOk2buL2ZC20w7RTAWUgbug+L2SyH7I= 5 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 6 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 7 | github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= 8 | github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 9 | golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w= 10 | golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= 11 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 12 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 13 | -------------------------------------------------------------------------------- /examples/direct_chain/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "math/rand" 7 | "strings" 8 | "sync/atomic" 9 | "time" 10 | 11 | "github.com/go-pkgz/pool" 12 | ) 13 | 14 | // data types for each stage of processing pipeline. 15 | // each pool transforms data from its input type to output type. 16 | type stringData struct { 17 | idx int // index in the input array 18 | data string // input data 19 | ts time.Time // timestamp to track processing duration 20 | } 21 | 22 | type countData struct { 23 | idx int 24 | count int 25 | ts time.Time 26 | } 27 | 28 | type multipliedData struct { 29 | idx int 30 | value int 31 | ts time.Time 32 | } 33 | 34 | type finalData struct { 35 | idx int 36 | result int 37 | } 38 | 39 | func ProcessStrings(ctx context.Context, input []string) ([]finalData, error) { 40 | // declare pools and counters for debugging 41 | var pCounter *pool.WorkerGroup[stringData] 42 | var pMulti *pool.WorkerGroup[countData] 43 | var pSquares *pool.WorkerGroup[multipliedData] 44 | var submitted, filtered, multiplied, squared atomic.Int64 45 | 46 | collector := pool.NewCollector[finalData](ctx, 10) 47 | 48 | pCounter = pool.New[stringData](8, pool.WorkerFunc[stringData](func(_ context.Context, d stringData) error { 49 | submitted.Add(1) 50 | time.Sleep(time.Duration(rand.Intn(1)) * time.Millisecond) 51 | count := strings.Count(d.data, "a") 52 | if count > 2 { 53 | filtered.Add(1) 54 | // important: we use Send instead of Submit, because we run inside multiple workers 55 | // and Submit is not thread-safe. 
Send does the same, just in thread-safe way 56 | pMulti.Send(countData{idx: d.idx, count: count, ts: d.ts}) 57 | } 58 | fmt.Printf("counted 'a' in %q -> %d, duration: %v\n", inputStrings[d.idx], count, time.Since(d.ts)) 59 | return nil 60 | })).WithBatchSize(3).WithPoolCompleteFn(func(ctx context.Context) error { 61 | return pMulti.Close(ctx) 62 | }) 63 | 64 | pMulti = pool.New[countData](10, pool.WorkerFunc[countData](func(_ context.Context, d countData) error { 65 | multiplied.Add(1) 66 | time.Sleep(time.Duration(rand.Intn(10)) * time.Millisecond) 67 | val := d.count * 10 68 | fmt.Printf("multiplied: %d -> %d (src: %q, processing time: %v)\n", 69 | d.count, val, inputStrings[d.idx], time.Since(d.ts)) 70 | pSquares.Send(multipliedData{idx: d.idx, value: val, ts: d.ts}) 71 | return nil 72 | })).WithBatchSize(3).WithPoolCompleteFn(func(ctx context.Context) error { 73 | return pSquares.Close(ctx) 74 | }) 75 | 76 | pSquares = pool.New[multipliedData](10, pool.WorkerFunc[multipliedData](func(_ context.Context, d multipliedData) error { 77 | squared.Add(1) 78 | val := d.value * d.value 79 | fmt.Printf("squared: %d -> %d (src: %q, processing time: %v)\n", 80 | d.value, val, inputStrings[d.idx], time.Since(d.ts)) 81 | time.Sleep(time.Duration(rand.Intn(10)) * time.Millisecond) 82 | collector.Submit(finalData{idx: d.idx, result: val}) 83 | return nil 84 | })).WithBatchSize(3).WithPoolCompleteFn(func(ctx context.Context) error { 85 | collector.Close() 86 | return nil 87 | }) 88 | 89 | pCounter.Go(ctx) 90 | pMulti.Go(ctx) 91 | pSquares.Go(ctx) 92 | 93 | go func() { 94 | for i := range input { 95 | for range 100 { 96 | pCounter.Submit(stringData{idx: i, data: input[i], ts: time.Now()}) 97 | time.Sleep(time.Duration(rand.Intn(1)) * time.Millisecond) 98 | } 99 | } 100 | pCounter.Close(ctx) 101 | }() 102 | 103 | var results []finalData 104 | for v := range collector.Iter() { 105 | results = append(results, v) 106 | } 107 | 108 | // print debug statistics 109 | fmt.Printf("\nProcessing statistics:\n") 110 | fmt.Printf("Total items submitted: %d\n", submitted.Load()) 111 | fmt.Printf("Items passed filter (>2 'a's): %d\n", filtered.Load()) 112 | fmt.Printf("Items multiplied: %d\n", multiplied.Load()) 113 | fmt.Printf("Items squared: %d\n", squared.Load()) 114 | fmt.Printf("Results collected: %d\n", len(results)) 115 | 116 | fmt.Printf("\nPool metrics:\ncounter: %s\nmultiplier: %s\nsquares: %s\n", 117 | pCounter.Metrics().GetStats(), pMulti.Metrics().GetStats(), pSquares.Metrics().GetStats()) 118 | return results, nil 119 | } 120 | 121 | // store input array in a global for logging purposes only 122 | var inputStrings []string 123 | 124 | func main() { 125 | inputStrings = []string{ 126 | "banana", 127 | "alabama", 128 | "california", 129 | "canada", 130 | "australia", 131 | "alaska", 132 | "arkansas", 133 | "arizona", 134 | "abracadabra", 135 | "bandanna", 136 | "barbarian", 137 | "antarctica", 138 | "arctic", 139 | "baccarat", 140 | } 141 | 142 | res, err := ProcessStrings(context.Background(), inputStrings) 143 | if err != nil { 144 | panic(err) 145 | } 146 | fmt.Printf("\nFinal results:\n") 147 | for i, v := range res { 148 | fmt.Printf(" %d src: %q, squared a-count: %d\n", i, inputStrings[v.idx], v.result) 149 | } 150 | fmt.Printf("Total: %d\n", len(res)) 151 | } 152 | -------------------------------------------------------------------------------- /examples/middleware/README.md: -------------------------------------------------------------------------------- 1 | # Task Processor with Middleware - 
Example
2 | 
3 | This example demonstrates how to use middleware in the [go-pkgz/pool](https://github.com/go-pkgz/pool) package to build a robust task processing system. It shows both built-in middleware usage and custom middleware creation, emphasizing how middleware can add cross-cutting functionality without modifying the core processing logic.
4 | 
5 | ## What Makes it Special?
6 | 
7 | 1. Middleware composition:
8 | - Shows how multiple middleware work together
9 | - Demonstrates middleware execution order
10 | - Combines both built-in and custom middleware
11 | 
12 | 2. Cross-cutting concerns:
13 | - Input validation before processing
14 | - Automatic retries for failed tasks
15 | - Panic recovery for robustness
16 | - Rate limiting for flow control
17 | - Structured logging for observability
18 | 
19 | 3. Real-world patterns:
20 | - Configuration management
21 | - Error handling
22 | - Metrics collection
23 | - Structured logging with slog
24 | 
25 | ## Features
26 | 
27 | - Task validation before processing
28 | - Automatic retries with exponential backoff
29 | - Panic recovery with custom handler
30 | - Rate limiting with token bucket algorithm
31 | - Structured JSON logging
32 | - Performance metrics collection
33 | - Configurable worker count and retry attempts
34 | 
35 | ## Installation
36 | 
37 | ```bash
38 | go build
39 | ```
40 | 
41 | ## Usage
42 | 
43 | ```bash
44 | go run main.go [options]
45 | ```
46 | 
47 | Options:
48 | - `-workers` - number of worker goroutines (default: 2)
49 | - `-retries` - number of retries for failed tasks (default: 3)
50 | 
51 | Example:
52 | ```bash
53 | go run main.go -workers 4 -retries 5
54 | ```
55 | 
56 | ## Implementation Details
57 | 
58 | The implementation demonstrates several key concepts:
59 | 
60 | 1. Middleware creation:
61 | ```go
62 | func makeStructuredLogger(logger *slog.Logger) pool.Middleware[Task] {
63 | return func(next pool.Worker[Task]) pool.Worker[Task] {
64 | return pool.WorkerFunc[Task](func(ctx context.Context, task Task) error {
65 | // pre-processing logging
66 | err := next.Do(ctx, task)
67 | // post-processing logging
68 | return err
69 | })
70 | }
71 | }
72 | ```
73 | 
74 | 2. Middleware composition:
75 | ```go
76 | pool.New[Task](workers, makeWorker()).Use(
77 | middleware.Validator(validator), // validate first
78 | middleware.Retry[Task](retries, time.Second), // then retry on failure
79 | middleware.Recovery[Task](handler), // recover from panics
80 | middleware.RateLimiter[Task](5, 3), // rate limit to 5/sec, burst of 3
81 | customLogger, // log everything
82 | )
83 | ```
84 | 
85 | 3.
Task processing: 86 | ```go 87 | type Task struct { 88 | ID string `json:"id"` 89 | Priority int `json:"priority"` 90 | Payload string `json:"payload"` 91 | } 92 | ``` 93 | 94 | ## Output Example 95 | 96 | ```json 97 | { 98 | "time": "2025-02-12T10:00:00Z", 99 | "level": "DEBUG", 100 | "msg": "processing task", 101 | "task_id": "1", 102 | "priority": 1, 103 | "payload": {"id":"1","priority":1,"payload":"normal task"} 104 | } 105 | { 106 | "time": "2025-02-12T10:00:00Z", 107 | "level": "INFO", 108 | "msg": "task completed", 109 | "task_id": "1", 110 | "duration_ms": 100 111 | } 112 | { 113 | "time": "2025-02-12T10:00:00Z", 114 | "level": "ERROR", 115 | "msg": "task failed", 116 | "task_id": "2", 117 | "duration_ms": 100, 118 | "error": "failed to process task 2" 119 | } 120 | { 121 | "time": "2025-02-12T10:00:00Z", 122 | "level": "INFO", 123 | "msg": "submitting rate-limited tasks" 124 | } 125 | { 126 | "time": "2025-02-12T10:00:00Z", 127 | "level": "INFO", 128 | "msg": "pool finished", 129 | "processed": 14, 130 | "errors": 2, 131 | "total_time": "3.2s", 132 | "duration": "2.1s" 133 | } 134 | ``` 135 | 136 | ## Architecture 137 | 138 | The program is structured in several logical components: 139 | 140 | ``` 141 | main 142 | ├── setupConfig - configuration and logger setup 143 | ├── makeWorker - core worker implementation 144 | ├── makeValidator - input validation rules 145 | ├── makePool - pool creation with middleware 146 | └── runPool - execution and task submission 147 | ``` 148 | 149 | Each component is isolated and has a single responsibility, making the code easy to maintain and test. 150 | 151 | ## Notes 152 | 153 | - Middleware executes in the order it's added to Use() 154 | - The first middleware wraps the outermost layer 155 | - Built-in middleware handles common patterns 156 | - Custom middleware can add any functionality 157 | - Rate limiting is shared across all workers in the pool 158 | - Structured logging as an example of cross-cutting concern -------------------------------------------------------------------------------- /examples/middleware/go.mod: -------------------------------------------------------------------------------- 1 | module examples/middleware 2 | 3 | go 1.24 4 | 5 | require github.com/go-pkgz/pool v0.7.0 6 | 7 | require ( 8 | golang.org/x/sync v0.14.0 // indirect 9 | golang.org/x/time v0.11.0 // indirect 10 | ) 11 | 12 | replace github.com/go-pkgz/pool => ../.. 
13 | -------------------------------------------------------------------------------- /examples/middleware/go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 4 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 5 | github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= 6 | github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 7 | golang.org/x/sync v0.14.0 h1:woo0S4Yywslg6hp4eUFjTVOyKt0RookbpAHG4c1HmhQ= 8 | golang.org/x/sync v0.14.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= 9 | golang.org/x/time v0.11.0 h1:/bpjEDfN9tkoN/ryeYHnv5hcMlc8ncjMcM4XBk5NWV0= 10 | golang.org/x/time v0.11.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= 11 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 12 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 13 | -------------------------------------------------------------------------------- /examples/middleware/main.go: -------------------------------------------------------------------------------- 1 | // file: examples/middleware/main.go 2 | package main 3 | 4 | import ( 5 | "context" 6 | "encoding/json" 7 | "flag" 8 | "fmt" 9 | "log/slog" 10 | "os" 11 | "strings" 12 | "time" 13 | 14 | "github.com/go-pkgz/pool" 15 | "github.com/go-pkgz/pool/middleware" 16 | ) 17 | 18 | // Task represents a job to be processed 19 | type Task struct { 20 | ID string `json:"id"` 21 | Priority int `json:"priority"` 22 | Payload string `json:"payload"` 23 | } 24 | 25 | // config holds application configuration 26 | type config struct { 27 | workers int 28 | retries int 29 | logger *slog.Logger 30 | } 31 | 32 | func main() { 33 | // parse config and setup logger 34 | cfg := setupConfig() 35 | 36 | // create worker pool 37 | p := makePool(cfg) 38 | 39 | // start pool and process tasks 40 | if err := runPool(context.Background(), p, cfg); err != nil { 41 | cfg.logger.Error("pool finished with error", "error", err) 42 | os.Exit(1) 43 | } 44 | } 45 | 46 | func setupConfig() config { 47 | // parse flags 48 | workers := flag.Int("workers", 2, "number of workers") 49 | retries := flag.Int("retries", 3, "number of retries") 50 | flag.Parse() 51 | 52 | // setup structured logger 53 | logger := slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{ 54 | Level: slog.LevelDebug, 55 | })) 56 | 57 | return config{ 58 | workers: *workers, 59 | retries: *retries, 60 | logger: logger, 61 | } 62 | } 63 | 64 | func runPool(ctx context.Context, p *pool.WorkerGroup[Task], cfg config) error { 65 | // start the pool 66 | if err := p.Go(ctx); err != nil { 67 | return fmt.Errorf("failed to start pool: %w", err) 68 | } 69 | 70 | // submit test tasks 71 | tasks := []Task{ 72 | {ID: "1", Priority: 1, Payload: "normal task"}, 73 | {ID: "2", Priority: 5, Payload: "fail me"}, // this will fail and retry 74 | {ID: "3", Priority: 2, Payload: "normal task"}, 75 | {ID: "", Priority: 11, Payload: "invalid"}, // this will fail validation 76 | } 77 | 78 | for _, task := range tasks { 79 | p.Submit(task) 80 | } 81 | 82 | // demonstrate rate limiting 83 | cfg.logger.Info("submitting rate-limited tasks") 84 | start := time.Now() 85 | for 
i := 0; i < 10; i++ { 86 | p.Submit(Task{ID: fmt.Sprintf("rate-%d", i), Priority: 3, Payload: "rate limited task"}) 87 | } 88 | 89 | // close pool and wait for completion 90 | if err := p.Close(ctx); err != nil { 91 | return err 92 | } 93 | 94 | // print final metrics 95 | metrics := p.Metrics().GetStats() 96 | cfg.logger.Info("pool finished", "processed", metrics.Processed, "errors", metrics.Errors, 97 | "total_time", metrics.TotalTime.String(), "duration", time.Since(start).String()) 98 | 99 | return nil 100 | } 101 | 102 | func makePool(cfg config) *pool.WorkerGroup[Task] { 103 | return pool.New[Task](cfg.workers, makeWorker()).Use( 104 | middleware.Validator(makeValidator()), // validate tasks 105 | middleware.Retry[Task](cfg.retries, time.Second), // retry failed tasks 106 | middleware.Recovery[Task](func(p interface{}) { // recover from panics 107 | cfg.logger.Error("panic recovered", "error", fmt.Sprint(p)) 108 | }), 109 | middleware.RateLimiter[Task](5, 3), // rate limit: 5 tasks/second with burst of 3 110 | makeStructuredLogger(cfg.logger), // custom structured logging 111 | ) 112 | } 113 | 114 | func makeWorker() pool.Worker[Task] { 115 | return pool.WorkerFunc[Task](func(ctx context.Context, task Task) error { 116 | // simulate some work with random failures 117 | if strings.Contains(task.Payload, "fail") { 118 | return fmt.Errorf("failed to process task %s", task.ID) 119 | } 120 | time.Sleep(100 * time.Millisecond) 121 | return nil 122 | }) 123 | } 124 | 125 | func makeValidator() func(Task) error { 126 | return func(task Task) error { 127 | if task.ID == "" { 128 | return fmt.Errorf("empty task ID") 129 | } 130 | if task.Priority < 0 || task.Priority > 10 { 131 | return fmt.Errorf("invalid priority %d, must be between 0 and 10", task.Priority) 132 | } 133 | return nil 134 | } 135 | } 136 | 137 | func makeStructuredLogger(logger *slog.Logger) pool.Middleware[Task] { 138 | return func(next pool.Worker[Task]) pool.Worker[Task] { 139 | return pool.WorkerFunc[Task](func(ctx context.Context, task Task) error { 140 | start := time.Now() 141 | taskJSON, _ := json.Marshal(task) 142 | 143 | logger.Debug("processing task", "task_id", task.ID, "priority", task.Priority, "payload", string(taskJSON)) 144 | 145 | err := next.Do(ctx, task) 146 | duration := time.Since(start) 147 | 148 | if err != nil { 149 | logger.Error("task failed", "task_id", task.ID, "duration_ms", duration.Milliseconds(), "error", err.Error()) 150 | return err 151 | } 152 | 153 | logger.Info("task completed", "task_id", task.ID, "duration_ms", duration.Milliseconds()) 154 | return nil 155 | }) 156 | } 157 | } 158 | -------------------------------------------------------------------------------- /examples/parallel_files/README.md: -------------------------------------------------------------------------------- 1 | # Simple Text Processor - Parallel Files Example 2 | 3 | This example demonstrates how to use parallel processing with [go-pkgz/pool](https://github.com/go-pkgz/pool) package for efficient file analysis. It reads multiple files in chunks and counts word frequencies using multiple workers. 4 | 5 | ## What Makes it Special? 6 | 7 | 1. File chunking: 8 | - Files read in 32KB chunks for memory efficiency 9 | - Each chunk processed independently 10 | - Allows parallel processing of large files 11 | 12 | 2. Independent worker state: 13 | - Each worker has its own word frequency map 14 | - No synchronization needed between workers 15 | - Results merged only on completion 16 | 17 | 3. 
Built-in metrics:
18 | - Shows processing rates and latencies
19 | - Tracks word length distribution
20 | - Demonstrates metrics collection API
21 | 
22 | ## Features
23 | 
24 | - Process multiple files in parallel
25 | - Pattern-based file selection
26 | - Word frequency analysis
27 | - Performance metrics tracking
28 | - Configurable worker count
29 | 
30 | ## Installation
31 | 
32 | ```bash
33 | go build
34 | ```
35 | 
36 | ## Usage
37 | 
38 | ```bash
39 | go run main.go [options]
40 | ```
41 | 
42 | Options:
43 | - `-dir` - directory to process (default: ".")
44 | - `-pattern` - file pattern to match (default: "*.txt")
45 | - `-workers` - number of worker goroutines (default: 4)
46 | - `-top` - number of top words to show (default: 10)
47 | 
48 | Example:
49 | ```bash
50 | go run main.go -pattern "*.go" -workers 8
51 | ```
52 | 
53 | ## Implementation Details
54 | 
55 | The key components are:
56 | 
57 | 1. Chunk-based file reading:
58 | ```go
59 | buffer := make([]byte, 32*1024)
60 | for {
61 | n, err := file.Read(buffer)
62 | if err == io.EOF {
63 | break
64 | }
65 | p.Submit(chunk{data: append([]byte(nil), buffer[:n]...)}) // copy the chunk: the shared buffer is reused
66 | }
67 | ```
68 | 
69 | 2. Stateful worker processing:
70 | ```go
71 | type fileWorker struct {
72 | words map[string]int
73 | byteCount int64
74 | }
75 | ```
76 | 
77 | 3. Metrics tracking:
78 | ```go
79 | m := metrics.Get(ctx)
80 | if len(word) > 3 {
81 | m.Inc("long words")
82 | } else {
83 | m.Inc("short words")
84 | }
85 | ```
86 | 
87 | ## Output Example
88 | 
89 | ```
90 | Processing statistics: [processed:3, rate:5603.1/s, avg_latency:0s, proc:0s, total:1ms]
91 | Total bytes: 11522
92 | Unique words: 302
93 | Short words: 647
94 | Long words: 829
95 | 
96 | Top 10 words:
97 | 1. "words": 29 times
98 | 2. "return": 22 times
99 | 3. "word": 18 times
100 | ...
101 | ```
102 | 
103 | ## Architecture
104 | 
105 | The program flows through these stages:
106 | 1. Read files in chunks (32KB)
107 | 2. Distribute chunks to worker pool
108 | 3. Process chunks in parallel
109 | 4. Collect results through collector
110 | 5. Merge and present statistics
111 | 
112 | ## Notes
113 | 
114 | - Memory efficient due to chunk-based processing
115 | - No locks needed in worker implementation
116 | - Scales well with additional workers -------------------------------------------------------------------------------- /examples/parallel_files/go.mod: -------------------------------------------------------------------------------- 1 | module examples/parallel_files
2 | 
3 | go 1.24
4 | 
5 | require github.com/go-pkgz/pool v0.7.0
6 | 
7 | require golang.org/x/sync v0.11.0 // indirect
8 | 
9 | replace github.com/go-pkgz/pool => ../..
10 | -------------------------------------------------------------------------------- /examples/parallel_files/go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 4 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 5 | github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= 6 | github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 7 | golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w= 8 | golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= 9 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 10 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 11 | -------------------------------------------------------------------------------- /examples/parallel_files/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "context" 6 | "flag" 7 | "fmt" 8 | "io" 9 | "log" 10 | "os" 11 | "path/filepath" 12 | "sort" 13 | "strings" 14 | "time" 15 | 16 | "github.com/go-pkgz/pool" 17 | "github.com/go-pkgz/pool/metrics" 18 | ) 19 | 20 | // chunk represents a piece of file to process 21 | type chunk struct { 22 | data []byte 23 | } 24 | 25 | // fileWorker counts words in chunks 26 | type fileWorker struct { 27 | words map[string]int 28 | byteCount int64 29 | } 30 | 31 | // Do implements pool.Worker interface 32 | func (w *fileWorker) Do(ctx context.Context, c chunk) error { 33 | scanner := bufio.NewScanner(strings.NewReader(string(c.data))) 34 | scanner.Split(bufio.ScanWords) 35 | m := metrics.Get(ctx) 36 | for scanner.Scan() { 37 | word := strings.ToLower(strings.Trim(scanner.Text(), ".,!?()[]{}\"';:")) 38 | if len(word) > 3 { 39 | w.words[word]++ 40 | m.Inc("long words") 41 | } else { 42 | m.Inc("short words") 43 | } 44 | } 45 | w.byteCount += int64(len(c.data)) 46 | return scanner.Err() 47 | } 48 | 49 | func main() { 50 | var ( 51 | dir = flag.String("dir", ".", "directory to process") 52 | workers = flag.Int("workers", 4, "number of workers") 53 | pattern = flag.String("pattern", "*.txt", "file pattern to match") 54 | topWords = flag.Int("top", 10, "number of top words to show") 55 | ) 56 | flag.Parse() 57 | 58 | ctx, cancel := context.WithTimeout(context.Background(), time.Minute) 59 | defer cancel() 60 | 61 | collector := pool.NewCollector[fileWorker](ctx, *workers) 62 | 63 | p := pool.NewStateful[chunk](*workers, func() pool.Worker[chunk] { 64 | return &fileWorker{words: make(map[string]int)} // create new worker with empty words map 65 | }) 66 | 67 | // set batch size and complete function 68 | p = p.WithBatchSize(100).WithWorkerCompleteFn(func(_ context.Context, _ int, w pool.Worker[chunk]) error { 69 | collector.Submit(*w.(*fileWorker)) 70 | return nil 71 | }) 72 | 73 | // start pool processing 74 | if err := p.Go(ctx); err != nil { 75 | log.Fatal(err) 76 | } 77 | 78 | // process files 79 | err := filepath.Walk(*dir, func(path string, info os.FileInfo, err error) error { 80 | if err != nil || info.IsDir() { 81 | return err 82 | } 83 | if matched, err := filepath.Match(*pattern, filepath.Base(path)); err != nil 
|| !matched { 84 | return err 85 | } 86 | 87 | file, err := os.Open(path) 88 | if err != nil { 89 | return fmt.Errorf("failed to open %s: %w", path, err) 90 | } 91 | defer file.Close() 92 | 93 | buffer := make([]byte, 32*1024) 94 | for { 95 | n, err := file.Read(buffer) 96 | if err == io.EOF { 97 | break 98 | } 99 | if err != nil { 100 | return fmt.Errorf("error reading %s: %w", path, err) 101 | } 102 | 103 | data := make([]byte, n) 104 | copy(data, buffer[:n]) 105 | p.Submit(chunk{data: data}) // submit chunk to pool 106 | } 107 | return nil 108 | }) 109 | if err != nil { 110 | log.Printf("error walking files: %v", err) 111 | } 112 | 113 | // close pool and collector, initiate all data sent and no more data expected 114 | if err := p.Close(ctx); err != nil { 115 | log.Printf("pool close error: %v", err) 116 | } 117 | collector.Close() 118 | 119 | // merge and print results 120 | totalWords := make(map[string]int) 121 | var totalBytes int64 122 | 123 | // iterate over collector results, merge words and count bytes 124 | for worker := range collector.Iter() { 125 | for word, count := range worker.words { 126 | totalWords[word] += count 127 | } 128 | totalBytes += worker.byteCount 129 | } 130 | 131 | fmt.Printf("\nProcessing statistics: %+v\n", p.Metrics().GetStats()) 132 | fmt.Printf("Total bytes: %d\n", totalBytes) 133 | fmt.Printf("Unique words: %d\n", len(totalWords)) 134 | fmt.Printf("Short words: %d\n", p.Metrics().Get("short words")) 135 | fmt.Printf("Long words: %d\n", p.Metrics().Get("long words")) 136 | 137 | // prepare sorted list of words 138 | type wordCount struct { 139 | word string 140 | count int 141 | } 142 | counts := make([]wordCount, 0, len(totalWords)) 143 | for word, count := range totalWords { 144 | counts = append(counts, wordCount{word, count}) 145 | } 146 | 147 | sort.Slice(counts, func(i, j int) bool { 148 | return counts[i].count > counts[j].count 149 | }) 150 | 151 | fmt.Printf("\nTop %d words:\n", *topWords) 152 | for i := 0; i < *topWords && i < len(counts); i++ { 153 | fmt.Printf("%d. %q: %d times\n", i+1, counts[i].word, counts[i].count) 154 | } 155 | } 156 | -------------------------------------------------------------------------------- /examples/tokenizer_stateful/README.md: -------------------------------------------------------------------------------- 1 | # Simple Text Tokenizer - Stateful Workers Example 2 | 3 | This example demonstrates how to use stateful workers with [go-pkgz/pool](https://github.com/go-pkgz/pool) package. It implements a parallel text tokenizer that counts word frequencies, where each worker maintains its own independent state. 4 | 5 | ## What Makes it Stateful? 6 | 7 | Stateful workers are useful when each worker needs to maintain its own independent data during processing. In this example: 8 | 9 | 1. Each worker keeps its own word frequency map: 10 | - No shared maps or mutexes needed 11 | - No coordination between workers required 12 | - Each worker counts words it sees independently 13 | 14 | 2. Results are combined only at the end: 15 | - Workers don't communicate during processing 16 | - Final results are merged after all processing is done 17 | - Shows how to handle independent worker results 18 | 19 | 3. 
Real-world analogy: 20 | - Like having multiple people count words in different parts of a book 21 | - Each person keeps their own tally 22 | - At the end, all tallies are added together 23 | 24 | This pattern is particularly useful for: 25 | - Processing that can be partitioned (like our text analysis) 26 | - When sharing state would create contention 27 | - When workers need different initialization 28 | - When tracking per-worker statistics 29 | 30 | ## Stateful vs Non-Stateful Approaches 31 | 32 | To understand why this example uses stateful workers, let's compare two approaches: 33 | 34 | ### Non-Stateful (Wrong Way) 35 | ```go 36 | // Shared state between all workers - requires synchronization 37 | sharedCounts := sync.Map{} 38 | 39 | worker := pool.WorkerFunc[string](func(ctx context.Context, line string) error { 40 | for _, word := range strings.Fields(line) { 41 | // Need to synchronize access to shared map 42 | v, _ := sharedCounts.LoadOrStore(word, 0) 43 | sharedCounts.Store(word, v.(int) + 1) 44 | } 45 | return nil 46 | }) 47 | ``` 48 | 49 | ### Stateful (Our Approach) 50 | ```go 51 | // Each worker has its own state 52 | type TokenizingWorker struct { 53 | counts map[string]int // private to this worker 54 | } 55 | 56 | func (w *TokenizingWorker) Do(ctx context.Context, line string) error { 57 | for _, word := range strings.Fields(line) { 58 | w.counts[word]++ // no synchronization needed 59 | } 60 | return nil 61 | } 62 | ``` 63 | 64 | The stateful approach is better because: 65 | - No synchronization overhead 66 | - Better performance due to no lock contention 67 | - Cleaner code without mutex handling 68 | - Easier to maintain and debug 69 | 70 | ## Features 71 | 72 | - Demonstration of stateful worker pattern 73 | - Parallel processing of text files using configurable number of workers 74 | - Batch processing support for better performance 75 | - Word frequency counting 76 | - Processing statistics including timing and error counts 77 | - Word cleanup (lowercase conversion, punctuation removal) 78 | 79 | ## Install 80 | 81 | ```bash 82 | # assuming you are in go-pkgz/pool/examples/tokenizer 83 | go build 84 | ``` 85 | 86 | ## Usage 87 | 88 | ```bash 89 | go run main.go [options] -file=input.txt 90 | ``` 91 | 92 | Options: 93 | - `-file` - input file to process (required) 94 | - `-workers` - number of worker goroutines (default: 4) 95 | - `-batch` - batch size for processing (default: 100) 96 | 97 | Example: 98 | ```bash 99 | go run main.go -file main.go -workers 8 100 | ``` 101 | 102 | ## Output Example 103 | 104 | ``` 105 | Processing stats: 106 | Processed lines: 192 107 | Total words processed: 321 108 | Errors: 0 109 | Processing time: 171.214µs 110 | Total time: 265.541µs 111 | 112 | Per-worker stats: 113 | Worker 0 processed 79 words 114 | Worker 1 processed 63 words 115 | Worker 2 processed 88 words 116 | Worker 3 processed 91 words 117 | 118 | Top 10 most common words: 119 | 1. "counts": 10 times 120 | 2. "return": 8 times 121 | 3. "words": 7 times 122 | 4. "range": 7 times 123 | 5. "processed": 7 times 124 | 6. "word": 6 times 125 | 7. "type": 6 times 126 | 8. "count": 5 times 127 | 9. "line": 5 times 128 | 10. "worker": 5 times 129 | ``` 130 | 131 | ## Implementation Details 132 | 133 | The example demonstrates true stateful worker usage in go-pkgz/pool: 134 | 135 | 1. 
Stateful worker implementation: 136 | ```go 137 | type TokenizingWorker struct { 138 | counts map[string]int // each worker maintains its own counts 139 | processed int 140 | } 141 | ``` 142 | 143 | 2. Worker creation with independent state: 144 | ```go 145 | p := pool.NewStateful[string](workers, func() pool.Worker[string] { 146 | return &TokenizingWorker{ 147 | counts: make(map[string]int), 148 | } 149 | }) 150 | ``` 151 | 152 | 3. Result collection using completion callback: 153 | ```go 154 | WithWorkerCompleteFn(func(ctx context.Context, id int, w pool.Worker[string]) error { 155 | tw := w.(*TokenizingWorker) 156 | collector.Submit(Result{ 157 | workerID: id, 158 | counts: tw.counts, 159 | processed: tw.processed, 160 | }) 161 | return nil 162 | }) 163 | ``` 164 | 165 | 4. Final results merging: 166 | ```go 167 | totalCounts := make(map[string]int) 168 | for result := range collector.Iter() { 169 | for word, count := range result.counts { 170 | totalCounts[word] += count 171 | } 172 | } 173 | ``` 174 | 175 | ## Architecture 176 | 177 | ``` 178 | File Reader Worker Pool Collector Results Merger 179 | (main goroutine) → (N workers) → (buffer channel) → (main goroutine) 180 | reads lines counts words collects final merges counts 181 | submits to pool in own state results from workers prints statistics 182 | ``` 183 | 184 | The program demonstrates true parallel processing where: 185 | - Each worker maintains independent word counts 186 | - No state is shared between workers during processing 187 | - Workers submit their final counts when done 188 | - Main goroutine merges results and calculates totals 189 | - Per-worker statistics show work distribution 190 | 191 | ## Notes 192 | 193 | - The example can process any text file but works best with plain text 194 | - Processing is done in parallel, but results maintain correct counts 195 | - No word order or position information is preserved -------------------------------------------------------------------------------- /examples/tokenizer_stateful/go.mod: -------------------------------------------------------------------------------- 1 | module examples/tokenizer_stateful 2 | 3 | go 1.24 4 | 5 | require github.com/go-pkgz/pool v0.7.0 6 | 7 | require golang.org/x/sync v0.11.0 // indirect 8 | 9 | replace github.com/go-pkgz/pool => ../.. 
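// note: the replace directive above points the build at the local checkout two
// directories up, so the example always compiles against the in-repo pool
// sources rather than the published v0.7.0 release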
10 | -------------------------------------------------------------------------------- /examples/tokenizer_stateful/go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/go-pkgz/pool v0.3.0 h1:aN5/ZhBbMPGXj+naZ6De2KNqg0D2Svpc7U1cYEue9t8= 4 | github.com/go-pkgz/pool v0.3.0/go.mod h1:e1qn5EYmXshPcOk2buL2ZC20w7RTAWUgbug+L2SyH7I= 5 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 6 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 7 | github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= 8 | github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 9 | golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w= 10 | golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= 11 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 12 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 13 | -------------------------------------------------------------------------------- /examples/tokenizer_stateful/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "context" 6 | "flag" 7 | "fmt" 8 | "log" 9 | "os" 10 | "sort" 11 | "strings" 12 | "time" 13 | 14 | "github.com/go-pkgz/pool" 15 | ) 16 | 17 | // TokenizingWorker maintains its own state - counts of words it has processed 18 | type TokenizingWorker struct { 19 | counts map[string]int 20 | processed int 21 | } 22 | 23 | // Result represents final counts from a single worker 24 | type Result struct { 25 | workerID int 26 | counts map[string]int 27 | processed int 28 | } 29 | 30 | // Do implements pool.Worker interface 31 | func (w *TokenizingWorker) Do(ctx context.Context, line string) error { 32 | select { 33 | case <-ctx.Done(): 34 | return ctx.Err() 35 | default: 36 | } 37 | 38 | // split line into words and clean them up 39 | words := strings.Fields(line) 40 | for _, word := range words { 41 | select { 42 | case <-ctx.Done(): 43 | return ctx.Err() 44 | default: 45 | } 46 | 47 | // clean up the word - remove punctuation, convert to lower case 48 | word = strings.ToLower(strings.Trim(word, ".,!?()[]{}\"';:")) 49 | if len(word) <= 3 { // skip short words 50 | continue 51 | } 52 | 53 | w.counts[word]++ 54 | w.processed++ 55 | } 56 | return nil 57 | } 58 | 59 | func main() { 60 | // command line flags 61 | var ( 62 | workers = flag.Int("workers", 4, "number of workers") 63 | batchSize = flag.Int("batch", 100, "batch size") 64 | file = flag.String("file", "", "input file to process") 65 | ) 66 | flag.Parse() 67 | 68 | if *file == "" { 69 | log.Fatal("file parameter is required") 70 | } 71 | 72 | // create context with timeout 73 | ctx, cancel := context.WithTimeout(context.Background(), time.Minute) 74 | defer cancel() 75 | 76 | // create collector for results from workers 77 | collector := pool.NewCollector[Result](ctx, *workers) 78 | 79 | // create pool with worker maker function 80 | p := pool.NewStateful[string](*workers, func() pool.Worker[string] { 81 | return &TokenizingWorker{ 82 | counts: make(map[string]int), 83 | } 84 | }).WithBatchSize(*batchSize). 85 | WithContinueOnError(). 
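// the worker-complete callback below fires once per worker, after that worker
// has processed all of its input; it is the safe hand-off point for the
// worker's private counts map, since the worker no longer mutates it afterwards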
86 | WithWorkerCompleteFn(func(ctx context.Context, id int, w pool.Worker[string]) error { 87 | // type assert to get our concrete worker type 88 | tw, ok := w.(*TokenizingWorker) 89 | if !ok { 90 | return fmt.Errorf("unexpected worker type") 91 | } 92 | // submit worker's results 93 | collector.Submit(Result{ 94 | workerID: id, 95 | counts: tw.counts, 96 | processed: tw.processed, 97 | }) 98 | return nil 99 | }) 100 | 101 | // start the pool 102 | if err := p.Go(ctx); err != nil { 103 | log.Fatal(err) 104 | } 105 | 106 | // read file line by line and submit to pool 107 | go func() { 108 | defer p.Close(ctx) 109 | 110 | f, err := os.Open(*file) 111 | if err != nil { 112 | log.Printf("failed to open file: %v", err) 113 | return 114 | } 115 | defer f.Close() 116 | 117 | scanner := bufio.NewScanner(f) 118 | for scanner.Scan() { 119 | p.Submit(scanner.Text()) 120 | } 121 | 122 | if err := scanner.Err(); err != nil { 123 | log.Printf("error reading file: %v", err) 124 | } 125 | }() 126 | 127 | // wait for pool to finish and then close collector 128 | if err := p.Wait(ctx); err != nil { 129 | log.Printf("pool error: %v", err) 130 | } 131 | collector.Close() 132 | 133 | // merge results from all workers 134 | totalCounts := make(map[string]int) 135 | totalProcessed := 0 136 | workerResults := make(map[int]int) // worker ID -> words processed 137 | 138 | for result, err := range collector.Iter() { 139 | if err != nil { 140 | log.Printf("error collecting result: %v", err) 141 | continue 142 | } 143 | // merge counts 144 | for word, count := range result.counts { 145 | totalCounts[word] += count 146 | } 147 | totalProcessed += result.processed 148 | workerResults[result.workerID] = result.processed 149 | } 150 | 151 | // get pool metrics 152 | stats := p.Metrics().GetStats() 153 | fmt.Printf("\nProcessing stats:\n") 154 | fmt.Printf("Processed lines: %d\n", stats.Processed) 155 | fmt.Printf("Total words processed: %d\n", totalProcessed) 156 | fmt.Printf("Errors: %d\n", stats.Errors) 157 | fmt.Printf("Processing time: %v\n", stats.ProcessingTime) 158 | fmt.Printf("Total time: %v\n", stats.TotalTime) 159 | 160 | // print per-worker stats 161 | fmt.Printf("\nPer-worker stats:\n") 162 | workerIDs := make([]int, 0, len(workerResults)) 163 | for id := range workerResults { 164 | workerIDs = append(workerIDs, id) 165 | } 166 | sort.Ints(workerIDs) 167 | for _, id := range workerIDs { 168 | fmt.Printf("Worker %d processed %d words\n", id, workerResults[id]) 169 | } 170 | 171 | // print top N most common tokens 172 | const topN = 10 173 | type wordCount struct { 174 | word string 175 | count int 176 | } 177 | counts := make([]wordCount, 0, len(totalCounts)) 178 | for word, count := range totalCounts { 179 | counts = append(counts, wordCount{word, count}) 180 | } 181 | sort.Slice(counts, func(i, j int) bool { 182 | return counts[i].count > counts[j].count 183 | }) 184 | 185 | fmt.Printf("\nTop %d most common words:\n", topN) 186 | for i, wc := range counts { 187 | if i >= topN { 188 | break 189 | } 190 | fmt.Printf("%d. %q: %d times\n", i+1, wc.word, wc.count) 191 | } 192 | } 193 | -------------------------------------------------------------------------------- /examples/tokenizer_stateless/README.md: -------------------------------------------------------------------------------- 1 | # Simple Text Tokenizer - Stateless Example 2 | 3 | This example demonstrates how to use WorkerFunc with the [go-pkgz/pool](https://github.com/go-pkgz/pool) package for simple stateless parallel processing. 
It implements a text tokenizer that counts word frequencies using a shared collector. 4 | 5 | ## What Makes it Stateless? 6 | 7 | This example uses a stateless approach where: 8 | 1. Workers are simple functions (WorkerFunc) without any state 9 | 2. All workers share a common collector for results 10 | 3. Word counting is done at the end in the main goroutine 11 | 12 | This is simpler than the stateful approach when: 13 | - Workers don't need to maintain state 14 | - Single shared collection point is sufficient 15 | - No need for per-worker initialization or cleanup 16 | 17 | ## Installation 18 | 19 | ```bash 20 | # assuming you are in go-pkgz/pool/examples/tokenizer_stateless 21 | go build 22 | ``` 23 | 24 | ## Usage 25 | 26 | ```bash 27 | go run main.go [options] -file=input.txt 28 | ``` 29 | 30 | Options: 31 | - `-file` - input file to process (required) 32 | - `-workers` - number of worker goroutines (default: 4) 33 | - `-batch` - batch size for processing (default: 100) 34 | 35 | Example: 36 | ```bash 37 | go run main.go -file main.go -workers 8 38 | ``` 39 | 40 | ## Implementation Details 41 | 42 | The key components are: 43 | 44 | 1. Simple worker function: 45 | ```go 46 | worker := pool.WorkerFunc[string](func(ctx context.Context, line string) error { 47 | for _, word := range strings.Fields(line) { 48 | word = strings.ToLower(strings.Trim(word, ".,!?()[]{}\"';:")) 49 | if len(word) <= 3 { // skip short words, matching main.go 50 | continue 51 | } 52 | collector.Submit(word) 53 | } 54 | return nil 55 | }) 56 | ``` 57 | 58 | 2. Pool creation with shared worker: 59 | ```go 60 | p := pool.New[string](workers, worker). 61 | WithBatchSize(batchSize). 62 | WithContinueOnError() 63 | ``` 64 | 65 | 3. Result collection: 66 | ```go 67 | wordCounts := make(map[string]int) 68 | for word := range collector.Iter() { 69 | wordCounts[word]++ 70 | } 71 | ``` 72 | 73 | ## Architecture 74 | 75 | ``` 76 | File Reader Worker Pool Collector Word Counter 77 | (main goroutine) → (N workers) → (shared channel) → (main goroutine) 78 | reads lines tokenize text buffers words counts frequencies 79 | ``` 80 | 81 | The program flow: 82 | 1. Main goroutine reads file line by line 83 | 2. Pool distributes lines to worker functions 84 | 3. Workers break lines into words and submit to shared collector 85 | 4. Main goroutine counts word frequencies from collector 86 | 87 | ## Output Example 88 | 89 | ``` 90 | Processing stats: 91 | Processed lines: 146 92 | Total words: 238 93 | Unique words: 152 94 | Errors: 0 95 | Processing time: 77.707µs 96 | Total time: 245.417µs 97 | 98 | Top 10 most common words: 99 | 1. "words": 9 times 100 | 2. "word": 8 times 101 | 3. "line": 6 times 102 | 4. "pool": 5 times 103 | 5. "return": 5 times 104 | 6. "file": 5 times 105 | 7. "context": 4 times 106 | 8. "worker": 4 times 107 | 9. "%d\\n": 4 times 108 | 10. "count": 4 times 109 | ``` 110 | 111 | ## Why Use This Approach? 
112 | 113 | The stateless approach is better when: 114 | - Processing is simple and doesn't require state 115 | - Shared collection is more efficient than per-worker state 116 | - Code simplicity is more important than perfect parallelism 117 | - Memory usage needs to be minimized (no per-worker state) -------------------------------------------------------------------------------- /examples/tokenizer_stateless/go.mod: -------------------------------------------------------------------------------- 1 | module examples/tokenizer_stateless 2 | 3 | go 1.24 4 | 5 | require github.com/go-pkgz/pool v0.7.0 6 | 7 | require golang.org/x/sync v0.11.0 // indirect 8 | 9 | replace github.com/go-pkgz/pool => ../.. 10 | -------------------------------------------------------------------------------- /examples/tokenizer_stateless/go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/go-pkgz/pool v0.3.0 h1:aN5/ZhBbMPGXj+naZ6De2KNqg0D2Svpc7U1cYEue9t8= 4 | github.com/go-pkgz/pool v0.3.0/go.mod h1:e1qn5EYmXshPcOk2buL2ZC20w7RTAWUgbug+L2SyH7I= 5 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 6 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 7 | github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= 8 | github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 9 | golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w= 10 | golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= 11 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 12 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 13 | -------------------------------------------------------------------------------- /examples/tokenizer_stateless/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "context" 6 | "flag" 7 | "fmt" 8 | "log" 9 | "os" 10 | "sort" 11 | "strings" 12 | "time" 13 | 14 | "github.com/go-pkgz/pool" 15 | ) 16 | 17 | func main() { 18 | // command line flags 19 | var ( 20 | workers = flag.Int("workers", 4, "number of workers") 21 | batchSize = flag.Int("batch", 100, "batch size") 22 | file = flag.String("file", "", "input file to process") 23 | ) 24 | flag.Parse() 25 | 26 | if *file == "" { 27 | log.Fatal("file parameter is required") 28 | } 29 | 30 | // create context with timeout 31 | ctx, cancel := context.WithTimeout(context.Background(), time.Minute) 32 | defer cancel() 33 | 34 | // create collector for words 35 | collector := pool.NewCollector[string](ctx, 1000) 36 | 37 | // create worker function that splits line into words 38 | worker := pool.WorkerFunc[string](func(ctx context.Context, line string) error { 39 | // check context before processing 40 | select { 41 | case <-ctx.Done(): 42 | return ctx.Err() 43 | default: 44 | } 45 | 46 | // split line into words and submit each word 47 | words := strings.Fields(line) 48 | for _, word := range words { 49 | // check context between words 50 | select { 51 | case <-ctx.Done(): 52 | return ctx.Err() 53 | default: 54 | } 55 | 56 | // clean up the word - remove punctuation, convert to lower case 57 | word = strings.ToLower(strings.Trim(word, 
".,!?()[]{}\"';:")) 58 | if len(word) <= 3 { 59 | continue 60 | } 61 | collector.Submit(word) 62 | } 63 | time.Sleep(10 * time.Millisecond) // simulate slow processing time 64 | return nil 65 | }) 66 | 67 | // create pool with worker function 68 | p := pool.New[string](*workers, worker). 69 | WithBatchSize(*batchSize). 70 | WithContinueOnError() 71 | 72 | // start the pool 73 | if err := p.Go(ctx); err != nil { 74 | log.Fatal(err) 75 | } 76 | 77 | // read file line by line and submit to pool 78 | go func() { 79 | defer p.Close(ctx) 80 | 81 | f, err := os.Open(*file) 82 | if err != nil { 83 | log.Printf("failed to open file: %v", err) 84 | return 85 | } 86 | defer f.Close() 87 | 88 | scanner := bufio.NewScanner(f) 89 | for scanner.Scan() { 90 | p.Submit(scanner.Text()) 91 | } 92 | 93 | if err := scanner.Err(); err != nil { 94 | log.Printf("error reading file: %v", err) 95 | } 96 | }() 97 | 98 | // wait for pool to finish 99 | if err := p.Wait(ctx); err != nil { 100 | log.Printf("pool error: %v", err) 101 | } 102 | collector.Close() 103 | 104 | // count words from collector 105 | wordCounts := make(map[string]int) 106 | totalWords := 0 107 | for word, err := range collector.Iter() { 108 | if err != nil { 109 | log.Printf("error collecting word: %v", err) 110 | continue 111 | } 112 | wordCounts[word]++ 113 | totalWords++ 114 | } 115 | 116 | // get pool metrics 117 | stats := p.Metrics().GetStats() 118 | fmt.Printf("\nProcessing stats:\n") 119 | fmt.Printf("Processed lines: %d\n", stats.Processed) 120 | fmt.Printf("Total words: %d\n", totalWords) 121 | fmt.Printf("Unique words: %d\n", len(wordCounts)) 122 | fmt.Printf("Errors: %d\n", stats.Errors) 123 | fmt.Printf("Processing time: %v\n", stats.ProcessingTime) 124 | fmt.Printf("Total time: %v\n\n", stats.TotalTime) 125 | fmt.Printf("all stats: %s\n", stats) 126 | 127 | // print top N most common tokens 128 | const topN = 10 129 | type wordCount struct { 130 | word string 131 | count int 132 | } 133 | counts := make([]wordCount, 0, len(wordCounts)) 134 | for word, count := range wordCounts { 135 | counts = append(counts, wordCount{word, count}) 136 | } 137 | sort.Slice(counts, func(i, j int) bool { 138 | return counts[i].count > counts[j].count 139 | }) 140 | 141 | fmt.Printf("\nTop %d most common words:\n", topN) 142 | for i, wc := range counts { 143 | if i >= topN { 144 | break 145 | } 146 | fmt.Printf("%d. 
%q: %d times\n", i+1, wc.word, wc.count) 147 | } 148 | } 149 | -------------------------------------------------------------------------------- /examples_test.go: -------------------------------------------------------------------------------- 1 | package pool 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "fmt" 7 | "sort" 8 | "sync" 9 | 10 | "github.com/go-pkgz/pool/metrics" 11 | ) 12 | 13 | func Example_basic() { 14 | // collect output 15 | var out []string 16 | var mu sync.Mutex 17 | 18 | worker := WorkerFunc[int](func(_ context.Context, v int) error { 19 | mu.Lock() 20 | out = append(out, fmt.Sprintf("processed: %d", v)) 21 | mu.Unlock() 22 | return nil 23 | }) 24 | 25 | p := New[int](2, worker) 26 | if err := p.Go(context.Background()); err != nil { 27 | panic(err) // handle error, don't panic in real code 28 | } 29 | 30 | // submit work 31 | p.Submit(1) 32 | p.Submit(2) 33 | p.Submit(3) 34 | 35 | _ = p.Close(context.Background()) 36 | 37 | // print collected output in sorted order 38 | sort.Strings(out) 39 | for _, s := range out { 40 | fmt.Println(s) 41 | } 42 | 43 | // Output: 44 | // processed: 1 45 | // processed: 2 46 | // processed: 3 47 | } 48 | 49 | func Example_withRouting() { 50 | // collect output with sync.Map for thread safety 51 | var out sync.Map 52 | 53 | worker := WorkerFunc[int](func(ctx context.Context, v int) error { 54 | out.Store(v, fmt.Sprintf("worker %d got %d", metrics.WorkerID(ctx), v)) 55 | return nil 56 | }) 57 | 58 | // create pool with chunk function that routes based on even/odd 59 | p := New[int](2, worker).WithChunkFn(func(v int) string { 60 | if v%2 == 0 { 61 | return "even" 62 | } 63 | return "odd" 64 | }, 65 | ) 66 | p.Go(context.Background()) 67 | 68 | // submit all numbers 69 | for i := 1; i <= 4; i++ { 70 | p.Submit(i) 71 | } 72 | 73 | p.Close(context.Background()) 74 | 75 | // print in order to ensure deterministic output 76 | for i := 1; i <= 4; i++ { 77 | if v, ok := out.Load(i); ok { 78 | fmt.Println(v) 79 | } 80 | } 81 | 82 | // Output: 83 | // worker 0 got 1 84 | // worker 1 got 2 85 | // worker 0 got 3 86 | // worker 1 got 4 87 | } 88 | 89 | func Example_withError() { 90 | // collect output to ensure deterministic order 91 | var out []string 92 | var mu sync.Mutex 93 | 94 | worker := WorkerFunc[int](func(_ context.Context, v int) error { 95 | if v == 0 { 96 | return fmt.Errorf("zero value not allowed") 97 | } 98 | mu.Lock() 99 | out = append(out, fmt.Sprintf("processed: %d", v)) 100 | mu.Unlock() 101 | return nil 102 | }) 103 | 104 | p := New[int](1, worker).WithContinueOnError() // don't stop on errors 105 | p.Go(context.Background()) 106 | 107 | p.Submit(1) 108 | p.Submit(0) // this will fail but processing continues 109 | p.Submit(2) 110 | 111 | err := p.Close(context.Background()) 112 | if err != nil { 113 | mu.Lock() 114 | out = append(out, fmt.Sprintf("finished with error: %v", err)) 115 | mu.Unlock() 116 | } 117 | 118 | // print collected output in sorted order 119 | sort.Strings(out) 120 | for _, s := range out { 121 | fmt.Println(s) 122 | } 123 | 124 | // Output: 125 | // finished with error: total errors: 1, last error: worker 0 failed: zero value not allowed 126 | // processed: 1 127 | // processed: 2 128 | } 129 | 130 | func Example_withContext() { 131 | started := make(chan struct{}) 132 | ctx, cancel := context.WithCancel(context.Background()) 133 | defer cancel() 134 | 135 | worker := WorkerFunc[int](func(ctx context.Context, v int) error { 136 | close(started) // signal that worker started 137 | <-ctx.Done() // wait for 
cancellation 138 | return ctx.Err() 139 | }) 140 | 141 | p := New[int](1, worker).WithBatchSize(0) // disable batching 142 | p.Go(ctx) 143 | p.Submit(1) 144 | 145 | <-started // wait for worker to start 146 | cancel() // cancel context 147 | err := p.Close(context.Background()) 148 | fmt.Printf("got error: %v\n", err != nil) 149 | 150 | // Output: 151 | // got error: true 152 | } 153 | 154 | func Example_withCollector() { 155 | type Item struct { 156 | val int 157 | label string 158 | } 159 | 160 | // create collector for results with buffer size 10 161 | collector := NewCollector[Item](context.Background(), 10) 162 | 163 | // create worker that processes numbers and sends results to collector 164 | worker := WorkerFunc[int](func(_ context.Context, v int) error { 165 | result := Item{ 166 | val: v * 2, // double the value 167 | label: "proc", // add label 168 | } 169 | collector.Submit(result) 170 | return nil 171 | }) 172 | 173 | // create and start pool 174 | p := New[int](2, worker) 175 | p.Go(context.Background()) 176 | 177 | // submit items asynchronously 178 | go func() { 179 | for i := 1; i <= 3; i++ { 180 | p.Submit(i) 181 | } 182 | p.Close(context.Background()) 183 | collector.Close() // close collector after pool is done 184 | }() 185 | 186 | // collect results and sort them for deterministic output 187 | results, _ := collector.All() 188 | sort.Slice(results, func(i, j int) bool { 189 | return results[i].val < results[j].val 190 | }) 191 | 192 | // print sorted results 193 | for _, res := range results { 194 | fmt.Printf("got result: %d (%s)\n", res.val, res.label) 195 | } 196 | 197 | // Output: 198 | // got result: 2 (proc) 199 | // got result: 4 (proc) 200 | // got result: 6 (proc) 201 | } 202 | 203 | func Example_withCollectorIterator() { 204 | collector := NewCollector[string](context.Background(), 5) 205 | 206 | worker := WorkerFunc[int](func(_ context.Context, v int) error { 207 | collector.Submit(fmt.Sprintf("value %d", v)) 208 | return nil 209 | }) 210 | 211 | p := New[int](2, worker) 212 | p.Go(context.Background()) 213 | 214 | // submit items asynchronously 215 | go func() { 216 | for i := 1; i <= 3; i++ { 217 | p.Submit(i) 218 | } 219 | p.Close(context.Background()) 220 | collector.Close() 221 | }() 222 | 223 | // collect all values first 224 | var values []string 225 | for val, err := range collector.Iter() { 226 | if err != nil { 227 | fmt.Printf("error: %v\n", err) 228 | continue 229 | } 230 | values = append(values, val) 231 | } 232 | 233 | // sort and print values for deterministic output 234 | sort.Strings(values) 235 | for _, val := range values { 236 | fmt.Printf("processed: %s\n", val) 237 | } 238 | 239 | // Output: 240 | // processed: value 1 241 | // processed: value 2 242 | // processed: value 3 243 | } 244 | 245 | func Example_fibCalculator() { 246 | // FibResult type to store both input and calculated Fibonacci number 247 | type FibResult struct { 248 | n int 249 | fib uint64 250 | } 251 | 252 | // create collector for results 253 | collector := NewCollector[FibResult](context.Background(), 10) 254 | 255 | // worker calculating fibonacci numbers 256 | worker := WorkerFunc[int](func(_ context.Context, n int) error { 257 | if n <= 0 { 258 | return fmt.Errorf("invalid input: %d", n) 259 | } 260 | 261 | // calculate fibonacci number 262 | var a, b uint64 = 0, 1 263 | for i := 0; i < n; i++ { 264 | a, b = b, a+b 265 | } 266 | 267 | collector.Submit(FibResult{n: n, fib: a}) 268 | return nil 269 | }) 270 | 271 | // create pool with 3 workers 272 | p := 
New[int](3, worker) 273 | p.Go(context.Background()) 274 | 275 | // submit numbers to calculate asynchronously 276 | go func() { 277 | numbers := []int{5, 7, 10, 3, 8} 278 | for _, n := range numbers { 279 | p.Submit(n) 280 | } 281 | p.Close(context.Background()) 282 | collector.Close() 283 | }() 284 | 285 | // collect results and sort them by input number for consistent output 286 | results, _ := collector.All() 287 | sort.Slice(results, func(i, j int) bool { 288 | return results[i].n < results[j].n 289 | }) 290 | 291 | // print results 292 | for _, res := range results { 293 | fmt.Printf("fib(%d) = %d\n", res.n, res.fib) 294 | } 295 | 296 | // Output: 297 | // fib(3) = 2 298 | // fib(5) = 5 299 | // fib(7) = 13 300 | // fib(8) = 21 301 | // fib(10) = 55 302 | } 303 | 304 | func Example_chainedCalculation() { 305 | // stage 1: calculate fibonacci numbers in parallel 306 | type FibResult struct { 307 | n int 308 | fib uint64 309 | } 310 | stage1Collector := NewCollector[FibResult](context.Background(), 10) 311 | 312 | fibWorker := WorkerFunc[int](func(_ context.Context, n int) error { 313 | var a, b uint64 = 0, 1 314 | for i := 0; i < n; i++ { 315 | a, b = b, a+b 316 | } 317 | stage1Collector.Submit(FibResult{n: n, fib: a}) 318 | return nil 319 | }) 320 | 321 | // stage 2: calculate factors for each fibonacci number 322 | type FactorsResult struct { 323 | n uint64 324 | factors []uint64 325 | } 326 | stage2Collector := NewCollector[FactorsResult](context.Background(), 10) 327 | 328 | factorsWorker := WorkerFunc[FibResult](func(_ context.Context, res FibResult) error { 329 | if res.fib <= 1 { 330 | stage2Collector.Submit(FactorsResult{n: res.fib, factors: []uint64{res.fib}}) 331 | return nil 332 | } 333 | 334 | var factors []uint64 335 | n := res.fib 336 | for i := uint64(2); i*i <= n; i++ { 337 | for n%i == 0 { 338 | factors = append(factors, i) 339 | n /= i 340 | } 341 | } 342 | if n > 1 { 343 | factors = append(factors, n) 344 | } 345 | 346 | stage2Collector.Submit(FactorsResult{n: res.fib, factors: factors}) 347 | return nil 348 | }) 349 | 350 | // create and start both pools 351 | pool1 := New[int](3, fibWorker) 352 | pool1.Go(context.Background()) 353 | 354 | pool2 := NewStateful[FibResult](2, func() Worker[FibResult] { 355 | return factorsWorker 356 | }) 357 | pool2.Go(context.Background()) 358 | 359 | // submit numbers to calculate 360 | numbers := []int{5, 7, 10} 361 | for _, n := range numbers { 362 | pool1.Submit(n) 363 | } 364 | 365 | // close pools and collectors in order 366 | pool1.Close(context.Background()) 367 | stage1Collector.Close() 368 | 369 | // process stage 1 results in stage 2 370 | for fibRes, err := range stage1Collector.Iter() { 371 | if err != nil { 372 | fmt.Printf("stage 1 error: %v\n", err) 373 | continue 374 | } 375 | pool2.Submit(fibRes) 376 | } 377 | 378 | pool2.Close(context.Background()) 379 | stage2Collector.Close() 380 | 381 | // collect and sort final results to ensure deterministic output order 382 | results, _ := stage2Collector.All() 383 | sort.Slice(results, func(i, j int) bool { 384 | return results[i].n < results[j].n 385 | }) 386 | 387 | // print results in sorted order 388 | for _, res := range results { 389 | fmt.Printf("number %d has factors %v\n", res.n, res.factors) 390 | } 391 | 392 | // Output: 393 | // number 5 has factors [5] 394 | // number 13 has factors [13] 395 | // number 55 has factors [5 11] 396 | } 397 | 398 | // processingWorker implements Worker interface 399 | type processingWorker struct{} 400 | 401 | func (w 
*processingWorker) Do(_ context.Context, v string) error { 402 | fmt.Printf("processed: %s\n", v) 403 | return nil 404 | } 405 | 406 | func Example_workerTypes() { 407 | // these two workers are functionally equivalent: 408 | // 1. Implementing Worker interface explicitly 409 | // 2. Using WorkerFunc adapter - same thing, just shorter 410 | workerFn := WorkerFunc[string](func(_ context.Context, v string) error { 411 | fmt.Printf("processed: %s\n", v) 412 | return nil 413 | }) 414 | 415 | // run first pool to completion 416 | p1 := New[string](1, &processingWorker{}) 417 | p1.Go(context.Background()) 418 | p1.Submit("task1") 419 | p1.Close(context.Background()) 420 | 421 | // then run second pool 422 | p2 := New[string](1, workerFn) 423 | p2.Go(context.Background()) 424 | p2.Submit("task2") 425 | p2.Close(context.Background()) 426 | 427 | // Output: 428 | // processed: task1 429 | // processed: task2 430 | } 431 | 432 | func Example_middleware() { 433 | // create a worker that sometimes fails 434 | worker := WorkerFunc[string](func(_ context.Context, v string) error { 435 | if v == "fail" { 436 | return errors.New("simulated failure") 437 | } 438 | fmt.Printf("processed: %s\n", v) 439 | return nil 440 | }) 441 | 442 | // create logging middleware 443 | logging := func(next Worker[string]) Worker[string] { 444 | return WorkerFunc[string](func(ctx context.Context, v string) error { 445 | fmt.Printf("starting: %s\n", v) 446 | err := next.Do(ctx, v) 447 | fmt.Printf("completed: %s, err: %v\n", v, err) 448 | return err 449 | }) 450 | } 451 | 452 | // create retry middleware 453 | retry := func(attempts int) Middleware[string] { 454 | return func(next Worker[string]) Worker[string] { 455 | return WorkerFunc[string](func(ctx context.Context, v string) error { 456 | var lastErr error 457 | for i := 0; i < attempts; i++ { 458 | var err error 459 | if err = next.Do(ctx, v); err == nil { 460 | return nil 461 | } 462 | lastErr = err 463 | fmt.Printf("attempt %d failed: %v\n", i+1, err) 464 | } 465 | return fmt.Errorf("failed after %d attempts: %w", attempts, lastErr) 466 | }) 467 | } 468 | } 469 | 470 | // create pool with both middleware - retry is listed first, making it the outermost wrapper, so logging runs inside it and traces every attempt 471 | p := New[string](1, worker).Use(retry(2), logging) 472 | p.Go(context.Background()) 473 | 474 | // process items 475 | p.Submit("ok") // should succeed first time 476 | p.Submit("fail") // should fail after retries 477 | p.Close(context.Background()) 478 | 479 | // Output: 480 | // starting: ok 481 | // processed: ok 482 | // completed: ok, err: 483 | // starting: fail 484 | // completed: fail, err: simulated failure 485 | // attempt 1 failed: simulated failure 486 | // starting: fail 487 | // completed: fail, err: simulated failure 488 | // attempt 2 failed: simulated failure 489 | } 490 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/go-pkgz/pool 2 | 3 | go 1.24 4 | 5 | require ( 6 | github.com/stretchr/testify v1.10.0 7 | golang.org/x/sync v0.14.0 8 | golang.org/x/time v0.11.0 9 | ) 10 | 11 | require ( 12 | github.com/davecgh/go-spew v1.1.1 // indirect 13 | github.com/pmezard/go-difflib v1.0.0 // indirect 14 | gopkg.in/yaml.v3 v3.0.1 // indirect 15 | ) 16 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.1 
h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 4 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 5 | github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= 6 | github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 7 | golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w= 8 | golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= 9 | golang.org/x/sync v0.14.0 h1:woo0S4Yywslg6hp4eUFjTVOyKt0RookbpAHG4c1HmhQ= 10 | golang.org/x/sync v0.14.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= 11 | golang.org/x/time v0.11.0 h1:/bpjEDfN9tkoN/ryeYHnv5hcMlc8ncjMcM4XBk5NWV0= 12 | golang.org/x/time v0.11.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= 13 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 14 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 15 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 16 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 17 | -------------------------------------------------------------------------------- /metrics/metrics.go: -------------------------------------------------------------------------------- 1 | // Package metrics provides a way to collect metrics in a thread-safe way 2 | package metrics 3 | 4 | import ( 5 | "context" 6 | "fmt" 7 | "sort" 8 | "strings" 9 | "sync" 10 | "time" 11 | ) 12 | 13 | type contextKey string 14 | 15 | // TimerType is a type of timer to measure 16 | type TimerType int 17 | 18 | const ( 19 | metricsContextKey contextKey = "metrics" 20 | widContextKey contextKey = "worker-id" 21 | ) 22 | 23 | // Timer types 24 | const ( 25 | TimerProc TimerType = iota // processing time 26 | TimerWait // wait time 27 | TimerInit // initialization time 28 | TimerWrap // wrap-up time 29 | ) 30 | 31 | // Value holds both per-worker stats and shared user stats 32 | type Value struct { 33 | startTime time.Time 34 | 35 | // per worker stats, no lock needed as each worker uses its own stats 36 | workerStats []Stats 37 | 38 | // shared user stats protected by mutex 39 | mu sync.RWMutex 40 | userData map[string]int 41 | } 42 | 43 | // Stats represents worker-specific metrics with derived values 44 | type Stats struct { 45 | // raw counters 46 | Processed int 47 | Errors int 48 | Dropped int 49 | 50 | // timing 51 | ProcessingTime time.Duration 52 | WaitTime time.Duration 53 | InitTime time.Duration 54 | WrapTime time.Duration 55 | TotalTime time.Duration 56 | 57 | // derived stats, calculated on GetStats 58 | RatePerSec float64 // items processed per second 59 | AvgLatency time.Duration // average processing time per item 60 | ErrorRate float64 // portion of errors 61 | DroppedRate float64 // portion of dropped items 62 | Utilization float64 // portion of time spent processing vs waiting 63 | } 64 | 65 | // String returns stats info formatted as string 67 | func (s Stats) String() string { 68 | var metrics []string 69 | 70 | if s.Processed > 0 { 71 | metrics = append(metrics, fmt.Sprintf("processed:%d", s.Processed)) 72 | // only add rate and latency if they are non-zero 73 | if s.RatePerSec 
> 0 { 74 | metrics = append(metrics, fmt.Sprintf("rate:%.1f/s", s.RatePerSec)) 75 | } 76 | if s.AvgLatency > 0 { 77 | metrics = append(metrics, fmt.Sprintf("avg_latency:%v", s.AvgLatency.Round(time.Millisecond))) 78 | } 79 | } 80 | if s.Errors > 0 { 81 | if s.ErrorRate > 0 { 82 | metrics = append(metrics, fmt.Sprintf("errors:%d (%.1f%%)", s.Errors, s.ErrorRate*100)) //nolint:mnd // 100 is not magic number 83 | } else { 84 | metrics = append(metrics, fmt.Sprintf("errors:%d", s.Errors)) 85 | } 86 | } 87 | if s.Dropped > 0 { 88 | if s.DroppedRate > 0 { 89 | metrics = append(metrics, fmt.Sprintf("dropped:%d (%.1f%%)", s.Dropped, s.DroppedRate*100)) //nolint:mnd // 100 is not magic 90 | } else { 91 | metrics = append(metrics, fmt.Sprintf("dropped:%d", s.Dropped)) 92 | } 93 | } 94 | if s.ProcessingTime > 0 { 95 | metrics = append(metrics, fmt.Sprintf("proc:%v", s.ProcessingTime.Round(time.Millisecond))) 96 | } 97 | if s.WaitTime > 0 { 98 | metrics = append(metrics, fmt.Sprintf("wait:%v", s.WaitTime.Round(time.Millisecond))) 99 | } 100 | if s.InitTime > 0 { 101 | metrics = append(metrics, fmt.Sprintf("init:%v", s.InitTime.Round(time.Millisecond))) 102 | } 103 | if s.WrapTime > 0 { 104 | metrics = append(metrics, fmt.Sprintf("wrap:%v", s.WrapTime.Round(time.Millisecond))) 105 | } 106 | if s.TotalTime > 0 { 107 | metrics = append(metrics, fmt.Sprintf("total:%v", s.TotalTime.Round(time.Millisecond))) 108 | if s.Utilization > 0 { 109 | metrics = append(metrics, fmt.Sprintf("utilization:%.1f%%", s.Utilization*100)) //nolint:mnd // 100 is not magic number 110 | } 111 | } 112 | 113 | if len(metrics) > 0 { 114 | return fmt.Sprintf("[%s]", strings.Join(metrics, ", ")) 115 | } 116 | return "" 117 | } 118 | 119 | // New makes thread-safe metrics collector with specified number of workers 120 | func New(workers int) *Value { 121 | return &Value{ 122 | startTime: time.Now(), 123 | workerStats: make([]Stats, workers), 124 | userData: make(map[string]int), 125 | } 126 | } 127 | 128 | // Add increments value for a given key and returns new value 129 | func (m *Value) Add(key string, delta int) int { 130 | m.mu.Lock() 131 | defer m.mu.Unlock() 132 | m.userData[key] += delta 133 | return m.userData[key] 134 | } 135 | 136 | // Inc increments value for given key by one 137 | func (m *Value) Inc(key string) int { 138 | return m.Add(key, 1) 139 | } 140 | 141 | // Get returns value for given key from shared stats 142 | func (m *Value) Get(key string) int { 143 | m.mu.RLock() 144 | defer m.mu.RUnlock() 145 | return m.userData[key] 146 | } 147 | 148 | // StartTimer returns a function that when called will record the duration in worker stats 149 | func (m *Value) StartTimer(wid int, t TimerType) func() { 150 | start := time.Now() 151 | stats := &m.workerStats[wid] 152 | 153 | return func() { 154 | duration := time.Since(start) 155 | switch t { 156 | case TimerProc: 157 | stats.ProcessingTime += duration 158 | case TimerWait: 159 | stats.WaitTime += duration 160 | case TimerInit: 161 | stats.InitTime += duration 162 | case TimerWrap: 163 | stats.WrapTime += duration 164 | } 165 | } 166 | } 167 | 168 | // AddWaitTime adds wait time directly to worker stats 169 | func (m *Value) AddWaitTime(wid int, d time.Duration) { 170 | m.workerStats[wid].WaitTime += d 171 | } 172 | 173 | // IncProcessed increments processed count for worker 174 | func (m *Value) IncProcessed(wid int) { 175 | m.workerStats[wid].Processed++ 176 | } 177 | 178 | // IncErrors increments errors count for worker 179 | func (m *Value) IncErrors(wid int) { 180 | 
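// no locking needed here: each worker increments only its own slot of
// workerStats, as noted on the Value type above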
m.workerStats[wid].Errors++ 181 | } 182 | 183 | // IncDropped increments dropped count for worker 184 | func (m *Value) IncDropped(wid int) { 185 | m.workerStats[wid].Dropped++ 186 | } 187 | 188 | // GetStats returns combined stats from all workers 189 | func (m *Value) GetStats() Stats { 190 | var result Stats 191 | 192 | // sum up stats from all workers 193 | for i := range m.workerStats { 194 | result.Processed += m.workerStats[i].Processed 195 | result.Errors += m.workerStats[i].Errors 196 | result.Dropped += m.workerStats[i].Dropped 197 | 198 | // sum wait time - represents total idle time across all workers 199 | result.WaitTime += m.workerStats[i].WaitTime 200 | 201 | // for processing time we take max since workers run in parallel 202 | result.ProcessingTime = max(result.ProcessingTime, m.workerStats[i].ProcessingTime) 203 | 204 | // sum initialization and wrap times as they are sequential 205 | result.InitTime += m.workerStats[i].InitTime 206 | result.WrapTime += m.workerStats[i].WrapTime 207 | } 208 | 209 | result.TotalTime = time.Since(m.startTime) 210 | 211 | // calculate derived stats 212 | if result.TotalTime > 0 { 213 | result.RatePerSec = float64(result.Processed) / result.TotalTime.Seconds() 214 | } 215 | if result.Processed > 0 { 216 | // for average latency we use max processing time divided by total processed 217 | result.AvgLatency = result.ProcessingTime / time.Duration(result.Processed) 218 | } 219 | totalAttempted := result.Processed + result.Errors + result.Dropped 220 | if totalAttempted > 0 { 221 | result.ErrorRate = float64(result.Errors) / float64(totalAttempted) 222 | result.DroppedRate = float64(result.Dropped) / float64(totalAttempted) 223 | } 224 | totalWorkTime := result.ProcessingTime + result.WaitTime 225 | if totalWorkTime > 0 { 226 | result.Utilization = float64(result.ProcessingTime) / float64(totalWorkTime) 227 | } 228 | 229 | return result 230 | } 231 | 232 | // String returns sorted key:vals string representation of user-defined metrics 233 | func (m *Value) String() string { 234 | m.mu.RLock() 235 | defer m.mu.RUnlock() 236 | 237 | keys := make([]string, 0, len(m.userData)) 238 | for k := range m.userData { 239 | keys = append(keys, k) 240 | } 241 | sort.Strings(keys) 242 | 243 | metrics := make([]string, 0, len(keys)) 244 | for _, k := range keys { 245 | metrics = append(metrics, fmt.Sprintf("%s:%d", k, m.userData[k])) 246 | } 247 | 248 | if len(metrics) > 0 { 249 | return fmt.Sprintf("[%s]", strings.Join(metrics, ", ")) 250 | } 251 | return "" 252 | } 253 | 254 | // WorkerID returns worker ID from the context 255 | func WorkerID(ctx context.Context) int { 256 | cid, ok := ctx.Value(widContextKey).(int) 257 | if !ok { 258 | return 0 259 | } 260 | return cid 261 | } 262 | 263 | // WithWorkerID sets worker ID in the context 264 | func WithWorkerID(ctx context.Context, id int) context.Context { 265 | return context.WithValue(ctx, widContextKey, id) 266 | } 267 | 268 | // Get metrics from context. If not found, creates new instance with same worker count as stored in context. 
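// The lookup order is: a Value stored in the context, then a fresh instance
// sized from the worker ID found in the context, then a single-worker instance
// as the final fallback.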
269 | func Get(ctx context.Context) *Value { 270 | if v, ok := ctx.Value(metricsContextKey).(*Value); ok { 271 | return v 272 | } 273 | if n, ok := ctx.Value(widContextKey).(int); ok { 274 | return New(n + 1) // n is max worker id, need size = n+1 275 | } 276 | return New(1) // fallback to single worker 277 | } 278 | 279 | // Make context with metrics 280 | func Make(ctx context.Context, workers int) context.Context { 281 | return context.WithValue(ctx, metricsContextKey, New(workers)) 282 | } 283 | -------------------------------------------------------------------------------- /metrics/metrics_test.go: -------------------------------------------------------------------------------- 1 | package metrics 2 | 3 | import ( 4 | "context" 5 | "sync" 6 | "testing" 7 | "time" 8 | 9 | "github.com/stretchr/testify/assert" 10 | "github.com/stretchr/testify/require" 11 | ) 12 | 13 | func TestMetrics_UserDefined(t *testing.T) { 14 | m := New(1) // single worker is enough for user stats testing 15 | 16 | t.Run("basic operations", func(t *testing.T) { 17 | m.Add("k1", 100) 18 | m.Inc("k1") 19 | m.Inc("k2") 20 | 21 | assert.Equal(t, 101, m.Get("k1")) 22 | assert.Equal(t, 1, m.Get("k2")) 23 | assert.Equal(t, 0, m.Get("k3")) 24 | 25 | str := m.String() 26 | assert.Contains(t, str, "k1:101") 27 | assert.Contains(t, str, "k2:1") 28 | }) 29 | 30 | t.Run("string formatting", func(t *testing.T) { 31 | m := New(10) 32 | assert.Empty(t, m.String(), "empty metrics should return empty string") 33 | 34 | m.Inc("test") 35 | assert.Equal(t, "[test:1]", m.String()) 36 | 37 | m.Add("another", 5) 38 | str := m.String() 39 | assert.Contains(t, str, "test:1") 40 | assert.Contains(t, str, "another:5") 41 | }) 42 | } 43 | 44 | func TestMetrics_WorkerStats(t *testing.T) { 45 | t.Run("worker timers", func(t *testing.T) { 46 | m := New(2) // create metrics for 2 workers 47 | 48 | // worker 1 operations 49 | end := m.StartTimer(0, TimerProc) 50 | time.Sleep(20 * time.Millisecond) 51 | end() 52 | 53 | end = m.StartTimer(0, TimerWait) 54 | time.Sleep(20 * time.Millisecond) 55 | end() 56 | 57 | // worker 2 operations 58 | end = m.StartTimer(1, TimerProc) 59 | time.Sleep(20 * time.Millisecond) 60 | end() 61 | 62 | stats := m.GetStats() 63 | assert.GreaterOrEqual(t, stats.ProcessingTime.Milliseconds(), int64(20), 64 | "processing time should be at least 20ms") 65 | assert.GreaterOrEqual(t, stats.WaitTime.Milliseconds(), int64(20), 66 | "wait time should be at least 20ms") 67 | assert.Greater(t, stats.TotalTime, stats.WaitTime, 68 | "total time should be greater than wait time") 69 | assert.Greater(t, stats.TotalTime, stats.ProcessingTime, 70 | "total time should be greater than processing time") 71 | }) 72 | 73 | t.Run("worker counters", func(t *testing.T) { 74 | // worker 1 increments 75 | m := New(2) 76 | m.IncProcessed(0) 77 | m.IncProcessed(0) 78 | m.IncErrors(0) 79 | 80 | // worker 2 increments 81 | m.IncProcessed(1) 82 | m.IncDropped(1) 83 | 84 | stats := m.GetStats() 85 | assert.Equal(t, 3, stats.Processed) 86 | assert.Equal(t, 1, stats.Errors) 87 | assert.Equal(t, 1, stats.Dropped) 88 | }) 89 | 90 | t.Run("stats string", func(t *testing.T) { 91 | m := New(1) 92 | m.IncProcessed(0) 93 | m.IncErrors(0) 94 | end := m.StartTimer(0, TimerProc) 95 | time.Sleep(10 * time.Millisecond) 96 | end() 97 | 98 | stats := m.GetStats() 99 | str := stats.String() 100 | assert.Contains(t, str, "processed:1") 101 | assert.Contains(t, str, "errors:1") 102 | assert.Contains(t, str, "proc:") 103 | assert.Contains(t, str, "total:") 104 | }) 105 | } 
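// the tests below exercise the context helpers: WorkerID/WithWorkerID
// round-trips, Make/Get plumbing, per-context isolation, and sizing a new
// Value from a worker ID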
106 | 107 | func TestMetrics_Context(t *testing.T) { 108 | t.Run("worker id", func(t *testing.T) { 109 | ctx := WithWorkerID(context.Background(), 123) 110 | assert.Equal(t, 123, WorkerID(ctx)) 111 | 112 | ctx = context.Background() 113 | assert.Equal(t, 0, WorkerID(ctx)) 114 | 115 | ctx = context.WithValue(context.Background(), widContextKey, "not an int") 116 | assert.Equal(t, 0, WorkerID(ctx)) 117 | }) 118 | 119 | t.Run("metrics from context", func(t *testing.T) { 120 | ctx := Make(context.Background(), 2) 121 | m := Get(ctx) 122 | require.NotNil(t, m) 123 | 124 | // verify metrics working 125 | m.Inc("test") 126 | assert.Equal(t, 1, m.Get("test")) 127 | 128 | // verify worker stats 129 | m.IncProcessed(0) 130 | stats := m.GetStats() 131 | assert.Equal(t, 1, stats.Processed) 132 | }) 133 | 134 | t.Run("metrics isolation", func(t *testing.T) { 135 | ctx1 := Make(context.Background(), 1) 136 | ctx2 := Make(context.Background(), 1) 137 | 138 | m1 := Get(ctx1) 139 | m2 := Get(ctx2) 140 | 141 | m1.Inc("test") 142 | assert.Equal(t, 1, m1.Get("test")) 143 | assert.Equal(t, 0, m2.Get("test")) 144 | }) 145 | 146 | t.Run("metrics creation from worker id", func(t *testing.T) { 147 | ctx := WithWorkerID(context.Background(), 5) 148 | m := Get(ctx) 149 | require.NotNil(t, m) 150 | 151 | // should be able to use worker id 5 152 | m.IncProcessed(5) 153 | stats := m.GetStats() 154 | assert.Equal(t, 1, stats.Processed) 155 | }) 156 | } 157 | 158 | func TestMetrics_Concurrent(t *testing.T) { 159 | t.Run("concurrent user stats access", func(t *testing.T) { 160 | m := New(1) 161 | const goroutines = 10 162 | const iterations = 1000 163 | 164 | var wg sync.WaitGroup 165 | wg.Add(goroutines) 166 | 167 | for i := 0; i < goroutines; i++ { 168 | go func() { 169 | defer wg.Done() 170 | for j := 0; j < iterations; j++ { 171 | m.Inc("counter") 172 | val := m.Get("counter") 173 | assert.Positive(t, val) 174 | m.Add("sum", 2) 175 | } 176 | }() 177 | } 178 | wg.Wait() 179 | 180 | assert.Equal(t, goroutines*iterations, m.Get("counter")) 181 | assert.Equal(t, goroutines*iterations*2, m.Get("sum")) 182 | }) 183 | 184 | t.Run("per worker stats", func(t *testing.T) { 185 | const workers = 4 186 | m := New(workers) 187 | var wg sync.WaitGroup 188 | wg.Add(workers) 189 | 190 | // each worker operates on its own stats 191 | for wid := 0; wid < workers; wid++ { 192 | go func(id int) { 193 | defer wg.Done() 194 | const iterations = 1000 195 | 196 | for j := 0; j < iterations; j++ { 197 | m.IncProcessed(id) 198 | end := m.StartTimer(id, TimerProc) 199 | time.Sleep(time.Microsecond) 200 | end() 201 | } 202 | }(wid) 203 | } 204 | wg.Wait() 205 | 206 | stats := m.GetStats() 207 | assert.Equal(t, workers*1000, stats.Processed) 208 | assert.Greater(t, stats.ProcessingTime, time.Duration(0)) 209 | 210 | // verify each worker's stats are accurate 211 | for wid := 0; wid < workers; wid++ { 212 | assert.Equal(t, 1000, m.workerStats[wid].Processed) 213 | assert.Greater(t, m.workerStats[wid].ProcessingTime, time.Duration(0)) 214 | } 215 | }) 216 | } 217 | 218 | func TestMetrics_AllTimerTypes(t *testing.T) { 219 | m := New(1) 220 | 221 | // record each timer type 222 | end := m.StartTimer(0, TimerProc) 223 | time.Sleep(time.Millisecond) 224 | end() 225 | 226 | end = m.StartTimer(0, TimerWait) 227 | time.Sleep(time.Millisecond) 228 | end() 229 | 230 | end = m.StartTimer(0, TimerInit) 231 | time.Sleep(time.Millisecond) 232 | end() 233 | 234 | end = m.StartTimer(0, TimerWrap) 235 | time.Sleep(time.Millisecond) 236 | end() 237 | 238 | // 
verify each timer recorded something 239 | stats := m.workerStats[0] 240 | assert.Greater(t, stats.ProcessingTime, time.Duration(0), "ProcessingTime should be recorded") 241 | assert.Greater(t, stats.WaitTime, time.Duration(0), "WaitTime should be recorded") 242 | assert.Greater(t, stats.InitTime, time.Duration(0), "InitTime should be recorded") 243 | assert.Greater(t, stats.WrapTime, time.Duration(0), "WrapTime should be recorded") 244 | 245 | // test unknown timer type 246 | end = m.StartTimer(0, TimerType(99)) 247 | time.Sleep(time.Millisecond) 248 | end() 249 | // stats should remain unchanged 250 | newStats := m.workerStats[0] 251 | assert.Equal(t, stats, newStats, "unknown timer type should not affect stats") 252 | } 253 | 254 | func TestStats_String(t *testing.T) { 255 | tests := []struct { 256 | name string 257 | stats Stats 258 | expected string 259 | }{ 260 | { 261 | name: "empty stats", 262 | stats: Stats{}, 263 | expected: "", 264 | }, 265 | { 266 | name: "only counters", 267 | stats: Stats{ 268 | Processed: 10, 269 | Errors: 2, 270 | Dropped: 3, 271 | }, 272 | expected: "[processed:10, errors:2, dropped:3]", 273 | }, 274 | { 275 | name: "only timers", 276 | stats: Stats{ 277 | ProcessingTime: time.Second, 278 | WaitTime: 2 * time.Second, 279 | InitTime: 3 * time.Second, 280 | WrapTime: 4 * time.Second, 281 | TotalTime: 10 * time.Second, 282 | }, 283 | expected: "[proc:1s, wait:2s, init:3s, wrap:4s, total:10s]", 284 | }, 285 | { 286 | name: "all fields", 287 | stats: Stats{ 288 | Processed: 10, 289 | Errors: 2, 290 | Dropped: 3, 291 | ProcessingTime: time.Second, 292 | WaitTime: 2 * time.Second, 293 | InitTime: 3 * time.Second, 294 | WrapTime: 4 * time.Second, 295 | TotalTime: 10 * time.Second, 296 | }, 297 | expected: "[processed:10, errors:2, dropped:3, proc:1s, wait:2s, init:3s, wrap:4s, total:10s]", 298 | }, 299 | { 300 | name: "some fields zero", 301 | stats: Stats{ 302 | Processed: 10, 303 | ProcessingTime: time.Second, 304 | TotalTime: 10 * time.Second, 305 | }, 306 | expected: "[processed:10, proc:1s, total:10s]", 307 | }, 308 | { 309 | name: "with derived stats", 310 | stats: Stats{ 311 | Processed: 100, 312 | Errors: 10, 313 | ProcessingTime: time.Second, 314 | WaitTime: time.Second, 315 | TotalTime: 2 * time.Second, 316 | RatePerSec: 50.0, 317 | AvgLatency: 10 * time.Millisecond, 318 | ErrorRate: 0.1, 319 | Utilization: 0.5, 320 | }, 321 | expected: "[processed:100, rate:50.0/s, avg_latency:10ms, errors:10 (10.0%), proc:1s, wait:1s, total:2s, utilization:50.0%]", 322 | }, 323 | } 324 | 325 | for _, tt := range tests { 326 | t.Run(tt.name, func(t *testing.T) { 327 | assert.Equal(t, tt.expected, tt.stats.String()) 328 | }) 329 | } 330 | } 331 | 332 | func TestMetrics_AddWaitTime(t *testing.T) { 333 | t.Run("basic wait time tracking", func(t *testing.T) { 334 | m := New(2) // two workers 335 | 336 | // add some wait time to worker 0 337 | m.AddWaitTime(0, 100*time.Millisecond) 338 | m.AddWaitTime(0, 50*time.Millisecond) 339 | 340 | // add different wait time to worker 1 341 | m.AddWaitTime(1, 75*time.Millisecond) 342 | 343 | stats := m.GetStats() 344 | assert.Equal(t, 225*time.Millisecond, stats.WaitTime, 345 | "total wait time should be sum of all workers' wait times") 346 | }) 347 | 348 | t.Run("accumulation with existing timers", func(t *testing.T) { 349 | m := New(1) 350 | 351 | // start a regular wait timer 352 | end := m.StartTimer(0, TimerWait) 353 | time.Sleep(10 * time.Millisecond) 354 | end() 355 | 356 | // add explicit wait time 357 | m.AddWaitTime(0, 
20*time.Millisecond) 358 | 359 | stats := m.GetStats() 360 | assert.Greater(t, stats.WaitTime, 30*time.Millisecond, 361 | "wait time should include both timer and added wait time") 362 | }) 363 | 364 | t.Run("multiple workers tracking", func(t *testing.T) { 365 | m := New(3) 366 | 367 | // simulate different wait patterns for each worker 368 | m.AddWaitTime(0, 10*time.Millisecond) 369 | m.AddWaitTime(1, 20*time.Millisecond) 370 | m.AddWaitTime(2, 30*time.Millisecond) 371 | 372 | // add more wait time to first worker 373 | m.AddWaitTime(0, 15*time.Millisecond) 374 | 375 | stats := m.GetStats() 376 | assert.Equal(t, 75*time.Millisecond, stats.WaitTime, 377 | "total wait time should be sum across all workers") 378 | assert.Equal(t, 25*time.Millisecond, m.workerStats[0].WaitTime, 379 | "individual worker should track its own wait time") 380 | }) 381 | } 382 | 383 | func TestStats_DerivedValues(t *testing.T) { 384 | t.Run("derived stats calculation", func(t *testing.T) { 385 | m := New(1) 386 | w := m.workerStats[0] 387 | w.Processed = 100 388 | w.Errors = 10 389 | w.Dropped = 5 390 | w.ProcessingTime = 2 * time.Second 391 | w.WaitTime = 1 * time.Second 392 | m.workerStats[0] = w 393 | m.startTime = time.Now().Add(-4 * time.Second) // simulate 4 seconds total time 394 | 395 | stats := m.GetStats() 396 | 397 | assert.InDelta(t, 25.0, stats.RatePerSec, 0.1, "should calculate rate per second") 398 | assert.Equal(t, 20*time.Millisecond, stats.AvgLatency, "should calculate average latency") 399 | assert.InDelta(t, 0.087, stats.ErrorRate, 0.01, "should calculate error rate") 400 | assert.InDelta(t, 0.043, stats.DroppedRate, 0.01, "should calculate dropped rate") 401 | assert.InDelta(t, 0.67, stats.Utilization, 0.01, "should calculate utilization") 402 | }) 403 | 404 | t.Run("string format with derived stats", func(t *testing.T) { 405 | stats := Stats{ 406 | Processed: 100, 407 | Errors: 10, 408 | Dropped: 5, 409 | ProcessingTime: 2 * time.Second, 410 | WaitTime: 1 * time.Second, 411 | TotalTime: 4 * time.Second, 412 | RatePerSec: 25.0, 413 | AvgLatency: 20 * time.Millisecond, 414 | ErrorRate: 0.087, 415 | DroppedRate: 0.043, 416 | Utilization: 0.67, 417 | } 418 | 419 | str := stats.String() 420 | t.Log("Stats string:", str) 421 | assert.Contains(t, str, "rate:25.0/s") 422 | assert.Contains(t, str, "avg_latency:20ms") 423 | assert.Contains(t, str, "errors:10 (8.7%)") 424 | assert.Contains(t, str, "dropped:5 (4.3%)") 425 | assert.Contains(t, str, "utilization:67.0%") 426 | }) 427 | 428 | t.Run("handles zero values", func(t *testing.T) { 429 | m := New(1) 430 | stats := m.GetStats() 431 | 432 | assert.Zero(t, stats.RatePerSec) 433 | assert.Zero(t, stats.AvgLatency) 434 | assert.Zero(t, stats.ErrorRate) 435 | assert.Zero(t, stats.DroppedRate) 436 | assert.Zero(t, stats.Utilization) 437 | }) 438 | } 439 | 440 | func TestMetrics_ParallelProcessing(t *testing.T) { 441 | m := New(2) // two workers 442 | 443 | // simulate two workers processing in parallel 444 | // worker 1: processes for 100ms 445 | m.workerStats[0].ProcessingTime = 100 * time.Millisecond 446 | m.workerStats[0].Processed = 50 447 | 448 | // worker 2: processes for 150ms 449 | m.workerStats[1].ProcessingTime = 150 * time.Millisecond 450 | m.workerStats[1].Processed = 75 451 | 452 | // set start time to simulate 200ms total elapsed time 453 | m.startTime = time.Now().Add(-200 * time.Millisecond) 454 | 455 | stats := m.GetStats() 456 | 457 | // processing time should be max of workers, not sum 458 | assert.Equal(t, 150*time.Millisecond, 
stats.ProcessingTime, 459 | "processing time should be max across workers") 460 | 461 | // total time should be elapsed wall time 462 | assert.InDelta(t, 200, stats.TotalTime.Milliseconds(), 50, 463 | "total time should be actual elapsed time") 464 | 465 | // rate should be total processed divided by total time 466 | expectedRate := float64(stats.Processed) / stats.TotalTime.Seconds() 467 | assert.InDelta(t, expectedRate, stats.RatePerSec, 1, 468 | "rate should be based on total processed items and elapsed time") 469 | 470 | // average latency should use max processing time 471 | expectedLatency := stats.ProcessingTime / time.Duration(stats.Processed) 472 | assert.Equal(t, expectedLatency, stats.AvgLatency, 473 | "average latency should be based on max processing time") 474 | } 475 | -------------------------------------------------------------------------------- /middleware/middleware.go: -------------------------------------------------------------------------------- 1 | // Package middleware provides common middleware implementations for the pool package. 2 | package middleware 3 | 4 | import ( 5 | "context" 6 | "fmt" 7 | "math/rand" 8 | "time" 9 | 10 | "golang.org/x/time/rate" 11 | 12 | "github.com/go-pkgz/pool" 13 | ) 14 | 15 | // Retry returns a middleware that retries failed operations up to maxAttempts times 16 | // with exponential backoff between retries. 17 | // baseDelay is used as the initial delay between retries, and each subsequent retry 18 | // increases the delay exponentially (baseDelay * 2^attempt) with some random jitter. 19 | func Retry[T any](maxAttempts int, baseDelay time.Duration) pool.Middleware[T] { 20 | if maxAttempts <= 0 { 21 | maxAttempts = 3 // default to 3 attempts 22 | } 23 | if baseDelay <= 0 { 24 | baseDelay = time.Second // default to 1 second 25 | } 26 | 27 | return func(next pool.Worker[T]) pool.Worker[T] { 28 | return pool.WorkerFunc[T](func(ctx context.Context, v T) error { 29 | var lastErr error 30 | for attempt := range maxAttempts { 31 | var err error 32 | if err = next.Do(ctx, v); err == nil { 33 | return nil 34 | } 35 | lastErr = err 36 | 37 | // don't sleep after last attempt 38 | if attempt < maxAttempts-1 { 39 | // exponential backoff with jitter 40 | delay := baseDelay * time.Duration(1<= 3 { 159 | return nil 160 | } 161 | return errors.New("string too short") 162 | } 163 | 164 | var processed []string 165 | worker := pool.WorkerFunc[string](func(_ context.Context, v string) error { 166 | processed = append(processed, v) 167 | return nil 168 | }) 169 | 170 | p := pool.New[string](1, worker).Use(Validator(validator)) 171 | require.NoError(t, p.Go(context.Background())) 172 | 173 | p.Submit("test") 174 | require.NoError(t, p.Close(context.Background())) 175 | 176 | assert.Equal(t, []string{"test"}, processed) 177 | }) 178 | 179 | t.Run("invalid input blocked", func(t *testing.T) { 180 | validator := func(s string) error { 181 | if len(s) >= 3 { 182 | return nil 183 | } 184 | return errors.New("string too short") 185 | } 186 | 187 | var processed []string 188 | worker := pool.WorkerFunc[string](func(_ context.Context, v string) error { 189 | processed = append(processed, v) 190 | return nil 191 | }) 192 | 193 | p := pool.New[string](1, worker).Use(Validator(validator)) 194 | require.NoError(t, p.Go(context.Background())) 195 | 196 | p.Submit("ok") 197 | err := p.Close(context.Background()) 198 | require.Error(t, err) 199 | assert.Contains(t, err.Error(), "validation failed") 200 | assert.Empty(t, processed) 201 | }) 202 | } 203 | 204 | func 
TestRateLimiter(t *testing.T) { 205 | t.Run("allows tasks within rate limit", func(t *testing.T) { 206 | var processed atomic.Int32 207 | worker := pool.WorkerFunc[string](func(_ context.Context, v string) error { 208 | processed.Add(1) 209 | return nil 210 | }) 211 | 212 | // 10 tasks per second with burst of 5 213 | p := pool.New[string](2, worker).Use(RateLimiter[string](10, 5)) 214 | require.NoError(t, p.Go(context.Background())) 215 | 216 | // submit 5 tasks - should all process immediately due to burst 217 | for i := 0; i < 5; i++ { 218 | p.Submit("task") 219 | } 220 | 221 | // wait a bit for processing 222 | time.Sleep(50 * time.Millisecond) 223 | require.NoError(t, p.Close(context.Background())) 224 | 225 | assert.Equal(t, int32(5), processed.Load(), "all tasks within burst should process") 226 | }) 227 | 228 | t.Run("blocks tasks exceeding rate limit", func(t *testing.T) { 229 | var timestamps []time.Time 230 | var mu sync.Mutex 231 | worker := pool.WorkerFunc[string](func(_ context.Context, v string) error { 232 | mu.Lock() 233 | timestamps = append(timestamps, time.Now()) 234 | mu.Unlock() 235 | return nil 236 | }) 237 | 238 | // 2 tasks per second with burst of 1 239 | p := pool.New[string](1, worker).Use(RateLimiter[string](2, 1)) 240 | require.NoError(t, p.Go(context.Background())) 241 | 242 | // submit 3 tasks 243 | for i := 0; i < 3; i++ { 244 | p.Submit(fmt.Sprintf("task-%d", i)) 245 | } 246 | 247 | require.NoError(t, p.Close(context.Background())) 248 | 249 | // verify timing 250 | require.Len(t, timestamps, 3) 251 | // first task should process immediately 252 | // second task should wait ~500ms (rate of 2/sec) 253 | // third task should wait another ~500ms 254 | gap1 := timestamps[1].Sub(timestamps[0]) 255 | gap2 := timestamps[2].Sub(timestamps[1]) 256 | 257 | assert.Greater(t, gap1, 350*time.Millisecond, "second task should wait for rate limit") 258 | assert.Less(t, gap1, 650*time.Millisecond, "second task shouldn't wait too long") 259 | assert.Greater(t, gap2, 350*time.Millisecond, "third task should wait for rate limit") 260 | assert.Less(t, gap2, 650*time.Millisecond, "third task shouldn't wait too long") 261 | }) 262 | 263 | t.Run("respects context cancellation while waiting", func(t *testing.T) { 264 | ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond) 265 | defer cancel() 266 | 267 | var processed atomic.Int32 268 | worker := pool.WorkerFunc[string](func(_ context.Context, v string) error { 269 | processed.Add(1) 270 | time.Sleep(10 * time.Millisecond) // simulate work 271 | return nil 272 | }) 273 | 274 | // very low rate to force waiting 275 | p := pool.New[string](1, worker).Use(RateLimiter[string](1, 1)) 276 | require.NoError(t, p.Go(ctx)) 277 | 278 | // submit multiple tasks 279 | for i := 0; i < 5; i++ { 280 | p.Submit("task") 281 | } 282 | 283 | err := p.Close(context.Background()) 284 | require.Error(t, err) 285 | assert.Contains(t, err.Error(), "would exceed context deadline") 286 | // should process 1-2 tasks before context cancellation 287 | assert.Less(t, processed.Load(), int32(3), "should not process all tasks") 288 | }) 289 | 290 | t.Run("handles default values", func(t *testing.T) { 291 | var processed atomic.Int32 292 | worker := pool.WorkerFunc[string](func(_ context.Context, v string) error { 293 | processed.Add(1) 294 | return nil 295 | }) 296 | 297 | // test with invalid rate and burst 298 | p := pool.New[string](1, worker).Use(RateLimiter[string](-1, 0)) 299 | require.NoError(t, p.Go(context.Background())) 300 | 
301 | 		p.Submit("task")
302 | 		require.NoError(t, p.Close(context.Background()))
303 | 
304 | 		assert.Equal(t, int32(1), processed.Load(), "should process with default values")
305 | 	})
306 | 
307 | 	t.Run("multiple workers share rate limit", func(t *testing.T) {
308 | 		var timestamps []time.Time
309 | 		var mu sync.Mutex
310 | 		worker := pool.WorkerFunc[string](func(_ context.Context, v string) error {
311 | 			mu.Lock()
312 | 			timestamps = append(timestamps, time.Now())
313 | 			mu.Unlock()
314 | 			time.Sleep(10 * time.Millisecond) // simulate work
315 | 			return nil
316 | 		})
317 | 
318 | 		// 2 tasks per second with burst of 2, but 4 workers
319 | 		p := pool.New[string](4, worker).Use(RateLimiter[string](2, 2))
320 | 		require.NoError(t, p.Go(context.Background()))
321 | 
322 | 		// submit 4 tasks
323 | 		for i := 0; i < 4; i++ {
324 | 			p.Submit(fmt.Sprintf("task-%d", i))
325 | 		}
326 | 
327 | 		require.NoError(t, p.Close(context.Background()))
328 | 
329 | 		// verify timing - even with 4 workers, rate limit is shared
330 | 		require.Len(t, timestamps, 4)
331 | 		// first 2 tasks should process immediately (burst)
332 | 		gap1 := timestamps[1].Sub(timestamps[0])
333 | 		assert.Less(t, gap1, 50*time.Millisecond, "first two tasks should process immediately")
334 | 
335 | 		// next 2 tasks should wait for rate limit
336 | 		gap2 := timestamps[2].Sub(timestamps[1])
337 | 		gap3 := timestamps[3].Sub(timestamps[2])
338 | 		assert.Greater(t, gap2, 350*time.Millisecond, "third task should wait for rate limit")
339 | 		assert.Greater(t, gap3, 350*time.Millisecond, "fourth task should wait for rate limit")
340 | 	})
341 | }
342 | 
--------------------------------------------------------------------------------
/pool.go:
--------------------------------------------------------------------------------
1 | package pool
2 | 
3 | import (
4 | 	"context"
5 | 	"errors"
6 | 	"fmt"
7 | 	"hash/fnv"
8 | 	"math/rand"
9 | 	"sync"
10 | 	"sync/atomic"
11 | 	"time"
12 | 
13 | 	"golang.org/x/sync/errgroup"
14 | 
15 | 	"github.com/go-pkgz/pool/metrics"
16 | )
17 | 
18 | // WorkerGroup represents a pool of workers processing items in parallel.
19 | // Supports both direct item processing and batching modes.
20 | type WorkerGroup[T any] struct {
21 | 	poolSize         int                 // number of workers (goroutines)
22 | 	workerChanSize   int                 // size of worker channels
23 | 	workerCompleteFn WorkerCompleteFn[T] // completion callback function, called by each worker on completion
24 | 	poolCompleteFn   GroupCompleteFn[T]  // pool-level completion callback, called once when all workers are done
25 | 	continueOnError  bool                // don't terminate on first error
26 | 	chunkFn          func(T) string      // worker selector function
27 | 	worker           Worker[T]           // worker function
28 | 	workerMaker      WorkerMaker[T]      // worker maker function
29 | 
30 | 	metrics *metrics.Value // shared metrics
31 | 
32 | 	workersCh     []chan T     // workers input channels
33 | 	sharedCh      chan T       // shared input channel for all workers
34 | 	activeWorkers atomic.Int32 // track number of active workers
35 | 
36 | 	// batching support
37 | 	batchSize     int        // if > 0, accumulate items up to this size
38 | 	accumulators  [][]T      // per-worker accumulators for batching
39 | 	workerBatchCh []chan []T // per-worker batch channels
40 | 	sharedBatchCh chan []T   // shared batch channel
41 | 
42 | 	eg        *errgroup.Group
43 | 	activated bool
44 | 	ctx       context.Context
45 | 
46 | 	sendMu sync.Mutex
47 | }
48 | 
49 | // Worker is the interface that wraps the Do method.
50 | type Worker[T any] interface {
51 | 	Do(ctx context.Context, v T) error
52 | }
53 | 
54 | // WorkerFunc is an adapter to allow the use of ordinary functions as Workers.
55 | type WorkerFunc[T any] func(ctx context.Context, v T) error
56 | 
57 | // Do calls f(ctx, v).
58 | func (f WorkerFunc[T]) Do(ctx context.Context, v T) error { return f(ctx, v) }
59 | 
60 | // WorkerMaker is a function that returns a new Worker
61 | type WorkerMaker[T any] func() Worker[T]
62 | 
63 | // WorkerCompleteFn called on worker completion
64 | type WorkerCompleteFn[T any] func(ctx context.Context, id int, worker Worker[T]) error
65 | 
66 | // GroupCompleteFn called once when all workers are done
67 | type GroupCompleteFn[T any] func(ctx context.Context) error
68 | 
69 | // Send func called by worker code to publish results
70 | type Send[T any] func(val T) error
71 | 
72 | // New creates a worker pool with a shared worker instance.
73 | // All goroutines share the same worker, suitable for stateless processing.
74 | func New[T any](size int, worker Worker[T]) *WorkerGroup[T] {
75 | 	if size < 1 {
76 | 		size = 1
77 | 	}
78 | 
79 | 	res := &WorkerGroup[T]{
80 | 		poolSize:       size,
81 | 		worker:         worker,
82 | 		workerChanSize: 1,
83 | 		batchSize:      10, // default batch size
84 | 
85 | 		// initialize channels
86 | 		workersCh:     make([]chan T, size),
87 | 		sharedCh:      make(chan T, size),
88 | 		workerBatchCh: make([]chan []T, size),
89 | 		sharedBatchCh: make(chan []T, size),
90 | 		accumulators:  make([][]T, size),
91 | 	}
92 | 
93 | 	// initialize workers' channels
94 | 	for i := range size {
95 | 		res.workersCh[i] = make(chan T, res.workerChanSize)
96 | 		res.workerBatchCh[i] = make(chan []T, res.workerChanSize)
97 | 	}
98 | 
99 | 	return res
100 | }
101 | 
102 | // NewStateful creates a worker pool where each goroutine gets its own worker instance.
103 | // Suitable for operations requiring state (e.g., database connections).
104 | func NewStateful[T any](size int, maker func() Worker[T]) *WorkerGroup[T] {
105 | 	if size < 1 {
106 | 		size = 1
107 | 	}
108 | 
109 | 	res := &WorkerGroup[T]{
110 | 		poolSize:       size,
111 | 		workerMaker:    maker,
112 | 		workerChanSize: 1,
113 | 		batchSize:      10, // default batch size
114 | 		ctx:            context.Background(),
115 | 
116 | 		// initialize channels
117 | 		workersCh:     make([]chan T, size),
118 | 		sharedCh:      make(chan T, size),
119 | 		workerBatchCh: make([]chan []T, size),
120 | 		sharedBatchCh: make(chan []T, size),
121 | 		accumulators:  make([][]T, size),
122 | 	}
123 | 
124 | 	// initialize workers' channels
125 | 	for i := range size {
126 | 		res.workersCh[i] = make(chan T, res.workerChanSize)
127 | 		res.workerBatchCh[i] = make(chan []T, res.workerChanSize)
128 | 	}
129 | 
130 | 	return res
131 | }
132 | 
133 | // WithWorkerChanSize sets channel buffer size for each worker.
134 | // Larger sizes can help with bursty workloads but increase memory usage.
135 | // Default: 1
136 | func (p *WorkerGroup[T]) WithWorkerChanSize(size int) *WorkerGroup[T] {
137 | 	p.workerChanSize = size
138 | 	if size < 1 {
139 | 		p.workerChanSize = 1
140 | 	}
141 | 	return p
142 | }
143 | 
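// Illustrative sketch (not part of this file): minimal usage of NewStateful
// above. The counterWorker type is hypothetical; because the maker returns a
// fresh instance per goroutine, mutating its state inside Do needs no locking.
//
//	type counterWorker struct{ seen int }
//
//	func (w *counterWorker) Do(context.Context, string) error { w.seen++; return nil }
//
//	p := NewStateful[string](2, func() Worker[string] { return &counterWorker{} })
//	// each of the 2 goroutines now owns its own counterWorker instance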
144 | // WithWorkerCompleteFn sets callback executed on worker completion.
145 | // Useful for cleanup or finalization of worker resources.
146 | // Default: none (disabled)
147 | func (p *WorkerGroup[T]) WithWorkerCompleteFn(fn WorkerCompleteFn[T]) *WorkerGroup[T] {
148 | 	p.workerCompleteFn = fn
149 | 	return p
150 | }
151 | 
152 | // WithPoolCompleteFn sets callback executed once when all workers are done.
153 | func (p *WorkerGroup[T]) WithPoolCompleteFn(fn GroupCompleteFn[T]) *WorkerGroup[T] {
154 | 	p.poolCompleteFn = fn
155 | 	return p
156 | }
157 | 
158 | // WithChunkFn enables predictable item distribution.
159 | // Items with the same key (returned by fn) are processed by the same worker.
160 | // Useful for maintaining order within groups of related items.
161 | // Default: none (random distribution)
162 | func (p *WorkerGroup[T]) WithChunkFn(fn func(T) string) *WorkerGroup[T] {
163 | 	p.chunkFn = fn
164 | 	return p
165 | }
166 | 
167 | // WithContinueOnError sets whether the pool should continue on error.
168 | // Default: false
169 | func (p *WorkerGroup[T]) WithContinueOnError() *WorkerGroup[T] {
170 | 	p.continueOnError = true
171 | 	return p
172 | }
173 | 
174 | // WithBatchSize enables item batching with specified size.
175 | // Items are accumulated until the batch is full before processing.
176 | // Set to 0 to disable batching.
177 | // Default: 10
178 | func (p *WorkerGroup[T]) WithBatchSize(size int) *WorkerGroup[T] {
179 | 	p.batchSize = size
180 | 	if size > 0 {
181 | 		// initialize accumulators with capacity
182 | 		for i := range p.poolSize {
183 | 			p.accumulators[i] = make([]T, 0, size)
184 | 		}
185 | 	}
186 | 	return p
187 | }
188 | 
189 | // Submit adds an item to the pool for processing. May block if worker channels are full.
190 | // Not thread-safe, intended for use by the main thread or a single producer's thread.
191 | func (p *WorkerGroup[T]) Submit(v T) {
192 | 	// check context early
193 | 	select {
194 | 	case <-p.ctx.Done():
195 | 		return // don't submit if context is cancelled
196 | 	default:
197 | 	}
198 | 
199 | 	if p.batchSize == 0 {
200 | 		// direct submission mode
201 | 		if p.chunkFn == nil {
202 | 			p.sharedCh <- v
203 | 			return
204 | 		}
205 | 		h := fnv.New32a()
206 | 		_, _ = h.Write([]byte(p.chunkFn(v)))
207 | 		id := int(h.Sum32()) % p.poolSize
208 | 		p.workersCh[id] <- v
209 | 		return
210 | 	}
211 | 
212 | 	// batching mode
213 | 	var id int
214 | 	if p.chunkFn != nil {
215 | 		h := fnv.New32a()
216 | 		_, _ = h.Write([]byte(p.chunkFn(v)))
217 | 		id = int(h.Sum32()) % p.poolSize
218 | 	} else {
219 | 		id = rand.Intn(p.poolSize) //nolint:gosec // no need for secure random here
220 | 	}
221 | 
222 | 	// add to accumulator
223 | 	p.accumulators[id] = append(p.accumulators[id], v)
224 | 
225 | 	// check if we should flush
226 | 	var shouldFlush bool
227 | 	select {
228 | 	case <-p.ctx.Done():
229 | 		shouldFlush = true // always flush on context cancellation
230 | 	default:
231 | 		// in normal case, flush only when batch is full
232 | 		shouldFlush = len(p.accumulators[id]) >= p.batchSize
233 | 	}
234 | 
235 | 	if shouldFlush && len(p.accumulators[id]) > 0 {
236 | 		if p.chunkFn == nil {
237 | 			select {
238 | 			case p.sharedBatchCh <- p.accumulators[id]:
239 | 			case <-p.ctx.Done(): // handle case where channel send would block
240 | 				return
241 | 			}
242 | 		} else {
243 | 			select {
244 | 			case p.workerBatchCh[id] <- p.accumulators[id]:
245 | 			case <-p.ctx.Done():
246 | 				return
247 | 			}
248 | 		}
249 | 		p.accumulators[id] = make([]T, 0, p.batchSize)
250 | 	}
251 | }
252 | 
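// Illustrative sketch (not part of this file): routing related items to the
// same worker via WithChunkFn above. The event type and its userID field are
// made up for the example; keys are hashed with fnv exactly as in Submit.
//
//	type event struct{ userID, action string }
//
//	p := New[event](4, worker).
//		WithChunkFn(func(e event) string { return e.userID }). // same user -> same worker
//		WithBatchSize(0)                                       // direct mode, no batching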
253 | // Send adds an item to the pool for processing.
254 | // Safe for concurrent use, intended for worker-to-pool submissions or for use by multiple concurrent producers.
255 | func (p *WorkerGroup[T]) Send(v T) {
256 | 	p.sendMu.Lock()
257 | 	defer p.sendMu.Unlock()
258 | 	p.Submit(v)
259 | }
260 | 
261 | // Go activates the pool and starts worker goroutines.
262 | // Must be called before submitting items.
263 | func (p *WorkerGroup[T]) Go(ctx context.Context) error {
264 | 	if p.activated {
265 | 		return fmt.Errorf("workers pool already activated")
266 | 	}
267 | 	defer func() { p.activated = true }()
268 | 
269 | 	var egCtx context.Context
270 | 	p.eg, egCtx = errgroup.WithContext(ctx)
271 | 	p.ctx = egCtx
272 | 
273 | 	// create metrics context for all workers
274 | 	metricsCtx := metrics.Make(egCtx, p.poolSize)
275 | 	p.metrics = metrics.Get(metricsCtx)
276 | 
277 | 	// set initial count
278 | 	p.activeWorkers.Store(int32(p.poolSize)) //nolint:gosec // no risk of overflow
279 | 
280 | 	// start all goroutines (workers)
281 | 	for i := range p.poolSize {
282 | 		withWorkerIDctx := metrics.WithWorkerID(metricsCtx, i)
283 | 		workerCh := p.sharedCh
284 | 		batchCh := p.sharedBatchCh
285 | 		if p.chunkFn != nil {
286 | 			workerCh = p.workersCh[i]
287 | 			batchCh = p.workerBatchCh[i]
288 | 		}
289 | 		r := workerRequest[T]{inCh: workerCh, batchCh: batchCh, m: p.metrics, id: i}
290 | 		p.eg.Go(p.workerProc(withWorkerIDctx, r))
291 | 	}
292 | 
293 | 	return nil
294 | }
295 | 
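// Illustrative sketch (not part of this file): because Send above is
// mutex-protected while Submit is not, multiple producer goroutines may feed
// one pool. Producer count and payloads are made up for the example.
//
//	var wg sync.WaitGroup
//	for i := range 3 {
//		wg.Add(1)
//		go func() {
//			defer wg.Done()
//			p.Send(fmt.Sprintf("item from producer %d", i)) // safe for concurrent use
//		}()
//	}
//	wg.Wait()
//	_ = p.Close(context.Background()) // close only after all producers finished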
296 | // workerRequest is a request to a worker goroutine containing all necessary data
297 | type workerRequest[T any] struct {
298 | 	inCh    <-chan T
299 | 	batchCh <-chan []T
300 | 	m       *metrics.Value
301 | 	id      int
302 | }
303 | 
304 | // workerProc is a worker goroutine function; it reads from the input channel and processes records
305 | func (p *WorkerGroup[T]) workerProc(wCtx context.Context, r workerRequest[T]) func() error {
306 | 	return func() error {
307 | 		var lastErr error
308 | 		var totalErrs int
309 | 
310 | 		initEndTmr := r.m.StartTimer(r.id, metrics.TimerInit)
311 | 		worker := p.worker
312 | 		if p.workerMaker != nil {
313 | 			worker = p.workerMaker()
314 | 		}
315 | 		initEndTmr()
316 | 
317 | 		lastActivity := time.Now()
318 | 
319 | 		// processItem handles a single item with metrics
320 | 		processItem := func(v T) error {
321 | 			waitTime := time.Since(lastActivity)
322 | 			r.m.AddWaitTime(r.id, waitTime)
323 | 			lastActivity = time.Now()
324 | 
325 | 			procEndTmr := r.m.StartTimer(r.id, metrics.TimerProc)
326 | 			defer procEndTmr()
327 | 
328 | 			if err := worker.Do(wCtx, v); err != nil {
329 | 				r.m.IncErrors(r.id)
330 | 				totalErrs++
331 | 				if !p.continueOnError {
332 | 					return fmt.Errorf("worker %d failed: %w", r.id, err)
333 | 				}
334 | 				lastErr = fmt.Errorf("worker %d failed: %w", r.id, err)
335 | 				return nil // continue on error
336 | 			}
337 | 			r.m.IncProcessed(r.id)
338 | 			return nil
339 | 		}
340 | 
341 | 		// processBatch handles batch of items
342 | 		processBatch := func(items []T) error {
343 | 			waitTime := time.Since(lastActivity)
344 | 			r.m.AddWaitTime(r.id, waitTime)
345 | 			lastActivity = time.Now()
346 | 
347 | 			procEndTmr := r.m.StartTimer(r.id, metrics.TimerProc)
348 | 			defer procEndTmr()
349 | 
350 | 			for _, v := range items {
351 | 				if err := worker.Do(wCtx, v); err != nil {
352 | 					r.m.IncErrors(r.id)
353 | 					totalErrs++
354 | 					if !p.continueOnError {
355 | 						return fmt.Errorf("worker %d failed: %w", r.id, err)
356 | 					}
357 | 					lastErr = fmt.Errorf("worker %d failed: %w", r.id, err)
358 | 					continue
359 | 				}
360 | 				r.m.IncProcessed(r.id)
361 | 			}
362 | 			return nil
363 | 		}
364 | 
365 | 		// track if channels are closed
366 | 		normalClosed := false
367 | 		batchClosed := false
368 | 
369 | 		// main processing loop
370 | 		for {
371 | 			if normalClosed && batchClosed {
372 | 				return p.finishWorker(wCtx, r.id, worker, lastErr, totalErrs)
373 | 			}
374 | 
375 | 			select {
376 | 			case <-wCtx.Done():
377 | 				return p.finishWorker(wCtx, r.id, worker, wCtx.Err(), totalErrs)
378 | 
379 | 			case v, ok := <-r.inCh:
380 | 				if !ok {
381 | 					normalClosed = true
382 | 					continue
383 | 				}
384 | 				if err := processItem(v); err != nil {
385 | 					return p.finishWorker(wCtx, r.id, worker, err, totalErrs)
386 | 				}
387 | 
388 | 			case batch, ok := <-r.batchCh:
389 | 				if !ok {
390 | 					batchClosed = true
391 | 					continue
392 | 				}
393 | 				if err := processBatch(batch); err != nil {
394 | 					return p.finishWorker(wCtx, r.id, worker, err, totalErrs)
395 | 				}
396 | 			}
397 | 		}
398 | 	}
399 | }
400 | 
401 | // finishWorker handles worker completion logic
402 | func (p *WorkerGroup[T]) finishWorker(ctx context.Context, id int, worker Worker[T], lastErr error, totalErrs int) error {
403 | 	// worker completion should be called only if we are continuing on error or no error
404 | 	if p.workerCompleteFn != nil && (lastErr == nil || p.continueOnError) {
405 | 		wrapFinTmr := p.metrics.StartTimer(id, metrics.TimerWrap)
406 | 		if e := p.workerCompleteFn(ctx, id, worker); e != nil {
407 | 			if lastErr == nil {
408 | 				lastErr = fmt.Errorf("complete worker func for %d failed: %w", id, e)
409 | 			}
410 | 		}
411 | 		wrapFinTmr()
412 | 	}
413 | 
414 | 	activeWorkers := p.activeWorkers.Add(-1)
415 | 
416 | 	// pool completion should be called when this is the last worker
417 | 	// regardless of error state, except for context cancellation
418 | 	if activeWorkers == 0 && p.poolCompleteFn != nil && !errors.Is(lastErr, context.Canceled) {
419 | 		if e := p.poolCompleteFn(ctx); e != nil {
420 | 			if lastErr == nil {
421 | 				lastErr = fmt.Errorf("complete pool func for %d failed: %w", id, e)
422 | 			}
423 | 		}
424 | 	}
425 | 
426 | 	if lastErr != nil {
427 | 		return fmt.Errorf("total errors: %d, last error: %w", totalErrs, lastErr)
428 | 	}
429 | 	return nil
430 | }
431 | 
432 | // Close pool. Has to be called by the consumer as the indication of "all records submitted".
433 | // The call blocks till all processing is completed by workers. After this call the pool can't be reused.
434 | // Returns an error if any happened during the run
435 | func (p *WorkerGroup[T]) Close(ctx context.Context) error {
436 | 	// if context canceled, return immediately
437 | 	switch {
438 | 	case ctx.Err() != nil:
439 | 		return ctx.Err()
440 | 	default:
441 | 	}
442 | 
443 | 	// flush any remaining items in accumulators
444 | 	if p.batchSize > 0 {
445 | 		for i, acc := range p.accumulators {
446 | 			if len(acc) > 0 {
447 | 				// ensure we flush any non-empty accumulator, regardless of size
448 | 				if p.chunkFn == nil {
449 | 					p.sharedBatchCh <- acc
450 | 				} else {
451 | 					p.workerBatchCh[i] <- acc
452 | 				}
453 | 				p.accumulators[i] = nil // help GC
454 | 			}
455 | 		}
456 | 	}
457 | 
458 | 	close(p.sharedCh)
459 | 	close(p.sharedBatchCh)
460 | 	for i := range p.poolSize {
461 | 		close(p.workersCh[i])
462 | 		close(p.workerBatchCh[i])
463 | 	}
464 | 	return p.eg.Wait()
465 | }
466 | 
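// Illustrative sketch (not part of this file): typical division of labor
// between Close above and Wait below, assuming a hypothetical items slice and
// a producer goroutine.
//
//	go func() {
//		for _, v := range items {
//			p.Submit(v)
//		}
//		_ = p.Close(context.Background()) // signals "all records submitted"
//	}()
//
//	if err := p.Wait(context.Background()); err != nil {
//		// all workers done; err wraps the last worker failure, if any
//	}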
467 | // Wait till all workers have completed and the result channel is closed.
468 | func (p *WorkerGroup[T]) Wait(ctx context.Context) error {
469 | 	// if context canceled, return immediately
470 | 	switch {
471 | 	case ctx.Err() != nil:
472 | 		return ctx.Err()
473 | 	default:
474 | 	}
475 | 	return p.eg.Wait()
476 | }
477 | 
478 | // Metrics returns combined metrics from all workers
479 | func (p *WorkerGroup[T]) Metrics() *metrics.Value {
480 | 	return p.metrics
481 | }
482 | 
483 | // Middleware wraps worker and adds functionality
484 | type Middleware[T any] func(Worker[T]) Worker[T]
485 | 
486 | // Use applies middlewares to the worker group's worker. Middlewares are applied
487 | // in the same order as they are provided, matching the HTTP middleware pattern in Go.
488 | // The first middleware is the outermost wrapper, and the last middleware is the
489 | // innermost wrapper closest to the original worker.
490 | func (p *WorkerGroup[T]) Use(middlewares ...Middleware[T]) *WorkerGroup[T] {
491 | 	if len(middlewares) == 0 {
492 | 		return p
493 | 	}
494 | 
495 | 	// if we have a worker maker (stateful), wrap it
496 | 	if p.workerMaker != nil {
497 | 		originalMaker := p.workerMaker
498 | 		p.workerMaker = func() Worker[T] {
499 | 			worker := originalMaker()
500 | 			// apply middlewares in order from last to first
501 | 			// this makes first middleware outermost
502 | 			wrapped := worker
503 | 			for i := len(middlewares) - 1; i >= 0; i-- {
504 | 				prev := wrapped
505 | 				wrapped = middlewares[i](prev)
506 | 			}
507 | 			return wrapped
508 | 		}
509 | 		return p
510 | 	}
511 | 
512 | 	// for stateless worker, just wrap it directly
513 | 	wrapped := p.worker
514 | 	for i := len(middlewares) - 1; i >= 0; i-- {
515 | 		prev := wrapped
516 | 		wrapped = middlewares[i](prev)
517 | 	}
518 | 	p.worker = wrapped
519 | 	return p
520 | }
521 | 
--------------------------------------------------------------------------------
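As an illustration of the ordering rule documented in Use, the sketch below chains the middleware package's Retry and RateLimiter (both shown earlier in this document); the worker body and parameters are placeholders, and callFlakyService is hypothetical. Because the first middleware is the outermost wrapper, every retry attempt passes through the rate limiter again:

worker := pool.WorkerFunc[string](func(ctx context.Context, v string) error {
	return callFlakyService(ctx, v) // hypothetical downstream call
})

p := pool.New[string](4, worker).Use(
	middleware.Retry[string](3, 100*time.Millisecond), // outermost: wraps the limiter
	middleware.RateLimiter[string](10, 5),             // innermost: closest to the worker
)
// effective chain per item: Retry -> RateLimiter -> worker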