├── .gitignore ├── .idea ├── .gitignore ├── gobatch.iml ├── modules.xml └── vcs.xml ├── LICENSE ├── README.md ├── executor.go ├── executor_default.go ├── go.mod ├── main.go ├── rate_limiter.go ├── rate_limiter_test.go └── types.go /.gitignore: -------------------------------------------------------------------------------- 1 | # If you prefer the allow list template instead of the deny list, see community template: 2 | # https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore 3 | # 4 | # Binaries for programs and plugins 5 | *.exe 6 | *.exe~ 7 | *.dll 8 | *.so 9 | *.dylib 10 | 11 | # Test binary, built with `go test -c` 12 | *.test 13 | 14 | # Output of the go coverage tool, specifically when used with LiteIDE 15 | *.out 16 | 17 | # Dependency directories (remove the comment below to include it) 18 | # vendor/ 19 | 20 | # Go workspace file 21 | go.work 22 | -------------------------------------------------------------------------------- /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | # Editor-based HTTP Client requests 5 | /httpRequests/ 6 | # Datasource local storage ignored files 7 | /dataSources/ 8 | /dataSources.local.xml 9 | -------------------------------------------------------------------------------- /.idea/gobatch.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Ankit Solanki 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # gobatch 3 | 4 | `gobatch` is a GOlang package for efficient and robust rate-limiter for concurrent asynchronous batch processing. Optimized for I/O-intensive tasks, such as HTTP requests, but can be applied to CPU bounded operations also. 

## Key Features

- Concurrent Processing
- Customizable Batch Size
- Sophisticated Error Handling and Retries
- Configurable Timeouts and Delays
- Resource Management
- Progress Reporting
- Extensibility and Custom Scheduling

## Installation

Get started with `gobatch` by running:

```shell
go get -u github.com/techcentaur/gobatch
```


## Executor Options

```go
opts := []ExecutorOptions{
	WithCores(8),
	WithRateLimiter(rateLimiter),
	WithBatchSize(5),
	WithStopOnError(false),
	WithTimeout(5 * time.Minute),
	WithMaxRetries(5),
	WithBeforeStartHook(func() {
		logger.Println("Starting batch operation...")
	}),
	WithAfterCompletionHook(func() {
		logger.Println("Batch operation completed.")
	}),
	WithBeforeRetryHook(func(err error) {
		logger.Printf("Retrying operation due to error: %v\n", err)
	}),
	WithProgressReportFunc(
		func(numProcessed int) {}),
	WithLogger(logger),
	WithCustomSchedulerFunc(func(data []interface{}) []interface{} {
		// Custom scheduler logic
		return data
	}),
	WithRetryDelay(5 * time.Second),
	WithReportBenchmarkDuration(true),
}
```

## Async Limiter Options

```go
NewAsyncLimiter(maxRate, timePeriod)
// timePeriod is a time.Duration. For an API that allows 100 credits
// per minute, use NewAsyncLimiter(100, time.Minute).
```


## Usage Example

```go
package main

import (
	"context"
	"fmt"
	"github.com/techcentaur/gobatch"
	"log"
	"os"
	"time"
)

func main() {
	logger := log.New(os.Stdout, "executor: ", log.LstdFlags)
	rateLimiter := gobatch.NewAsyncLimiter(100, time.Second)
	opts := []gobatch.ExecutorOptions{
		gobatch.WithCores(8),
		gobatch.WithRateLimiter(rateLimiter),
		// ... other options ...
86 | } 87 | 88 | dataBatch := make([]interface{}, 100) // Example data 89 | for i := range dataBatch { 90 | // your data variables logic here 91 | dataBatch[i] = fmt.Sprintf("data-%d", i) 92 | } 93 | 94 | err := gobatch.ExecuteBatchAsync( 95 | context.Background(), 96 | myOperationFunc, 97 | dataBatch, 98 | opts... 99 | ) 100 | if err != nil { 101 | logger.Printf("Batch execution error: %v\n", err) 102 | } 103 | } 104 | 105 | func myOperationFunc(ctx context.Context, data interface{}) error { 106 | // Your operation logic here 107 | } 108 | ``` 109 | 110 | 111 | ## Contributing 112 | 113 | We welcome contributions to `gobatch`! Whether it's bug reports, feature requests, or code contributions, your input is valuable. 114 | 115 | 1. Fork the repository. 116 | 2. Create a new branch for your feature or fix. 117 | 3. Write and test your code. 118 | 4. Submit a pull request. 119 | 120 | ## License 121 | 122 | `gobatch` is released under the [MIT License](LICENSE). 123 | 124 | --- 125 | -------------------------------------------------------------------------------- /executor.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "log" 7 | "runtime" 8 | "sync" 9 | "time" 10 | ) 11 | 12 | func ExecuteBatchAsync(operationFunc func(context.Context, interface{}) error, dataBatch []interface{}, opts []ExecutorOptions) error { 13 | cfg := NewExecutorOptions() 14 | 15 | // Apply provided options to override defaults 16 | for _, o := range opts { 17 | o(cfg) 18 | } 19 | 20 | // Validate configuration settings 21 | if err := cfg.Validate(); err != nil { 22 | return err 23 | } 24 | 25 | // Custom scheduler adjustment (if provided) 26 | if cfg.customSchedulerFunc != nil { 27 | dataBatch = cfg.customSchedulerFunc(dataBatch) 28 | } 29 | 30 | // Execute any 'before start' hook 31 | if cfg.beforeStartHook != nil { 32 | cfg.beforeStartHook() 33 | } 34 | 35 | // Setting maximum CPU cores 36 | 
runtime.GOMAXPROCS(cfg.cores) 37 | 38 | var wg sync.WaitGroup 39 | var errorsCount int 40 | 41 | ctx, cancel := context.WithCancel(context.Background()) 42 | defer cancel() 43 | 44 | size := cfg.batchSize 45 | batches := (len(dataBatch) + size - 1) / size 46 | 47 | startTime := time.Now() 48 | 49 | for i := 0; i < batches; i++ { 50 | batch := dataBatch[i*size : min((i+1)*size, len(dataBatch))] 51 | 52 | wg.Add(1) 53 | go func(b []interface{}) { 54 | defer wg.Done() 55 | for _, j := range b { 56 | // Acquire capacity from the rate limiter 57 | if cfg.rateLimiter != nil { 58 | err := cfg.rateLimiter.Acquire(1) // Acquire capacity for one operation 59 | if err != nil { 60 | cfg.logger.Printf("Rate limit error: %v\n", err) 61 | return 62 | } 63 | } 64 | 65 | _, err := attemptOperationWithRetries(ctx, operationFunc, j, cfg) 66 | if err != nil { 67 | handleErrors(&errorsCount, err, cancel, cfg) 68 | if cfg.stopOnError { 69 | return 70 | } 71 | } 72 | 73 | if cfg.progressReportFunc != nil { 74 | cfg.progressReportFunc(1) // Reporting progress after each operation 75 | } 76 | } 77 | }(batch) 78 | } 79 | wg.Wait() 80 | 81 | if cfg.reportBenchmarkDuration { 82 | duration := time.Now().Sub(startTime) 83 | cfg.logger.Printf("Time benchmark to execute: %v\n", duration) 84 | } 85 | 86 | // Execute any 'after completion' hook 87 | if cfg.afterCompletionHook != nil { 88 | cfg.afterCompletionHook() 89 | } 90 | 91 | return nil 92 | } 93 | 94 | // attemptOperationWithRetries tries to execute the operation with retries. 
95 | func attemptOperationWithRetries(ctx context.Context, operationFunc func(context.Context, interface{}) error, data interface{}, conf *ExecutorArguments) (interface{}, error) { 96 | var result interface{} 97 | var err error 98 | 99 | for i := 0; i <= conf.maxRetries; i++ { 100 | err = operationFunc(ctx, data) 101 | if err == nil || conf.retryDelay <= 0 { 102 | break 103 | } 104 | if conf.beforeRetryHook != nil { 105 | conf.beforeRetryHook(err) 106 | } 107 | time.Sleep(conf.retryDelay) 108 | } 109 | 110 | return result, err 111 | } 112 | 113 | // handleErrors manages error counting and circuit breaker logic. 114 | func handleErrors(errorsCount *int, err error, cancel context.CancelFunc, conf *ExecutorArguments) { 115 | if err != nil { 116 | *errorsCount++ 117 | if conf.stopOnError { 118 | cancel() 119 | } 120 | if *errorsCount > conf.circuitBreakerLimit && conf.circuitBreakerLimit > 0 { 121 | cancel() 122 | } 123 | } 124 | if conf.progressReportFunc != nil { 125 | conf.progressReportFunc(1) 126 | } 127 | } 128 | 129 | func WithCores(cores int) ExecutorOptions { 130 | return func(opt *ExecutorArguments) { 131 | opt.cores = cores 132 | } 133 | } 134 | 135 | func WithTimeout(timeout time.Duration) ExecutorOptions { 136 | return func(opt *ExecutorArguments) { 137 | opt.timeout = timeout 138 | } 139 | } 140 | 141 | func WithMaxRetries(maxRetries int) ExecutorOptions { 142 | return func(opt *ExecutorArguments) { 143 | opt.maxRetries = maxRetries 144 | } 145 | } 146 | 147 | func WithRetryDelay(retryDelay time.Duration) ExecutorOptions { 148 | return func(opt *ExecutorArguments) { 149 | opt.retryDelay = retryDelay 150 | } 151 | } 152 | 153 | func WithStopOnError(stopOnError bool) ExecutorOptions { 154 | return func(opt *ExecutorArguments) { 155 | opt.stopOnError = stopOnError 156 | } 157 | } 158 | 159 | func WithBatchSize(batchSize int) ExecutorOptions { 160 | return func(opt *ExecutorArguments) { 161 | opt.batchSize = batchSize 162 | } 163 | } 164 | 165 | func 
WithProgressReportFunc(progressReportFunc func(int)) ExecutorOptions { 166 | return func(opt *ExecutorArguments) { 167 | opt.progressReportFunc = progressReportFunc 168 | } 169 | } 170 | 171 | func WithLogger(logger *log.Logger) ExecutorOptions { 172 | return func(opt *ExecutorArguments) { 173 | opt.logger = logger 174 | } 175 | } 176 | 177 | func WithBeforeStartHook(hook func()) ExecutorOptions { 178 | return func(opt *ExecutorArguments) { 179 | opt.beforeStartHook = hook 180 | } 181 | } 182 | 183 | func WithAfterCompletionHook(hook func()) ExecutorOptions { 184 | return func(opt *ExecutorArguments) { 185 | opt.afterCompletionHook = hook 186 | } 187 | } 188 | 189 | func WithBeforeRetryHook(hook func(error)) ExecutorOptions { 190 | return func(opt *ExecutorArguments) { 191 | opt.beforeRetryHook = hook 192 | } 193 | } 194 | 195 | func WithCustomSchedulerFunc(customSchedulerFunc func([]interface{}) []interface{}) ExecutorOptions { 196 | return func(opt *ExecutorArguments) { 197 | opt.customSchedulerFunc = customSchedulerFunc 198 | } 199 | } 200 | 201 | func WithRateLimiter(rateLimiter *AsyncLimiter) ExecutorOptions { 202 | return func(opt *ExecutorArguments) { 203 | opt.rateLimiter = rateLimiter 204 | } 205 | } 206 | 207 | func WithReportBenchmarkDuration(reportBenchmark bool) ExecutorOptions { 208 | return func(opt *ExecutorArguments) { 209 | opt.reportBenchmarkDuration = reportBenchmark 210 | } 211 | } 212 | 213 | // Validate checks the provided configuration for validity. 
214 | func (e *ExecutorArguments) Validate() error { 215 | if e.cores <= 0 { 216 | return errors.New("number of cores must be greater than 0") 217 | } 218 | if e.timeout <= 0 { 219 | return errors.New("timeout must be greater than 0") 220 | } 221 | if e.maxRetries < 0 { 222 | return errors.New("maximum retries cannot be negative") 223 | } 224 | if e.retryDelay < 0 { 225 | return errors.New("retry delay cannot be negative") 226 | } 227 | if e.batchSize <= 0 { 228 | return errors.New("batch size must be greater than 0") 229 | } 230 | if e.circuitBreakerLimit < 0 { 231 | return errors.New("circuit breaker limit cannot be negative") 232 | } 233 | if e.logger == nil { 234 | return errors.New("logger cannot be nil") 235 | } 236 | // No validation required for boolean fields, hooks, and customSchedulerFunc as they are optional and can be nil. 237 | return nil 238 | } 239 | -------------------------------------------------------------------------------- /executor_default.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "log" 5 | "os" 6 | "time" 7 | ) 8 | 9 | // Constant definitions for default settings. 10 | const ( 11 | DefaultCores = 4 // Default number of CPU cores to use. 12 | DefaultTimeout = 5 * time.Minute // Default timeout for operations 13 | DefaultMaxRetries = 3 // Default number of retries for an operation. 14 | DefaultRetryDelay = 5 * time.Second // Default delay between retries. 15 | DefaultBatchSize = 10 // Default size of each batch of operations. 16 | DefaultCircuitBreakerLimit = 10 // Default limit for circuit breaker. 
17 | ) 18 | 19 | func NewExecutorOptions() *ExecutorArguments { 20 | return &ExecutorArguments{ 21 | cores: DefaultCores, 22 | timeout: DefaultTimeout, 23 | maxRetries: DefaultMaxRetries, 24 | retryDelay: DefaultRetryDelay, 25 | stopOnError: false, 26 | reportBenchmarkDuration: false, 27 | batchSize: DefaultBatchSize, 28 | circuitBreakerLimit: DefaultCircuitBreakerLimit, 29 | logger: log.New(os.Stdout, "asyncbatch: ", log.LstdFlags), 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module gobatch 2 | 3 | go 1.21.1 4 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "log" 7 | "os" 8 | "time" 9 | ) 10 | 11 | func MockIOOperation(ctx context.Context, data interface{}) error { 12 | // Simulate processing time of an IO bounded operation 13 | time.Sleep(100 * time.Millisecond) 14 | return nil 15 | } 16 | 17 | func main() { 18 | logger := log.New(os.Stdout, "executor: ", log.LstdFlags) 19 | rateLimiter := NewAsyncLimiter(100000, 1) 20 | 21 | opts := []ExecutorOptions{ 22 | WithCores(8), 23 | WithRateLimiter(rateLimiter), 24 | WithBatchSize(5), 25 | WithStopOnError(false), 26 | WithTimeout(5 * time.Minute), 27 | WithMaxRetries(5), 28 | WithBeforeStartHook(func() { 29 | logger.Println("Starting batch operation...") 30 | }), 31 | WithAfterCompletionHook(func() { 32 | logger.Println("Batch operation completed.") 33 | }), 34 | WithBeforeRetryHook(func(err error) { 35 | logger.Printf("Retrying operation due to error: %v\n", err) 36 | }), 37 | WithProgressReportFunc( 38 | func(numProcessed int) {}), 39 | WithLogger(logger), 40 | WithCustomSchedulerFunc(func(data []interface{}) []interface{} { 41 | // Custom scheduler logic 42 | return data 43 | }), 44 | 
WithRetryDelay(5 * time.Second), 45 | WithReportBenchmarkDuration(true), 46 | } 47 | 48 | // Create a batch of data to process 49 | dataBatch := make([]interface{}, 100) // Example data 50 | for i := range dataBatch { 51 | dataBatch[i] = fmt.Sprintf("data-%d", i) 52 | } 53 | 54 | // Execute batch operation 55 | err := ExecuteBatchAsync(MockIOOperation, dataBatch, opts) 56 | if err != nil { 57 | logger.Printf("Batch execution error: %v\n", err) 58 | } 59 | 60 | return 61 | } 62 | -------------------------------------------------------------------------------- /rate_limiter.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "errors" 5 | "sync" 6 | "time" 7 | ) 8 | 9 | type AsyncLimiter struct { 10 | maxRate float64 // Maximum rate of operations 11 | timePeriod time.Duration // Time period for the rate limit 12 | ratePerSec float64 // Calculated rate per second 13 | level float64 // Current level of the bucket 14 | lastCheck time.Time // Last time the bucket was checked/leaked 15 | mu sync.Mutex // Mutex to protect concurrent access 16 | waiters chan bool // Channel to signal waiting goroutines 17 | } 18 | 19 | func NewAsyncLimiter(maxRate float64, timePeriod time.Duration) *AsyncLimiter { 20 | return &AsyncLimiter{ 21 | maxRate: maxRate, 22 | timePeriod: timePeriod, 23 | ratePerSec: maxRate / timePeriod.Seconds(), 24 | level: 0.0, 25 | lastCheck: time.Now(), 26 | waiters: make(chan bool, 1), 27 | } 28 | } 29 | 30 | func (l *AsyncLimiter) leak() { 31 | l.mu.Lock() 32 | defer l.mu.Unlock() 33 | 34 | if l.level > 0 { 35 | elapsed := time.Since(l.lastCheck).Seconds() 36 | decrement := elapsed * l.ratePerSec 37 | l.level = max(0, l.level-decrement) 38 | } 39 | l.lastCheck = time.Now() 40 | } 41 | 42 | func (l *AsyncLimiter) hasCapacity(amount float64) bool { 43 | l.leak() 44 | requested := l.level + amount 45 | if requested <= l.maxRate { 46 | select { 47 | case l.waiters <- true: 48 | default: 49 | } 
50 | return true 51 | } 52 | return false 53 | } 54 | 55 | func (l *AsyncLimiter) Acquire(amount float64) error { 56 | if amount > l.maxRate { 57 | return errors.New("cannot acquire more than the maximum capacity") 58 | } 59 | 60 | for !l.hasCapacity(amount) { 61 | select { 62 | case <-l.waiters: 63 | case <-time.After(time.Duration(1/l.ratePerSec*amount) * time.Second): 64 | } 65 | } 66 | 67 | l.mu.Lock() 68 | l.level += amount 69 | l.mu.Unlock() 70 | 71 | return nil 72 | } 73 | -------------------------------------------------------------------------------- /rate_limiter_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "sync" 5 | "testing" 6 | "time" 7 | ) 8 | 9 | func TestAsyncLimiter(t *testing.T) { 10 | // Create a new AsyncLimiter with a rate of 2 operations per second 11 | limiter := NewAsyncLimiter(2, 1) 12 | 13 | // Number of goroutines to simulate 14 | const numGoroutines = 5 15 | 16 | var wg sync.WaitGroup 17 | wg.Add(numGoroutines) 18 | 19 | for i := 0; i < numGoroutines; i++ { 20 | go func(id int) { 21 | defer wg.Done() 22 | 23 | // Each goroutine tries to acquire capacity from the limiter 24 | t.Logf("Goroutine %d attempting to acquire capacity...\n", id) 25 | err := limiter.Acquire(1) 26 | if err != nil { 27 | t.Logf("Goroutine %d failed to acquire capacity: %v\n", id, err) 28 | return 29 | } 30 | t.Logf("Goroutine %d acquired capacity. 
Performing operation...\n", id) 31 | 32 | // Simulate some work 33 | time.Sleep(100 * time.Millisecond) 34 | 35 | t.Logf("Goroutine %d completed operation.\n", id) 36 | }(i) 37 | } 38 | 39 | wg.Wait() 40 | t.Logf("All goroutines completed.") 41 | } 42 | -------------------------------------------------------------------------------- /types.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "log" 6 | "time" 7 | ) 8 | 9 | // IOOperation represents a function type for I/O-based operations. 10 | // It takes a context and an interface{} as input, and returns an interface{} and an error. 11 | type IOOperation func(context.Context, interface{}) (interface{}, error) 12 | 13 | // BatchResult represents the outcome of a batch operation. 14 | // It stores the data result and any error that occurred during the operation. 15 | type BatchResult struct { 16 | Data interface{} 17 | Err error 18 | } 19 | 20 | // ExecutorArguments defines configuration options for the Executor. 21 | // It includes settings for rate limiting, concurrency, retries, hooks, and more. 22 | type ExecutorArguments struct { 23 | rateLimiter *AsyncLimiter // Controls the rate of operations to prevent overloading. 24 | 25 | cores int // Number of cores to use for parallel processing. 26 | batchSize int // Size of each batch of operations to process. 27 | timeout time.Duration // Maximum duration to wait for an operation to complete. 28 | maxRetries int // Maximum number of retries for a failed operation. 29 | retryDelay time.Duration // Duration to wait before retrying a failed operation. 30 | 31 | stopOnError bool // Whether to stop processing on the first error encountered. 32 | progressReportFunc func(int) // Function to report progress of operations. 33 | circuitBreakerLimit int // Threshold for tripping the circuit breaker to stop operations. 34 | 35 | logger *log.Logger // Logger for logging messages. 
36 | beforeStartHook func() // Hook function to be called before starting operations. 37 | afterCompletionHook func() // Hook function to be called after completing all operations. 38 | beforeRetryHook func(error) // Hook function to be called before retrying a failed operation. 39 | 40 | customSchedulerFunc func([]interface{}) []interface{} // Custom function to schedule operations. 41 | reportBenchmarkDuration bool // Flag to enable/disable reporting of benchmark durations. 42 | } 43 | 44 | // ExecutorOptions is a function type used for applying configuration options to ExecutorArguments. 45 | type ExecutorOptions func(*ExecutorArguments) 46 | --------------------------------------------------------------------------------