├── .gitignore
├── .idea
├── .gitignore
├── gobatch.iml
├── modules.xml
└── vcs.xml
├── LICENSE
├── README.md
├── executor.go
├── executor_default.go
├── go.mod
├── main.go
├── rate_limiter.go
├── rate_limiter_test.go
└── types.go
/.gitignore:
--------------------------------------------------------------------------------
1 | # If you prefer the allow list template instead of the deny list, see community template:
2 | # https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore
3 | #
4 | # Binaries for programs and plugins
5 | *.exe
6 | *.exe~
7 | *.dll
8 | *.so
9 | *.dylib
10 |
11 | # Test binary, built with `go test -c`
12 | *.test
13 |
14 | # Output of the go coverage tool, specifically when used with LiteIDE
15 | *.out
16 |
17 | # Dependency directories (remove the comment below to include it)
18 | # vendor/
19 |
20 | # Go workspace file
21 | go.work
22 |
--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 | # Editor-based HTTP Client requests
5 | /httpRequests/
6 | # Datasource local storage ignored files
7 | /dataSources/
8 | /dataSources.local.xml
9 |
--------------------------------------------------------------------------------
/.idea/gobatch.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 Ankit Solanki
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | # gobatch
3 |
`gobatch` is a Go package providing an efficient and robust rate limiter for concurrent asynchronous batch processing. It is optimized for I/O-intensive tasks, such as HTTP requests, but can also be applied to CPU-bound operations.
5 |
6 | ## Key Features
7 |
8 | - Concurrent Processing
9 | - Customizable Batch Size
10 | - Sophisticated Error Handling and Retries
11 | - Configurable Timeouts and Delays
12 | - Resource Management
13 | - Progress Reporting
14 | - Extensibility and Custom Scheduling
15 |
16 | ## Installation
17 |
18 | Get started with `gobatch` by running:
19 |
20 | ```shell
21 | go get -u github.com/techcentaur/gobatch
22 | ```
23 |
24 |
25 | ## Executor Options
26 |
27 | ```go
28 | opts := []Option{
29 | WithCores(8),
30 | WithRateLimiter(rateLimiter),
31 | WithBatchSize(5),
32 | WithStopOnError(false),
33 | WithTimeout(5 * time.Minute),
34 | WithMaxRetries(5),
35 | WithBeforeStartHook(func() {
36 | logger.Println("Starting batch operation...")
37 | }),
38 | WithAfterCompletionHook(func() {
39 | logger.Println("Batch operation completed.")
40 | }),
41 | WithBeforeRetryHook(func(err error) {
42 | logger.Printf("Retrying operation due to error: %v\n", err)
43 | }),
44 | WithProgressReportFunc(
45 | func(numProcessed int) {}),
46 | WithLogger(logger),
47 | WithCustomSchedulerFunc(func(data []interface{}) []interface{} {
48 | // Custom scheduler logic
49 | return data
50 | }),
51 | WithRetryDelay(5 * time.Second),
52 | WithReportBenchmarkDuration(true),
53 | }
54 | ```
55 |
56 | ## Async Limiter Options
57 |
58 | ```go
NewAsyncLimiter(maxRate, timePeriod)
// timePeriod is a time.Duration. For an API allowing 100 credits per
// minute, use NewAsyncLimiter(100, time.Minute).
61 |
62 | ```
63 |
64 |
65 | ## Usage Example
66 |
67 | ```go
68 | package main
69 |
70 | import (
71 | "context"
72 | "fmt"
73 | "github.com/techcentaur/gobatch"
74 | "log"
75 | "os"
76 | "time"
77 | )
78 |
79 | func main() {
80 | logger := log.New(os.Stdout, "executor: ", log.LstdFlags)
81 | rateLimiter := gobatch.NewAsyncLimiter(100, 1)
82 | opts := []gobatch.Option{
83 | gobatch.WithCores(8),
84 | gobatch.WithRateLimiter(rateLimiter),
85 | // ... other options ...
86 | }
87 |
88 | dataBatch := make([]interface{}, 100) // Example data
89 | for i := range dataBatch {
90 | // your data variables logic here
91 | dataBatch[i] = fmt.Sprintf("data-%d", i)
92 | }
93 |
	err := gobatch.ExecuteBatchAsync(
		myOperationFunc,
		dataBatch,
		opts,
	)
100 | if err != nil {
101 | logger.Printf("Batch execution error: %v\n", err)
102 | }
103 | }
104 |
func myOperationFunc(ctx context.Context, data interface{}) error {
	// Your operation logic here
	return nil
}
108 | ```
109 |
110 |
111 | ## Contributing
112 |
113 | We welcome contributions to `gobatch`! Whether it's bug reports, feature requests, or code contributions, your input is valuable.
114 |
115 | 1. Fork the repository.
116 | 2. Create a new branch for your feature or fix.
117 | 3. Write and test your code.
118 | 4. Submit a pull request.
119 |
120 | ## License
121 |
122 | `gobatch` is released under the [MIT License](LICENSE).
123 |
124 | ---
125 |
--------------------------------------------------------------------------------
/executor.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "context"
5 | "errors"
6 | "log"
7 | "runtime"
8 | "sync"
9 | "time"
10 | )
11 |
12 | func ExecuteBatchAsync(operationFunc func(context.Context, interface{}) error, dataBatch []interface{}, opts []ExecutorOptions) error {
13 | cfg := NewExecutorOptions()
14 |
15 | // Apply provided options to override defaults
16 | for _, o := range opts {
17 | o(cfg)
18 | }
19 |
20 | // Validate configuration settings
21 | if err := cfg.Validate(); err != nil {
22 | return err
23 | }
24 |
25 | // Custom scheduler adjustment (if provided)
26 | if cfg.customSchedulerFunc != nil {
27 | dataBatch = cfg.customSchedulerFunc(dataBatch)
28 | }
29 |
30 | // Execute any 'before start' hook
31 | if cfg.beforeStartHook != nil {
32 | cfg.beforeStartHook()
33 | }
34 |
35 | // Setting maximum CPU cores
36 | runtime.GOMAXPROCS(cfg.cores)
37 |
38 | var wg sync.WaitGroup
39 | var errorsCount int
40 |
41 | ctx, cancel := context.WithCancel(context.Background())
42 | defer cancel()
43 |
44 | size := cfg.batchSize
45 | batches := (len(dataBatch) + size - 1) / size
46 |
47 | startTime := time.Now()
48 |
49 | for i := 0; i < batches; i++ {
50 | batch := dataBatch[i*size : min((i+1)*size, len(dataBatch))]
51 |
52 | wg.Add(1)
53 | go func(b []interface{}) {
54 | defer wg.Done()
55 | for _, j := range b {
56 | // Acquire capacity from the rate limiter
57 | if cfg.rateLimiter != nil {
58 | err := cfg.rateLimiter.Acquire(1) // Acquire capacity for one operation
59 | if err != nil {
60 | cfg.logger.Printf("Rate limit error: %v\n", err)
61 | return
62 | }
63 | }
64 |
65 | _, err := attemptOperationWithRetries(ctx, operationFunc, j, cfg)
66 | if err != nil {
67 | handleErrors(&errorsCount, err, cancel, cfg)
68 | if cfg.stopOnError {
69 | return
70 | }
71 | }
72 |
73 | if cfg.progressReportFunc != nil {
74 | cfg.progressReportFunc(1) // Reporting progress after each operation
75 | }
76 | }
77 | }(batch)
78 | }
79 | wg.Wait()
80 |
81 | if cfg.reportBenchmarkDuration {
82 | duration := time.Now().Sub(startTime)
83 | cfg.logger.Printf("Time benchmark to execute: %v\n", duration)
84 | }
85 |
86 | // Execute any 'after completion' hook
87 | if cfg.afterCompletionHook != nil {
88 | cfg.afterCompletionHook()
89 | }
90 |
91 | return nil
92 | }
93 |
94 | // attemptOperationWithRetries tries to execute the operation with retries.
95 | func attemptOperationWithRetries(ctx context.Context, operationFunc func(context.Context, interface{}) error, data interface{}, conf *ExecutorArguments) (interface{}, error) {
96 | var result interface{}
97 | var err error
98 |
99 | for i := 0; i <= conf.maxRetries; i++ {
100 | err = operationFunc(ctx, data)
101 | if err == nil || conf.retryDelay <= 0 {
102 | break
103 | }
104 | if conf.beforeRetryHook != nil {
105 | conf.beforeRetryHook(err)
106 | }
107 | time.Sleep(conf.retryDelay)
108 | }
109 |
110 | return result, err
111 | }
112 |
113 | // handleErrors manages error counting and circuit breaker logic.
114 | func handleErrors(errorsCount *int, err error, cancel context.CancelFunc, conf *ExecutorArguments) {
115 | if err != nil {
116 | *errorsCount++
117 | if conf.stopOnError {
118 | cancel()
119 | }
120 | if *errorsCount > conf.circuitBreakerLimit && conf.circuitBreakerLimit > 0 {
121 | cancel()
122 | }
123 | }
124 | if conf.progressReportFunc != nil {
125 | conf.progressReportFunc(1)
126 | }
127 | }
128 |
129 | func WithCores(cores int) ExecutorOptions {
130 | return func(opt *ExecutorArguments) {
131 | opt.cores = cores
132 | }
133 | }
134 |
135 | func WithTimeout(timeout time.Duration) ExecutorOptions {
136 | return func(opt *ExecutorArguments) {
137 | opt.timeout = timeout
138 | }
139 | }
140 |
141 | func WithMaxRetries(maxRetries int) ExecutorOptions {
142 | return func(opt *ExecutorArguments) {
143 | opt.maxRetries = maxRetries
144 | }
145 | }
146 |
147 | func WithRetryDelay(retryDelay time.Duration) ExecutorOptions {
148 | return func(opt *ExecutorArguments) {
149 | opt.retryDelay = retryDelay
150 | }
151 | }
152 |
153 | func WithStopOnError(stopOnError bool) ExecutorOptions {
154 | return func(opt *ExecutorArguments) {
155 | opt.stopOnError = stopOnError
156 | }
157 | }
158 |
159 | func WithBatchSize(batchSize int) ExecutorOptions {
160 | return func(opt *ExecutorArguments) {
161 | opt.batchSize = batchSize
162 | }
163 | }
164 |
165 | func WithProgressReportFunc(progressReportFunc func(int)) ExecutorOptions {
166 | return func(opt *ExecutorArguments) {
167 | opt.progressReportFunc = progressReportFunc
168 | }
169 | }
170 |
171 | func WithLogger(logger *log.Logger) ExecutorOptions {
172 | return func(opt *ExecutorArguments) {
173 | opt.logger = logger
174 | }
175 | }
176 |
177 | func WithBeforeStartHook(hook func()) ExecutorOptions {
178 | return func(opt *ExecutorArguments) {
179 | opt.beforeStartHook = hook
180 | }
181 | }
182 |
183 | func WithAfterCompletionHook(hook func()) ExecutorOptions {
184 | return func(opt *ExecutorArguments) {
185 | opt.afterCompletionHook = hook
186 | }
187 | }
188 |
189 | func WithBeforeRetryHook(hook func(error)) ExecutorOptions {
190 | return func(opt *ExecutorArguments) {
191 | opt.beforeRetryHook = hook
192 | }
193 | }
194 |
195 | func WithCustomSchedulerFunc(customSchedulerFunc func([]interface{}) []interface{}) ExecutorOptions {
196 | return func(opt *ExecutorArguments) {
197 | opt.customSchedulerFunc = customSchedulerFunc
198 | }
199 | }
200 |
201 | func WithRateLimiter(rateLimiter *AsyncLimiter) ExecutorOptions {
202 | return func(opt *ExecutorArguments) {
203 | opt.rateLimiter = rateLimiter
204 | }
205 | }
206 |
207 | func WithReportBenchmarkDuration(reportBenchmark bool) ExecutorOptions {
208 | return func(opt *ExecutorArguments) {
209 | opt.reportBenchmarkDuration = reportBenchmark
210 | }
211 | }
212 |
213 | // Validate checks the provided configuration for validity.
214 | func (e *ExecutorArguments) Validate() error {
215 | if e.cores <= 0 {
216 | return errors.New("number of cores must be greater than 0")
217 | }
218 | if e.timeout <= 0 {
219 | return errors.New("timeout must be greater than 0")
220 | }
221 | if e.maxRetries < 0 {
222 | return errors.New("maximum retries cannot be negative")
223 | }
224 | if e.retryDelay < 0 {
225 | return errors.New("retry delay cannot be negative")
226 | }
227 | if e.batchSize <= 0 {
228 | return errors.New("batch size must be greater than 0")
229 | }
230 | if e.circuitBreakerLimit < 0 {
231 | return errors.New("circuit breaker limit cannot be negative")
232 | }
233 | if e.logger == nil {
234 | return errors.New("logger cannot be nil")
235 | }
236 | // No validation required for boolean fields, hooks, and customSchedulerFunc as they are optional and can be nil.
237 | return nil
238 | }
239 |
--------------------------------------------------------------------------------
/executor_default.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "log"
5 | "os"
6 | "time"
7 | )
8 |
// Constant definitions for default settings used by NewExecutorOptions.
const (
	DefaultCores               = 4               // Default number of CPU cores to use.
	DefaultTimeout             = 5 * time.Minute // Default timeout for operations.
	DefaultMaxRetries          = 3               // Default number of retries for an operation.
	DefaultRetryDelay          = 5 * time.Second // Default delay between retries.
	DefaultBatchSize           = 10              // Default size of each batch of operations.
	DefaultCircuitBreakerLimit = 10              // Default error-count threshold for the circuit breaker.
)
18 |
19 | func NewExecutorOptions() *ExecutorArguments {
20 | return &ExecutorArguments{
21 | cores: DefaultCores,
22 | timeout: DefaultTimeout,
23 | maxRetries: DefaultMaxRetries,
24 | retryDelay: DefaultRetryDelay,
25 | stopOnError: false,
26 | reportBenchmarkDuration: false,
27 | batchSize: DefaultBatchSize,
28 | circuitBreakerLimit: DefaultCircuitBreakerLimit,
29 | logger: log.New(os.Stdout, "asyncbatch: ", log.LstdFlags),
30 | }
31 | }
32 |
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module gobatch
2 |
3 | go 1.21.1
4 |
--------------------------------------------------------------------------------
/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "context"
5 | "fmt"
6 | "log"
7 | "os"
8 | "time"
9 | )
10 |
11 | func MockIOOperation(ctx context.Context, data interface{}) error {
12 | // Simulate processing time of an IO bounded operation
13 | time.Sleep(100 * time.Millisecond)
14 | return nil
15 | }
16 |
17 | func main() {
18 | logger := log.New(os.Stdout, "executor: ", log.LstdFlags)
19 | rateLimiter := NewAsyncLimiter(100000, 1)
20 |
21 | opts := []ExecutorOptions{
22 | WithCores(8),
23 | WithRateLimiter(rateLimiter),
24 | WithBatchSize(5),
25 | WithStopOnError(false),
26 | WithTimeout(5 * time.Minute),
27 | WithMaxRetries(5),
28 | WithBeforeStartHook(func() {
29 | logger.Println("Starting batch operation...")
30 | }),
31 | WithAfterCompletionHook(func() {
32 | logger.Println("Batch operation completed.")
33 | }),
34 | WithBeforeRetryHook(func(err error) {
35 | logger.Printf("Retrying operation due to error: %v\n", err)
36 | }),
37 | WithProgressReportFunc(
38 | func(numProcessed int) {}),
39 | WithLogger(logger),
40 | WithCustomSchedulerFunc(func(data []interface{}) []interface{} {
41 | // Custom scheduler logic
42 | return data
43 | }),
44 | WithRetryDelay(5 * time.Second),
45 | WithReportBenchmarkDuration(true),
46 | }
47 |
48 | // Create a batch of data to process
49 | dataBatch := make([]interface{}, 100) // Example data
50 | for i := range dataBatch {
51 | dataBatch[i] = fmt.Sprintf("data-%d", i)
52 | }
53 |
54 | // Execute batch operation
55 | err := ExecuteBatchAsync(MockIOOperation, dataBatch, opts)
56 | if err != nil {
57 | logger.Printf("Batch execution error: %v\n", err)
58 | }
59 |
60 | return
61 | }
62 |
--------------------------------------------------------------------------------
/rate_limiter.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "errors"
5 | "sync"
6 | "time"
7 | )
8 |
// AsyncLimiter is a leaky-bucket rate limiter: level fills by the amount
// acquired and drains continuously at ratePerSec. Acquire blocks until
// the requested amount fits below maxRate.
type AsyncLimiter struct {
	maxRate    float64       // Maximum bucket capacity (operations allowed per timePeriod).
	timePeriod time.Duration // Time period over which maxRate applies.
	ratePerSec float64       // Drain rate derived as maxRate / timePeriod.Seconds().
	level      float64       // Current fill level of the bucket.
	lastCheck  time.Time     // Last time the bucket was drained (leaked).
	mu         sync.Mutex    // Protects level and lastCheck from concurrent access.
	waiters    chan bool     // Capacity-1 signal channel used to wake one waiting goroutine.
}
18 |
// NewAsyncLimiter builds a leaky-bucket limiter that allows maxRate
// operations per timePeriod.
//
// NOTE(review): timePeriod is a time.Duration, so callers must pass a
// real duration such as time.Second or time.Minute. An untyped constant
// like 1 converts to one *nanosecond*, which makes ratePerSec enormous
// and the limiter effectively unlimited — verify all call sites (the
// README describes this parameter as "seconds", which disagrees with
// the type; confirm the intended API).
func NewAsyncLimiter(maxRate float64, timePeriod time.Duration) *AsyncLimiter {
	return &AsyncLimiter{
		maxRate:    maxRate,
		timePeriod: timePeriod,
		ratePerSec: maxRate / timePeriod.Seconds(),
		level:      0.0,
		lastCheck:  time.Now(),
		waiters:    make(chan bool, 1),
	}
}
29 |
30 | func (l *AsyncLimiter) leak() {
31 | l.mu.Lock()
32 | defer l.mu.Unlock()
33 |
34 | if l.level > 0 {
35 | elapsed := time.Since(l.lastCheck).Seconds()
36 | decrement := elapsed * l.ratePerSec
37 | l.level = max(0, l.level-decrement)
38 | }
39 | l.lastCheck = time.Now()
40 | }
41 |
42 | func (l *AsyncLimiter) hasCapacity(amount float64) bool {
43 | l.leak()
44 | requested := l.level + amount
45 | if requested <= l.maxRate {
46 | select {
47 | case l.waiters <- true:
48 | default:
49 | }
50 | return true
51 | }
52 | return false
53 | }
54 |
55 | func (l *AsyncLimiter) Acquire(amount float64) error {
56 | if amount > l.maxRate {
57 | return errors.New("cannot acquire more than the maximum capacity")
58 | }
59 |
60 | for !l.hasCapacity(amount) {
61 | select {
62 | case <-l.waiters:
63 | case <-time.After(time.Duration(1/l.ratePerSec*amount) * time.Second):
64 | }
65 | }
66 |
67 | l.mu.Lock()
68 | l.level += amount
69 | l.mu.Unlock()
70 |
71 | return nil
72 | }
73 |
--------------------------------------------------------------------------------
/rate_limiter_test.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "sync"
5 | "testing"
6 | "time"
7 | )
8 |
9 | func TestAsyncLimiter(t *testing.T) {
10 | // Create a new AsyncLimiter with a rate of 2 operations per second
11 | limiter := NewAsyncLimiter(2, 1)
12 |
13 | // Number of goroutines to simulate
14 | const numGoroutines = 5
15 |
16 | var wg sync.WaitGroup
17 | wg.Add(numGoroutines)
18 |
19 | for i := 0; i < numGoroutines; i++ {
20 | go func(id int) {
21 | defer wg.Done()
22 |
23 | // Each goroutine tries to acquire capacity from the limiter
24 | t.Logf("Goroutine %d attempting to acquire capacity...\n", id)
25 | err := limiter.Acquire(1)
26 | if err != nil {
27 | t.Logf("Goroutine %d failed to acquire capacity: %v\n", id, err)
28 | return
29 | }
30 | t.Logf("Goroutine %d acquired capacity. Performing operation...\n", id)
31 |
32 | // Simulate some work
33 | time.Sleep(100 * time.Millisecond)
34 |
35 | t.Logf("Goroutine %d completed operation.\n", id)
36 | }(i)
37 | }
38 |
39 | wg.Wait()
40 | t.Logf("All goroutines completed.")
41 | }
42 |
--------------------------------------------------------------------------------
/types.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "context"
5 | "log"
6 | "time"
7 | )
8 |
// IOOperation represents a function type for I/O-based operations.
// It takes a context and an interface{} as input, and returns an interface{} and an error.
//
// NOTE(review): this signature does not match what ExecuteBatchAsync
// accepts — the executor takes func(context.Context, interface{}) error
// (no result value). Confirm whether IOOperation is still used anywhere.
type IOOperation func(context.Context, interface{}) (interface{}, error)

// BatchResult represents the outcome of a batch operation.
// It stores the data result and any error that occurred during the operation.
type BatchResult struct {
	Data interface{} // Result payload produced by the operation, if any.
	Err  error       // Error encountered during the operation, or nil.
}

// ExecutorArguments defines configuration options for the Executor.
// It includes settings for rate limiting, concurrency, retries, hooks, and more.
// Construct via NewExecutorOptions and customize with ExecutorOptions.
type ExecutorArguments struct {
	rateLimiter *AsyncLimiter // Controls the rate of operations to prevent overloading.

	cores      int           // Number of cores to use for parallel processing (runtime.GOMAXPROCS).
	batchSize  int           // Size of each batch of operations to process.
	timeout    time.Duration // Maximum duration to wait for an operation to complete.
	maxRetries int           // Maximum number of retries for a failed operation.
	retryDelay time.Duration // Duration to wait before retrying a failed operation.

	stopOnError         bool      // Whether to stop processing on the first error encountered.
	progressReportFunc  func(int) // Function to report progress of operations (called with the count just processed).
	circuitBreakerLimit int       // Threshold of accumulated errors for tripping the circuit breaker.

	logger              *log.Logger // Logger for logging messages; must be non-nil (see Validate).
	beforeStartHook     func()      // Hook function to be called before starting operations.
	afterCompletionHook func()      // Hook function to be called after completing all operations.
	beforeRetryHook     func(error) // Hook function to be called before retrying a failed operation.

	customSchedulerFunc     func([]interface{}) []interface{} // Custom function to reorder/transform the batch before processing.
	reportBenchmarkDuration bool                              // Flag to enable/disable reporting of total execution time.
}

// ExecutorOptions is a function type used for applying configuration options to ExecutorArguments.
type ExecutorOptions func(*ExecutorArguments)
46 |
--------------------------------------------------------------------------------