├── README.md
├── cmd
│   └── main.go
├── go.mod
├── go.sum
└── internal
    ├── core
    │   ├── global_queue.go
    │   ├── global_queue_test.go
    │   ├── goroutine.go
    │   ├── goroutine_test.go
    │   ├── processor.go
    │   └── processor_test.go
    ├── poller
    │   ├── poller.go
    │   └── poller_test.go
    ├── scheduler
    │   ├── scheduler.go
    │   └── scheduler_test.go
    └── visualization
        ├── colors.go
        └── terminal.go
/README.md:
--------------------------------------------------------------------------------
# Go Work-Stealing Scheduler

## Overview

A concurrent task scheduler written in Go, modeled on the Go runtime scheduler's design: tasks are distributed across per-processor local queues and a shared global queue, blocking work is parked on a network poller, and the whole system is instrumented with runtime metrics and a terminal dashboard.

## Project Architecture

```
workstealing/
├── cmd/
│   └── main.go              # Demo entry point with terminal visualization
├── internal/
│   ├── core/
│   │   ├── goroutine.go     # Task lifecycle and management
│   │   ├── processor.go     # Processor implementation
│   │   └── global_queue.go  # Centralized task queue
│   ├── scheduler/
│   │   └── scheduler.go     # Task distribution coordinator
│   ├── poller/
│   │   └── poller.go        # Blocking I/O event management
│   └── visualization/
│       └── terminal.go      # termui-based terminal dashboard
```

## Key Components

### 1. Goroutine (`core.Goroutine`)
- Lightweight task representation
- Unique ID generation
- State tracking (Created, Runnable, Running, Blocked, Finished)
- Transition history logging
- Support for blocking and non-blocking tasks

### 2. Processor (`core.Processor`)
- Local run queue management
- Work stealing from peer processors and the global queue
- Performance metrics tracking (tasks executed, steals, idle/running time)
- Work acquisition order: local queue first, then peer processors, then the global queue

### 3. Global Queue (`core.GlobalQueue`)
- Centralized, thread-safe task repository
- Task submission, take, and steal operations
- Capacity-based admission: tasks are rejected once the queue is full
- Atomic metrics tracking

### 4. Network Poller (`poller.NetworkPoller`)
- Event-driven management of blocking operations
- Deadline and timeout handling
- Dynamic rescheduling of goroutines once their I/O completes
- Keeps processors free to run non-blocking work while I/O is pending

### 5. Scheduler (`scheduler.Scheduler`)
- Multi-processor task orchestration
- Global/local queue coordination
- Coordinates work stealing across processors
- Real-time statistics collection
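The `scheduler.Scheduler` wires all of these pieces together for you, but their relationships are easiest to see with the `core` package alone. The sketch below is illustrative only: it mirrors the wiring used by the tests in `processor_test.go`, and it assumes the code is compiled from inside this module, since `internal` packages cannot be imported from elsewhere.

```go
package main

import (
	"fmt"
	"time"

	"workstealing/internal/core"
)

func main() {
	// Shared global queue with capacity for 100 tasks.
	gq := core.NewGlobalQueue(100)

	// Two processors that know about each other so they can steal work.
	p0 := core.NewProcessor(0, 64, gq)
	p1 := core.NewProcessor(1, 64, gq)
	all := []*core.Processor{p0, p1}
	p0.SetProcessors(all)
	p1.SetProcessors(all)

	// Submit a non-blocking task to the global queue.
	gq.Submit(core.NewGoroutine(10*time.Millisecond, false))

	// FindWork checks the local queue, then peer processors, then the global queue.
	if g := p0.FindWork(); g != nil {
		p0.Execute(g) // Execute simulates the run by sleeping for the task's workload.
		fmt.Println(g.GetHistory())
	}
}
```

`GetHistory` prints the goroutine's recorded transitions (created → ready → running → finished), including where the task was picked up from.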
## Features

- 🚀 Efficient work-stealing algorithm
- 📊 Comprehensive performance metrics
- 🔄 Dynamic task distribution
- 🌐 Blocking and non-blocking task support
- 📈 Real-time terminal monitoring

## Performance Characteristics

- Minimal lock contention
- Atomic counters for metrics, with short critical sections around queue operations
- Randomized work-stealing victim selection
- Low-overhead task tracking
- Load balancing via work stealing (up to half of a victim's queue is taken per steal)

## Usage Example

```go
// Create a scheduler with 4 processors and a global queue capacity of 1000.
s := scheduler.NewScheduler(4, 1000)
if err := s.Start(); err != nil {
    log.Fatalf("failed to start scheduler: %v", err)
}

// Create and submit a non-blocking task with a 100ms workload.
g := core.NewGoroutine(100*time.Millisecond, false)
if !s.Submit(g) {
    log.Println("task rejected: global queue full")
}

// Retrieve runtime statistics.
stats := s.GetStats()
fmt.Printf("completed: %d\n", stats.TasksCompleted)

s.Stop()
```

## Configuration Options

- Processor count (`-p` in the demo, defaulting to `runtime.NumCPU()`)
- Global queue size (`-q`)
- Total task count and submission batch size for the demo workload (`-t`, `-b`)
- Visualization update interval (`-i`)
- Per-event deadlines for blocking operations handled by the network poller

## Metrics Tracking

The system provides detailed metrics across multiple dimensions:
- Tasks scheduled and completed
- Work-stealing statistics (local and global steals)
- Processor utilization (running vs. idle time)
- Network poller performance (completed events, timeouts, average block time)
- Global queue state (size, utilization, rejections)

## Performance Tuning Recommendations

1. Match the processor count to the available CPU cores
2. Size the global queue for the expected submission burst; rejected tasks are dropped, not retried
3. Monitor steal rates and queue utilization (see the monitoring sketch below)
4. Adjust deadlines for I/O-bound tasks so the poller does not time them out prematurely
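As a concrete illustration of recommendation 3, the numbers worth watching are all available from `Scheduler.GetStats()`. The helper below is hypothetical (it is not part of this repository); it only reads fields that `cmd/main.go` already consumes, and the import path assumes the code lives inside this module.

```go
package main

import (
	"fmt"
	"time"

	"workstealing/internal/scheduler"
)

// monitor periodically samples scheduler statistics. Illustrative only.
func monitor(s *scheduler.Scheduler, interval time.Duration, stop <-chan struct{}) {
	ticker := time.NewTicker(interval)
	defer ticker.Stop()

	for {
		select {
		case <-stop:
			return
		case <-ticker.C:
			stats := s.GetStats()

			// Fraction of completed tasks that reached a processor via stealing.
			stealRate := 0.0
			if stats.TasksCompleted > 0 {
				stealRate = float64(stats.TotalSteals) / float64(stats.TasksCompleted)
			}

			fmt.Printf("completed=%d rejected=%d steals=%d (global=%d, local=%d) steal-rate=%.1f%%\n",
				stats.TasksCompleted,
				stats.GlobalQueueStats.Rejected,
				stats.TotalSteals,
				stats.GlobalQueueSteals,
				stats.LocalQueueSteals,
				stealRate*100)
		}
	}
}
```

A steadily growing `Rejected` count is the clearest sign that the global queue (`-q` in the demo) is too small for the submission rate.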
## Dependencies

- Go 1.23+ (as declared in `go.mod`)
- `github.com/gizak/termui/v3` for the terminal dashboard
- Standard library concurrency primitives (`sync`, `sync/atomic`, channels) for the scheduler itself

--------------------------------------------------------------------------------
/cmd/main.go:
--------------------------------------------------------------------------------
1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "log" 7 | "os" 8 | "os/signal" 9 | "runtime" 10 | "sync/atomic" 11 | "syscall" 12 | "time" 13 | 14 | "workstealing/internal/core" 15 | "workstealing/internal/scheduler" 16 | "workstealing/internal/visualization" 17 | ) 18 | 19 | var ( 20 | numProcessors = flag.Int("p", runtime.NumCPU(), "Number of processors") 21 | queueSize = flag.Int("q", 2000, "Global queue size") 22 | totalTasks = flag.Int("t", 2500, "Total number of tasks to process") 23 | updateInterval = flag.Duration("i", 100*time.Millisecond, "Visualization update interval") 24 | batchSize = flag.Int("b", 5, "Batch size for task submission") 25 | ) 26 | 27 | func main() { 28 | flag.Parse() 29 | 30 | // Initialize scheduler 31 | s := scheduler.NewScheduler(*numProcessors, int32(*queueSize)) 32 | if s == nil { 33 | log.Fatal("Failed to create scheduler") 34 | } 35 | 36 | // Initialize visualizer 37 | vis, err := visualization.NewTerminalVisualizer(s, *updateInterval) 38 | if err != nil { 39 | log.Fatalf("Failed to initialize visualizer: %v", err) 40 | } 41 | 42 | // Channels for completion and tracking 43 | done := make(chan struct{}) 44 | var ( 45 | completedTasks uint64 46 | submittedTasks uint64 47 | ) 48 | 49 | // Start scheduler 50 | if err := s.Start(); err != nil { 51 | log.Fatalf("Failed to start scheduler: %v", err) 52 | } 53 | 54 | // Start visualization in background 55 | go vis.Start() 56 | 57 | // Generate workload in background 58 | go generateWorkload(s, *totalTasks, *batchSize, vis, &completedTasks, &submittedTasks, done) 59 | 60 | // Wait for interrupt or completion 61 | sigChan := make(chan os.Signal, 1) 62 | signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM) 63 | 64 | select { 65 | case <-sigChan: 66 | // Handle interrupt 67 | vis.Stop() 68 | s.Stop() 69 | case <-done: 70 | // Work completed, show final statistics 71 | time.Sleep(500 * time.Millisecond) // Allow final updates 72 | vis.Stop() // Stop the UI first 73 | fmt.Println("\033[H\033[2J") // Clear the screen 74 | showFinalStats(s) 75 | s.Stop() 76 | } 77 | } 78 | 79 | func generateWorkload(s *scheduler.Scheduler, totalTasks, batchSize int, 80 | vis *visualization.TerminalVisualizer, 81 | completed, submitted *uint64, 82 | done chan struct{}) { 83 | 84 | taskID := 0 85 | rejectedCount := 0 86 | startTime := time.Now() 87 | // blockcount := 0 88 | // Submit all tasks 89 | for taskID < totalTasks { 90 | for i := 0; i < batchSize && taskID < totalTasks; i++ { 91 | taskID++ 92 | isBlocking := taskID%3 == 0 93 | // if isBlocking { 94 | // blockcount++ 95 | // } 96 | workload := time.Duration(50+taskID%100) * time.Millisecond 97 | 98 | g := core.NewGoroutine(workload, isBlocking) 99 | if !s.Submit(g) { 100 | rejectedCount++ 101 | vis.AddEvent(fmt.Sprintf("Task G%d rejected: Queue full", taskID)) 102 | } else { 103 | atomic.AddUint64(submitted, 1) 104 | if isBlocking { 105 | vis.AddEvent(fmt.Sprintf("Submitted blocking task G%d", taskID)) 106 | } 107 | } 108 | 109 | } 110 | time.Sleep(10 * time.Millisecond) 111 | } 112 | 113 | submissionTime := time.Since(startTime) 114 | vis.AddEvent(fmt.Sprintf("All tasks submitted in %v. Waiting for completion...", 115 | submissionTime.Round(time.Millisecond))) 116 | 117 | // Wait for all non-rejected tasks to complete 118 | expectedComplete := uint64(totalTasks - rejectedCount) 119 | lastProgress := uint64(0) 120 | 121 | for { 122 | stats := s.GetStats() 123 | currentCompleted := stats.TasksCompleted 124 | atomic.StoreUint64(completed, currentCompleted) 125 | 126 | // Check if all tasks are completed 127 | if currentCompleted >= expectedComplete { 128 | vis.AddEvent(fmt.Sprintf("All %d tasks completed!", expectedComplete)) 129 | done <- struct{}{} 130 | break 131 | } 132 | 133 | // Show progress updates for every 100 completed tasks 134 | if currentCompleted/100 > lastProgress/100 { 135 | vis.AddEvent(fmt.Sprintf("Progress: %d/%d tasks completed (%.1f%%)", 136 | currentCompleted, expectedComplete, 137 | float64(currentCompleted)/float64(expectedComplete)*100)) 138 | lastProgress = currentCompleted 139 | } 140 | 141 | time.Sleep(100 * time.Millisecond) 142 | } 143 | 144 | close(done) 145 | } 146 | 147 | func showFinalStats(s *scheduler.Scheduler) { 148 | stats := s.GetStats() 149 | 150 | fmt.Println("========================================") 151 | fmt.Println(" Final Statistics ") 152 | fmt.Println("========================================") 153 | 154 | fmt.Printf("\nOverall Performance:\n") 155 | fmt.Printf("--------------------\n") 156 | fmt.Printf("Total Tasks Scheduled: %d\n", stats.TasksScheduled) 157 | fmt.Printf("Total Tasks Completed: %d\n", stats.TasksCompleted) 158 | fmt.Printf("Tasks Still In Progress: %d\n", 159 | stats.TasksScheduled-stats.TasksCompleted-stats.GlobalQueueStats.Rejected) 160 | fmt.Printf("Total Tasks Rejected: %d\n", stats.GlobalQueueStats.Rejected) 161 | fmt.Printf("Total Execution Time: %v\n", stats.RunningTime.Round(time.Millisecond)) 162 | fmt.Printf("Total 
Steals: %d (Global: %d, Local: %d)\n\n", 163 | stats.TotalSteals, stats.GlobalQueueSteals, stats.LocalQueueSteals) 164 | 165 | fmt.Println("Processor Statistics:") 166 | fmt.Println("--------------------") 167 | for _, p := range stats.ProcessorMetrics { 168 | fmt.Printf("Processor P%d:\n", p.ID) 169 | fmt.Printf(" Tasks Executed: %d\n", p.TasksExecuted) 170 | fmt.Printf(" Steals: Local=%d, Global=%d\n", p.LocalSteals, p.GlobalSteals) 171 | fmt.Printf(" Idle Time: %v\n", p.IdleTime.Round(time.Millisecond)) 172 | fmt.Printf(" Running Time: %v\n\n", p.RunningTime.Round(time.Millisecond)) 173 | } 174 | 175 | fmt.Println("Network Poller Statistics:") 176 | fmt.Println("-------------------------") 177 | fmt.Printf("Total Events Handled: %d\n", stats.PollerMetrics.TotalEvents) 178 | fmt.Printf("Successfully Completed: %d\n", stats.PollerMetrics.CompletedEvents) 179 | fmt.Printf("Timeouts: %d\n", stats.PollerMetrics.Timeouts) 180 | fmt.Printf("Errors: %d\n", stats.PollerMetrics.Errors) 181 | fmt.Printf("Average Block Time: %v\n\n", 182 | stats.PollerMetrics.AverageBlockTime.Round(time.Millisecond)) 183 | 184 | fmt.Println("Global Queue Statistics:") 185 | fmt.Println("----------------------") 186 | fmt.Printf("Total Submitted: %d\n", stats.GlobalQueueStats.Submitted) 187 | fmt.Printf("Total Executed: %d\n", stats.GlobalQueueStats.Executed) 188 | fmt.Printf("Total Rejected: %d\n", stats.GlobalQueueStats.Rejected) 189 | fmt.Printf("Total Stolen: %d\n\n", stats.GlobalQueueStats.Stolen) 190 | 191 | fmt.Printf("Performance Metrics:\n") 192 | fmt.Printf("-------------------\n") 193 | throughput := float64(stats.TasksCompleted) / stats.RunningTime.Seconds() 194 | fmt.Printf("Average Throughput: %.2f tasks/second\n", throughput) 195 | if stats.TasksScheduled > 0 { 196 | successRate := float64(stats.TasksCompleted) / float64(stats.TasksScheduled) * 100 197 | fmt.Printf("Task Success Rate: %.2f%%\n\n", successRate) 198 | } 199 | 200 | fmt.Println("========================================") 201 | fmt.Println("Press Enter to exit...") 202 | fmt.Scanln() // Wait for user input before exiting 203 | } 204 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module workstealing 2 | 3 | go 1.23.4 4 | 5 | require github.com/gizak/termui/v3 v3.1.0 6 | 7 | require ( 8 | github.com/mattn/go-runewidth v0.0.2 // indirect 9 | github.com/mitchellh/go-wordwrap v0.0.0-20150314170334-ad45545899c7 // indirect 10 | github.com/nsf/termbox-go v0.0.0-20190121233118-02980233997d // indirect 11 | ) 12 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/gizak/termui/v3 v3.1.0 h1:ZZmVDgwHl7gR7elfKf1xc4IudXZ5qqfDh4wExk4Iajc= 2 | github.com/gizak/termui/v3 v3.1.0/go.mod h1:bXQEBkJpzxUAKf0+xq9MSWAvWZlE7c+aidmyFlkYTrY= 3 | github.com/mattn/go-runewidth v0.0.2 h1:UnlwIPBGaTZfPQ6T1IGzPI0EkYAQmT9fAEJ/poFC63o= 4 | github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= 5 | github.com/mitchellh/go-wordwrap v0.0.0-20150314170334-ad45545899c7 h1:DpOJ2HYzCv8LZP15IdmG+YdwD2luVPHITV96TkirNBM= 6 | github.com/mitchellh/go-wordwrap v0.0.0-20150314170334-ad45545899c7/go.mod h1:ZXFpozHsX6DPmq2I0TCekCxypsnAUbP2oI0UX1GXzOo= 7 | github.com/nsf/termbox-go v0.0.0-20190121233118-02980233997d h1:x3S6kxmy49zXVVyhcnrFqxvNVCBPb2KZ9hV2RBdS840= 8 | 
github.com/nsf/termbox-go v0.0.0-20190121233118-02980233997d/go.mod h1:IuKpRQcYE1Tfu+oAQqaLisqDeXgjyyltCfsaoYN18NQ= 9 | -------------------------------------------------------------------------------- /internal/core/global_queue.go: -------------------------------------------------------------------------------- 1 | package core 2 | 3 | import ( 4 | "math/rand" 5 | "sync" 6 | "sync/atomic" 7 | "time" 8 | ) 9 | 10 | const ( 11 | defaultCapacity = 10000 12 | minStealSize = 1 13 | ) 14 | 15 | // GlobalQueueStats represents current queue statistics 16 | type GlobalQueueStats struct { 17 | CurrentSize int32 18 | Capacity int32 19 | Submitted uint64 20 | Executed uint64 21 | Rejected uint64 22 | Stolen uint64 23 | Utilization float64 24 | LastStealTime time.Time 25 | } 26 | 27 | // GlobalQueue manages the central task pool 28 | type GlobalQueue struct { 29 | tasks []*Goroutine 30 | capacity int32 31 | size atomic.Int32 32 | 33 | metrics struct { 34 | submitted atomic.Uint64 35 | executed atomic.Uint64 36 | rejected atomic.Uint64 37 | stolen atomic.Uint64 38 | } 39 | 40 | mu sync.RWMutex 41 | rand *rand.Rand 42 | lastStealTime time.Time 43 | } 44 | 45 | // NewGlobalQueue creates a new global queue instance 46 | func NewGlobalQueue(capacity int32) *GlobalQueue { 47 | if capacity <= 0 { 48 | capacity = defaultCapacity 49 | } 50 | 51 | return &GlobalQueue{ 52 | tasks: make([]*Goroutine, 0, capacity), 53 | capacity: capacity, 54 | rand: rand.New(rand.NewSource(time.Now().UnixNano())), 55 | lastStealTime: time.Now(), 56 | } 57 | } 58 | 59 | // Submit adds a new goroutine to the queue 60 | func (gq *GlobalQueue) Submit(g *Goroutine) bool { 61 | if g == nil { 62 | return false 63 | } 64 | 65 | gq.mu.Lock() 66 | defer gq.mu.Unlock() 67 | 68 | if int32(len(gq.tasks)) >= gq.capacity { 69 | gq.metrics.rejected.Add(1) 70 | return false 71 | } 72 | 73 | g.SetSource(SourceGlobalQueue, 0) 74 | gq.tasks = append(gq.tasks, g) 75 | gq.size.Add(1) 76 | gq.metrics.submitted.Add(1) 77 | return true 78 | } 79 | 80 | // TrySteal attempts to steal tasks from the global queue 81 | func (gq *GlobalQueue) TrySteal(maxTasks int) []*Goroutine { 82 | if maxTasks < minStealSize { 83 | return nil 84 | } 85 | 86 | gq.mu.Lock() 87 | defer gq.mu.Unlock() 88 | 89 | currentSize := len(gq.tasks) 90 | if currentSize == 0 { 91 | return nil 92 | } 93 | 94 | // Steal up to half of available tasks 95 | stealCount := min(maxTasks, (currentSize+1)/2) 96 | stolen := make([]*Goroutine, 0, stealCount) 97 | 98 | for i := 0; i < stealCount; i++ { 99 | idx := gq.rand.Intn(len(gq.tasks)) 100 | g := gq.tasks[idx] 101 | 102 | // Remove the stolen task 103 | lastIdx := len(gq.tasks) - 1 104 | gq.tasks[idx] = gq.tasks[lastIdx] 105 | gq.tasks = gq.tasks[:lastIdx] 106 | 107 | stolen = append(stolen, g) 108 | } 109 | 110 | if len(stolen) > 0 { 111 | gq.size.Add(int32(-len(stolen))) 112 | gq.metrics.stolen.Add(uint64(len(stolen))) 113 | gq.lastStealTime = time.Now() 114 | } 115 | 116 | return stolen 117 | } 118 | 119 | // Take removes and returns a specific number of tasks 120 | func (gq *GlobalQueue) Take(count int) []*Goroutine { 121 | if count <= 0 { 122 | return nil 123 | } 124 | 125 | gq.mu.Lock() 126 | defer gq.mu.Unlock() 127 | 128 | if len(gq.tasks) == 0 { 129 | return nil 130 | } 131 | 132 | n := min(count, len(gq.tasks)) 133 | tasks := make([]*Goroutine, n) 134 | 135 | for i := 0; i < n; i++ { 136 | tasks[i] = gq.tasks[i] 137 | } 138 | 139 | // Remove taken tasks 140 | gq.tasks = gq.tasks[n:] 141 | gq.size.Add(int32(-n)) 142 | 
gq.metrics.executed.Add(uint64(n)) 143 | 144 | return tasks 145 | } 146 | 147 | // Size returns the current number of tasks 148 | func (gq *GlobalQueue) Size() int32 { 149 | return gq.size.Load() 150 | } 151 | 152 | // IsEmpty returns whether the queue is empty 153 | func (gq *GlobalQueue) IsEmpty() bool { 154 | return gq.Size() == 0 155 | } 156 | 157 | // IsFull returns whether the queue is at capacity 158 | func (gq *GlobalQueue) IsFull() bool { 159 | return gq.Size() >= gq.capacity 160 | } 161 | 162 | // Stats returns current queue statistics 163 | func (gq *GlobalQueue) Stats() GlobalQueueStats { 164 | gq.mu.RLock() 165 | defer gq.mu.RUnlock() 166 | 167 | currentSize := int32(len(gq.tasks)) 168 | return GlobalQueueStats{ 169 | CurrentSize: currentSize, 170 | Capacity: gq.capacity, 171 | Submitted: gq.metrics.submitted.Load(), 172 | Executed: gq.metrics.executed.Load(), 173 | Rejected: gq.metrics.rejected.Load(), 174 | Stolen: gq.metrics.stolen.Load(), 175 | Utilization: float64(currentSize) / float64(gq.capacity), 176 | LastStealTime: gq.lastStealTime, 177 | } 178 | } 179 | 180 | // Clear removes all tasks and resets the queue 181 | func (gq *GlobalQueue) Clear() { 182 | gq.mu.Lock() 183 | defer gq.mu.Unlock() 184 | 185 | removed := len(gq.tasks) 186 | gq.tasks = gq.tasks[:0] 187 | gq.size.Store(0) 188 | gq.metrics.executed.Add(uint64(removed)) 189 | } 190 | 191 | // ResetMetrics resets all metrics counters 192 | func (gq *GlobalQueue) ResetMetrics() { 193 | gq.metrics.submitted.Store(0) 194 | gq.metrics.executed.Store(0) 195 | gq.metrics.rejected.Store(0) 196 | gq.metrics.stolen.Store(0) 197 | } 198 | 199 | func min(a, b int) int { 200 | if a < b { 201 | return a 202 | } 203 | return b 204 | } 205 | -------------------------------------------------------------------------------- /internal/core/global_queue_test.go: -------------------------------------------------------------------------------- 1 | package core 2 | 3 | import ( 4 | "sync" 5 | "testing" 6 | "time" 7 | ) 8 | 9 | func TestNewGlobalQueue(t *testing.T) { 10 | tests := []struct { 11 | name string 12 | capacity int32 13 | wantCapacity int32 14 | }{ 15 | { 16 | name: "Valid capacity", 17 | capacity: 100, 18 | wantCapacity: 100, 19 | }, 20 | { 21 | name: "Zero capacity", 22 | capacity: 0, 23 | wantCapacity: defaultCapacity, 24 | }, 25 | { 26 | name: "Negative capacity", 27 | capacity: -1, 28 | wantCapacity: defaultCapacity, 29 | }, 30 | } 31 | 32 | for _, tt := range tests { 33 | t.Run(tt.name, func(t *testing.T) { 34 | gq := NewGlobalQueue(tt.capacity) 35 | if gq == nil { 36 | t.Fatal("NewGlobalQueue returned nil") 37 | } 38 | if gq.capacity != tt.wantCapacity { 39 | t.Errorf("capacity = %v, want %v", gq.capacity, tt.wantCapacity) 40 | } 41 | if gq.Size() != 0 { 42 | t.Errorf("initial size = %v, want 0", gq.Size()) 43 | } 44 | }) 45 | } 46 | } 47 | 48 | func TestGlobalQueueSubmit(t *testing.T) { 49 | gq := NewGlobalQueue(5) 50 | 51 | t.Run("Basic Submit", func(t *testing.T) { 52 | g := NewGoroutine(10*time.Millisecond, false) 53 | if !gq.Submit(g) { 54 | t.Error("Submit should succeed") 55 | } 56 | if gq.Size() != 1 { 57 | t.Errorf("size = %v, want 1", gq.Size()) 58 | } 59 | }) 60 | 61 | t.Run("Submit Nil", func(t *testing.T) { 62 | if gq.Submit(nil) { 63 | t.Error("Submit(nil) should return false") 64 | } 65 | }) 66 | 67 | t.Run("Submit Until Full", func(t *testing.T) { 68 | // Fill the queue 69 | for i := 0; i < 4; i++ { // Already has 1 item 70 | g := NewGoroutine(10*time.Millisecond, false) 71 | if !gq.Submit(g) { 72 | 
t.Errorf("Submit %d should succeed", i) 73 | } 74 | } 75 | 76 | // Try to submit when full 77 | g := NewGoroutine(10*time.Millisecond, false) 78 | if gq.Submit(g) { 79 | t.Error("Submit should fail when queue is full") 80 | } 81 | 82 | stats := gq.Stats() 83 | if stats.Rejected != 1 { 84 | t.Errorf("rejected count = %v, want 1", stats.Rejected) 85 | } 86 | }) 87 | } 88 | 89 | func TestGlobalQueueTrySteal(t *testing.T) { 90 | gq := NewGlobalQueue(10) 91 | 92 | // Add tasks 93 | for i := 0; i < 6; i++ { 94 | g := NewGoroutine(10*time.Millisecond, false) 95 | gq.Submit(g) 96 | } 97 | 98 | t.Run("Valid Steal", func(t *testing.T) { 99 | stolen := gq.TrySteal(2) 100 | if len(stolen) != 2 { 101 | t.Errorf("stolen count = %v, want 2", len(stolen)) 102 | } 103 | 104 | // Verify stolen tasks 105 | for _, g := range stolen { 106 | if g == nil { 107 | t.Error("stolen task should not be nil") 108 | } 109 | if g.Source() != SourceGlobalQueue { 110 | t.Error("task source should be global queue") 111 | } 112 | } 113 | 114 | if gq.Size() != 4 { 115 | t.Errorf("remaining size = %v, want 4", gq.Size()) 116 | } 117 | }) 118 | 119 | t.Run("Invalid Steal Parameters", func(t *testing.T) { 120 | if stolen := gq.TrySteal(0); stolen != nil { 121 | t.Error("TrySteal(0) should return nil") 122 | } 123 | if stolen := gq.TrySteal(-1); stolen != nil { 124 | t.Error("TrySteal(-1) should return nil") 125 | } 126 | }) 127 | } 128 | 129 | func TestGlobalQueueTake(t *testing.T) { 130 | gq := NewGlobalQueue(10) 131 | 132 | // Add tasks 133 | for i := 0; i < 5; i++ { 134 | g := NewGoroutine(10*time.Millisecond, false) 135 | gq.Submit(g) 136 | } 137 | 138 | t.Run("Valid Take", func(t *testing.T) { 139 | taken := gq.Take(3) 140 | if len(taken) != 3 { 141 | t.Errorf("taken count = %v, want 3", len(taken)) 142 | } 143 | if gq.Size() != 2 { 144 | t.Errorf("remaining size = %v, want 2", gq.Size()) 145 | } 146 | }) 147 | 148 | t.Run("Take More Than Available", func(t *testing.T) { 149 | taken := gq.Take(5) 150 | if len(taken) != 2 { // Only 2 remaining 151 | t.Errorf("taken count = %v, want 2", len(taken)) 152 | } 153 | if !gq.IsEmpty() { 154 | t.Error("queue should be empty after taking all tasks") 155 | } 156 | }) 157 | 158 | t.Run("Take From Empty Queue", func(t *testing.T) { 159 | if taken := gq.Take(1); taken != nil { 160 | t.Error("Take from empty queue should return nil") 161 | } 162 | }) 163 | } 164 | 165 | func TestGlobalQueueConcurrent(t *testing.T) { 166 | gq := NewGlobalQueue(1000) 167 | const numGoroutines = 50 168 | const opsPerGoroutine = 20 169 | 170 | var wg sync.WaitGroup 171 | wg.Add(numGoroutines * 2) // Producers and consumers 172 | 173 | // Start producers 174 | for i := 0; i < numGoroutines; i++ { 175 | go func() { 176 | defer wg.Done() 177 | for j := 0; j < opsPerGoroutine; j++ { 178 | g := NewGoroutine(time.Millisecond, false) 179 | gq.Submit(g) 180 | } 181 | }() 182 | } 183 | 184 | // Start consumers 185 | for i := 0; i < numGoroutines; i++ { 186 | go func() { 187 | defer wg.Done() 188 | for j := 0; j < opsPerGoroutine; j++ { 189 | if j%2 == 0 { 190 | gq.Take(1) 191 | } else { 192 | gq.TrySteal(1) 193 | } 194 | } 195 | }() 196 | } 197 | 198 | // Wait with timeout 199 | done := make(chan bool) 200 | go func() { 201 | wg.Wait() 202 | done <- true 203 | }() 204 | 205 | select { 206 | case <-done: 207 | // Success 208 | case <-time.After(5 * time.Second): 209 | t.Fatal("concurrent test timed out") 210 | } 211 | 212 | // Verify final state 213 | stats := gq.Stats() 214 | if stats.Submitted == 0 { 215 | 
t.Error("no tasks were submitted") 216 | } 217 | if stats.Executed+stats.Stolen == 0 { 218 | t.Error("no tasks were processed") 219 | } 220 | } 221 | 222 | func TestGlobalQueueStats(t *testing.T) { 223 | gq := NewGlobalQueue(10) 224 | 225 | // Submit tasks 226 | for i := 0; i < 5; i++ { 227 | g := NewGoroutine(10*time.Millisecond, false) 228 | gq.Submit(g) 229 | } 230 | 231 | // Take some tasks 232 | gq.Take(2) 233 | 234 | // Steal some tasks 235 | gq.TrySteal(2) 236 | 237 | stats := gq.Stats() 238 | if stats.Submitted != 5 { 239 | t.Errorf("submitted count = %v, want 5", stats.Submitted) 240 | } 241 | if stats.Executed != 2 { 242 | t.Errorf("executed count = %v, want 2", stats.Executed) 243 | } 244 | if stats.Stolen != 2 { 245 | t.Errorf("stolen count = %v, want 2", stats.Stolen) 246 | } 247 | if stats.CurrentSize != 1 { 248 | t.Errorf("current size = %v, want 1", stats.CurrentSize) 249 | } 250 | } 251 | 252 | func TestGlobalQueueClearAndReset(t *testing.T) { 253 | gq := NewGlobalQueue(10) 254 | 255 | // Add tasks 256 | for i := 0; i < 5; i++ { 257 | g := NewGoroutine(10*time.Millisecond, false) 258 | gq.Submit(g) 259 | } 260 | 261 | t.Run("Clear", func(t *testing.T) { 262 | gq.Clear() 263 | if !gq.IsEmpty() { 264 | t.Error("queue should be empty after Clear") 265 | } 266 | if gq.Size() != 0 { 267 | t.Errorf("size = %v, want 0", gq.Size()) 268 | } 269 | }) 270 | 271 | t.Run("Reset Metrics", func(t *testing.T) { 272 | gq.ResetMetrics() 273 | stats := gq.Stats() 274 | if stats.Submitted != 0 || stats.Executed != 0 || stats.Stolen != 0 || stats.Rejected != 0 { 275 | t.Error("metrics should be zero after reset") 276 | } 277 | }) 278 | } 279 | -------------------------------------------------------------------------------- /internal/core/goroutine.go: -------------------------------------------------------------------------------- 1 | package core 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | "sync" 7 | "sync/atomic" 8 | "time" 9 | ) 10 | 11 | // Global variable for ID generation with atomic operations 12 | var globalGID uint64 13 | 14 | // GoroutineState represents the current state of a goroutine 15 | type GoroutineState int32 16 | 17 | const ( 18 | GoroutineCreated GoroutineState = iota 19 | GoroutineRunnable 20 | GoroutineRunning 21 | GoroutineBlocked 22 | GoroutineFinished 23 | ) 24 | 25 | // TaskSource represents where the goroutine came from 26 | type TaskSource int32 27 | 28 | const ( 29 | SourceGlobalQueue TaskSource = iota 30 | SourceLocalQueue 31 | SourceStolen 32 | SourceNetworkPoller 33 | ) 34 | 35 | // GoroutineTransition represents a state or location change of a goroutine 36 | type GoroutineTransition struct { 37 | From string // Source state/location 38 | To string // Destination state/location 39 | Timestamp time.Time // When the transition occurred 40 | Reason string // Why the transition happened 41 | Duration time.Duration // Time spent in 'From' state 42 | } 43 | 44 | // Goroutine represents a lightweight unit of execution 45 | type Goroutine struct { 46 | id uint64 47 | state atomic.Int32 // Current state 48 | workload time.Duration // Expected execution time 49 | startTime time.Time // Execution start time 50 | endTime time.Time // Execution end time 51 | blocking bool // Whether this performs blocking operations 52 | result interface{} // Execution result 53 | err error // Any error that occurred 54 | source TaskSource // Where this goroutine came from 55 | stolenFrom uint32 // ID of processor it was stolen from 56 | 57 | // Protected by mutex 58 | mu sync.RWMutex 59 | 
transitions []GoroutineTransition 60 | lastTransitionTime time.Time 61 | } 62 | 63 | // NewGoroutine creates a new goroutine instance 64 | func NewGoroutine(workload time.Duration, blocking bool) *Goroutine { 65 | if workload <= 0 { 66 | workload = time.Millisecond // Minimum workload 67 | } 68 | 69 | now := time.Now() 70 | g := &Goroutine{ 71 | id: atomic.AddUint64(&globalGID, 1), 72 | workload: workload, 73 | blocking: blocking, 74 | source: SourceLocalQueue, 75 | transitions: make([]GoroutineTransition, 0, 10), // Pre-allocate space 76 | lastTransitionTime: now, 77 | } 78 | g.state.Store(int32(GoroutineCreated)) 79 | 80 | // Record initial transition 81 | g.addTransitionLocked("created", "ready", "initialization", now) 82 | return g 83 | } 84 | 85 | // Internal method for adding transitions without locking 86 | func (g *Goroutine) addTransitionLocked(from, to, reason string, timestamp time.Time) { 87 | if from == "" || to == "" { 88 | return // Prevent invalid transitions 89 | } 90 | 91 | duration := timestamp.Sub(g.lastTransitionTime) 92 | g.transitions = append(g.transitions, GoroutineTransition{ 93 | From: from, 94 | To: to, 95 | Timestamp: timestamp, 96 | Reason: reason, 97 | Duration: duration, 98 | }) 99 | g.lastTransitionTime = timestamp 100 | } 101 | 102 | // AddTransition adds a new transition with proper locking 103 | func (g *Goroutine) AddTransition(from, to, reason string) { 104 | g.mu.Lock() 105 | defer g.mu.Unlock() 106 | g.addTransitionLocked(from, to, reason, time.Now()) 107 | } 108 | 109 | // GetTransitions returns a copy of transition history 110 | func (g *Goroutine) GetTransitions() []GoroutineTransition { 111 | g.mu.RLock() 112 | defer g.mu.RUnlock() 113 | 114 | result := make([]GoroutineTransition, len(g.transitions)) 115 | copy(result, g.transitions) 116 | return result 117 | } 118 | 119 | // ID returns the goroutine's unique identifier 120 | func (g *Goroutine) ID() uint64 { 121 | return g.id 122 | } 123 | 124 | // State returns the current state 125 | func (g *Goroutine) State() GoroutineState { 126 | return GoroutineState(g.state.Load()) 127 | } 128 | 129 | // SetState atomically updates the goroutine's state 130 | func (g *Goroutine) SetState(state GoroutineState) { 131 | oldState := g.State() 132 | g.state.Store(int32(state)) 133 | 134 | g.mu.Lock() 135 | defer g.mu.Unlock() 136 | 137 | g.addTransitionLocked( 138 | oldState.String(), 139 | state.String(), 140 | "state_change", 141 | time.Now(), 142 | ) 143 | } 144 | 145 | // IsBlocking returns whether this goroutine performs blocking operations 146 | func (g *Goroutine) IsBlocking() bool { 147 | return g.blocking 148 | } 149 | 150 | // Start marks the goroutine as running 151 | func (g *Goroutine) Start() { 152 | now := time.Now() 153 | g.mu.Lock() 154 | defer g.mu.Unlock() 155 | 156 | g.startTime = now 157 | g.state.Store(int32(GoroutineRunning)) 158 | g.addTransitionLocked("ready", "running", "execution_started", now) 159 | } 160 | 161 | // Finish marks the goroutine as completed 162 | func (g *Goroutine) Finish(result interface{}, err error) { 163 | now := time.Now() 164 | g.mu.Lock() 165 | defer g.mu.Unlock() 166 | 167 | g.endTime = now 168 | g.result = result 169 | g.err = err 170 | g.state.Store(int32(GoroutineFinished)) 171 | g.addTransitionLocked("running", "finished", "execution_completed", now) 172 | } 173 | 174 | // ExecutionTime returns the total execution time 175 | func (g *Goroutine) ExecutionTime() time.Duration { 176 | g.mu.RLock() 177 | defer g.mu.RUnlock() 178 | 179 | if g.State() != 
GoroutineFinished { 180 | return time.Since(g.startTime) 181 | } 182 | return g.endTime.Sub(g.startTime) 183 | } 184 | 185 | // Workload returns the expected execution time 186 | func (g *Goroutine) Workload() time.Duration { 187 | return g.workload 188 | } 189 | 190 | // Source returns where the goroutine came from 191 | func (g *Goroutine) Source() TaskSource { 192 | return g.source 193 | } 194 | 195 | // SetSource updates source information 196 | func (g *Goroutine) SetSource(source TaskSource, stolenFrom uint32) { 197 | g.mu.Lock() 198 | defer g.mu.Unlock() 199 | 200 | oldSource := g.source 201 | g.source = source 202 | g.stolenFrom = stolenFrom 203 | 204 | g.addTransitionLocked( 205 | oldSource.String(), 206 | source.String(), 207 | fmt.Sprintf("source_change_stolen_from_P%d", stolenFrom), 208 | time.Now(), 209 | ) 210 | } 211 | 212 | // StolenFrom returns the processor ID this was stolen from 213 | func (g *Goroutine) StolenFrom() uint32 { 214 | return g.stolenFrom 215 | } 216 | 217 | // String representations 218 | func (s GoroutineState) String() string { 219 | switch s { 220 | case GoroutineCreated: 221 | return "created" 222 | case GoroutineRunnable: 223 | return "runnable" 224 | case GoroutineRunning: 225 | return "running" 226 | case GoroutineBlocked: 227 | return "blocked" 228 | case GoroutineFinished: 229 | return "finished" 230 | default: 231 | return "unknown" 232 | } 233 | } 234 | 235 | func (s TaskSource) String() string { 236 | switch s { 237 | case SourceGlobalQueue: 238 | return "global_queue" 239 | case SourceLocalQueue: 240 | return "local_queue" 241 | case SourceStolen: 242 | return "stolen" 243 | case SourceNetworkPoller: 244 | return "network_poller" 245 | default: 246 | return "unknown" 247 | } 248 | } 249 | 250 | // GetHistory returns formatted transition history 251 | func (g *Goroutine) GetHistory() string { 252 | g.mu.RLock() 253 | defer g.mu.RUnlock() 254 | 255 | var history strings.Builder 256 | history.WriteString(fmt.Sprintf("G%d History:\n", g.id)) 257 | 258 | for i, t := range g.transitions { 259 | history.WriteString(fmt.Sprintf("%d. 
%s -> %s (%s) [duration: %v]\n", 260 | i+1, t.From, t.To, t.Reason, t.Duration)) 261 | } 262 | 263 | return history.String() 264 | } 265 | -------------------------------------------------------------------------------- /internal/core/goroutine_test.go: -------------------------------------------------------------------------------- 1 | package core 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | "sync" 7 | "testing" 8 | "time" 9 | ) 10 | 11 | func TestGoroutineCreation(t *testing.T) { 12 | workload := 100 * time.Millisecond 13 | g := NewGoroutine(workload, false) 14 | 15 | // Test basic properties 16 | if g.ID() == 0 { 17 | t.Fatal("Expected non-zero ID") 18 | } 19 | 20 | if g.State() != GoroutineCreated { 21 | t.Errorf("Expected initial state Created, got %v", g.State()) 22 | } 23 | 24 | if g.Workload() != workload { 25 | t.Errorf("Expected workload %v, got %v", workload, g.Workload()) 26 | } 27 | 28 | if g.IsBlocking() { 29 | t.Error("Expected non-blocking goroutine") 30 | } 31 | 32 | // Test initial transition 33 | transitions := g.GetTransitions() 34 | if len(transitions) != 1 { 35 | t.Fatalf("Expected 1 initial transition, got %d", len(transitions)) 36 | } 37 | 38 | initialTransition := transitions[0] 39 | if initialTransition.From != "created" || initialTransition.To != "ready" { 40 | t.Errorf("Unexpected initial transition: %s -> %s", 41 | initialTransition.From, initialTransition.To) 42 | } 43 | 44 | // Test zero workload case 45 | g2 := NewGoroutine(0, false) 46 | if g2.Workload() <= 0 { 47 | t.Error("Expected minimum workload for zero input") 48 | } 49 | } 50 | 51 | func TestGoroutineLifecycle(t *testing.T) { 52 | testDone := make(chan bool) 53 | 54 | go func() { 55 | defer func() { testDone <- true }() 56 | 57 | g := NewGoroutine(50*time.Millisecond, false) 58 | 59 | // Define and test state transitions 60 | transitions := []struct { 61 | name string 62 | operation func() 63 | expected GoroutineState 64 | }{ 65 | {"To Runnable", func() { g.SetState(GoroutineRunnable) }, GoroutineRunnable}, 66 | {"Start", func() { g.Start() }, GoroutineRunning}, 67 | {"Finish", func() { g.Finish(nil, nil) }, GoroutineFinished}, 68 | } 69 | 70 | for _, tr := range transitions { 71 | tr.operation() 72 | if state := g.State(); state != tr.expected { 73 | t.Errorf("%s: Expected state %v, got %v", tr.name, tr.expected, state) 74 | } 75 | } 76 | 77 | // Verify execution time 78 | execTime := g.ExecutionTime() 79 | if execTime <= 0 { 80 | t.Error("Expected positive execution time, got", execTime) 81 | } 82 | }() 83 | 84 | // Add timeout to prevent test hanging 85 | select { 86 | case <-testDone: 87 | // Test completed successfully 88 | case <-time.After(1 * time.Second): 89 | t.Fatal("Test timed out after 1 second") 90 | } 91 | } 92 | 93 | func TestGoroutineTransitions(t *testing.T) { 94 | g := NewGoroutine(100*time.Millisecond, false) 95 | 96 | expectedTransitions := []struct { 97 | from string 98 | to string 99 | reason string 100 | }{ 101 | {"ready", "running", "test_transition_1"}, 102 | {"running", "blocked", "test_transition_2"}, 103 | {"blocked", "runnable", "test_transition_3"}, 104 | } 105 | 106 | // Add transitions with small delays to ensure measurable durations 107 | for _, tr := range expectedTransitions { 108 | time.Sleep(10 * time.Millisecond) 109 | g.AddTransition(tr.from, tr.to, tr.reason) 110 | } 111 | 112 | // Verify transitions 113 | transitions := g.GetTransitions() 114 | if len(transitions) != len(expectedTransitions)+1 { // +1 for initial transition 115 | t.Fatalf("Expected %d 
transitions, got %d", 116 | len(expectedTransitions)+1, len(transitions)) 117 | } 118 | 119 | // Skip initial transition in verification 120 | for i, expected := range expectedTransitions { 121 | actual := transitions[i+1] 122 | if actual.From != expected.from || 123 | actual.To != expected.to || 124 | actual.Reason != expected.reason { 125 | t.Errorf("Transition %d mismatch: expected %v->%v(%s), got %v->%v(%s)", 126 | i, expected.from, expected.to, expected.reason, 127 | actual.From, actual.To, actual.Reason) 128 | } 129 | if actual.Duration <= 0 { 130 | t.Errorf("Transition %d: expected positive duration, got %v", 131 | i, actual.Duration) 132 | } 133 | } 134 | } 135 | 136 | func TestConcurrentTransitions(t *testing.T) { 137 | g := NewGoroutine(100*time.Millisecond, false) 138 | const numGoroutines = 100 139 | 140 | var wg sync.WaitGroup 141 | wg.Add(numGoroutines) 142 | 143 | // Launch concurrent goroutines adding transitions 144 | for i := 0; i < numGoroutines; i++ { 145 | go func(id int) { 146 | defer wg.Done() 147 | g.AddTransition( 148 | fmt.Sprintf("state_%d", id), 149 | fmt.Sprintf("state_%d", id+1), 150 | fmt.Sprintf("transition_%d", id), 151 | ) 152 | }(i) 153 | } 154 | 155 | // Wait with timeout 156 | done := make(chan bool) 157 | go func() { 158 | wg.Wait() 159 | done <- true 160 | }() 161 | 162 | select { 163 | case <-done: 164 | // Success 165 | case <-time.After(2 * time.Second): 166 | t.Fatal("Concurrent transitions test timed out") 167 | } 168 | 169 | // Verify transitions 170 | transitions := g.GetTransitions() 171 | if len(transitions) != numGoroutines+1 { // +1 for initial transition 172 | t.Errorf("Expected %d transitions, got %d", 173 | numGoroutines+1, len(transitions)) 174 | } 175 | } 176 | 177 | func TestSourceChanges(t *testing.T) { 178 | g := NewGoroutine(100*time.Millisecond, false) 179 | 180 | changes := []struct { 181 | source TaskSource 182 | stolenFrom uint32 183 | expectStr string 184 | }{ 185 | {SourceGlobalQueue, 0, "global_queue"}, 186 | {SourceStolen, 1, "stolen"}, 187 | {SourceNetworkPoller, 2, "network_poller"}, 188 | {SourceLocalQueue, 3, "local_queue"}, 189 | } 190 | 191 | for i, change := range changes { 192 | g.SetSource(change.source, change.stolenFrom) 193 | 194 | if g.Source() != change.source { 195 | t.Errorf("Case %d: Expected source %v, got %v", 196 | i, change.source, g.Source()) 197 | } 198 | 199 | if g.StolenFrom() != change.stolenFrom { 200 | t.Errorf("Case %d: Expected stolenFrom %v, got %v", 201 | i, change.stolenFrom, g.StolenFrom()) 202 | } 203 | 204 | if g.Source().String() != change.expectStr { 205 | t.Errorf("Case %d: Expected source string %v, got %v", 206 | i, change.expectStr, g.Source().String()) 207 | } 208 | } 209 | } 210 | 211 | func TestStateStrings(t *testing.T) { 212 | tests := []struct { 213 | state GoroutineState 214 | expected string 215 | }{ 216 | {GoroutineCreated, "created"}, 217 | {GoroutineRunnable, "runnable"}, 218 | {GoroutineRunning, "running"}, 219 | {GoroutineBlocked, "blocked"}, 220 | {GoroutineFinished, "finished"}, 221 | {GoroutineState(99), "unknown"}, 222 | } 223 | 224 | for _, test := range tests { 225 | if got := test.state.String(); got != test.expected { 226 | t.Errorf("State %v: expected %q, got %q", 227 | test.state, test.expected, got) 228 | } 229 | } 230 | } 231 | 232 | func TestTaskSourceStrings(t *testing.T) { 233 | tests := []struct { 234 | source TaskSource 235 | expected string 236 | }{ 237 | {SourceGlobalQueue, "global_queue"}, 238 | {SourceLocalQueue, "local_queue"}, 239 | {SourceStolen, 
"stolen"}, 240 | {SourceNetworkPoller, "network_poller"}, 241 | {TaskSource(99), "unknown"}, 242 | } 243 | 244 | for _, test := range tests { 245 | if got := test.source.String(); got != test.expected { 246 | t.Errorf("Source %v: expected %q, got %q", 247 | test.source, test.expected, got) 248 | } 249 | } 250 | } 251 | 252 | func TestGetHistory(t *testing.T) { 253 | g := NewGoroutine(100*time.Millisecond, true) 254 | 255 | // Create a sequence of transitions 256 | transitions := []struct { 257 | from string 258 | to string 259 | reason string 260 | }{ 261 | {"ready", "running", "start"}, 262 | {"running", "blocked", "io_wait"}, 263 | {"blocked", "runnable", "io_complete"}, 264 | {"runnable", "finished", "completion"}, 265 | } 266 | 267 | for _, tr := range transitions { 268 | time.Sleep(10 * time.Millisecond) // Ensure measurable durations 269 | g.AddTransition(tr.from, tr.to, tr.reason) 270 | } 271 | 272 | history := g.GetHistory() 273 | 274 | // Verify history contains all transitions 275 | for _, tr := range transitions { 276 | if !strings.Contains(history, tr.from) || 277 | !strings.Contains(history, tr.to) || 278 | !strings.Contains(history, tr.reason) { 279 | t.Errorf("History missing transition: %s->%s(%s)", 280 | tr.from, tr.to, tr.reason) 281 | } 282 | } 283 | 284 | // Verify format 285 | if !strings.Contains(history, fmt.Sprintf("G%d History:", g.ID())) { 286 | t.Error("History missing header") 287 | } 288 | 289 | if !strings.Contains(history, "duration:") { 290 | t.Error("History missing duration information") 291 | } 292 | } 293 | -------------------------------------------------------------------------------- /internal/core/processor.go: -------------------------------------------------------------------------------- 1 | package core 2 | 3 | import ( 4 | "math/rand" 5 | "sync" 6 | "sync/atomic" 7 | "time" 8 | ) 9 | 10 | type ProcessorState int32 11 | 12 | const ( 13 | ProcessorIdle ProcessorState = iota 14 | ProcessorRunning 15 | ProcessorStealing 16 | ) 17 | 18 | // ProcessorMetrics tracks runtime statistics 19 | type ProcessorMetrics struct { 20 | tasksExecuted atomic.Uint64 21 | stealsAttempted atomic.Uint64 22 | stealsSuccessful atomic.Uint64 23 | totalIdleTime atomic.Int64 24 | totalRunningTime atomic.Int64 25 | globalQueueSteals atomic.Uint64 26 | localQueueSteals atomic.Uint64 27 | } 28 | 29 | // ProcessorStats represents externally visible metrics 30 | type ProcessorStats struct { 31 | ID uint32 32 | State ProcessorState 33 | CurrentTask *Goroutine 34 | QueueSize int 35 | TasksExecuted uint64 36 | StealsAttempted uint64 37 | StealsSuccessful uint64 38 | IdleTime time.Duration 39 | RunningTime time.Duration 40 | GlobalSteals uint64 41 | LocalSteals uint64 42 | LastStateChange time.Time 43 | } 44 | 45 | // Processor represents a logical processor (P) 46 | type Processor struct { 47 | id uint32 48 | state atomic.Int32 49 | maxQueueSize int 50 | currentG atomic.Pointer[Goroutine] 51 | metrics ProcessorMetrics 52 | lastStateTime time.Time 53 | 54 | mu sync.RWMutex 55 | localQueue *LocalRunQueue 56 | globalQueue *GlobalQueue // Reference to global queue in same package 57 | rand *rand.Rand 58 | 59 | // List of other processors for work stealing 60 | processors []*Processor 61 | } 62 | 63 | // LocalRunQueue manages the processor's local task queue 64 | type LocalRunQueue struct { 65 | tasks []*Goroutine 66 | size int 67 | count int 68 | mu sync.RWMutex 69 | } 70 | 71 | // NewProcessor creates a new processor instance 72 | func NewProcessor(id uint32, queueSize int, 
globalQueue *GlobalQueue) *Processor { 73 | if queueSize <= 0 { 74 | queueSize = 256 75 | } 76 | 77 | return &Processor{ 78 | id: id, 79 | maxQueueSize: queueSize, 80 | localQueue: newLocalRunQueue(queueSize), 81 | globalQueue: globalQueue, 82 | lastStateTime: time.Now(), 83 | rand: rand.New(rand.NewSource(time.Now().UnixNano())), 84 | } 85 | } 86 | 87 | // SetProcessors sets the list of processors for work stealing 88 | func (p *Processor) SetProcessors(processors []*Processor) { 89 | p.mu.Lock() 90 | defer p.mu.Unlock() 91 | p.processors = processors 92 | } 93 | 94 | func newLocalRunQueue(size int) *LocalRunQueue { 95 | return &LocalRunQueue{ 96 | tasks: make([]*Goroutine, 0, size), 97 | size: size, 98 | } 99 | } 100 | 101 | // ID returns the processor's identifier 102 | func (p *Processor) ID() uint32 { 103 | return p.id 104 | } 105 | 106 | // State returns current processor state 107 | func (p *Processor) State() ProcessorState { 108 | return ProcessorState(p.state.Load()) 109 | } 110 | 111 | // SetState updates processor state with metrics 112 | func (p *Processor) SetState(newState ProcessorState) { 113 | p.mu.Lock() 114 | defer p.mu.Unlock() 115 | 116 | oldState := p.State() 117 | now := time.Now() 118 | duration := now.Sub(p.lastStateTime) 119 | 120 | switch oldState { 121 | case ProcessorIdle: 122 | p.metrics.totalIdleTime.Add(int64(duration)) 123 | case ProcessorRunning: 124 | p.metrics.totalRunningTime.Add(int64(duration)) 125 | } 126 | 127 | p.state.Store(int32(newState)) 128 | p.lastStateTime = now 129 | } 130 | 131 | // Push adds a goroutine to local queue 132 | func (p *Processor) Push(g *Goroutine) bool { 133 | if g == nil { 134 | return false 135 | } 136 | 137 | p.mu.Lock() 138 | defer p.mu.Unlock() 139 | 140 | if p.localQueue.count >= p.maxQueueSize { 141 | return false 142 | } 143 | 144 | return p.localQueue.push(g) 145 | } 146 | 147 | // Pop removes and returns a goroutine from local queue 148 | func (p *Processor) Pop() *Goroutine { 149 | p.mu.Lock() 150 | defer p.mu.Unlock() 151 | return p.localQueue.pop() 152 | } 153 | 154 | // FindWork attempts to get work from various sources 155 | func (p *Processor) FindWork() *Goroutine { 156 | // First try local queue 157 | if g := p.Pop(); g != nil { 158 | return g 159 | } 160 | 161 | // Then try stealing from other processors 162 | if stolen := p.tryStealFromProcessors(); len(stolen) > 0 { 163 | // Push extra stolen tasks to local queue 164 | for i := 1; i < len(stolen); i++ { 165 | p.Push(stolen[i]) 166 | } 167 | return stolen[0] 168 | } 169 | 170 | // Finally try global queue 171 | if stolen := p.tryStealFromGlobalQueue(); len(stolen) > 0 { 172 | // Push extra stolen tasks to local queue 173 | for i := 1; i < len(stolen); i++ { 174 | p.Push(stolen[i]) 175 | } 176 | return stolen[0] 177 | } 178 | 179 | return nil 180 | } 181 | 182 | // selectVictim randomly selects a processor to steal from 183 | func (p *Processor) selectVictim() *Processor { 184 | p.mu.RLock() 185 | defer p.mu.RUnlock() 186 | 187 | if len(p.processors) <= 1 { 188 | return nil 189 | } 190 | 191 | // Try up to 3 random processors 192 | for i := 0; i < 3; i++ { 193 | idx := p.rand.Intn(len(p.processors)) 194 | victim := p.processors[idx] 195 | if victim.ID() != p.ID() && victim.QueueSize() > 0 { 196 | return victim 197 | } 198 | } 199 | return nil 200 | } 201 | 202 | // tryStealFromProcessors attempts to steal from other processors 203 | func (p *Processor) tryStealFromProcessors() []*Goroutine { 204 | p.SetState(ProcessorStealing) 205 | defer 
p.SetState(ProcessorIdle) 206 | 207 | if victim := p.selectVictim(); victim != nil { 208 | p.metrics.stealsAttempted.Add(1) 209 | stolen := victim.localQueue.steal((victim.localQueue.count + 1) / 2) 210 | if len(stolen) > 0 { 211 | p.metrics.stealsSuccessful.Add(1) 212 | p.metrics.localQueueSteals.Add(uint64(len(stolen))) 213 | // Mark tasks as stolen 214 | for _, g := range stolen { 215 | g.SetSource(SourceStolen, victim.ID()) 216 | } 217 | return stolen 218 | } 219 | } 220 | return nil 221 | } 222 | 223 | // tryStealFromGlobalQueue attempts to steal from global queue 224 | func (p *Processor) tryStealFromGlobalQueue() []*Goroutine { 225 | if p.globalQueue == nil { 226 | return nil 227 | } 228 | 229 | stolen := p.globalQueue.TrySteal(p.maxQueueSize / 2) 230 | if len(stolen) > 0 { 231 | p.metrics.globalQueueSteals.Add(uint64(len(stolen))) 232 | return stolen 233 | } 234 | return nil 235 | } 236 | 237 | // Execute runs a goroutine 238 | func (p *Processor) Execute(g *Goroutine) { 239 | if g == nil { 240 | return 241 | } 242 | 243 | p.currentG.Store(g) 244 | defer p.currentG.Store(nil) 245 | 246 | p.SetState(ProcessorRunning) 247 | 248 | g.Start() 249 | time.Sleep(g.Workload()) // Simulate execution 250 | g.Finish(nil, nil) 251 | 252 | p.metrics.tasksExecuted.Add(1) 253 | p.SetState(ProcessorIdle) 254 | } 255 | 256 | // LocalRunQueue methods 257 | func (lrq *LocalRunQueue) push(g *Goroutine) bool { 258 | lrq.mu.Lock() 259 | defer lrq.mu.Unlock() 260 | 261 | if lrq.count >= lrq.size { 262 | return false 263 | } 264 | 265 | lrq.tasks = append(lrq.tasks, g) 266 | lrq.count++ 267 | return true 268 | } 269 | 270 | func (lrq *LocalRunQueue) pop() *Goroutine { 271 | lrq.mu.Lock() 272 | defer lrq.mu.Unlock() 273 | 274 | if lrq.count == 0 { 275 | return nil 276 | } 277 | 278 | g := lrq.tasks[0] 279 | lrq.tasks = lrq.tasks[1:] 280 | lrq.count-- 281 | return g 282 | } 283 | 284 | func (lrq *LocalRunQueue) steal(n int) []*Goroutine { 285 | lrq.mu.Lock() 286 | defer lrq.mu.Unlock() 287 | 288 | if n <= 0 || lrq.count == 0 { 289 | return nil 290 | } 291 | 292 | if n > lrq.count { 293 | n = lrq.count 294 | } 295 | 296 | stolen := make([]*Goroutine, n) 297 | stealIndex := lrq.count - n 298 | copy(stolen, lrq.tasks[stealIndex:]) 299 | lrq.tasks = lrq.tasks[:stealIndex] 300 | lrq.count = stealIndex 301 | 302 | return stolen 303 | } 304 | 305 | // GetStats returns current processor statistics 306 | func (p *Processor) GetStats() ProcessorStats { 307 | p.mu.RLock() 308 | defer p.mu.RUnlock() 309 | 310 | return ProcessorStats{ 311 | ID: p.id, 312 | State: p.State(), 313 | CurrentTask: p.currentG.Load(), 314 | QueueSize: p.localQueue.count, 315 | TasksExecuted: p.metrics.tasksExecuted.Load(), 316 | StealsAttempted: p.metrics.stealsAttempted.Load(), 317 | StealsSuccessful: p.metrics.stealsSuccessful.Load(), 318 | IdleTime: time.Duration(p.metrics.totalIdleTime.Load()), 319 | RunningTime: time.Duration(p.metrics.totalRunningTime.Load()), 320 | GlobalSteals: p.metrics.globalQueueSteals.Load(), 321 | LocalSteals: p.metrics.localQueueSteals.Load(), 322 | LastStateChange: p.lastStateTime, 323 | } 324 | } 325 | 326 | // QueueSize returns current local queue size 327 | func (p *Processor) QueueSize() int { 328 | p.mu.RLock() 329 | defer p.mu.RUnlock() 330 | return p.localQueue.count 331 | } 332 | 333 | func (p *Processor) CurrentGoroutine() *Goroutine { 334 | return p.currentG.Load() 335 | } 336 | 337 | func (s ProcessorState) String() string { 338 | switch s { 339 | case ProcessorIdle: 340 | return "idle" 341 | case 
ProcessorRunning: 342 | return "running" 343 | case ProcessorStealing: 344 | return "stealing" 345 | default: 346 | return "unknown" 347 | } 348 | } 349 | -------------------------------------------------------------------------------- /internal/core/processor_test.go: -------------------------------------------------------------------------------- 1 | package core 2 | 3 | import ( 4 | "sync" 5 | "testing" 6 | "time" 7 | ) 8 | 9 | type testSetup struct { 10 | globalQueue *GlobalQueue 11 | processors []*Processor 12 | } 13 | 14 | func newTestSetup(t *testing.T, numProcessors int) *testSetup { 15 | globalQueue := NewGlobalQueue(1000) 16 | if globalQueue == nil { 17 | t.Fatal("Failed to create global queue") 18 | } 19 | 20 | processors := make([]*Processor, numProcessors) 21 | for i := 0; i < numProcessors; i++ { 22 | processors[i] = NewProcessor(uint32(i), 100, globalQueue) 23 | if processors[i] == nil { 24 | t.Fatalf("Failed to create processor %d", i) 25 | } 26 | } 27 | 28 | // Set processor list for work stealing 29 | for _, p := range processors { 30 | p.SetProcessors(processors) 31 | } 32 | 33 | return &testSetup{ 34 | globalQueue: globalQueue, 35 | processors: processors, 36 | } 37 | } 38 | 39 | func TestProcessorCreation(t *testing.T) { 40 | tests := []struct { 41 | name string 42 | id uint32 43 | queueSize int 44 | wantSize int 45 | }{ 46 | { 47 | name: "Normal creation", 48 | id: 1, 49 | queueSize: 100, 50 | wantSize: 100, 51 | }, 52 | { 53 | name: "Zero queue size defaults to 256", 54 | id: 2, 55 | queueSize: 0, 56 | wantSize: 256, 57 | }, 58 | { 59 | name: "Negative queue size defaults to 256", 60 | id: 3, 61 | queueSize: -1, 62 | wantSize: 256, 63 | }, 64 | } 65 | 66 | globalQueue := NewGlobalQueue(1000) 67 | for _, tt := range tests { 68 | t.Run(tt.name, func(t *testing.T) { 69 | p := NewProcessor(tt.id, tt.queueSize, globalQueue) 70 | 71 | if p.ID() != tt.id { 72 | t.Errorf("ID = %v, want %v", p.ID(), tt.id) 73 | } 74 | if p.maxQueueSize != tt.wantSize { 75 | t.Errorf("maxQueueSize = %v, want %v", p.maxQueueSize, tt.wantSize) 76 | } 77 | if p.State() != ProcessorIdle { 78 | t.Errorf("Initial state = %v, want Idle", p.State()) 79 | } 80 | if p.globalQueue != globalQueue { 81 | t.Error("Global queue not properly set") 82 | } 83 | }) 84 | } 85 | } 86 | 87 | func TestLocalQueueOperations(t *testing.T) { 88 | setup := newTestSetup(t, 1) 89 | p := setup.processors[0] 90 | 91 | t.Run("Push Operations", func(t *testing.T) { 92 | // Test successful push 93 | g := NewGoroutine(10*time.Millisecond, false) 94 | if !p.Push(g) { 95 | t.Error("First push failed") 96 | } 97 | 98 | // Fill queue 99 | for i := 0; i < p.maxQueueSize-1; i++ { 100 | if !p.Push(NewGoroutine(10*time.Millisecond, false)) { 101 | t.Errorf("Push %d failed unexpectedly", i) 102 | } 103 | } 104 | 105 | // Test queue full 106 | if p.Push(NewGoroutine(10*time.Millisecond, false)) { 107 | t.Error("Push succeeded when queue should be full") 108 | } 109 | 110 | // Test nil push 111 | if p.Push(nil) { 112 | t.Error("Nil push should fail") 113 | } 114 | }) 115 | 116 | t.Run("Pop Operations", func(t *testing.T) { 117 | if g := p.Pop(); g == nil { 118 | t.Error("Pop should return goroutine") 119 | } 120 | 121 | // Pop until empty 122 | for p.Pop() != nil { 123 | } 124 | 125 | // Test empty queue 126 | if g := p.Pop(); g != nil { 127 | t.Error("Pop from empty queue should return nil") 128 | } 129 | }) 130 | } 131 | 132 | func TestWorkStealing(t *testing.T) { 133 | setup := newTestSetup(t, 3) 134 | victim := setup.processors[0] 135 
| thief := setup.processors[1] 136 | 137 | // Add tasks to victim 138 | numTasks := 6 139 | for i := 0; i < numTasks; i++ { 140 | victim.Push(NewGoroutine(10*time.Millisecond, false)) 141 | } 142 | 143 | // Test local queue stealing 144 | t.Run("Local Queue Stealing", func(t *testing.T) { 145 | stolen := thief.tryStealFromProcessors() 146 | if len(stolen) == 0 { 147 | t.Error("Failed to steal from local queue") 148 | } 149 | 150 | stats := thief.GetStats() 151 | if stats.LocalSteals == 0 { 152 | t.Error("Local steal not recorded in metrics") 153 | } 154 | }) 155 | 156 | // Test global queue stealing 157 | t.Run("Global Queue Stealing", func(t *testing.T) { 158 | // Add tasks to global queue 159 | setup.globalQueue.Submit(NewGoroutine(10*time.Millisecond, false)) 160 | setup.globalQueue.Submit(NewGoroutine(10*time.Millisecond, false)) 161 | 162 | stolen := thief.tryStealFromGlobalQueue() 163 | if len(stolen) == 0 { 164 | t.Error("Failed to steal from global queue") 165 | } 166 | 167 | stats := thief.GetStats() 168 | if stats.GlobalSteals == 0 { 169 | t.Error("Global steal not recorded in metrics") 170 | } 171 | }) 172 | } 173 | 174 | func TestFindWork(t *testing.T) { 175 | setup := newTestSetup(t, 2) 176 | p := setup.processors[0] 177 | 178 | t.Run("Local Queue Priority", func(t *testing.T) { 179 | // Add task to local queue 180 | g := NewGoroutine(10*time.Millisecond, false) 181 | p.Push(g) 182 | 183 | found := p.FindWork() 184 | if found != g { 185 | t.Error("Should find work from local queue first") 186 | } 187 | }) 188 | 189 | t.Run("Work Stealing Order", func(t *testing.T) { 190 | // Add tasks to global queue 191 | setup.globalQueue.Submit(NewGoroutine(10*time.Millisecond, false)) 192 | 193 | found := p.FindWork() 194 | if found == nil { 195 | t.Error("Should find work from global queue") 196 | } 197 | }) 198 | } 199 | 200 | func TestProcessorExecution(t *testing.T) { 201 | setup := newTestSetup(t, 1) 202 | p := setup.processors[0] 203 | 204 | g := NewGoroutine(50*time.Millisecond, false) 205 | 206 | start := time.Now() 207 | p.Execute(g) 208 | duration := time.Since(start) 209 | 210 | if duration < g.Workload() { 211 | t.Errorf("Execution time %v shorter than workload %v", duration, g.Workload()) 212 | } 213 | 214 | stats := p.GetStats() 215 | if stats.TasksExecuted != 1 { 216 | t.Error("Task execution not recorded in metrics") 217 | } 218 | if p.State() != ProcessorIdle { 219 | t.Error("Processor should return to idle state") 220 | } 221 | if p.CurrentGoroutine() != nil { 222 | t.Error("Current goroutine should be nil after execution") 223 | } 224 | } 225 | 226 | func TestConcurrentOperations(t *testing.T) { 227 | setup := newTestSetup(t, 4) 228 | 229 | const numGoroutines = 100 230 | const opsPerGoroutine = 10 231 | 232 | var wg sync.WaitGroup 233 | wg.Add(numGoroutines) 234 | 235 | // Start concurrent operations 236 | for i := 0; i < numGoroutines; i++ { 237 | go func(id int) { 238 | defer wg.Done() 239 | p := setup.processors[id%len(setup.processors)] 240 | 241 | for j := 0; j < opsPerGoroutine; j++ { 242 | switch j % 3 { 243 | case 0: 244 | p.Push(NewGoroutine(time.Millisecond, false)) 245 | case 1: 246 | if g := p.FindWork(); g != nil { 247 | p.Execute(g) 248 | } 249 | case 2: 250 | p.tryStealFromGlobalQueue() 251 | } 252 | } 253 | }(i) 254 | } 255 | 256 | // Add some work to global queue 257 | for i := 0; i < numGoroutines/2; i++ { 258 | setup.globalQueue.Submit(NewGoroutine(time.Millisecond, false)) 259 | } 260 | 261 | // Wait with timeout 262 | done := make(chan bool) 263 | 
go func() { 264 | wg.Wait() 265 | done <- true 266 | }() 267 | 268 | select { 269 | case <-done: 270 | // Success 271 | case <-time.After(5 * time.Second): 272 | t.Fatal("Concurrent operations test timed out") 273 | } 274 | 275 | // Verify operations completed 276 | var totalExecuted uint64 277 | for _, p := range setup.processors { 278 | stats := p.GetStats() 279 | totalExecuted += stats.TasksExecuted 280 | } 281 | 282 | if totalExecuted == 0 { 283 | t.Error("No tasks were executed") 284 | } 285 | } 286 | 287 | func TestProcessorMetrics(t *testing.T) { 288 | setup := newTestSetup(t, 1) 289 | p := setup.processors[0] 290 | 291 | // Execute some tasks 292 | for i := 0; i < 3; i++ { 293 | g := NewGoroutine(10*time.Millisecond, false) 294 | p.Push(g) 295 | if g := p.Pop(); g != nil { 296 | p.Execute(g) 297 | } 298 | } 299 | 300 | stats := p.GetStats() 301 | if stats.TasksExecuted != 3 { 302 | t.Errorf("Expected 3 tasks executed, got %d", stats.TasksExecuted) 303 | } 304 | if stats.RunningTime == 0 { 305 | t.Error("Running time not recorded") 306 | } 307 | if stats.IdleTime == 0 { 308 | t.Error("Idle time not recorded") 309 | } 310 | } 311 | 312 | func TestProcessorStateTransitions(t *testing.T) { 313 | setup := newTestSetup(t, 1) 314 | p := setup.processors[0] 315 | 316 | states := []struct { 317 | state ProcessorState 318 | expected string 319 | }{ 320 | {ProcessorRunning, "running"}, 321 | {ProcessorStealing, "stealing"}, 322 | {ProcessorIdle, "idle"}, 323 | } 324 | 325 | for _, s := range states { 326 | t.Run(s.expected, func(t *testing.T) { 327 | p.SetState(s.state) 328 | 329 | if p.State() != s.state { 330 | t.Errorf("State = %v, want %v", p.State(), s.state) 331 | } 332 | if s.state.String() != s.expected { 333 | t.Errorf("State string = %v, want %v", s.state.String(), s.expected) 334 | } 335 | }) 336 | } 337 | } 338 | -------------------------------------------------------------------------------- /internal/poller/poller.go: -------------------------------------------------------------------------------- 1 | package poller 2 | 3 | import ( 4 | "context" 5 | "sync" 6 | "sync/atomic" 7 | "time" 8 | "workstealing/internal/core" 9 | ) 10 | 11 | // EventType represents different types of blocking events 12 | type EventType int32 13 | 14 | const ( 15 | EventRead EventType = iota 16 | EventWrite 17 | EventTimeout 18 | EventError 19 | ) 20 | 21 | const ( 22 | defaultEventBufferSize = 1000 23 | timeoutCheckInterval = 10 * time.Millisecond 24 | ) 25 | 26 | // Event represents a blocking operation 27 | type Event struct { 28 | ID uint64 29 | Type EventType 30 | Goroutine *core.Goroutine 31 | Result interface{} 32 | Error error 33 | Deadline time.Time 34 | Created time.Time 35 | ProcessorID uint32 36 | done chan struct{} 37 | } 38 | 39 | // BlockedGoroutineInfo tracks details of blocked goroutines 40 | type BlockedGoroutineInfo struct { 41 | StartTime time.Time 42 | EventType EventType 43 | ProcessorID uint32 44 | Deadline time.Time 45 | } 46 | 47 | // PollerMetrics represents runtime statistics 48 | type PollerMetrics struct { 49 | TotalEvents uint64 50 | CompletedEvents uint64 51 | Timeouts uint64 52 | Errors uint64 53 | CurrentlyBlocked int32 54 | AverageBlockTime time.Duration 55 | ActiveEvents int 56 | } 57 | 58 | // NetworkPoller manages blocking operations 59 | type NetworkPoller struct { 60 | events map[uint64]*Event // Active events 61 | processors []*core.Processor // Available processors 62 | 63 | metrics struct { 64 | totalEvents atomic.Uint64 65 | completedEvents atomic.Uint64 66 | 
timeouts atomic.Uint64 67 | errors atomic.Uint64 68 | avgBlockTime atomic.Int64 // nanoseconds 69 | currentBlocked atomic.Int32 70 | } 71 | 72 | blockedGoroutines map[uint64]*BlockedGoroutineInfo 73 | 74 | eventCh chan *Event // Channel for new events 75 | doneCh chan uint64 // Channel for completed events 76 | 77 | ctx context.Context 78 | cancel context.CancelFunc 79 | wg sync.WaitGroup 80 | running atomic.Bool 81 | 82 | mu sync.RWMutex // Protects maps and internal state 83 | } 84 | 85 | // NewNetworkPoller creates a new poller instance 86 | func NewNetworkPoller(processors []*core.Processor) *NetworkPoller { 87 | if len(processors) == 0 { 88 | return nil 89 | } 90 | 91 | ctx, cancel := context.WithCancel(context.Background()) 92 | 93 | return &NetworkPoller{ 94 | events: make(map[uint64]*Event), 95 | processors: processors, 96 | blockedGoroutines: make(map[uint64]*BlockedGoroutineInfo), 97 | eventCh: make(chan *Event, defaultEventBufferSize), 98 | doneCh: make(chan uint64, defaultEventBufferSize), 99 | ctx: ctx, 100 | cancel: cancel, 101 | } 102 | } 103 | 104 | // Start begins the polling operation 105 | func (np *NetworkPoller) Start() { 106 | if !np.running.CompareAndSwap(false, true) { 107 | return 108 | } 109 | 110 | np.wg.Add(2) 111 | go np.eventLoop() 112 | go np.timeoutChecker() 113 | } 114 | 115 | // Stop gracefully stops the poller 116 | func (np *NetworkPoller) Stop() { 117 | if !np.running.CompareAndSwap(true, false) { 118 | return 119 | } 120 | 121 | np.cancel() 122 | np.wg.Wait() 123 | 124 | // Clean up remaining events 125 | np.mu.Lock() 126 | defer np.mu.Unlock() 127 | 128 | for id, event := range np.events { 129 | np.handleTimeout(id, event) 130 | } 131 | 132 | // Clear channels 133 | for len(np.eventCh) > 0 { 134 | <-np.eventCh 135 | } 136 | for len(np.doneCh) > 0 { 137 | <-np.doneCh 138 | } 139 | } 140 | 141 | // Register adds a new blocking operation 142 | func (np *NetworkPoller) Register(g *core.Goroutine, eventType EventType, deadline time.Time, processorID uint32, done chan struct{}) { 143 | if g == nil || !np.running.Load() { 144 | return 145 | } 146 | 147 | event := &Event{ 148 | ID: g.ID(), 149 | Type: eventType, 150 | Goroutine: g, 151 | Deadline: deadline, 152 | Created: time.Now(), 153 | ProcessorID: processorID, 154 | done: done, 155 | } 156 | 157 | np.mu.Lock() 158 | np.events[g.ID()] = event 159 | np.blockedGoroutines[g.ID()] = &BlockedGoroutineInfo{ 160 | StartTime: event.Created, 161 | EventType: eventType, 162 | ProcessorID: processorID, 163 | Deadline: deadline, 164 | } 165 | np.mu.Unlock() 166 | 167 | np.metrics.totalEvents.Add(1) 168 | np.metrics.currentBlocked.Add(1) 169 | g.SetState(core.GoroutineBlocked) 170 | 171 | select { 172 | case np.eventCh <- event: 173 | default: 174 | // If channel is full, handle as timeout 175 | np.handleTimeout(g.ID(), event) 176 | } 177 | } 178 | 179 | // eventLoop processes events 180 | func (np *NetworkPoller) eventLoop() { 181 | defer np.wg.Done() 182 | 183 | for { 184 | select { 185 | case <-np.ctx.Done(): 186 | return 187 | 188 | case event := <-np.eventCh: 189 | go np.processEvent(event) 190 | 191 | case gid := <-np.doneCh: 192 | np.completeEvent(gid) 193 | } 194 | } 195 | } 196 | 197 | // processEvent simulates I/O operation 198 | func (np *NetworkPoller) processEvent(event *Event) { 199 | if event == nil { 200 | return 201 | } 202 | 203 | simulatedWork := time.Duration(float64(event.Goroutine.Workload()) * 0.8) 204 | 205 | select { 206 | case <-np.ctx.Done(): 207 | return 208 | case 
<-time.After(simulatedWork): 209 | np.doneCh <- event.ID 210 | } 211 | } 212 | 213 | // completeEvent handles event completion 214 | func (np *NetworkPoller) completeEvent(gid uint64) { 215 | np.mu.Lock() 216 | event, exists := np.events[gid] 217 | if !exists { 218 | np.mu.Unlock() 219 | return 220 | } 221 | 222 | delete(np.events, gid) 223 | delete(np.blockedGoroutines, gid) 224 | np.mu.Unlock() 225 | 226 | np.metrics.completedEvents.Add(1) 227 | np.metrics.currentBlocked.Add(-1) 228 | 229 | blockTime := time.Since(event.Created) 230 | np.updateAverageBlockTime(blockTime) 231 | 232 | event.Goroutine.SetState(core.GoroutineRunnable) 233 | event.Goroutine.SetSource(core.SourceNetworkPoller, event.ProcessorID) 234 | 235 | if processor := np.findLeastLoadedProcessor(); processor != nil { 236 | // processor.Push(event.Goroutine) 237 | event.done <- struct{}{} 238 | } 239 | } 240 | 241 | // timeoutChecker monitors for deadline expiration 242 | func (np *NetworkPoller) timeoutChecker() { 243 | defer np.wg.Done() 244 | 245 | ticker := time.NewTicker(timeoutCheckInterval) 246 | defer ticker.Stop() 247 | 248 | for { 249 | select { 250 | case <-np.ctx.Done(): 251 | return 252 | case <-ticker.C: 253 | np.checkTimeouts() 254 | } 255 | } 256 | } 257 | 258 | // checkTimeouts verifies deadlines 259 | func (np *NetworkPoller) checkTimeouts() { 260 | np.mu.Lock() 261 | defer np.mu.Unlock() 262 | 263 | now := time.Now() 264 | for id, event := range np.events { 265 | if now.After(event.Deadline) { 266 | np.handleTimeout(id, event) 267 | } 268 | } 269 | } 270 | 271 | // handleTimeout processes timeout events 272 | func (np *NetworkPoller) handleTimeout(id uint64, event *Event) { 273 | delete(np.events, id) 274 | delete(np.blockedGoroutines, id) 275 | 276 | np.metrics.timeouts.Add(1) 277 | np.metrics.currentBlocked.Add(-1) 278 | 279 | event.Type = EventTimeout 280 | event.Error = context.DeadlineExceeded 281 | event.Goroutine.SetState(core.GoroutineRunnable) 282 | event.Goroutine.SetSource(core.SourceNetworkPoller, 0) 283 | 284 | if processor := np.findLeastLoadedProcessor(); processor != nil { 285 | processor.Push(event.Goroutine) 286 | } 287 | } 288 | 289 | // findLeastLoadedProcessor returns processor with minimum queue size 290 | func (np *NetworkPoller) findLeastLoadedProcessor() *core.Processor { 291 | var minLoad = int(^uint(0) >> 1) 292 | var target *core.Processor 293 | 294 | for _, p := range np.processors { 295 | if size := p.QueueSize(); size < minLoad { 296 | minLoad = size 297 | target = p 298 | } 299 | } 300 | 301 | return target 302 | } 303 | 304 | // updateAverageBlockTime updates running average of block times 305 | func (np *NetworkPoller) updateAverageBlockTime(blockTime time.Duration) { 306 | current := time.Duration(np.metrics.avgBlockTime.Load()) 307 | completed := np.metrics.completedEvents.Load() 308 | 309 | if completed == 1 { 310 | np.metrics.avgBlockTime.Store(int64(blockTime)) 311 | return 312 | } 313 | 314 | newAvg := (current*time.Duration(completed-1) + blockTime) / time.Duration(completed) 315 | np.metrics.avgBlockTime.Store(int64(newAvg)) 316 | } 317 | 318 | // GetMetrics returns current statistics 319 | func (np *NetworkPoller) GetMetrics() PollerMetrics { 320 | np.mu.RLock() 321 | activeEvents := len(np.events) 322 | np.mu.RUnlock() 323 | 324 | return PollerMetrics{ 325 | TotalEvents: np.metrics.totalEvents.Load(), 326 | CompletedEvents: np.metrics.completedEvents.Load(), 327 | Timeouts: np.metrics.timeouts.Load(), 328 | Errors: np.metrics.errors.Load(), 329 | 
CurrentlyBlocked: np.metrics.currentBlocked.Load(), 330 | AverageBlockTime: time.Duration(np.metrics.avgBlockTime.Load()), 331 | ActiveEvents: activeEvents, 332 | } 333 | } 334 | 335 | // GetBlockedGoroutineInfo returns information about a blocked goroutine 336 | func (np *NetworkPoller) GetBlockedGoroutineInfo(id uint64) *BlockedGoroutineInfo { 337 | np.mu.RLock() 338 | defer np.mu.RUnlock() 339 | return np.blockedGoroutines[id] 340 | } 341 | 342 | func (et EventType) String() string { 343 | switch et { 344 | case EventRead: 345 | return "read" 346 | case EventWrite: 347 | return "write" 348 | case EventTimeout: 349 | return "timeout" 350 | case EventError: 351 | return "error" 352 | default: 353 | return "unknown" 354 | } 355 | } 356 | -------------------------------------------------------------------------------- /internal/poller/poller_test.go: -------------------------------------------------------------------------------- 1 | package poller 2 | 3 | import ( 4 | "sync" 5 | "testing" 6 | "time" 7 | "workstealing/internal/core" 8 | ) 9 | 10 | type TestSetup struct { 11 | processors []*core.Processor 12 | poller *NetworkPoller 13 | globalQueue *core.GlobalQueue 14 | } 15 | 16 | func newTestSetup(t *testing.T) *TestSetup { 17 | globalQueue := core.NewGlobalQueue(1000) 18 | if globalQueue == nil { 19 | t.Fatal("Failed to create global queue") 20 | } 21 | 22 | processors := []*core.Processor{ 23 | core.NewProcessor(1, 100, globalQueue), 24 | core.NewProcessor(2, 100, globalQueue), 25 | } 26 | 27 | // Set processor list for work stealing 28 | for _, p := range processors { 29 | p.SetProcessors(processors) 30 | } 31 | 32 | poller := NewNetworkPoller(processors) 33 | if poller == nil { 34 | t.Fatal("Failed to create network poller") 35 | } 36 | 37 | return &TestSetup{ 38 | processors: processors, 39 | poller: poller, 40 | globalQueue: globalQueue, 41 | } 42 | } 43 | 44 | func TestPollerInitialization(t *testing.T) { 45 | tests := []struct { 46 | name string 47 | processors []*core.Processor 48 | wantNil bool 49 | }{ 50 | { 51 | name: "Valid initialization", 52 | processors: []*core.Processor{core.NewProcessor(1, 100, core.NewGlobalQueue(1000))}, 53 | wantNil: false, 54 | }, 55 | { 56 | name: "Empty processors", 57 | processors: []*core.Processor{}, 58 | wantNil: true, 59 | }, 60 | { 61 | name: "Nil processors", 62 | processors: nil, 63 | wantNil: true, 64 | }, 65 | } 66 | 67 | for _, tt := range tests { 68 | t.Run(tt.name, func(t *testing.T) { 69 | poller := NewNetworkPoller(tt.processors) 70 | if (poller == nil) != tt.wantNil { 71 | t.Errorf("NewNetworkPoller() nil = %v, want %v", poller == nil, tt.wantNil) 72 | } 73 | 74 | if poller != nil { 75 | if poller.events == nil { 76 | t.Error("Events map not initialized") 77 | } 78 | if poller.blockedGoroutines == nil { 79 | t.Error("BlockedGoroutines map not initialized") 80 | } 81 | } 82 | }) 83 | } 84 | } 85 | 86 | func TestPollerStartStop(t *testing.T) { 87 | setup := newTestSetup(t) 88 | 89 | // Test Start 90 | setup.poller.Start() 91 | if !setup.poller.running.Load() { 92 | t.Error("Poller should be running after Start") 93 | } 94 | 95 | // Test double Start 96 | setup.poller.Start() // Should not panic 97 | if !setup.poller.running.Load() { 98 | t.Error("Poller should remain running after second Start") 99 | } 100 | 101 | // Test Stop 102 | setup.poller.Stop() 103 | if setup.poller.running.Load() { 104 | t.Error("Poller should not be running after Stop") 105 | } 106 | } 107 | 108 | func TestPollerRegistration(t *testing.T) { 109 | setup := 
newTestSetup(t) 110 | setup.poller.Start() 111 | defer setup.poller.Stop() 112 | 113 | g := core.NewGoroutine(100*time.Millisecond, true) 114 | deadline := time.Now().Add(200 * time.Millisecond) 115 | done := make(chan struct{}) 116 | 117 | // Register goroutine 118 | setup.poller.Register(g, EventRead, deadline, setup.processors[0].ID(), done) 119 | 120 | // Start a goroutine to wait for completion signal 121 | completed := make(chan bool) 122 | go func() { 123 | <-done 124 | completed <- true 125 | }() 126 | 127 | // Wait for completion or timeout 128 | select { 129 | case <-completed: 130 | // Verify goroutine state after completion 131 | if g.State() != core.GoroutineRunnable { 132 | t.Errorf("Expected goroutine state Runnable, got %v", g.State()) 133 | } 134 | case <-time.After(300 * time.Millisecond): 135 | t.Fatal("Registration test timed out") 136 | } 137 | 138 | metrics := setup.poller.GetMetrics() 139 | if metrics.CompletedEvents != 1 { 140 | t.Errorf("Expected 1 completed event, got %d", metrics.CompletedEvents) 141 | } 142 | } 143 | 144 | func TestPollerTimeout(t *testing.T) { 145 | setup := newTestSetup(t) 146 | setup.poller.Start() 147 | defer setup.poller.Stop() 148 | 149 | g := core.NewGoroutine(100*time.Millisecond, true) 150 | deadline := time.Now().Add(50 * time.Millisecond) 151 | 152 | done := make(chan struct{}) 153 | setup.poller.Register(g, EventRead, deadline, setup.processors[0].ID(), done) 154 | 155 | // Wait for timeout with buffer 156 | time.Sleep(75 * time.Millisecond) 157 | 158 | metrics := setup.poller.GetMetrics() 159 | if metrics.Timeouts != 1 { 160 | t.Errorf("Expected 1 timeout, got %d", metrics.Timeouts) 161 | } 162 | 163 | if metrics.CurrentlyBlocked != 0 { 164 | t.Errorf("Expected 0 currently blocked, got %d", metrics.CurrentlyBlocked) 165 | } 166 | 167 | if info := setup.poller.GetBlockedGoroutineInfo(g.ID()); info != nil { 168 | t.Error("Goroutine still marked as blocked after timeout") 169 | } 170 | } 171 | 172 | func TestPollerEventCompletion(t *testing.T) { 173 | setup := newTestSetup(t) 174 | setup.poller.Start() 175 | defer setup.poller.Stop() 176 | 177 | g := core.NewGoroutine(50*time.Millisecond, true) 178 | deadline := time.Now().Add(200 * time.Millisecond) 179 | done := make(chan struct{}) 180 | 181 | setup.poller.Register(g, EventRead, deadline, setup.processors[0].ID(), done) 182 | 183 | // Wait for completion signal 184 | select { 185 | case <-done: 186 | // Success case 187 | if g.State() != core.GoroutineRunnable { 188 | t.Errorf("Expected goroutine state Runnable, got %v", g.State()) 189 | } 190 | case <-time.After(250 * time.Millisecond): 191 | t.Fatal("Event completion test timed out") 192 | } 193 | 194 | metrics := setup.poller.GetMetrics() 195 | if metrics.CompletedEvents != 1 { 196 | t.Errorf("Expected 1 completed event, got %d", metrics.CompletedEvents) 197 | } 198 | } 199 | 200 | func TestConcurrentOperations(t *testing.T) { 201 | setup := newTestSetup(t) 202 | setup.poller.Start() 203 | defer setup.poller.Stop() 204 | 205 | const numGoroutines = 50 206 | var wg sync.WaitGroup 207 | wg.Add(numGoroutines) 208 | 209 | for i := 0; i < numGoroutines; i++ { 210 | go func() { 211 | defer wg.Done() 212 | g := core.NewGoroutine(20*time.Millisecond, true) 213 | deadline := time.Now().Add(100 * time.Millisecond) 214 | done := make(chan struct{}) 215 | 216 | setup.poller.Register(g, EventRead, deadline, setup.processors[0].ID(), done) 217 | 218 | // Wait for completion signal 219 | select { 220 | case <-done: 221 | // Verify goroutine is 
in correct state 222 | if g.State() != core.GoroutineRunnable { 223 | t.Errorf("Expected goroutine state Runnable, got %v", g.State()) 224 | } 225 | case <-time.After(150 * time.Millisecond): 226 | t.Errorf("Operation timed out for goroutine") 227 | } 228 | }() 229 | } 230 | 231 | // Wait for all goroutines to complete 232 | waitChan := make(chan bool) 233 | go func() { 234 | wg.Wait() 235 | waitChan <- true 236 | }() 237 | 238 | select { 239 | case <-waitChan: 240 | // All goroutines completed successfully 241 | case <-time.After(2 * time.Second): 242 | t.Fatal("Concurrent operations test timed out") 243 | } 244 | 245 | // Optional: verify no goroutines are left in blocked state 246 | time.Sleep(50 * time.Millisecond) // Small buffer for cleanup 247 | if len(setup.poller.events) > 0 { 248 | t.Errorf("Expected all events to be cleared, found %d remaining", len(setup.poller.events)) 249 | } 250 | } 251 | 252 | func TestMultipleGoroutineHandling(t *testing.T) { 253 | setup := newTestSetup(t) 254 | setup.poller.Start() 255 | defer setup.poller.Stop() 256 | 257 | g1 := core.NewGoroutine(30*time.Millisecond, true) 258 | g2 := core.NewGoroutine(30*time.Millisecond, true) 259 | 260 | done1 := make(chan struct{}) 261 | done2 := make(chan struct{}) 262 | 263 | // Register both goroutines 264 | setup.poller.Register(g1, EventRead, time.Now().Add(100*time.Millisecond), setup.processors[0].ID(), done1) 265 | setup.poller.Register(g2, EventRead, time.Now().Add(100*time.Millisecond), setup.processors[0].ID(), done2) 266 | 267 | // Use WaitGroup to track completions 268 | var wg sync.WaitGroup 269 | wg.Add(2) 270 | 271 | // Handle completion for g1 272 | go func() { 273 | defer wg.Done() 274 | select { 275 | case <-done1: 276 | if g1.State() != core.GoroutineRunnable { 277 | t.Errorf("G1: Expected state Runnable, got %v", g1.State()) 278 | } 279 | case <-time.After(500 * time.Millisecond): 280 | t.Error("Timeout waiting for g1") 281 | } 282 | }() 283 | 284 | // Handle completion for g2 285 | go func() { 286 | defer wg.Done() 287 | select { 288 | case <-done2: 289 | if g2.State() != core.GoroutineRunnable { 290 | t.Errorf("G2: Expected state Runnable, got %v", g2.State()) 291 | } 292 | case <-time.After(500 * time.Millisecond): 293 | t.Error("Timeout waiting for g2") 294 | } 295 | }() 296 | 297 | // Wait for both completions with timeout 298 | done := make(chan struct{}) 299 | go func() { 300 | wg.Wait() 301 | close(done) 302 | }() 303 | 304 | select { 305 | case <-done: 306 | // Success case 307 | case <-time.After(1 * time.Second): 308 | t.Fatal("Test timed out waiting for goroutine completion") 309 | } 310 | 311 | // Give some time for cleanup 312 | time.Sleep(50 * time.Millisecond) 313 | 314 | // Verify cleanup 315 | if len(setup.poller.events) != 0 { 316 | t.Errorf("Expected all events to be cleared, found %d remaining", 317 | len(setup.poller.events)) 318 | } 319 | 320 | if len(setup.poller.blockedGoroutines) != 0 { 321 | t.Errorf("Expected no blocked goroutines, found %d remaining", 322 | len(setup.poller.blockedGoroutines)) 323 | } 324 | } 325 | 326 | func TestEventTypeString(t *testing.T) { 327 | tests := []struct { 328 | eventType EventType 329 | want string 330 | }{ 331 | {EventRead, "read"}, 332 | {EventWrite, "write"}, 333 | {EventTimeout, "timeout"}, 334 | {EventError, "error"}, 335 | {EventType(99), "unknown"}, 336 | } 337 | 338 | for _, tt := range tests { 339 | t.Run(tt.want, func(t *testing.T) { 340 | if got := tt.eventType.String(); got != tt.want { 341 | 
t.Errorf("EventType(%d).String() = %v, want %v", 342 | tt.eventType, got, tt.want) 343 | } 344 | }) 345 | } 346 | } 347 | -------------------------------------------------------------------------------- /internal/scheduler/scheduler.go: -------------------------------------------------------------------------------- 1 | package scheduler 2 | 3 | import ( 4 | "context" 5 | "math/rand" 6 | "sync" 7 | "sync/atomic" 8 | "time" 9 | "workstealing/internal/core" 10 | "workstealing/internal/poller" 11 | ) 12 | 13 | type SchedulerState int32 14 | 15 | const ( 16 | SchedulerStopped SchedulerState = iota 17 | SchedulerRunning 18 | SchedulerStopping 19 | ) 20 | 21 | // SchedulerStats holds runtime statistics 22 | type SchedulerStats struct { 23 | TasksScheduled uint64 24 | TasksCompleted uint64 25 | TotalSteals uint64 26 | GlobalQueueSteals uint64 27 | LocalQueueSteals uint64 28 | RunningTime time.Duration 29 | ProcessorMetrics []core.ProcessorStats 30 | GlobalQueueStats core.GlobalQueueStats 31 | PollerMetrics poller.PollerMetrics 32 | } 33 | 34 | // Scheduler manages task distribution and execution 35 | type Scheduler struct { 36 | state atomic.Int32 37 | globalQueue *core.GlobalQueue 38 | processors []*core.Processor 39 | networkPoller *poller.NetworkPoller 40 | 41 | // Task tracking 42 | activeTasksCount atomic.Int32 43 | blockingTasks sync.Map // tracks blocking tasks by ID 44 | 45 | metrics struct { 46 | tasksScheduled atomic.Uint64 47 | tasksCompleted atomic.Uint64 48 | totalSteals atomic.Uint64 49 | globalQueueSteals atomic.Uint64 50 | localQueueSteals atomic.Uint64 51 | startTime time.Time 52 | } 53 | 54 | mu sync.RWMutex 55 | wg sync.WaitGroup 56 | ctx context.Context 57 | cancel context.CancelFunc 58 | rand *rand.Rand 59 | } 60 | 61 | func NewScheduler(numProcessors int, globalQueueSize int32) *Scheduler { 62 | if numProcessors <= 0 { 63 | numProcessors = 1 64 | } 65 | 66 | ctx, cancel := context.WithCancel(context.Background()) 67 | 68 | globalQueue := core.NewGlobalQueue(globalQueueSize) 69 | if globalQueue == nil { 70 | cancel() 71 | return nil 72 | } 73 | 74 | s := &Scheduler{ 75 | globalQueue: globalQueue, 76 | processors: make([]*core.Processor, numProcessors), 77 | ctx: ctx, 78 | cancel: cancel, 79 | rand: rand.New(rand.NewSource(time.Now().UnixNano())), 80 | } 81 | 82 | // Initialize processors 83 | for i := 0; i < numProcessors; i++ { 84 | processor := core.NewProcessor(uint32(i), 100, globalQueue) 85 | if processor == nil { 86 | cancel() 87 | return nil 88 | } 89 | s.processors[i] = processor 90 | } 91 | 92 | // Set up processor relationships 93 | for _, p := range s.processors { 94 | p.SetProcessors(s.processors) 95 | } 96 | 97 | // Initialize network poller 98 | s.networkPoller = poller.NewNetworkPoller(s.processors) 99 | if s.networkPoller == nil { 100 | cancel() 101 | return nil 102 | } 103 | 104 | s.state.Store(int32(SchedulerStopped)) 105 | return s 106 | } 107 | 108 | func (s *Scheduler) Start() error { 109 | if !s.state.CompareAndSwap(int32(SchedulerStopped), int32(SchedulerRunning)) { 110 | return nil 111 | } 112 | 113 | s.metrics.startTime = time.Now() 114 | s.networkPoller.Start() 115 | 116 | for i := range s.processors { 117 | s.wg.Add(1) 118 | go s.runProcessor(s.processors[i]) 119 | } 120 | 121 | return nil 122 | } 123 | 124 | func (s *Scheduler) Stop() { 125 | if !s.state.CompareAndSwap(int32(SchedulerRunning), int32(SchedulerStopping)) { 126 | return 127 | } 128 | 129 | s.cancel() 130 | s.networkPoller.Stop() 131 | s.wg.Wait() 132 | 
s.state.Store(int32(SchedulerStopped)) 133 | } 134 | 135 | func (s *Scheduler) Submit(g *core.Goroutine) bool { 136 | if s.state.Load() != int32(SchedulerRunning) || g == nil { 137 | return false 138 | } 139 | 140 | // Track task 141 | s.activeTasksCount.Add(1) 142 | s.metrics.tasksScheduled.Add(1) 143 | 144 | // Try direct processor assignment (80% probability) 145 | if s.rand.Float64() < 0.8 { 146 | processor := s.processors[s.rand.Intn(len(s.processors))] 147 | if processor.Push(g) { 148 | return true 149 | } 150 | } 151 | 152 | // Fall back to global queue 153 | return s.globalQueue.Submit(g) 154 | } 155 | 156 | func (s *Scheduler) runProcessor(p *core.Processor) { 157 | defer s.wg.Done() 158 | 159 | for { 160 | select { 161 | case <-s.ctx.Done(): 162 | return 163 | default: 164 | if g := p.FindWork(); g != nil { 165 | s.processTask(p, g) 166 | } else { 167 | time.Sleep(time.Millisecond) 168 | } 169 | } 170 | } 171 | } 172 | 173 | func (s *Scheduler) processTask(p *core.Processor, g *core.Goroutine) { 174 | // Track steal metrics first 175 | if g.Source() == core.SourceStolen { 176 | s.metrics.totalSteals.Add(1) 177 | s.metrics.localQueueSteals.Add(1) 178 | } else if g.Source() == core.SourceGlobalQueue { 179 | s.metrics.globalQueueSteals.Add(1) 180 | } 181 | 182 | if g.IsBlocking() { 183 | // Check if task is already being handled 184 | if _, exists := s.blockingTasks.LoadOrStore(g.ID(), true); !exists { 185 | // Register with poller only if not already registered 186 | deadline := time.Now().Add(g.Workload()) 187 | done := make(chan struct{}, 1) 188 | s.networkPoller.Register(g, poller.EventRead, deadline, p.ID(), done) 189 | 190 | select { 191 | case <-done: 192 | s.blockingTasks.Delete(g.ID()) 193 | // p.Execute(g) 194 | // s.metrics.tasksCompleted.Add(1) 195 | if g.State() == core.GoroutineRunnable { 196 | p.Execute(g) 197 | s.metrics.tasksCompleted.Add(1) 198 | s.activeTasksCount.Add(-1) 199 | } 200 | case <-s.ctx.Done(): 201 | return 202 | } 203 | } 204 | } else { 205 | // Execute non-blocking task 206 | p.Execute(g) 207 | s.metrics.tasksCompleted.Add(1) 208 | s.activeTasksCount.Add(-1) 209 | } 210 | } 211 | 212 | func (s *Scheduler) GetStats() SchedulerStats { 213 | s.mu.RLock() 214 | defer s.mu.RUnlock() 215 | 216 | stats := SchedulerStats{ 217 | TasksScheduled: s.metrics.tasksScheduled.Load(), 218 | TasksCompleted: s.metrics.tasksCompleted.Load(), 219 | TotalSteals: s.metrics.totalSteals.Load(), 220 | GlobalQueueSteals: s.metrics.globalQueueSteals.Load(), 221 | LocalQueueSteals: s.metrics.localQueueSteals.Load(), 222 | RunningTime: time.Since(s.metrics.startTime), 223 | ProcessorMetrics: make([]core.ProcessorStats, len(s.processors)), 224 | GlobalQueueStats: s.globalQueue.Stats(), 225 | PollerMetrics: s.networkPoller.GetMetrics(), 226 | } 227 | 228 | for i, p := range s.processors { 229 | stats.ProcessorMetrics[i] = p.GetStats() 230 | } 231 | 232 | return stats 233 | } 234 | 235 | func (s *Scheduler) GetProcessors() []*core.Processor { 236 | s.mu.RLock() 237 | defer s.mu.RUnlock() 238 | return s.processors 239 | } 240 | 241 | func (s *Scheduler) State() SchedulerState { 242 | return SchedulerState(s.state.Load()) 243 | } 244 | 245 | func (s SchedulerState) String() string { 246 | switch s { 247 | case SchedulerStopped: 248 | return "stopped" 249 | case SchedulerRunning: 250 | return "running" 251 | case SchedulerStopping: 252 | return "stopping" 253 | default: 254 | return "unknown" 255 | } 256 | } 257 | 
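The scheduler above exposes a small public surface: NewScheduler, Start, Submit, GetStats, GetProcessors, State, and Stop. Below is a minimal sketch of driving that surface end to end, including the blocking path through the network poller; the processor count, queue capacity, and workloads are illustrative values chosen for the sketch, not project defaults.

```go
package main

import (
	"fmt"
	"time"

	"workstealing/internal/core"
	"workstealing/internal/scheduler"
)

func main() {
	// Assumed sizing for this sketch: 4 processors, global queue capacity 1000.
	s := scheduler.NewScheduler(4, 1000)
	if err := s.Start(); err != nil {
		panic(err)
	}
	defer s.Stop()

	// A blocking goroutine is parked in the network poller and executed once its
	// simulated I/O finishes; a non-blocking one runs directly on a processor.
	s.Submit(core.NewGoroutine(20*time.Millisecond, true))  // blocking
	s.Submit(core.NewGoroutine(10*time.Millisecond, false)) // non-blocking

	// Poll the runtime statistics until both tasks are accounted for.
	for s.GetStats().TasksCompleted < 2 {
		time.Sleep(10 * time.Millisecond)
	}

	stats := s.GetStats()
	fmt.Printf("completed=%d steals=%d poller events=%d\n",
		stats.TasksCompleted, stats.TotalSteals, stats.PollerMetrics.TotalEvents)
}
```

This is essentially the flow that scheduler_test.go below exercises: Submit followed by a polling wait on task state or stats.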
-------------------------------------------------------------------------------- /internal/scheduler/scheduler_test.go: -------------------------------------------------------------------------------- 1 | package scheduler 2 | 3 | import ( 4 | "math/rand" 5 | "sync" 6 | "sync/atomic" 7 | "testing" 8 | "time" 9 | "workstealing/internal/core" 10 | ) 11 | 12 | type testSetup struct { 13 | scheduler *Scheduler 14 | cleanup func() 15 | } 16 | 17 | func newTestSetup(t *testing.T, numProcessors int) *testSetup { 18 | s := NewScheduler(numProcessors, 1000) 19 | if s == nil { 20 | t.Fatal("Failed to create scheduler") 21 | } 22 | 23 | return &testSetup{ 24 | scheduler: s, 25 | cleanup: func() { 26 | if s.State() == SchedulerRunning { 27 | s.Stop() 28 | } 29 | }, 30 | } 31 | } 32 | 33 | func waitForTaskCompletion(t *testing.T, g *core.Goroutine, timeout time.Duration) bool { 34 | t.Helper() 35 | deadline := time.After(timeout) 36 | ticker := time.NewTicker(10 * time.Millisecond) 37 | defer ticker.Stop() 38 | 39 | for { 40 | select { 41 | case <-deadline: 42 | t.Logf("Task %d timed out in state: %v", g.ID(), g.State()) 43 | return false 44 | case <-ticker.C: 45 | if g.State() == core.GoroutineRunnable { 46 | return true 47 | } 48 | } 49 | } 50 | } 51 | 52 | // Helper function for waiting for multiple tasks 53 | func waitForTasks(t *testing.T, tasks []*core.Goroutine, timeout time.Duration) bool { 54 | t.Helper() 55 | deadline := time.After(timeout) 56 | ticker := time.NewTicker(10 * time.Millisecond) 57 | defer ticker.Stop() 58 | 59 | for { 60 | select { 61 | case <-deadline: 62 | for _, g := range tasks { 63 | t.Logf("Task %d state: %v", g.ID(), g.State()) 64 | } 65 | return false 66 | case <-ticker.C: 67 | completed := 0 68 | for _, g := range tasks { 69 | state := g.State() 70 | if state == core.GoroutineFinished { 71 | completed++ 72 | } 73 | } 74 | if completed == len(tasks) { 75 | return true 76 | } 77 | t.Logf("Progress: %d/%d tasks completed", completed, len(tasks)) 78 | } 79 | } 80 | } 81 | 82 | func TestSchedulerCreation(t *testing.T) { 83 | tests := []struct { 84 | name string 85 | numProcessors int 86 | queueSize int32 87 | wantNil bool 88 | }{ 89 | { 90 | name: "Valid creation", 91 | numProcessors: 4, 92 | queueSize: 1000, 93 | wantNil: false, 94 | }, 95 | { 96 | name: "Zero processors defaults to 1", 97 | numProcessors: 0, 98 | queueSize: 1000, 99 | wantNil: false, 100 | }, 101 | { 102 | name: "Negative processors defaults to 1", 103 | numProcessors: -1, 104 | queueSize: 0, 105 | wantNil: false, 106 | }, 107 | } 108 | 109 | for _, tt := range tests { 110 | t.Run(tt.name, func(t *testing.T) { 111 | s := NewScheduler(tt.numProcessors, tt.queueSize) 112 | if (s == nil) != tt.wantNil { 113 | t.Errorf("NewScheduler() nil = %v, want %v", s == nil, tt.wantNil) 114 | } 115 | 116 | if s != nil { 117 | expectedProcessors := tt.numProcessors 118 | if expectedProcessors <= 0 { 119 | expectedProcessors = 1 120 | } 121 | if len(s.processors) != expectedProcessors { 122 | t.Errorf("Expected %d processors, got %d", 123 | expectedProcessors, len(s.processors)) 124 | } 125 | } 126 | }) 127 | } 128 | } 129 | 130 | func TestSchedulerStartStop(t *testing.T) { 131 | setup := newTestSetup(t, 2) 132 | defer setup.cleanup() 133 | 134 | if err := setup.scheduler.Start(); err != nil { 135 | t.Errorf("Start() error = %v", err) 136 | } 137 | 138 | if setup.scheduler.State() != SchedulerRunning { 139 | t.Errorf("After Start(): state = %v, want %v", 140 | setup.scheduler.State(), SchedulerRunning) 141 | } 142 | 143 | 
// Test double Start 144 | if err := setup.scheduler.Start(); err != nil { 145 | t.Error("Second Start() should not return error") 146 | } 147 | 148 | setup.scheduler.Stop() 149 | if setup.scheduler.State() != SchedulerStopped { 150 | t.Errorf("After Stop(): state = %v, want %v", 151 | setup.scheduler.State(), SchedulerStopped) 152 | } 153 | 154 | // Test double Stop 155 | setup.scheduler.Stop() // Should not panic 156 | } 157 | 158 | func TestSchedulerTaskSubmission(t *testing.T) { 159 | setup := newTestSetup(t, 2) 160 | defer setup.cleanup() 161 | 162 | if err := setup.scheduler.Start(); err != nil { 163 | t.Fatal(err) 164 | } 165 | 166 | t.Run("Non-blocking Task", func(t *testing.T) { 167 | g := core.NewGoroutine(10*time.Millisecond, false) 168 | if !setup.scheduler.Submit(g) { 169 | t.Error("Submit should succeed") 170 | } 171 | 172 | // Wait longer for task completion 173 | deadline := time.After(100 * time.Millisecond) 174 | ticker := time.NewTicker(5 * time.Millisecond) 175 | defer ticker.Stop() 176 | 177 | for { 178 | select { 179 | case <-deadline: 180 | t.Errorf("Non-blocking task did not complete in time, state: %v", g.State()) 181 | return 182 | case <-ticker.C: 183 | state := g.State() 184 | if state == core.GoroutineFinished || state == core.GoroutineRunnable { 185 | return // Success 186 | } 187 | } 188 | } 189 | }) 190 | 191 | t.Run("Blocking Task", func(t *testing.T) { 192 | g := core.NewGoroutine(20*time.Millisecond, true) 193 | if !setup.scheduler.Submit(g) { 194 | t.Error("Submit should succeed") 195 | } 196 | 197 | deadline := time.After(500 * time.Millisecond) 198 | ticker := time.NewTicker(5 * time.Millisecond) 199 | defer ticker.Stop() 200 | 201 | for { 202 | select { 203 | case <-deadline: 204 | t.Errorf("Blocking task did not complete in time, state: %v", g.State()) 205 | return 206 | case <-ticker.C: 207 | state := g.State() 208 | if state == core.GoroutineFinished { 209 | return // Success 210 | } 211 | t.Logf("Current state: %v", state) 212 | } 213 | } 214 | }) 215 | 216 | t.Run("Nil Task", func(t *testing.T) { 217 | if setup.scheduler.Submit(nil) { 218 | t.Error("Nil submission should fail") 219 | } 220 | }) 221 | } 222 | 223 | // Add new test for load distribution 224 | func TestSchedulerLoadDistribution(t *testing.T) { 225 | setup := newTestSetup(t, 4) 226 | defer setup.cleanup() 227 | 228 | if err := setup.scheduler.Start(); err != nil { 229 | t.Fatal(err) 230 | } 231 | 232 | const tasksPerProcessor = 10 233 | totalTasks := len(setup.scheduler.processors) * tasksPerProcessor 234 | tasks := make([]*core.Goroutine, totalTasks) 235 | 236 | // Submit tasks with varying durations 237 | for i := 0; i < totalTasks; i++ { 238 | duration := time.Duration(20+i*5) * time.Millisecond 239 | tasks[i] = core.NewGoroutine(duration, i%2 == 0) 240 | if !setup.scheduler.Submit(tasks[i]) { 241 | t.Fatalf("Failed to submit task %d", i) 242 | } 243 | } 244 | 245 | if !waitForTasks(t, tasks, 5*time.Second) { 246 | t.Fatal("Tasks did not complete in time") 247 | } 248 | 249 | // Verify load distribution 250 | stats := setup.scheduler.GetStats() 251 | // Verify load distribution across processors 252 | processorLoads := make([]uint64, len(setup.scheduler.processors)) 253 | for i := range setup.scheduler.processors { 254 | processorLoads[i] = stats.ProcessorMetrics[i].TasksExecuted 255 | } 256 | 257 | // Check for reasonable load distribution 258 | var minLoad, maxLoad uint64 259 | minLoad = ^uint64(0) // Set to max possible value 260 | maxLoad = 0 261 | 262 | for _, load := range 
processorLoads { 263 | if load < minLoad { 264 | minLoad = load 265 | } 266 | if load > maxLoad { 267 | maxLoad = load 268 | } 269 | } 270 | 271 | // Verify that load difference is not too extreme 272 | if minLoad < uint64(tasksPerProcessor/2) { 273 | t.Errorf("Some processors underutilized. Min load: %d, expected at least: %d", 274 | minLoad, tasksPerProcessor/2) 275 | } 276 | 277 | loadDiff := maxLoad - minLoad 278 | if loadDiff > uint64(tasksPerProcessor) { 279 | t.Errorf("Load imbalance too high. Difference between max and min: %d", loadDiff) 280 | } 281 | } 282 | 283 | // Add test for work stealing 284 | func TestSchedulerWorkStealing(t *testing.T) { 285 | setup := newTestSetup(t, 4) 286 | defer setup.cleanup() 287 | 288 | if err := setup.scheduler.Start(); err != nil { 289 | t.Fatal(err) 290 | } 291 | 292 | // Create imbalanced load 293 | longTasks := make([]*core.Goroutine, 5) 294 | shortTasks := make([]*core.Goroutine, 15) 295 | 296 | // Submit long tasks to first processor 297 | for i := range longTasks { 298 | longTasks[i] = core.NewGoroutine(100*time.Millisecond, true) 299 | setup.scheduler.processors[0].Push(longTasks[i]) // Direct push to processor 300 | } 301 | 302 | // Submit short tasks 303 | for i := range shortTasks { 304 | shortTasks[i] = core.NewGoroutine(20*time.Millisecond, false) 305 | setup.scheduler.Submit(shortTasks[i]) 306 | } 307 | 308 | allTasks := append(longTasks, shortTasks...) 309 | if !waitForTasks(t, allTasks, 5*time.Second) { 310 | t.Fatal("Work stealing didn't complete tasks in time") 311 | } 312 | } 313 | 314 | // Add test for processor state transitions 315 | func TestProcessorStateTransitions(t *testing.T) { 316 | setup := newTestSetup(t, 2) 317 | defer setup.cleanup() 318 | 319 | // Test transitions before start 320 | for i := range setup.scheduler.processors { 321 | if setup.scheduler.processors[i].State() != core.ProcessorIdle { // Changed from ProcessorIdle to core.ProcessorIdle 322 | t.Errorf("Processor %d should be idle before start", i) 323 | } 324 | } 325 | 326 | if err := setup.scheduler.Start(); err != nil { 327 | t.Fatal(err) 328 | } 329 | 330 | // Test transitions during operation 331 | task := core.NewGoroutine(50*time.Millisecond, true) 332 | if !setup.scheduler.Submit(task) { 333 | t.Fatal("Failed to submit task") 334 | } 335 | 336 | time.Sleep(50 * time.Millisecond) // Give more time for state transition 337 | 338 | // Check processor states multiple times 339 | for attempt := 0; attempt < 5; attempt++ { 340 | running := false 341 | for _, p := range setup.scheduler.processors { 342 | state := p.State() 343 | if state == core.ProcessorRunning { 344 | running = true 345 | break 346 | } 347 | t.Logf("Processor state: %v", state) 348 | } 349 | if running { 350 | return // Success 351 | } 352 | time.Sleep(10 * time.Millisecond) 353 | } 354 | t.Error("No processor transitioned to running state") 355 | } 356 | 357 | // Add new test for processor interaction 358 | func TestProcessorInteraction(t *testing.T) { 359 | setup := newTestSetup(t, 4) 360 | defer setup.cleanup() 361 | 362 | if err := setup.scheduler.Start(); err != nil { 363 | t.Fatal(err) 364 | } 365 | 366 | // Test processor-to-processor stealing 367 | t.Run("Processor Work Stealing", func(t *testing.T) { 368 | // Load one processor heavily 369 | heavyProc := setup.scheduler.processors[0] 370 | for i := 0; i < 10; i++ { 371 | g := core.NewGoroutine(50*time.Millisecond, false) 372 | heavyProc.Push(g) 373 | } 374 | 375 | // Submit some quick tasks to other processors 376 | for i := 
1; i < len(setup.scheduler.processors); i++ { 377 | g := core.NewGoroutine(10*time.Millisecond, false) 378 | setup.scheduler.processors[i].Push(g) 379 | } 380 | 381 | // Allow time for work stealing to occur 382 | time.Sleep(100 * time.Millisecond) 383 | 384 | // Verify work distribution 385 | maxTasks := 0 386 | minTasks := int(^uint(0) >> 1) 387 | 388 | for _, p := range setup.scheduler.processors { 389 | tasks := p.QueueSize() 390 | if tasks > maxTasks { 391 | maxTasks = tasks 392 | } 393 | if tasks < minTasks { 394 | minTasks = tasks 395 | } 396 | } 397 | 398 | // Check if work was reasonably balanced 399 | if maxTasks-minTasks > 5 { 400 | t.Errorf("Work not balanced: max=%d, min=%d", maxTasks, minTasks) 401 | } 402 | }) 403 | } 404 | 405 | // Add test for blocking task handling 406 | func TestBlockingTaskHandling(t *testing.T) { 407 | setup := newTestSetup(t, 2) 408 | defer setup.cleanup() 409 | 410 | if err := setup.scheduler.Start(); err != nil { 411 | t.Fatal(err) 412 | } 413 | 414 | t.Run("Network Poller Integration", func(t *testing.T) { 415 | const numTasks = 5 416 | tasks := make([]*core.Goroutine, numTasks) 417 | 418 | // Submit blocking tasks 419 | for i := 0; i < numTasks; i++ { 420 | tasks[i] = core.NewGoroutine(30*time.Millisecond, true) 421 | if !setup.scheduler.Submit(tasks[i]) { 422 | t.Fatalf("Failed to submit blocking task %d", i) 423 | } 424 | } 425 | 426 | // Wait for poller to handle tasks 427 | time.Sleep(100 * time.Millisecond) 428 | 429 | // Verify poller metrics 430 | stats := setup.scheduler.GetStats() 431 | pollerMetrics := stats.PollerMetrics 432 | 433 | // Check both currently blocked and total events 434 | if pollerMetrics.CurrentlyBlocked == 0 && pollerMetrics.TotalEvents == 0 { 435 | t.Error("No tasks registered with poller") 436 | } 437 | 438 | // Increased timeout for completion 439 | if !waitForTasks(t, tasks, 5*time.Second) { 440 | var states []string 441 | for _, task := range tasks { 442 | states = append(states, task.State().String()) 443 | } 444 | t.Fatalf("Blocking tasks not completed in time. 
States: %v", states) 445 | } 446 | }) 447 | } 448 | 449 | // Add test for global queue operations 450 | func TestGlobalQueueOperations(t *testing.T) { 451 | setup := newTestSetup(t, 2) 452 | defer setup.cleanup() 453 | 454 | if err := setup.scheduler.Start(); err != nil { 455 | t.Fatal(err) 456 | } 457 | 458 | t.Run("Global Queue Overflow Prevention", func(t *testing.T) { 459 | submitted := 0 460 | rejected := 0 461 | 462 | // Try to overflow global queue 463 | for i := 0; i < 2000; i++ { 464 | g := core.NewGoroutine(10*time.Millisecond, false) 465 | if setup.scheduler.Submit(g) { 466 | submitted++ 467 | } else { 468 | rejected++ 469 | } 470 | } 471 | 472 | stats := setup.scheduler.GetStats() 473 | queueStats := stats.GlobalQueueStats 474 | 475 | if queueStats.Rejected == 0 { 476 | t.Error("Queue overflow prevention not working") 477 | } 478 | 479 | t.Logf("Submitted: %d, Rejected: %d, Queue Size: %d", 480 | submitted, rejected, queueStats.CurrentSize) 481 | }) 482 | } 483 | 484 | // Add stress test for scheduler 485 | func TestSchedulerStress(t *testing.T) { 486 | if testing.Short() { 487 | t.Skip("Skipping stress test in short mode") 488 | } 489 | 490 | setup := newTestSetup(t, 8) 491 | defer setup.cleanup() 492 | 493 | if err := setup.scheduler.Start(); err != nil { 494 | t.Fatal(err) 495 | } 496 | 497 | const ( 498 | numWorkers = 10 499 | tasksPerWorker = 100 500 | totalDuration = 5 * time.Second 501 | ) 502 | 503 | var ( 504 | wg sync.WaitGroup 505 | successCount atomic.Int32 506 | failureCount atomic.Int32 507 | ) 508 | 509 | deadline := time.After(totalDuration) 510 | start := time.Now() 511 | 512 | wg.Add(numWorkers) 513 | for i := 0; i < numWorkers; i++ { 514 | go func(workerID int) { 515 | defer wg.Done() 516 | for j := 0; j < tasksPerWorker; j++ { 517 | select { 518 | case <-deadline: 519 | return 520 | default: 521 | duration := time.Duration(rand.Intn(50)+10) * time.Millisecond 522 | g := core.NewGoroutine(duration, rand.Float32() < 0.3) 523 | if setup.scheduler.Submit(g) { 524 | successCount.Add(1) 525 | } else { 526 | failureCount.Add(1) 527 | } 528 | time.Sleep(time.Duration(rand.Intn(10)) * time.Millisecond) 529 | } 530 | } 531 | }(i) 532 | } 533 | 534 | wg.Wait() 535 | 536 | stats := setup.scheduler.GetStats() 537 | t.Logf("Stress test results (duration: %v):", time.Since(start)) 538 | t.Logf(" Tasks submitted: %d", successCount.Load()) 539 | t.Logf(" Tasks rejected: %d", failureCount.Load()) 540 | t.Logf(" Tasks completed: %d", stats.TasksCompleted) 541 | t.Logf(" Work stealing attempts: %d", stats.TotalSteals) 542 | } 543 | -------------------------------------------------------------------------------- /internal/visualization/colors.go: -------------------------------------------------------------------------------- 1 | package visualization 2 | 3 | import ( 4 | "workstealing/internal/core" 5 | 6 | ui "github.com/gizak/termui/v3" 7 | ) 8 | 9 | var ColorScheme = struct { 10 | // State colors 11 | ProcessorIdle ui.Color 12 | ProcessorRunning ui.Color 13 | ProcessorStealing ui.Color 14 | 15 | // Queue colors 16 | QueueLow ui.Color 17 | QueueMedium ui.Color 18 | QueueHigh ui.Color 19 | 20 | // UI elements 21 | HeaderText ui.Color 22 | Border ui.Color 23 | Text ui.Color 24 | GraphLine ui.Color 25 | GraphAxis ui.Color 26 | Success ui.Color 27 | Warning ui.Color 28 | Error ui.Color 29 | }{ 30 | ProcessorIdle: ui.ColorBlue, 31 | ProcessorRunning: ui.ColorGreen, 32 | ProcessorStealing: ui.ColorYellow, 33 | 34 | QueueLow: ui.ColorGreen, 35 | QueueMedium: ui.ColorYellow, 36 | 
QueueHigh: ui.ColorRed, 37 | 38 | HeaderText: ui.ColorCyan, 39 | Border: ui.ColorWhite, 40 | Text: ui.ColorWhite, 41 | GraphLine: ui.ColorGreen, 42 | GraphAxis: ui.ColorWhite, 43 | Success: ui.ColorGreen, 44 | Warning: ui.ColorYellow, 45 | Error: ui.ColorRed, 46 | } 47 | 48 | func getStateColor(state core.ProcessorState) ui.Color { 49 | switch state { 50 | case core.ProcessorIdle: 51 | return ColorScheme.ProcessorIdle 52 | case core.ProcessorRunning: 53 | return ColorScheme.ProcessorRunning 54 | case core.ProcessorStealing: 55 | return ColorScheme.ProcessorStealing 56 | default: 57 | return ColorScheme.Text 58 | } 59 | } 60 | 61 | func getQueueColor(utilization float64) ui.Color { 62 | switch { 63 | case utilization < 0.5: 64 | return ColorScheme.QueueLow 65 | case utilization < 0.8: 66 | return ColorScheme.QueueMedium 67 | default: 68 | return ColorScheme.QueueHigh 69 | } 70 | } 71 | 72 | func getLoadColor(load float64) ui.Color { 73 | switch { 74 | case load < 0.3: 75 | return ColorScheme.ProcessorIdle 76 | case load < 0.7: 77 | return ColorScheme.ProcessorRunning 78 | default: 79 | return ColorScheme.ProcessorStealing 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /internal/visualization/terminal.go: -------------------------------------------------------------------------------- 1 | package visualization 2 | 3 | import ( 4 | "fmt" 5 | "sync" 6 | "time" 7 | "workstealing/internal/scheduler" 8 | 9 | ui "github.com/gizak/termui/v3" 10 | "github.com/gizak/termui/v3/widgets" 11 | ) 12 | 13 | type TerminalVisualizer struct { 14 | scheduler *scheduler.Scheduler 15 | updateInterval time.Duration 16 | mu sync.RWMutex 17 | stop chan struct{} 18 | 19 | // UI Components 20 | header *widgets.Paragraph 21 | globalQueue *widgets.Gauge 22 | processorGrid *widgets.Table 23 | pollerStats *widgets.Paragraph 24 | eventLog *widgets.List 25 | 26 | // Event tracking 27 | events []string 28 | maxEvents int 29 | lastTaskCount uint64 30 | lastStealCount uint64 31 | } 32 | 33 | func NewTerminalVisualizer(s *scheduler.Scheduler, interval time.Duration) (*TerminalVisualizer, error) { 34 | if err := ui.Init(); err != nil { 35 | return nil, fmt.Errorf("failed to initialize UI: %v", err) 36 | } 37 | 38 | tv := &TerminalVisualizer{ 39 | scheduler: s, 40 | updateInterval: interval, 41 | stop: make(chan struct{}), 42 | maxEvents: 20, 43 | events: make([]string, 0, 20), 44 | } 45 | 46 | if err := tv.initComponents(); err != nil { 47 | ui.Close() 48 | return nil, err 49 | } 50 | 51 | return tv, nil 52 | } 53 | 54 | func (tv *TerminalVisualizer) initComponents() error { 55 | termWidth, termHeight := ui.TerminalDimensions() 56 | 57 | // Header with system info 58 | tv.header = widgets.NewParagraph() 59 | tv.header.Title = "Work Stealing Scheduler Monitor" 60 | tv.header.SetRect(0, 0, termWidth, 3) 61 | tv.header.BorderStyle = ui.NewStyle(ColorScheme.HeaderText) 62 | 63 | // Global Queue gauge 64 | tv.globalQueue = widgets.NewGauge() 65 | tv.globalQueue.Title = "Global Queue" 66 | tv.globalQueue.SetRect(0, 3, termWidth, 6) 67 | tv.globalQueue.BarColor = ColorScheme.QueueLow 68 | tv.globalQueue.BorderStyle = ui.NewStyle(ColorScheme.Border) 69 | 70 | // Processor status grid 71 | tv.processorGrid = widgets.NewTable() 72 | tv.processorGrid.Title = "Processors" 73 | tv.processorGrid.SetRect(0, 6, termWidth, 15) 74 | tv.processorGrid.BorderStyle = ui.NewStyle(ColorScheme.Border) 75 | tv.processorGrid.RowSeparator = true 76 | tv.processorGrid.FillRow = true 77 | 
tv.processorGrid.Rows = [][]string{ 78 | { 79 | "ID", 80 | "State", 81 | "Current Task", 82 | "Queue Size", 83 | "Tasks Done", 84 | "Steals (L/G)", 85 | "Idle Time", 86 | }, 87 | } 88 | 89 | // Network Poller stats 90 | tv.pollerStats = widgets.NewParagraph() 91 | tv.pollerStats.Title = "Network Poller" 92 | tv.pollerStats.SetRect(0, 15, termWidth, 19) 93 | tv.pollerStats.BorderStyle = ui.NewStyle(ColorScheme.Border) 94 | 95 | // Event log 96 | tv.eventLog = widgets.NewList() 97 | tv.eventLog.Title = "Event Log" 98 | tv.eventLog.SetRect(0, 19, termWidth, termHeight-1) 99 | tv.eventLog.BorderStyle = ui.NewStyle(ColorScheme.Border) 100 | 101 | return nil 102 | } 103 | 104 | func (tv *TerminalVisualizer) Start() { 105 | go tv.updateLoop() 106 | 107 | uiEvents := ui.PollEvents() 108 | for { 109 | select { 110 | case e := <-uiEvents: 111 | switch e.ID { 112 | case "q", "<C-c>": 113 | tv.Stop() 114 | return 115 | case "<Resize>": 116 | payload := e.Payload.(ui.Resize) 117 | tv.handleResize(payload.Width, payload.Height) 118 | } 119 | case <-tv.stop: 120 | return 121 | } 122 | } 123 | } 124 | 125 | func (tv *TerminalVisualizer) updateLoop() { 126 | ticker := time.NewTicker(tv.updateInterval) 127 | defer ticker.Stop() 128 | 129 | for { 130 | select { 131 | case <-ticker.C: 132 | tv.update() 133 | case <-tv.stop: 134 | return 135 | } 136 | } 137 | } 138 | 139 | func (tv *TerminalVisualizer) update() { 140 | tv.mu.Lock() 141 | defer tv.mu.Unlock() 142 | 143 | stats := tv.scheduler.GetStats() 144 | 145 | // Update components 146 | tv.updateHeader(stats) 147 | tv.updateGlobalQueue(stats) 148 | tv.updateProcessorGrid(stats) 149 | tv.updatePollerStats(stats) 150 | 151 | // Render all components 152 | tv.render() 153 | } 154 | 155 | func (tv *TerminalVisualizer) updateHeader(stats scheduler.SchedulerStats) { 156 | runTime := stats.RunningTime.Round(time.Second) 157 | taskRate := float64(stats.TasksCompleted-tv.lastTaskCount) / tv.updateInterval.Seconds() 158 | stealRate := float64(stats.TotalSteals-tv.lastStealCount) / tv.updateInterval.Seconds() 159 | 160 | tv.header.Text = fmt.Sprintf( 161 | " Runtime: %v | Tasks: %d/%d | Rate: %.1f/s | Steals: %.1f/s", 162 | runTime, 163 | stats.TasksCompleted, 164 | stats.TasksScheduled, 165 | taskRate, 166 | stealRate, 167 | ) 168 | 169 | tv.lastTaskCount = stats.TasksCompleted 170 | tv.lastStealCount = stats.TotalSteals 171 | } 172 | 173 | func (tv *TerminalVisualizer) updateGlobalQueue(stats scheduler.SchedulerStats) { 174 | gq := stats.GlobalQueueStats 175 | percent := int(gq.Utilization * 100) 176 | tv.globalQueue.Percent = percent 177 | tv.globalQueue.Label = fmt.Sprintf("%d/%d (%.1f%%) | Submitted: %d | Rejected: %d", 178 | gq.CurrentSize, gq.Capacity, gq.Utilization*100, gq.Submitted, gq.Rejected) 179 | tv.globalQueue.BarColor = getQueueColor(gq.Utilization) 180 | } 181 | 182 | func (tv *TerminalVisualizer) updateProcessorGrid(stats scheduler.SchedulerStats) { 183 | rows := make([][]string, len(stats.ProcessorMetrics)+1) 184 | rows[0] = tv.processorGrid.Rows[0] // Keep header 185 | 186 | for i, p := range stats.ProcessorMetrics { 187 | currentTask := "Idle" 188 | if p.CurrentTask != nil { 189 | currentTask = fmt.Sprintf("G%d (%s)", 190 | p.CurrentTask.ID(), formatDuration(p.CurrentTask.ExecutionTime())) 191 | } 192 | 193 | rows[i+1] = []string{ 194 | fmt.Sprintf("P%d", p.ID), 195 | p.State.String(), 196 | currentTask, 197 | fmt.Sprintf("%d", p.QueueSize), 198 | fmt.Sprintf("%d", p.TasksExecuted), 199 | fmt.Sprintf("%d/%d", p.LocalSteals, p.GlobalSteals), 200 |
formatDuration(p.IdleTime), 201 | } 202 | } 203 | 204 | tv.processorGrid.Rows = rows 205 | } 206 | func (tv *TerminalVisualizer) updatePollerStats(stats scheduler.SchedulerStats) { 207 | pm := stats.PollerMetrics 208 | tv.pollerStats.Text = fmt.Sprintf( 209 | "Currently Blocked: %d | Completed: %d | Timeouts: %d | Errors: %d\n"+ 210 | "Average Block Time: %v | Active Events: %d", 211 | pm.CurrentlyBlocked, 212 | pm.CompletedEvents, 213 | pm.Timeouts, 214 | pm.Errors, 215 | pm.AverageBlockTime.Round(time.Millisecond), 216 | pm.ActiveEvents, 217 | ) 218 | } 219 | 220 | func (tv *TerminalVisualizer) AddEvent(msg string) { 221 | tv.mu.Lock() 222 | defer tv.mu.Unlock() 223 | 224 | timestamp := time.Now().Format("15:04:05") 225 | event := fmt.Sprintf("[%s] %s", timestamp, msg) 226 | 227 | tv.events = append([]string{event}, tv.events...) 228 | if len(tv.events) > tv.maxEvents { 229 | tv.events = tv.events[:tv.maxEvents] 230 | } 231 | 232 | tv.eventLog.Rows = tv.events 233 | } 234 | 235 | func (tv *TerminalVisualizer) Stop() { 236 | close(tv.stop) 237 | ui.Close() 238 | } 239 | 240 | func (tv *TerminalVisualizer) render() { 241 | ui.Render( 242 | tv.header, 243 | tv.globalQueue, 244 | tv.processorGrid, 245 | tv.pollerStats, 246 | tv.eventLog, 247 | ) 248 | } 249 | 250 | func (tv *TerminalVisualizer) handleResize(width, height int) { 251 | tv.mu.Lock() 252 | defer tv.mu.Unlock() 253 | 254 | tv.header.SetRect(0, 0, width, 3) 255 | tv.globalQueue.SetRect(0, 3, width, 6) 256 | tv.processorGrid.SetRect(0, 6, width, 15) 257 | tv.pollerStats.SetRect(0, 15, width, 19) 258 | tv.eventLog.SetRect(0, 19, width, height-1) 259 | 260 | tv.render() 261 | } 262 | 263 | func formatDuration(d time.Duration) string { 264 | if d < time.Second { 265 | return fmt.Sprintf("%dms", d.Milliseconds()) 266 | } 267 | return fmt.Sprintf("%.1fs", d.Seconds()) 268 | } 269 | 270 | func colorize(text string, color ui.Color) string { 271 | return fmt.Sprintf("[%s](fg:%v)", text, color) 272 | } 273 | --------------------------------------------------------------------------------
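For completeness, a hedged sketch of how a driver program might embed the terminal visualizer defined above around an already-configured scheduler; the sizes, loop bounds, and log message are illustrative only and are not taken from this repository's cmd package.

```go
package main

import (
	"fmt"
	"log"
	"time"

	"workstealing/internal/core"
	"workstealing/internal/scheduler"
	"workstealing/internal/visualization"
)

func main() {
	s := scheduler.NewScheduler(2, 500) // illustrative sizing
	if err := s.Start(); err != nil {
		log.Fatalf("scheduler start: %v", err)
	}

	vis, err := visualization.NewTerminalVisualizer(s, 100*time.Millisecond)
	if err != nil {
		log.Fatalf("visualizer init: %v", err)
	}

	// Start blocks on termui's event loop ("q" or Ctrl-C exits), so it runs in
	// the background while the driver submits work and feeds the event log.
	go vis.Start()

	for i := 0; i < 50; i++ {
		g := core.NewGoroutine(10*time.Millisecond, i%5 == 0)
		if !s.Submit(g) {
			vis.AddEvent(fmt.Sprintf("submit rejected for G%d", g.ID()))
		}
		time.Sleep(20 * time.Millisecond)
	}

	vis.Stop() // tears down the UI and releases the terminal
	s.Stop()
}
```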