├── .gitignore ├── JobSystem.sln ├── JobSystem.vcxproj ├── JobSystem.vcxproj.filters ├── JobSystem.vcxproj.user ├── README.md └── src ├── JobSystem.cpp ├── JobSystem.h ├── RingBuffer.h ├── SpinLock.h ├── main.cpp └── types.h /.gitignore: -------------------------------------------------------------------------------- 1 | #Visual Studio stuff 2 | x64/* 3 | .vs/* 4 | Debug/* 5 | Release/* -------------------------------------------------------------------------------- /JobSystem.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 16 4 | VisualStudioVersion = 16.0.29001.49 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "JobSystem", "JobSystem.vcxproj", "{E4AD0763-7878-47F1-9BC4-5BB0B1F724B1}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|x64 = Debug|x64 11 | Debug|x86 = Debug|x86 12 | Release|x64 = Release|x64 13 | Release|x86 = Release|x86 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {E4AD0763-7878-47F1-9BC4-5BB0B1F724B1}.Debug|x64.ActiveCfg = Debug|x64 17 | {E4AD0763-7878-47F1-9BC4-5BB0B1F724B1}.Debug|x64.Build.0 = Debug|x64 18 | {E4AD0763-7878-47F1-9BC4-5BB0B1F724B1}.Debug|x86.ActiveCfg = Debug|Win32 19 | {E4AD0763-7878-47F1-9BC4-5BB0B1F724B1}.Debug|x86.Build.0 = Debug|Win32 20 | {E4AD0763-7878-47F1-9BC4-5BB0B1F724B1}.Release|x64.ActiveCfg = Release|x64 21 | {E4AD0763-7878-47F1-9BC4-5BB0B1F724B1}.Release|x64.Build.0 = Release|x64 22 | {E4AD0763-7878-47F1-9BC4-5BB0B1F724B1}.Release|x86.ActiveCfg = Release|Win32 23 | {E4AD0763-7878-47F1-9BC4-5BB0B1F724B1}.Release|x86.Build.0 = Release|Win32 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | GlobalSection(ExtensibilityGlobals) = postSolution 29 | 
SolutionGuid = {60A78AC9-F9F5-4348-8D09-D512DE0924C2} 30 | EndGlobalSection 31 | EndGlobal 32 | -------------------------------------------------------------------------------- /JobSystem.vcxproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Release 10 | Win32 11 | 12 | 13 | Debug 14 | x64 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | 16.0 23 | {E4AD0763-7878-47F1-9BC4-5BB0B1F724B1} 24 | JobSystem 25 | 10.0.17763.0 26 | 27 | 28 | 29 | Application 30 | true 31 | v141 32 | MultiByte 33 | 34 | 35 | Application 36 | false 37 | v141 38 | true 39 | MultiByte 40 | 41 | 42 | Application 43 | true 44 | v141 45 | MultiByte 46 | 47 | 48 | Application 49 | false 50 | v141 51 | true 52 | MultiByte 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | Level3 76 | Disabled 77 | true 78 | true 79 | stdcpp17 80 | /std:c++17 /GT %(AdditionalOptions) 81 | 82 | 83 | Console 84 | 85 | 86 | 87 | 88 | Level3 89 | Disabled 90 | true 91 | true 92 | stdcpp17 93 | /std:c++17 /GT %(AdditionalOptions) 94 | 95 | 96 | Console 97 | 98 | 99 | 100 | 101 | Level3 102 | MaxSpeed 103 | true 104 | true 105 | true 106 | true 107 | stdcpp17 108 | /std:c++17 /GT %(AdditionalOptions) 109 | 110 | 111 | Console 112 | true 113 | true 114 | 115 | 116 | 117 | 118 | Level3 119 | MaxSpeed 120 | true 121 | true 122 | true 123 | true 124 | stdcpp17 125 | /std:c++17 /GT %(AdditionalOptions) 126 | 127 | 128 | Console 129 | true 130 | true 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | -------------------------------------------------------------------------------- /JobSystem.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | 
{93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | Source Files 23 | 24 | 25 | 26 | 27 | Header Files 28 | 29 | 30 | Header Files 31 | 32 | 33 | Header Files 34 | 35 | 36 | Header Files 37 | 38 | 39 | -------------------------------------------------------------------------------- /JobSystem.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | true 5 | 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # Fiber based Job System 3 | 4 | Fiber based job system, with additional ability for fiber->thread communication. 5 | Based on: 6 | Christian Gyrling's GDC talk: [Parallelizing the Naughty Dog Engine Using Fibers](https://www.gdcvault.com/play/1022186/Parallelizing-the-Naughty-Dog-Engine) 7 | [Game Engine Architecture 3rd Edition](https://www.gameenginebook.com/) 8 | 9 | Implementation is for Windows only. 10 | Besides std::optional from C++17, it should compile with C++11. 
11 | 12 | 13 | ## API: 14 | ``` 15 | 16 | class JobSystem { 17 | public: 18 | // entry point for each job 19 | using EntryPoint = void(void* param); 20 | 21 | // jobs' priority 22 | enum class Priority { 23 | LOW = 0, NORMAL = 1, HIGH = 2 24 | }; 25 | 26 | // counter used for synchronizing jobs 27 | class Counter { 28 | explicit Counter(I32 counter); 29 | }; 30 | 31 | // declaration of each job 32 | struct Declaration { 33 | EntryPoint* m_pEntryPoint = nullptr; 34 | void* m_param = nullptr; 35 | Priority m_priority = Priority::LOW; 36 | JobSystem::Counter* m_pCounter = nullptr; 37 | }; 38 | 39 | // kick jobs 40 | void KickJobs(int count, const Declaration aDecl[]); 41 | void KickJob(const Declaration& decl); 42 | 43 | // wait for counter to become 0 44 | void WaitForCounter(Counter* pCounter); 45 | 46 | // kick jobs and wait for completion 47 | void KickJobsAndWait(int count, Declaration aDecl[]); 48 | void KickJobAndWait(Declaration& decl); 49 | 50 | // for easy control of initialization and shut down order 51 | void Initialize(U32 numberOfThreads); 52 | void JoinAndTerminate(); 53 | }; 54 | 55 | ``` 56 | 57 | ### Example usage 58 | ``` 59 | #include <iostream> // std::cout 60 | #include <string> // std::string 61 | 62 | #include "JobSystem.h" // JobSystem 63 | 64 | JobSystem g_jobSystem; 65 | 66 | void TheMostCreativeWayToCalculateFibonacci(void* pNumberVoid) { 67 | int* pNumber = reinterpret_cast<int*>(pNumberVoid); 68 | int n = *pNumber; 69 | if (n > 1) { 70 | int fibNMinus1 = n-1; 71 | int fibNMinus2 = n-2; 72 | JobSystem::Declaration adecl[2]; 73 | adecl[0].m_pEntryPoint = TheMostCreativeWayToCalculateFibonacci; 74 | adecl[0].m_param = &fibNMinus1; 75 | adecl[1].m_pEntryPoint = TheMostCreativeWayToCalculateFibonacci; 76 | adecl[1].m_param = &fibNMinus2; 77 | 78 | g_jobSystem.KickJobsAndWait(2, adecl); 79 | n = fibNMinus1 + fibNMinus2; 80 | *pNumber = n; 81 | } 82 | } 83 | 84 | int main() { 85 | const int numberOfThreads = std::thread::hardware_concurrency(); 86 | 
g_jobSystem.Initialize(numberOfThreads); 87 | 88 | for (int i = 0; i < 2; i++) { 89 | int n = 10+i; 90 | printf("Fibonacci(%i)=", n); 91 | JobSystem::Declaration decl; 92 | decl.m_pEntryPoint = TheMostCreativeWayToCalculateFibonacci; 93 | decl.m_param = &n; 94 | g_jobSystem.KickJobAndWait(decl); 95 | printf("%i\nSleeping for 10s...\n", n); 96 | std::this_thread::sleep_for(std::chrono::seconds(10)); // to show that workers go to sleep 97 | } 98 | 99 | printf("Closing!\n"); 100 | g_jobSystem.JoinAndTerminate(); 101 | } 102 | 103 | ``` 104 | Higher numbers, with two jobs kicked at a time, will make you run out of fibers, so increase the sizes of the queues, the fiber pool and the wait list appropriately. 105 | 106 | ### Differences from the GDC talk 107 | All fibers have the same stack size, but different sizes can be easily implemented. 108 | No adaptive mutexes were implemented, only a basic spin lock. 109 | No visualization method was implemented. 110 | 111 | ### How to make it multiplatform 112 | Provide equivalents for the things listed below and you are good to go. 113 | #### Fibers related: 114 | ::CreateFiberEx, ::SwitchToFiber, ::ConvertThreadToFiber 115 | #### Setting affinity for worker threads: 116 | SetThreadAffinityMask 117 | #### TLS access 118 | /GT compiler flag, or use the workaround mentioned in the GDC talk 119 | ##### Sidenote: 120 | G++ emits a NOP instruction for _mm_pause() instead of the desired pause instruction, so you will probably want to use inline assembly instead when working with that compiler. 
-------------------------------------------------------------------------------- /src/JobSystem.cpp: -------------------------------------------------------------------------------- 1 | #include "JobSystem.h" 2 | 3 | thread_local U32 tl_workerThreadId = -1; 4 | thread_local PFiber tl_pCurrentFiber = nullptr; 5 | thread_local PFiber tl_pFiberToBeAddedToPoolAfterSwitch = nullptr; 6 | thread_local StatefullFiber* tl_pStatefullFiberToBeUnlockedAfterSwitch = nullptr; 7 | 8 | // this class exists only because the counter can get decremented to 0 after the waiting fiber is added to the wait list, but before that waiting fiber has switched to another fiber 9 | // (pulled from the fiber pool), so we need this extra lock that we take in WaitForCounterFromFiber and release in WorkerMainLoop after the switch is performed, so that we only ever switch back to a truly awaiting fiber 10 | class StatefullFiber { 11 | public: 12 | explicit StatefullFiber(LPVOID pFiber) : m_pFiber(pFiber) {} 13 | PFiber GetRawFiber() { return m_pFiber; } 14 | private: 15 | friend void WorkerMainLoop(void*); // release the lock in main loop after switch was performed 16 | friend void JobWrapper(JobSystem::Declaration declaration, JobSystem& rJobSystem); // GetRawFiber 17 | friend class JobSystem; // whole class, because I can't make friend with private function from JobSystem, if this class isn't nested in JobSystem 18 | PFiber m_pFiber; 19 | SpinLock m_lock; 20 | }; 21 | 22 | 23 | // Loop run by every worker thread and by every pool fiber: first releases the StatefullFiber lock handed over by the fiber we switched from (if any), 24 | // then tries up to 100 times to pull a job and, when none shows up, sleeps on the condition variable until a job arrives or shutdown is requested 25 | void WorkerMainLoop(void* job_system) { 26 | JobSystem & jobSystem = *reinterpret_cast(job_system); 27 | while (jobSystem.m_keepWorking) { 28 | if (tl_pStatefullFiberToBeUnlockedAfterSwitch != nullptr) { 29 | tl_pStatefullFiberToBeUnlockedAfterSwitch->m_lock.unlock(); 30 | tl_pStatefullFiberToBeUnlockedAfterSwitch = nullptr; 31 | } 32 | std::optional decl; 33 | for (int i = 0; i < 100; i++) { 34 | decl = jobSystem.PullJob(); 35 | if (decl) 36 | break; 37 | else 38 | _mm_pause(); 39 | } 40 | 41 | if (decl) { 42 | JobWrapper(decl.value(), jobSystem); 43 | } 44 
| else { 45 | std::unique_lock lock(jobSystem.m_workersMainLoopMutex); 46 | jobSystem.m_workersMainLoopConditionVariable.wait(lock, [&jobSystem, &decl] { decl = jobSystem.PullJob(); return decl != std::nullopt || !jobSystem.m_keepWorking; }); 47 | 48 | if (jobSystem.m_keepWorking) { 49 | assert(decl); 50 | lock.unlock(); 51 | JobWrapper(decl.value(), jobSystem); 52 | } 53 | } 54 | } 55 | } 56 | 57 | void JobWrapper(JobSystem::Declaration declaration, JobSystem& rJobSystem) { 58 | 59 | // execute job (afterwards this wrapper decrements the job's counter and, when it reaches 0, wakes up whoever waits on it) 60 | declaration.m_pEntryPoint(declaration.m_param); 61 | 62 | // shorthand 63 | JobSystem::Counter* pCounter = declaration.m_pCounter; 64 | 65 | // if no associated Counter, we're done 66 | if (pCounter == nullptr) 67 | return; 68 | 69 | const bool wasCounterAddedToWaitList = !pCounter->m_signalAfterCompletion; 70 | 71 | I32 newCounterValue; 72 | { 73 | if (pCounter->m_signalAfterCompletion) { 74 | // notify needs to be under the same lock as the decrement because: 75 | // 1. multiple workers can finish a job at the same time, decrement the counter and get timesliced before they check if the counter reached 0 76 | // 2. one of them checks that it's 0 and notifies 77 | // 3. the notified thread resumes and frees the Counter's memory 78 | // 4. 
later, woken up other worker would checks whether released counter is 0, which could be "true" on garbage memory 79 | // and then tries to do notify_all() on the released condition variable, which is in fact garbage memory 80 | std::lock_guard lock(pCounter->m_mutex); 81 | newCounterValue = --(pCounter->m_counter); 82 | if (newCounterValue == 0) 83 | pCounter->m_condVar.notify_all(); 84 | } 85 | else { 86 | newCounterValue = --(pCounter->m_counter); 87 | } 88 | } 89 | // DEREFERENCING COUNTER PAST THIS LINE IS FORBIDDEN, AS IT MIGHT POINT TO RELEASED MEMORY IN FIBER->THREAD NOTIFY SCENARIO 90 | 91 | if (!wasCounterAddedToWaitList) { 92 | #ifdef _DEBUG 93 | rJobSystem.m_waitListLock.lock(); 94 | assert(rJobSystem.m_waitList.find(pCounter) == rJobSystem.m_waitList.end()); 95 | rJobSystem.m_waitListLock.unlock(); 96 | #endif 97 | return; 98 | } 99 | 100 | // the counter was already decremented above; if this job was the last one, resume the fiber that waits on the counter 101 | if (newCounterValue == 0) { 102 | rJobSystem.m_waitListLock.lock(); 103 | auto foundIterator = rJobSystem.m_waitList.find(pCounter); 104 | if (foundIterator != rJobSystem.m_waitList.end()) { 105 | // take fiber from wait list 106 | StatefullFiber* pAwaitingFiber = foundIterator->second; 107 | assert(pAwaitingFiber->GetRawFiber() != nullptr); 108 | rJobSystem.m_waitList.erase(pCounter); 109 | rJobSystem.m_waitListLock.unlock(); // we have to release it before we try to obtain the lock on the fiber in order to avoid deadlock 110 | 111 | 112 | // in global lock scenario (naked Kick + WaitForCounter somewhere else), the awaiting fiber (added to wait list) may not have switched yet to 113 | // another fiber from the pool, so we spin until that happens 114 | pAwaitingFiber->m_lock.lock(); 115 | // and immediately unlock, because pAwaitingFiber is now truly awaiting and that was the only purpose of this lock 116 | pAwaitingFiber->m_lock.unlock(); 117 | 118 | 119 | // save current fiber to be added to fiber pool after switch is done 120 | tl_pFiberToBeAddedToPoolAfterSwitch = tl_pCurrentFiber; 121 | 
tl_pCurrentFiber = pAwaitingFiber->GetRawFiber(); 122 | // switch to fiber pulled from wait list 123 | ::SwitchToFiber(pAwaitingFiber->GetRawFiber()); 124 | 125 | // We push previous fiber to fiber pool only if we were on wait list and we came back from it. 126 | // Here, we weren't, so we are back again only because someone else got pushed to wait list, 127 | // so we can't add him to pool, so tl_pFiberToBeAddedToPoolAfterSwitch has to be nullptr 128 | assert(tl_pFiberToBeAddedToPoolAfterSwitch == nullptr); 129 | assert(tl_pCurrentFiber != nullptr); 130 | } 131 | else { 132 | // This can happen if the counter is decremented before JobSystem::WaitForCounter() adds the fiber to the wait list, 133 | // or after the fiber was added to the wait list, but also after WaitForCounter() noticed that the counter is 0 and already removed itself from the wait list. 134 | // This situation is going to be detected in the fiber that called JobSystem::WaitForCounter(), 135 | // so here, we just release m_waitListLock 136 | rJobSystem.m_waitListLock.unlock(); 137 | } 138 | } 139 | } 140 | // Queues `count` jobs, then wakes all workers when more than one job was queued, otherwise a single worker 141 | void JobSystem::KickJobs(int count, const Declaration aDecl[]) 142 | { 143 | for (int i = 0; i < count; i++) 144 | KickJobWithoutNotifingWorkers(aDecl[i]); 145 | 146 | if (count > 1) 147 | NotifyAllWorkers(); 148 | else 149 | NotifyOneWorker(); 150 | } 151 | // True when tl_pCurrentFiber is set for the calling thread 152 | bool JobSystem::IsThisThreadAFiber() 153 | { 154 | return tl_pCurrentFiber != nullptr; 155 | } 156 | // Pops the next job, trying the high, then the normal, then the low priority queue; the optional is empty when all three are empty 157 | std::optional JobSystem::PullJob() 158 | { 159 | std::optional declaration = m_pJobQueueHigh->PopFront(); 160 | if (!declaration) 161 | declaration = m_pJobQueueNormal->PopFront(); 162 | if (!declaration) 163 | declaration = m_pJobQueueLow->PopFront(); 164 | return declaration; 165 | } 166 | 167 | void JobSystem::AddPreviousFiberToPool() 168 | { 169 | // back again, add fiber that we switched from to fiber pool, set it to nullptr afterwards; 170 | // tl_pFiberToBeAddedToPoolAfterSwitch cannot be null, because we can get here only, because someone pulled us 
171 | // from wait list and then switch to us, so we have to add previous fiber to fiber pool 172 | assert(tl_pCurrentFiber != nullptr); 173 | assert(tl_pFiberToBeAddedToPoolAfterSwitch != nullptr); 174 | m_pFiberPool->PushBack(tl_pFiberToBeAddedToPoolAfterSwitch); 175 | tl_pFiberToBeAddedToPoolAfterSwitch = nullptr; 176 | } 177 | // Waits until the counter reaches 0: a fiber goes through WaitForCounterFromFiber, a plain thread through Counter::Wait 178 | void JobSystem::WaitForCounter(Counter* pCounter) 179 | { 180 | // fiber cannot wait on counter created on thread and vice versa 181 | assert(pCounter->m_signalAfterCompletion == !IsThisThreadAFiber()); 182 | if (IsThisThreadAFiber()) 183 | WaitForCounterFromFiber(pCounter); 184 | else 185 | pCounter->Wait(); 186 | } 187 | 188 | void JobSystem::WaitForCounterFromFiber(Counter * pCounter) 189 | { 190 | StatefullFiber statefullFiber(tl_pCurrentFiber); 191 | statefullFiber.m_lock.lock(); 192 | 193 | // add itself to wait list 194 | assert(tl_pCurrentFiber != nullptr); 195 | m_waitListLock.lock(); 196 | m_waitList[pCounter] = &statefullFiber; 197 | m_waitListLock.unlock(); 198 | 199 | if (pCounter->GetCounter() == 0) { 200 | std::lock_guard guard(m_waitListLock); 201 | // we are here in one of 2 scenarios: 202 | // 1st - jobs were completed before we added ourselves to the wait list, or jobs were completed after we added ourselves to the wait list, but the last job didn't take m_waitListLock before us, 203 | // so we just remove ourselves from wait list and continue execution 204 | // 2nd - jobs were completed after we added ourselves to the wait list and the last job took m_waitListLock before us and removed us from the wait list, 205 | // and now it's spinning on StatefullFiber::m_lock, so we have to switch to a free fiber (which then releases the fiber lock) as fast as possible 206 | 207 | auto foundIterator = m_waitList.find(pCounter); 208 | 209 | if (foundIterator != m_waitList.end()) { 210 | // 1st scenario 211 | // jobs were already completed, we remove ourselves from wait list and continue execution 212 | m_waitList.erase(pCounter); 213 | 
return; 214 | } 215 | // 2nd scenario and counter not equal to 0 have the same logic, that's why it's outside the if statement to remove code duplication 216 | } 217 | 218 | // pop free fiber 219 | std::optional newFiber = m_pFiberPool->PopFront(); 220 | assert(newFiber.has_value()); 221 | tl_pCurrentFiber = newFiber.value(); 222 | // do not add ourselves to tl_pFiberToBeAddedToPoolAfterSwitch, because the last job is going to switch to us 223 | 224 | // fiber we switch to will unlock the lock on statefullFiber in WorkerMainLoop 225 | tl_pStatefullFiberToBeUnlockedAfterSwitch = &statefullFiber; 226 | 227 | ::SwitchToFiber(newFiber.value()); 228 | 229 | AddPreviousFiberToPool(); 230 | } 231 | // Pushes the declaration onto the queue matching its priority; a counter is required (asserted) 232 | void JobSystem::KickJobWithoutNotifingWorkers(const Declaration& decl) 233 | { 234 | assert(decl.m_pCounter); 235 | 236 | if (decl.m_priority == Priority::LOW) 237 | m_pJobQueueLow->PushBack(decl); 238 | else if (decl.m_priority == Priority::NORMAL) 239 | m_pJobQueueNormal->PushBack(decl); 240 | else if (decl.m_priority == Priority::HIGH) 241 | m_pJobQueueHigh->PushBack(decl); 242 | else 243 | assert(false && "UNHANDLED JOB PRIORITY"); 244 | } 245 | 246 | void JobSystem::NotifyOneWorker() 247 | { 248 | // this lock is to ensure that we won't miss the notification when a worker was about 249 | // to put itself to sleep, but hasn't done it yet 250 | std::lock_guard loc(m_workersMainLoopMutex); 251 | m_workersMainLoopConditionVariable.notify_one(); 252 | } 253 | 254 | void JobSystem::NotifyAllWorkers() 255 | { 256 | // look at NotifyOneWorker comment 257 | std::lock_guard loc(m_workersMainLoopMutex); 258 | m_workersMainLoopConditionVariable.notify_all(); 259 | } 260 | 261 | // Attaches one local Counter (initialised to `count`) to every declaration, kicks the jobs and waits until the counter reaches 0 262 | void JobSystem::KickJobsAndWait(int count, Declaration aDecl[]) 263 | { 264 | Counter counter(count); 265 | 266 | for (int i = 0; i < count; i++) { 267 | assert(aDecl[i].m_pCounter == nullptr); 268 | aDecl[i].m_pCounter = &counter; 269 | } 270 | KickJobs(count, aDecl); 271 | WaitForCounter(&counter); 272 | } 273 
274 | void JobSystem::Initialize(U32 numberOfThreads) 275 | { 276 | // init job queues 277 | m_pJobQueueLow = new RingBuffer; 278 | m_pJobQueueNormal = new RingBuffer; 279 | m_pJobQueueHigh = new RingBuffer; 280 | // init fiber pool 281 | m_pFiberPool = new RingBuffer; 282 | for (size_t i = 0; i < m_pFiberPool->Size(); i++) 283 | m_pFiberPool->PushBack( ::CreateFiberEx(g_sKiBStack, g_sKiBStack, 0, (LPFIBER_START_ROUTINE)WorkerMainLoop, this) ); 284 | // reserve memory for workers and fiberPool 285 | m_workers.reserve(numberOfThreads); 286 | m_waitList.reserve(g_sWaitList); 287 | // init workers 288 | for (U32 i = 0; i < numberOfThreads; i++) { 289 | m_workers.emplace_back([this, i] { 290 | tl_workerThreadId = i; 291 | tl_pCurrentFiber = ::ConvertThreadToFiber(nullptr); 292 | // got to main loop 293 | WorkerMainLoop(this); 294 | }); 295 | // set affinity 296 | HANDLE handle = reinterpret_cast( m_workers[i].native_handle() ); 297 | DWORD_PTR affinityMask = DWORD_PTR(1) << i; 298 | DWORD_PTR result = SetThreadAffinityMask(handle, affinityMask); 299 | assert(result != 0); 300 | } 301 | } 302 | 303 | void JobSystem::JoinAndTerminate() 304 | { 305 | { 306 | std::lock_guard lock(m_workersMainLoopMutex); 307 | m_keepWorking = false; 308 | m_workersMainLoopConditionVariable.notify_all(); 309 | } 310 | 311 | 312 | for (std::thread& thread : m_workers) { 313 | assert(thread.joinable()); 314 | thread.join(); 315 | } 316 | 317 | delete m_pJobQueueLow; 318 | delete m_pJobQueueNormal; 319 | delete m_pJobQueueHigh; 320 | 321 | delete m_pFiberPool; 322 | } 323 | 324 | void JobSystem::Counter::Wait() 325 | { 326 | assert(!IsThisThreadAFiber()); 327 | std::unique_lock lock(m_mutex); 328 | m_condVar.wait(lock, [this] {return m_counter == 0; }); 329 | } 330 | -------------------------------------------------------------------------------- /src/JobSystem.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include // assert 3 | 
#include // std::optional 4 | #include // std::atomic 5 | #include // std::lock_guard 6 | #include // std::vector 7 | #include // std::unordered_map 8 | 9 | #include // ::CreateFiberEx, ::SwitchToFiber, ::ConvertThreadToFiber 10 | // HANDLE, DWORD_PTR, SetThreadAffinityMask, LPVOID 11 | 12 | #include "types.h" 13 | #include "SpinLock.h" // SpinLock 14 | #include "RingBuffer.h" // RingBuffer 15 | // sizes: fiber stack size in bytes (passed to ::CreateFiberEx), capacity of each job queue, of the fiber pool and of the wait list 16 | const size_t g_sKiBStack = 512 * 1024; 17 | const size_t g_sJobQueue = 1024; 18 | const size_t g_sFiberPool = 160; 19 | const size_t g_sWaitList = g_sFiberPool; 20 | using PFiber = LPVOID; 21 | // Fiber based job system: jobs are queued by priority, executed by worker threads and synchronized through counters 22 | class JobSystem { 23 | public: 24 | // entry point for each job 25 | using EntryPoint = void(void* param); 26 | 27 | // jobs' priority 28 | enum class Priority { 29 | LOW = 0, NORMAL = 1, HIGH = 2 30 | }; 31 | 32 | class Counter; 33 | 34 | // declaration of each job 35 | struct Declaration { 36 | EntryPoint* m_pEntryPoint = nullptr; 37 | void* m_param = nullptr; 38 | Priority m_priority = Priority::LOW; 39 | JobSystem::Counter* m_pCounter = nullptr; 40 | }; 41 | 42 | 43 | // kick jobs (every declaration must carry a counter; this is asserted when the job is queued) 44 | void KickJobs(int count, const Declaration aDecl[]); 45 | void KickJob(const Declaration& decl) { 46 | KickJobs(1, &decl); 47 | } 48 | 49 | // wait for counter to become 0 50 | void WaitForCounter(Counter* pCounter); 51 | 52 | // kick jobs and wait for completion (the declarations must not carry a counter yet; one is attached internally) 53 | void KickJobsAndWait(int count, Declaration aDecl[]); 54 | void KickJobAndWait(Declaration& decl) { 55 | KickJobsAndWait(1, &decl); 56 | } 57 | 58 | // for easy control of initialization and shut down order 59 | void Initialize(U32 numberOfThreads); 60 | void JoinAndTerminate(); 61 | 62 | private: 63 | std::optional PullJob(); 64 | void AddPreviousFiberToPool(); // this is meant to be used only in waiting functions: KickJob(s)AndWait, WaitForCounter 65 | void WaitForCounterFromFiber(Counter* pCounter); 66 | void KickJobWithoutNotifingWorkers(const Declaration& decl); 67 | void NotifyOneWorker(); 68 | void NotifyAllWorkers(); 
69 | static bool IsThisThreadAFiber(); 70 | 71 | friend void WorkerMainLoop(void*); // PullJob 72 | friend void JobWrapper(JobSystem::Declaration declaration, JobSystem& rJobSystem); // m_waitList, m_waitListLock 73 | // job queues (one per priority), pool of free fibers, worker threads and the wait list that maps a counter to the fiber waiting on it 74 | RingBuffer* m_pJobQueueLow = nullptr; 75 | RingBuffer* m_pJobQueueNormal = nullptr; 76 | RingBuffer* m_pJobQueueHigh = nullptr; 77 | RingBuffer* m_pFiberPool = nullptr; 78 | std::vector m_workers; 79 | std::unordered_map m_waitList; 80 | alignas(64) SpinLock m_waitListLock; 81 | alignas(64) std::atomic m_keepWorking = true; 82 | std::mutex m_workersMainLoopMutex; 83 | std::condition_variable m_workersMainLoopConditionVariable; 84 | 85 | 86 | public: 87 | // counter used for synchronizing jobs 88 | class Counter { 89 | std::atomic m_counter; 90 | 91 | // whether fiber->thread notify is needed after m_counter reaches 0 92 | bool m_signalAfterCompletion = false; 93 | 94 | // used only if needed for fiber->thread communication 95 | std::mutex m_mutex; 96 | std::condition_variable m_condVar; 97 | 98 | I32 GetCounter() const { return m_counter; } 99 | void Wait(); 100 | 101 | friend void JobWrapper(JobSystem::Declaration declaration, JobSystem& rJobSystem); // GetCounter, NotifyIfNeeded 102 | friend void JobSystem::KickJobWithoutNotifingWorkers(const Declaration& decl); // assert SignalAfterCompletion 103 | friend void JobSystem::WaitForCounter(Counter* pCounter); // Wait() 104 | friend void JobSystem::WaitForCounterFromFiber(Counter* pCounter); // GetCounter 105 | public: 106 | explicit Counter(I32 counter) : m_counter(counter), m_signalAfterCompletion(!JobSystem::IsThisThreadAFiber()) {} 107 | }; 108 | }; 109 | -------------------------------------------------------------------------------- /src/RingBuffer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include // std::atomic 3 | #include // std::optional<> 4 | #include // std::lock_guard 5 | #include "SpinLock.h" // SpinLock 6 | 7 | template 8 
| class RingBuffer { 9 | public: 10 | 11 | void PushBack(T data) { 12 | std::lock_guard guard(m_writerSpinLock); 13 | size_t next = (m_head + 1) % capacity; 14 | assert(next != m_tail); // if already full, this will fail (debug builds only; with asserts disabled the element is still written and the buffer then looks empty) 15 | m_queue[m_head] = data; 16 | m_head = next; 17 | } 18 | // Pops the oldest element, or returns std::nullopt when the buffer is empty; the emptiness test is repeated under the reader lock because another reader may have taken the last element in between 19 | std::optional PopFront() { 20 | if (m_head != m_tail) { 21 | std::lock_guard guard(m_readerSpinLock); 22 | if (m_head != m_tail) { 23 | T data = m_queue[m_tail]; 24 | m_tail = (m_tail + 1) % capacity; 25 | return data; 26 | } 27 | } 28 | return std::nullopt; 29 | } 30 | // number of elements that can be stored; -1, because if m_head == m_tail, the RingBuffer is empty, 31 | // if m_head == (m_tail - 1), the RingBuffer is full 32 | constexpr size_t Size() { return capacity - 1; } 33 | 34 | private: 35 | T& operator[](size_t i) { return m_queue[i]; } 36 | friend class JobSystem; 37 | T m_queue[capacity]; 38 | alignas(64) SpinLock m_writerSpinLock; 39 | alignas(64) SpinLock m_readerSpinLock; 40 | // atomics are needed, so updates for m_head and m_tail won't 41 | // get rearranged and we can easily have two separate locks for 42 | // readers and writers 43 | alignas(64) std::atomic m_head = 0; 44 | alignas(64) std::atomic m_tail = 0; 45 | }; -------------------------------------------------------------------------------- /src/SpinLock.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include // std::atomic 3 | #include // _mm_pause() 4 | class SpinLock { 5 | std::atomic m_lock = false; 6 | public: 7 | void Lock() { 8 | // https://www.slideshare.net/ssuser052dd11/igc2018-amd-don-woligroski-why-ryzen 9 | // page 43 10 | while (true) { 11 | while (m_lock) { 12 | _mm_pause(); 13 | } 14 | // exit only if we change from false, to true 15 | // i.e. 
until we acquire the (spin)lock 16 | if (!m_lock.exchange(true)) 17 | break; 18 | } 19 | } 20 | void Unlock() { 21 | m_lock.store(false); 22 | } 23 | void lock() { return Lock(); } 24 | void unlock() { return Unlock(); } 25 | void Aquire() { return Lock(); } 26 | void Release() { return Unlock(); } 27 | }; -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | #include <iostream> // std::cout 2 | #include <string> // std::string 3 | 4 | #include "JobSystem.h" // JobSystem 5 | 6 | JobSystem g_jobSystem; 7 | // Job entry point: reads n through pNumberVoid (an int*) and overwrites it with Fibonacci(n), computing Fibonacci(n-1) and Fibonacci(n-2) as two child jobs 8 | void TheMostCreativeWayToCalculateFibonacci(void* pNumberVoid) { 9 | int* pNumber = reinterpret_cast<int*>(pNumberVoid); 10 | int n = *pNumber; 11 | if (n > 1) { 12 | int fibNMinus1 = n-1; 13 | int fibNMinus2 = n-2; 14 | JobSystem::Declaration adecl[2]; 15 | adecl[0].m_pEntryPoint = TheMostCreativeWayToCalculateFibonacci; 16 | adecl[0].m_param = &fibNMinus1; 17 | adecl[1].m_pEntryPoint = TheMostCreativeWayToCalculateFibonacci; 18 | adecl[1].m_param = &fibNMinus2; 19 | 20 | g_jobSystem.KickJobsAndWait(2, adecl); 21 | n = fibNMinus1 + fibNMinus2; 22 | *pNumber = n; 23 | } 24 | } 25 | // Demo driver: starts the workers, computes two Fibonacci numbers through the job system (sleeping in between) and shuts the system down 26 | int main() { 27 | const unsigned detectedThreads = std::thread::hardware_concurrency(); 28 | const int numberOfThreads = detectedThreads != 0 ? static_cast<int>(detectedThreads) : 1; // hardware_concurrency() may return 0 when the value is not computable; with zero workers KickJobAndWait() below would never return 29 | g_jobSystem.Initialize(numberOfThreads); 30 | 31 | for (int i = 0; i < 2; i++) { 32 | int n = 10+i; 33 | printf("Fibonacci(%i)=", n); 34 | JobSystem::Declaration decl; 35 | decl.m_pEntryPoint = TheMostCreativeWayToCalculateFibonacci; 36 | decl.m_param = &n; 37 | g_jobSystem.KickJobAndWait(decl); 38 | printf("%i\nSleeping for 10s...\n", n); 39 | std::this_thread::sleep_for(std::chrono::seconds(10)); // to show that workers go to sleep 40 | } 41 | 42 | printf("Closing!\n"); 43 | g_jobSystem.JoinAndTerminate(); 44 | } -------------------------------------------------------------------------------- /src/types.h: -------------------------------------------------------------------------------- 1 | 
#pragma once
#include <cstdint> // size specified std types

// Fixed-width unsigned integer aliases.
using U8 = std::uint8_t;
using U16 = std::uint16_t;
using U32 = std::uint32_t;
using U64 = std::uint64_t;

// Fixed-width signed integer aliases.
using I8 = std::int8_t;
using I16 = std::int16_t;
using I32 = std::int32_t;
using I64 = std::int64_t; // signed, like the other I* aliases

using uchar = unsigned char;