├── .gitignore
├── JobSystem.sln
├── JobSystem.vcxproj
├── JobSystem.vcxproj.filters
├── JobSystem.vcxproj.user
├── README.md
└── src
├── JobSystem.cpp
├── JobSystem.h
├── RingBuffer.h
├── SpinLock.h
├── main.cpp
└── types.h
/.gitignore:
--------------------------------------------------------------------------------
1 | #Visual Studio stuff
2 | x64/*
3 | .vs/*
4 | Debug/*
5 | Release/*
--------------------------------------------------------------------------------
/JobSystem.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio Version 16
4 | VisualStudioVersion = 16.0.29001.49
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "JobSystem", "JobSystem.vcxproj", "{E4AD0763-7878-47F1-9BC4-5BB0B1F724B1}"
7 | EndProject
8 | Global
9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
10 | Debug|x64 = Debug|x64
11 | Debug|x86 = Debug|x86
12 | Release|x64 = Release|x64
13 | Release|x86 = Release|x86
14 | EndGlobalSection
15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
16 | {E4AD0763-7878-47F1-9BC4-5BB0B1F724B1}.Debug|x64.ActiveCfg = Debug|x64
17 | {E4AD0763-7878-47F1-9BC4-5BB0B1F724B1}.Debug|x64.Build.0 = Debug|x64
18 | {E4AD0763-7878-47F1-9BC4-5BB0B1F724B1}.Debug|x86.ActiveCfg = Debug|Win32
19 | {E4AD0763-7878-47F1-9BC4-5BB0B1F724B1}.Debug|x86.Build.0 = Debug|Win32
20 | {E4AD0763-7878-47F1-9BC4-5BB0B1F724B1}.Release|x64.ActiveCfg = Release|x64
21 | {E4AD0763-7878-47F1-9BC4-5BB0B1F724B1}.Release|x64.Build.0 = Release|x64
22 | {E4AD0763-7878-47F1-9BC4-5BB0B1F724B1}.Release|x86.ActiveCfg = Release|Win32
23 | {E4AD0763-7878-47F1-9BC4-5BB0B1F724B1}.Release|x86.Build.0 = Release|Win32
24 | EndGlobalSection
25 | GlobalSection(SolutionProperties) = preSolution
26 | HideSolutionNode = FALSE
27 | EndGlobalSection
28 | GlobalSection(ExtensibilityGlobals) = postSolution
29 | SolutionGuid = {60A78AC9-F9F5-4348-8D09-D512DE0924C2}
30 | EndGlobalSection
31 | EndGlobal
32 |
--------------------------------------------------------------------------------
/JobSystem.vcxproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Debug
6 | Win32
7 |
8 |
9 | Release
10 | Win32
11 |
12 |
13 | Debug
14 | x64
15 |
16 |
17 | Release
18 | x64
19 |
20 |
21 |
22 | 16.0
23 | {E4AD0763-7878-47F1-9BC4-5BB0B1F724B1}
24 | JobSystem
25 | 10.0.17763.0
26 |
27 |
28 |
29 | Application
30 | true
31 | v141
32 | MultiByte
33 |
34 |
35 | Application
36 | false
37 | v141
38 | true
39 | MultiByte
40 |
41 |
42 | Application
43 | true
44 | v141
45 | MultiByte
46 |
47 |
48 | Application
49 | false
50 | v141
51 | true
52 | MultiByte
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 | Level3
76 | Disabled
77 | true
78 | true
79 | stdcpp17
80 | /std:c++17 /GT %(AdditionalOptions)
81 |
82 |
83 | Console
84 |
85 |
86 |
87 |
88 | Level3
89 | Disabled
90 | true
91 | true
92 | stdcpp17
93 | /std:c++17 /GT %(AdditionalOptions)
94 |
95 |
96 | Console
97 |
98 |
99 |
100 |
101 | Level3
102 | MaxSpeed
103 | true
104 | true
105 | true
106 | true
107 | stdcpp17
108 | /std:c++17 /GT %(AdditionalOptions)
109 |
110 |
111 | Console
112 | true
113 | true
114 |
115 |
116 |
117 |
118 | Level3
119 | MaxSpeed
120 | true
121 | true
122 | true
123 | true
124 | stdcpp17
125 | /std:c++17 /GT %(AdditionalOptions)
126 |
127 |
128 | Console
129 | true
130 | true
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
--------------------------------------------------------------------------------
/JobSystem.vcxproj.filters:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx
7 |
8 |
9 | {93995380-89BD-4b04-88EB-625FBE52EBFB}
10 | h;hh;hpp;hxx;hm;inl;inc;ipp;xsd
11 |
12 |
13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
15 |
16 |
17 |
18 |
19 | Source Files
20 |
21 |
22 | Source Files
23 |
24 |
25 |
26 |
27 | Header Files
28 |
29 |
30 | Header Files
31 |
32 |
33 | Header Files
34 |
35 |
36 | Header Files
37 |
38 |
39 |
--------------------------------------------------------------------------------
/JobSystem.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | true
5 |
6 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | # Fiber based Job System
3 |
4 | Fiber based job system, with additional ability for fiber->thread communication.
5 | Based on:
6 | Christian Gyrling's GDC talk: [Parallelizing the Naughty Dog Engine Using Fibers](https://www.gdcvault.com/play/1022186/Parallelizing-the-Naughty-Dog-Engine)
7 | [Game Engine Architecture 3rd Edition](https://www.gameenginebook.com/)
8 |
9 | Implementation is for Windows only.
10 | Besides std::optional from C++17, it should compile with C++11.
11 |
12 |
13 | ## API:
14 | ```
15 |
16 | class JobSystem {
17 | public:
18 | // entry point for each job
19 | using EntryPoint = void(void* param);
20 |
21 | // jobs' priority
22 | enum class Priority {
23 | LOW = 0, NORMAL = 1, HIGH = 2
24 | };
25 |
26 | // counter used for synchronizing jobs
27 | class Counter {
28 | explicit Counter(I32 counter);
29 | };
30 |
31 | // declaration of each job
32 | struct Declaration {
33 | EntryPoint* m_pEntryPoint = nullptr;
34 | void* m_param = nullptr;
35 | Priority m_priority = Priority::LOW;
36 | JobSystem::Counter* m_pCounter = nullptr;
37 | };
38 |
39 | // kick jobs
40 | void KickJobs(int count, const Declaration aDecl[]);
41 | void KickJob(const Declaration& decl);
42 |
43 | // wait for counter to become 0
44 | void WaitForCounter(Counter* pCounter);
45 |
46 | // kick jobs and wait for completion
47 | void KickJobsAndWait(int count, Declaration aDecl[]);
48 | void KickJobAndWait(Declaration& decl);
49 |
50 | // for easy control of initialization and shut down order
51 | void Initialize(U32 numberOfThreads);
52 | void JoinAndTerminate();
53 | };
54 |
55 | ```
56 |
57 | ### Example usage
58 | ```
59 | #include <iostream> // std::cout
60 | #include <string> // std::string
61 |
62 | #include "JobSystem.h" // JobSystem
63 |
64 | JobSystem g_jobSystem;
65 |
66 | void TheMostCreativeWayToCalculateFibonacci(void* pNumberVoid) {
67 | int* pNumber = reinterpret_cast<int*>(pNumberVoid);
68 | int n = *pNumber;
69 | if (n > 1) {
70 | int fibNMinus1 = n-1;
71 | int fibNMinus2 = n-2;
72 | JobSystem::Declaration adecl[2];
73 | adecl[0].m_pEntryPoint = TheMostCreativeWayToCalculateFibonacci;
74 | adecl[0].m_param = &fibNMinus1;
75 | adecl[1].m_pEntryPoint = TheMostCreativeWayToCalculateFibonacci;
76 | adecl[1].m_param = &fibNMinus2;
77 |
78 | g_jobSystem.KickJobsAndWait(2, adecl);
79 | n = fibNMinus1 + fibNMinus2;
80 | *pNumber = n;
81 | }
82 | }
83 |
84 | int main() {
85 | const int numberOfThreads = std::thread::hardware_concurrency();
86 | g_jobSystem.Initialize(numberOfThreads);
87 |
88 | for (int i = 0; i < 2; i++) {
89 | int n = 10+i;
90 | printf("Fibonacci(%i)=", n);
91 | JobSystem::Declaration decl;
92 | decl.m_pEntryPoint = TheMostCreativeWayToCalculateFibonacci;
93 | decl.m_param = &n;
94 | g_jobSystem.KickJobAndWait(decl);
95 | printf("%i\nSleeping for 10s...\n", n);
96 | std::this_thread::sleep_for(std::chrono::seconds(10)); // to show that workers go to sleep
97 | }
98 |
99 | printf("Closing!\n");
100 | g_jobSystem.JoinAndTerminate();
101 | }
102 |
103 | ```
104 | Larger inputs, with two jobs kicked at a time, will make you run out of fibers, so increase the sizes of the job queues, fiber pool and wait list accordingly.
105 |
106 | ### Differences from the GDC talk
107 | All fibers have the same stack space, but different sizes can be easily implemented.
108 | No adaptive mutexes were implemented, only basic spin lock.
109 | No visualization method was implemented.
110 |
111 | ### How to make it multiplatform
112 | Provide equivalents for things listed below and you are good to go.
113 | #### Fibers related:
114 | ::CreateFiber, ::SwitchToFiber, ::ConvertThreadToFiber
115 | #### Setting affinity for worker threads:
116 | SetThreadAffinityMask
117 | #### TLS access
118 | /GT compiler flag, or use workaround mentioned in GDC talk
119 | ##### Sidenote:
120 | G++ emits NOP instruction with _mm_pause() instead of desired pause instruction, so you probably would want to use inline assembly instead when working with that compiler.
--------------------------------------------------------------------------------
/src/JobSystem.cpp:
--------------------------------------------------------------------------------
1 | #include "JobSystem.h"
2 |
// Per-thread bookkeeping shared by the worker loop and the fiber-switching code in this file.
// Index of the worker thread, assigned in JobSystem::Initialize; the -1 initializer converts to the largest U32.
3 | thread_local U32 tl_workerThreadId = -1;
// Fiber currently executing on this thread; nullptr means the thread is not a fiber (see JobSystem::IsThisThreadAFiber).
4 | thread_local PFiber tl_pCurrentFiber = nullptr;
// Set by JobWrapper just before it switches to a fiber taken from the wait list; the resumed fiber returns it to the pool in JobSystem::AddPreviousFiberToPool.
5 | thread_local PFiber tl_pFiberToBeAddedToPoolAfterSwitch = nullptr;
// Set by JobSystem::WaitForCounterFromFiber just before switching away; WorkerMainLoop releases its lock once the switch has happened.
6 | thread_local StatefullFiber* tl_pStatefullFiberToBeUnlockedAfterSwitch = nullptr;
7 |
8 | // A raw fiber handle paired with a spin lock. The lock exists because a counter can be decremented to 0 after the waiting fiber
9 | // was added to the wait list but before that fiber has actually switched to another one (pulled from the fiber pool). The lock is taken in JobSystem::WaitForCounterFromFiber and released in WorkerMainLoop after the switch is performed, so nobody resumes the fiber before it is truly waiting.
10 | class StatefullFiber {
11 | public:
// Wraps pFiber; m_lock starts out unlocked.
12 | explicit StatefullFiber(LPVOID pFiber) : m_pFiber(pFiber) {}
// Returns the wrapped fiber handle.
13 | PFiber GetRawFiber() { return m_pFiber; }
14 | private:
15 | friend void WorkerMainLoop(void*); // releases m_lock in the main loop after the switch was performed
16 | friend void JobWrapper(JobSystem::Declaration declaration, JobSystem& rJobSystem); // spins on m_lock before switching to the waiting fiber
17 | friend class JobSystem; // whole class, because a private member function of JobSystem cannot be befriended while this class is not nested in JobSystem
18 | PFiber m_pFiber;
19 | SpinLock m_lock;
20 | };
21 |
22 |
23 |
24 |
25 | void WorkerMainLoop(void* job_system) {
26 | JobSystem & jobSystem = *reinterpret_cast(job_system);
27 | while (jobSystem.m_keepWorking) {
28 | if (tl_pStatefullFiberToBeUnlockedAfterSwitch != nullptr) {
29 | tl_pStatefullFiberToBeUnlockedAfterSwitch->m_lock.unlock();
30 | tl_pStatefullFiberToBeUnlockedAfterSwitch = nullptr;
31 | }
32 | std::optional decl;
33 | for (int i = 0; i < 100; i++) {
34 | decl = jobSystem.PullJob();
35 | if (decl)
36 | break;
37 | else
38 | _mm_pause();
39 | }
40 |
41 | if (decl) {
42 | JobWrapper(decl.value(), jobSystem);
43 | }
44 | else {
45 | std::unique_lock lock(jobSystem.m_workersMainLoopMutex);
46 | jobSystem.m_workersMainLoopConditionVariable.wait(lock, [&jobSystem, &decl] { decl = jobSystem.PullJob(); return decl != std::nullopt || !jobSystem.m_keepWorking; });
47 |
48 | if (jobSystem.m_keepWorking) {
49 | assert(decl);
50 | lock.unlock();
51 | JobWrapper(decl.value(), jobSystem);
52 | }
53 | }
54 | }
55 | }
56 |
// Runs a single job and then performs the counter bookkeeping for it.
//
// declaration: the job to run (entry point, parameter and optional counter), taken by value.
// rJobSystem:  the job system that owns the wait list.
//
// After the entry point returns, the associated counter (if any) is decremented. For a counter
// created on a plain thread (m_signalAfterCompletion == true) the waiting thread is woken through
// the counter's condition variable. For a counter created on a fiber, the job that brings the
// counter to 0 takes the waiting fiber from the wait list and switches to it.
57 | void JobWrapper(JobSystem::Declaration declaration, JobSystem& rJobSystem) {
58 |
59 | // execute job
60 | declaration.m_pEntryPoint(declaration.m_param);
61 |
62 | // shorthand
63 | JobSystem::Counter* pCounter = declaration.m_pCounter;
64 |
65 | // if no associated Counter, we're done
66 | if (pCounter == nullptr)
67 | return;
68 |
// Read the flag now: the counter must not be dereferenced once it has been decremented (see below).
// Counters that do not signal a thread are waited on through the wait list.
69 | const bool wasCounterAddedToWaitList = !pCounter->m_signalAfterCompletion;
70 |
71 | I32 newCounterValue;
72 | {
73 | if (pCounter->m_signalAfterCompletion) {
74 | // the notify needs to be under the same lock as the decrement because otherwise:
75 | // 1. multiple workers can finish their jobs at the same time, decrement the counter and get time-sliced before they check whether it reached 0
76 | // 2. one of them sees that it is 0 and notifies
77 | // 3. the notified thread resumes and frees the Counter's memory
78 | // 4. later, another worker wakes up and checks whether the released counter is 0, which could be "true" on garbage memory,
79 | // and then calls notify_all() on the released condition variable, which is in fact garbage memory
80 | std::lock_guard lock(pCounter->m_mutex);
81 | newCounterValue = --(pCounter->m_counter);
82 | if (newCounterValue == 0)
83 | pCounter->m_condVar.notify_all();
84 | }
85 | else {
86 | newCounterValue = --(pCounter->m_counter);
87 | }
88 | }
89 | // DEREFERENCING COUNTER PAST THIS LINE IS FORBIDDEN, AS IT MIGHT POINT TO RELEASED MEMORY IN FIBER->THREAD NOTIFY SCENARIO
90 |
91 | if (!wasCounterAddedToWaitList) {
// debug-only sanity check: a thread-side counter must never appear on the wait list (pCounter is used only as a key here)
92 | #ifdef _DEBUG
93 | rJobSystem.m_waitListLock.lock();
94 | assert(rJobSystem.m_waitList.find(pCounter) == rJobSystem.m_waitList.end());
95 | rJobSystem.m_waitListLock.unlock();
96 | #endif
97 | return;
98 | }
99 |
100 | // the counter was already decremented above; only the job that brought it to 0 resumes the waiting fiber
101 | if (newCounterValue == 0) {
102 | rJobSystem.m_waitListLock.lock();
103 | auto foundIterator = rJobSystem.m_waitList.find(pCounter);
104 | if (foundIterator != rJobSystem.m_waitList.end()) {
105 | // take fiber from wait list
106 | StatefullFiber* pAwaitingFiber = foundIterator->second;
107 | assert(pAwaitingFiber->GetRawFiber() != nullptr);
108 | rJobSystem.m_waitList.erase(pCounter);
109 | rJobSystem.m_waitListLock.unlock(); // we have to release it before we try to obtain the lock on the fiber in order to avoid a deadlock
110 |
111 |
112 | // in the global lock scenario (naked Kick + WaitForCounter somewhere else), the awaiting fiber (already on the wait list) may not yet have switched to
113 | // another fiber from the pool, so we spin until that happens
114 | pAwaitingFiber->m_lock.lock();
115 | // and immediately unlock, because pAwaitingFiber is now truly waiting and that was the only purpose of this lock
116 | pAwaitingFiber->m_lock.unlock();
117 |
118 |
119 | // save current fiber to be added to fiber pool after switch is done
120 | tl_pFiberToBeAddedToPoolAfterSwitch = tl_pCurrentFiber;
121 | tl_pCurrentFiber = pAwaitingFiber->GetRawFiber();
122 | // switch to fiber pulled from wait list
123 | ::SwitchToFiber(pAwaitingFiber->GetRawFiber());
124 |
125 | // We push the previous fiber to the fiber pool only if we were on the wait list and came back from it.
126 | // Here we were not, so we are running again only because someone else got pushed to the wait list and
127 | // took this fiber from the pool; that fiber must not be added to the pool, so tl_pFiberToBeAddedToPoolAfterSwitch has to be nullptr
128 | assert(tl_pFiberToBeAddedToPoolAfterSwitch == nullptr);
129 | assert(tl_pCurrentFiber != nullptr);
130 | }
131 | else {
132 | // This can happen if the counter is decremented before JobSystem::WaitForCounter() adds the fiber to the wait list,
133 | // or after the fiber was added to the wait list, but also after WaitForCounter() noticed that the counter is 0 and already removed itself from the wait list.
134 | // This situation is going to be detected in the fiber that called JobSystem::WaitForCounter(),
135 | // so here we just release m_waitListLock
136 | rJobSystem.m_waitListLock.unlock();
137 | }
138 | }
139 | }
140 |
141 | void JobSystem::KickJobs(int count, const Declaration aDecl[])
142 | {
143 | for (int i = 0; i < count; i++)
144 | KickJobWithoutNotifingWorkers(aDecl[i]);
145 |
146 | if (count > 1)
147 | NotifyAllWorkers();
148 | else
149 | NotifyOneWorker();
150 | }
151 |
152 | bool JobSystem::IsThisThreadAFiber()
153 | {
154 | return tl_pCurrentFiber != nullptr;
155 | }
156 |
157 | std::optional JobSystem::PullJob()
158 | {
159 | std::optional declaration = m_pJobQueueHigh->PopFront();
160 | if (!declaration)
161 | declaration = m_pJobQueueNormal->PopFront();
162 | if (!declaration)
163 | declaration = m_pJobQueueLow->PopFront();
164 | return declaration;
165 | }
166 |
167 | void JobSystem::AddPreviousFiberToPool()
168 | {
169 | // back again, add fiber that we switched from to fiber pool, set it to nullptr afterwards;
170 | // tl_pFiberToBeAddedToPoolAfterSwitch cannot be null, because we can get here only, because someone pulled us
171 | // from wait list and then switch to us, so we have to add previous fiber to fiber pool
172 | assert(tl_pCurrentFiber != nullptr);
173 | assert(tl_pFiberToBeAddedToPoolAfterSwitch != nullptr);
174 | m_pFiberPool->PushBack(tl_pFiberToBeAddedToPoolAfterSwitch);
175 | tl_pFiberToBeAddedToPoolAfterSwitch = nullptr;
176 | }
177 |
178 | void JobSystem::WaitForCounter(Counter* pCounter)
179 | {
180 | // fiber cannot wait on counter created on thread and vice versa
181 | assert(pCounter->m_signalAfterCompletion == !IsThisThreadAFiber());
182 | if (IsThisThreadAFiber())
183 | WaitForCounterFromFiber(pCounter);
184 | else
185 | pCounter->Wait();
186 | }
187 |
188 | void JobSystem::WaitForCounterFromFiber(Counter * pCounter)
189 | {
190 | StatefullFiber statefullFiber(tl_pCurrentFiber);
191 | statefullFiber.m_lock.lock();
192 |
193 | // add itself to wait list
194 | assert(tl_pCurrentFiber != nullptr);
195 | m_waitListLock.lock();
196 | m_waitList[pCounter] = &statefullFiber;
197 | m_waitListLock.unlock();
198 |
199 | if (pCounter->GetCounter() == 0) {
200 | std::lock_guard guard(m_waitListLock);
201 | // we are here in one of 2 scenarios:
202 | // 1st - jobs was completed before we added ourselfs to wait list, or jobs were completed after we added ourselfs to wait list, but last job didn't take a m_waitListLock before us,
203 | // so we just remove ourselves from wait list and continue execution
204 | // 2nd - jobs were completed after we added ourselfs to wait list and last job took m_waitListLock before us removed us from wait list,
205 | // and now it's spinning on StatefullFiber::m_lock, so we have to switch to free fiber, so we go to another fiber (and then releasing fiber lock) as fast as possible
206 |
207 | auto foundIterator = m_waitList.find(pCounter);
208 |
209 | if (foundIterator != m_waitList.end()) {
210 | // 1st scenario
211 | // jobs were already completed, we remove ourselves from wait list and continue execution
212 | m_waitList.erase(pCounter);
213 | return;
214 | }
215 | // 2nd scenario and counter not equal to 0 has the same logic, thats why it's outside if statement to remove code duplication
216 | }
217 |
218 | // pop free fiber
219 | std::optional newFiber = m_pFiberPool->PopFront();
220 | assert(newFiber.has_value());
221 | tl_pCurrentFiber = newFiber.value();
222 | // do not add ourselft to tl_pFiberToBeAddedToPoolAfterSwitch, because last job is gonna switch to ass
223 |
224 | // fiber we switch to will unlock the lock on statefullFiber in WorkerMainLoop
225 | tl_pStatefullFiberToBeUnlockedAfterSwitch = &statefullFiber;
226 |
227 | ::SwitchToFiber(newFiber.value());
228 |
229 | AddPreviousFiberToPool();
230 | }
231 |
232 | void JobSystem::KickJobWithoutNotifingWorkers(const Declaration& decl)
233 | {
234 | assert(decl.m_pCounter);
235 |
236 | if (decl.m_priority == Priority::LOW)
237 | m_pJobQueueLow->PushBack(decl);
238 | else if (decl.m_priority == Priority::NORMAL)
239 | m_pJobQueueNormal->PushBack(decl);
240 | else if (decl.m_priority == Priority::HIGH)
241 | m_pJobQueueHigh->PushBack(decl);
242 | else
243 | assert(false && "UNHANDLED JOB PRIORITY");
244 | }
245 |
246 | void JobSystem::NotifyOneWorker()
247 | {
248 | // this lock is to ensure, that we won't miss notification when worker was about
249 | // to put itself to sleep, but haven't done it yet
250 | std::lock_guard loc(m_workersMainLoopMutex);
251 | m_workersMainLoopConditionVariable.notify_one();
252 | }
253 |
254 | void JobSystem::NotifyAllWorkers()
255 | {
256 | // look at NotifyOneWorker comment
257 | std::lock_guard loc(m_workersMainLoopMutex);
258 | m_workersMainLoopConditionVariable.notify_all();
259 | }
260 |
261 |
262 | void JobSystem::KickJobsAndWait(int count, Declaration aDecl[])
263 | {
264 | Counter counter(count);
265 |
266 | for (int i = 0; i < count; i++) {
267 | assert(aDecl[i].m_pCounter == nullptr);
268 | aDecl[i].m_pCounter = &counter;
269 | }
270 | KickJobs(count, aDecl);
271 | WaitForCounter(&counter);
272 | }
273 |
274 | void JobSystem::Initialize(U32 numberOfThreads)
275 | {
276 | // init job queues
277 | m_pJobQueueLow = new RingBuffer;
278 | m_pJobQueueNormal = new RingBuffer;
279 | m_pJobQueueHigh = new RingBuffer;
280 | // init fiber pool
281 | m_pFiberPool = new RingBuffer;
282 | for (size_t i = 0; i < m_pFiberPool->Size(); i++)
283 | m_pFiberPool->PushBack( ::CreateFiberEx(g_sKiBStack, g_sKiBStack, 0, (LPFIBER_START_ROUTINE)WorkerMainLoop, this) );
284 | // reserve memory for workers and fiberPool
285 | m_workers.reserve(numberOfThreads);
286 | m_waitList.reserve(g_sWaitList);
287 | // init workers
288 | for (U32 i = 0; i < numberOfThreads; i++) {
289 | m_workers.emplace_back([this, i] {
290 | tl_workerThreadId = i;
291 | tl_pCurrentFiber = ::ConvertThreadToFiber(nullptr);
292 | // got to main loop
293 | WorkerMainLoop(this);
294 | });
295 | // set affinity
296 | HANDLE handle = reinterpret_cast( m_workers[i].native_handle() );
297 | DWORD_PTR affinityMask = DWORD_PTR(1) << i;
298 | DWORD_PTR result = SetThreadAffinityMask(handle, affinityMask);
299 | assert(result != 0);
300 | }
301 | }
302 |
303 | void JobSystem::JoinAndTerminate()
304 | {
305 | {
306 | std::lock_guard lock(m_workersMainLoopMutex);
307 | m_keepWorking = false;
308 | m_workersMainLoopConditionVariable.notify_all();
309 | }
310 |
311 |
312 | for (std::thread& thread : m_workers) {
313 | assert(thread.joinable());
314 | thread.join();
315 | }
316 |
317 | delete m_pJobQueueLow;
318 | delete m_pJobQueueNormal;
319 | delete m_pJobQueueHigh;
320 |
321 | delete m_pFiberPool;
322 | }
323 |
324 | void JobSystem::Counter::Wait()
325 | {
326 | assert(!IsThisThreadAFiber());
327 | std::unique_lock lock(m_mutex);
328 | m_condVar.wait(lock, [this] {return m_counter == 0; });
329 | }
330 |
--------------------------------------------------------------------------------
/src/JobSystem.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include // assert
3 | #include // std::optional
4 | #include // std::atomic
5 | #include // std::lock_guard
6 | #include // std::vector
7 | #include // std::unordered_map
8 |
9 | #include // ::CreateFiber, ::SwitchToFiber, ::ConvertThreadToFiber
10 | // HANDLE, DWORD_PTR, SetThreadAffinityMask, LPVOID
11 |
12 | #include "types.h"
13 | #include "SpinLock.h" // SpinLock
14 | #include "RingBuffer.h" // RingBuffer
15 |
// Sizing constants of the job system.
// Stack size of every pooled fiber, passed to CreateFiberEx as both commit and reserve size: 512 * 1024 bytes.
16 | const size_t g_sKiBStack = 512 * 1024;
// presumably the capacity of each job queue - the template arguments that would use it are not visible in this listing
17 | const size_t g_sJobQueue = 1024;
// presumably the capacity of the fiber pool - same caveat as above
18 | const size_t g_sFiberPool = 160;
// number of entries reserved up front in the wait list (see JobSystem::Initialize)
19 | const size_t g_sWaitList = g_sFiberPool;
// raw fiber handle as used by the Windows fiber API
20 | using PFiber = LPVOID;
21 |
// Fiber-based job system: jobs are queued by priority and executed by worker threads;
// a job running on a fiber can wait for other jobs without blocking its worker thread.
// NOTE(review): template argument lists (RingBuffer, std::optional, std::vector, ...) are not visible in this listing.
22 | class JobSystem {
23 | public:
24 | // entry point for each job
25 | using EntryPoint = void(void* param);
26 |
27 | // jobs' priority; workers pull from the HIGH queue first, then NORMAL, then LOW
28 | enum class Priority {
29 | LOW = 0, NORMAL = 1, HIGH = 2
30 | };
31 |
32 | class Counter;
33 |
34 | // declaration of each job
35 | struct Declaration {
36 | EntryPoint* m_pEntryPoint = nullptr; // function to run
37 | void* m_param = nullptr; // argument handed to the entry point
38 | Priority m_priority = Priority::LOW; // selects the queue the job is put into
39 | JobSystem::Counter* m_pCounter = nullptr; // decremented once the job has finished
40 | };
41 |
42 |
43 | // kick jobs: queue them and wake workers; returns without waiting for the jobs
44 | void KickJobs(int count, const Declaration aDecl[]);
45 | void KickJob(const Declaration& decl) {
46 | KickJobs(1, &decl);
47 | }
48 |
49 | // wait for counter to become 0; must be called from the same kind of context (fiber or plain thread) the counter was created in
50 | void WaitForCounter(Counter* pCounter);
51 |
52 | // kick jobs and wait for completion; the declarations must not have a counter attached yet
53 | void KickJobsAndWait(int count, Declaration aDecl[]);
54 | void KickJobAndWait(Declaration& decl) {
55 | KickJobsAndWait(1, &decl);
56 | }
57 |
58 | // for easy control of initialization and shut down order
59 | void Initialize(U32 numberOfThreads);
60 | void JoinAndTerminate();
61 |
62 | private:
// takes the next job, highest priority first; empty when all queues are empty
63 | std::optional PullJob();
64 | void AddPreviousFiberToPool(); // this is meant to be used only in waiting functions: KickJob(s)AndWait, WaitForCounter
65 | void WaitForCounterFromFiber(Counter* pCounter);
66 | void KickJobWithoutNotifingWorkers(const Declaration& decl);
67 | void NotifyOneWorker();
68 | void NotifyAllWorkers();
69 | static bool IsThisThreadAFiber();
70 |
71 | friend void WorkerMainLoop(void*); // PullJob, m_keepWorking, m_workersMainLoopMutex, m_workersMainLoopConditionVariable
72 | friend void JobWrapper(JobSystem::Declaration declaration, JobSystem& rJobSystem); // m_waitList, m_waitListLock
73 |
// one job queue per priority, allocated in Initialize and freed in JoinAndTerminate
74 | RingBuffer* m_pJobQueueLow = nullptr;
75 | RingBuffer* m_pJobQueueNormal = nullptr;
76 | RingBuffer* m_pJobQueueHigh = nullptr;
// fibers that are free to be switched to
77 | RingBuffer* m_pFiberPool = nullptr;
78 | std::vector m_workers;
// maps a counter to the fiber waiting on it; guarded by m_waitListLock
79 | std::unordered_map m_waitList;
80 | alignas(64) SpinLock m_waitListLock;
// set to false in JoinAndTerminate to make the worker loops exit
81 | alignas(64) std::atomic m_keepWorking = true;
// mutex and condition variable that idle workers sleep on
82 | std::mutex m_workersMainLoopMutex;
83 | std::condition_variable m_workersMainLoopConditionVariable;
84 |
85 |
86 | public:
87 | // counter used for synchronizing jobs
88 | class Counter {
// number of jobs still outstanding
89 | std::atomic m_counter;
90 |
91 | // whether fiber->thread notify is needed after m_counter reaches 0
92 | bool m_signalAfterCompletion = false;
93 |
94 | // used only if needed for fiber->thread communication
95 | std::mutex m_mutex;
96 | std::condition_variable m_condVar;
97 |
98 | I32 GetCounter() const { return m_counter; }
// blocks the calling (non-fiber) thread until m_counter is 0
99 | void Wait();
100 |
101 | friend void JobWrapper(JobSystem::Declaration declaration, JobSystem& rJobSystem); // m_counter, m_signalAfterCompletion, m_mutex, m_condVar
102 | friend void JobSystem::KickJobWithoutNotifingWorkers(const Declaration& decl); // NOTE(review): the visible implementation only checks decl.m_pCounter for null
103 | friend void JobSystem::WaitForCounter(Counter* pCounter); // Wait(), m_signalAfterCompletion
104 | friend void JobSystem::WaitForCounterFromFiber(Counter* pCounter); // GetCounter
105 | public:
// counter: number of jobs to wait for. A counter created outside of a fiber signals its waiting thread through the condition variable.
106 | explicit Counter(I32 counter) : m_counter(counter), m_signalAfterCompletion(!JobSystem::IsThisThreadAFiber()) {}
107 | };
108 | };
109 |
--------------------------------------------------------------------------------
/src/RingBuffer.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include // std::atomic
3 | #include // std::optional<>
4 | #include // std::lock_guard
5 | #include "SpinLock.h" // SpinLock
6 |
// Fixed-capacity FIFO ring buffer with one spin lock for writers and one for readers.
// NOTE(review): the template parameter list is not visible in this listing; the body uses a type `T` and a constant `capacity`.
7 | template
8 | class RingBuffer {
9 | public:
10 |
// Appends `data` at the head. Writers are serialized by m_writerSpinLock.
// NOTE(review): a full buffer is only caught by the assert; with asserts compiled out the write presumably goes through and m_head catches up with m_tail, which PopFront reads as "empty" - confirm.
11 | void PushBack(T data) {
12 | std::lock_guard guard(m_writerSpinLock);
13 | size_t next = (m_head + 1) % capacity;
14 | assert(next != m_tail); // if already full, this will fail
15 | m_queue[m_head] = data;
// publish the element only after it has been written
16 | m_head = next;
17 | }
18 |
// Removes and returns the oldest element, or std::nullopt when the buffer is empty.
19 | std::optional PopFront() {
// cheap emptiness check first, so an empty buffer is reported without taking the lock
20 | if (m_head != m_tail) {
21 | std::lock_guard guard(m_readerSpinLock);
// check again under the lock: another reader may have taken the last element meanwhile
22 | if (m_head != m_tail) {
23 | T data = m_queue[m_tail];
24 | m_tail = (m_tail + 1) % capacity;
25 | return data;
26 | }
27 | }
28 | return std::nullopt;
29 | }
30 | // number of elements the buffer can hold: capacity - 1, because m_head == m_tail means the RingBuffer is empty,
31 | // and the RingBuffer is full when m_head is one slot behind m_tail (modulo capacity)
32 | constexpr size_t Size() { return capacity - 1; }
33 |
34 | private:
35 | T& operator[](size_t i) { return m_queue[i]; }
36 | friend class JobSystem;
37 | T m_queue[capacity];
38 | alignas(64) SpinLock m_writerSpinLock;
39 | alignas(64) SpinLock m_readerSpinLock;
40 | // atomics are needed so that updates of m_head and m_tail do not
41 | // get rearranged, which lets us use two separate locks for
42 | // readers and writers
43 | alignas(64) std::atomic m_head = 0;
44 | alignas(64) std::atomic m_tail = 0;
45 | };
--------------------------------------------------------------------------------
/src/SpinLock.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include // std::atomic
3 | #include // _mm_pause()
// Minimal busy-waiting lock built on a single atomic flag.
//
// Provides lock()/unlock() so it works with std::lock_guard, try_lock() so it also
// works with std::unique_lock/std::scoped_lock, and the aliases Lock/Unlock and
// Aquire/Release used elsewhere in the project.
class SpinLock {
    std::atomic<bool> m_lock{false};
public:
    // Spins until the lock has been acquired.
    void Lock() {
        // https://www.slideshare.net/ssuser052dd11/igc2018-amd-don-woligroski-why-ryzen
        // page 43
        while (true) {
            // wait on a plain load first, so waiting threads do not keep writing to the flag
            while (m_lock) {
                _mm_pause();
            }
            // exit only if we changed the flag from false to true,
            // i.e. once we have acquired the (spin)lock
            if (!m_lock.exchange(true))
                break;
        }
    }
    // Releases the lock. Must only be called by the current holder.
    void Unlock() {
        m_lock.store(false);
    }
    // Tries to acquire the lock without waiting; returns true on success.
    bool try_lock() {
        return !m_lock.load() && !m_lock.exchange(true);
    }
    void lock() { return Lock(); }
    void unlock() { return Unlock(); }
    void Aquire() { return Lock(); }
    void Release() { return Unlock(); }
};
--------------------------------------------------------------------------------
/src/main.cpp:
--------------------------------------------------------------------------------
1 | #include // std::cout
2 | #include // std::string
3 |
4 | #include "JobSystem.h" // JobSystem
5 |
6 | JobSystem g_jobSystem;
7 |
8 | void TheMostCreativeWayToCalculateFibonacci(void* pNumberVoid) {
9 | int* pNumber = reinterpret_cast(pNumberVoid);
10 | int n = *pNumber;
11 | if (n > 1) {
12 | int fibNMinus1 = n-1;
13 | int fibNMinus2 = n-2;
14 | JobSystem::Declaration adecl[2];
15 | adecl[0].m_pEntryPoint = TheMostCreativeWayToCalculateFibonacci;
16 | adecl[0].m_param = &fibNMinus1;
17 | adecl[1].m_pEntryPoint = TheMostCreativeWayToCalculateFibonacci;
18 | adecl[1].m_param = &fibNMinus2;
19 |
20 | g_jobSystem.KickJobsAndWait(2, adecl);
21 | n = fibNMinus1 + fibNMinus2;
22 | *pNumber = n;
23 | }
24 | }
25 |
26 | int main() {
27 | const int numberOfThreads = std::thread::hardware_concurrency();
28 | g_jobSystem.Initialize(numberOfThreads);
29 |
30 | for (int i = 0; i < 2; i++) {
31 | int n = 10+i;
32 | printf("Fibonacci(%i)=", n);
33 | JobSystem::Declaration decl;
34 | decl.m_pEntryPoint = TheMostCreativeWayToCalculateFibonacci;
35 | decl.m_param = &n;
36 | g_jobSystem.KickJobAndWait(decl);
37 | printf("%i\nSleeping for 10s...\n", n);
38 | std::this_thread::sleep_for(std::chrono::seconds(10)); // to show that workers go to sleep
39 | }
40 |
41 | printf("Closing!\n");
42 | g_jobSystem.JoinAndTerminate();
43 | }
--------------------------------------------------------------------------------
/src/types.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include // size specified std types
3 |
// Fixed-width unsigned integer aliases.
using U8 = std::uint8_t;
using U16 = std::uint16_t;
using U32 = std::uint32_t;
using U64 = std::uint64_t;

// Fixed-width signed integer aliases.
using I8 = std::int8_t;
using I16 = std::int16_t;
using I32 = std::int32_t;
using I64 = std::int64_t; // was std::uint64_t, which made the "signed" alias unsigned

using uchar = unsigned char;
--------------------------------------------------------------------------------