├── .github └── workflows │ └── main.yml ├── .gitignore ├── CHANGES.md ├── CODE_OF_CONDUCT.md ├── LICENSE.md ├── Makefile ├── README.md ├── domainslib.opam ├── dune-project ├── lib ├── chan.ml ├── chan.mli ├── domainslib.ml ├── dune ├── fun_queue.ml ├── fun_queue.mli ├── multi_channel.ml ├── task.ml └── task.mli └── test ├── LU_decomposition_multicore.ml ├── backtrace.ml ├── chan_stm_tests.ml ├── dune ├── enumerate_par.ml ├── fib.ml ├── fib_par.ml ├── game_of_life.ml ├── game_of_life_multicore.ml ├── kcas_integration.ml ├── off_by_one.ml ├── prefix_sum.ml ├── spectralnorm2.ml ├── spectralnorm2_multicore.ml ├── sum_par.ml ├── summed_area_table.ml ├── task_more_deps.ml ├── task_one_dep.ml ├── task_parallel.ml ├── task_throughput.ml ├── test_chan.ml ├── test_deadlock.ml ├── test_parallel_find.ml ├── test_parallel_scan.ml ├── test_task.ml ├── test_task_crash.ml └── test_task_empty.ml /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: main 2 | 3 | on: 4 | pull_request: 5 | push: 6 | schedule: 7 | # Prime the caches every Monday 8 | - cron: 0 1 * * MON 9 | 10 | jobs: 11 | windows: 12 | runs-on: windows-latest 13 | 14 | env: 15 | QCHECK_MSG_INTERVAL: '60' 16 | 17 | steps: 18 | - name: Checkout code 19 | uses: actions/checkout@v2 20 | 21 | - name: Use OCaml ${{ matrix.ocaml-compiler }} 22 | uses: ocaml/setup-ocaml@v2 23 | with: 24 | opam-pin: false 25 | opam-depext: false 26 | ocaml-compiler: ocaml.5.0.0,ocaml-option-mingw 27 | opam-repositories: | 28 | dra27: https://github.com/dra27/opam-repository.git#windows-5.0 29 | default: https://github.com/ocaml-opam/opam-repository-mingw.git#sunset 30 | upstream: https://github.com/ocaml/opam-repository.git 31 | cache-prefix: ${{ steps.multicore_hash.outputs.commit }} 32 | 33 | - run: opam install . 
--deps-only --with-test 34 | 35 | - run: opam exec -- dune build 36 | 37 | - run: opam exec -- dune runtest -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | _build 3 | .merlin 4 | -------------------------------------------------------------------------------- /CHANGES.md: -------------------------------------------------------------------------------- 1 | ## 0.5.2 2 | 3 | * Upgrade to Saturn 1.0 (#129, @Sudha247) 4 | * Update README.md instruction to use OCaml 5.1.0 (#123, @punchagan) 5 | * Fix Saturn.Queue function (#121, @Sudha247) 6 | * Make parallel_scan work on noncommutative functions (#118, @aytao) 7 | * Test condition tweaks (#113, @jmid) 8 | * Adjust PBTs based on recommended_domain_count (#112, @jmid) 9 | 10 | ## 0.5.1 11 | 12 | * Add parallel_find (#90, @gasche) 13 | * Update CI (#93, @Sudha247) 14 | * Optimisation to work-stealing (#96, @art-w) 15 | * Improve docs presentation (#99, @metanivek) 16 | * Property based tests (#100, jmid) 17 | * Task: avoid double handler installation (#101, @gasche & @clef-men) 18 | * Fix a benign data-race in Chan reported by ocaml-tsan (#103, @art-w) 19 | * Dune, opam, and GitHub Actions fixes (#105, @MisterDA) 20 | * domain local await support (#107, @polytypic) 21 | * Windows run on GitHub Actions (#110, @Sudha247) 22 | * Adjust PBTs based on recommended_domain_count (#112, @jmid) 23 | * Test condition tweaks (#113, @jmid) 24 | 25 | ## 0.5.0 26 | 27 | This release includes: 28 | 29 | * Bug fix for `parallel_for_reduce` on empty loops. 30 | * Make Chan.t and Task.promise injective #69 31 | * Add lockfree dependency #70 32 | * CI fixes (#73, #76) 33 | * Breaking change: Rename `num_additional_domains` to `num_domains` for setup_pool 34 | * Documentation updates (#80, #81, #82) 35 | 36 | ## 0.4.2 37 | 38 | Includes Effect.eff -> Effect.t change from OCaml trunk. 
(#65) 39 | 40 | ## 0.4.1 41 | 42 | This release fixes compatibility with OCaml 5.00.0+trunk in #61. Breaks compatibility with older Multicore variants 4.12.0+domains and 4.12.0+domains+effects 43 | 44 | ## 0.4.0 45 | 46 | This release includes: 47 | 48 | * Usage of effect handlers for task creation. This introduces a breaking change; all computations need to be enclosed in a Task.run function. See #51. 49 | * Multi_channel uses a per-channel domain-local key, removing the global key. #50 50 | * Bug fixes in parallel_scan. #60 51 | 52 | ## 0.3.2 53 | 54 | Corresponding updates for breaking changes introduced in ocaml-multicore/ocaml-multicore#704 55 | 56 | * Updated with the new interface Domain.cpu_relax 57 | * Domain.timer_ticks replaced with Mirage clock. 58 | 59 | ## 0.3.1 60 | 61 | * #45 adds support for named pools. This is a breaking change with setup_pool taking an optional name parameter and an extra unit parameter. 62 | * A minor bug fix in parallel_for_reduce. 63 | 64 | ## 0.3.0 65 | 66 | This release includes: 67 | 68 | * A breaking change for Task pools where the num_domains argument has been renamed num_additional_domains to clear up potential confusion; see #31. 69 | * A new work-stealing scheduler for Task pools using domain local Chase Lev deques #29; this can improve performance significantly for some workloads. 70 | * A removal of closure allocation in Chan #28. 71 | * A move to using the Mutex & Condition modules for the implementation of Chan #24. 72 | * Various documentation and packaging improvements (#21, #27, #30, #32). 
73 | 74 | ## 0.2.2 75 | 76 | Updates to: 77 | 78 | * parallel_for to use new task distribution algorithm and allow default chunk_size (#16) 79 | * parallel_for_reduce to use new task distribution algorithm and allow default chunk_size parameter (#18) 80 | 81 | ## 0.2.1 82 | 83 | * `recv_poll` made non-allocating 84 | * Addition of parallel_scan #5 85 | 86 | ## 0.2.0 87 | 88 | * New Tasks library with support for async/await parallelism and parallel for loops. 89 | * Adds support for non-blocking Chan.send_poll and Chan.recv_poll. 90 | 91 | Thanks to @gasche for API design discussions. 92 | 93 | ## 0.1.0 94 | 95 | Initial release -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | This project has adopted the [OCaml Code of Conduct](https://github.com/ocaml/code-of-conduct/blob/main/CODE_OF_CONDUCT.md). 4 | 5 | # Enforcement 6 | 7 | This project follows the OCaml Code of Conduct 8 | [enforcement policy](https://github.com/ocaml/code-of-conduct/blob/main/CODE_OF_CONDUCT.md#enforcement). 9 | 10 | To report any violations, please contact: 11 | 12 | * KC Sivaramakrishnan 13 | * Sudha Parimala 14 | * Vesa Karvonen 15 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016 KC Sivaramakrishnan 2 | 3 | Permission to use, copy, modify, and/or distribute this software for any 4 | purpose with or without fee is hereby granted, provided that the above 5 | copyright notice and this permission notice appear in all copies. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 8 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 9 | MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 10 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 11 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 12 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 13 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 14 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | dune build @install 3 | 4 | run_test: 5 | OCAMLRUNPARAM="b=1" dune runtest -f 6 | 7 | clean: 8 | dune clean 9 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Domainslib - Nested-parallel programming 2 | 3 | Domainslib provides support for nested-parallel programming. Domainslib provides async/await mechanism for spawning parallel tasks and awaiting their results. On top of this mechanism, domainslib provides parallel iteration functions. At its core, domainslib has an efficient implementation of work-stealing queue in order to efficiently share tasks with other domains. 4 | 5 | Here is a _sequential_ program that computes nth Fibonacci number using recursion: 6 | 7 | ```ocaml 8 | (* fib.ml *) 9 | let n = try int_of_string Sys.argv.(1) with _ -> 1 10 | 11 | let rec fib n = if n < 2 then 1 else fib (n - 1) + fib (n - 2) 12 | 13 | let main () = 14 | let r = fib n in 15 | Printf.printf "fib(%d) = %d\n%!" 
n r 16 | 17 | let _ = main () 18 | ``` 19 | 20 | We can parallelise this program using Domainslib: 21 | 22 | ```ocaml 23 | (* fib_par.ml *) 24 | let num_domains = try int_of_string Sys.argv.(1) with _ -> 1 25 | let n = try int_of_string Sys.argv.(2) with _ -> 1 26 | 27 | (* Sequential Fibonacci *) 28 | let rec fib n = 29 | if n < 2 then 1 else fib (n - 1) + fib (n - 2) 30 | 31 | module T = Domainslib.Task 32 | 33 | let rec fib_par pool n = 34 | if n > 20 then begin 35 | let a = T.async pool (fun _ -> fib_par pool (n-1)) in 36 | let b = T.async pool (fun _ -> fib_par pool (n-2)) in 37 | T.await pool a + T.await pool b 38 | end else 39 | (* Call sequential Fibonacci if the available work is small *) 40 | fib n 41 | 42 | let main () = 43 | let pool = T.setup_pool ~num_domains:(num_domains - 1) () in 44 | let res = T.run pool (fun _ -> fib_par pool n) in 45 | T.teardown_pool pool; 46 | Printf.printf "fib(%d) = %d\n" n res 47 | 48 | let _ = main () 49 | ``` 50 | 51 | The parallel program scales nicely compared to the sequential version. The results presented below were obtained on a 2.3 GHz Quad-Core Intel Core i7 MacBook Pro with 4 cores and 8 hardware threads. 
52 | 53 | ```bash 54 | $ hyperfine './fib.exe 42' './fib_par.exe 2 42' \ 55 | './fib_par.exe 4 42' './fib_par.exe 8 42' 56 | Benchmark 1: ./fib.exe 42 57 | Time (mean ± sd): 1.217 s ± 0.018 s [User: 1.203 s, System: 0.004 s] 58 | Range (min … max): 1.202 s … 1.261 s 10 runs 59 | 60 | Benchmark 2: ./fib_par.exe 2 42 61 | Time (mean ± sd): 628.2 ms ± 2.9 ms [User: 1243.1 ms, System: 4.9 ms] 62 | Range (min … max): 625.7 ms … 634.5 ms 10 runs 63 | 64 | Benchmark 3: ./fib_par.exe 4 42 65 | Time (mean ± sd): 337.6 ms ± 23.4 ms [User: 1321.8 ms, System: 8.4 ms] 66 | Range (min … max): 318.5 ms … 377.6 ms 10 runs 67 | 68 | Benchmark 4: ./fib_par.exe 8 42 69 | Time (mean ± sd): 250.0 ms ± 9.4 ms [User: 1877.1 ms, System: 12.6 ms] 70 | Range (min … max): 242.5 ms … 277.3 ms 11 runs 71 | 72 | Summary 73 | './fib_par.exe 8 42' ran 74 | 1.35 ± 0.11 times faster than './fib_par.exe 4 42' 75 | 2.51 ± 0.10 times faster than './fib_par.exe 2 42' 76 | 4.87 ± 0.20 times faster than './fib.exe 42' 77 | ``` 78 | 79 | More example programs are available [here](https://github.com/ocaml-multicore/domainslib/tree/master/test). 80 | 81 | ## Installation 82 | 83 | You can install this library using `OPAM`. 
84 | 85 | ```bash 86 | $ opam switch create 5.3.0 87 | $ opam install domainslib 88 | ``` 89 | 90 | ## Development 91 | 92 | If you are interested in hacking on the implementation, then `opam pin` this repository: 93 | 94 | ```bash 95 | $ opam switch create 5.0.0+trunk --repo=default,alpha=git+https://github.com/kit-ty-kate/opam-alpha-repository.git 96 | $ git clone https://github.com/ocaml-multicore/domainslib 97 | $ cd domainslib 98 | $ opam pin add domainslib file://`pwd` 99 | ``` 100 | -------------------------------------------------------------------------------- /domainslib.opam: -------------------------------------------------------------------------------- 1 | # This file is generated by dune, edit dune-project instead 2 | opam-version: "2.0" 3 | synopsis: "Parallel Structures over Domains for Multicore OCaml" 4 | maintainer: ["KC Sivaramakrishnan " "Sudha Parimala"] 5 | authors: ["KC Sivaramakrishnan "] 6 | license: "ISC" 7 | homepage: "https://github.com/ocaml-multicore/domainslib" 8 | doc: "https://ocaml-multicore.github.io/domainslib/doc" 9 | bug-reports: "https://github.com/ocaml-multicore/domainslib/issues" 10 | depends: [ 11 | "dune" {>= "3.0"} 12 | "ocaml" {>= "5.0"} 13 | "saturn" {>= "1.0.0"} 14 | "domain-local-await" {>= "0.1.0"} 15 | "kcas" {>= "0.3.0" & with-test} 16 | "mirage-clock-unix" {with-test & >= "4.2.0"} 17 | "qcheck-core" {with-test & >= "0.20"} 18 | "qcheck-multicoretests-util" {with-test & >= "0.1"} 19 | "qcheck-stm" {with-test & >= "0.1"} 20 | "odoc" {with-doc} 21 | ] 22 | build: [ 23 | ["dune" "subst"] {dev} 24 | [ 25 | "dune" 26 | "build" 27 | "-p" 28 | name 29 | "-j" 30 | jobs 31 | "@install" 32 | "@runtest" {with-test} 33 | "@doc" {with-doc} 34 | ] 35 | ] 36 | dev-repo: "git+https://github.com/ocaml-multicore/domainslib.git" 37 | -------------------------------------------------------------------------------- /dune-project: -------------------------------------------------------------------------------- 1 | (lang dune 3.0) 2 | 
(name domainslib) 3 | (formatting disabled) 4 | (generate_opam_files true) 5 | 6 | (source (github ocaml-multicore/domainslib)) 7 | (authors "KC Sivaramakrishnan ") 8 | (maintainers "KC Sivaramakrishnan " "Sudha Parimala") 9 | (documentation "https://ocaml-multicore.github.io/domainslib/doc") 10 | (license "ISC") 11 | 12 | (package 13 | (name domainslib) 14 | (synopsis "Parallel Structures over Domains for Multicore OCaml") 15 | (depends 16 | (ocaml (>= "5.0")) 17 | (saturn (>= "0.4.0")) 18 | (domain-local-await (>= 0.1.0)) 19 | (kcas (and (>= 0.3.0) :with-test)) 20 | (mirage-clock-unix (and :with-test (>= "4.2.0"))) 21 | (qcheck-core (and :with-test (>= "0.20"))) 22 | (qcheck-multicoretests-util (and :with-test (>= "0.1"))) 23 | (qcheck-stm (and :with-test (>= "0.1"))))) 24 | -------------------------------------------------------------------------------- /lib/chan.ml: -------------------------------------------------------------------------------- 1 | (* mutex_condvar will be used per domain; so multiple fibers or 2 | systhreads may share a mutex_condvar variable *) 3 | type mutex_condvar = { 4 | mutex: Mutex.t; 5 | condition: Condition.t 6 | } 7 | 8 | type waiting_notified = 9 | | Waiting 10 | | Notified 11 | 12 | type 'a contents = 13 | | Empty of {receivers: ('a option ref * mutex_condvar) Fun_queue.t} 14 | | NotEmpty of {senders: ('a * waiting_notified ref * mutex_condvar) Fun_queue.t; messages: 'a Fun_queue.t} 15 | 16 | type 'a t = { 17 | buffer_size: int option; 18 | contents: 'a contents Atomic.t 19 | } 20 | 21 | let mutex_condvar_key = 22 | Domain.DLS.new_key (fun () -> 23 | let m = Mutex.create () in 24 | let c = Condition.create () in 25 | {mutex=m; condition=c}) 26 | 27 | let make_bounded n = 28 | if n < 0 then invalid_arg "Chan.make_bounded" ; 29 | {buffer_size= Some n; 30 | contents = Atomic.make (Empty {receivers= Fun_queue.empty; })} 31 | 32 | let make_unbounded () = 33 | {buffer_size= None; 34 | contents = Atomic.make (Empty {receivers= 
Fun_queue.empty})} 35 | 36 | (* [send'] is shared by both the blocking and polling versions. Returns a 37 | * boolean indicating whether the send was successful. Hence, it always returns 38 | * [true] if [polling] is [false]. *) 39 | let rec send' {buffer_size; contents} v ~polling = 40 | let open Fun_queue in 41 | let old_contents = Atomic.get contents in 42 | match old_contents with 43 | | Empty {receivers} -> begin 44 | (* The channel is empty (no senders) *) 45 | match pop receivers with 46 | | None -> 47 | (* The channel is empty (no senders) and no waiting receivers *) 48 | if buffer_size = Some 0 then 49 | (* The channel is empty (no senders), no waiting receivers, and 50 | * buffer size is 0 *) 51 | begin if not polling then begin 52 | (* The channel is empty (no senders), no waiting receivers, 53 | * buffer size is 0 and we're not polling *) 54 | let mc = Domain.DLS.get mutex_condvar_key in 55 | let cond_slot = ref Waiting in 56 | let new_contents = 57 | NotEmpty 58 | {messages= empty; senders= push empty (v, cond_slot, mc)} 59 | in 60 | if Atomic.compare_and_set contents old_contents new_contents 61 | then begin 62 | Mutex.lock mc.mutex; 63 | while !cond_slot = Waiting do 64 | Condition.wait mc.condition mc.mutex 65 | done; 66 | Mutex.unlock mc.mutex; 67 | true 68 | end else send' {buffer_size; contents} v ~polling 69 | end else 70 | (* The channel is empty (no senders), no waiting receivers, 71 | * buffer size is 0 and we're polling *) 72 | false 73 | end 74 | else 75 | (* The channel is empty (no senders), no waiting receivers, and 76 | * the buffer size is non-zero *) 77 | let new_contents = 78 | NotEmpty {messages= push empty v; senders= empty} 79 | in 80 | if Atomic.compare_and_set contents old_contents new_contents 81 | then true 82 | else send' {buffer_size; contents} v ~polling 83 | | Some ((r, mc), receivers') -> 84 | (* The channel is empty (no senders) and there are waiting receivers 85 | * *) 86 | let new_contents = Empty {receivers= 
receivers'} in 87 | if Atomic.compare_and_set contents old_contents new_contents 88 | then begin 89 | Mutex.lock mc.mutex; 90 | r := Some v; 91 | Mutex.unlock mc.mutex; 92 | Condition.broadcast mc.condition; 93 | true 94 | end else send' {buffer_size; contents} v ~polling 95 | end 96 | | NotEmpty {senders; messages} -> 97 | (* The channel is not empty *) 98 | if buffer_size = Some (length messages) then 99 | (* The channel is not empty, and the buffer is full *) 100 | begin if not polling then 101 | (* The channel is not empty, the buffer is full and we're not 102 | * polling *) 103 | let cond_slot = ref Waiting in 104 | let mc = Domain.DLS.get mutex_condvar_key in 105 | let new_contents = 106 | NotEmpty {senders= push senders (v, cond_slot, mc); messages} 107 | in 108 | if Atomic.compare_and_set contents old_contents new_contents then begin 109 | Mutex.lock mc.mutex; 110 | while !cond_slot = Waiting do 111 | Condition.wait mc.condition mc.mutex; 112 | done; 113 | Mutex.unlock mc.mutex; 114 | true 115 | end else send' {buffer_size; contents} v ~polling 116 | else 117 | (* The channel is not empty, the buffer is full and we're 118 | * polling *) 119 | false 120 | end 121 | else 122 | (* The channel is not empty, and the buffer is not full *) 123 | let new_contents = 124 | NotEmpty {messages= push messages v; senders} 125 | in 126 | if Atomic.compare_and_set contents old_contents new_contents 127 | then true 128 | else send' {buffer_size; contents} v ~polling 129 | 130 | let send c v = 131 | let r = send' c v ~polling:false in 132 | assert r 133 | 134 | let send_poll c v = send' c v ~polling:true 135 | 136 | (* [recv'] is shared by both the blocking and polling versions. Returns a an 137 | * optional value indicating whether the receive was successful. Hence, it 138 | * always returns [Some v] if [polling] is [false]. 
*) 139 | let rec recv' {buffer_size; contents} ~polling = 140 | let open Fun_queue in 141 | let old_contents = Atomic.get contents in 142 | match old_contents with 143 | | Empty {receivers} -> 144 | (* The channel is empty (no senders) *) 145 | if not polling then begin 146 | (* The channel is empty (no senders), and we're not polling *) 147 | let msg_slot = ref None in 148 | let mc = Domain.DLS.get mutex_condvar_key in 149 | let new_contents = 150 | Empty {receivers= push receivers (msg_slot, mc)} 151 | in 152 | if Atomic.compare_and_set contents old_contents new_contents then 153 | begin 154 | Mutex.lock mc.mutex; 155 | while !msg_slot = None do 156 | Condition.wait mc.condition mc.mutex; 157 | done; 158 | Mutex.unlock mc.mutex; 159 | !msg_slot 160 | end else recv' {buffer_size; contents} ~polling 161 | end else 162 | (* The channel is empty (no senders), and we're polling *) 163 | None 164 | | NotEmpty {senders; messages} -> 165 | (* The channel is not empty *) 166 | match (pop messages, pop senders) with 167 | | None, None -> 168 | (* The channel is not empty, but no senders or messages *) 169 | failwith "Chan.recv: Impossible - channel state" 170 | | Some (m, messages'), None -> 171 | (* The channel is not empty, there is a message and no 172 | * waiting senders *) 173 | let new_contents = 174 | if length messages' = 0 then 175 | Empty {receivers = empty} 176 | else 177 | NotEmpty {messages= messages'; senders} 178 | in 179 | if Atomic.compare_and_set contents old_contents new_contents 180 | then Some m 181 | else recv' {buffer_size; contents} ~polling 182 | | None, Some ((m, c, mc), senders') -> 183 | (* The channel is not empty, there are no messages, and there 184 | * is a waiting sender. This is only possible is the buffer 185 | * size is 0. 
*) 186 | assert (buffer_size = Some 0) ; 187 | let new_contents = 188 | if length senders' = 0 then 189 | Empty {receivers = empty} 190 | else 191 | NotEmpty {messages; senders= senders'} 192 | in 193 | if Atomic.compare_and_set contents old_contents new_contents 194 | then begin 195 | Mutex.lock mc.mutex; 196 | c := Notified; 197 | Mutex.unlock mc.mutex; 198 | Condition.broadcast mc.condition; 199 | Some m 200 | end else recv' {buffer_size; contents} ~polling 201 | | Some (m, messages'), Some ((ms, sc, mc), senders') -> 202 | (* The channel is not empty, there is a message, and there is a 203 | * waiting sender. *) 204 | let new_contents = 205 | NotEmpty {messages= push messages' ms; senders= senders'} 206 | in 207 | if Atomic.compare_and_set contents old_contents new_contents 208 | then begin 209 | Mutex.lock mc.mutex; 210 | sc := Notified; 211 | Mutex.unlock mc.mutex; 212 | Condition.broadcast mc.condition; 213 | Some m 214 | end else recv' {buffer_size; contents} ~polling 215 | 216 | let recv c = 217 | match recv' c ~polling:false with 218 | | None -> failwith "Chan.recv: impossible - no message" 219 | | Some m -> m 220 | 221 | let recv_poll c = 222 | match Atomic.get c.contents with 223 | | Empty _ -> None 224 | | _ -> recv' c ~polling:true 225 | -------------------------------------------------------------------------------- /lib/chan.mli: -------------------------------------------------------------------------------- 1 | type !'a t 2 | (** The type of channels *) 3 | 4 | val make_bounded : int -> 'a t 5 | (** [make_bounded n] makes a bounded channel with a buffer of size [n]. Raises 6 | [Invalid_argument "Chan.make_bounded"] if the buffer size is less than 0. 7 | 8 | With a buffer size of 0, the send operation becomes synchronous. With a 9 | buffer size of 1, you get the familiar MVar structure. The channel may be 10 | shared between many sending and receiving domains. 
*) 11 | 12 | val make_unbounded : unit -> 'a t 13 | (** Returns an unbounded channel *) 14 | 15 | val send : 'a t -> 'a -> unit 16 | (** [send c v] sends the values [v] over the channel [c]. If the channel buffer 17 | is full then the sending domain blocks until space becomes available. *) 18 | 19 | val send_poll : 'a t -> 'a -> bool 20 | (** [send_poll c v] attempts to send the value [v] over the channel [c]. If the 21 | channel buffer is not full, the message is sent and returns [true]. Otherwise, 22 | returns [false]. *) 23 | 24 | val recv : 'a t -> 'a 25 | (** [recv c] returns a value [v] received over the channel. If the channel 26 | buffer is empty then the domain blocks until a message is sent on the 27 | channel. *) 28 | 29 | val recv_poll : 'a t -> 'a option 30 | (** [recv_poll c] attempts to receive a message on the channel [c]. If a 31 | message [v] is available on the channel then [Some v] is returned. 32 | Otherwise, returns [None]. *) 33 | -------------------------------------------------------------------------------- /lib/domainslib.ml: -------------------------------------------------------------------------------- 1 | module Chan = Chan 2 | module Task = Task 3 | -------------------------------------------------------------------------------- /lib/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name domainslib) 3 | (public_name domainslib) 4 | (libraries saturn domain-local-await)) 5 | -------------------------------------------------------------------------------- /lib/fun_queue.ml: -------------------------------------------------------------------------------- 1 | type 'a t = {length: int; front: 'a list; back: 'a list} 2 | 3 | let empty = {length= 0; front= []; back= []} 4 | 5 | let push {length; front; back} v = {length= length + 1; front; back= v :: back} 6 | 7 | let length {length; _} = length 8 | 9 | let pop {length; front; back} = 10 | match front with 11 | | [] -> ( 12 | match 
List.rev back with 13 | | [] -> 14 | None 15 | | x :: xs -> 16 | Some (x, {front= xs; length= length - 1; back= []}) ) 17 | | x :: xs -> 18 | Some (x, {front= xs; length= length - 1; back}) 19 | -------------------------------------------------------------------------------- /lib/fun_queue.mli: -------------------------------------------------------------------------------- 1 | type 'a t 2 | (** The type of functional queue *) 3 | 4 | val empty : 'a t 5 | (** Empty queue *) 6 | 7 | val length : 'a t -> int 8 | (** Returns the length of the queue *) 9 | 10 | val push : 'a t -> 'a -> 'a t 11 | (** [push q v] returns a new queue with [v] pushed to the back of [q] *) 12 | 13 | val pop : 'a t -> ('a * 'a t) option 14 | (** [pop q] returns [None] if the queue is empty. If the queue is non-empty, it 15 | returns [Some (v,q')] where [v] is the element popped from the head of [q] 16 | and [q'] is the rest of the queue. *) 17 | -------------------------------------------------------------------------------- /lib/multi_channel.ml: -------------------------------------------------------------------------------- 1 | (* 2 | * Copyright (c) 2021, Tom Kelly 3 | * 4 | * Permission to use, copy, modify, and/or distribute this software for any 5 | * purpose with or without fee is hereby granted, provided that the above 6 | * copyright notice and this permission notice appear in all copies. 7 | * 8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
15 | *) 16 | 17 | module Ws_deque = Saturn.Work_stealing_deque 18 | 19 | type mutex_condvar = { 20 | mutex: Mutex.t; 21 | condition: Condition.t 22 | } 23 | 24 | type waiting_status = 25 | | Waiting 26 | | Released 27 | 28 | type dls_state = { 29 | mutable id: int; 30 | mutable steal_offsets: int array; 31 | rng_state: Random.State.t; 32 | mc: mutex_condvar; 33 | } 34 | 35 | module Foreign_queue = Saturn.Queue 36 | 37 | type 'a t = { 38 | channels: 'a Ws_deque.t array; 39 | (* Queue for enqueuing work from outside of the pool. *) 40 | foreign_queue: 'a Foreign_queue.t; 41 | waiters: (waiting_status ref * mutex_condvar ) Chan.t; 42 | next_domain_id: int Atomic.t; 43 | recv_block_spins: int; 44 | dls_key: dls_state Domain.DLS.key; 45 | } 46 | 47 | let dls_make_key () = 48 | Domain.DLS.new_key (fun () -> 49 | { 50 | id = -1; 51 | steal_offsets = Array.make 1 0; 52 | rng_state = Random.State.make_self_init (); 53 | mc = {mutex=Mutex.create (); condition=Condition.create ()}; 54 | }) 55 | 56 | let rec log2 n = 57 | if n <= 1 then 0 else 1 + (log2 (n asr 1)) 58 | 59 | let make ?(recv_block_spins = 2048) n = 60 | { channels = Array.init n (fun _ -> Ws_deque.create ()); 61 | foreign_queue = Foreign_queue.create (); 62 | waiters = Chan.make_unbounded (); 63 | next_domain_id = Atomic.make 0; 64 | recv_block_spins; 65 | dls_key = dls_make_key () 66 | } 67 | 68 | let register_domain mchan = 69 | let id = Atomic.fetch_and_add mchan.next_domain_id 1 in 70 | assert(id < Array.length mchan.channels); 71 | id 72 | 73 | let init_domain_state mchan dls_state = 74 | let id = register_domain mchan in 75 | let len = Array.length mchan.channels in 76 | dls_state.id <- id; 77 | dls_state.steal_offsets <- Array.init (len - 1) (fun i -> (id + i + 1) mod len); 78 | dls_state 79 | [@@inline never] 80 | 81 | let get_local_state mchan = 82 | let dls_state = Domain.DLS.get mchan.dls_key in 83 | if dls_state.id >= 0 then begin 84 | assert (dls_state.id < Array.length mchan.channels); 85 | 
dls_state 86 | end 87 | else (init_domain_state mchan dls_state) 88 | [@@inline] 89 | 90 | let clear_local_state mchan = 91 | let dls_state = Domain.DLS.get mchan.dls_key in 92 | dls_state.id <- (-1) 93 | 94 | let rec check_waiters mchan = 95 | match Chan.recv_poll mchan.waiters with 96 | | None -> () 97 | | Some (status, mc) -> 98 | (* avoid the lock if we possibly can *) 99 | if !status = Released then check_waiters mchan 100 | else begin 101 | Mutex.lock mc.mutex; 102 | match !status with 103 | | Waiting -> 104 | begin 105 | status := Released; 106 | Mutex.unlock mc.mutex; 107 | Condition.broadcast mc.condition 108 | end 109 | | Released -> 110 | begin 111 | (* this waiter is already released, it might have found something on a poll *) 112 | Mutex.unlock mc.mutex; 113 | check_waiters mchan 114 | end 115 | end 116 | 117 | let send_foreign mchan v = 118 | Foreign_queue.push mchan.foreign_queue v; 119 | check_waiters mchan 120 | 121 | let send mchan v = 122 | let id = (get_local_state mchan).id in 123 | Ws_deque.push (Array.unsafe_get mchan.channels id) v; 124 | check_waiters mchan 125 | 126 | let rec recv_poll_loop mchan dls cur_offset = 127 | let offsets = dls.steal_offsets in 128 | let k = (Array.length offsets) - cur_offset in 129 | if k = 0 then raise Exit 130 | else begin 131 | let idx = cur_offset + (Random.State.int dls.rng_state k) in 132 | let t = Array.unsafe_get offsets idx in 133 | let channel = Array.unsafe_get mchan.channels t in 134 | try 135 | Ws_deque.steal_exn channel 136 | with 137 | | Saturn.Work_stealing_deque.Empty -> 138 | begin 139 | Array.unsafe_set offsets idx (Array.unsafe_get offsets cur_offset); 140 | Array.unsafe_set offsets cur_offset t; 141 | recv_poll_loop mchan dls (cur_offset+1) 142 | end 143 | end 144 | 145 | let recv_poll_with_dls mchan dls = 146 | try 147 | Ws_deque.pop_exn (Array.unsafe_get mchan.channels dls.id) 148 | with 149 | | Saturn.Work_stealing_deque.Empty -> 150 | match Foreign_queue.pop_opt mchan.foreign_queue with 
151 | | None -> recv_poll_loop mchan dls 0 152 | | Some v -> v 153 | [@@inline] 154 | 155 | let recv_poll mchan = 156 | recv_poll_with_dls mchan (get_local_state mchan) 157 | 158 | let rec recv_poll_repeated mchan dls repeats = 159 | try 160 | recv_poll_with_dls mchan dls 161 | with 162 | | Exit -> 163 | if repeats = 1 then raise Exit 164 | else begin 165 | Domain.cpu_relax (); 166 | recv_poll_repeated mchan dls (repeats - 1) 167 | end 168 | 169 | let rec recv mchan = 170 | let dls = get_local_state mchan in 171 | try 172 | recv_poll_repeated mchan dls mchan.recv_block_spins 173 | with 174 | Exit -> 175 | begin 176 | (* Didn't find anything, prepare to block: 177 | * - enqueue our wait block in the waiter queue 178 | * - check the queue again 179 | * - go to sleep if our wait block has not been notified 180 | * - when notified retry the recieve 181 | *) 182 | let status = ref Waiting in 183 | let mc = dls.mc in 184 | Chan.send mchan.waiters (status, mc); 185 | try 186 | let v = recv_poll mchan in 187 | (* need to check the status as might take an item 188 | which is not the one an existing sender has woken us 189 | to take *) 190 | Mutex.lock mc.mutex; 191 | begin match !status with 192 | | Waiting -> (status := Released; Mutex.unlock mc.mutex) 193 | | Released -> 194 | (* we were simultaneously released from a sender; 195 | so need to release a waiter *) 196 | (Mutex.unlock mc.mutex; check_waiters mchan) 197 | end; 198 | v 199 | with 200 | | Exit -> 201 | if !status = Waiting then begin 202 | Mutex.lock mc.mutex; 203 | while !status = Waiting do 204 | Condition.wait mc.condition mc.mutex 205 | done; 206 | Mutex.unlock mc.mutex 207 | end; 208 | recv mchan 209 | end 210 | -------------------------------------------------------------------------------- /lib/task.ml: -------------------------------------------------------------------------------- 1 | open Effect 2 | open Effect.Deep 3 | 4 | type 'a task = unit -> 'a 5 | 6 | type message = 7 | | Work of (unit -> unit) 
  (* Invariant: the Work function does not need to run under the 'step' handler,
     it installs its own handler or re-invokes a deep-handler continuation. *)
  | Quit

type task_chan = message Multi_channel.t

type pool_data = {
  domains : unit Domain.t array;   (* worker domains spawned for this pool *)
  task_chan : task_chan;           (* shared work queue *)
  name: string option              (* registered name, if any *)
}

(* [None] once the pool has been torn down. *)
type pool = pool_data option Atomic.t

type 'a promise_state =
  Returned of 'a
| Raised of exn * Printexc.raw_backtrace
  (* [Pending ks]: not yet resolved; [ks] are the suspended continuations
     (paired with the channel on which to reschedule them). *)
| Pending of (('a, unit) continuation * task_chan) list

type 'a promise = 'a promise_state Atomic.t

(* Effect performed by [await] when the promise is still pending. *)
type _ t += Wait : 'a promise * task_chan -> 'a t

let get_pool_data p =
  match Atomic.get p with
  | None -> invalid_arg "pool already torn down"
  | Some p -> p

(* Resume a suspended continuation with a value / an exception by queueing a
   Work item on the channel it was registered with. *)
let cont v (k, c) = Multi_channel.send c (Work (fun _ -> continue k v))
let discont e bt (k, c) = Multi_channel.send c (Work (fun _ ->
    discontinue_with_backtrace k e bt))

(* Run [f], publish its outcome into promise [p] exactly once, and resume
   every continuation that was waiting on [p]. *)
let do_task (type a) (f : unit -> a) (p : a promise) : unit =
  let action, result =
    try
      let v = f () in
      cont v, Returned v
    with e ->
      let bt = Printexc.get_raw_backtrace () in
      discont e bt, Raised (e, bt)
  in
  (* [exchange] guarantees only one writer ever sees [Pending l]. *)
  match Atomic.exchange p result with
  | Pending l -> List.iter action l
  | _ -> failwith "Task.do_task: impossible, can only set result of task once"

(* Return (or re-raise) immediately if resolved; otherwise suspend the
   current task by performing [Wait] (handled in [step]). *)
let await pool promise =
  let pd = get_pool_data pool in
  match Atomic.get promise with
  | Returned v -> v
  | Raised (e, bt) -> Printexc.raise_with_backtrace e bt
  | Pending _ -> perform (Wait (promise, pd.task_chan))

(* Run [f v] under the scheduler's effect handler.  On [Wait], try to
   register the continuation on the promise via CAS; if the promise was
   resolved concurrently, resume the continuation right away instead. *)
let step (type a) (f : a -> unit) (v : a) : unit =
  try_with f v
  { effc = fun (type a) (e : a t) ->
      match e with
      | Wait (p,c) -> Some (fun (k : (a, _) continuation) ->
          let rec loop () =
            let old = Atomic.get p in
            match old with
            | Pending l ->
                (* CAS may fail if another waiter registered or the task
                   finished in between; retry from a fresh read. *)
                if Atomic.compare_and_set p old (Pending
                ((k,c)::l)) then ()
                else (Domain.cpu_relax (); loop ())
            | Returned v -> continue k v
            | Raised (e,bt) -> discontinue_with_backtrace k e bt
          in
          loop ())
      | _ -> None }

(* Schedule [f] as an asynchronous task on the pool and return its promise. *)
let async pool f =
  let pd = get_pool_data pool in
  let p = Atomic.make (Pending []) in
  Multi_channel.send pd.task_chan (Work (fun _ -> step (do_task f) p));
  p

(* Bridge to the domain-local-await protocol: [release] resolves the
   per-await promise and reschedules any registered continuations
   (via [send_foreign], since the releaser may be outside the pool);
   [await] suspends on that promise using the same [Wait] effect. *)
let prepare_for_await chan () =
  let promise = Atomic.make (Pending []) in
  let release () =
    match Atomic.get promise with
    | (Returned _ | Raised _) -> ()
    | Pending _ ->
      match Atomic.exchange promise (Returned ()) with
      | Pending ks ->
        ks
        |> List.iter @@ fun (k, c) ->
           Multi_channel.send_foreign c (Work (fun _ -> continue k ()))
      | _ -> ()
  and await () =
    match Atomic.get promise with
    | (Returned _ | Raised _) -> ()
    | Pending _ -> perform (Wait (promise, chan))
  in
  Domain_local_await.{ release; await }

(* Worker loop: execute Work items until a Quit message arrives. *)
let rec worker task_chan =
  match Multi_channel.recv task_chan with
  | Quit -> Multi_channel.clear_local_state task_chan
  | Work f -> f (); worker task_chan

(* Shadowing wrapper: run the worker loop with domain-local-await set up. *)
let worker task_chan =
  Domain_local_await.using
    ~prepare_for_await:(prepare_for_await task_chan)
    ~while_running:(fun () -> worker task_chan)

(* Run [f] on the calling domain, executing queued pool tasks while the
   result promise is still pending.  [recv_poll] raising [Exit] means the
   queue is momentarily empty, so we just relax and poll again. *)
let run (type a) pool (f : unit -> a) : a =
  let pd = get_pool_data pool in
  let p = Atomic.make (Pending []) in
  step (fun _ -> do_task f p) ();
  let rec loop () : a =
    match Atomic.get p with
    | Pending _ ->
      begin
        try
          match Multi_channel.recv_poll pd.task_chan with
          | Work f -> f ()
          | Quit -> failwith "Task.run: tasks are active on pool"
        with Exit -> Domain.cpu_relax ()
      end;
      loop ()
    | Returned v -> v
    | Raised (e, bt) -> Printexc.raise_with_backtrace e bt
  in
  loop ()

(* Shadowing wrapper: same as above but with domain-local-await installed. *)
let run pool f =
  Domain_local_await.using
    ~prepare_for_await:(prepare_for_await (get_pool_data pool).task_chan)
    ~while_running:(fun () -> run pool f)

(* Registry of named pools, guarded by [named_pools_mutex]. *)
let named_pools = Hashtbl.create 8
let named_pools_mutex = Mutex.create ()

(* Create a pool with [num_domains] freshly spawned workers.  The channel is
   sized [num_domains + 1] because the calling domain also participates.
   Raises [Invalid_argument] when [num_domains] is negative. *)
let setup_pool ?name ~num_domains () =
  if num_domains < 0 then
    invalid_arg "Task.setup_pool: num_domains must be at least 0"
  else
  let task_chan = Multi_channel.make (num_domains+1) in
  let domains = Array.init num_domains (fun _ ->
    Domain.spawn (fun _ -> worker task_chan))
  in
  let p = Atomic.make (Some {domains; task_chan; name}) in
  begin match name with
  | None -> ()
  | Some x ->
      Mutex.lock named_pools_mutex;
      Hashtbl.add named_pools x p;
      Mutex.unlock named_pools_mutex
  end;
  p

(* Send one Quit per worker, join them all, then unregister the pool and
   mark it torn down. *)
let teardown_pool pool =
  let pd = get_pool_data pool in
  for _i=1 to Array.length pd.domains do
    Multi_channel.send pd.task_chan Quit
  done;
  Multi_channel.clear_local_state pd.task_chan;
  Array.iter Domain.join pd.domains;
  (* Remove the pool from the table *)
  begin match pd.name with
  | None -> ()
  | Some n ->
      Mutex.lock named_pools_mutex;
      Hashtbl.remove named_pools n;
      Mutex.unlock named_pools_mutex
  end;
  Atomic.set pool None

(* Look up a previously registered pool by name. *)
let lookup_pool name =
  Mutex.lock named_pools_mutex;
  let p = Hashtbl.find_opt named_pools name in
  Mutex.unlock named_pools_mutex;
  p

(* Total domain count, including the calling (parent) domain. *)
let get_num_domains pool =
  let pd = get_pool_data pool in
  Array.length pd.domains + 1

(* Parallel for-loop whose per-iteration results are folded with
   [reduce_fun], seeded with [init]. *)
let parallel_for_reduce ?(chunk_size=0) ~start ~finish ~body pool reduce_fun init =
  let pd = get_pool_data pool in
  (* Default chunking: the whole range on one domain, otherwise about eight
     chunks per domain (never less than 1). *)
  let chunk_size = if chunk_size > 0 then chunk_size
      else begin
        let n_domains = (Array.length pd.domains) + 1 in
        let n_tasks = finish - start + 1 in
        if n_domains = 1 then n_tasks
        else max 1 (n_tasks/(8*n_domains))
      end
  in
  (* Divide and conquer: left half runs as an async task, right half runs
     inline; small ranges are reduced sequentially. *)
  let rec work s e =
    if e - s < chunk_size then
      let rec loop i acc =
        if i > e then acc
        else loop (i+1) (reduce_fun acc (body i))
      in
      loop (s+1) (body s)
    else begin
      let d = s + ((e - s) / 2) in
      let p = async pool (fun _ -> work s d) in
      let right = work (d+1) e in
      let left = await pool p in
      reduce_fun left right
    end
  in
  (* Empty range: return the seed unchanged. *)
  if finish < start
  then init
  else reduce_fun init (work start finish)

(* Parallel for-loop over [start..finish]; iterations may run in any order. *)
let parallel_for ?(chunk_size=0) ~start ~finish ~body pool =
  let pd = get_pool_data pool in
  (* Same default chunking heuristic as [parallel_for_reduce]. *)
  let chunk_size = if chunk_size > 0 then chunk_size
      else begin
        let n_domains = (Array.length pd.domains) + 1 in
        let n_tasks = finish - start + 1 in
        if n_domains = 1 then n_tasks
        else max 1 (n_tasks/(8*n_domains))
      end
  in
  let rec work pool fn s e =
    if e - s < chunk_size then
      for i = s to e do fn i done
    else begin
      let d = s + ((e - s) / 2) in
      let left = async pool (fun _ -> work pool fn s d) in
      work pool fn (d+1) e;
      await pool left
    end
  in
  work pool body start finish

(* Parallel inclusive scan (prefix fold) of [elements] with [op]. *)
let parallel_scan pool op elements =
  let pd = get_pool_data pool in
  let n = Array.length elements in
  (* Number of chunks: bounded by both array length and available domains. *)
  let p = min (n - 1) ((Array.length pd.domains) + 1) in
  let prefix_s = Array.copy elements in
  (* Sequential in-place scan of [start..finish]; slot [start] is assumed
     to already hold its final (or chunk-initial) value. *)
  let scan_part op elements prefix_sum start finish =
    assert (Array.length elements > (finish - start));
    for i = (start + 1) to finish do
      prefix_sum.(i) <- op prefix_sum.(i - 1) elements.(i)
    done
  in
  if p < 2 then begin
    (* Do a sequential scan when number of domains or array's length is less
       than 2 *)
    scan_part op elements prefix_s 0 (n - 1);
    prefix_s
  end
  else begin
    (* Apply a chunk's incoming prefix [offset] to each of its slots. *)
    let add_offset op prefix_sum offset start finish =
      assert (Array.length prefix_sum > (finish - start));
      for i = start to finish
      do
        prefix_sum.(i) <- op offset prefix_sum.(i)
      done
    in

    (* Phase 1: scan each of the [p] chunks independently in parallel. *)
    parallel_for pool ~chunk_size:1 ~start:0 ~finish:(p - 1)
      ~body:(fun i ->
          let s = (i * n) / (p ) in
          let e = (i + 1) * n / (p ) - 1 in
          scan_part op elements prefix_s s e);

    (* Phase 2: sequentially combine the chunk totals (the last slot of each
       chunk), left to right, so each chunk's last slot becomes final. *)
    let x = ref prefix_s.(n/p - 1) in
    for i = 2 to p do
      let ind = i * n / p - 1 in
      x := op !x prefix_s.(ind);
      prefix_s.(ind) <- !x
    done;

    (* Phase 3: in parallel, push each chunk's incoming prefix onto its
       interior slots.  The range ends at [-2] because the chunk's last
       slot was already finalized in phase 2. *)
    parallel_for pool ~chunk_size:1 ~start:1 ~finish:(p - 1)
      ~body:( fun i ->
          let s = i * n / (p) in
          let e = (i + 1) * n / (p) - 2 in
          let offset = prefix_s.(s - 1) in
          add_offset op prefix_s offset s e
        );

    prefix_s
  end

(* Parallel search: evaluate [body] over [start..finish] until some call
   returns [Some _]; other in-flight calls may still run after a hit. *)
let parallel_find (type a) ?(chunk_size=0) ~start ~finish ~body pool =
  let pd = get_pool_data pool in
  (* First result found; also serves as the early-exit flag. *)
  let found : a option Atomic.t = Atomic.make None in
  let chunk_size = if chunk_size > 0 then chunk_size
      else begin
        let n_domains = (Array.length pd.domains) + 1 in
        let n_tasks = finish - start + 1 in
        if n_domains = 1 then n_tasks
        else max 1 (n_tasks/(8*n_domains))
      end
  in
  let rec work pool fn s e =
    if e - s < chunk_size then
      let i = ref s in
      while !i <= e && Option.is_none (Atomic.get found) do
        begin match fn !i with
        | None -> ()
        | Some _ as some -> Atomic.set found some
        end;
        incr i;
      done
    else if Option.is_some (Atomic.get found) then ()
    else begin
      let d = s + ((e - s) / 2) in
      let left = async pool (fun _ -> work pool fn s d) in
      work pool fn (d+1) e;
      await pool left
    end
  in
  work pool body start finish;
  Atomic.get found
--------------------------------------------------------------------------------
/lib/task.mli:
--------------------------------------------------------------------------------
type 'a task = unit -> 'a
(** Type of task *)

type !'a promise 5 | (** Type of promises *) 6 | 7 | type pool 8 | (** Type of task pool *) 9 | 10 | val setup_pool : ?name:string -> num_domains:int -> unit -> pool 11 | (** Sets up a task execution pool with [num_domains] new domains. If [name] is 12 | provided, the pool is mapped to [name] which can be looked up later with 13 | [lookup_pool name]. 14 | 15 | When [num_domains] is 0, the new pool will be empty, and when an empty 16 | pool is in use, every function in this module will run effectively 17 | sequentially, using the calling domain as the only available domain. 18 | 19 | Raises {!Invalid_argument} when [num_domains] is less than 0. *) 20 | 21 | val teardown_pool : pool -> unit 22 | (** Tears down the task execution pool. *) 23 | 24 | val lookup_pool : string -> pool option 25 | (** [lookup_pool name] returns [Some pool] if [pool] is associated to [name] or 26 | returns [None] if no value is associated to it. *) 27 | 28 | val get_num_domains : pool -> int 29 | (** [get_num_domains pool] returns the total number of domains in [pool] 30 | including the parent domain. *) 31 | 32 | val run : pool -> 'a task -> 'a 33 | (** [run p t] runs the task [t] synchronously with the calling domain and the 34 | domains in the pool [p]. If the task [t] blocks on a promise, then tasks 35 | from the pool [p] are executed until the promise blocking [t] is resolved. 36 | 37 | This function should be used at the top level to enclose the calls to other 38 | functions that may await on promises. This includes {!await}, 39 | {!parallel_for} and its variants. Otherwise, those functions will raise 40 | [Unhandled] exception. *) 41 | 42 | val async : pool -> 'a task -> 'a promise 43 | (** [async p t] runs the task [t] asynchronously in the pool [p]. The function 44 | returns a promise [r] in which the result of the task [t] will be stored. *) 45 | 46 | val await : pool -> 'a promise -> 'a 47 | (** [await p r] waits for the promise [r] to be resolved. 
During the resolution,
    other tasks in the pool [p] might be run using the calling domain and/or the
    domains in the pool [p]. If the task associated with the promise has
    completed successfully, then the result of the task will be returned. If the
    task has raised an exception, then [await] raises the same exception.

    Must be called with a call to {!run} in the dynamic scope to handle the
    internal algebraic effects for task synchronization. *)

val parallel_for : ?chunk_size:int -> start:int -> finish:int ->
  body:(int -> unit) -> pool -> unit
(** [parallel_for c s f b p] behaves similarly to [for i=s to f do b i done], but
    runs the for loop in parallel with the calling domain and/or the domains in
    the pool [p]. The chunk size [c] determines the number of body applications
    done in one task; this will default to [max(1, (finish-start + 1) / (8 *
    num_domains))]. Individual iterations may be run in any order. Tasks are
    distributed to the participating domains using a divide-and-conquer scheme.

    Must be called with a call to {!run} in the dynamic scope to handle the
    internal algebraic effects for task synchronization. *)

val parallel_for_reduce : ?chunk_size:int -> start:int -> finish:int ->
  body:(int -> 'a) -> pool -> ('a -> 'a -> 'a) -> 'a -> 'a
(** [parallel_for_reduce c s f b p r i] is similar to [parallel_for] except
    that the result returned by each iteration is reduced with [r] with initial
    value [i]. The reduce operations are performed in an arbitrary order and
    the reduce function needs to be associative in order to obtain a
    deterministic result.

    Must be called with a call to {!run} in the dynamic scope to handle the
    internal algebraic effects for task synchronization.
*) 78 | 79 | val parallel_scan : pool -> ('a -> 'a -> 'a) -> 'a array -> 'a array 80 | (** [parallel_scan p op a] computes the scan of the array [a] in parallel with 81 | binary operator [op] and returns the result array, using the calling domain 82 | and/or the domains in the pool [p]. Scan is similar to [Array.fold_left] 83 | but returns an array of reduced intermediate values. The reduce operations 84 | are performed in an arbitrary order and the reduce function needs to be 85 | associative in order to obtain a deterministic result. 86 | 87 | Must be called with a call to {!run} in the dynamic scope to handle the 88 | internal algebraic effects for task synchronization. *) 89 | 90 | val parallel_find : ?chunk_size:int -> start:int -> finish:int -> 91 | body:(int -> 'a option) -> pool -> 'a option 92 | (** [parallel_find ~start ~finish ~body pool] calls [body] in parallel 93 | on the indices from [start] to [finish], in any order, until at 94 | least one of them returns [Some v]. 95 | 96 | Search stops when a value is found, but there is no guarantee that 97 | it stops as early as possible, other calls to [body] may happen in 98 | parallel or afterwards. 99 | 100 | See {!parallel_for} for the description of the [chunk_size] 101 | parameter and the scheduling strategy. 102 | 103 | Must be called with a call to {!run} in the dynamic scope to 104 | handle the internal algebraic effects for task synchronization. 
105 | *) 106 | -------------------------------------------------------------------------------- /test/LU_decomposition_multicore.ml: -------------------------------------------------------------------------------- 1 | module T = Domainslib.Task 2 | let num_domains = try int_of_string Sys.argv.(1) with _ -> 1 3 | let mat_size = try int_of_string Sys.argv.(2) with _ -> 1200 4 | 5 | let k = Domain.DLS.new_key Random.State.make_self_init 6 | 7 | module SquareMatrix = struct 8 | let parallel_create pool f : float array = 9 | let fa = Array.create_float (mat_size * mat_size) in 10 | T.parallel_for pool ~start:0 ~finish:( mat_size * mat_size - 1) 11 | ~body:(fun i -> fa.(i) <- f (i / mat_size) (i mod mat_size)); 12 | fa 13 | 14 | let get (m : float array) r c = m.(r * mat_size + c) 15 | let set (m : float array) r c v = m.(r * mat_size + c) <- v 16 | let parallel_copy pool a = 17 | let n = Array.length a in 18 | let copy_part a b i = 19 | let s = (i * n / num_domains) in 20 | let e = (i+1) * n / num_domains - 1 in 21 | Array.blit a s b s (e - s + 1) in 22 | let b = Array.create_float n in 23 | let rec aux acc num_domains i = 24 | if (i = num_domains) then 25 | (List.iter (fun e -> T.await pool e) acc) 26 | else begin 27 | aux ((T.async pool (fun _ -> copy_part a b i))::acc) num_domains (i+1) 28 | end 29 | in 30 | aux [] num_domains 0; 31 | b 32 | end 33 | 34 | open SquareMatrix 35 | 36 | let lup pool (a0 : float array) = 37 | let a = parallel_copy pool a0 in 38 | for k = 0 to (mat_size - 2) do 39 | T.parallel_for pool ~start:(k + 1) ~finish:(mat_size -1) 40 | ~body:(fun row -> 41 | let factor = get a row k /. get a k k in 42 | for col = k + 1 to mat_size-1 do 43 | set a row col (get a row col -. factor *. 
(get a k col)) 44 | done; 45 | set a row k factor ) 46 | done ; 47 | a 48 | 49 | let () = 50 | let pool = T.setup_pool ~num_domains:(num_domains - 1) () in 51 | T.run pool (fun _ -> 52 | let a = parallel_create pool 53 | (fun _ _ -> (Random.State.float (Domain.DLS.get k) 100.0) +. 1.0 ) in 54 | let lu = lup pool a in 55 | let _l = parallel_create pool (fun i j -> if i > j then get lu i j else if i = j then 1.0 else 0.0) in 56 | let _u = parallel_create pool (fun i j -> if i <= j then get lu i j else 0.0) in 57 | ()); 58 | T.teardown_pool pool 59 | -------------------------------------------------------------------------------- /test/backtrace.ml: -------------------------------------------------------------------------------- 1 | module T = Domainslib.Task 2 | 3 | let rec foo i = 4 | if i = 0 then () 5 | else begin 6 | ignore (failwith "exn"); 7 | foo i 8 | end 9 | [@@inline never] 10 | 11 | let rec bar i = 12 | if i = 0 then () 13 | else begin 14 | foo i; 15 | bar i 16 | end 17 | [@@inline never] 18 | 19 | let main () = 20 | let pool = T.setup_pool ~num_domains:0 () in 21 | T.run pool (fun _ -> 22 | let p = T.async pool (fun _ -> bar 42) in 23 | T.await pool p; 24 | Printf.printf "should not reach here\n%!"); 25 | T.teardown_pool pool 26 | 27 | let _ = 28 | Printexc.record_backtrace true; 29 | try main () 30 | with _ -> 31 | let open Printexc in 32 | let bt = get_raw_backtrace () in 33 | let bt_slot_arr = Option.get (backtrace_slots bt) in 34 | let name = Option.get (Slot.name bt_slot_arr.(1)) in 35 | assert (name = "Backtrace.foo" || name = "Dune__exe__Backtrace.foo"); 36 | let s = raw_backtrace_to_string bt in 37 | print_string s 38 | -------------------------------------------------------------------------------- /test/chan_stm_tests.ml: -------------------------------------------------------------------------------- 1 | open QCheck 2 | open Domainslib 3 | open STM 4 | 5 | (** This contains sequential and parallel model-based tests of [Domainslib.Chan] *) 6 | 7 
| module ChConf = 8 | struct 9 | type state = int list 10 | type sut = int Domainslib.Chan.t 11 | type cmd = 12 | | Send of int 13 | | Send_poll of int 14 | | Recv 15 | | Recv_poll 16 | 17 | let show_cmd c = match c with 18 | | Send i -> "Send" ^ (string_of_int i) 19 | | Send_poll i -> "Send_poll" ^ (string_of_int i) 20 | | Recv -> "Recv" 21 | | Recv_poll -> "Recv_poll" 22 | 23 | let capacity = 8 24 | 25 | let arb_cmd s = 26 | let int_gen = Gen.nat in 27 | QCheck.make ~print:show_cmd 28 | (if s=[] 29 | then 30 | Gen.oneof 31 | [Gen.map (fun i -> Send i) int_gen; 32 | Gen.map (fun i -> Send_poll i) int_gen; 33 | Gen.return Recv_poll] (* don't generate blocking Recv cmds on an empty channel *) 34 | else 35 | if List.length s >= capacity 36 | then 37 | Gen.oneof 38 | [Gen.map (fun i -> Send_poll i) int_gen; 39 | Gen.return Recv; 40 | Gen.return Recv_poll] (* don't generate blocking Send cmds on a full channel *) 41 | else 42 | Gen.oneof 43 | [Gen.map (fun i -> Send i) int_gen; 44 | Gen.map (fun i -> Send_poll i) int_gen; 45 | Gen.return Recv; 46 | Gen.return Recv_poll]) 47 | let init_state = [] 48 | let init_sut () = Chan.make_bounded capacity 49 | let cleanup _ = () 50 | 51 | let next_state c s = match c with 52 | | Send i -> if List.length s < capacity then s@[i] else s 53 | | Send_poll i -> if List.length s < capacity then s@[i] else s 54 | | Recv -> begin match s with [] -> [] | _::s' -> s' end 55 | | Recv_poll -> begin match s with [] -> [] | _::s' -> s' end 56 | 57 | let precond c s = match c,s with 58 | | Recv, [] -> false 59 | | Send _, _ -> List.length s < capacity 60 | | _, _ -> true 61 | 62 | let run c chan = 63 | match c with 64 | | Send i -> Res (unit, Chan.send chan i) 65 | | Send_poll i -> Res (bool, Chan.send_poll chan i) 66 | | Recv -> Res (int, Chan.recv chan) 67 | | Recv_poll -> Res (option int, Chan.recv_poll chan) 68 | 69 | let postcond c s res = match c,res with 70 | | Send _, Res ((Unit,_),_) -> (List.length s < capacity) 71 | | Send_poll _, Res 
((Bool,_),res) -> res = (List.length s < capacity) 72 | | Recv, Res ((Int,_),res) -> (match s with [] -> false | res'::_ -> Int.equal res res') 73 | | Recv_poll, Res ((Option Int,_),opt) -> (match s with [] -> None | res'::_ -> Some res') = opt 74 | | _,_ -> false 75 | end 76 | 77 | 78 | module ChT_seq = STM_sequential.Make(ChConf) 79 | module ChT_dom = STM_domain.Make(ChConf) 80 | 81 | let () = 82 | let count = 500 in 83 | QCheck_base_runner.run_tests_main [ 84 | ChT_seq.agree_test ~count ~name:"STM Domainslib.Chan test sequential"; 85 | ChT_dom.agree_test_par ~count ~name:"STM Domainslib.Chan test parallel"; 86 | ] 87 | -------------------------------------------------------------------------------- /test/dune: -------------------------------------------------------------------------------- 1 | (test 2 | (name test_chan) 3 | (libraries domainslib) 4 | (modules test_chan)) 5 | 6 | (test 7 | (name fib) 8 | (modules fib)) 9 | 10 | (test 11 | (name fib_par) 12 | (libraries domainslib) 13 | (modules fib_par)) 14 | 15 | (test 16 | (name kcas_integration) 17 | (libraries domainslib kcas) 18 | (modules kcas_integration)) 19 | 20 | (test 21 | (name enumerate_par) 22 | (libraries domainslib) 23 | (modules enumerate_par)) 24 | 25 | (test 26 | (name game_of_life) 27 | (modules game_of_life)) 28 | 29 | (test 30 | (name game_of_life_multicore) 31 | (libraries domainslib) 32 | (modules game_of_life_multicore)) 33 | 34 | (test 35 | (name LU_decomposition_multicore) 36 | (libraries domainslib) 37 | (flags (:standard -runtime-variant d)) 38 | (modules LU_decomposition_multicore) 39 | (enabled_if (or (= %{arch_sixtyfour} true) (<> %{architecture} arm)))) 40 | ;; disabled temporarily on arm32 due to failure: ocaml/ocaml#12267 41 | 42 | 43 | (test 44 | (name spectralnorm2) 45 | (modules spectralnorm2)) 46 | 47 | (test 48 | (name sum_par) 49 | (libraries domainslib) 50 | (modules sum_par)) 51 | 52 | (test 53 | (name task_throughput) 54 | (libraries domainslib mirage-clock-unix) 55 | 
(modules task_throughput)) 56 | 57 | (test 58 | (name spectralnorm2_multicore) 59 | (libraries domainslib) 60 | (modules spectralnorm2_multicore)) 61 | 62 | (test 63 | (name summed_area_table) 64 | (libraries domainslib) 65 | (modules summed_area_table)) 66 | 67 | (test 68 | (name prefix_sum) 69 | (libraries domainslib unix) 70 | (modules prefix_sum)) 71 | 72 | (test 73 | (name test_task) 74 | (libraries domainslib) 75 | (modules test_task)) 76 | 77 | (test 78 | (name test_parallel_find) 79 | (libraries domainslib) 80 | (modules test_parallel_find)) 81 | 82 | (test 83 | (name test_parallel_scan) 84 | (libraries domainslib) 85 | (modules test_parallel_scan)) 86 | 87 | (test 88 | (name test_deadlock) 89 | (libraries domainslib) 90 | (modules test_deadlock)) 91 | 92 | (test 93 | (name test_task_crash) 94 | (libraries domainslib) 95 | (modules test_task_crash)) 96 | 97 | (test 98 | (name test_task_empty) 99 | (libraries domainslib) 100 | (modules test_task_empty)) 101 | 102 | (test 103 | (name backtrace) 104 | (libraries domainslib) 105 | (modules backtrace) 106 | (enabled_if (<> %{system} mingw64)) ;; triggers a known issue on mingw https://github.com/ocaml/ocaml/pull/12231 107 | (modes byte native)) 108 | ;; byte_complete .exes don't include debug+trace info https://github.com/ocaml/dune/issues/7845 109 | ;; so on a bytecode switch/platform we build a plain bytecode version w/trace info 110 | ;; and rename it to .exe 111 | (rule 112 | (target backtrace.exe) 113 | (action (copy backtrace.bc backtrace.exe)) 114 | (enabled_if (and (= %{bin-available:ocamlopt} false) (<> %{system} mingw64)))) 115 | 116 | (test 117 | (name off_by_one) 118 | (libraries domainslib) 119 | (modules off_by_one)) 120 | 121 | ;; Custom property-based tests using QCheck 122 | 123 | (test 124 | (name task_one_dep) 125 | (modules task_one_dep) 126 | (libraries qcheck-multicoretests-util qcheck-core qcheck-core.runner domainslib) 127 | (enabled_if %{bin-available:ocamlopt}) ;; takes forever on 
bytecode 128 | (action (run %{test} --verbose))) 129 | 130 | (test 131 | (name task_more_deps) 132 | (modules task_more_deps) 133 | (libraries qcheck-multicoretests-util qcheck-core qcheck-core.runner domainslib) 134 | (enabled_if %{bin-available:ocamlopt}) ;; takes forever on bytecode 135 | (action (run %{test} --verbose))) 136 | 137 | (test 138 | (name task_parallel) 139 | (modules task_parallel) 140 | (libraries qcheck-multicoretests-util qcheck-core qcheck-core.runner domainslib) 141 | (action (run %{test} --verbose))) 142 | 143 | ;; STM_sequential and STM_domain test of Domainslib.Chan 144 | 145 | (test 146 | (name chan_stm_tests) 147 | (modules chan_stm_tests) 148 | (libraries qcheck-stm.sequential qcheck-stm.domain domainslib) 149 | (action (run %{test} --verbose))) 150 | -------------------------------------------------------------------------------- /test/enumerate_par.ml: -------------------------------------------------------------------------------- 1 | let num_domains = try int_of_string Sys.argv.(1) with _ -> 1 2 | let n = try int_of_string Sys.argv.(2) with _ -> 100 3 | 4 | module T = Domainslib.Task 5 | 6 | let _ = 7 | let p = T.setup_pool ~num_domains:(num_domains - 1) () in 8 | T.run p (fun _ -> 9 | T.parallel_for p ~start:0 ~finish:(n-1) ~chunk_size:16 ~body:(fun i -> 10 | print_string @@ Printf.sprintf "[%d] %d\n%!" 
(Domain.self () :> int) i)); 11 | T.teardown_pool p 12 | -------------------------------------------------------------------------------- /test/fib.ml: -------------------------------------------------------------------------------- 1 | let n = try int_of_string Sys.argv.(1) with _ -> 43 2 | 3 | let rec fib n = 4 | if n < 2 then 1 5 | else fib (n-1) + fib (n-2) 6 | 7 | let _ = Printf.printf "fib(%d) = %d\n" n (fib n) 8 | -------------------------------------------------------------------------------- /test/fib_par.ml: -------------------------------------------------------------------------------- 1 | let num_domains = try int_of_string Sys.argv.(1) with _ -> 1 2 | let n = try int_of_string Sys.argv.(2) with _ -> 43 3 | 4 | module T = Domainslib.Task 5 | 6 | let rec fib n = 7 | if n < 2 then 1 8 | else fib (n-1) + fib (n-2) 9 | 10 | let rec fib_par pool n = 11 | if n <= 40 then fib n 12 | else 13 | let a = T.async pool (fun _ -> fib_par pool (n-1)) in 14 | let b = T.async pool (fun _ -> fib_par pool (n-2)) in 15 | T.await pool a + T.await pool b 16 | 17 | let main = 18 | let pool = T.setup_pool ~num_domains:(num_domains - 1) () in 19 | let res = T.run pool (fun _ -> fib_par pool n) in 20 | T.teardown_pool pool; 21 | Printf.printf "fib(%d) = %d\n" n res 22 | 23 | let () = main 24 | -------------------------------------------------------------------------------- /test/game_of_life.ml: -------------------------------------------------------------------------------- 1 | let n_times = try int_of_string Sys.argv.(1) with _ -> 20 2 | let board_size = try int_of_string Sys.argv.(2) with _ -> 16 3 | 4 | let rg = 5 | ref (Array.init board_size (fun _ -> Array.init board_size (fun _ -> Random.int 2))) 6 | let rg' = 7 | ref (Array.init board_size (fun _ -> Array.init board_size (fun _ -> Random.int 2))) 8 | let buf = Bytes.create board_size 9 | 10 | let get g x y = 11 | try g.(x).(y) 12 | with _ -> 0 13 | 14 | let neighbourhood g x y = 15 | (get g (x-1) (y-1)) + 16 | (get g 
(x-1) (y )) + 17 | (get g (x-1) (y+1)) + 18 | (get g (x ) (y-1)) + 19 | (get g (x ) (y+1)) + 20 | (get g (x+1) (y-1)) + 21 | (get g (x+1) (y )) + 22 | (get g (x+1) (y+1)) 23 | 24 | let next_cell g x y = 25 | let n = neighbourhood g x y in 26 | match g.(x).(y), n with 27 | | 1, 0 | 1, 1 -> 0 (* lonely *) 28 | | 1, 4 | 1, 5 | 1, 6 | 1, 7 | 1, 8 -> 0 (* overcrowded *) 29 | | 1, 2 | 1, 3 -> 1 (* lives *) 30 | | 0, 3 -> 1 (* get birth *) 31 | | _ (* 0, (0|1|2|4|5|6|7|8) *) -> 0 (* barren *) 32 | 33 | let print g = 34 | for x = 0 to board_size - 1 do 35 | for y = 0 to board_size - 1 do 36 | if g.(x).(y) = 0 37 | then Bytes.set buf y '.' 38 | else Bytes.set buf y 'o' 39 | done; 40 | print_endline (Bytes.unsafe_to_string buf) 41 | done; 42 | print_endline "" 43 | 44 | let next () = 45 | let g = !rg in 46 | let new_g = !rg' in 47 | for x = 0 to board_size - 1 do 48 | for y = 0 to board_size - 1 do 49 | new_g.(x).(y) <- next_cell g x y 50 | done 51 | done; 52 | rg := new_g; 53 | rg' := g 54 | 55 | let rec repeat n = 56 | match n with 57 | | 0 -> () 58 | | _ -> next (); repeat (n-1) 59 | 60 | let ()= 61 | print !rg; 62 | repeat n_times; 63 | print !rg 64 | -------------------------------------------------------------------------------- /test/game_of_life_multicore.ml: -------------------------------------------------------------------------------- 1 | let num_domains = try int_of_string Sys.argv.(1) with _ -> 1 2 | let n_times = try int_of_string Sys.argv.(2) with _ -> 20 3 | let board_size = try int_of_string Sys.argv.(3) with _ -> 16 4 | 5 | module T = Domainslib.Task 6 | 7 | let rg = 8 | ref (Array.init board_size (fun _ -> Array.init board_size (fun _ -> Random.int 2))) 9 | let rg' = 10 | ref (Array.init board_size (fun _ -> Array.init board_size (fun _ -> Random.int 2))) 11 | let buf = Bytes.create board_size 12 | 13 | let get g x y = 14 | try g.(x).(y) 15 | with _ -> 0 16 | 17 | let neighbourhood g x y = 18 | (get g (x-1) (y-1)) + 19 | (get g (x-1) (y )) + 20 | (get g 
(x-1) (y+1)) + 21 | (get g (x ) (y-1)) + 22 | (get g (x ) (y+1)) + 23 | (get g (x+1) (y-1)) + 24 | (get g (x+1) (y )) + 25 | (get g (x+1) (y+1)) 26 | 27 | let next_cell g x y = 28 | let n = neighbourhood g x y in 29 | match g.(x).(y), n with 30 | | 1, 0 | 1, 1 -> 0 (* lonely *) 31 | | 1, 4 | 1, 5 | 1, 6 | 1, 7 | 1, 8 -> 0 (* overcrowded *) 32 | | 1, 2 | 1, 3 -> 1 (* lives *) 33 | | 0, 3 -> 1 (* get birth *) 34 | | _ (* 0, (0|1|2|4|5|6|7|8) *) -> 0 (* barren *) 35 | 36 | let print g = 37 | for x = 0 to board_size - 1 do 38 | for y = 0 to board_size - 1 do 39 | if g.(x).(y) = 0 40 | then Bytes.set buf y '.' 41 | else Bytes.set buf y 'o' 42 | done; 43 | print_endline (Bytes.unsafe_to_string buf) 44 | done; 45 | print_endline "" 46 | 47 | let next pool = 48 | let g = !rg in 49 | let new_g = !rg' in 50 | T.parallel_for pool ~start:0 51 | ~finish:(board_size - 1) ~body:(fun x -> 52 | for y = 0 to board_size - 1 do 53 | new_g.(x).(y) <- next_cell g x y 54 | done); 55 | rg := new_g; 56 | rg' := g 57 | 58 | 59 | let rec repeat pool n = 60 | match n with 61 | | 0-> () 62 | | _-> next pool; repeat pool (n-1) 63 | 64 | let ()= 65 | let pool = T.setup_pool ~num_domains:(num_domains - 1) () in 66 | print !rg; 67 | T.run pool (fun _ -> repeat pool n_times); 68 | print !rg; 69 | T.teardown_pool pool 70 | -------------------------------------------------------------------------------- /test/kcas_integration.ml: -------------------------------------------------------------------------------- 1 | open Kcas 2 | module T = Domainslib.Task 3 | 4 | let var = Loc.make None 5 | 6 | let () = 7 | let n = 100 in 8 | let pool_domain = 9 | Domain.spawn @@ fun () -> 10 | let pool = 11 | T.setup_pool ~num_domains:(Domain.recommended_domain_count () - 2) () 12 | in 13 | T.run pool (fun () -> 14 | T.parallel_for ~start:1 ~finish:n 15 | ~body:(fun i -> 16 | ignore @@ Loc.update var 17 | @@ function None -> Some i | _ -> Retry.later ()) 18 | pool); 19 | T.teardown_pool pool; 20 | Printf.printf 
"Done\n%!" 21 | in 22 | for _ = 1 to n do 23 | match 24 | Loc.update var @@ function None -> Retry.later () | Some _ -> None 25 | with 26 | | None -> failwith "impossible" 27 | | Some i -> Printf.printf "Got %d\n%!" i 28 | done; 29 | Domain.join pool_domain 30 | -------------------------------------------------------------------------------- /test/off_by_one.ml: -------------------------------------------------------------------------------- 1 | open Domainslib 2 | 3 | let print_array a = 4 | let b = Buffer.create 25 in 5 | Buffer.add_string b "[|"; 6 | Array.iter (fun elem -> Buffer.add_string b (string_of_int elem ^ "; ")) a; 7 | Buffer.add_string b "|]"; 8 | Buffer.contents b 9 | 10 | let r = Array.init 20 (fun i -> i + 1) 11 | 12 | let scan_task num_doms = 13 | try 14 | let pool = Task.setup_pool ~num_domains:num_doms () in 15 | let a = Task.run pool (fun () -> Task.parallel_scan pool (+) (Array.make 20 1)) in 16 | Task.teardown_pool pool; 17 | Printf.printf "%i: %s\n%!" num_doms (print_array a); 18 | assert (a = r) 19 | with Failure msg -> 20 | begin 21 | assert (msg = "failed to allocate domain"); 22 | Printf.printf "Failed to allocate %i domains, recommended_domain_count: %i\n%!" 
23 | num_doms (Domain.recommended_domain_count ()); 24 | end 25 | ;; 26 | for num_dom=0 to 21 do 27 | scan_task num_dom; 28 | done 29 | -------------------------------------------------------------------------------- /test/prefix_sum.ml: -------------------------------------------------------------------------------- 1 | module T = Domainslib.Task 2 | let num_domains = try int_of_string Sys.argv.(1) with _ -> 4 3 | let n = try int_of_string Sys.argv.(2) with _ -> 100000 4 | 5 | let gen n = Array.make n 1 (*(fun _ -> Random.int n)*) 6 | 7 | let prefix_sum pool = T.parallel_scan pool (+) 8 | 9 | let _ = 10 | let pool = T.setup_pool ~num_domains:(num_domains - 1) () in 11 | let arr = gen n in 12 | let t = Unix.gettimeofday() in 13 | ignore (T.run pool (fun _ -> prefix_sum pool arr)); 14 | Printf.printf "Execution time: %fs\n" (Unix.gettimeofday() -. t); 15 | T.teardown_pool pool 16 | -------------------------------------------------------------------------------- /test/spectralnorm2.ml: -------------------------------------------------------------------------------- 1 | (* The Computer Language Benchmarks Game 2 | * https://salsa.debian.org/benchmarksgame-team/benchmarksgame/ 3 | * 4 | * Contributed by Sebastien Loisel 5 | * Cleanup by Troestler Christophe 6 | * Modified by Mauricio Fernandez 7 | *) 8 | 9 | let n = try int_of_string Sys.argv.(1) with _ -> 2000 10 | 11 | let eval_A i j = 1. /. float((i+j)*(i+j+1)/2+i+1) 12 | 13 | let eval_A_times_u u v = 14 | let n = Array.length v - 1 in 15 | for i = 0 to n do 16 | let vi = ref 0. in 17 | for j = 0 to n do vi := !vi +. eval_A i j *. u.(j) done; 18 | v.(i) <- !vi 19 | done 20 | 21 | let eval_At_times_u u v = 22 | let n = Array.length v -1 in 23 | for i = 0 to n do 24 | let vi = ref 0. in 25 | for j = 0 to n do vi := !vi +. eval_A j i *. 
u.(j) done; 26 | v.(i) <- !vi 27 | done 28 | 29 | let eval_AtA_times_u u v = 30 | let w = Array.make (Array.length u) 0.0 in 31 | eval_A_times_u u w; eval_At_times_u w v 32 | 33 | 34 | let () = 35 | let u = Array.make n 1.0 and v = Array.make n 0.0 in 36 | for _i = 0 to 9 do 37 | eval_AtA_times_u u v; eval_AtA_times_u v u 38 | done; 39 | 40 | let vv = ref 0.0 and vBv = ref 0.0 in 41 | for i=0 to n-1 do 42 | vv := !vv +. v.(i) *. v.(i); 43 | vBv := !vBv +. u.(i) *. v.(i) 44 | done; 45 | Printf.printf "%0.9f\n" (sqrt(!vBv /. !vv)) 46 | -------------------------------------------------------------------------------- /test/spectralnorm2_multicore.ml: -------------------------------------------------------------------------------- 1 | (* The Computer Language Benchmarks Game 2 | * https://salsa.debian.org/benchmarksgame-team/benchmarksgame/ 3 | * 4 | * Contributed by Sebastien Loisel 5 | * Cleanup by Troestler Christophe 6 | * Modified by Mauricio Fernandez 7 | *) 8 | 9 | let num_domains = try int_of_string Sys.argv.(1) with _ -> 1 10 | let n = try int_of_string Sys.argv.(2) with _ -> 2000 11 | 12 | module T = Domainslib.Task 13 | 14 | let eval_A i j = 1. /. float((i+j)*(i+j+1)/2+i+1) 15 | 16 | let eval_A_times_u pool u v = 17 | let n = Array.length v - 1 in 18 | T.parallel_for pool ~start:0 ~finish:n ~chunk_size:(n/num_domains) 19 | ~body:(fun i -> 20 | let vi = ref 0. in 21 | for j = 0 to n do vi := !vi +. eval_A i j *. u.(j) done; 22 | v.(i) <- !vi) 23 | 24 | let eval_At_times_u pool u v = 25 | let n = Array.length v -1 in 26 | T.parallel_for pool ~start:0 ~finish:n ~chunk_size:(n/num_domains) 27 | ~body:(fun i -> 28 | let vi = ref 0. in 29 | for j = 0 to n do vi := !vi +. eval_A j i *. 
u.(j) done; 30 | v.(i) <- !vi) 31 | 32 | let eval_AtA_times_u pool u v = 33 | let w = Array.make (Array.length u) 0.0 in 34 | eval_A_times_u pool u w; eval_At_times_u pool w v 35 | 36 | let () = 37 | let pool = T.setup_pool ~num_domains:(num_domains - 1) () in 38 | let u = Array.make n 1.0 and v = Array.make n 0.0 in 39 | T.run pool (fun _ -> 40 | for _i = 0 to 9 do 41 | eval_AtA_times_u pool u v; eval_AtA_times_u pool v u 42 | done); 43 | T.teardown_pool pool; 44 | 45 | let vv = ref 0.0 and vBv = ref 0.0 in 46 | for i=0 to n-1 do 47 | vv := !vv +. v.(i) *. v.(i); 48 | vBv := !vBv +. u.(i) *. v.(i) 49 | done; 50 | Printf.printf "%0.9f\n" (sqrt(!vBv /. !vv)) 51 | -------------------------------------------------------------------------------- /test/sum_par.ml: -------------------------------------------------------------------------------- 1 | let num_domains = try int_of_string Sys.argv.(1) with _ -> 2 2 | let n = try int_of_string Sys.argv.(2) with _ -> 100 3 | 4 | module T = Domainslib.Task 5 | 6 | let _ = 7 | (* use parallel_for_reduce *) 8 | let p = T.setup_pool ~num_domains:(num_domains - 1) () in 9 | let sum = T.run p (fun _ -> 10 | T.parallel_for_reduce p (+) 0 ~chunk_size:(n/(4*num_domains)) ~start:0 11 | ~finish:(n-1) ~body:(fun _i -> 1)) 12 | in 13 | T.teardown_pool p; 14 | Printf.printf "Sum is %d\n" sum; 15 | assert (sum = n) 16 | 17 | let _ = 18 | (* explictly use empty pool and default chunk_size *) 19 | let p = T.setup_pool ~num_domains:0 () in 20 | let sum = Atomic.make 0 in 21 | T.run p (fun _ -> 22 | T.parallel_for p ~start:0 ~finish:(n-1) 23 | ~body:(fun _i -> ignore (Atomic.fetch_and_add sum 1))); 24 | let sum = Atomic.get sum in 25 | T.teardown_pool p; 26 | Printf.printf "Sum is %d\n" sum; 27 | assert (sum = n) 28 | 29 | let _ = 30 | (* configured num_domains and default chunk_size *) 31 | let p = T.setup_pool ~num_domains:(num_domains - 1) () in 32 | let sum = Atomic.make 0 in 33 | T.run p (fun _ -> 34 | T.parallel_for p ~start:0 ~finish:(n-1) 
35 | ~body:(fun _i -> ignore (Atomic.fetch_and_add sum 1))); 36 | let sum = Atomic.get sum in 37 | T.teardown_pool p; 38 | Printf.printf "Sum is %d\n" sum; 39 | assert (sum = n) 40 | 41 | -------------------------------------------------------------------------------- /test/summed_area_table.ml: -------------------------------------------------------------------------------- 1 | module T = Domainslib.Task 2 | let num_domains = try int_of_string Sys.argv.(1) with _ -> 4 3 | let size = try int_of_string Sys.argv.(2) with _ -> 100 4 | 5 | let transpose a = 6 | let r = Array.length a in 7 | let c = Array.length a.(0) in 8 | let b = Array.copy a in 9 | for i = 0 to (pred r) do 10 | for j = 0 to (pred c) do 11 | b.(j).(i) <- a.(i).(j) 12 | done 13 | done; 14 | b 15 | 16 | let calc_table pool mat = 17 | let l = Array.length mat in 18 | let res = Array.copy mat in 19 | for i = 0 to (l - 1) do 20 | res.(i) <- T.parallel_scan pool (fun x y -> x + y) mat.(i) 21 | done; 22 | let k = transpose res in 23 | 24 | for i = 0 to (l - 1) do 25 | res.(i) <- T.parallel_scan pool (fun x y -> x + y) k.(i) 26 | done; 27 | (transpose res) 28 | 29 | let _ = 30 | let m = Array.make_matrix size size 1 (*Array.init size (fun _ -> Array.init size (fun _ -> Random.int size))*) 31 | in 32 | let pool = T.setup_pool ~num_domains:(num_domains - 1) () in 33 | let _ = T.run pool (fun _ -> calc_table pool m) in 34 | 35 | (* for i = 0 to size-1 do 36 | for j = 0 to size-1 do 37 | print_int a.(i).(j); print_string " " 38 | done; 39 | print_newline() 40 | done; *) 41 | T.teardown_pool pool 42 | -------------------------------------------------------------------------------- /test/task_more_deps.ml: -------------------------------------------------------------------------------- 1 | (** 2 | Generate tests of async+await from Domainslib.Task. 3 | It does so by generating a random, acyclic dependency graph of [async] tasks, 4 | each [await]ing on its dependency. 
5 | *) 6 | 7 | open QCheck 8 | open Domainslib 9 | 10 | (* a simple work item, from ocaml/testsuite/tests/misc/takc.ml *) 11 | let rec tak x y z = 12 | if x > y then tak (tak (x-1) y z) (tak (y-1) z x) (tak (z-1) x y) 13 | else z 14 | 15 | let work () = 16 | for _ = 1 to 200 do 17 | assert (7 = tak 18 12 6); 18 | done 19 | 20 | (* Generates a DAG of dependencies *) 21 | (* Each task is represented by an array index w/a deps.list *) 22 | (* This example DAG 23 | 24 | A/0 <--- B/1 < 25 | ^. \ 26 | \ \ 27 | `- C/2 <--- D/3 28 | 29 | is represented as: [| []; [0]; [0]; [1;2] |] *) 30 | let gen_dag n st = 31 | Array.init n (fun i -> 32 | let deps = ref [] in 33 | for dep = 0 to i-1 do 34 | if Gen.bool st then deps := dep :: !deps 35 | done; 36 | List.rev !deps) 37 | 38 | type test_input = 39 | { 40 | num_domains : int; 41 | length : int; 42 | dependencies : int list array 43 | } 44 | 45 | let show_test_input t = 46 | Printf.sprintf 47 | "{ num_domains : %i\n length : %i\n dependencies : %s }" 48 | t.num_domains t.length Print.(array (list int) t.dependencies) 49 | 50 | let shrink_deps test_input = 51 | let ls = Array.to_list test_input.dependencies in 52 | let is = Shrink.list ~shrink:Shrink.list ls in 53 | Iter.map 54 | (fun deps -> 55 | let len = List.length deps in 56 | let arr = Array.of_list deps in 57 | let deps = Array.mapi (fun i i_deps -> match i,i_deps with 58 | | 0, _ 59 | | _,[] -> [] 60 | | _,[0] -> [0] 61 | | _, _ -> 62 | List.map (fun j -> 63 | if j<0 || j>=len || j>=i (* ensure reduced dep is valid *) 64 | then ((j + i) mod i) 65 | else j) i_deps) arr in 66 | { test_input with length=len; dependencies=deps }) is 67 | 68 | let arb_deps domain_bound promise_bound = 69 | let gen_deps = 70 | Gen.(pair (int_bound (domain_bound-1)) (int_bound promise_bound) >>= fun (num_domains,length) -> 71 | let num_domains = succ num_domains in 72 | let length = succ length in 73 | gen_dag length >>= fun dependencies -> return { num_domains; length; dependencies }) in 74 | 
make ~print:show_test_input ~shrink:(shrink_deps) gen_deps 75 | 76 | let build_dep_graph pool test_input = 77 | let len = test_input.length in 78 | let deps = test_input.dependencies in 79 | let rec build i promise_acc = 80 | if i=len 81 | then promise_acc 82 | else 83 | let p = (match deps.(i) with 84 | | [] -> 85 | Task.async pool work 86 | | deps -> 87 | Task.async pool (fun () -> 88 | work (); 89 | List.iter (fun dep -> Task.await pool (List.nth promise_acc (i-1-dep))) deps)) in 90 | build (i+1) (p::promise_acc) 91 | in 92 | build 0 [] 93 | 94 | let test_one_pool ~domain_bound ~promise_bound = 95 | Test.make ~name:"Domainslib.Task.async/await, more deps, 1 work pool" ~count:100 96 | (arb_deps domain_bound promise_bound) 97 | (Util.repeat 10 98 | (fun test_input -> 99 | let pool = Task.setup_pool ~num_domains:test_input.num_domains () in 100 | Task.run pool (fun () -> 101 | let ps = build_dep_graph pool test_input in 102 | List.iter (fun p -> Task.await pool p) ps); 103 | Task.teardown_pool pool; 104 | true)) 105 | 106 | let () = 107 | QCheck_base_runner.run_tests_main [test_one_pool ~domain_bound:8 ~promise_bound:10] 108 | -------------------------------------------------------------------------------- /test/task_one_dep.ml: -------------------------------------------------------------------------------- 1 | (** 2 | Generate tests of async+await from Domainslib.Task. 3 | It does so by generating a random, acyclic dependency graph of [async] tasks, 4 | each [await]ing on its dependency. 5 | *) 6 | 7 | open QCheck 8 | open Domainslib 9 | 10 | (* a simple work item, from ocaml/testsuite/tests/misc/takc.ml *) 11 | let rec tak x y z = 12 | if x > y then tak (tak (x-1) y z) (tak (y-1) z x) (tak (z-1) x y) 13 | else z 14 | 15 | let work () = 16 | for _ = 1 to 200 do 17 | assert (7 = tak 18 12 6); 18 | done 19 | 20 | (* Generates a sparse DAG of dependencies *) 21 | (* Each task is represented by an array index w/at most 1 dep. 
each *) 22 | (* This example DAG 23 | 24 | A/0 <--- B/1 25 | ^. 26 | \ 27 | `- C/2 <--- D/3 28 | 29 | is represented as: [| None; Some 0; Some 0; Some 2 |] *) 30 | let gen_deps n st = 31 | let a = Array.make n None in 32 | for i=1 to n-1 do 33 | if Gen.bool st then a.(i) <- Some (Gen.int_bound (i-1) st) 34 | done; 35 | a 36 | 37 | type test_input = 38 | { 39 | num_domains : int; 40 | length : int; 41 | dependencies : int option array 42 | } 43 | 44 | let show_test_input t = 45 | Printf.sprintf 46 | "{ num_domains : %i\n length : %i\n dependencies : %s }" 47 | t.num_domains t.length Print.(array (option int) t.dependencies) 48 | 49 | let shrink_deps test_input = 50 | let ls = Array.to_list test_input.dependencies in 51 | let is = Shrink.list ~shrink:Shrink.(option nil) ls in 52 | Iter.map 53 | (fun deps -> 54 | let len = List.length deps in 55 | let arr = Array.of_list deps in 56 | let deps = Array.mapi (fun i j_opt -> match i,j_opt with 57 | | 0, _ 58 | | _,None -> None 59 | | _,Some 0 -> Some 0 60 | | _, Some j -> 61 | if j<0 || j>=len || j>=i (* ensure reduced dep is valid *) 62 | then Some ((j + i) mod i) 63 | else Some j) arr in 64 | { test_input with length=len; dependencies=deps }) is 65 | 66 | let arb_deps domain_bound promise_bound = 67 | let gen_deps = 68 | Gen.(pair (int_bound (domain_bound-1)) (int_bound promise_bound) >>= fun (num_domains,length) -> 69 | let num_domains = succ num_domains in 70 | let length = succ length in 71 | gen_deps length >>= fun dependencies -> return { num_domains; length; dependencies }) in 72 | let shrink_input input = 73 | Iter.append 74 | (Iter.map (fun doms' -> { input with num_domains = doms' }) (Shrink.int input.num_domains)) 75 | (shrink_deps input) in 76 | make ~print:show_test_input ~shrink:shrink_input gen_deps 77 | 78 | let build_dep_graph pool test_input = 79 | let len = test_input.length in 80 | let deps = test_input.dependencies in 81 | let rec build i promise_acc = 82 | if i=len 83 | then promise_acc 84 | else 85 
| let p = (match deps.(i) with 86 | | None -> 87 | Task.async pool work 88 | | Some dep -> 89 | Task.async pool (fun () -> 90 | work(); 91 | Task.await pool (List.nth promise_acc (i-1-dep)))) in 92 | build (i+1) (p::promise_acc) 93 | in 94 | build 0 [] 95 | 96 | let test_one_pool ~domain_bound ~promise_bound = 97 | Test.make ~name:"Domainslib.Task.async/await, one dep, 1 work pool" ~count:100 98 | (arb_deps domain_bound promise_bound) 99 | (Util.repeat 10 @@ 100 | fun input -> 101 | let pool = Task.setup_pool ~num_domains:input.num_domains () in 102 | Task.run pool (fun () -> 103 | let ps = build_dep_graph pool input in 104 | List.iter (fun p -> Task.await pool p) ps); 105 | Task.teardown_pool pool; 106 | true) 107 | 108 | let test_two_pools_sync_last ~domain_bound ~promise_bound = 109 | let gen = arb_deps domain_bound promise_bound in 110 | Test.make ~name:"Domainslib.Task.async/await, one dep, w.2 pools, syncing at the end" ~count:100 111 | (pair gen gen) 112 | (Util.repeat 10 @@ 113 | fun (input1,input2) -> 114 | try 115 | let pool1 = Task.setup_pool ~num_domains:input1.num_domains () in 116 | let pool2 = Task.setup_pool ~num_domains:input2.num_domains () in 117 | let ps1 = build_dep_graph pool1 input1 in 118 | let ps2 = build_dep_graph pool2 input2 in 119 | Task.run pool1 (fun () -> List.iter (fun p -> Task.await pool1 p) ps1); 120 | Task.run pool2 (fun () -> List.iter (fun p -> Task.await pool2 p) ps2); 121 | Task.teardown_pool pool1; 122 | Task.teardown_pool pool2; 123 | true 124 | with 125 | Failure err -> err = "failed to allocate domain") 126 | 127 | let test_two_nested_pools ~domain_bound ~promise_bound = 128 | let gen = arb_deps domain_bound promise_bound in 129 | Test.make ~name:"Domainslib.Task.async/await, one dep, w.2 nested pools" ~count:100 130 | (pair gen gen) 131 | (Util.repeat 10 @@ 132 | fun (input1,input2) -> 133 | try 134 | let pool1 = Task.setup_pool ~num_domains:input1.num_domains () in 135 | let pool2 = Task.setup_pool 
~num_domains:input2.num_domains () in 136 | Task.run pool1 (fun () -> 137 | Task.run pool2 (fun () -> 138 | let ps1 = build_dep_graph pool1 input1 in 139 | let ps2 = build_dep_graph pool2 input2 in 140 | List.iter (fun p -> Task.await pool1 p) ps1; 141 | List.iter (fun p -> Task.await pool2 p) ps2)); 142 | Task.teardown_pool pool1; 143 | Task.teardown_pool pool2; 144 | true 145 | with 146 | Failure err -> err = "failed to allocate domain") 147 | 148 | let () = 149 | let domain_bound = max 1 (Domain.recommended_domain_count () / 2) in 150 | let promise_bound = max 2 domain_bound in 151 | QCheck_base_runner.run_tests_main [ 152 | test_one_pool ~domain_bound ~promise_bound; 153 | test_two_pools_sync_last ~domain_bound ~promise_bound; 154 | test_two_nested_pools ~domain_bound ~promise_bound; 155 | ] 156 | -------------------------------------------------------------------------------- /test/task_parallel.ml: -------------------------------------------------------------------------------- 1 | open QCheck 2 | open Domainslib 3 | 4 | (** Property-based QCheck tests of Task.parallel_* *) 5 | 6 | let count = 250 7 | 8 | let test_parallel_for = 9 | Test.make ~name:"Domainslib.Task.parallel_for test" ~count 10 | (triple (int_bound 10) small_nat small_nat) 11 | (fun (num_domains,array_size,chunk_size) -> 12 | let pool = Task.setup_pool ~num_domains () in 13 | let res = Task.run pool (fun () -> 14 | let a = Atomic.make 0 in 15 | Task.parallel_for ~chunk_size ~start:0 ~finish:(array_size-1) ~body:(fun _ -> Atomic.incr a) pool; 16 | Atomic.get a) in 17 | Task.teardown_pool pool; 18 | res = array_size) 19 | 20 | let test_parallel_for_reduce = 21 | Test.make ~name:"Domainslib.Task.parallel_for_reduce test" ~count 22 | (triple (int_bound 10) small_nat small_nat) 23 | (fun (num_domains,array_size,chunk_size) -> 24 | let pool = Task.setup_pool ~num_domains () in 25 | let res = Task.run pool (fun () -> 26 | Task.parallel_for_reduce ~chunk_size ~start:0 ~finish:(array_size-1) ~body:(fun 
_ -> 1) pool (+) 0) in 27 | Task.teardown_pool pool; 28 | res = array_size) 29 | 30 | let test_parallel_scan = 31 | Test.make ~name:"Domainslib.Task.parallel_scan test" ~count 32 | (pair (int_bound 10) small_nat) 33 | (fun (num_domains,array_size) -> 34 | let pool = Task.setup_pool ~num_domains () in 35 | let a = Task.run pool (fun () -> Task.parallel_scan pool (+) (Array.make array_size 1)) in 36 | Task.teardown_pool pool; 37 | a = Array.init array_size (fun i -> i + 1)) 38 | 39 | let () = 40 | QCheck_base_runner.run_tests_main [ 41 | test_parallel_for; 42 | test_parallel_for_reduce; 43 | test_parallel_scan; 44 | ] 45 | -------------------------------------------------------------------------------- /test/task_throughput.ml: -------------------------------------------------------------------------------- 1 | 2 | let n_domains = try int_of_string Sys.argv.(1) with _ -> 1 3 | let n_iterations = try int_of_string Sys.argv.(2) with _ -> 1024 4 | let n_tasks = try int_of_string Sys.argv.(3) with _ -> 1024 5 | 6 | module T = Domainslib.Task 7 | 8 | module TimingHist = struct 9 | type t = { 10 | data: int array; 11 | min_n: int; 12 | max_n: int; 13 | mutable count: int; 14 | mutable sum : float; 15 | } 16 | 17 | let make min_n max_n = 18 | { data=Array.make (max_n - min_n) 0; min_n; max_n; count=0; sum=0. } 19 | 20 | let rec log2 n = 21 | if n <= 1 then 0 else 1 + log2(n asr 1) 22 | 23 | let add_point a x = 24 | let i = (log2 x) in 25 | let i = max (i-a.min_n+1) 0 in 26 | let i = min i ((Array.length a.data)-1) in 27 | a.data.(i) <- a.data.(i) + 1; 28 | a.sum <- a.sum +. (float_of_int x); 29 | a.count <- a.count + 1 30 | 31 | let mean a = 32 | a.sum /. 
(float_of_int a.count) 33 | 34 | let print_hist a = 35 | Printf.printf "Timings (ns): n=%d mean=%.1f\n" a.count (mean a); 36 | let fn n = (Int.shift_left 1 (a.min_n+n)) in 37 | let len = Array.length a.data in 38 | for i = 0 to (len - 1) do 39 | match i with 40 | | i when i=0 -> 41 | Printf.printf " (%8d, %8d): %6d\n" 0 (fn i) a.data.(i); 42 | | i when i=(len-1) -> 43 | Printf.printf " [%8d, Inf): %6d\n" (fn (i-1)) a.data.(i); 44 | | i -> 45 | Printf.printf " [%8d, %8d): %6d\n" (fn (i-1)) (fn i) a.data.(i); 46 | done 47 | 48 | end 49 | 50 | let _ = 51 | Printf.printf "n_iterations: %d n_units: %d n_domains: %d\n" 52 | n_iterations n_tasks n_domains; 53 | let pool = T.setup_pool ~num_domains:(n_domains - 1) () in 54 | 55 | let hist = TimingHist.make 5 25 in 56 | for _ = 1 to n_iterations do 57 | let t0 = Mclock.elapsed_ns() in 58 | T.run pool (fun _ -> 59 | T.parallel_for pool ~start:1 ~finish:n_tasks ~body:(fun _ -> ())); 60 | let t = Int64.sub (Mclock.elapsed_ns ()) t0 in 61 | TimingHist.add_point hist (Int64.to_int t); 62 | done; 63 | 64 | TimingHist.print_hist hist; 65 | 66 | T.teardown_pool pool 67 | -------------------------------------------------------------------------------- /test/test_chan.ml: -------------------------------------------------------------------------------- 1 | let buffer_size = try int_of_string Sys.argv.(1) with _ -> 1 2 | let num_items = try int_of_string Sys.argv.(2) with _ -> 100 3 | let num_senders = try int_of_string Sys.argv.(3) with _ -> 1 4 | let num_receivers = try int_of_string Sys.argv.(4) with _ -> 1 5 | 6 | module C = Domainslib.Chan 7 | 8 | let c = C.make_bounded buffer_size 9 | 10 | let rec receiver i n = 11 | if i = n then 12 | print_endline @@ Printf.sprintf "Receiver on domain %d done" (Domain.self () :> int) 13 | else ( 14 | ignore @@ C.recv c; 15 | receiver (i+1) n ) 16 | 17 | let rec sender i n = 18 | if i = n then 19 | print_endline @@ Printf.sprintf "Sender on domain %d done" (Domain.self () :> int) 20 | else ( 21 
| C.send c i; 22 | sender (i+1) n ) 23 | 24 | let _ = 25 | assert (num_items mod num_senders == 0); 26 | assert (num_items mod num_receivers == 0); 27 | let senders = 28 | Array.init num_senders (fun _ -> 29 | Domain.spawn (fun _ -> sender 0 (num_items / num_senders))) 30 | in 31 | let receivers = 32 | Array.init num_receivers (fun _ -> 33 | Domain.spawn (fun _ -> receiver 0 (num_items / num_receivers))) 34 | in 35 | Array.iter Domain.join senders; 36 | Array.iter Domain.join receivers; 37 | begin match C.recv_poll c with 38 | | None -> () 39 | | Some _ -> assert false 40 | end; 41 | for _i=1 to buffer_size do 42 | C.send c 0 43 | done; 44 | for _i=1 to buffer_size do 45 | ignore (C.recv c) 46 | done; 47 | begin match C.recv_poll c with 48 | | None -> () 49 | | Some _ -> assert false 50 | end 51 | -------------------------------------------------------------------------------- /test/test_deadlock.ml: -------------------------------------------------------------------------------- 1 | (* Despite what the name says, this test will not deadlock. A similar test will 2 | * deadlock in the version not using effect handlers. See 3 | * https://github.com/ocaml-multicore/ocaml-multicore/issues/670 *) 4 | 5 | module T = Domainslib.Task 6 | 7 | let n = try int_of_string Sys.argv.(1) with _ -> 1_000_000 8 | 9 | let rec loop n = 10 | if n = 0 then 11 | Printf.printf "Looping finished on domain %d\n%!" (Domain.self () :> int) 12 | else (Domain.cpu_relax (); loop (n-1)) 13 | 14 | let () = 15 | let pool = T.setup_pool ~num_domains:2 () in 16 | T.run pool (fun _ -> 17 | let a = T.async pool (fun _ -> 18 | Printf.printf "Task A running on domain %d\n%!" (Domain.self () :> int); 19 | loop n) 20 | in 21 | let b = T.async pool (fun _ -> 22 | Printf.printf "Task B running on domain %d\n%!" (Domain.self () :> int); 23 | T.await pool a) 24 | in 25 | let c = T.async pool (fun _ -> 26 | Printf.printf "Task C running on domain %d\n%!" 
(Domain.self () :> int); 27 | T.await pool b) 28 | in 29 | loop n; 30 | T.await pool c); 31 | T.teardown_pool pool 32 | -------------------------------------------------------------------------------- /test/test_parallel_find.ml: -------------------------------------------------------------------------------- 1 | let len = 1_000_000 2 | let nb_needles = 4 3 | 4 | let () = Random.init 42 5 | 6 | let needles = 7 | Array.init nb_needles (fun _ -> Random.int len) 8 | 9 | let input = 10 | let t = Array.make len false in 11 | needles |> Array.iter (fun needle -> 12 | t.(needle) <- true 13 | ); 14 | t 15 | 16 | open Domainslib 17 | 18 | let search_needle pool ~chunk_size = 19 | Task.parallel_find pool ~chunk_size ~start:0 ~finish:(len - 1) ~body:(fun i -> 20 | if input.(i) then Some i 21 | else None 22 | ) 23 | 24 | let test_search pool ~chunk_size = 25 | match search_needle pool ~chunk_size with 26 | | None -> assert false 27 | | Some needle -> 28 | assert (Array.exists ((=) needle) needles) 29 | 30 | let () = 31 | (* [num_domains] is the number of *new* domains spawned by the pool 32 | performing computations in addition to the current domain. 
*) 33 | let num_domains = Domain.recommended_domain_count () - 1 in 34 | Printf.eprintf "test_parallel_find on %d domains.\n" (num_domains + 1); 35 | let pool = Task.setup_pool ~num_domains ~name:"pool" () in 36 | Task.run pool begin fun () -> 37 | [0; 16; 32; 1000] |> List.iter (fun chunk_size -> 38 | test_search pool ~chunk_size) 39 | end; 40 | Task.teardown_pool pool; 41 | prerr_endline "Success."; 42 | -------------------------------------------------------------------------------- /test/test_parallel_scan.ml: -------------------------------------------------------------------------------- 1 | let len = 1_000_000 2 | 3 | let singleton_interval i = (i, i + 1) 4 | 5 | let combine_intervals interval1 interval2 = 6 | let b1, e1 = interval1 7 | and b2, e2 = interval2 in 8 | if e1 <> b2 then begin 9 | Printf.eprintf "Invalid intervals: (%d, %d), (%d, %d)\n" b1 e1 b2 e2; 10 | assert false 11 | end 12 | else (b1, e2) 13 | 14 | open Domainslib 15 | 16 | let test_scan_ordering pool = 17 | let check_interval i interval = 18 | let (b, e) = interval in 19 | assert (b = 0 && e = i + 1) 20 | in 21 | Array.init len singleton_interval 22 | |> Task.parallel_scan pool combine_intervals 23 | |> Array.iteri check_interval 24 | 25 | let () = 26 | (* [num_domains] is the number of *new* domains spawned by the pool 27 | performing computations in addition to the current domain. 
*) 28 | let num_domains = Domain.recommended_domain_count () - 1 in 29 | Printf.eprintf "test_parallel_scan on %d domains.\n" (num_domains + 1); 30 | let pool = Task.setup_pool ~num_domains ~name:"pool" () in 31 | Task.run pool begin fun () -> 32 | test_scan_ordering pool 33 | end; 34 | Task.teardown_pool pool; 35 | prerr_endline "Success."; -------------------------------------------------------------------------------- /test/test_task.ml: -------------------------------------------------------------------------------- 1 | (* Generic tests for the task module *) 2 | 3 | (* Parallel for *) 4 | 5 | open Domainslib 6 | let modify_arr pool chunk_size = fun () -> 7 | let arr1 = Array.init 100 (fun i -> i + 1) in 8 | Task.parallel_for ~chunk_size ~start:0 ~finish:99 9 | ~body:(fun i -> arr1.(i) <- arr1.(i) * 2) pool; 10 | let arr_res = Array.init 100 (fun i -> (i + 1) * 2) in 11 | assert (arr1 = arr_res) 12 | 13 | let inc_ctr pool chunk_size = fun () -> 14 | let ctr = Atomic.make 0 in 15 | Task.parallel_for ~chunk_size ~start:1 ~finish:1000 16 | ~body:(fun _ -> Atomic.incr ctr) pool; 17 | assert (Atomic.get ctr = 1000) 18 | 19 | (* Parallel for reduce *) 20 | 21 | let sum_sequence pool chunk_size init = fun () -> 22 | let v = Task.parallel_for_reduce ~chunk_size ~start:1 23 | ~finish:100 ~body:(fun i -> i) pool (+) init in 24 | assert (v = 5050 + init) 25 | 26 | (* Parallel scan *) 27 | 28 | let prefix_sum pool = fun () -> 29 | let prefix_s l = List.rev (List.fold_left (fun a y -> match a with 30 | | [] -> [y] 31 | | x::_ -> (x+y)::a) [] l) in 32 | let arr = Array.make 1000 1 in 33 | let v1 = Task.parallel_scan pool (+) arr in 34 | let ls = Array.to_list arr in 35 | let v2 = prefix_s ls in 36 | assert (v1 = Array.of_list v2) 37 | 38 | 39 | let () = 40 | let pool1 = Task.setup_pool ~num_domains:2 ~name:"pool1" () in 41 | let pool2 = Task.setup_pool ~num_domains:2 ~name:"pool2" () in 42 | Task.run pool1 (fun _ -> 43 | let p1 = Option.get @@ Task.lookup_pool "pool1" in 44 
| modify_arr pool1 0 (); 45 | modify_arr pool1 25 (); 46 | modify_arr pool1 100 (); 47 | inc_ctr p1 0 (); 48 | inc_ctr p1 16 (); 49 | inc_ctr p1 32 (); 50 | inc_ctr p1 1000 ()); 51 | Task.run pool2 (fun _ -> 52 | let p2 = Option.get @@ Task.lookup_pool "pool2" in 53 | sum_sequence pool2 0 0 (); 54 | sum_sequence pool2 10 10 (); 55 | sum_sequence pool2 1 0 (); 56 | sum_sequence p2 1 10 (); 57 | sum_sequence p2 100 10 (); 58 | sum_sequence p2 100 100 (); 59 | prefix_sum p2 ()); 60 | Task.teardown_pool pool1; 61 | Task.teardown_pool pool2; 62 | 63 | try 64 | sum_sequence pool2 0 0 (); 65 | assert false 66 | with Invalid_argument _ -> (); 67 | 68 | assert (Task.lookup_pool "pool1" = None); 69 | 70 | try 71 | let _ = Task.setup_pool ~num_domains:(-1) () in () 72 | with Invalid_argument _ -> (); 73 | print_endline "ok" 74 | -------------------------------------------------------------------------------- /test/test_task_crash.ml: -------------------------------------------------------------------------------- 1 | open Domainslib 2 | 3 | (* a simple work item, from ocaml/testsuite/tests/misc/takc.ml *) 4 | let rec tak x y z = 5 | if x > y then tak (tak (x-1) y z) (tak (y-1) z x) (tak (z-1) x y) 6 | else z 7 | 8 | let work () = 9 | for _ = 1 to 200 do 10 | assert (7 = tak 18 12 6); 11 | done 12 | ;; 13 | begin 14 | let pool1 = Task.setup_pool ~num_domains:2 () in 15 | let pool2 = Task.setup_pool ~num_domains:1 () in 16 | 17 | let pool1_prom0 = Task.async pool1 work in 18 | 19 | let pool2_prom0 = Task.async pool2 work in 20 | let pool2_prom1 = Task.async pool2 work in 21 | 22 | Task.run pool1 (fun () -> List.iter (fun p -> Task.await pool1 p) [pool1_prom0]); 23 | Task.run pool2 (fun () -> List.iter (fun p -> Task.await pool2 p) [pool2_prom0; pool2_prom1]); 24 | 25 | Task.teardown_pool pool1; 26 | Task.teardown_pool pool2; 27 | end 28 | -------------------------------------------------------------------------------- /test/test_task_empty.ml: 
-------------------------------------------------------------------------------- 1 | open Domainslib 2 | 3 | let array_size = 0 4 | 5 | let pool = Task.setup_pool ~num_domains:0 () 6 | let res = Task.run pool (fun () -> 7 | Task.parallel_for_reduce ~chunk_size:0 ~start:0 ~finish:(array_size-1) ~body:(fun _ -> 1) pool (+) 0);; 8 | Task.teardown_pool pool;; 9 | assert(res = array_size) 10 | --------------------------------------------------------------------------------