├── .github └── workflows │ └── main.yml ├── .gitignore ├── CHANGES.md ├── CODE_OF_CONDUCT.md ├── LICENSE.md ├── Makefile ├── README.md ├── domainslib.opam ├── dune-project ├── lib ├── chan.ml ├── chan.mli ├── domainslib.ml ├── dune ├── fun_queue.ml ├── fun_queue.mli ├── multi_channel.ml ├── task.ml └── task.mli └── test ├── LU_decomposition_multicore.ml ├── backtrace.ml ├── chan_stm_tests.ml ├── dune ├── enumerate_par.ml ├── fib.ml ├── fib_par.ml ├── game_of_life.ml ├── game_of_life_multicore.ml ├── kcas_integration.ml ├── off_by_one.ml ├── prefix_sum.ml ├── spectralnorm2.ml ├── spectralnorm2_multicore.ml ├── sum_par.ml ├── summed_area_table.ml ├── task_more_deps.ml ├── task_one_dep.ml ├── task_parallel.ml ├── task_throughput.ml ├── test_chan.ml ├── test_deadlock.ml ├── test_parallel_find.ml ├── test_parallel_scan.ml ├── test_task.ml ├── test_task_crash.ml └── test_task_empty.ml /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: main 2 | 3 | on: 4 | pull_request: 5 | push: 6 | schedule: 7 | # Prime the caches every Monday 8 | - cron: 0 1 * * MON 9 | 10 | jobs: 11 | windows: 12 | runs-on: windows-latest 13 | 14 | env: 15 | QCHECK_MSG_INTERVAL: '60' 16 | 17 | steps: 18 | - name: Checkout code 19 | uses: actions/checkout@v2 20 | 21 | - name: Use OCaml ${{ matrix.ocaml-compiler }} 22 | uses: ocaml/setup-ocaml@v2 23 | with: 24 | opam-pin: false 25 | opam-depext: false 26 | ocaml-compiler: ocaml.5.0.0,ocaml-option-mingw 27 | opam-repositories: | 28 | dra27: https://github.com/dra27/opam-repository.git#windows-5.0 29 | default: https://github.com/ocaml-opam/opam-repository-mingw.git#sunset 30 | upstream: https://github.com/ocaml/opam-repository.git 31 | cache-prefix: ${{ steps.multicore_hash.outputs.commit }} 32 | 33 | - run: opam install . 
--deps-only --with-test 34 | 35 | - run: opam exec -- dune build 36 | 37 | - run: opam exec -- dune runtest -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | _build 3 | .merlin 4 | -------------------------------------------------------------------------------- /CHANGES.md: -------------------------------------------------------------------------------- 1 | ## 0.5.2 2 | 3 | * Upgrade to Saturn 1.0 (#129, @Sudha247) 4 | * Update README.md instruction to use OCaml 5.1.0 (#123, @punchagan) 5 | * Fix Saturn.Queue function (#121, @Sudha247) 6 | * Make parallel_scan work on noncommutative functions (#118, @aytao) 7 | * Test condition tweaks (#113, @jmid) 8 | * Adjust PBTs based on recommended_domain_count (#112, @jmid) 9 | 10 | ## 0.5.1 11 | 12 | * Add parallel_find (#90, @gasche) 13 | * Update CI (#93, @Sudha247) 14 | * Optimisation to work-stealing (#96, @art-w) 15 | * Improve docs presentation (#99, @metanivek) 16 | * Property based tests (#100, jmid) 17 | * Task: avoid double handler installation (#101, @gasche & @clef-men) 18 | * Fix a benign data-race in Chan reported by ocaml-tsan (#103, @art-w) 19 | * Dune, opam, and GitHub Actions fixes (#105, @MisterDA) 20 | * domain local await support (#107, @polytypic) 21 | * Windows run on GitHub Actions (#110, @Sudha247) 22 | * Adjust PBTs based on recommended_domain_count (#112, @jmid) 23 | * Test condition tweaks (#113, @jmid) 24 | 25 | ## 0.5.0 26 | 27 | This release includes: 28 | 29 | * Bug fix for `parallel_for_reduce` on empty loops. 30 | * Make Chan.t and Task.promise injective #69 31 | * Add lockfree dependency #70 32 | * CI fixes (#73, #76) 33 | * Breaking change: Rename `num_additional_domains` to `num_domains` for setup_pool 34 | * Documentation updates (#80, #81, #82) 35 | 36 | ## 0.4.2 37 | 38 | Includes Effect.eff -> Effect.t change from OCaml trunk. 
(#65) 39 | 40 | ## 0.4.1 41 | 42 | This release fixes compatibility with OCaml 5.00.0+trunk in #61. Breaks compatibility with older Multicore variants 4.12.0+domains and 4.12.0+domains+effects 43 | 44 | ## 0.4.0 45 | 46 | This release includes: 47 | 48 | * Usage of effect handlers for task creation. This introduces a breaking change; all computations need to be enclosed in a Task.run function. See #51. 49 | * Multi_channel uses a per-channel domain-local key, removing the global key. #50 50 | * Bug fixes in parallel_scan. #60 51 | 52 | ## 0.3.2 53 | 54 | Corresponding updates for breaking changes introduced in ocaml-multicore/ocaml-multicore#704 55 | 56 | * Updated with the new interface Domain.cpu_relax 57 | * Domain.timer_ticks replaced with Mirage clock. 58 | 59 | ## 0.3.1 60 | 61 | * #45 adds support for named pools. This is a breaking change with setup_pool taking an optional name parameter and an extra unit parameter. 62 | * A minor bug fix in parallel_for_reduce. 63 | 64 | ## 0.3.0 65 | 66 | This release includes: 67 | 68 | * A breaking change for Task pools where the num_domains argument has been renamed num_additional_domains to clear up potential confusion; see #31. 69 | * A new work-stealing scheduler for Task pools using domain local Chase Lev deques #29; this can improve performance significantly for some workloads. 70 | * A removal of closure allocation in Chan #28. 71 | * A move to using the Mutex & Condition modules for the implementation of Chan #24. 72 | * Various documentation and packaging improvements (#21, #27, #30, #32). 
73 | 74 | ## 0.2.2 75 | 76 | Updates to: 77 | 78 | * parallel_for to use new task distribution algorithm and allow default chunk_size (#16) 79 | * parallel_for_reduce to use new task distribution algorithm and allow default chunk_size parameter (#18) 80 | 81 | ## 0.2.1 82 | 83 | * `recv_poll` made non-allocating 84 | * Addition of parallel_scan #5 85 | 86 | ## 0.2.0 87 | 88 | * New Tasks library with support for async/await parallelism and parallel for loops. 89 | * Adds support for non-blocking Chan.send_poll and Chan.recv_poll. 90 | 91 | Thanks to @gasche for API design discussions. 92 | 93 | ## 0.1.0 94 | 95 | Initial release -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | This project has adopted the [OCaml Code of Conduct](https://github.com/ocaml/code-of-conduct/blob/main/CODE_OF_CONDUCT.md). 4 | 5 | # Enforcement 6 | 7 | This project follows the OCaml Code of Conduct 8 | [enforcement policy](https://github.com/ocaml/code-of-conduct/blob/main/CODE_OF_CONDUCT.md#enforcement). 9 | 10 | To report any violations, please contact: 11 | 12 | * KC Sivaramakrishnan 13 | * Sudha Parimala 14 | * Vesa Karvonen 15 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016 KC Sivaramakrishnan 2 | 3 | Permission to use, copy, modify, and/or distribute this software for any 4 | purpose with or without fee is hereby granted, provided that the above 5 | copyright notice and this permission notice appear in all copies. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 8 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 9 | MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 10 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 11 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 12 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 13 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 14 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | dune build @install 3 | 4 | run_test: 5 | OCAMLRUNPARAM="b=1" dune runtest -f 6 | 7 | clean: 8 | dune clean 9 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Domainslib - Nested-parallel programming 2 | 3 | Domainslib provides support for nested-parallel programming. Domainslib provides async/await mechanism for spawning parallel tasks and awaiting their results. On top of this mechanism, domainslib provides parallel iteration functions. At its core, domainslib has an efficient implementation of work-stealing queue in order to efficiently share tasks with other domains. 4 | 5 | Here is a _sequential_ program that computes nth Fibonacci number using recursion: 6 | 7 | ```ocaml 8 | (* fib.ml *) 9 | let n = try int_of_string Sys.argv.(1) with _ -> 1 10 | 11 | let rec fib n = if n < 2 then 1 else fib (n - 1) + fib (n - 2) 12 | 13 | let main () = 14 | let r = fib n in 15 | Printf.printf "fib(%d) = %d\n%!" 
n r 16 | 17 | let _ = main () 18 | ``` 19 | 20 | We can parallelise this program using Domainslib: 21 | 22 | ```ocaml 23 | (* fib_par.ml *) 24 | let num_domains = try int_of_string Sys.argv.(1) with _ -> 1 25 | let n = try int_of_string Sys.argv.(2) with _ -> 1 26 | 27 | (* Sequential Fibonacci *) 28 | let rec fib n = 29 | if n < 2 then 1 else fib (n - 1) + fib (n - 2) 30 | 31 | module T = Domainslib.Task 32 | 33 | let rec fib_par pool n = 34 | if n > 20 then begin 35 | let a = T.async pool (fun _ -> fib_par pool (n-1)) in 36 | let b = T.async pool (fun _ -> fib_par pool (n-2)) in 37 | T.await pool a + T.await pool b 38 | end else 39 | (* Call sequential Fibonacci if the available work is small *) 40 | fib n 41 | 42 | let main () = 43 | let pool = T.setup_pool ~num_domains:(num_domains - 1) () in 44 | let res = T.run pool (fun _ -> fib_par pool n) in 45 | T.teardown_pool pool; 46 | Printf.printf "fib(%d) = %d\n" n res 47 | 48 | let _ = main () 49 | ``` 50 | 51 | The parallel program scales nicely compared to the sequential version. The results presented below were obtained on a 2.3 GHz Quad-Core Intel Core i7 MacBook Pro with 4 cores and 8 hardware threads. 
52 | 53 | ```bash 54 | $ hyperfine './fib.exe 42' './fib_par.exe 2 42' \ 55 | './fib_par.exe 4 42' './fib_par.exe 8 42' 56 | Benchmark 1: ./fib.exe 42 57 | Time (mean ± sd): 1.217 s ± 0.018 s [User: 1.203 s, System: 0.004 s] 58 | Range (min … max): 1.202 s … 1.261 s 10 runs 59 | 60 | Benchmark 2: ./fib_par.exe 2 42 61 | Time (mean ± sd): 628.2 ms ± 2.9 ms [User: 1243.1 ms, System: 4.9 ms] 62 | Range (min … max): 625.7 ms … 634.5 ms 10 runs 63 | 64 | Benchmark 3: ./fib_par.exe 4 42 65 | Time (mean ± sd): 337.6 ms ± 23.4 ms [User: 1321.8 ms, System: 8.4 ms] 66 | Range (min … max): 318.5 ms … 377.6 ms 10 runs 67 | 68 | Benchmark 4: ./fib_par.exe 8 42 69 | Time (mean ± sd): 250.0 ms ± 9.4 ms [User: 1877.1 ms, System: 12.6 ms] 70 | Range (min … max): 242.5 ms … 277.3 ms 11 runs 71 | 72 | Summary 73 | './fib_par.exe 8 42' ran 74 | 1.35 ± 0.11 times faster than './fib_par.exe 4 42' 75 | 2.51 ± 0.10 times faster than './fib_par.exe 2 42' 76 | 4.87 ± 0.20 times faster than './fib.exe 42' 77 | ``` 78 | 79 | More example programs are available [here](https://github.com/ocaml-multicore/domainslib/tree/master/test). 80 | 81 | ## Installation 82 | 83 | You can install this library using `OPAM`. 
84 | 85 | ```bash 86 | $ opam switch create 5.3.0 87 | $ opam install domainslib 88 | ``` 89 | 90 | ## Development 91 | 92 | If you are interested in hacking on the implementation, then `opam pin` this repository: 93 | 94 | ```bash 95 | $ opam switch create 5.0.0+trunk --repo=default,alpha=git+https://github.com/kit-ty-kate/opam-alpha-repository.git 96 | $ git clone https://github.com/ocaml-multicore/domainslib 97 | $ cd domainslib 98 | $ opam pin add domainslib file://`pwd` 99 | ``` 100 | -------------------------------------------------------------------------------- /domainslib.opam: -------------------------------------------------------------------------------- 1 | # This file is generated by dune, edit dune-project instead 2 | opam-version: "2.0" 3 | synopsis: "Parallel Structures over Domains for Multicore OCaml" 4 | maintainer: ["KC Sivaramakrishnan " "Sudha Parimala"] 5 | authors: ["KC Sivaramakrishnan "] 6 | license: "ISC" 7 | homepage: "https://github.com/ocaml-multicore/domainslib" 8 | doc: "https://ocaml-multicore.github.io/domainslib/doc" 9 | bug-reports: "https://github.com/ocaml-multicore/domainslib/issues" 10 | depends: [ 11 | "dune" {>= "3.0"} 12 | "ocaml" {>= "5.0"} 13 | "saturn" {>= "1.0.0"} 14 | "domain-local-await" {>= "0.1.0"} 15 | "kcas" {>= "0.3.0" & with-test} 16 | "mirage-clock-unix" {with-test & >= "4.2.0"} 17 | "qcheck-core" {with-test & >= "0.20"} 18 | "qcheck-multicoretests-util" {with-test & >= "0.1"} 19 | "qcheck-stm" {with-test & >= "0.1"} 20 | "odoc" {with-doc} 21 | ] 22 | build: [ 23 | ["dune" "subst"] {dev} 24 | [ 25 | "dune" 26 | "build" 27 | "-p" 28 | name 29 | "-j" 30 | jobs 31 | "@install" 32 | "@runtest" {with-test} 33 | "@doc" {with-doc} 34 | ] 35 | ] 36 | dev-repo: "git+https://github.com/ocaml-multicore/domainslib.git" 37 | -------------------------------------------------------------------------------- /dune-project: -------------------------------------------------------------------------------- 1 | (lang dune 3.0) 2 | 
(name domainslib) 3 | (formatting disabled) 4 | (generate_opam_files true) 5 | 6 | (source (github ocaml-multicore/domainslib)) 7 | (authors "KC Sivaramakrishnan ") 8 | (maintainers "KC Sivaramakrishnan " "Sudha Parimala") 9 | (documentation "https://ocaml-multicore.github.io/domainslib/doc") 10 | (license "ISC") 11 | 12 | (package 13 | (name domainslib) 14 | (synopsis "Parallel Structures over Domains for Multicore OCaml") 15 | (depends 16 | (ocaml (>= "5.0")) 17 | (saturn (>= "0.4.0")) 18 | (domain-local-await (>= 0.1.0)) 19 | (kcas (and (>= 0.3.0) :with-test)) 20 | (mirage-clock-unix (and :with-test (>= "4.2.0"))) 21 | (qcheck-core (and :with-test (>= "0.20"))) 22 | (qcheck-multicoretests-util (and :with-test (>= "0.1"))) 23 | (qcheck-stm (and :with-test (>= "0.1"))))) 24 | -------------------------------------------------------------------------------- /lib/chan.ml: -------------------------------------------------------------------------------- 1 | (* mutex_condvar will be used per domain; so multiple fibers or 2 | systhreads may share a mutex_condvar variable *) 3 | type mutex_condvar = { 4 | mutex: Mutex.t; 5 | condition: Condition.t 6 | } 7 | 8 | type waiting_notified = 9 | | Waiting 10 | | Notified 11 | 12 | type 'a contents = 13 | | Empty of {receivers: ('a option ref * mutex_condvar) Fun_queue.t} 14 | | NotEmpty of {senders: ('a * waiting_notified ref * mutex_condvar) Fun_queue.t; messages: 'a Fun_queue.t} 15 | 16 | type 'a t = { 17 | buffer_size: int option; 18 | contents: 'a contents Atomic.t 19 | } 20 | 21 | let mutex_condvar_key = 22 | Domain.DLS.new_key (fun () -> 23 | let m = Mutex.create () in 24 | let c = Condition.create () in 25 | {mutex=m; condition=c}) 26 | 27 | let make_bounded n = 28 | if n < 0 then invalid_arg "Chan.make_bounded" ; 29 | {buffer_size= Some n; 30 | contents = Atomic.make (Empty {receivers= Fun_queue.empty; })} 31 | 32 | let make_unbounded () = 33 | {buffer_size= None; 34 | contents = Atomic.make (Empty {receivers= 
Fun_queue.empty})} 35 | 36 | (* [send'] is shared by both the blocking and polling versions. Returns a 37 | * boolean indicating whether the send was successful. Hence, it always returns 38 | * [true] if [polling] is [false]. *) 39 | let rec send' {buffer_size; contents} v ~polling = 40 | let open Fun_queue in 41 | let old_contents = Atomic.get contents in 42 | match old_contents with 43 | | Empty {receivers} -> begin 44 | (* The channel is empty (no senders) *) 45 | match pop receivers with 46 | | None -> 47 | (* The channel is empty (no senders) and no waiting receivers *) 48 | if buffer_size = Some 0 then 49 | (* The channel is empty (no senders), no waiting receivers, and 50 | * buffer size is 0 *) 51 | begin if not polling then begin 52 | (* The channel is empty (no senders), no waiting receivers, 53 | * buffer size is 0 and we're not polling *) 54 | let mc = Domain.DLS.get mutex_condvar_key in 55 | let cond_slot = ref Waiting in 56 | let new_contents = 57 | NotEmpty 58 | {messages= empty; senders= push empty (v, cond_slot, mc)} 59 | in 60 | if Atomic.compare_and_set contents old_contents new_contents 61 | then begin 62 | Mutex.lock mc.mutex; 63 | while !cond_slot = Waiting do 64 | Condition.wait mc.condition mc.mutex 65 | done; 66 | Mutex.unlock mc.mutex; 67 | true 68 | end else send' {buffer_size; contents} v ~polling 69 | end else 70 | (* The channel is empty (no senders), no waiting receivers, 71 | * buffer size is 0 and we're polling *) 72 | false 73 | end 74 | else 75 | (* The channel is empty (no senders), no waiting receivers, and 76 | * the buffer size is non-zero *) 77 | let new_contents = 78 | NotEmpty {messages= push empty v; senders= empty} 79 | in 80 | if Atomic.compare_and_set contents old_contents new_contents 81 | then true 82 | else send' {buffer_size; contents} v ~polling 83 | | Some ((r, mc), receivers') -> 84 | (* The channel is empty (no senders) and there are waiting receivers 85 | * *) 86 | let new_contents = Empty {receivers= 
receivers'} in 87 | if Atomic.compare_and_set contents old_contents new_contents 88 | then begin 89 | Mutex.lock mc.mutex; 90 | r := Some v; 91 | Mutex.unlock mc.mutex; 92 | Condition.broadcast mc.condition; 93 | true 94 | end else send' {buffer_size; contents} v ~polling 95 | end 96 | | NotEmpty {senders; messages} -> 97 | (* The channel is not empty *) 98 | if buffer_size = Some (length messages) then 99 | (* The channel is not empty, and the buffer is full *) 100 | begin if not polling then 101 | (* The channel is not empty, the buffer is full and we're not 102 | * polling *) 103 | let cond_slot = ref Waiting in 104 | let mc = Domain.DLS.get mutex_condvar_key in 105 | let new_contents = 106 | NotEmpty {senders= push senders (v, cond_slot, mc); messages} 107 | in 108 | if Atomic.compare_and_set contents old_contents new_contents then begin 109 | Mutex.lock mc.mutex; 110 | while !cond_slot = Waiting do 111 | Condition.wait mc.condition mc.mutex; 112 | done; 113 | Mutex.unlock mc.mutex; 114 | true 115 | end else send' {buffer_size; contents} v ~polling 116 | else 117 | (* The channel is not empty, the buffer is full and we're 118 | * polling *) 119 | false 120 | end 121 | else 122 | (* The channel is not empty, and the buffer is not full *) 123 | let new_contents = 124 | NotEmpty {messages= push messages v; senders} 125 | in 126 | if Atomic.compare_and_set contents old_contents new_contents 127 | then true 128 | else send' {buffer_size; contents} v ~polling 129 | 130 | let send c v = 131 | let r = send' c v ~polling:false in 132 | assert r 133 | 134 | let send_poll c v = send' c v ~polling:true 135 | 136 | (* [recv'] is shared by both the blocking and polling versions. Returns a an 137 | * optional value indicating whether the receive was successful. Hence, it 138 | * always returns [Some v] if [polling] is [false]. 
*) 139 | let rec recv' {buffer_size; contents} ~polling = 140 | let open Fun_queue in 141 | let old_contents = Atomic.get contents in 142 | match old_contents with 143 | | Empty {receivers} -> 144 | (* The channel is empty (no senders) *) 145 | if not polling then begin 146 | (* The channel is empty (no senders), and we're not polling *) 147 | let msg_slot = ref None in 148 | let mc = Domain.DLS.get mutex_condvar_key in 149 | let new_contents = 150 | Empty {receivers= push receivers (msg_slot, mc)} 151 | in 152 | if Atomic.compare_and_set contents old_contents new_contents then 153 | begin 154 | Mutex.lock mc.mutex; 155 | while !msg_slot = None do 156 | Condition.wait mc.condition mc.mutex; 157 | done; 158 | Mutex.unlock mc.mutex; 159 | !msg_slot 160 | end else recv' {buffer_size; contents} ~polling 161 | end else 162 | (* The channel is empty (no senders), and we're polling *) 163 | None 164 | | NotEmpty {senders; messages} -> 165 | (* The channel is not empty *) 166 | match (pop messages, pop senders) with 167 | | None, None -> 168 | (* The channel is not empty, but no senders or messages *) 169 | failwith "Chan.recv: Impossible - channel state" 170 | | Some (m, messages'), None -> 171 | (* The channel is not empty, there is a message and no 172 | * waiting senders *) 173 | let new_contents = 174 | if length messages' = 0 then 175 | Empty {receivers = empty} 176 | else 177 | NotEmpty {messages= messages'; senders} 178 | in 179 | if Atomic.compare_and_set contents old_contents new_contents 180 | then Some m 181 | else recv' {buffer_size; contents} ~polling 182 | | None, Some ((m, c, mc), senders') -> 183 | (* The channel is not empty, there are no messages, and there 184 | * is a waiting sender. This is only possible is the buffer 185 | * size is 0. 
*) 186 | assert (buffer_size = Some 0) ; 187 | let new_contents = 188 | if length senders' = 0 then 189 | Empty {receivers = empty} 190 | else 191 | NotEmpty {messages; senders= senders'} 192 | in 193 | if Atomic.compare_and_set contents old_contents new_contents 194 | then begin 195 | Mutex.lock mc.mutex; 196 | c := Notified; 197 | Mutex.unlock mc.mutex; 198 | Condition.broadcast mc.condition; 199 | Some m 200 | end else recv' {buffer_size; contents} ~polling 201 | | Some (m, messages'), Some ((ms, sc, mc), senders') -> 202 | (* The channel is not empty, there is a message, and there is a 203 | * waiting sender. *) 204 | let new_contents = 205 | NotEmpty {messages= push messages' ms; senders= senders'} 206 | in 207 | if Atomic.compare_and_set contents old_contents new_contents 208 | then begin 209 | Mutex.lock mc.mutex; 210 | sc := Notified; 211 | Mutex.unlock mc.mutex; 212 | Condition.broadcast mc.condition; 213 | Some m 214 | end else recv' {buffer_size; contents} ~polling 215 | 216 | let recv c = 217 | match recv' c ~polling:false with 218 | | None -> failwith "Chan.recv: impossible - no message" 219 | | Some m -> m 220 | 221 | let recv_poll c = 222 | match Atomic.get c.contents with 223 | | Empty _ -> None 224 | | _ -> recv' c ~polling:true 225 | -------------------------------------------------------------------------------- /lib/chan.mli: -------------------------------------------------------------------------------- 1 | type !'a t 2 | (** The type of channels *) 3 | 4 | val make_bounded : int -> 'a t 5 | (** [make_bounded n] makes a bounded channel with a buffer of size [n]. Raises 6 | [Invalid_argument "Chan.make_bounded"] if the buffer size is less than 0. 7 | 8 | With a buffer size of 0, the send operation becomes synchronous. With a 9 | buffer size of 1, you get the familiar MVar structure. The channel may be 10 | shared between many sending and receiving domains. 
*) 11 | 12 | val make_unbounded : unit -> 'a t 13 | (** Returns an unbounded channel *) 14 | 15 | val send : 'a t -> 'a -> unit 16 | (** [send c v] sends the values [v] over the channel [c]. If the channel buffer 17 | is full then the sending domain blocks until space becomes available. *) 18 | 19 | val send_poll : 'a t -> 'a -> bool 20 | (** [send_poll c v] attempts to send the value [v] over the channel [c]. If the 21 | channel buffer is not full, the message is sent and returns [true]. Otherwise, 22 | returns [false]. *) 23 | 24 | val recv : 'a t -> 'a 25 | (** [recv c] returns a value [v] received over the channel. If the channel 26 | buffer is empty then the domain blocks until a message is sent on the 27 | channel. *) 28 | 29 | val recv_poll : 'a t -> 'a option 30 | (** [recv_poll c] attempts to receive a message on the channel [c]. If a 31 | message [v] is available on the channel then [Some v] is returned. 32 | Otherwise, returns [None]. *) 33 | -------------------------------------------------------------------------------- /lib/domainslib.ml: -------------------------------------------------------------------------------- 1 | module Chan = Chan 2 | module Task = Task 3 | -------------------------------------------------------------------------------- /lib/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name domainslib) 3 | (public_name domainslib) 4 | (libraries saturn domain-local-await)) 5 | -------------------------------------------------------------------------------- /lib/fun_queue.ml: -------------------------------------------------------------------------------- 1 | type 'a t = {length: int; front: 'a list; back: 'a list} 2 | 3 | let empty = {length= 0; front= []; back= []} 4 | 5 | let push {length; front; back} v = {length= length + 1; front; back= v :: back} 6 | 7 | let length {length; _} = length 8 | 9 | let pop {length; front; back} = 10 | match front with 11 | | [] -> ( 12 | match 
List.rev back with 13 | | [] -> 14 | None 15 | | x :: xs -> 16 | Some (x, {front= xs; length= length - 1; back= []}) ) 17 | | x :: xs -> 18 | Some (x, {front= xs; length= length - 1; back}) 19 | -------------------------------------------------------------------------------- /lib/fun_queue.mli: -------------------------------------------------------------------------------- 1 | type 'a t 2 | (** The type of functional queue *) 3 | 4 | val empty : 'a t 5 | (** Empty queue *) 6 | 7 | val length : 'a t -> int 8 | (** Returns the length of the queue *) 9 | 10 | val push : 'a t -> 'a -> 'a t 11 | (** [push q v] returns a new queue with [v] pushed to the back of [q] *) 12 | 13 | val pop : 'a t -> ('a * 'a t) option 14 | (** [pop q] returns [None] if the queue is empty. If the queue is non-empty, it 15 | returns [Some (v,q')] where [v] is the element popped from the head of [q] 16 | and [q'] is the rest of the queue. *) 17 | -------------------------------------------------------------------------------- /lib/multi_channel.ml: -------------------------------------------------------------------------------- 1 | (* 2 | * Copyright (c) 2021, Tom Kelly 3 | * 4 | * Permission to use, copy, modify, and/or distribute this software for any 5 | * purpose with or without fee is hereby granted, provided that the above 6 | * copyright notice and this permission notice appear in all copies. 7 | * 8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
15 | *) 16 | 17 | module Ws_deque = Saturn.Work_stealing_deque 18 | 19 | type mutex_condvar = { 20 | mutex: Mutex.t; 21 | condition: Condition.t 22 | } 23 | 24 | type waiting_status = 25 | | Waiting 26 | | Released 27 | 28 | type dls_state = { 29 | mutable id: int; 30 | mutable steal_offsets: int array; 31 | rng_state: Random.State.t; 32 | mc: mutex_condvar; 33 | } 34 | 35 | module Foreign_queue = Saturn.Queue 36 | 37 | type 'a t = { 38 | channels: 'a Ws_deque.t array; 39 | (* Queue for enqueuing work from outside of the pool. *) 40 | foreign_queue: 'a Foreign_queue.t; 41 | waiters: (waiting_status ref * mutex_condvar ) Chan.t; 42 | next_domain_id: int Atomic.t; 43 | recv_block_spins: int; 44 | dls_key: dls_state Domain.DLS.key; 45 | } 46 | 47 | let dls_make_key () = 48 | Domain.DLS.new_key (fun () -> 49 | { 50 | id = -1; 51 | steal_offsets = Array.make 1 0; 52 | rng_state = Random.State.make_self_init (); 53 | mc = {mutex=Mutex.create (); condition=Condition.create ()}; 54 | }) 55 | 56 | let rec log2 n = 57 | if n <= 1 then 0 else 1 + (log2 (n asr 1)) 58 | 59 | let make ?(recv_block_spins = 2048) n = 60 | { channels = Array.init n (fun _ -> Ws_deque.create ()); 61 | foreign_queue = Foreign_queue.create (); 62 | waiters = Chan.make_unbounded (); 63 | next_domain_id = Atomic.make 0; 64 | recv_block_spins; 65 | dls_key = dls_make_key () 66 | } 67 | 68 | let register_domain mchan = 69 | let id = Atomic.fetch_and_add mchan.next_domain_id 1 in 70 | assert(id < Array.length mchan.channels); 71 | id 72 | 73 | let init_domain_state mchan dls_state = 74 | let id = register_domain mchan in 75 | let len = Array.length mchan.channels in 76 | dls_state.id <- id; 77 | dls_state.steal_offsets <- Array.init (len - 1) (fun i -> (id + i + 1) mod len); 78 | dls_state 79 | [@@inline never] 80 | 81 | let get_local_state mchan = 82 | let dls_state = Domain.DLS.get mchan.dls_key in 83 | if dls_state.id >= 0 then begin 84 | assert (dls_state.id < Array.length mchan.channels); 85 | 
dls_state 86 | end 87 | else (init_domain_state mchan dls_state) 88 | [@@inline] 89 | 90 | let clear_local_state mchan = 91 | let dls_state = Domain.DLS.get mchan.dls_key in 92 | dls_state.id <- (-1) 93 | 94 | let rec check_waiters mchan = 95 | match Chan.recv_poll mchan.waiters with 96 | | None -> () 97 | | Some (status, mc) -> 98 | (* avoid the lock if we possibly can *) 99 | if !status = Released then check_waiters mchan 100 | else begin 101 | Mutex.lock mc.mutex; 102 | match !status with 103 | | Waiting -> 104 | begin 105 | status := Released; 106 | Mutex.unlock mc.mutex; 107 | Condition.broadcast mc.condition 108 | end 109 | | Released -> 110 | begin 111 | (* this waiter is already released, it might have found something on a poll *) 112 | Mutex.unlock mc.mutex; 113 | check_waiters mchan 114 | end 115 | end 116 | 117 | let send_foreign mchan v = 118 | Foreign_queue.push mchan.foreign_queue v; 119 | check_waiters mchan 120 | 121 | let send mchan v = 122 | let id = (get_local_state mchan).id in 123 | Ws_deque.push (Array.unsafe_get mchan.channels id) v; 124 | check_waiters mchan 125 | 126 | let rec recv_poll_loop mchan dls cur_offset = 127 | let offsets = dls.steal_offsets in 128 | let k = (Array.length offsets) - cur_offset in 129 | if k = 0 then raise Exit 130 | else begin 131 | let idx = cur_offset + (Random.State.int dls.rng_state k) in 132 | let t = Array.unsafe_get offsets idx in 133 | let channel = Array.unsafe_get mchan.channels t in 134 | try 135 | Ws_deque.steal_exn channel 136 | with 137 | | Saturn.Work_stealing_deque.Empty -> 138 | begin 139 | Array.unsafe_set offsets idx (Array.unsafe_get offsets cur_offset); 140 | Array.unsafe_set offsets cur_offset t; 141 | recv_poll_loop mchan dls (cur_offset+1) 142 | end 143 | end 144 | 145 | let recv_poll_with_dls mchan dls = 146 | try 147 | Ws_deque.pop_exn (Array.unsafe_get mchan.channels dls.id) 148 | with 149 | | Saturn.Work_stealing_deque.Empty -> 150 | match Foreign_queue.pop_opt mchan.foreign_queue with 
151 | | None -> recv_poll_loop mchan dls 0 152 | | Some v -> v 153 | [@@inline] 154 | 155 | let recv_poll mchan = 156 | recv_poll_with_dls mchan (get_local_state mchan) 157 | 158 | let rec recv_poll_repeated mchan dls repeats = 159 | try 160 | recv_poll_with_dls mchan dls 161 | with 162 | | Exit -> 163 | if repeats = 1 then raise Exit 164 | else begin 165 | Domain.cpu_relax (); 166 | recv_poll_repeated mchan dls (repeats - 1) 167 | end 168 | 169 | let rec recv mchan = 170 | let dls = get_local_state mchan in 171 | try 172 | recv_poll_repeated mchan dls mchan.recv_block_spins 173 | with 174 | Exit -> 175 | begin 176 | (* Didn't find anything, prepare to block: 177 | * - enqueue our wait block in the waiter queue 178 | * - check the queue again 179 | * - go to sleep if our wait block has not been notified 180 | * - when notified retry the recieve 181 | *) 182 | let status = ref Waiting in 183 | let mc = dls.mc in 184 | Chan.send mchan.waiters (status, mc); 185 | try 186 | let v = recv_poll mchan in 187 | (* need to check the status as might take an item 188 | which is not the one an existing sender has woken us 189 | to take *) 190 | Mutex.lock mc.mutex; 191 | begin match !status with 192 | | Waiting -> (status := Released; Mutex.unlock mc.mutex) 193 | | Released -> 194 | (* we were simultaneously released from a sender; 195 | so need to release a waiter *) 196 | (Mutex.unlock mc.mutex; check_waiters mchan) 197 | end; 198 | v 199 | with 200 | | Exit -> 201 | if !status = Waiting then begin 202 | Mutex.lock mc.mutex; 203 | while !status = Waiting do 204 | Condition.wait mc.condition mc.mutex 205 | done; 206 | Mutex.unlock mc.mutex 207 | end; 208 | recv mchan 209 | end 210 | -------------------------------------------------------------------------------- /lib/task.ml: -------------------------------------------------------------------------------- 1 | open Effect 2 | open Effect.Deep 3 | 4 | type 'a task = unit -> 'a 5 | 6 | type message = 7 | | Work of (unit -> unit) 
  (* Invariant: the Work function does not need to run under the 'step' handler,
     it installs its own handler or re-invokes a deep-handler continuation. *)
  | Quit

type task_chan = message Multi_channel.t

type pool_data = {
  domains : unit Domain.t array;   (* worker domains spawned for this pool *)
  task_chan : task_chan;           (* shared work queue *)
  name: string option              (* registered name, if any *)
}

(* [None] once the pool has been torn down. *)
type pool = pool_data option Atomic.t

type 'a promise_state =
  Returned of 'a
| Raised of exn * Printexc.raw_backtrace
  (* [Pending ks]: not yet resolved; [ks] are the suspended continuations
     (paired with the channel on which to reschedule them). *)
| Pending of (('a, unit) continuation * task_chan) list

type 'a promise = 'a promise_state Atomic.t

(* Effect performed by [await] when the promise is still pending. *)
type _ t += Wait : 'a promise * task_chan -> 'a t

let get_pool_data p =
  match Atomic.get p with
  | None -> invalid_arg "pool already torn down"
  | Some p -> p

(* Resume a suspended continuation with a value / an exception by queueing a
   Work item on the channel it was registered with. *)
let cont v (k, c) = Multi_channel.send c (Work (fun _ -> continue k v))
let discont e bt (k, c) = Multi_channel.send c (Work (fun _ ->
    discontinue_with_backtrace k e bt))

(* Run [f], publish its outcome into promise [p] exactly once, and resume
   every continuation that was waiting on [p]. *)
let do_task (type a) (f : unit -> a) (p : a promise) : unit =
  let action, result =
    try
      let v = f () in
      cont v, Returned v
    with e ->
      let bt = Printexc.get_raw_backtrace () in
      discont e bt, Raised (e, bt)
  in
  (* [exchange] guarantees only one writer ever sees [Pending l]. *)
  match Atomic.exchange p result with
  | Pending l -> List.iter action l
  | _ -> failwith "Task.do_task: impossible, can only set result of task once"

(* Return (or re-raise) immediately if resolved; otherwise suspend the
   current task by performing [Wait] (handled in [step]). *)
let await pool promise =
  let pd = get_pool_data pool in
  match Atomic.get promise with
  | Returned v -> v
  | Raised (e, bt) -> Printexc.raise_with_backtrace e bt
  | Pending _ -> perform (Wait (promise, pd.task_chan))

(* Run [f v] under the scheduler's effect handler.  On [Wait], try to
   register the continuation on the promise via CAS; if the promise was
   resolved concurrently, resume the continuation right away instead. *)
let step (type a) (f : a -> unit) (v : a) : unit =
  try_with f v
  { effc = fun (type a) (e : a t) ->
      match e with
      | Wait (p,c) -> Some (fun (k : (a, _) continuation) ->
          let rec loop () =
            let old = Atomic.get p in
            match old with
            | Pending l ->
                (* CAS may fail if another waiter registered or the task
                   finished in between; retry from a fresh read. *)
                if Atomic.compare_and_set p old (Pending
                ((k,c)::l)) then ()
                else (Domain.cpu_relax (); loop ())
            | Returned v -> continue k v
            | Raised (e,bt) -> discontinue_with_backtrace k e bt
          in
          loop ())
      | _ -> None }

(* Schedule [f] as an asynchronous task on the pool and return its promise. *)
let async pool f =
  let pd = get_pool_data pool in
  let p = Atomic.make (Pending []) in
  Multi_channel.send pd.task_chan (Work (fun _ -> step (do_task f) p));
  p

(* Bridge to the domain-local-await protocol: [release] resolves the
   per-await promise and reschedules any registered continuations
   (via [send_foreign], since the releaser may be outside the pool);
   [await] suspends on that promise using the same [Wait] effect. *)
let prepare_for_await chan () =
  let promise = Atomic.make (Pending []) in
  let release () =
    match Atomic.get promise with
    | (Returned _ | Raised _) -> ()
    | Pending _ ->
      match Atomic.exchange promise (Returned ()) with
      | Pending ks ->
        ks
        |> List.iter @@ fun (k, c) ->
           Multi_channel.send_foreign c (Work (fun _ -> continue k ()))
      | _ -> ()
  and await () =
    match Atomic.get promise with
    | (Returned _ | Raised _) -> ()
    | Pending _ -> perform (Wait (promise, chan))
  in
  Domain_local_await.{ release; await }

(* Worker loop: execute Work items until a Quit message arrives. *)
let rec worker task_chan =
  match Multi_channel.recv task_chan with
  | Quit -> Multi_channel.clear_local_state task_chan
  | Work f -> f (); worker task_chan

(* Shadowing wrapper: run the worker loop with domain-local-await set up. *)
let worker task_chan =
  Domain_local_await.using
    ~prepare_for_await:(prepare_for_await task_chan)
    ~while_running:(fun () -> worker task_chan)

(* Run [f] on the calling domain, executing queued pool tasks while the
   result promise is still pending.  [recv_poll] raising [Exit] means the
   queue is momentarily empty, so we just relax and poll again. *)
let run (type a) pool (f : unit -> a) : a =
  let pd = get_pool_data pool in
  let p = Atomic.make (Pending []) in
  step (fun _ -> do_task f p) ();
  let rec loop () : a =
    match Atomic.get p with
    | Pending _ ->
      begin
        try
          match Multi_channel.recv_poll pd.task_chan with
          | Work f -> f ()
          | Quit -> failwith "Task.run: tasks are active on pool"
        with Exit -> Domain.cpu_relax ()
      end;
      loop ()
    | Returned v -> v
    | Raised (e, bt) -> Printexc.raise_with_backtrace e bt
  in
  loop ()

(* Shadowing wrapper: same as above but with domain-local-await installed. *)
let run pool f =
  Domain_local_await.using
    ~prepare_for_await:(prepare_for_await (get_pool_data pool).task_chan)
    ~while_running:(fun () -> run pool f)

(* Registry of named pools, guarded by [named_pools_mutex]. *)
let named_pools = Hashtbl.create 8
let named_pools_mutex = Mutex.create ()

(* Create a pool with [num_domains] freshly spawned workers.  The channel is
   sized [num_domains + 1] because the calling domain also participates.
   Raises [Invalid_argument] when [num_domains] is negative. *)
let setup_pool ?name ~num_domains () =
  if num_domains < 0 then
    invalid_arg "Task.setup_pool: num_domains must be at least 0"
  else
  let task_chan = Multi_channel.make (num_domains+1) in
  let domains = Array.init num_domains (fun _ ->
    Domain.spawn (fun _ -> worker task_chan))
  in
  let p = Atomic.make (Some {domains; task_chan; name}) in
  begin match name with
  | None -> ()
  | Some x ->
      Mutex.lock named_pools_mutex;
      Hashtbl.add named_pools x p;
      Mutex.unlock named_pools_mutex
  end;
  p

(* Send one Quit per worker, join them all, then unregister the pool and
   mark it torn down. *)
let teardown_pool pool =
  let pd = get_pool_data pool in
  for _i=1 to Array.length pd.domains do
    Multi_channel.send pd.task_chan Quit
  done;
  Multi_channel.clear_local_state pd.task_chan;
  Array.iter Domain.join pd.domains;
  (* Remove the pool from the table *)
  begin match pd.name with
  | None -> ()
  | Some n ->
      Mutex.lock named_pools_mutex;
      Hashtbl.remove named_pools n;
      Mutex.unlock named_pools_mutex
  end;
  Atomic.set pool None

(* Look up a previously registered pool by name. *)
let lookup_pool name =
  Mutex.lock named_pools_mutex;
  let p = Hashtbl.find_opt named_pools name in
  Mutex.unlock named_pools_mutex;
  p

(* Total domain count, including the calling (parent) domain. *)
let get_num_domains pool =
  let pd = get_pool_data pool in
  Array.length pd.domains + 1

(* Parallel for-loop whose per-iteration results are folded with
   [reduce_fun], seeded with [init]. *)
let parallel_for_reduce ?(chunk_size=0) ~start ~finish ~body pool reduce_fun init =
  let pd = get_pool_data pool in
  (* Default chunking: the whole range on one domain, otherwise about eight
     chunks per domain (never less than 1). *)
  let chunk_size = if chunk_size > 0 then chunk_size
      else begin
        let n_domains = (Array.length pd.domains) + 1 in
        let n_tasks = finish - start + 1 in
        if n_domains = 1 then n_tasks
        else max 1 (n_tasks/(8*n_domains))
      end
  in
  (* Divide and conquer: left half runs as an async task, right half runs
     inline; small ranges are reduced sequentially. *)
  let rec work s e =
    if e - s < chunk_size then
      let rec loop i acc =
        if i > e then acc
        else loop (i+1) (reduce_fun acc (body i))
      in
      loop (s+1) (body s)
    else begin
      let d = s + ((e - s) / 2) in
      let p = async pool (fun _ -> work s d) in
      let right = work (d+1) e in
      let left = await pool p in
      reduce_fun left right
    end
  in
  (* Empty range: return the seed unchanged. *)
  if finish < start
  then init
  else reduce_fun init (work start finish)

(* Parallel for-loop over [start..finish]; iterations may run in any order. *)
let parallel_for ?(chunk_size=0) ~start ~finish ~body pool =
  let pd = get_pool_data pool in
  (* Same default chunking heuristic as [parallel_for_reduce]. *)
  let chunk_size = if chunk_size > 0 then chunk_size
      else begin
        let n_domains = (Array.length pd.domains) + 1 in
        let n_tasks = finish - start + 1 in
        if n_domains = 1 then n_tasks
        else max 1 (n_tasks/(8*n_domains))
      end
  in
  let rec work pool fn s e =
    if e - s < chunk_size then
      for i = s to e do fn i done
    else begin
      let d = s + ((e - s) / 2) in
      let left = async pool (fun _ -> work pool fn s d) in
      work pool fn (d+1) e;
      await pool left
    end
  in
  work pool body start finish

(* Parallel inclusive scan (prefix fold) of [elements] with [op]. *)
let parallel_scan pool op elements =
  let pd = get_pool_data pool in
  let n = Array.length elements in
  (* Number of chunks: bounded by both array length and available domains. *)
  let p = min (n - 1) ((Array.length pd.domains) + 1) in
  let prefix_s = Array.copy elements in
  (* Sequential in-place scan of [start..finish]; slot [start] is assumed
     to already hold its final (or chunk-initial) value. *)
  let scan_part op elements prefix_sum start finish =
    assert (Array.length elements > (finish - start));
    for i = (start + 1) to finish do
      prefix_sum.(i) <- op prefix_sum.(i - 1) elements.(i)
    done
  in
  if p < 2 then begin
    (* Do a sequential scan when number of domains or array's length is less
       than 2 *)
    scan_part op elements prefix_s 0 (n - 1);
    prefix_s
  end
  else begin
    (* Apply a chunk's incoming prefix [offset] to each of its slots. *)
    let add_offset op prefix_sum offset start finish =
      assert (Array.length prefix_sum > (finish - start));
      for i = start to finish
      do
        prefix_sum.(i) <- op offset prefix_sum.(i)
      done
    in

    (* Phase 1: scan each of the [p] chunks independently in parallel. *)
    parallel_for pool ~chunk_size:1 ~start:0 ~finish:(p - 1)
      ~body:(fun i ->
          let s = (i * n) / (p ) in
          let e = (i + 1) * n / (p ) - 1 in
          scan_part op elements prefix_s s e);

    (* Phase 2: sequentially combine the chunk totals (the last slot of each
       chunk), left to right, so each chunk's last slot becomes final. *)
    let x = ref prefix_s.(n/p - 1) in
    for i = 2 to p do
      let ind = i * n / p - 1 in
      x := op !x prefix_s.(ind);
      prefix_s.(ind) <- !x
    done;

    (* Phase 3: in parallel, push each chunk's incoming prefix onto its
       interior slots.  The range ends at [-2] because the chunk's last
       slot was already finalized in phase 2. *)
    parallel_for pool ~chunk_size:1 ~start:1 ~finish:(p - 1)
      ~body:( fun i ->
          let s = i * n / (p) in
          let e = (i + 1) * n / (p) - 2 in
          let offset = prefix_s.(s - 1) in
          add_offset op prefix_s offset s e
        );

    prefix_s
  end

(* Parallel search: evaluate [body] over [start..finish] until some call
   returns [Some _]; other in-flight calls may still run after a hit. *)
let parallel_find (type a) ?(chunk_size=0) ~start ~finish ~body pool =
  let pd = get_pool_data pool in
  (* First result found; also serves as the early-exit flag. *)
  let found : a option Atomic.t = Atomic.make None in
  let chunk_size = if chunk_size > 0 then chunk_size
      else begin
        let n_domains = (Array.length pd.domains) + 1 in
        let n_tasks = finish - start + 1 in
        if n_domains = 1 then n_tasks
        else max 1 (n_tasks/(8*n_domains))
      end
  in
  let rec work pool fn s e =
    if e - s < chunk_size then
      let i = ref s in
      while !i <= e && Option.is_none (Atomic.get found) do
        begin match fn !i with
        | None -> ()
        | Some _ as some -> Atomic.set found some
        end;
        incr i;
      done
    else if Option.is_some (Atomic.get found) then ()
    else begin
      let d = s + ((e - s) / 2) in
      let left = async pool (fun _ -> work pool fn s d) in
      work pool fn (d+1) e;
      await pool left
    end
  in
  work pool body start finish;
  Atomic.get found
--------------------------------------------------------------------------------
/lib/task.mli:
--------------------------------------------------------------------------------
type 'a task = unit -> 'a
(** Type of task *)

type !'a promise 5 | (** Type of promises *) 6 | 7 | type pool 8 | (** Type of task pool *) 9 | 10 | val setup_pool : ?name:string -> num_domains:int -> unit -> pool 11 | (** Sets up a task execution pool with [num_domains] new domains. If [name] is 12 | provided, the pool is mapped to [name] which can be looked up later with 13 | [lookup_pool name]. 14 | 15 | When [num_domains] is 0, the new pool will be empty, and when an empty 16 | pool is in use, every function in this module will run effectively 17 | sequentially, using the calling domain as the only available domain. 18 | 19 | Raises {!Invalid_argument} when [num_domains] is less than 0. *) 20 | 21 | val teardown_pool : pool -> unit 22 | (** Tears down the task execution pool. *) 23 | 24 | val lookup_pool : string -> pool option 25 | (** [lookup_pool name] returns [Some pool] if [pool] is associated to [name] or 26 | returns [None] if no value is associated to it. *) 27 | 28 | val get_num_domains : pool -> int 29 | (** [get_num_domains pool] returns the total number of domains in [pool] 30 | including the parent domain. *) 31 | 32 | val run : pool -> 'a task -> 'a 33 | (** [run p t] runs the task [t] synchronously with the calling domain and the 34 | domains in the pool [p]. If the task [t] blocks on a promise, then tasks 35 | from the pool [p] are executed until the promise blocking [t] is resolved. 36 | 37 | This function should be used at the top level to enclose the calls to other 38 | functions that may await on promises. This includes {!await}, 39 | {!parallel_for} and its variants. Otherwise, those functions will raise 40 | [Unhandled] exception. *) 41 | 42 | val async : pool -> 'a task -> 'a promise 43 | (** [async p t] runs the task [t] asynchronously in the pool [p]. The function 44 | returns a promise [r] in which the result of the task [t] will be stored. *) 45 | 46 | val await : pool -> 'a promise -> 'a 47 | (** [await p r] waits for the promise [r] to be resolved. 
During the resolution,
    other tasks in the pool [p] might be run using the calling domain and/or the
    domains in the pool [p]. If the task associated with the promise has
    completed successfully, then the result of the task will be returned. If the
    task has raised an exception, then [await] raises the same exception.

    Must be called with a call to {!run} in the dynamic scope to handle the
    internal algebraic effects for task synchronization. *)

val parallel_for : ?chunk_size:int -> start:int -> finish:int ->
  body:(int -> unit) -> pool -> unit
(** [parallel_for c s f b p] behaves similarly to [for i=s to f do b i done], but
    runs the for loop in parallel with the calling domain and/or the domains in
    the pool [p]. The chunk size [c] determines the number of body applications
    done in one task; this will default to [max(1, (finish-start + 1) / (8 *
    num_domains))]. Individual iterations may be run in any order. Tasks are
    distributed to the participating domains using a divide-and-conquer scheme.

    Must be called with a call to {!run} in the dynamic scope to handle the
    internal algebraic effects for task synchronization. *)

val parallel_for_reduce : ?chunk_size:int -> start:int -> finish:int ->
  body:(int -> 'a) -> pool -> ('a -> 'a -> 'a) -> 'a -> 'a
(** [parallel_for_reduce c s f b p r i] is similar to [parallel_for] except
    that the result returned by each iteration is reduced with [r] with initial
    value [i]. The reduce operations are performed in an arbitrary order and
    the reduce function needs to be associative in order to obtain a
    deterministic result.

    Must be called with a call to {!run} in the dynamic scope to handle the
    internal algebraic effects for task synchronization.
*) 78 | 79 | val parallel_scan : pool -> ('a -> 'a -> 'a) -> 'a array -> 'a array 80 | (** [parallel_scan p op a] computes the scan of the array [a] in parallel with 81 | binary operator [op] and returns the result array, using the calling domain 82 | and/or the domains in the pool [p]. Scan is similar to [Array.fold_left] 83 | but returns an array of reduced intermediate values. The reduce operations 84 | are performed in an arbitrary order and the reduce function needs to be 85 | associative in order to obtain a deterministic result. 86 | 87 | Must be called with a call to {!run} in the dynamic scope to handle the 88 | internal algebraic effects for task synchronization. *) 89 | 90 | val parallel_find : ?chunk_size:int -> start:int -> finish:int -> 91 | body:(int -> 'a option) -> pool -> 'a option 92 | (** [parallel_find ~start ~finish ~body pool] calls [body] in parallel 93 | on the indices from [start] to [finish], in any order, until at 94 | least one of them returns [Some v]. 95 | 96 | Search stops when a value is found, but there is no guarantee that 97 | it stops as early as possible, other calls to [body] may happen in 98 | parallel or afterwards. 99 | 100 | See {!parallel_for} for the description of the [chunk_size] 101 | parameter and the scheduling strategy. 102 | 103 | Must be called with a call to {!run} in the dynamic scope to 104 | handle the internal algebraic effects for task synchronization. 
105 | *) 106 | -------------------------------------------------------------------------------- /test/LU_decomposition_multicore.ml: -------------------------------------------------------------------------------- 1 | module T = Domainslib.Task 2 | let num_domains = try int_of_string Sys.argv.(1) with _ -> 1 3 | let mat_size = try int_of_string Sys.argv.(2) with _ -> 1200 4 | 5 | let k = Domain.DLS.new_key Random.State.make_self_init 6 | 7 | module SquareMatrix = struct 8 | let parallel_create pool f : float array = 9 | let fa = Array.create_float (mat_size * mat_size) in 10 | T.parallel_for pool ~start:0 ~finish:( mat_size * mat_size - 1) 11 | ~body:(fun i -> fa.(i) <- f (i / mat_size) (i mod mat_size)); 12 | fa 13 | 14 | let get (m : float array) r c = m.(r * mat_size + c) 15 | let set (m : float array) r c v = m.(r * mat_size + c) <- v 16 | let parallel_copy pool a = 17 | let n = Array.length a in 18 | let copy_part a b i = 19 | let s = (i * n / num_domains) in 20 | let e = (i+1) * n / num_domains - 1 in 21 | Array.blit a s b s (e - s + 1) in 22 | let b = Array.create_float n in 23 | let rec aux acc num_domains i = 24 | if (i = num_domains) then 25 | (List.iter (fun e -> T.await pool e) acc) 26 | else begin 27 | aux ((T.async pool (fun _ -> copy_part a b i))::acc) num_domains (i+1) 28 | end 29 | in 30 | aux [] num_domains 0; 31 | b 32 | end 33 | 34 | open SquareMatrix 35 | 36 | let lup pool (a0 : float array) = 37 | let a = parallel_copy pool a0 in 38 | for k = 0 to (mat_size - 2) do 39 | T.parallel_for pool ~start:(k + 1) ~finish:(mat_size -1) 40 | ~body:(fun row -> 41 | let factor = get a row k /. get a k k in 42 | for col = k + 1 to mat_size-1 do 43 | set a row col (get a row col -. factor *. 
(get a k col)) 44 | done; 45 | set a row k factor ) 46 | done ; 47 | a 48 | 49 | let () = 50 | let pool = T.setup_pool ~num_domains:(num_domains - 1) () in 51 | T.run pool (fun _ -> 52 | let a = parallel_create pool 53 | (fun _ _ -> (Random.State.float (Domain.DLS.get k) 100.0) +. 1.0 ) in 54 | let lu = lup pool a in 55 | let _l = parallel_create pool (fun i j -> if i > j then get lu i j else if i = j then 1.0 else 0.0) in 56 | let _u = parallel_create pool (fun i j -> if i <= j then get lu i j else 0.0) in 57 | ()); 58 | T.teardown_pool pool 59 | -------------------------------------------------------------------------------- /test/backtrace.ml: -------------------------------------------------------------------------------- 1 | module T = Domainslib.Task 2 | 3 | let rec foo i = 4 | if i = 0 then () 5 | else begin 6 | ignore (failwith "exn"); 7 | foo i 8 | end 9 | [@@inline never] 10 | 11 | let rec bar i = 12 | if i = 0 then () 13 | else begin 14 | foo i; 15 | bar i 16 | end 17 | [@@inline never] 18 | 19 | let main () = 20 | let pool = T.setup_pool ~num_domains:0 () in 21 | T.run pool (fun _ -> 22 | let p = T.async pool (fun _ -> bar 42) in 23 | T.await pool p; 24 | Printf.printf "should not reach here\n%!"); 25 | T.teardown_pool pool 26 | 27 | let _ = 28 | Printexc.record_backtrace true; 29 | try main () 30 | with _ -> 31 | let open Printexc in 32 | let bt = get_raw_backtrace () in 33 | let bt_slot_arr = Option.get (backtrace_slots bt) in 34 | let name = Option.get (Slot.name bt_slot_arr.(1)) in 35 | assert (name = "Backtrace.foo" || name = "Dune__exe__Backtrace.foo"); 36 | let s = raw_backtrace_to_string bt in 37 | print_string s 38 | -------------------------------------------------------------------------------- /test/chan_stm_tests.ml: -------------------------------------------------------------------------------- 1 | open QCheck 2 | open Domainslib 3 | open STM 4 | 5 | (** This contains sequential and parallel model-based tests of [Domainslib.Chan] *) 6 | 7 
| module ChConf = 8 | struct 9 | type state = int list 10 | type sut = int Domainslib.Chan.t 11 | type cmd = 12 | | Send of int 13 | | Send_poll of int 14 | | Recv 15 | | Recv_poll 16 | 17 | let show_cmd c = match c with 18 | | Send i -> "Send" ^ (string_of_int i) 19 | | Send_poll i -> "Send_poll" ^ (string_of_int i) 20 | | Recv -> "Recv" 21 | | Recv_poll -> "Recv_poll" 22 | 23 | let capacity = 8 24 | 25 | let arb_cmd s = 26 | let int_gen = Gen.nat in 27 | QCheck.make ~print:show_cmd 28 | (if s=[] 29 | then 30 | Gen.oneof 31 | [Gen.map (fun i -> Send i) int_gen; 32 | Gen.map (fun i -> Send_poll i) int_gen; 33 | Gen.return Recv_poll] (* don't generate blocking Recv cmds on an empty channel *) 34 | else 35 | if List.length s >= capacity 36 | then 37 | Gen.oneof 38 | [Gen.map (fun i -> Send_poll i) int_gen; 39 | Gen.return Recv; 40 | Gen.return Recv_poll] (* don't generate blocking Send cmds on a full channel *) 41 | else 42 | Gen.oneof 43 | [Gen.map (fun i -> Send i) int_gen; 44 | Gen.map (fun i -> Send_poll i) int_gen; 45 | Gen.return Recv; 46 | Gen.return Recv_poll]) 47 | let init_state = [] 48 | let init_sut () = Chan.make_bounded capacity 49 | let cleanup _ = () 50 | 51 | let next_state c s = match c with 52 | | Send i -> if List.length s < capacity then s@[i] else s 53 | | Send_poll i -> if List.length s < capacity then s@[i] else s 54 | | Recv -> begin match s with [] -> [] | _::s' -> s' end 55 | | Recv_poll -> begin match s with [] -> [] | _::s' -> s' end 56 | 57 | let precond c s = match c,s with 58 | | Recv, [] -> false 59 | | Send _, _ -> List.length s < capacity 60 | | _, _ -> true 61 | 62 | let run c chan = 63 | match c with 64 | | Send i -> Res (unit, Chan.send chan i) 65 | | Send_poll i -> Res (bool, Chan.send_poll chan i) 66 | | Recv -> Res (int, Chan.recv chan) 67 | | Recv_poll -> Res (option int, Chan.recv_poll chan) 68 | 69 | let postcond c s res = match c,res with 70 | | Send _, Res ((Unit,_),_) -> (List.length s < capacity) 71 | | Send_poll _, Res 
((Bool,_),res) -> res = (List.length s < capacity) 72 | | Recv, Res ((Int,_),res) -> (match s with [] -> false | res'::_ -> Int.equal res res') 73 | | Recv_poll, Res ((Option Int,_),opt) -> (match s with [] -> None | res'::_ -> Some res') = opt 74 | | _,_ -> false 75 | end 76 | 77 | 78 | module ChT_seq = STM_sequential.Make(ChConf) 79 | module ChT_dom = STM_domain.Make(ChConf) 80 | 81 | let () = 82 | let count = 500 in 83 | QCheck_base_runner.run_tests_main [ 84 | ChT_seq.agree_test ~count ~name:"STM Domainslib.Chan test sequential"; 85 | ChT_dom.agree_test_par ~count ~name:"STM Domainslib.Chan test parallel"; 86 | ] 87 | -------------------------------------------------------------------------------- /test/dune: -------------------------------------------------------------------------------- 1 | (test 2 | (name test_chan) 3 | (libraries domainslib) 4 | (modules test_chan)) 5 | 6 | (test 7 | (name fib) 8 | (modules fib)) 9 | 10 | (test 11 | (name fib_par) 12 | (libraries domainslib) 13 | (modules fib_par)) 14 | 15 | (test 16 | (name kcas_integration) 17 | (libraries domainslib kcas) 18 | (modules kcas_integration)) 19 | 20 | (test 21 | (name enumerate_par) 22 | (libraries domainslib) 23 | (modules enumerate_par)) 24 | 25 | (test 26 | (name game_of_life) 27 | (modules game_of_life)) 28 | 29 | (test 30 | (name game_of_life_multicore) 31 | (libraries domainslib) 32 | (modules game_of_life_multicore)) 33 | 34 | (test 35 | (name LU_decomposition_multicore) 36 | (libraries domainslib) 37 | (flags (:standard -runtime-variant d)) 38 | (modules LU_decomposition_multicore) 39 | (enabled_if (or (= %{arch_sixtyfour} true) (<> %{architecture} arm)))) 40 | ;; disabled temporarily on arm32 due to failure: ocaml/ocaml#12267 41 | 42 | 43 | (test 44 | (name spectralnorm2) 45 | (modules spectralnorm2)) 46 | 47 | (test 48 | (name sum_par) 49 | (libraries domainslib) 50 | (modules sum_par)) 51 | 52 | (test 53 | (name task_throughput) 54 | (libraries domainslib mirage-clock-unix) 55 | 
(modules task_throughput)) 56 | 57 | (test 58 | (name spectralnorm2_multicore) 59 | (libraries domainslib) 60 | (modules spectralnorm2_multicore)) 61 | 62 | (test 63 | (name summed_area_table) 64 | (libraries domainslib) 65 | (modules summed_area_table)) 66 | 67 | (test 68 | (name prefix_sum) 69 | (libraries domainslib unix) 70 | (modules prefix_sum)) 71 | 72 | (test 73 | (name test_task) 74 | (libraries domainslib) 75 | (modules test_task)) 76 | 77 | (test 78 | (name test_parallel_find) 79 | (libraries domainslib) 80 | (modules test_parallel_find)) 81 | 82 | (test 83 | (name test_parallel_scan) 84 | (libraries domainslib) 85 | (modules test_parallel_scan)) 86 | 87 | (test 88 | (name test_deadlock) 89 | (libraries domainslib) 90 | (modules test_deadlock)) 91 | 92 | (test 93 | (name test_task_crash) 94 | (libraries domainslib) 95 | (modules test_task_crash)) 96 | 97 | (test 98 | (name test_task_empty) 99 | (libraries domainslib) 100 | (modules test_task_empty)) 101 | 102 | (test 103 | (name backtrace) 104 | (libraries domainslib) 105 | (modules backtrace) 106 | (enabled_if (<> %{system} mingw64)) ;; triggers a known issue on mingw https://github.com/ocaml/ocaml/pull/12231 107 | (modes byte native)) 108 | ;; byte_complete .exes don't include debug+trace info https://github.com/ocaml/dune/issues/7845 109 | ;; so on a bytecode switch/platform we build a plain bytecode version w/trace info 110 | ;; and rename it to .exe 111 | (rule 112 | (target backtrace.exe) 113 | (action (copy backtrace.bc backtrace.exe)) 114 | (enabled_if (and (= %{bin-available:ocamlopt} false) (<> %{system} mingw64)))) 115 | 116 | (test 117 | (name off_by_one) 118 | (libraries domainslib) 119 | (modules off_by_one)) 120 | 121 | ;; Custom property-based tests using QCheck 122 | 123 | (test 124 | (name task_one_dep) 125 | (modules task_one_dep) 126 | (libraries qcheck-multicoretests-util qcheck-core qcheck-core.runner domainslib) 127 | (enabled_if %{bin-available:ocamlopt}) ;; takes forever on 
bytecode 128 | (action (run %{test} --verbose))) 129 | 130 | (test 131 | (name task_more_deps) 132 | (modules task_more_deps) 133 | (libraries qcheck-multicoretests-util qcheck-core qcheck-core.runner domainslib) 134 | (enabled_if %{bin-available:ocamlopt}) ;; takes forever on bytecode 135 | (action (run %{test} --verbose))) 136 | 137 | (test 138 | (name task_parallel) 139 | (modules task_parallel) 140 | (libraries qcheck-multicoretests-util qcheck-core qcheck-core.runner domainslib) 141 | (action (run %{test} --verbose))) 142 | 143 | ;; STM_sequential and STM_domain test of Domainslib.Chan 144 | 145 | (test 146 | (name chan_stm_tests) 147 | (modules chan_stm_tests) 148 | (libraries qcheck-stm.sequential qcheck-stm.domain domainslib) 149 | (action (run %{test} --verbose))) 150 | -------------------------------------------------------------------------------- /test/enumerate_par.ml: -------------------------------------------------------------------------------- 1 | let num_domains = try int_of_string Sys.argv.(1) with _ -> 1 2 | let n = try int_of_string Sys.argv.(2) with _ -> 100 3 | 4 | module T = Domainslib.Task 5 | 6 | let _ = 7 | let p = T.setup_pool ~num_domains:(num_domains - 1) () in 8 | T.run p (fun _ -> 9 | T.parallel_for p ~start:0 ~finish:(n-1) ~chunk_size:16 ~body:(fun i -> 10 | print_string @@ Printf.sprintf "[%d] %d\n%!" 
(Domain.self () :> int) i)); 11 | T.teardown_pool p 12 | -------------------------------------------------------------------------------- /test/fib.ml: -------------------------------------------------------------------------------- 1 | let n = try int_of_string Sys.argv.(1) with _ -> 43 2 | 3 | let rec fib n = 4 | if n < 2 then 1 5 | else fib (n-1) + fib (n-2) 6 | 7 | let _ = Printf.printf "fib(%d) = %d\n" n (fib n) 8 | -------------------------------------------------------------------------------- /test/fib_par.ml: -------------------------------------------------------------------------------- 1 | let num_domains = try int_of_string Sys.argv.(1) with _ -> 1 2 | let n = try int_of_string Sys.argv.(2) with _ -> 43 3 | 4 | module T = Domainslib.Task 5 | 6 | let rec fib n = 7 | if n < 2 then 1 8 | else fib (n-1) + fib (n-2) 9 | 10 | let rec fib_par pool n = 11 | if n <= 40 then fib n 12 | else 13 | let a = T.async pool (fun _ -> fib_par pool (n-1)) in 14 | let b = T.async pool (fun _ -> fib_par pool (n-2)) in 15 | T.await pool a + T.await pool b 16 | 17 | let main = 18 | let pool = T.setup_pool ~num_domains:(num_domains - 1) () in 19 | let res = T.run pool (fun _ -> fib_par pool n) in 20 | T.teardown_pool pool; 21 | Printf.printf "fib(%d) = %d\n" n res 22 | 23 | let () = main 24 | -------------------------------------------------------------------------------- /test/game_of_life.ml: -------------------------------------------------------------------------------- 1 | let n_times = try int_of_string Sys.argv.(1) with _ -> 20 2 | let board_size = try int_of_string Sys.argv.(2) with _ -> 16 3 | 4 | let rg = 5 | ref (Array.init board_size (fun _ -> Array.init board_size (fun _ -> Random.int 2))) 6 | let rg' = 7 | ref (Array.init board_size (fun _ -> Array.init board_size (fun _ -> Random.int 2))) 8 | let buf = Bytes.create board_size 9 | 10 | let get g x y = 11 | try g.(x).(y) 12 | with _ -> 0 13 | 14 | let neighbourhood g x y = 15 | (get g (x-1) (y-1)) + 16 | (get g 
(x-1) (y )) + 17 | (get g (x-1) (y+1)) + 18 | (get g (x ) (y-1)) + 19 | (get g (x ) (y+1)) + 20 | (get g (x+1) (y-1)) + 21 | (get g (x+1) (y )) + 22 | (get g (x+1) (y+1)) 23 | 24 | let next_cell g x y = 25 | let n = neighbourhood g x y in 26 | match g.(x).(y), n with 27 | | 1, 0 | 1, 1 -> 0 (* lonely *) 28 | | 1, 4 | 1, 5 | 1, 6 | 1, 7 | 1, 8 -> 0 (* overcrowded *) 29 | | 1, 2 | 1, 3 -> 1 (* lives *) 30 | | 0, 3 -> 1 (* get birth *) 31 | | _ (* 0, (0|1|2|4|5|6|7|8) *) -> 0 (* barren *) 32 | 33 | let print g = 34 | for x = 0 to board_size - 1 do 35 | for y = 0 to board_size - 1 do 36 | if g.(x).(y) = 0 37 | then Bytes.set buf y '.' 38 | else Bytes.set buf y 'o' 39 | done; 40 | print_endline (Bytes.unsafe_to_string buf) 41 | done; 42 | print_endline "" 43 | 44 | let next () = 45 | let g = !rg in 46 | let new_g = !rg' in 47 | for x = 0 to board_size - 1 do 48 | for y = 0 to board_size - 1 do 49 | new_g.(x).(y) <- next_cell g x y 50 | done 51 | done; 52 | rg := new_g; 53 | rg' := g 54 | 55 | let rec repeat n = 56 | match n with 57 | | 0 -> () 58 | | _ -> next (); repeat (n-1) 59 | 60 | let ()= 61 | print !rg; 62 | repeat n_times; 63 | print !rg 64 | -------------------------------------------------------------------------------- /test/game_of_life_multicore.ml: -------------------------------------------------------------------------------- 1 | let num_domains = try int_of_string Sys.argv.(1) with _ -> 1 2 | let n_times = try int_of_string Sys.argv.(2) with _ -> 20 3 | let board_size = try int_of_string Sys.argv.(3) with _ -> 16 4 | 5 | module T = Domainslib.Task 6 | 7 | let rg = 8 | ref (Array.init board_size (fun _ -> Array.init board_size (fun _ -> Random.int 2))) 9 | let rg' = 10 | ref (Array.init board_size (fun _ -> Array.init board_size (fun _ -> Random.int 2))) 11 | let buf = Bytes.create board_size 12 | 13 | let get g x y = 14 | try g.(x).(y) 15 | with _ -> 0 16 | 17 | let neighbourhood g x y = 18 | (get g (x-1) (y-1)) + 19 | (get g (x-1) (y )) + 20 | (get g 
(x-1) (y+1)) + 21 | (get g (x ) (y-1)) + 22 | (get g (x ) (y+1)) + 23 | (get g (x+1) (y-1)) + 24 | (get g (x+1) (y )) + 25 | (get g (x+1) (y+1)) 26 | 27 | let next_cell g x y = 28 | let n = neighbourhood g x y in 29 | match g.(x).(y), n with 30 | | 1, 0 | 1, 1 -> 0 (* lonely *) 31 | | 1, 4 | 1, 5 | 1, 6 | 1, 7 | 1, 8 -> 0 (* overcrowded *) 32 | | 1, 2 | 1, 3 -> 1 (* lives *) 33 | | 0, 3 -> 1 (* get birth *) 34 | | _ (* 0, (0|1|2|4|5|6|7|8) *) -> 0 (* barren *) 35 | 36 | let print g = 37 | for x = 0 to board_size - 1 do 38 | for y = 0 to board_size - 1 do 39 | if g.(x).(y) = 0 40 | then Bytes.set buf y '.' 41 | else Bytes.set buf y 'o' 42 | done; 43 | print_endline (Bytes.unsafe_to_string buf) 44 | done; 45 | print_endline "" 46 | 47 | let next pool = 48 | let g = !rg in 49 | let new_g = !rg' in 50 | T.parallel_for pool ~start:0 51 | ~finish:(board_size - 1) ~body:(fun x -> 52 | for y = 0 to board_size - 1 do 53 | new_g.(x).(y) <- next_cell g x y 54 | done); 55 | rg := new_g; 56 | rg' := g 57 | 58 | 59 | let rec repeat pool n = 60 | match n with 61 | | 0-> () 62 | | _-> next pool; repeat pool (n-1) 63 | 64 | let ()= 65 | let pool = T.setup_pool ~num_domains:(num_domains - 1) () in 66 | print !rg; 67 | T.run pool (fun _ -> repeat pool n_times); 68 | print !rg; 69 | T.teardown_pool pool 70 | -------------------------------------------------------------------------------- /test/kcas_integration.ml: -------------------------------------------------------------------------------- 1 | open Kcas 2 | module T = Domainslib.Task 3 | 4 | let var = Loc.make None 5 | 6 | let () = 7 | let n = 100 in 8 | let pool_domain = 9 | Domain.spawn @@ fun () -> 10 | let pool = 11 | T.setup_pool ~num_domains:(Domain.recommended_domain_count () - 2) () 12 | in 13 | T.run pool (fun () -> 14 | T.parallel_for ~start:1 ~finish:n 15 | ~body:(fun i -> 16 | ignore @@ Loc.update var 17 | @@ function None -> Some i | _ -> Retry.later ()) 18 | pool); 19 | T.teardown_pool pool; 20 | Printf.printf 
"Done\n%!" 21 | in 22 | for _ = 1 to n do 23 | match 24 | Loc.update var @@ function None -> Retry.later () | Some _ -> None 25 | with 26 | | None -> failwith "impossible" 27 | | Some i -> Printf.printf "Got %d\n%!" i 28 | done; 29 | Domain.join pool_domain 30 | -------------------------------------------------------------------------------- /test/off_by_one.ml: -------------------------------------------------------------------------------- 1 | open Domainslib 2 | 3 | let print_array a = 4 | let b = Buffer.create 25 in 5 | Buffer.add_string b "[|"; 6 | Array.iter (fun elem -> Buffer.add_string b (string_of_int elem ^ "; ")) a; 7 | Buffer.add_string b "|]"; 8 | Buffer.contents b 9 | 10 | let r = Array.init 20 (fun i -> i + 1) 11 | 12 | let scan_task num_doms = 13 | try 14 | let pool = Task.setup_pool ~num_domains:num_doms () in 15 | let a = Task.run pool (fun () -> Task.parallel_scan pool (+) (Array.make 20 1)) in 16 | Task.teardown_pool pool; 17 | Printf.printf "%i: %s\n%!" num_doms (print_array a); 18 | assert (a = r) 19 | with Failure msg -> 20 | begin 21 | assert (msg = "failed to allocate domain"); 22 | Printf.printf "Failed to allocate %i domains, recommended_domain_count: %i\n%!" 
23 | num_doms (Domain.recommended_domain_count ()); 24 | end 25 | ;; 26 | for num_dom=0 to 21 do 27 | scan_task num_dom; 28 | done 29 | -------------------------------------------------------------------------------- /test/prefix_sum.ml: -------------------------------------------------------------------------------- 1 | module T = Domainslib.Task 2 | let num_domains = try int_of_string Sys.argv.(1) with _ -> 4 3 | let n = try int_of_string Sys.argv.(2) with _ -> 100000 4 | 5 | let gen n = Array.make n 1 (*(fun _ -> Random.int n)*) 6 | 7 | let prefix_sum pool = T.parallel_scan pool (+) 8 | 9 | let _ = 10 | let pool = T.setup_pool ~num_domains:(num_domains - 1) () in 11 | let arr = gen n in 12 | let t = Unix.gettimeofday() in 13 | ignore (T.run pool (fun _ -> prefix_sum pool arr)); 14 | Printf.printf "Execution time: %fs\n" (Unix.gettimeofday() -. t); 15 | T.teardown_pool pool 16 | -------------------------------------------------------------------------------- /test/spectralnorm2.ml: -------------------------------------------------------------------------------- 1 | (* The Computer Language Benchmarks Game 2 | * https://salsa.debian.org/benchmarksgame-team/benchmarksgame/ 3 | * 4 | * Contributed by Sebastien Loisel 5 | * Cleanup by Troestler Christophe 6 | * Modified by Mauricio Fernandez 7 | *) 8 | 9 | let n = try int_of_string Sys.argv.(1) with _ -> 2000 10 | 11 | let eval_A i j = 1. /. float((i+j)*(i+j+1)/2+i+1) 12 | 13 | let eval_A_times_u u v = 14 | let n = Array.length v - 1 in 15 | for i = 0 to n do 16 | let vi = ref 0. in 17 | for j = 0 to n do vi := !vi +. eval_A i j *. u.(j) done; 18 | v.(i) <- !vi 19 | done 20 | 21 | let eval_At_times_u u v = 22 | let n = Array.length v -1 in 23 | for i = 0 to n do 24 | let vi = ref 0. in 25 | for j = 0 to n do vi := !vi +. eval_A j i *. 
u.(j) done; 26 | v.(i) <- !vi 27 | done 28 | 29 | let eval_AtA_times_u u v = 30 | let w = Array.make (Array.length u) 0.0 in 31 | eval_A_times_u u w; eval_At_times_u w v 32 | 33 | 34 | let () = 35 | let u = Array.make n 1.0 and v = Array.make n 0.0 in 36 | for _i = 0 to 9 do 37 | eval_AtA_times_u u v; eval_AtA_times_u v u 38 | done; 39 | 40 | let vv = ref 0.0 and vBv = ref 0.0 in 41 | for i=0 to n-1 do 42 | vv := !vv +. v.(i) *. v.(i); 43 | vBv := !vBv +. u.(i) *. v.(i) 44 | done; 45 | Printf.printf "%0.9f\n" (sqrt(!vBv /. !vv)) 46 | -------------------------------------------------------------------------------- /test/spectralnorm2_multicore.ml: -------------------------------------------------------------------------------- 1 | (* The Computer Language Benchmarks Game 2 | * https://salsa.debian.org/benchmarksgame-team/benchmarksgame/ 3 | * 4 | * Contributed by Sebastien Loisel 5 | * Cleanup by Troestler Christophe 6 | * Modified by Mauricio Fernandez 7 | *) 8 | 9 | let num_domains = try int_of_string Sys.argv.(1) with _ -> 1 10 | let n = try int_of_string Sys.argv.(2) with _ -> 2000 11 | 12 | module T = Domainslib.Task 13 | 14 | let eval_A i j = 1. /. float((i+j)*(i+j+1)/2+i+1) 15 | 16 | let eval_A_times_u pool u v = 17 | let n = Array.length v - 1 in 18 | T.parallel_for pool ~start:0 ~finish:n ~chunk_size:(n/num_domains) 19 | ~body:(fun i -> 20 | let vi = ref 0. in 21 | for j = 0 to n do vi := !vi +. eval_A i j *. u.(j) done; 22 | v.(i) <- !vi) 23 | 24 | let eval_At_times_u pool u v = 25 | let n = Array.length v -1 in 26 | T.parallel_for pool ~start:0 ~finish:n ~chunk_size:(n/num_domains) 27 | ~body:(fun i -> 28 | let vi = ref 0. in 29 | for j = 0 to n do vi := !vi +. eval_A j i *. 
u.(j) done; 30 | v.(i) <- !vi) 31 | 32 | let eval_AtA_times_u pool u v = 33 | let w = Array.make (Array.length u) 0.0 in 34 | eval_A_times_u pool u w; eval_At_times_u pool w v 35 | 36 | let () = 37 | let pool = T.setup_pool ~num_domains:(num_domains - 1) () in 38 | let u = Array.make n 1.0 and v = Array.make n 0.0 in 39 | T.run pool (fun _ -> 40 | for _i = 0 to 9 do 41 | eval_AtA_times_u pool u v; eval_AtA_times_u pool v u 42 | done); 43 | T.teardown_pool pool; 44 | 45 | let vv = ref 0.0 and vBv = ref 0.0 in 46 | for i=0 to n-1 do 47 | vv := !vv +. v.(i) *. v.(i); 48 | vBv := !vBv +. u.(i) *. v.(i) 49 | done; 50 | Printf.printf "%0.9f\n" (sqrt(!vBv /. !vv)) 51 | -------------------------------------------------------------------------------- /test/sum_par.ml: -------------------------------------------------------------------------------- 1 | let num_domains = try int_of_string Sys.argv.(1) with _ -> 2 2 | let n = try int_of_string Sys.argv.(2) with _ -> 100 3 | 4 | module T = Domainslib.Task 5 | 6 | let _ = 7 | (* use parallel_for_reduce *) 8 | let p = T.setup_pool ~num_domains:(num_domains - 1) () in 9 | let sum = T.run p (fun _ -> 10 | T.parallel_for_reduce p (+) 0 ~chunk_size:(n/(4*num_domains)) ~start:0 11 | ~finish:(n-1) ~body:(fun _i -> 1)) 12 | in 13 | T.teardown_pool p; 14 | Printf.printf "Sum is %d\n" sum; 15 | assert (sum = n) 16 | 17 | let _ = 18 | (* explictly use empty pool and default chunk_size *) 19 | let p = T.setup_pool ~num_domains:0 () in 20 | let sum = Atomic.make 0 in 21 | T.run p (fun _ -> 22 | T.parallel_for p ~start:0 ~finish:(n-1) 23 | ~body:(fun _i -> ignore (Atomic.fetch_and_add sum 1))); 24 | let sum = Atomic.get sum in 25 | T.teardown_pool p; 26 | Printf.printf "Sum is %d\n" sum; 27 | assert (sum = n) 28 | 29 | let _ = 30 | (* configured num_domains and default chunk_size *) 31 | let p = T.setup_pool ~num_domains:(num_domains - 1) () in 32 | let sum = Atomic.make 0 in 33 | T.run p (fun _ -> 34 | T.parallel_for p ~start:0 ~finish:(n-1) 
35 | ~body:(fun _i -> ignore (Atomic.fetch_and_add sum 1))); 36 | let sum = Atomic.get sum in 37 | T.teardown_pool p; 38 | Printf.printf "Sum is %d\n" sum; 39 | assert (sum = n) 40 | 41 | -------------------------------------------------------------------------------- /test/summed_area_table.ml: -------------------------------------------------------------------------------- 1 | module T = Domainslib.Task 2 | let num_domains = try int_of_string Sys.argv.(1) with _ -> 4 3 | let size = try int_of_string Sys.argv.(2) with _ -> 100 4 | 5 | let transpose a = 6 | let r = Array.length a in 7 | let c = Array.length a.(0) in 8 | let b = Array.copy a in 9 | for i = 0 to (pred r) do 10 | for j = 0 to (pred c) do 11 | b.(j).(i) <- a.(i).(j) 12 | done 13 | done; 14 | b 15 | 16 | let calc_table pool mat = 17 | let l = Array.length mat in 18 | let res = Array.copy mat in 19 | for i = 0 to (l - 1) do 20 | res.(i) <- T.parallel_scan pool (fun x y -> x + y) mat.(i) 21 | done; 22 | let k = transpose res in 23 | 24 | for i = 0 to (l - 1) do 25 | res.(i) <- T.parallel_scan pool (fun x y -> x + y) k.(i) 26 | done; 27 | (transpose res) 28 | 29 | let _ = 30 | let m = Array.make_matrix size size 1 (*Array.init size (fun _ -> Array.init size (fun _ -> Random.int size))*) 31 | in 32 | let pool = T.setup_pool ~num_domains:(num_domains - 1) () in 33 | let _ = T.run pool (fun _ -> calc_table pool m) in 34 | 35 | (* for i = 0 to size-1 do 36 | for j = 0 to size-1 do 37 | print_int a.(i).(j); print_string " " 38 | done; 39 | print_newline() 40 | done; *) 41 | T.teardown_pool pool 42 | -------------------------------------------------------------------------------- /test/task_more_deps.ml: -------------------------------------------------------------------------------- 1 | (** 2 | Generate tests of async+await from Domainslib.Task. 3 | It does so by generating a random, acyclic dependency graph of [async] tasks, 4 | each [await]ing on its dependency. 
5 | *) 6 | 7 | open QCheck 8 | open Domainslib 9 | 10 | (* a simple work item, from ocaml/testsuite/tests/misc/takc.ml *) 11 | let rec tak x y z = 12 | if x > y then tak (tak (x-1) y z) (tak (y-1) z x) (tak (z-1) x y) 13 | else z 14 | 15 | let work () = 16 | for _ = 1 to 200 do 17 | assert (7 = tak 18 12 6); 18 | done 19 | 20 | (* Generates a DAG of dependencies *) 21 | (* Each task is represented by an array index w/a deps.list *) 22 | (* This example DAG 23 | 24 | A/0 <--- B/1 < 25 | ^. \ 26 | \ \ 27 | `- C/2 <--- D/3 28 | 29 | is represented as: [| []; [0]; [0]; [1;2] |] *) 30 | let gen_dag n st = 31 | Array.init n (fun i -> 32 | let deps = ref [] in 33 | for dep = 0 to i-1 do 34 | if Gen.bool st then deps := dep :: !deps 35 | done; 36 | List.rev !deps) 37 | 38 | type test_input = 39 | { 40 | num_domains : int; 41 | length : int; 42 | dependencies : int list array 43 | } 44 | 45 | let show_test_input t = 46 | Printf.sprintf 47 | "{ num_domains : %i\n length : %i\n dependencies : %s }" 48 | t.num_domains t.length Print.(array (list int) t.dependencies) 49 | 50 | let shrink_deps test_input = 51 | let ls = Array.to_list test_input.dependencies in 52 | let is = Shrink.list ~shrink:Shrink.list ls in 53 | Iter.map 54 | (fun deps -> 55 | let len = List.length deps in 56 | let arr = Array.of_list deps in 57 | let deps = Array.mapi (fun i i_deps -> match i,i_deps with 58 | | 0, _ 59 | | _,[] -> [] 60 | | _,[0] -> [0] 61 | | _, _ -> 62 | List.map (fun j -> 63 | if j<0 || j>=len || j>=i (* ensure reduced dep is valid *) 64 | then ((j + i) mod i) 65 | else j) i_deps) arr in 66 | { test_input with length=len; dependencies=deps }) is 67 | 68 | let arb_deps domain_bound promise_bound = 69 | let gen_deps = 70 | Gen.(pair (int_bound (domain_bound-1)) (int_bound promise_bound) >>= fun (num_domains,length) -> 71 | let num_domains = succ num_domains in 72 | let length = succ length in 73 | gen_dag length >>= fun dependencies -> return { num_domains; length; dependencies }) in 74 | 
make ~print:show_test_input ~shrink:(shrink_deps) gen_deps 75 | 76 | let build_dep_graph pool test_input = 77 | let len = test_input.length in 78 | let deps = test_input.dependencies in 79 | let rec build i promise_acc = 80 | if i=len 81 | then promise_acc 82 | else 83 | let p = (match deps.(i) with 84 | | [] -> 85 | Task.async pool work 86 | | deps -> 87 | Task.async pool (fun () -> 88 | work (); 89 | List.iter (fun dep -> Task.await pool (List.nth promise_acc (i-1-dep))) deps)) in 90 | build (i+1) (p::promise_acc) 91 | in 92 | build 0 [] 93 | 94 | let test_one_pool ~domain_bound ~promise_bound = 95 | Test.make ~name:"Domainslib.Task.async/await, more deps, 1 work pool" ~count:100 96 | (arb_deps domain_bound promise_bound) 97 | (Util.repeat 10 98 | (fun test_input -> 99 | let pool = Task.setup_pool ~num_domains:test_input.num_domains () in 100 | Task.run pool (fun () -> 101 | let ps = build_dep_graph pool test_input in 102 | List.iter (fun p -> Task.await pool p) ps); 103 | Task.teardown_pool pool; 104 | true)) 105 | 106 | let () = 107 | QCheck_base_runner.run_tests_main [test_one_pool ~domain_bound:8 ~promise_bound:10] 108 | -------------------------------------------------------------------------------- /test/task_one_dep.ml: -------------------------------------------------------------------------------- 1 | (** 2 | Generate tests of async+await from Domainslib.Task. 3 | It does so by generating a random, acyclic dependency graph of [async] tasks, 4 | each [await]ing on its dependency. 5 | *) 6 | 7 | open QCheck 8 | open Domainslib 9 | 10 | (* a simple work item, from ocaml/testsuite/tests/misc/takc.ml *) 11 | let rec tak x y z = 12 | if x > y then tak (tak (x-1) y z) (tak (y-1) z x) (tak (z-1) x y) 13 | else z 14 | 15 | let work () = 16 | for _ = 1 to 200 do 17 | assert (7 = tak 18 12 6); 18 | done 19 | 20 | (* Generates a sparse DAG of dependencies *) 21 | (* Each task is represented by an array index w/at most 1 dep. 
each *) 22 | (* This example DAG 23 | 24 | A/0 <--- B/1 25 | ^. 26 | \ 27 | `- C/2 <--- D/3 28 | 29 | is represented as: [| None; Some 0; Some 0; Some 2 |] *) 30 | let gen_deps n st = 31 | let a = Array.make n None in 32 | for i=1 to n-1 do 33 | if Gen.bool st then a.(i) <- Some (Gen.int_bound (i-1) st) 34 | done; 35 | a 36 | 37 | type test_input = 38 | { 39 | num_domains : int; 40 | length : int; 41 | dependencies : int option array 42 | } 43 | 44 | let show_test_input t = 45 | Printf.sprintf 46 | "{ num_domains : %i\n length : %i\n dependencies : %s }" 47 | t.num_domains t.length Print.(array (option int) t.dependencies) 48 | 49 | let shrink_deps test_input = 50 | let ls = Array.to_list test_input.dependencies in 51 | let is = Shrink.list ~shrink:Shrink.(option nil) ls in 52 | Iter.map 53 | (fun deps -> 54 | let len = List.length deps in 55 | let arr = Array.of_list deps in 56 | let deps = Array.mapi (fun i j_opt -> match i,j_opt with 57 | | 0, _ 58 | | _,None -> None 59 | | _,Some 0 -> Some 0 60 | | _, Some j -> 61 | if j<0 || j>=len || j>=i (* ensure reduced dep is valid *) 62 | then Some ((j + i) mod i) 63 | else Some j) arr in 64 | { test_input with length=len; dependencies=deps }) is 65 | 66 | let arb_deps domain_bound promise_bound = 67 | let gen_deps = 68 | Gen.(pair (int_bound (domain_bound-1)) (int_bound promise_bound) >>= fun (num_domains,length) -> 69 | let num_domains = succ num_domains in 70 | let length = succ length in 71 | gen_deps length >>= fun dependencies -> return { num_domains; length; dependencies }) in 72 | let shrink_input input = 73 | Iter.append 74 | (Iter.map (fun doms' -> { input with num_domains = doms' }) (Shrink.int input.num_domains)) 75 | (shrink_deps input) in 76 | make ~print:show_test_input ~shrink:shrink_input gen_deps 77 | 78 | let build_dep_graph pool test_input = 79 | let len = test_input.length in 80 | let deps = test_input.dependencies in 81 | let rec build i promise_acc = 82 | if i=len 83 | then promise_acc 84 | else 85 
| let p = (match deps.(i) with 86 | | None -> 87 | Task.async pool work 88 | | Some dep -> 89 | Task.async pool (fun () -> 90 | work(); 91 | Task.await pool (List.nth promise_acc (i-1-dep)))) in 92 | build (i+1) (p::promise_acc) 93 | in 94 | build 0 [] 95 | 96 | let test_one_pool ~domain_bound ~promise_bound = 97 | Test.make ~name:"Domainslib.Task.async/await, one dep, 1 work pool" ~count:100 98 | (arb_deps domain_bound promise_bound) 99 | (Util.repeat 10 @@ 100 | fun input -> 101 | let pool = Task.setup_pool ~num_domains:input.num_domains () in 102 | Task.run pool (fun () -> 103 | let ps = build_dep_graph pool input in 104 | List.iter (fun p -> Task.await pool p) ps); 105 | Task.teardown_pool pool; 106 | true) 107 | 108 | let test_two_pools_sync_last ~domain_bound ~promise_bound = 109 | let gen = arb_deps domain_bound promise_bound in 110 | Test.make ~name:"Domainslib.Task.async/await, one dep, w.2 pools, syncing at the end" ~count:100 111 | (pair gen gen) 112 | (Util.repeat 10 @@ 113 | fun (input1,input2) -> 114 | try 115 | let pool1 = Task.setup_pool ~num_domains:input1.num_domains () in 116 | let pool2 = Task.setup_pool ~num_domains:input2.num_domains () in 117 | let ps1 = build_dep_graph pool1 input1 in 118 | let ps2 = build_dep_graph pool2 input2 in 119 | Task.run pool1 (fun () -> List.iter (fun p -> Task.await pool1 p) ps1); 120 | Task.run pool2 (fun () -> List.iter (fun p -> Task.await pool2 p) ps2); 121 | Task.teardown_pool pool1; 122 | Task.teardown_pool pool2; 123 | true 124 | with 125 | Failure err -> err = "failed to allocate domain") 126 | 127 | let test_two_nested_pools ~domain_bound ~promise_bound = 128 | let gen = arb_deps domain_bound promise_bound in 129 | Test.make ~name:"Domainslib.Task.async/await, one dep, w.2 nested pools" ~count:100 130 | (pair gen gen) 131 | (Util.repeat 10 @@ 132 | fun (input1,input2) -> 133 | try 134 | let pool1 = Task.setup_pool ~num_domains:input1.num_domains () in 135 | let pool2 = Task.setup_pool 
~num_domains:input2.num_domains () in 136 | Task.run pool1 (fun () -> 137 | Task.run pool2 (fun () -> 138 | let ps1 = build_dep_graph pool1 input1 in 139 | let ps2 = build_dep_graph pool2 input2 in 140 | List.iter (fun p -> Task.await pool1 p) ps1; 141 | List.iter (fun p -> Task.await pool2 p) ps2)); 142 | Task.teardown_pool pool1; 143 | Task.teardown_pool pool2; 144 | true 145 | with 146 | Failure err -> err = "failed to allocate domain") 147 | 148 | let () = 149 | let domain_bound = max 1 (Domain.recommended_domain_count () / 2) in 150 | let promise_bound = max 2 domain_bound in 151 | QCheck_base_runner.run_tests_main [ 152 | test_one_pool ~domain_bound ~promise_bound; 153 | test_two_pools_sync_last ~domain_bound ~promise_bound; 154 | test_two_nested_pools ~domain_bound ~promise_bound; 155 | ] 156 | -------------------------------------------------------------------------------- /test/task_parallel.ml: -------------------------------------------------------------------------------- 1 | open QCheck 2 | open Domainslib 3 | 4 | (** Property-based QCheck tests of Task.parallel_* *) 5 | 6 | let count = 250 7 | 8 | let test_parallel_for = 9 | Test.make ~name:"Domainslib.Task.parallel_for test" ~count 10 | (triple (int_bound 10) small_nat small_nat) 11 | (fun (num_domains,array_size,chunk_size) -> 12 | let pool = Task.setup_pool ~num_domains () in 13 | let res = Task.run pool (fun () -> 14 | let a = Atomic.make 0 in 15 | Task.parallel_for ~chunk_size ~start:0 ~finish:(array_size-1) ~body:(fun _ -> Atomic.incr a) pool; 16 | Atomic.get a) in 17 | Task.teardown_pool pool; 18 | res = array_size) 19 | 20 | let test_parallel_for_reduce = 21 | Test.make ~name:"Domainslib.Task.parallel_for_reduce test" ~count 22 | (triple (int_bound 10) small_nat small_nat) 23 | (fun (num_domains,array_size,chunk_size) -> 24 | let pool = Task.setup_pool ~num_domains () in 25 | let res = Task.run pool (fun () -> 26 | Task.parallel_for_reduce ~chunk_size ~start:0 ~finish:(array_size-1) ~body:(fun 
_ -> 1) pool (+) 0) in 27 | Task.teardown_pool pool; 28 | res = array_size) 29 | 30 | let test_parallel_scan = 31 | Test.make ~name:"Domainslib.Task.parallel_scan test" ~count 32 | (pair (int_bound 10) small_nat) 33 | (fun (num_domains,array_size) -> 34 | let pool = Task.setup_pool ~num_domains () in 35 | let a = Task.run pool (fun () -> Task.parallel_scan pool (+) (Array.make array_size 1)) in 36 | Task.teardown_pool pool; 37 | a = Array.init array_size (fun i -> i + 1)) 38 | 39 | let () = 40 | QCheck_base_runner.run_tests_main [ 41 | test_parallel_for; 42 | test_parallel_for_reduce; 43 | test_parallel_scan; 44 | ] 45 | -------------------------------------------------------------------------------- /test/task_throughput.ml: -------------------------------------------------------------------------------- 1 | 2 | let n_domains = try int_of_string Sys.argv.(1) with _ -> 1 3 | let n_iterations = try int_of_string Sys.argv.(2) with _ -> 1024 4 | let n_tasks = try int_of_string Sys.argv.(3) with _ -> 1024 5 | 6 | module T = Domainslib.Task 7 | 8 | module TimingHist = struct 9 | type t = { 10 | data: int array; 11 | min_n: int; 12 | max_n: int; 13 | mutable count: int; 14 | mutable sum : float; 15 | } 16 | 17 | let make min_n max_n = 18 | { data=Array.make (max_n - min_n) 0; min_n; max_n; count=0; sum=0. } 19 | 20 | let rec log2 n = 21 | if n <= 1 then 0 else 1 + log2(n asr 1) 22 | 23 | let add_point a x = 24 | let i = (log2 x) in 25 | let i = max (i-a.min_n+1) 0 in 26 | let i = min i ((Array.length a.data)-1) in 27 | a.data.(i) <- a.data.(i) + 1; 28 | a.sum <- a.sum +. (float_of_int x); 29 | a.count <- a.count + 1 30 | 31 | let mean a = 32 | a.sum /. 
(float_of_int a.count) 33 | 34 | let print_hist a = 35 | Printf.printf "Timings (ns): n=%d mean=%.1f\n" a.count (mean a); 36 | let fn n = (Int.shift_left 1 (a.min_n+n)) in 37 | let len = Array.length a.data in 38 | for i = 0 to (len - 1) do 39 | match i with 40 | | i when i=0 -> 41 | Printf.printf " (%8d, %8d): %6d\n" 0 (fn i) a.data.(i); 42 | | i when i=(len-1) -> 43 | Printf.printf " [%8d, Inf): %6d\n" (fn (i-1)) a.data.(i); 44 | | i -> 45 | Printf.printf " [%8d, %8d): %6d\n" (fn (i-1)) (fn i) a.data.(i); 46 | done 47 | 48 | end 49 | 50 | let _ = 51 | Printf.printf "n_iterations: %d n_units: %d n_domains: %d\n" 52 | n_iterations n_tasks n_domains; 53 | let pool = T.setup_pool ~num_domains:(n_domains - 1) () in 54 | 55 | let hist = TimingHist.make 5 25 in 56 | for _ = 1 to n_iterations do 57 | let t0 = Mclock.elapsed_ns() in 58 | T.run pool (fun _ -> 59 | T.parallel_for pool ~start:1 ~finish:n_tasks ~body:(fun _ -> ())); 60 | let t = Int64.sub (Mclock.elapsed_ns ()) t0 in 61 | TimingHist.add_point hist (Int64.to_int t); 62 | done; 63 | 64 | TimingHist.print_hist hist; 65 | 66 | T.teardown_pool pool 67 | -------------------------------------------------------------------------------- /test/test_chan.ml: -------------------------------------------------------------------------------- 1 | let buffer_size = try int_of_string Sys.argv.(1) with _ -> 1 2 | let num_items = try int_of_string Sys.argv.(2) with _ -> 100 3 | let num_senders = try int_of_string Sys.argv.(3) with _ -> 1 4 | let num_receivers = try int_of_string Sys.argv.(4) with _ -> 1 5 | 6 | module C = Domainslib.Chan 7 | 8 | let c = C.make_bounded buffer_size 9 | 10 | let rec receiver i n = 11 | if i = n then 12 | print_endline @@ Printf.sprintf "Receiver on domain %d done" (Domain.self () :> int) 13 | else ( 14 | ignore @@ C.recv c; 15 | receiver (i+1) n ) 16 | 17 | let rec sender i n = 18 | if i = n then 19 | print_endline @@ Printf.sprintf "Sender on domain %d done" (Domain.self () :> int) 20 | else ( 21 
| C.send c i; 22 | sender (i+1) n ) 23 | 24 | let _ = 25 | assert (num_items mod num_senders == 0); 26 | assert (num_items mod num_receivers == 0); 27 | let senders = 28 | Array.init num_senders (fun _ -> 29 | Domain.spawn (fun _ -> sender 0 (num_items / num_senders))) 30 | in 31 | let receivers = 32 | Array.init num_receivers (fun _ -> 33 | Domain.spawn (fun _ -> receiver 0 (num_items / num_receivers))) 34 | in 35 | Array.iter Domain.join senders; 36 | Array.iter Domain.join receivers; 37 | begin match C.recv_poll c with 38 | | None -> () 39 | | Some _ -> assert false 40 | end; 41 | for _i=1 to buffer_size do 42 | C.send c 0 43 | done; 44 | for _i=1 to buffer_size do 45 | ignore (C.recv c) 46 | done; 47 | begin match C.recv_poll c with 48 | | None -> () 49 | | Some _ -> assert false 50 | end 51 | -------------------------------------------------------------------------------- /test/test_deadlock.ml: -------------------------------------------------------------------------------- 1 | (* Despite what the name says, this test will not deadlock. A similar test will 2 | * deadlock in the version not using effect handlers. See 3 | * https://github.com/ocaml-multicore/ocaml-multicore/issues/670 *) 4 | 5 | module T = Domainslib.Task 6 | 7 | let n = try int_of_string Sys.argv.(1) with _ -> 1_000_000 8 | 9 | let rec loop n = 10 | if n = 0 then 11 | Printf.printf "Looping finished on domain %d\n%!" (Domain.self () :> int) 12 | else (Domain.cpu_relax (); loop (n-1)) 13 | 14 | let () = 15 | let pool = T.setup_pool ~num_domains:2 () in 16 | T.run pool (fun _ -> 17 | let a = T.async pool (fun _ -> 18 | Printf.printf "Task A running on domain %d\n%!" (Domain.self () :> int); 19 | loop n) 20 | in 21 | let b = T.async pool (fun _ -> 22 | Printf.printf "Task B running on domain %d\n%!" (Domain.self () :> int); 23 | T.await pool a) 24 | in 25 | let c = T.async pool (fun _ -> 26 | Printf.printf "Task C running on domain %d\n%!" 
(Domain.self () :> int); 27 | T.await pool b) 28 | in 29 | loop n; 30 | T.await pool c); 31 | T.teardown_pool pool 32 | -------------------------------------------------------------------------------- /test/test_parallel_find.ml: -------------------------------------------------------------------------------- 1 | let len = 1_000_000 2 | let nb_needles = 4 3 | 4 | let () = Random.init 42 5 | 6 | let needles = 7 | Array.init nb_needles (fun _ -> Random.int len) 8 | 9 | let input = 10 | let t = Array.make len false in 11 | needles |> Array.iter (fun needle -> 12 | t.(needle) <- true 13 | ); 14 | t 15 | 16 | open Domainslib 17 | 18 | let search_needle pool ~chunk_size = 19 | Task.parallel_find pool ~chunk_size ~start:0 ~finish:(len - 1) ~body:(fun i -> 20 | if input.(i) then Some i 21 | else None 22 | ) 23 | 24 | let test_search pool ~chunk_size = 25 | match search_needle pool ~chunk_size with 26 | | None -> assert false 27 | | Some needle -> 28 | assert (Array.exists ((=) needle) needles) 29 | 30 | let () = 31 | (* [num_domains] is the number of *new* domains spawned by the pool 32 | performing computations in addition to the current domain. 
*) 33 | let num_domains = Domain.recommended_domain_count () - 1 in 34 | Printf.eprintf "test_parallel_find on %d domains.\n" (num_domains + 1); 35 | let pool = Task.setup_pool ~num_domains ~name:"pool" () in 36 | Task.run pool begin fun () -> 37 | [0; 16; 32; 1000] |> List.iter (fun chunk_size -> 38 | test_search pool ~chunk_size) 39 | end; 40 | Task.teardown_pool pool; 41 | prerr_endline "Success."; 42 | -------------------------------------------------------------------------------- /test/test_parallel_scan.ml: -------------------------------------------------------------------------------- 1 | let len = 1_000_000 2 | 3 | let singleton_interval i = (i, i + 1) 4 | 5 | let combine_intervals interval1 interval2 = 6 | let b1, e1 = interval1 7 | and b2, e2 = interval2 in 8 | if e1 <> b2 then begin 9 | Printf.eprintf "Invalid intervals: (%d, %d), (%d, %d)\n" b1 e1 b2 e2; 10 | assert false 11 | end 12 | else (b1, e2) 13 | 14 | open Domainslib 15 | 16 | let test_scan_ordering pool = 17 | let check_interval i interval = 18 | let (b, e) = interval in 19 | assert (b = 0 && e = i + 1) 20 | in 21 | Array.init len singleton_interval 22 | |> Task.parallel_scan pool combine_intervals 23 | |> Array.iteri check_interval 24 | 25 | let () = 26 | (* [num_domains] is the number of *new* domains spawned by the pool 27 | performing computations in addition to the current domain. 
*) 28 | let num_domains = Domain.recommended_domain_count () - 1 in 29 | Printf.eprintf "test_parallel_scan on %d domains.\n" (num_domains + 1); 30 | let pool = Task.setup_pool ~num_domains ~name:"pool" () in 31 | Task.run pool begin fun () -> 32 | test_scan_ordering pool 33 | end; 34 | Task.teardown_pool pool; 35 | prerr_endline "Success."; -------------------------------------------------------------------------------- /test/test_task.ml: -------------------------------------------------------------------------------- 1 | (* Generic tests for the task module *) 2 | 3 | (* Parallel for *) 4 | 5 | open Domainslib 6 | let modify_arr pool chunk_size = fun () -> 7 | let arr1 = Array.init 100 (fun i -> i + 1) in 8 | Task.parallel_for ~chunk_size ~start:0 ~finish:99 9 | ~body:(fun i -> arr1.(i) <- arr1.(i) * 2) pool; 10 | let arr_res = Array.init 100 (fun i -> (i + 1) * 2) in 11 | assert (arr1 = arr_res) 12 | 13 | let inc_ctr pool chunk_size = fun () -> 14 | let ctr = Atomic.make 0 in 15 | Task.parallel_for ~chunk_size ~start:1 ~finish:1000 16 | ~body:(fun _ -> Atomic.incr ctr) pool; 17 | assert (Atomic.get ctr = 1000) 18 | 19 | (* Parallel for reduce *) 20 | 21 | let sum_sequence pool chunk_size init = fun () -> 22 | let v = Task.parallel_for_reduce ~chunk_size ~start:1 23 | ~finish:100 ~body:(fun i -> i) pool (+) init in 24 | assert (v = 5050 + init) 25 | 26 | (* Parallel scan *) 27 | 28 | let prefix_sum pool = fun () -> 29 | let prefix_s l = List.rev (List.fold_left (fun a y -> match a with 30 | | [] -> [y] 31 | | x::_ -> (x+y)::a) [] l) in 32 | let arr = Array.make 1000 1 in 33 | let v1 = Task.parallel_scan pool (+) arr in 34 | let ls = Array.to_list arr in 35 | let v2 = prefix_s ls in 36 | assert (v1 = Array.of_list v2) 37 | 38 | 39 | let () = 40 | let pool1 = Task.setup_pool ~num_domains:2 ~name:"pool1" () in 41 | let pool2 = Task.setup_pool ~num_domains:2 ~name:"pool2" () in 42 | Task.run pool1 (fun _ -> 43 | let p1 = Option.get @@ Task.lookup_pool "pool1" in 44 
| modify_arr pool1 0 (); 45 | modify_arr pool1 25 (); 46 | modify_arr pool1 100 (); 47 | inc_ctr p1 0 (); 48 | inc_ctr p1 16 (); 49 | inc_ctr p1 32 (); 50 | inc_ctr p1 1000 ()); 51 | Task.run pool2 (fun _ -> 52 | let p2 = Option.get @@ Task.lookup_pool "pool2" in 53 | sum_sequence pool2 0 0 (); 54 | sum_sequence pool2 10 10 (); 55 | sum_sequence pool2 1 0 (); 56 | sum_sequence p2 1 10 (); 57 | sum_sequence p2 100 10 (); 58 | sum_sequence p2 100 100 (); 59 | prefix_sum p2 ()); 60 | Task.teardown_pool pool1; 61 | Task.teardown_pool pool2; 62 | 63 | try 64 | sum_sequence pool2 0 0 (); 65 | assert false 66 | with Invalid_argument _ -> (); 67 | 68 | assert (Task.lookup_pool "pool1" = None); 69 | 70 | try 71 | let _ = Task.setup_pool ~num_domains:(-1) () in () 72 | with Invalid_argument _ -> (); 73 | print_endline "ok" 74 | -------------------------------------------------------------------------------- /test/test_task_crash.ml: -------------------------------------------------------------------------------- 1 | open Domainslib 2 | 3 | (* a simple work item, from ocaml/testsuite/tests/misc/takc.ml *) 4 | let rec tak x y z = 5 | if x > y then tak (tak (x-1) y z) (tak (y-1) z x) (tak (z-1) x y) 6 | else z 7 | 8 | let work () = 9 | for _ = 1 to 200 do 10 | assert (7 = tak 18 12 6); 11 | done 12 | ;; 13 | begin 14 | let pool1 = Task.setup_pool ~num_domains:2 () in 15 | let pool2 = Task.setup_pool ~num_domains:1 () in 16 | 17 | let pool1_prom0 = Task.async pool1 work in 18 | 19 | let pool2_prom0 = Task.async pool2 work in 20 | let pool2_prom1 = Task.async pool2 work in 21 | 22 | Task.run pool1 (fun () -> List.iter (fun p -> Task.await pool1 p) [pool1_prom0]); 23 | Task.run pool2 (fun () -> List.iter (fun p -> Task.await pool2 p) [pool2_prom0; pool2_prom1]); 24 | 25 | Task.teardown_pool pool1; 26 | Task.teardown_pool pool2; 27 | end 28 | -------------------------------------------------------------------------------- /test/test_task_empty.ml: 
-------------------------------------------------------------------------------- 1 | open Domainslib 2 | 3 | let array_size = 0 4 | 5 | let pool = Task.setup_pool ~num_domains:0 () 6 | let res = Task.run pool (fun () -> 7 | Task.parallel_for_reduce ~chunk_size:0 ~start:0 ~finish:(array_size-1) ~body:(fun _ -> 1) pool (+) 0);; 8 | Task.teardown_pool pool;; 9 | assert(res = array_size) 10 | --------------------------------------------------------------------------------