├── .gitignore ├── tests ├── params.zig ├── main.zig ├── barrier.zig ├── flush.zig ├── masked.zig ├── reduction.zig ├── critical.zig ├── return.zig ├── errors.zig ├── sections.zig ├── parallel.zig └── task.zig ├── src ├── ompt.zig ├── workshare_env.zig ├── input_handler.zig ├── reduce.zig ├── kmp.zig └── omp.zig ├── flake.nix ├── LICENSE ├── flake.lock └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | /zig-*/ 2 | .direnv/ 3 | .envrc 4 | flake.lock 5 | 6 | /omp.rep/ 7 | omp.gpr 8 | omp.loc* 9 | 10 | /llvm-project/ 11 | -------------------------------------------------------------------------------- /tests/params.zig: -------------------------------------------------------------------------------- 1 | pub const loop_count: usize = 1000; 2 | pub const repetitions: usize = 10; 3 | 4 | pub const sleep_time: usize = 500_000_000; 5 | 6 | pub const num_tasks: usize = 15; 7 | pub const max_tasks_per_thread: usize = 5; 8 | -------------------------------------------------------------------------------- /tests/main.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | 3 | pub const errors = @import("errors.zig"); 4 | pub const @"return" = @import("return.zig"); 5 | pub const barrier = @import("barrier.zig"); 6 | pub const flush = @import("flush.zig"); 7 | pub const masked = @import("masked.zig"); 8 | pub const task = @import("task.zig"); 9 | pub const critical = @import("critical.zig"); 10 | pub const reduction = @import("reduction.zig"); 11 | pub const sections = @import("sections.zig"); 12 | pub const parallel = @import("parallel.zig"); 13 | 14 | test "all" { 15 | std.testing.refAllDecls(@This()); 16 | } 17 | -------------------------------------------------------------------------------- /src/ompt.zig: -------------------------------------------------------------------------------- 1 | const opts = @import("build_options"); 2 | const kmp = @import("kmp.zig"); 3 | 4 | pub const data_t = extern union { 5 | val: usize, 6 | ptr: *anyopaque, 7 | }; 8 | 9 | pub const frame_t = extern struct { 10 | exit_frame: data_t, 11 | enter_frame: data_t, 12 | exit_frame_flags: c_int, 13 | enter_frame_flags: c_int, 14 | }; 15 | 16 | pub const dispatch_chunk_t = extern struct { 17 | start: usize, 18 | iterations: usize, 19 | }; 20 | 21 | pub const task_info_t = extern struct { 22 | frame: frame_t, 23 | task_data: data_t, 24 | scheduling_parent: *kmp.task_data_t, 25 | thread_num: c_int, 26 | dispatch_chunk: dispatch_chunk_t, 27 | }; 28 | -------------------------------------------------------------------------------- /flake.nix: -------------------------------------------------------------------------------- 1 | { 2 | description = "Python shell flake"; 3 | 4 | inputs = { 5 | nixpkgs.url = "github:nixos/nixpkgs/nixos-unstable"; 6 | flake-utils.url = "github:numtide/flake-utils"; 7 | }; 8 | 9 | outputs = { self, nixpkgs, flake-utils, ... 
}: 10 | flake-utils.lib.eachDefaultSystem (system: 11 | let 12 | pkgs = nixpkgs.legacyPackages.${system}; 13 | lib = nixpkgs.lib; 14 | in 15 | { 16 | devShells.default = pkgs.mkShell { 17 | packages = with pkgs; 18 | [ 19 | zig 20 | zls 21 | 22 | llvmPackages_18.openmp 23 | llvmPackages_18.clang 24 | llvmPackages_18.llvm 25 | perl 26 | cmake 27 | ]; 28 | }; 29 | } 30 | ); 31 | } 32 | -------------------------------------------------------------------------------- /tests/barrier.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const omp = @import("omp"); 3 | const params = @import("params.zig"); 4 | 5 | fn test_omp_barrier() bool { 6 | var result1: u32 = 0; 7 | var result2: u32 = 0; 8 | 9 | omp.parallel(.{}) 10 | .run(.{ .shared = .{ &result1, &result2 } }, struct { 11 | fn f(f_result1: *u32, f_result2: *u32) void { 12 | const rank: u32 = omp.get_thread_num(); 13 | if (rank == 1) { 14 | std.time.sleep(params.sleep_time); 15 | f_result2.* = 3; 16 | } 17 | 18 | omp.barrier(); 19 | if (rank == 2) { 20 | f_result1.* = f_result2.*; 21 | } 22 | } 23 | }.f); 24 | 25 | return result1 == 3; 26 | } 27 | 28 | test "barrier" { 29 | var num_failed: u32 = 0; 30 | omp.set_dynamic(false); 31 | omp.set_num_threads(4); 32 | for (0..params.repetitions) |_| { 33 | if (!test_omp_barrier()) { 34 | num_failed += 1; 35 | } 36 | } 37 | 38 | try std.testing.expect(num_failed == 0); 39 | } 40 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Luca Bancale 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /tests/flush.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const omp = @import("omp"); 3 | const params = @import("params.zig"); 4 | 5 | pub fn test_omp_flush() bool { 6 | var result1: u32 = 0; 7 | var result2: u32 = 0; 8 | var dummy: u32 = 0; 9 | 10 | omp.parallel(.{}) 11 | .run(.{ .shared = .{ &result1, &result2, &dummy } }, struct { 12 | fn f(f_result1: *u32, f_result2: *u32, f_dummy: *u32) void { 13 | const rank: u32 = omp.get_thread_num(); 14 | omp.barrier(); 15 | 16 | if (rank == 1) { 17 | f_result2.* = 3; 18 | omp.flush(.{f_result2}); 19 | f_dummy.* = f_result2.*; 20 | } 21 | 22 | if (rank == 0) { 23 | std.time.sleep(params.sleep_time); 24 | omp.flush(.{f_result2}); 25 | f_result1.* = f_result2.*; 26 | } 27 | } 28 | }.f); 29 | 30 | if (result1 != 3 or result2 != 3 or dummy != 3) { 31 | std.debug.print("result1: {}, result2: {}, dummy: {}\n", .{ result1, result2, dummy }); 32 | } 33 | 34 | return result1 == 3 and result2 == 3 and dummy == 3; 35 | } 36 | 37 | test "flush" { 38 | var num_failed: u32 = 0; 39 | omp.set_dynamic(false); 40 | if (omp.get_max_threads() == 1) { 41 | omp.set_num_threads(2); 42 | } 43 | 44 | for (0..params.repetitions) |_| { 45 | if (!test_omp_flush()) { 46 | num_failed += 1; 47 | } 48 | } 49 | 50 | try std.testing.expect(num_failed == 0); 51 | } 52 | -------------------------------------------------------------------------------- /flake.lock: -------------------------------------------------------------------------------- 1 | { 2 | "nodes": { 3 | "flake-utils": { 4 | "inputs": { 5 | "systems": "systems" 6 | }, 7 | "locked": { 8 | "lastModified": 1710146030, 9 | "narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=", 10 | "owner": "numtide", 11 | "repo": "flake-utils", 12 | "rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a", 13 | "type": "github" 14 | }, 15 | "original": { 16 | "owner": "numtide", 17 | "repo": "flake-utils", 18 | "type": "github" 19 | } 20 | }, 21 | "nixpkgs": { 22 | "locked": { 23 | "lastModified": 1724479785, 24 | "narHash": "sha256-pP3Azj5d6M5nmG68Fu4JqZmdGt4S4vqI5f8te+E/FTw=", 25 | "owner": "nixos", 26 | "repo": "nixpkgs", 27 | "rev": "d0e1602ddde669d5beb01aec49d71a51937ed7be", 28 | "type": "github" 29 | }, 30 | "original": { 31 | "owner": "nixos", 32 | "ref": "nixos-unstable", 33 | "repo": "nixpkgs", 34 | "type": "github" 35 | } 36 | }, 37 | "root": { 38 | "inputs": { 39 | "flake-utils": "flake-utils", 40 | "nixpkgs": "nixpkgs" 41 | } 42 | }, 43 | "systems": { 44 | "locked": { 45 | "lastModified": 1681028828, 46 | "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", 47 | "owner": "nix-systems", 48 | "repo": "default", 49 | "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", 50 | "type": "github" 51 | }, 52 | "original": { 53 | "owner": "nix-systems", 54 | "repo": "default", 55 | "type": "github" 56 | } 57 | } 58 | }, 59 | "root": "root", 60 | "version": 7 61 | } 62 | -------------------------------------------------------------------------------- /tests/masked.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const omp = @import("omp"); 3 | const params = @import("params.zig"); 4 | 5 | fn test_omp_masked() bool { 6 | var nthreads: u32 = 0; 7 | var executing_thread: i32 = -1; 8 | var tid_result: u32 = 0; 9 | 10 | omp.parallel(.{}) 11 | .run(.{ .shared = .{ &nthreads, &executing_thread, 
&tid_result } }, struct { 12 | fn f(f_nthreads: *u32, f_executing_thread: *i32, f_tid_result: *u32) void { 13 | omp.masked() 14 | .run(.{ f_nthreads, f_executing_thread, f_tid_result }, omp.only_master, struct { 15 | fn f(ff_nthreads: *u32, ff_executing_thread: *i32, ff_tid_result: *u32) void { 16 | const tid: i32 = @intCast(omp.get_thread_num()); 17 | 18 | if (tid != 0) { 19 | omp.critical(.{}) 20 | .run(.{ff_tid_result}, struct { 21 | fn f(fff_tid_result: *u32) void { 22 | fff_tid_result.* += 1; 23 | } 24 | }.f); 25 | } 26 | 27 | omp.critical(.{}) 28 | .run(.{ff_nthreads}, struct { 29 | fn f(fff_nthreads: *u32) void { 30 | fff_nthreads.* += 1; 31 | } 32 | }.f); 33 | ff_executing_thread.* = @intCast(omp.get_thread_num()); 34 | } 35 | }.f); 36 | } 37 | }.f); 38 | 39 | return (nthreads == 1) and (executing_thread == 0) and (tid_result == 0); 40 | } 41 | 42 | test "masked" { 43 | var num_failed: u32 = 0; 44 | 45 | for (0..params.repetitions) |_| { 46 | if (!test_omp_masked()) { 47 | num_failed += 1; 48 | } 49 | } 50 | 51 | try std.testing.expect(num_failed == 0); 52 | } 53 | -------------------------------------------------------------------------------- /tests/reduction.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const omp = @import("omp"); 3 | const params = @import("params.zig"); 4 | 5 | fn parallel_reduction_plus() bool { 6 | var sum: u32 = 0; 7 | const known_sum: u32 = (params.loop_count * (params.loop_count + 1)) / 2; 8 | 9 | omp.parallel(.{}) 10 | .run(.{ .shared = .{&sum} }, struct { 11 | fn f(f_sum: *u32) void { 12 | omp.loop(u32, .{ .reduction = &.{.plus} }) 13 | .run(.{ .reduction = .{f_sum} }, 1, params.loop_count + 1, 1, struct { 14 | fn f(i: u32, ff_sum: *u32) void { 15 | ff_sum.* += i; 16 | } 17 | }.f); 18 | } 19 | }.f); 20 | 21 | if (known_sum != sum) { 22 | std.debug.print("red KNOWN_SUM = {}\n", .{known_sum}); 23 | std.debug.print("SUM = {}\n", .{sum}); 24 | } 25 | 26 | return known_sum == sum; 27 | } 28 | 29 | test "parallel_reduction_plus" { 30 | var num_failed: u32 = 0; 31 | for (0..params.repetitions) |_| { 32 | if (!parallel_reduction_plus()) { 33 | num_failed += 1; 34 | } 35 | } 36 | 37 | try std.testing.expect(num_failed == 0); 38 | } 39 | 40 | fn parallel_loop_reduction_plus() bool { 41 | var sum: u32 = 0; 42 | const known_sum: u32 = (params.loop_count * (params.loop_count + 1)) / 2; 43 | 44 | omp.parallel(.{}) 45 | .loop(u32, .{ .reduction = &.{.plus} }) 46 | .run(.{ .reduction = .{&sum} }, 1, params.loop_count + 1, 1, struct { 47 | fn f(i: u32, f_sum: *u32) void { 48 | f_sum.* += i; 49 | } 50 | }.f); 51 | 52 | if (known_sum != sum) { 53 | std.debug.print("red KNOWN_SUM = {}\n", .{known_sum}); 54 | std.debug.print("SUM = {}\n", .{sum}); 55 | } 56 | 57 | return known_sum == sum; 58 | } 59 | 60 | test "parallel_loop_reduction_plus" { 61 | var num_failed: u32 = 0; 62 | omp.set_num_threads(3); 63 | for (0..params.repetitions) |_| { 64 | if (!parallel_loop_reduction_plus()) { 65 | num_failed += 1; 66 | } 67 | } 68 | 69 | try std.testing.expect(num_failed == 0); 70 | } 71 | -------------------------------------------------------------------------------- /tests/critical.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const omp = @import("omp"); 3 | const params = @import("params.zig"); 4 | 5 | fn test_omp_critical() bool { 6 | var sum: u32 = 0; 7 | const known_sum: u32 = 999 * 1000 / 2; 8 | 9 | omp.parallel(.{}) 10 | .run(.{
.shared = .{&sum} }, struct { 11 | fn f(f_sum: *u32) void { 12 | var mysum: u32 = 0; 13 | 14 | omp.loop(u32, .{}) 15 | .run(.{ .shared = .{&mysum} }, 1, params.loop_count, 1, struct { 16 | fn f(i: u32, f_mysum: *u32) void { 17 | f_mysum.* = f_mysum.* + i; 18 | } 19 | }.f); 20 | 21 | omp.critical(.{}) 22 | .run(.{ f_sum, &mysum }, struct { 23 | fn f(ff_sum: *u32, f_mysum: *u32) void { 24 | ff_sum.* += f_mysum.*; 25 | } 26 | }.f); 27 | } 28 | }.f); 29 | 30 | if (sum != known_sum) { 31 | std.debug.print("sum: {}, known_sum: {}\n", .{ sum, known_sum }); 32 | } 33 | 34 | return known_sum == sum; 35 | } 36 | 37 | test "critical" { 38 | var num_failed: u32 = 0; 39 | 40 | for (0..params.repetitions) |_| { 41 | if (!test_omp_critical()) { 42 | num_failed += 1; 43 | } 44 | } 45 | 46 | try std.testing.expect(num_failed == 0); 47 | } 48 | 49 | fn omp_critical_hint(iter: u32) bool { 50 | var sum: u32 = 0; 51 | const known_sum: u32 = (999 * 1000) / 2; 52 | 53 | omp.parallel(.{}) 54 | .run(.{ .shared = .{ &sum, iter } }, struct { 55 | fn f(f_sum: *u32, f_iter: u32) void { 56 | var mysum: u32 = 0; 57 | omp.loop(u32, .{}) 58 | .run(.{ .shared = .{&mysum} }, 0, params.loop_count, 1, struct { 59 | fn f(i: u32, f_mysum: *u32) void { 60 | f_mysum.* = f_mysum.* + i; 61 | } 62 | }.f); 63 | 64 | const fun = struct { 65 | fn f(ff_sum: *u32, f_mysum: *u32) void { 66 | ff_sum.* += f_mysum.*; 67 | } 68 | }.f; 69 | 70 | switch (f_iter % 4) { 71 | 0 => { 72 | omp.critical(.{ .name = "a", .sync = .uncontended }).run(.{ f_sum, &mysum }, fun); 73 | }, 74 | 1 => { 75 | omp.critical(.{ .name = "b", .sync = .contended }).run(.{ f_sum, &mysum }, fun); 76 | }, 77 | 2 => { 78 | omp.critical(.{ .name = "c", .sync = .nonspeculative }).run(.{ f_sum, &mysum }, fun); 79 | }, 80 | 3 => { 81 | omp.critical(.{ .name = "d", .sync = .speculative }).run(.{ f_sum, &mysum }, fun); 82 | }, 83 | else => { 84 | unreachable; 85 | }, 86 | } 87 | } 88 | }.f); 89 | 90 | if (sum != known_sum) { 91 | std.debug.print("sum: {}, known_sum: {}\n", .{ sum, known_sum }); 92 | } 93 | 94 | return known_sum == sum; 95 | } 96 | 97 | test "critical_hint" { 98 | var num_failed: u32 = 0; 99 | 100 | for (0..params.repetitions) |i| { 101 | if (!omp_critical_hint(@intCast(i))) { 102 | num_failed += 1; 103 | } 104 | } 105 | 106 | try std.testing.expect(num_failed == 0); 107 | } 108 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # OpenMP-zig 2 | 3 | This wrapper implements (almost) all of the OpenMP directives up to version 3.1, plus some newer features. 4 | All of this is done (mostly, see below) without any allocation on the Zig side. 5 | 6 | This is implemented using the `libomp` library of LLVM (GOMP support is not planned). **Disclaimer:** This project is not affiliated with LLVM in any capacity.
7 | 8 | ```zig 9 | const std = @import("std"); 10 | const omp = @import("omp"); 11 | 12 | fn main() void { 13 | omp.parallel(.{}) 14 | .run(.{}, struct { 15 | fn f() void { 16 | std.debug.print("Hello world {}!", .{omp.get_thread_num()}); 17 | } 18 | }.f); 19 | } 20 | ``` 21 | 22 | ## Build 23 | 24 | ```sh 25 | zig fetch --save git+https://github.com/sbancuz/OpenMP-zig 26 | ``` 27 | 28 | ```zig 29 | // build.zig 30 | const OpenMP_zig_dep = b.dependency("OpenMP-zig", .{ 31 | .target = target, 32 | .optimize = optimize, 33 | }); 34 | exe.root_module.addImport("omp", OpenMP_zig_dep.module("omp")); 35 | ``` 36 | 37 | ## Features 38 | - [x] `#pragma omp parallel` 39 | - [x] `All reductions` 40 | - [x] `#pragma omp for` 41 | - [x] `#pragma omp sections` 42 | - [x] `#pragma omp single` 43 | - [x] `#pragma omp master/masked` 44 | - [x] `#pragma omp critical` 45 | - [x] `#pragma omp barrier` 46 | - [x] `#pragma omp task` 47 | - [ ] `#pragma omp atomic` NOT POSSIBLE TO IMPLEMENT 48 | - [ ] `#pragma omp simd` NOT POSSIBLE TO IMPLEMENT 49 | 50 | To see more examples of the library, check the tests folder. 51 | 52 | ## Extensions 53 | 54 | ```zig 55 | fn test_omp_task_error() !void { 56 | // The `ret_reduction` parameter tells the directive how it should reduce the return value 57 | const result = omp.parallel(.{ .ret_reduction = .plus }) 58 | .run(.{}, struct { 59 | // You can return whatever you want! 60 | fn f() !usize { 61 | const maybe = omp.single() 62 | .run(.{}, struct { 63 | // Only for tasks, you have to put the explicit error type in the promise, 64 | // otherwise it won't be able to infer the type 65 | fn f() *omp.promise(error{WompWomp}!usize) { 66 | return omp.task(.{}) 67 | .run(.{}, struct { 68 | // Same deal here 69 | fn f() error{WompWomp}!usize { 70 | return error.WompWomp; 71 | } 72 | }.f); 73 | } 74 | }.f); 75 | if (maybe) |pro| { 76 | defer pro.deinit(); 77 | return pro.get(); 78 | } 79 | return 0; 80 | } 81 | }.f) catch |err| switch (err) { 82 | error.WompWomp => blk: { std.debug.print("Caught an error :^(", .{}); break :blk 0; }, 83 | }; 84 | 85 | std.debug.print("No errors here! The reduced result is {}", .{result}); 86 | } 87 | ``` 88 | 89 | ### Return 90 | 91 | All of the directives can return values. To return something you may need to specify the `ret_reduction` parameter (see the short sketch at the end of this README). 92 | 93 | > [!WARNING] 94 | > The promises returned from the `task` directive are heap allocated, so make sure to call `deinit()` on them! 95 | 96 | ### Errors 97 | 98 | All of the directives can return error types. 99 | > [!WARNING] 100 | > Returning more than one type of error from a directive is clearly a race condition! 101 | 102 | ## Goal 103 | 104 | The goal of this library is to provide at least OpenMP 4.5 to Zig and to be production ready, along with the extensions mentioned above.
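As a quick reference for the `ret_reduction` parameter mentioned above, here is a minimal sketch adapted from `tests/return.zig`: every thread returns `1`, and the runtime reduces the per-thread results with `.plus`, so the final value equals the number of threads.

```zig
const std = @import("std");
const omp = @import("omp");

test "ret_reduction sketch" {
    // Each thread contributes 1; `.plus` sums the contributions.
    const sum = omp.parallel(.{ .ret_reduction = .plus })
        .run(.{}, struct {
            fn f() usize {
                return 1;
            }
        }.f);

    try std.testing.expect(omp.get_max_threads() == sum);
}
```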
105 | -------------------------------------------------------------------------------- /tests/return.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const omp = @import("omp"); 3 | const params = @import("params.zig"); 4 | 5 | fn test_omp_parallel_return() !bool { 6 | const sum = omp.parallel(.{ 7 | .ret_reduction = .plus, 8 | }).run(.{}, struct { 9 | fn f() usize { 10 | return 1; 11 | } 12 | }.f); 13 | 14 | return omp.get_max_threads() == sum; 15 | } 16 | 17 | test "parallel_return" { 18 | var num_failed: u32 = 0; 19 | if (!try test_omp_parallel_return()) { 20 | num_failed += 1; 21 | } 22 | 23 | try std.testing.expect(num_failed == 0); 24 | } 25 | 26 | fn test_omp_single_return() !bool { 27 | const result = omp.parallel(.{ .ret_reduction = .plus }) 28 | .run(.{}, struct { 29 | fn f() usize { 30 | const maybe = omp.single() 31 | .run(.{}, struct { 32 | fn f() usize { 33 | return 1; 34 | } 35 | }.f); 36 | if (maybe) |r| { 37 | return r; 38 | } 39 | return 0; 40 | } 41 | }.f); 42 | 43 | return result == 1; 44 | } 45 | 46 | test "single_return" { 47 | if (omp.get_max_threads() < 2) { 48 | omp.set_num_threads(8); 49 | } 50 | 51 | var num_failed: u32 = 0; 52 | for (0..params.repetitions) |_| { 53 | if (!try test_omp_single_return()) { 54 | num_failed += 1; 55 | } 56 | } 57 | 58 | try std.testing.expect(num_failed == 0); 59 | } 60 | 61 | fn test_omp_task_return() !bool { 62 | const result = omp.parallel(.{ .ret_reduction = .plus }) 63 | .run(.{}, struct { 64 | fn f() usize { 65 | const maybe = omp.single() 66 | .run(.{}, struct { 67 | fn f() *omp.promise(usize) { 68 | return omp.task(.{}) 69 | .run(.{}, struct { 70 | fn f() usize { 71 | return 1; 72 | } 73 | }.f); 74 | } 75 | }.f); 76 | if (maybe) |pro| { 77 | defer pro.deinit(); 78 | return pro.get(); 79 | } 80 | return 0; 81 | } 82 | }.f); 83 | 84 | return result == 1; 85 | } 86 | 87 | test "task_return" { 88 | if (omp.get_max_threads() < 2) { 89 | omp.set_num_threads(8); 90 | } 91 | 92 | var num_failed: u32 = 0; 93 | for (0..params.repetitions) |_| { 94 | if (!try test_omp_task_return()) { 95 | num_failed += 1; 96 | } 97 | } 98 | 99 | try std.testing.expect(num_failed == 0); 100 | } 101 | 102 | fn test_omp_loop_return() !bool { 103 | const res = omp.parallel(.{ .ret_reduction = .plus }) 104 | .run(.{}, struct { 105 | fn f() usize { 106 | const a = omp.loop(u32, .{ .ret_reduction = .plus }) 107 | .run(.{}, 0, params.loop_count, 1, struct { 108 | fn f(i: u32) usize { 109 | _ = i; 110 | return 1; 111 | } 112 | }.f); 113 | return a; 114 | } 115 | }.f); 116 | 117 | return params.loop_count * omp.get_max_threads() == res; 118 | } 119 | 120 | test "loop_return" { 121 | if (omp.get_max_threads() < 2) { 122 | omp.set_num_threads(8); 123 | } 124 | 125 | var num_failed: u32 = 0; 126 | for (0..1) |_| { 127 | if (!try test_omp_loop_return()) { 128 | num_failed += 1; 129 | } 130 | } 131 | 132 | try std.testing.expect(num_failed == 0); 133 | } 134 | -------------------------------------------------------------------------------- /tests/errors.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const omp = @import("omp"); 3 | const params = @import("params.zig"); 4 | 5 | fn test_omp_parallel_error() !bool { 6 | _ = omp.parallel(.{ 7 | .ret_reduction = .plus, 8 | }).run(.{}, struct { 9 | fn f() !usize { 10 | if (omp.get_thread_num() % 2 == 0) { 11 | return error.WompWomp; 12 | } else { 13 | return 1; 14 | } 15 | } 16 
| }.f) catch |err| switch (err) { 17 | error.WompWomp => return true, 18 | else => return false, 19 | }; 20 | 21 | return false; 22 | } 23 | 24 | test "parallel_error" { 25 | omp.set_num_threads(8); 26 | var num_failed: u32 = 0; 27 | for (0..params.repetitions * 100) |_| { 28 | if (!try test_omp_parallel_error()) { 29 | num_failed += 1; 30 | } 31 | } 32 | 33 | try std.testing.expect(num_failed == 0); 34 | } 35 | 36 | fn test_omp_single_error() !bool { 37 | _ = omp.parallel(.{ .ret_reduction = .plus }) 38 | .run(.{}, struct { 39 | fn f() !usize { 40 | const maybe = omp.single() 41 | .run(.{}, struct { 42 | fn f() !usize { 43 | return error.WompWomp; 44 | } 45 | }.f); 46 | if (maybe) |r| { 47 | return r; 48 | } 49 | return 0; 50 | } 51 | }.f) catch |err| switch (err) { 52 | error.WompWomp => return true, 53 | }; 54 | 55 | return false; 56 | } 57 | 58 | test "single_error" { 59 | if (omp.get_max_threads() < 2) { 60 | omp.set_num_threads(8); 61 | } 62 | 63 | var num_failed: u32 = 0; 64 | for (0..params.repetitions * 100) |_| { 65 | if (!try test_omp_single_error()) { 66 | num_failed += 1; 67 | } 68 | } 69 | 70 | try std.testing.expect(num_failed == 0); 71 | } 72 | 73 | fn test_omp_task_error() !bool { 74 | _ = omp.parallel(.{ .ret_reduction = .plus }) 75 | .run(.{}, struct { 76 | fn f() !usize { 77 | const maybe = omp.single() 78 | .run(.{}, struct { 79 | fn f() *omp.promise(error{WompWomp}!usize) { 80 | return omp.task(.{}) 81 | .run(.{}, struct { 82 | fn f() error{WompWomp}!usize { 83 | return error.WompWomp; 84 | } 85 | }.f); 86 | } 87 | }.f); 88 | if (maybe) |pro| { 89 | defer pro.deinit(); 90 | return pro.get(); 91 | } 92 | return 0; 93 | } 94 | }.f) catch |err| switch (err) { 95 | error.WompWomp => return true, 96 | }; 97 | 98 | return false; 99 | } 100 | 101 | test "task_error" { 102 | if (omp.get_max_threads() < 2) { 103 | omp.set_num_threads(8); 104 | } 105 | 106 | var num_failed: u32 = 0; 107 | for (0..params.repetitions) |_| { 108 | if (!try test_omp_task_error()) { 109 | num_failed += 1; 110 | } 111 | } 112 | 113 | try std.testing.expect(num_failed == 0); 114 | } 115 | 116 | fn test_omp_loop_error() !bool { 117 | _ = omp.parallel(.{ .ret_reduction = .plus }) 118 | .run(.{}, struct { 119 | fn f() !usize { 120 | const a = omp.loop(u32, .{ .ret_reduction = .plus }) 121 | .run(.{}, 0, params.loop_count, 1, struct { 122 | fn f(i: u32) error{WompWomp}!usize { 123 | _ = i; 124 | return error.WompWomp; 125 | } 126 | }.f); 127 | return a; 128 | } 129 | }.f) catch |err| switch (err) { 130 | error.WompWomp => return true, 131 | }; 132 | 133 | return false; 134 | } 135 | 136 | test "loop_error" { 137 | if (omp.get_max_threads() < 2) { 138 | omp.set_num_threads(8); 139 | } 140 | 141 | var num_failed: u32 = 0; 142 | for (0..1) |_| { 143 | if (!try test_omp_loop_error()) { 144 | num_failed += 1; 145 | } 146 | } 147 | 148 | try std.testing.expect(num_failed == 0); 149 | } 150 | -------------------------------------------------------------------------------- /src/workshare_env.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const reduce = @import("reduce.zig"); 3 | const kmp = @import("kmp.zig"); 4 | const in = @import("input_handler.zig"); 5 | 6 | pub const options = struct { 7 | return_optional: bool, 8 | do_copy: bool, 9 | is_omp_func: bool = false, 10 | }; 11 | 12 | pub inline fn make( 13 | comptime red: []const reduce.operators, 14 | comptime f: anytype, 15 | comptime ret_t: type, 16 | comptime opts: options, 17 | ) type 
{ 18 | return struct { 19 | const static = struct { 20 | var lck: kmp.critical_name_t = @bitCast([_]u8{0} ** 32); 21 | }; 22 | 23 | pub inline fn run( 24 | pre: anytype, 25 | args: anytype, 26 | post: anytype, 27 | ret_reduction: *ret_t, 28 | ) if (opts.return_optional) ?ret_t else ret_t { 29 | const private_copy = if (opts.do_copy) in.make_another(args.private) else args.private; 30 | const firstprivate_copy = if (opts.do_copy) in.shallow_copy(args.firstprivate) else args.firstprivate; 31 | const reduction_copy = if (opts.do_copy) in.shallow_copy(args.reduction) else args.reduction; 32 | const true_args = pre ++ brk: { 33 | const r = if (opts.do_copy) 34 | args.shared ++ private_copy ++ firstprivate_copy ++ reduction_copy 35 | else 36 | .{args}; 37 | 38 | break :brk if (opts.is_omp_func) r else .{r}; 39 | } ++ post; 40 | 41 | const ret = @call(.always_inline, f, true_args); 42 | 43 | const id: kmp.ident_t = .{ 44 | .flags = @intFromEnum(kmp.ident_flags.IDENT_KMPC), 45 | .psource = "parallel" ++ @typeName(@TypeOf(f)), 46 | }; 47 | 48 | const no_err_ret_t = in.no_error(ret_t); 49 | 50 | if (red.len > 0 or no_err_ret_t != void) { 51 | if (no_err_ret_t != void) { 52 | // If it's an error_union AND we caught an error just reduce the other parameters that need to be reduced. 53 | // This has to happen since once a reduce starts, every thread needs to call the proper kmp function calls 54 | // to signal to OMP that the reduce actually happened. 55 | // 56 | // Also apparently there needs to be the same memory structure for all the reduce args, so we just pass in 57 | // fake data that won't do anything 58 | var ret_no_err = if (@typeInfo(ret_t) == .ErrorUnion) ret catch |err| { 59 | var tmp: no_err_ret_t = undefined; 60 | var tmp2: no_err_ret_t = undefined; 61 | 62 | const reduce_args = if (no_err_ret_t == void) reduction_copy else reduction_copy ++ .{&tmp2}; 63 | const reduce_dest = if (no_err_ret_t == void) args.reduction else args.reduction ++ .{&tmp}; 64 | _ = reduce.reduce(&id, true, reduce_dest, reduce_args, red[0 .. 
red.len - 1] ++ .{.id}, &static.lck); 65 | 66 | ret_reduction.* = err; 67 | return ret_reduction.*; 68 | } else ret; 69 | 70 | // If an error didn't occur then we can just append the return_reduce parameter to the end and proceed normally 71 | var tmp: no_err_ret_t = if (@typeInfo(ret_t) != .ErrorUnion) ret_reduction.* else ret_reduction.* catch unreachable; 72 | const reduce_args = if (no_err_ret_t == void) reduction_copy else reduction_copy ++ .{&ret_no_err}; 73 | const reduce_dest = if (no_err_ret_t == void) args.reduction else args.reduction ++ .{&tmp}; 74 | const has_result = reduce.reduce(&id, true, reduce_dest, reduce_args, red, &static.lck); 75 | 76 | if (has_result > 0) { 77 | ret_reduction.* = tmp; 78 | return ret_reduction.*; 79 | } 80 | } else { 81 | const has_result = reduce.reduce(&id, true, args.reduction, reduction_copy, red, &static.lck); 82 | if (has_result > 0) { 83 | return ret_reduction.*; 84 | } 85 | } 86 | } 87 | 88 | if (ret_t != void) { 89 | if (opts.return_optional) { 90 | return null; 91 | } 92 | return ret; 93 | } 94 | } 95 | }; 96 | } 97 | -------------------------------------------------------------------------------- /tests/sections.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const omp = @import("omp"); 3 | const params = @import("params.zig"); 4 | 5 | fn test_omp_sections_default() !bool { 6 | var sum: u32 = 7; 7 | const known_sum: u32 = @as(u32, (params.loop_count * (params.loop_count - 1)) / 2) + sum; 8 | 9 | omp.parallel(.{}) 10 | .run(.{ .shared = .{&sum} }, struct { 11 | fn f(f_sum: *u32) void { 12 | var mysum: u32 = 0; 13 | var i: u32 = 0; 14 | const summer = struct { 15 | fn f(s: *u32, ms: *u32) void { 16 | s.* += ms.*; 17 | } 18 | }.f; 19 | 20 | omp.sections(.{}) 21 | .run(.{ .shared = .{f_sum}, .firstprivate = .{ &mysum, &i } }, .{ 22 | &struct { 23 | fn section(ff_sum: *u32, ff_mysum: *u32, ff_i: *u32) void { 24 | ff_i.* = 1; 25 | while (ff_i.* < 400) : (ff_i.* += 1) { 26 | ff_mysum.* += ff_i.*; 27 | } 28 | omp.critical(.{}).run(.{ ff_sum, ff_mysum }, summer); 29 | } 30 | }.section, 31 | &struct { 32 | fn section(ff_sum: *u32, ff_mysum: *u32, ff_i: *u32) void { 33 | ff_i.* = 400; 34 | while (ff_i.* < 700) : (ff_i.* += 1) { 35 | ff_mysum.* += ff_i.*; 36 | } 37 | omp.critical(.{}).run(.{ ff_sum, ff_mysum }, summer); 38 | } 39 | }.section, 40 | &struct { 41 | fn section(ff_sum: *u32, ff_mysum: *u32, ff_i: *u32) void { 42 | ff_i.* = 700; 43 | while (ff_i.* < 1000) : (ff_i.* += 1) { 44 | ff_mysum.* += ff_i.*; 45 | } 46 | omp.critical(.{}).run(.{ ff_sum, ff_mysum }, summer); 47 | } 48 | }.section, 49 | }); 50 | } 51 | }.f); 52 | 53 | if (known_sum != sum) { 54 | std.debug.print("KNOWN_SUM = {}\n", .{known_sum}); 55 | std.debug.print("SUM = {}\n", .{sum}); 56 | } 57 | 58 | return known_sum == sum; 59 | } 60 | 61 | test "sections_default" { 62 | var num_failed: u32 = 0; 63 | for (0..params.repetitions) |_| { 64 | if (!try test_omp_sections_default()) { 65 | num_failed += 1; 66 | } 67 | } 68 | 69 | try std.testing.expect(num_failed == 0); 70 | } 71 | 72 | fn test_omp_parallel_sections_default() !bool { 73 | var sum: u32 = 7; 74 | const known_sum: u32 = @as(u32, (params.loop_count * (params.loop_count - 1)) / 2) + sum; 75 | 76 | var mysum: u32 = 0; 77 | var i: u32 = 0; 78 | const summer = struct { 79 | fn f(s: *u32, ms: *u32) void { 80 | s.* += ms.*; 81 | } 82 | }.f; 83 | 84 | omp.parallel(.{}) 85 | .sections(.{}) 86 | .run(.{ .shared = .{&sum}, .firstprivate = .{ &mysum, &i } }, 
.{ 87 | &struct { 88 | fn section(ff_sum: *u32, ff_mysum: *u32, ff_i: *u32) void { 89 | ff_i.* = 1; 90 | while (ff_i.* < 400) : (ff_i.* += 1) { 91 | ff_mysum.* += ff_i.*; 92 | } 93 | omp.critical(.{}).run(.{ ff_sum, ff_mysum }, summer); 94 | } 95 | }.section, 96 | &struct { 97 | fn section(ff_sum: *u32, ff_mysum: *u32, ff_i: *u32) void { 98 | ff_i.* = 400; 99 | while (ff_i.* < 700) : (ff_i.* += 1) { 100 | ff_mysum.* += ff_i.*; 101 | } 102 | omp.critical(.{}).run(.{ ff_sum, ff_mysum }, summer); 103 | } 104 | }.section, 105 | &struct { 106 | fn section(ff_sum: *u32, ff_mysum: *u32, ff_i: *u32) void { 107 | ff_i.* = 700; 108 | while (ff_i.* < 1000) : (ff_i.* += 1) { 109 | ff_mysum.* += ff_i.*; 110 | } 111 | omp.critical(.{}).run(.{ ff_sum, ff_mysum }, summer); 112 | } 113 | }.section, 114 | }); 115 | 116 | if (known_sum != sum) { 117 | std.debug.print("KNOWN_SUM = {}\n", .{known_sum}); 118 | std.debug.print("SUM = {}\n", .{sum}); 119 | } 120 | 121 | return known_sum == sum; 122 | } 123 | 124 | test "parallel_sections_default" { 125 | var num_failed: u32 = 0; 126 | for (0..params.repetitions) |_| { 127 | if (!try test_omp_parallel_sections_default()) { 128 | num_failed += 1; 129 | } 130 | } 131 | 132 | try std.testing.expect(num_failed == 0); 133 | } 134 | -------------------------------------------------------------------------------- /tests/parallel.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const omp = @import("omp"); 3 | const params = @import("params.zig"); 4 | 5 | fn test_omp_parallel_default() !bool { 6 | var sum: u32 = 0; 7 | var mysum: u32 = 0; 8 | const known_sum: u32 = (params.loop_count * (params.loop_count + 1)) / 2; 9 | 10 | omp.parallel(.{}) 11 | .run(.{ .shared = .{&sum}, .private = .{&mysum} }, struct { 12 | fn f(f_sum: *u32, f_mysum: *u32) void { 13 | f_mysum.* = 0; 14 | omp.loop(u32, .{}) 15 | .run(.{ .shared = .{f_mysum} }, 0, params.loop_count + 1, 1, struct { 16 | fn f(i: u32, ff_mysum: *u32) void { 17 | ff_mysum.* += i; 18 | } 19 | }.f); 20 | 21 | omp.critical(.{}) 22 | .run(.{ f_sum, f_mysum.* }, struct { 23 | fn f(ff_sum: *u32, ff_mysum: u32) void { 24 | ff_sum.* += ff_mysum; 25 | } 26 | }.f); 27 | } 28 | }.f); 29 | 30 | if (known_sum != sum) { 31 | std.debug.print("KNOWN_SUM = {}\n", .{known_sum}); 32 | std.debug.print("SUM = {}\n", .{sum}); 33 | } 34 | 35 | try std.testing.expect(mysum == 0); 36 | 37 | return known_sum == sum; 38 | } 39 | 40 | test "parallel_default" { 41 | var num_failed: u32 = 0; 42 | for (0..params.repetitions) |_| { 43 | if (!try test_omp_parallel_default()) { 44 | num_failed += 1; 45 | } 46 | } 47 | 48 | try std.testing.expect(num_failed == 0); 49 | } 50 | 51 | fn test_omp_parallel_if() !bool { 52 | var sum: u32 = 0; 53 | var mysum: u32 = 0; 54 | const control: u32 = 1; 55 | const known_sum: u32 = (params.loop_count * (params.loop_count + 1)) / 2; 56 | 57 | omp.parallel(.{ .iff = true }) 58 | .run(.{ .shared = .{&sum}, .private = .{&mysum} }, control == 0, struct { 59 | fn f(f_sum: *u32, f_mysum: *u32) void { 60 | f_mysum.* = 0; 61 | for (0..params.loop_count + 1) |i| { 62 | f_mysum.* += @as(u32, @intCast(i)); 63 | } 64 | 65 | omp.critical(.{}) 66 | .run(.{ f_sum, f_mysum.* }, struct { 67 | fn f(ff_sum: *u32, ff_mysum: u32) void { 68 | ff_sum.* += ff_mysum; 69 | } 70 | }.f); 71 | } 72 | }.f); 73 | 74 | if (known_sum != sum) { 75 | std.debug.print("KNOWN_SUM = {}\n", .{known_sum}); 76 | std.debug.print("SUM = {}\n", .{sum}); 77 | } 78 | 79 | try 
std.testing.expect(mysum == 0); 80 | 81 | return known_sum == sum; 82 | } 83 | 84 | test "parallel_if" { 85 | var num_failed: u32 = 0; 86 | for (0..params.repetitions) |_| { 87 | if (!try test_omp_parallel_if()) { 88 | num_failed += 1; 89 | } 90 | } 91 | 92 | try std.testing.expect(num_failed == 0); 93 | } 94 | 95 | fn test_omp_parallel_nested() bool { 96 | if (omp.get_max_threads() > 4) { 97 | omp.set_num_threads(4); 98 | } else if (omp.get_max_threads() < 2) { 99 | omp.set_num_threads(2); 100 | } 101 | 102 | var counter: i32 = 0; 103 | 104 | omp.set_nested(true); 105 | omp.set_max_active_levels(omp.get_max_active_levels()); 106 | 107 | omp.parallel(.{}) 108 | .run(.{ .shared = .{&counter} }, struct { 109 | fn f(f_counter: *i32) void { 110 | omp.critical(.{}) 111 | .run(.{f_counter}, struct { 112 | fn f(ff_counter: *i32) void { 113 | ff_counter.* += 1; 114 | } 115 | }.f); 116 | 117 | omp.parallel(.{}) 118 | .run(.{ .shared = .{f_counter} }, struct { 119 | fn f(pf_counter: *i32) void { 120 | omp.critical(.{}) 121 | .run(.{pf_counter}, struct { 122 | fn f(fpf_counter: *i32) void { 123 | fpf_counter.* -= 1; 124 | } 125 | }.f); 126 | } 127 | }.f); 128 | } 129 | }.f); 130 | 131 | return counter != 0; 132 | } 133 | 134 | test "parallel_nested" { 135 | var num_failed: u32 = 0; 136 | for (0..params.repetitions) |_| { 137 | if (!test_omp_parallel_nested()) { 138 | num_failed += 1; 139 | } 140 | } 141 | 142 | try std.testing.expect(num_failed == 0); 143 | } 144 | 145 | fn test_omp_parallel_private() !bool { 146 | var sum: u32 = 0; 147 | var num_threads: u32 = 0; 148 | var sum1: u32 = 0; 149 | 150 | omp.parallel(.{}) 151 | .run(.{ .shared = .{ &sum, &num_threads }, .private = .{&sum1} }, struct { 152 | fn f(f_sum: *u32, f_num_threads: *u32, f_sum1: *u32) void { 153 | f_sum1.* = 7; 154 | 155 | omp.loop(u32, .{}) 156 | .run(.{ .shared = .{f_sum1} }, 1, 1000, 1, struct { 157 | fn f(i: u32, ff_sum1: *u32) void { 158 | ff_sum1.* += i; 159 | } 160 | }.f); 161 | 162 | omp.critical(.{}) 163 | .run(.{ f_sum, f_num_threads, f_sum1.* }, struct { 164 | fn f(ff_sum: *u32, ff_num_threads: *u32, ff_sum1: u32) void { 165 | ff_sum.* += ff_sum1; 166 | ff_num_threads.* += 1; 167 | } 168 | }.f); 169 | } 170 | }.f); 171 | 172 | const known_sum: u32 = ((999 * 1000) / 2) + (7 * num_threads); 173 | if (known_sum != sum) { 174 | std.debug.print("NUM_THREADS = {}\n", .{num_threads}); 175 | std.debug.print("KNOWN_SUM = {}\n", .{known_sum}); 176 | std.debug.print("SUM = {}\n", .{sum}); 177 | } 178 | return known_sum == sum; 179 | } 180 | 181 | test "parallel_private" { 182 | var num_failed: u32 = 0; 183 | for (0..params.repetitions) |_| { 184 | if (!try test_omp_parallel_private()) { 185 | num_failed += 1; 186 | } 187 | } 188 | 189 | try std.testing.expect(num_failed == 0); 190 | } 191 | -------------------------------------------------------------------------------- /src/input_handler.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const omp = @import("omp.zig"); 3 | 4 | fn get_field_idx(comptime T: type, comptime field_name: []const u8) u32 { 5 | return comptime brk: { 6 | var idx: u32 = 0; 7 | for (@typeInfo(T).Struct.fields) |field| { 8 | if (std.mem.eql(u8, field_name, field.name)) { 9 | break :brk idx; 10 | } 11 | idx += 1; 12 | } 13 | break :brk idx; 14 | }; 15 | } 16 | 17 | pub inline fn no_error(comptime T: type) type { 18 | comptime { 19 | const info = @typeInfo(T); 20 | if (info != .ErrorUnion) { 21 | return T; 22 | } 23 | 24 | return 
info.ErrorUnion.payload; 25 | } 26 | } 27 | 28 | pub fn zigc_ret(comptime f: anytype, comptime args_type: type) type { 29 | const f_type_info = @typeInfo(@TypeOf(f)); 30 | if (f_type_info != .Fn) { 31 | @compileError("Expected function with signature `fn(, ...)`, got " ++ @typeName(@TypeOf(f)) ++ " instead."); 32 | } 33 | return struct { 34 | ret: copy_ret(f) = undefined, 35 | v: args_type = undefined, 36 | }; 37 | } 38 | 39 | pub fn copy_ret(comptime f: anytype) type { 40 | const typ = @typeInfo(@TypeOf(f)); 41 | if (typ == .Fn) return typ.Fn.return_type orelse void; 42 | if (typ == .Pointer) return @typeInfo(typ.Pointer.child).Fn.return_type orelse void; 43 | @compileError("You need to provide either a function pointer or a function"); 44 | } 45 | 46 | fn normalize_type(comptime T: type) type { 47 | var param_count: u32 = 0; 48 | const fields = @typeInfo(T).Struct.fields; 49 | const shared = val: { 50 | const idx = get_field_idx(T, "shared"); 51 | if (fields.len > idx) { 52 | param_count += 1; 53 | break :val fields[idx].type; 54 | } else { 55 | break :val @TypeOf(.{}); 56 | } 57 | }; 58 | 59 | const private = val: { 60 | const idx = get_field_idx(T, "private"); 61 | if (fields.len > idx) { 62 | param_count += 1; 63 | break :val fields[idx].type; 64 | } else { 65 | break :val @TypeOf(.{}); 66 | } 67 | }; 68 | 69 | const firstprivate = val: { 70 | const idx = get_field_idx(T, "firstprivate"); 71 | if (fields.len > idx) { 72 | param_count += 1; 73 | break :val fields[idx].type; 74 | } else { 75 | break :val @TypeOf(.{}); 76 | } 77 | }; 78 | 79 | const reduction = val: { 80 | const idx = get_field_idx(T, "reduction"); 81 | if (fields.len > idx) { 82 | param_count += 1; 83 | break :val fields[idx].type; 84 | } else { 85 | break :val @TypeOf(.{}); 86 | } 87 | }; 88 | 89 | if (@typeInfo(T) != .Struct or param_count != @typeInfo(T).Struct.fields.len) { 90 | @compileError("Expected struct like .{ .shared = .{...}, .private = .{...}, firstprivate = .{...}, .reduction = {...} }, got " ++ @typeName(T) ++ " instead."); 91 | } 92 | 93 | return struct { 94 | shared: shared, 95 | private: private, 96 | firstprivate: firstprivate, 97 | reduction: reduction, 98 | }; 99 | } 100 | 101 | pub fn has_field(comptime T: type, comptime field_name: []const u8) bool { 102 | for (std.meta.fieldNames(T)) |field| { 103 | if (std.mem.eql(u8, field_name, field)) { 104 | return true; 105 | } 106 | } 107 | return false; 108 | } 109 | 110 | pub fn normalize_args(args: anytype) normalize_type(@TypeOf(args)) { 111 | const args_type = @TypeOf(args); 112 | const shared = val: { 113 | if (comptime has_field(args_type, "shared")) { 114 | break :val args.shared; 115 | } 116 | break :val .{}; 117 | }; 118 | 119 | const private = val: { 120 | if (comptime has_field(args_type, "private")) { 121 | break :val args.private; 122 | } 123 | break :val .{}; 124 | }; 125 | 126 | const firstprivate = val: { 127 | if (comptime has_field(args_type, "firstprivate")) { 128 | break :val args.firstprivate; 129 | } 130 | break :val .{}; 131 | }; 132 | 133 | const reduction = val: { 134 | if (comptime has_field(args_type, "reduction")) { 135 | break :val args.reduction; 136 | } 137 | break :val .{}; 138 | }; 139 | 140 | return .{ .shared = shared, .private = private, .firstprivate = firstprivate, .reduction = reduction }; 141 | } 142 | 143 | pub fn check_fn_signature(comptime f: anytype) void { 144 | const f_type_info = @typeInfo(@TypeOf(f)); 145 | if (f_type_info == .Fn) 146 | return; 147 | if (@typeInfo(f_type_info.Pointer.child) == .Fn) { 148 | 
return; 149 | } 150 | @compileError("Expected function with signature `fn(, ...)`, got " ++ @typeName(@TypeOf(f)) ++ " instead."); 151 | } 152 | 153 | pub fn check_args(comptime T: type) void { 154 | const args_type_info = @typeInfo(T); 155 | if (args_type_info != .Struct) { 156 | @compileError("Expected struct or tuple, got " ++ @typeName(T) ++ " instead."); 157 | } 158 | } 159 | 160 | pub fn deep_size_of(comptime T: type) usize { 161 | var size: usize = @sizeOf(T); 162 | inline for (@typeInfo(T).Struct.fields) |field| { 163 | if (@typeInfo(field.type) == .Pointer) { 164 | size += @sizeOf(@typeInfo(field.type).Pointer.child); 165 | } 166 | } 167 | return size; 168 | } 169 | 170 | /// Shallow copy a struct with pointers 171 | /// This function will copy the struct and all the pointers it contains 172 | /// but it won't go more than one level deep 173 | /// 174 | /// WARNING: This function may be not memory safe if it doesn't get inlined 175 | pub inline fn shallow_copy(original: anytype) @TypeOf(original) { 176 | var copy: @TypeOf(original) = .{} ++ original; 177 | inline for (original, ©) |og, *v| { 178 | if (@typeInfo(@TypeOf(og)) == .Pointer) { 179 | var tmp = og.*; 180 | v.* = &tmp; 181 | } else { 182 | v.* = og; 183 | } 184 | } 185 | return copy; 186 | } 187 | 188 | /// Make another struct with the same fields as the original, but all values are set to undefined 189 | pub inline fn make_another(original: anytype) @TypeOf(original) { 190 | var copy: @TypeOf(original) = .{} ++ original; 191 | inline for (original, ©) |og, *v| { 192 | if (@typeInfo(@TypeOf(og)) == .Pointer) { 193 | var tmp: @TypeOf(og.*) = undefined; 194 | v.* = &tmp; 195 | } else { 196 | v.* = undefined; 197 | } 198 | } 199 | 200 | return copy; 201 | } 202 | -------------------------------------------------------------------------------- /tests/task.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const omp = @import("omp"); 3 | const params = @import("params.zig"); 4 | 5 | fn test_omp_task_default() !bool { 6 | var tids = [_]u32{0} ** params.num_tasks; 7 | 8 | omp.parallel(.{}) 9 | .run(.{ .shared = .{&tids} }, struct { 10 | fn f(f_tids: *[params.num_tasks]u32) void { 11 | omp.single() 12 | .run(.{f_tids}, struct { 13 | fn f(ff_tids: *[params.num_tasks]u32) void { 14 | for (0..params.num_tasks) |i| { 15 | // First we have to store the value of the loop index in a new variable 16 | // which will be private for each task because otherwise it will be overwritten 17 | // if the execution of the task takes longer than the time which is needed to 18 | // enter the next step of the loop! 
19 | 20 | const myi = i; 21 | omp.task(.{}).run(.{ .shared = .{ff_tids}, .firstprivate = .{myi} }, struct { 22 | fn f(fff_tids: *[params.num_tasks]u32, f_myi: usize) void { 23 | std.time.sleep(params.sleep_time); 24 | fff_tids[f_myi] = omp.get_thread_num(); 25 | } 26 | }.f); 27 | } 28 | } 29 | }.f); 30 | } 31 | }.f); 32 | 33 | var uses_only_one_thread = true; 34 | for (tids) |t| { 35 | uses_only_one_thread = uses_only_one_thread and t == tids[0]; 36 | } 37 | 38 | try std.testing.expect(!uses_only_one_thread); 39 | 40 | return true; 41 | } 42 | 43 | test "task_default" { 44 | if (omp.get_max_threads() < 2) { 45 | omp.set_num_threads(8); 46 | } 47 | 48 | var num_failed: u32 = 0; 49 | for (0..params.repetitions) |_| { 50 | if (!try test_omp_task_default()) { 51 | num_failed += 1; 52 | } 53 | } 54 | 55 | try std.testing.expect(num_failed == 0); 56 | } 57 | 58 | fn test_omp_task_if() !bool { 59 | var count: usize = 0; 60 | var result: usize = 0; 61 | 62 | omp.parallel(.{}) 63 | .run(.{ .shared = .{ &count, &result } }, struct { 64 | fn f(f_count: *usize, f_result: *usize) void { 65 | omp.single() 66 | .run(.{ f_count, f_result }, struct { 67 | fn f(ff_count: *usize, ff_result: *usize) void { 68 | // Try to see if the if makes it so that the task is never deferred 69 | // to another thread, in fact the critical block below must wait the sleep to run 70 | omp.task(.{ .iff = true }).run(false, .{ .shared = .{ ff_count, ff_result } }, struct { 71 | fn f(fff_count: *usize, fff_result: *usize) void { 72 | std.time.sleep(params.sleep_time); 73 | omp.critical(.{}).run(.{ fff_count, fff_result }, struct { 74 | fn f(_count: *usize, _result: *usize) void { 75 | _result.* = if (_count.* == 0) 1 else 0; 76 | } 77 | }.f); 78 | } 79 | }.f); 80 | 81 | // Now that the task is finished we can update the count 82 | omp.critical(.{}).run(.{ff_count}, struct { 83 | fn f(_count: *usize) void { 84 | _count.* = 1; 85 | } 86 | }.f); 87 | } 88 | }.f); 89 | } 90 | }.f); 91 | 92 | return result == 1; 93 | } 94 | 95 | test "task_if" { 96 | if (omp.get_max_threads() < 2) { 97 | omp.set_num_threads(8); 98 | } 99 | 100 | var num_failed: u32 = 0; 101 | for (0..params.repetitions) |_| { 102 | if (!try test_omp_task_if()) { 103 | num_failed += 1; 104 | } 105 | } 106 | 107 | try std.testing.expect(num_failed == 0); 108 | } 109 | 110 | fn test_omp_task_result() !bool { 111 | const t_type = *[params.num_tasks]u32; 112 | var tids = [_]u32{0} ** params.num_tasks; 113 | var include_tids = [_]u32{0} ** params.num_tasks; 114 | var err: usize = 0; 115 | 116 | omp.parallel(.{}) 117 | .run(.{ .shared = .{ &tids, &include_tids } }, struct { 118 | fn f(f_tids: t_type, f_inctids: t_type) void { 119 | omp.single() 120 | .run(.{ f_tids, f_inctids }, struct { 121 | fn f(ff_tids: t_type, ff_inctids: t_type) void { 122 | for (0..params.num_tasks) |i| { 123 | // First we have to store the value of the loop index in a new variable 124 | // which will be private for each task because otherwise it will be overwritten 125 | // if the execution of the task takes longer than the time which is needed to 126 | // enter the next step of the loop! 
127 | 128 | const myi = i; 129 | omp.task(.{ .final = true }) 130 | .run(i >= 5, .{ .shared = .{ ff_tids, ff_inctids }, .firstprivate = .{myi} }, struct { 131 | fn f(fff_tids: t_type, fff_inctids: t_type, f_myi: usize) void { 132 | fff_tids[f_myi] = omp.get_thread_num(); 133 | 134 | if (f_myi >= 5) { 135 | const included = f_myi; 136 | 137 | omp.task(.{}) 138 | .run(.{ .shared = .{fff_inctids}, .firstprivate = .{included} }, struct { 139 | fn f(_inctids: t_type, f_included: usize) void { 140 | std.time.sleep(params.sleep_time); 141 | _inctids[f_included] = omp.get_thread_num(); 142 | } 143 | }.f); 144 | 145 | std.time.sleep(params.sleep_time); 146 | } 147 | } 148 | }.f); 149 | } 150 | } 151 | }.f); 152 | } 153 | }.f); 154 | 155 | // Now we ckeck if more than one thread executed the final task and its included task. 156 | for (5..params.num_tasks) |t| { 157 | if (include_tids[t] != tids[t]) { 158 | err += 1; 159 | } 160 | } 161 | 162 | return err == 0; 163 | } 164 | 165 | test "task_result" { 166 | if (omp.get_max_threads() < 2) { 167 | omp.set_num_threads(8); 168 | } 169 | 170 | var num_failed: u32 = 0; 171 | for (0..params.repetitions) |_| { 172 | if (!try test_omp_task_result()) { 173 | num_failed += 1; 174 | } 175 | } 176 | 177 | try std.testing.expect(num_failed == 0); 178 | } 179 | -------------------------------------------------------------------------------- /src/reduce.zig: -------------------------------------------------------------------------------- 1 | const kmp = @import("kmp.zig"); 2 | const std = @import("std"); 3 | 4 | pub const operators = enum(c_int) { 5 | plus = 0, 6 | mult = 1, 7 | minus = 2, 8 | bitwise_and = 3, 9 | bitwise_or = 4, 10 | bitwise_xor = 5, 11 | logical_and = 6, 12 | logical_or = 7, 13 | max = 8, 14 | min = 9, 15 | none = 10, 16 | id = 11, 17 | custom = 12, 18 | }; 19 | 20 | pub inline fn reduce( 21 | comptime id: *const kmp.ident_t, 22 | comptime nowait: bool, 23 | out_reduction: anytype, 24 | copies: @TypeOf(out_reduction), 25 | comptime ops: []const operators, 26 | lck: *kmp.critical_name_t, 27 | ) c_int { 28 | const reduction_funcs = create(@typeInfo(@TypeOf(out_reduction)).Struct.fields, ops); 29 | const kmpc_reduce = if (nowait) 30 | kmp.reduce_nowait 31 | else 32 | kmp.reduce; 33 | 34 | const num_vars = copies.len; 35 | const reduce_size = @sizeOf(@TypeOf(out_reduction)); 36 | 37 | const has_data = kmpc_reduce( 38 | id, 39 | kmp.ctx.global_tid, 40 | num_vars, 41 | reduce_size, 42 | @ptrCast(@constCast(&copies)), 43 | reduction_funcs.for_omp, 44 | lck, 45 | ); 46 | 47 | switch (has_data) { 48 | 1 => { 49 | reduction_funcs.finalize(out_reduction, copies); 50 | const end_id = comptime .{ 51 | .flags = id.*.flags, 52 | .psource = id.*.psource, 53 | .reserved_3 = 0x1c, 54 | }; 55 | kmp.end_reduce_nowait(&end_id, kmp.ctx.global_tid, lck); 56 | }, 57 | 2 => { 58 | reduction_funcs.finalize_atomic(out_reduction, copies); 59 | }, 60 | else => {}, 61 | } 62 | 63 | return has_data; 64 | } 65 | 66 | pub inline fn create( 67 | comptime types: []const std.builtin.Type.StructField, 68 | comptime reduce_operators: []const operators, 69 | ) type { 70 | if (types.len != reduce_operators.len) { 71 | @compileError("The number of types and operators must match"); 72 | } 73 | 74 | return struct { 75 | pub inline fn finalize( 76 | lhs: anytype, 77 | rhs: @TypeOf(lhs), 78 | ) void { 79 | inline for (lhs, rhs) |l, r| { 80 | inline for (reduce_operators) |op| { 81 | switch (op) { 82 | .plus => { 83 | l.* += r.*; 84 | }, 85 | .mult => { 86 | l.* *= r.*; 87 | }, 88 | .minus 
=> { 89 | l.* -= r.*; 90 | }, 91 | .bitwise_and => { 92 | l.* &= r.*; 93 | }, 94 | .bitwise_or => { 95 | l.* |= r.*; 96 | }, 97 | .bitwise_xor => { 98 | l.* ^= r.*; 99 | }, 100 | .logical_and => { 101 | l.* = l.* and r.*; 102 | }, 103 | .logical_or => { 104 | l.* = l.* or r.*; 105 | }, 106 | .max => { 107 | l.* = @max(l.*, r.*); 108 | }, 109 | .min => { 110 | l.* = @min(l.*, r.*); 111 | }, 112 | .id => {}, 113 | .custom => l.reduce(r.*), 114 | .none => { 115 | @compileError("Specify the reduction operator"); 116 | }, 117 | } 118 | } 119 | } 120 | } 121 | 122 | pub inline fn single( 123 | lhs: anytype, 124 | rhs: @TypeOf(lhs.*), 125 | ) void { 126 | var l, const r = ret: { 127 | if (@typeInfo(@TypeOf(lhs.*)) == .ErrorUnion) { 128 | // Here we have to unwrap the error union to find out if there is an error, if there is just put it in 129 | // the left part since that is the one that will aggregate the data. This will propagate to all the 130 | // other computations since it will keep checking the left side. 131 | // 132 | // TODO: Find a more efficient way to do this, right now I don't think we can short the computation 133 | // and return the error directly, but if we can it would be better 134 | const t = lhs.* catch return; 135 | const t1 = rhs catch { 136 | lhs.* = rhs; 137 | return; 138 | }; 139 | 140 | break :ret .{ &t, t1 }; 141 | } else { 142 | break :ret .{ lhs, rhs }; 143 | } 144 | }; 145 | 146 | // @compileLog(l); 147 | inline for (reduce_operators) |op| { 148 | switch (op) { 149 | .plus => { 150 | l.* += r; 151 | }, 152 | .mult => { 153 | l.* *= r; 154 | }, 155 | .minus => { 156 | l.* -= r; 157 | }, 158 | .bitwise_and => { 159 | l.* &= r; 160 | }, 161 | .bitwise_or => { 162 | l.* |= r; 163 | }, 164 | .bitwise_xor => { 165 | l.* ^= r; 166 | }, 167 | .logical_and => { 168 | l.* = l.* and r; 169 | }, 170 | .logical_or => { 171 | l.* = l.* or r; 172 | }, 173 | .max => { 174 | l.* = @max(l.*, r); 175 | }, 176 | .min => { 177 | l.* = @min(l.*, r); 178 | }, 179 | .custom => l.reduce(r.*), 180 | .id => {}, 181 | .none => {}, 182 | } 183 | } 184 | } 185 | pub inline fn finalize_atomic( 186 | lhs: anytype, 187 | rhs: @TypeOf(lhs), 188 | ) void { 189 | inline for (lhs, rhs) |l, r| { 190 | inline for (reduce_operators, types) |op, type_field| { 191 | const T = @typeInfo(type_field.type).Pointer.child; 192 | switch (op) { 193 | .plus => { 194 | _ = @atomicRmw(T, l, .Add, r.*, .acq_rel); 195 | }, 196 | .mult => { 197 | _ = @atomicRmw(T, l, .Mul, r.*, .acq_rel); 198 | }, 199 | .minus => { 200 | _ = @atomicRmw(T, l, .Sub, r.*, .acq_rel); 201 | }, 202 | .bitwise_and => { 203 | _ = @atomicRmw(T, l, .And, r.*, .acq_rel); 204 | }, 205 | .bitwise_or => { 206 | _ = @atomicRmw(T, l, .Or, r.*, .acq_rel); 207 | }, 208 | .bitwise_xor => { 209 | _ = @atomicRmw(T, l, .Xor, r.*, .acq_rel); 210 | }, 211 | .logical_and => { 212 | _ = @atomicRmw(T, l, .And, r.*, .acq_rel); 213 | }, 214 | .logical_or => { 215 | _ = @atomicRmw(T, l, .Or, r.*, .acq_rel); 216 | }, 217 | .max => { 218 | _ = @atomicRmw(T, l, .Max, r.*, .acq_rel); 219 | }, 220 | .min => { 221 | _ = @atomicRmw(T, l, .Min, r.*, .acq_rel); 222 | }, 223 | .custom => l.atomic_reduce(r.*), 224 | .id => {}, 225 | .none => { 226 | @compileError("Specify the reduction operator"); 227 | }, 228 | } 229 | } 230 | } 231 | } 232 | 233 | fn for_omp( 234 | lhs: *anyopaque, 235 | rhs: *anyopaque, 236 | ) callconv(.C) void { 237 | inline for (reduce_operators, types) |op, T| { 238 | switch (op) { 239 | .plus => { 240 | const l = @as(*T.type, 
@ptrCast(@alignCast(lhs))).*; 241 | l.* += @as(*T.type, @ptrCast(@alignCast(rhs))).*.*; 242 | }, 243 | .mult => { 244 | const l = @as(*T.type, @ptrCast(@alignCast(lhs))).*; 245 | l.* *= @as(*T.type, @ptrCast(@alignCast(rhs))).*.*; 246 | }, 247 | .minus => { 248 | const l = @as(*T.type, @ptrCast(@alignCast(lhs))).*; 249 | l.* -= @as(*T.type, @ptrCast(@alignCast(rhs))).*.*; 250 | }, 251 | .bitwise_and => { 252 | const l = @as(*T.type, @ptrCast(@alignCast(lhs))).*; 253 | l.* &= @as(*T.type, @ptrCast(@alignCast(rhs))).*.*; 254 | }, 255 | .bitwise_or => { 256 | const l = @as(*T.type, @ptrCast(@alignCast(lhs))).*; 257 | l.* |= @as(*T.type, @ptrCast(@alignCast(rhs))).*.*; 258 | }, 259 | .bitwise_xor => { 260 | const l = @as(*T.type, @ptrCast(@alignCast(lhs))).*; 261 | l.* ^= @as(*T.type, @ptrCast(@alignCast(rhs))).*.*; 262 | }, 263 | .logical_and => { 264 | const l = @as(*T.type, @ptrCast(@alignCast(lhs))).*; 265 | l.* = l.* and @as(*T.type, @ptrCast(@alignCast(rhs))).*.*; 266 | }, 267 | .logical_or => { 268 | const l = @as(*T.type, @ptrCast(@alignCast(lhs))).*; 269 | l.* = l.* or @as(*T.type, @ptrCast(@alignCast(rhs))).*.*; 270 | }, 271 | .max => { 272 | const l = @as(*T.type, @ptrCast(@alignCast(lhs))).*; 273 | l.* = @max(l.*, @as(*T.type, @ptrCast(@alignCast(rhs))).*.*); 274 | }, 275 | .min => { 276 | const l = @as(*T.type, @ptrCast(@alignCast(lhs))).*; 277 | l.* = @min(l.*, @as(*T.type, @ptrCast(@alignCast(rhs))).*.*); 278 | }, 279 | .custom => { 280 | const l = @as(*T.type, @ptrCast(@alignCast(lhs))).*; 281 | l.reduce(@as(*T.type, @ptrCast(@alignCast(rhs))).*.*); 282 | }, 283 | .id => {}, 284 | .none => { 285 | @compileError("Specify the reduction operator"); 286 | }, 287 | } 288 | } 289 | } 290 | }; 291 | } 292 | -------------------------------------------------------------------------------- /src/kmp.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const omp = @import("omp.zig"); 3 | const opts = @import("build_options"); 4 | const ompt = @import("ompt.zig"); 5 | 6 | pub threadlocal var ctx: context = undefined; 7 | pub const context = struct { 8 | global_tid: c_int, 9 | bound_tid: c_int, 10 | }; 11 | 12 | pub const ident_flags = enum(c_int) { 13 | // /*! Use trampoline for internal microtasks */ 14 | IDENT_IMB = 0x01, 15 | // /*! Use c-style ident structure */ 16 | IDENT_KMPC = 0x02, 17 | // /* 0x04 is no longer used */ 18 | // /*! Entry point generated by auto-parallelization */ 19 | IDENT_AUTOPAR = 0x08, 20 | // /*! Compiler generates atomic reduction option for kmpc_reduce* */ 21 | IDENT_ATOMIC_REDUCE = 0x10, 22 | // /*! To mark a 'barrier' directive in user code */ 23 | IDENT_BARRIER_EXPL = 0x20, 24 | // /*! To Mark implicit barriers. */ 25 | // IDENT_BARRIER_IMPL_FOR = 0x0040, 26 | IDENT_BARRIER_IMPL = 0x0040, 27 | IDENT_BARRIER_IMPL_SECTIONS = 0x00C0, 28 | 29 | IDENT_BARRIER_IMPL_SINGLE = 0x0140, 30 | // IDENT_BARRIER_IMPL_MASK = 0x01C0, 31 | IDENT_BARRIER_IMPL_WORKSHARE = 0x01C0, 32 | 33 | // /*! To mark a static loop in OMPT callbacks */ 34 | IDENT_WORK_LOOP = 0x200, 35 | // /*! To mark a sections directive in OMPT callbacks */ 36 | IDENT_WORK_SECTIONS = 0x400, 37 | // /*! To mark a distribute construct in OMPT callbacks */ 38 | IDENT_WORK_DISTRIBUTE = 0x800, 39 | // /*! Atomic hint; bottom four bits as omp_sync_hint_t. Top four reserved and 40 | // not currently used. 
If one day we need more bits, then we can use 41 | // an invalid combination of hints to mean that another, larger field 42 | // should be used in a different flag. */ 43 | // IDENT_ATOMIC_HINT_MASK = 0xFF0000, 44 | // IDENT_ATOMIC_HINT_UNCONTENDED = 0x010000, 45 | // IDENT_ATOMIC_HINT_CONTENDED = 0x020000, 46 | // IDENT_ATOMIC_HINT_NONSPECULATIVE = 0x040000, 47 | // IDENT_ATOMIC_HINT_SPECULATIVE = 0x080000, 48 | // IDENT_OPENMP_SPEC_VERSION_MASK = 0xFF000000, 49 | }; 50 | 51 | pub const sched_t = enum(c_int) { 52 | StaticChunked = 33, 53 | StaticNonChunked = 34, 54 | Dynamic = 35, 55 | Guided = 36, 56 | Runtime = 37, 57 | }; 58 | 59 | pub const ident_t = extern struct { 60 | // might be used in fortran, we can just keep it 0 61 | reserved_1: c_int = 0, 62 | // flags from above 63 | flags: c_int = 0, 64 | reserved_2: c_int = 0, 65 | reserved_3: c_int = 35, 66 | psource: [*:0]const u8, 67 | }; 68 | 69 | // TODO: see this alignment because it seems strange 70 | pub const kmpc_micro_t = fn (global_tid: *c_int, bound_tid: *c_int, args: *align(@alignOf(usize)) anyopaque) callconv(.C) void; 71 | 72 | extern "omp" fn __kmpc_fork_call(name: *const ident_t, argc: c_int, fun: *const kmpc_micro_t, ...) void; 73 | pub inline fn fork_call(comptime name: *const ident_t, argc: c_int, fun: *const kmpc_micro_t, args: anytype) void { 74 | __kmpc_fork_call(name, argc, fun, args); 75 | } 76 | // it's not really variadic, so make sure to pass only one argument 77 | extern "omp" fn __kmpc_fork_call_if(name: *const ident_t, argc: c_int, fun: *const kmpc_micro_t, cond: c_int, ...) void; 78 | pub inline fn fork_call_if(comptime name: *const ident_t, argc: c_int, fun: *const kmpc_micro_t, cond: c_int, args: anytype) void { 79 | __kmpc_fork_call_if(name, argc, fun, cond, args); 80 | } 81 | 82 | extern "omp" fn __kmpc_for_static_init_4(loc: *const ident_t, gtid: c_int, schedtype: c_int, plastiter: *c_int, plower: *c_int, pupper: *c_int, pstride: *c_int, incr: c_int, chunk: c_int) void; 83 | extern "omp" fn __kmpc_for_static_init_4u(loc: *const ident_t, gtid: c_int, schedtype: c_int, plastiter: *c_int, plower: *c_uint, pupper: *c_uint, pstride: *c_int, incr: c_int, chunk: c_int) void; 84 | extern "omp" fn __kmpc_for_static_init_8(loc: *const ident_t, gtid: c_int, schedtype: c_int, plastiter: *c_int, plower: *c_long, pupper: *c_long, pstride: *c_long, incr: c_long, chunk: c_long) void; 85 | extern "omp" fn __kmpc_for_static_init_8u(loc: *const ident_t, gtid: c_int, schedtype: c_int, plastiter: *c_int, plower: *c_ulong, pupper: *c_ulong, pstride: *c_long, incr: c_long, chunk: c_long) void; 86 | pub inline fn for_static_init(comptime T: type, comptime loc: *const ident_t, gtid: c_int, schedtype: sched_t, plastiter: *c_int, plower: *T, pupper: *T, pstride: *T, incr: T, chunk: T) void { 87 | if (@typeInfo(T).Int.signedness == .signed) 88 | if (@typeInfo(T).Int.bits <= 32) { 89 | __kmpc_for_static_init_4(loc, gtid, @intFromEnum(schedtype), plastiter, @ptrCast(plower), @ptrCast(pupper), @ptrCast(pstride), @bitCast(incr), @bitCast(chunk)); 90 | } else if (@typeInfo(T).Int.bits <= 64) { 91 | __kmpc_for_static_init_8(loc, gtid, @intFromEnum(schedtype), plastiter, @ptrCast(plower), @ptrCast(pupper), @ptrCast(pstride), @bitCast(incr), @bitCast(chunk)); 92 | } else { 93 | @compileError("Unsupported integer size"); 94 | } 95 | else if (@typeInfo(T).Int.signedness == .unsigned) { 96 | if (@typeInfo(T).Int.bits <= 32) { 97 | __kmpc_for_static_init_4u(loc, gtid, @intFromEnum(schedtype), plastiter, @ptrCast(plower), @ptrCast(pupper), 
@ptrCast(pstride), @bitCast(incr), @bitCast(chunk));
98 | } else if (@typeInfo(T).Int.bits <= 64) {
99 | __kmpc_for_static_init_8u(loc, gtid, @intFromEnum(schedtype), plastiter, @ptrCast(plower), @ptrCast(pupper), @ptrCast(pstride), @bitCast(incr), @bitCast(chunk));
100 | } else {
101 | @compileError("Unsupported unsigned integer size");
102 | }
103 | } else {
104 | unreachable;
105 | }
106 | }
107 |
108 | extern "omp" fn __kmpc_for_static_fini(loc: *const ident_t, global_tid: c_int) void;
109 | pub inline fn for_static_fini(comptime name: *const ident_t, global_tid: c_int) void {
110 | __kmpc_for_static_fini(name, global_tid);
111 | }
112 |
113 | extern "omp" fn __kmpc_dispatch_init_4(loc: *const ident_t, gtid: c_int, schedule: c_int, lb: c_int, ub: c_int, st: c_int, chunk: c_int) void;
114 | extern "omp" fn __kmpc_dispatch_init_4u(loc: *const ident_t, gtid: c_int, schedule: c_int, lb: c_uint, ub: c_uint, st: c_int, chunk: c_int) void;
115 | extern "omp" fn __kmpc_dispatch_init_8(loc: *const ident_t, gtid: c_int, schedule: c_int, lb: c_long, ub: c_long, st: c_long, chunk: c_long) void;
116 | extern "omp" fn __kmpc_dispatch_init_8u(loc: *const ident_t, gtid: c_int, schedule: c_int, lb: c_ulong, ub: c_ulong, st: c_long, chunk: c_long) void;
117 | pub inline fn dispatch_init(comptime T: type, comptime loc: *const ident_t, gtid: c_int, schedule: sched_t, lb: T, ub: T, st: T, chunk: T) void {
118 | if (@typeInfo(T).Int.signedness == .signed) {
119 | if (@typeInfo(T).Int.bits <= 32) {
120 | __kmpc_dispatch_init_4(loc, gtid, @intFromEnum(schedule), @intCast(lb), @intCast(ub), @intCast(st), @intCast(chunk));
121 | } else if (@typeInfo(T).Int.bits <= 64) {
122 | __kmpc_dispatch_init_8(loc, gtid, @intFromEnum(schedule), @intCast(lb), @intCast(ub), @intCast(st), @intCast(chunk));
123 | } else {
124 | @compileError("Unsupported integer size");
125 | }
126 | } else if (@typeInfo(T).Int.signedness == .unsigned) {
127 | if (@typeInfo(T).Int.bits <= 32) {
128 | __kmpc_dispatch_init_4u(loc, gtid, @intFromEnum(schedule), @intCast(lb), @intCast(ub), @intCast(st), @intCast(chunk));
129 | } else if (@typeInfo(T).Int.bits <= 64) {
130 | __kmpc_dispatch_init_8u(loc, gtid, @intFromEnum(schedule), @intCast(lb), @intCast(ub), @intCast(st), @intCast(chunk));
131 | } else {
132 | @compileError("Unsupported unsigned integer size");
133 | }
134 | } else {
135 | unreachable;
136 | }
137 | }
138 |
139 | extern "omp" fn __kmpc_dispatch_next_4(loc: *const ident_t, gtid: c_int, p_last: *c_int, p_lb: *c_int, p_ub: *c_int, p_st: *c_int) c_int;
140 | extern "omp" fn __kmpc_dispatch_next_4u(loc: *const ident_t, gtid: c_int, p_last: *c_int, p_lb: *c_uint, p_ub: *c_uint, p_st: *c_int) c_int;
141 | extern "omp" fn __kmpc_dispatch_next_8(loc: *const ident_t, gtid: c_int, p_last: *c_int, p_lb: *c_long, p_ub: *c_long, p_st: *c_long) c_int;
142 | extern "omp" fn __kmpc_dispatch_next_8u(loc: *const ident_t, gtid: c_int, p_last: *c_int, p_lb: *c_ulong, p_ub: *c_ulong, p_st: *c_long) c_int;
143 | pub inline fn dispatch_next(comptime T: type, comptime loc: *const ident_t, gtid: c_int, p_last: *c_int, p_lb: *T, p_ub: *T, p_st: *T) c_int {
144 | if (std.meta.trait.isSignedInt(T)) {
145 | if (@typeInfo(T).Int.bits <= 32) {
146 | return __kmpc_dispatch_next_4(loc, gtid, p_last, @ptrCast(p_lb), @ptrCast(p_ub), @ptrCast(p_st));
147 | } else if (@typeInfo(T).Int.bits <= 64) {
148 | return __kmpc_dispatch_next_8(loc, gtid, p_last, @ptrCast(p_lb), @ptrCast(p_ub), @ptrCast(p_st));
149 | } else {
150 | @compileError("Unsupported integer size"); 151
| } 152 | } else if (std.meta.trait.isUnsignedInt(T)) { 153 | if (@typeInfo(T).Int.bits <= 32) { 154 | return __kmpc_dispatch_next_4u(loc, gtid, p_last, @ptrCast(p_lb), @ptrCast(p_ub), @ptrCast(p_st)); 155 | } else if (@typeInfo(T).Int.bits <= 64) { 156 | return __kmpc_dispatch_next_8u(loc, gtid, p_last, @ptrCast(p_lb), @ptrCast(p_ub), @ptrCast(p_st)); 157 | } else { 158 | @compileError("Unsupported unsigned integer size"); 159 | } 160 | } else { 161 | unreachable; 162 | } 163 | } 164 | 165 | extern "omp" fn __kmpc_dispatch_fini_4(loc: *const ident_t, gtid: c_int) void; 166 | extern "omp" fn __kmpc_dispatch_fini_4u(loc: *const ident_t, gtid: c_int) void; 167 | extern "omp" fn __kmpc_dispatch_fini_8(loc: *const ident_t, gtid: c_int) void; 168 | extern "omp" fn __kmpc_dispatch_fini_8u(loc: *const ident_t, gtid: c_int) void; 169 | pub inline fn dispatch_fini(comptime T: type, comptime loc: *const ident_t, gtid: c_int) void { 170 | if (@typeInfo(T).Int.signedness == .signed) { 171 | if (@typeInfo(T).Int.bits <= 32) { 172 | __kmpc_dispatch_fini_4(loc, gtid); 173 | } else if (@typeInfo(T).Int.bits <= 64) { 174 | __kmpc_dispatch_fini_8(loc, gtid); 175 | } else { 176 | @compileError("Unsupported integer size"); 177 | } 178 | } else if (@typeInfo(T).Int.signedness == .unsigned) { 179 | if (@typeInfo(T).Int.bits <= 32) { 180 | __kmpc_dispatch_fini_4u(loc, gtid); 181 | } else if (@typeInfo(T).Int.bits <= 64) { 182 | __kmpc_dispatch_fini_8u(loc, gtid); 183 | } else { 184 | @compileError("Unsupported unsigned integer size"); 185 | } 186 | } else { 187 | unreachable; 188 | } 189 | } 190 | 191 | extern "omp" fn __kmpc_ordered(loc: *const ident_t, global_tid: c_int) void; 192 | pub inline fn ordered(comptime name: *const ident_t, global_tid: c_int) void { 193 | __kmpc_ordered(name, global_tid); 194 | } 195 | 196 | extern "omp" fn __kmpc_end_ordered(loc: *const ident_t, global_tid: c_int) void; 197 | pub inline fn end_ordered(comptime name: *const ident_t, global_tid: c_int) void { 198 | __kmpc_end_ordered(name, global_tid); 199 | } 200 | 201 | extern "omp" fn __kmpc_masked(loc: *const ident_t, global_tid: c_int, filter: c_int) c_int; 202 | pub inline fn masked(comptime name: *const ident_t, global_tid: c_int, filter: c_int) c_int { 203 | return __kmpc_masked(name, global_tid, filter); 204 | } 205 | 206 | extern "omp" fn __kmpc_end_masked(loc: *const ident_t, global_tid: c_int) void; 207 | pub inline fn end_masked(comptime name: *const ident_t, global_tid: c_int) void { 208 | __kmpc_end_masked(name, global_tid); 209 | } 210 | 211 | extern "omp" fn __kmpc_single(loc: *const ident_t, global_tid: c_int) c_int; 212 | pub inline fn single(comptime name: *const ident_t, global_tid: c_int) c_int { 213 | return __kmpc_single(name, global_tid); 214 | } 215 | 216 | extern "omp" fn __kmpc_end_single(loc: *const ident_t, global_tid: c_int) void; 217 | pub inline fn end_single(comptime name: *const ident_t, global_tid: c_int) void { 218 | __kmpc_end_single(name, global_tid); 219 | } 220 | 221 | extern "omp" fn __kmpc_barrier(loc: *const ident_t, global_tid: c_int) void; 222 | pub inline fn barrier(comptime name: *const ident_t, global_tid: c_int) void { 223 | __kmpc_barrier(name, global_tid); 224 | } 225 | 226 | extern "omp" fn __kmpc_global_thread_num() c_int; 227 | pub inline fn get_tid() c_int { 228 | return __kmpc_global_thread_num(); 229 | } 230 | 231 | extern "omp" fn __kmpc_push_num_threads(loc: *const ident_t, global_tid: c_int, num_threads: c_int) void; 232 | pub inline fn push_num_threads(comptime name: 
*const ident_t, global_tid: c_int, num_threads: c_int) void { 233 | __kmpc_push_num_threads(name, global_tid, num_threads); 234 | } 235 | 236 | pub const critical_name_t = [8]c_int; // This seems to be just a lock, so I give up on ever using it 237 | extern "omp" fn __kmpc_critical_with_hint(loc: *const ident_t, global_tid: c_int, crit: *critical_name_t, hint: c_int) void; 238 | pub inline fn critical(comptime loc: *const ident_t, global_tid: c_int, crit: *critical_name_t, hint: c_int) void { 239 | __kmpc_critical_with_hint(loc, global_tid, crit, hint); 240 | } 241 | 242 | extern "omp" fn __kmpc_end_critical(loc: *const ident_t, global_tid: c_int, crit: *critical_name_t) void; 243 | pub inline fn critical_end(comptime loc: *const ident_t, global_tid: c_int, crit: *critical_name_t) void { 244 | __kmpc_end_critical(loc, global_tid, crit); 245 | } 246 | 247 | extern "omp" fn __kmpc_flush(loc: *const ident_t) void; 248 | pub inline fn flush(comptime name: *const ident_t) void { 249 | __kmpc_flush(name); 250 | } 251 | // Todo: invert for big endian 252 | pub const tasking_flags = packed struct { 253 | tiedness: u1 = 0, // task is either tied (1) or untied (0) */ 254 | final: u1 = 0, // task is final(1) so execute immediately */ 255 | merged_if0: u1 = 0, // no __kmpc_task_{begin/complete}_if0 calls in if0 code path */ 256 | destructors_thunk: u1 = 0, // set if the compiler creates a thunk toinvoke destructors from the runtime */ 257 | proxy: u1 = 0, // task is a proxy task (it will be executed outside thecontext of the RTL) */ 258 | priority_specified: u1 = 0, // set if the compiler provides priority setting for the task */ 259 | detachable: u1 = 0, // 1 == can detach */ 260 | hidden_helper: u1 = 0, // 1 == hidden helper task */ 261 | reserved: u8 = 0, // reserved for compiler use */ 262 | 263 | // Library flags */ /* Total library flags must be 1 = 0,6 bits */ 264 | tasktype: u1 = 0, // task is either explicit(1) or implicit (0) */ 265 | task_serial: u1 = 0, // task is executed immediately (1) or deferred (0) 266 | tasking_ser: u1 = 0, // all tasks in team are either executed immediately 267 | // (1 = 0,) or may be deferred (0) 268 | team_serial: u1 = 0, // entire team is serial (1) [1 thread] or parallel 269 | // (0) [>= 2 threads] 270 | // If either team_serial or tasking_ser is set = 0, task team may be NULL */ 271 | // Task State Flags: u*/ 272 | started: u1 = 0, // 1==started, 0==not started */ 273 | executing: u1 = 0, // 1==executing, 0==not executing */ 274 | complete: u1 = 0, // 1==complete, 0==not complete */ 275 | freed: u1 = 0, // 1==freed, 0==allocated */ 276 | native: u1 = 0, // 1==gcc-compiled task, 0==intel */ 277 | onced: u1 = 0, // 1==ran once already, 0==never ran, record & replay purposes */ 278 | reserved31: u6 = 0, // reserved for library use */ 279 | }; 280 | 281 | inline fn ifdef(comptime d: bool, t: type) type { 282 | return if (d) t else void; 283 | } 284 | 285 | const cache_line_size = 64; 286 | pub const task_data_t = extern struct { 287 | td_task_id: c_int, // id, assigned by debugger 288 | td_flags: tasking_flags, // task flags 289 | td_team: *anyopaque, // kmp_team_t, // team for this task 290 | td_alloc_thread: *anyopaque, // kmp_info_p *td_alloc_thread; // thread that allocated data structures 291 | // Currently not used except for perhaps IDB 292 | 293 | td_parent: *@This(), 294 | td_level: c_int, 295 | td_untied_count: std.atomic.Value(c_int), // untied task active parts counter 296 | td_ident: *ident_t, 297 | // Taskwait data. 
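// Illustrative sketch (not part of kmp.zig): the `tasking_flags` packed struct above
// is laid out so that its bit-fields add up to exactly 32 bits, which is why
// task_t.alloc() below can hand it to __kmpc_omp_task_alloc with a plain @bitCast.
// The constant names here are made up for the example.
comptime {
    std.debug.assert(@bitSizeOf(tasking_flags) == 32);
}
const example_flags = tasking_flags{ .tiedness = 1, .final = 1 }; // a tied, final task
const example_as_c_int: c_int = @bitCast(example_flags);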
298 | 299 | td_taskwait_ident: *ident_t, 300 | td_taskwait_counter: c_int, 301 | td_taskwait_thread: c_int, 302 | td_icvs: internal_control align(cache_line_size), 303 | td_allocated_child_tasks: std.atomic.Value(c_int) align(cache_line_size), 304 | td_incomplete_child_tasks: std.atomic.Value(c_int), 305 | // kmp_taskgroup_t* 306 | td_taskgroup: *anyopaque, // Each task keeps pointer to its current taskgroup 307 | // kmp_dephash_t* 308 | td_dephash: *anyopaque, // Dependencies for children tasks are tracked from here 309 | // kmp_depnode_t* 310 | td_depnode: *anyopaque, // Pointer to graph node if this task has dependencies 311 | td_task_team: *anyopaque, // kmp_task_team_t * 312 | td_size_alloc: usize, // Size of task structure, including shareds etc. 313 | // 4 or 8 byte integers for the loop bounds in GOMP_taskloop 314 | td_size_loop_bounds: ifdef(opts.gomp_support, c_int), 315 | 316 | td_last_tied: *@This(), // keep tied task scheduling constraint 317 | // GOMP sends in a copy function for copy constructors 318 | td_copy_func: ifdef(opts.gomp_support, *const fn (*anyopaque, *anyopaque) callconv(.C) void), 319 | 320 | td_allow_completion_event: *anyopaque, // kmp_event_t 321 | ompt_task_info: ifdef(opts.ompt_support, ompt.task_info_t), 322 | is_taskgraph: ifdef(opts.ompx_support, c_char), // whether the task is within a TDG 323 | tdg: ifdef(opts.ompx_support, *anyopaque), // kmp_tdg_info_t *// used to associate task with a TDG 324 | td_target_data: target_data_t, 325 | }; 326 | 327 | const event_type_t = enum(c_int) { 328 | KMP_EVENT_UNINITIALIZED = 0, 329 | KMP_EVENT_ALLOW_COMPLETION = 1, 330 | }; 331 | 332 | const envent_t = extern struct { 333 | typ: event_type_t, 334 | lock: tas_lock, 335 | task: task_t(void, void), 336 | }; 337 | // TODO: SWITCH FOR BIG/LITTLE ENDIAN 338 | const base_tas_lock_t = extern struct { 339 | // KMP_LOCK_FREE(tas) => unlocked; locked: (gtid+1) of owning thread 340 | // Flip the ordering of the high and low 32-bit member to be consistent 341 | // with the memory layout of the address in 64-bit big-endian. 
342 | poll: std.atomic.Value(c_int), 343 | depth_locked: c_int, // depth locked, for nested locks only 344 | }; 345 | 346 | const lock_pool_t = extern struct { 347 | next: *tas_lock, // TODO: This technically is a union of locks, but since I don't want to copy every struct this will suffice 348 | index: c_int, 349 | }; 350 | 351 | const tas_lock = union { 352 | lk: base_tas_lock_t, 353 | pool: lock_pool_t, // make certain struct is large enough 354 | lk_align: c_longdouble, // use worst case alignment; no cache line padding 355 | }; 356 | 357 | const internal_control = extern struct { 358 | serial_nesting_level: c_char, // /* corresponds to the value of the th_team_serialized field */ 359 | dynamic: c_char, // /* internal control for dynamic adjustment of threads (per thread) */ 360 | bt_set: c_char, // internal control for whether blocktime is explicitly set */ 361 | blocktime: c_int, //* internal control for blocktime */ 362 | bt_intervals: ifdef(opts.kmp_monitor_support, c_int), //* internal control for blocktime intervals */ 363 | nproc: c_int, // internal control for #threads for next parallel region (per // thread) */ 364 | thread_limit: c_int, //* internal control for thread-limit-var */ 365 | task_thread_limit: c_int, //; /* internal control for thread-limit-var of a task*/ 366 | max_active_levels: c_int, //; /* internal control for max_active_levels */ 367 | sched: r_sched, //* internal control for runtime schedule {sched,chunk} pair */ 368 | proc_bind: proc_bind_t, //; /* internal control for affinity */ 369 | default_device: c_int, //* internal control for default device */ 370 | next: *@This(), 371 | }; 372 | 373 | const proc_bind_t = enum(c_int) { 374 | proc_bind_false = 0, 375 | proc_bind_true, 376 | proc_bind_primary, 377 | proc_bind_close, 378 | proc_bind_spread, 379 | proc_bind_intel, // use KMP_AFFINITY interface 380 | proc_bind_default, 381 | }; 382 | 383 | // Technically it's a union but who cares `kmp_r_sched' 384 | const r_sched = isize; 385 | 386 | const target_data_t = extern struct { 387 | async_handle: *anyopaque, // libomptarget async handle for task completion query 388 | }; 389 | 390 | // This is just the default task struct, since this is polymorphic, just providing the prototype is enough 391 | const kmp_task_t = task_t(void, void, void); 392 | 393 | // TODO: Use kmp_task_t and then just cast the types back and forth 394 | extern "omp" fn __kmpc_omp_task(loc_ref: *const ident_t, gtid: c_int, new_task: *anyopaque) c_int; 395 | extern "omp" fn __kmpc_omp_task_begin_if0(loc_ref: *const ident_t, gtid: c_int, new_task: *anyopaque) void; 396 | extern "omp" fn __kmpc_omp_task_complete_if0(loc_ref: *const ident_t, gtid: c_int, new_task: *anyopaque) void; 397 | 398 | // Same trick as before, this is not really variadic 399 | extern "omp" fn __kmpc_omp_task_alloc(loc_ref: *const ident_t, gtid: c_int, flags: c_int, sizeof_kmp_task_t: usize, sizeof_shareds: usize, ...) 
*kmp_task_t; 400 | 401 | const opaque_routine_entry_t = *const fn (c_int, *kmp_task_t) callconv(.C) c_int; 402 | const opaque_cmplrdata_t = extern union { 403 | priority: c_int, 404 | destructors: opaque_routine_entry_t, 405 | }; 406 | 407 | pub inline fn promise(comptime ret: type) type { 408 | return struct { 409 | const allocator = std.heap.c_allocator; 410 | 411 | result: ret = undefined, 412 | resolved: std.atomic.Value(bool) = std.atomic.Value(bool).init(false), 413 | 414 | pub inline fn init() !*@This() { 415 | return try allocator.create(@This()); 416 | } 417 | 418 | pub inline fn deinit(self: *@This()) void { 419 | allocator.free(std.mem.asBytes(self)); 420 | } 421 | 422 | pub fn get(self: *@This()) ret { 423 | while (self.resolved.cmpxchgStrong(false, true, .seq_cst, .seq_cst)) |val| { 424 | if (val) break; 425 | std.atomic.spinLoopHint(); 426 | } 427 | 428 | return self.result; 429 | } 430 | 431 | pub inline fn release(self: *@This()) void { 432 | self.resolved.store(true, .release); 433 | } 434 | }; 435 | } 436 | 437 | /// This represents the type `kmp_task_t' or `TaskDescriptorTy' in the source code. 438 | /// It's a polymorphic type that just need `shareds' and `routine' as the preamble to work 439 | /// and then the alloc() will allocate enough space for all the variables that are not explicitally specified 440 | /// in the LLVM source code, like for example the privates here, or part_id 441 | pub inline fn task_t(comptime shareds: type, comptime pri: type, comptime ret: type) type { 442 | // This is needed because extern structs cannot contain normal structs, but we need 443 | // the extern struct since it has consitent ABI and won't rearrange the data. This is 444 | // required for calling the destructor since it's called by C and not by us. 
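// Usage sketch (illustrative only) for the promise(ret) helper defined above: a
// producer fills `result` and calls release(), and a consumer then reads the value
// back with get(), which is intended to return only once the promise has been
// resolved. The function name below is made up for the example.
fn promise_roundtrip() !u32 {
    var p = try promise(u32).init();
    defer p.deinit();

    p.result = 42; // producer side
    p.release(); // mark the promise as resolved

    return p.get(); // consumer side
}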
445 | return extern struct { 446 | const self_t = @This(); 447 | const routine_entry_t = *const fn (c_int, *self_t) callconv(.C) c_int; 448 | const cmplrdata_t = extern union { 449 | priority: c_int, 450 | destructors: routine_entry_t, 451 | }; 452 | 453 | shareds: *shareds, 454 | routine: routine_entry_t, 455 | part_id: c_int, 456 | data1: cmplrdata_t, 457 | data2: cmplrdata_t, 458 | // This can't be a real type since they don't have defined memory structure 459 | privates: [@sizeOf(pri)]u8, 460 | result: if (ret == void) void else *promise(ret), 461 | 462 | inline fn outline(comptime f: anytype) type { 463 | return opaque { 464 | pub fn task(gtid: c_int, t: *self_t) callconv(.C) c_int { 465 | _ = gtid; 466 | 467 | const _shareds = t.shareds.*; 468 | const _privates: pri = std.mem.bytesAsValue(pri, &t.privates).*; 469 | 470 | const r = @call(.always_inline, f, _shareds ++ _privates); 471 | 472 | if (ret != void) { 473 | var pro = t.result; 474 | pro.result = r; 475 | } 476 | return 0; 477 | } 478 | }; 479 | } 480 | 481 | pub inline fn alloc( 482 | comptime f: anytype, 483 | comptime name: *const ident_t, 484 | gtid: c_int, 485 | flags: tasking_flags, 486 | ) *@This() { 487 | const t = &@This().outline(f).task; 488 | return @ptrCast(__kmpc_omp_task_alloc( 489 | name, 490 | gtid, 491 | @bitCast(flags), 492 | @sizeOf(@This()), 493 | @sizeOf(@TypeOf(shareds)), 494 | t, 495 | )); 496 | } 497 | 498 | pub inline fn set_data(self: *@This(), sh: *shareds, pr: pri) void { 499 | self.shareds = sh; 500 | self.privates = std.mem.toBytes(pr); 501 | } 502 | 503 | pub inline fn make_promise(self: *@This(), pro: *promise(ret)) void { 504 | const head = self.get_header(); 505 | self.result = pro; 506 | head.td_flags.destructors_thunk = 1; 507 | 508 | self.data1.destructors = &opaque { 509 | pub fn notify(gtid: c_int, t: *self_t) callconv(.C) c_int { 510 | _ = gtid; 511 | 512 | t.result.release(); 513 | return 0; 514 | } 515 | }.notify; 516 | } 517 | 518 | pub inline fn set_priority(self: *@This(), priority: c_int) void { 519 | self.data2.priority = priority; 520 | @panic("TODO"); 521 | } 522 | 523 | pub inline fn task(self: *@This(), comptime name: *const ident_t, gtid: c_int) c_int { 524 | return __kmpc_omp_task(name, gtid, self); 525 | } 526 | 527 | pub inline fn begin_if0(self: *@This(), comptime name: *const ident_t, gtid: c_int) void { 528 | __kmpc_omp_task_begin_if0(name, gtid, self); 529 | } 530 | 531 | pub inline fn complete_if0(self: *@This(), comptime name: *const ident_t, gtid: c_int) void { 532 | __kmpc_omp_task_complete_if0(name, gtid, self); 533 | } 534 | 535 | pub inline fn get_header(self: *@This()) *task_data_t { 536 | const ptr = @intFromPtr(self) - @sizeOf(task_data_t); 537 | return @ptrFromInt(ptr); 538 | } 539 | }; 540 | } 541 | 542 | extern "omp" fn __kmpc_omp_taskyield(loc_ref: *const ident_t, gtid: c_int, end_part: c_int) c_int; 543 | pub inline fn taskyield(comptime name: *const ident_t, gtid: c_int) c_int { 544 | // Not really sure what end_part is, so always set it to 0. 
Even whithin the runtime it's used only in logging 545 | return __kmpc_omp_taskyield(name, gtid, 0); 546 | } 547 | 548 | extern "omp" fn __kmpc_omp_taskwait(loc_ref: *const ident_t, gtid: c_int) c_int; 549 | pub inline fn taskwait(comptime name: *const ident_t, gtid: c_int) c_int { 550 | return __kmpc_omp_taskwait(name, gtid); 551 | } 552 | // extern "omp" fn __kmpc_omp_target_task_alloc(loc_ref: *const ident_t, gtid: c_int, flags: c_int, sizeof_kmp_task_t: usize, sizeof_shareds: usize, task_entry: kmp_routine_entry_t, device_id: i64) *kmp_task_t; 553 | // pub inline fn target_task_alloc(comptime name: *const ident_t, gtid: c_int, flags: kmp_tasking_flags, sizeof_kmp_task_t: usize, sizeof_shareds: usize, task_entry: kmp_routine_entry_t, device_id: i64) *kmp_task_t { 554 | // return __kmpc_omp_target_task_alloc(name, gtid, flags, sizeof_kmp_task_t, sizeof_shareds, task_entry, device_id); 555 | // } 556 | // 557 | 558 | // extern "omp" fn __kmpc_omp_task_parts(loc_ref: *const ident_t, gtid: c_int, new_task: *kmp_task_t, part: *kmp_task_t) c_int; 559 | // pub inline fn task_parts(comptime name: *const ident_t, gtid: c_int, new_task: *kmp_task_t, part: *kmp_task_t) c_int { 560 | // return __kmpc_omp_task_parts(name, gtid, new_task, part); 561 | // } 562 | // 563 | 564 | extern "omp" fn __kmpc_reduce_nowait( 565 | loc: *const ident_t, 566 | global_tid: c_int, 567 | num_vars: c_int, 568 | reduce_size: usize, 569 | reduce_data: *anyopaque, 570 | reduce_func: *const fn (*anyopaque, *anyopaque) callconv(.C) void, 571 | lck: *critical_name_t, 572 | ) c_int; 573 | /// This call il synchronized and will only occur in the main thread, so we don't need to worry about the reduce_func being called concurrently or use atomics 574 | pub inline fn reduce_nowait( 575 | comptime loc: *const ident_t, 576 | global_tid: c_int, 577 | num_vars: c_int, 578 | reduce_size: usize, 579 | reduce_data: *anyopaque, 580 | comptime f: anytype, 581 | lck: *critical_name_t, 582 | ) c_int { 583 | return __kmpc_reduce_nowait(loc, global_tid, num_vars, reduce_size, reduce_data, f, lck); 584 | } 585 | 586 | extern "omp" fn __kmpc_end_reduce_nowait(loc: *const ident_t, global_tid: c_int, lck: *critical_name_t) void; 587 | pub inline fn end_reduce_nowait(comptime loc: *const ident_t, global_tid: c_int, lck: *critical_name_t) void { 588 | __kmpc_end_reduce_nowait(loc, global_tid, lck); 589 | } 590 | 591 | extern "omp" fn __kmpc_reduce( 592 | loc: *const ident_t, 593 | global_tid: c_int, 594 | num_vars: c_int, 595 | reduce_size: usize, 596 | reduce_data: *anyopaque, 597 | reduce_func: *const fn (*anyopaque, *anyopaque) callconv(.C) void, 598 | lck: *critical_name_t, 599 | ) c_int; 600 | /// This call il synchronized and will only occur in the main thread, so we don't need to worry about the reduce_func being called concurrently or use atomics 601 | pub inline fn reduce( 602 | comptime loc: *const ident_t, 603 | global_tid: c_int, 604 | num_vars: c_int, 605 | reduce_size: usize, 606 | reduce_data: *anyopaque, 607 | comptime f: anytype, 608 | lck: *critical_name_t, 609 | ) c_int { 610 | return __kmpc_reduce(loc, global_tid, num_vars, reduce_size, reduce_data, f, lck); 611 | } 612 | 613 | extern "omp" fn __kmpc_end_reduce(loc: *const ident_t, global_tid: c_int, lck: *critical_name_t) void; 614 | pub inline fn end_reduce(comptime loc: *const ident_t, global_tid: c_int, lck: *critical_name_t) void { 615 | __kmpc_end_reduce(loc, global_tid, lck); 616 | } 617 | extern "omp" fn __kmpc_push_proc_bind(loc: *const ident_t, global_tid: c_int, 
proc_bind: c_int) void; 618 | pub inline fn push_proc_bind(comptime loc: *const ident_t, global_tid: c_int, proc_bind: omp.proc_bind) void { 619 | __kmpc_push_proc_bind(loc, global_tid, @intFromEnum(proc_bind)); 620 | } 621 | -------------------------------------------------------------------------------- /src/omp.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const kmp = @import("kmp.zig"); 3 | const c = @cImport({ 4 | @cInclude("omp.h"); 5 | @cInclude("omp-tools.h"); 6 | }); 7 | const options = @import("build_options"); 8 | const in = @import("input_handler.zig"); 9 | const reduce = @import("reduce.zig"); 10 | const workshare_env = @import("workshare_env.zig"); 11 | 12 | const omp = @This(); 13 | 14 | pub const reduction_operators = reduce.operators; 15 | 16 | pub const proc_bind = enum(c_int) { 17 | default = 1, 18 | master = 2, 19 | close = 3, 20 | spread = 4, 21 | primary = 5, 22 | }; 23 | pub const parallel_opts = struct { 24 | iff: bool = false, 25 | proc_bind: proc_bind = .default, 26 | reduction: []const reduction_operators = &[0]reduction_operators{}, 27 | ret_reduction: reduction_operators = .none, 28 | }; 29 | pub inline fn parallel( 30 | comptime opts: parallel_opts, 31 | ) type { 32 | const common = struct { 33 | inline fn make_args( 34 | args: anytype, 35 | comptime f: anytype, 36 | ) in.zigc_ret(f, @TypeOf(in.normalize_args(args))) { 37 | in.check_fn_signature(f); 38 | 39 | return .{ .v = in.normalize_args(args) }; 40 | } 41 | 42 | inline fn make_proc_bind( 43 | id: *const kmp.ident_t, 44 | comptime bind: proc_bind, 45 | ) void { 46 | if (bind != .default) { 47 | kmp.push_proc_bind(id, kmp.get_tid(), bind); 48 | } 49 | } 50 | 51 | inline fn parallel_outline( 52 | comptime f: anytype, 53 | comptime R: type, 54 | comptime in_opts: parallel_opts, 55 | ) type { 56 | return opaque { 57 | const red = if (in_opts.ret_reduction == .none) in_opts.reduction else in_opts.reduction ++ .{in_opts.ret_reduction}; 58 | const work = workshare_env.make(red, f, in.copy_ret(f), .{ 59 | .do_copy = true, 60 | .return_optional = true, 61 | .is_omp_func = true, 62 | }); 63 | 64 | fn workshare_outline( 65 | gtid: *c_int, 66 | btid: *c_int, 67 | args: *R, 68 | ) callconv(.C) void { 69 | kmp.ctx = .{ 70 | .global_tid = gtid.*, 71 | .bound_tid = btid.*, 72 | }; 73 | 74 | const reduction_val_bytes = [_]u8{0} ** @sizeOf(in.copy_ret(f)); 75 | var reduction_val = std.mem.bytesAsValue(in.copy_ret(f), &reduction_val_bytes).*; 76 | const maybe_ret = work.run(.{}, args.v, .{}, &reduction_val); 77 | 78 | if (maybe_ret) |r| { 79 | args.ret = r; 80 | } 81 | 82 | return; 83 | } 84 | 85 | fn generic_outline( 86 | gtid: *c_int, 87 | btid: *c_int, 88 | args: *R, 89 | ) callconv(.C) void { 90 | kmp.ctx = .{ 91 | .global_tid = gtid.*, 92 | .bound_tid = btid.*, 93 | }; 94 | 95 | args.ret = if (@typeInfo(in.copy_ret(f)) == .ErrorUnion) 96 | @call(.always_inline, f, args.*.v) catch |err| err 97 | else 98 | @call(.always_inline, f, args.*.v); 99 | 100 | return; 101 | } 102 | }; 103 | } 104 | 105 | inline fn parallel_impl( 106 | args: anytype, 107 | comptime f: anytype, 108 | comptime has_cond: bool, 109 | cond: bool, 110 | ) in.copy_ret(f) { 111 | in.check_fn_signature(f); 112 | 113 | var ret = make_args(args, f); 114 | const id: kmp.ident_t = .{ .flags = @intFromEnum(kmp.ident_flags.IDENT_KMPC), .psource = "parallel" ++ @typeName(@TypeOf(f)), .reserved_3 = 0x1e }; 115 | make_proc_bind(&id, opts.proc_bind); 116 | const outline = parallel_outline(f, 
@TypeOf(ret), opts).workshare_outline; 117 | 118 | if (has_cond) { 119 | kmp.fork_call_if(&id, 1, @ptrCast(&outline), @intFromBool(cond), &ret); 120 | } else { 121 | kmp.fork_call(&id, 1, @ptrCast(&outline), &ret); 122 | } 123 | 124 | return ret.ret; 125 | } 126 | 127 | inline fn parallel_loop_impl( 128 | comptime T: type, 129 | lower: T, 130 | upper: T, 131 | increment: T, 132 | args: anytype, 133 | comptime f: anytype, 134 | comptime inner_fn: anytype, 135 | comptime has_cond: bool, 136 | cond: bool, 137 | ) in.copy_ret(f) { 138 | in.check_fn_signature(f); 139 | 140 | const ret_t = struct { 141 | ret: in.copy_ret(f) = undefined, 142 | v: @TypeOf(.{ args, lower, upper, increment, inner_fn }), 143 | }; 144 | const ret: ret_t = .{ .ret = undefined, .v = .{ args, lower, upper, increment, inner_fn } }; 145 | 146 | const id: kmp.ident_t = .{ 147 | .flags = @intFromEnum(kmp.ident_flags.IDENT_KMPC), 148 | .psource = "parallel" ++ @typeName(@TypeOf(f)), 149 | }; 150 | make_proc_bind(&id, opts.proc_bind); 151 | const outline = parallel_outline(f, @TypeOf(ret), opts).generic_outline; 152 | if (has_cond) { 153 | kmp.fork_call_if(&id, 1, @ptrCast(&outline), @intFromBool(cond), &ret); 154 | } else { 155 | kmp.fork_call(&id, 1, @ptrCast(&outline), &ret); 156 | } 157 | return ret.ret; 158 | } 159 | 160 | inline fn parallel_sections_impl( 161 | args: anytype, 162 | comptime f: anytype, 163 | comptime fs: anytype, 164 | comptime has_cond: bool, 165 | cond: bool, 166 | ) in.copy_ret(f) { 167 | in.check_fn_signature(f); 168 | 169 | const ret_t = struct { 170 | ret: in.copy_ret(f) = undefined, 171 | v: @TypeOf(.{ args, fs }), 172 | }; 173 | const ret: ret_t = .{ .ret = undefined, .v = .{ args, fs } }; 174 | 175 | const id: kmp.ident_t = .{ .flags = @intFromEnum(kmp.ident_flags.IDENT_KMPC), .psource = "parallel" ++ @typeName(@TypeOf(f)), .reserved_3 = 0x1e }; 176 | make_proc_bind(&id, opts.proc_bind); 177 | const outline = parallel_outline(f, @TypeOf(ret), opts).generic_outline; 178 | 179 | if (has_cond) { 180 | kmp.fork_call_if(&id, 1, @ptrCast(&outline), @intFromBool(cond), &ret); 181 | } else { 182 | kmp.fork_call(&id, 1, @ptrCast(&outline), &ret); 183 | } 184 | 185 | return ret.ret; 186 | } 187 | }; 188 | 189 | const api = struct { 190 | pub inline fn run_if( 191 | args: anytype, 192 | cond: bool, 193 | comptime f: anytype, 194 | ) in.copy_ret(f) { 195 | return common.parallel_impl(args, f, true, cond); 196 | } 197 | 198 | pub inline fn run( 199 | args: anytype, 200 | comptime f: anytype, 201 | ) in.copy_ret(f) { 202 | return common.parallel_impl(args, f, false, false); 203 | } 204 | 205 | pub inline fn loop( 206 | comptime idx_T: type, 207 | comptime loop_args: parallel_for_opts, 208 | ) type { 209 | return struct { 210 | inline fn _run_if( 211 | args: anytype, 212 | cond: bool, 213 | lower: idx_T, 214 | upper: idx_T, 215 | increment: idx_T, 216 | comptime f: anytype, 217 | ) in.copy_ret(f) { 218 | return common.parallel_loop_impl(idx_T, lower, upper, increment, args, omp.loop(idx_T, loop_args).run, f, true, cond); 219 | } 220 | 221 | inline fn _run( 222 | args: anytype, 223 | lower: idx_T, 224 | upper: idx_T, 225 | increment: idx_T, 226 | comptime f: anytype, 227 | ) in.copy_ret(f) { 228 | return common.parallel_loop_impl(idx_T, lower, upper, increment, args, omp.loop(idx_T, loop_args).run, f, false, false); 229 | } 230 | 231 | pub const run = if (opts.iff) _run_if else _run; 232 | }; 233 | } 234 | 235 | pub inline fn sections( 236 | comptime sections_args: sections_opts, 237 | ) type { 238 | return 
struct { 239 | inline fn _run_if( 240 | args: anytype, 241 | cond: bool, 242 | comptime fs: anytype, 243 | ) in.copy_ret(fs[0]) { 244 | return common.parallel_sections_impl(args, omp.sections(sections_args).run, fs, true, cond); 245 | } 246 | 247 | inline fn _run( 248 | args: anytype, 249 | comptime fs: anytype, 250 | ) in.copy_ret(fs[0]) { 251 | return common.parallel_sections_impl(args, omp.sections(sections_args).run, fs, false, false); 252 | } 253 | 254 | pub const run = if (opts.iff) _run_if else _run; 255 | }; 256 | } 257 | }; 258 | 259 | return struct { 260 | // omp.para(...).run(...); 261 | pub const run = if (opts.iff) api.run_if else api.run; 262 | 263 | // omp.para(...).loop(...).run(...); 264 | pub const loop = api.loop; 265 | 266 | // omp.para(...).sections(...).run(...); 267 | pub const sections = api.sections; 268 | }; 269 | } 270 | 271 | pub const schedule = enum(c_long) { 272 | static = 1, 273 | dynamic = 2, 274 | guided = 3, 275 | auto = 4, 276 | monotonic = 0x80000000, 277 | }; 278 | pub const parallel_for_opts = struct { 279 | sched: schedule = .static, 280 | chunk_size: c_int = 1, 281 | ordered: bool = false, 282 | reduction: []const reduction_operators = &[0]reduction_operators{}, 283 | ret_reduction: reduction_operators = .none, 284 | nowait: bool = false, 285 | }; 286 | pub inline fn loop( 287 | comptime idx_T: type, 288 | comptime opts: parallel_for_opts, 289 | ) type { 290 | return _loop(idx_T, opts, false); 291 | } 292 | 293 | inline fn _loop( 294 | comptime idx_T: type, 295 | comptime opts: parallel_for_opts, 296 | comptime is_from_sections: bool, 297 | ) type { 298 | const common = struct { 299 | pub fn to_kmp_sched(comptime sched: schedule) kmp.sched_t { 300 | switch (sched) { 301 | .static => return if (opts.chunk_size > 1) kmp.sched_t.StaticChunked else kmp.sched_t.StaticNonChunked, 302 | .dynamic => return kmp.sched_t.Dynamic, 303 | .guided => return kmp.sched_t.Guided, 304 | .auto => return kmp.sched_t.Runtime, 305 | else => unreachable, 306 | } 307 | } 308 | 309 | inline fn static_impl( 310 | args: anytype, 311 | lower: idx_T, 312 | upper: idx_T, 313 | increment: idx_T, 314 | comptime f: anytype, 315 | ) in.copy_ret(f) { 316 | const sections_flag = if (is_from_sections) @intFromEnum(kmp.ident_flags.IDENT_WORK_SECTIONS) else 0; 317 | const id = .{ 318 | .flags = @intFromEnum(kmp.ident_flags.IDENT_KMPC) | @intFromEnum(kmp.ident_flags.IDENT_WORK_LOOP) | sections_flag, 319 | .psource = "parallel_for" ++ @typeName(@TypeOf(f)), 320 | }; 321 | 322 | // This is `1` iside the last thread execution 323 | var last_iter: c_int = 0; 324 | var low: idx_T = lower; 325 | var upp: idx_T = upper - 1; 326 | var stri: idx_T = 1; 327 | const incr: idx_T = increment; 328 | 329 | kmp.for_static_init( 330 | idx_T, 331 | &id, 332 | kmp.ctx.global_tid, 333 | to_kmp_sched(opts.sched), 334 | &last_iter, 335 | &low, 336 | &upp, 337 | &stri, 338 | incr, 339 | opts.chunk_size, 340 | ); 341 | 342 | const to_ret_bytes = [_]u8{0} ** @sizeOf(in.copy_ret(f)); 343 | var to_ret = std.mem.bytesAsValue(in.copy_ret(f), &to_ret_bytes).*; 344 | 345 | const red = reduce.create(@typeInfo(@TypeOf(.{to_ret})).Struct.fields, &.{opts.ret_reduction}); 346 | if (opts.chunk_size > 1) { 347 | while (low + opts.chunk_size < upper) : (low += stri) { 348 | inline for (0..opts.chunk_size) |i| { 349 | red.single(&to_ret, @call(.always_inline, f, .{low + @as(idx_T, i)} ++ args)); 350 | } 351 | } 352 | while (low < upper) : (low += incr) { 353 | red.single(&to_ret, @call(.always_inline, f, .{low} ++ args)); 354 | 
}
355 | } else {
356 | var i: idx_T = low;
357 | while (i <= upp) : (i += incr) {
358 | red.single(&to_ret, @call(.always_inline, f, .{i} ++ args));
359 | }
360 | }
361 |
362 | const id_fini = .{
363 | .flags = @intFromEnum(kmp.ident_flags.IDENT_KMPC) | @intFromEnum(kmp.ident_flags.IDENT_WORK_LOOP),
364 | .psource = "parallel_for" ++ @typeName(@TypeOf(f)),
365 | .reserved_3 = 0x1c,
366 | };
367 | kmp.for_static_fini(&id_fini, kmp.ctx.global_tid);
368 |
369 | if (!opts.nowait) {
370 | barrier();
371 | }
372 |
373 | return to_ret;
374 | }
375 |
376 | pub inline fn dynamic_impl(
377 | args: anytype,
378 | lower: idx_T,
379 | upper: idx_T,
380 | increment: idx_T,
381 | comptime f: anytype,
382 | ) in.copy_ret(f) {
383 | const id = .{
384 | .flags = @intFromEnum(kmp.ident_flags.IDENT_KMPC) | @intFromEnum(kmp.ident_flags.IDENT_WORK_LOOP),
385 | .psource = "parallel_for" ++ @typeName(@TypeOf(f)),
386 | };
387 |
388 | // This is `1` inside the last thread execution
389 | var last_iter: c_int = 0;
390 | var low: idx_T = lower;
391 | var upp: idx_T = upper - 1;
392 | var stri: idx_T = 1;
393 | const incr: idx_T = increment;
394 | kmp.dispatch_init(idx_T, &id, kmp.ctx.global_tid, to_kmp_sched(opts.sched), low, upp, incr, opts.chunk_size);
395 |
396 | const to_ret_bytes = [_]u8{0} ** @sizeOf(in.copy_ret(f));
397 | var to_ret = std.mem.bytesAsValue(in.copy_ret(f), &to_ret_bytes).*;
398 |
399 | const red = reduce.create(@typeInfo(@TypeOf(.{to_ret})).Struct.fields, &.{opts.ret_reduction});
400 | while (kmp.dispatch_next(idx_T, &id, kmp.ctx.global_tid, &last_iter, &low, &upp, &stri) == 1) {
401 | defer kmp.dispatch_fini(idx_T, &id, kmp.ctx.global_tid);
402 |
403 | var i: idx_T = low;
404 | while (i <= upp) : (i += incr) {
405 | red.single(&to_ret, @call(.always_inline, f, .{i} ++ args));
406 | }
407 | }
408 |
409 | return to_ret;
410 | }
411 |
412 | pub inline fn static(
413 | args: anytype,
414 | lower: idx_T,
415 | upper: idx_T,
416 | increment: idx_T,
417 | comptime f: anytype,
418 | ) in.copy_ret(f) {
419 | in.check_args(@TypeOf(args));
420 | in.check_fn_signature(f);
421 |
422 | const f_type_info = @typeInfo(@TypeOf(f));
423 | if (f_type_info.Fn.params.len < 1) {
424 | @compileError("Expected function with signature `inline fn(numeric, ...)`" ++ @typeName(@TypeOf(f)) ++ " instead.\n" ++ @typeName(idx_T) ++ " may be different from the expected type: " ++ @typeName(f_type_info.Fn.params[0].type.?));
425 | }
426 | const do_copy = comptime !is_from_sections;
427 | const red = if (opts.ret_reduction == .none) opts.reduction else opts.reduction ++ .{opts.ret_reduction};
428 |
429 | const st = struct {
430 | const reduction_val_bytes = [_]u8{0} ** @sizeOf(in.copy_ret(f));
431 | var reduction_val: in.copy_ret(f) = std.mem.bytesAsValue(in.no_error(in.copy_ret(f)), &reduction_val_bytes).*;
432 | };
433 |
434 | const work = workshare_env.make(red, static_impl, in.copy_ret(f), .{
435 | .do_copy = do_copy,
436 | .is_omp_func = false,
437 | .return_optional = false,
438 | });
439 |
440 | // Ignore any of the returns since the only ones we care about are the reduction values
441 | if (@typeInfo(in.copy_ret(f)) == .ErrorUnion) {
442 | _ = work.run(.{}, in.normalize_args(args), .{ lower, upper, increment, f }, &st.reduction_val) catch {};
443 | } else {
444 | _ = work.run(.{}, in.normalize_args(args), .{ lower, upper, increment, f }, &st.reduction_val);
445 | }
446 | if (!opts.nowait) {
447 | barrier();
448 | }
449 |
450 | return st.reduction_val;
451 | }
452 |
453 | pub inline fn dynamic(
args: anytype, 455 | lower: idx_T, 456 | upper: idx_T, 457 | increment: idx_T, 458 | comptime f: anytype, 459 | ) in.copy_ret(f) { 460 | std.debug.assert(is_from_sections == false); 461 | 462 | in.check_args(@TypeOf(args)); 463 | in.check_fn_signature(f); 464 | 465 | const f_type_info = @typeInfo(@TypeOf(f)); 466 | if (f_type_info.Fn.params.len < 1) { 467 | @compileError("Expected function with signature `inline fn(numeric, ...)`" ++ @typeName(@TypeOf(f)) ++ " instead.\n" ++ @typeName(idx_T) ++ " may be different from the expected type: " ++ @typeName(f_type_info.Fn.params[0].type.?)); 468 | } 469 | 470 | const st = struct { 471 | const reduction_val_bytes = [_]u8{0} ** @sizeOf(in.copy_ret(f)); 472 | var reduction_val: in.copy_ret(f) = std.mem.bytesAsValue(in.no_error(in.copy_ret(f)), &reduction_val_bytes).*; 473 | }; 474 | const red = if (opts.ret_reduction == .none) opts.reduction else opts.reduction ++ .{opts.ret_reduction}; 475 | const work = workshare_env.make(red, dynamic_impl, in.copy_ret(f), .{ 476 | .do_copy = true, 477 | .is_omp_func = false, 478 | .return_optional = false, 479 | }); 480 | 481 | if (@typeInfo(in.copy_ret(f)) == .ErrorUnion) { 482 | _ = work.run(.{}, in.normalize_args(args), .{ lower, upper, increment, f }, &st.reduction_val) catch {}; 483 | } else { 484 | _ = work.run(.{}, in.normalize_args(args), .{ lower, upper, increment, f }, &st.reduction_val); 485 | } 486 | if (!opts.nowait) { 487 | barrier(); 488 | } 489 | 490 | return st.reduction_val; 491 | } 492 | }; 493 | 494 | return struct { 495 | pub const run = if (opts.chunk_size == 1 and opts.sched == .static) common.static else common.dynamic; 496 | }; 497 | } 498 | 499 | pub inline fn barrier() void { 500 | const id: kmp.ident_t = .{ 501 | .flags = @intFromEnum(kmp.ident_flags.IDENT_KMPC) | @intFromEnum(kmp.ident_flags.IDENT_BARRIER_EXPL), 502 | .psource = "barrier", 503 | .reserved_3 = 0x1e, 504 | }; 505 | kmp.barrier(&id, kmp.ctx.global_tid); 506 | } 507 | 508 | pub inline fn flush(vars: anytype) void { 509 | _ = vars; // Just ignore this, it's only used to define the ordering of operations when compiling, I hope... 
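// Illustrative note on the loop()/_loop() wrappers above: `run` resolves at comptime
// to the static fast path only for a .static schedule with the default chunk size of 1;
// any other combination goes through the dispatch_init/next/fini path. The declarations
// below are examples only.
const static_loop = omp.loop(u32, .{}); // .sched = .static, .chunk_size = 1 -> common.static
const dynamic_loop = omp.loop(u32, .{ .sched = .dynamic, .chunk_size = 4 }); // -> common.dynamic
// Either one is then invoked as `.run(args, lower, upper, increment, body)`, where
// `body` receives the loop index as its first parameter.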
510 | const id: kmp.ident_t = .{ 511 | .flags = @intFromEnum(kmp.ident_flags.IDENT_KMPC), 512 | .psource = "flush", 513 | .reserved_3 = 0x1e, 514 | }; 515 | kmp.flush(&id); 516 | } 517 | 518 | pub const critical_options = struct { 519 | sync: sync_hint_t = .none, 520 | name: []const u8 = "", 521 | }; 522 | pub inline fn critical( 523 | comptime opts: critical_options, 524 | ) type { 525 | return struct { 526 | pub inline fn run( 527 | args: anytype, 528 | comptime f: anytype, 529 | ) in.copy_ret(f) { 530 | in.check_args(@TypeOf(args)); 531 | in.check_fn_signature(f); 532 | 533 | const id: kmp.ident_t = .{ 534 | .flags = @intFromEnum(kmp.ident_flags.IDENT_KMPC) | @intFromEnum(kmp.ident_flags.IDENT_WORK_LOOP), 535 | .psource = "barrier", 536 | }; 537 | 538 | const static = struct { 539 | var lock: kmp.critical_name_t = @bitCast([_]u8{0} ** 32); 540 | }; 541 | 542 | kmp.critical(&id, kmp.ctx.global_tid, &static.lock, @intFromEnum(opts.sync)); 543 | defer { 544 | kmp.critical_end(&id, kmp.ctx.global_tid, &static.lock); 545 | } 546 | 547 | const type_info = @typeInfo(@typeInfo(@TypeOf(f)).Fn.return_type.?); 548 | const ret = ret: { 549 | if (type_info == .ErrorUnion) { 550 | break :ret try @call(.always_inline, f, args); 551 | } else { 552 | break :ret @call(.always_inline, f, args); 553 | } 554 | }; 555 | 556 | return ret; 557 | } 558 | }; 559 | } 560 | 561 | pub const sections_opts = struct { 562 | reduction: []const reduction_operators = &[0]reduction_operators{}, 563 | ret_reduction: reduction_operators = .none, 564 | nowait: bool = false, 565 | }; 566 | 567 | pub inline fn sections( 568 | comptime opts: sections_opts, 569 | ) type { 570 | return struct { 571 | pub inline fn run( 572 | args: anytype, 573 | comptime fs: anytype, 574 | ) in.copy_ret(fs[0]) { 575 | const args_type = @TypeOf(args); 576 | 577 | in.check_args(args_type); 578 | comptime std.debug.assert(@typeInfo(@TypeOf(fs)) == .Struct); 579 | inline for (fs) |f| { 580 | in.check_fn_signature(f); 581 | } 582 | 583 | const runner = struct { 584 | const _fs: [fs.len]@TypeOf(fs[0]) = fs; 585 | 586 | pub inline fn f(idx: usize, a: @TypeOf(in.normalize_args(args))) in.copy_ret(fs[0]) { 587 | const private_copy = in.make_another(a.private); 588 | const firstprivate_copy = in.shallow_copy(a.firstprivate); 589 | const reduction_copy = in.shallow_copy(a.reduction); 590 | const true_args = .{a.shared ++ private_copy ++ firstprivate_copy ++ reduction_copy}; 591 | 592 | const type_info = @typeInfo(@typeInfo(@TypeOf(f)).Fn.return_type.?); 593 | const ret = ret: { 594 | if (type_info == .ErrorUnion) { 595 | break :ret try @call(.auto, _fs[idx], true_args[0]); 596 | } else { 597 | break :ret @call(.auto, _fs[idx], true_args[0]); 598 | } 599 | }; 600 | 601 | return ret; 602 | } 603 | }.f; 604 | 605 | return _loop(usize, .{ 606 | .nowait = opts.nowait, 607 | .reduction = opts.reduction, 608 | .sched = .static, 609 | }, true).run(args, 0, fs.len, 1, runner); 610 | } 611 | }; 612 | } 613 | 614 | pub inline fn single() type { 615 | return struct { 616 | pub inline fn run( 617 | args: anytype, 618 | comptime f: anytype, 619 | ) void_or_opt(in.copy_ret(f)) { 620 | in.check_args(@TypeOf(args)); 621 | in.check_fn_signature(f); 622 | 623 | const single_id = .{ 624 | .flags = @intFromEnum(kmp.ident_flags.IDENT_KMPC), 625 | .psource = "single" ++ @typeName(@TypeOf(f)), 626 | }; 627 | const barrier_id = .{ 628 | .flags = @intFromEnum(kmp.ident_flags.IDENT_KMPC) | @intFromEnum(kmp.ident_flags.IDENT_BARRIER_IMPL_SINGLE), 629 | .psource = "single" ++ 
@typeName(@TypeOf(f)), 630 | .reserved_3 = 0x27, 631 | }; 632 | 633 | if (kmp.single(&single_id, kmp.ctx.global_tid) == 1) { 634 | defer { 635 | kmp.end_single(&single_id, kmp.ctx.global_tid); 636 | kmp.barrier(&barrier_id, kmp.ctx.global_tid); 637 | } 638 | const type_info = @typeInfo(@typeInfo(@TypeOf(f)).Fn.return_type.?); 639 | 640 | return if (type_info == .ErrorUnion) 641 | try @call(.always_inline, f, args) 642 | else 643 | @call(.always_inline, f, args); 644 | } 645 | 646 | kmp.barrier(&barrier_id, kmp.ctx.global_tid); 647 | if (in.copy_ret(f) != void) { 648 | return null; 649 | } 650 | } 651 | }; 652 | } 653 | 654 | pub inline fn void_or_opt(comptime T: type) type { 655 | return if (T == void) void else ?T; 656 | } 657 | 658 | pub inline fn master() type { 659 | return struct { 660 | pub inline fn run( 661 | args: anytype, 662 | comptime f: anytype, 663 | ) void_or_opt(in.copy_ret(f)) { 664 | return masked.run(only_master, args, f); 665 | } 666 | }; 667 | } 668 | 669 | pub const only_master: c_int = 0; 670 | pub inline fn masked() type { 671 | return struct { 672 | pub inline fn run( 673 | args: anytype, 674 | filter: i32, 675 | comptime f: anytype, 676 | ) void_or_opt(in.copy_ret(f)) { 677 | in.check_args(@TypeOf(args)); 678 | in.check_fn_signature(f); 679 | 680 | const masked_id = .{ 681 | .flags = @intFromEnum(kmp.ident_flags.IDENT_KMPC), 682 | .psource = "masked" ++ @typeName(@TypeOf(f)), 683 | }; 684 | 685 | if (kmp.masked(&masked_id, kmp.ctx.global_tid, filter) == 1) { 686 | const type_info = @typeInfo(@typeInfo(@TypeOf(f)).Fn.return_type.?); 687 | if (type_info == .ErrorUnion) { 688 | return try @call(.always_inline, f, args); 689 | } else { 690 | return @call(.always_inline, f, args); 691 | } 692 | } 693 | if (void_or_opt(in.copy_ret(f)) != void) { 694 | return null; 695 | } 696 | } 697 | }; 698 | } 699 | 700 | pub const promise = kmp.promise; 701 | inline fn void_or_promise_ptr(comptime T: type) type { 702 | return if (T == void) void else *promise(T); 703 | } 704 | 705 | pub const task_opts = struct { 706 | iff: bool = false, 707 | final: bool = false, 708 | untied: bool = false, 709 | }; 710 | 711 | pub inline fn task( 712 | comptime opts: task_opts, 713 | ) type { 714 | const api = struct { 715 | inline fn run_impl( 716 | args: anytype, 717 | comptime f: anytype, 718 | cond: bool, 719 | fin: bool, 720 | ) void_or_promise_ptr(in.copy_ret(f)) { 721 | const id = .{ 722 | .flags = @intFromEnum(kmp.ident_flags.IDENT_KMPC), 723 | .psource = "task" ++ @typeName(@TypeOf(f)), 724 | }; 725 | var norm = in.normalize_args(args); 726 | 727 | const private_copy = in.make_another(norm.private); 728 | const firstprivate_copy = in.shallow_copy(norm.firstprivate); 729 | const private_args = private_copy ++ firstprivate_copy; 730 | 731 | // in.check_args(@TypeOf(private_args)); 732 | in.check_fn_signature(f); 733 | 734 | const t_type = kmp.task_t( 735 | @TypeOf(norm.shared), 736 | @TypeOf(private_args), 737 | in.copy_ret(f), 738 | ); 739 | 740 | const flags = kmp.tasking_flags{ 741 | .tiedness = @intFromBool(!opts.untied), 742 | .final = @intFromBool(fin), 743 | }; 744 | 745 | const real_task = t_type.alloc( 746 | f, 747 | &id, 748 | kmp.ctx.global_tid, 749 | flags, 750 | ); 751 | real_task.set_data(&norm.shared, private_args); 752 | 753 | // TODO: do something better with this error... 
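// Usage sketch (illustrative, with unverified argument plumbing) for the task wrapper:
// when the task function returns a value, run() hands back a *promise(T) that the caller
// waits on and frees. `forty_two` and `spawn_and_wait` are made-up names for the example.
fn forty_two() u32 {
    return 42;
}

fn spawn_and_wait() u32 {
    const p = omp.task(.{}).run(.{}, forty_two);
    defer p.deinit();
    omp.taskwait(); // make sure the child task has finished
    return p.get();
}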
754 | var pro: void_or_promise_ptr(in.copy_ret(f)) = if (in.copy_ret(f) == void) undefined else promise(in.copy_ret(f)).init() catch @panic("Buy more RAM lol"); 755 | if (@TypeOf(pro) == *kmp.promise(in.copy_ret(f))) { 756 | real_task.make_promise(pro); 757 | } 758 | 759 | if (comptime opts.iff) { 760 | if (!cond) { 761 | real_task.begin_if0(&id, kmp.ctx.global_tid); 762 | 763 | if (@typeInfo(in.copy_ret(f)) == .ErrorUnion) { 764 | _ = @call(.always_inline, f, norm.shared ++ private_args) catch |err| err; 765 | } else { 766 | _ = @call(.always_inline, f, norm.shared ++ private_args); 767 | } 768 | 769 | real_task.complete_if0(&id, kmp.ctx.global_tid); 770 | } 771 | 772 | if (@TypeOf(pro) == *promise(in.copy_ret(f))) { 773 | pro.release(); 774 | } 775 | return pro; 776 | } 777 | 778 | _ = real_task.task(&id, kmp.ctx.global_tid); 779 | return pro; 780 | } 781 | 782 | pub inline fn run( 783 | args: anytype, 784 | comptime f: anytype, 785 | ) void_or_promise_ptr(in.copy_ret(f)) { 786 | return run_impl(args, f, false, false); 787 | } 788 | 789 | pub inline fn run_if( 790 | cond: bool, 791 | args: anytype, 792 | comptime f: anytype, 793 | ) void_or_promise_ptr(in.copy_ret(f)) { 794 | return run_impl(args, f, cond, false); 795 | } 796 | 797 | pub inline fn run_final( 798 | final: bool, 799 | args: anytype, 800 | comptime f: anytype, 801 | ) void_or_promise_ptr(in.copy_ret(f)) { 802 | return run_impl(args, f, false, final); 803 | } 804 | 805 | pub inline fn run_if_final( 806 | cond: bool, 807 | final: bool, 808 | args: anytype, 809 | comptime f: anytype, 810 | ) void_or_promise_ptr(in.copy_ret(f)) { 811 | return run_impl(args, f, cond, final); 812 | } 813 | }; 814 | 815 | return struct { 816 | // TODO: Find a way to format it better 817 | pub const run = if (opts.iff and opts.final) api.run_if_final else if (opts.iff and !opts.final) api.run_if else if (!opts.iff and opts.final) api.run_final else api.run; 818 | }; 819 | } 820 | 821 | pub inline fn taskyeild() void { 822 | const id = .{ 823 | .flags = @intFromEnum(kmp.ident_flags.IDENT_KMPC), 824 | .psource = "taskyeild", 825 | }; 826 | kmp.taskyield(&id, kmp.ctx.global_tid); 827 | } 828 | 829 | pub inline fn taskwait() void { 830 | const id = .{ 831 | .flags = @intFromEnum(kmp.ident_flags.IDENT_KMPC), 832 | .psource = "taskwait", 833 | }; 834 | kmp.taskwait(&id, kmp.ctx.global_tid); 835 | } 836 | 837 | // ////////////////////////////////////////////////////////////////////////////////// 838 | // / Runtime API //////////////////////////////////////////////////////////////////// 839 | // ////////////////////////////////////////////////////////////////////////////////// 840 | 841 | // Setters 842 | pub inline fn set_num_threads(num_threads: u32) void { 843 | c.omp_set_num_threads(@intCast(num_threads)); 844 | } 845 | 846 | pub inline fn set_dynamic(dynamic_threads: bool) void { 847 | c.omp_set_dynamic(@intFromBool(dynamic_threads)); 848 | } 849 | 850 | pub inline fn set_nested(nested: bool) void { 851 | c.omp_set_nested(@intFromBool(nested)); 852 | } 853 | 854 | pub inline fn set_max_active_levels(max_levels: u32) void { 855 | c.omp_set_max_active_levels(@intCast(max_levels)); 856 | } 857 | 858 | extern "c" fn omp_set_schedule(kind: schedule, chunk_size: c_int) void; 859 | pub inline fn set_schedule(kind: schedule, chunk_size: u32) void { 860 | c.omp_set_schedule(kind, chunk_size); 861 | } 862 | 863 | // Getters 864 | pub inline fn get_num_threads() u32 { 865 | return @intCast(c.omp_get_num_threads()); 866 | } 867 | 868 | pub inline fn get_dynamic() 
bool { 869 | return c.omp_get_dynamic(); 870 | } 871 | 872 | pub inline fn get_nested() bool { 873 | return c.omp_get_nested(); 874 | } 875 | 876 | pub inline fn get_max_threads() u32 { 877 | return @intCast(c.omp_get_max_threads()); 878 | } 879 | 880 | pub inline fn get_thread_num() u32 { 881 | return @intCast(c.omp_get_thread_num()); 882 | } 883 | 884 | pub inline fn get_num_procs() u32 { 885 | return @intCast(c.omp_get_num_procs()); 886 | } 887 | 888 | pub inline fn in_parallel() bool { 889 | return c.omp_in_parallel(); 890 | } 891 | 892 | pub inline fn in_final() bool { 893 | return c.omp_in_final(); 894 | } 895 | 896 | pub inline fn get_active_level() u32 { 897 | return @intCast(c.omp_get_active_level()); 898 | } 899 | 900 | pub inline fn get_level() u32 { 901 | return @intCast(c.omp_get_level()); 902 | } 903 | 904 | pub inline fn get_ancestor_thread_num(level: u32) u32 { 905 | return @intCast(c.omp_get_ancestor_thread_num(@intCast(level))); 906 | } 907 | 908 | pub inline fn get_team_size(level: u32) u32 { 909 | return @intCast(c.omp_get_team_size(@intCast(level))); 910 | } 911 | 912 | pub inline fn get_thread_limit() u32 { 913 | return @intCast(c.omp_get_thread_limit()); 914 | } 915 | 916 | pub inline fn get_max_active_levels() u32 { 917 | return @intCast(c.omp_get_max_active_levels()); 918 | } 919 | pub inline fn get_schedule(kind: *schedule, chunk_size: *u32) void { 920 | c.omp_get_schedule(kind, @intCast(chunk_size)); 921 | } 922 | 923 | pub inline fn get_max_task_priority() u32 { 924 | return @intCast(c.omp_get_max_task_priority()); 925 | } 926 | 927 | // Locks 928 | // OpenMP 5.0 Synchronization hints 929 | pub const sync_hint_t = enum(c_int) { 930 | none = 0, 931 | uncontended = 1, 932 | contended = 1 << 1, 933 | nonspeculative = 1 << 2, 934 | speculative = 1 << 3, 935 | hle = 1 << 16, 936 | rtm = 1 << 17, 937 | adaptive = 1 << 18, 938 | }; 939 | 940 | /// lock hint type for dynamic user lock 941 | pub const lock_hint_t = sync_hint_t; 942 | const lock_t = extern struct { 943 | _lk: *anyopaque, 944 | }; 945 | 946 | pub const lock = struct { 947 | const Self = @This(); 948 | _lk: lock_t, 949 | 950 | pub inline fn init(this: *Self) void { 951 | c.omp_init_lock(this._lk); 952 | } 953 | 954 | pub inline fn set(this: *Self) void { 955 | c.omp_set_lock(this._lk); 956 | } 957 | 958 | pub inline fn unset(this: *Self) void { 959 | c.omp_unset_lock(this._lk); 960 | } 961 | 962 | pub inline fn destroy(this: *Self) void { 963 | c.omp_destroy_lock(this._lk); 964 | } 965 | 966 | pub inline fn test_(this: *Self) bool { 967 | return c.omp_test_lock(this._lk) != 0; 968 | } 969 | }; 970 | 971 | const nest_lock_t = extern struct { 972 | _lk: *anyopaque, 973 | }; 974 | 975 | pub const nest_lock = struct { 976 | const Self = @This(); 977 | _lk: nest_lock_t, 978 | 979 | pub inline fn init(this: *Self) void { 980 | c.omp_init_nest_lock(this._lk); 981 | } 982 | 983 | pub inline fn set(this: *Self) void { 984 | c.omp_set_nest_lock(this._lk); 985 | } 986 | 987 | pub inline fn unset(this: *Self) void { 988 | c.omp_unset_nest_lock(this._lk); 989 | } 990 | 991 | pub inline fn destroy(this: *Self) void { 992 | c.omp_destroy_nest_lock(this._lk); 993 | } 994 | 995 | pub inline fn test_(this: *Self) bool { 996 | return c.omp_test_nest_lock(this._lk) != 0; 997 | } 998 | }; 999 | 1000 | /// time API functions 1001 | pub inline fn get_wtime() f64 { 1002 | return c.omp_get_wtime(); 1003 | } 1004 | 1005 | pub inline fn get_wtick() f64 { 1006 | return c.omp_get_wtick(); 1007 | } 1008 | 1009 | /// OpenMP 4.0 1010 | 
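// Before the OpenMP 4.0 device routines below, a small usage sketch (illustrative) for
// the wall-clock helpers above: get_wtime() reports seconds as an f64, so a region can
// be timed by differencing two samples. `timed` is a made-up helper name.
fn timed(comptime work: anytype) f64 {
    const start = get_wtime();
    work();
    return get_wtime() - start;
}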
pub inline fn get_default_device() u32 { 1011 | return @intCast(c.omp_get_default_device()); 1012 | } 1013 | 1014 | pub inline fn set_default_device(device: u32) void { 1015 | c.omp_set_default_device(@intCast(device)); 1016 | } 1017 | 1018 | pub inline fn is_initial_device() bool { 1019 | return c.omp_is_initial_device(); 1020 | } 1021 | 1022 | pub inline fn get_num_devices() u32 { 1023 | return @intCast(c.omp_get_num_devices()); 1024 | } 1025 | 1026 | pub inline fn get_num_teams() u32 { 1027 | return @intCast(c.omp_get_num_teams()); 1028 | } 1029 | 1030 | pub inline fn get_team_num() u32 { 1031 | return @intCast(c.omp_get_team_num()); 1032 | } 1033 | 1034 | pub inline fn get_cancellation() bool { 1035 | return c.omp_get_cancellation(); 1036 | } 1037 | 1038 | // /* OpenMP 4.5 */ 1039 | pub inline fn get_initial_device() u32 { 1040 | return @intCast(c.omp_get_initial_device()); 1041 | } 1042 | 1043 | inline fn target_alloc(size: usize, device_num: u32) *u8 { 1044 | return c.omp_target_alloc(size, @intCast(device_num)); 1045 | } 1046 | 1047 | inline fn target_free(ptr: *anyopaque, device_num: u32) void { 1048 | c.omp_target_free(ptr, @intCast(device_num)); 1049 | } 1050 | 1051 | inline fn target_is_present(ptr: *anyopaque, device_num: u32) bool { 1052 | return c.omp_target_is_present(ptr, @intCast(device_num)) != 0; 1053 | } 1054 | 1055 | inline fn target_memcpy(dst: *u8, src: *const u8, length: usize, dst_offset: usize, src_offset: usize, device_num: u32) void { 1056 | c.omp_target_memcpy(dst, src, length, dst_offset, src_offset, @intCast(device_num)); 1057 | } 1058 | 1059 | inline fn target_memcpy_rect( 1060 | dst: *u8, 1061 | src: *const u8, 1062 | element_size: usize, 1063 | num_dims: c_int, 1064 | volume: *usize, 1065 | dst_offsets: *usize, 1066 | src_offsets: *usize, 1067 | dst_dimensions: *usize, 1068 | src_dimensions: *usize, 1069 | dst_device_num: u32, 1070 | src_device_num: u32, 1071 | ) void { 1072 | c.omp_target_memcpy_rect( 1073 | dst, 1074 | src, 1075 | element_size, 1076 | num_dims, 1077 | volume, 1078 | dst_offsets, 1079 | src_offsets, 1080 | dst_dimensions, 1081 | src_dimensions, 1082 | @intCast(dst_device_num), 1083 | @intCast(src_device_num), 1084 | ); 1085 | } 1086 | 1087 | inline fn target_associate_ptr(host_ptr: *const anyopaque, device_ptr: *const anyopaque, size: usize, device_num: u32) void { 1088 | c.omp_target_associate_ptr(host_ptr, device_ptr, size, @intCast(device_num)); 1089 | } 1090 | 1091 | inline fn target_disassociate_ptr(ptr: *const anyopaque, device_num: u32) void { 1092 | c.omp_target_disassociate_ptr(ptr, @intCast(device_num)); 1093 | } 1094 | 1095 | // OpenMP 5.0 1096 | pub inline fn get_device_num() u32 { 1097 | return @intCast(c.omp_get_device_num()); 1098 | } 1099 | 1100 | // typedef void * omp_depend_t; 1101 | pub const depend_t = *anyopaque; 1102 | 1103 | // OpenMP 5.1 interop 1104 | // TODO: Maybe `usize` is better here, but intptr_t is supposed to be an int 1105 | pub const intptr_t = isize; 1106 | // 0..omp_get_num_interop_properties()-1 are reserved for implementation-defined properties 1107 | pub const interop_property_t = enum(c_int) { 1108 | fr_id = -1, 1109 | fr_name = -2, 1110 | vendor = -3, 1111 | vendor_name = -4, 1112 | device_num = -5, 1113 | platform = -6, 1114 | device = -7, 1115 | device_context = -8, 1116 | targetsync = -9, 1117 | first = -9, 1118 | }; 1119 | 1120 | pub const interop_rc_t = enum(c_int) { 1121 | no_value = 1, 1122 | success = 0, 1123 | empty = -1, 1124 | out_of_range = -2, 1125 | type_int = -3, 1126 | type_ptr = 
-4, 1127 | type_str = -5, 1128 | other = -6, 1129 | }; 1130 | 1131 | pub const interop_fr = enum(c_int) { 1132 | cuda = 1, 1133 | cuda_driver = 2, 1134 | opencl = 3, 1135 | sycl = 4, 1136 | hip = 5, 1137 | level_zero = 6, 1138 | last = 7, 1139 | }; 1140 | 1141 | pub const interop = *opaque { 1142 | const Self = @This(); 1143 | 1144 | /// None is defined as '&0' in the C API 1145 | inline fn init() Self { 1146 | return @bitCast(0); 1147 | } 1148 | 1149 | /// 1150 | /// The `omp_get_num_interop_properties` routine retrieves the number of implementation-defined properties available for an `omp_interop_t` object 1151 | /// 1152 | inline fn get_num_interop_properties(this: Self) interop_property_t { 1153 | return @enumFromInt(c.omp_get_num_interop_properties(this)); 1154 | } 1155 | 1156 | /// 1157 | /// The `omp_get_interop_int` routine retrieves an integer property from an `omp_interop_t` object. 1158 | /// 1159 | inline fn get_int(this: Self, property: interop_property_t, ret_code: *c_int) intptr_t { 1160 | return c.omp_get_interop_int(this, property, ret_code); 1161 | } 1162 | 1163 | /// 1164 | /// The `omp_get_interop_ptr` routine retrieves a pointer property from an `omp_interop_t` object. 1165 | /// 1166 | inline fn get_interop_ptr(this: Self, property: interop_property_t, ret_code: *c_int) *anyopaque { 1167 | return c.omp_get_interop_ptr(this, property, ret_code); 1168 | } 1169 | 1170 | /// 1171 | /// The `omp_get_interop_str` routine retrieves a string property from an `omp_interop_t` object. 1172 | /// 1173 | inline fn get_str(this: Self, property: interop_property_t, ret_code: *c_int) [:0]const u8 { 1174 | return c.omp_get_interop_str(this, property, ret_code); 1175 | } 1176 | 1177 | /// 1178 | /// The `omp_get_interop_name` routine retrieves a property name from an `omp_interop_t` object. 1179 | /// 1180 | inline fn get_name(this: Self, property: interop_property_t) [:0]const u8 { 1181 | return c.omp_get_interop_name(this, property); 1182 | } 1183 | 1184 | /// 1185 | /// The `omp_get_interop_type_desc` routine retrieves a description of the type of a property associated with an `omp_interop_t` object. 1186 | /// 1187 | inline fn get_type_desc(this: Self, property: interop_property_t) [:0]const u8 { 1188 | return c.omp_get_interop_type_desc(this, property); 1189 | } 1190 | 1191 | /// 1192 | /// The `omp_get_interop_rc_desc` routine retrieves a description of the return code associated with an `omp_interop_t` object. 1193 | /// 1194 | inline fn get_rc_desc(this: Self, ret_code: interop_rc_t) [:0]const u8 { 1195 | return c.omp_get_interop_rc_desc(this, ret_code); 1196 | } 1197 | }; 1198 | 1199 | /// OpenMP 5.1 device memory routines 1200 | /// 1201 | /// The `omp_target_memcpy_async` routine asynchronously performs a copy between any combination of host and device pointers. 1202 | /// 1203 | inline fn target_memcpy_async( 1204 | dst: *u8, 1205 | src: *const u8, 1206 | length: usize, 1207 | dst_offset: usize, 1208 | src_offset: usize, 1209 | device_num: c_int, 1210 | dep: *depend_t, 1211 | ) c_int { 1212 | return c.omp_target_memcpy_async(dst, src, length, dst_offset, src_offset, device_num, dep); 1213 | } 1214 | 1215 | /// 1216 | /// The `omp_target_memcpy_rect_async` routine asynchronously performs a copy between any combination of host and device pointers. 
1217 | /// 1218 | inline fn target_memcpy_rect_async( 1219 | dst: *u8, 1220 | src: *const u8, 1221 | element_size: usize, 1222 | num_dims: c_int, 1223 | volume: *usize, 1224 | dst_offsets: *usize, 1225 | src_offsets: *usize, 1226 | dst_dimensions: *usize, 1227 | src_dimensions: *usize, 1228 | dst_device_num: c_int, 1229 | src_device_num: c_int, 1230 | dep: *depend_t, 1231 | ) c_int { 1232 | return c.omp_target_memcpy_rect_async( 1233 | dst, 1234 | src, 1235 | element_size, 1236 | num_dims, 1237 | volume, 1238 | dst_offsets, 1239 | src_offsets, 1240 | dst_dimensions, 1241 | src_dimensions, 1242 | dst_device_num, 1243 | src_device_num, 1244 | dep, 1245 | ); 1246 | } 1247 | 1248 | // OpenMP 6.0 device memory routines 1249 | pub inline fn target_memset(ptr: *u8, value: c_int, size: usize, device_num: c_int) *u8 { 1250 | return c.omp_target_memset(ptr, value, size, device_num); 1251 | } 1252 | pub inline fn target_memset_async(ptr: *u8, value: c_int, size: usize, device_num: c_int, dep: *depend_t) *u8 { 1253 | return c.omp_target_memset_async(ptr, value, size, device_num, dep); 1254 | } 1255 | /// 1256 | /// The `omp_get_mapped_ptr` routine returns the device pointer that is associated with a host pointer for a given device. 1257 | /// 1258 | inline fn get_mapped_ptr(ptr: *const anyopaque, device_num: c_int) *anyopaque { 1259 | return c.omp_get_mapped_ptr(ptr, device_num); 1260 | } 1261 | /// 1262 | /// The `omp_target_is_accessible` routine tests whether host memory is accessible from a given device. 1263 | inline fn target_is_accessible(ptr: *const anyopaque, size: usize, device_num: c_int) c_int { 1264 | return c.omp_target_is_accessible(ptr, size, device_num); 1265 | } 1266 | 1267 | // / kmp API functions 1268 | // extern "c" inline fn kmp_get_stacksize (void)int ; 1269 | // extern "c" inline fn kmp_set_stacksize (int)void ; 1270 | // extern "c" inline fn kmp_get_stacksize_s (void)size_t ; 1271 | // extern "c" inline fn kmp_set_stacksize_s (size_t)void ; 1272 | // extern "c" inline fn kmp_get_blocktime (void)int ; 1273 | // extern "c" inline fn kmp_get_library (void)int ; 1274 | // extern "c" inline fn kmp_set_blocktime (int)void ; 1275 | // extern "c" inline fn kmp_set_library (int)void ; 1276 | // extern "c" inline fn kmp_set_library_serial (void)void ; 1277 | // extern "c" inline fn kmp_set_library_turnaround (void)void ; 1278 | // extern "c" inline fn kmp_set_library_throughput (void)void ; 1279 | // extern "c" inline fn kmp_set_defaults (char const *)void ; 1280 | // extern "c" inline fn kmp_set_disp_num_buffers (int)void ; 1281 | // // 1282 | // // /* Intel affinity API */ 1283 | // // typedef void * kmp_affinity_mask_t; 1284 | // // 1285 | // // extern int __KAI_KMPC_CONVENTION kmp_set_affinity (kmp_affinity_mask_t *); 1286 | // // extern int __KAI_KMPC_CONVENTION kmp_get_affinity (kmp_affinity_mask_t *); 1287 | // // extern int __KAI_KMPC_CONVENTION kmp_get_affinity_max_proc (void); 1288 | // // extern void __KAI_KMPC_CONVENTION kmp_create_affinity_mask (kmp_affinity_mask_t *); 1289 | // // extern void __KAI_KMPC_CONVENTION kmp_destroy_affinity_mask (kmp_affinity_mask_t *); 1290 | // // extern int __KAI_KMPC_CONVENTION kmp_set_affinity_mask_proc (int, kmp_affinity_mask_t *); 1291 | // // extern int __KAI_KMPC_CONVENTION kmp_unset_affinity_mask_proc (int, kmp_affinity_mask_t *); 1292 | // // extern int __KAI_KMPC_CONVENTION kmp_get_affinity_mask_proc (int, kmp_affinity_mask_t *); 1293 | // // 1294 | // // /* OpenMP 4.0 affinity API */ 1295 | // // typedef enum omp_proc_bind_t { 1296 | // // 
omp_proc_bind_false = 0, 1297 | // // omp_proc_bind_true = 1, 1298 | // // omp_proc_bind_master = 2, 1299 | // // omp_proc_bind_close = 3, 1300 | // // omp_proc_bind_spread = 4 1301 | // // } omp_proc_bind_t; 1302 | // // 1303 | // // extern omp_proc_bind_t __KAI_KMPC_CONVENTION omp_get_proc_bind (void); 1304 | // // 1305 | // // /* OpenMP 4.5 affinity API */ 1306 | // // extern int __KAI_KMPC_CONVENTION omp_get_num_places (void); 1307 | // // extern int __KAI_KMPC_CONVENTION omp_get_place_num_procs (int); 1308 | // // extern void __KAI_KMPC_CONVENTION omp_get_place_proc_ids (int, int *); 1309 | // // extern int __KAI_KMPC_CONVENTION omp_get_place_num (void); 1310 | // // extern int __KAI_KMPC_CONVENTION omp_get_partition_num_places (void); 1311 | // // extern void __KAI_KMPC_CONVENTION omp_get_partition_place_nums (int *); 1312 | // // 1313 | // // extern void * __KAI_KMPC_CONVENTION kmp_malloc (size_t); 1314 | // // extern void * __KAI_KMPC_CONVENTION kmp_aligned_malloc (size_t, size_t); 1315 | // // extern void * __KAI_KMPC_CONVENTION kmp_calloc (size_t, size_t); 1316 | // // extern void * __KAI_KMPC_CONVENTION kmp_realloc (void *, size_t); 1317 | // // extern void __KAI_KMPC_CONVENTION kmp_free (void *); 1318 | // // 1319 | // // extern void __KAI_KMPC_CONVENTION kmp_set_warnings_on(void); 1320 | // // extern void __KAI_KMPC_CONVENTION kmp_set_warnings_off(void); 1321 | // // 1322 | // // /* OpenMP 5.0 Tool Control */ 1323 | // // typedef enum omp_control_tool_result_t { 1324 | // // omp_control_tool_notool = -2, 1325 | // // omp_control_tool_nocallback = -1, 1326 | // // omp_control_tool_success = 0, 1327 | // // omp_control_tool_ignored = 1 1328 | // // } omp_control_tool_result_t; 1329 | // // 1330 | // // typedef enum omp_control_tool_t { 1331 | // // omp_control_tool_start = 1, 1332 | // // omp_control_tool_pause = 2, 1333 | // // omp_control_tool_flush = 3, 1334 | // // omp_control_tool_end = 4 1335 | // // } omp_control_tool_t; 1336 | // // 1337 | // // extern int __KAI_KMPC_CONVENTION omp_control_tool(int, int, void*); 1338 | // // 1339 | // // /* OpenMP 5.0 Memory Management */ 1340 | // // typedef uintptr_t omp_uintptr_t; 1341 | // // 1342 | // // typedef enum { 1343 | // // omp_atk_sync_hint = 1, 1344 | // // omp_atk_alignment = 2, 1345 | // // omp_atk_access = 3, 1346 | // // omp_atk_pool_size = 4, 1347 | // // omp_atk_fallback = 5, 1348 | // // omp_atk_fb_data = 6, 1349 | // // omp_atk_pinned = 7, 1350 | // // omp_atk_partition = 8 1351 | // // } omp_alloctrait_key_t; 1352 | // // 1353 | // // typedef enum { 1354 | // // omp_atv_false = 0, 1355 | // // omp_atv_true = 1, 1356 | // // omp_atv_contended = 3, 1357 | // // omp_atv_uncontended = 4, 1358 | // // omp_atv_serialized = 5, 1359 | // // omp_atv_sequential = omp_atv_serialized, // (deprecated) 1360 | // // omp_atv_private = 6, 1361 | // // omp_atv_all = 7, 1362 | // // omp_atv_thread = 8, 1363 | // // omp_atv_pteam = 9, 1364 | // // omp_atv_cgroup = 10, 1365 | // // omp_atv_default_mem_fb = 11, 1366 | // // omp_atv_null_fb = 12, 1367 | // // omp_atv_abort_fb = 13, 1368 | // // omp_atv_allocator_fb = 14, 1369 | // // omp_atv_environment = 15, 1370 | // // omp_atv_nearest = 16, 1371 | // // omp_atv_blocked = 17, 1372 | // // omp_atv_interleaved = 18 1373 | // // } omp_alloctrait_value_t; 1374 | // // #define omp_atv_default ((omp_uintptr_t)-1) 1375 | // // 1376 | // // typedef struct { 1377 | // // omp_alloctrait_key_t key; 1378 | // // omp_uintptr_t value; 1379 | // // } omp_alloctrait_t; 1380 | // // 1381 | // // # if 
defined(_WIN32) 1382 | // // // On Windows cl and icl do not support 64-bit enum, let's use integer then. 1383 | // // typedef omp_uintptr_t omp_allocator_handle_t; 1384 | // // extern __KMP_IMP omp_allocator_handle_t const omp_null_allocator; 1385 | // // extern __KMP_IMP omp_allocator_handle_t const omp_default_mem_alloc; 1386 | // // extern __KMP_IMP omp_allocator_handle_t const omp_large_cap_mem_alloc; 1387 | // // extern __KMP_IMP omp_allocator_handle_t const omp_const_mem_alloc; 1388 | // // extern __KMP_IMP omp_allocator_handle_t const omp_high_bw_mem_alloc; 1389 | // // extern __KMP_IMP omp_allocator_handle_t const omp_low_lat_mem_alloc; 1390 | // // extern __KMP_IMP omp_allocator_handle_t const omp_cgroup_mem_alloc; 1391 | // // extern __KMP_IMP omp_allocator_handle_t const omp_pteam_mem_alloc; 1392 | // // extern __KMP_IMP omp_allocator_handle_t const omp_thread_mem_alloc; 1393 | // // extern __KMP_IMP omp_allocator_handle_t const llvm_omp_target_host_mem_alloc; 1394 | // // extern __KMP_IMP omp_allocator_handle_t const llvm_omp_target_shared_mem_alloc; 1395 | // // extern __KMP_IMP omp_allocator_handle_t const llvm_omp_target_device_mem_alloc; 1396 | // // 1397 | // // typedef omp_uintptr_t omp_memspace_handle_t; 1398 | // // extern __KMP_IMP omp_memspace_handle_t const omp_default_mem_space; 1399 | // // extern __KMP_IMP omp_memspace_handle_t const omp_large_cap_mem_space; 1400 | // // extern __KMP_IMP omp_memspace_handle_t const omp_const_mem_space; 1401 | // // extern __KMP_IMP omp_memspace_handle_t const omp_high_bw_mem_space; 1402 | // // extern __KMP_IMP omp_memspace_handle_t const omp_low_lat_mem_space; 1403 | // // extern __KMP_IMP omp_memspace_handle_t const llvm_omp_target_host_mem_space; 1404 | // // extern __KMP_IMP omp_memspace_handle_t const llvm_omp_target_shared_mem_space; 1405 | // // extern __KMP_IMP omp_memspace_handle_t const llvm_omp_target_device_mem_space; 1406 | // // # else 1407 | // // # if __cplusplus >= 201103 1408 | // // typedef enum omp_allocator_handle_t : omp_uintptr_t 1409 | // // # else 1410 | // // typedef enum omp_allocator_handle_t 1411 | // // # endif 1412 | // // { 1413 | // // omp_null_allocator = 0, 1414 | // // omp_default_mem_alloc = 1, 1415 | // // omp_large_cap_mem_alloc = 2, 1416 | // // omp_const_mem_alloc = 3, 1417 | // // omp_high_bw_mem_alloc = 4, 1418 | // // omp_low_lat_mem_alloc = 5, 1419 | // // omp_cgroup_mem_alloc = 6, 1420 | // // omp_pteam_mem_alloc = 7, 1421 | // // omp_thread_mem_alloc = 8, 1422 | // // llvm_omp_target_host_mem_alloc = 100, 1423 | // // llvm_omp_target_shared_mem_alloc = 101, 1424 | // // llvm_omp_target_device_mem_alloc = 102, 1425 | // // KMP_ALLOCATOR_MAX_HANDLE = UINTPTR_MAX 1426 | // // } omp_allocator_handle_t; 1427 | // // # if __cplusplus >= 201103 1428 | // // typedef enum omp_memspace_handle_t : omp_uintptr_t 1429 | // // # else 1430 | // // typedef enum omp_memspace_handle_t 1431 | // // # endif 1432 | // // { 1433 | // // omp_default_mem_space = 0, 1434 | // // omp_large_cap_mem_space = 1, 1435 | // // omp_const_mem_space = 2, 1436 | // // omp_high_bw_mem_space = 3, 1437 | // // omp_low_lat_mem_space = 4, 1438 | // // llvm_omp_target_host_mem_space = 100, 1439 | // // llvm_omp_target_shared_mem_space = 101, 1440 | // // llvm_omp_target_device_mem_space = 102, 1441 | // // KMP_MEMSPACE_MAX_HANDLE = UINTPTR_MAX 1442 | // // } omp_memspace_handle_t; 1443 | // // # endif 1444 | // // extern omp_allocator_handle_t __KAI_KMPC_CONVENTION omp_init_allocator(omp_memspace_handle_t m, 1445 | // // int 
ntraits, omp_alloctrait_t traits[]); 1446 | // // extern void __KAI_KMPC_CONVENTION omp_destroy_allocator(omp_allocator_handle_t allocator); 1447 | // // 1448 | // // extern void __KAI_KMPC_CONVENTION omp_set_default_allocator(omp_allocator_handle_t a); 1449 | // // extern omp_allocator_handle_t __KAI_KMPC_CONVENTION omp_get_default_allocator(void); 1450 | // // # ifdef __cplusplus 1451 | // // extern void *__KAI_KMPC_CONVENTION omp_alloc(size_t size, omp_allocator_handle_t a = omp_null_allocator); 1452 | // // extern void *__KAI_KMPC_CONVENTION omp_aligned_alloc(size_t align, size_t size, 1453 | // // omp_allocator_handle_t a = omp_null_allocator); 1454 | // // extern void *__KAI_KMPC_CONVENTION omp_calloc(size_t nmemb, size_t size, 1455 | // // omp_allocator_handle_t a = omp_null_allocator); 1456 | // // extern void *__KAI_KMPC_CONVENTION omp_aligned_calloc(size_t align, size_t nmemb, size_t size, 1457 | // // omp_allocator_handle_t a = omp_null_allocator); 1458 | // // extern void *__KAI_KMPC_CONVENTION omp_realloc(void *ptr, size_t size, 1459 | // // omp_allocator_handle_t allocator = omp_null_allocator, 1460 | // // omp_allocator_handle_t free_allocator = omp_null_allocator); 1461 | // // extern void __KAI_KMPC_CONVENTION omp_free(void * ptr, omp_allocator_handle_t a = omp_null_allocator); 1462 | // // # else 1463 | // // extern void *__KAI_KMPC_CONVENTION omp_alloc(size_t size, omp_allocator_handle_t a); 1464 | // // extern void *__KAI_KMPC_CONVENTION omp_aligned_alloc(size_t align, size_t size, 1465 | // // omp_allocator_handle_t a); 1466 | // // extern void *__KAI_KMPC_CONVENTION omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t a); 1467 | // // extern void *__KAI_KMPC_CONVENTION omp_aligned_calloc(size_t align, size_t nmemb, size_t size, 1468 | // // omp_allocator_handle_t a); 1469 | // // extern void *__KAI_KMPC_CONVENTION omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator, 1470 | // // omp_allocator_handle_t free_allocator); 1471 | // // extern void __KAI_KMPC_CONVENTION omp_free(void *ptr, omp_allocator_handle_t a); 1472 | // // # endif 1473 | // // 1474 | // // /* OpenMP 5.0 Affinity Format */ 1475 | // // extern void __KAI_KMPC_CONVENTION omp_set_affinity_format(char const *); 1476 | // // extern size_t __KAI_KMPC_CONVENTION omp_get_affinity_format(char *, size_t); 1477 | // // extern void __KAI_KMPC_CONVENTION omp_display_affinity(char const *); 1478 | // // extern size_t __KAI_KMPC_CONVENTION omp_capture_affinity(char *, size_t, char const *); 1479 | // // 1480 | // // /* OpenMP 5.0 events */ 1481 | // // # if defined(_WIN32) 1482 | // // // On Windows cl and icl do not support 64-bit enum, let's use integer then. 
1483 | // // typedef omp_uintptr_t omp_event_handle_t; 1484 | // // # else 1485 | // // typedef enum omp_event_handle_t { KMP_EVENT_MAX_HANDLE = UINTPTR_MAX } omp_event_handle_t; 1486 | // // # endif 1487 | // // extern void __KAI_KMPC_CONVENTION omp_fulfill_event ( omp_event_handle_t event ); 1488 | // // 1489 | // // /* OpenMP 5.0 Pause Resources */ 1490 | // // typedef enum omp_pause_resource_t { 1491 | // // omp_pause_resume = 0, 1492 | // // omp_pause_soft = 1, 1493 | // // omp_pause_hard = 2 1494 | // // } omp_pause_resource_t; 1495 | // // extern int __KAI_KMPC_CONVENTION omp_pause_resource(omp_pause_resource_t, int); 1496 | // // extern int __KAI_KMPC_CONVENTION omp_pause_resource_all(omp_pause_resource_t); 1497 | // // 1498 | // // extern int __KAI_KMPC_CONVENTION omp_get_supported_active_levels(void); 1499 | // // 1500 | // // /* OpenMP 5.1 */ 1501 | // // extern void __KAI_KMPC_CONVENTION omp_set_num_teams(int num_teams); 1502 | // // extern int __KAI_KMPC_CONVENTION omp_get_max_teams(void); 1503 | // // extern void __KAI_KMPC_CONVENTION omp_set_teams_thread_limit(int limit); 1504 | // // extern int __KAI_KMPC_CONVENTION omp_get_teams_thread_limit(void); 1505 | // // 1506 | // // /* OpenMP 5.1 Display Environment */ 1507 | // // extern void omp_display_env(int verbose); 1508 | // // 1509 | // // # if defined(_OPENMP) && _OPENMP >= 201811 1510 | // // #pragma omp begin declare variant match(device={kind(host)}) 1511 | // // static inline int omp_is_initial_device(void) { return 1; } 1512 | // // #pragma omp end declare variant 1513 | // // #pragma omp begin declare variant match(device={kind(nohost)}) 1514 | // // static inline int omp_is_initial_device(void) { return 0; } 1515 | // // #pragma omp end declare variant 1516 | // // # endif 1517 | // // 1518 | // // /* OpenMP 5.2 */ 1519 | // // extern int __KAI_KMPC_CONVENTION omp_in_explicit_task(void); 1520 | // // 1521 | // // /* LLVM Extensions */ 1522 | // // extern void *llvm_omp_target_dynamic_shared_alloc(void); 1523 | // // 1524 | // // # undef __KAI_KMPC_CONVENTION 1525 | // // # undef __KMP_IMP 1526 | // // 1527 | // // /* Warning: 1528 | // // The following typedefs are not standard, deprecated and will be removed in a future release. 1529 | // // */ 1530 | // // typedef int omp_int_t; 1531 | // // typedef double omp_wtime_t; 1532 | // // 1533 | // // # ifdef __cplusplus 1534 | // // } 1535 | // // # endif 1536 | // // 1537 | // // #endif /* __OMP_H */ 1538 | --------------------------------------------------------------------------------
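A minimal sketch of how the runtime setters/getters in src/omp.zig might be exercised, assuming the package is importable as `omp`, the binary links against libomp, and the test name and printed labels are illustrative only:

const std = @import("std");
const omp = @import("omp");

test "runtime API sketch" {
    // Pin the team size before querying the limits.
    omp.set_dynamic(false);
    omp.set_num_threads(4);

    std.debug.print("max threads: {}\n", .{omp.get_max_threads()});
    std.debug.print("num procs:   {}\n", .{omp.get_num_procs()});

    // Wall-clock helpers wrap omp_get_wtime / omp_get_wtick.
    const t0 = omp.get_wtime();
    std.time.sleep(1_000_000); // ~1 ms
    const t1 = omp.get_wtime();
    std.debug.print("elapsed: {d}s (tick = {d}s)\n", .{ t1 - t0, omp.get_wtick() });
}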
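The `lock` and `nest_lock` wrappers are thin shims over `omp_init_lock`/`omp_set_lock` and friends. A minimal, single-threaded sketch of the call sequence (illustrative only; in real code the lock would guard shared state touched from a parallel region):

const omp = @import("omp");

test "lock wrapper sketch" {
    var guard: omp.lock = undefined;
    guard.init();
    defer guard.destroy();

    guard.set();
    // ... critical section would go here ...
    guard.unset();

    // test_ acquires the lock only when it is currently free.
    if (guard.test_()) {
        guard.unset();
    }
}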
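The OpenMP 4.x/5.0 device routines are plain host-side queries and can be called even in host-only builds. A hedged sketch, again assuming the module is importable as `omp`:

const std = @import("std");
const omp = @import("omp");

test "device query sketch" {
    // Reports 0 devices on a host-only build; the initial device is the host.
    std.debug.print("devices: {} (default = {}, initial = {})\n", .{
        omp.get_num_devices(),
        omp.get_default_device(),
        omp.get_initial_device(),
    });
    // Outside a `teams` region there is a single team with number 0.
    std.debug.print("teams: {} (this team = {})\n", .{
        omp.get_num_teams(),
        omp.get_team_num(),
    });
}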