├── .gitignore ├── tests ├── params.zig ├── main.zig ├── barrier.zig ├── flush.zig ├── masked.zig ├── reduction.zig ├── critical.zig ├── return.zig ├── errors.zig ├── sections.zig ├── parallel.zig └── task.zig ├── src ├── ompt.zig ├── workshare_env.zig ├── input_handler.zig ├── reduce.zig ├── kmp.zig └── omp.zig ├── flake.nix ├── LICENSE ├── flake.lock └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | /zig-*/ 2 | .direnv/ 3 | .envrc 4 | flake.lock 5 | 6 | /omp.rep/ 7 | omp.gpr 8 | omp.loc* 9 | 10 | /llvm-project/ 11 | -------------------------------------------------------------------------------- /tests/params.zig: -------------------------------------------------------------------------------- 1 | pub const loop_count: usize = 1000; 2 | pub const repetitions: usize = 10; 3 | 4 | pub const sleep_time: usize = 500_000_000; 5 | 6 | pub const num_tasks: usize = 15; 7 | pub const max_tasks_per_thread: usize = 5; 8 | -------------------------------------------------------------------------------- /tests/main.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | 3 | pub const errors = @import("errors.zig"); 4 | pub const @"return" = @import("return.zig"); 5 | pub const barrier = @import("barrier.zig"); 6 | pub const flush = @import("flush.zig"); 7 | pub const masked = @import("masked.zig"); 8 | pub const task = @import("task.zig"); 9 | pub const critical = @import("critical.zig"); 10 | pub const reduction = @import("reduction.zig"); 11 | pub const sections = @import("sections.zig"); 12 | pub const parallel = @import("parallel.zig"); 13 | 14 | test "all" { 15 | std.testing.refAllDecls(@This()); 16 | } 17 | -------------------------------------------------------------------------------- /src/ompt.zig: -------------------------------------------------------------------------------- 1 | const opts = @import("build_options"); 2 | const kmp = @import("kmp.zig"); 3 | 4 | pub const data_t = extern union { 5 | val: usize, 6 | ptr: *anyopaque, 7 | }; 8 | 9 | pub const frame_t = extern struct { 10 | exit_frame: data_t, 11 | enter_frame: data_t, 12 | exit_frame_flags: c_int, 13 | enter_frame_flags: c_int, 14 | }; 15 | 16 | pub const dispatch_chunk_t = extern struct { 17 | start: usize, 18 | iterations: usize, 19 | }; 20 | 21 | pub const task_info_t = extern struct { 22 | frame: frame_t, 23 | task_data: data_t, 24 | scheduling_parent: *kmp.task_data_t, 25 | thread_num: c_int, 26 | dispatch_chunk: dispatch_chunk_t, 27 | }; 28 | -------------------------------------------------------------------------------- /flake.nix: -------------------------------------------------------------------------------- 1 | { 2 | description = "Python shell flake"; 3 | 4 | inputs = { 5 | nixpkgs.url = "github:nixos/nixpkgs/nixos-unstable"; 6 | flake-utils.url = "github:numtide/flake-utils"; 7 | }; 8 | 9 | outputs = { self, nixpkgs, flake-utils, ... 
}: 10 | flake-utils.lib.eachDefaultSystem (system: 11 | let 12 | pkgs = nixpkgs.legacyPackages.${system}; 13 | lib = nixpkgs.lib; 14 | in 15 | { 16 | devShells.default = pkgs.mkShell { 17 | packages = with pkgs; 18 | [ 19 | zig 20 | zls 21 | 22 | llvmPackages_18.openmp 23 | llvmPackages_18.clang 24 | llvmPackages_18.llvm 25 | perl 26 | cmake 27 | ]; 28 | }; 29 | } 30 | ); 31 | } 32 | -------------------------------------------------------------------------------- /tests/barrier.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const omp = @import("omp"); 3 | const params = @import("params.zig"); 4 | 5 | fn test_omp_barrier() bool { 6 | var result1: u32 = 0; 7 | var result2: u32 = 0; 8 | 9 | omp.parallel(.{}) 10 | .run(.{ .shared = .{ &result1, &result2 } }, struct { 11 | fn f(f_result1: *u32, f_result2: *u32) void { 12 | const rank: u32 = omp.get_thread_num(); 13 | if (rank == 1) { 14 | std.time.sleep(params.sleep_time); 15 | f_result2.* = 3; 16 | } 17 | 18 | omp.barrier(); 19 | if (rank == 2) { 20 | f_result1.* = f_result2.*; 21 | } 22 | } 23 | }.f); 24 | 25 | return result1 == 3; 26 | } 27 | 28 | test "barrier" { 29 | var num_failed: u32 = 0; 30 | omp.set_dynamic(false); 31 | omp.set_num_threads(4); 32 | for (0..params.repetitions) |_| { 33 | if (!test_omp_barrier()) { 34 | num_failed += 1; 35 | } 36 | } 37 | 38 | try std.testing.expect(num_failed == 0); 39 | } 40 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Luca Bancale 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /tests/flush.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const omp = @import("omp"); 3 | const params = @import("params.zig"); 4 | 5 | pub fn test_omp_flush() bool { 6 | var result1: u32 = 0; 7 | var result2: u32 = 0; 8 | var dummy: u32 = 0; 9 | 10 | omp.parallel(.{}) 11 | .run(.{ .shared = .{ &result1, &result2, &dummy } }, struct { 12 | fn f(f_result1: *u32, f_result2: *u32, f_dummy: *u32) void { 13 | const rank: u32 = omp.get_thread_num(); 14 | omp.barrier(); 15 | 16 | if (rank == 1) { 17 | f_result2.* = 3; 18 | omp.flush(.{f_result2}); 19 | f_dummy.* = f_result2.*; 20 | } 21 | 22 | if (rank == 0) { 23 | std.time.sleep(params.sleep_time); 24 | omp.flush(.{f_result2}); 25 | f_result1.* = f_result2.*; 26 | } 27 | } 28 | }.f); 29 | 30 | if (result1 != 3 or result2 != 3 or dummy != 3) { 31 | std.debug.print("result1: {}, result2: {}, dummy: {}\n", .{ result1, result2, dummy }); 32 | } 33 | 34 | return result1 == 3 and result2 == 3 and dummy == 3; 35 | } 36 | 37 | test "flush" { 38 | var num_failed: u32 = 0; 39 | omp.set_dynamic(false); 40 | if (omp.get_max_threads() == 1) { 41 | omp.set_num_threads(2); 42 | } 43 | 44 | for (0..params.repetitions) |_| { 45 | if (!test_omp_flush()) { 46 | num_failed += 1; 47 | } 48 | } 49 | 50 | try std.testing.expect(num_failed == 0); 51 | } 52 | -------------------------------------------------------------------------------- /flake.lock: -------------------------------------------------------------------------------- 1 | { 2 | "nodes": { 3 | "flake-utils": { 4 | "inputs": { 5 | "systems": "systems" 6 | }, 7 | "locked": { 8 | "lastModified": 1710146030, 9 | "narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=", 10 | "owner": "numtide", 11 | "repo": "flake-utils", 12 | "rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a", 13 | "type": "github" 14 | }, 15 | "original": { 16 | "owner": "numtide", 17 | "repo": "flake-utils", 18 | "type": "github" 19 | } 20 | }, 21 | "nixpkgs": { 22 | "locked": { 23 | "lastModified": 1724479785, 24 | "narHash": "sha256-pP3Azj5d6M5nmG68Fu4JqZmdGt4S4vqI5f8te+E/FTw=", 25 | "owner": "nixos", 26 | "repo": "nixpkgs", 27 | "rev": "d0e1602ddde669d5beb01aec49d71a51937ed7be", 28 | "type": "github" 29 | }, 30 | "original": { 31 | "owner": "nixos", 32 | "ref": "nixos-unstable", 33 | "repo": "nixpkgs", 34 | "type": "github" 35 | } 36 | }, 37 | "root": { 38 | "inputs": { 39 | "flake-utils": "flake-utils", 40 | "nixpkgs": "nixpkgs" 41 | } 42 | }, 43 | "systems": { 44 | "locked": { 45 | "lastModified": 1681028828, 46 | "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", 47 | "owner": "nix-systems", 48 | "repo": "default", 49 | "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", 50 | "type": "github" 51 | }, 52 | "original": { 53 | "owner": "nix-systems", 54 | "repo": "default", 55 | "type": "github" 56 | } 57 | } 58 | }, 59 | "root": "root", 60 | "version": 7 61 | } 62 | -------------------------------------------------------------------------------- /tests/masked.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const omp = @import("omp"); 3 | const params = @import("params.zig"); 4 | 5 | fn test_omp_masked() bool { 6 | var nthreads: u32 = 0; 7 | var executing_thread: i32 = -1; 8 | var tid_result: u32 = 0; 9 | 10 | omp.parallel(.{}) 11 | .run(.{ .shared = .{ &nthreads, &executing_thread, 
&tid_result } }, struct { 12 | fn f(f_nthreads: *u32, f_executing_thread: *i32, f_tid_result: *u32) void { 13 | omp.masked() 14 | .run(.{ f_nthreads, f_executing_thread, f_tid_result }, omp.only_master, struct { 15 | fn f(ff_nthreads: *u32, ff_executing_thread: *i32, ff_tid_result: *u32) void { 16 | const tid: i32 = @intCast(omp.get_thread_num()); 17 | 18 | if (tid != 0) { 19 | omp.critical(.{}) 20 | .run(.{ff_tid_result}, struct { 21 | fn f(fff_tid_result: *u32) void { 22 | fff_tid_result.* += 1; 23 | } 24 | }.f); 25 | } 26 | 27 | omp.critical(.{}) 28 | .run(.{ff_nthreads}, struct { 29 | fn f(fff_nthreads: *u32) void { 30 | fff_nthreads.* += 1; 31 | } 32 | }.f); 33 | ff_executing_thread.* = @intCast(omp.get_thread_num()); 34 | } 35 | }.f); 36 | } 37 | }.f); 38 | 39 | return (nthreads == 1) and (executing_thread == 0) and (tid_result == 0); 40 | } 41 | 42 | test "masked" { 43 | var num_failed: u32 = 0; 44 | 45 | for (0..params.repetitions) |_| { 46 | if (!test_omp_masked()) { 47 | num_failed += 1; 48 | } 49 | } 50 | 51 | try std.testing.expect(num_failed == 0); 52 | } 53 | -------------------------------------------------------------------------------- /tests/reduction.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const omp = @import("omp"); 3 | const params = @import("params.zig"); 4 | 5 | fn parallel_reduction_plus() bool { 6 | var sum: u32 = 0; 7 | const known_sum: u32 = (params.loop_count * (params.loop_count + 1)) / 2; 8 | 9 | omp.parallel(.{}) 10 | .run(.{ .shared = .{&sum} }, struct { 11 | fn f(f_sum: *u32) void { 12 | omp.loop(u32, .{ .reduction = &.{.plus} }) 13 | .run(.{ .reduction = .{f_sum} }, 1, params.loop_count + 1, 1, struct { 14 | fn f(i: u32, ff_sum: *u32) void { 15 | ff_sum.* += i; 16 | } 17 | }.f); 18 | } 19 | }.f); 20 | 21 | if (known_sum != sum) { 22 | std.debug.print("red KNOWN_SUM = {}\n", .{known_sum}); 23 | std.debug.print("SUM = {}\n", .{sum}); 24 | } 25 | 26 | return known_sum == sum; 27 | } 28 | 29 | test "parallel_reduction_plus" { 30 | var num_failed: u32 = 0; 31 | for (0..params.repetitions) |_| { 32 | if (!parallel_reduction_plus()) { 33 | num_failed += 1; 34 | } 35 | } 36 | 37 | try std.testing.expect(num_failed == 0); 38 | } 39 | 40 | fn parallel_loop_reduction_plus() bool { 41 | var sum: u32 = 0; 42 | const known_sum: u32 = (params.loop_count * (params.loop_count + 1)) / 2; 43 | 44 | omp.parallel(.{}) 45 | .loop(u32, .{ .reduction = &.{.plus} }) 46 | .run(.{ .reduction = .{&sum} }, 1, params.loop_count + 1, 1, struct { 47 | fn f(i: u32, f_sum: *u32) void { 48 | f_sum.* += i; 49 | } 50 | }.f); 51 | 52 | if (known_sum != sum) { 53 | std.debug.print("red KNOWN_SUM = {}\n", .{known_sum}); 54 | std.debug.print("SUM = {}\n", .{sum}); 55 | } 56 | 57 | return known_sum == sum; 58 | } 59 | 60 | test "parallel_loop_reduction_plus" { 61 | var num_failed: u32 = 0; 62 | omp.set_num_threads(3); 63 | for (0..params.repetitions) |_| { 64 | if (!parallel_loop_reduction_plus()) { 65 | num_failed += 1; 66 | } 67 | } 68 | 69 | try std.testing.expect(num_failed == 0); 70 | } 71 | -------------------------------------------------------------------------------- /tests/critical.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const omp = @import("omp"); 3 | const params = @import("params.zig"); 4 | 5 | fn test_omp_critical() bool { 6 | var sum: u32 = 0; 7 | const known_sum: u32 = 999 * 1000 / 2; 8 | 9 | omp.parallel(.{}) 10 | .run(.{
.shared = .{&sum} }, struct { 11 | fn f(f_sum: *u32) void { 12 | var mysum: u32 = 0; 13 | 14 | omp.loop(u32, .{}) 15 | .run(.{ .shared = .{&mysum} }, 1, params.loop_count, 1, struct { 16 | fn f(i: u32, f_mysum: *u32) void { 17 | f_mysum.* = f_mysum.* + i; 18 | } 19 | }.f); 20 | 21 | omp.critical(.{}) 22 | .run(.{ f_sum, &mysum }, struct { 23 | fn f(ff_sum: *u32, f_mysum: *u32) void { 24 | ff_sum.* += f_mysum.*; 25 | } 26 | }.f); 27 | } 28 | }.f); 29 | 30 | if (sum != known_sum) { 31 | std.debug.print("sum: {}, known_sum: {}\n", .{ sum, known_sum }); 32 | } 33 | 34 | return known_sum == sum; 35 | } 36 | 37 | test "critical" { 38 | var num_failed: u32 = 0; 39 | 40 | for (0..params.repetitions) |_| { 41 | if (!test_omp_critical()) { 42 | num_failed += 1; 43 | } 44 | } 45 | 46 | try std.testing.expect(num_failed == 0); 47 | } 48 | 49 | fn omp_critical_hint(iter: u32) bool { 50 | var sum: u32 = 0; 51 | const known_sum: u32 = (999 * 1000) / 2; 52 | 53 | omp.parallel(.{}) 54 | .run(.{ .shared = .{ &sum, iter } }, struct { 55 | fn f(f_sum: *u32, f_iter: u32) void { 56 | var mysum: u32 = 0; 57 | omp.loop(u32, .{}) 58 | .run(.{ .shared = .{&mysum} }, 0, params.loop_count, 1, struct { 59 | fn f(i: u32, f_mysum: *u32) void { 60 | f_mysum.* = f_mysum.* + i; 61 | } 62 | }.f); 63 | 64 | const fun = struct { 65 | fn f(ff_sum: *u32, f_mysum: *u32) void { 66 | ff_sum.* += f_mysum.*; 67 | } 68 | }.f; 69 | 70 | switch (f_iter % 4) { 71 | 0 => { 72 | omp.critical(.{ .name = "a", .sync = .uncontended }).run(.{ f_sum, &mysum }, fun); 73 | }, 74 | 1 => { 75 | omp.critical(.{ .name = "b", .sync = .contended }).run(.{ f_sum, &mysum }, fun); 76 | }, 77 | 2 => { 78 | omp.critical(.{ .name = "c", .sync = .nonspeculative }).run(.{ f_sum, &mysum }, fun); 79 | }, 80 | 3 => { 81 | omp.critical(.{ .name = "d", .sync = .speculative }).run(.{ f_sum, &mysum }, fun); 82 | }, 83 | else => { 84 | unreachable; 85 | }, 86 | } 87 | } 88 | }.f); 89 | 90 | if (sum != known_sum) { 91 | std.debug.print("sum: {}, known_sum: {}\n", .{ sum, known_sum }); 92 | } 93 | 94 | return known_sum == sum; 95 | } 96 | 97 | test "critical_hint" { 98 | var num_failed: u32 = 0; 99 | 100 | for (0..params.repetitions) |i| { 101 | if (!omp_critical_hint(@intCast(i))) { 102 | num_failed += 1; 103 | } 104 | } 105 | 106 | try std.testing.expect(num_failed == 0); 107 | } 108 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # OpenMP-zig 2 | 3 | This wrapper implements (almost) all of the OpenMP directives up to version 3.1, plus some newer features. 4 | All of this is done (mostly, see below) without any allocation on the Zig side. 5 | 6 | This is implemented using the `libomp` library of LLVM (GOMP support is not planned). **Disclaimer:** This project is not affiliated with LLVM in any capacity.
7 | 8 | ```zig 9 | const std = @import("std"); 10 | const omp = @import("omp"); 11 | 12 | fn main() void { 13 | omp.parallel(.{}) 14 | .run(.{}, struct { 15 | fn f() void { 16 | std.debug.print("Hello world {}!", .{omp.get_thread_num()}); 17 | } 18 | }.f); 19 | } 20 | ``` 21 | 22 | ## Build 23 | 24 | ```sh 25 | zig fetch --save git+https://github.com/sbancuz/OpenMP-zig 26 | ``` 27 | 28 | ```zig 29 | // build.zig 30 | const OpenMP_zig_dep = b.dependency("OpenMP-zig", .{ 31 | .target = target, 32 | .optimize = optimize, 33 | }); 34 | exe.root_module.addImport("omp", OpenMP_zig_dep.module("omp")); 35 | ``` 36 | 37 | ## Features 38 | - [x] `#pragma omp parallel` 39 | - [x] `All reductions` 40 | - [x] `#pragma omp for` 41 | - [x] `#pragma omp sections` 42 | - [x] `#pragma omp single` 43 | - [x] `#pragma omp master/masked` 44 | - [x] `#pragma omp critical` 45 | - [x] `#pragma omp barrier` 46 | - [x] `#pragma omp task` 47 | - [ ] `#pragma omp atomic` NOT POSSIBLE TO IMPLEMENT 48 | - [ ] `#pragma omp simd` NOT POSSIBLE TO IMPLEMENT 49 | 50 | To see more examples of the library, check the tests folder. 51 | 52 | ## Extensions 53 | 54 | ```zig 55 | fn test_omp_task_error() !void { 56 | // The `ret_reduction` parameter tells the directive how it should reduce the return value 57 | const result = omp.parallel(.{ .ret_reduction = .plus }) 58 | .run(.{}, struct { 59 | // You can return whatever you want! 60 | fn f() !usize { 61 | const maybe = omp.single() 62 | .run(.{}, struct { 63 | // Only for tasks, you have to put the explicit error type in the promise, 64 | // otherwise it won't be able to infer the type 65 | fn f() *omp.promise(error{WompWomp}!usize) { 66 | return omp.task(.{}) 67 | .run(.{}, struct { 68 | // Same deal here 69 | fn f() error{WompWomp}!usize { 70 | return error.WompWomp; 71 | } 72 | }.f); 73 | } 74 | }.f); 75 | if (maybe) |pro| { 76 | defer pro.deinit(); 77 | return pro.get(); 78 | } 79 | return 0; 80 | } 81 | }.f) catch |err| switch (err) { 82 | error.WompWomp => blk: { std.debug.print("Caught an error :^(", .{}); break :blk 0; }, 83 | }; 84 | 85 | std.debug.print("No errors here! The reduced result is {}", .{result}); 86 | } 87 | ``` 88 | 89 | ### Return 90 | 91 | All of the directives can return values. To return something you may need to specify the `ret_reduction` parameter (see the short sketch at the end of this README). 92 | 93 | > [!WARNING] 94 | > The promises returned from the `task` directive are heap allocated, so make sure to call `deinit()` on them! 95 | 96 | ### Errors 97 | 98 | All of the directives can return error types. 99 | > [!WARNING] 100 | > Returning more than one type of error from a directive is clearly a race condition! 101 | 102 | ## Goal 103 | 104 | The goal of this library is to provide at least OpenMP 4.5 to Zig and to be production ready, along with the extensions mentioned above.
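As a quick reference for the `ret_reduction` parameter mentioned above, here is a minimal sketch adapted from `tests/return.zig`: every thread returns `1`, and the runtime reduces the per-thread results with `.plus`, so the final value equals the number of threads.

```zig
const std = @import("std");
const omp = @import("omp");

test "ret_reduction sketch" {
    // Each thread contributes 1; `.plus` sums the contributions.
    const sum = omp.parallel(.{ .ret_reduction = .plus })
        .run(.{}, struct {
            fn f() usize {
                return 1;
            }
        }.f);

    try std.testing.expect(omp.get_max_threads() == sum);
}
```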
105 | -------------------------------------------------------------------------------- /tests/return.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const omp = @import("omp"); 3 | const params = @import("params.zig"); 4 | 5 | fn test_omp_parallel_return() !bool { 6 | const sum = omp.parallel(.{ 7 | .ret_reduction = .plus, 8 | }).run(.{}, struct { 9 | fn f() usize { 10 | return 1; 11 | } 12 | }.f); 13 | 14 | return omp.get_max_threads() == sum; 15 | } 16 | 17 | test "parallel_return" { 18 | var num_failed: u32 = 0; 19 | if (!try test_omp_parallel_return()) { 20 | num_failed += 1; 21 | } 22 | 23 | try std.testing.expect(num_failed == 0); 24 | } 25 | 26 | fn test_omp_single_return() !bool { 27 | const result = omp.parallel(.{ .ret_reduction = .plus }) 28 | .run(.{}, struct { 29 | fn f() usize { 30 | const maybe = omp.single() 31 | .run(.{}, struct { 32 | fn f() usize { 33 | return 1; 34 | } 35 | }.f); 36 | if (maybe) |r| { 37 | return r; 38 | } 39 | return 0; 40 | } 41 | }.f); 42 | 43 | return result == 1; 44 | } 45 | 46 | test "single_return" { 47 | if (omp.get_max_threads() < 2) { 48 | omp.set_num_threads(8); 49 | } 50 | 51 | var num_failed: u32 = 0; 52 | for (0..params.repetitions) |_| { 53 | if (!try test_omp_single_return()) { 54 | num_failed += 1; 55 | } 56 | } 57 | 58 | try std.testing.expect(num_failed == 0); 59 | } 60 | 61 | fn test_omp_task_return() !bool { 62 | const result = omp.parallel(.{ .ret_reduction = .plus }) 63 | .run(.{}, struct { 64 | fn f() usize { 65 | const maybe = omp.single() 66 | .run(.{}, struct { 67 | fn f() *omp.promise(usize) { 68 | return omp.task(.{}) 69 | .run(.{}, struct { 70 | fn f() usize { 71 | return 1; 72 | } 73 | }.f); 74 | } 75 | }.f); 76 | if (maybe) |pro| { 77 | defer pro.deinit(); 78 | return pro.get(); 79 | } 80 | return 0; 81 | } 82 | }.f); 83 | 84 | return result == 1; 85 | } 86 | 87 | test "task_return" { 88 | if (omp.get_max_threads() < 2) { 89 | omp.set_num_threads(8); 90 | } 91 | 92 | var num_failed: u32 = 0; 93 | for (0..params.repetitions) |_| { 94 | if (!try test_omp_task_return()) { 95 | num_failed += 1; 96 | } 97 | } 98 | 99 | try std.testing.expect(num_failed == 0); 100 | } 101 | 102 | fn test_omp_loop_return() !bool { 103 | const res = omp.parallel(.{ .ret_reduction = .plus }) 104 | .run(.{}, struct { 105 | fn f() usize { 106 | const a = omp.loop(u32, .{ .ret_reduction = .plus }) 107 | .run(.{}, 0, params.loop_count, 1, struct { 108 | fn f(i: u32) usize { 109 | _ = i; 110 | return 1; 111 | } 112 | }.f); 113 | return a; 114 | } 115 | }.f); 116 | 117 | return params.loop_count * omp.get_max_threads() == res; 118 | } 119 | 120 | test "loop_return" { 121 | if (omp.get_max_threads() < 2) { 122 | omp.set_num_threads(8); 123 | } 124 | 125 | var num_failed: u32 = 0; 126 | for (0..1) |_| { 127 | if (!try test_omp_loop_return()) { 128 | num_failed += 1; 129 | } 130 | } 131 | 132 | try std.testing.expect(num_failed == 0); 133 | } 134 | -------------------------------------------------------------------------------- /tests/errors.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const omp = @import("omp"); 3 | const params = @import("params.zig"); 4 | 5 | fn test_omp_parallel_error() !bool { 6 | _ = omp.parallel(.{ 7 | .ret_reduction = .plus, 8 | }).run(.{}, struct { 9 | fn f() !usize { 10 | if (omp.get_thread_num() % 2 == 0) { 11 | return error.WompWomp; 12 | } else { 13 | return 1; 14 | } 15 | } 16 
| }.f) catch |err| switch (err) { 17 | error.WompWomp => return true, 18 | else => return false, 19 | }; 20 | 21 | return false; 22 | } 23 | 24 | test "parallel_error" { 25 | omp.set_num_threads(8); 26 | var num_failed: u32 = 0; 27 | for (0..params.repetitions * 100) |_| { 28 | if (!try test_omp_parallel_error()) { 29 | num_failed += 1; 30 | } 31 | } 32 | 33 | try std.testing.expect(num_failed == 0); 34 | } 35 | 36 | fn test_omp_single_error() !bool { 37 | _ = omp.parallel(.{ .ret_reduction = .plus }) 38 | .run(.{}, struct { 39 | fn f() !usize { 40 | const maybe = omp.single() 41 | .run(.{}, struct { 42 | fn f() !usize { 43 | return error.WompWomp; 44 | } 45 | }.f); 46 | if (maybe) |r| { 47 | return r; 48 | } 49 | return 0; 50 | } 51 | }.f) catch |err| switch (err) { 52 | error.WompWomp => return true, 53 | }; 54 | 55 | return false; 56 | } 57 | 58 | test "single_error" { 59 | if (omp.get_max_threads() < 2) { 60 | omp.set_num_threads(8); 61 | } 62 | 63 | var num_failed: u32 = 0; 64 | for (0..params.repetitions * 100) |_| { 65 | if (!try test_omp_single_error()) { 66 | num_failed += 1; 67 | } 68 | } 69 | 70 | try std.testing.expect(num_failed == 0); 71 | } 72 | 73 | fn test_omp_task_error() !bool { 74 | _ = omp.parallel(.{ .ret_reduction = .plus }) 75 | .run(.{}, struct { 76 | fn f() !usize { 77 | const maybe = omp.single() 78 | .run(.{}, struct { 79 | fn f() *omp.promise(error{WompWomp}!usize) { 80 | return omp.task(.{}) 81 | .run(.{}, struct { 82 | fn f() error{WompWomp}!usize { 83 | return error.WompWomp; 84 | } 85 | }.f); 86 | } 87 | }.f); 88 | if (maybe) |pro| { 89 | defer pro.deinit(); 90 | return pro.get(); 91 | } 92 | return 0; 93 | } 94 | }.f) catch |err| switch (err) { 95 | error.WompWomp => return true, 96 | }; 97 | 98 | return false; 99 | } 100 | 101 | test "task_error" { 102 | if (omp.get_max_threads() < 2) { 103 | omp.set_num_threads(8); 104 | } 105 | 106 | var num_failed: u32 = 0; 107 | for (0..params.repetitions) |_| { 108 | if (!try test_omp_task_error()) { 109 | num_failed += 1; 110 | } 111 | } 112 | 113 | try std.testing.expect(num_failed == 0); 114 | } 115 | 116 | fn test_omp_loop_error() !bool { 117 | _ = omp.parallel(.{ .ret_reduction = .plus }) 118 | .run(.{}, struct { 119 | fn f() !usize { 120 | const a = omp.loop(u32, .{ .ret_reduction = .plus }) 121 | .run(.{}, 0, params.loop_count, 1, struct { 122 | fn f(i: u32) error{WompWomp}!usize { 123 | _ = i; 124 | return error.WompWomp; 125 | } 126 | }.f); 127 | return a; 128 | } 129 | }.f) catch |err| switch (err) { 130 | error.WompWomp => return true, 131 | }; 132 | 133 | return false; 134 | } 135 | 136 | test "loop_error" { 137 | if (omp.get_max_threads() < 2) { 138 | omp.set_num_threads(8); 139 | } 140 | 141 | var num_failed: u32 = 0; 142 | for (0..1) |_| { 143 | if (!try test_omp_loop_error()) { 144 | num_failed += 1; 145 | } 146 | } 147 | 148 | try std.testing.expect(num_failed == 0); 149 | } 150 | -------------------------------------------------------------------------------- /src/workshare_env.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const reduce = @import("reduce.zig"); 3 | const kmp = @import("kmp.zig"); 4 | const in = @import("input_handler.zig"); 5 | 6 | pub const options = struct { 7 | return_optional: bool, 8 | do_copy: bool, 9 | is_omp_func: bool = false, 10 | }; 11 | 12 | pub inline fn make( 13 | comptime red: []const reduce.operators, 14 | comptime f: anytype, 15 | comptime ret_t: type, 16 | comptime opts: options, 17 | ) type 
{ 18 | return struct { 19 | const static = struct { 20 | var lck: kmp.critical_name_t = @bitCast([_]u8{0} ** 32); 21 | }; 22 | 23 | pub inline fn run( 24 | pre: anytype, 25 | args: anytype, 26 | post: anytype, 27 | ret_reduction: *ret_t, 28 | ) if (opts.return_optional) ?ret_t else ret_t { 29 | const private_copy = if (opts.do_copy) in.make_another(args.private) else args.private; 30 | const firstprivate_copy = if (opts.do_copy) in.shallow_copy(args.firstprivate) else args.firstprivate; 31 | const reduction_copy = if (opts.do_copy) in.shallow_copy(args.reduction) else args.reduction; 32 | const true_args = pre ++ brk: { 33 | const r = if (opts.do_copy) 34 | args.shared ++ private_copy ++ firstprivate_copy ++ reduction_copy 35 | else 36 | .{args}; 37 | 38 | break :brk if (opts.is_omp_func) r else .{r}; 39 | } ++ post; 40 | 41 | const ret = @call(.always_inline, f, true_args); 42 | 43 | const id: kmp.ident_t = .{ 44 | .flags = @intFromEnum(kmp.ident_flags.IDENT_KMPC), 45 | .psource = "parallel" ++ @typeName(@TypeOf(f)), 46 | }; 47 | 48 | const no_err_ret_t = in.no_error(ret_t); 49 | 50 | if (red.len > 0 or no_err_ret_t != void) { 51 | if (no_err_ret_t != void) { 52 | // If it's an error_union AND we caught an error just reduce the other parameters that need to be reduced. 53 | // This has to happen since once a reduce starts, every thread needs to call the proper kmp function calls 54 | // to signal to OMP that the reduce actually happened. 55 | // 56 | // Also apparently there needs to be the same memory structure for all the reduce args, so we just pass in 57 | // fake data that won't do anything 58 | var ret_no_err = if (@typeInfo(ret_t) == .ErrorUnion) ret catch |err| { 59 | var tmp: no_err_ret_t = undefined; 60 | var tmp2: no_err_ret_t = undefined; 61 | 62 | const reduce_args = if (no_err_ret_t == void) reduction_copy else reduction_copy ++ .{&tmp2}; 63 | const reduce_dest = if (no_err_ret_t == void) args.reduction else args.reduction ++ .{&tmp}; 64 | _ = reduce.reduce(&id, true, reduce_dest, reduce_args, red[0 .. 
red.len - 1] ++ .{.id}, &static.lck); 65 | 66 | ret_reduction.* = err; 67 | return ret_reduction.*; 68 | } else ret; 69 | 70 | // If an error didn't occur then we can just append the return_reduce parameter to the end and proceed normally 71 | var tmp: no_err_ret_t = if (@typeInfo(ret_t) != .ErrorUnion) ret_reduction.* else ret_reduction.* catch unreachable; 72 | const reduce_args = if (no_err_ret_t == void) reduction_copy else reduction_copy ++ .{&ret_no_err}; 73 | const reduce_dest = if (no_err_ret_t == void) args.reduction else args.reduction ++ .{&tmp}; 74 | const has_result = reduce.reduce(&id, true, reduce_dest, reduce_args, red, &static.lck); 75 | 76 | if (has_result > 0) { 77 | ret_reduction.* = tmp; 78 | return ret_reduction.*; 79 | } 80 | } else { 81 | const has_result = reduce.reduce(&id, true, args.reduction, reduction_copy, red, &static.lck); 82 | if (has_result > 0) { 83 | return ret_reduction.*; 84 | } 85 | } 86 | } 87 | 88 | if (ret_t != void) { 89 | if (opts.return_optional) { 90 | return null; 91 | } 92 | return ret; 93 | } 94 | } 95 | }; 96 | } 97 | -------------------------------------------------------------------------------- /tests/sections.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const omp = @import("omp"); 3 | const params = @import("params.zig"); 4 | 5 | fn test_omp_sections_default() !bool { 6 | var sum: u32 = 7; 7 | const known_sum: u32 = @as(u32, (params.loop_count * (params.loop_count - 1)) / 2) + sum; 8 | 9 | omp.parallel(.{}) 10 | .run(.{ .shared = .{&sum} }, struct { 11 | fn f(f_sum: *u32) void { 12 | var mysum: u32 = 0; 13 | var i: u32 = 0; 14 | const summer = struct { 15 | fn f(s: *u32, ms: *u32) void { 16 | s.* += ms.*; 17 | } 18 | }.f; 19 | 20 | omp.sections(.{}) 21 | .run(.{ .shared = .{f_sum}, .firstprivate = .{ &mysum, &i } }, .{ 22 | &struct { 23 | fn section(ff_sum: *u32, ff_mysum: *u32, ff_i: *u32) void { 24 | ff_i.* = 1; 25 | while (ff_i.* < 400) : (ff_i.* += 1) { 26 | ff_mysum.* += ff_i.*; 27 | } 28 | omp.critical(.{}).run(.{ ff_sum, ff_mysum }, summer); 29 | } 30 | }.section, 31 | &struct { 32 | fn section(ff_sum: *u32, ff_mysum: *u32, ff_i: *u32) void { 33 | ff_i.* = 400; 34 | while (ff_i.* < 700) : (ff_i.* += 1) { 35 | ff_mysum.* += ff_i.*; 36 | } 37 | omp.critical(.{}).run(.{ ff_sum, ff_mysum }, summer); 38 | } 39 | }.section, 40 | &struct { 41 | fn section(ff_sum: *u32, ff_mysum: *u32, ff_i: *u32) void { 42 | ff_i.* = 700; 43 | while (ff_i.* < 1000) : (ff_i.* += 1) { 44 | ff_mysum.* += ff_i.*; 45 | } 46 | omp.critical(.{}).run(.{ ff_sum, ff_mysum }, summer); 47 | } 48 | }.section, 49 | }); 50 | } 51 | }.f); 52 | 53 | if (known_sum != sum) { 54 | std.debug.print("KNOWN_SUM = {}\n", .{known_sum}); 55 | std.debug.print("SUM = {}\n", .{sum}); 56 | } 57 | 58 | return known_sum == sum; 59 | } 60 | 61 | test "sections_default" { 62 | var num_failed: u32 = 0; 63 | for (0..params.repetitions) |_| { 64 | if (!try test_omp_sections_default()) { 65 | num_failed += 1; 66 | } 67 | } 68 | 69 | try std.testing.expect(num_failed == 0); 70 | } 71 | 72 | fn test_omp_parallel_sections_default() !bool { 73 | var sum: u32 = 7; 74 | const known_sum: u32 = @as(u32, (params.loop_count * (params.loop_count - 1)) / 2) + sum; 75 | 76 | var mysum: u32 = 0; 77 | var i: u32 = 0; 78 | const summer = struct { 79 | fn f(s: *u32, ms: *u32) void { 80 | s.* += ms.*; 81 | } 82 | }.f; 83 | 84 | omp.parallel(.{}) 85 | .sections(.{}) 86 | .run(.{ .shared = .{&sum}, .firstprivate = .{ &mysum, &i } }, 
.{ 87 | &struct { 88 | fn section(ff_sum: *u32, ff_mysum: *u32, ff_i: *u32) void { 89 | ff_i.* = 1; 90 | while (ff_i.* < 400) : (ff_i.* += 1) { 91 | ff_mysum.* += ff_i.*; 92 | } 93 | omp.critical(.{}).run(.{ ff_sum, ff_mysum }, summer); 94 | } 95 | }.section, 96 | &struct { 97 | fn section(ff_sum: *u32, ff_mysum: *u32, ff_i: *u32) void { 98 | ff_i.* = 400; 99 | while (ff_i.* < 700) : (ff_i.* += 1) { 100 | ff_mysum.* += ff_i.*; 101 | } 102 | omp.critical(.{}).run(.{ ff_sum, ff_mysum }, summer); 103 | } 104 | }.section, 105 | &struct { 106 | fn section(ff_sum: *u32, ff_mysum: *u32, ff_i: *u32) void { 107 | ff_i.* = 700; 108 | while (ff_i.* < 1000) : (ff_i.* += 1) { 109 | ff_mysum.* += ff_i.*; 110 | } 111 | omp.critical(.{}).run(.{ ff_sum, ff_mysum }, summer); 112 | } 113 | }.section, 114 | }); 115 | 116 | if (known_sum != sum) { 117 | std.debug.print("KNOWN_SUM = {}\n", .{known_sum}); 118 | std.debug.print("SUM = {}\n", .{sum}); 119 | } 120 | 121 | return known_sum == sum; 122 | } 123 | 124 | test "parallel_sections_default" { 125 | var num_failed: u32 = 0; 126 | for (0..params.repetitions) |_| { 127 | if (!try test_omp_parallel_sections_default()) { 128 | num_failed += 1; 129 | } 130 | } 131 | 132 | try std.testing.expect(num_failed == 0); 133 | } 134 | -------------------------------------------------------------------------------- /tests/parallel.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const omp = @import("omp"); 3 | const params = @import("params.zig"); 4 | 5 | fn test_omp_parallel_default() !bool { 6 | var sum: u32 = 0; 7 | var mysum: u32 = 0; 8 | const known_sum: u32 = (params.loop_count * (params.loop_count + 1)) / 2; 9 | 10 | omp.parallel(.{}) 11 | .run(.{ .shared = .{&sum}, .private = .{&mysum} }, struct { 12 | fn f(f_sum: *u32, f_mysum: *u32) void { 13 | f_mysum.* = 0; 14 | omp.loop(u32, .{}) 15 | .run(.{ .shared = .{f_mysum} }, 0, params.loop_count + 1, 1, struct { 16 | fn f(i: u32, ff_mysum: *u32) void { 17 | ff_mysum.* += i; 18 | } 19 | }.f); 20 | 21 | omp.critical(.{}) 22 | .run(.{ f_sum, f_mysum.* }, struct { 23 | fn f(ff_sum: *u32, ff_mysum: u32) void { 24 | ff_sum.* += ff_mysum; 25 | } 26 | }.f); 27 | } 28 | }.f); 29 | 30 | if (known_sum != sum) { 31 | std.debug.print("KNOWN_SUM = {}\n", .{known_sum}); 32 | std.debug.print("SUM = {}\n", .{sum}); 33 | } 34 | 35 | try std.testing.expect(mysum == 0); 36 | 37 | return known_sum == sum; 38 | } 39 | 40 | test "parallel_default" { 41 | var num_failed: u32 = 0; 42 | for (0..params.repetitions) |_| { 43 | if (!try test_omp_parallel_default()) { 44 | num_failed += 1; 45 | } 46 | } 47 | 48 | try std.testing.expect(num_failed == 0); 49 | } 50 | 51 | fn test_omp_parallel_if() !bool { 52 | var sum: u32 = 0; 53 | var mysum: u32 = 0; 54 | const control: u32 = 1; 55 | const known_sum: u32 = (params.loop_count * (params.loop_count + 1)) / 2; 56 | 57 | omp.parallel(.{ .iff = true }) 58 | .run(.{ .shared = .{&sum}, .private = .{&mysum} }, control == 0, struct { 59 | fn f(f_sum: *u32, f_mysum: *u32) void { 60 | f_mysum.* = 0; 61 | for (0..params.loop_count + 1) |i| { 62 | f_mysum.* += @as(u32, @intCast(i)); 63 | } 64 | 65 | omp.critical(.{}) 66 | .run(.{ f_sum, f_mysum.* }, struct { 67 | fn f(ff_sum: *u32, ff_mysum: u32) void { 68 | ff_sum.* += ff_mysum; 69 | } 70 | }.f); 71 | } 72 | }.f); 73 | 74 | if (known_sum != sum) { 75 | std.debug.print("KNOWN_SUM = {}\n", .{known_sum}); 76 | std.debug.print("SUM = {}\n", .{sum}); 77 | } 78 | 79 | try 
std.testing.expect(mysum == 0); 80 | 81 | return known_sum == sum; 82 | } 83 | 84 | test "parallel_if" { 85 | var num_failed: u32 = 0; 86 | for (0..params.repetitions) |_| { 87 | if (!try test_omp_parallel_if()) { 88 | num_failed += 1; 89 | } 90 | } 91 | 92 | try std.testing.expect(num_failed == 0); 93 | } 94 | 95 | fn test_omp_parallel_nested() bool { 96 | if (omp.get_max_threads() > 4) { 97 | omp.set_num_threads(4); 98 | } else if (omp.get_max_threads() < 2) { 99 | omp.set_num_threads(2); 100 | } 101 | 102 | var counter: i32 = 0; 103 | 104 | omp.set_nested(true); 105 | omp.set_max_active_levels(omp.get_max_active_levels()); 106 | 107 | omp.parallel(.{}) 108 | .run(.{ .shared = .{&counter} }, struct { 109 | fn f(f_counter: *i32) void { 110 | omp.critical(.{}) 111 | .run(.{f_counter}, struct { 112 | fn f(ff_counter: *i32) void { 113 | ff_counter.* += 1; 114 | } 115 | }.f); 116 | 117 | omp.parallel(.{}) 118 | .run(.{ .shared = .{f_counter} }, struct { 119 | fn f(pf_counter: *i32) void { 120 | omp.critical(.{}) 121 | .run(.{pf_counter}, struct { 122 | fn f(fpf_counter: *i32) void { 123 | fpf_counter.* -= 1; 124 | } 125 | }.f); 126 | } 127 | }.f); 128 | } 129 | }.f); 130 | 131 | return counter != 0; 132 | } 133 | 134 | test "parallel_nested" { 135 | var num_failed: u32 = 0; 136 | for (0..params.repetitions) |_| { 137 | if (!test_omp_parallel_nested()) { 138 | num_failed += 1; 139 | } 140 | } 141 | 142 | try std.testing.expect(num_failed == 0); 143 | } 144 | 145 | fn test_omp_parallel_private() !bool { 146 | var sum: u32 = 0; 147 | var num_threads: u32 = 0; 148 | var sum1: u32 = 0; 149 | 150 | omp.parallel(.{}) 151 | .run(.{ .shared = .{ &sum, &num_threads }, .private = .{&sum1} }, struct { 152 | fn f(f_sum: *u32, f_num_threads: *u32, f_sum1: *u32) void { 153 | f_sum1.* = 7; 154 | 155 | omp.loop(u32, .{}) 156 | .run(.{ .shared = .{f_sum1} }, 1, 1000, 1, struct { 157 | fn f(i: u32, ff_sum1: *u32) void { 158 | ff_sum1.* += i; 159 | } 160 | }.f); 161 | 162 | omp.critical(.{}) 163 | .run(.{ f_sum, f_num_threads, f_sum1.* }, struct { 164 | fn f(ff_sum: *u32, ff_num_threads: *u32, ff_sum1: u32) void { 165 | ff_sum.* += ff_sum1; 166 | ff_num_threads.* += 1; 167 | } 168 | }.f); 169 | } 170 | }.f); 171 | 172 | const known_sum: u32 = ((999 * 1000) / 2) + (7 * num_threads); 173 | if (known_sum != sum) { 174 | std.debug.print("NUM_THREADS = {}\n", .{num_threads}); 175 | std.debug.print("KNOWN_SUM = {}\n", .{known_sum}); 176 | std.debug.print("SUM = {}\n", .{sum}); 177 | } 178 | return known_sum == sum; 179 | } 180 | 181 | test "parallel_private" { 182 | var num_failed: u32 = 0; 183 | for (0..params.repetitions) |_| { 184 | if (!try test_omp_parallel_private()) { 185 | num_failed += 1; 186 | } 187 | } 188 | 189 | try std.testing.expect(num_failed == 0); 190 | } 191 | -------------------------------------------------------------------------------- /src/input_handler.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const omp = @import("omp.zig"); 3 | 4 | fn get_field_idx(comptime T: type, comptime field_name: []const u8) u32 { 5 | return comptime brk: { 6 | var idx: u32 = 0; 7 | for (@typeInfo(T).Struct.fields) |field| { 8 | if (std.mem.eql(u8, field_name, field.name)) { 9 | break :brk idx; 10 | } 11 | idx += 1; 12 | } 13 | break :brk idx; 14 | }; 15 | } 16 | 17 | pub inline fn no_error(comptime T: type) type { 18 | comptime { 19 | const info = @typeInfo(T); 20 | if (info != .ErrorUnion) { 21 | return T; 22 | } 23 | 24 | return 
info.ErrorUnion.payload; 25 | } 26 | } 27 | 28 | pub fn zigc_ret(comptime f: anytype, comptime args_type: type) type { 29 | const f_type_info = @typeInfo(@TypeOf(f)); 30 | if (f_type_info != .Fn) { 31 | @compileError("Expected function with signature `fn(, ...)`, got " ++ @typeName(@TypeOf(f)) ++ " instead."); 32 | } 33 | return struct { 34 | ret: copy_ret(f) = undefined, 35 | v: args_type = undefined, 36 | }; 37 | } 38 | 39 | pub fn copy_ret(comptime f: anytype) type { 40 | const typ = @typeInfo(@TypeOf(f)); 41 | if (typ == .Fn) return typ.Fn.return_type orelse void; 42 | if (typ == .Pointer) return @typeInfo(typ.Pointer.child).Fn.return_type orelse void; 43 | @compileError("You need to provide either a function pointer or a function"); 44 | } 45 | 46 | fn normalize_type(comptime T: type) type { 47 | var param_count: u32 = 0; 48 | const fields = @typeInfo(T).Struct.fields; 49 | const shared = val: { 50 | const idx = get_field_idx(T, "shared"); 51 | if (fields.len > idx) { 52 | param_count += 1; 53 | break :val fields[idx].type; 54 | } else { 55 | break :val @TypeOf(.{}); 56 | } 57 | }; 58 | 59 | const private = val: { 60 | const idx = get_field_idx(T, "private"); 61 | if (fields.len > idx) { 62 | param_count += 1; 63 | break :val fields[idx].type; 64 | } else { 65 | break :val @TypeOf(.{}); 66 | } 67 | }; 68 | 69 | const firstprivate = val: { 70 | const idx = get_field_idx(T, "firstprivate"); 71 | if (fields.len > idx) { 72 | param_count += 1; 73 | break :val fields[idx].type; 74 | } else { 75 | break :val @TypeOf(.{}); 76 | } 77 | }; 78 | 79 | const reduction = val: { 80 | const idx = get_field_idx(T, "reduction"); 81 | if (fields.len > idx) { 82 | param_count += 1; 83 | break :val fields[idx].type; 84 | } else { 85 | break :val @TypeOf(.{}); 86 | } 87 | }; 88 | 89 | if (@typeInfo(T) != .Struct or param_count != @typeInfo(T).Struct.fields.len) { 90 | @compileError("Expected struct like .{ .shared = .{...}, .private = .{...}, firstprivate = .{...}, .reduction = {...} }, got " ++ @typeName(T) ++ " instead."); 91 | } 92 | 93 | return struct { 94 | shared: shared, 95 | private: private, 96 | firstprivate: firstprivate, 97 | reduction: reduction, 98 | }; 99 | } 100 | 101 | pub fn has_field(comptime T: type, comptime field_name: []const u8) bool { 102 | for (std.meta.fieldNames(T)) |field| { 103 | if (std.mem.eql(u8, field_name, field)) { 104 | return true; 105 | } 106 | } 107 | return false; 108 | } 109 | 110 | pub fn normalize_args(args: anytype) normalize_type(@TypeOf(args)) { 111 | const args_type = @TypeOf(args); 112 | const shared = val: { 113 | if (comptime has_field(args_type, "shared")) { 114 | break :val args.shared; 115 | } 116 | break :val .{}; 117 | }; 118 | 119 | const private = val: { 120 | if (comptime has_field(args_type, "private")) { 121 | break :val args.private; 122 | } 123 | break :val .{}; 124 | }; 125 | 126 | const firstprivate = val: { 127 | if (comptime has_field(args_type, "firstprivate")) { 128 | break :val args.firstprivate; 129 | } 130 | break :val .{}; 131 | }; 132 | 133 | const reduction = val: { 134 | if (comptime has_field(args_type, "reduction")) { 135 | break :val args.reduction; 136 | } 137 | break :val .{}; 138 | }; 139 | 140 | return .{ .shared = shared, .private = private, .firstprivate = firstprivate, .reduction = reduction }; 141 | } 142 | 143 | pub fn check_fn_signature(comptime f: anytype) void { 144 | const f_type_info = @typeInfo(@TypeOf(f)); 145 | if (f_type_info == .Fn) 146 | return; 147 | if (@typeInfo(f_type_info.Pointer.child) == .Fn) { 148 | 
return; 149 | } 150 | @compileError("Expected function with signature `fn(, ...)`, got " ++ @typeName(@TypeOf(f)) ++ " instead."); 151 | } 152 | 153 | pub fn check_args(comptime T: type) void { 154 | const args_type_info = @typeInfo(T); 155 | if (args_type_info != .Struct) { 156 | @compileError("Expected struct or tuple, got " ++ @typeName(T) ++ " instead."); 157 | } 158 | } 159 | 160 | pub fn deep_size_of(comptime T: type) usize { 161 | var size: usize = @sizeOf(T); 162 | inline for (@typeInfo(T).Struct.fields) |field| { 163 | if (@typeInfo(field.type) == .Pointer) { 164 | size += @sizeOf(@typeInfo(field.type).Pointer.child); 165 | } 166 | } 167 | return size; 168 | } 169 | 170 | /// Shallow copy a struct with pointers 171 | /// This function will copy the struct and all the pointers it contains 172 | /// but it won't go more than one level deep 173 | /// 174 | /// WARNING: This function may be not memory safe if it doesn't get inlined 175 | pub inline fn shallow_copy(original: anytype) @TypeOf(original) { 176 | var copy: @TypeOf(original) = .{} ++ original; 177 | inline for (original, ©) |og, *v| { 178 | if (@typeInfo(@TypeOf(og)) == .Pointer) { 179 | var tmp = og.*; 180 | v.* = &tmp; 181 | } else { 182 | v.* = og; 183 | } 184 | } 185 | return copy; 186 | } 187 | 188 | /// Make another struct with the same fields as the original, but all values are set to undefined 189 | pub inline fn make_another(original: anytype) @TypeOf(original) { 190 | var copy: @TypeOf(original) = .{} ++ original; 191 | inline for (original, ©) |og, *v| { 192 | if (@typeInfo(@TypeOf(og)) == .Pointer) { 193 | var tmp: @TypeOf(og.*) = undefined; 194 | v.* = &tmp; 195 | } else { 196 | v.* = undefined; 197 | } 198 | } 199 | 200 | return copy; 201 | } 202 | -------------------------------------------------------------------------------- /tests/task.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const omp = @import("omp"); 3 | const params = @import("params.zig"); 4 | 5 | fn test_omp_task_default() !bool { 6 | var tids = [_]u32{0} ** params.num_tasks; 7 | 8 | omp.parallel(.{}) 9 | .run(.{ .shared = .{&tids} }, struct { 10 | fn f(f_tids: *[params.num_tasks]u32) void { 11 | omp.single() 12 | .run(.{f_tids}, struct { 13 | fn f(ff_tids: *[params.num_tasks]u32) void { 14 | for (0..params.num_tasks) |i| { 15 | // First we have to store the value of the loop index in a new variable 16 | // which will be private for each task because otherwise it will be overwritten 17 | // if the execution of the task takes longer than the time which is needed to 18 | // enter the next step of the loop! 
19 | 20 | const myi = i; 21 | omp.task(.{}).run(.{ .shared = .{ff_tids}, .firstprivate = .{myi} }, struct { 22 | fn f(fff_tids: *[params.num_tasks]u32, f_myi: usize) void { 23 | std.time.sleep(params.sleep_time); 24 | fff_tids[f_myi] = omp.get_thread_num(); 25 | } 26 | }.f); 27 | } 28 | } 29 | }.f); 30 | } 31 | }.f); 32 | 33 | var uses_only_one_thread = true; 34 | for (tids) |t| { 35 | uses_only_one_thread = uses_only_one_thread and t == tids[0]; 36 | } 37 | 38 | try std.testing.expect(!uses_only_one_thread); 39 | 40 | return true; 41 | } 42 | 43 | test "task_default" { 44 | if (omp.get_max_threads() < 2) { 45 | omp.set_num_threads(8); 46 | } 47 | 48 | var num_failed: u32 = 0; 49 | for (0..params.repetitions) |_| { 50 | if (!try test_omp_task_default()) { 51 | num_failed += 1; 52 | } 53 | } 54 | 55 | try std.testing.expect(num_failed == 0); 56 | } 57 | 58 | fn test_omp_task_if() !bool { 59 | var count: usize = 0; 60 | var result: usize = 0; 61 | 62 | omp.parallel(.{}) 63 | .run(.{ .shared = .{ &count, &result } }, struct { 64 | fn f(f_count: *usize, f_result: *usize) void { 65 | omp.single() 66 | .run(.{ f_count, f_result }, struct { 67 | fn f(ff_count: *usize, ff_result: *usize) void { 68 | // Try to see if the if makes it so that the task is never deferred 69 | // to another thread, in fact the critical block below must wait the sleep to run 70 | omp.task(.{ .iff = true }).run(false, .{ .shared = .{ ff_count, ff_result } }, struct { 71 | fn f(fff_count: *usize, fff_result: *usize) void { 72 | std.time.sleep(params.sleep_time); 73 | omp.critical(.{}).run(.{ fff_count, fff_result }, struct { 74 | fn f(_count: *usize, _result: *usize) void { 75 | _result.* = if (_count.* == 0) 1 else 0; 76 | } 77 | }.f); 78 | } 79 | }.f); 80 | 81 | // Now that the task is finished we can update the count 82 | omp.critical(.{}).run(.{ff_count}, struct { 83 | fn f(_count: *usize) void { 84 | _count.* = 1; 85 | } 86 | }.f); 87 | } 88 | }.f); 89 | } 90 | }.f); 91 | 92 | return result == 1; 93 | } 94 | 95 | test "task_if" { 96 | if (omp.get_max_threads() < 2) { 97 | omp.set_num_threads(8); 98 | } 99 | 100 | var num_failed: u32 = 0; 101 | for (0..params.repetitions) |_| { 102 | if (!try test_omp_task_if()) { 103 | num_failed += 1; 104 | } 105 | } 106 | 107 | try std.testing.expect(num_failed == 0); 108 | } 109 | 110 | fn test_omp_task_result() !bool { 111 | const t_type = *[params.num_tasks]u32; 112 | var tids = [_]u32{0} ** params.num_tasks; 113 | var include_tids = [_]u32{0} ** params.num_tasks; 114 | var err: usize = 0; 115 | 116 | omp.parallel(.{}) 117 | .run(.{ .shared = .{ &tids, &include_tids } }, struct { 118 | fn f(f_tids: t_type, f_inctids: t_type) void { 119 | omp.single() 120 | .run(.{ f_tids, f_inctids }, struct { 121 | fn f(ff_tids: t_type, ff_inctids: t_type) void { 122 | for (0..params.num_tasks) |i| { 123 | // First we have to store the value of the loop index in a new variable 124 | // which will be private for each task because otherwise it will be overwritten 125 | // if the execution of the task takes longer than the time which is needed to 126 | // enter the next step of the loop! 
127 | 128 | const myi = i; 129 | omp.task(.{ .final = true }) 130 | .run(i >= 5, .{ .shared = .{ ff_tids, ff_inctids }, .firstprivate = .{myi} }, struct { 131 | fn f(fff_tids: t_type, fff_inctids: t_type, f_myi: usize) void { 132 | fff_tids[f_myi] = omp.get_thread_num(); 133 | 134 | if (f_myi >= 5) { 135 | const included = f_myi; 136 | 137 | omp.task(.{}) 138 | .run(.{ .shared = .{fff_inctids}, .firstprivate = .{included} }, struct { 139 | fn f(_inctids: t_type, f_included: usize) void { 140 | std.time.sleep(params.sleep_time); 141 | _inctids[f_included] = omp.get_thread_num(); 142 | } 143 | }.f); 144 | 145 | std.time.sleep(params.sleep_time); 146 | } 147 | } 148 | }.f); 149 | } 150 | } 151 | }.f); 152 | } 153 | }.f); 154 | 155 | // Now we ckeck if more than one thread executed the final task and its included task. 156 | for (5..params.num_tasks) |t| { 157 | if (include_tids[t] != tids[t]) { 158 | err += 1; 159 | } 160 | } 161 | 162 | return err == 0; 163 | } 164 | 165 | test "task_result" { 166 | if (omp.get_max_threads() < 2) { 167 | omp.set_num_threads(8); 168 | } 169 | 170 | var num_failed: u32 = 0; 171 | for (0..params.repetitions) |_| { 172 | if (!try test_omp_task_result()) { 173 | num_failed += 1; 174 | } 175 | } 176 | 177 | try std.testing.expect(num_failed == 0); 178 | } 179 | -------------------------------------------------------------------------------- /src/reduce.zig: -------------------------------------------------------------------------------- 1 | const kmp = @import("kmp.zig"); 2 | const std = @import("std"); 3 | 4 | pub const operators = enum(c_int) { 5 | plus = 0, 6 | mult = 1, 7 | minus = 2, 8 | bitwise_and = 3, 9 | bitwise_or = 4, 10 | bitwise_xor = 5, 11 | logical_and = 6, 12 | logical_or = 7, 13 | max = 8, 14 | min = 9, 15 | none = 10, 16 | id = 11, 17 | custom = 12, 18 | }; 19 | 20 | pub inline fn reduce( 21 | comptime id: *const kmp.ident_t, 22 | comptime nowait: bool, 23 | out_reduction: anytype, 24 | copies: @TypeOf(out_reduction), 25 | comptime ops: []const operators, 26 | lck: *kmp.critical_name_t, 27 | ) c_int { 28 | const reduction_funcs = create(@typeInfo(@TypeOf(out_reduction)).Struct.fields, ops); 29 | const kmpc_reduce = if (nowait) 30 | kmp.reduce_nowait 31 | else 32 | kmp.reduce; 33 | 34 | const num_vars = copies.len; 35 | const reduce_size = @sizeOf(@TypeOf(out_reduction)); 36 | 37 | const has_data = kmpc_reduce( 38 | id, 39 | kmp.ctx.global_tid, 40 | num_vars, 41 | reduce_size, 42 | @ptrCast(@constCast(&copies)), 43 | reduction_funcs.for_omp, 44 | lck, 45 | ); 46 | 47 | switch (has_data) { 48 | 1 => { 49 | reduction_funcs.finalize(out_reduction, copies); 50 | const end_id = comptime .{ 51 | .flags = id.*.flags, 52 | .psource = id.*.psource, 53 | .reserved_3 = 0x1c, 54 | }; 55 | kmp.end_reduce_nowait(&end_id, kmp.ctx.global_tid, lck); 56 | }, 57 | 2 => { 58 | reduction_funcs.finalize_atomic(out_reduction, copies); 59 | }, 60 | else => {}, 61 | } 62 | 63 | return has_data; 64 | } 65 | 66 | pub inline fn create( 67 | comptime types: []const std.builtin.Type.StructField, 68 | comptime reduce_operators: []const operators, 69 | ) type { 70 | if (types.len != reduce_operators.len) { 71 | @compileError("The number of types and operators must match"); 72 | } 73 | 74 | return struct { 75 | pub inline fn finalize( 76 | lhs: anytype, 77 | rhs: @TypeOf(lhs), 78 | ) void { 79 | inline for (lhs, rhs) |l, r| { 80 | inline for (reduce_operators) |op| { 81 | switch (op) { 82 | .plus => { 83 | l.* += r.*; 84 | }, 85 | .mult => { 86 | l.* *= r.*; 87 | }, 88 | .minus 
=> { 89 | l.* -= r.*; 90 | }, 91 | .bitwise_and => { 92 | l.* &= r.*; 93 | }, 94 | .bitwise_or => { 95 | l.* |= r.*; 96 | }, 97 | .bitwise_xor => { 98 | l.* ^= r.*; 99 | }, 100 | .logical_and => { 101 | l.* = l.* and r.*; 102 | }, 103 | .logical_or => { 104 | l.* = l.* or r.*; 105 | }, 106 | .max => { 107 | l.* = @max(l.*, r.*); 108 | }, 109 | .min => { 110 | l.* = @min(l.*, r.*); 111 | }, 112 | .id => {}, 113 | .custom => l.reduce(r.*), 114 | .none => { 115 | @compileError("Specify the reduction operator"); 116 | }, 117 | } 118 | } 119 | } 120 | } 121 | 122 | pub inline fn single( 123 | lhs: anytype, 124 | rhs: @TypeOf(lhs.*), 125 | ) void { 126 | var l, const r = ret: { 127 | if (@typeInfo(@TypeOf(lhs.*)) == .ErrorUnion) { 128 | // Here we have to unwrap the error union to find out if there is an error, if there is just put it in 129 | // the left part since that is the one that will aggregate the data. This will propagate to all the 130 | // other computations since it will keep checking the left side. 131 | // 132 | // TODO: Find a more efficient way to do this, right now I don't think we can short the computation 133 | // and return the error directly, but if we can it would be better 134 | const t = lhs.* catch return; 135 | const t1 = rhs catch { 136 | lhs.* = rhs; 137 | return; 138 | }; 139 | 140 | break :ret .{ &t, t1 }; 141 | } else { 142 | break :ret .{ lhs, rhs }; 143 | } 144 | }; 145 | 146 | // @compileLog(l); 147 | inline for (reduce_operators) |op| { 148 | switch (op) { 149 | .plus => { 150 | l.* += r; 151 | }, 152 | .mult => { 153 | l.* *= r; 154 | }, 155 | .minus => { 156 | l.* -= r; 157 | }, 158 | .bitwise_and => { 159 | l.* &= r; 160 | }, 161 | .bitwise_or => { 162 | l.* |= r; 163 | }, 164 | .bitwise_xor => { 165 | l.* ^= r; 166 | }, 167 | .logical_and => { 168 | l.* = l.* and r; 169 | }, 170 | .logical_or => { 171 | l.* = l.* or r; 172 | }, 173 | .max => { 174 | l.* = @max(l.*, r); 175 | }, 176 | .min => { 177 | l.* = @min(l.*, r); 178 | }, 179 | .custom => l.reduce(r.*), 180 | .id => {}, 181 | .none => {}, 182 | } 183 | } 184 | } 185 | pub inline fn finalize_atomic( 186 | lhs: anytype, 187 | rhs: @TypeOf(lhs), 188 | ) void { 189 | inline for (lhs, rhs) |l, r| { 190 | inline for (reduce_operators, types) |op, type_field| { 191 | const T = @typeInfo(type_field.type).Pointer.child; 192 | switch (op) { 193 | .plus => { 194 | _ = @atomicRmw(T, l, .Add, r.*, .acq_rel); 195 | }, 196 | .mult => { 197 | _ = @atomicRmw(T, l, .Mul, r.*, .acq_rel); 198 | }, 199 | .minus => { 200 | _ = @atomicRmw(T, l, .Sub, r.*, .acq_rel); 201 | }, 202 | .bitwise_and => { 203 | _ = @atomicRmw(T, l, .And, r.*, .acq_rel); 204 | }, 205 | .bitwise_or => { 206 | _ = @atomicRmw(T, l, .Or, r.*, .acq_rel); 207 | }, 208 | .bitwise_xor => { 209 | _ = @atomicRmw(T, l, .Xor, r.*, .acq_rel); 210 | }, 211 | .logical_and => { 212 | _ = @atomicRmw(T, l, .And, r.*, .acq_rel); 213 | }, 214 | .logical_or => { 215 | _ = @atomicRmw(T, l, .Or, r.*, .acq_rel); 216 | }, 217 | .max => { 218 | _ = @atomicRmw(T, l, .Max, r.*, .acq_rel); 219 | }, 220 | .min => { 221 | _ = @atomicRmw(T, l, .Min, r.*, .acq_rel); 222 | }, 223 | .custom => l.atomic_reduce(r.*), 224 | .id => {}, 225 | .none => { 226 | @compileError("Specify the reduction operator"); 227 | }, 228 | } 229 | } 230 | } 231 | } 232 | 233 | fn for_omp( 234 | lhs: *anyopaque, 235 | rhs: *anyopaque, 236 | ) callconv(.C) void { 237 | inline for (reduce_operators, types) |op, T| { 238 | switch (op) { 239 | .plus => { 240 | const l = @as(*T.type, 
@ptrCast(@alignCast(lhs))).*; 241 | l.* += @as(*T.type, @ptrCast(@alignCast(rhs))).*.*; 242 | }, 243 | .mult => { 244 | const l = @as(*T.type, @ptrCast(@alignCast(lhs))).*; 245 | l.* *= @as(*T.type, @ptrCast(@alignCast(rhs))).*.*; 246 | }, 247 | .minus => { 248 | const l = @as(*T.type, @ptrCast(@alignCast(lhs))).*; 249 | l.* -= @as(*T.type, @ptrCast(@alignCast(rhs))).*.*; 250 | }, 251 | .bitwise_and => { 252 | const l = @as(*T.type, @ptrCast(@alignCast(lhs))).*; 253 | l.* &= @as(*T.type, @ptrCast(@alignCast(rhs))).*.*; 254 | }, 255 | .bitwise_or => { 256 | const l = @as(*T.type, @ptrCast(@alignCast(lhs))).*; 257 | l.* |= @as(*T.type, @ptrCast(@alignCast(rhs))).*.*; 258 | }, 259 | .bitwise_xor => { 260 | const l = @as(*T.type, @ptrCast(@alignCast(lhs))).*; 261 | l.* ^= @as(*T.type, @ptrCast(@alignCast(rhs))).*.*; 262 | }, 263 | .logical_and => { 264 | const l = @as(*T.type, @ptrCast(@alignCast(lhs))).*; 265 | l.* = l.* and @as(*T.type, @ptrCast(@alignCast(rhs))).*.*; 266 | }, 267 | .logical_or => { 268 | const l = @as(*T.type, @ptrCast(@alignCast(lhs))).*; 269 | l.* = l.* or @as(*T.type, @ptrCast(@alignCast(rhs))).*.*; 270 | }, 271 | .max => { 272 | const l = @as(*T.type, @ptrCast(@alignCast(lhs))).*; 273 | l.* = @max(l.*, @as(*T.type, @ptrCast(@alignCast(rhs))).*.*); 274 | }, 275 | .min => { 276 | const l = @as(*T.type, @ptrCast(@alignCast(lhs))).*; 277 | l.* = @min(l.*, @as(*T.type, @ptrCast(@alignCast(rhs))).*.*); 278 | }, 279 | .custom => { 280 | const l = @as(*T.type, @ptrCast(@alignCast(lhs))).*; 281 | l.reduce(@as(*T.type, @ptrCast(@alignCast(rhs))).*.*); 282 | }, 283 | .id => {}, 284 | .none => { 285 | @compileError("Specify the reduction operator"); 286 | }, 287 | } 288 | } 289 | } 290 | }; 291 | } 292 | -------------------------------------------------------------------------------- /src/kmp.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const omp = @import("omp.zig"); 3 | const opts = @import("build_options"); 4 | const ompt = @import("ompt.zig"); 5 | 6 | pub threadlocal var ctx: context = undefined; 7 | pub const context = struct { 8 | global_tid: c_int, 9 | bound_tid: c_int, 10 | }; 11 | 12 | pub const ident_flags = enum(c_int) { 13 | // /*! Use trampoline for internal microtasks */ 14 | IDENT_IMB = 0x01, 15 | // /*! Use c-style ident structure */ 16 | IDENT_KMPC = 0x02, 17 | // /* 0x04 is no longer used */ 18 | // /*! Entry point generated by auto-parallelization */ 19 | IDENT_AUTOPAR = 0x08, 20 | // /*! Compiler generates atomic reduction option for kmpc_reduce* */ 21 | IDENT_ATOMIC_REDUCE = 0x10, 22 | // /*! To mark a 'barrier' directive in user code */ 23 | IDENT_BARRIER_EXPL = 0x20, 24 | // /*! To Mark implicit barriers. */ 25 | // IDENT_BARRIER_IMPL_FOR = 0x0040, 26 | IDENT_BARRIER_IMPL = 0x0040, 27 | IDENT_BARRIER_IMPL_SECTIONS = 0x00C0, 28 | 29 | IDENT_BARRIER_IMPL_SINGLE = 0x0140, 30 | // IDENT_BARRIER_IMPL_MASK = 0x01C0, 31 | IDENT_BARRIER_IMPL_WORKSHARE = 0x01C0, 32 | 33 | // /*! To mark a static loop in OMPT callbacks */ 34 | IDENT_WORK_LOOP = 0x200, 35 | // /*! To mark a sections directive in OMPT callbacks */ 36 | IDENT_WORK_SECTIONS = 0x400, 37 | // /*! To mark a distribute construct in OMPT callbacks */ 38 | IDENT_WORK_DISTRIBUTE = 0x800, 39 | // /*! Atomic hint; bottom four bits as omp_sync_hint_t. Top four reserved and 40 | // not currently used. 
If one day we need more bits, then we can use 41 | // an invalid combination of hints to mean that another, larger field 42 | // should be used in a different flag. */ 43 | // IDENT_ATOMIC_HINT_MASK = 0xFF0000, 44 | // IDENT_ATOMIC_HINT_UNCONTENDED = 0x010000, 45 | // IDENT_ATOMIC_HINT_CONTENDED = 0x020000, 46 | // IDENT_ATOMIC_HINT_NONSPECULATIVE = 0x040000, 47 | // IDENT_ATOMIC_HINT_SPECULATIVE = 0x080000, 48 | // IDENT_OPENMP_SPEC_VERSION_MASK = 0xFF000000, 49 | }; 50 | 51 | pub const sched_t = enum(c_int) { 52 | StaticChunked = 33, 53 | StaticNonChunked = 34, 54 | Dynamic = 35, 55 | Guided = 36, 56 | Runtime = 37, 57 | }; 58 | 59 | pub const ident_t = extern struct { 60 | // might be used in fortran, we can just keep it 0 61 | reserved_1: c_int = 0, 62 | // flags from above 63 | flags: c_int = 0, 64 | reserved_2: c_int = 0, 65 | reserved_3: c_int = 35, 66 | psource: [*:0]const u8, 67 | }; 68 | 69 | // TODO: see this alignment because it seems strange 70 | pub const kmpc_micro_t = fn (global_tid: *c_int, bound_tid: *c_int, args: *align(@alignOf(usize)) anyopaque) callconv(.C) void; 71 | 72 | extern "omp" fn __kmpc_fork_call(name: *const ident_t, argc: c_int, fun: *const kmpc_micro_t, ...) void; 73 | pub inline fn fork_call(comptime name: *const ident_t, argc: c_int, fun: *const kmpc_micro_t, args: anytype) void { 74 | __kmpc_fork_call(name, argc, fun, args); 75 | } 76 | // it's not really variadic, so make sure to pass only one argument 77 | extern "omp" fn __kmpc_fork_call_if(name: *const ident_t, argc: c_int, fun: *const kmpc_micro_t, cond: c_int, ...) void; 78 | pub inline fn fork_call_if(comptime name: *const ident_t, argc: c_int, fun: *const kmpc_micro_t, cond: c_int, args: anytype) void { 79 | __kmpc_fork_call_if(name, argc, fun, cond, args); 80 | } 81 | 82 | extern "omp" fn __kmpc_for_static_init_4(loc: *const ident_t, gtid: c_int, schedtype: c_int, plastiter: *c_int, plower: *c_int, pupper: *c_int, pstride: *c_int, incr: c_int, chunk: c_int) void; 83 | extern "omp" fn __kmpc_for_static_init_4u(loc: *const ident_t, gtid: c_int, schedtype: c_int, plastiter: *c_int, plower: *c_uint, pupper: *c_uint, pstride: *c_int, incr: c_int, chunk: c_int) void; 84 | extern "omp" fn __kmpc_for_static_init_8(loc: *const ident_t, gtid: c_int, schedtype: c_int, plastiter: *c_int, plower: *c_long, pupper: *c_long, pstride: *c_long, incr: c_long, chunk: c_long) void; 85 | extern "omp" fn __kmpc_for_static_init_8u(loc: *const ident_t, gtid: c_int, schedtype: c_int, plastiter: *c_int, plower: *c_ulong, pupper: *c_ulong, pstride: *c_long, incr: c_long, chunk: c_long) void; 86 | pub inline fn for_static_init(comptime T: type, comptime loc: *const ident_t, gtid: c_int, schedtype: sched_t, plastiter: *c_int, plower: *T, pupper: *T, pstride: *T, incr: T, chunk: T) void { 87 | if (@typeInfo(T).Int.signedness == .signed) 88 | if (@typeInfo(T).Int.bits <= 32) { 89 | __kmpc_for_static_init_4(loc, gtid, @intFromEnum(schedtype), plastiter, @ptrCast(plower), @ptrCast(pupper), @ptrCast(pstride), @bitCast(incr), @bitCast(chunk)); 90 | } else if (@typeInfo(T).Int.bits <= 64) { 91 | __kmpc_for_static_init_8(loc, gtid, @intFromEnum(schedtype), plastiter, @ptrCast(plower), @ptrCast(pupper), @ptrCast(pstride), @bitCast(incr), @bitCast(chunk)); 92 | } else { 93 | @compileError("Unsupported integer size"); 94 | } 95 | else if (@typeInfo(T).Int.signedness == .unsigned) { 96 | if (@typeInfo(T).Int.bits <= 32) { 97 | __kmpc_for_static_init_4u(loc, gtid, @intFromEnum(schedtype), plastiter, @ptrCast(plower), @ptrCast(pupper), 
@ptrCast(pstride), @bitCast(incr), @bitCast(chunk));
98 | } else if (@typeInfo(T).Int.bits <= 64) {
99 | __kmpc_for_static_init_8u(loc, gtid, @intFromEnum(schedtype), plastiter, @ptrCast(plower), @ptrCast(pupper), @ptrCast(pstride), @bitCast(incr), @bitCast(chunk));
100 | } else {
101 | @compileError("Unsupported unsigned integer size");
102 | }
103 | } else {
104 | unreachable;
105 | }
106 | }
107 |
108 | extern "omp" fn __kmpc_for_static_fini(loc: *const ident_t, global_tid: c_int) void;
109 | pub inline fn for_static_fini(comptime name: *const ident_t, global_tid: c_int) void {
110 | __kmpc_for_static_fini(name, global_tid);
111 | }
112 |
113 | extern "omp" fn __kmpc_dispatch_init_4(loc: *const ident_t, gtid: c_int, schedule: c_int, lb: c_int, ub: c_int, st: c_int, chunk: c_int) void;
114 | extern "omp" fn __kmpc_dispatch_init_4u(loc: *const ident_t, gtid: c_int, schedule: c_int, lb: c_uint, ub: c_uint, st: c_int, chunk: c_int) void;
115 | extern "omp" fn __kmpc_dispatch_init_8(loc: *const ident_t, gtid: c_int, schedule: c_int, lb: c_long, ub: c_long, st: c_long, chunk: c_long) void;
116 | extern "omp" fn __kmpc_dispatch_init_8u(loc: *const ident_t, gtid: c_int, schedule: c_int, lb: c_ulong, ub: c_ulong, st: c_long, chunk: c_long) void;
117 | pub inline fn dispatch_init(comptime T: type, comptime loc: *const ident_t, gtid: c_int, schedule: sched_t, lb: T, ub: T, st: T, chunk: T) void {
118 | if (@typeInfo(T).Int.signedness == .signed) {
119 | if (@typeInfo(T).Int.bits <= 32) {
120 | __kmpc_dispatch_init_4(loc, gtid, @intFromEnum(schedule), @intCast(lb), @intCast(ub), @intCast(st), @intCast(chunk));
121 | } else if (@typeInfo(T).Int.bits <= 64) {
122 | __kmpc_dispatch_init_8(loc, gtid, @intFromEnum(schedule), @intCast(lb), @intCast(ub), @intCast(st), @intCast(chunk));
123 | } else {
124 | @compileError("Unsupported integer size");
125 | }
126 | } else if (@typeInfo(T).Int.signedness == .unsigned) {
127 | if (@typeInfo(T).Int.bits <= 32) {
128 | __kmpc_dispatch_init_4u(loc, gtid, @intFromEnum(schedule), @intCast(lb), @intCast(ub), @intCast(st), @intCast(chunk));
129 | } else if (@typeInfo(T).Int.bits <= 64) {
130 | __kmpc_dispatch_init_8u(loc, gtid, @intFromEnum(schedule), @intCast(lb), @intCast(ub), @intCast(st), @intCast(chunk));
131 | } else {
132 | @compileError("Unsupported unsigned integer size");
133 | }
134 | } else {
135 | unreachable;
136 | }
137 | }
138 |
139 | extern "omp" fn __kmpc_dispatch_next_4(loc: *const ident_t, gtid: c_int, p_last: *c_int, p_lb: *c_int, p_ub: *c_int, p_st: *c_int) c_int;
140 | extern "omp" fn __kmpc_dispatch_next_4u(loc: *const ident_t, gtid: c_int, p_last: *c_int, p_lb: *c_uint, p_ub: *c_uint, p_st: *c_int) c_int;
141 | extern "omp" fn __kmpc_dispatch_next_8(loc: *const ident_t, gtid: c_int, p_last: *c_int, p_lb: *c_long, p_ub: *c_long, p_st: *c_long) c_int;
142 | extern "omp" fn __kmpc_dispatch_next_8u(loc: *const ident_t, gtid: c_int, p_last: *c_int, p_lb: *c_ulong, p_ub: *c_ulong, p_st: *c_long) c_int;
143 | pub inline fn dispatch_next(comptime T: type, comptime loc: *const ident_t, gtid: c_int, p_last: *c_int, p_lb: *T, p_ub: *T, p_st: *T) c_int {
144 | if (std.meta.trait.isSignedInt(T)) {
145 | if (@typeInfo(T).Int.bits <= 32) {
146 | return __kmpc_dispatch_next_4(loc, gtid, p_last, @ptrCast(p_lb), @ptrCast(p_ub), @ptrCast(p_st));
147 | } else if (@typeInfo(T).Int.bits <= 64) {
148 | return __kmpc_dispatch_next_8(loc, gtid, p_last, @ptrCast(p_lb), @ptrCast(p_ub), @ptrCast(p_st));
149 | } else {
150 | @compileError("Unsupported integer size"); 151
| } 152 | } else if (std.meta.trait.isUnsignedInt(T)) { 153 | if (@typeInfo(T).Int.bits <= 32) { 154 | return __kmpc_dispatch_next_4u(loc, gtid, p_last, @ptrCast(p_lb), @ptrCast(p_ub), @ptrCast(p_st)); 155 | } else if (@typeInfo(T).Int.bits <= 64) { 156 | return __kmpc_dispatch_next_8u(loc, gtid, p_last, @ptrCast(p_lb), @ptrCast(p_ub), @ptrCast(p_st)); 157 | } else { 158 | @compileError("Unsupported unsigned integer size"); 159 | } 160 | } else { 161 | unreachable; 162 | } 163 | } 164 | 165 | extern "omp" fn __kmpc_dispatch_fini_4(loc: *const ident_t, gtid: c_int) void; 166 | extern "omp" fn __kmpc_dispatch_fini_4u(loc: *const ident_t, gtid: c_int) void; 167 | extern "omp" fn __kmpc_dispatch_fini_8(loc: *const ident_t, gtid: c_int) void; 168 | extern "omp" fn __kmpc_dispatch_fini_8u(loc: *const ident_t, gtid: c_int) void; 169 | pub inline fn dispatch_fini(comptime T: type, comptime loc: *const ident_t, gtid: c_int) void { 170 | if (@typeInfo(T).Int.signedness == .signed) { 171 | if (@typeInfo(T).Int.bits <= 32) { 172 | __kmpc_dispatch_fini_4(loc, gtid); 173 | } else if (@typeInfo(T).Int.bits <= 64) { 174 | __kmpc_dispatch_fini_8(loc, gtid); 175 | } else { 176 | @compileError("Unsupported integer size"); 177 | } 178 | } else if (@typeInfo(T).Int.signedness == .unsigned) { 179 | if (@typeInfo(T).Int.bits <= 32) { 180 | __kmpc_dispatch_fini_4u(loc, gtid); 181 | } else if (@typeInfo(T).Int.bits <= 64) { 182 | __kmpc_dispatch_fini_8u(loc, gtid); 183 | } else { 184 | @compileError("Unsupported unsigned integer size"); 185 | } 186 | } else { 187 | unreachable; 188 | } 189 | } 190 | 191 | extern "omp" fn __kmpc_ordered(loc: *const ident_t, global_tid: c_int) void; 192 | pub inline fn ordered(comptime name: *const ident_t, global_tid: c_int) void { 193 | __kmpc_ordered(name, global_tid); 194 | } 195 | 196 | extern "omp" fn __kmpc_end_ordered(loc: *const ident_t, global_tid: c_int) void; 197 | pub inline fn end_ordered(comptime name: *const ident_t, global_tid: c_int) void { 198 | __kmpc_end_ordered(name, global_tid); 199 | } 200 | 201 | extern "omp" fn __kmpc_masked(loc: *const ident_t, global_tid: c_int, filter: c_int) c_int; 202 | pub inline fn masked(comptime name: *const ident_t, global_tid: c_int, filter: c_int) c_int { 203 | return __kmpc_masked(name, global_tid, filter); 204 | } 205 | 206 | extern "omp" fn __kmpc_end_masked(loc: *const ident_t, global_tid: c_int) void; 207 | pub inline fn end_masked(comptime name: *const ident_t, global_tid: c_int) void { 208 | __kmpc_end_masked(name, global_tid); 209 | } 210 | 211 | extern "omp" fn __kmpc_single(loc: *const ident_t, global_tid: c_int) c_int; 212 | pub inline fn single(comptime name: *const ident_t, global_tid: c_int) c_int { 213 | return __kmpc_single(name, global_tid); 214 | } 215 | 216 | extern "omp" fn __kmpc_end_single(loc: *const ident_t, global_tid: c_int) void; 217 | pub inline fn end_single(comptime name: *const ident_t, global_tid: c_int) void { 218 | __kmpc_end_single(name, global_tid); 219 | } 220 | 221 | extern "omp" fn __kmpc_barrier(loc: *const ident_t, global_tid: c_int) void; 222 | pub inline fn barrier(comptime name: *const ident_t, global_tid: c_int) void { 223 | __kmpc_barrier(name, global_tid); 224 | } 225 | 226 | extern "omp" fn __kmpc_global_thread_num() c_int; 227 | pub inline fn get_tid() c_int { 228 | return __kmpc_global_thread_num(); 229 | } 230 | 231 | extern "omp" fn __kmpc_push_num_threads(loc: *const ident_t, global_tid: c_int, num_threads: c_int) void; 232 | pub inline fn push_num_threads(comptime name: 
*const ident_t, global_tid: c_int, num_threads: c_int) void { 233 | __kmpc_push_num_threads(name, global_tid, num_threads); 234 | } 235 | 236 | pub const critical_name_t = [8]c_int; // This seems to be just a lock, so I give up on ever using it 237 | extern "omp" fn __kmpc_critical_with_hint(loc: *const ident_t, global_tid: c_int, crit: *critical_name_t, hint: c_int) void; 238 | pub inline fn critical(comptime loc: *const ident_t, global_tid: c_int, crit: *critical_name_t, hint: c_int) void { 239 | __kmpc_critical_with_hint(loc, global_tid, crit, hint); 240 | } 241 | 242 | extern "omp" fn __kmpc_end_critical(loc: *const ident_t, global_tid: c_int, crit: *critical_name_t) void; 243 | pub inline fn critical_end(comptime loc: *const ident_t, global_tid: c_int, crit: *critical_name_t) void { 244 | __kmpc_end_critical(loc, global_tid, crit); 245 | } 246 | 247 | extern "omp" fn __kmpc_flush(loc: *const ident_t) void; 248 | pub inline fn flush(comptime name: *const ident_t) void { 249 | __kmpc_flush(name); 250 | } 251 | // Todo: invert for big endian 252 | pub const tasking_flags = packed struct { 253 | tiedness: u1 = 0, // task is either tied (1) or untied (0) */ 254 | final: u1 = 0, // task is final(1) so execute immediately */ 255 | merged_if0: u1 = 0, // no __kmpc_task_{begin/complete}_if0 calls in if0 code path */ 256 | destructors_thunk: u1 = 0, // set if the compiler creates a thunk toinvoke destructors from the runtime */ 257 | proxy: u1 = 0, // task is a proxy task (it will be executed outside thecontext of the RTL) */ 258 | priority_specified: u1 = 0, // set if the compiler provides priority setting for the task */ 259 | detachable: u1 = 0, // 1 == can detach */ 260 | hidden_helper: u1 = 0, // 1 == hidden helper task */ 261 | reserved: u8 = 0, // reserved for compiler use */ 262 | 263 | // Library flags */ /* Total library flags must be 1 = 0,6 bits */ 264 | tasktype: u1 = 0, // task is either explicit(1) or implicit (0) */ 265 | task_serial: u1 = 0, // task is executed immediately (1) or deferred (0) 266 | tasking_ser: u1 = 0, // all tasks in team are either executed immediately 267 | // (1 = 0,) or may be deferred (0) 268 | team_serial: u1 = 0, // entire team is serial (1) [1 thread] or parallel 269 | // (0) [>= 2 threads] 270 | // If either team_serial or tasking_ser is set = 0, task team may be NULL */ 271 | // Task State Flags: u*/ 272 | started: u1 = 0, // 1==started, 0==not started */ 273 | executing: u1 = 0, // 1==executing, 0==not executing */ 274 | complete: u1 = 0, // 1==complete, 0==not complete */ 275 | freed: u1 = 0, // 1==freed, 0==allocated */ 276 | native: u1 = 0, // 1==gcc-compiled task, 0==intel */ 277 | onced: u1 = 0, // 1==ran once already, 0==never ran, record & replay purposes */ 278 | reserved31: u6 = 0, // reserved for library use */ 279 | }; 280 | 281 | inline fn ifdef(comptime d: bool, t: type) type { 282 | return if (d) t else void; 283 | } 284 | 285 | const cache_line_size = 64; 286 | pub const task_data_t = extern struct { 287 | td_task_id: c_int, // id, assigned by debugger 288 | td_flags: tasking_flags, // task flags 289 | td_team: *anyopaque, // kmp_team_t, // team for this task 290 | td_alloc_thread: *anyopaque, // kmp_info_p *td_alloc_thread; // thread that allocated data structures 291 | // Currently not used except for perhaps IDB 292 | 293 | td_parent: *@This(), 294 | td_level: c_int, 295 | td_untied_count: std.atomic.Value(c_int), // untied task active parts counter 296 | td_ident: *ident_t, 297 | // Taskwait data. 
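// Illustrative sketch (not part of kmp.zig): the `tasking_flags` packed struct above
// is laid out so that its bit-fields add up to exactly 32 bits, which is why
// task_t.alloc() below can hand it to __kmpc_omp_task_alloc with a plain @bitCast.
// The constant names here are made up for the example.
comptime {
    std.debug.assert(@bitSizeOf(tasking_flags) == 32);
}
const example_flags = tasking_flags{ .tiedness = 1, .final = 1 }; // a tied, final task
const example_as_c_int: c_int = @bitCast(example_flags);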
298 | 299 | td_taskwait_ident: *ident_t, 300 | td_taskwait_counter: c_int, 301 | td_taskwait_thread: c_int, 302 | td_icvs: internal_control align(cache_line_size), 303 | td_allocated_child_tasks: std.atomic.Value(c_int) align(cache_line_size), 304 | td_incomplete_child_tasks: std.atomic.Value(c_int), 305 | // kmp_taskgroup_t* 306 | td_taskgroup: *anyopaque, // Each task keeps pointer to its current taskgroup 307 | // kmp_dephash_t* 308 | td_dephash: *anyopaque, // Dependencies for children tasks are tracked from here 309 | // kmp_depnode_t* 310 | td_depnode: *anyopaque, // Pointer to graph node if this task has dependencies 311 | td_task_team: *anyopaque, // kmp_task_team_t * 312 | td_size_alloc: usize, // Size of task structure, including shareds etc. 313 | // 4 or 8 byte integers for the loop bounds in GOMP_taskloop 314 | td_size_loop_bounds: ifdef(opts.gomp_support, c_int), 315 | 316 | td_last_tied: *@This(), // keep tied task scheduling constraint 317 | // GOMP sends in a copy function for copy constructors 318 | td_copy_func: ifdef(opts.gomp_support, *const fn (*anyopaque, *anyopaque) callconv(.C) void), 319 | 320 | td_allow_completion_event: *anyopaque, // kmp_event_t 321 | ompt_task_info: ifdef(opts.ompt_support, ompt.task_info_t), 322 | is_taskgraph: ifdef(opts.ompx_support, c_char), // whether the task is within a TDG 323 | tdg: ifdef(opts.ompx_support, *anyopaque), // kmp_tdg_info_t *// used to associate task with a TDG 324 | td_target_data: target_data_t, 325 | }; 326 | 327 | const event_type_t = enum(c_int) { 328 | KMP_EVENT_UNINITIALIZED = 0, 329 | KMP_EVENT_ALLOW_COMPLETION = 1, 330 | }; 331 | 332 | const envent_t = extern struct { 333 | typ: event_type_t, 334 | lock: tas_lock, 335 | task: task_t(void, void), 336 | }; 337 | // TODO: SWITCH FOR BIG/LITTLE ENDIAN 338 | const base_tas_lock_t = extern struct { 339 | // KMP_LOCK_FREE(tas) => unlocked; locked: (gtid+1) of owning thread 340 | // Flip the ordering of the high and low 32-bit member to be consistent 341 | // with the memory layout of the address in 64-bit big-endian. 
342 | poll: std.atomic.Value(c_int), 343 | depth_locked: c_int, // depth locked, for nested locks only 344 | }; 345 | 346 | const lock_pool_t = extern struct { 347 | next: *tas_lock, // TODO: This technically is a union of locks, but since I don't want to copy every struct this will suffice 348 | index: c_int, 349 | }; 350 | 351 | const tas_lock = union { 352 | lk: base_tas_lock_t, 353 | pool: lock_pool_t, // make certain struct is large enough 354 | lk_align: c_longdouble, // use worst case alignment; no cache line padding 355 | }; 356 | 357 | const internal_control = extern struct { 358 | serial_nesting_level: c_char, // /* corresponds to the value of the th_team_serialized field */ 359 | dynamic: c_char, // /* internal control for dynamic adjustment of threads (per thread) */ 360 | bt_set: c_char, // internal control for whether blocktime is explicitly set */ 361 | blocktime: c_int, //* internal control for blocktime */ 362 | bt_intervals: ifdef(opts.kmp_monitor_support, c_int), //* internal control for blocktime intervals */ 363 | nproc: c_int, // internal control for #threads for next parallel region (per // thread) */ 364 | thread_limit: c_int, //* internal control for thread-limit-var */ 365 | task_thread_limit: c_int, //; /* internal control for thread-limit-var of a task*/ 366 | max_active_levels: c_int, //; /* internal control for max_active_levels */ 367 | sched: r_sched, //* internal control for runtime schedule {sched,chunk} pair */ 368 | proc_bind: proc_bind_t, //; /* internal control for affinity */ 369 | default_device: c_int, //* internal control for default device */ 370 | next: *@This(), 371 | }; 372 | 373 | const proc_bind_t = enum(c_int) { 374 | proc_bind_false = 0, 375 | proc_bind_true, 376 | proc_bind_primary, 377 | proc_bind_close, 378 | proc_bind_spread, 379 | proc_bind_intel, // use KMP_AFFINITY interface 380 | proc_bind_default, 381 | }; 382 | 383 | // Technically it's a union but who cares `kmp_r_sched' 384 | const r_sched = isize; 385 | 386 | const target_data_t = extern struct { 387 | async_handle: *anyopaque, // libomptarget async handle for task completion query 388 | }; 389 | 390 | // This is just the default task struct, since this is polymorphic, just providing the prototype is enough 391 | const kmp_task_t = task_t(void, void, void); 392 | 393 | // TODO: Use kmp_task_t and then just cast the types back and forth 394 | extern "omp" fn __kmpc_omp_task(loc_ref: *const ident_t, gtid: c_int, new_task: *anyopaque) c_int; 395 | extern "omp" fn __kmpc_omp_task_begin_if0(loc_ref: *const ident_t, gtid: c_int, new_task: *anyopaque) void; 396 | extern "omp" fn __kmpc_omp_task_complete_if0(loc_ref: *const ident_t, gtid: c_int, new_task: *anyopaque) void; 397 | 398 | // Same trick as before, this is not really variadic 399 | extern "omp" fn __kmpc_omp_task_alloc(loc_ref: *const ident_t, gtid: c_int, flags: c_int, sizeof_kmp_task_t: usize, sizeof_shareds: usize, ...) 
*kmp_task_t; 400 | 401 | const opaque_routine_entry_t = *const fn (c_int, *kmp_task_t) callconv(.C) c_int; 402 | const opaque_cmplrdata_t = extern union { 403 | priority: c_int, 404 | destructors: opaque_routine_entry_t, 405 | }; 406 | 407 | pub inline fn promise(comptime ret: type) type { 408 | return struct { 409 | const allocator = std.heap.c_allocator; 410 | 411 | result: ret = undefined, 412 | resolved: std.atomic.Value(bool) = std.atomic.Value(bool).init(false), 413 | 414 | pub inline fn init() !*@This() { 415 | return try allocator.create(@This()); 416 | } 417 | 418 | pub inline fn deinit(self: *@This()) void { 419 | allocator.free(std.mem.asBytes(self)); 420 | } 421 | 422 | pub fn get(self: *@This()) ret { 423 | while (self.resolved.cmpxchgStrong(false, true, .seq_cst, .seq_cst)) |val| { 424 | if (val) break; 425 | std.atomic.spinLoopHint(); 426 | } 427 | 428 | return self.result; 429 | } 430 | 431 | pub inline fn release(self: *@This()) void { 432 | self.resolved.store(true, .release); 433 | } 434 | }; 435 | } 436 | 437 | /// This represents the type `kmp_task_t' or `TaskDescriptorTy' in the source code. 438 | /// It's a polymorphic type that just need `shareds' and `routine' as the preamble to work 439 | /// and then the alloc() will allocate enough space for all the variables that are not explicitally specified 440 | /// in the LLVM source code, like for example the privates here, or part_id 441 | pub inline fn task_t(comptime shareds: type, comptime pri: type, comptime ret: type) type { 442 | // This is needed because extern structs cannot contain normal structs, but we need 443 | // the extern struct since it has consitent ABI and won't rearrange the data. This is 444 | // required for calling the destructor since it's called by C and not by us. 
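// Usage sketch (illustrative only) for the promise(ret) helper defined above: a
// producer fills `result` and calls release(), and a consumer then reads the value
// back with get(), which is intended to return only once the promise has been
// resolved. The function name below is made up for the example.
fn promise_roundtrip() !u32 {
    var p = try promise(u32).init();
    defer p.deinit();

    p.result = 42; // producer side
    p.release(); // mark the promise as resolved

    return p.get(); // consumer side
}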
445 | return extern struct { 446 | const self_t = @This(); 447 | const routine_entry_t = *const fn (c_int, *self_t) callconv(.C) c_int; 448 | const cmplrdata_t = extern union { 449 | priority: c_int, 450 | destructors: routine_entry_t, 451 | }; 452 | 453 | shareds: *shareds, 454 | routine: routine_entry_t, 455 | part_id: c_int, 456 | data1: cmplrdata_t, 457 | data2: cmplrdata_t, 458 | // This can't be a real type since they don't have defined memory structure 459 | privates: [@sizeOf(pri)]u8, 460 | result: if (ret == void) void else *promise(ret), 461 | 462 | inline fn outline(comptime f: anytype) type { 463 | return opaque { 464 | pub fn task(gtid: c_int, t: *self_t) callconv(.C) c_int { 465 | _ = gtid; 466 | 467 | const _shareds = t.shareds.*; 468 | const _privates: pri = std.mem.bytesAsValue(pri, &t.privates).*; 469 | 470 | const r = @call(.always_inline, f, _shareds ++ _privates); 471 | 472 | if (ret != void) { 473 | var pro = t.result; 474 | pro.result = r; 475 | } 476 | return 0; 477 | } 478 | }; 479 | } 480 | 481 | pub inline fn alloc( 482 | comptime f: anytype, 483 | comptime name: *const ident_t, 484 | gtid: c_int, 485 | flags: tasking_flags, 486 | ) *@This() { 487 | const t = &@This().outline(f).task; 488 | return @ptrCast(__kmpc_omp_task_alloc( 489 | name, 490 | gtid, 491 | @bitCast(flags), 492 | @sizeOf(@This()), 493 | @sizeOf(@TypeOf(shareds)), 494 | t, 495 | )); 496 | } 497 | 498 | pub inline fn set_data(self: *@This(), sh: *shareds, pr: pri) void { 499 | self.shareds = sh; 500 | self.privates = std.mem.toBytes(pr); 501 | } 502 | 503 | pub inline fn make_promise(self: *@This(), pro: *promise(ret)) void { 504 | const head = self.get_header(); 505 | self.result = pro; 506 | head.td_flags.destructors_thunk = 1; 507 | 508 | self.data1.destructors = &opaque { 509 | pub fn notify(gtid: c_int, t: *self_t) callconv(.C) c_int { 510 | _ = gtid; 511 | 512 | t.result.release(); 513 | return 0; 514 | } 515 | }.notify; 516 | } 517 | 518 | pub inline fn set_priority(self: *@This(), priority: c_int) void { 519 | self.data2.priority = priority; 520 | @panic("TODO"); 521 | } 522 | 523 | pub inline fn task(self: *@This(), comptime name: *const ident_t, gtid: c_int) c_int { 524 | return __kmpc_omp_task(name, gtid, self); 525 | } 526 | 527 | pub inline fn begin_if0(self: *@This(), comptime name: *const ident_t, gtid: c_int) void { 528 | __kmpc_omp_task_begin_if0(name, gtid, self); 529 | } 530 | 531 | pub inline fn complete_if0(self: *@This(), comptime name: *const ident_t, gtid: c_int) void { 532 | __kmpc_omp_task_complete_if0(name, gtid, self); 533 | } 534 | 535 | pub inline fn get_header(self: *@This()) *task_data_t { 536 | const ptr = @intFromPtr(self) - @sizeOf(task_data_t); 537 | return @ptrFromInt(ptr); 538 | } 539 | }; 540 | } 541 | 542 | extern "omp" fn __kmpc_omp_taskyield(loc_ref: *const ident_t, gtid: c_int, end_part: c_int) c_int; 543 | pub inline fn taskyield(comptime name: *const ident_t, gtid: c_int) c_int { 544 | // Not really sure what end_part is, so always set it to 0. 
Even whithin the runtime it's used only in logging 545 | return __kmpc_omp_taskyield(name, gtid, 0); 546 | } 547 | 548 | extern "omp" fn __kmpc_omp_taskwait(loc_ref: *const ident_t, gtid: c_int) c_int; 549 | pub inline fn taskwait(comptime name: *const ident_t, gtid: c_int) c_int { 550 | return __kmpc_omp_taskwait(name, gtid); 551 | } 552 | // extern "omp" fn __kmpc_omp_target_task_alloc(loc_ref: *const ident_t, gtid: c_int, flags: c_int, sizeof_kmp_task_t: usize, sizeof_shareds: usize, task_entry: kmp_routine_entry_t, device_id: i64) *kmp_task_t; 553 | // pub inline fn target_task_alloc(comptime name: *const ident_t, gtid: c_int, flags: kmp_tasking_flags, sizeof_kmp_task_t: usize, sizeof_shareds: usize, task_entry: kmp_routine_entry_t, device_id: i64) *kmp_task_t { 554 | // return __kmpc_omp_target_task_alloc(name, gtid, flags, sizeof_kmp_task_t, sizeof_shareds, task_entry, device_id); 555 | // } 556 | // 557 | 558 | // extern "omp" fn __kmpc_omp_task_parts(loc_ref: *const ident_t, gtid: c_int, new_task: *kmp_task_t, part: *kmp_task_t) c_int; 559 | // pub inline fn task_parts(comptime name: *const ident_t, gtid: c_int, new_task: *kmp_task_t, part: *kmp_task_t) c_int { 560 | // return __kmpc_omp_task_parts(name, gtid, new_task, part); 561 | // } 562 | // 563 | 564 | extern "omp" fn __kmpc_reduce_nowait( 565 | loc: *const ident_t, 566 | global_tid: c_int, 567 | num_vars: c_int, 568 | reduce_size: usize, 569 | reduce_data: *anyopaque, 570 | reduce_func: *const fn (*anyopaque, *anyopaque) callconv(.C) void, 571 | lck: *critical_name_t, 572 | ) c_int; 573 | /// This call il synchronized and will only occur in the main thread, so we don't need to worry about the reduce_func being called concurrently or use atomics 574 | pub inline fn reduce_nowait( 575 | comptime loc: *const ident_t, 576 | global_tid: c_int, 577 | num_vars: c_int, 578 | reduce_size: usize, 579 | reduce_data: *anyopaque, 580 | comptime f: anytype, 581 | lck: *critical_name_t, 582 | ) c_int { 583 | return __kmpc_reduce_nowait(loc, global_tid, num_vars, reduce_size, reduce_data, f, lck); 584 | } 585 | 586 | extern "omp" fn __kmpc_end_reduce_nowait(loc: *const ident_t, global_tid: c_int, lck: *critical_name_t) void; 587 | pub inline fn end_reduce_nowait(comptime loc: *const ident_t, global_tid: c_int, lck: *critical_name_t) void { 588 | __kmpc_end_reduce_nowait(loc, global_tid, lck); 589 | } 590 | 591 | extern "omp" fn __kmpc_reduce( 592 | loc: *const ident_t, 593 | global_tid: c_int, 594 | num_vars: c_int, 595 | reduce_size: usize, 596 | reduce_data: *anyopaque, 597 | reduce_func: *const fn (*anyopaque, *anyopaque) callconv(.C) void, 598 | lck: *critical_name_t, 599 | ) c_int; 600 | /// This call il synchronized and will only occur in the main thread, so we don't need to worry about the reduce_func being called concurrently or use atomics 601 | pub inline fn reduce( 602 | comptime loc: *const ident_t, 603 | global_tid: c_int, 604 | num_vars: c_int, 605 | reduce_size: usize, 606 | reduce_data: *anyopaque, 607 | comptime f: anytype, 608 | lck: *critical_name_t, 609 | ) c_int { 610 | return __kmpc_reduce(loc, global_tid, num_vars, reduce_size, reduce_data, f, lck); 611 | } 612 | 613 | extern "omp" fn __kmpc_end_reduce(loc: *const ident_t, global_tid: c_int, lck: *critical_name_t) void; 614 | pub inline fn end_reduce(comptime loc: *const ident_t, global_tid: c_int, lck: *critical_name_t) void { 615 | __kmpc_end_reduce(loc, global_tid, lck); 616 | } 617 | extern "omp" fn __kmpc_push_proc_bind(loc: *const ident_t, global_tid: c_int, 
proc_bind: c_int) void; 618 | pub inline fn push_proc_bind(comptime loc: *const ident_t, global_tid: c_int, proc_bind: omp.proc_bind) void { 619 | __kmpc_push_proc_bind(loc, global_tid, @intFromEnum(proc_bind)); 620 | } 621 | -------------------------------------------------------------------------------- /src/omp.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const kmp = @import("kmp.zig"); 3 | const c = @cImport({ 4 | @cInclude("omp.h"); 5 | @cInclude("omp-tools.h"); 6 | }); 7 | const options = @import("build_options"); 8 | const in = @import("input_handler.zig"); 9 | const reduce = @import("reduce.zig"); 10 | const workshare_env = @import("workshare_env.zig"); 11 | 12 | const omp = @This(); 13 | 14 | pub const reduction_operators = reduce.operators; 15 | 16 | pub const proc_bind = enum(c_int) { 17 | default = 1, 18 | master = 2, 19 | close = 3, 20 | spread = 4, 21 | primary = 5, 22 | }; 23 | pub const parallel_opts = struct { 24 | iff: bool = false, 25 | proc_bind: proc_bind = .default, 26 | reduction: []const reduction_operators = &[0]reduction_operators{}, 27 | ret_reduction: reduction_operators = .none, 28 | }; 29 | pub inline fn parallel( 30 | comptime opts: parallel_opts, 31 | ) type { 32 | const common = struct { 33 | inline fn make_args( 34 | args: anytype, 35 | comptime f: anytype, 36 | ) in.zigc_ret(f, @TypeOf(in.normalize_args(args))) { 37 | in.check_fn_signature(f); 38 | 39 | return .{ .v = in.normalize_args(args) }; 40 | } 41 | 42 | inline fn make_proc_bind( 43 | id: *const kmp.ident_t, 44 | comptime bind: proc_bind, 45 | ) void { 46 | if (bind != .default) { 47 | kmp.push_proc_bind(id, kmp.get_tid(), bind); 48 | } 49 | } 50 | 51 | inline fn parallel_outline( 52 | comptime f: anytype, 53 | comptime R: type, 54 | comptime in_opts: parallel_opts, 55 | ) type { 56 | return opaque { 57 | const red = if (in_opts.ret_reduction == .none) in_opts.reduction else in_opts.reduction ++ .{in_opts.ret_reduction}; 58 | const work = workshare_env.make(red, f, in.copy_ret(f), .{ 59 | .do_copy = true, 60 | .return_optional = true, 61 | .is_omp_func = true, 62 | }); 63 | 64 | fn workshare_outline( 65 | gtid: *c_int, 66 | btid: *c_int, 67 | args: *R, 68 | ) callconv(.C) void { 69 | kmp.ctx = .{ 70 | .global_tid = gtid.*, 71 | .bound_tid = btid.*, 72 | }; 73 | 74 | const reduction_val_bytes = [_]u8{0} ** @sizeOf(in.copy_ret(f)); 75 | var reduction_val = std.mem.bytesAsValue(in.copy_ret(f), &reduction_val_bytes).*; 76 | const maybe_ret = work.run(.{}, args.v, .{}, &reduction_val); 77 | 78 | if (maybe_ret) |r| { 79 | args.ret = r; 80 | } 81 | 82 | return; 83 | } 84 | 85 | fn generic_outline( 86 | gtid: *c_int, 87 | btid: *c_int, 88 | args: *R, 89 | ) callconv(.C) void { 90 | kmp.ctx = .{ 91 | .global_tid = gtid.*, 92 | .bound_tid = btid.*, 93 | }; 94 | 95 | args.ret = if (@typeInfo(in.copy_ret(f)) == .ErrorUnion) 96 | @call(.always_inline, f, args.*.v) catch |err| err 97 | else 98 | @call(.always_inline, f, args.*.v); 99 | 100 | return; 101 | } 102 | }; 103 | } 104 | 105 | inline fn parallel_impl( 106 | args: anytype, 107 | comptime f: anytype, 108 | comptime has_cond: bool, 109 | cond: bool, 110 | ) in.copy_ret(f) { 111 | in.check_fn_signature(f); 112 | 113 | var ret = make_args(args, f); 114 | const id: kmp.ident_t = .{ .flags = @intFromEnum(kmp.ident_flags.IDENT_KMPC), .psource = "parallel" ++ @typeName(@TypeOf(f)), .reserved_3 = 0x1e }; 115 | make_proc_bind(&id, opts.proc_bind); 116 | const outline = parallel_outline(f, 
@TypeOf(ret), opts).workshare_outline; 117 | 118 | if (has_cond) { 119 | kmp.fork_call_if(&id, 1, @ptrCast(&outline), @intFromBool(cond), &ret); 120 | } else { 121 | kmp.fork_call(&id, 1, @ptrCast(&outline), &ret); 122 | } 123 | 124 | return ret.ret; 125 | } 126 | 127 | inline fn parallel_loop_impl( 128 | comptime T: type, 129 | lower: T, 130 | upper: T, 131 | increment: T, 132 | args: anytype, 133 | comptime f: anytype, 134 | comptime inner_fn: anytype, 135 | comptime has_cond: bool, 136 | cond: bool, 137 | ) in.copy_ret(f) { 138 | in.check_fn_signature(f); 139 | 140 | const ret_t = struct { 141 | ret: in.copy_ret(f) = undefined, 142 | v: @TypeOf(.{ args, lower, upper, increment, inner_fn }), 143 | }; 144 | const ret: ret_t = .{ .ret = undefined, .v = .{ args, lower, upper, increment, inner_fn } }; 145 | 146 | const id: kmp.ident_t = .{ 147 | .flags = @intFromEnum(kmp.ident_flags.IDENT_KMPC), 148 | .psource = "parallel" ++ @typeName(@TypeOf(f)), 149 | }; 150 | make_proc_bind(&id, opts.proc_bind); 151 | const outline = parallel_outline(f, @TypeOf(ret), opts).generic_outline; 152 | if (has_cond) { 153 | kmp.fork_call_if(&id, 1, @ptrCast(&outline), @intFromBool(cond), &ret); 154 | } else { 155 | kmp.fork_call(&id, 1, @ptrCast(&outline), &ret); 156 | } 157 | return ret.ret; 158 | } 159 | 160 | inline fn parallel_sections_impl( 161 | args: anytype, 162 | comptime f: anytype, 163 | comptime fs: anytype, 164 | comptime has_cond: bool, 165 | cond: bool, 166 | ) in.copy_ret(f) { 167 | in.check_fn_signature(f); 168 | 169 | const ret_t = struct { 170 | ret: in.copy_ret(f) = undefined, 171 | v: @TypeOf(.{ args, fs }), 172 | }; 173 | const ret: ret_t = .{ .ret = undefined, .v = .{ args, fs } }; 174 | 175 | const id: kmp.ident_t = .{ .flags = @intFromEnum(kmp.ident_flags.IDENT_KMPC), .psource = "parallel" ++ @typeName(@TypeOf(f)), .reserved_3 = 0x1e }; 176 | make_proc_bind(&id, opts.proc_bind); 177 | const outline = parallel_outline(f, @TypeOf(ret), opts).generic_outline; 178 | 179 | if (has_cond) { 180 | kmp.fork_call_if(&id, 1, @ptrCast(&outline), @intFromBool(cond), &ret); 181 | } else { 182 | kmp.fork_call(&id, 1, @ptrCast(&outline), &ret); 183 | } 184 | 185 | return ret.ret; 186 | } 187 | }; 188 | 189 | const api = struct { 190 | pub inline fn run_if( 191 | args: anytype, 192 | cond: bool, 193 | comptime f: anytype, 194 | ) in.copy_ret(f) { 195 | return common.parallel_impl(args, f, true, cond); 196 | } 197 | 198 | pub inline fn run( 199 | args: anytype, 200 | comptime f: anytype, 201 | ) in.copy_ret(f) { 202 | return common.parallel_impl(args, f, false, false); 203 | } 204 | 205 | pub inline fn loop( 206 | comptime idx_T: type, 207 | comptime loop_args: parallel_for_opts, 208 | ) type { 209 | return struct { 210 | inline fn _run_if( 211 | args: anytype, 212 | cond: bool, 213 | lower: idx_T, 214 | upper: idx_T, 215 | increment: idx_T, 216 | comptime f: anytype, 217 | ) in.copy_ret(f) { 218 | return common.parallel_loop_impl(idx_T, lower, upper, increment, args, omp.loop(idx_T, loop_args).run, f, true, cond); 219 | } 220 | 221 | inline fn _run( 222 | args: anytype, 223 | lower: idx_T, 224 | upper: idx_T, 225 | increment: idx_T, 226 | comptime f: anytype, 227 | ) in.copy_ret(f) { 228 | return common.parallel_loop_impl(idx_T, lower, upper, increment, args, omp.loop(idx_T, loop_args).run, f, false, false); 229 | } 230 | 231 | pub const run = if (opts.iff) _run_if else _run; 232 | }; 233 | } 234 | 235 | pub inline fn sections( 236 | comptime sections_args: sections_opts, 237 | ) type { 238 | return 
struct { 239 | inline fn _run_if( 240 | args: anytype, 241 | cond: bool, 242 | comptime fs: anytype, 243 | ) in.copy_ret(fs[0]) { 244 | return common.parallel_sections_impl(args, omp.sections(sections_args).run, fs, true, cond); 245 | } 246 | 247 | inline fn _run( 248 | args: anytype, 249 | comptime fs: anytype, 250 | ) in.copy_ret(fs[0]) { 251 | return common.parallel_sections_impl(args, omp.sections(sections_args).run, fs, false, false); 252 | } 253 | 254 | pub const run = if (opts.iff) _run_if else _run; 255 | }; 256 | } 257 | }; 258 | 259 | return struct { 260 | // omp.para(...).run(...); 261 | pub const run = if (opts.iff) api.run_if else api.run; 262 | 263 | // omp.para(...).loop(...).run(...); 264 | pub const loop = api.loop; 265 | 266 | // omp.para(...).sections(...).run(...); 267 | pub const sections = api.sections; 268 | }; 269 | } 270 | 271 | pub const schedule = enum(c_long) { 272 | static = 1, 273 | dynamic = 2, 274 | guided = 3, 275 | auto = 4, 276 | monotonic = 0x80000000, 277 | }; 278 | pub const parallel_for_opts = struct { 279 | sched: schedule = .static, 280 | chunk_size: c_int = 1, 281 | ordered: bool = false, 282 | reduction: []const reduction_operators = &[0]reduction_operators{}, 283 | ret_reduction: reduction_operators = .none, 284 | nowait: bool = false, 285 | }; 286 | pub inline fn loop( 287 | comptime idx_T: type, 288 | comptime opts: parallel_for_opts, 289 | ) type { 290 | return _loop(idx_T, opts, false); 291 | } 292 | 293 | inline fn _loop( 294 | comptime idx_T: type, 295 | comptime opts: parallel_for_opts, 296 | comptime is_from_sections: bool, 297 | ) type { 298 | const common = struct { 299 | pub fn to_kmp_sched(comptime sched: schedule) kmp.sched_t { 300 | switch (sched) { 301 | .static => return if (opts.chunk_size > 1) kmp.sched_t.StaticChunked else kmp.sched_t.StaticNonChunked, 302 | .dynamic => return kmp.sched_t.Dynamic, 303 | .guided => return kmp.sched_t.Guided, 304 | .auto => return kmp.sched_t.Runtime, 305 | else => unreachable, 306 | } 307 | } 308 | 309 | inline fn static_impl( 310 | args: anytype, 311 | lower: idx_T, 312 | upper: idx_T, 313 | increment: idx_T, 314 | comptime f: anytype, 315 | ) in.copy_ret(f) { 316 | const sections_flag = if (is_from_sections) @intFromEnum(kmp.ident_flags.IDENT_WORK_SECTIONS) else 0; 317 | const id = .{ 318 | .flags = @intFromEnum(kmp.ident_flags.IDENT_KMPC) | @intFromEnum(kmp.ident_flags.IDENT_WORK_LOOP) | sections_flag, 319 | .psource = "parallel_for" ++ @typeName(@TypeOf(f)), 320 | }; 321 | 322 | // This is `1` iside the last thread execution 323 | var last_iter: c_int = 0; 324 | var low: idx_T = lower; 325 | var upp: idx_T = upper - 1; 326 | var stri: idx_T = 1; 327 | const incr: idx_T = increment; 328 | 329 | kmp.for_static_init( 330 | idx_T, 331 | &id, 332 | kmp.ctx.global_tid, 333 | to_kmp_sched(opts.sched), 334 | &last_iter, 335 | &low, 336 | &upp, 337 | &stri, 338 | incr, 339 | opts.chunk_size, 340 | ); 341 | 342 | const to_ret_bytes = [_]u8{0} ** @sizeOf(in.copy_ret(f)); 343 | var to_ret = std.mem.bytesAsValue(in.copy_ret(f), &to_ret_bytes).*; 344 | 345 | const red = reduce.create(@typeInfo(@TypeOf(.{to_ret})).Struct.fields, &.{opts.ret_reduction}); 346 | if (opts.chunk_size > 1) { 347 | while (low + opts.chunk_size < upper) : (low += stri) { 348 | inline for (0..opts.chunk_size) |i| { 349 | red.single(&to_ret, @call(.always_inline, f, .{low + @as(idx_T, i)} ++ args)); 350 | } 351 | } 352 | while (low < upper) : (low += incr) { 353 | red.single(&to_ret, @call(.always_inline, f, .{low} ++ args)); 354 | 
}
355 | } else {
356 | var i: idx_T = low;
357 | while (i <= upp) : (i += incr) {
358 | red.single(&to_ret, @call(.always_inline, f, .{i} ++ args));
359 | }
360 | }
361 |
362 | const id_fini = .{
363 | .flags = @intFromEnum(kmp.ident_flags.IDENT_KMPC) | @intFromEnum(kmp.ident_flags.IDENT_WORK_LOOP),
364 | .psource = "parallel_for" ++ @typeName(@TypeOf(f)),
365 | .reserved_3 = 0x1c,
366 | };
367 | kmp.for_static_fini(&id_fini, kmp.ctx.global_tid);
368 |
369 | if (!opts.nowait) {
370 | barrier();
371 | }
372 |
373 | return to_ret;
374 | }
375 |
376 | pub inline fn dynamic_impl(
377 | args: anytype,
378 | lower: idx_T,
379 | upper: idx_T,
380 | increment: idx_T,
381 | comptime f: anytype,
382 | ) in.copy_ret(f) {
383 | const id = .{
384 | .flags = @intFromEnum(kmp.ident_flags.IDENT_KMPC) | @intFromEnum(kmp.ident_flags.IDENT_WORK_LOOP),
385 | .psource = "parallel_for" ++ @typeName(@TypeOf(f)),
386 | };
387 |
388 | // This is `1` inside the last thread execution
389 | var last_iter: c_int = 0;
390 | var low: idx_T = lower;
391 | var upp: idx_T = upper - 1;
392 | var stri: idx_T = 1;
393 | const incr: idx_T = increment;
394 | kmp.dispatch_init(idx_T, &id, kmp.ctx.global_tid, to_kmp_sched(opts.sched), low, upp, incr, opts.chunk_size);
395 |
396 | const to_ret_bytes = [_]u8{0} ** @sizeOf(in.copy_ret(f));
397 | var to_ret = std.mem.bytesAsValue(in.copy_ret(f), &to_ret_bytes).*;
398 |
399 | const red = reduce.create(@typeInfo(@TypeOf(.{to_ret})).Struct.fields, &.{opts.ret_reduction});
400 | while (kmp.dispatch_next(idx_T, &id, kmp.ctx.global_tid, &last_iter, &low, &upp, &stri) == 1) {
401 | defer kmp.dispatch_fini(idx_T, &id, kmp.ctx.global_tid);
402 |
403 | var i: idx_T = low;
404 | while (i <= upp) : (i += incr) {
405 | red.single(&to_ret, @call(.always_inline, f, .{i} ++ args));
406 | }
407 | }
408 |
409 | return to_ret;
410 | }
411 |
412 | pub inline fn static(
413 | args: anytype,
414 | lower: idx_T,
415 | upper: idx_T,
416 | increment: idx_T,
417 | comptime f: anytype,
418 | ) in.copy_ret(f) {
419 | in.check_args(@TypeOf(args));
420 | in.check_fn_signature(f);
421 |
422 | const f_type_info = @typeInfo(@TypeOf(f));
423 | if (f_type_info.Fn.params.len < 1) {
424 | @compileError("Expected function with signature `inline fn(numeric, ...)`" ++ @typeName(@TypeOf(f)) ++ " instead.\n" ++ @typeName(idx_T) ++ " may be different from the expected type: " ++ @typeName(f_type_info.Fn.params[0].type.?));
425 | }
426 | const do_copy = comptime !is_from_sections;
427 | const red = if (opts.ret_reduction == .none) opts.reduction else opts.reduction ++ .{opts.ret_reduction};
428 |
429 | const st = struct {
430 | const reduction_val_bytes = [_]u8{0} ** @sizeOf(in.copy_ret(f));
431 | var reduction_val: in.copy_ret(f) = std.mem.bytesAsValue(in.no_error(in.copy_ret(f)), &reduction_val_bytes).*;
432 | };
433 |
434 | const work = workshare_env.make(red, static_impl, in.copy_ret(f), .{
435 | .do_copy = do_copy,
436 | .is_omp_func = false,
437 | .return_optional = false,
438 | });
439 |
440 | // Ignore any of the returns since the only ones we care about are the reduction values
441 | if (@typeInfo(in.copy_ret(f)) == .ErrorUnion) {
442 | _ = work.run(.{}, in.normalize_args(args), .{ lower, upper, increment, f }, &st.reduction_val) catch {};
443 | } else {
444 | _ = work.run(.{}, in.normalize_args(args), .{ lower, upper, increment, f }, &st.reduction_val);
445 | }
446 | if (!opts.nowait) {
447 | barrier();
448 | }
449 |
450 | return st.reduction_val;
451 | }
452 |
453 | pub inline fn dynamic(
args: anytype, 455 | lower: idx_T, 456 | upper: idx_T, 457 | increment: idx_T, 458 | comptime f: anytype, 459 | ) in.copy_ret(f) { 460 | std.debug.assert(is_from_sections == false); 461 | 462 | in.check_args(@TypeOf(args)); 463 | in.check_fn_signature(f); 464 | 465 | const f_type_info = @typeInfo(@TypeOf(f)); 466 | if (f_type_info.Fn.params.len < 1) { 467 | @compileError("Expected function with signature `inline fn(numeric, ...)`" ++ @typeName(@TypeOf(f)) ++ " instead.\n" ++ @typeName(idx_T) ++ " may be different from the expected type: " ++ @typeName(f_type_info.Fn.params[0].type.?)); 468 | } 469 | 470 | const st = struct { 471 | const reduction_val_bytes = [_]u8{0} ** @sizeOf(in.copy_ret(f)); 472 | var reduction_val: in.copy_ret(f) = std.mem.bytesAsValue(in.no_error(in.copy_ret(f)), &reduction_val_bytes).*; 473 | }; 474 | const red = if (opts.ret_reduction == .none) opts.reduction else opts.reduction ++ .{opts.ret_reduction}; 475 | const work = workshare_env.make(red, dynamic_impl, in.copy_ret(f), .{ 476 | .do_copy = true, 477 | .is_omp_func = false, 478 | .return_optional = false, 479 | }); 480 | 481 | if (@typeInfo(in.copy_ret(f)) == .ErrorUnion) { 482 | _ = work.run(.{}, in.normalize_args(args), .{ lower, upper, increment, f }, &st.reduction_val) catch {}; 483 | } else { 484 | _ = work.run(.{}, in.normalize_args(args), .{ lower, upper, increment, f }, &st.reduction_val); 485 | } 486 | if (!opts.nowait) { 487 | barrier(); 488 | } 489 | 490 | return st.reduction_val; 491 | } 492 | }; 493 | 494 | return struct { 495 | pub const run = if (opts.chunk_size == 1 and opts.sched == .static) common.static else common.dynamic; 496 | }; 497 | } 498 | 499 | pub inline fn barrier() void { 500 | const id: kmp.ident_t = .{ 501 | .flags = @intFromEnum(kmp.ident_flags.IDENT_KMPC) | @intFromEnum(kmp.ident_flags.IDENT_BARRIER_EXPL), 502 | .psource = "barrier", 503 | .reserved_3 = 0x1e, 504 | }; 505 | kmp.barrier(&id, kmp.ctx.global_tid); 506 | } 507 | 508 | pub inline fn flush(vars: anytype) void { 509 | _ = vars; // Just ignore this, it's only used to define the ordering of operations when compiling, I hope... 
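// Illustrative note on the loop()/_loop() wrappers above: `run` resolves at comptime
// to the static fast path only for a .static schedule with the default chunk size of 1;
// any other combination goes through the dispatch_init/next/fini path. The declarations
// below are examples only.
const static_loop = omp.loop(u32, .{}); // .sched = .static, .chunk_size = 1 -> common.static
const dynamic_loop = omp.loop(u32, .{ .sched = .dynamic, .chunk_size = 4 }); // -> common.dynamic
// Either one is then invoked as `.run(args, lower, upper, increment, body)`, where
// `body` receives the loop index as its first parameter.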
510 | const id: kmp.ident_t = .{ 511 | .flags = @intFromEnum(kmp.ident_flags.IDENT_KMPC), 512 | .psource = "flush", 513 | .reserved_3 = 0x1e, 514 | }; 515 | kmp.flush(&id); 516 | } 517 | 518 | pub const critical_options = struct { 519 | sync: sync_hint_t = .none, 520 | name: []const u8 = "", 521 | }; 522 | pub inline fn critical( 523 | comptime opts: critical_options, 524 | ) type { 525 | return struct { 526 | pub inline fn run( 527 | args: anytype, 528 | comptime f: anytype, 529 | ) in.copy_ret(f) { 530 | in.check_args(@TypeOf(args)); 531 | in.check_fn_signature(f); 532 | 533 | const id: kmp.ident_t = .{ 534 | .flags = @intFromEnum(kmp.ident_flags.IDENT_KMPC) | @intFromEnum(kmp.ident_flags.IDENT_WORK_LOOP), 535 | .psource = "barrier", 536 | }; 537 | 538 | const static = struct { 539 | var lock: kmp.critical_name_t = @bitCast([_]u8{0} ** 32); 540 | }; 541 | 542 | kmp.critical(&id, kmp.ctx.global_tid, &static.lock, @intFromEnum(opts.sync)); 543 | defer { 544 | kmp.critical_end(&id, kmp.ctx.global_tid, &static.lock); 545 | } 546 | 547 | const type_info = @typeInfo(@typeInfo(@TypeOf(f)).Fn.return_type.?); 548 | const ret = ret: { 549 | if (type_info == .ErrorUnion) { 550 | break :ret try @call(.always_inline, f, args); 551 | } else { 552 | break :ret @call(.always_inline, f, args); 553 | } 554 | }; 555 | 556 | return ret; 557 | } 558 | }; 559 | } 560 | 561 | pub const sections_opts = struct { 562 | reduction: []const reduction_operators = &[0]reduction_operators{}, 563 | ret_reduction: reduction_operators = .none, 564 | nowait: bool = false, 565 | }; 566 | 567 | pub inline fn sections( 568 | comptime opts: sections_opts, 569 | ) type { 570 | return struct { 571 | pub inline fn run( 572 | args: anytype, 573 | comptime fs: anytype, 574 | ) in.copy_ret(fs[0]) { 575 | const args_type = @TypeOf(args); 576 | 577 | in.check_args(args_type); 578 | comptime std.debug.assert(@typeInfo(@TypeOf(fs)) == .Struct); 579 | inline for (fs) |f| { 580 | in.check_fn_signature(f); 581 | } 582 | 583 | const runner = struct { 584 | const _fs: [fs.len]@TypeOf(fs[0]) = fs; 585 | 586 | pub inline fn f(idx: usize, a: @TypeOf(in.normalize_args(args))) in.copy_ret(fs[0]) { 587 | const private_copy = in.make_another(a.private); 588 | const firstprivate_copy = in.shallow_copy(a.firstprivate); 589 | const reduction_copy = in.shallow_copy(a.reduction); 590 | const true_args = .{a.shared ++ private_copy ++ firstprivate_copy ++ reduction_copy}; 591 | 592 | const type_info = @typeInfo(@typeInfo(@TypeOf(f)).Fn.return_type.?); 593 | const ret = ret: { 594 | if (type_info == .ErrorUnion) { 595 | break :ret try @call(.auto, _fs[idx], true_args[0]); 596 | } else { 597 | break :ret @call(.auto, _fs[idx], true_args[0]); 598 | } 599 | }; 600 | 601 | return ret; 602 | } 603 | }.f; 604 | 605 | return _loop(usize, .{ 606 | .nowait = opts.nowait, 607 | .reduction = opts.reduction, 608 | .sched = .static, 609 | }, true).run(args, 0, fs.len, 1, runner); 610 | } 611 | }; 612 | } 613 | 614 | pub inline fn single() type { 615 | return struct { 616 | pub inline fn run( 617 | args: anytype, 618 | comptime f: anytype, 619 | ) void_or_opt(in.copy_ret(f)) { 620 | in.check_args(@TypeOf(args)); 621 | in.check_fn_signature(f); 622 | 623 | const single_id = .{ 624 | .flags = @intFromEnum(kmp.ident_flags.IDENT_KMPC), 625 | .psource = "single" ++ @typeName(@TypeOf(f)), 626 | }; 627 | const barrier_id = .{ 628 | .flags = @intFromEnum(kmp.ident_flags.IDENT_KMPC) | @intFromEnum(kmp.ident_flags.IDENT_BARRIER_IMPL_SINGLE), 629 | .psource = "single" ++ 
@typeName(@TypeOf(f)), 630 | .reserved_3 = 0x27, 631 | }; 632 | 633 | if (kmp.single(&single_id, kmp.ctx.global_tid) == 1) { 634 | defer { 635 | kmp.end_single(&single_id, kmp.ctx.global_tid); 636 | kmp.barrier(&barrier_id, kmp.ctx.global_tid); 637 | } 638 | const type_info = @typeInfo(@typeInfo(@TypeOf(f)).Fn.return_type.?); 639 | 640 | return if (type_info == .ErrorUnion) 641 | try @call(.always_inline, f, args) 642 | else 643 | @call(.always_inline, f, args); 644 | } 645 | 646 | kmp.barrier(&barrier_id, kmp.ctx.global_tid); 647 | if (in.copy_ret(f) != void) { 648 | return null; 649 | } 650 | } 651 | }; 652 | } 653 | 654 | pub inline fn void_or_opt(comptime T: type) type { 655 | return if (T == void) void else ?T; 656 | } 657 | 658 | pub inline fn master() type { 659 | return struct { 660 | pub inline fn run( 661 | args: anytype, 662 | comptime f: anytype, 663 | ) void_or_opt(in.copy_ret(f)) { 664 | return masked.run(only_master, args, f); 665 | } 666 | }; 667 | } 668 | 669 | pub const only_master: c_int = 0; 670 | pub inline fn masked() type { 671 | return struct { 672 | pub inline fn run( 673 | args: anytype, 674 | filter: i32, 675 | comptime f: anytype, 676 | ) void_or_opt(in.copy_ret(f)) { 677 | in.check_args(@TypeOf(args)); 678 | in.check_fn_signature(f); 679 | 680 | const masked_id = .{ 681 | .flags = @intFromEnum(kmp.ident_flags.IDENT_KMPC), 682 | .psource = "masked" ++ @typeName(@TypeOf(f)), 683 | }; 684 | 685 | if (kmp.masked(&masked_id, kmp.ctx.global_tid, filter) == 1) { 686 | const type_info = @typeInfo(@typeInfo(@TypeOf(f)).Fn.return_type.?); 687 | if (type_info == .ErrorUnion) { 688 | return try @call(.always_inline, f, args); 689 | } else { 690 | return @call(.always_inline, f, args); 691 | } 692 | } 693 | if (void_or_opt(in.copy_ret(f)) != void) { 694 | return null; 695 | } 696 | } 697 | }; 698 | } 699 | 700 | pub const promise = kmp.promise; 701 | inline fn void_or_promise_ptr(comptime T: type) type { 702 | return if (T == void) void else *promise(T); 703 | } 704 | 705 | pub const task_opts = struct { 706 | iff: bool = false, 707 | final: bool = false, 708 | untied: bool = false, 709 | }; 710 | 711 | pub inline fn task( 712 | comptime opts: task_opts, 713 | ) type { 714 | const api = struct { 715 | inline fn run_impl( 716 | args: anytype, 717 | comptime f: anytype, 718 | cond: bool, 719 | fin: bool, 720 | ) void_or_promise_ptr(in.copy_ret(f)) { 721 | const id = .{ 722 | .flags = @intFromEnum(kmp.ident_flags.IDENT_KMPC), 723 | .psource = "task" ++ @typeName(@TypeOf(f)), 724 | }; 725 | var norm = in.normalize_args(args); 726 | 727 | const private_copy = in.make_another(norm.private); 728 | const firstprivate_copy = in.shallow_copy(norm.firstprivate); 729 | const private_args = private_copy ++ firstprivate_copy; 730 | 731 | // in.check_args(@TypeOf(private_args)); 732 | in.check_fn_signature(f); 733 | 734 | const t_type = kmp.task_t( 735 | @TypeOf(norm.shared), 736 | @TypeOf(private_args), 737 | in.copy_ret(f), 738 | ); 739 | 740 | const flags = kmp.tasking_flags{ 741 | .tiedness = @intFromBool(!opts.untied), 742 | .final = @intFromBool(fin), 743 | }; 744 | 745 | const real_task = t_type.alloc( 746 | f, 747 | &id, 748 | kmp.ctx.global_tid, 749 | flags, 750 | ); 751 | real_task.set_data(&norm.shared, private_args); 752 | 753 | // TODO: do something better with this error... 
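// Usage sketch (illustrative, with unverified argument plumbing) for the task wrapper:
// when the task function returns a value, run() hands back a *promise(T) that the caller
// waits on and frees. `forty_two` and `spawn_and_wait` are made-up names for the example.
fn forty_two() u32 {
    return 42;
}

fn spawn_and_wait() u32 {
    const p = omp.task(.{}).run(.{}, forty_two);
    defer p.deinit();
    omp.taskwait(); // make sure the child task has finished
    return p.get();
}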
754 | var pro: void_or_promise_ptr(in.copy_ret(f)) = if (in.copy_ret(f) == void) undefined else promise(in.copy_ret(f)).init() catch @panic("Buy more RAM lol"); 755 | if (@TypeOf(pro) == *kmp.promise(in.copy_ret(f))) { 756 | real_task.make_promise(pro); 757 | } 758 | 759 | if (comptime opts.iff) { 760 | if (!cond) { 761 | real_task.begin_if0(&id, kmp.ctx.global_tid); 762 | 763 | if (@typeInfo(in.copy_ret(f)) == .ErrorUnion) { 764 | _ = @call(.always_inline, f, norm.shared ++ private_args) catch |err| err; 765 | } else { 766 | _ = @call(.always_inline, f, norm.shared ++ private_args); 767 | } 768 | 769 | real_task.complete_if0(&id, kmp.ctx.global_tid); 770 | } 771 | 772 | if (@TypeOf(pro) == *promise(in.copy_ret(f))) { 773 | pro.release(); 774 | } 775 | return pro; 776 | } 777 | 778 | _ = real_task.task(&id, kmp.ctx.global_tid); 779 | return pro; 780 | } 781 | 782 | pub inline fn run( 783 | args: anytype, 784 | comptime f: anytype, 785 | ) void_or_promise_ptr(in.copy_ret(f)) { 786 | return run_impl(args, f, false, false); 787 | } 788 | 789 | pub inline fn run_if( 790 | cond: bool, 791 | args: anytype, 792 | comptime f: anytype, 793 | ) void_or_promise_ptr(in.copy_ret(f)) { 794 | return run_impl(args, f, cond, false); 795 | } 796 | 797 | pub inline fn run_final( 798 | final: bool, 799 | args: anytype, 800 | comptime f: anytype, 801 | ) void_or_promise_ptr(in.copy_ret(f)) { 802 | return run_impl(args, f, false, final); 803 | } 804 | 805 | pub inline fn run_if_final( 806 | cond: bool, 807 | final: bool, 808 | args: anytype, 809 | comptime f: anytype, 810 | ) void_or_promise_ptr(in.copy_ret(f)) { 811 | return run_impl(args, f, cond, final); 812 | } 813 | }; 814 | 815 | return struct { 816 | // TODO: Find a way to format it better 817 | pub const run = if (opts.iff and opts.final) api.run_if_final else if (opts.iff and !opts.final) api.run_if else if (!opts.iff and opts.final) api.run_final else api.run; 818 | }; 819 | } 820 | 821 | pub inline fn taskyeild() void { 822 | const id = .{ 823 | .flags = @intFromEnum(kmp.ident_flags.IDENT_KMPC), 824 | .psource = "taskyeild", 825 | }; 826 | kmp.taskyield(&id, kmp.ctx.global_tid); 827 | } 828 | 829 | pub inline fn taskwait() void { 830 | const id = .{ 831 | .flags = @intFromEnum(kmp.ident_flags.IDENT_KMPC), 832 | .psource = "taskwait", 833 | }; 834 | kmp.taskwait(&id, kmp.ctx.global_tid); 835 | } 836 | 837 | // ////////////////////////////////////////////////////////////////////////////////// 838 | // / Runtime API //////////////////////////////////////////////////////////////////// 839 | // ////////////////////////////////////////////////////////////////////////////////// 840 | 841 | // Setters 842 | pub inline fn set_num_threads(num_threads: u32) void { 843 | c.omp_set_num_threads(@intCast(num_threads)); 844 | } 845 | 846 | pub inline fn set_dynamic(dynamic_threads: bool) void { 847 | c.omp_set_dynamic(@intFromBool(dynamic_threads)); 848 | } 849 | 850 | pub inline fn set_nested(nested: bool) void { 851 | c.omp_set_nested(@intFromBool(nested)); 852 | } 853 | 854 | pub inline fn set_max_active_levels(max_levels: u32) void { 855 | c.omp_set_max_active_levels(@intCast(max_levels)); 856 | } 857 | 858 | extern "c" fn omp_set_schedule(kind: schedule, chunk_size: c_int) void; 859 | pub inline fn set_schedule(kind: schedule, chunk_size: u32) void { 860 | c.omp_set_schedule(kind, chunk_size); 861 | } 862 | 863 | // Getters 864 | pub inline fn get_num_threads() u32 { 865 | return @intCast(c.omp_get_num_threads()); 866 | } 867 | 868 | pub inline fn get_dynamic() 
bool { 869 | return c.omp_get_dynamic(); 870 | } 871 | 872 | pub inline fn get_nested() bool { 873 | return c.omp_get_nested(); 874 | } 875 | 876 | pub inline fn get_max_threads() u32 { 877 | return @intCast(c.omp_get_max_threads()); 878 | } 879 | 880 | pub inline fn get_thread_num() u32 { 881 | return @intCast(c.omp_get_thread_num()); 882 | } 883 | 884 | pub inline fn get_num_procs() u32 { 885 | return @intCast(c.omp_get_num_procs()); 886 | } 887 | 888 | pub inline fn in_parallel() bool { 889 | return c.omp_in_parallel(); 890 | } 891 | 892 | pub inline fn in_final() bool { 893 | return c.omp_in_final(); 894 | } 895 | 896 | pub inline fn get_active_level() u32 { 897 | return @intCast(c.omp_get_active_level()); 898 | } 899 | 900 | pub inline fn get_level() u32 { 901 | return @intCast(c.omp_get_level()); 902 | } 903 | 904 | pub inline fn get_ancestor_thread_num(level: u32) u32 { 905 | return @intCast(c.omp_get_ancestor_thread_num(@intCast(level))); 906 | } 907 | 908 | pub inline fn get_team_size(level: u32) u32 { 909 | return @intCast(c.omp_get_team_size(@intCast(level))); 910 | } 911 | 912 | pub inline fn get_thread_limit() u32 { 913 | return @intCast(c.omp_get_thread_limit()); 914 | } 915 | 916 | pub inline fn get_max_active_levels() u32 { 917 | return @intCast(c.omp_get_max_active_levels()); 918 | } 919 | pub inline fn get_schedule(kind: *schedule, chunk_size: *u32) void { 920 | c.omp_get_schedule(kind, @intCast(chunk_size)); 921 | } 922 | 923 | pub inline fn get_max_task_priority() u32 { 924 | return @intCast(c.omp_get_max_task_priority()); 925 | } 926 | 927 | // Locks 928 | // OpenMP 5.0 Synchronization hints 929 | pub const sync_hint_t = enum(c_int) { 930 | none = 0, 931 | uncontended = 1, 932 | contended = 1 << 1, 933 | nonspeculative = 1 << 2, 934 | speculative = 1 << 3, 935 | hle = 1 << 16, 936 | rtm = 1 << 17, 937 | adaptive = 1 << 18, 938 | }; 939 | 940 | /// lock hint type for dynamic user lock 941 | pub const lock_hint_t = sync_hint_t; 942 | const lock_t = extern struct { 943 | _lk: *anyopaque, 944 | }; 945 | 946 | pub const lock = struct { 947 | const Self = @This(); 948 | _lk: lock_t, 949 | 950 | pub inline fn init(this: *Self) void { 951 | c.omp_init_lock(this._lk); 952 | } 953 | 954 | pub inline fn set(this: *Self) void { 955 | c.omp_set_lock(this._lk); 956 | } 957 | 958 | pub inline fn unset(this: *Self) void { 959 | c.omp_unset_lock(this._lk); 960 | } 961 | 962 | pub inline fn destroy(this: *Self) void { 963 | c.omp_destroy_lock(this._lk); 964 | } 965 | 966 | pub inline fn test_(this: *Self) bool { 967 | return c.omp_test_lock(this._lk) != 0; 968 | } 969 | }; 970 | 971 | const nest_lock_t = extern struct { 972 | _lk: *anyopaque, 973 | }; 974 | 975 | pub const nest_lock = struct { 976 | const Self = @This(); 977 | _lk: nest_lock_t, 978 | 979 | pub inline fn init(this: *Self) void { 980 | c.omp_init_nest_lock(this._lk); 981 | } 982 | 983 | pub inline fn set(this: *Self) void { 984 | c.omp_set_nest_lock(this._lk); 985 | } 986 | 987 | pub inline fn unset(this: *Self) void { 988 | c.omp_unset_nest_lock(this._lk); 989 | } 990 | 991 | pub inline fn destroy(this: *Self) void { 992 | c.omp_destroy_nest_lock(this._lk); 993 | } 994 | 995 | pub inline fn test_(this: *Self) bool { 996 | return c.omp_test_nest_lock(this._lk) != 0; 997 | } 998 | }; 999 | 1000 | /// time API functions 1001 | pub inline fn get_wtime() f64 { 1002 | return c.omp_get_wtime(); 1003 | } 1004 | 1005 | pub inline fn get_wtick() f64 { 1006 | return c.omp_get_wtick(); 1007 | } 1008 | 1009 | /// OpenMP 4.0 1010 | 
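// Before the OpenMP 4.0 device routines below, a small usage sketch (illustrative) for
// the wall-clock helpers above: get_wtime() reports seconds as an f64, so a region can
// be timed by differencing two samples. `timed` is a made-up helper name.
fn timed(comptime work: anytype) f64 {
    const start = get_wtime();
    work();
    return get_wtime() - start;
}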
pub inline fn get_default_device() u32 { 1011 | return @intCast(c.omp_get_default_device()); 1012 | } 1013 | 1014 | pub inline fn set_default_device(device: u32) void { 1015 | c.omp_set_default_device(@intCast(device)); 1016 | } 1017 | 1018 | pub inline fn is_initial_device() bool { 1019 | return c.omp_is_initial_device(); 1020 | } 1021 | 1022 | pub inline fn get_num_devices() u32 { 1023 | return @intCast(c.omp_get_num_devices()); 1024 | } 1025 | 1026 | pub inline fn get_num_teams() u32 { 1027 | return @intCast(c.omp_get_num_teams()); 1028 | } 1029 | 1030 | pub inline fn get_team_num() u32 { 1031 | return @intCast(c.omp_get_team_num()); 1032 | } 1033 | 1034 | pub inline fn get_cancellation() bool { 1035 | return c.omp_get_cancellation(); 1036 | } 1037 | 1038 | // /* OpenMP 4.5 */ 1039 | pub inline fn get_initial_device() u32 { 1040 | return @intCast(c.omp_get_initial_device()); 1041 | } 1042 | 1043 | inline fn target_alloc(size: usize, device_num: u32) *u8 { 1044 | return c.omp_target_alloc(size, @intCast(device_num)); 1045 | } 1046 | 1047 | inline fn target_free(ptr: *anyopaque, device_num: u32) void { 1048 | c.omp_target_free(ptr, @intCast(device_num)); 1049 | } 1050 | 1051 | inline fn target_is_present(ptr: *anyopaque, device_num: u32) bool { 1052 | return c.omp_target_is_present(ptr, @intCast(device_num)) != 0; 1053 | } 1054 | 1055 | inline fn target_memcpy(dst: *u8, src: *const u8, length: usize, dst_offset: usize, src_offset: usize, device_num: u32) void { 1056 | c.omp_target_memcpy(dst, src, length, dst_offset, src_offset, @intCast(device_num)); 1057 | } 1058 | 1059 | inline fn target_memcpy_rect( 1060 | dst: *u8, 1061 | src: *const u8, 1062 | element_size: usize, 1063 | num_dims: c_int, 1064 | volume: *usize, 1065 | dst_offsets: *usize, 1066 | src_offsets: *usize, 1067 | dst_dimensions: *usize, 1068 | src_dimensions: *usize, 1069 | dst_device_num: u32, 1070 | src_device_num: u32, 1071 | ) void { 1072 | c.omp_target_memcpy_rect( 1073 | dst, 1074 | src, 1075 | element_size, 1076 | num_dims, 1077 | volume, 1078 | dst_offsets, 1079 | src_offsets, 1080 | dst_dimensions, 1081 | src_dimensions, 1082 | @intCast(dst_device_num), 1083 | @intCast(src_device_num), 1084 | ); 1085 | } 1086 | 1087 | inline fn target_associate_ptr(host_ptr: *const anyopaque, device_ptr: *const anyopaque, size: usize, device_num: u32) void { 1088 | c.omp_target_associate_ptr(host_ptr, device_ptr, size, @intCast(device_num)); 1089 | } 1090 | 1091 | inline fn target_disassociate_ptr(ptr: *const anyopaque, device_num: u32) void { 1092 | c.omp_target_disassociate_ptr(ptr, @intCast(device_num)); 1093 | } 1094 | 1095 | // OpenMP 5.0 1096 | pub inline fn get_device_num() u32 { 1097 | return @intCast(c.omp_get_device_num()); 1098 | } 1099 | 1100 | // typedef void * omp_depend_t; 1101 | pub const depend_t = *anyopaque; 1102 | 1103 | // OpenMP 5.1 interop 1104 | // TODO: Maybe `usize` is better here, but intptr_t is supposed to be an int 1105 | pub const intptr_t = isize; 1106 | // 0..omp_get_num_interop_properties()-1 are reserved for implementation-defined properties 1107 | pub const interop_property_t = enum(c_int) { 1108 | fr_id = -1, 1109 | fr_name = -2, 1110 | vendor = -3, 1111 | vendor_name = -4, 1112 | device_num = -5, 1113 | platform = -6, 1114 | device = -7, 1115 | device_context = -8, 1116 | targetsync = -9, 1117 | first = -9, 1118 | }; 1119 | 1120 | pub const interop_rc_t = enum(c_int) { 1121 | no_value = 1, 1122 | success = 0, 1123 | empty = -1, 1124 | out_of_range = -2, 1125 | type_int = -3, 1126 | type_ptr = 
-4, 1127 | type_str = -5, 1128 | other = -6, 1129 | }; 1130 | 1131 | pub const interop_fr = enum(c_int) { 1132 | cuda = 1, 1133 | cuda_driver = 2, 1134 | opencl = 3, 1135 | sycl = 4, 1136 | hip = 5, 1137 | level_zero = 6, 1138 | last = 7, 1139 | }; 1140 | 1141 | pub const interop = *opaque { 1142 | const Self = @This(); 1143 | 1144 | /// None is defined as '&0' in the C API 1145 | inline fn init() Self { 1146 | return @bitCast(0); 1147 | } 1148 | 1149 | /// 1150 | /// The `omp_get_num_interop_properties` routine retrieves the number of implementation-defined properties available for an `omp_interop_t` object 1151 | /// 1152 | inline fn get_num_interop_properties(this: Self) interop_property_t { 1153 | return @enumFromInt(c.omp_get_num_interop_properties(this)); 1154 | } 1155 | 1156 | /// 1157 | /// The `omp_get_interop_int` routine retrieves an integer property from an `omp_interop_t` object. 1158 | /// 1159 | inline fn get_int(this: Self, property: interop_property_t, ret_code: *c_int) intptr_t { 1160 | return c.omp_get_interop_int(this, property, ret_code); 1161 | } 1162 | 1163 | /// 1164 | /// The `omp_get_interop_ptr` routine retrieves a pointer property from an `omp_interop_t` object. 1165 | /// 1166 | inline fn get_interop_ptr(this: Self, property: interop_property_t, ret_code: *c_int) *anyopaque { 1167 | return c.omp_get_interop_ptr(this, property, ret_code); 1168 | } 1169 | 1170 | /// 1171 | /// The `omp_get_interop_str` routine retrieves a string property from an `omp_interop_t` object. 1172 | /// 1173 | inline fn get_str(this: Self, property: interop_property_t, ret_code: *c_int) [:0]const u8 { 1174 | return c.omp_get_interop_str(this, property, ret_code); 1175 | } 1176 | 1177 | /// 1178 | /// The `omp_get_interop_name` routine retrieves a property name from an `omp_interop_t` object. 1179 | /// 1180 | inline fn get_name(this: Self, property: interop_property_t) [:0]const u8 { 1181 | return c.omp_get_interop_name(this, property); 1182 | } 1183 | 1184 | /// 1185 | /// The `omp_get_interop_type_desc` routine retrieves a description of the type of a property associated with an `omp_interop_t` object. 1186 | /// 1187 | inline fn get_type_desc(this: Self, property: interop_property_t) [:0]const u8 { 1188 | return c.omp_get_interop_type_desc(this, property); 1189 | } 1190 | 1191 | /// 1192 | /// The `omp_get_interop_rc_desc` routine retrieves a description of the return code associated with an `omp_interop_t` object. 1193 | /// 1194 | inline fn get_rc_desc(this: Self, ret_code: interop_rc_t) [:0]const u8 { 1195 | return c.omp_get_interop_rc_desc(this, ret_code); 1196 | } 1197 | }; 1198 | 1199 | /// OpenMP 5.1 device memory routines 1200 | /// 1201 | /// The `omp_target_memcpy_async` routine asynchronously performs a copy between any combination of host and device pointers. 1202 | /// 1203 | inline fn target_memcpy_async( 1204 | dst: *u8, 1205 | src: *const u8, 1206 | length: usize, 1207 | dst_offset: usize, 1208 | src_offset: usize, 1209 | device_num: c_int, 1210 | dep: *depend_t, 1211 | ) c_int { 1212 | return c.omp_target_memcpy_async(dst, src, length, dst_offset, src_offset, device_num, dep); 1213 | } 1214 | 1215 | /// 1216 | /// The `omp_target_memcpy_rect_async` routine asynchronously performs a copy between any combination of host and device pointers. 
1217 | /// 1218 | inline fn target_memcpy_rect_async( 1219 | dst: *u8, 1220 | src: *const u8, 1221 | element_size: usize, 1222 | num_dims: c_int, 1223 | volume: *usize, 1224 | dst_offsets: *usize, 1225 | src_offsets: *usize, 1226 | dst_dimensions: *usize, 1227 | src_dimensions: *usize, 1228 | dst_device_num: c_int, 1229 | src_device_num: c_int, 1230 | dep: *depend_t, 1231 | ) c_int { 1232 | return c.omp_target_memcpy_rect_async( 1233 | dst, 1234 | src, 1235 | element_size, 1236 | num_dims, 1237 | volume, 1238 | dst_offsets, 1239 | src_offsets, 1240 | dst_dimensions, 1241 | src_dimensions, 1242 | dst_device_num, 1243 | src_device_num, 1244 | dep, 1245 | ); 1246 | } 1247 | 1248 | // OpenMP 6.0 device memory routines 1249 | pub inline fn target_memset(ptr: *u8, value: c_int, size: usize, device_num: c_int) *u8 { 1250 | return c.omp_target_memset(ptr, value, size, device_num); 1251 | } 1252 | pub inline fn target_memset_async(ptr: *u8, value: c_int, size: usize, device_num: c_int, dep: *depend_t) *u8 { 1253 | return c.omp_target_memset_async(ptr, value, size, device_num, dep); 1254 | } 1255 | /// 1256 | /// The `omp_get_mapped_ptr` routine returns the device pointer that is associated with a host pointer for a given device. 1257 | /// 1258 | inline fn get_mapped_ptr(ptr: *const anyopaque, device_num: c_int) *anyopaque { 1259 | return c.omp_get_mapped_ptr(ptr, device_num); 1260 | } 1261 | /// 1262 | /// The `omp_target_is_accessible` routine tests whether host memory is accessible from a given device. 1263 | inline fn target_is_accessible(ptr: *const anyopaque, size: usize, device_num: c_int) c_int { 1264 | return c.omp_target_is_accessible(ptr, size, device_num); 1265 | } 1266 | 1267 | // / kmp API functions 1268 | // extern "c" inline fn kmp_get_stacksize (void)int ; 1269 | // extern "c" inline fn kmp_set_stacksize (int)void ; 1270 | // extern "c" inline fn kmp_get_stacksize_s (void)size_t ; 1271 | // extern "c" inline fn kmp_set_stacksize_s (size_t)void ; 1272 | // extern "c" inline fn kmp_get_blocktime (void)int ; 1273 | // extern "c" inline fn kmp_get_library (void)int ; 1274 | // extern "c" inline fn kmp_set_blocktime (int)void ; 1275 | // extern "c" inline fn kmp_set_library (int)void ; 1276 | // extern "c" inline fn kmp_set_library_serial (void)void ; 1277 | // extern "c" inline fn kmp_set_library_turnaround (void)void ; 1278 | // extern "c" inline fn kmp_set_library_throughput (void)void ; 1279 | // extern "c" inline fn kmp_set_defaults (char const *)void ; 1280 | // extern "c" inline fn kmp_set_disp_num_buffers (int)void ; 1281 | // // 1282 | // // /* Intel affinity API */ 1283 | // // typedef void * kmp_affinity_mask_t; 1284 | // // 1285 | // // extern int __KAI_KMPC_CONVENTION kmp_set_affinity (kmp_affinity_mask_t *); 1286 | // // extern int __KAI_KMPC_CONVENTION kmp_get_affinity (kmp_affinity_mask_t *); 1287 | // // extern int __KAI_KMPC_CONVENTION kmp_get_affinity_max_proc (void); 1288 | // // extern void __KAI_KMPC_CONVENTION kmp_create_affinity_mask (kmp_affinity_mask_t *); 1289 | // // extern void __KAI_KMPC_CONVENTION kmp_destroy_affinity_mask (kmp_affinity_mask_t *); 1290 | // // extern int __KAI_KMPC_CONVENTION kmp_set_affinity_mask_proc (int, kmp_affinity_mask_t *); 1291 | // // extern int __KAI_KMPC_CONVENTION kmp_unset_affinity_mask_proc (int, kmp_affinity_mask_t *); 1292 | // // extern int __KAI_KMPC_CONVENTION kmp_get_affinity_mask_proc (int, kmp_affinity_mask_t *); 1293 | // // 1294 | // // /* OpenMP 4.0 affinity API */ 1295 | // // typedef enum omp_proc_bind_t { 1296 | // // 
omp_proc_bind_false = 0, 1297 | // // omp_proc_bind_true = 1, 1298 | // // omp_proc_bind_master = 2, 1299 | // // omp_proc_bind_close = 3, 1300 | // // omp_proc_bind_spread = 4 1301 | // // } omp_proc_bind_t; 1302 | // // 1303 | // // extern omp_proc_bind_t __KAI_KMPC_CONVENTION omp_get_proc_bind (void); 1304 | // // 1305 | // // /* OpenMP 4.5 affinity API */ 1306 | // // extern int __KAI_KMPC_CONVENTION omp_get_num_places (void); 1307 | // // extern int __KAI_KMPC_CONVENTION omp_get_place_num_procs (int); 1308 | // // extern void __KAI_KMPC_CONVENTION omp_get_place_proc_ids (int, int *); 1309 | // // extern int __KAI_KMPC_CONVENTION omp_get_place_num (void); 1310 | // // extern int __KAI_KMPC_CONVENTION omp_get_partition_num_places (void); 1311 | // // extern void __KAI_KMPC_CONVENTION omp_get_partition_place_nums (int *); 1312 | // // 1313 | // // extern void * __KAI_KMPC_CONVENTION kmp_malloc (size_t); 1314 | // // extern void * __KAI_KMPC_CONVENTION kmp_aligned_malloc (size_t, size_t); 1315 | // // extern void * __KAI_KMPC_CONVENTION kmp_calloc (size_t, size_t); 1316 | // // extern void * __KAI_KMPC_CONVENTION kmp_realloc (void *, size_t); 1317 | // // extern void __KAI_KMPC_CONVENTION kmp_free (void *); 1318 | // // 1319 | // // extern void __KAI_KMPC_CONVENTION kmp_set_warnings_on(void); 1320 | // // extern void __KAI_KMPC_CONVENTION kmp_set_warnings_off(void); 1321 | // // 1322 | // // /* OpenMP 5.0 Tool Control */ 1323 | // // typedef enum omp_control_tool_result_t { 1324 | // // omp_control_tool_notool = -2, 1325 | // // omp_control_tool_nocallback = -1, 1326 | // // omp_control_tool_success = 0, 1327 | // // omp_control_tool_ignored = 1 1328 | // // } omp_control_tool_result_t; 1329 | // // 1330 | // // typedef enum omp_control_tool_t { 1331 | // // omp_control_tool_start = 1, 1332 | // // omp_control_tool_pause = 2, 1333 | // // omp_control_tool_flush = 3, 1334 | // // omp_control_tool_end = 4 1335 | // // } omp_control_tool_t; 1336 | // // 1337 | // // extern int __KAI_KMPC_CONVENTION omp_control_tool(int, int, void*); 1338 | // // 1339 | // // /* OpenMP 5.0 Memory Management */ 1340 | // // typedef uintptr_t omp_uintptr_t; 1341 | // // 1342 | // // typedef enum { 1343 | // // omp_atk_sync_hint = 1, 1344 | // // omp_atk_alignment = 2, 1345 | // // omp_atk_access = 3, 1346 | // // omp_atk_pool_size = 4, 1347 | // // omp_atk_fallback = 5, 1348 | // // omp_atk_fb_data = 6, 1349 | // // omp_atk_pinned = 7, 1350 | // // omp_atk_partition = 8 1351 | // // } omp_alloctrait_key_t; 1352 | // // 1353 | // // typedef enum { 1354 | // // omp_atv_false = 0, 1355 | // // omp_atv_true = 1, 1356 | // // omp_atv_contended = 3, 1357 | // // omp_atv_uncontended = 4, 1358 | // // omp_atv_serialized = 5, 1359 | // // omp_atv_sequential = omp_atv_serialized, // (deprecated) 1360 | // // omp_atv_private = 6, 1361 | // // omp_atv_all = 7, 1362 | // // omp_atv_thread = 8, 1363 | // // omp_atv_pteam = 9, 1364 | // // omp_atv_cgroup = 10, 1365 | // // omp_atv_default_mem_fb = 11, 1366 | // // omp_atv_null_fb = 12, 1367 | // // omp_atv_abort_fb = 13, 1368 | // // omp_atv_allocator_fb = 14, 1369 | // // omp_atv_environment = 15, 1370 | // // omp_atv_nearest = 16, 1371 | // // omp_atv_blocked = 17, 1372 | // // omp_atv_interleaved = 18 1373 | // // } omp_alloctrait_value_t; 1374 | // // #define omp_atv_default ((omp_uintptr_t)-1) 1375 | // // 1376 | // // typedef struct { 1377 | // // omp_alloctrait_key_t key; 1378 | // // omp_uintptr_t value; 1379 | // // } omp_alloctrait_t; 1380 | // // 1381 | // // # if 
defined(_WIN32) 1382 | // // // On Windows cl and icl do not support 64-bit enum, let's use integer then. 1383 | // // typedef omp_uintptr_t omp_allocator_handle_t; 1384 | // // extern __KMP_IMP omp_allocator_handle_t const omp_null_allocator; 1385 | // // extern __KMP_IMP omp_allocator_handle_t const omp_default_mem_alloc; 1386 | // // extern __KMP_IMP omp_allocator_handle_t const omp_large_cap_mem_alloc; 1387 | // // extern __KMP_IMP omp_allocator_handle_t const omp_const_mem_alloc; 1388 | // // extern __KMP_IMP omp_allocator_handle_t const omp_high_bw_mem_alloc; 1389 | // // extern __KMP_IMP omp_allocator_handle_t const omp_low_lat_mem_alloc; 1390 | // // extern __KMP_IMP omp_allocator_handle_t const omp_cgroup_mem_alloc; 1391 | // // extern __KMP_IMP omp_allocator_handle_t const omp_pteam_mem_alloc; 1392 | // // extern __KMP_IMP omp_allocator_handle_t const omp_thread_mem_alloc; 1393 | // // extern __KMP_IMP omp_allocator_handle_t const llvm_omp_target_host_mem_alloc; 1394 | // // extern __KMP_IMP omp_allocator_handle_t const llvm_omp_target_shared_mem_alloc; 1395 | // // extern __KMP_IMP omp_allocator_handle_t const llvm_omp_target_device_mem_alloc; 1396 | // // 1397 | // // typedef omp_uintptr_t omp_memspace_handle_t; 1398 | // // extern __KMP_IMP omp_memspace_handle_t const omp_default_mem_space; 1399 | // // extern __KMP_IMP omp_memspace_handle_t const omp_large_cap_mem_space; 1400 | // // extern __KMP_IMP omp_memspace_handle_t const omp_const_mem_space; 1401 | // // extern __KMP_IMP omp_memspace_handle_t const omp_high_bw_mem_space; 1402 | // // extern __KMP_IMP omp_memspace_handle_t const omp_low_lat_mem_space; 1403 | // // extern __KMP_IMP omp_memspace_handle_t const llvm_omp_target_host_mem_space; 1404 | // // extern __KMP_IMP omp_memspace_handle_t const llvm_omp_target_shared_mem_space; 1405 | // // extern __KMP_IMP omp_memspace_handle_t const llvm_omp_target_device_mem_space; 1406 | // // # else 1407 | // // # if __cplusplus >= 201103 1408 | // // typedef enum omp_allocator_handle_t : omp_uintptr_t 1409 | // // # else 1410 | // // typedef enum omp_allocator_handle_t 1411 | // // # endif 1412 | // // { 1413 | // // omp_null_allocator = 0, 1414 | // // omp_default_mem_alloc = 1, 1415 | // // omp_large_cap_mem_alloc = 2, 1416 | // // omp_const_mem_alloc = 3, 1417 | // // omp_high_bw_mem_alloc = 4, 1418 | // // omp_low_lat_mem_alloc = 5, 1419 | // // omp_cgroup_mem_alloc = 6, 1420 | // // omp_pteam_mem_alloc = 7, 1421 | // // omp_thread_mem_alloc = 8, 1422 | // // llvm_omp_target_host_mem_alloc = 100, 1423 | // // llvm_omp_target_shared_mem_alloc = 101, 1424 | // // llvm_omp_target_device_mem_alloc = 102, 1425 | // // KMP_ALLOCATOR_MAX_HANDLE = UINTPTR_MAX 1426 | // // } omp_allocator_handle_t; 1427 | // // # if __cplusplus >= 201103 1428 | // // typedef enum omp_memspace_handle_t : omp_uintptr_t 1429 | // // # else 1430 | // // typedef enum omp_memspace_handle_t 1431 | // // # endif 1432 | // // { 1433 | // // omp_default_mem_space = 0, 1434 | // // omp_large_cap_mem_space = 1, 1435 | // // omp_const_mem_space = 2, 1436 | // // omp_high_bw_mem_space = 3, 1437 | // // omp_low_lat_mem_space = 4, 1438 | // // llvm_omp_target_host_mem_space = 100, 1439 | // // llvm_omp_target_shared_mem_space = 101, 1440 | // // llvm_omp_target_device_mem_space = 102, 1441 | // // KMP_MEMSPACE_MAX_HANDLE = UINTPTR_MAX 1442 | // // } omp_memspace_handle_t; 1443 | // // # endif 1444 | // // extern omp_allocator_handle_t __KAI_KMPC_CONVENTION omp_init_allocator(omp_memspace_handle_t m, 1445 | // // int 
ntraits, omp_alloctrait_t traits[]); 1446 | // // extern void __KAI_KMPC_CONVENTION omp_destroy_allocator(omp_allocator_handle_t allocator); 1447 | // // 1448 | // // extern void __KAI_KMPC_CONVENTION omp_set_default_allocator(omp_allocator_handle_t a); 1449 | // // extern omp_allocator_handle_t __KAI_KMPC_CONVENTION omp_get_default_allocator(void); 1450 | // // # ifdef __cplusplus 1451 | // // extern void *__KAI_KMPC_CONVENTION omp_alloc(size_t size, omp_allocator_handle_t a = omp_null_allocator); 1452 | // // extern void *__KAI_KMPC_CONVENTION omp_aligned_alloc(size_t align, size_t size, 1453 | // // omp_allocator_handle_t a = omp_null_allocator); 1454 | // // extern void *__KAI_KMPC_CONVENTION omp_calloc(size_t nmemb, size_t size, 1455 | // // omp_allocator_handle_t a = omp_null_allocator); 1456 | // // extern void *__KAI_KMPC_CONVENTION omp_aligned_calloc(size_t align, size_t nmemb, size_t size, 1457 | // // omp_allocator_handle_t a = omp_null_allocator); 1458 | // // extern void *__KAI_KMPC_CONVENTION omp_realloc(void *ptr, size_t size, 1459 | // // omp_allocator_handle_t allocator = omp_null_allocator, 1460 | // // omp_allocator_handle_t free_allocator = omp_null_allocator); 1461 | // // extern void __KAI_KMPC_CONVENTION omp_free(void * ptr, omp_allocator_handle_t a = omp_null_allocator); 1462 | // // # else 1463 | // // extern void *__KAI_KMPC_CONVENTION omp_alloc(size_t size, omp_allocator_handle_t a); 1464 | // // extern void *__KAI_KMPC_CONVENTION omp_aligned_alloc(size_t align, size_t size, 1465 | // // omp_allocator_handle_t a); 1466 | // // extern void *__KAI_KMPC_CONVENTION omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t a); 1467 | // // extern void *__KAI_KMPC_CONVENTION omp_aligned_calloc(size_t align, size_t nmemb, size_t size, 1468 | // // omp_allocator_handle_t a); 1469 | // // extern void *__KAI_KMPC_CONVENTION omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator, 1470 | // // omp_allocator_handle_t free_allocator); 1471 | // // extern void __KAI_KMPC_CONVENTION omp_free(void *ptr, omp_allocator_handle_t a); 1472 | // // # endif 1473 | // // 1474 | // // /* OpenMP 5.0 Affinity Format */ 1475 | // // extern void __KAI_KMPC_CONVENTION omp_set_affinity_format(char const *); 1476 | // // extern size_t __KAI_KMPC_CONVENTION omp_get_affinity_format(char *, size_t); 1477 | // // extern void __KAI_KMPC_CONVENTION omp_display_affinity(char const *); 1478 | // // extern size_t __KAI_KMPC_CONVENTION omp_capture_affinity(char *, size_t, char const *); 1479 | // // 1480 | // // /* OpenMP 5.0 events */ 1481 | // // # if defined(_WIN32) 1482 | // // // On Windows cl and icl do not support 64-bit enum, let's use integer then. 
1483 | // // typedef omp_uintptr_t omp_event_handle_t; 1484 | // // # else 1485 | // // typedef enum omp_event_handle_t { KMP_EVENT_MAX_HANDLE = UINTPTR_MAX } omp_event_handle_t; 1486 | // // # endif 1487 | // // extern void __KAI_KMPC_CONVENTION omp_fulfill_event ( omp_event_handle_t event ); 1488 | // // 1489 | // // /* OpenMP 5.0 Pause Resources */ 1490 | // // typedef enum omp_pause_resource_t { 1491 | // // omp_pause_resume = 0, 1492 | // // omp_pause_soft = 1, 1493 | // // omp_pause_hard = 2 1494 | // // } omp_pause_resource_t; 1495 | // // extern int __KAI_KMPC_CONVENTION omp_pause_resource(omp_pause_resource_t, int); 1496 | // // extern int __KAI_KMPC_CONVENTION omp_pause_resource_all(omp_pause_resource_t); 1497 | // // 1498 | // // extern int __KAI_KMPC_CONVENTION omp_get_supported_active_levels(void); 1499 | // // 1500 | // // /* OpenMP 5.1 */ 1501 | // // extern void __KAI_KMPC_CONVENTION omp_set_num_teams(int num_teams); 1502 | // // extern int __KAI_KMPC_CONVENTION omp_get_max_teams(void); 1503 | // // extern void __KAI_KMPC_CONVENTION omp_set_teams_thread_limit(int limit); 1504 | // // extern int __KAI_KMPC_CONVENTION omp_get_teams_thread_limit(void); 1505 | // // 1506 | // // /* OpenMP 5.1 Display Environment */ 1507 | // // extern void omp_display_env(int verbose); 1508 | // // 1509 | // // # if defined(_OPENMP) && _OPENMP >= 201811 1510 | // // #pragma omp begin declare variant match(device={kind(host)}) 1511 | // // static inline int omp_is_initial_device(void) { return 1; } 1512 | // // #pragma omp end declare variant 1513 | // // #pragma omp begin declare variant match(device={kind(nohost)}) 1514 | // // static inline int omp_is_initial_device(void) { return 0; } 1515 | // // #pragma omp end declare variant 1516 | // // # endif 1517 | // // 1518 | // // /* OpenMP 5.2 */ 1519 | // // extern int __KAI_KMPC_CONVENTION omp_in_explicit_task(void); 1520 | // // 1521 | // // /* LLVM Extensions */ 1522 | // // extern void *llvm_omp_target_dynamic_shared_alloc(void); 1523 | // // 1524 | // // # undef __KAI_KMPC_CONVENTION 1525 | // // # undef __KMP_IMP 1526 | // // 1527 | // // /* Warning: 1528 | // // The following typedefs are not standard, deprecated and will be removed in a future release. 1529 | // // */ 1530 | // // typedef int omp_int_t; 1531 | // // typedef double omp_wtime_t; 1532 | // // 1533 | // // # ifdef __cplusplus 1534 | // // } 1535 | // // # endif 1536 | // // 1537 | // // #endif /* __OMP_H */ 1538 | --------------------------------------------------------------------------------
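A minimal sketch of how the runtime setters/getters in src/omp.zig might be exercised, assuming the package is importable as `omp`, the binary links against libomp, and the test name and printed labels are illustrative only:

const std = @import("std");
const omp = @import("omp");

test "runtime API sketch" {
    // Pin the team size before querying the limits.
    omp.set_dynamic(false);
    omp.set_num_threads(4);

    std.debug.print("max threads: {}\n", .{omp.get_max_threads()});
    std.debug.print("num procs:   {}\n", .{omp.get_num_procs()});

    // Wall-clock helpers wrap omp_get_wtime / omp_get_wtick.
    const t0 = omp.get_wtime();
    std.time.sleep(1_000_000); // ~1 ms
    const t1 = omp.get_wtime();
    std.debug.print("elapsed: {d}s (tick = {d}s)\n", .{ t1 - t0, omp.get_wtick() });
}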
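The `lock` and `nest_lock` wrappers are thin shims over `omp_init_lock`/`omp_set_lock` and friends. A minimal, single-threaded sketch of the call sequence (illustrative only; in real code the lock would guard shared state touched from a parallel region):

const omp = @import("omp");

test "lock wrapper sketch" {
    var guard: omp.lock = undefined;
    guard.init();
    defer guard.destroy();

    guard.set();
    // ... critical section would go here ...
    guard.unset();

    // test_ acquires the lock only when it is currently free.
    if (guard.test_()) {
        guard.unset();
    }
}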
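The OpenMP 4.x/5.0 device routines are plain host-side queries and can be called even in host-only builds. A hedged sketch, again assuming the module is importable as `omp`:

const std = @import("std");
const omp = @import("omp");

test "device query sketch" {
    // Reports 0 devices on a host-only build; the initial device is the host.
    std.debug.print("devices: {} (default = {}, initial = {})\n", .{
        omp.get_num_devices(),
        omp.get_default_device(),
        omp.get_initial_device(),
    });
    // Outside a `teams` region there is a single team with number 0.
    std.debug.print("teams: {} (this team = {})\n", .{
        omp.get_num_teams(),
        omp.get_team_num(),
    });
}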