├── .gitignore
├── .gitmodules
├── src
    ├── root.zig
    ├── kernels.cl
    ├── utils.zig
    ├── idx.zig
    ├── defines.zig
    ├── main.zig
    ├── vtk.zig
    ├── opencl_hello_world.zig
    ├── ibm.zig
    ├── lbm.zig
    └── cl.zig
├── README.md
└── LICENSE


/.gitignore:
--------------------------------------------------------------------------------
1 | output
2 | .zig-cache
3 | zig-out


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "OpenCL-Headers"]
2 | 	path = OpenCL-Headers
3 | 	url = https://github.com/KhronosGroup/OpenCL-Headers
4 | 


--------------------------------------------------------------------------------
/src/root.zig:
--------------------------------------------------------------------------------
 1 | const std = @import("std");
 2 | const testing = std.testing;
 3 | 
 4 | comptime {
 5 |     _ = @import("lbm.zig");
 6 |     _ = @import("vtk.zig");
 7 | }
 8 | 
 9 | export fn add(a: i32, b: i32) i32 {
10 |     return a + b;
11 | }
12 | 
13 | test "basic add functionality" {
14 |     try testing.expect(add(3, 7) == 10);
15 | }
16 | 


--------------------------------------------------------------------------------
/src/kernels.cl:
--------------------------------------------------------------------------------
 1 | __kernel void square_array(__global int* input_array, __global int* output_array) {
 2 |     int i = get_global_id(0);
 3 |     int value = input_array[i];
 4 |     output_array[i] = value * value;
 5 | }
 6 | 
 7 | __kernel void lbm_kernel(
 8 |     __global float* popA,
 9 |     __global float* popB,
10 |     __global float* u,
11 |     __global float* rho,
12 |     __global float* force_ibm,
13 |     const int time_step
14 | ) {
15 |     // streaming (popA, popB)
16 | 
17 |     // macroscopics
18 | 
19 |     // collision
20 | 
21 |     // macroscopics
22 | }


--------------------------------------------------------------------------------
/src/utils.zig:
--------------------------------------------------------------------------------
 1 | const std = @import("std");
 2 | 
 3 | pub fn writeArrayListToFile(filename: []const u8, content: []const u8) !void {
 4 |     // Open the file for writing
 5 |     var file = try std.fs.cwd().createFile(filename, .{});
 6 |     defer file.close();
 7 | 
 8 |     // Get a writer for the file
 9 |     var writer = file.writer();
10 | 
11 |     // Write the contents of the ArrayList to the file
12 |     try writer.writeAll(content);
13 | }
14 | 
15 | // Function to append formatted strings to the list
16 | pub fn appendFormatted(list: *std.ArrayList(u8), comptime format_string: []const u8, args: anytype) !void {
17 |     var buffer: [512]u8 = undefined;
18 |     const buffer_slice = buffer[0..];
19 | 
20 |     const str_add = try std.fmt.bufPrint(buffer_slice, format_string, args);
21 | 
22 |     try list.appendSlice(str_add);
23 | }
24 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # zLBM
 2 | 
 3 | LBM (lattice Boltzmann method) solver written in Zig for learning purposes.
 4 | 
 5 | The project is done by [Waine](https://github.com/wainejr/) and you can find the videos on its developing on his [YouTube](https://www.youtube.com/@waine_jr), in the [zLBM playlist](https://www.youtube.com/watch?v=BZobw0vnSHo&list=PL2WQTg3Tx5wO79IqfPwQhvgTqZsfIob9V).
 6 | 
 7 | ## Cloning
 8 | 
 9 | Don't forget to clone and initialize the submodules
10 | 
11 | ```bash
12 | git clone --recursive https://github.com/wainejr/zLBM
13 | # or
14 | git clone https://github.com/wainejr/zLBM
15 | git submodule update --init --recursive
16 | ```
17 | 
18 | ## Building & Running
19 | 
20 | To build the project, make sure you have [Zig](https://ziglang.org/) installed.
21 | The solver was developed and tested under version 0.13.0 and 0.14 on development.
22 | 
23 | After that, you can build the program running
24 | 
25 | ```bash
26 | zig build
27 | ```
28 | 
29 | And then run with
30 | 
31 | ```bash
32 | ./zig-out/bin/zLBM
33 | ```
34 | 
35 | Or just run the project directly with
36 | 
37 | ```bash
38 | zig run src/main.zig
39 | ```
40 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 Waine Junior
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/src/idx.zig:
--------------------------------------------------------------------------------
 1 | const defs = @import("defines.zig");
 2 | 
 3 | pub inline fn idx2pos(idx: usize) [defs.dim]u32 {
 4 |     if (defs.dim == 2) {
 5 |         return .{ @intCast(idx % defs.domain_size[0]), @intCast(idx / defs.domain_size[0]) };
 6 |     } else {
 7 |         return .{ @intCast(idx % defs.domain_size[0]), @intCast((idx / defs.domain_size[0]) % defs.domain_size[1]), @intCast(idx / (defs.domain_size[0] * defs.domain_size[1])) };
 8 |     }
 9 | }
10 | 
11 | pub inline fn pos2idx(pos: [defs.dim]u32) usize {
12 |     if (defs.dim == 2) {
13 |         return pos[0] + pos[1] * defs.domain_size[0];
14 |     } else {
15 |         return pos[0] + defs.domain_size[0] * (pos[1] + pos[2] * defs.domain_size[1]);
16 |     }
17 | }
18 | 
19 | pub inline fn idxPop(pos: [defs.dim]u32, i: u8) usize {
20 |     return i + defs.n_pop * (pos2idx(pos));
21 | }
22 | 
23 | test "test Idx" {
24 |     const std = @import("std");
25 |     const assert = std.debug.assert;
26 |     var count: usize = 0;
27 |     for (0..defs.n_nodes) |idx| {
28 |         const pos = idx2pos(idx);
29 |         const retIdx = pos2idx(pos);
30 |         assert(retIdx == idx);
31 |         for (0..defs.dim) |d| {
32 |             assert(pos[d] >= 0);
33 |             assert(pos[d] < defs.domain_size[d]);
34 |         }
35 |         for (0..defs.n_pop) |i| {
36 |             const popIdx = idxPop(pos, @intCast(i));
37 |             assert(count == popIdx);
38 |             count += 1;
39 |         }
40 |     }
41 | }
42 | 


--------------------------------------------------------------------------------
/src/defines.zig:
--------------------------------------------------------------------------------
 1 | const utils = @import("utils.zig");
 2 | 
 3 | pub const cs2: f32 = 1.0 / 3.0;
 4 | pub const VelSet = enum { D2Q9, D3Q19 };
 5 | pub const dim = switch (vel_set_use) {
 6 |     VelSet.D2Q9 => 2,
 7 |     VelSet.D3Q19 => 3,
 8 | };
 9 | pub const n_pop = switch (vel_set_use) {
10 |     VelSet.D2Q9 => 9,
11 |     VelSet.D3Q19 => 19,
12 | };
13 | pub const pop_dir: [n_pop][dim]i8 = switch (vel_set_use) {
14 |     VelSet.D2Q9 => .{ [_]i8{ 0, 0 }, [_]i8{ 1, 0 }, [_]i8{ 0, 1 }, [_]i8{ -1, 0 }, [_]i8{ 0, -1 }, [_]i8{ 1, 1 }, [_]i8{ -1, 1 }, [_]i8{ -1, -1 }, [_]i8{ 1, -1 } },
15 |     VelSet.D3Q19 => .{
16 |         [_]i8{ 0, 0, 0 },
17 |         [_]i8{ 1, 0, 0 },
18 |         [_]i8{ -1, 0, 0 },
19 |         [_]i8{ 0, 1, 0 },
20 |         [_]i8{ 0, -1, 0 },
21 |         [_]i8{ 0, 0, 1 },
22 |         [_]i8{ 0, 0, -1 },
23 |         [_]i8{ 1, 1, 0 },
24 |         [_]i8{ -1, -1, 0 },
25 |         [_]i8{ 1, 0, 1 },
26 |         [_]i8{ -1, 0, -1 },
27 |         [_]i8{ 0, 1, 1 },
28 |         [_]i8{ 0, -1, -1 },
29 |         [_]i8{ 1, -1, 0 },
30 |         [_]i8{ -1, 1, 0 },
31 |         [_]i8{ 1, 0, -1 },
32 |         [_]i8{ -1, 0, 1 },
33 |         [_]i8{ 0, 1, -1 },
34 |         [_]i8{ 0, -1, 1 },
35 |     },
36 | };
37 | pub const n_nodes = domain_size[0] * domain_size[1] * (if (dim == 2) 1 else domain_size[2]);
38 | pub const pop_weights: [n_pop]f32 = switch (vel_set_use) {
39 |     VelSet.D2Q9 => .{ 4.0 / 9.0, 1.0 / 9.0, 1.0 / 9.0, 1.0 / 9.0, 1.0 / 9.0, 1.0 / 36.0, 1.0 / 36.0, 1.0 / 36.0, 1.0 / 36.0 },
40 |     VelSet.D3Q19 => .{1.0 / 3.0} ++ [_]f32{1.0 / 18.0} ** 6 ++ [_]f32{1.0 / 36.0} ** 12,
41 | };
42 | 
43 | // Parameters
44 | pub const tau: f32 = 0.9;
45 | pub const domain_size: [dim]u32 = .{ 96, 96, 96 };
46 | pub const vel_set_use = VelSet.D3Q19;
47 | pub const freq_export = 100;
48 | pub const n_steps = 5000;
49 | pub const global_force: [dim]f32 = .{ 0, 0, 0 };
50 | 
51 | // IBM parameters
52 | pub const forces_relaxation_factor = 0.5;
53 | pub const ibm_n_iterations = 1;
54 | 


--------------------------------------------------------------------------------
/src/main.zig:
--------------------------------------------------------------------------------
 1 | const std = @import("std");
 2 | const lbm = @import("lbm.zig");
 3 | const ibm = @import("ibm.zig");
 4 | const vtk = @import("vtk.zig");
 5 | const defs = @import("defines.zig");
 6 | const utils = @import("utils.zig");
 7 | 
 8 | pub fn main() !void {
 9 |     var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
10 |     defer arena.deinit();
11 | 
12 |     const allocator = arena.allocator();
13 | 
14 |     const lbm_arrays = try lbm.allocate_arrs(&allocator);
15 |     lbm_arrays.initialize();
16 |     // const body_ibm = try ibm.BodyIBM.create_basic_body(allocator);
17 |     // try body_ibm.export_csv(allocator, "output/body_pos_0.csv");
18 |     const bodies: [0]ibm.BodyIBM = .{};
19 | 
20 |     try lbm_arrays.export_arrays(allocator, 0);
21 |     var timer = try std.time.Timer.start();
22 | 
23 |     for (1..(defs.n_steps + 1)) |time_step| {
24 |         lbm.run_time_step(lbm_arrays, @intCast(time_step));
25 |         // for (0..defs.ibm_n_iterations) |_| {
26 |         for (0..1) |_| { // Use 1 iteration to not update macrs
27 |             lbm.run_IBM_iteration(bodies[0..], lbm_arrays, @intCast(time_step));
28 |         }
29 |         if (time_step % defs.freq_export == 0) {
30 |             try lbm_arrays.export_arrays(allocator, @intCast(time_step));
31 | 
32 |             for (bodies) |b| {
33 |                 var buffer: [100]u8 = undefined;
34 |                 const buffer_slice = buffer[0..];
35 |                 const body_path = try std.fmt.bufPrint(buffer_slice, "output/body_pos_{}.csv", .{time_step});
36 |                 try b.export_csv(allocator, body_path);
37 |             }
38 | 
39 |             std.debug.print("Exported arrays in time step {}\n", .{time_step});
40 |         }
41 |     }
42 | 
43 |     const time_passed_nano: f64 = @floatFromInt(timer.lap());
44 |     const time_passed_sec: f64 = time_passed_nano / 1e9;
45 | 
46 |     const mlups: f64 = (@as(usize, @intCast(defs.n_nodes)) * defs.n_steps) / (time_passed_sec * 1e6);
47 | 
48 |     std.debug.print("Finished simulation!\n", .{});
49 |     std.debug.print("MLUPS {d:0.2}\n", .{mlups});
50 |     std.debug.print("Time elapsed {d:0.2}s\n", .{time_passed_sec});
51 | }
52 | 


--------------------------------------------------------------------------------
/src/vtk.zig:
--------------------------------------------------------------------------------
 1 | const std = @import("std");
 2 | const utils = @import("utils.zig");
 3 | 
 4 | fn write_vtk_header(vtk_string: *std.ArrayList(u8), dims: []const u32) !void {
 5 |     const dims_use: [3]u32 = .{ dims[0], dims[1], if (dims.len < 3) 1 else dims[2] };
 6 | 
 7 |     try vtk_string.appendSlice("# vtk DataFile Version 3.0\nData\nBINARY\nDATASET STRUCTURED_POINTS\n");
 8 | 
 9 |     // "DIMENSIONS "+to_string(Nx)+" "+to_string(Ny)+" "+to_string(Nz)+"\n"
10 |     try vtk_string.appendSlice("DIMENSIONS ");
11 |     for (dims_use) |d| {
12 |         try utils.appendFormatted(vtk_string, "{} ", .{d});
13 |     }
14 |     try vtk_string.appendSlice("\n");
15 |     // "ORIGIN "+to_string(origin.x)+" "+to_string(origin.y)+" "+to_string(origin.z)+"\n"
16 |     try vtk_string.appendSlice("ORIGIN ");
17 |     for (dims_use) |_| {
18 |         try utils.appendFormatted(vtk_string, "{} ", .{0});
19 |     }
20 |     try vtk_string.appendSlice("\n");
21 |     // "SPACING "+to_string(spacing)+" "+to_string(spacing)+" "+to_string(spacing)+"\n"
22 |     try vtk_string.appendSlice("SPACING ");
23 |     for (dims_use) |_| {
24 |         try utils.appendFormatted(vtk_string, "{} ", .{1});
25 |     }
26 |     try vtk_string.appendSlice("\n");
27 |     // "POINT_DATA "+to_string((ulong)Nx*(ulong)Ny*(ulong)Nz)+
28 |     try utils.appendFormatted(vtk_string, "POINT_DATA {}", .{dims_use[0] * dims_use[1] * dims_use[2]});
29 |     // "\nSCALARS data "+vtk_type()+" "+to_string(dimensions())+"\nLOOKUP_TABLE default\n"
30 | }
31 | 
32 | fn write_vtk_data(vtk_string: *std.ArrayList(u8), scalar_name: []const u8, arr: []const f32) !void {
33 |     try utils.appendFormatted(vtk_string, "\nSCALARS {s} float 1", .{scalar_name});
34 |     // std.debug.print("my string {s}\n", .{vtk_string.items});
35 |     try vtk_string.appendSlice("\nLOOKUP_TABLE default\n");
36 | 
37 |     // Write the scalar data in big-endian format
38 |     var be_scalar: [4]u8 = undefined;
39 |     for (arr) |value| {
40 |         be_scalar = @bitCast(value);
41 |         // std.debug.print("value {} bits {b:0>32} rev {b:0>32} scalars {x:} {x:} {x:} {x:}\n", .{ value, be_value, be_rev, be_scalar[0], be_scalar[1], be_scalar[2], be_scalar[3] });
42 |         const be_use: [4]u8 = .{ be_scalar[3], be_scalar[2], be_scalar[1], be_scalar[0] };
43 |         try vtk_string.appendSlice(be_use[0..4]);
44 |     }
45 | }
46 | 
47 | pub fn write_vtk(vtk_string: *std.ArrayList(u8), kv_arr: std.StringArrayHashMap([]const f32), dims: []const u32) !void {
48 |     try write_vtk_header(vtk_string, dims);
49 | 
50 |     var arr_size: usize = 1;
51 |     for (dims) |d| {
52 |         arr_size *= d;
53 |     }
54 | 
55 |     var it = kv_arr.iterator();
56 |     while (it.next()) |entry| {
57 |         std.debug.assert(entry.value_ptr.len == arr_size);
58 |         try write_vtk_data(vtk_string, entry.key_ptr.*, entry.value_ptr.*);
59 |     }
60 | }
61 | 
62 | test "export array" {
63 |     var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
64 |     defer arena.deinit();
65 | 
66 |     const allocator = arena.allocator();
67 | 
68 |     // Define a new ArrayHashMap with keys of type []const u8 (strings) and values of type i32 (integers)
69 |     var map = std.StringArrayHashMap([]const f32).init(allocator);
70 |     defer map.deinit();
71 | 
72 |     const dims: [2]u32 = .{ 2, 4 };
73 | 
74 |     inline for (.{ "rho", "ux" }) |macr| {
75 |         const my_arr: [8]f32 = .{ 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8 };
76 |         try map.put(macr, my_arr[0..my_arr.len]);
77 |     }
78 |     var data_wr = std.ArrayList(u8).init(allocator);
79 | 
80 |     try write_vtk(&data_wr, map, &dims);
81 | 
82 |     try utils.writeArrayListToFile("teste.vtk", data_wr.items);
83 | }
84 | 


--------------------------------------------------------------------------------
/src/opencl_hello_world.zig:
--------------------------------------------------------------------------------
  1 | const std = @import("std");
  2 | const info = std.log.info;
  3 | 
  4 | const c = @cImport({
  5 |     @cDefine("CL_TARGET_OPENCL_VERSION", "110");
  6 |     @cInclude("CL/cl.h");
  7 | });
  8 | 
  9 | const program_src =
 10 |     \\__kernel void square_array(__global int* input_array, __global int* output_array) {
 11 |     \\    int i = get_global_id(0);
 12 |     \\    int value = input_array[i];
 13 |     \\    output_array[i] = value * value;
 14 |     \\}
 15 | ;
 16 | 
 17 | const CLError = error{
 18 |     GetPlatformsFailed,
 19 |     GetPlatformInfoFailed,
 20 |     NoPlatformsFound,
 21 |     GetDevicesFailed,
 22 |     GetDeviceInfoFailed,
 23 |     NoDevicesFound,
 24 |     CreateContextFailed,
 25 |     CreateCommandQueueFailed,
 26 |     CreateProgramFailed,
 27 |     BuildProgramFailed,
 28 |     CreateKernelFailed,
 29 |     SetKernelArgFailed,
 30 |     EnqueueNDRangeKernel,
 31 |     CreateBufferFailed,
 32 |     EnqueueWriteBufferFailed,
 33 |     EnqueueReadBufferFailed,
 34 | };
 35 | 
 36 | fn get_cl_device() CLError!c.cl_device_id {
 37 |     var platform_ids: [16]c.cl_platform_id = undefined;
 38 |     var platform_count: c.cl_uint = undefined;
 39 |     if (c.clGetPlatformIDs(platform_ids.len, &platform_ids, &platform_count) != c.CL_SUCCESS) {
 40 |         return CLError.GetPlatformsFailed;
 41 |     }
 42 |     info("{} cl platform(s) found:", .{@as(u32, platform_count)});
 43 | 
 44 |     for (platform_ids[0..platform_count], 0..) |id, i| {
 45 |         var name: [1024]u8 = undefined;
 46 |         var name_len: usize = undefined;
 47 |         if (c.clGetPlatformInfo(id, c.CL_PLATFORM_NAME, name.len, &name, &name_len) != c.CL_SUCCESS) {
 48 |             return CLError.GetPlatformInfoFailed;
 49 |         }
 50 |         info("  platform {}: {s}", .{ i, name[0..name_len] });
 51 |     }
 52 | 
 53 |     if (platform_count == 0) {
 54 |         return CLError.NoPlatformsFound;
 55 |     }
 56 | 
 57 |     info("choosing platform 0...", .{});
 58 | 
 59 |     var device_ids: [16]c.cl_device_id = undefined;
 60 |     var device_count: c.cl_uint = undefined;
 61 |     if (c.clGetDeviceIDs(platform_ids[0], c.CL_DEVICE_TYPE_ALL, device_ids.len, &device_ids, &device_count) != c.CL_SUCCESS) {
 62 |         return CLError.GetDevicesFailed;
 63 |     }
 64 |     info("{} cl device(s) found on platform 0:", .{@as(u32, device_count)});
 65 | 
 66 |     for (device_ids[0..device_count], 0..) |id, i| {
 67 |         var name: [1024]u8 = undefined;
 68 |         var name_len: usize = undefined;
 69 |         if (c.clGetDeviceInfo(id, c.CL_DEVICE_NAME, name.len, &name, &name_len) != c.CL_SUCCESS) {
 70 |             return CLError.GetDeviceInfoFailed;
 71 |         }
 72 |         info("  device {}: {s}", .{ i, name[0..name_len] });
 73 |     }
 74 | 
 75 |     if (device_count == 0) {
 76 |         return CLError.NoDevicesFound;
 77 |     }
 78 | 
 79 |     info("choosing device 0...", .{});
 80 | 
 81 |     return device_ids[0];
 82 | }
 83 | 
 84 | fn run_test(device: c.cl_device_id) CLError!void {
 85 |     info("** running test **", .{});
 86 | 
 87 |     const ctx = c.clCreateContext(null, 1, &device, null, null, null); // future: last arg is error code
 88 |     if (ctx == null) {
 89 |         return CLError.CreateContextFailed;
 90 |     }
 91 |     defer _ = c.clReleaseContext(ctx);
 92 | 
 93 |     const command_queue = c.clCreateCommandQueue(ctx, device, 0, null); // future: last arg is error code
 94 |     if (command_queue == null) {
 95 |         return CLError.CreateCommandQueueFailed;
 96 |     }
 97 |     defer {
 98 |         _ = c.clFlush(command_queue);
 99 |         _ = c.clFinish(command_queue);
100 |         _ = c.clReleaseCommandQueue(command_queue);
101 |     }
102 | 
103 |     var program_src_c: [*c]const u8 = program_src;
104 |     const program = c.clCreateProgramWithSource(ctx, 1, &program_src_c, null, null); // future: last arg is error code
105 |     if (program == null) {
106 |         return CLError.CreateProgramFailed;
107 |     }
108 |     defer _ = c.clReleaseProgram(program);
109 | 
110 |     if (c.clBuildProgram(program, 1, &device, null, null, null) != c.CL_SUCCESS) {
111 |         return CLError.BuildProgramFailed;
112 |     }
113 | 
114 |     const kernel = c.clCreateKernel(program, "square_array", null);
115 |     if (kernel == null) {
116 |         return CLError.CreateKernelFailed;
117 |     }
118 |     defer _ = c.clReleaseKernel(kernel);
119 | 
120 |     // Create buffers
121 |     var input_array = init: {
122 |         var init_value: [1024]i32 = undefined;
123 |         for (0..1024) |i| {
124 |             init_value[i] = @as(i32, @intCast(i));
125 |         }
126 |         break :init init_value;
127 |     };
128 | 
129 |     var input_buffer = c.clCreateBuffer(ctx, c.CL_MEM_READ_ONLY, input_array.len * @sizeOf(i32), null, null);
130 |     if (input_buffer == null) {
131 |         return CLError.CreateBufferFailed;
132 |     }
133 |     defer _ = c.clReleaseMemObject(input_buffer);
134 | 
135 |     var output_buffer = c.clCreateBuffer(ctx, c.CL_MEM_WRITE_ONLY, input_array.len * @sizeOf(i32), null, null);
136 |     if (output_buffer == null) {
137 |         return CLError.CreateBufferFailed;
138 |     }
139 |     defer _ = c.clReleaseMemObject(output_buffer);
140 | 
141 |     // Fill input buffer
142 |     if (c.clEnqueueWriteBuffer(command_queue, input_buffer, c.CL_TRUE, 0, input_array.len * @sizeOf(i32), &input_array, 0, null, null) != c.CL_SUCCESS) {
143 |         return CLError.EnqueueWriteBufferFailed;
144 |     }
145 | 
146 |     // Execute kernel
147 |     if (c.clSetKernelArg(kernel, 0, @sizeOf(c.cl_mem), @ptrCast(&input_buffer)) != c.CL_SUCCESS) {
148 |         return CLError.SetKernelArgFailed;
149 |     }
150 |     if (c.clSetKernelArg(kernel, 1, @sizeOf(c.cl_mem), @ptrCast(&output_buffer)) != c.CL_SUCCESS) {
151 |         return CLError.SetKernelArgFailed;
152 |     }
153 | 
154 |     var global_item_size: usize = input_array.len;
155 |     var local_item_size: usize = 64;
156 |     if (c.clEnqueueNDRangeKernel(command_queue, kernel, 1, null, &global_item_size, &local_item_size, 0, null, null) != c.CL_SUCCESS) {
157 |         return CLError.EnqueueNDRangeKernel;
158 |     }
159 | 
160 |     var output_array: [1024]i32 = undefined;
161 |     if (c.clEnqueueReadBuffer(command_queue, output_buffer, c.CL_TRUE, 0, output_array.len * @sizeOf(i32), &output_array, 0, null, null) != c.CL_SUCCESS) {
162 |         return CLError.EnqueueReadBufferFailed;
163 |     }
164 | 
165 |     info("** done **", .{});
166 | 
167 |     info("** results **", .{});
168 | 
169 |     for (output_array, 0..) |val, i| {
170 |         if (i % 100 == 0) {
171 |             info("{} ^ 2 = {}", .{ i, val });
172 |         }
173 |     }
174 | 
175 |     info("** done, exiting **", .{});
176 | }
177 | 
178 | pub fn main() anyerror!void {
179 |     const device = try get_cl_device();
180 |     try run_test(device);
181 | }
182 | 


--------------------------------------------------------------------------------
/src/ibm.zig:
--------------------------------------------------------------------------------
  1 | const std = @import("std");
  2 | const utils = @import("utils.zig");
  3 | const defs = @import("defines.zig");
  4 | const fidx = @import("idx.zig");
  5 | const Allocator = std.mem.Allocator;
  6 | 
  7 | const DIRAC_RADIUS = 1.5;
  8 | 
  9 | fn dirac_delta(r: f32) f32 {
 10 |     const ar = if (r < 0) -r else r;
 11 |     if (ar >= 1.5) {
 12 |         return 0;
 13 |     }
 14 |     if (ar >= 0.5) {
 15 |         return 9.0 / 8.0 - 3.0 * ar / 2.0 + r * r / 2.0;
 16 |     }
 17 |     return 0.75 - ar * ar;
 18 | }
 19 | 
 20 | pub const NodeIBM = struct {
 21 |     const Self = @This();
 22 | 
 23 |     pos: [defs.dim]f32,
 24 |     u_interp: [defs.dim]f32,
 25 |     rho_interp: f32,
 26 |     area: f32,
 27 |     f_spread: [defs.dim]f32,
 28 |     dirac_sum: f32,
 29 | 
 30 |     pub fn init() NodeIBM {
 31 |         const n: NodeIBM = .{
 32 |             .pos = .{ 0, 0, 0 },
 33 |             .u_interp = .{ 0, 0, 0 },
 34 |             .f_spread = .{ 0, 0, 0 },
 35 |             .rho_interp = 0,
 36 |             .area = 0,
 37 |             .dirac_sum = 0,
 38 |         };
 39 |         return n;
 40 |     }
 41 | 
 42 |     pub fn interp(self: *Self, rho: []f32, u: [defs.dim][]f32) void {
 43 |         const npos = self.pos;
 44 |         const r = DIRAC_RADIUS;
 45 |         const min_pos: [3]usize = .{ @intFromFloat(@ceil(npos[0] - r)), @intFromFloat(@ceil(npos[1] - r)), @intFromFloat(@ceil(npos[2] - r)) };
 46 |         const max_pos: [3]usize = .{ @intFromFloat(@floor(npos[0] + r)), @intFromFloat(@floor(npos[1] + r)), @intFromFloat(@floor(npos[2] + r)) };
 47 | 
 48 |         self.rho_interp = 0;
 49 |         self.u_interp = .{ 0, 0, 0 };
 50 |         self.dirac_sum = 0;
 51 |         for (min_pos[2]..max_pos[2] + 1) |z| {
 52 |             const pz: f32 = @floatFromInt(z);
 53 |             const rz = pz - npos[2];
 54 |             const dirac_z = dirac_delta(rz);
 55 |             for (min_pos[1]..max_pos[1] + 1) |y| {
 56 |                 const py: f32 = @floatFromInt(y);
 57 |                 const ry = py - npos[1];
 58 |                 const dirac_y = dirac_delta(ry);
 59 |                 for (min_pos[0]..max_pos[0] + 1) |x| {
 60 |                     const px: f32 = @floatFromInt(x);
 61 |                     const rx = px - npos[0];
 62 |                     const dirac_x = dirac_delta(rx);
 63 | 
 64 |                     const lpos: [defs.dim]u32 = .{ @intCast(x), @intCast(y), @intCast(z) };
 65 |                     const idx = fidx.pos2idx(lpos);
 66 |                     const rho_local = rho[idx];
 67 |                     const u_local = .{ u[0][idx], u[1][idx], u[2][idx] };
 68 | 
 69 |                     const dirac = dirac_x * dirac_y * dirac_z;
 70 | 
 71 |                     self.rho_interp += rho_local * dirac;
 72 |                     self.u_interp[0] += u_local[0] * dirac;
 73 |                     self.u_interp[1] += u_local[1] * dirac;
 74 |                     self.u_interp[2] += u_local[2] * dirac;
 75 |                     self.dirac_sum += dirac;
 76 |                 }
 77 |             }
 78 |         }
 79 |     }
 80 | 
 81 |     pub fn update_f_spread(self: *Self) void {
 82 |         self.f_spread[0] = 2 * self.rho_interp * (-self.u_interp[0]) * self.area * defs.forces_relaxation_factor;
 83 |         self.f_spread[1] = 2 * self.rho_interp * (-self.u_interp[1]) * self.area * defs.forces_relaxation_factor;
 84 |         self.f_spread[2] = 2 * self.rho_interp * (-self.u_interp[2]) * self.area * defs.forces_relaxation_factor;
 85 |     }
 86 | 
 87 |     pub fn spread(self: Self, force: [defs.dim][]f32) void {
 88 |         const npos = self.pos;
 89 |         const r = DIRAC_RADIUS;
 90 |         const min_pos: [3]usize = .{ @intFromFloat(@ceil(npos[0] - r)), @intFromFloat(@ceil(npos[1] - r)), @intFromFloat(@ceil(npos[2] - r)) };
 91 |         const max_pos: [3]usize = .{ @intFromFloat(@floor(npos[0] + r)), @intFromFloat(@floor(npos[1] + r)), @intFromFloat(@floor(npos[2] + r)) };
 92 | 
 93 |         for (min_pos[2]..max_pos[2] + 1) |z| {
 94 |             const pz: f32 = @floatFromInt(z);
 95 |             const rz = pz - npos[2];
 96 |             const dirac_z = dirac_delta(rz);
 97 |             for (min_pos[1]..max_pos[1] + 1) |y| {
 98 |                 const py: f32 = @floatFromInt(y);
 99 |                 const ry = py - npos[1];
100 |                 const dirac_y = dirac_delta(ry);
101 |                 for (min_pos[0]..max_pos[0] + 1) |x| {
102 |                     const px: f32 = @floatFromInt(x);
103 |                     const rx = px - npos[0];
104 |                     const dirac_x = dirac_delta(rx);
105 | 
106 |                     const lpos: [defs.dim]u32 = .{ @intCast(x), @intCast(y), @intCast(z) };
107 |                     const idx = fidx.pos2idx(lpos);
108 | 
109 |                     const dirac = dirac_x * dirac_y * dirac_z;
110 | 
111 |                     force[0][idx] += self.f_spread[0] * dirac;
112 |                     force[1][idx] += self.f_spread[1] * dirac;
113 |                     force[2][idx] += self.f_spread[2] * dirac;
114 |                 }
115 |             }
116 |         }
117 |     }
118 | };
119 | 
120 | pub const BodyIBM = struct {
121 |     const Self = @This();
122 | 
123 |     nodes: []NodeIBM,
124 | 
125 |     pub fn create_basic_body(alloc: Allocator) !BodyIBM {
126 |         const xmin = 10;
127 |         const xmax = defs.domain_size[0] - 10;
128 |         const zmin = 10;
129 |         const zmax = defs.domain_size[1] - 10;
130 |         const y_use = 5;
131 | 
132 |         const n_nodes = (xmax - xmin) * (zmax - zmin);
133 | 
134 |         var body_nodes = try alloc.alloc(NodeIBM, n_nodes);
135 |         for (0..n_nodes) |idx| {
136 |             body_nodes[idx] = NodeIBM.init();
137 |         }
138 | 
139 |         var i: usize = 0;
140 |         for (xmin..xmax) |x| {
141 |             for (zmin..zmax) |z| {
142 |                 body_nodes[i].pos = .{ @floatFromInt(x), y_use, @floatFromInt(z) };
143 |                 body_nodes[i].area = 1;
144 |                 i += 1;
145 |             }
146 |         }
147 | 
148 |         return .{ .nodes = body_nodes };
149 |     }
150 | 
151 |     pub fn export_csv(self: Self, allocator: Allocator, path: []const u8) !void {
152 |         var data_wr = std.ArrayList(u8).init(allocator);
153 |         try data_wr.appendSlice("x,y,z,rho,ux,uy,uz,fx,fy,fz\n");
154 |         for (self.nodes) |n| {
155 |             try utils.appendFormatted(&data_wr, "{}, {}, {}, {}, {}, {}, {}, {}, {}, {}\n", .{
156 |                 n.pos[0],
157 |                 n.pos[1],
158 |                 n.pos[2],
159 |                 n.rho_interp,
160 |                 n.u_interp[0],
161 |                 n.u_interp[1],
162 |                 n.u_interp[2],
163 |                 n.f_spread[0],
164 |                 n.f_spread[1],
165 |                 n.f_spread[2],
166 |             });
167 |         }
168 |         try utils.writeArrayListToFile(path, data_wr.items);
169 |         data_wr.clearAndFree();
170 |     }
171 | 
172 |     pub fn interpolate_spread(self: Self, rho: []f32, u: [defs.dim][]f32, force: [defs.dim][]f32) void {
173 |         for (0..self.nodes.len) |idx| {
174 |             self.nodes[idx].interp(rho, u);
175 |             self.nodes[idx].update_f_spread();
176 |             self.nodes[idx].spread(force);
177 |         }
178 |     }
179 | 
180 |     pub fn run_ibm(self: *const Self, rho: []f32, u: [defs.dim][]f32, force: [defs.dim][]f32) void {
181 |         self.interpolate_spread(rho, u, force);
182 |     }
183 | };
184 | 
185 | test "create basic body" {
186 |     const allocator = std.testing.allocator;
187 | 
188 |     const body = try BodyIBM.create_basic_body(allocator);
189 |     defer allocator.free(body.nodes);
190 |     for (body.nodes) |node| {
191 |         try std.testing.expectEqual(node.area, 1);
192 |     }
193 | }
194 | 
195 | test "interpolate body" {
196 |     const lbm = @import("lbm.zig");
197 |     var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
198 |     defer arena.deinit();
199 | 
200 |     const allocator = arena.allocator();
201 | 
202 |     // const allocator = std.testing.allocator;
203 | 
204 |     const body = try BodyIBM.create_basic_body(allocator);
205 |     defer allocator.free(body.nodes);
206 | 
207 |     const lbm_arrays = try lbm.allocate_arrs(&allocator);
208 |     lbm_arrays.initialize();
209 | 
210 |     body.interpolate_spread(lbm_arrays.rho, lbm_arrays.u, lbm_arrays.force_ibm);
211 | 
212 |     for (body.nodes) |node| {
213 |         try std.testing.expectApproxEqAbs(1, node.dirac_sum, 0.01);
214 |     }
215 | }
216 | 
217 | test "spread body" {
218 |     const lbm = @import("lbm.zig");
219 |     var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
220 |     defer arena.deinit();
221 | 
222 |     const allocator = arena.allocator();
223 | 
224 |     const body = try BodyIBM.create_basic_body(allocator);
225 |     defer allocator.free(body.nodes);
226 | 
227 |     const lbm_arrays = try lbm.allocate_arrs(&allocator);
228 |     lbm_arrays.initialize();
229 | 
230 |     body.run_ibm(lbm_arrays.rho, lbm_arrays.u, lbm_arrays.force_ibm);
231 | }
232 | 


--------------------------------------------------------------------------------
/src/lbm.zig:
--------------------------------------------------------------------------------
  1 | const std = @import("std");
  2 | const vtk = @import("vtk.zig");
  3 | const utils = @import("utils.zig");
  4 | const defs = @import("defines.zig");
  5 | const fidx = @import("idx.zig");
  6 | const ibm = @import("ibm.zig");
  7 | const Allocator = std.mem.Allocator;
  8 | 
  9 | inline fn dot_prod(comptime T: type, x: *const [defs.dim]T, y: *const [defs.dim]T) T {
 10 |     var sum: T = 0;
 11 |     for (x, y) |i, j| {
 12 |         sum += i * j;
 13 |     }
 14 |     return sum;
 15 | }
 16 | 
 17 | inline fn func_feq(rho: f32, u: [defs.dim]f32, comptime i: usize) f32 {
 18 |     // const ud = .{ u[0], u[1] };
 19 |     var popDir: [defs.dim]f32 = undefined;
 20 |     inline for (0..defs.dim) |d| {
 21 |         popDir[d] = @floatFromInt(defs.pop_dir[i][d]);
 22 |     }
 23 |     const uc: f32 = dot_prod(f32, &u, &popDir);
 24 |     const uu: f32 = dot_prod(f32, &u, &u);
 25 | 
 26 |     return rho * defs.pop_weights[i] * (1 + uc / defs.cs2 + (uc * uc) / (2 * defs.cs2 * defs.cs2) - (uu) / (2 * defs.cs2));
 27 | }
 28 | 
 29 | inline fn source_term(u: [defs.dim]f32, force: [defs.dim]f32, comptime i: usize) f32 {
 30 |     // const ud = .{ u[0], u[1] };
 31 |     var popDir: [defs.dim]f32 = undefined;
 32 |     inline for (0..defs.dim) |d| {
 33 |         popDir[d] = @floatFromInt(defs.pop_dir[i][d]);
 34 |     }
 35 |     var si: f32 = 0;
 36 |     const mul_term = (1 - 2 / defs.tau) * defs.pop_weights[i];
 37 |     inline for (0..defs.dim) |alfa| {
 38 |         const cia = popDir[alfa];
 39 |         si += mul_term * cia / defs.cs2 * force[alfa];
 40 |         inline for (0..defs.dim) |beta| {
 41 |             const cib = popDir[beta];
 42 |             const ciab = cia * cib;
 43 |             const k_dirac: f32 = if (alfa == beta) 1 else 0;
 44 |             si += mul_term * force[alfa] * (u[beta] * (ciab - defs.cs2 * k_dirac) / (defs.cs2 * defs.cs2));
 45 |         }
 46 |     }
 47 |     return si;
 48 | }
 49 | 
 50 | test "func_eq const" {
 51 |     const assert = std.debug.assert;
 52 |     const rho: f32 = 1;
 53 |     const u: [defs.dim]f32 = .{0} ** defs.dim;
 54 |     inline for (0..defs.n_pop) |i| {
 55 |         const feq = func_feq(rho, u, i);
 56 |         assert(feq == defs.pop_weights[i]);
 57 |     }
 58 | }
 59 | 
 60 | pub fn macroscopics(idx: usize, pop: *[defs.n_pop]f32, rho: *f32, u: *[defs.dim]f32, force: *[defs.dim]f32) void {
 61 |     _ = idx;
 62 |     rho.* = 0;
 63 |     inline for (pop) |p| {
 64 |         rho.* += p;
 65 |     }
 66 |     u.* = .{0} ** defs.dim;
 67 |     inline for (0..defs.n_pop) |j| {
 68 |         inline for (0..defs.dim) |d| {
 69 |             if (defs.pop_dir[j][d] == 0) {
 70 |                 continue;
 71 |             }
 72 |             const fdir: f32 = @floatFromInt(defs.pop_dir[j][d]);
 73 |             u.*[d] += pop[j] * fdir;
 74 |         }
 75 |     }
 76 |     inline for (0..defs.dim) |d| {
 77 |         u.*[d] += force.*[d] / 2;
 78 |     }
 79 | 
 80 |     inline for (0..defs.dim) |d| {
 81 |         u.*[d] /= rho.*;
 82 |     }
 83 | }
 84 | 
 85 | //  Open Security Training 2
 86 | 
 87 | pub fn collision(idx: usize, pop: *[defs.n_pop]f32, rho: f32, u: [defs.dim]f32, force: [defs.dim]f32) void {
 88 |     _ = idx;
 89 | 
 90 |     inline for (0..defs.n_pop) |i| {
 91 |         const feq = func_feq(rho, u, i);
 92 |         const si = source_term(u, force, i);
 93 |         const f_coll = pop[i] - (pop[i] - feq) / defs.tau - si;
 94 |         pop[i] = f_coll;
 95 |     }
 96 | }
 97 | 
 98 | pub fn streaming(idx: usize, pop: *[defs.n_pop]f32, popStream_arr: []f32) void {
 99 |     const pos = fidx.idx2pos(idx);
100 |     for (0..defs.n_pop) |i| {
101 |         // posTo = pos + defs.pop_dir[i]
102 |         var popDir: [defs.dim]i32 = undefined;
103 |         inline for (0..defs.dim) |d| {
104 |             popDir[d] = @intCast(defs.pop_dir[i][d]);
105 |         }
106 |         var posFrom: [defs.dim]i32 = undefined;
107 |         inline for (0..defs.dim) |d| {
108 |             posFrom[d] = @intCast(pos[d]);
109 |             posFrom[d] -= popDir[d] + defs.domain_size[d];
110 |             posFrom[d] = @mod(posFrom[d], defs.domain_size[d]);
111 |         }
112 |         var posFromU: [defs.dim]u32 = undefined;
113 |         inline for (0..defs.dim) |d| {
114 |             posFromU[d] = @intCast(posFrom[d]);
115 |         }
116 |         // std.debug.print("pop {} pos to {} {} pos {} {} dir {} {}\n", .{ i, posToU[0], posToU[1], pos[0], pos[1], popDir[0], popDir[1] });
117 | 
118 |         pop[i] = popStream_arr[fidx.idxPop(posFromU, @intCast(i))];
119 |     }
120 | }
121 | 
122 | const LBMArrays = struct {
123 |     const Self = @This();
124 | 
125 |     popA: []f32,
126 |     popB: []f32,
127 |     u: [defs.dim][]f32,
128 |     rho: []f32,
129 |     force_ibm: [defs.dim][]f32,
130 | 
131 |     pub fn initialize(self: *const Self) void {
132 |         for (0..defs.n_nodes) |idx| {
133 |             const pos = fidx.idx2pos(idx);
134 |             // std.debug.print("pos {} {}\n", .{ pos[0], pos[1] });s
135 | 
136 |             self.rho[idx] = 1;
137 |             var posF: [defs.dim]f32 = undefined;
138 |             var posNorm: [defs.dim]f32 = undefined;
139 |             inline for (0..defs.dim) |d| {
140 |                 posF[d] = @floatFromInt(pos[d]);
141 |                 posNorm[d] = posF[d] / defs.domain_size[d];
142 |             }
143 | 
144 |             const velNorm = 0.01;
145 |             // const ux = velNorm * std.math.sin(posNorm[0] * 2 * std.math.pi) * std.math.cos(posNorm[1] * 2 * std.math.pi);
146 |             // const uy = -velNorm * std.math.cos(posNorm[0] * 2 * std.math.pi) * std.math.sin(posNorm[1] * 2 * std.math.pi);
147 | 
148 |             self.u[0][idx] = velNorm * ((1 - posNorm[1]) * posNorm[1]);
149 |             self.u[1][idx] = 0;
150 |             if (defs.dim == 3) {
151 |                 self.u[2][idx] = 0;
152 |             }
153 | 
154 |             var u: [defs.dim]f32 = undefined;
155 |             inline for (0..defs.dim) |d| {
156 |                 u[d] = self.u[d][idx];
157 |                 self.force_ibm[d][idx] = 0;
158 |             }
159 | 
160 |             inline for (0..defs.n_pop) |j| {
161 |                 self.popA[fidx.idxPop(pos, j)] = func_feq(self.rho[idx], u, j);
162 |                 self.popB[fidx.idxPop(pos, j)] = func_feq(self.rho[idx], u, j);
163 |             }
164 |         }
165 |     }
166 | 
167 |     pub fn update_macroscopics(self: Self, pop_arr: []f32) void {
168 |         for (0..defs.n_nodes) |idx| {
169 |             var pop: [defs.n_pop]f32 = undefined;
170 |             const pos = fidx.idx2pos(idx);
171 |             inline for (0..defs.n_pop) |j| {
172 |                 pop[j] = pop_arr[fidx.idxPop(pos, @intCast(j))];
173 |             }
174 |             var rho: f32 = 0;
175 |             var u: [defs.dim]f32 = .{0} ** defs.dim;
176 |             var force: [defs.dim]f32 = .{0} ** defs.dim;
177 |             inline for (0..defs.dim) |d| {
178 |                 force[d] += defs.global_force[d];
179 |                 force[d] += self.force_ibm[d][idx];
180 |             }
181 |             macroscopics(idx, &pop, &rho, &u, &force);
182 |             self.rho[idx] = rho;
183 |             inline for (0..defs.dim) |d| {
184 |                 self.u[d][idx] = u[d];
185 |             }
186 |         }
187 |     }
188 | 
189 |     pub fn export_arrays(self: *const Self, allocator: std.mem.Allocator, time_step: u32) !void {
190 |         var buff: [50]u8 = undefined;
191 |         const buff_slice = buff[0..];
192 | 
193 |         var map = std.StringArrayHashMap([]const f32).init(allocator);
194 |         defer map.deinit();
195 |         var data_wr = std.ArrayList(u8).init(allocator);
196 |         defer data_wr.deinit();
197 | 
198 |         try map.put("rho", @field(self, "rho"));
199 |         const u_names: [defs.dim][]const u8 = if (defs.dim == 2) .{ "ux", "uy" } else .{ "ux", "uy", "uz" };
200 |         inline for (0..defs.dim, u_names) |d, macr_name| {
201 |             try map.put(macr_name, self.u[d]);
202 |         }
203 |         const f_names: [defs.dim][]const u8 = if (defs.dim == 2) .{ "force_IBMx", "force_IBMy" } else .{ "force_IBMx", "force_IBMy", "force_IBMz" };
204 |         inline for (0..defs.dim, f_names) |d, macr_name| {
205 |             try map.put(macr_name, self.force_ibm[d]);
206 |         }
207 | 
208 |         const filename_use = try std.fmt.bufPrint(buff_slice, "output/macrs{d:0>5}.vtk", .{time_step});
209 |         try vtk.write_vtk(&data_wr, map, &defs.domain_size);
210 |         try utils.writeArrayListToFile(filename_use, data_wr.items);
211 |     }
212 | };
213 | 
214 | pub fn run_IBM_iteration(bodies: []const ibm.BodyIBM, lbm_arr: LBMArrays, time_step: u32) void {
215 |     _ = time_step;
216 |     if (bodies.len == 0) {
217 |         return;
218 |     }
219 | 
220 |     for (bodies) |b| {
221 |         b.run_ibm(lbm_arr.rho, lbm_arr.u, lbm_arr.force_ibm);
222 |     }
223 | }
224 | 
225 | pub fn run_time_step(lbm_arr: LBMArrays, time_step: u32) void {
226 |     const popMain_arr = if (time_step % 2 == 0) lbm_arr.popA else lbm_arr.popB;
227 |     const popAux_arr = if (time_step % 2 == 1) lbm_arr.popA else lbm_arr.popB;
228 | 
229 |     for (0..defs.n_nodes) |idx| {
230 |         var pop: [defs.n_pop]f32 = undefined;
231 |         streaming(idx, &pop, popMain_arr);
232 | 
233 |         var rho: f32 = 0;
234 |         var u: [defs.dim]f32 = .{0} ** defs.dim;
235 |         var force: [defs.dim]f32 = .{0} ** defs.dim;
236 |         var reset_forces = false;
237 |         inline for (0..defs.dim) |d| {
238 |             force[d] += defs.global_force[d];
239 |             const fibm = 0;
240 |             if (fibm != 0) {
241 |                 force[d] += fibm;
242 |                 reset_forces = true;
243 |             }
244 |         }
245 |         macroscopics(idx, &pop, &rho, &u, &force);
246 |         collision(idx, &pop, rho, u, force);
247 | 
248 |         // Update populations
249 |         const pos = fidx.idx2pos(idx);
250 |         inline for (0..defs.n_pop) |j| {
251 |             popAux_arr[fidx.idxPop(pos, @intCast(j))] = pop[j];
252 |         }
253 | 
254 |         // Update and save post collision macrs
255 |         macroscopics(idx, &pop, &rho, &u, &force);
256 |         lbm_arr.rho[idx] = rho;
257 |         inline for (0..defs.dim) |d| {
258 |             lbm_arr.u[d][idx] = u[d];
259 |         }
260 |         if (reset_forces) {
261 |             inline for (0..defs.dim) |d| {
262 |                 lbm_arr.force_ibm[d][idx] = 0;
263 |             }
264 |         }
265 |     }
266 | }
267 | 
268 | pub fn allocate_arrs(allocator: *const Allocator) !LBMArrays {
269 |     const popA: []f32 = try allocator.alloc(f32, defs.n_nodes * defs.n_pop);
270 |     const popB: []f32 = try allocator.alloc(f32, defs.n_nodes * defs.n_pop);
271 |     const rho: []f32 = try allocator.alloc(f32, defs.n_nodes);
272 |     var u: [defs.dim][]f32 = undefined;
273 |     var force_ibm: [defs.dim][]f32 = undefined;
274 |     inline for (0..defs.dim) |d| {
275 |         u[d] = try allocator.alloc(f32, defs.n_nodes);
276 |         force_ibm[d] = try allocator.alloc(f32, defs.n_nodes);
277 |     }
278 | 
279 |     return LBMArrays{ .popA = popA, .popB = popB, .rho = rho, .u = u, .force_ibm = force_ibm };
280 | }
281 | 


--------------------------------------------------------------------------------
/src/cl.zig:
--------------------------------------------------------------------------------
  1 | const std = @import("std");
  2 | const info = std.log.info;
  3 | 
  4 | const c = @cImport({
  5 |     @cDefine("CL_TARGET_OPENCL_VERSION", "110");
  6 |     @cInclude("CL/cl.h");
  7 | });
  8 | 
  9 | const CLError = error{
 10 |     GetPlatformsFailed,
 11 |     GetPlatformInfoFailed,
 12 |     NoPlatformsFound,
 13 |     GetDevicesFailed,
 14 |     GetDeviceInfoFailed,
 15 |     NoDevicesFound,
 16 |     CreateContextFailed,
 17 |     CreateCommandQueueFailed,
 18 |     CreateProgramFailed,
 19 |     BuildProgramFailed,
 20 |     FreeProgramFailed,
 21 |     CreateKernelFailed,
 22 |     FreeKernelFailed,
 23 |     SetKernelArgFailed,
 24 |     EnqueueNDRangeKernel,
 25 |     CreateBufferFailed,
 26 |     EnqueueWriteBufferFailed,
 27 |     EnqueueReadBufferFailed,
 28 | };
 29 | 
 30 | pub fn cl_get_device() CLError!c.cl_device_id {
 31 |     var platform_ids: [16]c.cl_platform_id = undefined;
 32 |     var platform_count: c.cl_uint = undefined;
 33 |     if (c.clGetPlatformIDs(platform_ids.len, &platform_ids, &platform_count) != c.CL_SUCCESS) {
 34 |         return CLError.GetPlatformsFailed;
 35 |     }
 36 |     info("{} cl platform(s) found:", .{@as(u32, platform_count)});
 37 | 
 38 |     for (platform_ids[0..platform_count], 0..) |id, i| {
 39 |         var name: [1024]u8 = undefined;
 40 |         var name_len: usize = undefined;
 41 |         if (c.clGetPlatformInfo(id, c.CL_PLATFORM_NAME, name.len, &name, &name_len) != c.CL_SUCCESS) {
 42 |             return CLError.GetPlatformInfoFailed;
 43 |         }
 44 |         info("  platform {}: {s}", .{ i, name[0..name_len] });
 45 |     }
 46 | 
 47 |     if (platform_count == 0) {
 48 |         return CLError.NoPlatformsFound;
 49 |     }
 50 | 
 51 |     info("choosing platform 0...", .{});
 52 | 
 53 |     var device_ids: [16]c.cl_device_id = undefined;
 54 |     var device_count: c.cl_uint = undefined;
 55 |     if (c.clGetDeviceIDs(platform_ids[0], c.CL_DEVICE_TYPE_ALL, device_ids.len, &device_ids, &device_count) != c.CL_SUCCESS) {
 56 |         return CLError.GetDevicesFailed;
 57 |     }
 58 |     info("{} cl device(s) found on platform 0:", .{@as(u32, device_count)});
 59 | 
 60 |     for (device_ids[0..device_count], 0..) |id, i| {
 61 |         var name: [1024]u8 = undefined;
 62 |         var name_len: usize = undefined;
 63 |         if (c.clGetDeviceInfo(id, c.CL_DEVICE_NAME, name.len, &name, &name_len) != c.CL_SUCCESS) {
 64 |             return CLError.GetDeviceInfoFailed;
 65 |         }
 66 |         info("  device {}: {s}", .{ i, name[0..name_len] });
 67 |     }
 68 | 
 69 |     if (device_count == 0) {
 70 |         return CLError.NoDevicesFound;
 71 |     }
 72 | 
 73 |     info("choosing device 0...", .{});
 74 | 
 75 |     return device_ids[0];
 76 | }
 77 | 
 78 | pub const CLBuffer = struct {
 79 |     const Self = @This();
 80 | 
 81 |     ctx: c.cl_context,
 82 |     d_buff: c.cl_mem,
 83 |     size: usize,
 84 | 
 85 |     pub fn init(size: usize, ctx: c.cl_context) CLError!CLBuffer {
 86 |         const input_buffer = c.clCreateBuffer(ctx, c.CL_MEM_READ_WRITE, size, null, null);
 87 |         if (input_buffer == null) {
 88 |             return CLError.CreateBufferFailed;
 89 |         }
 90 |         return .{ .ctx = ctx, .d_buff = input_buffer.?, .size = size };
 91 |     }
 92 | 
 93 |     pub fn free(self: Self) void {
 94 |         if (c.clReleaseMemObject(self.d_buff) != c.CL_SUCCESS) {
 95 |             std.log.err("Error on buffer free. {any}", .{self});
 96 |         }
 97 |     }
 98 | 
 99 |     pub fn read(self: Self, h_buff: ?*anyopaque, cmd_queue: CLQueue) CLError!void {
100 |         // Fill input buffer
101 |         if (c.clEnqueueReadBuffer(cmd_queue.queue, self.d_buff, c.CL_TRUE, 0, self.size, h_buff, 0, null, null) != c.CL_SUCCESS) {
102 |             return CLError.EnqueueReadBufferFailed;
103 |         }
104 |     }
105 | 
106 |     pub fn write(self: Self, h_buff: ?*const anyopaque, cmd_queue: CLQueue) CLError!void {
107 |         // Fill input buffer
108 |         if (c.clEnqueueWriteBuffer(cmd_queue.queue, self.d_buff, c.CL_TRUE, 0, self.size, h_buff, 0, null, null) != c.CL_SUCCESS) {
109 |             return CLError.EnqueueWriteBufferFailed;
110 |         }
111 |     }
112 | };
113 | 
114 | pub const CLQueue = struct {
115 |     const Self = @This();
116 | 
117 |     queue: c.cl_command_queue,
118 | 
119 |     pub fn init(ctx: c.cl_context, device: c.cl_device_id) CLError!Self {
120 |         const command_queue = c.clCreateCommandQueue(ctx, device, 0, null); // future: last arg is error code
121 |         if (command_queue == null) {
122 |             return CLError.CreateCommandQueueFailed;
123 |         }
124 |         return .{ .queue = command_queue };
125 |     }
126 | 
127 |     pub fn free(self: Self) void {
128 |         _ = c.clFlush(self.queue);
129 |         _ = c.clFinish(self.queue);
130 |         _ = c.clReleaseCommandQueue(self.queue);
131 |     }
132 | };
133 | 
134 | test "test OpenCL memory buffer" {
135 |     const device = try cl_get_device();
136 |     const ctx = c.clCreateContext(null, 1, &device, null, null, null); // future: last arg is error code
137 |     if (ctx == null) {
138 |         return CLError.CreateContextFailed;
139 |     }
140 |     defer _ = c.clReleaseContext(ctx);
141 | 
142 |     const hbuff_write: [3]u8 = .{ 1, 2, 3 };
143 |     var hbuff_read: [3]u8 = undefined;
144 | 
145 |     const queue = try CLQueue.init(ctx, device);
146 |     var dbuff = try CLBuffer.init(3, ctx);
147 |     defer {
148 |         dbuff.free();
149 |         queue.free();
150 |     }
151 | 
152 |     try dbuff.write(&hbuff_write, queue);
153 |     try dbuff.read(&hbuff_read, queue);
154 |     _ = c.clFlush(queue.queue);
155 |     _ = c.clFinish(queue.queue);
156 | 
157 |     try std.testing.expectEqual(1, hbuff_read[0]);
158 |     try std.testing.expectEqual(2, hbuff_read[1]);
159 |     try std.testing.expectEqual(3, hbuff_read[2]);
160 | }
161 | 
162 | pub const CLProgram = struct {
163 |     const Self = @This();
164 | 
165 |     program: c.cl_program,
166 | 
167 |     pub fn init(ctx: c.cl_context, device: c.cl_device_id, program_src_c: []const u8) CLError!Self {
168 |         const program = c.clCreateProgramWithSource(ctx, 1, @ptrCast(@constCast(&program_src_c.ptr)), null, null); // future: last arg is error code
169 |         if (program == null) {
170 |             return CLError.CreateProgramFailed;
171 |         }
172 |         if (c.clBuildProgram(program, 1, &device, null, null, null) != c.CL_SUCCESS) {
173 |             return CLError.BuildProgramFailed;
174 |         }
175 |         return .{ .program = program };
176 |     }
177 | 
178 |     pub fn free(self: Self) void {
179 |         if (c.clReleaseProgram(self.program) != c.CL_SUCCESS) {
180 |             std.log.err("Error on program free. {any}", .{self});
181 |         }
182 |     }
183 | };
184 | 
185 | test "test OpenCL program" {
186 |     const program_src =
187 |         \\__kernel void square_array(__global int* input_array, __global int* output_array) {
188 |         \\    int i = get_global_id(0);
189 |         \\    int value = input_array[i];
190 |         \\    output_array[i] = value * value;
191 |         \\}
192 |     ;
193 | 
194 |     const device = try cl_get_device();
195 |     const ctx = c.clCreateContext(null, 1, &device, null, null, null); // future: last arg is error code
196 |     if (ctx == null) {
197 |         return CLError.CreateContextFailed;
198 |     }
199 |     defer _ = c.clReleaseContext(ctx);
200 | 
201 |     const program = try CLProgram.init(ctx, device, program_src);
202 |     program.free();
203 | }
204 | 
205 | pub const CLKernel = struct {
206 |     const Self = @This();
207 | 
208 |     kernel: c.cl_kernel,
209 | 
210 |     pub fn init(program: CLProgram, kernel_name: []const u8) CLError!CLKernel {
211 |         const kernel = c.clCreateKernel(program.program, @ptrCast(kernel_name), null);
212 |         if (kernel == null) {
213 |             return CLError.CreateKernelFailed;
214 |         }
215 |         return .{ .kernel = kernel };
216 |     }
217 | 
218 |     pub fn free(self: Self) void {
219 |         if (c.clReleaseKernel(self.kernel) != c.CL_SUCCESS) {
220 |             std.log.err("Error on kernel free. {any}", .{self});
221 |         }
222 |     }
223 | };
224 | 
225 | pub const CLKernelCall = struct {
226 |     const Self = @This();
227 |     const ArgType = union(enum) {
228 |         int: i32,
229 |         float: f32,
230 |         buffer: CLBuffer,
231 |     };
232 | 
233 |     kernel: CLKernel,
234 |     queue: CLQueue,
235 | 
236 |     args: []ArgType,
237 |     work_dim: u32,
238 |     global_work_size: [3]usize,
239 |     local_work_size: [3]usize,
240 | 
241 |     pub fn call(self: Self) CLError!void {
242 |         for (self.args, 0..) |arg, i| {
243 |             switch (arg) {
244 |                 .int => |v| {
245 |                     if (c.clSetKernelArg(self.kernel.kernel, @intCast(i), @sizeOf(i32), &v) != c.CL_SUCCESS) {
246 |                         return CLError.SetKernelArgFailed;
247 |                     }
248 |                 },
249 |                 .float => |v| {
250 |                     if (c.clSetKernelArg(self.kernel.kernel, @intCast(i), @sizeOf(f32), &v) != c.CL_SUCCESS) {
251 |                         return CLError.SetKernelArgFailed;
252 |                     }
253 |                 },
254 |                 .buffer => |v| {
255 |                     if (c.clSetKernelArg(self.kernel.kernel, @intCast(i), @sizeOf(c.cl_mem), @ptrCast(&v.d_buff)) != c.CL_SUCCESS) {
256 |                         return CLError.SetKernelArgFailed;
257 |                     }
258 |                 },
259 |             }
260 |         }
261 |         if (c.clEnqueueNDRangeKernel(self.queue.queue, self.kernel.kernel, self.work_dim, null, &self.global_work_size, &self.local_work_size, 0, null, null) != c.CL_SUCCESS) {
262 |             return CLError.EnqueueNDRangeKernel;
263 |         }
264 |     }
265 | };
266 | 
267 | test "test OpenCL kernel" {
268 |     const program_src =
269 |         \\__kernel void square_array(__global int* input_array, __global int* output_array) {
270 |         \\    int i = get_global_id(0);
271 |         \\    int value = input_array[i];
272 |         \\    output_array[i] = value * value;
273 |         \\}
274 |     ;
275 | 
276 |     const device = try cl_get_device();
277 |     const ctx = c.clCreateContext(null, 1, &device, null, null, null); // future: last arg is error code
278 |     if (ctx == null) {
279 |         return CLError.CreateContextFailed;
280 |     }
281 |     defer _ = c.clReleaseContext(ctx);
282 | 
283 |     const program = try CLProgram.init(ctx, device, program_src);
284 |     defer program.free();
285 |     const kernel = try CLKernel.init(program, "square_array");
286 |     defer kernel.free();
287 | }
288 | 
289 | test "test OpenCL kernel call" {
290 |     const program_src =
291 |         \\__kernel void square_array(__global int* input_array, __global int* output_array) {
292 |         \\    int i = get_global_id(0);
293 |         \\    int value = input_array[i];
294 |         \\    output_array[i] = value * value;
295 |         \\}
296 |     ;
297 | 
298 |     const device = try cl_get_device();
299 |     const ctx = c.clCreateContext(null, 1, &device, null, null, null); // future: last arg is error code
300 |     if (ctx == null) {
301 |         return CLError.CreateContextFailed;
302 |     }
303 |     defer _ = c.clReleaseContext(ctx);
304 |     const queue = try CLQueue.init(ctx, device);
305 | 
306 |     const program = try CLProgram.init(ctx, device, program_src);
307 |     defer program.free();
308 |     const kernel = try CLKernel.init(program, "square_array");
309 |     defer kernel.free();
310 | 
311 |     // Create buffers
312 |     var input_array = init: {
313 |         var init_value: [1024]i32 = undefined;
314 |         for (0..1024) |i| {
315 |             init_value[i] = @as(i32, @intCast(i));
316 |         }
317 |         break :init init_value;
318 |     };
319 |     const input_buffer = try CLBuffer.init(1024 * @sizeOf(i32), ctx);
320 |     defer input_buffer.free();
321 |     try input_buffer.write(&input_array, queue);
322 |     const output_buffer = try CLBuffer.init(1024 * @sizeOf(i32), ctx);
323 |     defer output_buffer.free();
324 | 
325 |     const ArgType = CLKernelCall.ArgType;
326 |     const args: [2]ArgType = .{
327 |         ArgType{ .buffer = input_buffer },
328 |         ArgType{ .buffer = output_buffer },
329 |     };
330 | 
331 |     const kernel_call: CLKernelCall = .{
332 |         .kernel = kernel,
333 |         .queue = queue,
334 |         .args = @ptrCast(@constCast(&args)),
335 |         .global_work_size = .{ input_array.len, 0, 0 },
336 |         .local_work_size = .{ 64, 0, 0 },
337 |         .work_dim = 1,
338 |     };
339 |     try kernel_call.call();
340 | 
341 |     var output_array: [1024]i32 = undefined;
342 |     try output_buffer.read(&output_array, queue);
343 |     for (output_array, 0..) |val, i| {
344 |         if (i % 100 == 0) {
345 |             try std.testing.expect(val == (i * i));
346 |             info("{} ^ 2 = {}", .{ i, val });
347 |         }
348 |     }
349 | }
350 | 


--------------------------------------------------------------------------------