├── .gitignore ├── src ├── rtnetlink │ ├── address.zig │ ├── utils.zig │ ├── route.zig │ ├── link.zig │ ├── link │ │ ├── del.zig │ │ ├── add.zig │ │ ├── set.zig │ │ ├── attrs.zig │ │ ├── get.zig │ │ ├── attrs │ │ │ └── link_info.zig │ │ └── link.zig │ ├── route │ │ ├── add.zig │ │ ├── attrs.zig │ │ ├── get.zig │ │ └── route.zig │ ├── address │ │ ├── add.zig │ │ ├── attrs.zig │ │ └── address.zig │ └── rtnetlink.zig ├── ip.zig ├── fs.zig ├── utils.zig ├── main.zig ├── ps.zig ├── cgroup.zig ├── args.zig ├── container.zig └── net.zig ├── README.md └── LICENSE /.gitignore: -------------------------------------------------------------------------------- 1 | zig-* 2 | .zig-cache 3 | -------------------------------------------------------------------------------- /src/rtnetlink/address.zig: -------------------------------------------------------------------------------- 1 | pub const AddrAdd = @import("address/add.zig"); 2 | -------------------------------------------------------------------------------- /src/rtnetlink/utils.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | 3 | pub fn nalign(value: usize) usize { 4 | return std.mem.alignForward(usize, value, 4); 5 | } 6 | -------------------------------------------------------------------------------- /src/rtnetlink/route.zig: -------------------------------------------------------------------------------- 1 | pub const RouteAdd = @import("route/add.zig"); 2 | pub const RouteGet = @import("route/get.zig"); 3 | pub const RouteMessage = @import("route/route.zig"); 4 | -------------------------------------------------------------------------------- /src/ip.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const log = std.log; 3 | 4 | // TODO: make sure that the ip address is not used 5 | pub fn getRandomIpv4Addr() [4]u8 { 6 | const num = std.crypto.random.int(u8); 7 | return .{ 10, 0, 
0, num }; 8 | } 9 | -------------------------------------------------------------------------------- /src/rtnetlink/link.zig: -------------------------------------------------------------------------------- 1 | pub const LinkAdd = @import("link/add.zig"); 2 | pub const LinkGet = @import("link/get.zig"); 3 | pub const LinkSet = @import("link/set.zig"); 4 | pub const LinkDelete = @import("link/del.zig"); 5 | pub const LinkMessage = @import("link/link.zig"); 6 | -------------------------------------------------------------------------------- /src/rtnetlink/link/del.zig: -------------------------------------------------------------------------------- 1 | const LinkMessage = @import("link.zig"); 2 | const RtNetLink = @import("../rtnetlink.zig"); 3 | const std = @import("std"); 4 | const linux = std.os.linux; 5 | 6 | const LinkDel = @This(); 7 | 8 | msg: LinkMessage, 9 | nl: *RtNetLink, 10 | 11 | pub fn init(allocator: std.mem.Allocator, nl: *RtNetLink, index: c_int) LinkDel { 12 | var msg = LinkMessage.init(allocator, .delete); 13 | msg.msg.header.index = index; 14 | return LinkDel{ .msg = msg, .nl = nl }; 15 | } 16 | 17 | pub fn exec(self: *LinkDel) !void { 18 | const data = try self.msg.compose(); 19 | defer self.msg.allocator.free(data); 20 | 21 | try self.nl.send(data); 22 | return self.nl.recv_ack(); 23 | } 24 | -------------------------------------------------------------------------------- /src/fs.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const linux = std.os.linux; 3 | const checkErr = @import("utils.zig").checkErr; 4 | 5 | rootfs: []const u8, 6 | 7 | const Fs = @This(); 8 | 9 | pub fn init(rootfs: []const u8) Fs { 10 | return .{ .rootfs = rootfs }; 11 | } 12 | 13 | pub fn setup(self: *Fs) !void { 14 | try checkErr(linux.chroot(@ptrCast(self.rootfs)), error.Chroot); 15 | try checkErr(linux.chdir("/"), error.Chdir); 16 | 17 | // TODO: mount more filesystems 18 | // from list: 
https://github.com/opencontainers/runtime-spec/blob/main/config-linux.md 19 | try checkErr(linux.mount("proc", "proc", "proc", 0, 0), error.MountProc); 20 | try checkErr(linux.mount("tmpfs", "tmp", "tmpfs", 0, 0), error.MountTmpFs); 21 | // ignore sysfs mount error since it can fail when 22 | // executed in a new user namespace 23 | _ = linux.mount("sysfs", "sys", "sysfs", 0, 0); 24 | } 25 | -------------------------------------------------------------------------------- /src/rtnetlink/route/add.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const NetLink = @import("../rtnetlink.zig"); 3 | const RouteMessage = @import("route.zig"); 4 | 5 | pub const Options = struct { 6 | gateway: ?[4]u8 = null, 7 | }; 8 | 9 | const Add = @This(); 10 | msg: RouteMessage, 11 | nl: *NetLink, 12 | opts: Options, 13 | 14 | pub fn init(allocator: std.mem.Allocator, nl: *NetLink, opts: Options) Add { 15 | var msg = RouteMessage.init(allocator, .create); 16 | 17 | msg.msg.hdr.protocol = .Static; 18 | msg.msg.hdr.type = .Unicast; 19 | 20 | return .{ 21 | .msg = msg, 22 | .opts = opts, 23 | .nl = nl, 24 | }; 25 | } 26 | 27 | fn applyOptions(self: *Add) !void { 28 | if (self.opts.gateway) |addr| { 29 | try self.msg.addAttr(.{ .gateway = addr }); 30 | } 31 | } 32 | 33 | pub fn exec(self: *Add) !void { 34 | try self.applyOptions(); 35 | 36 | const data = try self.msg.compose(); 37 | defer self.msg.allocator.free(data); 38 | 39 | try self.nl.send(data); 40 | try self.nl.recv_ack(); 41 | } 42 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # zcrun 2 | a simple linux container runtime built with zig 3 | 4 | 5 | # Features 6 | - namespaces: 7 | - isolate user, network, pid, mount, ipc, and uts namespace 8 | - cgroups: 9 | - support cgroups v2 10 | - limit memory, cpu, or pids (# of procs). 
11 | - internet access inside containers using SNAT 12 | 13 | # Usage 14 | > [!NOTE] 15 | > make sure that ip forwarding is enabled to be able to access the internet inside containers. 16 | > run `sysctl net.ipv4.ip_forward` to check if it is enabled. 17 | > if not, run `sudo sysctl -w net.ipv4.ip_forward=1` to enable it. 18 | 19 | > [!IMPORTANT] 20 | > zcrun must be run as root 21 | 22 | ```sh 23 | $ mkdir rootfs 24 | # export container rootfs dir using docker 25 | $ docker export $(docker create busybox) | tar -C rootfs -xvf - 26 | # run the container using zcrun 27 | # zcrun run [-mem <limit>] [-cpu <limit>] [-pids <limit>] <name> <rootfs> <cmd...> 28 | $ zcrun run busybox rootfs sh 29 | ``` 30 | 31 | # Dependencies: 32 | - The `iptables` command. 33 | - Zig. This branch was tested using version `0.12.0-dev.3191+9cf28d1e9`. 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 nwf 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/utils.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const linux = std.os.linux; 3 | 4 | pub const CGROUP_PATH = "/sys/fs/cgroup/"; 5 | pub const INFO_PATH = "/var/run/zcrun/containers/"; 6 | pub const NETNS_PATH = INFO_PATH ++ "netns/"; 7 | pub const BRIDGE_NAME = "zcrun0"; 8 | 9 | pub fn checkErr(val: usize, err: anyerror) !void { 10 | const e = std.posix.errno(val); 11 | // we ignore busy errors here because this fn is used 12 | // to check the error of mount syscalls. 13 | // busy is returned when the fs being mounted is currently in use 14 | // which means that it was previously mounted 15 | if (e != .SUCCESS and e != .BUSY) { 16 | std.log.err("err: {}", .{e}); 17 | return err; 18 | } 19 | } 20 | 21 | pub fn createDirIfNotExists(path: []const u8) !bool { 22 | std.fs.makeDirAbsolute(path) catch |e| { 23 | return switch (e) { 24 | error.PathAlreadyExists => false, 25 | else => e, 26 | }; 27 | }; 28 | return true; 29 | } 30 | 31 | pub fn createFileIfNotExists(path: []const u8) !bool { 32 | const f = std.fs.createFileAbsolute(path, .{ .exclusive = true }) catch |e| { 33 | return switch (e) { 34 | error.PathAlreadyExists => false, 35 | else => e, 36 | }; 37 | }; 38 | f.close(); 39 | return true; 40 | } 41 | -------------------------------------------------------------------------------- /src/rtnetlink/address/add.zig: -------------------------------------------------------------------------------- 1 | const Addr = @import("address.zig"); 2 | const RtNetLink = @import("../rtnetlink.zig"); 3 | const std = @import("std"); 4 | const 
linux = std.os.linux; 5 | 6 | const AddrAdd = @This(); 7 | 8 | pub const Options = struct { 9 | index: c_int, 10 | addr: [4]u8, 11 | prefix_len: u8, 12 | }; 13 | 14 | msg: Addr, 15 | nl: *RtNetLink, 16 | opts: Options, 17 | 18 | pub fn init(allocator: std.mem.Allocator, nl: *RtNetLink, options: Options) AddrAdd { 19 | const msg = Addr.init(allocator, .create); 20 | return .{ .msg = msg, .nl = nl, .opts = options }; 21 | } 22 | 23 | fn applyOptions(self: *AddrAdd) !void { 24 | self.msg.msg.hdr.index = @intCast(self.opts.index); 25 | self.msg.msg.hdr.prefix_len = self.opts.prefix_len; 26 | try self.msg.addAttr(.{ .address = self.opts.addr }); 27 | try self.msg.addAttr(.{ .local = self.opts.addr }); 28 | 29 | if (self.opts.prefix_len == 32) { 30 | try self.msg.addAttr(.{ .broadcast = self.opts.addr }); 31 | } else { 32 | const brd = (@as(u32, 0xffff_ffff) >> @intCast(self.opts.prefix_len)) | std.mem.bytesAsValue(u32, &self.opts.addr).*; 33 | try self.msg.addAttr(.{ .broadcast = std.mem.toBytes(brd) }); 34 | } 35 | } 36 | 37 | pub fn exec(self: *AddrAdd) !void { 38 | try self.applyOptions(); 39 | 40 | const data = try self.msg.compose(); 41 | defer self.msg.allocator.free(data); 42 | 43 | try self.nl.send(data); 44 | return self.nl.recv_ack(); 45 | } 46 | -------------------------------------------------------------------------------- /src/main.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const log = std.log; 3 | const linux = std.os.linux; 4 | const Container = @import("container.zig"); 5 | const args = @import("args.zig"); 6 | const ps = @import("ps.zig"); 7 | const utils = @import("utils.zig"); 8 | 9 | pub fn main() !void { 10 | var arena_allocator = std.heap.ArenaAllocator.init(std.heap.page_allocator); 11 | const allocator = arena_allocator.allocator(); 12 | const cmd = try args.parseArgs(allocator); 13 | 14 | switch (cmd) { 15 | .run => |r| { 16 | try zcrunInit(); 17 | var container = 
try Container.init(r, allocator); 18 | defer container.deinit(); 19 | try container.run(); 20 | }, 21 | .help => { 22 | _ = try std.io.getStdOut().write(args.help); 23 | }, 24 | .ps => { 25 | const containers = try ps.runningContainers(allocator); 26 | var stdout = std.io.getStdOut().writer(); 27 | _ = try stdout.print("Running Containers:\n", .{}); 28 | for (containers) |c| { 29 | try c.print(stdout); 30 | } 31 | }, 32 | } 33 | } 34 | 35 | pub fn zcrunInit() !void { 36 | _ = try utils.createDirIfNotExists("/var/run/zcrun"); 37 | _ = try utils.createDirIfNotExists("/var/run/zcrun/containers"); 38 | _ = try utils.createDirIfNotExists("/var/run/zcrun/containers/netns"); 39 | const path = utils.CGROUP_PATH ++ "zcrun/"; 40 | if (!try utils.createDirIfNotExists(path)) return; 41 | 42 | // setup root cgroup 43 | const root_cgroup = path ++ "cgroup.subtree_control"; 44 | var root_cgroup_file = try std.fs.openFileAbsolute(root_cgroup, .{ .mode = .write_only }); 45 | defer root_cgroup_file.close(); 46 | _ = try root_cgroup_file.write("+cpu +memory +pids"); // enable cpu, mem, and pid controllers in the root cgroup 47 | 48 | } 49 | -------------------------------------------------------------------------------- /src/rtnetlink/link/add.zig: -------------------------------------------------------------------------------- 1 | const LinkMessage = @import("link.zig"); 2 | const RtNetLink = @import("../rtnetlink.zig"); 3 | const std = @import("std"); 4 | const linux = std.os.linux; 5 | 6 | const LinkAdd = @This(); 7 | 8 | pub const Options = struct { 9 | name: ?[]const u8 = null, 10 | veth: ?struct { []const u8, []const u8 } = null, 11 | bridge: ?[]const u8 = null, 12 | }; 13 | 14 | msg: LinkMessage, 15 | nl: *RtNetLink, 16 | opts: Options, 17 | 18 | pub fn init(allocator: std.mem.Allocator, nl: *RtNetLink, options: Options) LinkAdd { 19 | const msg = LinkMessage.init(allocator, .create); 20 | return LinkAdd{ .msg = msg, .nl = nl, .opts = options }; 21 | } 22 | 23 | fn name(self: 
*LinkAdd, val: []const u8) !void { 24 | try self.msg.addAttr(.{ .name = val }); 25 | } 26 | 27 | fn veth(self: *LinkAdd, if_name: []const u8, peer_name: []const u8) !void { 28 | try self.name(if_name); 29 | 30 | var peer_info = LinkMessage.LinkInfo.init(self.msg.allocator); 31 | try peer_info.attrs.append(.{ .name = peer_name }); 32 | 33 | try self.msg.addAttr(.{ 34 | .link_info = .{ .info = .{ .peer_info = peer_info }, .kind = .veth }, 35 | }); 36 | } 37 | 38 | fn bridge(self: *LinkAdd, br_name: []const u8) !void { 39 | try self.msg.addAttr(.{ .link_info = .{ .kind = .bridge } }); 40 | try self.msg.addAttr(.{ .name = br_name }); 41 | } 42 | 43 | fn applyOptions(self: *LinkAdd) !void { 44 | if (self.opts.name) |val| { 45 | try self.name(val); 46 | } 47 | if (self.opts.veth) |val| { 48 | try self.veth(val[0], val[1]); 49 | } 50 | if (self.opts.bridge) |val| { 51 | try self.bridge(val); 52 | } 53 | } 54 | 55 | pub fn exec(self: *LinkAdd) !void { 56 | try self.applyOptions(); 57 | 58 | const data = try self.msg.compose(); 59 | defer self.msg.allocator.free(data); 60 | 61 | try self.nl.send(data); 62 | return self.nl.recv_ack(); 63 | } 64 | -------------------------------------------------------------------------------- /src/rtnetlink/address/attrs.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const nalign = @import("../utils.zig").nalign; 3 | const linux = std.os.linux; 4 | const c = @cImport(@cInclude("linux/rtnetlink.h")); 5 | 6 | const IFA_ADDRESS: linux.IFLA = @enumFromInt(1); 7 | const IFA_LOCAL: linux.IFLA = @enumFromInt(2); 8 | const IFA_LABEL: linux.IFLA = @enumFromInt(3); 9 | const IFA_BROADCAST: linux.IFLA = @enumFromInt(4); 10 | const IFA_ANYCAST: linux.IFLA = @enumFromInt(5); 11 | const IFA_CACHEINFO: linux.IFLA = @enumFromInt(6); 12 | const IFA_MULTICAST: linux.IFLA = @enumFromInt(7); 13 | const IFA_FLAGS: linux.IFLA = @enumFromInt(8); 14 | 15 | // TODO: support IPv6 16 | pub 
const AddressAttr = union(enum) { 17 | address: [4]u8, 18 | local: [4]u8, 19 | broadcast: [4]u8, 20 | 21 | fn getAttr(self: AddressAttr) linux.rtattr { 22 | var attr: linux.rtattr = switch (self) { 23 | .address => |val| .{ .len = @intCast(val.len), .type = IFA_ADDRESS }, 24 | .local => |val| .{ .len = @intCast(val.len), .type = IFA_LOCAL }, 25 | .broadcast => |val| .{ .len = @intCast(val.len), .type = IFA_BROADCAST }, 26 | }; 27 | 28 | attr.len = @intCast(nalign(attr.len + @sizeOf(linux.rtattr))); 29 | return attr; 30 | } 31 | 32 | pub fn size(self: AddressAttr) usize { 33 | const len = switch (self) { 34 | inline else => |val| val.len, 35 | }; 36 | return nalign(len + @sizeOf(linux.rtattr)); 37 | } 38 | 39 | pub fn encode(self: AddressAttr, buff: []u8) !usize { 40 | const header = self.getAttr(); 41 | @memcpy(buff[0..@sizeOf(linux.rtattr)], std.mem.asBytes(&header)); 42 | _ = try self.encodeVal(buff[@sizeOf(linux.rtattr)..]) + @sizeOf(linux.rtattr); 43 | return nalign(header.len); 44 | } 45 | 46 | inline fn encodeVal(self: AddressAttr, buff: []u8) !usize { 47 | return switch (self) { 48 | inline else => |val| { 49 | @memcpy(buff[0..val.len], &val); 50 | return val.len; 51 | }, 52 | }; 53 | } 54 | }; 55 | -------------------------------------------------------------------------------- /src/rtnetlink/link/set.zig: -------------------------------------------------------------------------------- 1 | const LinkMessage = @import("link.zig"); 2 | const RtNetLink = @import("../rtnetlink.zig"); 3 | const std = @import("std"); 4 | const linux = std.os.linux; 5 | 6 | const LinkSet = @This(); 7 | 8 | pub const Options = struct { 9 | index: c_int, 10 | name: ?[]const u8 = null, 11 | master: ?c_int = null, 12 | up: bool = false, 13 | down: bool = false, 14 | nomaster: bool = false, 15 | netns_fd: ?linux.fd_t = null, 16 | }; 17 | 18 | msg: LinkMessage, 19 | nl: *RtNetLink, 20 | opts: Options, 21 | pub fn init(allocator: std.mem.Allocator, nl: *RtNetLink, options: Options) 
LinkSet { 22 | var msg = LinkMessage.init(allocator, .set); 23 | msg.msg.header.index = options.index; 24 | return .{ .msg = msg, .nl = nl, .opts = options }; 25 | } 26 | 27 | fn up(self: *LinkSet) void { 28 | self.msg.msg.header.flags |= LinkMessage.Flags.UP; 29 | self.msg.msg.header.change |= LinkMessage.Flags.UP; 30 | } 31 | 32 | fn down(self: *LinkSet) void { 33 | self.msg.msg.header.flags &= ~LinkMessage.Flags.UP; 34 | self.msg.msg.header.change |= LinkMessage.Flags.UP; 35 | } 36 | 37 | fn name(self: *LinkSet, value: []const u8) !void { 38 | try self.msg.addAttr(.{ .name = value }); 39 | } 40 | 41 | fn master(self: *LinkSet, idx: c_int) !void { 42 | try self.msg.addAttr(.{ .master = @intCast(idx) }); 43 | } 44 | 45 | fn nomaster(self: *LinkSet) !void { 46 | try self.msg.addAttr(.{ .master = 0 }); 47 | } 48 | 49 | fn applyOptions(self: *LinkSet) !void { 50 | if (self.opts.up) { 51 | self.up(); 52 | } else if (self.opts.down) { 53 | self.down(); 54 | } 55 | 56 | if (self.opts.name) |val| { 57 | try self.name(val); 58 | } 59 | 60 | if (self.opts.master) |val| { 61 | try self.master(val); 62 | } else if (self.opts.nomaster) { 63 | try self.nomaster(); 64 | } 65 | 66 | if (self.opts.netns_fd) |fd| { 67 | try self.msg.addAttr(.{ .netns_fd = fd }); 68 | } 69 | } 70 | 71 | pub fn exec(self: *LinkSet) !void { 72 | try self.applyOptions(); 73 | 74 | const data = try self.msg.compose(); 75 | defer self.msg.allocator.free(data); 76 | 77 | try self.nl.send(data); 78 | return self.nl.recv_ack(); 79 | } 80 | -------------------------------------------------------------------------------- /src/ps.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const utils = @import("utils.zig"); 3 | 4 | pub const ContainerInfo = struct { 5 | name: []const u8, 6 | cmd: []const u8, 7 | 8 | pub fn print(self: ContainerInfo, writer: anytype) !void { 9 | try writer.print("{s}: {s}\n", .{ self.name, self.cmd }); 10 | } 11 | 
}; 12 | 13 | pub fn runningContainers(allocator: std.mem.Allocator) ![]ContainerInfo { 14 | const cgroup_path = utils.CGROUP_PATH ++ "zcrun/"; 15 | var info = std.ArrayList(ContainerInfo).init(allocator); 16 | errdefer info.deinit(); 17 | 18 | var cgroup_dir = std.fs.openDirAbsolute(cgroup_path, .{ .iterate = true }) catch |e| { 19 | switch (e) { 20 | error.FileNotFound => return &.{}, 21 | else => return e, 22 | } 23 | }; 24 | defer cgroup_dir.close(); 25 | 26 | var iter = cgroup_dir.iterate(); 27 | 28 | while (try iter.next()) |val| { 29 | if (val.kind != .directory) continue; 30 | const c = (try getContainerInfo(allocator, val.name)) orelse continue; 31 | try info.append(c); 32 | } 33 | 34 | return info.toOwnedSlice(); 35 | } 36 | 37 | fn getContainerInfo(allocator: std.mem.Allocator, name: []const u8) !?ContainerInfo { 38 | const procs_path = try std.mem.concat(allocator, u8, &.{ utils.CGROUP_PATH, "zcrun/", name, "/cgroup.procs" }); 39 | defer allocator.free(procs_path); 40 | 41 | const procs_file = try std.fs.openFileAbsolute(procs_path, .{}); 42 | defer procs_file.close(); 43 | 44 | const procs = try procs_file.reader().readAllAlloc(allocator, std.math.maxInt(u8)); 45 | var iter = std.mem.splitBackwardsScalar(u8, procs, '\n'); 46 | _ = iter.next(); // skip empty line 47 | const running_proc = iter.next() orelse return null; 48 | 49 | const proc_exe = try std.mem.concat(allocator, u8, &.{ "/proc/", running_proc, "/exe" }); 50 | var buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; 51 | 52 | // TODO: strip container rootfs path from cmd 53 | const cmd = try std.fs.readLinkAbsolute(proc_exe, &buffer); 54 | var cmd_name = try allocator.alloc(u8, cmd.len); 55 | @memcpy(cmd_name[0..cmd.len], cmd); 56 | 57 | return .{ 58 | .name = name, 59 | .cmd = cmd_name, 60 | }; 61 | } 62 | -------------------------------------------------------------------------------- /src/rtnetlink/link/attrs.zig: -------------------------------------------------------------------------------- 
1 | const std = @import("std"); 2 | const linux = std.os.linux; 3 | const link = @import("link.zig"); 4 | const nalign = @import("../utils.zig").nalign; 5 | 6 | const LinkInfoAttr = @import("attrs/link_info.zig").LinkInfoAttr; 7 | 8 | pub const LinkAttribute = union(enum) { 9 | name: []const u8, 10 | master: u32, 11 | link_info: LinkInfoAttr, 12 | netns_fd: linux.fd_t, 13 | 14 | pub fn size(self: LinkAttribute) usize { 15 | const val_len = switch (self) { 16 | .name => |val| val.len + 1, 17 | .link_info => |val| val.size(), 18 | .master, .netns_fd => 4, 19 | }; 20 | 21 | return nalign(val_len + @sizeOf(linux.rtattr)); 22 | } 23 | 24 | fn getAttr(self: LinkAttribute) linux.rtattr { 25 | var attr: linux.rtattr = switch (self) { 26 | .name => |val| .{ .len = @intCast(val.len + 1), .type = .IFNAME }, 27 | .link_info => |val| .{ .len = @intCast(val.size()), .type = .LINKINFO }, 28 | .master => .{ .len = 4, .type = .MASTER }, 29 | .netns_fd => .{ .len = 4, .type = .NET_NS_FD }, 30 | }; 31 | 32 | attr.len = @intCast(std.mem.alignForward(usize, attr.len + @sizeOf(linux.rtattr), 4)); 33 | return attr; 34 | } 35 | 36 | pub fn encode(self: LinkAttribute, buff: []u8) !usize { 37 | const header = self.getAttr(); 38 | @memcpy(buff[0..@sizeOf(linux.rtattr)], std.mem.asBytes(&header)); 39 | const len = try self.encodeVal(buff[@sizeOf(linux.rtattr)..]); 40 | return nalign(len + @sizeOf(linux.rtattr)); 41 | } 42 | 43 | inline fn encodeVal(self: LinkAttribute, buff: []u8) !usize { 44 | return switch (self) { 45 | .name => |val| { 46 | @memcpy(buff[0..val.len], val); 47 | buff[val.len] = 0; 48 | return val.len + 1; 49 | }, 50 | .link_info => |val| try val.encode(buff), 51 | .master => |val| { 52 | @memcpy(buff[0..4], std.mem.asBytes(&val)); 53 | return 4; 54 | }, 55 | .netns_fd => |val| { 56 | @memcpy(buff[0..4], std.mem.asBytes(&val)); 57 | return 4; 58 | }, 59 | }; 60 | } 61 | }; 62 | -------------------------------------------------------------------------------- /src/cgroup.zig: 
-------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const linux = std.os.linux; 3 | const utils = @import("utils.zig"); 4 | const ResourceArgs = @import("args.zig").Resources; 5 | 6 | const Resource = enum { 7 | cpu, 8 | memory, 9 | pids, 10 | 11 | fn max(self: Resource) []const u8 { 12 | return switch (self) { 13 | inline else => |v| @tagName(v) ++ ".max", 14 | }; 15 | } 16 | }; 17 | 18 | /// container id 19 | cid: []const u8, 20 | options: ResourceArgs, 21 | allocator: std.mem.Allocator, 22 | 23 | const Cgroup = @This(); 24 | 25 | pub fn init(cid: []const u8, options: ResourceArgs, allocator: std.mem.Allocator) !Cgroup { 26 | var cgroups = Cgroup{ 27 | .cid = cid, 28 | .options = options, 29 | .allocator = allocator, 30 | }; 31 | try cgroups.initDirs(); 32 | try cgroups.applyResourceLimits(); 33 | return cgroups; 34 | } 35 | 36 | fn applyResourceLimits(self: *Cgroup) !void { 37 | if (self.options.mem) |val| { 38 | try self.setResourceMax(.memory, val); 39 | } 40 | 41 | if (self.options.cpu) |val| { 42 | try self.setResourceMax(.cpu, val); 43 | } 44 | 45 | if (self.options.pids) |val| { 46 | try self.setResourceMax(.pids, val); 47 | } 48 | } 49 | 50 | fn initDirs(self: *Cgroup) !void { 51 | const path = try std.mem.concat(self.allocator, u8, &.{ utils.CGROUP_PATH ++ "zcrun/", self.cid }); 52 | defer self.allocator.free(path); 53 | _ = try utils.createDirIfNotExists(path); 54 | } 55 | 56 | pub fn setResourceMax(self: *Cgroup, resource: Resource, limit: []const u8) !void { 57 | const path = try std.mem.concat(self.allocator, u8, &.{ utils.CGROUP_PATH, "zcrun/", self.cid, "/", resource.max() }); 58 | defer self.allocator.free(path); 59 | var file = try std.fs.openFileAbsolute(path, .{ .mode = .read_write }); 60 | defer file.close(); 61 | std.debug.assert(try file.write(limit) == limit.len); 62 | } 63 | 64 | pub fn enterCgroup(self: *Cgroup, pid: linux.pid_t) !void { 65 | const cgroup_path = try 
std.mem.concat(self.allocator, u8, &.{ utils.CGROUP_PATH, "zcrun/", self.cid, "/cgroup.procs" }); 66 | defer self.allocator.free(cgroup_path); 67 | const file = try std.fs.openFileAbsolute(cgroup_path, .{ .mode = .write_only }); 68 | defer file.close(); 69 | try file.writer().print("{}", .{pid}); 70 | } 71 | 72 | pub fn deinit(self: *Cgroup) !void { 73 | const path = try std.mem.concat(self.allocator, u8, &.{ utils.CGROUP_PATH ++ "zcrun/", self.cid }); 74 | defer self.allocator.free(path); 75 | try std.fs.deleteDirAbsolute(path); 76 | } 77 | -------------------------------------------------------------------------------- /src/rtnetlink/route/attrs.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const nalign = @import("../utils.zig").nalign; 3 | const c = @cImport(@cInclude("linux/rtnetlink.h")); 4 | 5 | comptime { 6 | std.debug.assert(@sizeOf(std.os.linux.rtattr) == @sizeOf(RtAttr)); 7 | } 8 | 9 | pub const RtAttr = packed struct { 10 | len: u16, 11 | type: AttrType, 12 | }; 13 | 14 | pub const AttrType = enum(u16) { 15 | Unspec = c.RTA_UNSPEC, 16 | Dst = c.RTA_DST, 17 | Src = c.RTA_SRC, 18 | Iif = c.RTA_IIF, 19 | Oif = c.RTA_OIF, 20 | Gateway = c.RTA_GATEWAY, 21 | Priority = c.RTA_PRIORITY, 22 | Prefsrc = c.RTA_PREFSRC, 23 | Metrics = c.RTA_METRICS, 24 | Multipath = c.RTA_MULTIPATH, 25 | Flow = c.RTA_FLOW, 26 | CacheInfo = c.RTA_CACHEINFO, 27 | Table = c.RTA_TABLE, 28 | Mark = c.RTA_MARK, 29 | Stats = c.RTA_MFC_STATS, 30 | Via = c.RTA_VIA, 31 | NewDst = c.RTA_NEWDST, 32 | Pref = c.RTA_PREF, 33 | Type = c.RTA_ENCAP_TYPE, 34 | Encap = c.RTA_ENCAP, 35 | Expires = c.RTA_EXPIRES, 36 | Pad = c.RTA_PAD, 37 | Uid = c.RTA_UID, 38 | Propagate = c.RTA_TTL_PROPAGATE, 39 | Proto = c.RTA_IP_PROTO, 40 | Sport = c.RTA_SPORT, 41 | Dport = c.RTA_DPORT, 42 | Id = c.RTA_NH_ID, 43 | }; 44 | // TODO: support IPv6 45 | pub const Attr = union(enum) { 46 | gateway: [4]u8, 47 | output_if: u32, 48 | 49 | fn 
getAttr(self: Attr) RtAttr { 50 | var attr: RtAttr = switch (self) { 51 | .gateway => |val| .{ .len = val.len, .type = .Gateway }, 52 | .output_if => .{ .len = 4, .type = .Oif }, 53 | }; 54 | 55 | attr.len = @intCast(nalign(attr.len + @sizeOf(RtAttr))); 56 | return attr; 57 | } 58 | 59 | pub fn size(self: Attr) usize { 60 | const len = switch (self) { 61 | .gateway => |val| val.len, 62 | .output_if => 4, 63 | }; 64 | return nalign(len + @sizeOf(RtAttr)); 65 | } 66 | 67 | pub fn encode(self: Attr, buff: []u8) !usize { 68 | const header = self.getAttr(); 69 | @memcpy(buff[0..@sizeOf(RtAttr)], std.mem.asBytes(&header)); 70 | _ = try self.encodeVal(buff[@sizeOf(RtAttr)..]); 71 | return nalign(header.len); 72 | } 73 | 74 | inline fn encodeVal(self: Attr, buff: []u8) !usize { 75 | return switch (self) { 76 | .gateway => |val| { 77 | @memcpy(buff[0..val.len], &val); 78 | return val.len; 79 | }, 80 | .output_if => |val| { 81 | @memcpy(buff[0..4], std.mem.asBytes(&val)); 82 | return 4; 83 | }, 84 | }; 85 | } 86 | }; 87 | -------------------------------------------------------------------------------- /src/args.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | 3 | inline fn eql(a: []const u8, b: []const u8) bool { 4 | return std.mem.eql(u8, a, b); 5 | } 6 | 7 | /// zcrun run 8 | pub const RunArgs = struct { 9 | name: []const u8, 10 | rootfs_path: []const u8, 11 | cmd: []const []const u8, 12 | resources: Resources, 13 | 14 | fn parse(allocator: std.mem.Allocator, args: *std.process.ArgIterator) !RunArgs { 15 | var run_args: RunArgs = .{ 16 | .resources = try Resources.parse(args), 17 | .name = args.next() orelse return error.MissingName, 18 | .rootfs_path = args.next() orelse return error.MissingRootfs, 19 | .cmd = undefined, 20 | }; 21 | 22 | var cmd = std.ArrayList([]const u8).init(allocator); 23 | 24 | while (args.next()) |val| { 25 | try cmd.append(val); 26 | } 27 | if (cmd.items.len == 0) return 
error.MissingCmd; 28 | 29 | run_args.cmd = try cmd.toOwnedSlice(); 30 | return run_args; 31 | } 32 | }; 33 | 34 | pub const Resources = struct { 35 | mem: ?[]const u8 = null, 36 | cpu: ?[]const u8 = null, 37 | pids: ?[]const u8 = null, 38 | fn parse(args: *std.process.ArgIterator) !Resources { 39 | var r = Resources{}; 40 | while (args.next()) |arg| { 41 | var found_option = false; 42 | inline for (comptime std.meta.fieldNames(Resources)) |field| { 43 | // options can be passed as "-m [val]" or "-mem [val]" 44 | if (eql(arg, "-" ++ field[0..1]) or eql(arg, "-" ++ field)) { 45 | found_option = true; 46 | @field(r, field) = args.next() orelse return error.MissingValue; 47 | } 48 | } 49 | 50 | if (found_option) continue; 51 | // resource opts not passed 52 | // reset iterator and return parsed values 53 | args.inner.index -= 1; 54 | break; 55 | } 56 | return r; 57 | } 58 | }; 59 | 60 | pub const Args = union(enum) { 61 | run: RunArgs, 62 | ps, 63 | help, 64 | }; 65 | 66 | pub const help = 67 | \\zcrun: linux container runtime 68 | \\ 69 | \\arguments: 70 | \\run [-mem] [-cpu] [-pids] 71 | \\ps 72 | \\help 73 | \\ 74 | ; 75 | 76 | pub fn parseArgs(allocator: std.mem.Allocator) !Args { 77 | var cli_args = try std.process.argsWithAllocator(allocator); 78 | _ = cli_args.next(); // skip first arg 79 | const cmd = cli_args.next() orelse return error.InvalidArgs; 80 | 81 | inline for (std.meta.fields(Args)) |f| { 82 | if (f.type != void and !@hasDecl(f.type, "parse")) @compileError("must define parse fn"); 83 | if (eql(cmd, f.name)) { 84 | if (f.type == void) { 85 | return @unionInit(Args, f.name, {}); 86 | } else { 87 | return @unionInit(Args, f.name, try f.type.parse(allocator, &cli_args)); 88 | } 89 | } 90 | } 91 | 92 | return error.InvalidArgs; 93 | } 94 | -------------------------------------------------------------------------------- /src/rtnetlink/link/get.zig: -------------------------------------------------------------------------------- 1 | const LinkMessage = 
@import("link.zig");
const RtNetLink = @import("../rtnetlink.zig");
const std = @import("std");
const log = std.log;
const nalign = @import("../utils.zig").nalign;
const linux = std.os.linux;

const LinkGet = @This();

/// Selects the link to query: by name, by index, or both.
pub const Options = struct {
    name: ?[]const u8 = null,
    index: ?u32 = null,
};

msg: LinkMessage,
nl: *RtNetLink,
opts: Options,
allocator: std.mem.Allocator,

/// Builds a "get link" request bound to the netlink socket `nl`.
pub fn init(allocator: std.mem.Allocator, nl: *RtNetLink, options: Options) LinkGet {
    const msg = LinkMessage.init(allocator, .get);
    return .{
        .msg = msg,
        .nl = nl,
        .opts = options,
        .allocator = allocator,
    };
}

/// Adds the interface-name attribute to the request.
fn name(self: *LinkGet, value: []const u8) !void {
    try self.msg.addAttr(.{ .name = value });
}

/// Copies the configured options into the request message.
fn applyOptions(self: *LinkGet) !void {
    if (self.opts.name) |val| {
        try self.name(val);
    }
    if (self.opts.index) |val| {
        self.msg.msg.header.index = @intCast(val);
    }
}

/// Sends the request and returns the parsed reply.
pub fn exec(self: *LinkGet) !LinkMessage {
    try self.applyOptions();

    const data = try self.msg.compose();
    defer self.msg.allocator.free(data);

    try self.nl.send(data);
    return self.recv();
}

/// Reads one reply datagram and decodes the netlink header, the `ifinfomsg`
/// and the IFNAME attribute (other attributes are skipped), then consumes the
/// trailing ACK.
/// Returns `error.InvalidResponse` for a truncated reply or for an error
/// message that carries no failure code; a kernel error is returned via
/// `RtNetLink.handle_ack`.
fn recv(self: *LinkGet) !LinkMessage {
    var buff: [512]u8 = undefined;
    const n = try self.nl.recv(&buff);
    if (n < @sizeOf(linux.nlmsghdr)) return error.InvalidResponse;

    var start: usize = 0;
    var link_info = LinkMessage.init(self.nl.allocator, .create); // req_type doesn't matter here
    errdefer link_info.deinit();

    const header = std.mem.bytesAsValue(linux.nlmsghdr, buff[0..@sizeOf(linux.nlmsghdr)]);
    if (header.type == .ERROR) {
        if (n < @sizeOf(RtNetLink.NlMsgError)) return error.InvalidResponse;
        const response = std.mem.bytesAsValue(RtNetLink.NlMsgError, buff[0..]);
        try RtNetLink.handle_ack(response.*);
        // handle_ack only returns normally for a success code, which carries
        // no link data; report it instead of hitting undefined behavior.
        return error.InvalidResponse;
    }
    if (n < @sizeOf(linux.nlmsghdr) + @sizeOf(linux.ifinfomsg)) return error.InvalidResponse;
    start += @sizeOf(linux.nlmsghdr);
    link_info.hdr = header.*;

    log.info("header: {}", .{header});
    const ifinfo = std.mem.bytesAsValue(linux.ifinfomsg, buff[start .. start + @sizeOf(linux.ifinfomsg)]);
    start += @sizeOf(linux.ifinfomsg);
    link_info.msg.header = ifinfo.*;

    log.info("ifinfo: {}", .{ifinfo});
    // Attributes belong to this message only: stop at its declared length,
    // never past the bytes actually received.
    const end = @min(n, @as(usize, header.len));
    while (start + @sizeOf(linux.rtattr) <= end) {
        const rtattr = std.mem.bytesAsValue(linux.rtattr, buff[start .. start + @sizeOf(linux.rtattr)]);
        const attr_len: usize = rtattr.len;
        // A length shorter than the header would never advance `start`.
        if (attr_len < @sizeOf(linux.rtattr) or start + attr_len > end) break;
        switch (rtattr.type) {
            .IFNAME => {
                const payload = buff[start + @sizeOf(linux.rtattr) .. start + attr_len];
                // skip null terminating byte
                const value = if (payload.len > 0) payload[0 .. payload.len - 1] else payload;
                const ifname = try self.allocator.alloc(u8, value.len);
                errdefer self.allocator.free(ifname);
                @memcpy(ifname, value);
                log.info("name: {s}", .{ifname});
                try link_info.addAttr(.{ .name = ifname });
            },
            else => {},
        }
        start += nalign(attr_len);
    }

    // TODO: handle multipart messages
    // parse ACK/NACK response
    try self.nl.recv_ack();
    return link_info;
}

--------------------------------------------------------------------------------
/src/rtnetlink/route/get.zig:
--------------------------------------------------------------------------------
const std = @import("std");
const linux = std.os.linux;
const log = std.log;
const NetLink = @import("../rtnetlink.zig");
const RouteMessage = @import("route.zig");
const Attr = @import("attrs.zig").RtAttr;
const nalign = @import("../utils.zig").nalign;

const Get = @This();

msg: RouteMessage,
nl: *NetLink,
allocator: std.mem.Allocator,

pub fn init(allocator: std.mem.Allocator, nl: *NetLink) Get {
    var msg = RouteMessage.init(allocator, .get);
    msg.msg.hdr.scope = .Universe;
    msg.msg.hdr.type = .Unspec;
    msg.msg.hdr.table = .Unspec;
    msg.msg.hdr.protocol = .Unspec;

    msg.hdr.flags |= linux.NLM_F_DUMP;

    return .{
        .nl = nl,
        .msg = msg,
        .allocator = allocator,
    };
}

pub fn exec(self: *Get) ![]RouteMessage {
    const msg = try self.msg.compose();
    defer self.allocator.free(msg);

    try self.nl.send(msg);
return try self.recv();
}

/// Reads datagrams until the kernel signals the end of the dump and returns
/// every route message received. Caller owns the returned slice and must
/// `deinit` each message.
fn recv(self: *Get) ![]RouteMessage {
    var buff: [4096]u8 = undefined;

    var n = try self.nl.recv(&buff);

    var response = std.ArrayList(RouteMessage).init(self.allocator);
    errdefer {
        // Each message owns an attribute list; free those before the list itself.
        for (response.items) |*item| item.deinit();
        response.deinit();
    }
    outer: while (n != 0) {
        var d: usize = 0;
        while (d < n) {
            // Only hand the parser the bytes that were actually received.
            var msg = (try self.parseMessage(buff[d..n])) orelse break :outer;
            response.append(msg) catch |err| {
                msg.deinit();
                return err;
            };
            // Messages inside one datagram start on aligned offsets.
            d += nalign(msg.hdr.len);
        }
        n = try self.nl.recv(&buff);
    }
    return response.toOwnedSlice();
}

/// Decodes the message at the start of `buff`.
/// Returns `null` for a DONE message. A kernel error is returned via
/// `NetLink.handle_ack`; a truncated or inconsistent message, or an error
/// message without a failure code, yields `error.InvalidResponse`.
/// Only the Gateway and Oif attributes are decoded.
fn parseMessage(self: *Get, buff: []u8) !?RouteMessage {
    if (buff.len < @sizeOf(linux.nlmsghdr)) return error.InvalidResponse;
    const header = std.mem.bytesAsValue(linux.nlmsghdr, buff[0..@sizeOf(linux.nlmsghdr)]);
    if (header.type == .ERROR) {
        if (buff.len < @sizeOf(NetLink.NlMsgError)) return error.InvalidResponse;
        const response = std.mem.bytesAsValue(NetLink.NlMsgError, buff[0..]);
        try NetLink.handle_ack(response.*);
        // handle_ack returned normally, i.e. a success code: there is no
        // route in this message, so report it rather than invoke UB.
        return error.InvalidResponse;
    } else if (header.type == .DONE) {
        return null;
    }

    const fixed_len = @sizeOf(linux.nlmsghdr) + @sizeOf(RouteMessage.RouteHeader);
    const len: usize = header.len;
    if (len < fixed_len or len > buff.len) return error.InvalidResponse;

    var msg = RouteMessage.init(self.allocator, .create);
    errdefer msg.deinit();

    msg.hdr = header.*;

    const hdr = std.mem.bytesAsValue(RouteMessage.RouteHeader, buff[@sizeOf(linux.nlmsghdr)..]);
    msg.msg.hdr = hdr.*;

    var start: usize = fixed_len;
    while (start + @sizeOf(Attr) <= len) {
        const attr = std.mem.bytesAsValue(Attr, buff[start..]);
        const attr_len: usize = attr.len;
        // A length shorter than the header would never advance `start`.
        if (attr_len < @sizeOf(Attr) or start + attr_len > len) break;
        const payload = buff[start + @sizeOf(Attr) .. start + attr_len];
        // TODO: parse more attrs
        switch (attr.type) {
            .Gateway => if (payload.len >= 4) {
                try msg.addAttr(.{ .gateway = payload[0..4].* });
            },
            .Oif => if (payload.len >= 4) {
                const value = std.mem.bytesAsValue(u32, payload[0..4]);
                try msg.addAttr(.{ .output_if = value.* });
            },
            else => {},
        }

        start += nalign(attr_len);
    }

    return msg;
}

--------------------------------------------------------------------------------
/src/rtnetlink/link/attrs/link_info.zig:
--------------------------------------------------------------------------------
const std = @import("std");
const link = @import("../link.zig");
const linux = std.os.linux;
const nalign = @import("../../utils.zig").nalign;
const c = @cImport({
    @cInclude("linux/if_link.h");
    @cInclude("linux/veth.h");
});

const LinkInfoKind: linux.IFLA = @enumFromInt(c.IFLA_INFO_KIND);
const LinkInfoData: linux.IFLA = @enumFromInt(c.IFLA_INFO_DATA);
const VethInfoPeer: linux.IFLA = @enumFromInt(c.VETH_INFO_PEER);

const Kind = enum {
    veth,
    bridge,

    fn size(self: Kind) usize {
        return nalign(@tagName(self).len + @sizeOf(linux.rtattr));
    }

    fn encode(self: Kind, buff: []u8) !usize {
        const attr_size = @sizeOf(linux.rtattr);
        const hdr = linux.rtattr{ .len = @intCast(self.size()), .type = LinkInfoKind };
        @memcpy(buff[0..attr_size], std.mem.asBytes(&hdr));

        const value = @tagName(self);
        @memcpy(buff[attr_size ..
attr_size + value.len], value); 29 | return hdr.len; 30 | } 31 | }; 32 | 33 | const Info = union(enum) { 34 | peer_info: link.LinkInfo, 35 | fn info_type(self: Info) linux.IFLA { 36 | return switch (self) { 37 | .peer_info => VethInfoPeer, 38 | }; 39 | } 40 | 41 | fn encode(self: Info, buff: []u8) !usize { 42 | const header = linux.rtattr{ .len = @intCast(self.size()), .type = self.info_type() }; 43 | @memcpy(buff[0..@sizeOf(linux.rtattr)], std.mem.asBytes(&header)); 44 | switch (self) { 45 | inline else => |v| { 46 | try v.encode(buff[@sizeOf(linux.rtattr)..]); 47 | }, 48 | } 49 | 50 | return header.len; 51 | } 52 | 53 | fn size(self: Info) usize { 54 | const val_size = switch (self) { 55 | inline else => |v| v.size(), 56 | }; 57 | return nalign(val_size + @sizeOf(linux.rtattr)); 58 | } 59 | 60 | fn deinit(self: *Info) void { 61 | switch (self.*) { 62 | .peer_info => |*val| val.deinit(), 63 | } 64 | } 65 | }; 66 | 67 | pub const LinkInfoAttr = struct { 68 | info: ?Info = null, 69 | kind: Kind, 70 | 71 | pub fn encode(self: LinkInfoAttr, buff: []u8) anyerror!usize { 72 | var start: usize = 0; 73 | const len = self.size(); 74 | 75 | // link info kind 76 | start += try self.kind.encode(buff[0..]); 77 | 78 | // link info data 79 | const hdr3 = linux.rtattr{ .len = @intCast(len - start), .type = LinkInfoData }; 80 | @memcpy(buff[start .. 
start + @sizeOf(linux.rtattr)], std.mem.asBytes(&hdr3)); 81 | start += @sizeOf(linux.rtattr); 82 | 83 | if (self.info) |info| { 84 | _ = try info.encode(buff[start..]); 85 | } 86 | return len; 87 | } 88 | 89 | pub fn size(self: LinkInfoAttr) usize { 90 | var len = @sizeOf(linux.rtattr) + self.kind.size(); 91 | if (self.info) |info| { 92 | len += info.size(); 93 | } 94 | return nalign(len); 95 | } 96 | 97 | pub fn deinit(self: *LinkInfoAttr) void { 98 | if (self.info != null) { 99 | self.info.?.deinit(); 100 | } 101 | } 102 | }; 103 | -------------------------------------------------------------------------------- /src/rtnetlink/address/address.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const linux = std.os.linux; 3 | const nalign = @import("../utils.zig").nalign; 4 | const c = @cImport(@cInclude("linux/rtnetlink.h")); 5 | const AddressAttr = @import("attrs.zig").AddressAttr; 6 | 7 | pub const AddressScope = enum(u8) { 8 | Universe = c.RT_SCOPE_UNIVERSE, 9 | Site = c.RT_SCOPE_SITE, 10 | Link = c.RT_SCOPE_LINK, 11 | Host = c.RT_SCOPE_HOST, 12 | Nowhere = c.RT_SCOPE_NOWHERE, 13 | _, 14 | }; 15 | 16 | const AddressHeader = packed struct { 17 | family: u8, 18 | prefix_len: u8, 19 | flags: u8, 20 | scope: AddressScope, 21 | index: u32, 22 | }; 23 | 24 | pub const AddressInfo = struct { 25 | hdr: AddressHeader, 26 | attrs: std.ArrayList(AddressAttr), 27 | pub fn init(allocator: std.mem.Allocator) AddressInfo { 28 | return .{ 29 | .hdr = .{ 30 | .family = linux.AF.INET, 31 | .prefix_len = 0, 32 | .flags = 0, 33 | .scope = .Universe, 34 | .index = 0, 35 | }, 36 | .attrs = std.ArrayList(AddressAttr).init(allocator), 37 | }; 38 | } 39 | 40 | pub fn size(self: *const AddressInfo) usize { 41 | var s: usize = @sizeOf(AddressHeader); 42 | for (self.attrs.items) |a| { 43 | s += a.size(); 44 | } 45 | return nalign(s); 46 | } 47 | 48 | pub fn encode(self: *const AddressInfo, buff: []u8) !void { 49 | var 
start: usize = 0; 50 | @memcpy(buff[start .. start + @sizeOf(AddressHeader)], std.mem.asBytes(&self.hdr)); 51 | start += @sizeOf(AddressHeader); 52 | 53 | for (self.attrs.items) |attr| { 54 | start += try attr.encode(buff[start..]); 55 | } 56 | } 57 | }; 58 | 59 | const RequestType = enum { 60 | create, 61 | delete, 62 | get, 63 | 64 | fn toMsgType(self: RequestType) linux.NetlinkMessageType { 65 | return switch (self) { 66 | .create => .RTM_NEWADDR, 67 | .delete => .RTM_DELADDR, 68 | .get => .RTM_GETADDR, 69 | }; 70 | } 71 | 72 | fn getFlags(self: RequestType) u16 { 73 | var flags: u16 = linux.NLM_F_REQUEST | linux.NLM_F_ACK; 74 | switch (self) { 75 | .create => flags |= linux.NLM_F_CREATE | linux.NLM_F_EXCL, 76 | else => {}, 77 | } 78 | 79 | return flags; 80 | } 81 | }; 82 | 83 | const Addr = @This(); 84 | 85 | hdr: linux.nlmsghdr, 86 | msg: AddressInfo, 87 | allocator: std.mem.Allocator, 88 | 89 | pub fn init(allocator: std.mem.Allocator, req_type: RequestType) Addr { 90 | return .{ 91 | .hdr = .{ 92 | .type = req_type.toMsgType(), 93 | .flags = req_type.getFlags(), 94 | .len = 0, 95 | .pid = 0, 96 | .seq = 0, 97 | }, 98 | .msg = AddressInfo.init(allocator), 99 | .allocator = allocator, 100 | }; 101 | } 102 | 103 | pub fn compose(self: *Addr) ![]u8 { 104 | const size: usize = self.msg.size() + @sizeOf(linux.nlmsghdr); 105 | 106 | var buff = try self.allocator.alloc(u8, size); 107 | self.hdr.len = @intCast(size); 108 | 109 | // copy data into buff 110 | @memset(buff, 0); 111 | var start: usize = 0; 112 | @memcpy(buff[0..@sizeOf(linux.nlmsghdr)], std.mem.asBytes(&self.hdr)); 113 | start += @sizeOf(linux.nlmsghdr); 114 | try self.msg.encode(buff[start..]); 115 | 116 | return buff; 117 | } 118 | 119 | pub fn addAttr(self: *Addr, attr: AddressAttr) !void { 120 | try self.msg.attrs.append(attr); 121 | } 122 | -------------------------------------------------------------------------------- /src/rtnetlink/rtnetlink.zig: 
--------------------------------------------------------------------------------
const std = @import("std");
const log = std.log;
const linux = std.os.linux;
const link = @import("link.zig");
const addr = @import("address.zig");
const route = @import("route.zig");

const Self = @This();

fd: std.posix.socket_t,
allocator: std.mem.Allocator,

/// Opens a raw NETLINK_ROUTE socket and binds it.
/// Returns `error.SocketFailed` or `error.BindFailed`; the socket is closed
/// again if binding fails.
pub fn init(allocator: std.mem.Allocator) !Self {
    const rc = linux.socket(linux.AF.NETLINK, linux.SOCK.RAW, linux.NETLINK.ROUTE);
    // The raw syscall reports failure through its return value; check it
    // before narrowing to a file descriptor.
    if (std.posix.errno(rc) != .SUCCESS) {
        return error.SocketFailed;
    }
    const fd: i32 = @intCast(rc);
    errdefer std.posix.close(fd);

    const kernel_addr = linux.sockaddr.nl{ .pid = 0, .groups = 0 };
    const res = linux.bind(fd, @ptrCast(&kernel_addr), @sizeOf(@TypeOf(kernel_addr)));
    if (std.posix.errno(res) != .SUCCESS) {
        return error.BindFailed;
    }

    return .{ .allocator = allocator, .fd = fd };
}

/// Closes the netlink socket.
pub fn deinit(self: *Self) void {
    std.posix.close(self.fd);
}

/// Sends `msg` as one datagram.
/// Returns `error.ShortWrite` if the kernel accepted fewer bytes than given.
pub fn send(self: *Self, msg: []const u8) !void {
    // An assert is not checked in ReleaseFast; a short send must be a real error.
    const sent = try std.posix.send(self.fd, msg, 0);
    if (sent != msg.len) return error.ShortWrite;
}

/// Receives one datagram into `buff` and returns its length.
/// A zero-length read is reported as `error.InvalidResponse`.
pub fn recv(self: *Self, buff: []u8) !usize {
    const n = try std.posix.recv(self.fd, buff, 0);
    if (n == 0) {
        return error.InvalidResponse;
    }
    return n;
}

/// Receives one datagram and interprets it as the reply to a request:
/// DONE succeeds, ERROR is passed to `handle_ack`, any other type is ignored.
/// A datagram too short to hold the expected structure is `error.InvalidResponse`.
pub fn recv_ack(self: *Self) !void {
    var buff: [512]u8 = std.mem.zeroes([512]u8);
    const n = try std.posix.recv(self.fd, &buff, 0);
    if (n < @sizeOf(linux.nlmsghdr)) {
        return error.InvalidResponse;
    }

    const header = std.mem.bytesAsValue(linux.nlmsghdr, buff[0..@sizeOf(linux.nlmsghdr)]);
    if (header.type == .DONE) {
        return;
    } else if (header.type == .ERROR) { // ACK/NACK response
        if (n < @sizeOf(NlMsgError)) return error.InvalidResponse;
        const response = std.mem.bytesAsValue(NlMsgError, buff[0..]);
        return handle_ack(response.*);
    }
}

/// Wire layout of a netlink error/ack message: outer header, error code,
/// then the header of the request being answered.
/// `extern` keeps the fields in declaration order so received bytes can be
/// reinterpreted as this type.
pub const NlMsgError = extern struct {
    hdr: linux.nlmsghdr,
    err: i32,
    msg: linux.nlmsghdr,
};

pub fn handle_ack(msg: NlMsgError) !void {
    const code: linux.E =
@enumFromInt(-1 * msg.err); 64 | if (code != .SUCCESS) { 65 | log.info("err: {}", .{code}); 66 | return switch (code) { 67 | .EXIST => error.Exists, 68 | else => error.Error, 69 | }; 70 | } 71 | } 72 | 73 | pub fn linkAdd(self: *Self, options: link.LinkAdd.Options) !void { 74 | var la = link.LinkAdd.init(self.allocator, self, options); 75 | defer la.msg.deinit(); 76 | return la.exec(); 77 | } 78 | 79 | pub fn linkGet(self: *Self, options: link.LinkGet.Options) !link.LinkMessage { 80 | var lg = link.LinkGet.init(self.allocator, self, options); 81 | defer lg.msg.deinit(); 82 | return lg.exec(); 83 | } 84 | 85 | pub fn linkSet(self: *Self, options: link.LinkSet.Options) !void { 86 | var ls = link.LinkSet.init(self.allocator, self, options); 87 | defer ls.msg.deinit(); 88 | try ls.exec(); 89 | } 90 | 91 | pub fn linkDel(self: *Self, index: c_int) !void { 92 | var ls = link.LinkDelete.init(self.allocator, self, index); 93 | defer ls.msg.deinit(); 94 | try ls.exec(); 95 | } 96 | 97 | pub fn addrAdd(self: *Self, options: addr.AddrAdd.Options) !void { 98 | var a = addr.AddrAdd.init(self.allocator, self, options); 99 | return a.exec(); 100 | } 101 | 102 | pub fn routeAdd(self: *Self, options: route.RouteAdd.Options) !void { 103 | var ls = route.RouteAdd.init(self.allocator, self, options); 104 | defer ls.msg.deinit(); 105 | try ls.exec(); 106 | } 107 | 108 | /// get all ipv4 routes 109 | pub fn routeGet(self: *Self) ![]route.RouteMessage { 110 | var ls = route.RouteGet.init(self.allocator, self); 111 | defer ls.msg.deinit(); 112 | return ls.exec(); 113 | } 114 | -------------------------------------------------------------------------------- /src/rtnetlink/link/link.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const linux = std.os.linux; 3 | const LinkAttribute = @import("attrs.zig").LinkAttribute; 4 | 5 | const Link = @This(); 6 | 7 | pub const Flags = struct { 8 | pub const UP: c_uint = 1 << 0; 9 
| pub const BROADCAST: c_uint = 1 << 1; 10 | pub const DEBUG: c_uint = 1 << 2; 11 | pub const LOOPBACK: c_uint = 1 << 3; 12 | pub const POINTOPOINT: c_uint = 1 << 4; 13 | pub const NOTRAILERS: c_uint = 1 << 5; 14 | pub const RUNNING: c_uint = 1 << 6; 15 | pub const NOARP: c_uint = 1 << 7; 16 | pub const PROMISC: c_uint = 1 << 8; 17 | pub const ALLMULTI: c_uint = 1 << 9; 18 | pub const MASTER: c_uint = 1 << 10; 19 | pub const SLAVE: c_uint = 1 << 11; 20 | pub const MULTICAST: c_uint = 1 << 12; 21 | pub const PORTSEL: c_uint = 1 << 13; 22 | pub const AUTOMEDIA: c_uint = 1 << 14; 23 | pub const DYNAMIC: c_uint = 1 << 15; 24 | pub const LOWER_UP: c_uint = 1 << 16; 25 | pub const DORMANT: c_uint = 1 << 17; 26 | pub const ECHO: c_uint = 1 << 18; 27 | }; 28 | 29 | const RequestType = enum { 30 | create, 31 | delete, 32 | get, 33 | set, 34 | 35 | fn toMsgType(self: RequestType) linux.NetlinkMessageType { 36 | return switch (self) { 37 | .create => .RTM_NEWLINK, 38 | .delete => .RTM_DELLINK, 39 | .get => .RTM_GETLINK, 40 | .set => .RTM_SETLINK, 41 | }; 42 | } 43 | 44 | fn getFlags(self: RequestType) u16 { 45 | var flags: u16 = linux.NLM_F_REQUEST | linux.NLM_F_ACK; 46 | switch (self) { 47 | .create, .set => flags |= linux.NLM_F_CREATE | linux.NLM_F_EXCL, 48 | else => {}, 49 | } 50 | 51 | return flags; 52 | } 53 | }; 54 | 55 | pub const LinkInfo = struct { 56 | header: linux.ifinfomsg, 57 | attrs: std.ArrayList(LinkAttribute), 58 | 59 | pub fn init(allocator: std.mem.Allocator) LinkInfo { 60 | return .{ 61 | .header = .{ 62 | .family = linux.AF.UNSPEC, 63 | .type = 0, 64 | .flags = 0, 65 | .index = 0, 66 | .change = 0, 67 | }, 68 | 69 | .attrs = std.ArrayList(LinkAttribute).init(allocator), 70 | }; 71 | } 72 | 73 | pub fn size(self: *const LinkInfo) usize { 74 | var s: usize = @sizeOf(linux.ifinfomsg); 75 | for (self.attrs.items) |a| { 76 | s += a.size(); 77 | } 78 | return s; 79 | } 80 | 81 | pub fn encode(self: *const LinkInfo, buff: []u8) !void { 82 | var start: usize = 
0; 83 | @memcpy(buff[start .. start + @sizeOf(linux.ifinfomsg)], std.mem.asBytes(&self.header)); 84 | start += @sizeOf(linux.ifinfomsg); 85 | 86 | for (self.attrs.items) |attr| { 87 | start += try attr.encode(buff[start..]); 88 | } 89 | } 90 | 91 | pub fn deinit(self: *LinkInfo) void { 92 | for (self.attrs.items) |*attr| { 93 | switch (attr.*) { 94 | .link_info => |*val| val.deinit(), 95 | else => {}, 96 | } 97 | } 98 | self.attrs.deinit(); 99 | } 100 | }; 101 | 102 | hdr: linux.nlmsghdr, 103 | msg: LinkInfo, 104 | allocator: std.mem.Allocator, 105 | 106 | pub fn init(allocator: std.mem.Allocator, req_type: RequestType) Link { 107 | return .{ 108 | .hdr = .{ 109 | .type = req_type.toMsgType(), 110 | .flags = req_type.getFlags(), 111 | .len = 0, 112 | .pid = 0, 113 | .seq = 0, 114 | }, 115 | .msg = LinkInfo.init(allocator), 116 | .allocator = allocator, 117 | }; 118 | } 119 | 120 | pub fn compose(self: *Link) ![]u8 { 121 | const size: usize = self.msg.size() + @sizeOf(linux.nlmsghdr); 122 | 123 | var buff = try self.allocator.alloc(u8, size); 124 | self.hdr.len = @intCast(size); 125 | 126 | // copy data into buff 127 | @memset(buff, 0); 128 | var start: usize = 0; 129 | @memcpy(buff[0..@sizeOf(linux.nlmsghdr)], std.mem.asBytes(&self.hdr)); 130 | start += @sizeOf(linux.nlmsghdr); 131 | try self.msg.encode(buff[start..]); 132 | 133 | return buff; 134 | } 135 | 136 | pub fn addAttr(self: *Link, attr: LinkAttribute) !void { 137 | try self.msg.attrs.append(attr); 138 | } 139 | 140 | pub fn deinit(self: *Link) void { 141 | self.msg.deinit(); 142 | } 143 | -------------------------------------------------------------------------------- /src/container.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const log = std.log; 3 | const linux = std.os.linux; 4 | const checkErr = @import("utils.zig").checkErr; 5 | const c = @cImport(@cInclude("signal.h")); 6 | const Net = @import("net.zig"); 7 | const Cgroup = 
@import("cgroup.zig");
const Fs = @import("fs.zig");
const RunArgs = @import("args.zig").RunArgs;

/// Data handed to the cloned child through its single `usize` argument.
const ChildProcessArgs = struct {
    container: *Container,
    pipe: [2]i32,
    uid: linux.uid_t,
    gid: linux.gid_t,
};

const Container = @This();
name: []const u8,
cmd: []const []const u8,

fs: Fs,
net: Net,
cgroup: Cgroup,
allocator: std.mem.Allocator,

/// Builds a container description from the parsed `run` arguments.
pub fn init(run_args: RunArgs, allocator: std.mem.Allocator) !Container {
    return .{
        .name = run_args.name,
        .fs = Fs.init(run_args.rootfs_path),
        .cmd = run_args.cmd,

        .net = try Net.init(allocator, run_args.name),
        .allocator = allocator,
        .cgroup = try Cgroup.init(run_args.name, run_args.resources, allocator),
    };
}

/// Host-side network preparation, performed before the child is cloned.
fn initNetwork(self: *Container) !void {
    try self.net.enableNat();
    try self.net.setUpBridge();
    try self.net.createVethPair();
    try self.net.setupDnsResolverConfig(self.fs.rootfs);
}

/// Sets the hostname to the container name; the syscall result is ignored.
fn sethostname(self: *Container) void {
    _ = linux.syscall2(.sethostname, @intFromPtr(self.name.ptr), self.name.len);
}

/// Clones the container process into new namespaces, finishes the host-side
/// setup (veth move, cgroup, uid/gid maps), releases the child through the
/// pipe and waits for it.
/// Returns `error.CmdFailed` when the child's wait status is non-zero.
pub fn run(self: *Container) !void {
    // setup network virtual interfaces and namespace
    try self.initNetwork();

    var childp_args = ChildProcessArgs{ .container = self, .pipe = undefined, .uid = 0, .gid = 0 };
    try checkErr(linux.pipe(&childp_args.pipe), error.Pipe);
    // Both pipe ends are released on every exit path.
    defer std.posix.close(childp_args.pipe[1]);
    var read_end_open = true;
    defer if (read_end_open) std.posix.close(childp_args.pipe[0]);

    // The clone call below does not pass CLONE.VM, so this buffer can be
    // released on return.
    const stack = try self.allocator.alloc(u8, 1024 * 1024);
    defer self.allocator.free(stack);

    var ctid: i32 = 0;
    var ptid: i32 = 0;
    const clone_flags: u32 = linux.CLONE.NEWNET | linux.CLONE.NEWNS | linux.CLONE.NEWPID | linux.CLONE.NEWUTS | linux.CLONE.NEWIPC | linux.CLONE.NEWUSER | c.SIGCHLD;
    // The address one past the end of the buffer is passed as the stack argument.
    const pid = linux.clone(childFn, @intFromPtr(stack.ptr) + stack.len, clone_flags, @intFromPtr(&childp_args), &ptid, 0, &ctid);
    try checkErr(pid, error.CloneFailed);
    std.posix.close(childp_args.pipe[0]);
    read_end_open = false;

    // move one of the veth pairs to
    // the child process network namespace
    try self.net.moveVethToNs(@intCast(pid));
    // enter container cgroup
    try self.cgroup.enterCgroup(@intCast(pid));
    self.createUserRootMappings(@intCast(pid)) catch @panic("creating root user mapping failed");

    // signal done by writing to pipe
    const buff = [_]u8{0};
    _ = try std.posix.write(childp_args.pipe[1], &buff);

    const wait_res = std.posix.waitpid(@intCast(pid), 0);
    if (wait_res.status != 0) {
        return error.CmdFailed;
    }
}

// initializes the container environment
// and executes the user passed cmd
fn execCmd(self: *Container, uid: linux.uid_t, gid: linux.gid_t) !void {
    try checkErr(linux.setreuid(uid, uid), error.UID);
    try checkErr(linux.setregid(gid, gid), error.GID);

    self.sethostname();
    try self.fs.setup();
    try self.net.setupContainerVethIf();

    std.process.execv(self.allocator, self.cmd) catch return error.CmdFailed;
}

/// Entry point of the cloned child: waits for the parent's go-ahead on the
/// pipe, then sets up the container and executes the command.
export fn childFn(a: usize) u8 {
    const arg: *ChildProcessArgs = @ptrFromInt(a);
    std.posix.close(arg.pipe[1]);
    // block until parent sets up needed resources
    {
        var buff = [_]u8{1};
        _ = std.posix.read(arg.pipe[0], &buff) catch @panic("pipe read failed");
    }

    arg.container.execCmd(arg.uid, arg.gid) catch |e| {
        log.err("err: {}", .{e});
        @panic("run failed");
    };

    return 0;
}

fn createUserRootMappings(self: *Container, pid: linux.pid_t) !void {
    const uidmap_path = try std.fmt.allocPrint(self.allocator, "/proc/{}/uid_map", .{pid});
    defer self.allocator.free(uidmap_path);
    const gidmap_path = try std.fmt.allocPrint(self.allocator, "/proc/{}/gid_map", .{pid});
    defer self.allocator.free(gidmap_path);

    const uid_map = try std.fs.openFileAbsolute(uidmap_path, .{ .mode = .write_only });
    defer uid_map.close();
    const gid_map = try
std.fs.openFileAbsolute(gidmap_path, .{ .mode = .write_only }); 120 | defer gid_map.close(); 121 | 122 | // map root inside user namespace to the "nobody" user and group outside the namespace 123 | _ = try uid_map.write("0 65534 1"); 124 | _ = try gid_map.write("0 65534 1"); 125 | } 126 | 127 | pub fn deinit(self: *Container) void { 128 | self.cgroup.deinit() catch |e| { 129 | log.err("cgroup deinit failed: {}", .{e}); 130 | }; 131 | self.net.deinit() catch log.err("net deinit failed", .{}); 132 | } 133 | -------------------------------------------------------------------------------- /src/rtnetlink/route/route.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const nalign = @import("../utils.zig").nalign; 3 | const linux = std.os.linux; 4 | const RouteScope = @import("../address/address.zig").AddressScope; 5 | const c = @cImport(@cInclude("linux/rtnetlink.h")); 6 | const Attr = @import("attrs.zig").Attr; 7 | 8 | const Flags = enum(u32) { 9 | // next hop flags 10 | Dead = c.RTNH_F_DEAD, // Nexthop is dead (used by multipath) 11 | Pervasive = c.RTNH_F_PERVASIVE, // Do recursive gateway lookup 12 | Onlink = c.RTNH_F_ONLINK, // Gateway is forced on link 13 | Offload = c.RTNH_F_OFFLOAD, // Nexthop is offloaded 14 | Linkdown = c.RTNH_F_LINKDOWN, // carrier-down on nexthop 15 | Unresolved = c.RTNH_F_UNRESOLVED, // The entry is unresolved (ipmr) 16 | Trap = c.RTNH_F_TRAP, // Nexthop is trapping packets 17 | // flags 18 | Notify = c.RTM_F_NOTIFY, // Notify user of route change 19 | Cloned = c.RTM_F_CLONED, // This route is cloned 20 | Equalize = c.RTM_F_EQUALIZE, // Multipath equalizer: NI 21 | Prefix = c.RTM_F_PREFIX, // Prefix addresses 22 | LookupTable = c.RTM_F_LOOKUP_TABLE, // set rtm_table to FIB lookup result 23 | FibMatch = c.RTM_F_FIB_MATCH, // return full fib lookup match 24 | RtOffload = c.RTM_F_OFFLOAD, // route is offloaded 25 | RtTrap = c.RTM_F_TRAP, // route is trapping packets 26 | 
OffloadFailed = c.RTM_F_OFFLOAD_FAILED,

    _,
};

/// Route type stored in the route header's `type` field.
const RouteType = enum(u8) {
    Unspec = c.RTN_UNSPEC,
    Unicast = c.RTN_UNICAST,
    Local = c.RTN_LOCAL,
    Broadcast = c.RTN_BROADCAST,
    Anycast = c.RTN_ANYCAST,
    Multicast = c.RTN_MULTICAST,
    BlackHole = c.RTN_BLACKHOLE,
    Unreachable = c.RTN_UNREACHABLE,
    Prohibit = c.RTN_PROHIBIT,
    Throw = c.RTN_THROW,
    Nat = c.RTN_NAT,
    ExternalResolve = c.RTN_XRESOLVE,
    _,
};

/// Origin of a route, stored in the route header's `protocol` field.
const Protocol = enum(u8) {
    Unspec = c.RTPROT_UNSPEC,
    IcmpRedirect = c.RTPROT_REDIRECT,
    Kernel = c.RTPROT_KERNEL,
    Boot = c.RTPROT_BOOT,
    Static = c.RTPROT_STATIC,
    Gated = c.RTPROT_GATED,
    Ra = c.RTPROT_RA,
    Mrt = c.RTPROT_MRT,
    Zebra = c.RTPROT_ZEBRA,
    Bird = c.RTPROT_BIRD,
    DnRouted = c.RTPROT_DNROUTED,
    Xorp = c.RTPROT_XORP,
    Ntk = c.RTPROT_NTK,
    Dhcp = c.RTPROT_DHCP,
    Mrouted = c.RTPROT_MROUTED,
    KeepAlived = c.RTPROT_KEEPALIVED,
    Babel = c.RTPROT_BABEL,
    OpenNr = c.RTPROT_OPENR,
    Bgp = c.RTPROT_BGP,
    Isis = c.RTPROT_ISIS,
    Ospf = c.RTPROT_OSPF,
    Rip = c.RTPROT_RIP,
    Eigrp = c.RTPROT_EIGRP,
    _,
};

/// Routing table id stored in the route header's `table` field.
/// Non-exhaustive like the other header enums: the field is a full byte, so
/// a header that was not built by this code may hold a value other than the
/// five named here.
const RouteTable = enum(u8) {
    Unspec = c.RT_TABLE_UNSPEC,
    Compat = c.RT_TABLE_COMPAT,
    Default = c.RT_TABLE_DEFAULT,
    Main = c.RT_TABLE_MAIN,
    Local = c.RT_TABLE_LOCAL,
    _,
};

/// Fixed-size route header that follows the netlink header on the wire.
pub const RouteHeader = extern struct {
    family: u8 = linux.AF.INET,
    dest_prefix_len: u8 = 0,
    src_prefix_len: u8 = 0,
    tos: u8 = 0,
    table: RouteTable = .Main,
    protocol: Protocol = .Unspec,
    scope: RouteScope = .Universe,
    type: RouteType = .Unspec,
    flags: u32 = 0,
};

pub const RouteInfo = struct {
    hdr: RouteHeader,
    attrs: std.ArrayList(Attr),

    pub fn init(allocator: std.mem.Allocator) RouteInfo {
        return .{
            .hdr = .{},
            .attrs = std.ArrayList(Attr).init(allocator),
}; 103 | } 104 | 105 | pub fn size(self: *const RouteInfo) usize { 106 | var s: usize = @sizeOf(RouteHeader); 107 | for (self.attrs.items) |a| { 108 | s += a.size(); 109 | } 110 | return nalign(s); 111 | } 112 | 113 | pub fn encode(self: *const RouteInfo, buff: []u8) !void { 114 | @memcpy(buff[0..@sizeOf(RouteHeader)], std.mem.asBytes(&self.hdr)); 115 | var start: usize = @sizeOf(RouteHeader); 116 | 117 | for (self.attrs.items) |attr| { 118 | start += try attr.encode(buff[start..]); 119 | } 120 | } 121 | pub fn deinit(self: *RouteInfo) void { 122 | self.attrs.deinit(); 123 | } 124 | }; 125 | 126 | const RequestType = enum { 127 | create, 128 | delete, 129 | get, 130 | 131 | fn toMsgType(self: RequestType) linux.NetlinkMessageType { 132 | return switch (self) { 133 | .create => .RTM_NEWROUTE, 134 | .delete => .RTM_DELROUTE, 135 | .get => .RTM_GETROUTE, 136 | }; 137 | } 138 | 139 | fn getFlags(self: RequestType) u16 { 140 | var flags: u16 = linux.NLM_F_REQUEST | linux.NLM_F_ACK; 141 | switch (self) { 142 | .create => flags |= linux.NLM_F_CREATE | linux.NLM_F_EXCL, 143 | else => {}, 144 | } 145 | 146 | return flags; 147 | } 148 | }; 149 | 150 | hdr: linux.nlmsghdr, 151 | msg: RouteInfo, 152 | allocator: std.mem.Allocator, 153 | 154 | const Route = @This(); 155 | pub fn init(allocator: std.mem.Allocator, req_type: RequestType) Route { 156 | return .{ 157 | .hdr = .{ 158 | .type = req_type.toMsgType(), 159 | .flags = req_type.getFlags(), 160 | .len = 0, 161 | .pid = 0, 162 | .seq = 0, 163 | }, 164 | .msg = RouteInfo.init(allocator), 165 | .allocator = allocator, 166 | }; 167 | } 168 | 169 | pub fn compose(self: *Route) ![]u8 { 170 | const size: usize = self.msg.size() + @sizeOf(linux.nlmsghdr); 171 | 172 | var buff = try self.allocator.alloc(u8, size); 173 | self.hdr.len = @intCast(size); 174 | 175 | // copy data into buff 176 | @memset(buff, 0); 177 | var start: usize = 0; 178 | @memcpy(buff[0..@sizeOf(linux.nlmsghdr)], std.mem.asBytes(&self.hdr)); 179 | start += 
@sizeOf(linux.nlmsghdr); 180 | try self.msg.encode(buff[start..]); 181 | 182 | return buff; 183 | } 184 | 185 | pub fn addAttr(self: *Route, attr: Attr) !void { 186 | try self.msg.attrs.append(attr); 187 | } 188 | 189 | pub fn deinit(self: *Route) void { 190 | self.msg.deinit(); 191 | } 192 | -------------------------------------------------------------------------------- /src/net.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const log = std.log; 3 | const linux = std.os.linux; 4 | const utils = @import("utils.zig"); 5 | const checkErr = utils.checkErr; 6 | const INFO_PATH = utils.INFO_PATH; 7 | const NETNS_PATH = utils.NETNS_PATH; 8 | const ip = @import("ip.zig"); 9 | 10 | const NetLink = @import("rtnetlink/rtnetlink.zig"); 11 | 12 | cid: []const u8, 13 | nl: NetLink, 14 | allocator: std.mem.Allocator, 15 | const Net = @This(); 16 | 17 | pub fn init(allocator: std.mem.Allocator, cid: []const u8) !Net { 18 | return .{ 19 | .cid = cid, 20 | .nl = try NetLink.init(allocator), 21 | .allocator = allocator, 22 | }; 23 | } 24 | 25 | pub fn setUpBridge(self: *Net) !void { 26 | if (self.linkExists(utils.BRIDGE_NAME)) return; 27 | try self.nl.linkAdd(.{ .bridge = utils.BRIDGE_NAME }); 28 | 29 | var bridge = try self.nl.linkGet(.{ .name = utils.BRIDGE_NAME }); 30 | defer bridge.deinit(); 31 | try self.nl.linkSet(.{ .index = bridge.msg.header.index, .up = true }); 32 | try self.nl.addrAdd(.{ .index = bridge.msg.header.index, .addr = .{ 10, 0, 0, 1 }, .prefix_len = 24 }); // 33 | } 34 | 35 | fn setNetNs(fd: linux.fd_t) !void { 36 | const res = linux.syscall2(.setns, @intCast(fd), linux.CLONE.NEWNET); 37 | try checkErr(res, error.NetNsFailed); 38 | } 39 | 40 | /// enables snat on default interface 41 | /// this allows containers to access the internet 42 | pub fn enableNat(self: *Net) !void { 43 | const default_ifname = try self.getDefaultGatewayIfName(); 44 | try self.if_enable_snat(default_ifname); 45 
| } 46 | 47 | fn getDefaultGatewayIfName(self: *Net) ![]const u8 { 48 | const res = try self.nl.routeGet(); 49 | var if_index: ?u32 = null; 50 | var has_gtw = false; 51 | for (res) |*msg| { 52 | defer msg.deinit(); 53 | if (has_gtw) continue; 54 | for (msg.msg.attrs.items) |attr| { 55 | switch (attr) { 56 | .gateway => has_gtw = true, 57 | .output_if => |val| if_index = val, 58 | } 59 | } 60 | } 61 | const idx = if_index orelse return error.NotFound; 62 | var if_info = try self.nl.linkGet(.{ .index = idx }); 63 | defer if_info.deinit(); 64 | var name: ?[]const u8 = null; 65 | for (if_info.msg.attrs.items) |attr| { 66 | switch (attr) { 67 | .name => |val| { 68 | name = val; 69 | break; 70 | }, 71 | else => {}, 72 | } 73 | } 74 | 75 | return name orelse error.NotFound; 76 | } 77 | 78 | fn if_enable_snat(self: *Net, if_name: []const u8) !void { 79 | var check_rule = std.process.Child.init(&.{ "iptables", "-t", "nat", "-C", "POSTROUTING", "-o", if_name, "-j", "MASQUERADE" }, self.allocator); 80 | check_rule.stdout_behavior = .Ignore; 81 | check_rule.stderr_behavior = .Ignore; 82 | const check_rule_res = try check_rule.spawnAndWait(); 83 | if (check_rule_res.Exited == 0) return; 84 | 85 | // add rule if it doesn't exist 86 | var ch = std.process.Child.init(&.{ "iptables", "-t", "nat", "-A", "POSTROUTING", "-o", if_name, "-j", "MASQUERADE" }, self.allocator); 87 | ch.stdout_behavior = .Ignore; 88 | ch.stderr_behavior = .Ignore; 89 | const term = try ch.spawnAndWait(); 90 | if (term.Exited != 0) { 91 | return error.CmdFailed; 92 | } 93 | } 94 | 95 | pub fn createVethPair(self: *Net) !void { 96 | const veth0 = try std.mem.concat(self.allocator, u8, &.{ "veth0-", self.cid }); 97 | const veth1 = try std.mem.concat(self.allocator, u8, &.{ "veth1-", self.cid }); 98 | defer { 99 | self.allocator.free(veth0); 100 | self.allocator.free(veth1); 101 | } 102 | 103 | if (self.linkExists(veth0)) return; 104 | log.info("creating veth pair: {s} -- {s}", .{ veth0, veth1 }); 105 | 106 | 
try self.nl.linkAdd(.{ .veth = .{ veth0, veth1 } }); 107 | 108 | var veth0_info = try self.nl.linkGet(.{ .name = veth0 }); 109 | defer veth0_info.deinit(); 110 | 111 | // attach veth0 to host bridge 112 | var bridge = try self.nl.linkGet(.{ .name = utils.BRIDGE_NAME }); 113 | defer bridge.deinit(); 114 | try self.nl.linkSet(.{ .index = veth0_info.msg.header.index, .master = bridge.msg.header.index, .up = true }); 115 | 116 | var veth1_info = try self.nl.linkGet(.{ .name = veth1 }); 117 | defer veth1_info.deinit(); 118 | } 119 | 120 | // move veth1-xxx net interface to the pid's network namespace 121 | pub fn moveVethToNs(self: *Net, pid: linux.pid_t) !void { 122 | const pid_netns_path = try std.fmt.allocPrint(self.allocator, "/proc/{}/ns/net", .{pid}); 123 | defer self.allocator.free(pid_netns_path); 124 | const pid_netns = try std.fs.openFileAbsolute(pid_netns_path, .{}); 125 | defer pid_netns.close(); 126 | 127 | const veth_name = try std.fmt.allocPrint(self.allocator, "veth1-{s}", .{self.cid}); 128 | defer self.allocator.free(veth_name); 129 | const veth_info = try self.nl.linkGet(.{ .name = veth_name }); 130 | try self.nl.linkSet(.{ .index = veth_info.msg.header.index, .netns_fd = pid_netns.handle }); 131 | } 132 | 133 | // this must be executed in the child process 134 | // after creating a new network namespace using clone. 
/// Configures networking from inside the container: brings up `veth1-<cid>`,
/// gives it a random 10.0.0.x/24 style address from `ip.getRandomIpv4Addr`,
/// adds a route via gateway 10.0.0.1, and brings up `lo` with 127.0.0.1/8.
pub fn setupContainerVethIf(self: *Net) !void {
    const veth_name = try std.fmt.allocPrint(self.allocator, "veth1-{s}", .{self.cid});
    defer self.allocator.free(veth_name);

    // need to create new netlink connection because
    // the existing one is tied to the parent namespace
    var nl = try NetLink.init(self.allocator);
    defer nl.deinit();
    var veth1_info = try nl.linkGet(.{ .name = veth_name });
    defer veth1_info.deinit();

    try nl.linkSet(.{ .index = veth1_info.msg.header.index, .up = true });
    // TODO: use random private ip addrs that are not used
    try nl.addrAdd(.{ .index = veth1_info.msg.header.index, .addr = ip.getRandomIpv4Addr(), .prefix_len = 24 });
    try nl.routeAdd(.{ .gateway = .{ 10, 0, 0, 1 } });

    // setup container loopback interface
    var lo = try nl.linkGet(.{ .name = "lo" });
    defer lo.deinit();

    // The loopback address may already be assigned; only other errors are fatal.
    nl.addrAdd(.{ .index = lo.msg.header.index, .addr = .{ 127, 0, 0, 1 }, .prefix_len = 8 }) catch |e| {
        if (e != error.Exists) return e;
    };
    try nl.linkSet(.{ .index = lo.msg.header.index, .up = true });
}

/// Reports whether a link named `name` can be fetched over netlink.
/// Any lookup error is treated as "does not exist".
fn linkExists(self: *Net, name: []const u8) bool {
    var info = self.nl.linkGet(.{ .name = name }) catch return false;
    defer info.deinit();
    return true;
}

/// Copies the host's `/etc/resolv.conf` to `etc/resolv.conf` under `rootfs`.
/// `rootfs` is opened relative to the current working directory.
pub fn setupDnsResolverConfig(_: *Net, rootfs: []const u8) !void {
    // Each handle gets its defer right after it is opened, so the first one is
    // still closed when opening the second fails.
    var rootfs_dir = try std.fs.cwd().openDir(rootfs, .{});
    defer rootfs_dir.close();
    var etc_dir = try std.fs.cwd().openDir("/etc", .{});
    defer etc_dir.close();

    try etc_dir.copyFile("resolv.conf", rootfs_dir, "etc/resolv.conf", .{});
}

/// Deletes the container's veth pair and closes the netlink connection.
/// The connection is closed even when looking up or deleting the link fails.
pub fn deinit(self: *Net) !void {
    // Registered first so it runs last, after the link message is released.
    defer self.nl.deinit();

    // delete created veth pairs
    // deleting one will automatically remove the other
    const veth0_name = try std.mem.concat(self.allocator, u8, &.{ "veth0-", self.cid });
    defer self.allocator.free(veth0_name);
    var veth0 = try self.nl.linkGet(.{ .name = veth0_name });
    defer veth0.deinit();
    try self.nl.linkDel(veth0.msg.header.index);
}

--------------------------------------------------------------------------------