From a93a0a4f92edae5ec8a8638bb2511e16daf51aa9 Mon Sep 17 00:00:00 2001 From: Jose Colon Rodriguez Date: Fri, 26 Jan 2024 19:44:34 -0400 Subject: [PATCH] Initial TigerBeetle specifics strip. All 7 tests passing. --- .gitignore | 13 + build.zig | 18 + src/bounded_array.zig | 76 ++ src/fifo.zig | 152 ++++ src/io.zig | 30 + src/io/darwin.zig | 823 ++++++++++++++++++++++ src/io/linux.zig | 1126 +++++++++++++++++++++++++++++ src/io/windows.zig | 1209 ++++++++++++++++++++++++++++++++ src/low_level_hash_vectors.zig | 142 ++++ src/stdx.zig | 728 +++++++++++++++++++ src/test.zig | 654 +++++++++++++++++ src/time.zig | 112 +++ 12 files changed, 5083 insertions(+) create mode 100644 .gitignore create mode 100644 build.zig create mode 100644 src/bounded_array.zig create mode 100644 src/fifo.zig create mode 100644 src/io.zig create mode 100644 src/io/darwin.zig create mode 100644 src/io/linux.zig create mode 100644 src/io/windows.zig create mode 100644 src/low_level_hash_vectors.zig create mode 100644 src/stdx.zig create mode 100644 src/test.zig create mode 100644 src/time.zig diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c04a0ab --- /dev/null +++ b/.gitignore @@ -0,0 +1,13 @@ +# This file is for zig-specific build artifacts. +# If you have OS-specific or editor-specific files to ignore, +# such as *.swp or .DS_Store, put those in your global +# ~/.gitignore and put this in your ~/.gitconfig: +# +# [core] +# excludesfile = ~/.gitignore +# +# Cheers! +# -andrewrk + +zig-cache/ +zig-out/ diff --git a/build.zig b/build.zig new file mode 100644 index 0000000..9ab5f74 --- /dev/null +++ b/build.zig @@ -0,0 +1,18 @@ +const std = @import("std"); + +pub fn build(b: *std.Build) void { + const target = b.standardTargetOptions(.{}); + const optimize = b.standardOptimizeOption(.{}); + + _ = b.addModule("io", .{ .source_file = .{ .path = "src/io.zig" } }); + + const main_tests = b.addTest(.{ + .root_source_file = .{ .path = "src/test.zig" }, + .target = target, + .optimize = optimize, + }); + + const run_main_tests = b.addRunArtifact(main_tests); + const test_step = b.step("test", "Run library tests"); + test_step.dependOn(&run_main_tests.step); +} diff --git a/src/bounded_array.zig b/src/bounded_array.zig new file mode 100644 index 0000000..c288093 --- /dev/null +++ b/src/bounded_array.zig @@ -0,0 +1,76 @@ +const std = @import("std"); +const assert = std.debug.assert; + +/// A version of standard `BoundedArray` with TigerBeetle-idiomatic APIs. +/// +/// See for the original reason for +/// wrapping --- we need an `fn count` which returns an `usize`, instead of potentially much smaller +/// type which stores the length internally. +pub fn BoundedArray(comptime T: type, comptime capacity: usize) type { + const Inner = @import("std").BoundedArray(T, capacity); // smuggle the std version past tidy + + return struct { + inner: Inner = Inner{}, + + const Self = @This(); + + pub inline fn from_slice(items: []const T) error{Overflow}!Self { + return .{ .inner = try Inner.fromSlice(items) }; + } + + pub inline fn count(array: *const Self) usize { + return array.inner.len; + } + + /// Returns count of elements in this BoundedArray in the specified integer types, + /// checking at compile time that it indeed can represent the length. 
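+        /// (The check is implicit: the inner array stores its length as a potentially smaller
+        /// integer type, so `return array.inner.len` coerces to `Int` only when `Int` is at
+        /// least as wide; a narrower `Int` is a compile error.)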
+ pub inline fn count_as(array: *const Self, comptime Int: type) Int { + return array.inner.len; + } + + pub inline fn full(self: Self) bool { + return self.count() == capacity; + } + + pub inline fn empty(self: Self) bool { + return self.count() == 0; + } + + pub inline fn get(array: *const Self, index: usize) T { + return array.inner.get(index); + } + + pub inline fn slice(array: *Self) []T { + return array.inner.slice(); + } + + pub inline fn const_slice(array: *const Self) []const T { + return array.inner.constSlice(); + } + + pub inline fn add_one_assume_capacity(array: *Self) *T { + return array.inner.addOneAssumeCapacity(); + } + + pub inline fn append_assume_capacity(array: *Self, item: T) void { + array.inner.appendAssumeCapacity(item); + } + + pub inline fn writer(self: *Self) Inner.Writer { + return self.inner.writer(); + } + + pub inline fn swap_remove(array: *Self, index: usize) T { + return array.inner.swapRemove(index); + } + + pub inline fn truncate(array: *Self, count_new: usize) void { + assert(count_new <= array.count()); + array.inner.len = @intCast(count_new); // can't overflow due to check above. + } + + pub inline fn clear(array: *Self) void { + array.inner.len = 0; + } + }; +} diff --git a/src/fifo.zig b/src/fifo.zig new file mode 100644 index 0000000..22e13b2 --- /dev/null +++ b/src/fifo.zig @@ -0,0 +1,152 @@ +const std = @import("std"); +const assert = std.debug.assert; + +/// An intrusive first in/first out linked list. +/// The element type T must have a field called "next" of type ?*T +pub fn FIFO(comptime T: type) type { + return struct { + const Self = @This(); + + in: ?*T = null, + out: ?*T = null, + count: u64 = 0, + // This should only be null if you're sure we'll never want to monitor `count`. + name: ?[]const u8, + + pub fn push(self: *Self, elem: *T) void { + assert(elem.next == null); + if (self.in) |in| { + in.next = elem; + self.in = elem; + } else { + assert(self.out == null); + self.in = elem; + self.out = elem; + } + self.count += 1; + } + + pub fn pop(self: *Self) ?*T { + const ret = self.out orelse return null; + self.out = ret.next; + ret.next = null; + if (self.in == ret) self.in = null; + self.count -= 1; + return ret; + } + + pub fn peek_last(self: Self) ?*T { + return self.in; + } + + pub fn peek(self: Self) ?*T { + return self.out; + } + + pub fn empty(self: Self) bool { + return self.peek() == null; + } + + /// Returns whether the linked list contains the given *exact element* (pointer comparison). + pub fn contains(self: *const Self, elem_needle: *const T) bool { + var iterator = self.peek(); + while (iterator) |elem| : (iterator = elem.next) { + if (elem == elem_needle) return true; + } + return false; + } + + /// Remove an element from the FIFO. Asserts that the element is + /// in the FIFO. This operation is O(N), if this is done often you + /// probably want a different data structure. 
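+        /// Example (with a hypothetical `Node` type containing `next: ?*Node = null`):
+        ///   var fifo: FIFO(Node) = .{ .name = null };
+        ///   fifo.push(&a); fifo.push(&b); fifo.push(&c);
+        ///   fifo.remove(&a); // O(1): removing the head takes the pop() fast path.
+        ///   fifo.remove(&c); // O(N): walks from `out` to find the predecessor.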
+ pub fn remove(self: *Self, to_remove: *T) void { + if (to_remove == self.out) { + _ = self.pop(); + return; + } + var it = self.out; + while (it) |elem| : (it = elem.next) { + if (to_remove == elem.next) { + if (to_remove == self.in) self.in = elem; + elem.next = to_remove.next; + to_remove.next = null; + self.count -= 1; + break; + } + } else unreachable; + } + + pub fn reset(self: *Self) void { + self.* = .{ .name = self.name }; + } + }; +} + +test "FIFO: push/pop/peek/remove/empty" { + const testing = @import("std").testing; + + const Foo = struct { next: ?*@This() = null }; + + var one: Foo = .{}; + var two: Foo = .{}; + var three: Foo = .{}; + + var fifo: FIFO(Foo) = .{ .name = null }; + try testing.expect(fifo.empty()); + + fifo.push(&one); + try testing.expect(!fifo.empty()); + try testing.expectEqual(@as(?*Foo, &one), fifo.peek()); + try testing.expect(fifo.contains(&one)); + try testing.expect(!fifo.contains(&two)); + try testing.expect(!fifo.contains(&three)); + + fifo.push(&two); + fifo.push(&three); + try testing.expect(!fifo.empty()); + try testing.expectEqual(@as(?*Foo, &one), fifo.peek()); + try testing.expect(fifo.contains(&one)); + try testing.expect(fifo.contains(&two)); + try testing.expect(fifo.contains(&three)); + + fifo.remove(&one); + try testing.expect(!fifo.empty()); + try testing.expectEqual(@as(?*Foo, &two), fifo.pop()); + try testing.expectEqual(@as(?*Foo, &three), fifo.pop()); + try testing.expectEqual(@as(?*Foo, null), fifo.pop()); + try testing.expect(fifo.empty()); + try testing.expect(!fifo.contains(&one)); + try testing.expect(!fifo.contains(&two)); + try testing.expect(!fifo.contains(&three)); + + fifo.push(&one); + fifo.push(&two); + fifo.push(&three); + fifo.remove(&two); + try testing.expect(!fifo.empty()); + try testing.expectEqual(@as(?*Foo, &one), fifo.pop()); + try testing.expectEqual(@as(?*Foo, &three), fifo.pop()); + try testing.expectEqual(@as(?*Foo, null), fifo.pop()); + try testing.expect(fifo.empty()); + + fifo.push(&one); + fifo.push(&two); + fifo.push(&three); + fifo.remove(&three); + try testing.expect(!fifo.empty()); + try testing.expectEqual(@as(?*Foo, &one), fifo.pop()); + try testing.expect(!fifo.empty()); + try testing.expectEqual(@as(?*Foo, &two), fifo.pop()); + try testing.expect(fifo.empty()); + try testing.expectEqual(@as(?*Foo, null), fifo.pop()); + try testing.expect(fifo.empty()); + + fifo.push(&one); + fifo.push(&two); + fifo.remove(&two); + fifo.push(&three); + try testing.expectEqual(@as(?*Foo, &one), fifo.pop()); + try testing.expectEqual(@as(?*Foo, &three), fifo.pop()); + try testing.expectEqual(@as(?*Foo, null), fifo.pop()); + try testing.expect(fifo.empty()); +} diff --git a/src/io.zig b/src/io.zig new file mode 100644 index 0000000..f4bf877 --- /dev/null +++ b/src/io.zig @@ -0,0 +1,30 @@ +const std = @import("std"); +const builtin = @import("builtin"); +const assert = std.debug.assert; +const os = std.os; + +const FIFO = @import("fifo.zig").FIFO; +const IO_Linux = @import("io/linux.zig").IO; +const IO_Darwin = @import("io/darwin.zig").IO; +const IO_Windows = @import("io/windows.zig").IO; + +pub const IO = switch (builtin.target.os.tag) { + .linux => IO_Linux, + .windows => IO_Windows, + .macos, .tvos, .watchos, .ios => IO_Darwin, + else => @compileError("IO is not supported for platform"), +}; + +pub fn bufferLimit(buffer_len: usize) usize { + // Linux limits how much may be written in a `pwrite()/pread()` call, which is `0x7ffff000` on + // both 64-bit and 32-bit systems, due to using a signed C int as the return 
value, as well as + // stuffing the errno codes into the last `4096` values. + // Darwin limits writes to `0x7fffffff` bytes, more than that returns `EINVAL`. + // The corresponding POSIX limit is `std.math.maxInt(isize)`. + const limit = switch (builtin.target.os.tag) { + .linux => 0x7ffff000, + .macos, .ios, .watchos, .tvos => std.math.maxInt(i32), + else => std.math.maxInt(isize), + }; + return @min(limit, buffer_len); +} diff --git a/src/io/darwin.zig b/src/io/darwin.zig new file mode 100644 index 0000000..8e4058d --- /dev/null +++ b/src/io/darwin.zig @@ -0,0 +1,823 @@ +const std = @import("std"); +const os = std.os; +const mem = std.mem; +const assert = std.debug.assert; +const log = std.log.scoped(.io); + +const FIFO = @import("../fifo.zig").FIFO; +const Time = @import("../time.zig").Time; +const bufferLimit = @import("../io.zig").bufferLimit; + +const sector_size = 4096; +const direct_io = true; + +pub const IO = struct { + kq: os.fd_t, + time: Time = .{}, + io_inflight: usize = 0, + timeouts: FIFO(Completion) = .{ .name = "io_timeouts" }, + completed: FIFO(Completion) = .{ .name = "io_completed" }, + io_pending: FIFO(Completion) = .{ .name = "io_pending" }, + + pub fn init(entries: u12, flags: u32) !IO { + _ = entries; + _ = flags; + + const kq = try os.kqueue(); + assert(kq > -1); + return IO{ .kq = kq }; + } + + pub fn deinit(self: *IO) void { + assert(self.kq > -1); + os.close(self.kq); + self.kq = -1; + } + + /// Pass all queued submissions to the kernel and peek for completions. + pub fn tick(self: *IO) !void { + return self.flush(false); + } + + /// Pass all queued submissions to the kernel and run for `nanoseconds`. + /// The `nanoseconds` argument is a u63 to allow coercion to the i64 used + /// in the __kernel_timespec struct. + pub fn run_for_ns(self: *IO, nanoseconds: u63) !void { + var timed_out = false; + var completion: Completion = undefined; + const on_timeout = struct { + fn callback( + timed_out_ptr: *bool, + _completion: *Completion, + result: TimeoutError!void, + ) void { + _ = _completion; + _ = result catch unreachable; + + timed_out_ptr.* = true; + } + }.callback; + + // Submit a timeout which sets the timed_out value to true to terminate the loop below. + self.timeout( + *bool, + &timed_out, + on_timeout, + &completion, + nanoseconds, + ); + + // Loop until our timeout completion is processed above, which sets timed_out to true. + // LLVM shouldn't be able to cache timed_out's value here since its address escapes above. + while (!timed_out) { + try self.flush(true); + } + } + + fn flush(self: *IO, wait_for_completions: bool) !void { + var io_pending = self.io_pending.peek(); + var events: [256]os.Kevent = undefined; + + // Check timeouts and fill events with completions in io_pending + // (they will be submitted through kevent). + // Timeouts are expired here and possibly pushed to the completed queue. + const next_timeout = self.flush_timeouts(); + const change_events = self.flush_io(&events, &io_pending); + + // Only call kevent() if we need to submit io events or if we need to wait for completions. + if (change_events > 0 or self.completed.empty()) { + // Zero timeouts for kevent() implies a non-blocking poll + var ts = std.mem.zeroes(os.timespec); + + // We need to wait (not poll) on kevent if there's nothing to submit or complete. 
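+            // (A zeroed timespec makes kevent() poll and return immediately; `ts` is only
+            // overwritten below when we actually need to block.)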
+ // We should never wait indefinitely (timeout_ptr = null for kevent) given: + // - tick() is non-blocking (wait_for_completions = false) + // - run_for_ns() always submits a timeout + if (change_events == 0 and self.completed.empty()) { + if (wait_for_completions) { + const timeout_ns = next_timeout orelse @panic("kevent() blocking forever"); + ts.tv_nsec = @as(@TypeOf(ts.tv_nsec), @intCast(timeout_ns % std.time.ns_per_s)); + ts.tv_sec = @as(@TypeOf(ts.tv_sec), @intCast(timeout_ns / std.time.ns_per_s)); + } else if (self.io_inflight == 0) { + return; + } + } + + const new_events = try os.kevent( + self.kq, + events[0..change_events], + events[0..events.len], + &ts, + ); + + // Mark the io events submitted only after kevent() successfully processed them + self.io_pending.out = io_pending; + if (io_pending == null) { + self.io_pending.in = null; + } + + self.io_inflight += change_events; + self.io_inflight -= new_events; + + for (events[0..new_events]) |event| { + const completion = @as(*Completion, @ptrFromInt(event.udata)); + completion.next = null; + self.completed.push(completion); + } + } + + var completed = self.completed; + self.completed.reset(); + while (completed.pop()) |completion| { + (completion.callback)(self, completion); + } + } + + fn flush_io(_: *IO, events: []os.Kevent, io_pending_top: *?*Completion) usize { + for (events, 0..) |*event, flushed| { + const completion = io_pending_top.* orelse return flushed; + io_pending_top.* = completion.next; + + const event_info = switch (completion.operation) { + .accept => |op| [2]c_int{ op.socket, os.system.EVFILT_READ }, + .connect => |op| [2]c_int{ op.socket, os.system.EVFILT_WRITE }, + .read => |op| [2]c_int{ op.fd, os.system.EVFILT_READ }, + .write => |op| [2]c_int{ op.fd, os.system.EVFILT_WRITE }, + .recv => |op| [2]c_int{ op.socket, os.system.EVFILT_READ }, + .send => |op| [2]c_int{ op.socket, os.system.EVFILT_WRITE }, + else => @panic("invalid completion operation queued for io"), + }; + + event.* = .{ + .ident = @as(u32, @intCast(event_info[0])), + .filter = @as(i16, @intCast(event_info[1])), + .flags = os.system.EV_ADD | os.system.EV_ENABLE | os.system.EV_ONESHOT, + .fflags = 0, + .data = 0, + .udata = @intFromPtr(completion), + }; + } + return events.len; + } + + fn flush_timeouts(self: *IO) ?u64 { + var min_timeout: ?u64 = null; + var timeouts: ?*Completion = self.timeouts.peek(); + while (timeouts) |completion| { + timeouts = completion.next; + + // NOTE: We could cache `now` above the loop but monotonic() should be cheap to call. + const now = self.time.monotonic(); + const expires = completion.operation.timeout.expires; + + // NOTE: remove() could be O(1) here with a doubly-linked-list + // since we know the previous Completion. 
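+            // Removing `completion` mid-iteration is safe: the iterator was already advanced
+            // to `completion.next` at the top of the loop.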
+ if (now >= expires) { + self.timeouts.remove(completion); + self.completed.push(completion); + continue; + } + + const timeout_ns = expires - now; + if (min_timeout) |min_ns| { + min_timeout = @min(min_ns, timeout_ns); + } else { + min_timeout = timeout_ns; + } + } + return min_timeout; + } + + /// This struct holds the data needed for a single IO operation + pub const Completion = struct { + next: ?*Completion, + context: ?*anyopaque, + callback: *const fn (*IO, *Completion) void, + operation: Operation, + }; + + const Operation = union(enum) { + accept: struct { + socket: os.socket_t, + }, + close: struct { + fd: os.fd_t, + }, + connect: struct { + socket: os.socket_t, + address: std.net.Address, + initiated: bool, + }, + read: struct { + fd: os.fd_t, + buf: [*]u8, + len: u32, + offset: u64, + }, + recv: struct { + socket: os.socket_t, + buf: [*]u8, + len: u32, + }, + send: struct { + socket: os.socket_t, + buf: [*]const u8, + len: u32, + }, + timeout: struct { + expires: u64, + }, + write: struct { + fd: os.fd_t, + buf: [*]const u8, + len: u32, + offset: u64, + }, + }; + + fn submit( + self: *IO, + context: anytype, + comptime callback: anytype, + completion: *Completion, + comptime operation_tag: std.meta.Tag(Operation), + operation_data: anytype, + comptime OperationImpl: type, + ) void { + const onCompleteFn = struct { + fn onComplete(io: *IO, _completion: *Completion) void { + // Perform the actual operation + const op_data = &@field(_completion.operation, @tagName(operation_tag)); + const result = OperationImpl.do_operation(op_data); + + // Requeue onto io_pending if error.WouldBlock + switch (operation_tag) { + .accept, .connect, .read, .write, .send, .recv => { + _ = result catch |err| switch (err) { + error.WouldBlock => { + _completion.next = null; + io.io_pending.push(_completion); + return; + }, + else => {}, + }; + }, + else => {}, + } + + // Complete the Completion by invoking the user callback. + return callback( + @ptrCast(@alignCast(_completion.context)), + _completion, + result, + ); + } + }.onComplete; + + completion.* = .{ + .next = null, + .context = context, + .callback = onCompleteFn, + .operation = @unionInit(Operation, @tagName(operation_tag), operation_data), + }; + + switch (operation_tag) { + .timeout => self.timeouts.push(completion), + else => self.completed.push(completion), + } + } + + pub const AcceptError = os.AcceptError || os.SetSockOptError; + + pub fn accept( + self: *IO, + comptime Context: type, + context: Context, + comptime callback: fn ( + context: Context, + completion: *Completion, + result: AcceptError!os.socket_t, + ) void, + completion: *Completion, + socket: os.socket_t, + ) void { + self.submit( + context, + callback, + completion, + .accept, + .{ + .socket = socket, + }, + struct { + fn do_operation(op: anytype) AcceptError!os.socket_t { + const fd = try os.accept( + op.socket, + null, + null, + os.SOCK.NONBLOCK | os.SOCK.CLOEXEC, + ); + errdefer os.close(fd); + + // Darwin doesn't support os.MSG_NOSIGNAL to avoid getting SIGPIPE on socket send(). + // Instead, it uses the SO_NOSIGPIPE socket option which does the same for all send()s.
+ os.setsockopt( + fd, + os.SOL.SOCKET, + os.SO.NOSIGPIPE, + &mem.toBytes(@as(c_int, 1)), + ) catch |err| return switch (err) { + error.TimeoutTooBig => unreachable, + error.PermissionDenied => error.NetworkSubsystemFailed, + error.AlreadyConnected => error.NetworkSubsystemFailed, + error.InvalidProtocolOption => error.ProtocolFailure, + else => |e| e, + }; + + return fd; + } + }, + ); + } + + pub const CloseError = error{ + FileDescriptorInvalid, + DiskQuota, + InputOutput, + NoSpaceLeft, + } || os.UnexpectedError; + + pub fn close( + self: *IO, + comptime Context: type, + context: Context, + comptime callback: fn ( + context: Context, + completion: *Completion, + result: CloseError!void, + ) void, + completion: *Completion, + fd: os.fd_t, + ) void { + self.submit( + context, + callback, + completion, + .close, + .{ + .fd = fd, + }, + struct { + fn do_operation(op: anytype) CloseError!void { + return switch (os.errno(os.system.close(op.fd))) { + .SUCCESS => {}, + .BADF => error.FileDescriptorInvalid, + .INTR => {}, // A success, see https://github.com/ziglang/zig/issues/2425 + .IO => error.InputOutput, + else => |errno| os.unexpectedErrno(errno), + }; + } + }, + ); + } + + pub const ConnectError = os.ConnectError; + + pub fn connect( + self: *IO, + comptime Context: type, + context: Context, + comptime callback: fn ( + context: Context, + completion: *Completion, + result: ConnectError!void, + ) void, + completion: *Completion, + socket: os.socket_t, + address: std.net.Address, + ) void { + self.submit( + context, + callback, + completion, + .connect, + .{ + .socket = socket, + .address = address, + .initiated = false, + }, + struct { + fn do_operation(op: anytype) ConnectError!void { + // Don't call connect after being rescheduled by io_pending, as it gives EISCONN. + // Instead, check the socket error to see if it has been connected successfully.
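+                    // First attempt: a non-blocking connect() typically fails with EINPROGRESS,
+                    // which surfaces as error.WouldBlock and requeues this completion onto
+                    // io_pending. When kevent() reports the socket writable, the retry takes
+                    // the `initiated == true` branch and reads SO_ERROR via getsockoptError().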
+ const result = switch (op.initiated) { + true => os.getsockoptError(op.socket), + else => os.connect(op.socket, &op.address.any, op.address.getOsSockLen()), + }; + + op.initiated = true; + return result; + } + }, + ); + } + + pub const ReadError = error{ + WouldBlock, + NotOpenForReading, + ConnectionResetByPeer, + Alignment, + InputOutput, + IsDir, + SystemResources, + Unseekable, + ConnectionTimedOut, + } || os.UnexpectedError; + + pub fn read( + self: *IO, + comptime Context: type, + context: Context, + comptime callback: fn ( + context: Context, + completion: *Completion, + result: ReadError!usize, + ) void, + completion: *Completion, + fd: os.fd_t, + buffer: []u8, + offset: u64, + ) void { + self.submit( + context, + callback, + completion, + .read, + .{ + .fd = fd, + .buf = buffer.ptr, + .len = @as(u32, @intCast(bufferLimit(buffer.len))), + .offset = offset, + }, + struct { + fn do_operation(op: anytype) ReadError!usize { + while (true) { + const rc = os.system.pread( + op.fd, + op.buf, + op.len, + @as(isize, @bitCast(op.offset)), + ); + return switch (os.errno(rc)) { + .SUCCESS => @as(usize, @intCast(rc)), + .INTR => continue, + .AGAIN => error.WouldBlock, + .BADF => error.NotOpenForReading, + .CONNRESET => error.ConnectionResetByPeer, + .FAULT => unreachable, + .INVAL => error.Alignment, + .IO => error.InputOutput, + .ISDIR => error.IsDir, + .NOBUFS => error.SystemResources, + .NOMEM => error.SystemResources, + .NXIO => error.Unseekable, + .OVERFLOW => error.Unseekable, + .SPIPE => error.Unseekable, + .TIMEDOUT => error.ConnectionTimedOut, + else => |err| os.unexpectedErrno(err), + }; + } + } + }, + ); + } + + pub const RecvError = os.RecvFromError; + + pub fn recv( + self: *IO, + comptime Context: type, + context: Context, + comptime callback: fn ( + context: Context, + completion: *Completion, + result: RecvError!usize, + ) void, + completion: *Completion, + socket: os.socket_t, + buffer: []u8, + ) void { + self.submit( + context, + callback, + completion, + .recv, + .{ + .socket = socket, + .buf = buffer.ptr, + .len = @as(u32, @intCast(bufferLimit(buffer.len))), + }, + struct { + fn do_operation(op: anytype) RecvError!usize { + return os.recv(op.socket, op.buf[0..op.len], 0); + } + }, + ); + } + + pub const SendError = os.SendError; + + pub fn send( + self: *IO, + comptime Context: type, + context: Context, + comptime callback: fn ( + context: Context, + completion: *Completion, + result: SendError!usize, + ) void, + completion: *Completion, + socket: os.socket_t, + buffer: []const u8, + ) void { + self.submit( + context, + callback, + completion, + .send, + .{ + .socket = socket, + .buf = buffer.ptr, + .len = @as(u32, @intCast(bufferLimit(buffer.len))), + }, + struct { + fn do_operation(op: anytype) SendError!usize { + return os.send(op.socket, op.buf[0..op.len], 0); + } + }, + ); + } + + pub const TimeoutError = error{Canceled} || os.UnexpectedError; + + pub fn timeout( + self: *IO, + comptime Context: type, + context: Context, + comptime callback: fn ( + context: Context, + completion: *Completion, + result: TimeoutError!void, + ) void, + completion: *Completion, + nanoseconds: u63, + ) void { + // Special case a zero timeout as a yield. 
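+        // The completion goes straight onto `completed`, so its callback runs on the next
+        // flush() without a kevent() round-trip or an entry in the timeouts queue.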
+ if (nanoseconds == 0) { + completion.* = .{ + .next = null, + .context = context, + .operation = undefined, + .callback = struct { + fn on_complete(_io: *IO, _completion: *Completion) void { + _ = _io; + const _context: Context = @ptrCast(@alignCast(_completion.context)); + callback(_context, _completion, {}); + } + }.on_complete, + }; + + self.completed.push(completion); + return; + } + + self.submit( + context, + callback, + completion, + .timeout, + .{ + .expires = self.time.monotonic() + nanoseconds, + }, + struct { + fn do_operation(_: anytype) TimeoutError!void { + return; // timeouts don't have errors for now + } + }, + ); + } + + pub const WriteError = os.PWriteError; + + pub fn write( + self: *IO, + comptime Context: type, + context: Context, + comptime callback: fn ( + context: Context, + completion: *Completion, + result: WriteError!usize, + ) void, + completion: *Completion, + fd: os.fd_t, + buffer: []const u8, + offset: u64, + ) void { + self.submit( + context, + callback, + completion, + .write, + .{ + .fd = fd, + .buf = buffer.ptr, + .len = @as(u32, @intCast(bufferLimit(buffer.len))), + .offset = offset, + }, + struct { + fn do_operation(op: anytype) WriteError!usize { + return os.pwrite(op.fd, op.buf[0..op.len], op.offset); + } + }, + ); + } + + pub const INVALID_SOCKET = -1; + + /// Creates a socket that can be used for async operations with the IO instance. + pub fn open_socket(self: *IO, family: u32, sock_type: u32, protocol: u32) !os.socket_t { + _ = self; + + const fd = try os.socket(family, sock_type | os.SOCK.NONBLOCK, protocol); + errdefer os.closeSocket(fd); + + // darwin doesn't support os.MSG_NOSIGNAL, but instead a socket option to avoid SIGPIPE. + try os.setsockopt(fd, os.SOL.SOCKET, os.SO.NOSIGPIPE, &mem.toBytes(@as(c_int, 1))); + return fd; + } + + /// Opens a directory with read only access. + pub fn open_dir(dir_path: []const u8) !os.fd_t { + return os.open(dir_path, os.O.CLOEXEC | os.O.RDONLY, 0); + } + + pub const INVALID_FILE: os.fd_t = -1; + + /// Opens or creates a journal file: + /// - For reading and writing. + /// - For Direct I/O (required on darwin). + /// - Obtains an advisory exclusive lock to the file descriptor. + /// - Allocates the file contiguously on disk if this is supported by the file system. + /// - Ensures that the file data (and file inode in the parent directory) is durable on disk. + /// The caller is responsible for ensuring that the parent directory inode is durable. + /// - Verifies that the file size matches the expected file size before returning. + pub fn open_file( + dir_fd: os.fd_t, + relative_path: []const u8, + size: u64, + method: enum { create, create_or_open, open }, + ) !os.fd_t { + assert(relative_path.len > 0); + assert(size % sector_size == 0); + + // TODO Use O_EXCL when opening as a block device to obtain a mandatory exclusive lock. + // This is much stronger than an advisory exclusive lock, and is required on some platforms. + + // Opening with O_DSYNC is essential for both durability and correctness. + // O_DSYNC enables us to omit fsync() calls in the data plane, since we sync to the disk on every write. + var flags: u32 = os.O.CLOEXEC | os.O.RDWR | os.O.DSYNC; + var mode: os.mode_t = 0; + + // TODO Document this and investigate whether this is in fact correct to set here. 
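+        // Note: O.LARGEFILE only matters for 32-bit file offsets; the @hasDecl() guard keeps
+        // this line a no-op on targets (such as darwin) whose `os.O` does not define it.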
+ if (@hasDecl(os.O, "LARGEFILE")) flags |= os.O.LARGEFILE; + + switch (method) { + .create => { + flags |= os.O.CREAT; + flags |= os.O.EXCL; + mode = 0o666; + log.info("creating \"{s}\"...", .{relative_path}); + }, + .create_or_open => { + flags |= os.O.CREAT; + mode = 0o666; + log.info("opening or creating \"{s}\"...", .{relative_path}); + }, + .open => { + log.info("opening \"{s}\"...", .{relative_path}); + }, + } + + // This is critical as we rely on O_DSYNC for fsync() whenever we write to the file: + assert((flags & os.O.DSYNC) > 0); + + // Be careful with openat(2): "If pathname is absolute, then dirfd is ignored." (man page) + assert(!std.fs.path.isAbsolute(relative_path)); + const fd = try os.openat(dir_fd, relative_path, flags, mode); + // TODO Return a proper error message when the path exists or does not exist (init/start). + errdefer os.close(fd); + + // TODO Check that the file is actually a file. + + // On darwin assume that Direct I/O is always supported. + // Use F_NOCACHE to disable the page cache as O_DIRECT doesn't exist. + if (direct_io) { + _ = try os.fcntl(fd, os.F.NOCACHE, 1); + } + + // Obtain an advisory exclusive lock that works only if all processes actually use flock(). + // LOCK_NB means that we want to fail the lock without waiting if another process has it. + os.flock(fd, os.LOCK.EX | os.LOCK.NB) catch |err| switch (err) { + error.WouldBlock => @panic("another process holds the data file lock"), + else => return err, + }; + + // Ask the file system to allocate contiguous sectors for the file (if possible): + // If the file system does not support `fallocate()`, then this could mean more seeks or a + // panic if we run out of disk space (ENOSPC). + if (method == .create) try fs_allocate(fd, size); + + // The best fsync strategy is always to fsync before reading because this prevents us from + // making decisions on data that was never durably written by a previously crashed process. + // We therefore always fsync when we open the path, also to wait for any pending O_DSYNC. + // Thanks to Alex Miller from FoundationDB for diving into our source and pointing this out. + try fs_sync(fd); + + // We fsync the parent directory to ensure that the file inode is durably written. + // The caller is responsible for the parent directory inode stored under the grandparent. + // We always do this when opening because we don't know if this was done before crashing. + try fs_sync(dir_fd); + + // TODO Document that `size` is now `data_file_size_min` from `main.zig`. + const stat = try os.fstat(fd); + if (stat.size < size) @panic("data file inode size was truncated or corrupted"); + + return fd; + } + + /// Darwin's fsync() syscall does not flush past the disk cache. We must use F_FULLFSYNC instead. + /// https://twitter.com/TigerBeetleDB/status/1422491736224436225 + fn fs_sync(fd: os.fd_t) !void { + _ = os.fcntl(fd, os.F.FULLFSYNC, 1) catch return os.fsync(fd); + } + + /// Allocates a file contiguously using fallocate() if supported. + /// Alternatively, writes to the last sector so that at least the file size is correct. + fn fs_allocate(fd: os.fd_t, size: u64) !void { + log.info("allocating {}...", .{std.fmt.fmtIntSizeBin(size)}); + + // Darwin doesn't have fallocate() but we can simulate it using fcntl()s. 
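+        // (F_PREALLOCATE below reserves the space; the ftruncate() at the end then extends
+        // the logical file size so that fstat().size reflects it.)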
+ // + // https://stackoverflow.com/a/11497568 + // https://api.kde.org/frameworks/kcoreaddons/html/posix__fallocate__mac_8h_source.html + // http://hg.mozilla.org/mozilla-central/file/3d846420a907/xpcom/glue/FileUtils.cpp#l61 + + const F_ALLOCATECONTIG = 0x2; // Allocate contiguous space. + const F_ALLOCATEALL = 0x4; // Allocate all or nothing. + const F_PEOFPOSMODE = 3; // Use relative offset from the seek pos mode. + const fstore_t = extern struct { + fst_flags: c_uint, + fst_posmode: c_int, + fst_offset: os.off_t, + fst_length: os.off_t, + fst_bytesalloc: os.off_t, + }; + + var store = fstore_t{ + .fst_flags = F_ALLOCATECONTIG | F_ALLOCATEALL, + .fst_posmode = F_PEOFPOSMODE, + .fst_offset = 0, + .fst_length = @as(os.off_t, @intCast(size)), + .fst_bytesalloc = 0, + }; + + // Try to pre-allocate contiguous space and fall back to default non-contiguous. + var res = os.system.fcntl(fd, os.F.PREALLOCATE, @intFromPtr(&store)); + if (os.errno(res) != .SUCCESS) { + store.fst_flags = F_ALLOCATEALL; + res = os.system.fcntl(fd, os.F.PREALLOCATE, @intFromPtr(&store)); + } + + switch (os.errno(res)) { + .SUCCESS => {}, + .ACCES => unreachable, // F_SETLK or F_SETSIZE of F_WRITEBOOTSTRAP + .BADF => return error.FileDescriptorInvalid, + .DEADLK => unreachable, // F_SETLKW + .INTR => unreachable, // F_SETLKW + .INVAL => return error.ArgumentsInvalid, // for F_PREALLOCATE (offset invalid) + .MFILE => unreachable, // F_DUPFD or F_DUPED + .NOLCK => unreachable, // F_SETLK or F_SETLKW + .OVERFLOW => return error.FileTooBig, + .SRCH => unreachable, // F_SETOWN + .OPNOTSUPP => return error.OperationNotSupported, // not reported but need same error union + else => |errno| return os.unexpectedErrno(errno), + } + + // Now actually perform the allocation. + return os.ftruncate(fd, size) catch |err| switch (err) { + error.AccessDenied => error.PermissionDenied, + else => |e| e, + }; + } +}; diff --git a/src/io/linux.zig b/src/io/linux.zig new file mode 100644 index 0000000..eefb08c --- /dev/null +++ b/src/io/linux.zig @@ -0,0 +1,1126 @@ +const std = @import("std"); +const assert = std.debug.assert; +const os = std.os; +const linux = os.linux; +const IO_Uring = linux.IO_Uring; +const io_uring_cqe = linux.io_uring_cqe; +const io_uring_sqe = linux.io_uring_sqe; +const log = std.log.scoped(.io); + +const stdx = @import("../stdx.zig"); +const FIFO = @import("../fifo.zig").FIFO; +const bufferLimit = @import("../io.zig").bufferLimit; +const parse_dirty_semver = stdx.parse_dirty_semver; + +const direct_io = true; +const direct_io_required = true; +const sector_size = 4096; + +pub const IO = struct { + ring: IO_Uring, + + /// Operations not yet submitted to the kernel and waiting on available space in the + /// submission queue. + unqueued: FIFO(Completion) = .{ .name = "io_unqueued" }, + + /// Completions that are ready to have their callbacks run. + completed: FIFO(Completion) = .{ .name = "io_completed" }, + + ios_queued: u64 = 0, + ios_in_kernel: u64 = 0, + + pub fn init(entries: u12, flags: u32) !IO { + // Detect the linux version to ensure that we support all io_uring ops used. 
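+        // uname().release is a string like "5.15.0-91-generic"; parse_dirty_semver is
+        // expected to tolerate the distribution suffix after "major.minor.patch"
+        // (hence "dirty").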
+ const uts = std.os.uname(); + const version = try parse_dirty_semver(&uts.release); + if (version.order(std.SemanticVersion{ .major = 5, .minor = 5, .patch = 0 }) == .lt) { + @panic("Linux kernel 5.5 or greater is required for io_uring OP_ACCEPT"); + } + + return IO{ .ring = try IO_Uring.init(entries, flags) }; + } + + pub fn deinit(self: *IO) void { + self.ring.deinit(); + } + + /// Pass all queued submissions to the kernel and peek for completions. + pub fn tick(self: *IO) !void { + // We assume that all timeouts submitted by `run_for_ns()` will be reaped by `run_for_ns()` + // and that `tick()` and `run_for_ns()` cannot be run concurrently. + // Therefore `timeouts` here will never be decremented and `etime` will always be false. + var timeouts: usize = 0; + var etime = false; + + try self.flush(0, &timeouts, &etime); + assert(etime == false); + + // Flush any SQEs that were queued while running completion callbacks in `flush()`: + // This is an optimization to avoid delaying submissions until the next tick. + // At the same time, we do not flush any ready CQEs since SQEs may complete synchronously. + // We guard against an io_uring_enter() syscall if we know we do not have any queued SQEs. + // We cannot use `self.ring.sq_ready()` here since this counts flushed and unflushed SQEs. + const queued = self.ring.sq.sqe_tail -% self.ring.sq.sqe_head; + if (queued > 0) { + try self.flush_submissions(0, &timeouts, &etime); + assert(etime == false); + } + } + + /// Pass all queued submissions to the kernel and run for `nanoseconds`. + /// The `nanoseconds` argument is a u63 to allow coercion to the i64 used + /// in the kernel_timespec struct. + pub fn run_for_ns(self: *IO, nanoseconds: u63) !void { + // We must use the same clock source used by io_uring (CLOCK_MONOTONIC) since we specify the + // timeout below as an absolute value. Otherwise, we may deadlock if the clock sources are + // dramatically different. Any kernel that supports io_uring will support CLOCK_MONOTONIC. + var current_ts: os.timespec = undefined; + os.clock_gettime(os.CLOCK.MONOTONIC, ¤t_ts) catch unreachable; + // The absolute CLOCK_MONOTONIC time after which we may return from this function: + const timeout_ts: os.linux.kernel_timespec = .{ + .tv_sec = current_ts.tv_sec, + .tv_nsec = current_ts.tv_nsec + nanoseconds, + }; + var timeouts: usize = 0; + var etime = false; + while (!etime) { + const timeout_sqe = self.ring.get_sqe() catch blk: { + // The submission queue is full, so flush submissions to make space: + try self.flush_submissions(0, &timeouts, &etime); + break :blk self.ring.get_sqe() catch unreachable; + }; + // Submit an absolute timeout that will be canceled if any other SQE completes first: + linux.io_uring_prep_timeout(timeout_sqe, &timeout_ts, 1, os.linux.IORING_TIMEOUT_ABS); + timeout_sqe.user_data = 0; + timeouts += 1; + + // We don't really want to count this timeout as an io, + // but it's tricky to track separately. + self.ios_queued += 1; + + // The amount of time this call will block is bounded by the timeout we just submitted: + try self.flush(1, &timeouts, &etime); + } + // Reap any remaining timeouts, which reference the timespec in the current stack frame. + // The busy loop here is required to avoid a potential deadlock, as the kernel determines + // when the timeouts are pushed to the completion queue, not us. 
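+        // flush_completions() decrements `timeouts` each time it reaps a CQE with
+        // user_data == 0, which is what eventually terminates this loop.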
+ while (timeouts > 0) _ = try self.flush_completions(0, &timeouts, &etime); + } + + fn flush(self: *IO, wait_nr: u32, timeouts: *usize, etime: *bool) !void { + // Flush any queued SQEs and reuse the same syscall to wait for completions if required: + try self.flush_submissions(wait_nr, timeouts, etime); + // We can now just peek for any CQEs without waiting and without another syscall: + try self.flush_completions(0, timeouts, etime); + + // The SQE array is empty from flush_submissions(). Fill it up with unqueued completions. + // This runs before `self.completed` is flushed below to prevent new IO from reserving SQE + // slots and potentially starving those in `self.unqueued`. + // Loop over a copy to avoid an infinite loop of `enqueue()` re-adding to `self.unqueued`. + { + var copy = self.unqueued; + self.unqueued.reset(); + while (copy.pop()) |completion| self.enqueue(completion); + } + + // Run completions only after all completions have been flushed: + // Loop until all completions are processed. Calls to complete() may queue more work + // and extend the duration of the loop, but this is fine as it 1) executes completions + // that become ready without going through another syscall from flush_submissions() and + // 2) potentially queues more SQEs to take advantage more of the next flush_submissions(). + while (self.completed.pop()) |completion| completion.complete(); + + // At this point, unqueued could have completions either by 1) those who didn't get an SQE + // during the popping of unqueued or 2) completion.complete() which start new IO. These + // unqueued completions will get priority to acquiring SQEs on the next flush(). + } + + fn flush_completions(self: *IO, wait_nr: u32, timeouts: *usize, etime: *bool) !void { + var cqes: [256]io_uring_cqe = undefined; + var wait_remaining = wait_nr; + while (true) { + // Guard against waiting indefinitely (if there are too few requests inflight), + // especially if this is not the first time round the loop: + const completed = self.ring.copy_cqes(&cqes, wait_remaining) catch |err| switch (err) { + error.SignalInterrupt => continue, + else => return err, + }; + if (completed > wait_remaining) wait_remaining = 0 else wait_remaining -= completed; + for (cqes[0..completed]) |cqe| { + self.ios_in_kernel -= 1; + + if (cqe.user_data == 0) { + timeouts.* -= 1; + // We are only done if the timeout submitted was completed due to time, not if + // it was completed due to the completion of an event, in which case `cqe.res` + // would be 0. It is possible for multiple timeout operations to complete at the + // same time if the nanoseconds value passed to `run_for_ns()` is very short. + if (-cqe.res == @intFromEnum(os.E.TIME)) etime.* = true; + continue; + } + const completion = @as(*Completion, @ptrFromInt(@as(usize, @intCast(cqe.user_data)))); + completion.result = cqe.res; + // We do not run the completion here (instead appending to a linked list) to avoid: + // * recursion through `flush_submissions()` and `flush_completions()`, + // * unbounded stack usage, and + // * confusing stack traces. + self.completed.push(completion); + } + + if (completed < cqes.len) break; + } + } + + fn flush_submissions(self: *IO, wait_nr: u32, timeouts: *usize, etime: *bool) !void { + while (true) { + const submitted = self.ring.submit_and_wait(wait_nr) catch |err| switch (err) { + error.SignalInterrupt => continue, + // Wait for some completions and then try again: + // See https://github.com/axboe/liburing/issues/281 re: error.SystemResources. 
+ // Be careful also that copy_cqes() will flush before entering to wait (it does): + // https://github.com/axboe/liburing/commit/35c199c48dfd54ad46b96e386882e7ac341314c5 + error.CompletionQueueOvercommitted, error.SystemResources => { + try self.flush_completions(1, timeouts, etime); + continue; + }, + else => return err, + }; + + self.ios_queued -= submitted; + self.ios_in_kernel += submitted; + + break; + } + } + + fn enqueue(self: *IO, completion: *Completion) void { + const sqe = self.ring.get_sqe() catch |err| switch (err) { + error.SubmissionQueueFull => { + self.unqueued.push(completion); + return; + }, + }; + completion.prep(sqe); + + self.ios_queued += 1; + } + + /// This struct holds the data needed for a single io_uring operation + pub const Completion = struct { + io: *IO, + result: i32 = undefined, + next: ?*Completion = null, + operation: Operation, + context: ?*anyopaque, + callback: *const fn (context: ?*anyopaque, completion: *Completion, result: *const anyopaque) void, + + fn prep(completion: *Completion, sqe: *io_uring_sqe) void { + switch (completion.operation) { + .accept => |*op| { + linux.io_uring_prep_accept( + sqe, + op.socket, + &op.address, + &op.address_size, + os.SOCK.CLOEXEC, + ); + }, + .close => |op| { + linux.io_uring_prep_close(sqe, op.fd); + }, + .connect => |*op| { + linux.io_uring_prep_connect( + sqe, + op.socket, + &op.address.any, + op.address.getOsSockLen(), + ); + }, + .read => |op| { + linux.io_uring_prep_read( + sqe, + op.fd, + op.buffer[0..bufferLimit(op.buffer.len)], + op.offset, + ); + }, + .recv => |op| { + linux.io_uring_prep_recv(sqe, op.socket, op.buffer, os.MSG.NOSIGNAL); + }, + .send => |op| { + linux.io_uring_prep_send(sqe, op.socket, op.buffer, os.MSG.NOSIGNAL); + }, + .timeout => |*op| { + linux.io_uring_prep_timeout(sqe, &op.timespec, 0, 0); + }, + .write => |op| { + linux.io_uring_prep_write( + sqe, + op.fd, + op.buffer[0..bufferLimit(op.buffer.len)], + op.offset, + ); + }, + } + sqe.user_data = @intFromPtr(completion); + } + + fn complete(completion: *Completion) void { + switch (completion.operation) { + .accept => { + const result: anyerror!os.socket_t = blk: { + if (completion.result < 0) { + const err = switch (@as(os.E, @enumFromInt(-completion.result))) { + .INTR => { + completion.io.enqueue(completion); + return; + }, + .AGAIN => error.WouldBlock, + .BADF => error.FileDescriptorInvalid, + .CONNABORTED => error.ConnectionAborted, + .FAULT => unreachable, + .INVAL => error.SocketNotListening, + .MFILE => error.ProcessFdQuotaExceeded, + .NFILE => error.SystemFdQuotaExceeded, + .NOBUFS => error.SystemResources, + .NOMEM => error.SystemResources, + .NOTSOCK => error.FileDescriptorNotASocket, + .OPNOTSUPP => error.OperationNotSupported, + .PERM => error.PermissionDenied, + .PROTO => error.ProtocolFailure, + else => |errno| os.unexpectedErrno(errno), + }; + break :blk err; + } else { + break :blk @as(os.socket_t, @intCast(completion.result)); + } + }; + call_callback(completion, &result); + }, + .close => { + const result: anyerror!void = blk: { + if (completion.result < 0) { + const err = switch (@as(os.E, @enumFromInt(-completion.result))) { + .INTR => {}, // A success, see https://github.com/ziglang/zig/issues/2425 + .BADF => error.FileDescriptorInvalid, + .DQUOT => error.DiskQuota, + .IO => error.InputOutput, + .NOSPC => error.NoSpaceLeft, + else => |errno| os.unexpectedErrno(errno), + }; + break :blk err; + } else { + assert(completion.result == 0); + } + }; + call_callback(completion, &result); + }, + .connect => { + const 
result: anyerror!void = blk: { + if (completion.result < 0) { + const err = switch (@as(os.E, @enumFromInt(-completion.result))) { + .INTR => { + completion.io.enqueue(completion); + return; + }, + .ACCES => error.AccessDenied, + .ADDRINUSE => error.AddressInUse, + .ADDRNOTAVAIL => error.AddressNotAvailable, + .AFNOSUPPORT => error.AddressFamilyNotSupported, + .AGAIN, .INPROGRESS => error.WouldBlock, + .ALREADY => error.OpenAlreadyInProgress, + .BADF => error.FileDescriptorInvalid, + .CONNREFUSED => error.ConnectionRefused, + .CONNRESET => error.ConnectionResetByPeer, + .FAULT => unreachable, + .ISCONN => error.AlreadyConnected, + .NETUNREACH => error.NetworkUnreachable, + .NOENT => error.FileNotFound, + .NOTSOCK => error.FileDescriptorNotASocket, + .PERM => error.PermissionDenied, + .PROTOTYPE => error.ProtocolNotSupported, + .TIMEDOUT => error.ConnectionTimedOut, + else => |errno| os.unexpectedErrno(errno), + }; + break :blk err; + } else { + assert(completion.result == 0); + } + }; + call_callback(completion, &result); + }, + .read => { + const result: anyerror!usize = blk: { + if (completion.result < 0) { + const err = switch (@as(os.E, @enumFromInt(-completion.result))) { + .INTR => { + completion.io.enqueue(completion); + return; + }, + .AGAIN => error.WouldBlock, + .BADF => error.NotOpenForReading, + .CONNRESET => error.ConnectionResetByPeer, + .FAULT => unreachable, + .INVAL => error.Alignment, + .IO => error.InputOutput, + .ISDIR => error.IsDir, + .NOBUFS => error.SystemResources, + .NOMEM => error.SystemResources, + .NXIO => error.Unseekable, + .OVERFLOW => error.Unseekable, + .SPIPE => error.Unseekable, + .TIMEDOUT => error.ConnectionTimedOut, + else => |errno| os.unexpectedErrno(errno), + }; + break :blk err; + } else { + break :blk @as(usize, @intCast(completion.result)); + } + }; + call_callback(completion, &result); + }, + .recv => { + const result: anyerror!usize = blk: { + if (completion.result < 0) { + const err = switch (@as(os.E, @enumFromInt(-completion.result))) { + .INTR => { + completion.io.enqueue(completion); + return; + }, + .AGAIN => error.WouldBlock, + .BADF => error.FileDescriptorInvalid, + .CONNREFUSED => error.ConnectionRefused, + .FAULT => unreachable, + .INVAL => unreachable, + .NOMEM => error.SystemResources, + .NOTCONN => error.SocketNotConnected, + .NOTSOCK => error.FileDescriptorNotASocket, + .CONNRESET => error.ConnectionResetByPeer, + .TIMEDOUT => error.ConnectionTimedOut, + .OPNOTSUPP => error.OperationNotSupported, + else => |errno| os.unexpectedErrno(errno), + }; + break :blk err; + } else { + break :blk @as(usize, @intCast(completion.result)); + } + }; + call_callback(completion, &result); + }, + .send => { + const result: anyerror!usize = blk: { + if (completion.result < 0) { + const err = switch (@as(os.E, @enumFromInt(-completion.result))) { + .INTR => { + completion.io.enqueue(completion); + return; + }, + .ACCES => error.AccessDenied, + .AGAIN => error.WouldBlock, + .ALREADY => error.FastOpenAlreadyInProgress, + .AFNOSUPPORT => error.AddressFamilyNotSupported, + .BADF => error.FileDescriptorInvalid, + .CONNRESET => error.ConnectionResetByPeer, + .DESTADDRREQ => unreachable, + .FAULT => unreachable, + .INVAL => unreachable, + .ISCONN => unreachable, + .MSGSIZE => error.MessageTooBig, + .NOBUFS => error.SystemResources, + .NOMEM => error.SystemResources, + .NOTCONN => error.SocketNotConnected, + .NOTSOCK => error.FileDescriptorNotASocket, + .OPNOTSUPP => error.OperationNotSupported, + .PIPE => error.BrokenPipe, + .TIMEDOUT => 
error.ConnectionTimedOut, + else => |errno| os.unexpectedErrno(errno), + }; + break :blk err; + } else { + break :blk @as(usize, @intCast(completion.result)); + } + }; + call_callback(completion, &result); + }, + .timeout => { + assert(completion.result < 0); + const result: anyerror!void = switch (@as(os.E, @enumFromInt(-completion.result))) { + .INTR => { + completion.io.enqueue(completion); + return; + }, + .CANCELED => error.Canceled, + .TIME => {}, // A success. + else => |errno| os.unexpectedErrno(errno), + }; + call_callback(completion, &result); + }, + .write => { + const result: anyerror!usize = blk: { + if (completion.result < 0) { + const err = switch (@as(os.E, @enumFromInt(-completion.result))) { + .INTR => { + completion.io.enqueue(completion); + return; + }, + .AGAIN => error.WouldBlock, + .BADF => error.NotOpenForWriting, + .DESTADDRREQ => error.NotConnected, + .DQUOT => error.DiskQuota, + .FAULT => unreachable, + .FBIG => error.FileTooBig, + .INVAL => error.Alignment, + .IO => error.InputOutput, + .NOSPC => error.NoSpaceLeft, + .NXIO => error.Unseekable, + .OVERFLOW => error.Unseekable, + .PERM => error.AccessDenied, + .PIPE => error.BrokenPipe, + .SPIPE => error.Unseekable, + else => |errno| os.unexpectedErrno(errno), + }; + break :blk err; + } else { + break :blk @as(usize, @intCast(completion.result)); + } + }; + call_callback(completion, &result); + }, + } + } + }; + + fn call_callback( + completion: *Completion, + result: *const anyopaque, + ) void { + completion.callback(completion.context, completion, result); + } + + /// This union encodes the set of operations supported as well as their arguments. + const Operation = union(enum) { + accept: struct { + socket: os.socket_t, + address: os.sockaddr = undefined, + address_size: os.socklen_t = @sizeOf(os.sockaddr), + }, + close: struct { + fd: os.fd_t, + }, + connect: struct { + socket: os.socket_t, + address: std.net.Address, + }, + read: struct { + fd: os.fd_t, + buffer: []u8, + offset: u64, + }, + recv: struct { + socket: os.socket_t, + buffer: []u8, + }, + send: struct { + socket: os.socket_t, + buffer: []const u8, + }, + timeout: struct { + timespec: os.linux.kernel_timespec, + }, + write: struct { + fd: os.fd_t, + buffer: []const u8, + offset: u64, + }, + }; + + pub const AcceptError = error{ + WouldBlock, + FileDescriptorInvalid, + ConnectionAborted, + SocketNotListening, + ProcessFdQuotaExceeded, + SystemFdQuotaExceeded, + SystemResources, + FileDescriptorNotASocket, + OperationNotSupported, + PermissionDenied, + ProtocolFailure, + } || os.UnexpectedError; + + pub fn accept( + self: *IO, + comptime Context: type, + context: Context, + comptime callback: fn ( + context: Context, + completion: *Completion, + result: AcceptError!os.socket_t, + ) void, + completion: *Completion, + socket: os.socket_t, + ) void { + completion.* = .{ + .io = self, + .context = context, + .callback = struct { + fn wrapper(ctx: ?*anyopaque, comp: *Completion, res: *const anyopaque) void { + callback( + @ptrCast(@alignCast(ctx)), + comp, + @as(*const AcceptError!os.socket_t, @ptrCast(@alignCast(res))).*, + ); + } + }.wrapper, + .operation = .{ + .accept = .{ + .socket = socket, + .address = undefined, + .address_size = @sizeOf(os.sockaddr), + }, + }, + }; + self.enqueue(completion); + } + + pub const CloseError = error{ + FileDescriptorInvalid, + DiskQuota, + InputOutput, + NoSpaceLeft, + } || os.UnexpectedError; + + pub fn close( + self: *IO, + comptime Context: type, + context: Context, + comptime callback: fn ( + context: Context, + 
completion: *Completion, + result: CloseError!void, + ) void, + completion: *Completion, + fd: os.fd_t, + ) void { + completion.* = .{ + .io = self, + .context = context, + .callback = struct { + fn wrapper(ctx: ?*anyopaque, comp: *Completion, res: *const anyopaque) void { + callback( + @ptrCast(@alignCast(ctx)), + comp, + @as(*const CloseError!void, @ptrCast(@alignCast(res))).*, + ); + } + }.wrapper, + .operation = .{ + .close = .{ .fd = fd }, + }, + }; + self.enqueue(completion); + } + + pub const ConnectError = error{ + AccessDenied, + AddressInUse, + AddressNotAvailable, + AddressFamilyNotSupported, + WouldBlock, + OpenAlreadyInProgress, + FileDescriptorInvalid, + ConnectionRefused, + AlreadyConnected, + NetworkUnreachable, + FileNotFound, + FileDescriptorNotASocket, + PermissionDenied, + ProtocolNotSupported, + ConnectionTimedOut, + SystemResources, + } || os.UnexpectedError; + + pub fn connect( + self: *IO, + comptime Context: type, + context: Context, + comptime callback: fn ( + context: Context, + completion: *Completion, + result: ConnectError!void, + ) void, + completion: *Completion, + socket: os.socket_t, + address: std.net.Address, + ) void { + completion.* = .{ + .io = self, + .context = context, + .callback = struct { + fn wrapper(ctx: ?*anyopaque, comp: *Completion, res: *const anyopaque) void { + callback( + @ptrCast(@alignCast(ctx)), + comp, + @as(*const ConnectError!void, @ptrCast(@alignCast(res))).*, + ); + } + }.wrapper, + .operation = .{ + .connect = .{ + .socket = socket, + .address = address, + }, + }, + }; + self.enqueue(completion); + } + + pub const ReadError = error{ + WouldBlock, + NotOpenForReading, + ConnectionResetByPeer, + Alignment, + InputOutput, + IsDir, + SystemResources, + Unseekable, + ConnectionTimedOut, + } || os.UnexpectedError; + + pub fn read( + self: *IO, + comptime Context: type, + context: Context, + comptime callback: fn ( + context: Context, + completion: *Completion, + result: ReadError!usize, + ) void, + completion: *Completion, + fd: os.fd_t, + buffer: []u8, + offset: u64, + ) void { + completion.* = .{ + .io = self, + .context = context, + .callback = struct { + fn wrapper(ctx: ?*anyopaque, comp: *Completion, res: *const anyopaque) void { + callback( + @ptrCast(@alignCast(ctx)), + comp, + @as(*const ReadError!usize, @ptrCast(@alignCast(res))).*, + ); + } + }.wrapper, + .operation = .{ + .read = .{ + .fd = fd, + .buffer = buffer, + .offset = offset, + }, + }, + }; + self.enqueue(completion); + } + + pub const RecvError = error{ + WouldBlock, + FileDescriptorInvalid, + ConnectionRefused, + SystemResources, + SocketNotConnected, + FileDescriptorNotASocket, + ConnectionTimedOut, + OperationNotSupported, + } || os.UnexpectedError; + + pub fn recv( + self: *IO, + comptime Context: type, + context: Context, + comptime callback: fn ( + context: Context, + completion: *Completion, + result: RecvError!usize, + ) void, + completion: *Completion, + socket: os.socket_t, + buffer: []u8, + ) void { + completion.* = .{ + .io = self, + .context = context, + .callback = struct { + fn wrapper(ctx: ?*anyopaque, comp: *Completion, res: *const anyopaque) void { + callback( + @ptrCast(@alignCast(ctx)), + comp, + @as(*const RecvError!usize, @ptrCast(@alignCast(res))).*, + ); + } + }.wrapper, + .operation = .{ + .recv = .{ + .socket = socket, + .buffer = buffer, + }, + }, + }; + self.enqueue(completion); + } + + pub const SendError = error{ + AccessDenied, + WouldBlock, + FastOpenAlreadyInProgress, + AddressFamilyNotSupported, + FileDescriptorInvalid, + 
ConnectionResetByPeer, + MessageTooBig, + SystemResources, + SocketNotConnected, + FileDescriptorNotASocket, + OperationNotSupported, + BrokenPipe, + ConnectionTimedOut, + } || os.UnexpectedError; + + pub fn send( + self: *IO, + comptime Context: type, + context: Context, + comptime callback: fn ( + context: Context, + completion: *Completion, + result: SendError!usize, + ) void, + completion: *Completion, + socket: os.socket_t, + buffer: []const u8, + ) void { + completion.* = .{ + .io = self, + .context = context, + .callback = struct { + fn wrapper(ctx: ?*anyopaque, comp: *Completion, res: *const anyopaque) void { + callback( + @ptrCast(@alignCast(ctx)), + comp, + @as(*const SendError!usize, @ptrCast(@alignCast(res))).*, + ); + } + }.wrapper, + .operation = .{ + .send = .{ + .socket = socket, + .buffer = buffer, + }, + }, + }; + self.enqueue(completion); + } + + pub const TimeoutError = error{Canceled} || os.UnexpectedError; + + pub fn timeout( + self: *IO, + comptime Context: type, + context: Context, + comptime callback: fn ( + context: Context, + completion: *Completion, + result: TimeoutError!void, + ) void, + completion: *Completion, + nanoseconds: u63, + ) void { + completion.* = .{ + .io = self, + .context = context, + .callback = struct { + fn wrapper(ctx: ?*anyopaque, comp: *Completion, res: *const anyopaque) void { + callback( + @ptrCast(@alignCast(ctx)), + comp, + @as(*const TimeoutError!void, @ptrCast(@alignCast(res))).*, + ); + } + }.wrapper, + .operation = .{ + .timeout = .{ + .timespec = .{ .tv_sec = 0, .tv_nsec = nanoseconds }, + }, + }, + }; + + // Special case a zero timeout as a yield. + if (nanoseconds == 0) { + completion.result = -@as(i32, @intCast(@intFromEnum(std.os.E.TIME))); + self.completed.push(completion); + return; + } + + self.enqueue(completion); + } + + pub const WriteError = error{ + WouldBlock, + NotOpenForWriting, + NotConnected, + DiskQuota, + FileTooBig, + Alignment, + InputOutput, + NoSpaceLeft, + Unseekable, + AccessDenied, + BrokenPipe, + } || os.UnexpectedError; + + pub fn write( + self: *IO, + comptime Context: type, + context: Context, + comptime callback: fn ( + context: Context, + completion: *Completion, + result: WriteError!usize, + ) void, + completion: *Completion, + fd: os.fd_t, + buffer: []const u8, + offset: u64, + ) void { + completion.* = .{ + .io = self, + .context = context, + .callback = struct { + fn wrapper(ctx: ?*anyopaque, comp: *Completion, res: *const anyopaque) void { + callback( + @ptrCast(@alignCast(ctx)), + comp, + @as(*const WriteError!usize, @ptrCast(@alignCast(res))).*, + ); + } + }.wrapper, + .operation = .{ + .write = .{ + .fd = fd, + .buffer = buffer, + .offset = offset, + }, + }, + }; + self.enqueue(completion); + } + + pub const INVALID_SOCKET = -1; + + /// Creates a socket that can be used for async operations with the IO instance. + pub fn open_socket(self: *IO, family: u32, sock_type: u32, protocol: u32) !os.socket_t { + _ = self; + return os.socket(family, sock_type, protocol); + } + + /// Opens a directory with read only access. + pub fn open_dir(dir_path: []const u8) !os.fd_t { + return os.open(dir_path, os.O.CLOEXEC | os.O.RDONLY, 0); + } + + pub const INVALID_FILE: os.fd_t = -1; + + /// Opens or creates a journal file: + /// - For reading and writing. + /// - For Direct I/O (if possible in development mode, but required in production mode). + /// - Obtains an advisory exclusive lock to the file descriptor. + /// - Allocates the file contiguously on disk if this is supported by the file system. 
+ /// - Ensures that the file data (and file inode in the parent directory) is durable on disk. + /// The caller is responsible for ensuring that the parent directory inode is durable. + /// - Verifies that the file size matches the expected file size before returning. + pub fn open_file( + dir_fd: os.fd_t, + relative_path: []const u8, + size: u64, + method: enum { create, create_or_open, open }, + ) !os.fd_t { + assert(relative_path.len > 0); + assert(size % sector_size == 0); + + // TODO Use O_EXCL when opening as a block device to obtain a mandatory exclusive lock. + // This is much stronger than an advisory exclusive lock, and is required on some platforms. + + var flags: u32 = os.O.CLOEXEC | os.O.RDWR | os.O.DSYNC; + var mode: os.mode_t = 0; + + // TODO Document this and investigate whether this is in fact correct to set here. + if (@hasDecl(os.O, "LARGEFILE")) flags |= os.O.LARGEFILE; + + var direct_io_supported = false; + var dir_on_tmpfs = try fs_is_tmpfs(dir_fd); + + if (dir_on_tmpfs) { + log.warn("tmpfs is not durable, and your data will be lost on reboot", .{}); + } + + // Special case. tmpfs doesn't support Direct I/O. Normally we would panic here (see below) + // but being able to benchmark production workloads on tmpfs is very useful for removing + // disk speed from the equation. + if (direct_io and !dir_on_tmpfs) { + direct_io_supported = try fs_supports_direct_io(dir_fd); + if (direct_io_supported) { + flags |= os.O.DIRECT; + } else if (!direct_io_required) { + log.warn("file system does not support Direct I/O", .{}); + } else { + // We require Direct I/O for safety to handle fsync failure correctly, and therefore + // panic in production if it is not supported. + @panic("file system does not support Direct I/O"); + } + } + + switch (method) { + .create => { + flags |= os.O.CREAT; + flags |= os.O.EXCL; + mode = 0o666; + log.info("creating \"{s}\"...", .{relative_path}); + }, + .create_or_open => { + flags |= os.O.CREAT; + mode = 0o666; + log.info("opening or creating \"{s}\"...", .{relative_path}); + }, + .open => { + log.info("opening \"{s}\"...", .{relative_path}); + }, + } + + // This is critical as we rely on O_DSYNC for fsync() whenever we write to the file: + assert((flags & os.O.DSYNC) > 0); + + // Be careful with openat(2): "If pathname is absolute, then dirfd is ignored." (man page) + assert(!std.fs.path.isAbsolute(relative_path)); + const fd = try os.openat(dir_fd, relative_path, flags, mode); + // TODO Return a proper error message when the path exists or does not exist (init/start). + errdefer os.close(fd); + + // TODO Check that the file is actually a file. + + // Obtain an advisory exclusive lock that works only if all processes actually use flock(). + // LOCK_NB means that we want to fail the lock without waiting if another process has it. + os.flock(fd, os.LOCK.EX | os.LOCK.NB) catch |err| switch (err) { + error.WouldBlock => @panic("another process holds the data file lock"), + else => return err, + }; + + // Ask the file system to allocate contiguous sectors for the file (if possible): + // If the file system does not support `fallocate()`, then this could mean more seeks or a + // panic if we run out of disk space (ENOSPC). 
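+        // Note for callers: O_DIRECT also constrains the IO itself. Reads and
+        // writes against this fd must use sector-aligned buffers, offsets, and
+        // lengths, e.g. (illustrative only):
+        //
+        //     var buffer: [sector_size]u8 align(sector_size) = undefined;
+        //
+        // The fallocate() fallback below writes exactly such a sector.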
+ if (method == .create) { + log.info("allocating {}...", .{std.fmt.fmtIntSizeBin(size)}); + fs_allocate(fd, size) catch |err| switch (err) { + error.OperationNotSupported => { + log.warn("file system does not support fallocate(), an ENOSPC will panic", .{}); + log.info("allocating by writing to the last sector of the file instead...", .{}); + + const sector: [sector_size]u8 align(sector_size) = [_]u8{0} ** sector_size; + + // Handle partial writes where the physical sector is less than a logical sector: + const write_offset = size - sector.len; + var written: usize = 0; + while (written < sector.len) { + written += try os.pwrite(fd, sector[written..], write_offset + written); + } + }, + else => |e| return e, + }; + } + + // The best fsync strategy is always to fsync before reading because this prevents us from + // making decisions on data that was never durably written by a previously crashed process. + // We therefore always fsync when we open the path, also to wait for any pending O_DSYNC. + // Thanks to Alex Miller from FoundationDB for diving into our source and pointing this out. + try os.fsync(fd); + + // We fsync the parent directory to ensure that the file inode is durably written. + // The caller is responsible for the parent directory inode stored under the grandparent. + // We always do this when opening because we don't know if this was done before crashing. + try os.fsync(dir_fd); + + const stat = try os.fstat(fd); + if (stat.size < size) @panic("data file inode size was truncated or corrupted"); + + return fd; + } + + /// Detects whether the underlying file system for a given directory fd is tmpfs. This is used + /// to relax our Direct I/O check - running on tmpfs for benchmarking is useful. + fn fs_is_tmpfs(dir_fd: std.os.fd_t) !bool { + var statfs: stdx.StatFs = undefined; + + while (true) { + const res = stdx.fstatfs(dir_fd, &statfs); + switch (os.linux.getErrno(res)) { + .SUCCESS => { + return statfs.f_type == stdx.TmpfsMagic; + }, + .INTR => continue, + else => |err| return os.unexpectedErrno(err), + } + } + } + + /// Detects whether the underlying file system for a given directory fd supports Direct I/O. + /// Not all Linux file systems support `O_DIRECT`, e.g. a shared macOS volume. + fn fs_supports_direct_io(dir_fd: std.os.fd_t) !bool { + if (!@hasDecl(std.os.O, "DIRECT")) return false; + + const path = "fs_supports_direct_io"; + const dir = std.fs.Dir{ .fd = dir_fd }; + const fd = try os.openatZ(dir_fd, path, os.O.CLOEXEC | os.O.CREAT | os.O.TRUNC, 0o666); + defer os.close(fd); + defer dir.deleteFile(path) catch {}; + + while (true) { + const res = os.linux.openat(dir_fd, path, os.O.CLOEXEC | os.O.RDONLY | os.O.DIRECT, 0); + switch (os.linux.getErrno(res)) { + .SUCCESS => { + os.close(@as(os.fd_t, @intCast(res))); + return true; + }, + .INTR => continue, + .INVAL => return false, + else => |err| return os.unexpectedErrno(err), + } + } + } + + /// Allocates a file contiguously using fallocate() if supported. + /// Alternatively, writes to the last sector so that at least the file size is correct. 
+ fn fs_allocate(fd: os.fd_t, size: u64) !void { + const mode: i32 = 0; + const offset: i64 = 0; + const length = @as(i64, @intCast(size)); + + while (true) { + const rc = os.linux.fallocate(fd, mode, offset, length); + switch (os.linux.getErrno(rc)) { + .SUCCESS => return, + .BADF => return error.FileDescriptorInvalid, + .FBIG => return error.FileTooBig, + .INTR => continue, + .INVAL => return error.ArgumentsInvalid, + .IO => return error.InputOutput, + .NODEV => return error.NoDevice, + .NOSPC => return error.NoSpaceLeft, + .NOSYS => return error.SystemOutdated, + .OPNOTSUPP => return error.OperationNotSupported, + .PERM => return error.PermissionDenied, + .SPIPE => return error.Unseekable, + .TXTBSY => return error.FileBusy, + else => |errno| return os.unexpectedErrno(errno), + } + } + } +}; diff --git a/src/io/windows.zig b/src/io/windows.zig new file mode 100644 index 0000000..206b380 --- /dev/null +++ b/src/io/windows.zig @@ -0,0 +1,1209 @@ +const std = @import("std"); +const os = std.os; +const assert = std.debug.assert; +const log = std.log.scoped(.io); + +const FIFO = @import("../fifo.zig").FIFO; +const Time = @import("../time.zig").Time; +const bufferLimit = @import("../io.zig").bufferLimit; + +const sector_size = 4096; + +pub const IO = struct { + iocp: os.windows.HANDLE, + timer: Time = .{}, + io_pending: usize = 0, + timeouts: FIFO(Completion) = .{ .name = "io_timeouts" }, + completed: FIFO(Completion) = .{ .name = "io_completed" }, + + pub fn init(entries: u12, flags: u32) !IO { + _ = entries; + _ = flags; + + _ = try os.windows.WSAStartup(2, 2); + errdefer os.windows.WSACleanup() catch unreachable; + + const iocp = try os.windows.CreateIoCompletionPort(os.windows.INVALID_HANDLE_VALUE, null, 0, 0); + return IO{ .iocp = iocp }; + } + + pub fn deinit(self: *IO) void { + assert(self.iocp != os.windows.INVALID_HANDLE_VALUE); + os.windows.CloseHandle(self.iocp); + self.iocp = os.windows.INVALID_HANDLE_VALUE; + + os.windows.WSACleanup() catch unreachable; + } + + pub fn tick(self: *IO) !void { + return self.flush(.non_blocking); + } + + pub fn run_for_ns(self: *IO, nanoseconds: u63) !void { + const Callback = struct { + fn on_timeout(timed_out: *bool, completion: *Completion, result: TimeoutError!void) void { + _ = result catch unreachable; + _ = completion; + timed_out.* = true; + } + }; + + var timed_out = false; + var completion: Completion = undefined; + self.timeout(*bool, &timed_out, Callback.on_timeout, &completion, nanoseconds); + + while (!timed_out) { + try self.flush(.blocking); + } + } + + const FlushMode = enum { + blocking, + non_blocking, + }; + + fn flush(self: *IO, mode: FlushMode) !void { + if (self.completed.empty()) { + // Compute how long to poll by flushing timeout completions. 
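+            // (flush_timeouts() returns the smallest remaining deadline in
+            // nanoseconds across all pending timeouts, or null if none are pending.)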
+            // NOTE: this may push to the completed queue.
+            var timeout_ms: ?os.windows.DWORD = null;
+            if (self.flush_timeouts()) |expires_ns| {
+                // A 0ns expiry should have been completed, not returned.
+                assert(expires_ns != 0);
+                // Round the expiry time to the nearest millisecond.
+                const expires_ms = (expires_ns + (std.time.ns_per_ms / 2)) / std.time.ns_per_ms;
+                // Saturating cast to DWORD milliseconds.
+                const expires = std.math.cast(os.windows.DWORD, expires_ms) orelse std.math.maxInt(os.windows.DWORD);
+                // The max DWORD value is reserved for INFINITE, so cap the cast at max - 1.
+                timeout_ms = if (expires == os.windows.INFINITE) expires - 1 else expires;
+            }
+
+            // Poll for IO iff there's IO pending and flush_timeouts() found no ready completions.
+            if (self.io_pending > 0 and self.completed.empty()) {
+                // In blocking mode, we're always waiting at least until the timeout by run_for_ns.
+                // In non-blocking mode, we shouldn't wait at all.
+                const io_timeout = switch (mode) {
+                    .blocking => timeout_ms orelse @panic("IO.flush blocking unbounded"),
+                    .non_blocking => 0,
+                };
+
+                var events: [64]os.windows.OVERLAPPED_ENTRY = undefined;
+                const num_events: u32 = os.windows.GetQueuedCompletionStatusEx(
+                    self.iocp,
+                    &events,
+                    io_timeout,
+                    false, // non-alertable wait
+                ) catch |err| switch (err) {
+                    error.Timeout => 0,
+                    error.Aborted => unreachable,
+                    else => |e| return e,
+                };
+
+                assert(self.io_pending >= num_events);
+                self.io_pending -= num_events;
+
+                for (events[0..num_events]) |event| {
+                    const raw_overlapped = event.lpOverlapped;
+                    const overlapped = @fieldParentPtr(Completion.Overlapped, "raw", raw_overlapped);
+                    const completion = overlapped.completion;
+                    completion.next = null;
+                    self.completed.push(completion);
+                }
+            }
+        }
+
+        // Dequeue and invoke all the completions currently ready.
+        // Must read all `completions` before invoking the callbacks
+ var completed = self.completed; + self.completed.reset(); + while (completed.pop()) |completion| { + (completion.callback)(Completion.Context{ + .io = self, + .completion = completion, + }); + } + } + + fn flush_timeouts(self: *IO) ?u64 { + var min_expires: ?u64 = null; + var current_time: ?u64 = null; + var timeouts: ?*Completion = self.timeouts.peek(); + + // iterate through the timeouts, returning min_expires at the end + while (timeouts) |completion| { + timeouts = completion.next; + + // lazily get the current time + const now = current_time orelse self.timer.monotonic(); + current_time = now; + + // move the completion to completed if it expired + if (now >= completion.operation.timeout.deadline) { + self.timeouts.remove(completion); + self.completed.push(completion); + continue; + } + + // if it's still waiting, update min_timeout + const expires = completion.operation.timeout.deadline - now; + if (min_expires) |current_min_expires| { + min_expires = @min(expires, current_min_expires); + } else { + min_expires = expires; + } + } + + return min_expires; + } + + /// This struct holds the data needed for a single IO operation + pub const Completion = struct { + next: ?*Completion, + context: ?*anyopaque, + callback: *const fn (Context) void, + operation: Operation, + + const Context = struct { + io: *IO, + completion: *Completion, + }; + + const Overlapped = struct { + raw: os.windows.OVERLAPPED, + completion: *Completion, + }; + + const Transfer = struct { + socket: os.socket_t, + buf: os.windows.ws2_32.WSABUF, + overlapped: Overlapped, + pending: bool, + }; + + const Operation = union(enum) { + accept: struct { + overlapped: Overlapped, + listen_socket: os.socket_t, + client_socket: os.socket_t, + addr_buffer: [(@sizeOf(std.net.Address) + 16) * 2]u8 align(4), + }, + connect: struct { + socket: os.socket_t, + address: std.net.Address, + overlapped: Overlapped, + pending: bool, + }, + send: Transfer, + recv: Transfer, + read: struct { + fd: os.fd_t, + buf: [*]u8, + len: u32, + offset: u64, + }, + write: struct { + fd: os.fd_t, + buf: [*]const u8, + len: u32, + offset: u64, + }, + close: struct { + fd: os.fd_t, + }, + timeout: struct { + deadline: u64, + }, + }; + }; + + fn submit( + self: *IO, + context: anytype, + comptime callback: anytype, + completion: *Completion, + comptime op_tag: std.meta.Tag(Completion.Operation), + op_data: anytype, + comptime OperationImpl: type, + ) void { + const Callback = struct { + fn onComplete(ctx: Completion.Context) void { + // Perform the operation and get the result + const data = &@field(ctx.completion.operation, @tagName(op_tag)); + const result = OperationImpl.do_operation(ctx, data); + + // For OVERLAPPED IO, error.WouldBlock assumes that it will be completed by IOCP. 
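+                // In other words, WouldBlock here is a handoff rather than a
+                // failure: the kernel now owns the OVERLAPPED, and flush() will
+                // surface the completion later via GetQueuedCompletionStatusEx().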
+ switch (op_tag) { + .accept, .read, .recv, .connect, .write, .send => { + _ = result catch |err| switch (err) { + error.WouldBlock => { + ctx.io.io_pending += 1; + return; + }, + else => {}, + }; + }, + else => {}, + } + + // The completion is finally ready to invoke the callback + callback( + @ptrCast(@alignCast(ctx.completion.context)), + ctx.completion, + result, + ); + } + }; + + // Setup the completion with the callback wrapper above + completion.* = .{ + .next = null, + .context = @as(?*anyopaque, @ptrCast(context)), + .callback = Callback.onComplete, + .operation = @unionInit(Completion.Operation, @tagName(op_tag), op_data), + }; + + // Submit the completion onto the right queue + switch (op_tag) { + .timeout => self.timeouts.push(completion), + else => self.completed.push(completion), + } + } + + pub const AcceptError = os.AcceptError || os.SetSockOptError; + + pub fn accept( + self: *IO, + comptime Context: type, + context: Context, + comptime callback: fn ( + context: Context, + completion: *Completion, + result: AcceptError!os.socket_t, + ) void, + completion: *Completion, + socket: os.socket_t, + ) void { + self.submit( + context, + callback, + completion, + .accept, + .{ + .overlapped = undefined, + .listen_socket = socket, + .client_socket = INVALID_SOCKET, + .addr_buffer = undefined, + }, + struct { + fn do_operation(ctx: Completion.Context, op: anytype) AcceptError!os.socket_t { + var flags: os.windows.DWORD = undefined; + var transferred: os.windows.DWORD = undefined; + + const rc = switch (op.client_socket) { + // When first called, the client_socket is invalid so we start the op. + INVALID_SOCKET => blk: { + // Create the socket that will be used for accept. + op.client_socket = ctx.io.open_socket( + os.AF.INET, + os.SOCK.STREAM, + os.IPPROTO.TCP, + ) catch |err| switch (err) { + error.AddressFamilyNotSupported, error.ProtocolNotSupported => unreachable, + else => |e| return e, + }; + + var sync_bytes_read: os.windows.DWORD = undefined; + op.overlapped = .{ + .raw = std.mem.zeroes(os.windows.OVERLAPPED), + .completion = ctx.completion, + }; + + // Start the asynchronous accept with the created socket. + break :blk os.windows.ws2_32.AcceptEx( + op.listen_socket, + op.client_socket, + &op.addr_buffer, + 0, + @sizeOf(std.net.Address) + 16, + @sizeOf(std.net.Address) + 16, + &sync_bytes_read, + &op.overlapped.raw, + ); + }, + // Called after accept was started, so get the result + else => os.windows.ws2_32.WSAGetOverlappedResult( + op.listen_socket, + &op.overlapped.raw, + &transferred, + os.windows.FALSE, // dont wait + &flags, + ), + }; + + // return the socket if we succeed in accepting. 
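+                    // (rc comes either from the initial AcceptEx() call or from a
+                    // later WSAGetOverlappedResult() poll; both report success the
+                    // same way, so a single path below handles both phases.)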
+ if (rc != os.windows.FALSE) { + // enables getsockopt, setsockopt, getsockname, getpeername + _ = os.windows.ws2_32.setsockopt( + op.client_socket, + os.windows.ws2_32.SOL.SOCKET, + os.windows.ws2_32.SO.UPDATE_ACCEPT_CONTEXT, + null, + 0, + ); + + return op.client_socket; + } + + // destroy the client_socket we created if we get a non WouldBlock error + errdefer |err| switch (err) { + error.WouldBlock => {}, + else => { + os.closeSocket(op.client_socket); + op.client_socket = INVALID_SOCKET; + }, + }; + + return switch (os.windows.ws2_32.WSAGetLastError()) { + .WSA_IO_PENDING, .WSAEWOULDBLOCK, .WSA_IO_INCOMPLETE => error.WouldBlock, + .WSANOTINITIALISED => unreachable, // WSAStartup() was called + .WSAENETDOWN => unreachable, // WinSock error + .WSAENOTSOCK => error.FileDescriptorNotASocket, + .WSAEOPNOTSUPP => error.OperationNotSupported, + .WSA_INVALID_HANDLE => unreachable, // we dont use hEvent in OVERLAPPED + .WSAEFAULT, .WSA_INVALID_PARAMETER => unreachable, // params should be ok + .WSAECONNRESET => error.ConnectionAborted, + .WSAEMFILE => unreachable, // we create our own descriptor so its available + .WSAENOBUFS => error.SystemResources, + .WSAEINTR, .WSAEINPROGRESS => unreachable, // no blocking calls + else => |err| os.windows.unexpectedWSAError(err), + }; + } + }, + ); + } + + pub const CloseError = error{ + FileDescriptorInvalid, + DiskQuota, + InputOutput, + NoSpaceLeft, + } || os.UnexpectedError; + + pub const ConnectError = os.ConnectError || error{FileDescriptorNotASocket}; + + pub fn connect( + self: *IO, + comptime Context: type, + context: Context, + comptime callback: fn ( + context: Context, + completion: *Completion, + result: ConnectError!void, + ) void, + completion: *Completion, + socket: os.socket_t, + address: std.net.Address, + ) void { + self.submit( + context, + callback, + completion, + .connect, + .{ + .socket = socket, + .address = address, + .overlapped = undefined, + .pending = false, + }, + struct { + fn do_operation(ctx: Completion.Context, op: anytype) ConnectError!void { + var flags: os.windows.DWORD = undefined; + var transferred: os.windows.DWORD = undefined; + + const rc = blk: { + // Poll for the result if we've already started the connect op. + if (op.pending) { + break :blk os.windows.ws2_32.WSAGetOverlappedResult( + op.socket, + &op.overlapped.raw, + &transferred, + os.windows.FALSE, // dont wait + &flags, + ); + } + + // ConnectEx requires the socket to be initially bound (INADDR_ANY) + const inaddr_any = std.mem.zeroes([4]u8); + const bind_addr = std.net.Address.initIp4(inaddr_any, 0); + os.bind( + op.socket, + &bind_addr.any, + bind_addr.getOsSockLen(), + ) catch |err| switch (err) { + error.AccessDenied => unreachable, + error.SymLinkLoop => unreachable, + error.NameTooLong => unreachable, + error.NotDir => unreachable, + error.ReadOnlyFileSystem => unreachable, + error.NetworkSubsystemFailed => unreachable, + error.AlreadyBound => unreachable, + else => |e| return e, + }; + + const LPFN_CONNECTEX = *const fn ( + Socket: os.windows.ws2_32.SOCKET, + SockAddr: *const os.windows.ws2_32.sockaddr, + SockLen: os.socklen_t, + SendBuf: ?*const anyopaque, + SendBufLen: os.windows.DWORD, + BytesSent: *os.windows.DWORD, + Overlapped: *os.windows.OVERLAPPED, + ) callconv(os.windows.WINAPI) os.windows.BOOL; + + // Find the ConnectEx function by dynamically looking it up on the socket. + // TODO: use `os.windows.loadWinsockExtensionFunction` once the function + // pointer is no longer required to be comptime. 
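+                    // (SIO_GET_EXTENSION_FUNCTION_POINTER with WSAID_CONNECTEX is
+                    // the documented way to obtain ConnectEx(), which, unlike
+                    // connect(), is not exported directly by ws2_32.dll.)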
+ var connect_ex: LPFN_CONNECTEX = undefined; + var num_bytes: os.windows.DWORD = undefined; + const guid = os.windows.ws2_32.WSAID_CONNECTEX; + switch (os.windows.ws2_32.WSAIoctl( + op.socket, + os.windows.ws2_32.SIO_GET_EXTENSION_FUNCTION_POINTER, + @as(*const anyopaque, @ptrCast(&guid)), + @sizeOf(os.windows.GUID), + @as(*anyopaque, @ptrCast(&connect_ex)), + @sizeOf(LPFN_CONNECTEX), + &num_bytes, + null, + null, + )) { + os.windows.ws2_32.SOCKET_ERROR => switch (os.windows.ws2_32.WSAGetLastError()) { + .WSAEOPNOTSUPP => unreachable, + .WSAENOTSOCK => unreachable, + else => |err| return os.windows.unexpectedWSAError(err), + }, + else => assert(num_bytes == @sizeOf(LPFN_CONNECTEX)), + } + + op.pending = true; + op.overlapped = .{ + .raw = std.mem.zeroes(os.windows.OVERLAPPED), + .completion = ctx.completion, + }; + + // Start the connect operation. + break :blk (connect_ex)( + op.socket, + &op.address.any, + op.address.getOsSockLen(), + null, + 0, + &transferred, + &op.overlapped.raw, + ); + }; + + // return if we succeeded in connecting + if (rc != os.windows.FALSE) { + // enables getsockopt, setsockopt, getsockname, getpeername + _ = os.windows.ws2_32.setsockopt( + op.socket, + os.windows.ws2_32.SOL.SOCKET, + os.windows.ws2_32.SO.UPDATE_CONNECT_CONTEXT, + null, + 0, + ); + + return; + } + + return switch (os.windows.ws2_32.WSAGetLastError()) { + .WSA_IO_PENDING, .WSAEWOULDBLOCK, .WSA_IO_INCOMPLETE, .WSAEALREADY => error.WouldBlock, + .WSANOTINITIALISED => unreachable, // WSAStartup() was called + .WSAENETDOWN => unreachable, // network subsystem is down + .WSAEADDRNOTAVAIL => error.AddressNotAvailable, + .WSAEAFNOSUPPORT => error.AddressFamilyNotSupported, + .WSAECONNREFUSED => error.ConnectionRefused, + .WSAEFAULT => unreachable, // all addresses should be valid + .WSAEINVAL => unreachable, // invalid socket type + .WSAEHOSTUNREACH, .WSAENETUNREACH => error.NetworkUnreachable, + .WSAENOBUFS => error.SystemResources, + .WSAENOTSOCK => unreachable, // socket is not bound or is listening + .WSAETIMEDOUT => error.ConnectionTimedOut, + .WSA_INVALID_HANDLE => unreachable, // we dont use hEvent in OVERLAPPED + else => |err| os.windows.unexpectedWSAError(err), + }; + } + }, + ); + } + + pub const SendError = os.SendError; + + pub fn send( + self: *IO, + comptime Context: type, + context: Context, + comptime callback: fn ( + context: Context, + completion: *Completion, + result: SendError!usize, + ) void, + completion: *Completion, + socket: os.socket_t, + buffer: []const u8, + ) void { + const transfer = Completion.Transfer{ + .socket = socket, + .buf = os.windows.ws2_32.WSABUF{ + .len = @as(u32, @intCast(bufferLimit(buffer.len))), + .buf = @constCast(buffer.ptr), + }, + .overlapped = undefined, + .pending = false, + }; + + self.submit( + context, + callback, + completion, + .send, + transfer, + struct { + fn do_operation(ctx: Completion.Context, op: anytype) SendError!usize { + var flags: os.windows.DWORD = undefined; + var transferred: os.windows.DWORD = undefined; + + const rc = blk: { + // Poll for the result if we've already started the send op. + if (op.pending) { + break :blk os.windows.ws2_32.WSAGetOverlappedResult( + op.socket, + &op.overlapped.raw, + &transferred, + os.windows.FALSE, // dont wait + &flags, + ); + } + + op.pending = true; + op.overlapped = .{ + .raw = std.mem.zeroes(os.windows.OVERLAPPED), + .completion = ctx.completion, + }; + + // Start the send operation. 
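+                    // (WSASend() returns 0 on immediate success and SOCKET_ERROR
+                    // otherwise; WSA_IO_PENDING is mapped to error.WouldBlock
+                    // below, meaning the completion will arrive via the IOCP.)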
+ break :blk switch (os.windows.ws2_32.WSASend( + op.socket, + @as([*]os.windows.ws2_32.WSABUF, @ptrCast(&op.buf)), + 1, // one buffer + &transferred, + 0, // no flags + &op.overlapped.raw, + null, + )) { + os.windows.ws2_32.SOCKET_ERROR => @as(os.windows.BOOL, os.windows.FALSE), + 0 => os.windows.TRUE, + else => unreachable, + }; + }; + + // Return bytes transferred on success. + if (rc != os.windows.FALSE) + return transferred; + + return switch (os.windows.ws2_32.WSAGetLastError()) { + .WSA_IO_PENDING, .WSAEWOULDBLOCK, .WSA_IO_INCOMPLETE => error.WouldBlock, + .WSANOTINITIALISED => unreachable, // WSAStartup() was called + .WSA_INVALID_HANDLE => unreachable, // we dont use OVERLAPPED.hEvent + .WSA_INVALID_PARAMETER => unreachable, // parameters are fine + .WSAECONNABORTED => error.ConnectionResetByPeer, + .WSAECONNRESET => error.ConnectionResetByPeer, + .WSAEFAULT => unreachable, // invalid buffer + .WSAEINTR => unreachable, // this is non blocking + .WSAEINPROGRESS => unreachable, // this is non blocking + .WSAEINVAL => unreachable, // invalid socket type + .WSAEMSGSIZE => error.MessageTooBig, + .WSAENETDOWN => error.NetworkSubsystemFailed, + .WSAENETRESET => error.ConnectionResetByPeer, + .WSAENOBUFS => error.SystemResources, + .WSAENOTCONN => error.FileDescriptorNotASocket, + .WSAEOPNOTSUPP => unreachable, // we dont use MSG_OOB or MSG_PARTIAL + .WSAESHUTDOWN => error.BrokenPipe, + .WSA_OPERATION_ABORTED => unreachable, // operation was cancelled + else => |err| os.windows.unexpectedWSAError(err), + }; + } + }, + ); + } + + pub const RecvError = os.RecvFromError; + + pub fn recv( + self: *IO, + comptime Context: type, + context: Context, + comptime callback: fn ( + context: Context, + completion: *Completion, + result: RecvError!usize, + ) void, + completion: *Completion, + socket: os.socket_t, + buffer: []u8, + ) void { + const transfer = Completion.Transfer{ + .socket = socket, + .buf = os.windows.ws2_32.WSABUF{ + .len = @as(u32, @intCast(bufferLimit(buffer.len))), + .buf = buffer.ptr, + }, + .overlapped = undefined, + .pending = false, + }; + + self.submit( + context, + callback, + completion, + .recv, + transfer, + struct { + fn do_operation(ctx: Completion.Context, op: anytype) RecvError!usize { + var flags: os.windows.DWORD = 0; // used both as input and output + var transferred: os.windows.DWORD = undefined; + + const rc = blk: { + // Poll for the result if we've already started the recv op. + if (op.pending) { + break :blk os.windows.ws2_32.WSAGetOverlappedResult( + op.socket, + &op.overlapped.raw, + &transferred, + os.windows.FALSE, // dont wait + &flags, + ); + } + + op.pending = true; + op.overlapped = .{ + .raw = std.mem.zeroes(os.windows.OVERLAPPED), + .completion = ctx.completion, + }; + + // Start the recv operation. + break :blk switch (os.windows.ws2_32.WSARecv( + op.socket, + @as([*]os.windows.ws2_32.WSABUF, @ptrCast(&op.buf)), + 1, // one buffer + &transferred, + &flags, + &op.overlapped.raw, + null, + )) { + os.windows.ws2_32.SOCKET_ERROR => @as(os.windows.BOOL, os.windows.FALSE), + 0 => os.windows.TRUE, + else => unreachable, + }; + }; + + // Return bytes received on success. 
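+                    // (A successful receive of zero bytes on a stream socket
+                    // indicates that the peer performed an orderly shutdown.)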
+ if (rc != os.windows.FALSE) + return transferred; + + return switch (os.windows.ws2_32.WSAGetLastError()) { + .WSA_IO_PENDING, .WSAEWOULDBLOCK, .WSA_IO_INCOMPLETE => error.WouldBlock, + .WSANOTINITIALISED => unreachable, // WSAStartup() was called + .WSA_INVALID_HANDLE => unreachable, // we dont use OVERLAPPED.hEvent + .WSA_INVALID_PARAMETER => unreachable, // parameters are fine + .WSAECONNABORTED => error.ConnectionRefused, + .WSAECONNRESET => error.ConnectionResetByPeer, + .WSAEDISCON => unreachable, // we only stream sockets + .WSAEFAULT => unreachable, // invalid buffer + .WSAEINTR => unreachable, // this is non blocking + .WSAEINPROGRESS => unreachable, // this is non blocking + .WSAEINVAL => unreachable, // invalid socket type + .WSAEMSGSIZE => error.MessageTooBig, + .WSAENETDOWN => error.NetworkSubsystemFailed, + .WSAENETRESET => error.ConnectionResetByPeer, + .WSAENOTCONN => error.SocketNotConnected, + .WSAEOPNOTSUPP => unreachable, // we dont use MSG_OOB or MSG_PARTIAL + .WSAESHUTDOWN => error.SocketNotConnected, + .WSAETIMEDOUT => error.ConnectionRefused, + .WSA_OPERATION_ABORTED => unreachable, // operation was cancelled + else => |err| os.windows.unexpectedWSAError(err), + }; + } + }, + ); + } + + pub const ReadError = error{ + WouldBlock, + NotOpenForReading, + ConnectionResetByPeer, + Alignment, + InputOutput, + IsDir, + SystemResources, + Unseekable, + ConnectionTimedOut, + } || os.UnexpectedError; + + pub fn read( + self: *IO, + comptime Context: type, + context: Context, + comptime callback: fn ( + context: Context, + completion: *Completion, + result: ReadError!usize, + ) void, + completion: *Completion, + fd: os.fd_t, + buffer: []u8, + offset: u64, + ) void { + self.submit( + context, + callback, + completion, + .read, + .{ + .fd = fd, + .buf = buffer.ptr, + .len = @as(u32, @intCast(bufferLimit(buffer.len))), + .offset = offset, + }, + struct { + fn do_operation(ctx: Completion.Context, op: anytype) ReadError!usize { + // Do a synchronous read for now. + _ = ctx; + return os.pread(op.fd, op.buf[0..op.len], op.offset) catch |err| switch (err) { + error.OperationAborted => unreachable, + error.BrokenPipe => unreachable, + error.ConnectionTimedOut => unreachable, + error.AccessDenied => error.InputOutput, + error.NetNameDeleted => unreachable, + else => |e| e, + }; + } + }, + ); + } + + pub const WriteError = os.PWriteError; + + pub fn write( + self: *IO, + comptime Context: type, + context: Context, + comptime callback: fn ( + context: Context, + completion: *Completion, + result: WriteError!usize, + ) void, + completion: *Completion, + fd: os.fd_t, + buffer: []const u8, + offset: u64, + ) void { + self.submit( + context, + callback, + completion, + .write, + .{ + .fd = fd, + .buf = buffer.ptr, + .len = @as(u32, @intCast(bufferLimit(buffer.len))), + .offset = offset, + }, + struct { + fn do_operation(ctx: Completion.Context, op: anytype) WriteError!usize { + // Do a synchronous write for now. 
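+                    // As with read() above, this blocks the event loop: the
+                    // pwrite() below runs synchronously when the completion is
+                    // dequeued, rather than going through the IOCP.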
+ _ = ctx; + return os.pwrite(op.fd, op.buf[0..op.len], op.offset); + } + }, + ); + } + + pub fn close( + self: *IO, + comptime Context: type, + context: Context, + comptime callback: fn ( + context: Context, + completion: *Completion, + result: CloseError!void, + ) void, + completion: *Completion, + fd: os.fd_t, + ) void { + self.submit( + context, + callback, + completion, + .close, + .{ .fd = fd }, + struct { + fn do_operation(ctx: Completion.Context, op: anytype) CloseError!void { + _ = ctx; + + // Check if the fd is a SOCKET by seeing if getsockopt() returns ENOTSOCK + // https://stackoverflow.com/a/50981652 + const socket: os.socket_t = @ptrCast(op.fd); + getsockoptError(socket) catch |err| switch (err) { + error.FileDescriptorNotASocket => return os.windows.CloseHandle(op.fd), + else => {}, + }; + + os.closeSocket(socket); + } + }, + ); + } + + pub const TimeoutError = error{Canceled} || os.UnexpectedError; + + pub fn timeout( + self: *IO, + comptime Context: type, + context: Context, + comptime callback: fn ( + context: Context, + completion: *Completion, + result: TimeoutError!void, + ) void, + completion: *Completion, + nanoseconds: u63, + ) void { + // Special case a zero timeout as a yield. + if (nanoseconds == 0) { + completion.* = .{ + .next = null, + .context = @ptrCast(context), + .operation = undefined, + .callback = struct { + fn on_complete(ctx: Completion.Context) void { + const _context: Context = @ptrCast(@alignCast(ctx.completion.context)); + callback(_context, ctx.completion, {}); + } + }.on_complete, + }; + + self.completed.push(completion); + return; + } + + self.submit( + context, + callback, + completion, + .timeout, + .{ .deadline = self.timer.monotonic() + nanoseconds }, + struct { + fn do_operation(ctx: Completion.Context, op: anytype) TimeoutError!void { + _ = ctx; + _ = op; + return; + } + }, + ); + } + + pub const INVALID_SOCKET = os.windows.ws2_32.INVALID_SOCKET; + + /// Creates a socket that can be used for async operations with the IO instance. + pub fn open_socket(self: *IO, family: u32, sock_type: u32, protocol: u32) !os.socket_t { + // SOCK_NONBLOCK | SOCK_CLOEXEC + var flags: os.windows.DWORD = 0; + flags |= os.windows.ws2_32.WSA_FLAG_OVERLAPPED; + flags |= os.windows.ws2_32.WSA_FLAG_NO_HANDLE_INHERIT; + + const socket = try os.windows.WSASocketW( + @as(i32, @bitCast(family)), + @as(i32, @bitCast(sock_type)), + @as(i32, @bitCast(protocol)), + null, + 0, + flags, + ); + errdefer os.closeSocket(socket); + + const socket_iocp = try os.windows.CreateIoCompletionPort(socket, self.iocp, 0, 0); + assert(socket_iocp == self.iocp); + + // Ensure that synchronous IO completion doesn't queue an unneeded overlapped + // and that the event for the socket (WaitForSingleObject) doesn't need to be set. + var mode: os.windows.BYTE = 0; + mode |= os.windows.FILE_SKIP_COMPLETION_PORT_ON_SUCCESS; + mode |= os.windows.FILE_SKIP_SET_EVENT_ON_HANDLE; + + const handle = @as(os.windows.HANDLE, @ptrCast(socket)); + try os.windows.SetFileCompletionNotificationModes(handle, mode); + + return socket; + } + + /// Opens a directory with read only access. 
+ pub fn open_dir(dir_path: []const u8) !os.fd_t { + const dir = try std.fs.cwd().openDir(dir_path, .{}); + return dir.fd; + } + + pub const INVALID_FILE = os.windows.INVALID_HANDLE_VALUE; + + fn open_file_handle(relative_path: []const u8, method: enum { create, open }) !os.fd_t { + const path_w = try os.windows.sliceToPrefixedFileW(relative_path); + + // FILE_CREATE = O_CREAT | O_EXCL + var creation_disposition: os.windows.DWORD = 0; + switch (method) { + .create => { + creation_disposition = os.windows.FILE_CREATE; + log.info("creating \"{s}\"...", .{relative_path}); + }, + .open => { + creation_disposition = os.windows.OPEN_EXISTING; + log.info("opening \"{s}\"...", .{relative_path}); + }, + } + + // O_EXCL + var shared_mode: os.windows.DWORD = 0; + + // O_RDWR + var access_mask: os.windows.DWORD = 0; + access_mask |= os.windows.GENERIC_READ; + access_mask |= os.windows.GENERIC_WRITE; + + // O_DIRECT | O_DSYNC + var attributes: os.windows.DWORD = 0; + attributes |= os.windows.FILE_FLAG_NO_BUFFERING; + attributes |= os.windows.FILE_FLAG_WRITE_THROUGH; + + // This is critical as we rely on O_DSYNC for fsync() whenever we write to the file: + assert((attributes & os.windows.FILE_FLAG_WRITE_THROUGH) > 0); + + // TODO: Add ReadFileEx/WriteFileEx support. + // Not currently needed for O_DIRECT disk IO. + // attributes |= os.windows.FILE_FLAG_OVERLAPPED; + + const handle = os.windows.kernel32.CreateFileW( + path_w.span(), + access_mask, + shared_mode, + null, // no security attributes required + creation_disposition, + attributes, + null, // no existing template file + ); + + if (handle == os.windows.INVALID_HANDLE_VALUE) { + return switch (os.windows.kernel32.GetLastError()) { + .FILE_NOT_FOUND => error.FileNotFound, + .SHARING_VIOLATION, .ACCESS_DENIED => error.AccessDenied, + else => |err| { + log.warn("CreateFileW(): {}", .{err}); + return os.windows.unexpectedError(err); + }, + }; + } + + return handle; + } + + /// Opens or creates a journal file: + /// - For reading and writing. + /// - For Direct I/O (required on windows). + /// - Obtains an advisory exclusive lock to the file descriptor. + /// - Allocates the file contiguously on disk if this is supported by the file system. + /// - Ensures that the file data is durable on disk. + /// The caller is responsible for ensuring that the parent directory inode is durable. + /// - Verifies that the file size matches the expected file size before returning. + pub fn open_file( + dir_handle: os.fd_t, + relative_path: []const u8, + size: u64, + method: enum { create, create_or_open, open }, + ) !os.fd_t { + assert(relative_path.len > 0); + assert(size % sector_size == 0); + + const handle = switch (method) { + .open => try open_file_handle(relative_path, .open), + .create => try open_file_handle(relative_path, .create), + .create_or_open => open_file_handle(relative_path, .open) catch |err| switch (err) { + error.FileNotFound => try open_file_handle(relative_path, .create), + else => return err, + }, + }; + errdefer os.windows.CloseHandle(handle); + + // Obtain an advisory exclusive lock + // even when we haven't given shared access to other processes. 
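+        // (shared_mode == 0 above already denies other opens of this handle; the
+        // explicit lock below mirrors the advisory flock() used by the Linux
+        // backend, so both platforms fail the same way on a held data file.)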
+ fs_lock(handle, size) catch |err| switch (err) { + error.WouldBlock => @panic("another process holds the data file lock"), + else => return err, + }; + + // Ask the file system to allocate contiguous sectors for the file (if possible): + if (method == .create) { + log.info("allocating {}...", .{std.fmt.fmtIntSizeBin(size)}); + fs_allocate(handle, size) catch { + log.warn("file system failed to preallocate the file memory", .{}); + log.info("allocating by writing to the last sector of the file instead...", .{}); + + const sector: [sector_size]u8 align(sector_size) = [_]u8{0} ** sector_size; + + // Handle partial writes where the physical sector is less than a logical sector: + const write_offset = size - sector.len; + var written: usize = 0; + while (written < sector.len) { + written += try os.pwrite(handle, sector[written..], write_offset + written); + } + }; + } + + // The best fsync strategy is always to fsync before reading because this prevents us from + // making decisions on data that was never durably written by a previously crashed process. + // We therefore always fsync when we open the path, also to wait for any pending O_DSYNC. + // Thanks to Alex Miller from FoundationDB for diving into our source and pointing this out. + try os.fsync(handle); + + // We cannot fsync the directory handle on Windows. + // We have no way to open a directory with write access. + // + // try os.fsync(dir_handle); + _ = dir_handle; + + const file_size = try os.windows.GetFileSizeEx(handle); + if (file_size < size) @panic("data file inode size was truncated or corrupted"); + + return handle; + } + + fn fs_lock(handle: os.fd_t, size: u64) !void { + // TODO: Look into using SetFileIoOverlappedRange() for better unbuffered async IO perf + // NOTE: Requires SeLockMemoryPrivilege. 
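+        // (LockFileEx() is declared locally below rather than taken from the
+        // standard library; the two flags used give LOCK_EX | LOCK_NB semantics,
+        // matching the advisory flock() on the Linux side.)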
+ + const kernel32 = struct { + const LOCKFILE_EXCLUSIVE_LOCK = 0x2; + const LOCKFILE_FAIL_IMMEDIATELY = 0x1; + + extern "kernel32" fn LockFileEx( + hFile: os.windows.HANDLE, + dwFlags: os.windows.DWORD, + dwReserved: os.windows.DWORD, + nNumberOfBytesToLockLow: os.windows.DWORD, + nNumberOfBytesToLockHigh: os.windows.DWORD, + lpOverlapped: ?*os.windows.OVERLAPPED, + ) callconv(os.windows.WINAPI) os.windows.BOOL; + }; + + // hEvent = null + // Offset & OffsetHigh = 0 + var lock_overlapped = std.mem.zeroes(os.windows.OVERLAPPED); + + // LOCK_EX | LOCK_NB + var lock_flags: os.windows.DWORD = 0; + lock_flags |= kernel32.LOCKFILE_EXCLUSIVE_LOCK; + lock_flags |= kernel32.LOCKFILE_FAIL_IMMEDIATELY; + + const locked = kernel32.LockFileEx( + handle, + lock_flags, + 0, // reserved param is always zero + @as(u32, @truncate(size)), // low bits of size + @as(u32, @truncate(size >> 32)), // high bits of size + &lock_overlapped, + ); + + if (locked == os.windows.FALSE) { + return switch (os.windows.kernel32.GetLastError()) { + .IO_PENDING => error.WouldBlock, + else => |err| os.windows.unexpectedError(err), + }; + } + } + + fn fs_allocate(handle: os.fd_t, size: u64) !void { + // TODO: Look into using SetFileValidData() instead + // NOTE: Requires SE_MANAGE_VOLUME_NAME privilege + + // Move the file pointer to the start + size + const seeked = os.windows.kernel32.SetFilePointerEx( + handle, + @as(i64, @intCast(size)), + null, // no reference to new file pointer + os.windows.FILE_BEGIN, + ); + + if (seeked == os.windows.FALSE) { + return switch (os.windows.kernel32.GetLastError()) { + .INVALID_HANDLE => unreachable, + .INVALID_PARAMETER => unreachable, + else => |err| os.windows.unexpectedError(err), + }; + } + + // Mark the moved file pointer (start + size) as the physical EOF. + const allocated = os.windows.kernel32.SetEndOfFile(handle); + if (allocated == os.windows.FALSE) { + const err = os.windows.kernel32.GetLastError(); + return os.windows.unexpectedError(err); + } + } +}; + +// TODO: use os.getsockoptError when fixed for windows in stdlib +fn getsockoptError(socket: os.socket_t) IO.ConnectError!void { + var err_code: u32 = undefined; + var size: i32 = @sizeOf(u32); + const rc = os.windows.ws2_32.getsockopt( + socket, + os.SOL.SOCKET, + os.SO.ERROR, + std.mem.asBytes(&err_code), + &size, + ); + + if (rc != 0) { + switch (os.windows.ws2_32.WSAGetLastError()) { + .WSAENETDOWN => return error.NetworkUnreachable, + .WSANOTINITIALISED => unreachable, // WSAStartup() was never called + .WSAEFAULT => unreachable, // The address pointed to by optval or optlen is not in a valid part of the process address space. + .WSAEINVAL => unreachable, // The level parameter is unknown or invalid + .WSAENOPROTOOPT => unreachable, // The option is unknown at the level indicated. 
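+            // (This switch classifies failures of the getsockopt() call itself;
+            // the socket's own pending error, if any, is decoded from err_code
+            // after the switch.)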
+ .WSAENOTSOCK => return error.FileDescriptorNotASocket, + else => |err| return os.windows.unexpectedWSAError(err), + } + } + + assert(size == 4); + if (err_code == 0) + return; + + const ws_err = @as(os.windows.ws2_32.WinsockError, @enumFromInt(@as(u16, @intCast(err_code)))); + return switch (ws_err) { + .WSAEACCES => error.PermissionDenied, + .WSAEADDRINUSE => error.AddressInUse, + .WSAEADDRNOTAVAIL => error.AddressNotAvailable, + .WSAEAFNOSUPPORT => error.AddressFamilyNotSupported, + .WSAEALREADY => error.ConnectionPending, + .WSAEBADF => unreachable, + .WSAECONNREFUSED => error.ConnectionRefused, + .WSAEFAULT => unreachable, + .WSAEISCONN => unreachable, // error.AlreadyConnected, + .WSAENETUNREACH => error.NetworkUnreachable, + .WSAENOTSOCK => error.FileDescriptorNotASocket, + .WSAEPROTOTYPE => unreachable, + .WSAETIMEDOUT => error.ConnectionTimedOut, + .WSAECONNRESET => error.ConnectionResetByPeer, + else => |e| os.windows.unexpectedWSAError(e), + }; +} diff --git a/src/low_level_hash_vectors.zig b/src/low_level_hash_vectors.zig new file mode 100644 index 0000000..9d15e46 --- /dev/null +++ b/src/low_level_hash_vectors.zig @@ -0,0 +1,142 @@ +//! Test vectors for `stdx.inline_hash` from +//! +//! + +pub const Case = struct { seed: u64, hash: u64, b64: []const u8 }; + +pub const cases = [_]Case{ + .{ .seed = 0xec42b7ab404b8acb, .hash = 0xe5a40d39ab796423, .b64 = "" }, + .{ .seed = 0, .hash = 0x1766974bf7527d81, .b64 = "ICAg" }, + .{ .seed = 0, .hash = 0x5c3bbbe230db17a8, .b64 = "YWFhYQ==" }, + .{ .seed = 0, .hash = 0xa6630143a7e6aa6f, .b64 = "AQID" }, + .{ .seed = 0, .hash = 0x8787cb2d04b0c984, .b64 = "AQIDBA==" }, + .{ .seed = 0, .hash = 0x33603654ff574ac2, .b64 = "dGhpcmRfcGFydHl8d3loYXNofDY0" }, + .{ .seed = 0xeeee074043a3ee0f, .hash = 0xa6564b468248c683, .b64 = "Zw==" }, + .{ .seed = 0x857902089c393de, .hash = 0xef192f401b116e1c, .b64 = "xmk=" }, + .{ .seed = 0x993df040024ca3af, .hash = 0xbe8dc0c54617639d, .b64 = "c1H/" }, + .{ .seed = 0xc4e4c2acea740e96, .hash = 0x93d7f665b5521c8e, .b64 = "SuwpzQ==" }, + .{ .seed = 0x6a214b3db872d0cf, .hash = 0x646d70bb42445f28, .b64 = "uqvy++M=" }, + .{ .seed = 0x44343db6a89dba4d, .hash = 0x96a7b1e3cc9bd426, .b64 = "RnzCVPgb" }, + .{ .seed = 0x77b5d6d1ae1dd483, .hash = 0x76020289ab0790c4, .b64 = "6OeNdlouYw==" }, + .{ .seed = 0x89ab8ecb44d221f1, .hash = 0x39f842e4133b9b44, .b64 = "M5/JmmYyDbc=" }, + .{ .seed = 0x60244b17577ca81b, .hash = 0x2b8d7047be4bcaab, .b64 = "MVijWiVdBRdY" }, + .{ .seed = 0x59a08dcee0717067, .hash = 0x99628abef6716a97, .b64 = "6V7Uq7LNxpu0VA==" }, + .{ .seed = 0xf5f20db3ade57396, .hash = 0x4432e02ba42b2740, .b64 = "EQ6CdEEhPdyHcOk=" }, + .{ .seed = 0xbf8dee0751ad3efb, .hash = 0x74d810efcad7918a, .b64 = "PqFB4fxnPgF+l+rc" }, + .{ .seed = 0x6b7a06b268d63e30, .hash = 0x88c84e986002507f, .b64 = "a5aPOFwq7LA7+zKvPA==" }, + .{ .seed = 0xb8c37f0ae0f54c82, .hash = 0x4f99acf193cf39b9, .b64 = "VOwY21wCGv5D+/qqOvs=" }, + .{ .seed = 0x9fcbed0c38e50eef, .hash = 0xd90e7a3655891e37, .b64 = "KdHmBTx8lHXYvmGJ+Vy7" }, + .{ .seed = 0x2af4bade1d8e3a1d, .hash = 0x3bb378b1d4df8fcf, .b64 = "qJkPlbHr8bMF7/cA6aE65Q==" }, + .{ .seed = 0x714e3aa912da2f2c, .hash = 0xf78e94045c052d47, .b64 = "ygvL0EhHZL0fIx6oHHtkxRQ=" }, + .{ .seed = 0xf5ee75e3cbb82c1c, .hash = 0x26da0b2130da6b40, .b64 = "c1rFXkt5YztwZCQRngncqtSs" }, + .{ .seed = 0x620e7007321b93b9, .hash = 0x30b4d426af8c6986, .b64 = "8hsQrzszzeNQSEcVXLtvIhm6mw==" }, + .{ .seed = 0xc08528cac2e551fc, .hash = 0x5413b4aaf3baaeae, .b64 = "ffUL4RocfyP4KfikGxO1yk7omDI=" }, + .{ .seed = 0x6a1debf9cc3ad39, 
.hash = 0x756ab265370a1597, .b64 = "OOB5TT00vF9Od/rLbAWshiErqhpV" }, + .{ .seed = 0x7e0a3c88111fc226, .hash = 0xdaf5f4b7d09814fb, .b64 = "or5wtXM7BFzTNpSzr+Lw5J5PMhVJ/Q==" }, + .{ .seed = 0x1301fef15df39edb, .hash = 0x8f874ae37742b75e, .b64 = "gk6pCHDUsoopVEiaCrzVDhioRKxb844=" }, + .{ .seed = 0x64e181f3d5817ab, .hash = 0x8fecd03956121ce8, .b64 = "TNctmwlC5QbEM6/No4R/La3UdkfeMhzs" }, + .{ .seed = 0xafafc44961078ecb, .hash = 0x229c292ea7a08285, .b64 = "SsQw9iAjhWz7sgcE9OwLuSC6hsM+BfHs2Q==" }, + .{ .seed = 0x4f7bb45549250094, .hash = 0xbb4bf0692d14bae, .b64 = "ZzO3mVCj4xTT2TT3XqDyEKj2BZQBvrS8RHg=" }, + .{ .seed = 0xa30061abaa2818c, .hash = 0x207b24ca3bdac1db, .b64 = "+klp5iPQGtppan5MflEls0iEUzqU+zGZkDJX" }, + .{ .seed = 0xd902ee3e44a5705f, .hash = 0x64f6cd6745d3825b, .b64 = "RO6bvOnlJc8I9eniXlNgqtKy0IX6VNg16NRmgg==" }, + .{ .seed = 0x316d36da516f583, .hash = 0xa2b2e1656b58df1e, .b64 = "ZJjZqId1ZXBaij9igClE3nyliU5XWdNRrayGlYA=" }, + .{ .seed = 0x402d83f9f834f616, .hash = 0xd01d30d9ee7a148, .b64 = "7BfkhfGMDGbxfMB8uyL85GbaYQtjr2K8g7RpLzr/" }, + .{ .seed = 0x9c604164c016b72c, .hash = 0x1cb4cd00ab804e3b, .b64 = "rycWk6wHH7htETQtje9PidS2YzXBx+Qkg2fY7ZYS7A==" }, + .{ .seed = 0x3f4507e01f9e73ba, .hash = 0x4697f2637fd90999, .b64 = "RTkC2OUK+J13CdGllsH0H5WqgspsSa6QzRZouqx6pvI=" }, + .{ .seed = 0xc3fe0d5be8d2c7c7, .hash = 0x8383a756b5688c07, .b64 = "tKjKmbLCNyrLCM9hycOAXm4DKNpM12oZ7dLTmUx5iwAi" }, + .{ .seed = 0x531858a40bfa7ea1, .hash = 0x695c29cb3696a975, .b64 = "VprUGNH+5NnNRaORxgH/ySrZFQFDL+4VAodhfBNinmn8cg==" }, + .{ .seed = 0x86689478a7a7e8fa, .hash = 0xda2e5a5a5e971521, .b64 = "gc1xZaY+q0nPcUvOOnWnT3bqfmT/geth/f7Dm2e/DemMfk4=" }, + .{ .seed = 0x4ec948b8e7f27288, .hash = 0x7935d4befa056b2b, .b64 = "Mr35fIxqx1ukPAL0su1yFuzzAU3wABCLZ8+ZUFsXn47UmAph" }, + .{ .seed = 0xce46c7213c10032, .hash = 0x38dd541ca95420fe, .b64 = "A9G8pw2+m7+rDtWYAdbl8tb2fT7FFo4hLi2vAsa5Y8mKH3CX3g==" }, + .{ .seed = 0xf63e96ee6f32a8b6, .hash = 0xcc06c7a4963f967f, .b64 = "DFaJGishGwEHDdj9ixbCoaTjz9KS0phLNWHVVdFsM93CvPft3hM=" }, + .{ .seed = 0x1cfe85e65fc5225, .hash = 0xbf0f6f66e232fb20, .b64 = "7+Ugx+Kr3aRNgYgcUxru62YkTDt5Hqis+2po81hGBkcrJg4N0uuy" }, + .{ .seed = 0x45c474f1cee1d2e8, .hash = 0xf7efb32d373fe71a, .b64 = "H2w6O8BUKqu6Tvj2xxaecxEI2wRgIgqnTTG1WwOgDSINR13Nm4d4Vg==" }, + .{ .seed = 0x6e024e14015f329c, .hash = 0xe2e64634b1c12660, .b64 = "1XBMnIbqD5jy65xTDaf6WtiwtdtQwv1dCVoqpeKj+7cTR1SaMWMyI04=" }, + .{ .seed = 0x760c40502103ae1c, .hash = 0x285b8fd1638e306d, .b64 = "znZbdXG2TSFrKHEuJc83gPncYpzXGbAebUpP0XxzH0rpe8BaMQ17nDbt" }, + .{ .seed = 0x17fd05c3c560c320, .hash = 0x658e8a4e3b714d6c, .b64 = "ylu8Atu13j1StlcC1MRMJJXIl7USgDDS22HgVv0WQ8hx/8pNtaiKB17hCQ==" }, + .{ .seed = 0x8b34200a6f8e90d9, .hash = 0xf391fb968e0eb398, .b64 = "M6ZVVzsd7vAvbiACSYHioH/440dp4xG2mLlBnxgiqEvI/aIEGpD0Sf4VS0g=" }, + .{ .seed = 0x6be89e50818bdf69, .hash = 0x744a9ea0cc144bf2, .b64 = "li3oFSXLXI+ubUVGJ4blP6mNinGKLHWkvGruun85AhVn6iuMtocbZPVhqxzn" }, + .{ .seed = 0xfb389773315b47d8, .hash = 0x12636f2be11012f1, .b64 = "kFuQHuUCqBF3Tc3hO4dgdIp223ShaCoog48d5Do5zMqUXOh5XpGK1t5XtxnfGA==" }, + .{ .seed = 0x4f2512a23f61efee, .hash = 0x29c57de825948f80, .b64 = "jWmOad0v0QhXVJd1OdGuBZtDYYS8wBVHlvOeTQx9ZZnm8wLEItPMeihj72E0nWY=" }, + .{ .seed = 0x59ccd92fc16c6fda, .hash = 0x58c6f99ab0d1c021, .b64 = "z+DHU52HaOQdW4JrZwDQAebEA6rm13Zg/9lPYA3txt3NjTBqFZlOMvTRnVzRbl23" }, + .{ .seed = 0x25c5a7f5bd330919, .hash = 0x13e7b5a7b82fe3bb, .b64 = "MmBiGDfYeTayyJa/tVycg+rN7f9mPDFaDc+23j0TlW9094er0ADigsl4QX7V3gG/qw==" }, + .{ .seed = 0x51df4174d34c97d7, .hash = 
0x10fbc87901e02b63, .b64 = "774RK+9rOL4iFvs1q2qpo/JVc/I39buvNjqEFDtDvyoB0FXxPI2vXqOrk08VPfIHkmU=" }, + .{ .seed = 0x80ce6d76f89cb57, .hash = 0xa24c9184901b748b, .b64 = "+slatXiQ7/2lK0BkVUI1qzNxOOLP3I1iK6OfHaoxgqT63FpzbElwEXSwdsryq3UlHK0I" }, + .{ .seed = 0x20961c911965f684, .hash = 0xcac4fd4c5080e581, .b64 = "64mVTbQ47dHjHlOHGS/hjJwr/K2frCNpn87exOqMzNUVYiPKmhCbfS7vBUce5tO6Ec9osQ==" }, + .{ .seed = 0x4e5b926ec83868e7, .hash = 0xc38bdb7483ba68e1, .b64 = "fIsaG1r530SFrBqaDj1kqE0AJnvvK8MNEZbII2Yw1OK77v0V59xabIh0B5axaz/+a2V5WpA=" }, + .{ .seed = 0x3927b30b922eecef, .hash = 0xdb2a8069b2ceaffa, .b64 = "PGih0zDEOWCYGxuHGDFu9Ivbff/iE7BNUq65tycTR2R76TerrXALRosnzaNYO5fjFhTi+CiS" }, + .{ .seed = 0xbd0291284a49b61c, .hash = 0xdf9fe91d0d1c7887, .b64 = "RnpA/zJnEnnLjmICORByRVb9bCOgxF44p3VMiW10G7PvW7IhwsWajlP9kIwNA9FjAD2GoQHk2Q==" }, + .{ .seed = 0x73a77c575bcc956, .hash = 0xe83f49e96e2e6a08, .b64 = "qFklMceaTHqJpy2qavJE+EVBiNFOi6OxjOA3LeIcBop1K7w8xQi3TrDk+BrWPRIbfprszSaPfrI=" }, + .{ .seed = 0x766a0e2ade6d09a6, .hash = 0xc69e61b62ca2b62, .b64 = "cLbfUtLl3EcQmITWoTskUR8da/VafRDYF/ylPYwk7/zazk6ssyrzxMN3mmSyvrXR2yDGNZ3WDrTT" }, + .{ .seed = 0x2599f4f905115869, .hash = 0xb4a4f3f85f8298fe, .b64 = "s/Jf1+FbsbCpXWPTUSeWyMH6e4CvTFvPE5Fs6Z8hvFITGyr0dtukHzkI84oviVLxhM1xMxrMAy1dbw==" }, + .{ .seed = 0xd8256e5444d21e53, .hash = 0x167a1b39e1e95f41, .b64 = "FvyQ00+j7nmYZVQ8hI1Edxd0AWplhTfWuFGiu34AK5X8u2hLX1bE97sZM0CmeLe+7LgoUT1fJ/axybE=" }, + .{ .seed = 0xf664a91333fb8dfd, .hash = 0xf8a2a5649855ee41, .b64 = "L8ncxMaYLBH3g9buPu8hfpWZNlOF7nvWLNv9IozH07uQsIBWSKxoPy8+LW4tTuzC6CIWbRGRRD1sQV/4" }, + .{ .seed = 0x9625b859be372cd1, .hash = 0x27992565b595c498, .b64 = "CDK0meI07yrgV2kQlZZ+wuVqhc2NmzqeLH7bmcA6kchsRWFPeVF5Wqjjaj556ABeUoUr3yBmfU3kWOakkg==" }, + .{ .seed = 0x7b99940782e29898, .hash = 0x3e08cca5b71f9346, .b64 = "d23/vc5ONh/HkMiq+gYk4gaCNYyuFKwUkvn46t+dfVcKfBTYykr4kdvAPNXGYLjM4u1YkAEFpJP+nX7eOvs=" }, + .{ .seed = 0x4fe12fa5383b51a8, .hash = 0xad406b10c770a6d2, .b64 = "NUR3SRxBkxTSbtQORJpu/GdR6b/h6sSGfsMj/KFd99ahbh+9r7LSgSGmkGVB/mGoT0pnMTQst7Lv2q6QN6Vm" }, + .{ .seed = 0xe2ccb09ac0f5b4b6, .hash = 0xd1713ce6e552bcf2, .b64 = "2BOFlcI3Z0RYDtS9T9Ie9yJoXlOdigpPeeT+CRujb/O39Ih5LPC9hP6RQk1kYESGyaLZZi3jtabHs7DiVx/VDg==" }, + .{ .seed = 0x7d0a37adbd7b753b, .hash = 0x753b287194c73ad3, .b64 = "FF2HQE1FxEvWBpg6Z9zAMH+Zlqx8S1JD/wIlViL6ZDZY63alMDrxB0GJQahmAtjlm26RGLnjW7jmgQ4Ie3I+014=" }, + .{ .seed = 0xd3ae96ef9f7185f2, .hash = 0x5ae41a95f600af1c, .b64 = "tHmO7mqVL/PX11nZrz50Hc+M17Poj5lpnqHkEN+4bpMx/YGbkrGOaYjoQjgmt1X2QyypK7xClFrjeWrCMdlVYtbW" }, + .{ .seed = 0x4fb88ea63f79a0d8, .hash = 0x4a61163b86a8bb4c, .b64 = "/WiHi9IQcxRImsudkA/KOTqGe8/gXkhKIHkjddv5S9hi02M049dIK3EUyAEjkjpdGLUs+BN0QzPtZqjIYPOgwsYE9g==" }, + .{ .seed = 0xed564e259bb5ebe9, .hash = 0x42eeaa79e760c7e4, .b64 = "qds+1ExSnU11L4fTSDz/QE90g4Jh6ioqSh3KDOTOAo2pQGL1k/9CCC7J23YF27dUTzrWsCQA2m4epXoCc3yPHb3xElA=" }, + .{ .seed = 0x3e3256b60c428000, .hash = 0x698df622ef465b0a, .b64 = "8FVYHx40lSQPTHheh08Oq0/pGm2OlG8BEf8ezvAxHuGGdgCkqpXIueJBF2mQJhTfDy5NncO8ntS7vaKs7sCNdDaNGOEi" }, + .{ .seed = 0xfb05bad59ec8705, .hash = 0x157583111e1a6026, .b64 = "4ZoEIrJtstiCkeew3oRzmyJHVt/pAs2pj0HgHFrBPztbQ10NsQ/lM6DM439QVxpznnBSiHMgMQJhER+70l72LqFTO1JiIQ==" }, + .{ .seed = 0xafdc251dbf97b5f8, .hash = 0xaa1388f078e793e0, .b64 = "hQPtaYI+wJyxXgwD5n8jGIKFKaFA/P83KqCKZfPthnjwdOFysqEOYwAaZuaaiv4cDyi9TyS8hk5cEbNP/jrI7q6pYGBLbsM=" }, + .{ .seed = 0x10ec9c92ddb5dcbc, .hash = 0xf10d68d0f3309360, .b64 = "S4gpMSKzMD7CWPsSfLeYyhSpfWOntyuVZdX1xSBjiGvsspwOZcxNKCRIOqAA0moUfOh3I5+juQV4rsqYElMD/gWfDGpsWZKQ" 
}, + .{ .seed = 0x9a767d5822c7dac4, .hash = 0x2af056184457a3de, .b64 = "oswxop+bthuDLT4j0PcoSKby4LhF47ZKg8K17xxHf74UsGCzTBbOz0MM8hQEGlyqDT1iUiAYnaPaUpL2mRK0rcIUYA4qLt5uOw==" }, + .{ .seed = 0xee46254080d6e2db, .hash = 0x6d0058e1590b2489, .b64 = "0II/697p+BtLSjxj5989OXI004TogEb94VUnDzOVSgMXie72cuYRvTFNIBgtXlKfkiUjeqVpd4a+n5bxNOD1TGrjQtzKU5r7obo=" }, + .{ .seed = 0xbbb669588d8bf398, .hash = 0x638f287f68817f12, .b64 = "E84YZW2qipAlMPmctrg7TKlwLZ68l4L+c0xRDUfyyFrA4MAti0q9sHq3TDFviH0Y+Kq3tEE5srWFA8LM9oomtmvm5PYxoaarWPLc" }, + .{ .seed = 0xdc2afaa529beef44, .hash = 0xc46b71fecefd5467, .b64 = "x3pa4HIElyZG0Nj7Vdy9IdJIR4izLmypXw5PCmZB5y68QQ4uRaVVi3UthsoJROvbjDJkP2DQ6L/eN8pFeLFzNPKBYzcmuMOb5Ull7w==" }, + .{ .seed = 0xf1f67391d45013a8, .hash = 0x2c8e94679d964e0a, .b64 = "jVDKGYIuWOP/QKLdd2wi8B2VJA8Wh0c8PwrXJVM8FOGM3voPDVPyDJOU6QsBDPseoR8uuKd19OZ/zAvSCB+zlf6upAsBlheUKgCfKww=" }, + .{ .seed = 0x16fce2b8c65a3429, .hash = 0x8612b797ce22503a, .b64 = "mkquunhmYe1aR2wmUz4vcvLEcKBoe6H+kjUok9VUn2+eTSkWs4oDDtJvNCWtY5efJwg/j4PgjRYWtqnrCkhaqJaEvkkOwVfgMIwF3e+d" }, + .{ .seed = 0xf4b096699f49fe67, .hash = 0x59f929babfba7170, .b64 = "fRelvKYonTQ+s+rnnvQw+JzGfFoPixtna0vzcSjiDqX5s2Kg2//UGrK+AVCyMUhO98WoB1DDbrsOYSw2QzrcPe0+3ck9sePvb+Q/IRaHbw==" }, + .{ .seed = 0xca584c4bc8198682, .hash = 0x9527556923fb49a0, .b64 = "DUwXFJzagljo44QeJ7/6ZKw4QXV18lhkYT2jglMr8WB3CHUU4vdsytvw6AKv42ZcG6fRkZkq9fpnmXy6xG0aO3WPT1eHuyFirAlkW+zKtwg=" }, + .{ .seed = 0xed269fc3818b6aad, .hash = 0x1039ab644f5e150b, .b64 = "cYmZCrOOBBongNTr7e4nYn52uQUy2mfe48s50JXx2AZ6cRAt/xRHJ5QbEoEJOeOHsJyM4nbzwFm++SlT6gFZZHJpkXJ92JkR86uS/eV1hJUR" }, + .{ .seed = 0x33f253cbb8fe66a8, .hash = 0x7816c83f3aa05e6d, .b64 = "EXeHBDfhwzAKFhsMcH9+2RHwV+mJaN01+9oacF6vgm8mCXRd6jeN9U2oAb0of5c5cO4i+Vb/LlHZSMI490SnHU0bejhSCC2gsC5d2K30ER3iNA==" }, + .{ .seed = 0xd0b76b2c1523d99c, .hash = 0xf51d2f564518c619, .b64 = "FzkzRYoNjkxFhZDso94IHRZaJUP61nFYrh5MwDwv9FNoJ5jyNCY/eazPZk+tbmzDyJIGw2h3GxaWZ9bSlsol/vK98SbkMKCQ/wbfrXRLcDzdd/8=" }, + .{ .seed = 0xfd28f0811a2a237f, .hash = 0x67d494cff03ac004, .b64 = "Re4aXISCMlYY/XsX7zkIFR04ta03u4zkL9dVbLXMa/q6hlY/CImVIIYRN3VKP4pnd0AUr/ugkyt36JcstAInb4h9rpAGQ7GMVOgBniiMBZ/MGU7H" }, + .{ .seed = 0x6261fb136482e84, .hash = 0x2802d636ced1cfbb, .b64 = "ueLyMcqJXX+MhO4UApylCN9WlTQ+ltJmItgG7vFUtqs2qNwBMjmAvr5u0sAKd8jpzV0dDPTwchbIeAW5zbtkA2NABJV6hFM48ib4/J3A5mseA3cS8w==" }, + .{ .seed = 0x458efc750bca7c3a, .hash = 0xf64e20bad771cb12, .b64 = "6Si7Yi11L+jZMkwaN+GUuzXMrlvEqviEkGOilNq0h8TdQyYKuFXzkYc/q74gP3pVCyiwz9KpVGMM9vfnq36riMHRknkmhQutxLZs5fbmOgEO69HglCU=" }, + .{ .seed = 0xa7e69ff84e5e7c27, .hash = 0xb9a6cf84a83e15e, .b64 = "Q6AbOofGuTJOegPh9Clm/9crtUMQqylKrTc1fhfJo1tqvpXxhU4k08kntL1RG7woRnFrVh2UoMrL1kjin+s9CanT+y4hHwLqRranl9FjvxfVKm3yvg68" }, + .{ .seed = 0x3c59bfd0c29efe9e, .hash = 0x8da6630319609301, .b64 = "ieQEbIPvqY2YfIjHnqfJiO1/MIVRk0RoaG/WWi3kFrfIGiNLCczYoklgaecHMm/1sZ96AjO+a5stQfZbJQwS7Sc1ODABEdJKcTsxeW2hbh9A6CFzpowP1A==" }, + .{ .seed = 0x10befacc6afd298d, .hash = 0x40946a86e2a996f3, .b64 = "zQUv8hFB3zh2GGl3KTvCmnfzE+SUgQPVaSVIELFX5H9cE3FuVFGmymkPQZJLAyzC90Cmi8GqYCvPqTuAAB//XTJxy4bCcVArgZG9zJXpjowpNBfr3ngWrSE=" }, + .{ .seed = 0x41d5320b0a38efa7, .hash = 0xcab7f5997953fa76, .b64 = "US4hcC1+op5JKGC7eIs8CUgInjKWKlvKQkapulxW262E/B2ye79QxOexf188u2mFwwe3WTISJHRZzS61IwljqAWAWoBAqkUnW8SHmIDwHUP31J0p5sGdP47L" }, + .{ .seed = 0x58db1c7450fe17f3, .hash = 0x39129ca0e04fc465, .b64 = "9bHUWFna2LNaGF6fQLlkx1Hkt24nrkLE2CmFdWgTQV3FFbUe747SSqYw6ebpTa07MWSpWRPsHesVo2B9tqHbe7eQmqYebPDFnNqrhSdZwFm9arLQVs+7a3Ic6A==" }, + .{ .seed = 0x6098c055a335b7a6, .hash = 
0x5238221fd685e1b8, .b64 = "Kb3DpHRUPhtyqgs3RuXjzA08jGb59hjKTOeFt1qhoINfYyfTt2buKhD6YVffRCPsgK9SeqZqRPJSyaqsa0ovyq1WnWW8jI/NhvAkZTVHUrX2pC+cD3OPYT05Dag=" }, + .{ .seed = 0x1bbacec67845a801, .hash = 0x175130c407dbcaab, .b64 = "gzxyMJIPlU+bJBwhFUCHSofZ/319LxqMoqnt3+L6h2U2+ZXJCSsYpE80xmR0Ta77Jq54o92SMH87HV8dGOaCTuAYF+lDL42SY1P316Cl0sZTS2ow3ZqwGbcPNs/1" }, + .{ .seed = 0xc419cfc7442190, .hash = 0x2f20e7536c0b0df, .b64 = "uR7V0TW+FGVMpsifnaBAQ3IGlr1wx5sKd7TChuqRe6OvUXTlD4hKWy8S+8yyOw8lQabism19vOQxfmocEOW/vzY0pEa87qHrAZy4s9fH2Bltu8vaOIe+agYohhYORQ==" }, + .{ .seed = 0xc95e510d94ba270c, .hash = 0x2742cb488a04ad56, .b64 = "1UR5eoo2aCwhacjZHaCh9bkOsITp6QunUxHQ2SfeHv0imHetzt/Z70mhyWZBalv6eAx+YfWKCUib2SHDtz/A2dc3hqUWX5VfAV7FQsghPUAtu6IiRatq4YSLpDvKZBQ=" }, + .{ .seed = 0xff1ae05c98089c3f, .hash = 0xd6afb593879ff93b, .b64 = "opubR7H63BH7OtY+Avd7QyQ25UZ8kLBdFDsBTwZlY6gA/u+x+czC9AaZMgmQrUy15DH7YMGsvdXnviTtI4eVI4aF1H9Rl3NXMKZgwFOsdTfdcZeeHVRzBBKX8jUfh1il" }, + .{ .seed = 0x90c02b8dceced493, .hash = 0xf50ad64caac0ca7f, .b64 = "DC0kXcSXtfQ9FbSRwirIn5tgPri0sbzHSa78aDZVDUKCMaBGyFU6BmrulywYX8yzvwprdLsoOwTWN2wMjHlPDqrvVHNEjnmufRDblW+nSS+xtKNs3N5xsxXdv6JXDrAB/Q==" }, + .{ .seed = 0x9f8a76697ab1aa36, .hash = 0x2ade95c4261364ae, .b64 = "BXRBk+3wEP3Lpm1y75wjoz+PgB0AMzLe8tQ1AYU2/oqrQB2YMC6W+9QDbcOfkGbeH+b7IBkt/gwCMw2HaQsRFEsurXtcQ3YwRuPz5XNaw5NAvrNa67Fm7eRzdE1+hWLKtA8=" }, + .{ .seed = 0x6ba1bf3d811a531d, .hash = 0x5c4f3299faacd07a, .b64 = "RRBSvEGYnzR9E45Aps/+WSnpCo/X7gJLO4DRnUqFrJCV/kzWlusLE/6ZU6RoUf2ROwcgEvUiXTGjLs7ts3t9SXnJHxC1KiOzxHdYLMhVvgNd3hVSAXODpKFSkVXND55G2L1W" }, + .{ .seed = 0x6a418974109c67b4, .hash = 0xfffe3bff0ae5e9bc, .b64 = "jeh6Qazxmdi57pa9S3XSnnZFIRrnc6s8QLrah5OX3SB/V2ErSPoEAumavzQPkdKF1/SfvmdL+qgF1C+Yawy562QaFqwVGq7+tW0yxP8FStb56ZRgNI4IOmI30s1Ei7iops9Uuw==" }, + .{ .seed = 0x8472f1c2b3d230a3, .hash = 0x1db785c0005166e4, .b64 = "6QO5nnDrY2/wrUXpltlKy2dSBcmK15fOY092CR7KxAjNfaY+aAmtWbbzQk3MjBg03x39afSUN1fkrWACdyQKRaGxgwq6MGNxI6W+8DLWJBHzIXrntrE/ml6fnNXEpxplWJ1vEs4=" }, + .{ .seed = 0x5e06068f884e73a7, .hash = 0xea000d962ad18418, .b64 = "0oPxeEHhqhcFuwonNfLd5jF3RNATGZS6NPoS0WklnzyokbTqcl4BeBkMn07+fDQv83j/BpGUwcWO05f3+DYzocfnizpFjLJemFGsls3gxcBYxcbqWYev51tG3lN9EvRE+X9+Pwww" }, + .{ .seed = 0x55290b1a8f170f59, .hash = 0xe42aef38359362d9, .b64 = "naSBSjtOKgAOg8XVbR5cHAW3Y+QL4Pb/JO9/oy6L08wvVRZqo0BrssMwhzBP401Um7A4ppAupbQeJFdMrysY34AuSSNvtNUy5VxjNECwiNtgwYHw7yakDUv8WvonctmnoSPKENegQg==" }, + .{ .seed = 0x5501cfd83dfe706a, .hash = 0xc8e95657348a3891, .b64 = "vPyl8DxVeRe1OpilKb9KNwpGkQRtA94UpAHetNh+95V7nIW38v7PpzhnTWIml5kw3So1Si0TXtIUPIbsu32BNhoH7QwFvLM+JACgSpc5e3RjsL6Qwxxi11npwxRmRUqATDeMUfRAjxg=" }, + .{ .seed = 0xe43ed13d13a66990, .hash = 0xc162eca864f238c6, .b64 = "QC9i2GjdTMuNC1xQJ74ngKfrlA4w3o58FhvNCltdIpuMhHP1YsDA78scQPLbZ3OCUgeQguYf/vw6zAaVKSgwtaykqg5ka/4vhz4hYqWU5ficdXqClHl+zkWEY26slCNYOM5nnDlly8Cj" }, + .{ .seed = 0xdf43bc375cf5283f, .hash = 0xbe1fb373e20579ad, .b64 = "7CNIgQhAHX27nxI0HeB5oUTnTdgKpRDYDKwRcXfSFGP1XeT9nQF6WKCMjL1tBV6x7KuJ91GZz11F4c+8s+MfqEAEpd4FHzamrMNjGcjCyrVtU6y+7HscMVzr7Q/ODLcPEFztFnwjvCjmHw==" }, + .{ .seed = 0x8112b806d288d7b5, .hash = 0x628a1d4f40aa6ffd, .b64 = "Qa/hC2RPXhANSospe+gUaPfjdK/yhQvfm4cCV6/pdvCYWPv8p1kMtKOX3h5/8oZ31fsmx4Axphu5qXJokuhZKkBUJueuMpxRyXpwSWz2wELx5glxF7CM0Fn+OevnkhUn5jsPlG2r5jYlVn8=" }, + .{ .seed = 0xd52a18abb001cb46, .hash = 0xa87bdb7456340f90, .b64 = "kUw/0z4l3a89jTwN5jpG0SHY5km/IVhTjgM5xCiPRLncg40aqWrJ5vcF891AOq5hEpSq0bUCJUMFXgct7kvnys905HjerV7Vs1Gy84tgVJ70/2+pAZTsB/PzNOE/G6sOj4+GbTzkQu819OLB" }, + .{ .seed = 0xe12b76a2433a1236, .hash = 
0x5960ef3ba982c801, .b64 = "VDdfSDbO8Tdj3T5W0XM3EI7iHh5xpIutiM6dvcJ/fhe23V/srFEkDy5iZf/VnA9kfi2C79ENnFnbOReeuZW1b3MUXB9lgC6U4pOTuC+jHK3Qnpyiqzj7h3ISJSuo2pob7vY6VHZo6Fn7exEqHg==" }, + .{ .seed = 0x175bf7319cf1fa00, .hash = 0x5026586df9a431ec, .b64 = "Ldfvy3ORdquM/R2fIkhH/ONi69mcP1AEJ6n/oropwecAsLJzQSgezSY8bEiEs0VnFTBBsW+RtZY6tDj03fnb3amNUOq1b7jbqyQkL9hpl+2Z2J8IaVSeownWl+bQcsR5/xRktIMckC5AtF4YHfU=" }, + .{ .seed = 0xd63d57b3f67525ae, .hash = 0xfe4b8a20fdf0840b, .b64 = "BrbNpb42+VzZAjJw6QLirXzhweCVRfwlczzZ0VX2xluskwBqyfnGovz5EuX79JJ31VNXa5hTkAyQat3lYKRADTdAdwE5PqM1N7YaMqqsqoAAAeuYVXuk5eWCykYmClNdSspegwgCuT+403JigBzi" }, + .{ .seed = 0x933faea858832b73, .hash = 0xdcb761867da7072f, .b64 = "gB3NGHJJvVcuPyF0ZSvHwnWSIfmaI7La24VMPQVoIIWF7Z74NltPZZpx2f+cocESM+ILzQW9p+BC8x5IWz7N4Str2WLGKMdgmaBfNkEhSHQDU0IJEOnpUt0HmjhFaBlx0/LTmhua+rQ6Wup8ezLwfg==" }, + .{ .seed = 0x53d061e5f8e7c04f, .hash = 0xc10d4653667275b7, .b64 = "hTKHlRxx6Pl4gjG+6ksvvj0CWFicUg3WrPdSJypDpq91LUWRni2KF6+81ZoHBFhEBrCdogKqeK+hy9bLDnx7g6rAFUjtn1+cWzQ2YjiOpz4+ROBB7lnwjyTGWzJD1rXtlso1g2qVH8XJVigC5M9AIxM=" }, + .{ .seed = 0xdb4124556dd515e0, .hash = 0x727720deec13110b, .b64 = "IWQBelSQnhrr0F3BhUpXUIDauhX6f95Qp+A0diFXiUK7irwPG1oqBiqHyK/SH/9S+rln9DlFROAmeFdH0OCJi2tFm4afxYzJTFR4HnR4cG4x12JqHaZLQx6iiu6CE3rtWBVz99oAwCZUOEXIsLU24o2Y" }, + .{ .seed = 0x4fb31a0dd681ee71, .hash = 0x710b009662858dc9, .b64 = "TKo+l+1dOXdLvIrFqeLaHdm0HZnbcdEgOoLVcGRiCbAMR0j5pIFw8D36tefckAS1RCFOH5IgP8yiFT0Gd0a2hI3+fTKA7iK96NekxWeoeqzJyctc6QsoiyBlkZerRxs5RplrxoeNg29kKDTM0K94mnhD9g==" }, + .{ .seed = 0x27cc72eefa138e4c, .hash = 0xfbf8f7a3ecac1eb7, .b64 = "YU4e7G6EfQYvxCFoCrrT0EFgVLHFfOWRTJQJ5gxM3G2b+1kJf9YPrpsxF6Xr6nYtS8reEEbDoZJYqnlk9lXSkVArm88Cqn6d25VCx3+49MqC0trIlXtb7SXUUhwpJK16T0hJUfPH7s5cMZXc6YmmbFuBNPE=" }, + .{ .seed = 0x44bc2dfba4bd3ced, .hash = 0xb6fc4fcd0722e3df, .b64 = "/I/eImMwPo1U6wekNFD1Jxjk9XQVi1D+FPdqcHifYXQuP5aScNQfxMAmaPR2XhuOQhADV5tTVbBKwCDCX4E3jcDNHzCiPvViZF1W27txaf2BbFQdwKrNCmrtzcluBFYu0XZfc7RU1RmxK/RtnF1qHsq/O4pp" }, + .{ .seed = 0x242da1e3a439bed8, .hash = 0x7cb86dcc55104aac, .b64 = "CJTT9WGcY2XykTdo8KodRIA29qsqY0iHzWZRjKHb9alwyJ7RZAE3V5Juv4MY3MeYEr1EPCCMxO7yFXqT8XA8YTjaMp3bafRt17Pw8JC4iKJ1zN+WWKOESrj+3aluGQqn8z1EzqY4PH7rLG575PYeWsP98BugdA==" }, + .{ .seed = 0xdc559c746e35c139, .hash = 0x19e71e9b45c3a51e, .b64 = "ZlhyQwLhXQyIUEnMH/AEW27vh9xrbNKJxpWGtrEmKhd+nFqAfbeNBQjW0SfG1YI0xQkQMHXjuTt4P/EpZRtA47ibZDVS8TtaxwyBjuIDwqcN09eCtpC+Ls+vWDTLmBeDM3u4hmzz4DQAYsLiZYSJcldg9Q3wszw=" }, + .{ .seed = 0xd0b0350275b9989, .hash = 0x51de38573c2bea48, .b64 = "v2KU8y0sCrBghmnm8lzGJlwo6D6ObccAxCf10heoDtYLosk4ztTpLlpSFEyu23MLA1tJkcgRko04h19QMG0mOw/wc93EXAweriBqXfvdaP85sZABwiKO+6rtS9pacRVpYYhHJeVTQ5NzrvBvi1huxAr+xswhVMfL" }, + .{ .seed = 0xb04489e41d17730c, .hash = 0xa73ab6996d6df158, .b64 = "QhKlnIS6BuVCTQsnoE67E/yrgogE8EwO7xLaEGei26m0gEU4OksefJgppDh3X0x0Cs78Dr9IHK5b977CmZlrTRmwhlP8pM+UzXPNRNIZuN3ntOum/QhUWP8SGpirheXENWsXMQ/nxtxakyEtrNkKk471Oov9juP8oQ==" }, + .{ .seed = 0x2217285eb4572156, .hash = 0x55ef2b8c930817b2, .b64 = "/ZRMgnoRt+Uo6fUPr9FqQvKX7syhgVqWu+WUSsiQ68UlN0efSP6Eced5gJZL6tg9gcYJIkhjuQNITU0Q3TjVAnAcobgbJikCn6qZ6pRxKBY4MTiAlfGD3T7R7hwJwx554MAy++Zb/YUFlnCaCJiwQMnowF7aQzwYFCo=" }, + .{ .seed = 0x12c2e8e68aede73b, .hash = 0xb2850bf5fae87157, .b64 = "NB7tU5fNE8nI+SXGfipc7sRkhnSkUF1krjeo6k+8FITaAtdyz+o7mONgXmGLulBPH9bEwyYhKNVY0L+njNQrZ9YC2aXsFD3PdZsxAFaBT3VXEzh+NGBTjDASNL3mXyS8Yv1iThGfHoY7T4aR0NYGJ+k+pR6f+KrPC96M" }, + .{ .seed = 0x4d612125bdc4fd00, .hash = 0xecf3de1acd04651f, .b64 = 
"8T6wrqCtEO6/rwxF6lvMeyuigVOLwPipX/FULvwyu+1wa5sQGav/2FsLHUVn6cGSi0LlFwLewGHPFJDLR0u4t7ZUyM//x6da0sWgOa5hzDqjsVGmjxEHXiaXKW3i4iSZNuxoNbMQkIbVML+DkYu9ND0O2swg4itGeVSzXA==" }, + .{ .seed = 0x81826b553954464e, .hash = 0xcc0a40552559ff32, .b64 = "Ntf1bMRdondtMv1CYr3G80iDJ4WSAlKy5H34XdGruQiCrnRGDBa+eUi7vKp4gp3BBcVGl8eYSasVQQjn7MLvb3BjtXx6c/bCL7JtpzQKaDnPr9GWRxpBXVxKREgMM7d8lm35EODv0w+hQLfVSh8OGs7fsBb68nNWPLeeSOo=" }, + .{ .seed = 0xc2e5d345dc0ddd2d, .hash = 0xc385c374f20315b1, .b64 = "VsSAw72Ro6xks02kaiLuiTEIWBC5bgqr4WDnmP8vglXzAhixk7td926rm9jNimL+kroPSygZ9gl63aF5DCPOACXmsbmhDrAQuUzoh9ZKhWgElLQsrqo1KIjWoZT5b5QfVUXY9lSIBg3U75SqORoTPq7HalxxoIT5diWOcJQi" }, + .{ .seed = 0x3da6830a9e32631e, .hash = 0xb90208a4c7234183, .b64 = "j+loZ+C87+bJxNVebg94gU0mSLeDulcHs84tQT7BZM2rzDSLiCNxUedHr1ZWJ9ejTiBa0dqy2I2ABc++xzOLcv+//YfibtjKtYggC6/3rv0XCc7xu6d/O6xO+XOBhOWAQ+IHJVHf7wZnDxIXB8AUHsnjEISKj7823biqXjyP3g==" }, + .{ .seed = 0xc9ae5c8759b4877a, .hash = 0x58aa1ca7a4c075d9, .b64 = "f3LlpcPElMkspNtDq5xXyWU62erEaKn7RWKlo540gR6mZsNpK1czV/sOmqaq8XAQLEn68LKj6/cFkJukxRzCa4OF1a7cCAXYFp9+wZDu0bw4y63qbpjhdCl8GO6Z2lkcXy7KOzbPE01ukg7+gN+7uKpoohgAhIwpAKQXmX5xtd0=" }, +}; diff --git a/src/stdx.zig b/src/stdx.zig new file mode 100644 index 0000000..33fac5e --- /dev/null +++ b/src/stdx.zig @@ -0,0 +1,728 @@ +//! Extensions to the standard library -- things which could have been in std, but aren't. + +const std = @import("std"); +const builtin = @import("builtin"); +const assert = std.debug.assert; + +pub const BoundedArray = @import("bounded_array.zig").BoundedArray; + +pub inline fn div_ceil(numerator: anytype, denominator: anytype) @TypeOf(numerator, denominator) { + comptime { + switch (@typeInfo(@TypeOf(numerator))) { + .Int => |int| assert(int.signedness == .unsigned), + .ComptimeInt => assert(numerator >= 0), + else => @compileError("div_ceil: invalid numerator type"), + } + + switch (@typeInfo(@TypeOf(denominator))) { + .Int => |int| assert(int.signedness == .unsigned), + .ComptimeInt => assert(denominator > 0), + else => @compileError("div_ceil: invalid denominator type"), + } + } + + assert(denominator > 0); + + if (numerator == 0) return 0; + return @divFloor(numerator - 1, denominator) + 1; +} + +test "div_ceil" { + // Comptime ints. + try std.testing.expectEqual(div_ceil(0, 8), 0); + try std.testing.expectEqual(div_ceil(1, 8), 1); + try std.testing.expectEqual(div_ceil(7, 8), 1); + try std.testing.expectEqual(div_ceil(8, 8), 1); + try std.testing.expectEqual(div_ceil(9, 8), 2); + + // Unsized ints + const max = std.math.maxInt(u64); + try std.testing.expectEqual(div_ceil(@as(u64, 0), 8), 0); + try std.testing.expectEqual(div_ceil(@as(u64, 1), 8), 1); + try std.testing.expectEqual(div_ceil(@as(u64, max), 2), max / 2 + 1); + try std.testing.expectEqual(div_ceil(@as(u64, max) - 1, 2), max / 2); + try std.testing.expectEqual(div_ceil(@as(u64, max) - 2, 2), max / 2); +} + +pub const CopyPrecision = enum { exact, inexact }; + +pub inline fn copy_left( + comptime precision: CopyPrecision, + comptime T: type, + target: []T, + source: []const T, +) void { + switch (precision) { + .exact => assert(target.len == source.len), + .inexact => assert(target.len >= source.len), + } + + if (!disjoint_slices(T, T, target, source)) { + assert(@intFromPtr(target.ptr) < @intFromPtr(source.ptr)); + } + std.mem.copy(T, target, source); +} + +test "copy_left" { + const a = try std.testing.allocator.alloc(usize, 8); + defer std.testing.allocator.free(a); + + for (a, 0..) 
|*v, i| v.* = i;
+    copy_left(.exact, usize, a[0..6], a[2..]);
+    try std.testing.expect(std.mem.eql(usize, a, &.{ 2, 3, 4, 5, 6, 7, 6, 7 }));
+}
+
+pub inline fn copy_right(
+    comptime precision: CopyPrecision,
+    comptime T: type,
+    target: []T,
+    source: []const T,
+) void {
+    switch (precision) {
+        .exact => assert(target.len == source.len),
+        .inexact => assert(target.len >= source.len),
+    }
+
+    if (!disjoint_slices(T, T, target, source)) {
+        assert(@intFromPtr(target.ptr) > @intFromPtr(source.ptr));
+    }
+    std.mem.copyBackwards(T, target, source);
+}
+
+test "copy_right" {
+    const a = try std.testing.allocator.alloc(usize, 8);
+    defer std.testing.allocator.free(a);
+
+    for (a, 0..) |*v, i| v.* = i;
+    copy_right(.exact, usize, a[2..], a[0..6]);
+    try std.testing.expect(std.mem.eql(usize, a, &.{ 0, 1, 0, 1, 2, 3, 4, 5 }));
+}
+
+pub inline fn copy_disjoint(
+    comptime precision: CopyPrecision,
+    comptime T: type,
+    target: []T,
+    source: []const T,
+) void {
+    switch (precision) {
+        .exact => assert(target.len == source.len),
+        .inexact => assert(target.len >= source.len),
+    }
+
+    assert(disjoint_slices(T, T, target, source));
+    std.mem.copy(T, target, source);
+}
+
+pub inline fn disjoint_slices(comptime A: type, comptime B: type, a: []const A, b: []const B) bool {
+    return @intFromPtr(a.ptr) + a.len * @sizeOf(A) <= @intFromPtr(b.ptr) or
+        @intFromPtr(b.ptr) + b.len * @sizeOf(B) <= @intFromPtr(a.ptr);
+}
+
+test "disjoint_slices" {
+    const a = try std.testing.allocator.alignedAlloc(u8, @sizeOf(u32), 8 * @sizeOf(u32));
+    defer std.testing.allocator.free(a);
+
+    const b = try std.testing.allocator.alloc(u32, 8);
+    defer std.testing.allocator.free(b);
+
+    try std.testing.expectEqual(true, disjoint_slices(u8, u32, a, b));
+    try std.testing.expectEqual(true, disjoint_slices(u32, u8, b, a));
+
+    try std.testing.expectEqual(true, disjoint_slices(u8, u8, a, a[0..0]));
+    try std.testing.expectEqual(true, disjoint_slices(u32, u32, b, b[0..0]));
+
+    try std.testing.expectEqual(false, disjoint_slices(u8, u8, a, a[0..1]));
+    try std.testing.expectEqual(false, disjoint_slices(u8, u8, a, a[a.len - 1 .. a.len]));
+
+    try std.testing.expectEqual(false, disjoint_slices(u32, u32, b, b[0..1]));
+    try std.testing.expectEqual(false, disjoint_slices(u32, u32, b, b[b.len - 1 .. b.len]));
+
+    try std.testing.expectEqual(false, disjoint_slices(u8, u32, a, std.mem.bytesAsSlice(u32, a)));
+    try std.testing.expectEqual(false, disjoint_slices(u32, u8, b, std.mem.sliceAsBytes(b)));
+}
+
+/// Checks that a byte slice is zeroed.
+pub fn zeroed(bytes: []const u8) bool {
+    // This implementation already gets vectorized
+    // https://godbolt.org/z/46cMsPKPc
+    var byte_bits: u8 = 0;
+    for (bytes) |byte| {
+        byte_bits |= byte;
+    }
+    return byte_bits == 0;
+}
+
+const Cut = struct {
+    prefix: []const u8,
+    suffix: []const u8,
+};
+
+/// Splits the `haystack` around the first occurrence of `needle`, returning parts before and after.
+///
+/// This is a Zig version of Go's `strings.Cut` / Rust's `str::split_once`. Cut turns out to be a
+/// surprisingly versatile primitive for ad-hoc string processing. Often `std.mem.indexOf` and
+/// `std.mem.split` can be replaced with shorter and clearer code using `cut`.
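+///
+/// A hypothetical usage sketch (illustration only, not from the upstream docs):
+///
+///   if (cut("key=value", "=")) |kv| {
+///       assert(std.mem.eql(u8, kv.prefix, "key"));
+///       assert(std.mem.eql(u8, kv.suffix, "value"));
+///   }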
+pub fn cut(haystack: []const u8, needle: []const u8) ?Cut { + const index = std.mem.indexOf(u8, haystack, needle) orelse return null; + + return Cut{ + .prefix = haystack[0..index], + .suffix = haystack[index + needle.len ..], + }; +} + +/// `maybe` is the dual of `assert`: it signals that condition is sometimes true +/// and sometimes false. +/// +/// Currently we use it for documentation, but maybe one day we plug it into +/// coverage. +pub fn maybe(ok: bool) void { + assert(ok or !ok); +} + +/// Signal that something is not yet fully implemented, and abort the process. +/// +/// In VOPR, this will exit with status 0, to make it easy to find "real" failures by running +/// the simulator in a loop. +pub fn unimplemented(comptime message: []const u8) noreturn { + const full_message = "unimplemented: " ++ message; + const root = @import("root"); + if (@hasDecl(root, "Simulator")) { + root.output.info(full_message, .{}); + root.output.info("not crashing in VOPR", .{}); + std.process.exit(0); + } + @panic(full_message); +} + +/// Utility function for ad-hoc profiling. +/// +/// A thin wrapper around `std.time.Timer` which handles the boilerplate of +/// printing to stderr and formatting times in some (unspecified) readable way. +pub fn timeit() TimeIt { + return TimeIt{ .inner = std.time.Timer.start() catch unreachable }; +} + +const TimeIt = struct { + inner: std.time.Timer, + + /// Prints elapsed time to stderr and resets the internal timer. + pub fn lap(self: *TimeIt, comptime label: []const u8) void { + const label_alignment = comptime " " ** (1 + (12 -| label.len)); + + const nanos = self.inner.lap(); + std.debug.print( + label ++ ":" ++ label_alignment ++ "{}\n", + .{std.fmt.fmtDuration(nanos)}, + ); + } +}; + +pub const log = if (builtin.is_test) + // Downgrade `err` to `warn` for tests. + // Zig fails any test that does `log.err`, but we want to test those code paths here. + struct { + pub fn scoped(comptime scope: @Type(.EnumLiteral)) type { + const base = std.log.scoped(scope); + return struct { + pub const err = warn; + pub const warn = base.warn; + pub const info = base.info; + pub const debug = base.debug; + }; + } + } +else + std.log; + +/// Compare two values by directly comparing the underlying memory. +/// +/// Assert at compile time that this is a reasonable thing to do for a given `T`. That is, check +/// that: +/// - `T` doesn't have any non-deterministic padding, +/// - `T` doesn't embed any pointers. +pub fn equal_bytes(comptime T: type, a: *const T, b: *const T) bool { + comptime assert(has_unique_representation(T)); + comptime assert(!has_pointers(T)); + comptime assert(@sizeOf(T) * 8 == @bitSizeOf(T)); + + // Pick the biggest "word" for word-wise comparison, and don't try to early-return on the first + // mismatch, so that a compiler can vectorize the loop. + + const Word = inline for (.{ u64, u32, u16, u8 }) |Word| { + if (@alignOf(T) >= @alignOf(Word) and @sizeOf(T) % @sizeOf(Word) == 0) break Word; + } else unreachable; + + const a_words = std.mem.bytesAsSlice(Word, std.mem.asBytes(a)); + const b_words = std.mem.bytesAsSlice(Word, std.mem.asBytes(b)); + assert(a_words.len == b_words.len); + + var total: Word = 0; + for (a_words, 0..) |a_word, i| { + const b_word = b_words[i]; + total |= a_word ^ b_word; + } + + return total == 0; +} + +fn has_pointers(comptime T: type) bool { + switch (@typeInfo(T)) { + .Pointer => return true, + // Be conservative. 
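+        // Optionals, unions, and any other types not explicitly listed below
+        // might embed pointers, so assume that they do.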
+ else => return true, + + .Bool, .Int, .Enum => return false, + + .Array => |info| return comptime has_pointers(info.child), + .Struct => |info| { + inline for (info.fields) |field| { + if (comptime has_pointers(field.type)) return true; + } + return false; + }, + } +} + +/// Checks that a type does not have implicit padding. +pub fn no_padding(comptime T: type) bool { + comptime switch (@typeInfo(T)) { + .Int => return @bitSizeOf(T) == 8 * @sizeOf(T), + .Array => |info| return no_padding(info.child), + .Struct => |info| { + switch (info.layout) { + .Auto => return false, + .Extern => { + for (info.fields) |field| { + if (!no_padding(field.type)) return false; + } + + // Check offsets of u128 and pseudo-u256 fields. + for (info.fields) |field| { + if (field.type == u128) { + const offset = @offsetOf(T, field.name); + if (offset % @sizeOf(u128) != 0) return false; + + if (@hasField(T, field.name ++ "_padding")) { + if (offset % @sizeOf(u256) != 0) return false; + if (offset + @sizeOf(u128) != + @offsetOf(T, field.name ++ "_padding")) + { + return false; + } + } + } + } + + var offset = 0; + for (info.fields) |field| { + const field_offset = @offsetOf(T, field.name); + if (offset != field_offset) return false; + offset += @sizeOf(field.type); + } + return offset == @sizeOf(T); + }, + .Packed => return @bitSizeOf(T) == 8 * @sizeOf(T), + } + }, + .Enum => |info| { + maybe(info.is_exhaustive); + return no_padding(info.tag_type); + }, + .Pointer => return false, + .Union => return false, + else => return false, + }; +} + +test no_padding { + comptime for (.{ + u8, + extern struct { x: u8 }, + packed struct { x: u7, y: u1 }, + extern struct { x: extern struct { y: u64, z: u64 } }, + enum(u8) { x }, + }) |T| { + assert(no_padding(T)); + }; + + comptime for (.{ + u7, + struct { x: u7 }, + struct { x: u8 }, + struct { x: u64, y: u32 }, + extern struct { x: extern struct { y: u64, z: u32 } }, + packed struct { x: u7 }, + enum(u7) { x }, + }) |T| { + assert(!no_padding(T)); + }; +} + +pub inline fn hash_inline(value: anytype) u64 { + comptime { + assert(no_padding(@TypeOf(value))); + assert(has_unique_representation(@TypeOf(value))); + } + return low_level_hash(0, switch (@typeInfo(@TypeOf(value))) { + .Struct, .Int => std.mem.asBytes(&value), + else => @compileError("unsupported hashing for " ++ @typeName(@TypeOf(value))), + }); +} + +/// Inline version of Google Abseil "LowLevelHash" (inspired by wyhash). +/// https://github.com/abseil/abseil-cpp/blob/master/absl/hash/internal/low_level_hash.cc +inline fn low_level_hash(seed: u64, input: anytype) u64 { + const salt = [_]u64{ + 0xa0761d6478bd642f, + 0xe7037ed1a0b428db, + 0x8ebc6af09c88c6e3, + 0x589965cc75374cc3, + 0x1d8e4e27c47d124f, + }; + + var in: []const u8 = input; + var state = seed ^ salt[0]; + const starting_len = input.len; + + if (in.len > 64) { + var dup = [_]u64{ state, state }; + defer state = dup[0] ^ dup[1]; + + while (in.len > 64) : (in = in[64..]) { + for (@as([2][4]u64, @bitCast(in[0..64].*)), 0..) 
|chunk, i| { + const mix1 = @as(u128, chunk[0] ^ salt[(i * 2) + 1]) *% (chunk[1] ^ dup[i]); + const mix2 = @as(u128, chunk[2] ^ salt[(i * 2) + 2]) *% (chunk[3] ^ dup[i]); + dup[i] = @as(u64, @truncate(mix1 ^ (mix1 >> 64))); + dup[i] ^= @as(u64, @truncate(mix2 ^ (mix2 >> 64))); + } + } + } + + while (in.len > 16) : (in = in[16..]) { + const chunk = @as([2]u64, @bitCast(in[0..16].*)); + const mixed = @as(u128, chunk[0] ^ salt[1]) *% (chunk[1] ^ state); + state = @as(u64, @truncate(mixed ^ (mixed >> 64))); + } + + var chunk = std.mem.zeroes([2]u64); + if (in.len > 8) { + chunk[0] = @as(u64, @bitCast(in[0..8].*)); + chunk[1] = @as(u64, @bitCast(in[in.len - 8 ..][0..8].*)); + } else if (in.len > 3) { + chunk[0] = @as(u32, @bitCast(in[0..4].*)); + chunk[1] = @as(u32, @bitCast(in[in.len - 4 ..][0..4].*)); + } else if (in.len > 0) { + chunk[0] = (@as(u64, in[0]) << 16) | (@as(u64, in[in.len / 2]) << 8) | in[in.len - 1]; + } + + var mixed = @as(u128, chunk[0] ^ salt[1]) *% (chunk[1] ^ state); + mixed = @as(u64, @truncate(mixed ^ (mixed >> 64))); + mixed *%= (@as(u64, starting_len) ^ salt[1]); + return @as(u64, @truncate(mixed ^ (mixed >> 64))); +} + +test "hash_inline" { + for (@import("low_level_hash_vectors.zig").cases) |case| { + var buffer: [0x100]u8 = undefined; + + const b64 = std.base64.standard; + const input = buffer[0..try b64.Decoder.calcSizeForSlice(case.b64)]; + try b64.Decoder.decode(input, case.b64); + + const hash = low_level_hash(case.seed, input); + try std.testing.expectEqual(case.hash, hash); + } +} + +/// Returns a copy of `base` with fields changed according to `diff`. +/// +/// Intended exclusively for table-driven prototype-based tests. Write +/// updates explicitly in production code. +pub fn update(base: anytype, diff: anytype) @TypeOf(base) { + assert(builtin.is_test); + assert(@typeInfo(@TypeOf(base)) == .Struct); + + var updated = base; + inline for (std.meta.fields(@TypeOf(diff))) |f| { + @field(updated, f.name) = @field(diff, f.name); + } + return updated; +} + +// std.SemanticVersion requires there be no extra characters after the +// major/minor/patch numbers. But when we try to parse `uname +// --kernel-release` (note: while Linux doesn't follow semantic +// versioning, it doesn't violate it either), some distributions have +// extra characters, such as this Fedora one: 6.3.8-100.fc37.x86_64, and +// this WSL one has more than three dots: +// 5.15.90.1-microsoft-standard-WSL2. +pub fn parse_dirty_semver(dirty_release: []const u8) !std.SemanticVersion { + const release = blk: { + var last_valid_version_character_index: usize = 0; + var dots_found: u8 = 0; + for (dirty_release) |c| { + if (c == '.') dots_found += 1; + if (dots_found == 3) { + break; + } + + if (c == '.' 
or (c >= '0' and c <= '9')) { + last_valid_version_character_index += 1; + continue; + } + + break; + } + + break :blk dirty_release[0..last_valid_version_character_index]; + }; + + return std.SemanticVersion.parse(release); +} + +test "stdx.zig: parse_dirty_semver" { + const SemverTestCase = struct { + dirty_release: []const u8, + expected_version: std.SemanticVersion, + }; + + const cases = &[_]SemverTestCase{ + .{ + .dirty_release = "1.2.3", + .expected_version = std.SemanticVersion{ .major = 1, .minor = 2, .patch = 3 }, + }, + .{ + .dirty_release = "1001.843.909", + .expected_version = std.SemanticVersion{ .major = 1001, .minor = 843, .patch = 909 }, + }, + .{ + .dirty_release = "6.3.8-100.fc37.x86_64", + .expected_version = std.SemanticVersion{ .major = 6, .minor = 3, .patch = 8 }, + }, + .{ + .dirty_release = "5.15.90.1-microsoft-standard-WSL2", + .expected_version = std.SemanticVersion{ .major = 5, .minor = 15, .patch = 90 }, + }, + }; + for (cases) |case| { + const version = try parse_dirty_semver(case.dirty_release); + try std.testing.expectEqual(case.expected_version, version); + } +} + +// TODO(zig): Zig 0.11 doesn't have the statfs / fstatfs syscalls to get the type of a filesystem. +// Once those are available, this can be removed. +// The `statfs` definition used by the Linux kernel, and the magic number for tmpfs, from +// `man 2 fstatfs`. +const fsblkcnt64_t = u64; +const fsfilcnt64_t = u64; +const fsword_t = i64; +const fsid_t = u64; + +pub const TmpfsMagic = 0x01021994; +pub const StatFs = extern struct { + f_type: fsword_t, + f_bsize: fsword_t, + f_blocks: fsblkcnt64_t, + f_bfree: fsblkcnt64_t, + f_bavail: fsblkcnt64_t, + f_files: fsfilcnt64_t, + f_ffree: fsfilcnt64_t, + f_fsid: fsid_t, + f_namelen: fsword_t, + f_frsize: fsword_t, + f_flags: fsword_t, + f_spare: [4]fsword_t, +}; + +pub fn fstatfs(fd: i32, statfs_buf: *StatFs) usize { + return std.os.linux.syscall2( + if (@hasField(std.os.linux.SYS, "fstatfs64")) .fstatfs64 else .fstatfs, + @as(usize, @bitCast(@as(isize, fd))), + @intFromPtr(statfs_buf), + ); +} + +// TODO(Zig): https://github.com/ziglang/zig/issues/17592. +/// True if every value of the type `T` has a unique bit pattern representing it. +/// In other words, `T` has no unused bits and no padding. +pub fn has_unique_representation(comptime T: type) bool { + switch (@typeInfo(T)) { + else => return false, // TODO can we know if it's true for some of these types ? + + .AnyFrame, + .Enum, + .ErrorSet, + .Fn, + => return true, + + .Bool => return false, + + .Int => |info| return @sizeOf(T) * 8 == info.bits, + + .Pointer => |info| return info.size != .Slice, + + .Array => |info| return comptime has_unique_representation(info.child), + + .Struct => |info| { + // Only consider packed structs unique if they are byte aligned. + if (info.backing_integer) |backing_integer| { + return @sizeOf(T) * 8 == @bitSizeOf(backing_integer); + } + + var sum_size = @as(usize, 0); + + inline for (info.fields) |field| { + const FieldType = field.type; + if (comptime !has_unique_representation(FieldType)) return false; + sum_size += @sizeOf(FieldType); + } + + return @sizeOf(T) == sum_size; + }, + + .Vector => |info| return comptime has_unique_representation(info.child) and + @sizeOf(T) == @sizeOf(info.child) * info.len, + } +} + +// Test vectors mostly from upstream, with some added to test the packed struct case. 
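+// (`equal_bytes` and `hash_inline` above rely on this check: a type qualifies only if
+// comparing or hashing its raw bytes cannot observe undefined padding bits.)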
+test "has_unique_representation" { + const TestStruct1 = struct { + a: u32, + b: u32, + }; + + try std.testing.expect(has_unique_representation(TestStruct1)); + + const TestStruct2 = struct { + a: u32, + b: u16, + }; + + try std.testing.expect(!has_unique_representation(TestStruct2)); + + const TestStruct3 = struct { + a: u32, + b: u32, + }; + + try std.testing.expect(has_unique_representation(TestStruct3)); + + const TestStruct4 = struct { a: []const u8 }; + + try std.testing.expect(!has_unique_representation(TestStruct4)); + + const TestStruct5 = struct { a: TestStruct4 }; + + try std.testing.expect(!has_unique_representation(TestStruct5)); + + const TestStruct6 = packed struct { + a: u32, + b: u31, + }; + + try std.testing.expect(!has_unique_representation(TestStruct6)); + + const TestStruct7 = struct { + a: u64, + b: TestStruct6, + }; + + try std.testing.expect(!has_unique_representation(TestStruct7)); + + const TestStruct8 = packed struct { + a: u32, + b: u32, + }; + + try std.testing.expect(has_unique_representation(TestStruct8)); + + const TestStruct9 = struct { + a: u64, + b: TestStruct8, + }; + + try std.testing.expect(has_unique_representation(TestStruct9)); + + const TestStruct10 = packed struct { + a: TestStruct8, + b: TestStruct8, + }; + + try std.testing.expect(has_unique_representation(TestStruct10)); + + const TestUnion1 = packed union { + a: u32, + b: u16, + }; + + try std.testing.expect(!has_unique_representation(TestUnion1)); + + const TestUnion2 = extern union { + a: u32, + b: u16, + }; + + try std.testing.expect(!has_unique_representation(TestUnion2)); + + const TestUnion3 = union { + a: u32, + b: u16, + }; + + try std.testing.expect(!has_unique_representation(TestUnion3)); + + const TestUnion4 = union(enum) { + a: u32, + b: u16, + }; + + try std.testing.expect(!has_unique_representation(TestUnion4)); + + inline for ([_]type{ i0, u8, i16, u32, i64 }) |T| { + try std.testing.expect(has_unique_representation(T)); + } + inline for ([_]type{ i1, u9, i17, u33, i24 }) |T| { + try std.testing.expect(!has_unique_representation(T)); + } + + try std.testing.expect(!has_unique_representation([]u8)); + try std.testing.expect(!has_unique_representation([]const u8)); + + try std.testing.expect(has_unique_representation(@Vector(4, u16))); +} + +/// Construct a `union(Enum)` type, where each union "value" type is defined in terms of the +/// variant. 
+/// +/// That is, `EnumUnionType(Enum, TypeForVariant)` is equivalent to: +/// +/// union(Enum) { +/// // For every `e` in `Enum`: +/// e: TypeForVariant(e), +/// } +/// +pub fn EnumUnionType( + comptime Enum: type, + comptime TypeForVariant: fn (comptime variant: Enum) type, +) type { + const UnionField = std.builtin.Type.UnionField; + + var fields: []const UnionField = &[_]UnionField{}; + for (std.enums.values(Enum)) |enum_variant| { + fields = fields ++ &[_]UnionField{.{ + .name = @tagName(enum_variant), + .type = TypeForVariant(enum_variant), + .alignment = @alignOf(TypeForVariant(enum_variant)), + }}; + } + + return @Type(.{ .Union = .{ + .layout = .Auto, + .fields = fields, + .decls = &.{}, + .tag_type = Enum, + } }); +} diff --git a/src/test.zig b/src/test.zig new file mode 100644 index 0000000..8e5fd4b --- /dev/null +++ b/src/test.zig @@ -0,0 +1,654 @@ +const std = @import("std"); +const builtin = @import("builtin"); +const os = std.os; +const testing = std.testing; +const assert = std.debug.assert; + +const Time = @import("time.zig").Time; +const IO = @import("io.zig").IO; + +test "write/read/close" { + try struct { + const Context = @This(); + + io: IO, + done: bool = false, + fd: os.fd_t, + + write_buf: [20]u8 = [_]u8{97} ** 20, + read_buf: [20]u8 = [_]u8{98} ** 20, + + written: usize = 0, + read: usize = 0, + + fn run_test() !void { + const path = "test_io_write_read_close"; + const file = try std.fs.cwd().createFile(path, .{ .read = true, .truncate = true }); + defer std.fs.cwd().deleteFile(path) catch {}; + + var self: Context = .{ + .io = try IO.init(32, 0), + .fd = file.handle, + }; + defer self.io.deinit(); + + var completion: IO.Completion = undefined; + + self.io.write( + *Context, + &self, + write_callback, + &completion, + self.fd, + &self.write_buf, + 10, + ); + while (!self.done) try self.io.tick(); + + try testing.expectEqual(self.write_buf.len, self.written); + try testing.expectEqual(self.read_buf.len, self.read); + try testing.expectEqualSlices(u8, &self.write_buf, &self.read_buf); + } + + fn write_callback( + self: *Context, + completion: *IO.Completion, + result: IO.WriteError!usize, + ) void { + self.written = result catch @panic("write error"); + self.io.read(*Context, self, read_callback, completion, self.fd, &self.read_buf, 10); + } + + fn read_callback( + self: *Context, + completion: *IO.Completion, + result: IO.ReadError!usize, + ) void { + self.read = result catch @panic("read error"); + self.io.close(*Context, self, close_callback, completion, self.fd); + } + + fn close_callback( + self: *Context, + completion: *IO.Completion, + result: IO.CloseError!void, + ) void { + _ = completion; + _ = result catch @panic("close error"); + + self.done = true; + } + }.run_test(); +} + +test "accept/connect/send/receive" { + try struct { + const Context = @This(); + + io: *IO, + done: bool = false, + server: os.socket_t, + client: os.socket_t, + + accepted_sock: os.socket_t = undefined, + + send_buf: [10]u8 = [_]u8{ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 }, + recv_buf: [5]u8 = [_]u8{ 0, 1, 0, 1, 0 }, + + sent: usize = 0, + received: usize = 0, + + fn run_test() !void { + var io = try IO.init(32, 0); + defer io.deinit(); + + const address = try std.net.Address.parseIp4("127.0.0.1", 0); + const kernel_backlog = 1; + const server = try io.open_socket(address.any.family, os.SOCK.STREAM, os.IPPROTO.TCP); + defer os.closeSocket(server); + + const client = try io.open_socket(address.any.family, os.SOCK.STREAM, os.IPPROTO.TCP); + defer os.closeSocket(client); + + try os.setsockopt( 
+ server, + os.SOL.SOCKET, + os.SO.REUSEADDR, + &std.mem.toBytes(@as(c_int, 1)), + ); + try os.bind(server, &address.any, address.getOsSockLen()); + try os.listen(server, kernel_backlog); + + var client_address = std.net.Address.initIp4(undefined, undefined); + var client_address_len = client_address.getOsSockLen(); + try os.getsockname(server, &client_address.any, &client_address_len); + + var self: Context = .{ + .io = &io, + .server = server, + .client = client, + }; + + var client_completion: IO.Completion = undefined; + self.io.connect( + *Context, + &self, + connect_callback, + &client_completion, + client, + client_address, + ); + + var server_completion: IO.Completion = undefined; + self.io.accept(*Context, &self, accept_callback, &server_completion, server); + + while (!self.done) try self.io.tick(); + + try testing.expectEqual(self.send_buf.len, self.sent); + try testing.expectEqual(self.recv_buf.len, self.received); + + try testing.expectEqualSlices(u8, self.send_buf[0..self.received], &self.recv_buf); + } + + fn connect_callback( + self: *Context, + completion: *IO.Completion, + result: IO.ConnectError!void, + ) void { + _ = result catch @panic("connect error"); + + self.io.send( + *Context, + self, + send_callback, + completion, + self.client, + &self.send_buf, + ); + } + + fn send_callback( + self: *Context, + completion: *IO.Completion, + result: IO.SendError!usize, + ) void { + _ = completion; + + self.sent = result catch @panic("send error"); + } + + fn accept_callback( + self: *Context, + completion: *IO.Completion, + result: IO.AcceptError!os.socket_t, + ) void { + self.accepted_sock = result catch @panic("accept error"); + self.io.recv( + *Context, + self, + recv_callback, + completion, + self.accepted_sock, + &self.recv_buf, + ); + } + + fn recv_callback( + self: *Context, + completion: *IO.Completion, + result: IO.RecvError!usize, + ) void { + _ = completion; + + self.received = result catch @panic("recv error"); + self.done = true; + } + }.run_test(); +} + +test "timeout" { + const ms = 20; + const margin = 5; + const count = 10; + + try struct { + const Context = @This(); + + io: IO, + timer: *Time, + count: u32 = 0, + stop_time: u64 = 0, + + fn run_test() !void { + var timer = Time{}; + const start_time = timer.monotonic(); + var self: Context = .{ + .timer = &timer, + .io = try IO.init(32, 0), + }; + defer self.io.deinit(); + + var completions: [count]IO.Completion = undefined; + for (&completions) |*completion| { + self.io.timeout( + *Context, + &self, + timeout_callback, + completion, + ms * std.time.ns_per_ms, + ); + } + while (self.count < count) try self.io.tick(); + + try self.io.tick(); + try testing.expectEqual(@as(u32, count), self.count); + + try testing.expectApproxEqAbs( + @as(f64, ms), + @as(f64, @floatFromInt((self.stop_time - start_time) / std.time.ns_per_ms)), + margin, + ); + } + + fn timeout_callback( + self: *Context, + completion: *IO.Completion, + result: IO.TimeoutError!void, + ) void { + _ = completion; + _ = result catch @panic("timeout error"); + + if (self.stop_time == 0) self.stop_time = self.timer.monotonic(); + self.count += 1; + } + }.run_test(); +} + +test "submission queue full" { + const ms = 20; + const count = 10; + + try struct { + const Context = @This(); + + io: IO, + count: u32 = 0, + + fn run_test() !void { + var self: Context = .{ .io = try IO.init(1, 0) }; + defer self.io.deinit(); + + var completions: [count]IO.Completion = undefined; + for (&completions) |*completion| { + self.io.timeout( + *Context, + &self, + 
timeout_callback,
+                    completion,
+                    ms * std.time.ns_per_ms,
+                );
+            }
+            while (self.count < count) try self.io.tick();
+
+            try self.io.tick();
+            try testing.expectEqual(@as(u32, count), self.count);
+        }
+
+        fn timeout_callback(
+            self: *Context,
+            completion: *IO.Completion,
+            result: IO.TimeoutError!void,
+        ) void {
+            _ = completion;
+            _ = result catch @panic("timeout error");
+
+            self.count += 1;
+        }
+    }.run_test();
+}
+
+test "tick to wait" {
+    // Use only IO.tick() to see if pending IO is actually processed
+
+    try struct {
+        const Context = @This();
+
+        io: IO,
+        accepted: os.socket_t = IO.INVALID_SOCKET,
+        connected: bool = false,
+        received: bool = false,
+
+        fn run_test() !void {
+            var self: Context = .{ .io = try IO.init(1, 0) };
+            defer self.io.deinit();
+
+            const address = try std.net.Address.parseIp4("127.0.0.1", 0);
+            const kernel_backlog = 1;
+
+            const server = try self.io.open_socket(address.any.family, os.SOCK.STREAM, os.IPPROTO.TCP);
+            defer os.closeSocket(server);
+
+            try os.setsockopt(
+                server,
+                os.SOL.SOCKET,
+                os.SO.REUSEADDR,
+                &std.mem.toBytes(@as(c_int, 1)),
+            );
+            try os.bind(server, &address.any, address.getOsSockLen());
+            try os.listen(server, kernel_backlog);
+
+            var client_address = std.net.Address.initIp4(undefined, undefined);
+            var client_address_len = client_address.getOsSockLen();
+            try os.getsockname(server, &client_address.any, &client_address_len);
+
+            const client = try self.io.open_socket(client_address.any.family, os.SOCK.STREAM, os.IPPROTO.TCP);
+            defer os.closeSocket(client);
+
+            // Start the accept
+            var server_completion: IO.Completion = undefined;
+            self.io.accept(*Context, &self, accept_callback, &server_completion, server);
+
+            // Start the connect
+            var client_completion: IO.Completion = undefined;
+            self.io.connect(
+                *Context,
+                &self,
+                connect_callback,
+                &client_completion,
+                client,
+                client_address,
+            );
+
+            // Tick the IO to drain the accept & connect completions
+            assert(!self.connected);
+            assert(self.accepted == IO.INVALID_SOCKET);
+
+            while (self.accepted == IO.INVALID_SOCKET or !self.connected)
+                try self.io.tick();
+
+            assert(self.connected);
+            assert(self.accepted != IO.INVALID_SOCKET);
+            defer os.closeSocket(self.accepted);
+
+            // Start receiving on the client
+            var recv_completion: IO.Completion = undefined;
+            var recv_buffer: [64]u8 = undefined;
+            @memset(&recv_buffer, 0xaa);
+            self.io.recv(
+                *Context,
+                &self,
+                recv_callback,
+                &recv_completion,
+                client,
+                &recv_buffer,
+            );
+
+            // Drain out the recv completion from any internal IO queues
+            try self.io.tick();
+            try self.io.tick();
+            try self.io.tick();
+
+            // Complete the recv() *outside* of the IO instance.
+            // Other tests already check .tick() with IO based completions.
+            // This simulates IO being completed by an external system
+            var send_buf = std.mem.zeroes([64]u8);
+            const wrote = try os_send(self.accepted, &send_buf, 0);
+            try testing.expectEqual(wrote, send_buf.len);
+
+            // Wait for the recv() to complete using only IO.tick().
+ // If tick is broken, then this will deadlock + assert(!self.received); + while (!self.received) { + try self.io.tick(); + } + + // Make sure the receive actually happened + assert(self.received); + try testing.expect(std.mem.eql(u8, &recv_buffer, &send_buf)); + } + + fn accept_callback( + self: *Context, + completion: *IO.Completion, + result: IO.AcceptError!os.socket_t, + ) void { + _ = completion; + + assert(self.accepted == IO.INVALID_SOCKET); + self.accepted = result catch @panic("accept error"); + } + + fn connect_callback( + self: *Context, + completion: *IO.Completion, + result: IO.ConnectError!void, + ) void { + _ = completion; + _ = result catch @panic("connect error"); + + assert(!self.connected); + self.connected = true; + } + + fn recv_callback( + self: *Context, + completion: *IO.Completion, + result: IO.RecvError!usize, + ) void { + _ = completion; + _ = result catch |err| std.debug.panic("recv error: {}", .{err}); + + assert(!self.received); + self.received = true; + } + + // TODO: use os.send() instead when it gets fixed for windows + fn os_send(sock: os.socket_t, buf: []const u8, flags: u32) !usize { + if (builtin.target.os.tag != .windows) { + return os.send(sock, buf, flags); + } + + const rc = os.windows.sendto(sock, buf.ptr, buf.len, flags, null, 0); + if (rc == os.windows.ws2_32.SOCKET_ERROR) { + switch (os.windows.ws2_32.WSAGetLastError()) { + .WSAEACCES => return error.AccessDenied, + .WSAEADDRNOTAVAIL => return error.AddressNotAvailable, + .WSAECONNRESET => return error.ConnectionResetByPeer, + .WSAEMSGSIZE => return error.MessageTooBig, + .WSAENOBUFS => return error.SystemResources, + .WSAENOTSOCK => return error.FileDescriptorNotASocket, + .WSAEAFNOSUPPORT => return error.AddressFamilyNotSupported, + .WSAEDESTADDRREQ => unreachable, // A destination address is required. + .WSAEFAULT => unreachable, // The lpBuffers, lpTo, lpOverlapped, lpNumberOfBytesSent, or lpCompletionRoutine parameters are not part of the user address space, or the lpTo parameter is too small. + .WSAEHOSTUNREACH => return error.NetworkUnreachable, + // TODO: WSAEINPROGRESS, WSAEINTR + .WSAEINVAL => unreachable, + .WSAENETDOWN => return error.NetworkSubsystemFailed, + .WSAENETRESET => return error.ConnectionResetByPeer, + .WSAENETUNREACH => return error.NetworkUnreachable, + .WSAENOTCONN => return error.SocketNotConnected, + .WSAESHUTDOWN => unreachable, // The socket has been shut down; it is not possible to WSASendTo on a socket after shutdown has been invoked with how set to SD_SEND or SD_BOTH. + .WSAEWOULDBLOCK => return error.WouldBlock, + .WSANOTINITIALISED => unreachable, // A successful WSAStartup call must occur before using this function. 
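+                    // Any other code is unexpected for a blocking send on a connected
+                    // TCP socket; let the standard library report it.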
+ else => |err| return os.windows.unexpectedWSAError(err), + } + } else { + return @as(usize, @intCast(rc)); + } + } + }.run_test(); +} + +test "pipe data over socket" { + try struct { + io: IO, + tx: Pipe, + rx: Pipe, + server: Socket = .{}, + + const buffer_size = 1 * 1024 * 1024; + + const Context = @This(); + const Socket = struct { + fd: os.socket_t = IO.INVALID_SOCKET, + completion: IO.Completion = undefined, + }; + const Pipe = struct { + socket: Socket = .{}, + buffer: []u8, + transferred: usize = 0, + }; + + fn run() !void { + const tx_buf = try testing.allocator.alloc(u8, buffer_size); + defer testing.allocator.free(tx_buf); + const rx_buf = try testing.allocator.alloc(u8, buffer_size); + defer testing.allocator.free(rx_buf); + + @memset(tx_buf, 1); + @memset(rx_buf, 0); + var self = Context{ + .io = try IO.init(32, 0), + .tx = .{ .buffer = tx_buf }, + .rx = .{ .buffer = rx_buf }, + }; + defer self.io.deinit(); + + self.server.fd = try self.io.open_socket(os.AF.INET, os.SOCK.STREAM, os.IPPROTO.TCP); + defer os.closeSocket(self.server.fd); + + const address = try std.net.Address.parseIp4("127.0.0.1", 0); + try os.setsockopt( + self.server.fd, + os.SOL.SOCKET, + os.SO.REUSEADDR, + &std.mem.toBytes(@as(c_int, 1)), + ); + + try os.bind(self.server.fd, &address.any, address.getOsSockLen()); + try os.listen(self.server.fd, 1); + + var client_address = std.net.Address.initIp4(undefined, undefined); + var client_address_len = client_address.getOsSockLen(); + try os.getsockname(self.server.fd, &client_address.any, &client_address_len); + + self.io.accept( + *Context, + &self, + on_accept, + &self.server.completion, + self.server.fd, + ); + + self.tx.socket.fd = try self.io.open_socket(os.AF.INET, os.SOCK.STREAM, os.IPPROTO.TCP); + defer os.closeSocket(self.tx.socket.fd); + + self.io.connect( + *Context, + &self, + on_connect, + &self.tx.socket.completion, + self.tx.socket.fd, + client_address, + ); + + var tick: usize = 0xdeadbeef; + while (self.rx.transferred != self.rx.buffer.len) : (tick +%= 1) { + if (tick % 61 == 0) { + const timeout_ns = tick % (10 * std.time.ns_per_ms); + try self.io.run_for_ns(@as(u63, @intCast(timeout_ns))); + } else { + try self.io.tick(); + } + } + + try testing.expect(self.server.fd != IO.INVALID_SOCKET); + try testing.expect(self.tx.socket.fd != IO.INVALID_SOCKET); + try testing.expect(self.rx.socket.fd != IO.INVALID_SOCKET); + os.closeSocket(self.rx.socket.fd); + + try testing.expectEqual(self.tx.transferred, buffer_size); + try testing.expectEqual(self.rx.transferred, buffer_size); + try testing.expect(std.mem.eql(u8, self.tx.buffer, self.rx.buffer)); + } + + fn on_accept( + self: *Context, + completion: *IO.Completion, + result: IO.AcceptError!os.socket_t, + ) void { + assert(self.rx.socket.fd == IO.INVALID_SOCKET); + assert(&self.server.completion == completion); + self.rx.socket.fd = result catch |err| std.debug.panic("accept error {}", .{err}); + + assert(self.rx.transferred == 0); + self.do_receiver(0); + } + + fn on_connect( + self: *Context, + completion: *IO.Completion, + result: IO.ConnectError!void, + ) void { + _ = result catch unreachable; + + assert(self.tx.socket.fd != IO.INVALID_SOCKET); + assert(&self.tx.socket.completion == completion); + + assert(self.tx.transferred == 0); + self.do_sender(0); + } + + fn do_sender(self: *Context, bytes: usize) void { + self.tx.transferred += bytes; + assert(self.tx.transferred <= self.tx.buffer.len); + + if (self.tx.transferred < self.tx.buffer.len) { + self.io.send( + *Context, + self, + on_send, + 
&self.tx.socket.completion,
+                    self.tx.socket.fd,
+                    self.tx.buffer[self.tx.transferred..],
+                );
+            }
+        }
+
+        fn on_send(
+            self: *Context,
+            completion: *IO.Completion,
+            result: IO.SendError!usize,
+        ) void {
+            const bytes = result catch |err| std.debug.panic("send error: {}", .{err});
+            assert(&self.tx.socket.completion == completion);
+            self.do_sender(bytes);
+        }
+
+        fn do_receiver(self: *Context, bytes: usize) void {
+            self.rx.transferred += bytes;
+            assert(self.rx.transferred <= self.rx.buffer.len);
+
+            if (self.rx.transferred < self.rx.buffer.len) {
+                self.io.recv(
+                    *Context,
+                    self,
+                    on_recv,
+                    &self.rx.socket.completion,
+                    self.rx.socket.fd,
+                    self.rx.buffer[self.rx.transferred..],
+                );
+            }
+        }
+
+        fn on_recv(
+            self: *Context,
+            completion: *IO.Completion,
+            result: IO.RecvError!usize,
+        ) void {
+            const bytes = result catch |err| std.debug.panic("recv error: {}", .{err});
+            assert(&self.rx.socket.completion == completion);
+            self.do_receiver(bytes);
+        }
+    }.run();
+}
diff --git a/src/time.zig b/src/time.zig
new file mode 100644
index 0000000..e894b6c
--- /dev/null
+++ b/src/time.zig
@@ -0,0 +1,112 @@
+const std = @import("std");
+const builtin = @import("builtin");
+
+const os = std.os;
+const assert = std.debug.assert;
+const is_darwin = builtin.target.os.tag.isDarwin();
+const is_windows = builtin.target.os.tag == .windows;
+
+pub const Time = struct {
+    const Self = @This();
+
+    /// Hardware and/or software bugs can mean that the monotonic clock may regress.
+    /// One example (of many): https://bugzilla.redhat.com/show_bug.cgi?id=448449
+    /// We crash the process for safety if this ever happens, to protect against infinite loops.
+    /// It's better to crash and come back with a valid monotonic clock than get stuck forever.
+    monotonic_guard: u64 = 0,
+
+    /// A timestamp to measure elapsed time, meaningful only on the same system, not across reboots.
+    /// Always use a monotonic timestamp if the goal is to measure elapsed time.
+    /// This clock is not affected by discontinuous jumps in the system time, for example if the
+    /// system administrator manually changes the clock.
+    pub fn monotonic(self: *Self) u64 {
+        const m = blk: {
+            // Uses QueryPerformanceCounter() on Windows, as it is the highest-precision timer
+            // available while also accounting for time spent suspended by default:
+            // https://docs.microsoft.com/en-us/windows/win32/api/realtimeapiset/nf-realtimeapiset-queryunbiasedinterrupttime#remarks
+            if (is_windows) {
+                // QPF need not be globally cached either, as it ends up being a load from read-only
+                // memory mapped into all processes by the kernel, called KUSER_SHARED_DATA (see "QpcFrequency")
+                // https://docs.microsoft.com/en-us/windows-hardware/drivers/ddi/ntddk/ns-ntddk-kuser_shared_data
+                // https://www.geoffchappell.com/studies/windows/km/ntoskrnl/inc/api/ntexapi_x/kuser_shared_data/index.htm
+                const qpc = os.windows.QueryPerformanceCounter();
+                const qpf = os.windows.QueryPerformanceFrequency();
+
+                // 10MHz (1 qpc tick every 100ns) is a common QPF on modern systems.
+                // We can optimize for this by converting to ns via a single multiply.
+                // https://github.com/microsoft/STL/blob/785143a0c73f030238ef618890fd4d6ae2b3a3a0/stl/inc/chrono#L694-L701
+                const common_qpf = 10_000_000;
+                if (qpf == common_qpf) break :blk qpc * (std.time.ns_per_s / common_qpf);
+
+                // Convert qpc to nanos using fixed point to avoid expensive extra divs and overflow.
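+                // That is: elapsed_ns = qpc * (ns_per_s / qpf), computed in 32.32 fixed point:
+                // scale = (ns_per_s << 32) / qpf, then ns = (qpc * scale) >> 32, with the
+                // multiply widened to u96 so it cannot overflow in practice.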
+ const scale = (std.time.ns_per_s << 32) / qpf; + break :blk @as(u64, @truncate((@as(u96, qpc) * scale) >> 32)); + } + + // Uses mach_continuous_time() instead of mach_absolute_time() as it counts while suspended. + // https://developer.apple.com/documentation/kernel/1646199-mach_continuous_time + // https://opensource.apple.com/source/Libc/Libc-1158.1.2/gen/clock_gettime.c.auto.html + if (is_darwin) { + const darwin = struct { + const mach_timebase_info_t = os.darwin.mach_timebase_info_data; + extern "c" fn mach_timebase_info(info: *mach_timebase_info_t) os.darwin.kern_return_t; + extern "c" fn mach_continuous_time() u64; + }; + + // mach_timebase_info() called through libc already does global caching for us + // https://opensource.apple.com/source/xnu/xnu-7195.81.3/libsyscall/wrappers/mach_timebase_info.c.auto.html + var info: darwin.mach_timebase_info_t = undefined; + if (darwin.mach_timebase_info(&info) != 0) @panic("mach_timebase_info() failed"); + + const now = darwin.mach_continuous_time(); + return (now * info.numer) / info.denom; + } + + // The true monotonic clock on Linux is not in fact CLOCK_MONOTONIC: + // CLOCK_MONOTONIC excludes elapsed time while the system is suspended (e.g. VM migration). + // CLOCK_BOOTTIME is the same as CLOCK_MONOTONIC but includes elapsed time during a suspend. + // For more detail and why CLOCK_MONOTONIC_RAW is even worse than CLOCK_MONOTONIC, + // see https://github.com/ziglang/zig/pull/933#discussion_r656021295. + var ts: os.timespec = undefined; + os.clock_gettime(os.CLOCK.BOOTTIME, &ts) catch @panic("CLOCK_BOOTTIME required"); + break :blk @as(u64, @intCast(ts.tv_sec)) * std.time.ns_per_s + @as(u64, @intCast(ts.tv_nsec)); + }; + + // "Oops!...I Did It Again" + if (m < self.monotonic_guard) @panic("a hardware/kernel bug regressed the monotonic clock"); + self.monotonic_guard = m; + return m; + } + + /// A timestamp to measure real (i.e. wall clock) time, meaningful across systems, and reboots. + /// This clock is affected by discontinuous jumps in the system time. + pub fn realtime(_: *Self) i64 { + if (is_windows) { + const kernel32 = struct { + extern "kernel32" fn GetSystemTimePreciseAsFileTime( + lpFileTime: *os.windows.FILETIME, + ) callconv(os.windows.WINAPI) void; + }; + + var ft: os.windows.FILETIME = undefined; + kernel32.GetSystemTimePreciseAsFileTime(&ft); + const ft64 = (@as(u64, ft.dwHighDateTime) << 32) | ft.dwLowDateTime; + + // FileTime is in units of 100 nanoseconds + // and uses the NTFS/Windows epoch of 1601-01-01 instead of Unix Epoch 1970-01-01. + const epoch_adjust = std.time.epoch.windows * (std.time.ns_per_s / 100); + return (@as(i64, @bitCast(ft64)) + epoch_adjust) * 100; + } + + if (is_darwin) { + // macos has supported clock_gettime() since 10.12: + // https://opensource.apple.com/source/Libc/Libc-1158.1.2/gen/clock_gettime.3.auto.html + } + + var ts: os.timespec = undefined; + os.clock_gettime(os.CLOCK.REALTIME, &ts) catch unreachable; + return @as(i64, ts.tv_sec) * std.time.ns_per_s + ts.tv_nsec; + } + + pub fn tick(_: *Self) void {} +};
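+
+// A minimal usage sketch, added for illustration (not part of the upstream module):
+// monotonic timestamps are only ever compared against each other, and the
+// monotonic_guard above ensures a given `Time` instance never observes the
+// clock running backwards.
+test "time: monotonic timestamps never regress" {
+    var time = Time{};
+    const t0 = time.monotonic();
+    const t1 = time.monotonic();
+    try std.testing.expect(t1 >= t0);
+}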