Keyboard shortcuts

Press ← or → to navigate between chapters

Press S or / to search in the book

Press ? to show this help

Press Esc to hide this help

Chapter 6: Test-Driven Development

Testing Philosophy

TDD Cycle for zbmd

// 1. Write failing test
// NOTE(review): `.data = .{...}` below is an illustrative placeholder, not
// compilable Zig — supply real element values in a working test.
test "tensor actor performs matrix multiplication" {
    const system = try ActorSystem.init(.{});
    defer system.deinit();

    const tensor_actor = try system.spawn(TensorActor, .{});

    // (2x3) x (3x2) operands, so a successful matmul yields a 2x2 result.
    const a = try Tensor.init(.{ .shape = .{2, 3}, .data = .{...} });
    const b = try Tensor.init(.{ .shape = .{3, 2}, .data = .{...} });

    // Blocking request/reply call; 1000 is presumably a millisecond
    // timeout (matches other calls in this chapter) — confirm against API.
    const result = try tensor_actor.call(.{
        .matmul = .{ .left = a, .right = b }
    }, 1000);

    try testing.expectEqual(.{2, 2}, result.shape);
    // This test will fail until we implement TensorActor
}

// 2. Implement minimum code to pass
pub const TensorActor = struct {
    /// Actor behavior: dispatch each incoming message to the matching
    /// tensor operation and reply with its result.
    pub fn behavior(self: *Actor, msg: Message) !void {
        switch (msg.payload) {
            .matmul => |operands| {
                const product = try matmul(operands.left, operands.right);
                try msg.reply(product);
            },
        }
    }
};

// 3. Refactor with confidence

Test Categories

// Unit tests - test individual components
test "message queue operations" {
    // Round-trip a single message through a bounded FIFO.
    var fifo = BoundedQueue(Message, 10).init();

    try fifo.push(.{ .data = "hello" });
    const popped = try fifo.pop();

    try testing.expectEqualStrings("hello", popped.data);
}

// Integration tests - test component interactions
test "actor system message routing" {
    var system = try ActorSystem.init(.{});
    // Release system resources even if an expectation below fails
    // (the sibling tests in this chapter all pair init with deinit).
    defer system.deinit();

    const actor1 = try system.spawn(echoActor, .{});
    const actor2 = try system.spawn(echoActor, .{});

    // actor1 forwards the payload on to actor2; receive blocks with a
    // timeout (presumably ms — consistent with the other calls here).
    try actor1.send(.{ .forward_to = actor2, .data = "test" });
    const response = try actor2.receive(1000);

    try testing.expectEqualStrings("test", response.data);
}

// Property-based tests
test "actor mailbox never loses messages" {
    // std.rand was renamed std.Random in Zig 0.14; a fixed seed keeps
    // failures reproducible.
    var prng = std.Random.DefaultPrng.init(0);
    const rand = prng.random();

    for (0..1000) |_| {
        var mailbox = BoundedQueue(Message, 256).init();
        // Never exceed mailbox capacity (256) so push cannot fail on size.
        const num_messages = rand.intRangeAtMost(u32, 1, 256);

        // Send random messages, remembering what was sent.
        // ArrayList.init() takes no allocator in no Zig version; since
        // 0.15 the list is unmanaged and the allocator is passed per call.
        var sent: std.ArrayList(u32) = .empty;
        defer sent.deinit(testing.allocator);
        for (0..num_messages) |_| {
            const val = rand.int(u32);
            try sent.append(testing.allocator, val);
            try mailbox.push(.{ .value = val });
        }

        // Drain the mailbox completely.
        var received: std.ArrayList(u32) = .empty;
        defer received.deinit(testing.allocator);
        while (mailbox.pop()) |msg| {
            try received.append(testing.allocator, msg.value);
        }

        // Property: every sent message is received, in order.
        try testing.expectEqualSlices(u32, sent.items, received.items);
    }
}

// Fault injection tests
test "system recovers from random actor failures" {
    // 10% of operations fail, with the failure type drawn from the
    // listed set.
    var system = try ActorSystem.init(.{
        .fault_injection = .{
            .enabled = true,
            .failure_rate = 0.1,
            .failure_types = .{ .crash, .timeout, .corruption },
        },
    });
    defer system.deinit();

    // Create supervised actor tree.
    // NOTE(review): .one_for_one presumably restarts only the failed
    // child (Erlang-style) — confirm against the Supervisor docs.
    const supervisor = try system.spawn(Supervisor, .{
        .strategy = .one_for_one,
    });

    var workers: [10]ActorId = undefined;
    for (&workers) |*w| {
        w.* = try supervisor.startChild(workerActor, .{});
    }

    // Run workload with injected faults, round-robin over the workers.
    for (0..1000) |i| {
        const worker = workers[i % 10];
        _ = worker.send(.{ .work = i }) catch {
            // Actor might be restarting
            continue;
        };
    }

    // All workers should still be alive
    for (workers) |w| {
        try testing.expect(w.isAlive());
    }
}

GPU Testing

Mock GPU Backend

/// CPU-only stand-in for a GPU backend so kernel logic can be tested
/// without hardware.
pub const MockGpuBackend = struct {
    // Live "device" allocations, keyed by fake device pointer.
    // NOTE(review): these hash-map fields have no defaults, so callers
    // must initialize them (a bare `MockGpuBackend{}` will not compile).
    allocations: std.AutoHashMap(DevicePtr, []u8),
    kernels: std.StringHashMap(KernelFunc),

    // Fault injection knobs: when `should_fail` is set, the next
    // operation returns `failure_type` instead of doing any work.
    should_fail: bool = false,
    failure_type: GpuError = .out_of_memory,

    /// Simulate a device allocation by handing out host memory and
    /// returning its address as the device pointer.
    pub fn allocate(self: *MockGpuBackend, size: usize) !DevicePtr {
        if (self.should_fail) {
            return self.failure_type;
        }

        const memory = try std.heap.page_allocator.alloc(u8, size);
        // @ptrToInt was renamed @intFromPtr in Zig 0.11.
        const ptr = @intFromPtr(memory.ptr);
        try self.allocations.put(ptr, memory);
        return ptr;
    }

    /// "Launch" a kernel by running its registered CPU implementation.
    pub fn launch(
        self: *MockGpuBackend,
        kernel: Kernel,
        grid: Grid,
        args: []const *anyopaque,
    ) !void {
        if (self.should_fail) {
            return self.failure_type;
        }

        // Execute kernel on CPU for testing
        const func = self.kernels.get(kernel.name) orelse
            return error.KernelNotFound;

        try func(grid, args);
    }
};

test "GPU kernel actor handles device failures" {
    var system = try ActorSystem.init(.{});
    defer system.deinit();

    // Create mock backend.
    // NOTE(review): MockGpuBackend's hash-map fields have no defaults,
    // so `.{}` alone will not compile — initialize them in a real build.
    var mock_gpu = MockGpuBackend{};

    const kernel_actor = try system.spawn(GpuKernelActor, .{
        .backend = .{ .mock = &mock_gpu },
        .kernel = test_kernel,
    });

    // First launch succeeds
    const result1 = try kernel_actor.call(.{
        .launch = .{ .params = test_params },
    }, 1000);
    try testing.expect(result1 == .success);

    // Inject failure
    mock_gpu.should_fail = true;
    mock_gpu.failure_type = .device_error;

    // Should retry and eventually succeed. `try` unwraps the error
    // union; without it, comparing result2 against `.success` below
    // would not compile.
    const result2 = try kernel_actor.call(.{
        .launch = .{ .params = test_params },
    }, 5000);

    // Kernel actor should have migrated to different device
    try testing.expect(result2 == .success);
}

Performance Testing

test "actor system throughput" {
    var system = try ActorSystem.init(.{
        .workers = 8,
    });
    defer system.deinit();

    const start = std.time.nanoTimestamp();
    const num_messages = 1_000_000;

    // Create actors
    var actors: [100]ActorId = undefined;
    for (&actors) |*a| {
        a.* = try system.spawn(throughputActor, .{});
    }

    // Send messages round-robin across the actors.
    for (0..num_messages) |i| {
        const target = actors[i % 100];
        try target.send(.{ .data = i });
    }

    // Wait for completion: a synchronous call per actor drains its
    // mailbox. Zig rejects silently ignored return values, hence `_ =`.
    for (actors) |a| {
        _ = try a.call(.{ .get_count = {} }, 5000);
    }

    const elapsed = std.time.nanoTimestamp() - start;
    const msgs_per_sec = num_messages * 1_000_000_000 / elapsed;

    std.debug.print("Throughput: {} msgs/sec\n", .{msgs_per_sec});

    // Should handle at least 100k msgs/sec
    try testing.expect(msgs_per_sec > 100_000);
}

test "GPU kernel performance" {
    const backend = try selectBestBackend();

    // Skip if no GPU available
    if (backend == .cpu) return;

    const n = 1024 * 1024;
    const a = try backend.allocate(n * @sizeOf(f32));
    const b = try backend.allocate(n * @sizeOf(f32));
    const c = try backend.allocate(n * @sizeOf(f32));
    defer backend.free(a);
    defer backend.free(b);
    defer backend.free(c);

    // Measure kernel execution time
    const start = std.time.nanoTimestamp();

    for (0..100) |_| {
        try vectorAddKernel.launch(backend, .{
            .grid = .{ .x = (n + 255) / 256 },
            .blocks = .{ .x = 256 },
            .params = .{ a, b, c, n },
        });
    }

    try backend.synchronize();
    const elapsed = std.time.nanoTimestamp() - start;

    // nanoTimestamp() yields an integer (i128); int-to-float conversion
    // uses @floatFromInt, not @as.
    const seconds = @as(f64, @floatFromInt(elapsed)) / 1_000_000_000.0;
    // 100 launches of n additions each -> FLOPs / seconds / 1e9.
    const gflops = (100.0 * n) / seconds / 1_000_000_000.0;
    // Fixed-precision floats use the `{d:.2}` specifier; `{.2}` is invalid.
    std.debug.print("Vector add: {d:.2} GFLOPS\n", .{gflops});

    // Should achieve reasonable performance
    try testing.expect(gflops > 10.0);
}

Test Infrastructure

Test Fixtures

/// Shared fixtures for the test suites in this chapter.
pub const TestFixtures = struct {
    /// Build a small actor system with tracing enabled.
    /// Caller is responsible for calling deinit().
    pub fn createTestSystem() !ActorSystem {
        return ActorSystem.init(.{
            .workers = 4,
            .max_actors = 1000,
            .enable_tracing = true,
        });
    }

    /// Create a tensor of the given comptime shape, filled with
    /// 0, 1, 2, ... in row-major iteration order.
    pub fn createTestTensor(comptime shape: []const usize) !Tensor {
        // Element count = product of all dimensions, computed at comptime
        // so `data` can be a fixed-size stack array.
        const size = comptime blk: {
            var s = 1;
            for (shape) |dim| {
                s *= dim;
            }
            break :blk s;
        };

        var data: [size]f32 = undefined;
        for (&data, 0..) |*d, i| {
            d.* = @floatFromInt(i);
        }

        return Tensor.init(.{
            .shape = shape,
            .data = data,
        });
    }

    /// Spawn a small dense->relu->dense->softmax model actor.
    // NOTE(review): the ActorSystem created here is never deinitialized
    // and goes out of scope while the returned actor may still reference
    // it — confirm ownership before using this fixture beyond examples.
    pub fn createTestModel() !ModelActor {
        var system = try createTestSystem();

        return system.spawn(ModelActor, .{
            .layers = &[_]LayerSpec{
                .{ .type = .dense, .units = 128 },
                .{ .type = .relu },
                .{ .type = .dense, .units = 10 },
                .{ .type = .softmax },
            },
        });
    }
};

Test Helpers

/// Assertion helpers shared by the test suites.
pub const TestHelpers = struct {
    /// Poll `condition` until it returns true; fail with error.Timeout
    /// after `timeout_ms` milliseconds.
    pub fn waitFor(
        condition: *const fn () bool,
        timeout_ms: u64,
    ) !void {
        const start = std.time.milliTimestamp();

        while (!condition()) {
            if (std.time.milliTimestamp() - start > timeout_ms) {
                return error.Timeout;
            }
            // sleep moved from std.time to std.Thread in newer Zig.
            std.Thread.sleep(1_000_000); // 1ms
        }
    }

    /// Assert the actor processed exactly `expected` message types,
    /// in order.
    pub fn expectMessageSequence(
        actor: ActorId,
        expected: []const MessageType,
    ) !void {
        const trace = actor.getMessageTrace();

        // Length check first: the parallel for below requires it.
        try testing.expectEqual(expected.len, trace.len);

        // NOTE(review): if the trace entry's field is literally named
        // "type", Zig requires `actual.@"type"` syntax — confirm.
        for (expected, trace) |exp, actual| {
            try testing.expectEqual(exp, actual.type);
        }
    }

    /// Element-wise approximate comparison of two same-shape tensors
    /// using a relative tolerance.
    pub fn expectTensorApproxEqual(
        expected: Tensor,
        actual: Tensor,
        tolerance: f32,
    ) !void {
        try testing.expectEqualSlices(usize, expected.shape, actual.shape);

        for (expected.data, actual.data) |e, a| {
            try testing.expectApproxEqRel(e, a, tolerance);
        }
    }
};

Benchmarking

/// Minimal benchmark harness: warm up, time N iterations, report stats.
pub const Benchmark = struct {
    name: []const u8,
    iterations: u32,
    warmup: u32 = 10, // iterations discarded before timing starts

    /// Run `func(args)` `iterations` times and return timing statistics
    /// in nanoseconds.
    pub fn run(
        self: Benchmark,
        func: anytype,
        args: anytype,
    ) !BenchmarkResult {
        // Warmup: stabilize caches/branch predictors before measuring.
        for (0..self.warmup) |_| {
            _ = try func(args);
        }

        // Per-iteration samples; since Zig 0.15 ArrayList is unmanaged
        // and takes the allocator on each call (init() alone never
        // compiled — it always needed an allocator).
        var times: std.ArrayList(u64) = .empty;
        defer times.deinit(std.heap.page_allocator);
        var total: u64 = 0;

        for (0..self.iterations) |_| {
            const start = std.time.nanoTimestamp();
            _ = try func(args);
            // nanoTimestamp() is i128; the delta fits easily in u64.
            const elapsed: u64 = @intCast(std.time.nanoTimestamp() - start);

            try times.append(std.heap.page_allocator, elapsed);
            total += elapsed;
        }

        // Sort ascending so order statistics are direct indexing.
        // (std.sort.sort was replaced by std.mem.sort in Zig 0.11.)
        std.mem.sort(u64, times.items, {}, comptime std.sort.asc(u64));

        return BenchmarkResult{
            .name = self.name,
            .iterations = self.iterations,
            .mean = total / self.iterations,
            .median = times.items[times.items.len / 2],
            .min = times.items[0],
            .max = times.items[times.items.len - 1],
            // Integer p99 index: usize cannot be multiplied by 0.99.
            .p99 = times.items[(times.items.len * 99) / 100],
        };
    }
};

test "benchmark actor messaging" {
    var system = try TestFixtures.createTestSystem();
    defer system.deinit();

    const actor = try system.spawn(echoActor, .{});

    const bench = Benchmark{
        .name = "actor_send_receive",
        .iterations = 10000,
    };

    // Benchmark one round-trip: send a message, then block (up to 100 —
    // presumably ms, matching the other calls here) for the echo reply.
    const result = try bench.run(
        struct {
            fn run(a: ActorId) !void {
                try a.send(.{ .data = "test" });
                _ = try a.receive(100);
            }
        }.run,
        actor,
    );

    std.debug.print("Actor messaging: mean={}, p99={}\n", .{
        result.mean,
        result.p99,
    });

    // Should be under 1 microsecond
    try testing.expect(result.mean < 1000);
}

Continuous Testing

Test Runner Configuration

// build.zig
pub fn build(b: *std.Build) void {
    // ... other config ...

    const target = b.standardTargetOptions(.{});

    // Test step with coverage
    const test_step = b.step("test", "Run all tests");

    // Unit tests. Since Zig 0.15 compile steps take a root module, and
    // source paths are created with b.path() (the bare `.{ .path = ... }`
    // form was removed in 0.13).
    const unit_tests = b.addTest(.{
        .root_module = b.createModule(.{
            .root_source_file = b.path("src/test_all.zig"),
            .target = target,
            .optimize = .Debug,
        }),
    });
    // Build options are exposed via an options module, not set directly
    // on the compile step.
    const test_options = b.addOptions();
    test_options.addOption(bool, "enable_coverage", true);
    unit_tests.root_module.addOptions("build_options", test_options);
    // Depend on a run artifact — depending on the compile step alone
    // only builds the tests, it never executes them.
    test_step.dependOn(&b.addRunArtifact(unit_tests).step);

    // Integration tests
    const integration_tests = b.addTest(.{
        .root_module = b.createModule(.{
            .root_source_file = b.path("tests/integration.zig"),
            .target = target,
            .optimize = .ReleaseSafe,
        }),
    });
    test_step.dependOn(&b.addRunArtifact(integration_tests).step);

    // Fuzzing
    const fuzz_step = b.step("fuzz", "Run fuzz tests");
    const fuzz_tests = b.addExecutable(.{
        .name = "fuzz",
        .root_module = b.createModule(.{
            .root_source_file = b.path("tests/fuzz.zig"),
            .target = target,
            .optimize = .ReleaseFast,
        }),
    });
    fuzz_tests.linkSystemLibrary("AFL");
    fuzz_step.dependOn(&fuzz_tests.step);

    // Benchmarks
    const bench_step = b.step("bench", "Run benchmarks");
    const benchmarks = b.addExecutable(.{
        .name = "bench",
        .root_module = b.createModule(.{
            .root_source_file = b.path("tests/bench.zig"),
            .target = target,
            .optimize = .ReleaseFast,
        }),
    });
    bench_step.dependOn(&benchmarks.step);
}

CI Pipeline

# .github/workflows/test.yml
# CI: build and run the test suite on every push and pull request.
name: Tests

on: [push, pull_request]

jobs:
  test:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        # Full OS matrix; single pinned Zig version (pre-1.0 std APIs
        # change between releases, so the pin matters).
        os: [ubuntu-latest, macos-latest, windows-latest]
        zig: [0.15.2]

    steps:
      - uses: actions/checkout@v3

      - name: Setup Zig
        uses: goto-bus-stop/setup-zig@v2
        with:
          version: ${{ matrix.zig }}

      # CUDA is only installed on the Linux runner.
      - name: Install CUDA
        if: runner.os == 'Linux'
        run: |
          # Install CUDA toolkit

      - name: Run Tests
        run: |
          zig build test

      - name: Run Benchmarks
        run: |
          zig build bench

      # Fuzzing is Linux-only (AFL); capped at 60 seconds per run.
      - name: Fuzzing
        if: runner.os == 'Linux'
        run: |
          zig build fuzz -- -max_total_time=60