Skip to content

How to Block AI Bots in Zig Zap

Zap is a fast HTTP server library for Zig, wrapping the battle-tested facil.io C library. There is no built-in middleware chain — instead, a single on_request callback handles every request. Bot blocking sits at the top of that callback, checking the User-Agent before delegating to the router. Zig's comptime string arrays and stack-allocated lowercase buffers keep the hot path allocation-free.

1. Bot pattern module (ai_bots.zig)

A comptime array of string literals costs nothing at runtime — the data lives in the binary's read-only segment. std.ascii.lowerString() writes into a stack buffer, avoiding heap allocation. std.mem.indexOf() does the substring search.

// src/ai_bots.zig
const std = @import("std");

/// Substrings that identify known AI crawlers inside a User-Agent value.
/// The table is a compile-time constant made of string literals, so building
/// it costs nothing at runtime.
/// Every entry MUST be lowercase ASCII: `isAiBot` matches them against a
/// lowercased (or case-insensitively compared) User-Agent.
pub const patterns = [_][]const u8{
    "gptbot",
    "chatgpt-user",
    "claudebot",
    "anthropic-ai",
    "ccbot",
    "google-extended",
    "cohere-ai",
    "meta-externalagent",
    "bytespider",
    "omgili",
    "diffbot",
    "imagesiftbot",
    "magpie-crawler",
    "amazonbot",
    "dataprovider",
    "netcraft",
};

/// Returns true if `user_agent` contains any entry of `patterns`, ignoring
/// ASCII case. Never allocates on the heap.
///
/// User-Agents of up to 512 bytes are lowercased once into a stack buffer and
/// then searched with `std.mem.indexOf`. Longer values are searched in full
/// with a case-insensitive comparison instead of being truncated, so a crawler
/// cannot hide its token behind 512 bytes of padding.
pub fn isAiBot(user_agent: []const u8) bool {
    var buf: [512]u8 = undefined;

    if (user_agent.len > buf.len) {
        // Oversized input: skip the buffer entirely and scan the whole value.
        for (patterns) |pattern| {
            if (std.ascii.indexOfIgnoreCase(user_agent, pattern) != null) {
                return true;
            }
        }
        return false;
    }

    // Common case: lowercase once, then do plain substring searches.
    const ua_lower = std.ascii.lowerString(buf[0..user_agent.len], user_agent);

    for (patterns) |pattern| {
        if (std.mem.indexOf(u8, ua_lower, pattern) != null) {
            return true;
        }
    }
    return false;
}

2. Server with middleware wrapper (main.zig)

Set X-Robots-Tag first — before any branch — so it appears on blocked and passing responses alike. r.getHeader() returns ?[]const u8; use orelse "" to default to an empty slice. After sending a 403, return immediately to prevent dispatch() from also writing a response.

// src/main.zig
const std  = @import("std");
const zap  = @import("zap");
const bots = @import("ai_bots.zig");

// ---------------------------------------------------------------------------
// Route handlers
// ---------------------------------------------------------------------------

/// Serves the landing page: marks the response as HTML and sends a fixed body.
/// Any error from the request object ends the handler without further action.
fn homeHandler(r: zap.SimpleRequest) void {
    r.setContentType(.HTML) catch {
        // Content type could not be set; give up on this response.
        return;
    };
    r.sendBody("<h1>Hello</h1>") catch {};
}

/// Health-check endpoint: replies with the literal body "OK".
/// A failure to send is ignored because there is nothing else to do.
fn healthHandler(r: zap.SimpleRequest) void {
    const body = "OK";
    r.sendBody(body) catch {};
}

/// Serves robots.txt as plain text: everything is allowed for generic
/// crawlers, and GPTBot is disallowed site-wide.
fn robotsTxtHandler(r: zap.SimpleRequest) void {
    r.setHeader("Content-Type", "text/plain") catch return;
    // Zig multiline string literal: every line must start with `\\`.
    // Lines are joined with '\n'; the last line gets no trailing newline.
    r.sendBody(
        \\User-agent: *
        \\Allow: /
        \\
        \\User-agent: GPTBot
        \\Disallow: /
    ) catch return;
}

// ---------------------------------------------------------------------------
// Router dispatch — called by botBlocker on passing requests
// ---------------------------------------------------------------------------

/// Routes a request to its handler by exact path match.
/// A missing path is treated as "/". Unknown paths get a 404 with the body
/// "Not Found".
fn dispatch(r: zap.SimpleRequest) void {
    const Route = struct {
        path: []const u8,
        handler: *const fn (zap.SimpleRequest) void,
    };
    // Checked in order; the first exact match wins.
    const routes = [_]Route{
        .{ .path = "/", .handler = homeHandler },
        .{ .path = "/health", .handler = healthHandler },
        .{ .path = "/robots.txt", .handler = robotsTxtHandler },
    };

    const requested = r.path orelse "/";
    for (routes) |route| {
        if (std.mem.eql(u8, requested, route.path)) {
            route.handler(r);
            return;
        }
    }

    // No route matched.
    r.setStatus(.not_found);
    r.sendBody("Not Found") catch {};
}

// ---------------------------------------------------------------------------
// Bot blocker — middleware wrapper around dispatch
// ---------------------------------------------------------------------------

/// Entry point for every request: tags the response, rejects known AI
/// crawlers with 403, and hands everything else to `dispatch`.
fn onRequest(r: zap.SimpleRequest) void {
    // Always set X-Robots-Tag first (applies to blocked and passing responses)
    r.setHeader("X-Robots-Tag", "noai, noimageai") catch return;

    // Exempt robots.txt from bot detection so crawlers can still read the rules
    const path = r.path orelse "";
    if (!std.mem.eql(u8, path, "/robots.txt")) {
        // The header name must be lowercase: facil.io lowercases request header
        // names while parsing and getHeader() looks the name up verbatim, so
        // "User-Agent" would never match and the bot check would never fire.
        const ua = r.getHeader("user-agent") orelse "";
        if (bots.isAiBot(ua)) {
            r.setStatus(.forbidden);
            r.sendBody("Forbidden") catch return;
            return;  // <-- return here, do NOT call dispatch()
        }
    }

    dispatch(r);
}

// ---------------------------------------------------------------------------
// Server startup
// ---------------------------------------------------------------------------

/// Starts the HTTP server: registers `onRequest` as the single request
/// callback on port 8000, begins listening, then runs zap with 4 threads
/// and 1 worker.
/// Returns an error if the listener cannot start listening.
// NOTE(review): newer zap releases appear to expose these types without the
// `Simple` prefix (zap.HttpListener, zap.Request) — confirm against the
// pinned zap version.
pub fn main() !void {
    var listener = zap.SimpleHttpListener.init(.{
        .on_request = onRequest,
        .port = 8000,
        .log = true,
    });
    try listener.listen();

    std.log.info("Listening on http://0.0.0.0:8000", .{});

    zap.start(.{
        .workers = 1,
        .threads = 4,
    });
}

3. Build configuration

// build.zig.zon
// Package manifest: names the application and declares zap as its only dependency.
.{
    .name    = "myapp",
    .version = "0.1.0",
    .dependencies = .{
        .zap = .{
            // Source tarball for the zap v0.8.0 tag.
            .url  = "https://github.com/zigzap/zap/archive/refs/tags/v0.8.0.tar.gz",
            // Placeholder only — must be replaced with the real package hash
            // before the project can be built.
            .hash = "<run `zig fetch --save` to fill this in>",
        },
    },
}
// build.zig (excerpt)
// `b`, `target`, `optimize`, and `exe` are defined outside this excerpt.
//
// Look up the "zap" dependency declared in build.zig.zon, forwarding the
// target and optimize settings and passing `.openssl = false`.
const zap = b.dependency("zap", .{
    .target   = target,
    .optimize = optimize,
    .openssl  = false,
});
// Make `@import("zap")` resolvable from the executable's root module.
exe.root_module.addImport("zap", zap.module("zap"));
// Link the "facil.io" artifact provided by the zap dependency.
exe.linkLibrary(zap.artifact("facil.io"));

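Fetch dependencies and build (zig fetch needs the package URL as its argument): zig fetch --save https://github.com/zigzap/zap/archive/refs/tags/v0.8.0.tar.gz && zig build run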

4. SimpleRouter variant

For larger apps, use zap.SimpleRouter instead of manual path matching. The bot check still lives in onRequest — router.handle(r) is only called after the bot check passes.

// Alternative: use zap.SimpleRouter for cleaner route registration
// (still wraps it in onRequest for the bot check)

/// Module-level router so that `onRequest` can reach it; initialised in
/// `main` before the listener starts.
// NOTE(review): confirm that the pinned zap version actually provides
// `zap.SimpleRouter` with `addRoute()` / `handle()` — newer releases appear
// to name the type `zap.Router`.
var router: zap.SimpleRouter = undefined;

/// Starts the HTTP server using the router for path matching.
/// Registers the three routes, listens on port 8000 with `onRequest` as the
/// request callback, then runs zap with 4 threads and 1 worker.
/// Returns an error if a route cannot be added or the listener cannot start.
pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    // Release the allocator on exit so its leak check actually runs.
    // Declared before the router's defer, so it executes after router.deinit().
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    router = zap.SimpleRouter.init(allocator, false);
    defer router.deinit();

    try router.addRoute("/",           homeHandler);
    try router.addRoute("/health",     healthHandler);
    try router.addRoute("/robots.txt", robotsTxtHandler);

    var listener = zap.SimpleHttpListener.init(.{
        .port       = 8000,
        .on_request = onRequest,
        .log        = true,
    });
    try listener.listen();
    zap.start(.{ .threads = 4, .workers = 1 });
}

/// Entry point for every request in the router variant: tags the response,
/// rejects known AI crawlers with 403, and otherwise lets the router handle
/// the request, answering 404 when no route matches.
fn onRequest(r: zap.SimpleRequest) void {
    // Set on every response, blocked or not.
    r.setHeader("X-Robots-Tag", "noai, noimageai") catch return;

    // robots.txt stays reachable for crawlers so they can read the rules.
    const path = r.path orelse "";
    if (!std.mem.eql(u8, path, "/robots.txt")) {
        // The header name must be lowercase: facil.io lowercases request header
        // names while parsing and getHeader() looks the name up verbatim, so
        // "User-Agent" would never match and the bot check would never fire.
        const ua = r.getHeader("user-agent") orelse "";
        if (bots.isAiBot(ua)) {
            r.setStatus(.forbidden);
            r.sendBody("Forbidden") catch return;
            return; // response already sent; do not fall through to the router
        }
    }

    if (!router.handle(r)) {
        r.setStatus(.not_found);
        r.sendBody("Not Found") catch return;
    }
}

Key points

Framework comparison — systems-level web servers

| Framework | Middleware model | Short-circuit | UA header |
| --- | --- | --- | --- |
| Zig Zap | Single on_request callback | setStatus(.forbidden); sendBody(); return | r.getHeader() ?[]const u8 |
| Rust Actix-web | wrap_fn / Service trait | HttpResponse::Forbidden() | req.headers().get(USER_AGENT) |
| Nim Jester | before: / after: blocks | halt(Http403, headers, body) | request.headers.getOrDefault() |
| Crystal Kemal | before_all filter | halt(env, 403) | env.request.headers[...]? |
| Go (stdlib) | func(http.Handler) http.Handler | http.Error(w, ..., 403); return | r.Header.Get("User-Agent") |

Zap's single-callback model is closest to Go's stdlib http.HandlerFunc — both require explicit return after sending a response to prevent double writes. The Zig-specific nuances are the optional header return type (?[]const u8), the catch return error handling idiom, and the stack-buffer approach to string lowercasing.