How to Block AI Bots on Falcon (Python): Complete 2026 Guide
Falcon is Python's bare-metal REST framework: no route decorators, no magic, minimal overhead. Unlike Flask (where you return a response from before_request()) and Django (where middleware returns an HttpResponse), Falcon middleware is exception-based: to block a request, you raise falcon.HTTPForbidden() in process_request().
Exception-based blocking — not return-based
Falcon converts HTTP errors to responses via its error handler. Raising falcon.HTTPForbidden() in process_request() immediately stops the middleware chain and resource dispatch — the route handler never runs. Do not return a response object from process_request() — returning None (implicit) is correct for pass-through; only raising an exception blocks.
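A minimal illustrative sketch of the pattern (the full middleware appears later in this guide):

import falcon

class MinimalBlocker:
    def process_request(self, req, resp):
        ua = (req.get_header('User-Agent') or '').lower()
        if 'gptbot' in ua:
            raise falcon.HTTPForbidden()  # Falcon renders this as a 403
        # falling through (returning None) lets the request continue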
Protection layers
Layer 1: robots.txt
Falcon does not auto-serve static files. Create a resource class and register it at /robots.txt. Add the path to EXEMPT_PATHS so it bypasses bot blocking:
# static/robots.txt
User-agent: *
Allow: /

User-agent: GPTBot
User-agent: ClaudeBot
User-agent: anthropic-ai
User-agent: Google-Extended
User-agent: CCBot
User-agent: Bytespider
User-agent: Applebot-Extended
User-agent: PerplexityBot
User-agent: Diffbot
User-agent: cohere-ai
User-agent: FacebookBot
User-agent: omgili
User-agent: omgilibot
User-agent: Amazonbot
User-agent: DeepSeekBot
User-agent: MistralBot
User-agent: xAI-Bot
User-agent: AI2Bot
Disallow: /
resources/static.py — robots.txt resource
import falcon

class RobotsTxtResource:
    def on_get(self, req, resp):
        resp.content_type = 'text/plain'
        with open('static/robots.txt', 'r') as f:
            resp.text = f.read()

app.py — register the robots.txt route
import falcon
from resources.static import RobotsTxtResource
from middleware.bot_blocker import AiBotBlocker

app = falcon.App(middleware=[AiBotBlocker()])
app.add_route('/robots.txt', RobotsTxtResource())

Note: middleware applies to every route regardless of registration order, so also add /robots.txt to EXEMPT_PATHS to keep the bot blocker from intercepting it.
Layer 2: noai meta tag
Set a robots value in req.context in process_request(). Templates read it via the context dict passed to the renderer:
middleware/bot_blocker.py — set context
def process_request(self, req, resp):
    # Set the robots default before the bot check
    req.context['robots'] = 'noai, noimageai'
    if req.path in self.EXEMPT_PATHS:
        return
    ua = req.get_header('User-Agent') or ''
    # ... rest of bot check

templates/base.html (Jinja2)
<meta name="robots" content="{{ robots | default('noai, noimageai') }}">

Per-route override in a responder
class BlogResource:
    def on_get(self, req, resp, slug):
        req.context['robots'] = 'index, follow'  # allow indexing for this route
        # Falcon has no built-in templating; render with a Jinja2
        # environment (setup sketched below)
        resp.content_type = falcon.MEDIA_HTML
        resp.text = jinja_env.get_template('blog.html').render(
            robots=req.context['robots'])
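Falcon ships without a template engine, so jinja_env above is our own module-level object, not a Falcon API. A minimal Jinja2 setup that the responder assumes might look like:

# templating.py: hypothetical Jinja2 setup assumed by the responder above
from jinja2 import Environment, FileSystemLoader, select_autoescape

jinja_env = Environment(
    loader=FileSystemLoader('templates'),
    autoescape=select_autoescape(('html',)),
)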
Layers 3 & 4: X-Robots-Tag + 403 block

The complete middleware. process_request() raises falcon.HTTPForbidden to block; process_response() sets X-Robots-Tag on every successful response:
middleware/bot_blocker.py
import falcon

class AiBotBlocker:
    AI_BOTS = [
        'gptbot', 'chatgpt-user', 'claudebot', 'anthropic-ai',
        'ccbot', 'cohere-ai', 'bytespider', 'amazonbot',
        'applebot-extended', 'perplexitybot', 'youbot', 'diffbot',
        'google-extended', 'facebookbot', 'omgili', 'omgilibot',
        'deepseekbot', 'mistralbot', 'xai-bot', 'ai2bot',
    ]
    EXEMPT_PATHS = {'/robots.txt', '/sitemap.xml', '/favicon.ico', '/health'}

    def process_request(self, req, resp):
        """Called before routing; raise to block, return None to continue."""
        req.context['robots'] = 'noai, noimageai'  # default for templates
        if req.path in self.EXEMPT_PATHS:
            return  # exempt path: skip the bot check entirely
        ua = req.get_header('User-Agent') or ''
        ua_lower = ua.lower()
        for bot in self.AI_BOTS:
            if bot in ua_lower:
                raise falcon.HTTPForbidden(
                    title='Forbidden',
                    description='AI crawlers are not permitted on this site.',
                )
        # Implicit return None: the request continues to routing + responder

    def process_response(self, req, resp, resource, req_succeeded):
        """Called after the responder; set response headers here."""
        # resource is None if routing failed (404); req_succeeded is False if
        # an exception was raised (including our HTTPForbidden above).
        # Only add the header on successful responses:
        if req_succeeded:
            resp.set_header('X-Robots-Tag', 'noai, noimageai')

process_response signature
process_response(req, resp, resource, req_succeeded) takes four parameters, not one like Flask's after_request(response). resource is the matched resource instance (None on a 404). req_succeeded is False if any exception was raised during processing, including the HTTPForbidden we raise for bots, so the guard prevents adding X-Robots-Tag to blocked responses.
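If you want visibility into blocked requests, a second middleware can inspect req_succeeded in its own process_response(). A minimal sketch (BlockLogger is our name, not a Falcon API; we assume Falcon's error handler has already rendered the 403 by the time process_response runs, which matches its documented error-handling flow):

import falcon
import logging

logger = logging.getLogger('bot_blocker')

class BlockLogger:
    def process_response(self, req, resp, resource, req_succeeded):
        # req_succeeded is False because AiBotBlocker raised HTTPForbidden;
        # resp.status should already be '403 Forbidden' at this point
        if not req_succeeded and resp.status == falcon.HTTP_403:
            logger.info('blocked %s %s UA=%r', req.method, req.path,
                        req.get_header('User-Agent'))

Register it alongside the blocker: falcon.App(middleware=[AiBotBlocker(), BlockLogger()]).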
app.py — wire it up
import falcon
from middleware.bot_blocker import AiBotBlocker
from resources.static import RobotsTxtResource
from resources.articles import ArticleResource

app = falcon.App(middleware=[AiBotBlocker()])
app.add_route('/robots.txt', RobotsTxtResource())
app.add_route('/articles/{slug}', ArticleResource())
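The ArticleResource import above assumes a resource module like this hypothetical stub:

# resources/articles.py: hypothetical stub assumed by app.py above
import falcon

class ArticleResource:
    def on_get(self, req, resp, slug):
        # echo the slug; a real app would fetch and render the article
        resp.media = {'slug': slug}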
Async / ASGI variant

Use falcon.asgi.App for async. Method signatures are identical; just add async:
import falcon
import falcon.asgi

class AiBotBlocker:
    AI_BOTS = [
        'gptbot', 'chatgpt-user', 'claudebot', 'anthropic-ai',
        'ccbot', 'cohere-ai', 'bytespider', 'amazonbot',
        'applebot-extended', 'perplexitybot', 'diffbot',
        'google-extended', 'deepseekbot', 'mistralbot', 'xai-bot',
    ]
    EXEMPT_PATHS = {'/robots.txt', '/sitemap.xml', '/favicon.ico'}

    async def process_request(self, req, resp):
        req.context['robots'] = 'noai, noimageai'
        if req.path in self.EXEMPT_PATHS:
            return
        ua = req.get_header('User-Agent') or ''
        ua_lower = ua.lower()
        for bot in self.AI_BOTS:
            if bot in ua_lower:
                raise falcon.HTTPForbidden(
                    title='Forbidden',
                    description='AI crawlers are not permitted.',
                )

    async def process_response(self, req, resp, resource, req_succeeded):
        if req_succeeded:
            resp.set_header('X-Robots-Tag', 'noai, noimageai')

app = falcon.asgi.App(middleware=[AiBotBlocker()])
# Run: uvicorn myapp:app --reload

Falcon ASGI runs on any ASGI server, such as Uvicorn or Daphne. For sync (WSGI) Falcon, run with gunicorn myapp:app -w 4.
Path-scoped blocking
Falcon middleware runs for all routes — unlike Express or Gin, there is no per-group middleware. Scope it in process_request() with a path prefix check:
def process_request(self, req, resp):
    req.context['robots'] = 'noai, noimageai'
    # Only block on /api/*; allow bots on public pages
    if not req.path.startswith('/api/'):
        return
    if req.path in self.EXEMPT_PATHS:
        return
    ua = req.get_header('User-Agent') or ''
    for bot in self.AI_BOTS:
        if bot in ua.lower():
            raise falcon.HTTPForbidden()

Alternatively, use process_resource() to check the matched resource class, e.g. if isinstance(resource, PublicResource): return, as sketched below.
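A sketch of that alternative, assuming a hypothetical PublicResource marker base class (our convention, not a Falcon API):

import falcon

class PublicResource:
    """Marker base class for resources that should stay crawlable."""

class AiBotBlocker:
    AI_BOTS = ['gptbot', 'claudebot', 'ccbot']  # abbreviated list

    def process_resource(self, req, resp, resource, params):
        # Runs after routing, only when a route matched a resource
        if isinstance(resource, PublicResource):
            return  # explicitly public: let all crawlers through
        ua = (req.get_header('User-Agent') or '').lower()
        if any(bot in ua for bot in self.AI_BOTS):
            raise falcon.HTTPForbidden(
                description='AI crawlers are not permitted.')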
Falcon vs Flask vs Django — blocking comparison
Falcon — raise exception
# falcon middleware
def process_request(self, req, resp):
    ua = req.get_header('User-Agent') or ''
    if any(b in ua.lower() for b in AI_BOTS):
        raise falcon.HTTPForbidden()  # exception stops the chain

Flask — return response from before_request
# flask
@app.before_request
def block_bots():
    ua = request.headers.get('User-Agent', '').lower()
    if any(b in ua for b in AI_BOTS):
        return Response('Forbidden', 403)  # return stops the chain

Django — return HttpResponse from process_request
# django middleware (MiddlewareMixin style)
def process_request(self, request):
    ua = request.META.get('HTTP_USER_AGENT', '').lower()
    if any(b in ua for b in AI_BOTS):
        return HttpResponseForbidden('Forbidden')  # return stops the chain

Testing
Falcon's built-in test client (falcon.testing.TestClient) makes middleware testing straightforward:
import falcon.testing
from app import app

client = falcon.testing.TestClient(app)

def test_blocks_gptbot():
    result = client.simulate_get(
        '/articles/test',
        headers={'User-Agent': 'GPTBot/1.0'},
    )
    assert result.status_code == 403

def test_allows_normal_browser():
    result = client.simulate_get(
        '/articles/test',
        headers={'User-Agent': 'Mozilla/5.0 (compatible)'},
    )
    assert result.status_code == 200
    assert result.headers.get('X-Robots-Tag') == 'noai, noimageai'

def test_robots_txt_accessible():
    result = client.simulate_get(
        '/robots.txt',
        headers={'User-Agent': 'GPTBot/1.0'},
    )
    # Exempt path: bots can read robots.txt
    assert result.status_code == 200

AI bot User-Agent strings (2026)
Match case-insensitively: keep every entry in AI_BOTS lowercase and test bot in ua.lower().
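For example, against a typical GPTBot request (the UA string below is illustrative, paraphrased from OpenAI's published format):

AI_BOTS = ['gptbot', 'claudebot', 'ccbot']
ua = 'Mozilla/5.0 AppleWebKit/537.36; compatible; GPTBot/1.2; +https://openai.com/gptbot'
assert any(bot in ua.lower() for bot in AI_BOTS)  # matches 'gptbot'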