# robots.txt for Heidi on Claude
#
# Strategy: keep standard search engines (Google, Bing) crawling for SEO,
# but block AI crawlers (training, AI-search, and agent bots) from
# harvesting lesson content.
#
# Note: robots.txt is advisory; only bots that voluntarily honor it will
# comply. Treat it as the polite first line of defense, not a security
# measure. For real protection (auth gate, watermarking, video DRM),
# see CLAUDE.md.

# ----- AI crawlers · disallow -----

User-agent: GPTBot
Disallow: /

User-agent: ChatGPT-User
Disallow: /

User-agent: OAI-SearchBot
Disallow: /

User-agent: ClaudeBot
Disallow: /

User-agent: Claude-Web
Disallow: /

User-agent: anthropic-ai
Disallow: /

User-agent: Google-Extended
Disallow: /

User-agent: PerplexityBot
Disallow: /

User-agent: Bytespider
Disallow: /

User-agent: CCBot
Disallow: /

User-agent: FacebookBot
Disallow: /

User-agent: Meta-ExternalAgent
Disallow: /

User-agent: cohere-ai
Disallow: /

User-agent: Applebot-Extended
Disallow: /

User-agent: Diffbot
Disallow: /

User-agent: ImagesiftBot
Disallow: /

User-agent: Omgilibot
Disallow: /

User-agent: Omgili
Disallow: /

User-agent: YouBot
Disallow: /

User-agent: Amazonbot
Disallow: /

User-agent: TimpiBot
Disallow: /

User-agent: img2dataset
Disallow: /

# ----- Standard search engines · allow -----

User-agent: Googlebot
Allow: /

User-agent: Bingbot
Allow: /

User-agent: DuckDuckBot
Allow: /

User-agent: Applebot
Allow: /

# ----- Default · allow everyone else -----

User-agent: *
Allow: /

# Sitemap (replace heidionclaude.com with the production domain when known)
Sitemap: https://heidionclaude.com/sitemap.xml
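
# ----- Verifying this file -----
# Group matching note: per RFC 9309, a crawler obeys only the single group
# that best matches its product token, so a bot named above (e.g. GPTBot)
# follows its own Disallow group and never falls through to the catch-all
# "User-agent: *" group.
#
# A minimal local sanity check, kept as comments so this file stays a valid
# robots.txt. This is a sketch, assuming Python 3's stdlib urllib.robotparser;
# the /lessons/intro path is a hypothetical example, not a real route:
#
#   from urllib import robotparser
#
#   rp = robotparser.RobotFileParser()
#   with open("robots.txt") as f:
#       rp.parse(f.read().splitlines())
#
#   # Search engines stay allowed; AI crawlers are blocked.
#   assert rp.can_fetch("Googlebot", "https://heidionclaude.com/lessons/intro")
#   assert not rp.can_fetch("GPTBot", "https://heidionclaude.com/lessons/intro")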