aboutsummaryrefslogtreecommitdiff
path: root/static/robots.txt
diff options
context:
space:
mode:
Diffstat (limited to 'static/robots.txt')
-rw-r--r-- static/robots.txt 52
1 files changed, 52 insertions, 0 deletions
diff --git a/static/robots.txt b/static/robots.txt
new file mode 100644
index 0000000..1b9dcbf
--- /dev/null
+++ b/static/robots.txt
@@ -0,0 +1,52 @@
+# block LLM bots by user-agent
+# src: https://robotstxt.com/ai
+# updated: 2025-01-30
+User-Agent: GPTBot
+User-Agent: ClaudeBot
+User-Agent: Claude-Web
+User-Agent: CCBot
+User-Agent: Google-Extended
+User-Agent: Applebot-Extended
+User-Agent: Facebookbot
+User-Agent: Meta-ExternalAgent
+User-Agent: Meta-ExternalFetcher
+User-Agent: diffbot
+User-Agent: PerplexityBot
+User-Agent: Omgili
+User-Agent: Omgilibot
+User-Agent: webzio-extended
+User-Agent: ImagesiftBot
+User-Agent: Bytespider
+User-Agent: Amazonbot
+User-Agent: Youbot
+User-Agent: SemrushBot-OCOB
+User-Agent: Petalbot
+User-Agent: VelenPublicWebCrawler
+User-Agent: TurnitinBot
+User-Agent: Timpibot
+User-Agent: OAI-SearchBot
+User-Agent: ICC-Crawler
+User-Agent: AI2Bot
+User-Agent: AI2Bot-Dolma
+User-Agent: DataForSeoBot
+User-Agent: AwarioBot
+User-Agent: AwarioSmartBot
+User-Agent: AwarioRssBot
+User-Agent: Google-CloudVertexBot
+User-Agent: PanguBot
+User-Agent: Kangaroo Bot
+User-Agent: Sentibot
+User-Agent: img2dataset
+User-Agent: Meltwater
+User-Agent: Seekr
+User-Agent: peer39_crawler
+User-Agent: cohere-ai
+User-Agent: cohere-training-data-crawler
+User-Agent: DuckAssistBot
+User-Agent: Scrapy
+Disallow: /
+DisallowAITraining: /
+
+# ask non-specific LLM bots not to train on this site (note: experimental directive; crawlers that do not recognize it ignore it)
+User-Agent: *
+DisallowAITraining: /