aboutsummaryrefslogtreecommitdiff
path: root/static
diff options
context:
space:
mode:
authorPaul Duncan <pabs@pablotron.org>2025-01-30 21:58:37 -0500
committerPaul Duncan <pabs@pablotron.org>2025-01-30 21:58:37 -0500
commit315ff2f674c4f2f255532ed9449095d72f84e91c (patch)
tree3c2084be088976b6b6ef114ef3603e89f0ddb59e /static
parent837151f4fc6f49436866d0616e2a7cbc81192d45 (diff)
downloadpablotron.org-315ff2f674c4f2f255532ed9449095d72f84e91c.tar.bz2
pablotron.org-315ff2f674c4f2f255532ed9449095d72f84e91c.zip
TODO.md: add ideas
Diffstat (limited to 'static')
-rw-r--r--static/robots.txt52
1 files changed, 52 insertions, 0 deletions
diff --git a/static/robots.txt b/static/robots.txt
new file mode 100644
index 0000000..1b9dcbf
--- /dev/null
+++ b/static/robots.txt
@@ -0,0 +1,52 @@
+# block LLM bots by user-agent
+# src: https://robotstxt.com/ai
+# updated: 2025-01-30
+User-Agent: GPTBot
+User-Agent: ClaudeBot
+User-Agent: Claude-Web
+User-Agent: CCBot
+User-Agent: Google-Extended
+User-Agent: Applebot-Extended
+User-Agent: Facebookbot
+User-Agent: Meta-ExternalAgent
+User-Agent: Meta-ExternalFetcher
+User-Agent: diffbot
+User-Agent: PerplexityBot
+User-Agent: Omgili
+User-Agent: Omgilibot
+User-Agent: webzio-extended
+User-Agent: ImagesiftBot
+User-Agent: Bytespider
+User-Agent: Amazonbot
+User-Agent: Youbot
+User-Agent: SemrushBot-OCOB
+User-Agent: Petalbot
+User-Agent: VelenPublicWebCrawler
+User-Agent: TurnitinBot
+User-Agent: Timpibot
+User-Agent: OAI-SearchBot
+User-Agent: ICC-Crawler
+User-Agent: AI2Bot
+User-Agent: AI2Bot-Dolma
+User-Agent: DataForSeoBot
+User-Agent: AwarioBot
+User-Agent: AwarioSmartBot
+User-Agent: AwarioRssBot
+User-Agent: Google-CloudVertexBot
+User-Agent: PanguBot
+User-Agent: Kangaroo Bot
+User-Agent: Sentibot
+User-Agent: img2dataset
+User-Agent: Meltwater
+User-Agent: Seekr
+User-Agent: peer39_crawler
+User-Agent: cohere-ai
+User-Agent: cohere-training-data-crawler
+User-Agent: DuckAssistBot
+User-Agent: Scrapy
+Disallow: /
+DisallowAITraining: /
+
+# block non-specific LLM bots (note: experimental directive)
+User-Agent: *
+DisallowAITraining: /