From 315ff2f674c4f2f255532ed9449095d72f84e91c Mon Sep 17 00:00:00 2001
From: Paul Duncan
Date: Thu, 30 Jan 2025 21:58:37 -0500
Subject: TODO.md: add ideas

---
 static/robots.txt | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)
 create mode 100644 static/robots.txt

(limited to 'static/robots.txt')

diff --git a/static/robots.txt b/static/robots.txt
new file mode 100644
index 0000000..1b9dcbf
--- /dev/null
+++ b/static/robots.txt
@@ -0,0 +1,52 @@
+# block LLM bots by user-agent
+# src: https://robotstxt.com/ai
+# updated: 2025-01-30
+User-Agent: GPTBot
+User-Agent: ClaudeBot
+User-Agent: Claude-Web
+User-Agent: CCBot
+User-Agent: Google-Extended
+User-Agent: Applebot-Extended
+User-Agent: Facebookbot
+User-Agent: Meta-ExternalAgent
+User-Agent: Meta-ExternalFetcher
+User-Agent: diffbot
+User-Agent: PerplexityBot
+User-Agent: Omgili
+User-Agent: Omgilibot
+User-Agent: webzio-extended
+User-Agent: ImagesiftBot
+User-Agent: Bytespider
+User-Agent: Amazonbot
+User-Agent: Youbot
+User-Agent: SemrushBot-OCOB
+User-Agent: Petalbot
+User-Agent: VelenPublicWebCrawler
+User-Agent: TurnitinBot
+User-Agent: Timpibot
+User-Agent: OAI-SearchBot
+User-Agent: ICC-Crawler
+User-Agent: AI2Bot
+User-Agent: AI2Bot-Dolma
+User-Agent: DataForSeoBot
+User-Agent: AwarioBot
+User-Agent: AwarioSmartBot
+User-Agent: AwarioRssBot
+User-Agent: Google-CloudVertexBot
+User-Agent: PanguBot
+User-Agent: Kangaroo Bot
+User-Agent: Sentibot
+User-Agent: img2dataset
+User-Agent: Meltwater
+User-Agent: Seekr
+User-Agent: peer39_crawler
+User-Agent: cohere-ai
+User-Agent: cohere-training-data-crawler
+User-Agent: DuckAssistBot
+User-Agent: Scrapy
+Disallow: /
+DisallowAITraining: /
+
+# block non-specific LLM bots (note: experimental directive)
+User-Agent: *
+DisallowAITraining: /
-- 
cgit v1.2.3