# As a condition of accessing this website, you agree to abide by the following
# content signals:

# (a)  If a content-signal = yes, you may collect content for the corresponding
#      use.
# (b)  If a content-signal = no, you may not collect content for the
#      corresponding use.
# (c)  If the website operator does not include a content signal for a
#      corresponding use, the website operator neither grants nor restricts
#      permission via content signal with respect to the corresponding use.

# The content signals and their meanings are:

# search:   building a search index and providing search results (e.g., returning
#           hyperlinks and short excerpts from your website's contents). Search does not
#           include providing AI-generated search summaries.
# ai-input: inputting content into one or more AI models (e.g., retrieval
#           augmented generation, grounding, or other real-time taking of content for
#           generative AI search answers).
# ai-train: training or fine-tuning AI models.

# ANY RESTRICTIONS EXPRESSED VIA CONTENT SIGNALS ARE EXPRESS RESERVATIONS OF
# RIGHTS UNDER ARTICLE 4 OF THE EUROPEAN UNION DIRECTIVE 2019/790 ON COPYRIGHT
# AND RELATED RIGHTS IN THE DIGITAL SINGLE MARKET.

# BEGIN Cloudflare Managed content

# NOTE(review): these groups sit between "Cloudflare Managed" BEGIN/END markers,
# so manual edits here are presumably overwritten when the block is regenerated
# - confirm before changing anything by hand.
#
# Default group for every crawler: search indexing is permitted, AI training is
# not (no ai-input signal is declared), and all paths may be crawled.
User-Agent: *
Content-signal: search=yes,ai-train=no
Allow: /

# Each crawler named below is denied the entire site.
User-agent: Amazonbot
Disallow: /

User-agent: Applebot-Extended
Disallow: /

User-agent: Bytespider
Disallow: /

User-agent: CCBot
Disallow: /

User-agent: ClaudeBot
Disallow: /

User-agent: Google-Extended
Disallow: /

User-agent: GPTBot
Disallow: /

User-agent: meta-externalagent
Disallow: /

# END Cloudflare Managed Content

# Site-wide default: any crawler may fetch every path. This repeats the
# `User-agent: *` / `Allow: /` rule already declared earlier in this file; the
# path exclusions for `*` live in a separate group at the end of the file.
User-agent: *
Allow: /

# BEGIN Cloudflare Managed content

# CCBot and meta-externalagent are denied the entire site. Both tokens are
# also disallowed by groups earlier in this file, so these two groups are
# redundant but harmless.
User-agent: CCBot
Disallow: /

User-agent: meta-externalagent
Disallow: /

# ImagesiftBot - billed as a reverse image search tool, but associated with
# The Hive, a company that produces models for image generation.
# Denied the entire site. The product token must carry no trailing whitespace,
# so that parsers doing an exact string comparison still match it.
User-agent: ImagesiftBot
Disallow: /

# YouBot - denied the entire site.
# NOTE(review): this token is presumably the You.com AI search crawler; the
# earlier attribution to KUKA's youBot (a robotics platform) looks like a name
# collision - confirm.
User-agent: YouBot
Disallow: /

# omgilibot - its operator sells data for training LLMs (large language models).
User-agent: omgilibot
Disallow: /

# omgili - Omgili ("Oh My God I Love It") crawler token; presumably run by the
# same operator as omgilibot - confirm.
User-agent: omgili
Disallow: /

# GPTBot - the bot OpenAI specifically uses to collect bulk training data from
# websites for ChatGPT. This token is also disallowed by a group earlier in
# this file.
User-agent: GPTBot
Disallow: /

# PerplexityBot - crawler for Perplexity AI.
User-agent: PerplexityBot
Disallow: /

## Social Media Bots

# Bytespider - web crawler operated by ByteDance, the Chinese owner of TikTok.
# This token is also disallowed by a group earlier in this file.
User-agent: Bytespider
Disallow: /

# FacebookBot - Meta's bot that crawls public web pages to improve language
# models for its speech recognition technology.
User-agent: FacebookBot
Disallow: /

# Twitterbot - Twitter's bot used to index the content of any given URL.
# NOTE(review): blocking it presumably also prevents link-preview cards from
# being generated for this site's URLs - confirm that is intended.
User-agent: Twitterbot
Disallow: /

## Other AI and data-collection bots (not social media)
#
# Diffbot - somewhat dishonest scraping bot used to collect data to train LLMs.
User-agent: Diffbot
Disallow: /

# Amazonbot - Amazon's crawler, enabling Alexa to answer more questions for
# customers. This token is also disallowed by a group earlier in this file.
User-agent: Amazonbot
Disallow: /

# anthropic-ai - Anthropic AI bot.
User-agent: anthropic-ai
Disallow: /

# Applebot - collects website data for Apple's Siri and Spotlight services.
# This is a different token from Applebot-Extended, which is disallowed
# separately earlier in this file.
# NOTE(review): blocking plain Applebot presumably removes the site from
# Siri/Spotlight results as well - confirm that is intended.
User-agent: Applebot
Disallow: /

# Claude-Web - bot run by Anthropic. This is a different token from ClaudeBot,
# which is disallowed separately earlier in this file.
User-agent: Claude-Web
Disallow: /

# END Cloudflare Managed Content
# Path exclusions that apply to every crawler. Each value is matched as a path
# prefix, so `/lp` also covers any longer path that begins with `/lp`.
# NOTE(review): this is the third `User-agent: *` group in the file. RFC 9309
# tells crawlers to merge groups for the same agent, but a parser that honors
# only the first matching group would never apply these rules - verify against
# the crawlers that matter for this site.
User-agent: *
Disallow: /lp
Disallow: /feedback
Disallow: /langtest