# Miles Mosaic - robots.txt
# v7: added explicit blocks for AI search + training crawlers (OpenAI,
# Anthropic, Perplexity) and added /staging/ to the Disallow list so the
# staging mirror at https://milesmosaic.com/staging/ is never indexed.
# Belt-and-braces alongside meta noindex emitted by lib/public_head.php
# when the request is served under /staging/.
#
# AI crawlers documented at:
#   OpenAI:     https://platform.openai.com/docs/bots
#   Anthropic:  https://support.anthropic.com/en/articles/8896518
#   Perplexity: https://docs.perplexity.ai/guides/bots
# Deliberately NOT listing the deprecated `Claude-Web` or `anthropic-ai`
# user agents; the active Anthropic agents are ClaudeBot, Claude-User,
# and Claude-SearchBot.
#
# Formatting rules for this file:
#   * One directive per physical line. A `#` starts a comment that runs to the
#     end of the line, so a directive sharing a line with a leading `#` is
#     never parsed.
#   * No blank lines inside a group. Some older parsers treat a blank line as
#     the end of the group, so sections are separated with `#` lines instead.

# ── Default group: every crawler without a named group below ───────────────
User-agent: *
Allow: /
Allow: /articles/
Allow: /about/
Allow: /sitemap.xml
Allow: /images/
#
# Public editorial / comparison / trust surfaces — explicitly allowed alongside
# the catch-all so any future Disallow lines do not accidentally swallow them.
Allow: /vs/
Allow: /with/
Allow: /privacy/
Allow: /about/faq/
#
# Authenticated application surface - do not crawl
Disallow: /login/
Disallow: /menu.php
Disallow: /main/
Disallow: /flights/
Disallow: /hotels/
Disallow: /trips/
Disallow: /loyalty/
Disallow: /activity/
Disallow: /profile/
Disallow: /subscriptions/
Disallow: /settings/
Disallow: /dashboard.php
#
# Staging mirror at https://milesmosaic.com/staging/ — never index.
Disallow: /staging/
#
# Administration and operational
Disallow: /admin/
Disallow: /api/
Disallow: /sandbox/
Disallow: /billing/
Disallow: /guides/
Disallow: /cron/
Disallow: /tmp/
Disallow: /log/
Disallow: /logs/
Disallow: /vendor/
Disallow: /lib/*.php
Disallow: /components/*.php
Disallow: /config/
#
# Sensitive
Disallow: /*.env
Disallow: /*.sql
Disallow: /*.bak
Disallow: /*.backup
Disallow: /*.old
#
# Status Passport — token-gated share URLs are not for indexing.
# Owner views (/passport/) require login and are noindex via meta.
Disallow: /passport/share/
Disallow: /passport/card.php
#
# Sharing (Phase 3) — read-only dashboard share links.
# Token-gated. Defence in depth alongside noindex meta on the share page.
Disallow: /share/
Disallow: /inbox/
#
# /help/ admin helpers — the public FAQ now lives at /about/faq/; the
# editor, Parsedown library, and content.php helper are internal-only and
# have no value to a search engine. Don't waste crawl budget on them.
Disallow: /help/faq_editor.php
Disallow: /help/content.php
Disallow: /help/Parsedown.php
#
# Drawer iframe host — popup.php only serves logged-in routed partials and
# always returns 404 to direct GET attempts. Block at the crawler layer so
# Google never logs the bare URL or the popup.php?path= spam variants.
Disallow: /popup.php

# AdSense and ad networks - explicitly allowed so the review bot can crawl
User-agent: Mediapartners-Google
Allow: /

User-agent: AdsBot-Google
Allow: /

# ── AI search + training crawlers ──────────────────────────────────────────
# Each agent is allowed to crawl the public surface (homepage, /articles/,
# /about/, /vs/, /with/, /privacy/) and explicitly blocked from staging,
# authenticated, and internal paths.
#
# IMPORTANT: a crawler that matches a named group obeys ONLY that group; it
# does not inherit anything from `User-agent: *`. Every wildcard Disallow that
# should apply to a named bot must therefore be repeated in its group — keep
# the named lists in sync with the wildcard Disallow rules above.
#
# Rule order inside a group is not significant: the longest matching path
# wins, which is how `Allow: /` and the more specific Disallow lines coexist
# and let these bots reach editorial content while respecting the no-index
# zones.

# OpenAI — ChatGPT live search + browsing + GPTBot (training).
# A crawler that matches a named group ignores the `User-agent: *` group, so
# this group repeats the full wildcard Disallow list (operational paths,
# sensitive file types, /passport/card.php and the /help/ helpers included).
# The three OpenAI agents take identical rules and share one group; several
# consecutive User-agent lines may head a single rule set. No blank lines
# inside the group.
User-agent: OAI-SearchBot
User-agent: ChatGPT-User
User-agent: GPTBot
Allow: /
# Staging mirror
Disallow: /staging/
# Authenticated application surface
Disallow: /login/
Disallow: /menu.php
Disallow: /main/
Disallow: /flights/
Disallow: /hotels/
Disallow: /trips/
Disallow: /loyalty/
Disallow: /activity/
Disallow: /profile/
Disallow: /subscriptions/
Disallow: /settings/
Disallow: /dashboard.php
# Administration and operational
Disallow: /admin/
Disallow: /api/
Disallow: /sandbox/
Disallow: /billing/
Disallow: /guides/
Disallow: /cron/
Disallow: /tmp/
Disallow: /log/
Disallow: /logs/
Disallow: /vendor/
Disallow: /lib/*.php
Disallow: /components/*.php
Disallow: /config/
# Sensitive file types
Disallow: /*.env
Disallow: /*.sql
Disallow: /*.bak
Disallow: /*.backup
Disallow: /*.old
# Status Passport and sharing surfaces
Disallow: /passport/share/
Disallow: /passport/card.php
Disallow: /share/
Disallow: /inbox/
# Internal /help/ helpers and drawer iframe host
Disallow: /help/faq_editor.php
Disallow: /help/content.php
Disallow: /help/Parsedown.php
Disallow: /popup.php

# Anthropic — Claude live search, in-app fetch on user request, and ClaudeBot.
# Named groups replace (never extend) the `User-agent: *` group for the bots
# they match, so each group below repeats the full wildcard Disallow list.
# The three Anthropic agents take identical rules and share one group.
# No blank lines inside a group.
User-agent: Claude-SearchBot
User-agent: Claude-User
User-agent: ClaudeBot
Allow: /
# Staging mirror
Disallow: /staging/
# Authenticated application surface
Disallow: /login/
Disallow: /menu.php
Disallow: /main/
Disallow: /flights/
Disallow: /hotels/
Disallow: /trips/
Disallow: /loyalty/
Disallow: /activity/
Disallow: /profile/
Disallow: /subscriptions/
Disallow: /settings/
Disallow: /dashboard.php
# Administration and operational
Disallow: /admin/
Disallow: /api/
Disallow: /sandbox/
Disallow: /billing/
Disallow: /guides/
Disallow: /cron/
Disallow: /tmp/
Disallow: /log/
Disallow: /logs/
Disallow: /vendor/
Disallow: /lib/*.php
Disallow: /components/*.php
Disallow: /config/
# Sensitive file types
Disallow: /*.env
Disallow: /*.sql
Disallow: /*.bak
Disallow: /*.backup
Disallow: /*.old
# Status Passport and sharing surfaces
Disallow: /passport/share/
Disallow: /passport/card.php
Disallow: /share/
Disallow: /inbox/
# Internal /help/ helpers and drawer iframe host
Disallow: /help/faq_editor.php
Disallow: /help/content.php
Disallow: /help/Parsedown.php
Disallow: /popup.php

# Perplexity — live answer engine.
User-agent: PerplexityBot
Allow: /
# Staging mirror
Disallow: /staging/
# Authenticated application surface
Disallow: /login/
Disallow: /menu.php
Disallow: /main/
Disallow: /flights/
Disallow: /hotels/
Disallow: /trips/
Disallow: /loyalty/
Disallow: /activity/
Disallow: /profile/
Disallow: /subscriptions/
Disallow: /settings/
Disallow: /dashboard.php
# Administration and operational
Disallow: /admin/
Disallow: /api/
Disallow: /sandbox/
Disallow: /billing/
Disallow: /guides/
Disallow: /cron/
Disallow: /tmp/
Disallow: /log/
Disallow: /logs/
Disallow: /vendor/
Disallow: /lib/*.php
Disallow: /components/*.php
Disallow: /config/
# Sensitive file types
Disallow: /*.env
Disallow: /*.sql
Disallow: /*.bak
Disallow: /*.backup
Disallow: /*.old
# Status Passport and sharing surfaces
Disallow: /passport/share/
Disallow: /passport/card.php
Disallow: /share/
Disallow: /inbox/
# Internal /help/ helpers and drawer iframe host
Disallow: /help/faq_editor.php
Disallow: /help/content.php
Disallow: /help/Parsedown.php
Disallow: /popup.php

# Perplexity-User — Perplexity's per-user on-demand fetch agent.
# Cloudflare's Aug 2025 report documented Perplexity using undeclared
# crawlers that don't honour robots.txt; listing the agent here is
# correct-for-the-record even if compliance is patchy.
# This named group replaces the `User-agent: *` group for the agent, so it
# repeats the full wildcard Disallow list. No blank lines inside the group.
User-agent: Perplexity-User
Allow: /
# Staging mirror
Disallow: /staging/
# Authenticated application surface
Disallow: /login/
Disallow: /menu.php
Disallow: /main/
Disallow: /flights/
Disallow: /hotels/
Disallow: /trips/
Disallow: /loyalty/
Disallow: /activity/
Disallow: /profile/
Disallow: /subscriptions/
Disallow: /settings/
Disallow: /dashboard.php
# Administration and operational
Disallow: /admin/
Disallow: /api/
Disallow: /sandbox/
Disallow: /billing/
Disallow: /guides/
Disallow: /cron/
Disallow: /tmp/
Disallow: /log/
Disallow: /logs/
Disallow: /vendor/
Disallow: /lib/*.php
Disallow: /components/*.php
Disallow: /config/
# Sensitive file types
Disallow: /*.env
Disallow: /*.sql
Disallow: /*.bak
Disallow: /*.backup
Disallow: /*.old
# Status Passport and sharing surfaces
Disallow: /passport/share/
Disallow: /passport/card.php
Disallow: /share/
Disallow: /inbox/
# Internal /help/ helpers and drawer iframe host
Disallow: /help/faq_editor.php
Disallow: /help/content.php
Disallow: /help/Parsedown.php
Disallow: /popup.php

# ── Google Gemini / Bingbot ──────────────────────────────────────────────
# Google-Extended is a training-consent TOKEN (no separate crawler — gates
# whether Googlebot's already-crawled content can be used for Gemini /
# Vertex training). Allow it so we remain eligible for Google AI Overviews
# while gradually building citation share.
# NOTE(review): Google's crawler documentation says Google-Extended does not
# affect inclusion in Google Search — confirm whether AI Overviews
# eligibility actually depends on this token.
# Bingbot is the classic search crawler and also feeds Microsoft Copilot
# grounding — explicit listing is defensive hygiene alongside the wildcard
# above. Because a named group replaces the wildcard group for that bot, the
# Bingbot group must carry every wildcard Disallow itself.
# Named groups replace (never extend) the `User-agent: *` group for the bots
# they match. Bingbot in particular is a mainstream search crawler, so its
# group must repeat the full wildcard Disallow list or operational paths and
# sensitive file types become crawlable. Google-Extended and Bingbot take
# identical rules and share one group. No blank lines inside a group.
User-agent: Google-Extended
User-agent: Bingbot
Allow: /
# Staging mirror
Disallow: /staging/
# Authenticated application surface
Disallow: /login/
Disallow: /menu.php
Disallow: /main/
Disallow: /flights/
Disallow: /hotels/
Disallow: /trips/
Disallow: /loyalty/
Disallow: /activity/
Disallow: /profile/
Disallow: /subscriptions/
Disallow: /settings/
Disallow: /dashboard.php
# Administration and operational
Disallow: /admin/
Disallow: /api/
Disallow: /sandbox/
Disallow: /billing/
Disallow: /guides/
Disallow: /cron/
Disallow: /tmp/
Disallow: /log/
Disallow: /logs/
Disallow: /vendor/
Disallow: /lib/*.php
Disallow: /components/*.php
Disallow: /config/
# Sensitive file types
Disallow: /*.env
Disallow: /*.sql
Disallow: /*.bak
Disallow: /*.backup
Disallow: /*.old
# Status Passport and sharing surfaces
Disallow: /passport/share/
Disallow: /passport/card.php
Disallow: /share/
Disallow: /inbox/
# Internal /help/ helpers and drawer iframe host
Disallow: /help/faq_editor.php
Disallow: /help/content.php
Disallow: /help/Parsedown.php
Disallow: /popup.php

# Apple Intelligence — powers Spotlight, Siri, Safari AI summaries.
# Applebot-Extended is also a training-consent token (Applebot itself
# crawls for Spotlight/Siri regardless).
User-agent: Applebot-Extended
Allow: /
# Staging mirror
Disallow: /staging/
# Authenticated application surface
Disallow: /login/
Disallow: /menu.php
Disallow: /main/
Disallow: /flights/
Disallow: /hotels/
Disallow: /trips/
Disallow: /loyalty/
Disallow: /activity/
Disallow: /profile/
Disallow: /subscriptions/
Disallow: /settings/
Disallow: /dashboard.php
# Administration and operational
Disallow: /admin/
Disallow: /api/
Disallow: /sandbox/
Disallow: /billing/
Disallow: /guides/
Disallow: /cron/
Disallow: /tmp/
Disallow: /log/
Disallow: /logs/
Disallow: /vendor/
Disallow: /lib/*.php
Disallow: /components/*.php
Disallow: /config/
# Sensitive file types
Disallow: /*.env
Disallow: /*.sql
Disallow: /*.bak
Disallow: /*.backup
Disallow: /*.old
# Status Passport and sharing surfaces
Disallow: /passport/share/
Disallow: /passport/card.php
Disallow: /share/
Disallow: /inbox/
# Internal /help/ helpers and drawer iframe host
Disallow: /help/faq_editor.php
Disallow: /help/content.php
Disallow: /help/Parsedown.php
Disallow: /popup.php

# Meta AI training/grounding. Surged from 8.5% → 11.6% of AI bot traffic
# Dec 2025–Jan 2026 (Cloudflare). Allowed for consistency with the rest of
# the AI search/training set.
# Named groups replace (never extend) the `User-agent: *` group for the bots
# they match, so each group below repeats the full wildcard Disallow list.
# No blank lines inside a group.
User-agent: Meta-ExternalAgent
Allow: /
# Staging mirror
Disallow: /staging/
# Authenticated application surface
Disallow: /login/
Disallow: /menu.php
Disallow: /main/
Disallow: /flights/
Disallow: /hotels/
Disallow: /trips/
Disallow: /loyalty/
Disallow: /activity/
Disallow: /profile/
Disallow: /subscriptions/
Disallow: /settings/
Disallow: /dashboard.php
# Administration and operational
Disallow: /admin/
Disallow: /api/
Disallow: /sandbox/
Disallow: /billing/
Disallow: /guides/
Disallow: /cron/
Disallow: /tmp/
Disallow: /log/
Disallow: /logs/
Disallow: /vendor/
Disallow: /lib/*.php
Disallow: /components/*.php
Disallow: /config/
# Sensitive file types
Disallow: /*.env
Disallow: /*.sql
Disallow: /*.bak
Disallow: /*.backup
Disallow: /*.old
# Status Passport and sharing surfaces
Disallow: /passport/share/
Disallow: /passport/card.php
Disallow: /share/
Disallow: /inbox/
# Internal /help/ helpers and drawer iframe host
Disallow: /help/faq_editor.php
Disallow: /help/content.php
Disallow: /help/Parsedown.php
Disallow: /popup.php

# Amazon (Alexa + Amazon LLMs).
User-agent: Amazonbot
Allow: /
# Staging mirror
Disallow: /staging/
# Authenticated application surface
Disallow: /login/
Disallow: /menu.php
Disallow: /main/
Disallow: /flights/
Disallow: /hotels/
Disallow: /trips/
Disallow: /loyalty/
Disallow: /activity/
Disallow: /profile/
Disallow: /subscriptions/
Disallow: /settings/
Disallow: /dashboard.php
# Administration and operational
Disallow: /admin/
Disallow: /api/
Disallow: /sandbox/
Disallow: /billing/
Disallow: /guides/
Disallow: /cron/
Disallow: /tmp/
Disallow: /log/
Disallow: /logs/
Disallow: /vendor/
Disallow: /lib/*.php
Disallow: /components/*.php
Disallow: /config/
# Sensitive file types
Disallow: /*.env
Disallow: /*.sql
Disallow: /*.bak
Disallow: /*.backup
Disallow: /*.old
# Status Passport and sharing surfaces
Disallow: /passport/share/
Disallow: /passport/card.php
Disallow: /share/
Disallow: /inbox/
# Internal /help/ helpers and drawer iframe host
Disallow: /help/faq_editor.php
Disallow: /help/content.php
Disallow: /help/Parsedown.php
Disallow: /popup.php

# ByteDance / Doubao (TikTok AI). Doubled to 8.71% of AI bot traffic Q1
# 2026 (Cloudflare). Listed for visibility.
User-agent: Bytespider
Allow: /
# Staging mirror
Disallow: /staging/
# Authenticated application surface
Disallow: /login/
Disallow: /menu.php
Disallow: /main/
Disallow: /flights/
Disallow: /hotels/
Disallow: /trips/
Disallow: /loyalty/
Disallow: /activity/
Disallow: /profile/
Disallow: /subscriptions/
Disallow: /settings/
Disallow: /dashboard.php
# Administration and operational
Disallow: /admin/
Disallow: /api/
Disallow: /sandbox/
Disallow: /billing/
Disallow: /guides/
Disallow: /cron/
Disallow: /tmp/
Disallow: /log/
Disallow: /logs/
Disallow: /vendor/
Disallow: /lib/*.php
Disallow: /components/*.php
Disallow: /config/
# Sensitive file types
Disallow: /*.env
Disallow: /*.sql
Disallow: /*.bak
Disallow: /*.backup
Disallow: /*.old
# Status Passport and sharing surfaces
Disallow: /passport/share/
Disallow: /passport/card.php
Disallow: /share/
Disallow: /inbox/
# Internal /help/ helpers and drawer iframe host
Disallow: /help/faq_editor.php
Disallow: /help/content.php
Disallow: /help/Parsedown.php
Disallow: /popup.php

# Common Crawl — open corpus used downstream by many models (LLaMA,
# Mistral, GPT pre-training, etc.). Allowed for the same reason as the
# named training crawlers above.
# This named group replaces the `User-agent: *` group for CCBot, so it
# repeats the full wildcard Disallow list. No blank lines inside the group.
User-agent: CCBot
Allow: /
# Staging mirror
Disallow: /staging/
# Authenticated application surface
Disallow: /login/
Disallow: /menu.php
Disallow: /main/
Disallow: /flights/
Disallow: /hotels/
Disallow: /trips/
Disallow: /loyalty/
Disallow: /activity/
Disallow: /profile/
Disallow: /subscriptions/
Disallow: /settings/
Disallow: /dashboard.php
# Administration and operational
Disallow: /admin/
Disallow: /api/
Disallow: /sandbox/
Disallow: /billing/
Disallow: /guides/
Disallow: /cron/
Disallow: /tmp/
Disallow: /log/
Disallow: /logs/
Disallow: /vendor/
Disallow: /lib/*.php
Disallow: /components/*.php
Disallow: /config/
# Sensitive file types
Disallow: /*.env
Disallow: /*.sql
Disallow: /*.bak
Disallow: /*.backup
Disallow: /*.old
# Status Passport and sharing surfaces
Disallow: /passport/share/
Disallow: /passport/card.php
Disallow: /share/
Disallow: /inbox/
# Internal /help/ helpers and drawer iframe host
Disallow: /help/faq_editor.php
Disallow: /help/content.php
Disallow: /help/Parsedown.php
Disallow: /popup.php

# Sitemap — applies to all crawlers; it is not part of any User-agent group.
Sitemap: https://milesmosaic.com/sitemap.xml