# Robots.txt for Scored News - Financial Intelligence Platform
# Updated: 2025-11-13
# Purpose: Guide search engines and AI crawlers for optimal indexing
#
# How this file is evaluated (RFC 9309):
#   * One directive per line. Everything after "#" on a line is a comment.
#   * A crawler obeys ONLY the group whose User-agent matches it most
#     specifically. It does NOT additionally inherit the "User-agent: *" rules.
#     Every named group below therefore repeats the private-area rules.
#     When you change the shared rule set, change it in EVERY group.
#   * Modern crawlers pick the longest matching path; some older parsers pick
#     the first matching line. Specific rules are listed before the catch-all
#     "Allow: /" so both strategies reach the same answer.
#   * Crawl-delay is a non-standard extension. Crawlers that do not support it
#     (Google among them) ignore it. Use whole seconds; omit the line for
#     "no delay".
#
# Shared rule set (repeated in each allowed group):
#   Allow    /api/social-image/, /api/og-image/  - social preview images
#   Allow    /_next/static/, /_next/image        - JS/CSS/images needed to render pages
#   Disallow /admin/, /private/, /auth/          - private areas
#   Disallow /api/, /_next/, /*.json$            - technical endpoints

# ============================================
# DEFAULT RULES FOR ALL OTHER BOTS
# ============================================

User-agent: *
# Main content sections (already covered by "Allow: /"; listed for documentation)
Allow: /deep-dives/
Allow: /sentiment-trends/
Allow: /premium-dashboard/
Allow: /monetization/
# Social preview endpoints stay reachable even though /api/ is blocked
Allow: /api/social-image/
Allow: /api/og-image/
# Build assets required to render pages stay reachable even though /_next/ is blocked
Allow: /_next/static/
Allow: /_next/image
# Block admin, private areas, and technical endpoints
Disallow: /admin/
Disallow: /private/
Disallow: /auth/
Disallow: /api/
Disallow: /_next/
Disallow: /*.json$
Allow: /
# Default crawl-delay for respectful crawling
Crawl-delay: 1

# ============================================
# MAJOR SEARCH ENGINES
# ============================================

# Google and Bing page previews - highest priority, no delay
User-agent: Googlebot
User-agent: Googlebot-Image
User-agent: Googlebot-News
User-agent: BingPreview
Allow: /api/social-image/
Allow: /api/og-image/
Allow: /_next/static/
Allow: /_next/image
Disallow: /admin/
Disallow: /private/
Disallow: /auth/
Disallow: /api/
Disallow: /_next/
Disallow: /*.json$
Allow: /

# Bing, Yahoo/Verizon Media (Slurp), DuckDuckGo - standard crawling
# Bingbot was previously "Crawl-delay: 0.5"; rounded up to a whole second.
User-agent: Bingbot
User-agent: Slurp
User-agent: DuckDuckBot
Allow: /api/social-image/
Allow: /api/og-image/
Allow: /_next/static/
Allow: /_next/image
Disallow: /admin/
Disallow: /private/
Disallow: /auth/
Disallow: /api/
Disallow: /_next/
Disallow: /*.json$
Allow: /
Crawl-delay: 1

# Yandex, Baidu
User-agent: Yandex
User-agent: Baiduspider
Allow: /api/social-image/
Allow: /api/og-image/
Allow: /_next/static/
Allow: /_next/image
Disallow: /admin/
Disallow: /private/
Disallow: /auth/
Disallow: /api/
Disallow: /_next/
Disallow: /*.json$
Allow: /
Crawl-delay: 2

# ============================================
# AI & LLM CRAWLERS - Enable for AI training
# ============================================

# OpenAI (ChatGPT)
User-agent: ChatGPT-User
User-agent: GPTBot
# Anthropic (Claude)
User-agent: Claude-Web
User-agent: anthropic-ai
# Google AI (Bard/Gemini)
User-agent: Google-Extended
# Perplexity AI
User-agent: PerplexityBot
# You.com
User-agent: YouBot
# Bing AI/Copilot
User-agent: BingAI
# Meta AI
User-agent: FacebookBot
User-agent: Meta-ExternalAgent
# Apple Intelligence
User-agent: Applebot
User-agent: Applebot-Extended
# Common Crawl
User-agent: CCBot
Allow: /api/social-image/
Allow: /api/og-image/
Allow: /_next/static/
Allow: /_next/image
Disallow: /admin/
Disallow: /private/
Disallow: /auth/
Disallow: /api/
Disallow: /_next/
Disallow: /*.json$
Allow: /

# ============================================
# SOCIAL MEDIA CRAWLERS
# ============================================

# Link-preview fetchers - no delay
User-agent: Twitterbot
User-agent: facebookexternalhit
User-agent: Slackbot
User-agent: TelegramBot
User-agent: Discordbot
User-agent: WhatsApp
Allow: /api/social-image/
Allow: /api/og-image/
Allow: /_next/static/
Allow: /_next/image
Disallow: /admin/
Disallow: /private/
Disallow: /auth/
Disallow: /api/
Disallow: /_next/
Disallow: /*.json$
Allow: /

User-agent: LinkedInBot
User-agent: Pinterest
Allow: /api/social-image/
Allow: /api/og-image/
Allow: /_next/static/
Allow: /_next/image
Disallow: /admin/
Disallow: /private/
Disallow: /auth/
Disallow: /api/
Disallow: /_next/
Disallow: /*.json$
Allow: /
Crawl-delay: 1

# ============================================
# NEWS & AGGREGATORS
# ============================================

User-agent: Mediapartners-Google
User-agent: AdsBot-Google
User-agent: Feedfetcher-Google
User-agent: Google-InspectionTool
Allow: /api/social-image/
Allow: /api/og-image/
Allow: /_next/static/
Allow: /_next/image
Disallow: /admin/
Disallow: /private/
Disallow: /auth/
Disallow: /api/
Disallow: /_next/
Disallow: /*.json$
Allow: /

# Apple News
User-agent: AppleNewsBot
Allow: /api/social-image/
Allow: /api/og-image/
Allow: /_next/static/
Allow: /_next/image
Disallow: /admin/
Disallow: /private/
Disallow: /auth/
Disallow: /api/
Disallow: /_next/
Disallow: /*.json$
Allow: /
Crawl-delay: 1

# ============================================
# SPECIALIZED CRAWLERS
# ============================================

# ia_archiver (previously listed twice, under "Amazon Alexa" and "Archive.org";
# merged into this single entry)
User-agent: ia_archiver
# Screaming Frog
User-agent: Screaming Frog SEO Spider
Allow: /api/social-image/
Allow: /api/og-image/
Allow: /_next/static/
Allow: /_next/image
Disallow: /admin/
Disallow: /private/
Disallow: /auth/
Disallow: /api/
Disallow: /_next/
Disallow: /*.json$
Allow: /

# SEMrush, Ahrefs, Moz (rogerbot)
User-agent: SemrushBot
User-agent: AhrefsBot
User-agent: rogerbot
Allow: /api/social-image/
Allow: /api/og-image/
Allow: /_next/static/
Allow: /_next/image
Disallow: /admin/
Disallow: /private/
Disallow: /auth/
Disallow: /api/
Disallow: /_next/
Disallow: /*.json$
Allow: /
Crawl-delay: 2

# ============================================
# BLOCK PROBLEMATIC BOTS
# ============================================
# NOTE: robots.txt is advisory. Bots that ignore it must be blocked at the
# server or CDN level as well.

# Block content scrapers
User-agent: HTTrack
User-agent: wget
User-agent: WebCopier
User-agent: WebReaper
User-agent: Offline Explorer
Disallow: /

# Block spam bots
User-agent: EmailCollector
User-agent: EmailSiphon
User-agent: EmailWolf
Disallow: /

# ============================================
# SITEMAPS
# ============================================
# Sitemap lines are independent of any User-agent group; they are kept at the
# end so they cannot be mistaken for part of a group.
Sitemap: https://scorednews.com/sitemap.xml
Sitemap: https://scorednews.com/sitemap-index.xml
Sitemap: https://scorednews.com/sitemap-images.xml
Sitemap: https://scorednews.com/sitemap-news.xml