# Robots.txt for nuovicomici.com

User-agent: *
Allow: /
Disallow: /api/
Disallow: /_next/
Disallow: /admin/
Disallow: /.well-known/
Disallow: /private/
Disallow: /tmp/
Disallow: /*.json$
Disallow: /*?*session*
Disallow: /*?*debug*

# Allow specific resource directories
Allow: /images/
Allow: /public/
Allow: /static/
Allow: /assets/

# Sitemap location
Sitemap: https://nuovicomici.com/sitemap.xml

# Crawl-delay for polite crawling
Crawl-delay: 1

# Allow all major search engines
User-agent: Googlebot
Allow: /

User-agent: Bingbot
Allow: /

User-agent: Slurp
Allow: /

User-agent: DuckDuckBot
Allow: /

User-agent: Baiduspider
Allow: /

User-agent: YandexBot
Allow: /

User-agent: facebot
Allow: /

User-agent: ia_archiver
Allow: /

# Block problematic bots
User-agent: AhrefsBot
Disallow: /

User-agent: MJ12bot
Disallow: /

User-agent: DotBot
Disallow: /

# Specific rules for social media crawlers
User-agent: facebookexternalhit
Allow: /

User-agent: Twitterbot
Allow: /

User-agent: LinkedInBot
Allow: /

# Specific rules for image crawlers
User-agent: Googlebot-Image
Allow: /

User-agent: Bingbot-Image
Allow: /

# Wildcard rules for specific file types
# NOTE(review): these four Disallow lines follow the "Bingbot-Image" group, so per the
# Robots Exclusion Protocol they apply only to that group — if they were meant for all
# crawlers, move them up into the "User-agent: *" group.
Disallow: /*.log$
Disallow: /*.bak$
Disallow: /*.tmp$
Disallow: /*.sql$