mirror of
				https://github.com/discourse/discourse.git
				synced 2025-02-25 18:55:32 -06:00 
			
		
		
		
	The `/u` rule also matches the `/uploads` path, which prevents Twitter from showing the site logo in its link previews.
		
			
				
	
	
		
			100 lines
		
	
	
		
			2.9 KiB
		
	
	
	
		
			Ruby
		
	
	
	
	
	
			
		
		
	
	
			100 lines
		
	
	
		
			2.9 KiB
		
	
	
	
		
			Ruby
		
	
	
	
	
	
| # frozen_string_literal: true
 | |
| 
 | |
| class RobotsTxtController < ApplicationController
 | |
|   layout false
 | |
|   skip_before_action :preload_json, :check_xhr, :redirect_to_login_if_required
 | |
| 
 | |
|   OVERRIDDEN_HEADER = "# This robots.txt file has been customized at /admin/customize/robots\n"
 | |
| 
 | |
|   # NOTE: order is important!
 | |
|   DISALLOWED_PATHS ||= %w{
 | |
|     /admin/
 | |
|     /auth/
 | |
|     /assets/browser-update*.js
 | |
|     /email/
 | |
|     /session
 | |
|     /user-api-key
 | |
|     /*?api_key*
 | |
|     /*?*api_key*
 | |
|   }
 | |
| 
 | |
|   DISALLOWED_WITH_HEADER_PATHS ||= %w{
 | |
|     /badges
 | |
|     /u/
 | |
|     /my
 | |
|     /search
 | |
|     /tag/*/l
 | |
|     /g
 | |
|     /t/*/*.rss
 | |
|     /c/*.rss
 | |
|   }
 | |
| 
 | |
|   def index
 | |
|     if (overridden = SiteSetting.overridden_robots_txt.dup).present?
 | |
|       overridden.prepend(OVERRIDDEN_HEADER) if guardian.is_admin? && !is_api?
 | |
|       render plain: overridden
 | |
|       return
 | |
|     end
 | |
|     if SiteSetting.allow_index_in_robots_txt?
 | |
|       @robots_info = self.class.fetch_default_robots_info
 | |
|       render :index, content_type: 'text/plain'
 | |
|     else
 | |
|       render :no_index, content_type: 'text/plain'
 | |
|     end
 | |
|   end
 | |
| 
 | |
|   # If you are hosting Discourse in a subfolder, you will need to create your robots.txt
 | |
|   # in the root of your web server with the appropriate paths. This method will return
 | |
|   # JSON that can be used by a script to create a robots.txt that works well with your
 | |
|   # existing site.
 | |
|   def builder
 | |
|     result = self.class.fetch_default_robots_info
 | |
|     overridden = SiteSetting.overridden_robots_txt
 | |
|     result[:overridden] = overridden if overridden.present?
 | |
|     render json: result
 | |
|   end
 | |
| 
 | |
|   def self.fetch_default_robots_info
 | |
|     deny_paths_googlebot = DISALLOWED_PATHS.map { |p| Discourse.base_path + p }
 | |
|     deny_paths = deny_paths_googlebot + DISALLOWED_WITH_HEADER_PATHS.map { |p| Discourse.base_path + p }
 | |
|     deny_all = [ "#{Discourse.base_path}/" ]
 | |
| 
 | |
|     result = {
 | |
|       header: "# See http://www.robotstxt.org/robotstxt.html for documentation on how to use the robots.txt file",
 | |
|       agents: []
 | |
|     }
 | |
| 
 | |
|     if SiteSetting.allowed_crawler_user_agents.present?
 | |
|       SiteSetting.allowed_crawler_user_agents.split('|').each do |agent|
 | |
|         paths = agent == "Googlebot" ? deny_paths_googlebot : deny_paths
 | |
|         result[:agents] << { name: agent, disallow: paths }
 | |
|       end
 | |
| 
 | |
|       result[:agents] << { name: '*', disallow: deny_all }
 | |
|     else
 | |
| 
 | |
|       if SiteSetting.blocked_crawler_user_agents.present?
 | |
|         SiteSetting.blocked_crawler_user_agents.split('|').each do |agent|
 | |
|           result[:agents] << { name: agent, disallow: deny_all }
 | |
|         end
 | |
|       end
 | |
| 
 | |
|       result[:agents] << { name: '*', disallow: deny_paths }
 | |
| 
 | |
|       result[:agents] << { name: 'Googlebot', disallow: deny_paths_googlebot }
 | |
|     end
 | |
| 
 | |
|     if SiteSetting.slow_down_crawler_user_agents.present?
 | |
|       SiteSetting.slow_down_crawler_user_agents.split('|').each do |agent|
 | |
|         result[:agents] << {
 | |
|           name: agent,
 | |
|           delay: SiteSetting.slow_down_crawler_rate,
 | |
|           disallow: deny_paths
 | |
|         }
 | |
|       end
 | |
|     end
 | |
| 
 | |
|     result
 | |
|   end
 | |
| end
 |