Mirror of https://github.com/discourse/discourse.git, synced 2024-11-23 01:16:38 -06:00
FEATURE: An API to help sites build robots.txt files programmatically
This is mainly useful for subfolder sites, which need to expose their robots.txt contents to a parent site.
Commit 3d7dbdedc0 (parent cfe88a67e1)
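
As a rough sketch of the intended use, a script on the parent site could fetch the new robots-builder.json endpoint and fold the returned rules into its own robots.txt. This example is not part of the commit; the host URL and output path are placeholders, and only the JSON shape (a header string plus a list of agents with disallow paths and an optional delay) comes from the change below.

require 'net/http'
require 'json'
require 'uri'

# Placeholder URL for a Discourse install served from a subfolder.
info = JSON.parse(Net::HTTP.get(URI('https://example.com/forum/robots-builder.json')))

lines = [info['header']]
info['agents'].each do |agent|
  lines << "User-agent: #{agent['name']}"
  lines << "Crawl-delay: #{agent['delay']}" if agent['delay']
  agent['disallow'].each { |path| lines << "Disallow: #{path}" }
  lines << ''
end

# Append the generated rules to the parent site's robots.txt (path is a placeholder).
File.open('/var/www/html/robots.txt', 'a') { |f| f.puts lines.join("\n") }
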
@@ -36,26 +36,59 @@ class RobotsTxtController < ApplicationController
   def index
-    if SiteSetting.allow_index_in_robots_txt
-      path = :index
-
-      @crawler_delayed_agents = SiteSetting.slow_down_crawler_user_agents.split('|').map { |agent|
-        [agent, SiteSetting.slow_down_crawler_rate]
-      }
-
-      if SiteSetting.whitelisted_crawler_user_agents.present?
-        @allowed_user_agents = SiteSetting.whitelisted_crawler_user_agents.split('|')
-        @disallowed_user_agents = ['*']
-      elsif SiteSetting.blacklisted_crawler_user_agents.present?
-        @allowed_user_agents = ['*']
-        @disallowed_user_agents = SiteSetting.blacklisted_crawler_user_agents.split('|')
-      else
-        @allowed_user_agents = ['*']
-      end
+    if SiteSetting.allow_index_in_robots_txt?
+      @robots_info = fetch_robots_info
+      render :index, content_type: 'text/plain'
     else
-      path = :no_index
+      render :no_index, content_type: 'text/plain'
     end
+  end
 
-    render path, content_type: 'text/plain'
+  # If you are hosting Discourse in a subfolder, you will need to create your robots.txt
+  # in the root of your web server with the appropriate paths. This method will return
+  # JSON that can be used by a script to create a robots.txt that works well with your
+  # existing site.
+  def builder
+    render json: fetch_robots_info
+  end
+
+  protected
+
+  def fetch_robots_info
+    deny_paths = DISALLOWED_PATHS.map { |p| Discourse.base_uri + p }
+    deny_all = [ "#{Discourse.base_uri}/" ]
+
+    result = {
+      header: "# See http://www.robotstxt.org/robotstxt.html for documentation on how to use the robots.txt file",
+      agents: []
+    }
+
+    if SiteSetting.whitelisted_crawler_user_agents.present?
+      SiteSetting.whitelisted_crawler_user_agents.split('|').each do |agent|
+        result[:agents] << { name: agent, disallow: deny_paths }
+      end
+
+      result[:agents] << { name: '*', disallow: deny_all }
+    elsif SiteSetting.blacklisted_crawler_user_agents.present?
+      result[:agents] << { name: '*', disallow: deny_paths }
+      SiteSetting.blacklisted_crawler_user_agents.split('|').each do |agent|
+        result[:agents] << { name: agent, disallow: deny_all }
+      end
+    else
+      result[:agents] << { name: '*', disallow: deny_paths }
+    end
+
+    if SiteSetting.slow_down_crawler_user_agents.present?
+      SiteSetting.slow_down_crawler_user_agents.split('|').each do |agent|
+        result[:agents] << {
+          name: agent,
+          delay: SiteSetting.slow_down_crawler_rate,
+          disallow: deny_paths
+        }
+      end
+    end
+
+    result
   end
 end
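
For reference, fetch_robots_info as defined above returns a hash of the following shape, which is also what GET /robots-builder.json renders as JSON. The agent names, delay value, and disallow paths shown here are placeholders, since DISALLOWED_PATHS and the crawler site settings are defined outside this hunk; only the header string is the one hard-coded above.

# Illustrative return value of fetch_robots_info; names, paths, and delay are placeholders.
{
  header: "# See http://www.robotstxt.org/robotstxt.html for documentation on how to use the robots.txt file",
  agents: [
    { name: '*', disallow: ['/example-disallowed-path/'] },
    { name: 'SlowBot', delay: 10, disallow: ['/example-disallowed-path/'] }
  ]
}
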
@@ -1,30 +1,18 @@
-# See http://www.robotstxt.org/robotstxt.html for documentation on how to use the robots.txt file
+<%= @robots_info[:header] %>
 <% if Discourse.base_uri.present? %>
 # This robots.txt file is not used. Please append the content below in the robots.txt file located at the root
 <% end %>
 #
-<% @allowed_user_agents.each do |user_agent| %>
-User-agent: <%= user_agent %>
-<% end %>
-<% RobotsTxtController::DISALLOWED_PATHS.each do |path| %>
-Disallow: <%= Discourse.base_uri + path %>
+<% @robots_info[:agents].each do |agent| %>
+User-agent: <%= agent[:name] %>
+<%- if agent[:delay] -%>
+Crawl-delay: <%= agent[:delay] %>
+<%- end -%>
+<% agent[:disallow].each do |path| %>
+Disallow: <%= path %>
 <% end %>
-
-<% if @disallowed_user_agents %>
-<% @disallowed_user_agents.each do |user_agent| %>
-User-agent: <%= user_agent %>
-Disallow: <%= Discourse.base_uri + "/" %>
-
-<% end %>
 <% end %>
 
 <%= server_plugin_outlet "robots_txt_index" %>
-
-<% @crawler_delayed_agents.each do |agent, delay| %>
-User-agent: <%= agent %>
-Crawl-delay: <%= delay %>
-<% RobotsTxtController::DISALLOWED_PATHS.each do |path| %>
-Disallow: <%= Discourse.base_uri + path %>
-<% end %>
-
-<% end %>
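
Given that structure, the rewritten template emits one stanza per agent, adding a Crawl-delay line only when the agent carries a delay. With a hypothetical @robots_info holding the two placeholder agents from the example above and an empty Discourse.base_uri (so the "not used" notice is skipped), the rendered robots.txt would look roughly like this, plus whatever the robots_txt_index plugin outlet contributes:

# See http://www.robotstxt.org/robotstxt.html for documentation on how to use the robots.txt file
#
User-agent: *
Disallow: /example-disallowed-path/
User-agent: SlowBot
Crawl-delay: 10
Disallow: /example-disallowed-path/
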
@@ -744,6 +744,7 @@ Discourse::Application.routes.draw do
   get "favicon/proxied" => "static#favicon", format: false
 
   get "robots.txt" => "robots_txt#index"
+  get "robots-builder.json" => "robots_txt#builder"
   get "offline.html" => "offline#index"
   get "manifest.json" => "metadata#manifest", as: :manifest
   get "opensearch" => "metadata#opensearch", format: :xml
@@ -1,6 +1,16 @@
 require 'rails_helper'
 
 RSpec.describe RobotsTxtController do
+  describe '#builder' do
+    it "returns json information for building a robots.txt" do
+      get "/robots-builder.json"
+      json = ::JSON.parse(response.body)
+      expect(json).to be_present
+      expect(json['header']).to be_present
+      expect(json['agents']).to be_present
+    end
+  end
+
   describe '#index' do
 
     context 'subfolder' do