FEATURE: Send a 'noindex' header in non-canonical responses (#15026)

* FEATURE: Optionally send a 'noindex' header in non-canonical responses

This will be used in an SEO experiment.

Co-authored-by: David Taylor <david@taylorhq.com>
This commit is contained in:
Rafael dos Santos Silva 2021-11-25 16:58:39 -03:00 committed by GitHub
parent 1166afa4e8
commit 5647819de4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 56 additions and 17 deletions

View File

@ -41,13 +41,14 @@ class ApplicationController < ActionController::Base
before_action :redirect_to_login_if_required before_action :redirect_to_login_if_required
before_action :block_if_requires_login before_action :block_if_requires_login
before_action :preload_json before_action :preload_json
before_action :add_noindex_header, if: -> { is_feed_request? || !SiteSetting.allow_index_in_robots_txt }
before_action :check_xhr before_action :check_xhr
after_action :add_readonly_header after_action :add_readonly_header
after_action :perform_refresh_session after_action :perform_refresh_session
after_action :dont_cache_page after_action :dont_cache_page
after_action :conditionally_allow_site_embedding after_action :conditionally_allow_site_embedding
after_action :ensure_vary_header after_action :ensure_vary_header
after_action :add_noindex_header, if: -> { is_feed_request? || !SiteSetting.allow_index_in_robots_txt }
after_action :add_noindex_header_to_non_canonical, if: -> { request.get? && !(request.format && request.format.json?) && !request.xhr? }
HONEYPOT_KEY ||= 'HONEYPOT_KEY' HONEYPOT_KEY ||= 'HONEYPOT_KEY'
CHALLENGE_KEY ||= 'CHALLENGE_KEY' CHALLENGE_KEY ||= 'CHALLENGE_KEY'
@ -912,6 +913,13 @@ class ApplicationController < ActionController::Base
end end
end end
# Tells crawlers not to index a response that was served from a URL other
# than the page's canonical URL.
#
# The canonical URL is taken from @canonical_url, falling back to
# @default_canonical. Nothing happens when no canonical URL is known, when the
# request URL already equals it, or when the
# allow_indexing_non_canonical_urls site setting is enabled.
#
# An X-Robots-Tag header that is already present on the response is kept
# as-is (`||=`), so this never overrides a value set earlier.
def add_noindex_header_to_non_canonical
  canonical = @canonical_url || @default_canonical

  return unless canonical.present?
  return if canonical == request.url
  return if SiteSetting.allow_indexing_non_canonical_urls

  response.headers['X-Robots-Tag'] ||= 'noindex'
end
protected protected
def honeypot_value def honeypot_value

View File

@ -1602,6 +1602,9 @@ security:
regex: "^(Lax|Strict|Disabled|None)$" regex: "^(Lax|Strict|Disabled|None)$"
enable_escaped_fragments: true enable_escaped_fragments: true
allow_index_in_robots_txt: true allow_index_in_robots_txt: true
# Hidden setting, enabled by default. When set to false,
# ApplicationController#add_noindex_header_to_non_canonical adds an
# 'X-Robots-Tag: noindex' header to GET (non-JSON, non-XHR) responses whose
# request URL differs from the page's canonical URL.
allow_indexing_non_canonical_urls:
default: true
hidden: true
moderators_manage_categories_and_groups: false moderators_manage_categories_and_groups: false
moderators_change_post_ownership: moderators_change_post_ownership:
client: true client: true

View File

@ -2,6 +2,8 @@
module CanonicalURL module CanonicalURL
module ControllerExtensions module ControllerExtensions
# Query parameters that are allowed to remain part of the default canonical URL.
# Frozen so the shared allow-list cannot be mutated by accident at runtime.
ALLOWED_CANONICAL_PARAMS = %w(page).freeze
def canonical_url(url_for_options = {}) def canonical_url(url_for_options = {})
case url_for_options case url_for_options
when Hash when Hash
@ -10,15 +12,9 @@ module CanonicalURL
@canonical_url = url_for_options @canonical_url = url_for_options
end end
end end
end
module Helpers
ALLOWED_CANONICAL_PARAMS = %w(page)
def canonical_link_tag(url = nil)
tag('link', rel: 'canonical', href: url || @canonical_url || default_canonical)
end
def default_canonical def default_canonical
@default_canonical ||= begin
canonical = +"#{Discourse.base_url_no_prefix}#{request.path}" canonical = +"#{Discourse.base_url_no_prefix}#{request.path}"
allowed_params = params.select { |key| ALLOWED_CANONICAL_PARAMS.include?(key) } allowed_params = params.select { |key| ALLOWED_CANONICAL_PARAMS.include?(key) }
if allowed_params.present? if allowed_params.present?
@ -27,6 +23,17 @@ module CanonicalURL
canonical canonical
end end
end end
# Mixin hook, invoked with the class that includes this module.
# Registers default_canonical as a helper method on that class so it can be
# called from views as well as from the controller.
def self.included(base)
  base.helper_method(:default_canonical)
end
end
module Helpers
# Builds the <link rel="canonical"> tag for the current page.
#
# The href is the first value available from: the explicit +url+ argument,
# the @canonical_url instance variable, then default_canonical.
def canonical_link_tag(url = nil)
  href = url || @canonical_url || default_canonical
  tag('link', rel: 'canonical', href: href)
end
end
end end
# https://github.com/mbleigh/canonical-url/blob/master/lib/canonical_url.rb # https://github.com/mbleigh/canonical-url/blob/master/lib/canonical_url.rb

View File

@ -668,6 +668,33 @@ RSpec.describe ApplicationController do
expect(response.body).to have_tag("link", with: { rel: "canonical", href: "http://test.localhost/t/#{topic.slug}/#{topic.id}" }) expect(response.body).to have_tag("link", with: { rel: "canonical", href: "http://test.localhost/t/#{topic.slug}/#{topic.id}" })
end end
# Checks which GET responses carry 'X-Robots-Tag: noindex' once indexing of
# non-canonical URLs has been switched off.
it "adds a noindex header if non-canonical indexing is disabled" do
  SiteSetting.allow_indexing_non_canonical_urls = false

  # Listing pages are expected to stay indexable.
  get '/'
  expect(response.headers['X-Robots-Tag']).to be_nil

  get '/latest'
  expect(response.headers['X-Robots-Tag']).to be_nil

  get '/categories'
  expect(response.headers['X-Robots-Tag']).to be_nil

  # The plain topic URL is expected to stay indexable.
  topic = create_post.topic
  get "/t/#{topic.slug}/#{topic.id}"
  expect(response.headers['X-Robots-Tag']).to be_nil

  # URLs with a trailing post number are expected to be non-canonical.
  # The created post is not assigned to a local: a local named `post` is never
  # read here and would shadow the `post` request helper for the rest of the
  # example.
  create_post(topic_id: topic.id)
  get "/t/#{topic.slug}/#{topic.id}/2"
  expect(response.headers['X-Robots-Tag']).to eq('noindex')

  20.times do
    create_post(topic_id: topic.id)
  end

  get "/t/#{topic.slug}/#{topic.id}/21"
  expect(response.headers['X-Robots-Tag']).to eq('noindex')

  # A ?page= URL must not be flagged as non-canonical.
  get "/t/#{topic.slug}/#{topic.id}?page=2"
  expect(response.headers['X-Robots-Tag']).to be_nil
end
context "default locale" do context "default locale" do
before do before do
SiteSetting.default_locale = :fr SiteSetting.default_locale = :fr

View File

@ -176,12 +176,6 @@ describe GroupsController do
) )
end end
it 'should return correct X-Robots-Tag header when allow_index_in_robots_txt is set to false' do
SiteSetting.allow_index_in_robots_txt = false
get "/groups"
expect(response.headers['X-Robots-Tag']).to eq('noindex, nofollow')
end
context 'viewing groups of another user' do context 'viewing groups of another user' do
describe 'when an invalid username is given' do describe 'when an invalid username is given' do
it 'should return the right response' do it 'should return the right response' do