FEATURE: Send a 'noindex' header in non-canonical responses (#15026)

* FEATURE: Optionally send a 'noindex' header in non-canonical responses

This will be used in a SEO experiment.

Co-authored-by: David Taylor <david@taylorhq.com>
This commit is contained in:
Rafael dos Santos Silva 2021-11-25 16:58:39 -03:00 committed by GitHub
parent 1166afa4e8
commit 5647819de4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 56 additions and 17 deletions

View File

@ -41,13 +41,14 @@ class ApplicationController < ActionController::Base
before_action :redirect_to_login_if_required
before_action :block_if_requires_login
before_action :preload_json
before_action :add_noindex_header, if: -> { is_feed_request? || !SiteSetting.allow_index_in_robots_txt }
before_action :check_xhr
after_action :add_readonly_header
after_action :perform_refresh_session
after_action :dont_cache_page
after_action :conditionally_allow_site_embedding
after_action :ensure_vary_header
after_action :add_noindex_header, if: -> { is_feed_request? || !SiteSetting.allow_index_in_robots_txt }
after_action :add_noindex_header_to_non_canonical, if: -> { request.get? && !(request.format && request.format.json?) && !request.xhr? }
HONEYPOT_KEY ||= 'HONEYPOT_KEY'
CHALLENGE_KEY ||= 'CHALLENGE_KEY'
@ -912,6 +913,13 @@ class ApplicationController < ActionController::Base
end
end
def add_noindex_header_to_non_canonical
canonical = (@canonical_url || @default_canonical)
if canonical.present? && canonical != request.url && !SiteSetting.allow_indexing_non_canonical_urls
response.headers['X-Robots-Tag'] ||= 'noindex'
end
end
protected
def honeypot_value

View File

@ -1602,6 +1602,9 @@ security:
regex: "^(Lax|Strict|Disabled|None)$"
enable_escaped_fragments: true
allow_index_in_robots_txt: true
allow_indexing_non_canonical_urls:
default: true
hidden: true
moderators_manage_categories_and_groups: false
moderators_change_post_ownership:
client: true

View File

@ -2,6 +2,8 @@
module CanonicalURL
module ControllerExtensions
ALLOWED_CANONICAL_PARAMS = %w(page)
def canonical_url(url_for_options = {})
case url_for_options
when Hash
@ -10,15 +12,9 @@ module CanonicalURL
@canonical_url = url_for_options
end
end
end
module Helpers
ALLOWED_CANONICAL_PARAMS = %w(page)
def canonical_link_tag(url = nil)
tag('link', rel: 'canonical', href: url || @canonical_url || default_canonical)
end
def default_canonical
@default_canonical ||= begin
canonical = +"#{Discourse.base_url_no_prefix}#{request.path}"
allowed_params = params.select { |key| ALLOWED_CANONICAL_PARAMS.include?(key) }
if allowed_params.present?
@ -27,6 +23,17 @@ module CanonicalURL
canonical
end
end
def self.included(base)
base.helper_method :default_canonical
end
end
module Helpers
def canonical_link_tag(url = nil)
tag('link', rel: 'canonical', href: url || @canonical_url || default_canonical)
end
end
end
# https://github.com/mbleigh/canonical-url/blob/master/lib/canonical_url.rb

View File

@ -668,6 +668,33 @@ RSpec.describe ApplicationController do
expect(response.body).to have_tag("link", with: { rel: "canonical", href: "http://test.localhost/t/#{topic.slug}/#{topic.id}" })
end
it "adds a noindex header if non-canonical indexing is disabled" do
SiteSetting.allow_indexing_non_canonical_urls = false
get '/'
expect(response.headers['X-Robots-Tag']).to be_nil
get '/latest'
expect(response.headers['X-Robots-Tag']).to be_nil
get '/categories'
expect(response.headers['X-Robots-Tag']).to be_nil
topic = create_post.topic
get "/t/#{topic.slug}/#{topic.id}"
expect(response.headers['X-Robots-Tag']).to be_nil
post = create_post(topic_id: topic.id)
get "/t/#{topic.slug}/#{topic.id}/2"
expect(response.headers['X-Robots-Tag']).to eq('noindex')
20.times do
create_post(topic_id: topic.id)
end
get "/t/#{topic.slug}/#{topic.id}/21"
expect(response.headers['X-Robots-Tag']).to eq('noindex')
get "/t/#{topic.slug}/#{topic.id}?page=2"
expect(response.headers['X-Robots-Tag']).to be_nil
end
context "default locale" do
before do
SiteSetting.default_locale = :fr

View File

@ -176,12 +176,6 @@ describe GroupsController do
)
end
it 'should return correct X-Robots-Tag header when allow_index_in_robots_txt is set to false' do
SiteSetting.allow_index_in_robots_txt = false
get "/groups"
expect(response.headers['X-Robots-Tag']).to eq('noindex, nofollow')
end
context 'viewing groups of another user' do
describe 'when an invalid username is given' do
it 'should return the right response' do