From d7657d8e471142ae7fafd2183616257247353f86 Mon Sep 17 00:00:00 2001 From: Sam Date: Tue, 16 Jan 2018 16:28:11 +1100 Subject: [PATCH] correct specs, ensure crawler layout only applies to html --- app/controllers/application_controller.rb | 6 +++++- lib/crawler_detection.rb | 12 ++++++++++-- spec/components/middleware/request_tracker_spec.rb | 1 + 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/app/controllers/application_controller.rb b/app/controllers/application_controller.rb index 9d3fc5c3a7b..52624e0fe7b 100644 --- a/app/controllers/application_controller.rb +++ b/app/controllers/application_controller.rb @@ -60,7 +60,11 @@ class ApplicationController < ActionController::Base end def use_crawler_layout? - @use_crawler_layout ||= (has_escaped_fragment? || CrawlerDetection.crawler?(request.user_agent) || params.key?("print")) + @use_crawler_layout ||= + request.user_agent && + (request.content_type.blank? || request.content_type.include?('html')) && + !['json', 'rss'].include?(params[:format]) && + (has_escaped_fragment? || CrawlerDetection.crawler?(request.user_agent) || params.key?("print")) end def add_readonly_header diff --git a/lib/crawler_detection.rb b/lib/crawler_detection.rb index 15ff75d384b..2958087a3c2 100644 --- a/lib/crawler_detection.rb +++ b/lib/crawler_detection.rb @@ -1,16 +1,24 @@ module CrawlerDetection - def self.to_matcher(string) + def self.to_matcher(string, type: nil) escaped = string.split('|').map { |agent| Regexp.escape(agent) }.join('|') + + if type == :real && Rails.env == "test" + # we need this bypass so we properly render views + escaped << "|Rails Testing" + end + Regexp.new(escaped, Regexp::IGNORECASE) end def self.crawler?(user_agent) + return true if user_agent.nil? + # this is done to avoid regenerating regexes @non_crawler_matchers ||= {} @matchers ||= {} - possibly_real = (@non_crawler_matchers[SiteSetting.non_crawler_user_agents] ||= to_matcher(SiteSetting.non_crawler_user_agents)) + possibly_real = (@non_crawler_matchers[SiteSetting.non_crawler_user_agents] ||= to_matcher(SiteSetting.non_crawler_user_agents, type: :real)) if user_agent.match?(possibly_real) known_bots = (@matchers[SiteSetting.crawler_user_agents] ||= to_matcher(SiteSetting.crawler_user_agents)) diff --git a/spec/components/middleware/request_tracker_spec.rb b/spec/components/middleware/request_tracker_spec.rb index d27ca361edb..f6cf1849c76 100644 --- a/spec/components/middleware/request_tracker_spec.rb +++ b/spec/components/middleware/request_tracker_spec.rb @@ -6,6 +6,7 @@ describe Middleware::RequestTracker do def env(opts = {}) { "HTTP_HOST" => "http://test.com", + "HTTP_USER_AGENT" => "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36", "REQUEST_URI" => "/path?bla=1", "REQUEST_METHOD" => "GET", "rack.input" => ""