FIX: use crawler layout when saving a URL in the Wayback Machine (#7667)

This commit is contained in:
Maja Komel
2019-06-03 04:13:32 +02:00
committed by Sam
parent 28dcf445b7
commit 42809f4d69
5 changed files with 50 additions and 39 deletions

View File

@@ -1,6 +1,7 @@
# frozen_string_literal: true
module CrawlerDetection
WAYBACK_MACHINE_URL = "web.archive.org"
def self.to_matcher(string, type: nil)
escaped = string.split('|').map { |agent| Regexp.escape(agent) }.join('|')
@@ -13,8 +14,8 @@ module CrawlerDetection
Regexp.new(escaped, Regexp::IGNORECASE)
end
def self.crawler?(user_agent)
return true if user_agent.nil?
def self.crawler?(user_agent, via_header = nil)
return true if user_agent.nil? || via_header&.include?(WAYBACK_MACHINE_URL)
# this is done to avoid regenerating regexes
@non_crawler_matchers ||= {}