FIX: Detect Wayback Machine using user agent (#9777)

This commit is contained in:
Dan Ungureanu
2020-05-14 14:10:07 +03:00
committed by GitHub
parent 321879a619
commit 3ed6a0e904
2 changed files with 5 additions and 3 deletions

View File

@@ -1,7 +1,7 @@
# frozen_string_literal: true
module CrawlerDetection
WAYBACK_MACHINE_URL = "web.archive.org"
WAYBACK_MACHINE_URL = "archive.org"
def self.to_matcher(string, type: nil)
escaped = string.split('|').map { |agent| Regexp.escape(agent) }.join('|')
@@ -15,7 +15,7 @@ module CrawlerDetection
end
def self.crawler?(user_agent, via_header = nil)
return true if user_agent.nil? || via_header&.include?(WAYBACK_MACHINE_URL)
return true if user_agent.nil? || user_agent&.include?(WAYBACK_MACHINE_URL) || via_header&.include?(WAYBACK_MACHINE_URL)
# this is done to avoid regenerating regexes
@non_crawler_matchers ||= {}