From a4eb523af6e6237f64b34c957db393c84d3301fa Mon Sep 17 00:00:00 2001 From: Penar Musaraj Date: Wed, 8 May 2019 10:38:55 -0400 Subject: [PATCH] Track Discourse user agent pageviews as crawler Since 5bfe051e, Discourse user agents are marked as non-crawlers (to avoid accidental blacklisting). This makes sure pageviews for these agents are tracked as crawler hits. A missing User-Agent header is coerced to an empty string before the substring check, so such a request is classified as a non-Discourse agent instead of raising NoMethodError on nil. --- lib/middleware/anonymous_cache.rb | 6 +++++- spec/components/middleware/request_tracker_spec.rb | 10 ++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/lib/middleware/anonymous_cache.rb b/lib/middleware/anonymous_cache.rb index b31d0b03aef..48a7648a70a 100644 --- a/lib/middleware/anonymous_cache.rb +++ b/lib/middleware/anonymous_cache.rb @@ -62,7 +62,11 @@ module Middleware @is_crawler ||= begin user_agent = @env[USER_AGENT] - CrawlerDetection.crawler?(user_agent) ? :true : :false + if CrawlerDetection.crawler?(user_agent) + :true + else + user_agent.to_s.downcase.include?("discourse") ? :true : :false + end end @is_crawler == :true end diff --git a/spec/components/middleware/request_tracker_spec.rb b/spec/components/middleware/request_tracker_spec.rb index 9029024953f..ca8e1b67dd6 100644 --- a/spec/components/middleware/request_tracker_spec.rb +++ b/spec/components/middleware/request_tracker_spec.rb @@ -68,6 +68,16 @@ describe Middleware::RequestTracker do expect(ApplicationRequest.page_view_anon.first.count).to eq(2) expect(ApplicationRequest.page_view_crawler.first.count).to eq(1) expect(ApplicationRequest.page_view_anon_mobile.first.count).to eq(1) + + # log discourse User Agent requests as crawler for page views + data = Middleware::RequestTracker.get_data(env( + "HTTP_USER_AGENT" => "DiscourseAPI Ruby Gem 0.19.0" + ), ["200", { "Content-Type" => 'text/html' }], 0.1) + + Middleware::RequestTracker.log_request(data) + ApplicationRequest.write_cache! + + expect(ApplicationRequest.page_view_crawler.first.count).to eq(2) end end