mirror of
https://github.com/discourse/discourse.git
synced 2024-12-02 05:29:17 -06:00
527f02e99f
Followup 2f2da72747
This commit moves topic view tracking from happening
every time a Topic is requested, which is susceptible
to inflating numbers of views from web crawlers, to
our request tracker middleware.
In this new location, topic views are only tracked when
the following headers are sent:
* HTTP_DISCOURSE_TRACK_VIEW - This is sent on every page navigation when
clicking around the ember app. We count these as browser page views
because we know it comes from the AJAX call in our app. The topic ID
is extracted from HTTP_DISCOURSE_TRACK_VIEW_TOPIC_ID
* HTTP_DISCOURSE_DEFERRED_TRACK_VIEW - Sent when MessageBus initializes
after first loading the page to count the initial page load view. The
topic ID is extracted from HTTP_DISCOURSE_DEFERRED_TRACK_VIEW.
This will bring topic views more in line with the change we
made to page views in the referenced commit and result in
more realistic topic view counts.
556 lines
17 KiB
Ruby
556 lines
17 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
require "method_profiler"
|
|
require "middleware/anonymous_cache"
|
|
require "http_user_agent_encoder"
|
|
|
|
class Middleware::RequestTracker
|
|
@@detailed_request_loggers = nil
|
|
@@ip_skipper = nil
|
|
|
|
# You can add exceptions to our app rate limiter in the app.yml ENV section.
|
|
# example:
|
|
#
|
|
# env:
|
|
# DISCOURSE_MAX_REQS_PER_IP_EXCEPTIONS: >-
|
|
# 14.15.16.32/27
|
|
# 216.148.1.2
|
|
#
|
|
STATIC_IP_SKIPPER =
|
|
ENV["DISCOURSE_MAX_REQS_PER_IP_EXCEPTIONS"]&.split&.map { |ip| IPAddr.new(ip) }
|
|
|
|
# register callbacks for detailed request loggers called on every request
|
|
# example:
|
|
#
|
|
# Middleware::RequestTracker.detailed_request_logger(->|env, data| do
|
|
# # do stuff with env and data
|
|
# end
|
|
def self.register_detailed_request_logger(callback)
|
|
MethodProfiler.ensure_discourse_instrumentation!
|
|
(@@detailed_request_loggers ||= []) << callback
|
|
end
|
|
|
|
def self.unregister_detailed_request_logger(callback)
|
|
@@detailed_request_loggers.delete(callback)
|
|
@detailed_request_loggers = nil if @@detailed_request_loggers.length == 0
|
|
end
|
|
|
|
# used for testing
|
|
def self.unregister_ip_skipper
|
|
@@ip_skipper = nil
|
|
end
|
|
|
|
# Register a custom `ip_skipper`, a function that will skip rate limiting
|
|
# for any IP that returns true.
|
|
#
|
|
# For example, if you never wanted to rate limit 1.2.3.4
|
|
#
|
|
# ```
|
|
# Middleware::RequestTracker.register_ip_skipper do |ip|
|
|
# ip == "1.2.3.4"
|
|
# end
|
|
# ```
|
|
def self.register_ip_skipper(&blk)
|
|
raise "IP skipper is already registered!" if @@ip_skipper
|
|
@@ip_skipper = blk
|
|
end
|
|
|
|
def self.ip_skipper
|
|
@@ip_skipper
|
|
end
|
|
|
|
def initialize(app, settings = {})
|
|
@app = app
|
|
end
|
|
|
|
def self.log_request(data)
|
|
if data[:is_api]
|
|
ApplicationRequest.increment!(:api)
|
|
elsif data[:is_user_api]
|
|
ApplicationRequest.increment!(:user_api)
|
|
elsif data[:track_view]
|
|
if data[:is_crawler]
|
|
ApplicationRequest.increment!(:page_view_crawler)
|
|
WebCrawlerRequest.increment!(data[:user_agent])
|
|
elsif data[:has_auth_cookie]
|
|
ApplicationRequest.increment!(:page_view_logged_in)
|
|
ApplicationRequest.increment!(:page_view_logged_in_mobile) if data[:is_mobile]
|
|
|
|
if data[:explicit_track_view]
|
|
# Must be a browser if it had this header from our ajax implementation
|
|
ApplicationRequest.increment!(:page_view_logged_in_browser)
|
|
ApplicationRequest.increment!(:page_view_logged_in_browser_mobile) if data[:is_mobile]
|
|
|
|
if data[:topic_id].present? && data[:current_user_id].present?
|
|
TopicsController.defer_topic_view(
|
|
data[:topic_id],
|
|
data[:request_remote_ip],
|
|
data[:current_user_id],
|
|
)
|
|
end
|
|
end
|
|
elsif !SiteSetting.login_required
|
|
ApplicationRequest.increment!(:page_view_anon)
|
|
ApplicationRequest.increment!(:page_view_anon_mobile) if data[:is_mobile]
|
|
|
|
if data[:explicit_track_view]
|
|
# Must be a browser if it had this header from our ajax implementation
|
|
ApplicationRequest.increment!(:page_view_anon_browser)
|
|
ApplicationRequest.increment!(:page_view_anon_browser_mobile) if data[:is_mobile]
|
|
|
|
if data[:topic_id].present?
|
|
TopicsController.defer_topic_view(data[:topic_id], data[:request_remote_ip])
|
|
end
|
|
end
|
|
end
|
|
end
|
|
|
|
# Message-bus requests may include this 'deferred track' header which we use to detect
|
|
# 'real browser' views.
|
|
if data[:deferred_track] && !data[:is_crawler]
|
|
if data[:has_auth_cookie]
|
|
ApplicationRequest.increment!(:page_view_logged_in_browser)
|
|
ApplicationRequest.increment!(:page_view_logged_in_browser_mobile) if data[:is_mobile]
|
|
|
|
if data[:topic_id].present? && data[:current_user_id].present?
|
|
TopicsController.defer_topic_view(
|
|
data[:topic_id],
|
|
data[:request_remote_ip],
|
|
data[:current_user_id],
|
|
)
|
|
end
|
|
elsif !SiteSetting.login_required
|
|
ApplicationRequest.increment!(:page_view_anon_browser)
|
|
ApplicationRequest.increment!(:page_view_anon_browser_mobile) if data[:is_mobile]
|
|
|
|
if data[:topic_id].present?
|
|
TopicsController.defer_topic_view(data[:topic_id], data[:request_remote_ip])
|
|
end
|
|
end
|
|
end
|
|
|
|
ApplicationRequest.increment!(:http_total)
|
|
|
|
status = data[:status]
|
|
if status >= 500
|
|
ApplicationRequest.increment!(:http_5xx)
|
|
elsif data[:is_background]
|
|
ApplicationRequest.increment!(:http_background)
|
|
elsif status >= 400
|
|
ApplicationRequest.increment!(:http_4xx)
|
|
elsif status >= 300
|
|
ApplicationRequest.increment!(:http_3xx)
|
|
elsif status >= 200
|
|
ApplicationRequest.increment!(:http_2xx)
|
|
end
|
|
end
|
|
|
|
def self.get_data(env, result, timing, request = nil)
|
|
status, headers = result
|
|
|
|
# result may be nil if the downstream app raised an exception
|
|
status = status.to_i
|
|
headers ||= {}
|
|
|
|
request ||= Rack::Request.new(env)
|
|
helper = Middleware::AnonymousCache::Helper.new(env, request)
|
|
|
|
# Since ActionDispatch::RemoteIp middleware is run before this middleware,
|
|
# we have access to the normalised remote IP based on ActionDispatch::RemoteIp::GetIp
|
|
#
|
|
# NOTE: Locally with MessageBus requests, the remote IP ends up as ::1 because
|
|
# of the X-Forwarded-For header set...somewhere, whereas all other requests
|
|
# end up as 127.0.0.1.
|
|
request_remote_ip = env["action_dispatch.remote_ip"].to_s
|
|
|
|
# Value of the discourse-track-view request header, set in `lib/ajax.js`
|
|
env_track_view = env["HTTP_DISCOURSE_TRACK_VIEW"]
|
|
|
|
# Was the discourse-track-view request header set to true? Likely
|
|
# set by our ajax library to indicate a page view.
|
|
explicit_track_view = status == 200 && %w[1 true].include?(env_track_view)
|
|
|
|
# An HTML response to a GET request is tracked implicitly
|
|
implicit_track_view =
|
|
status == 200 && !%w[0 false].include?(env_track_view) && request.get? && !request.xhr? &&
|
|
headers["Content-Type"] =~ %r{text/html}
|
|
|
|
# This header is sent on a follow-up request after a real browser loads up a page
|
|
# see `scripts/pageview.js` and `instance-initializers/page-tracking.js`
|
|
deferred_track_view = %w[1 true].include?(env["HTTP_DISCOURSE_DEFERRED_TRACK_VIEW"])
|
|
|
|
# This is treated separately from deferred tracking in #log_request, this is
|
|
# why deferred_track_view is not counted here.
|
|
track_view = !!(explicit_track_view || implicit_track_view)
|
|
|
|
# These are set in the same place as the respective track view headers in the client.
|
|
topic_id =
|
|
if deferred_track_view
|
|
env["HTTP_DISCOURSE_DEFERRED_TRACK_VIEW_TOPIC_ID"]
|
|
elsif explicit_track_view || implicit_track_view
|
|
env["HTTP_DISCOURSE_TRACK_VIEW_TOPIC_ID"]
|
|
end
|
|
|
|
auth_cookie = Auth::DefaultCurrentUserProvider.find_v0_auth_cookie(request)
|
|
auth_cookie ||= Auth::DefaultCurrentUserProvider.find_v1_auth_cookie(env)
|
|
has_auth_cookie = auth_cookie.present?
|
|
|
|
is_api ||= !!env[Auth::DefaultCurrentUserProvider::API_KEY_ENV]
|
|
is_user_api ||= !!env[Auth::DefaultCurrentUserProvider::USER_API_KEY_ENV]
|
|
|
|
is_message_bus = request.path.start_with?("#{Discourse.base_path}/message-bus/")
|
|
is_topic_timings = request.path.start_with?("#{Discourse.base_path}/topics/timings")
|
|
|
|
# Auth cookie can be used to find the ID for logged in users, but API calls must look up the
|
|
# current user based on env variables.
|
|
#
|
|
# We only care about this for topic views, other pageviews it's enough to know if the user is
|
|
# logged in or not, and we have separate pageview tracking for API views.
|
|
current_user_id =
|
|
if topic_id.present?
|
|
begin
|
|
(auth_cookie&.[](:user_id) || CurrentUser.lookup_from_env(env)&.id)
|
|
rescue Discourse::InvalidAccess => err
|
|
# This error is raised when the API key is invalid, no need to stop the show.
|
|
Discourse.warn_exception(
|
|
err,
|
|
message: "RequestTracker.get_data failed with an invalid API key error",
|
|
)
|
|
nil
|
|
end
|
|
end
|
|
|
|
h = {
|
|
status: status,
|
|
is_crawler: helper.is_crawler?,
|
|
has_auth_cookie: has_auth_cookie,
|
|
current_user_id: current_user_id,
|
|
topic_id: topic_id,
|
|
is_api: is_api,
|
|
is_user_api: is_user_api,
|
|
is_background: is_message_bus || is_topic_timings,
|
|
is_mobile: helper.is_mobile?,
|
|
track_view: track_view,
|
|
timing: timing,
|
|
queue_seconds: env["REQUEST_QUEUE_SECONDS"],
|
|
explicit_track_view: explicit_track_view,
|
|
deferred_track: deferred_track_view,
|
|
request_remote_ip: request_remote_ip,
|
|
}
|
|
|
|
if h[:is_background]
|
|
h[:background_type] = if is_message_bus
|
|
if request.query_string.include?("dlp=t")
|
|
"message-bus-dlp"
|
|
elsif env["HTTP_DONT_CHUNK"]
|
|
"message-bus-dontchunk"
|
|
else
|
|
"message-bus"
|
|
end
|
|
else
|
|
"topic-timings"
|
|
end
|
|
end
|
|
|
|
if h[:is_crawler]
|
|
user_agent = env["HTTP_USER_AGENT"]
|
|
user_agent = HttpUserAgentEncoder.ensure_utf8(user_agent) if user_agent
|
|
h[:user_agent] = user_agent
|
|
end
|
|
|
|
if cache = headers["X-Discourse-Cached"]
|
|
h[:cache] = cache
|
|
end
|
|
|
|
h
|
|
end
|
|
|
|
def log_request_info(env, result, info, request = nil)
|
|
# We've got to skip this on error ... its just logging
|
|
data =
|
|
begin
|
|
self.class.get_data(env, result, info, request)
|
|
rescue StandardError => err
|
|
Discourse.warn_exception(err, message: "RequestTracker.get_data failed")
|
|
|
|
# This is super hard to find if in testing, we should still raise in this case.
|
|
raise err if Rails.env.test?
|
|
|
|
nil
|
|
end
|
|
|
|
if data
|
|
if result && (headers = result[1])
|
|
headers["X-Discourse-TrackView"] = "1" if data[:track_view]
|
|
end
|
|
|
|
if @@detailed_request_loggers
|
|
@@detailed_request_loggers.each { |logger| logger.call(env, data) }
|
|
end
|
|
|
|
log_later(data)
|
|
end
|
|
end
|
|
|
|
def self.populate_request_queue_seconds!(env)
|
|
if !env["REQUEST_QUEUE_SECONDS"]
|
|
if queue_start = env["HTTP_X_REQUEST_START"]
|
|
queue_start =
|
|
if queue_start.start_with?("t=")
|
|
queue_start.split("t=")[1].to_f
|
|
else
|
|
queue_start.to_f / 1000.0
|
|
end
|
|
queue_time = (Time.now.to_f - queue_start)
|
|
env["REQUEST_QUEUE_SECONDS"] = queue_time
|
|
end
|
|
end
|
|
end
|
|
|
|
def call(env)
|
|
result = nil
|
|
info = nil
|
|
gc_stat_timing = nil
|
|
|
|
# doing this as early as possible so we have an
|
|
# accurate counter
|
|
::Middleware::RequestTracker.populate_request_queue_seconds!(env)
|
|
|
|
request = Rack::Request.new(env)
|
|
|
|
cookie = find_auth_cookie(env)
|
|
if error_details = rate_limit(request, cookie)
|
|
available_in, error_code, limit_on_id = error_details
|
|
message = <<~TEXT
|
|
Slow down, too many requests from this #{limit_on_id ? "user" : "IP address"}.
|
|
Please retry again in #{available_in} seconds.
|
|
Error code: #{error_code}.
|
|
TEXT
|
|
headers = {
|
|
"Content-Type" => "text/plain",
|
|
"Retry-After" => available_in.to_s,
|
|
"Discourse-Rate-Limit-Error-Code" => error_code,
|
|
}
|
|
if username = cookie&.[](:username)
|
|
headers["X-Discourse-Username"] = username
|
|
end
|
|
return 429, headers, [message]
|
|
end
|
|
|
|
if !cookie
|
|
if error_details = check_crawler_limits(env)
|
|
available_in, error_code = error_details
|
|
message = "Too many crawling requests. Error code: #{error_code}."
|
|
headers = {
|
|
"Content-Type" => "text/plain",
|
|
"Retry-After" => available_in.to_s,
|
|
"Discourse-Rate-Limit-Error-Code" => error_code,
|
|
}
|
|
return 429, headers, [message]
|
|
end
|
|
end
|
|
|
|
env["discourse.request_tracker"] = self
|
|
|
|
MethodProfiler.start
|
|
|
|
if SiteSetting.instrument_gc_stat_per_request
|
|
gc_stat_timing = GCStatInstrumenter.instrument { result = @app.call(env) }
|
|
else
|
|
result = @app.call(env)
|
|
end
|
|
|
|
info = MethodProfiler.stop
|
|
|
|
# possibly transferred?
|
|
if info && (headers = result[1])
|
|
headers["X-Runtime"] = "%0.6f" % info[:total_duration]
|
|
|
|
if GlobalSetting.enable_performance_http_headers
|
|
if redis = info[:redis]
|
|
headers["X-Redis-Calls"] = redis[:calls].to_s
|
|
headers["X-Redis-Time"] = "%0.6f" % redis[:duration]
|
|
end
|
|
if sql = info[:sql]
|
|
headers["X-Sql-Calls"] = sql[:calls].to_s
|
|
headers["X-Sql-Time"] = "%0.6f" % sql[:duration]
|
|
end
|
|
if queue = env["REQUEST_QUEUE_SECONDS"]
|
|
headers["X-Queue-Time"] = "%0.6f" % queue
|
|
end
|
|
end
|
|
end
|
|
|
|
if env[Auth::DefaultCurrentUserProvider::BAD_TOKEN] && (headers = result[1])
|
|
headers["Discourse-Logged-Out"] = "1"
|
|
end
|
|
|
|
result
|
|
ensure
|
|
if (limiters = env["DISCOURSE_RATE_LIMITERS"]) && env["DISCOURSE_IS_ASSET_PATH"]
|
|
limiters.each(&:rollback!)
|
|
env["DISCOURSE_ASSET_RATE_LIMITERS"].each do |limiter|
|
|
begin
|
|
limiter.performed!
|
|
rescue RateLimiter::LimitExceeded
|
|
# skip
|
|
end
|
|
end
|
|
end
|
|
|
|
if !env["discourse.request_tracker.skip"]
|
|
info.merge!(gc_stat_timing) if gc_stat_timing
|
|
log_request_info(env, result, info, request)
|
|
end
|
|
end
|
|
|
|
def log_later(data)
|
|
Scheduler::Defer.later("Track view") do
|
|
self.class.log_request(data) unless Discourse.pg_readonly_mode?
|
|
end
|
|
end
|
|
|
|
def find_auth_cookie(env)
|
|
min_allowed_timestamp = Time.now.to_i - (UserAuthToken::ROTATE_TIME_MINS + 1) * 60
|
|
cookie = Auth::DefaultCurrentUserProvider.find_v1_auth_cookie(env)
|
|
cookie if cookie && cookie[:issued_at] >= min_allowed_timestamp
|
|
end
|
|
|
|
def is_private_ip?(ip)
|
|
ip = IPAddr.new(ip)
|
|
!!(ip && (ip.private? || ip.loopback?))
|
|
rescue IPAddr::AddressFamilyError, IPAddr::InvalidAddressError
|
|
false
|
|
end
|
|
|
|
def rate_limit(request, cookie)
|
|
warn =
|
|
GlobalSetting.max_reqs_per_ip_mode == "warn" ||
|
|
GlobalSetting.max_reqs_per_ip_mode == "warn+block"
|
|
block =
|
|
GlobalSetting.max_reqs_per_ip_mode == "block" ||
|
|
GlobalSetting.max_reqs_per_ip_mode == "warn+block"
|
|
|
|
return if !block && !warn
|
|
|
|
ip = request.ip
|
|
|
|
if !GlobalSetting.max_reqs_rate_limit_on_private
|
|
return if is_private_ip?(ip)
|
|
end
|
|
|
|
return if @@ip_skipper&.call(ip)
|
|
return if STATIC_IP_SKIPPER&.any? { |entry| entry.include?(ip) }
|
|
|
|
ip_or_id = ip
|
|
limit_on_id = false
|
|
if cookie && cookie[:user_id] && cookie[:trust_level] &&
|
|
cookie[:trust_level] >= GlobalSetting.skip_per_ip_rate_limit_trust_level
|
|
ip_or_id = cookie[:user_id]
|
|
limit_on_id = true
|
|
end
|
|
|
|
limiter10 =
|
|
RateLimiter.new(
|
|
nil,
|
|
"global_limit_10_#{ip_or_id}",
|
|
GlobalSetting.max_reqs_per_ip_per_10_seconds,
|
|
10,
|
|
global: !limit_on_id,
|
|
aggressive: true,
|
|
error_code: limit_on_id ? "id_10_secs_limit" : "ip_10_secs_limit",
|
|
)
|
|
|
|
limiter60 =
|
|
RateLimiter.new(
|
|
nil,
|
|
"global_limit_60_#{ip_or_id}",
|
|
GlobalSetting.max_reqs_per_ip_per_minute,
|
|
60,
|
|
global: !limit_on_id,
|
|
error_code: limit_on_id ? "id_60_secs_limit" : "ip_60_secs_limit",
|
|
aggressive: true,
|
|
)
|
|
|
|
limiter_assets10 =
|
|
RateLimiter.new(
|
|
nil,
|
|
"global_limit_10_assets_#{ip_or_id}",
|
|
GlobalSetting.max_asset_reqs_per_ip_per_10_seconds,
|
|
10,
|
|
error_code: limit_on_id ? "id_assets_10_secs_limit" : "ip_assets_10_secs_limit",
|
|
global: !limit_on_id,
|
|
)
|
|
|
|
request.env["DISCOURSE_RATE_LIMITERS"] = [limiter10, limiter60]
|
|
request.env["DISCOURSE_ASSET_RATE_LIMITERS"] = [limiter_assets10]
|
|
|
|
if !limiter_assets10.can_perform?
|
|
if warn
|
|
limited_on = limit_on_id ? "user_id" : "ip"
|
|
Discourse.warn(
|
|
"Global asset rate limit exceeded for #{limited_on}: #{ip}: 10 second rate limit",
|
|
uri: request.env["REQUEST_URI"],
|
|
)
|
|
end
|
|
|
|
if block
|
|
return [
|
|
limiter_assets10.seconds_to_wait(Time.now.to_i),
|
|
limiter_assets10.error_code,
|
|
limit_on_id
|
|
]
|
|
end
|
|
end
|
|
|
|
begin
|
|
type = 10
|
|
limiter10.performed!
|
|
|
|
type = 60
|
|
limiter60.performed!
|
|
|
|
nil
|
|
rescue RateLimiter::LimitExceeded => e
|
|
if warn
|
|
limited_on = limit_on_id ? "user_id" : "ip"
|
|
Discourse.warn(
|
|
"Global rate limit exceeded for #{limited_on}: #{ip}: #{type} second rate limit",
|
|
uri: request.env["REQUEST_URI"],
|
|
)
|
|
end
|
|
if block
|
|
[e.available_in, e.error_code, limit_on_id]
|
|
else
|
|
nil
|
|
end
|
|
end
|
|
end
|
|
|
|
def check_crawler_limits(env)
|
|
slow_down_agents = SiteSetting.slow_down_crawler_user_agents
|
|
return if slow_down_agents.blank?
|
|
|
|
user_agent = HttpUserAgentEncoder.ensure_utf8(env["HTTP_USER_AGENT"])&.downcase
|
|
return if user_agent.blank?
|
|
|
|
return if !CrawlerDetection.crawler?(user_agent)
|
|
|
|
slow_down_agents
|
|
.downcase
|
|
.split("|")
|
|
.each do |crawler|
|
|
if user_agent.include?(crawler)
|
|
key = "#{crawler}_crawler_rate_limit"
|
|
limiter =
|
|
RateLimiter.new(nil, key, 1, SiteSetting.slow_down_crawler_rate, error_code: key)
|
|
limiter.performed!
|
|
break
|
|
end
|
|
end
|
|
nil
|
|
rescue RateLimiter::LimitExceeded => e
|
|
[e.available_in, e.error_code]
|
|
end
|
|
end
|