FEATURE: add basic instrumentation to defer queue (#19824)

This will give us some aggregate stats on the defer queue performance.

It is limited to 100 entries (for safety) which is stored in an LRU cache.

Scheduler::Defer.stats can then be used to get an array that denotes:

- number of runs and completions (queued, finished)
- error count (errors)
- total duration (duration)

We can look later at exposing these metrics to gain visibility on the reason
the defer queue is clogged.
This commit is contained in:
Sam
2023-01-12 12:29:50 +11:00
committed by GitHub
parent 9fcd8336e4
commit 7b63c42304
2 changed files with 50 additions and 0 deletions

View File

@@ -4,15 +4,18 @@ require "weakref"
module Scheduler
module Deferrable
DEFAULT_TIMEOUT ||= 90
STATS_CACHE_SIZE ||= 100
def initialize
@async = !Rails.env.test?
@queue = Queue.new
@mutex = Mutex.new
@stats_mutex = Mutex.new
@paused = false
@thread = nil
@reactor = nil
@timeout = DEFAULT_TIMEOUT
@stats = LruRedux::ThreadSafeCache.new(STATS_CACHE_SIZE)
end
def timeout=(t)
@@ -23,6 +26,10 @@ module Scheduler
@queue.length
end
def stats
@stats_mutex.synchronize { @stats.to_a }
end
def pause
stop!
@paused = true
@@ -38,6 +45,11 @@ module Scheduler
end
def later(desc = nil, db = RailsMultisite::ConnectionManagement.current_db, &blk)
@stats_mutex.synchronize do
stats = (@stats[desc] ||= { queued: 0, finished: 0, duration: 0, errors: 0 })
stats[:queued] += 1
end
if @async
start_thread if !@thread&.alive? && !@paused
@queue << [db, blk, desc]
@@ -74,6 +86,7 @@ module Scheduler
# using non_block to match Ruby #deq
def do_work(non_block = false)
db, job, desc = @queue.deq(non_block)
start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
db ||= RailsMultisite::ConnectionManagement::DEFAULT
RailsMultisite::ConnectionManagement.with_connection(db) do
@@ -84,6 +97,10 @@ module Scheduler
end if !non_block
job.call
rescue => ex
@stats_mutex.synchronize do
stats = @stats[desc]
stats[:errors] += 1 if stats
end
Discourse.handle_job_exception(ex, message: "Running deferred code '#{desc}'")
ensure
warning_job&.cancel
@@ -93,6 +110,13 @@ module Scheduler
Discourse.handle_job_exception(ex, message: "Processing deferred code queue")
ensure
ActiveRecord::Base.connection_handler.clear_active_connections!
@stats_mutex.synchronize do
stats = @stats[desc]
if stats
stats[:finished] += 1
stats[:duration] += Process.clock_gettime(Process::CLOCK_MONOTONIC) - start
end
end
end
end