mirror of
https://github.com/discourse/discourse.git
synced 2024-11-21 16:38:15 -06:00
DEV: Add support for custom retries for scheduled admin checks (#24224)
We updated scheduled admin checks to run concurrently in their own jobs. The main reason for this was so that we can implement re-check functionality for especially flaky checks (e.g. group e-mail credentials check.) This works in the following way: 1. The check declares its retry policy using class methods. 2. A block can be yielded to if there are problems, but before they are committed to Redis. 3. The job uses this block to either a) schedule a retry if there are any remaining or b) do nothing and let the check commit.
This commit is contained in:
parent
d5e4b85e82
commit
c3708c4276
@ -1,15 +1,29 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
module Jobs
|
||||
class RetrySignal < Exception
|
||||
end
|
||||
|
||||
# This job runs a singular scheduled admin check. It is scheduled by
|
||||
# the ProblemChecks (plural) scheduled job.
|
||||
class ProblemCheck < ::Jobs::Base
|
||||
sidekiq_options retry: false
|
||||
|
||||
def execute(args)
|
||||
identifier = args[:check_identifier]
|
||||
retry_count = args[:retry_count].to_i
|
||||
identifier = args[:check_identifier].to_sym
|
||||
|
||||
AdminDashboardData.execute_scheduled_check(identifier.to_sym)
|
||||
check = AdminDashboardData.problem_scheduled_check_klasses[identifier]
|
||||
|
||||
AdminDashboardData.execute_scheduled_check(identifier) do |problems|
|
||||
raise RetrySignal if retry_count < check.max_retries
|
||||
end
|
||||
rescue RetrySignal
|
||||
Jobs.enqueue_in(
|
||||
check.retry_wait,
|
||||
:problem_check,
|
||||
args.merge(retry_count: retry_count + 1).stringify_keys,
|
||||
)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -3,7 +3,11 @@
|
||||
class AdminDashboardData
|
||||
include StatsCacheable
|
||||
|
||||
cattr_reader :problem_syms, :problem_blocks, :problem_messages, :problem_scheduled_check_blocks
|
||||
cattr_reader :problem_syms,
|
||||
:problem_blocks,
|
||||
:problem_messages,
|
||||
:problem_scheduled_check_blocks,
|
||||
:problem_scheduled_check_klasses
|
||||
|
||||
class Problem
|
||||
VALID_PRIORITIES = %w[low high].freeze
|
||||
@ -80,7 +84,8 @@ class AdminDashboardData
|
||||
@@problem_blocks << blk if blk
|
||||
end
|
||||
|
||||
def self.add_scheduled_problem_check(check_identifier, &blk)
|
||||
def self.add_scheduled_problem_check(check_identifier, klass = nil, &blk)
|
||||
@@problem_scheduled_check_klasses[check_identifier] = klass
|
||||
@@problem_scheduled_check_blocks[check_identifier] = blk
|
||||
end
|
||||
|
||||
@ -125,7 +130,7 @@ class AdminDashboardData
|
||||
end
|
||||
|
||||
def self.register_default_scheduled_problem_checks
|
||||
add_scheduled_problem_check(:group_smtp_credentials) do
|
||||
add_scheduled_problem_check(:group_smtp_credentials, GroupEmailCredentialsCheck) do
|
||||
problems = GroupEmailCredentialsCheck.run
|
||||
problems.map do |p|
|
||||
problem_message =
|
||||
@ -158,6 +163,8 @@ class AdminDashboardData
|
||||
|
||||
problems = instance_exec(&check)
|
||||
|
||||
yield(problems) if block_given? && problems.present?
|
||||
|
||||
Array
|
||||
.wrap(problems)
|
||||
.compact
|
||||
@ -186,6 +193,7 @@ class AdminDashboardData
|
||||
@@problem_syms = []
|
||||
@@problem_blocks = []
|
||||
@@problem_scheduled_check_blocks = {}
|
||||
@@problem_scheduled_check_klasses = {}
|
||||
|
||||
@@problem_messages = %w[
|
||||
dashboard.bad_favicon_url
|
||||
|
@ -7,6 +7,9 @@
|
||||
# problem checks, and if any credentials have issues they will show up on
|
||||
# the admin dashboard as a high priority issue.
|
||||
class GroupEmailCredentialsCheck
|
||||
def self.max_retries = 2
|
||||
def self.retry_wait = 30.seconds
|
||||
|
||||
def self.run
|
||||
errors = []
|
||||
|
||||
|
@ -6,8 +6,13 @@ RSpec.describe Jobs::ProblemCheck do
|
||||
AdminDashboardData.reset_problem_checks
|
||||
end
|
||||
|
||||
class TestCheck
|
||||
def self.max_retries = 0
|
||||
def self.retry_wait = 30.seconds
|
||||
end
|
||||
|
||||
it "runs the scheduled problem check that has been added and adds the messages to the load_found_scheduled_check_problems array" do
|
||||
AdminDashboardData.add_scheduled_problem_check(:test_identifier) do
|
||||
AdminDashboardData.add_scheduled_problem_check(:test_identifier, TestCheck) do
|
||||
AdminDashboardData::Problem.new("big problem")
|
||||
end
|
||||
|
||||
@ -19,7 +24,7 @@ RSpec.describe Jobs::ProblemCheck do
|
||||
end
|
||||
|
||||
it "can handle the problem check returning multiple problems" do
|
||||
AdminDashboardData.add_scheduled_problem_check(:test_identifier) do
|
||||
AdminDashboardData.add_scheduled_problem_check(:test_identifier, TestCheck) do
|
||||
[
|
||||
AdminDashboardData::Problem.new("big problem"),
|
||||
AdminDashboardData::Problem.new(
|
||||
@ -36,7 +41,7 @@ RSpec.describe Jobs::ProblemCheck do
|
||||
end
|
||||
|
||||
it "does not add the same problem twice if the identifier already exists" do
|
||||
AdminDashboardData.add_scheduled_problem_check(:test_identifier) do
|
||||
AdminDashboardData.add_scheduled_problem_check(:test_identifier, TestCheck) do
|
||||
[
|
||||
AdminDashboardData::Problem.new(
|
||||
"yuge problem",
|
||||
@ -56,8 +61,36 @@ RSpec.describe Jobs::ProblemCheck do
|
||||
expect(problems.map(&:to_s)).to match_array(["yuge problem"])
|
||||
end
|
||||
|
||||
it "schedules a retry if there are attempts remaining" do
|
||||
AdminDashboardData.add_scheduled_problem_check(:test_identifier, TestCheck) do
|
||||
AdminDashboardData::Problem.new("big problem")
|
||||
end
|
||||
|
||||
TestCheck.stubs(:max_retries).returns(1)
|
||||
|
||||
expect_enqueued_with(
|
||||
job: :problem_check,
|
||||
args: {
|
||||
check_identifier: :test_identifier,
|
||||
retry_count: 1,
|
||||
},
|
||||
) { described_class.new.execute(check_identifier: :test_identifier) }
|
||||
end
|
||||
|
||||
it "does not schedule a retry if there are no more attempts remaining" do
|
||||
AdminDashboardData.add_scheduled_problem_check(:test_identifier, TestCheck) do
|
||||
AdminDashboardData::Problem.new("big problem")
|
||||
end
|
||||
|
||||
TestCheck.stubs(:max_retries).returns(1)
|
||||
|
||||
expect_not_enqueued_with(job: :problem_check) do
|
||||
described_class.new.execute(check_identifier: :test_identifier, retry_count: 1)
|
||||
end
|
||||
end
|
||||
|
||||
it "handles errors from a troublesome check" do
|
||||
AdminDashboardData.add_scheduled_problem_check(:test_identifier) do
|
||||
AdminDashboardData.add_scheduled_problem_check(:test_identifier, TestCheck) do
|
||||
raise StandardError.new("something went wrong")
|
||||
AdminDashboardData::Problem.new("polling issue")
|
||||
end
|
||||
|
@ -8,10 +8,15 @@ RSpec.describe Jobs::ProblemChecks do
|
||||
AdminDashboardData.reset_problem_checks
|
||||
end
|
||||
|
||||
class TestCheck
|
||||
def self.max_retries = 0
|
||||
def self.retry_wait = 0.seconds
|
||||
end
|
||||
|
||||
it "starts with a blank slate every time the checks are run to avoid duplicate problems and to clear no longer firing problems" do
|
||||
problem_should_fire = true
|
||||
AdminDashboardData.reset_problem_checks
|
||||
AdminDashboardData.add_scheduled_problem_check(:test_identifier) do
|
||||
AdminDashboardData.add_scheduled_problem_check(:test_identifier, TestCheck) do
|
||||
if problem_should_fire
|
||||
problem_should_fire = false
|
||||
AdminDashboardData::Problem.new("yuge problem", priority: "high")
|
||||
|
Loading…
Reference in New Issue
Block a user