mirror of
https://github.com/discourse/discourse.git
synced 2024-11-22 00:47:46 -06:00
DEV: Add support for custom retries for scheduled admin checks (#24224)
We updated scheduled admin checks to run concurrently in their own jobs. The main reason for this was so that we can implement re-check functionality for especially flaky checks (e.g. group e-mail credentials check.) This works in the following way: 1. The check declares its retry policy using class methods. 2. A block can be yielded to if there are problems, but before they are committed to Redis. 3. The job uses this block to either a) schedule a retry if there are any remaining or b) do nothing and let the check commit.
This commit is contained in:
parent
d5e4b85e82
commit
c3708c4276
@ -1,15 +1,29 @@
|
|||||||
# frozen_string_literal: true
|
# frozen_string_literal: true
|
||||||
|
|
||||||
module Jobs
|
module Jobs
|
||||||
|
class RetrySignal < Exception
|
||||||
|
end
|
||||||
|
|
||||||
# This job runs a singular scheduled admin check. It is scheduled by
|
# This job runs a singular scheduled admin check. It is scheduled by
|
||||||
# the ProblemChecks (plural) scheduled job.
|
# the ProblemChecks (plural) scheduled job.
|
||||||
class ProblemCheck < ::Jobs::Base
|
class ProblemCheck < ::Jobs::Base
|
||||||
sidekiq_options retry: false
|
sidekiq_options retry: false
|
||||||
|
|
||||||
def execute(args)
|
def execute(args)
|
||||||
identifier = args[:check_identifier]
|
retry_count = args[:retry_count].to_i
|
||||||
|
identifier = args[:check_identifier].to_sym
|
||||||
|
|
||||||
AdminDashboardData.execute_scheduled_check(identifier.to_sym)
|
check = AdminDashboardData.problem_scheduled_check_klasses[identifier]
|
||||||
|
|
||||||
|
AdminDashboardData.execute_scheduled_check(identifier) do |problems|
|
||||||
|
raise RetrySignal if retry_count < check.max_retries
|
||||||
|
end
|
||||||
|
rescue RetrySignal
|
||||||
|
Jobs.enqueue_in(
|
||||||
|
check.retry_wait,
|
||||||
|
:problem_check,
|
||||||
|
args.merge(retry_count: retry_count + 1).stringify_keys,
|
||||||
|
)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -3,7 +3,11 @@
|
|||||||
class AdminDashboardData
|
class AdminDashboardData
|
||||||
include StatsCacheable
|
include StatsCacheable
|
||||||
|
|
||||||
cattr_reader :problem_syms, :problem_blocks, :problem_messages, :problem_scheduled_check_blocks
|
cattr_reader :problem_syms,
|
||||||
|
:problem_blocks,
|
||||||
|
:problem_messages,
|
||||||
|
:problem_scheduled_check_blocks,
|
||||||
|
:problem_scheduled_check_klasses
|
||||||
|
|
||||||
class Problem
|
class Problem
|
||||||
VALID_PRIORITIES = %w[low high].freeze
|
VALID_PRIORITIES = %w[low high].freeze
|
||||||
@ -80,7 +84,8 @@ class AdminDashboardData
|
|||||||
@@problem_blocks << blk if blk
|
@@problem_blocks << blk if blk
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.add_scheduled_problem_check(check_identifier, &blk)
|
def self.add_scheduled_problem_check(check_identifier, klass = nil, &blk)
|
||||||
|
@@problem_scheduled_check_klasses[check_identifier] = klass
|
||||||
@@problem_scheduled_check_blocks[check_identifier] = blk
|
@@problem_scheduled_check_blocks[check_identifier] = blk
|
||||||
end
|
end
|
||||||
|
|
||||||
@ -125,7 +130,7 @@ class AdminDashboardData
|
|||||||
end
|
end
|
||||||
|
|
||||||
def self.register_default_scheduled_problem_checks
|
def self.register_default_scheduled_problem_checks
|
||||||
add_scheduled_problem_check(:group_smtp_credentials) do
|
add_scheduled_problem_check(:group_smtp_credentials, GroupEmailCredentialsCheck) do
|
||||||
problems = GroupEmailCredentialsCheck.run
|
problems = GroupEmailCredentialsCheck.run
|
||||||
problems.map do |p|
|
problems.map do |p|
|
||||||
problem_message =
|
problem_message =
|
||||||
@ -158,6 +163,8 @@ class AdminDashboardData
|
|||||||
|
|
||||||
problems = instance_exec(&check)
|
problems = instance_exec(&check)
|
||||||
|
|
||||||
|
yield(problems) if block_given? && problems.present?
|
||||||
|
|
||||||
Array
|
Array
|
||||||
.wrap(problems)
|
.wrap(problems)
|
||||||
.compact
|
.compact
|
||||||
@ -186,6 +193,7 @@ class AdminDashboardData
|
|||||||
@@problem_syms = []
|
@@problem_syms = []
|
||||||
@@problem_blocks = []
|
@@problem_blocks = []
|
||||||
@@problem_scheduled_check_blocks = {}
|
@@problem_scheduled_check_blocks = {}
|
||||||
|
@@problem_scheduled_check_klasses = {}
|
||||||
|
|
||||||
@@problem_messages = %w[
|
@@problem_messages = %w[
|
||||||
dashboard.bad_favicon_url
|
dashboard.bad_favicon_url
|
||||||
|
@ -7,6 +7,9 @@
|
|||||||
# problem checks, and if any credentials have issues they will show up on
|
# problem checks, and if any credentials have issues they will show up on
|
||||||
# the admin dashboard as a high priority issue.
|
# the admin dashboard as a high priority issue.
|
||||||
class GroupEmailCredentialsCheck
|
class GroupEmailCredentialsCheck
|
||||||
|
def self.max_retries = 2
|
||||||
|
def self.retry_wait = 30.seconds
|
||||||
|
|
||||||
def self.run
|
def self.run
|
||||||
errors = []
|
errors = []
|
||||||
|
|
||||||
|
@ -6,8 +6,13 @@ RSpec.describe Jobs::ProblemCheck do
|
|||||||
AdminDashboardData.reset_problem_checks
|
AdminDashboardData.reset_problem_checks
|
||||||
end
|
end
|
||||||
|
|
||||||
|
class TestCheck
|
||||||
|
def self.max_retries = 0
|
||||||
|
def self.retry_wait = 30.seconds
|
||||||
|
end
|
||||||
|
|
||||||
it "runs the scheduled problem check that has been added and adds the messages to the load_found_scheduled_check_problems array" do
|
it "runs the scheduled problem check that has been added and adds the messages to the load_found_scheduled_check_problems array" do
|
||||||
AdminDashboardData.add_scheduled_problem_check(:test_identifier) do
|
AdminDashboardData.add_scheduled_problem_check(:test_identifier, TestCheck) do
|
||||||
AdminDashboardData::Problem.new("big problem")
|
AdminDashboardData::Problem.new("big problem")
|
||||||
end
|
end
|
||||||
|
|
||||||
@ -19,7 +24,7 @@ RSpec.describe Jobs::ProblemCheck do
|
|||||||
end
|
end
|
||||||
|
|
||||||
it "can handle the problem check returning multiple problems" do
|
it "can handle the problem check returning multiple problems" do
|
||||||
AdminDashboardData.add_scheduled_problem_check(:test_identifier) do
|
AdminDashboardData.add_scheduled_problem_check(:test_identifier, TestCheck) do
|
||||||
[
|
[
|
||||||
AdminDashboardData::Problem.new("big problem"),
|
AdminDashboardData::Problem.new("big problem"),
|
||||||
AdminDashboardData::Problem.new(
|
AdminDashboardData::Problem.new(
|
||||||
@ -36,7 +41,7 @@ RSpec.describe Jobs::ProblemCheck do
|
|||||||
end
|
end
|
||||||
|
|
||||||
it "does not add the same problem twice if the identifier already exists" do
|
it "does not add the same problem twice if the identifier already exists" do
|
||||||
AdminDashboardData.add_scheduled_problem_check(:test_identifier) do
|
AdminDashboardData.add_scheduled_problem_check(:test_identifier, TestCheck) do
|
||||||
[
|
[
|
||||||
AdminDashboardData::Problem.new(
|
AdminDashboardData::Problem.new(
|
||||||
"yuge problem",
|
"yuge problem",
|
||||||
@ -56,8 +61,36 @@ RSpec.describe Jobs::ProblemCheck do
|
|||||||
expect(problems.map(&:to_s)).to match_array(["yuge problem"])
|
expect(problems.map(&:to_s)).to match_array(["yuge problem"])
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it "schedules a retry if there are attempts remaining" do
|
||||||
|
AdminDashboardData.add_scheduled_problem_check(:test_identifier, TestCheck) do
|
||||||
|
AdminDashboardData::Problem.new("big problem")
|
||||||
|
end
|
||||||
|
|
||||||
|
TestCheck.stubs(:max_retries).returns(1)
|
||||||
|
|
||||||
|
expect_enqueued_with(
|
||||||
|
job: :problem_check,
|
||||||
|
args: {
|
||||||
|
check_identifier: :test_identifier,
|
||||||
|
retry_count: 1,
|
||||||
|
},
|
||||||
|
) { described_class.new.execute(check_identifier: :test_identifier) }
|
||||||
|
end
|
||||||
|
|
||||||
|
it "does not schedule a retry if there are no more attempts remaining" do
|
||||||
|
AdminDashboardData.add_scheduled_problem_check(:test_identifier, TestCheck) do
|
||||||
|
AdminDashboardData::Problem.new("big problem")
|
||||||
|
end
|
||||||
|
|
||||||
|
TestCheck.stubs(:max_retries).returns(1)
|
||||||
|
|
||||||
|
expect_not_enqueued_with(job: :problem_check) do
|
||||||
|
described_class.new.execute(check_identifier: :test_identifier, retry_count: 1)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
it "handles errors from a troublesome check" do
|
it "handles errors from a troublesome check" do
|
||||||
AdminDashboardData.add_scheduled_problem_check(:test_identifier) do
|
AdminDashboardData.add_scheduled_problem_check(:test_identifier, TestCheck) do
|
||||||
raise StandardError.new("something went wrong")
|
raise StandardError.new("something went wrong")
|
||||||
AdminDashboardData::Problem.new("polling issue")
|
AdminDashboardData::Problem.new("polling issue")
|
||||||
end
|
end
|
||||||
|
@ -8,10 +8,15 @@ RSpec.describe Jobs::ProblemChecks do
|
|||||||
AdminDashboardData.reset_problem_checks
|
AdminDashboardData.reset_problem_checks
|
||||||
end
|
end
|
||||||
|
|
||||||
|
class TestCheck
|
||||||
|
def self.max_retries = 0
|
||||||
|
def self.retry_wait = 0.seconds
|
||||||
|
end
|
||||||
|
|
||||||
it "starts with a blank slate every time the checks are run to avoid duplicate problems and to clear no longer firing problems" do
|
it "starts with a blank slate every time the checks are run to avoid duplicate problems and to clear no longer firing problems" do
|
||||||
problem_should_fire = true
|
problem_should_fire = true
|
||||||
AdminDashboardData.reset_problem_checks
|
AdminDashboardData.reset_problem_checks
|
||||||
AdminDashboardData.add_scheduled_problem_check(:test_identifier) do
|
AdminDashboardData.add_scheduled_problem_check(:test_identifier, TestCheck) do
|
||||||
if problem_should_fire
|
if problem_should_fire
|
||||||
problem_should_fire = false
|
problem_should_fire = false
|
||||||
AdminDashboardData::Problem.new("yuge problem", priority: "high")
|
AdminDashboardData::Problem.new("yuge problem", priority: "high")
|
||||||
|
Loading…
Reference in New Issue
Block a user