DEV: Parallel scheduled admin checks (#24190)

This PR does some preparatory refactoring of scheduled admin checks in order for us to be able to do custom retry strategies for some of them.

Instead of running all checks in sequence inside a single, scheduled job, the scheduled job spawns one new job per check.

In order to be concurrency-safe, we need to change the existing Redis data structure from a string (of serialized JSON) to a list of strings (of serialized JSON).
This commit is contained in:
Ted Johansson 2023-11-03 09:05:29 +08:00 committed by GitHub
parent 4346512f88
commit 47e58357b6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 173 additions and 120 deletions

View File

@ -0,0 +1,15 @@
# frozen_string_literal: true
module Jobs
# This job runs a singular scheduled admin check. It is scheduled by
# the ProblemChecks (plural) scheduled job.
class ProblemCheck < ::Jobs::Base
sidekiq_options retry: false
def execute(args)
identifier = args[:check_identifier]
AdminDashboardData.execute_scheduled_check(identifier.to_sym)
end
end
end

View File

@ -5,6 +5,8 @@ module Jobs
# on a regular basis. To add a problem check for this scheduled job run # on a regular basis. To add a problem check for this scheduled job run
# call AdminDashboardData.add_scheduled_problem_check # call AdminDashboardData.add_scheduled_problem_check
class ProblemChecks < ::Jobs::Scheduled class ProblemChecks < ::Jobs::Scheduled
sidekiq_options retry: false
every 10.minutes every 10.minutes
def execute(_args) def execute(_args)

View File

@ -35,7 +35,7 @@ class AdminDashboardData
GLOBAL_REPORTS ||= [] GLOBAL_REPORTS ||= []
PROBLEM_MESSAGE_PREFIX = "admin-problem:" PROBLEM_MESSAGE_PREFIX = "admin-problem:"
SCHEDULED_PROBLEM_STORAGE_KEY = "admin-found-scheduled-problems" SCHEDULED_PROBLEM_STORAGE_KEY = "admin-found-scheduled-problems-list"
def initialize(opts = {}) def initialize(opts = {})
@opts = opts @opts = opts
@ -89,12 +89,11 @@ class AdminDashboardData
if problem.identifier.present? if problem.identifier.present?
return if problems.find { |p| p.identifier == problem.identifier } return if problems.find { |p| p.identifier == problem.identifier }
end end
problems << problem set_found_scheduled_check_problem(problem)
set_found_scheduled_check_problems(problems)
end end
def self.set_found_scheduled_check_problems(problems) def self.set_found_scheduled_check_problem(problem)
Discourse.redis.setex(SCHEDULED_PROBLEM_STORAGE_KEY, 300, JSON.dump(problems.map(&:to_h))) Discourse.redis.rpush(SCHEDULED_PROBLEM_STORAGE_KEY, JSON.dump(problem.to_h))
end end
def self.clear_found_scheduled_check_problems def self.clear_found_scheduled_check_problems
@ -103,21 +102,25 @@ class AdminDashboardData
def self.clear_found_problem(identifier) def self.clear_found_problem(identifier)
problems = load_found_scheduled_check_problems problems = load_found_scheduled_check_problems
problems.reject! { |p| p.identifier == identifier } problem = problems.find { |p| p.identifier == identifier }
set_found_scheduled_check_problems(problems) Discourse.redis.lrem(SCHEDULED_PROBLEM_STORAGE_KEY, 1, JSON.dump(problem.to_h))
end end
def self.load_found_scheduled_check_problems def self.load_found_scheduled_check_problems
found_problems_json = Discourse.redis.get(SCHEDULED_PROBLEM_STORAGE_KEY) found_problems = Discourse.redis.lrange(SCHEDULED_PROBLEM_STORAGE_KEY, 0, -1)
return [] if found_problems_json.blank?
begin return [] if found_problems.blank?
JSON.parse(found_problems_json).map { |problem| Problem.from_h(problem) }
rescue JSON::ParserError => err found_problems.filter_map do |problem|
Discourse.warn_exception( begin
err, Problem.from_h(JSON.parse(problem))
message: "Error parsing found problem JSON in admin dashboard: #{found_problems_json}", rescue JSON::ParserError => err
) Discourse.warn_exception(
[] err,
message: "Error parsing found problem JSON in admin dashboard: #{problem}",
)
nil
end
end end
end end
@ -145,28 +148,29 @@ class AdminDashboardData
end end
def self.execute_scheduled_checks def self.execute_scheduled_checks
found_problems = [] problem_scheduled_check_blocks.keys.each do |check_identifier|
problem_scheduled_check_blocks.each do |check_identifier, blk| Jobs.enqueue(:problem_check, check_identifier: check_identifier.to_s)
problems = nil end
end
begin def self.execute_scheduled_check(identifier)
problems = instance_exec(&blk) check = problem_scheduled_check_blocks[identifier]
rescue StandardError => err
Discourse.warn_exception( problems = instance_exec(&check)
err,
message: "A scheduled admin dashboard problem check (#{check_identifier}) errored.", Array
) .wrap(problems)
# we don't want to hold up other checks because this one errored .compact
next .each do |problem|
next if !problem.is_a?(Problem)
add_found_scheduled_check_problem(problem)
end end
rescue StandardError => err
found_problems += Array.wrap(problems) Discourse.warn_exception(
end err,
message: "A scheduled admin dashboard problem check (#{identifier}) errored.",
found_problems.compact.each do |problem| )
next if !problem.is_a?(Problem)
add_found_scheduled_check_problem(problem)
end
end end
## ##

View File

@ -0,0 +1,68 @@
# frozen_string_literal: true
RSpec.describe Jobs::ProblemCheck do
after do
Discourse.redis.flushdb
AdminDashboardData.reset_problem_checks
end
it "runs the scheduled problem check that has been added and adds the messages to the load_found_scheduled_check_problems array" do
AdminDashboardData.add_scheduled_problem_check(:test_identifier) do
AdminDashboardData::Problem.new("big problem")
end
described_class.new.execute(check_identifier: :test_identifier)
problems = AdminDashboardData.load_found_scheduled_check_problems
expect(problems.count).to eq(1)
expect(problems.first).to be_a(AdminDashboardData::Problem)
expect(problems.first.to_s).to eq("big problem")
end
it "can handle the problem check returning multiple problems" do
AdminDashboardData.add_scheduled_problem_check(:test_identifier) do
[
AdminDashboardData::Problem.new("big problem"),
AdminDashboardData::Problem.new(
"yuge problem",
priority: "high",
identifier: "config_is_a_mess",
),
]
end
described_class.new.execute(check_identifier: :test_identifier)
problems = AdminDashboardData.load_found_scheduled_check_problems
expect(problems.map(&:to_s)).to match_array(["big problem", "yuge problem"])
end
it "does not add the same problem twice if the identifier already exists" do
AdminDashboardData.add_scheduled_problem_check(:test_identifier) do
[
AdminDashboardData::Problem.new(
"yuge problem",
priority: "high",
identifier: "config_is_a_mess",
),
AdminDashboardData::Problem.new(
"nasty problem",
priority: "high",
identifier: "config_is_a_mess",
),
]
end
described_class.new.execute(check_identifier: :test_identifier)
problems = AdminDashboardData.load_found_scheduled_check_problems
expect(problems.map(&:to_s)).to match_array(["yuge problem"])
end
it "handles errors from a troublesome check" do
AdminDashboardData.add_scheduled_problem_check(:test_identifier) do
raise StandardError.new("something went wrong")
AdminDashboardData::Problem.new("polling issue")
end
described_class.new.execute(check_identifier: :test_identifier)
expect(AdminDashboardData.load_found_scheduled_check_problems.count).to eq(0)
end
end

View File

@ -1,63 +1,16 @@
# frozen_string_literal: true # frozen_string_literal: true
RSpec.describe Jobs::ProblemChecks do RSpec.describe Jobs::ProblemChecks do
before { Jobs.run_immediately! }
after do after do
Discourse.redis.flushdb Discourse.redis.flushdb
AdminDashboardData.reset_problem_checks AdminDashboardData.reset_problem_checks
end end
it "runs the scheduled problem check that has been added and adds the messages to the load_found_scheduled_check_problems array" do
AdminDashboardData.add_scheduled_problem_check(:test_identifier) do
AdminDashboardData::Problem.new("big problem")
end
described_class.new.execute(nil)
problems = AdminDashboardData.load_found_scheduled_check_problems
expect(problems.count).to eq(1)
expect(problems.first).to be_a(AdminDashboardData::Problem)
expect(problems.first.to_s).to eq("big problem")
end
it "can handle the problem check returning multiple problems" do
AdminDashboardData.add_scheduled_problem_check(:test_identifier) do
[
AdminDashboardData::Problem.new("big problem"),
AdminDashboardData::Problem.new(
"yuge problem",
priority: "high",
identifier: "config_is_a_mess",
),
]
end
described_class.new.execute(nil)
problems = AdminDashboardData.load_found_scheduled_check_problems
expect(problems.map(&:to_s)).to match_array(["big problem", "yuge problem"])
end
it "does not add the same problem twice if the identifier already exists" do
AdminDashboardData.add_scheduled_problem_check(:test_identifier) do
[
AdminDashboardData::Problem.new(
"yuge problem",
priority: "high",
identifier: "config_is_a_mess",
),
AdminDashboardData::Problem.new(
"nasty problem",
priority: "high",
identifier: "config_is_a_mess",
),
]
end
described_class.new.execute(nil)
problems = AdminDashboardData.load_found_scheduled_check_problems
expect(problems.map(&:to_s)).to match_array(["yuge problem"])
end
it "starts with a blank slate every time the checks are run to avoid duplicate problems and to clear no longer firing problems" do it "starts with a blank slate every time the checks are run to avoid duplicate problems and to clear no longer firing problems" do
problem_should_fire = true problem_should_fire = true
AdminDashboardData.reset_problem_checks
AdminDashboardData.add_scheduled_problem_check(:test_identifier) do AdminDashboardData.add_scheduled_problem_check(:test_identifier) do
if problem_should_fire if problem_should_fire
problem_should_fire = false problem_should_fire = false
@ -70,17 +23,4 @@ RSpec.describe Jobs::ProblemChecks do
described_class.new.execute(nil) described_class.new.execute(nil)
expect(AdminDashboardData.load_found_scheduled_check_problems.count).to eq(0) expect(AdminDashboardData.load_found_scheduled_check_problems.count).to eq(0)
end end
it "handles errors from a troublesome check and proceeds with the rest" do
AdminDashboardData.add_scheduled_problem_check(:test_identifier) do
raise StandardError.new("something went wrong")
AdminDashboardData::Problem.new("polling issue")
end
AdminDashboardData.add_scheduled_problem_check(:test_identifier_2) do
AdminDashboardData::Problem.new("yuge problem", priority: "high")
end
described_class.new.execute(nil)
expect(AdminDashboardData.load_found_scheduled_check_problems.count).to eq(1)
end
end end

View File

@ -48,25 +48,6 @@ RSpec.describe AdminDashboardData do
end end
describe "adding scheduled checks" do describe "adding scheduled checks" do
it "adds the passed block to the scheduled checks" do
called = false
AdminDashboardData.add_scheduled_problem_check(:test_identifier) { called = true }
AdminDashboardData.execute_scheduled_checks
expect(called).to eq(true)
end
it "adds a found problem from a scheduled check" do
AdminDashboardData.add_scheduled_problem_check(:test_identifier) do
AdminDashboardData::Problem.new("test problem")
end
AdminDashboardData.execute_scheduled_checks
problems = AdminDashboardData.load_found_scheduled_check_problems
expect(problems.first).to be_a(AdminDashboardData::Problem)
expect(problems.first.message).to eq("test problem")
end
it "does not add duplicate problems with the same identifier" do it "does not add duplicate problems with the same identifier" do
prob1 = AdminDashboardData::Problem.new("test problem", identifier: "test") prob1 = AdminDashboardData::Problem.new("test problem", identifier: "test")
prob2 = AdminDashboardData::Problem.new("test problem 2", identifier: "test") prob2 = AdminDashboardData::Problem.new("test problem 2", identifier: "test")
@ -78,7 +59,7 @@ RSpec.describe AdminDashboardData do
end end
it "does not error when loading malformed problems saved in redis" do it "does not error when loading malformed problems saved in redis" do
Discourse.redis.set(AdminDashboardData::SCHEDULED_PROBLEM_STORAGE_KEY, "{ 'badjson") Discourse.redis.rpush(AdminDashboardData::SCHEDULED_PROBLEM_STORAGE_KEY, "{ 'badjson")
expect(AdminDashboardData.load_found_scheduled_check_problems).to eq([]) expect(AdminDashboardData.load_found_scheduled_check_problems).to eq([])
end end
@ -101,6 +82,49 @@ RSpec.describe AdminDashboardData do
include_examples "stats cacheable" include_examples "stats cacheable"
end end
describe ".execute_scheduled_checks" do
let(:blk) { -> {} }
before { AdminDashboardData.add_scheduled_problem_check(:foo, &blk) }
after { AdminDashboardData.reset_problem_checks }
it do
expect_enqueued_with(job: :problem_check, args: { check_identifier: :foo }) do
AdminDashboardData.execute_scheduled_checks
end
end
end
describe ".execute_scheduled_check" do
context "when problems are found" do
let(:blk) { -> { self::Problem.new("Problem") } }
before do
AdminDashboardData.add_scheduled_problem_check(:foo, &blk)
AdminDashboardData.expects(:add_found_scheduled_check_problem).once
end
after { AdminDashboardData.reset_problem_checks }
it do
expect(described_class.execute_scheduled_check(:foo)).to all(be_a(described_class::Problem))
end
end
context "when check errors out" do
let(:blk) { -> { raise StandardError } }
before do
AdminDashboardData.add_scheduled_problem_check(:foo, &blk)
Discourse.expects(:warn_exception).once
end
after { AdminDashboardData.reset_problem_checks }
it { expect(described_class.execute_scheduled_check(:foo)).to eq(nil) }
end
end
describe "#problem_message_check" do describe "#problem_message_check" do
let(:key) { AdminDashboardData.problem_messages.first } let(:key) { AdminDashboardData.problem_messages.first }