From c1f25cdf5b060aa11d84f56d6db87d26c56a85c1 Mon Sep 17 00:00:00 2001 From: Alan Guo Xiang Tan Date: Thu, 10 Oct 2024 08:01:40 +0800 Subject: [PATCH] FIX: Unicorn master and Sidekiq reopening logs at the same time (#29137) In our production environment, we have been seeing Sidekiq processes getting stuck randomly when a USR1 signal is sent to the Unicorn master process. We have not been able to identify the root cause of why the Sidekiq process gets stuck. We however noticed that when the Unicorn master process receives a USR1 signal, it will reopen the logs for the Unicorn master process first before sending a USR1 signal for the Unicorn worker processes to reopen the logs. We figured that we should do the same for the Sidekiq process as well when a USR1 signal. In this commit, we introduce an arbitrary delay of 1 second before we the Sidekiq process reopens its log files so as to allow enough time for the Unicorn master to finish reopening it logs first. We also do not send reopen logs for the Sidekiq process if the `DISCOURSE_LOG_SIDEKIQ` env is not present because there is no need to reopen any logs. --- app/jobs/base.rb | 2 +- config/unicorn.conf.rb | 23 ++++++++++++++++------- lib/demon/sidekiq.rb | 33 +++++++++++++++++++++------------ lib/discourse.rb | 4 ++++ 4 files changed, 42 insertions(+), 20 deletions(-) diff --git a/app/jobs/base.rb b/app/jobs/base.rb index 433ee2cee8d..e1042afb80a 100644 --- a/app/jobs/base.rb +++ b/app/jobs/base.rb @@ -150,7 +150,7 @@ module Jobs end def enabled? - ENV["DISCOURSE_LOG_SIDEKIQ"] == "1" + Discourse.enable_sidekiq_logging? end def live_slots_limit diff --git a/config/unicorn.conf.rb b/config/unicorn.conf.rb index 13c3f4eaf16..8db561ba17e 100644 --- a/config/unicorn.conf.rb +++ b/config/unicorn.conf.rb @@ -89,13 +89,22 @@ before_fork do |server, worker| Demon::Sidekiq.after_fork { DiscourseEvent.trigger(:sidekiq_fork_started) } Demon::Sidekiq.start(sidekiqs, logger: server.logger) - # Trap USR1, so we can re-issue to sidekiq workers - # but chain the default unicorn implementation as well - old_handler = - Signal.trap("USR1") do - Demon::Sidekiq.kill("USR1") - old_handler.call - end + if Discourse.enable_sidekiq_logging? + # Trap USR1, so we can re-issue to sidekiq workers + # but chain the default unicorn implementation as well + old_handler = + Signal.trap("USR1") do + old_handler.call + + # We have seen Sidekiq processes getting stuck in production sporadically when log rotation happens. + # The cause is currently unknown but we suspect that it is related to the Unicorn master process and + # Sidekiq demon processes reopening logs at the same time as we noticed that Unicorn worker processes only + # reopen logs after the Unicorn master process is done. To workaround the problem, we are adding an arbitrary + # delay of 1 second to Sidekiq's log reopeing procedure. The 1 second delay should be + # more than enough for the Unicorn master process to finish reopening logs. + Demon::Sidekiq.kill("USR2") + end + end end if ENV["DISCOURSE_ENABLE_EMAIL_SYNC_DEMON"] == "true" diff --git a/lib/demon/sidekiq.rb b/lib/demon/sidekiq.rb index f10d033ef1f..2c90676cc72 100644 --- a/lib/demon/sidekiq.rb +++ b/lib/demon/sidekiq.rb @@ -34,19 +34,11 @@ class Demon::Sidekiq < ::Demon::Base cli = Sidekiq::CLI.instance # Unicorn uses USR1 to indicate that log files have been rotated - Signal.trap("USR1") do - begin - log_in_trap("Sidekiq reopening logs...") - Unicorn::Util.reopen_logs - log_in_trap("Sidekiq done reopening logs...") - rescue => error - log_in_trap( - "Error encountered while reopening logs: [#{error.class}] #{error.message}\n#{error.backtrace.join("\n")}", - level: :error, - ) + Signal.trap("USR1") { reopen_logs } - exit 1 - end + Signal.trap("USR2") do + sleep 1 + reopen_logs end options = ["-c", GlobalSetting.sidekiq_workers.to_s] @@ -75,4 +67,21 @@ class Demon::Sidekiq < ::Demon::Base exit 1 end + + private + + def reopen_logs + begin + log_in_trap("Sidekiq reopening logs...") + Unicorn::Util.reopen_logs + log_in_trap("Sidekiq done reopening logs...") + rescue => error + log_in_trap( + "Error encountered while reopening logs: [#{error.class}] #{error.message}\n#{error.backtrace.join("\n")}", + level: :error, + ) + + exit 1 + end + end end diff --git a/lib/discourse.rb b/lib/discourse.rb index 1dfe73fee07..68119a94571 100644 --- a/lib/discourse.rb +++ b/lib/discourse.rb @@ -1223,4 +1223,8 @@ module Discourse ) if SiteSetting.set_locale_from_accept_language_header locale end + + def self.enable_sidekiq_logging? + ENV["DISCOURSE_LOG_SIDEKIQ"] == "1" + end end