mirror of
https://github.com/mattermost/mattermost.git
synced 2025-02-25 18:55:24 -06:00
MM-16378: revert plugin health check config removal (#11819)
* revert 4e5f6fcfbc
This reverts the configuration setting to disable plugin health checks, preferring instead to retain this functionality for advanced cases. In part, this was driven by the discovery that the health checks were failing on Windows, though that will be addressed separately.
Fixes: MM-16378
* simplify interval handling/logging
Also shutdown the job when plugins are altogether disabled.
This commit is contained in:
@@ -597,6 +597,7 @@ func (a *App) trackConfig() {
|
||||
"enable": *cfg.PluginSettings.Enable,
|
||||
"enable_uploads": *cfg.PluginSettings.EnableUploads,
|
||||
"allow_insecure_download_url": *cfg.PluginSettings.AllowInsecureDownloadUrl,
|
||||
"enable_health_check": *cfg.PluginSettings.EnableHealthCheck,
|
||||
})
|
||||
|
||||
a.SendDiagnostic(TRACK_CONFIG_DATA_RETENTION, map[string]interface{}{
|
||||
|
||||
@@ -204,8 +204,14 @@ func NewServer(options ...Option) (*Server, error) {
|
||||
// Start plugin health check job
|
||||
pluginsEnvironment := s.PluginsEnvironment
|
||||
if pluginsEnvironment != nil {
|
||||
pluginsEnvironment.InitPluginHealthCheckJob()
|
||||
pluginsEnvironment.InitPluginHealthCheckJob(*s.Config().PluginSettings.Enable && *s.Config().PluginSettings.EnableHealthCheck)
|
||||
}
|
||||
s.AddConfigListener(func(_, c *model.Config) {
|
||||
pluginsEnvironment := s.PluginsEnvironment
|
||||
if pluginsEnvironment != nil {
|
||||
pluginsEnvironment.InitPluginHealthCheckJob(*s.Config().PluginSettings.Enable && *c.PluginSettings.EnableHealthCheck)
|
||||
}
|
||||
})
|
||||
|
||||
mlog.Info(fmt.Sprintf("Current version is %v (%v/%v/%v/%v)", model.CurrentVersion, model.BuildNumber, model.BuildDate, model.BuildHash, model.BuildHashEnterprise))
|
||||
mlog.Info(fmt.Sprintf("Enterprise Enabled: %v", model.BuildEnterpriseReady))
|
||||
|
||||
@@ -34,6 +34,7 @@ var String = zap.String
|
||||
var Any = zap.Any
|
||||
var Err = zap.Error
|
||||
var Bool = zap.Bool
|
||||
var Duration = zap.Duration
|
||||
|
||||
type LoggerConfiguration struct {
|
||||
EnableConsole bool
|
||||
|
||||
@@ -2187,6 +2187,7 @@ type PluginSettings struct {
|
||||
Enable *bool
|
||||
EnableUploads *bool `restricted:"true"`
|
||||
AllowInsecureDownloadUrl *bool `restricted:"true"`
|
||||
EnableHealthCheck *bool `restricted:"true"`
|
||||
Directory *string `restricted:"true"`
|
||||
ClientDirectory *string `restricted:"true"`
|
||||
Plugins map[string]map[string]interface{}
|
||||
@@ -2206,6 +2207,10 @@ func (s *PluginSettings) SetDefaults(ls LogSettings) {
|
||||
s.AllowInsecureDownloadUrl = NewBool(false)
|
||||
}
|
||||
|
||||
if s.EnableHealthCheck == nil {
|
||||
s.EnableHealthCheck = NewBool(true)
|
||||
}
|
||||
|
||||
if s.Directory == nil {
|
||||
s.Directory = NewString(PLUGIN_SETTINGS_DEFAULT_DIRECTORY)
|
||||
}
|
||||
|
||||
@@ -12,10 +12,10 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
HEALTH_CHECK_INTERVAL = 30 // seconds. How often the health check should run
|
||||
HEALTH_CHECK_DISABLE_DURATION = 60 // minutes. How long we wait for num fails to incur before disabling the plugin
|
||||
HEALTH_CHECK_PING_FAIL_LIMIT = 3 // How many times we call RPC ping in a row before it is considered a failure
|
||||
HEALTH_CHECK_RESTART_LIMIT = 3 // How many times we restart a plugin before we disable it
|
||||
HEALTH_CHECK_INTERVAL = 30 * time.Second // How often the health check should run
|
||||
HEALTH_CHECK_DISABLE_DURATION = 60 * time.Minute // How long we wait for num fails to incur before disabling the plugin
|
||||
HEALTH_CHECK_PING_FAIL_LIMIT = 3 // How many times we call RPC ping in a row before it is considered a failure
|
||||
HEALTH_CHECK_RESTART_LIMIT = 3 // How many times we restart a plugin before we disable it
|
||||
)
|
||||
|
||||
type PluginHealthCheckJob struct {
|
||||
@@ -24,22 +24,34 @@ type PluginHealthCheckJob struct {
|
||||
env *Environment
|
||||
}
|
||||
|
||||
// InitPluginHealthCheckJob starts a new job for checking all active plugins
|
||||
func (env *Environment) InitPluginHealthCheckJob() {
|
||||
job := newPluginHealthCheckJob(env)
|
||||
env.pluginHealthCheckJob = job
|
||||
job.Start()
|
||||
// InitPluginHealthCheckJob starts a new job if one is not running and is set to enabled, or kills an existing one if set to disabled.
|
||||
func (env *Environment) InitPluginHealthCheckJob(enable bool) {
|
||||
// Config is set to enable. No job exists, start a new job.
|
||||
if enable && env.pluginHealthCheckJob == nil {
|
||||
mlog.Debug("Enabling plugin health check job", mlog.Duration("interval_s", HEALTH_CHECK_INTERVAL))
|
||||
|
||||
job := newPluginHealthCheckJob(env)
|
||||
env.pluginHealthCheckJob = job
|
||||
job.Start()
|
||||
}
|
||||
|
||||
// Config is set to disable. Job exists, kill existing job.
|
||||
if !enable && env.pluginHealthCheckJob != nil {
|
||||
mlog.Debug("Disabling plugin health check job")
|
||||
|
||||
env.pluginHealthCheckJob.Cancel()
|
||||
env.pluginHealthCheckJob = nil
|
||||
}
|
||||
}
|
||||
|
||||
// Start continuously runs health checks on all active plugins, on a timer.
|
||||
func (job *PluginHealthCheckJob) Start() {
|
||||
interval := time.Duration(HEALTH_CHECK_INTERVAL) * time.Second
|
||||
mlog.Debug(fmt.Sprintf("Plugin health check job starting. Sending health check pings every %v seconds.", interval))
|
||||
mlog.Debug("Plugin health check job starting.")
|
||||
|
||||
go func() {
|
||||
defer close(job.cancelled)
|
||||
|
||||
ticker := time.NewTicker(interval)
|
||||
ticker := time.NewTicker(HEALTH_CHECK_INTERVAL)
|
||||
defer func() {
|
||||
ticker.Stop()
|
||||
}()
|
||||
@@ -125,7 +137,7 @@ func shouldDeactivatePlugin(rp *registeredPlugin) bool {
|
||||
index := len(rp.failTimeStamps) - HEALTH_CHECK_RESTART_LIMIT
|
||||
t := rp.failTimeStamps[index]
|
||||
now := time.Now()
|
||||
elapsed := now.Sub(t).Minutes()
|
||||
elapsed := now.Sub(t)
|
||||
if elapsed <= HEALTH_CHECK_DISABLE_DURATION {
|
||||
return true
|
||||
}
|
||||
|
||||
@@ -130,8 +130,8 @@ func TestShouldDeactivatePlugin(t *testing.T) {
|
||||
|
||||
// Failures are recent enough to restart
|
||||
rp = newRegisteredPlugin(bundle)
|
||||
rp.failTimeStamps = append(rp.failTimeStamps, now.Add(-HEALTH_CHECK_DISABLE_DURATION*0.2*time.Minute))
|
||||
rp.failTimeStamps = append(rp.failTimeStamps, now.Add(-HEALTH_CHECK_DISABLE_DURATION*0.1*time.Minute))
|
||||
rp.failTimeStamps = append(rp.failTimeStamps, now.Add(-HEALTH_CHECK_DISABLE_DURATION/10*2))
|
||||
rp.failTimeStamps = append(rp.failTimeStamps, now.Add(-HEALTH_CHECK_DISABLE_DURATION/10))
|
||||
rp.failTimeStamps = append(rp.failTimeStamps, now)
|
||||
|
||||
result = shouldDeactivatePlugin(rp)
|
||||
@@ -139,8 +139,8 @@ func TestShouldDeactivatePlugin(t *testing.T) {
|
||||
|
||||
// Failures are too spaced out to warrant a restart
|
||||
rp = newRegisteredPlugin(bundle)
|
||||
rp.failTimeStamps = append(rp.failTimeStamps, now.Add(-HEALTH_CHECK_DISABLE_DURATION*2*time.Minute))
|
||||
rp.failTimeStamps = append(rp.failTimeStamps, now.Add(-HEALTH_CHECK_DISABLE_DURATION*1*time.Minute))
|
||||
rp.failTimeStamps = append(rp.failTimeStamps, now.Add(-HEALTH_CHECK_DISABLE_DURATION*2))
|
||||
rp.failTimeStamps = append(rp.failTimeStamps, now.Add(-HEALTH_CHECK_DISABLE_DURATION*1))
|
||||
rp.failTimeStamps = append(rp.failTimeStamps, now)
|
||||
|
||||
result = shouldDeactivatePlugin(rp)
|
||||
@@ -148,7 +148,7 @@ func TestShouldDeactivatePlugin(t *testing.T) {
|
||||
|
||||
// Not enough failures are present to warrant a restart
|
||||
rp = newRegisteredPlugin(bundle)
|
||||
rp.failTimeStamps = append(rp.failTimeStamps, now.Add(-HEALTH_CHECK_DISABLE_DURATION*0.1*time.Minute))
|
||||
rp.failTimeStamps = append(rp.failTimeStamps, now.Add(-HEALTH_CHECK_DISABLE_DURATION/10))
|
||||
rp.failTimeStamps = append(rp.failTimeStamps, now)
|
||||
|
||||
result = shouldDeactivatePlugin(rp)
|
||||
|
||||
Reference in New Issue
Block a user