[MM-16032] Add system ping endpoint health checks (#11267)

* Add system ping endpoint health checks

This change adds the option for additional server health checks
to be performed when the system ping endpoint is hit. An additional
field 'getserverstatus' is required to run the enhanced health
checks to ensure previous default ping behavior is not modified.

* Use snake_casing
This commit is contained in:
Gabe Jackson
2019-06-20 16:06:04 -04:00
committed by Joram Wilander
parent a9b4a7f508
commit 41e5ec3c5e
5 changed files with 109 additions and 40 deletions

View File

@@ -41,30 +41,61 @@ func (api *API) InitSystem() {
}
func getSystemPing(c *Context, w http.ResponseWriter, r *http.Request) {
reqs := c.App.Config().ClientRequirements
s := make(map[string]string)
s[model.STATUS] = model.STATUS_OK
s["AndroidLatestVersion"] = reqs.AndroidLatestVersion
s["AndroidMinVersion"] = reqs.AndroidMinVersion
s["DesktopLatestVersion"] = reqs.DesktopLatestVersion
s["DesktopMinVersion"] = reqs.DesktopMinVersion
s["IosLatestVersion"] = reqs.IosLatestVersion
s["IosMinVersion"] = reqs.IosMinVersion
actualGoroutines := runtime.NumGoroutine()
if *c.App.Config().ServiceSettings.GoroutineHealthThreshold <= 0 || actualGoroutines <= *c.App.Config().ServiceSettings.GoroutineHealthThreshold {
m := make(map[string]string)
m[model.STATUS] = model.STATUS_OK
reqs := c.App.Config().ClientRequirements
m["AndroidLatestVersion"] = reqs.AndroidLatestVersion
m["AndroidMinVersion"] = reqs.AndroidMinVersion
m["DesktopLatestVersion"] = reqs.DesktopLatestVersion
m["DesktopMinVersion"] = reqs.DesktopMinVersion
m["IosLatestVersion"] = reqs.IosLatestVersion
m["IosMinVersion"] = reqs.IosMinVersion
w.Write([]byte(model.MapToJson(m)))
} else {
rdata := map[string]string{}
rdata["status"] = "unhealthy"
mlog.Warn(fmt.Sprintf("The number of running goroutines is over the health threshold %v of %v", actualGoroutines, *c.App.Config().ServiceSettings.GoroutineHealthThreshold))
w.WriteHeader(http.StatusInternalServerError)
w.Write([]byte(model.MapToJson(rdata)))
if *c.App.Config().ServiceSettings.GoroutineHealthThreshold > 0 && actualGoroutines >= *c.App.Config().ServiceSettings.GoroutineHealthThreshold {
mlog.Warn(fmt.Sprintf("The number of running goroutines (%v) is over the health threshold (%v)", actualGoroutines, *c.App.Config().ServiceSettings.GoroutineHealthThreshold))
s[model.STATUS] = model.STATUS_UNHEALTHY
}
// Enhanced ping health check:
// If an extra form value is provided then perform extra health checks for
// database and file storage backends.
if r.FormValue("get_server_status") != "" {
dbStatusKey := "database_status"
s[dbStatusKey] = model.STATUS_OK
_, appErr := c.App.Srv.Store.System().Get()
if appErr != nil {
mlog.Debug(fmt.Sprintf("Unable to get database status: %s", appErr.Error()))
s[dbStatusKey] = model.STATUS_UNHEALTHY
s[model.STATUS] = model.STATUS_UNHEALTHY
}
filestoreStatusKey := "filestore_status"
s[filestoreStatusKey] = model.STATUS_OK
license := c.App.License()
backend, appErr := filesstore.NewFileBackend(&c.App.Config().FileSettings, license != nil && *license.Features.Compliance)
if appErr == nil {
appErr = backend.TestConnection()
if appErr != nil {
s[filestoreStatusKey] = model.STATUS_UNHEALTHY
s[model.STATUS] = model.STATUS_UNHEALTHY
}
} else {
mlog.Debug(fmt.Sprintf("Unable to get filestore for ping status: %s", appErr.Error()))
s[filestoreStatusKey] = model.STATUS_UNHEALTHY
s[model.STATUS] = model.STATUS_UNHEALTHY
}
w.Header().Set(model.STATUS, s[model.STATUS])
w.Header().Set(dbStatusKey, s[dbStatusKey])
w.Header().Set(filestoreStatusKey, s[filestoreStatusKey])
}
if s[model.STATUS] != model.STATUS_OK {
w.WriteHeader(http.StatusInternalServerError)
}
w.Write([]byte(model.MapToJson(s)))
}
func testEmail(c *Context, w http.ResponseWriter, r *http.Request) {

View File

@@ -15,25 +15,47 @@ import (
func TestGetPing(t *testing.T) {
th := Setup().InitBasic()
defer th.TearDown()
Client := th.Client
goRoutineHealthThreshold := *th.App.Config().ServiceSettings.GoroutineHealthThreshold
defer func() {
th.App.UpdateConfig(func(cfg *model.Config) { *cfg.ServiceSettings.GoroutineHealthThreshold = goRoutineHealthThreshold })
}()
t.Run("basic ping", func(t *testing.T) {
status, resp := Client.GetPing()
CheckNoError(t, resp)
if status != "OK" {
t.Fatal("should return OK")
}
t.Run("healthy", func(t *testing.T) {
status, resp := th.Client.GetPing()
CheckNoError(t, resp)
assert.Equal(t, model.STATUS_OK, status)
})
th.App.UpdateConfig(func(cfg *model.Config) { *cfg.ServiceSettings.GoroutineHealthThreshold = 10 })
status, resp = th.SystemAdminClient.GetPing()
CheckInternalErrorStatus(t, resp)
if status != "unhealthy" {
t.Fatal("should return unhealthy")
}
t.Run("unhealthy", func(t *testing.T) {
goRoutineHealthThreshold := *th.App.Config().ServiceSettings.GoroutineHealthThreshold
defer func() {
th.App.UpdateConfig(func(cfg *model.Config) { *cfg.ServiceSettings.GoroutineHealthThreshold = goRoutineHealthThreshold })
}()
th.App.UpdateConfig(func(cfg *model.Config) { *cfg.ServiceSettings.GoroutineHealthThreshold = 10 })
status, resp := th.Client.GetPing()
CheckInternalErrorStatus(t, resp)
assert.Equal(t, model.STATUS_UNHEALTHY, status)
})
})
t.Run("with server status", func(t *testing.T) {
t.Run("healthy", func(t *testing.T) {
status, resp := th.Client.GetPingWithServerStatus()
CheckNoError(t, resp)
assert.Equal(t, model.STATUS_OK, status)
})
t.Run("unhealthy", func(t *testing.T) {
badDriver := "badDriverName"
th.App.Config().FileSettings.DriverName = &badDriver
status, resp := th.Client.GetPingWithServerStatus()
CheckInternalErrorStatus(t, resp)
assert.Equal(t, model.STATUS_UNHEALTHY, status)
})
})
}
func TestGetAudits(t *testing.T) {

View File

@@ -35,6 +35,7 @@ const (
STATUS = "status"
STATUS_OK = "OK"
STATUS_FAIL = "FAIL"
STATUS_UNHEALTHY = "UNHEALTHY"
STATUS_REMOVE = "REMOVE"
CLIENT_DIR = "client"
@@ -2682,7 +2683,22 @@ func (c *Client4) GetPing() (string, *Response) {
r, err := c.DoApiGet(c.GetSystemRoute()+"/ping", "")
if r != nil && r.StatusCode == 500 {
defer r.Body.Close()
return "unhealthy", BuildErrorResponse(r, err)
return STATUS_UNHEALTHY, BuildErrorResponse(r, err)
}
if err != nil {
return "", BuildErrorResponse(r, err)
}
defer closeBody(r)
return MapFromJson(r.Body)["status"], BuildResponse(r)
}
// GetPingWithServerStatus will return ok if several basic server health checks
// all psss successfully.
func (c *Client4) GetPingWithServerStatus() (string, *Response) {
r, err := c.DoApiGet(c.GetSystemRoute()+"/ping?get_server_status=true", "")
if r != nil && r.StatusCode == 500 {
defer r.Body.Close()
return STATUS_UNHEALTHY, BuildErrorResponse(r, err)
}
if err != nil {
return "", BuildErrorResponse(r, err)

View File

@@ -30,7 +30,7 @@ func (b *LocalFileBackend) TestConnection() *model.AppError {
return model.NewAppError("TestFileConnection", "api.file.test_connection.local.connection.app_error", nil, err.Error(), http.StatusInternalServerError)
}
os.Remove(filepath.Join(b.directory, TEST_FILE_PATH))
mlog.Info("Able to write files to local storage.")
mlog.Debug("Able to write files to local storage.")
return nil
}

View File

@@ -79,7 +79,7 @@ func (b *S3FileBackend) TestConnection() *model.AppError {
return model.NewAppError("TestFileConnection", "api.file.test_connection.s3.bucked_create.app_error", nil, err.Error(), http.StatusInternalServerError)
}
}
mlog.Info("Connection to S3 or minio is good. Bucket exists.")
mlog.Debug("Connection to S3 or minio is good. Bucket exists.")
return nil
}