Alerting: Log error but don't fail initialization if state history connection test fails (#64699)

Don't return init error if ping fails, add tests
This commit is contained in:
Alexander Weaver 2023-03-13 15:54:46 -05:00 committed by GitHub
parent 74436d31de
commit faef3a8258
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 83 additions and 3 deletions

View File

@ -210,7 +210,7 @@ func (ng *AlertNG) init() error {
Tracer: ng.tracer,
}
history, err := configureHistorianBackend(initCtx, ng.Cfg.UnifiedAlerting.StateHistory, ng.annotationsRepo, ng.dashboardService, ng.store, ng.Metrics.GetHistorianMetrics())
history, err := configureHistorianBackend(initCtx, ng.Cfg.UnifiedAlerting.StateHistory, ng.annotationsRepo, ng.dashboardService, ng.store, ng.Metrics.GetHistorianMetrics(), ng.Log)
if err != nil {
return err
}
@ -389,7 +389,7 @@ type Historian interface {
state.Historian
}
func configureHistorianBackend(ctx context.Context, cfg setting.UnifiedAlertingStateHistorySettings, ar annotations.Repository, ds dashboards.DashboardService, rs historian.RuleStore, met *metrics.Historian) (Historian, error) {
func configureHistorianBackend(ctx context.Context, cfg setting.UnifiedAlertingStateHistorySettings, ar annotations.Repository, ds dashboards.DashboardService, rs historian.RuleStore, met *metrics.Historian, l log.Logger) (Historian, error) {
if !cfg.Enabled {
met.Info.WithLabelValues("noop").Set(0)
return historian.NewNopHistorian(), nil
@ -410,7 +410,7 @@ func configureHistorianBackend(ctx context.Context, cfg setting.UnifiedAlertingS
testConnCtx, cancelFunc := context.WithTimeout(ctx, 10*time.Second)
defer cancelFunc()
if err := backend.TestConnection(testConnCtx); err != nil {
return nil, fmt.Errorf("failed to ping the remote loki historian: %w", err)
l.Error("Failed to communicate with configured remote Loki backend, state history may not be persisted", "error", err)
}
return backend, nil
}

View File

@ -1,11 +1,14 @@
package ngalert
import (
"bytes"
"context"
"math/rand"
"testing"
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/testutil"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"
@ -14,9 +17,11 @@ import (
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/infra/tracing"
"github.com/grafana/grafana/pkg/services/folder"
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
"github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/schedule"
"github.com/grafana/grafana/pkg/services/ngalert/tests/fakes"
"github.com/grafana/grafana/pkg/setting"
"github.com/grafana/grafana/pkg/util"
)
@ -72,3 +77,78 @@ func Test_subscribeToFolderChanges(t *testing.T) {
scheduler.AssertCalled(t, "UpdateAlertRule", rule.GetKey(), rule.Version, false)
}
}
func TestConfigureHistorianBackend(t *testing.T) {
t.Run("fail initialization if invalid backend", func(t *testing.T) {
met := metrics.NewHistorianMetrics(prometheus.NewRegistry())
logger := log.NewNopLogger()
cfg := setting.UnifiedAlertingStateHistorySettings{
Enabled: true,
Backend: "invalid-backend",
}
_, err := configureHistorianBackend(context.Background(), cfg, nil, nil, nil, met, logger)
require.ErrorContains(t, err, "unrecognized")
})
t.Run("do not fail initialization if pinging Loki fails", func(t *testing.T) {
met := metrics.NewHistorianMetrics(prometheus.NewRegistry())
logger := log.NewNopLogger()
cfg := setting.UnifiedAlertingStateHistorySettings{
Enabled: true,
Backend: "loki",
// Should never resolve at the DNS level: https://www.rfc-editor.org/rfc/rfc6761#section-6.4
LokiReadURL: "http://gone.invalid",
LokiWriteURL: "http://gone.invalid",
}
h, err := configureHistorianBackend(context.Background(), cfg, nil, nil, nil, met, logger)
require.NotNil(t, h)
require.NoError(t, err)
})
t.Run("emit metric describing chosen backend", func(t *testing.T) {
reg := prometheus.NewRegistry()
met := metrics.NewHistorianMetrics(reg)
logger := log.NewNopLogger()
cfg := setting.UnifiedAlertingStateHistorySettings{
Enabled: true,
Backend: "annotations",
}
h, err := configureHistorianBackend(context.Background(), cfg, nil, nil, nil, met, logger)
require.NotNil(t, h)
require.NoError(t, err)
exp := bytes.NewBufferString(`
# HELP grafana_alerting_state_history_info Information about the state history store.
# TYPE grafana_alerting_state_history_info gauge
grafana_alerting_state_history_info{backend="annotations"} 1
`)
err = testutil.GatherAndCompare(reg, exp, "grafana_alerting_state_history_info")
require.NoError(t, err)
})
t.Run("emit special zero metric if state history disabled", func(t *testing.T) {
reg := prometheus.NewRegistry()
met := metrics.NewHistorianMetrics(reg)
logger := log.NewNopLogger()
cfg := setting.UnifiedAlertingStateHistorySettings{
Enabled: false,
}
h, err := configureHistorianBackend(context.Background(), cfg, nil, nil, nil, met, logger)
require.NotNil(t, h)
require.NoError(t, err)
exp := bytes.NewBufferString(`
# HELP grafana_alerting_state_history_info Information about the state history store.
# TYPE grafana_alerting_state_history_info gauge
grafana_alerting_state_history_info{backend="noop"} 0
`)
err = testutil.GatherAndCompare(reg, exp, "grafana_alerting_state_history_info")
require.NoError(t, err)
})
}