grafana/pkg/services/ngalert/notifier/silences.go

120 lines
4.1 KiB
Go
Raw Normal View History

package notifier
import (
"errors"
"fmt"
"time"
Inhouse alerting api (#33129) * init * autogens AM route * POST dashboards/db spec * POST alert-notifications spec * fix description * re inits vendor, updates grafana to master * go mod updates * alerting routes * renames to receivers * prometheus endpoints * align config endpoint with cortex, include templates * Change grafana receiver type * Update receivers.go * rename struct to stop swagger thrashing * add rules API * index html * standalone swagger ui html page * Update README.md * Expose GrafanaManagedAlert properties * Some fixes - /api/v1/rules/{Namespace} should return a map - update ExtendedUpsertAlertDefinitionCommand properties * am alerts routes * rename prom swagger section for clarity, remove example endpoints * Add missing json and yaml tags * folder perms * make folders POST again * fix grafana receiver type * rename fodler->namespace for perms * make ruler json again * PR fixes * silences * fix Ok -> Ack * Add id to POST /api/v1/silences (#9) Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in> * Add POST /api/v1/alerts (#10) Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in> * fix silences * Add testing endpoints * removes grpc replace directives * [wip] starts validation * pkg cleanup * go mod tidy * ignores vendor dir * Change response type for Cortex/Loki alerts * receiver unmarshaling tests * ability to split routes between AM & Grafana * api marshaling & validation * begins work on routing lib * [hack] ignores embedded field in generation * path specific datasource for alerting * align endpoint names with cloud * single route per Alerting config * removes unused routing pkg * regens spec * adds datasource param to ruler/prom route paths * Modifications for supporting migration * Apply suggestions from code review * hack for cleaning circular refs in swagger definition * generates files * minor fixes for prom endpoints * decorate prom apis with required: true where applicable * Revert "generates files" This reverts commit 
ef7e97558477d79bcad416e043b04dbd04a2c8f7. * removes server autogen * Update imported structs from ngalert * Fix listing rules response * Update github.com/prometheus/common dependency * Update get silence response * Update get silences response * adds ruler validation & backend switching * Fix GET /alertmanager/{DatasourceId}/config/api/v1/alerts response * Distinct gettable and postable grafana receivers * Remove permissions routes * Latest JSON specs * Fix testing routes * inline yaml annotation on apirulenode * yaml test & yamlv3 + comments * Fix yaml annotations for embedded type * Rename DatasourceId path parameter * Implement Backend.String() * backend zero value is a real backend * exports DiscoveryBase * Fix GO initialisms * Silences: Use PostableSilence as the base struct for creating silences * Use type alias instead of struct embedding * More fixes to alertmanager silencing routes * post and spec JSONs * Split rule config to postable/gettable * Fix empty POST /silences payload Recreating the generated JSON specs fixes the issue without further modifications * better yaml unmarshaling for nested yaml docs in cortex-am configs * regens spec * re-adds config.receivers * omitempty to align with prometheus API behavior * Prefix routes with /api * Update Alertmanager models * Make adjustments to follow the Alertmanager API * ruler: add for and annotations to grafana alert (#45) * Modify testing API routes * Fix grafana rule for field type * Move PostableUserConfig validation to this library * Fix PostableUserConfig YAML encoding/decoding * Use common fields for grafana and lotex rules * Add namespace id in GettableGrafanaRule * Apply suggestions from code review * fixup * more changes * Apply suggestions from code review * aligns structure pre merge * fix new imports & tests * updates tooling readme * goimports * lint * more linting!! 
* revive lint Co-authored-by: Sofia Papagiannaki <papagian@gmail.com> Co-authored-by: Domas <domasx2@gmail.com> Co-authored-by: Sofia Papagiannaki <papagian@users.noreply.github.com> Co-authored-by: Ganesh Vernekar <15064823+codesome@users.noreply.github.com> Co-authored-by: gotjosh <josue@grafana.com> Co-authored-by: David Parrott <stomp.box.yo@gmail.com> Co-authored-by: Kyle Brandt <kyle@grafana.com>
2021-04-19 13:26:04 -05:00
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
v2 "github.com/prometheus/alertmanager/api/v2"
"github.com/prometheus/alertmanager/silence"
)
var (
ErrGetSilencesInternal = fmt.Errorf("unable to retrieve silence(s) due to an internal error")
ErrDeleteSilenceInternal = fmt.Errorf("unable to delete silence due to an internal error")
ErrCreateSilenceBadPayload = fmt.Errorf("unable to create silence")
ErrListSilencesBadPayload = fmt.Errorf("unable to list silences")
ErrSilenceNotFound = silence.ErrNotFound
)
// ListSilences returns the stored silences that pass the given label filters.
//
// The filter strings are parsed into matchers with parseFilter; a parse failure
// yields an error whose message starts with the ErrListSilencesBadPayload text.
// Failures while querying the store or converting a stored silence to its API
// representation yield an error whose message starts with the
// ErrGetSilencesInternal text. In every failure case the underlying error is
// the one that is wrapped. On success the result is ordered by v2.SortSilences.
func (am *Alertmanager) ListSilences(filter []string) (apimodels.GettableSilences, error) {
	matchers, err := parseFilter(filter)
	if err != nil {
		am.logger.Error("failed to parse matchers", "err", err)
		return nil, fmt.Errorf("%s: %w", ErrListSilencesBadPayload.Error(), err)
	}

	internalMsg := ErrGetSilencesInternal.Error()

	stored, _, err := am.silences.Query()
	if err != nil {
		am.logger.Error(internalMsg, "err", err)
		return nil, fmt.Errorf("%s: %w", internalMsg, err)
	}

	result := apimodels.GettableSilences{}
	for _, pb := range stored {
		if v2.CheckSilenceMatchesFilterLabels(pb, matchers) {
			// gettable is declared per iteration, so taking its address below is safe.
			gettable, convErr := v2.GettableSilenceFromProto(pb)
			if convErr != nil {
				am.logger.Error("unmarshaling from protobuf failed", "err", convErr)
				return apimodels.GettableSilences{}, fmt.Errorf(
					"%s: failed to convert internal silence to API silence: %w",
					internalMsg, convErr)
			}
			result = append(result, &gettable)
		}
	}

	v2.SortSilences(result)
	return result, nil
}
// GetSilence retrieves a silence by the provided silenceID.
//
// It returns ErrSilenceNotFound if the query yields no silence. A failing query
// or a failing conversion of the stored silence to its API representation
// yields an error whose message starts with the ErrGetSilencesInternal text and
// which wraps the underlying error. On any error the returned silence is the
// zero value.
func (am *Alertmanager) GetSilence(silenceID string) (apimodels.GettableSilence, error) {
	sils, _, err := am.silences.Query(silence.QIDs(silenceID))
	if err != nil {
		return apimodels.GettableSilence{}, fmt.Errorf("%s: %w", ErrGetSilencesInternal.Error(), err)
	}

	if len(sils) == 0 {
		// err is nil on this path and sils is empty, so log the ID that was
		// actually requested instead of either of them.
		am.logger.Error("failed to find silence", "id", silenceID)
		return apimodels.GettableSilence{}, ErrSilenceNotFound
	}

	sil, err := v2.GettableSilenceFromProto(sils[0])
	if err != nil {
		am.logger.Error("unmarshaling from protobuf failed", "err", err)
		return apimodels.GettableSilence{}, fmt.Errorf("%s: failed to convert internal silence to API silence: %w",
			ErrGetSilencesInternal.Error(), err)
	}

	return sil, nil
}
// CreateSilence persists the provided silence and returns the silence ID if successful.
//
// The silence is rejected, with an error wrapping ErrCreateSilenceBadPayload,
// when its start time is not strictly before its end time or when its end time
// is already in the past. A failure to convert the payload to the internal
// representation yields an error whose message starts with the
// ErrCreateSilenceBadPayload text and which wraps the conversion error. If the
// store reports silence.ErrNotFound, ErrSilenceNotFound is returned; any other
// store error is reported as an error wrapping ErrCreateSilenceBadPayload.
func (am *Alertmanager) CreateSilence(ps *apimodels.PostableSilence) (string, error) {
	sil, err := v2.PostableSilenceToProto(ps)
	if err != nil {
		am.logger.Error("marshaling to protobuf failed", "err", err)
		return "", fmt.Errorf("%s: failed to convert API silence to internal silence: %w",
			ErrCreateSilenceBadPayload.Error(), err)
	}

	// Equivalent to "starts after or exactly at the end time".
	if !sil.StartsAt.Before(sil.EndsAt) {
		msg := "start time must be before end time"
		// Context is passed as key/value pairs after the message, as in the
		// other logger calls in this file.
		am.logger.Error(msg, "starts_at", sil.StartsAt, "ends_at", sil.EndsAt)
		return "", fmt.Errorf("%s: %w", msg, ErrCreateSilenceBadPayload)
	}

	if sil.EndsAt.Before(time.Now()) {
		msg := "end time can't be in the past"
		am.logger.Error(msg, "ends_at", sil.EndsAt)
		return "", fmt.Errorf("%s: %w", msg, ErrCreateSilenceBadPayload)
	}

	silenceID, err := am.silences.Set(sil)
	if err != nil {
		am.logger.Error("unable to save silence", "err", err)
		if errors.Is(err, silence.ErrNotFound) {
			return "", ErrSilenceNotFound
		}
		return "", fmt.Errorf("unable to save silence: %s: %w", err.Error(), ErrCreateSilenceBadPayload)
	}

	return silenceID, nil
}
// DeleteSilence expires the silence identified by silenceID.
//
// It returns nil on success and ErrSilenceNotFound when the store reports
// silence.ErrNotFound. Any other failure is returned as an error that wraps
// ErrDeleteSilenceInternal and carries the store's error text in its message.
func (am *Alertmanager) DeleteSilence(silenceID string) error {
	expireErr := am.silences.Expire(silenceID)

	switch {
	case expireErr == nil:
		return nil
	case errors.Is(expireErr, silence.ErrNotFound):
		return ErrSilenceNotFound
	default:
		return fmt.Errorf("%s: %w", expireErr.Error(), ErrDeleteSilenceInternal)
	}
}