grafana/pkg/services/ngalert/notifier/alerts.go

229 lines
6.0 KiB
Go
Raw Normal View History

package notifier
import (
"fmt"
"regexp"
"sort"
"time"
Inhouse alerting api (#33129) * init * autogens AM route * POST dashboards/db spec * POST alert-notifications spec * fix description * re inits vendor, updates grafana to master * go mod updates * alerting routes * renames to receivers * prometheus endpoints * align config endpoint with cortex, include templates * Change grafana receiver type * Update receivers.go * rename struct to stop swagger thrashing * add rules API * index html * standalone swagger ui html page * Update README.md * Expose GrafanaManagedAlert properties * Some fixes - /api/v1/rules/{Namespace} should return a map - update ExtendedUpsertAlertDefinitionCommand properties * am alerts routes * rename prom swagger section for clarity, remove example endpoints * Add missing json and yaml tags * folder perms * make folders POST again * fix grafana receiver type * rename fodler->namespace for perms * make ruler json again * PR fixes * silences * fix Ok -> Ack * Add id to POST /api/v1/silences (#9) Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in> * Add POST /api/v1/alerts (#10) Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in> * fix silences * Add testing endpoints * removes grpc replace directives * [wip] starts validation * pkg cleanup * go mod tidy * ignores vendor dir * Change response type for Cortex/Loki alerts * receiver unmarshaling tests * ability to split routes between AM & Grafana * api marshaling & validation * begins work on routing lib * [hack] ignores embedded field in generation * path specific datasource for alerting * align endpoint names with cloud * single route per Alerting config * removes unused routing pkg * regens spec * adds datasource param to ruler/prom route paths * Modifications for supporting migration * Apply suggestions from code review * hack for cleaning circular refs in swagger definition * generates files * minor fixes for prom endpoints * decorate prom apis with required: true where applicable * Revert "generates files" This reverts commit ef7e97558477d79bcad416e043b04dbd04a2c8f7. * removes server autogen * Update imported structs from ngalert * Fix listing rules response * Update github.com/prometheus/common dependency * Update get silence response * Update get silences response * adds ruler validation & backend switching * Fix GET /alertmanager/{DatasourceId}/config/api/v1/alerts response * Distinct gettable and postable grafana receivers * Remove permissions routes * Latest JSON specs * Fix testing routes * inline yaml annotation on apirulenode * yaml test & yamlv3 + comments * Fix yaml annotations for embedded type * Rename DatasourceId path parameter * Implement Backend.String() * backend zero value is a real backend * exports DiscoveryBase * Fix GO initialisms * Silences: Use PostableSilence as the base struct for creating silences * Use type alias instead of struct embedding * More fixes to alertmanager silencing routes * post and spec JSONs * Split rule config to postable/gettable * Fix empty POST /silences payload Recreating the generated JSON specs fixes the issue without further modifications * better yaml unmarshaling for nested yaml docs in cortex-am configs * regens spec * re-adds config.receivers * omitempty to align with prometheus API behavior * Prefix routes with /api * Update Alertmanager models * Make adjustments to follow the Alertmanager API * ruler: add for and annotations to grafana alert (#45) * Modify testing API routes * Fix grafana rule for field type * Move PostableUserConfig validation to this library * Fix PostableUserConfig YAML encoding/decoding * Use common fields for grafana and lotex rules * Add namespace id in GettableGrafanaRule * Apply suggestions from code review * fixup * more changes * Apply suggestions from code review * aligns structure pre merge * fix new imports & tests * updates tooling readme * goimports * lint * more linting!! * revive lint Co-authored-by: Sofia Papagiannaki <papagian@gmail.com> Co-authored-by: Domas <domasx2@gmail.com> Co-authored-by: Sofia Papagiannaki <papagian@users.noreply.github.com> Co-authored-by: Ganesh Vernekar <15064823+codesome@users.noreply.github.com> Co-authored-by: gotjosh <josue@grafana.com> Co-authored-by: David Parrott <stomp.box.yo@gmail.com> Co-authored-by: Kyle Brandt <kyle@grafana.com>
2021-04-19 13:26:04 -05:00
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
v2 "github.com/prometheus/alertmanager/api/v2"
"github.com/prometheus/alertmanager/dispatch"
"github.com/prometheus/alertmanager/pkg/labels"
"github.com/prometheus/alertmanager/types"
prometheus_model "github.com/prometheus/common/model"
)
var (
ErrGetAlertsInternal = fmt.Errorf("unable to retrieve alerts(s) due to an internal error")
ErrGetAlertsUnavailable = fmt.Errorf("unable to retrieve alerts(s) as alertmanager is not initialised yet")
ErrGetAlertsBadPayload = fmt.Errorf("unable to retrieve alerts")
ErrGetAlertGroupsBadPayload = fmt.Errorf("unable to retrieve alerts groups")
)
func (am *Alertmanager) GetAlerts(active, silenced, inhibited bool, filter []string, receivers string) (apimodels.GettableAlerts, error) {
var (
// Initialize result slice to prevent api returning `null` when there
// are no alerts present
res = apimodels.GettableAlerts{}
)
if !am.Ready() {
return res, ErrGetAlertsUnavailable
}
matchers, err := parseFilter(filter)
if err != nil {
am.logger.Error("failed to parse matchers", "err", err)
return nil, fmt.Errorf("%s: %w", err.Error(), ErrGetAlertsBadPayload)
}
receiverFilter, err := parseReceivers(receivers)
if err != nil {
am.logger.Error("failed to parse receiver regex", "err", err)
return nil, fmt.Errorf("%s: %w", err.Error(), ErrGetAlertsBadPayload)
}
alerts := am.alerts.GetPending()
defer alerts.Close()
alertFilter := am.alertFilter(matchers, silenced, inhibited, active)
now := time.Now()
am.reloadConfigMtx.RLock()
for a := range alerts.Next() {
if err = alerts.Err(); err != nil {
break
}
routes := am.route.Match(a.Labels)
receivers := make([]string, 0, len(routes))
for _, r := range routes {
receivers = append(receivers, r.RouteOpts.Receiver)
}
if receiverFilter != nil && !receiversMatchFilter(receivers, receiverFilter) {
continue
}
if !alertFilter(a, now) {
continue
}
alert := v2.AlertToOpenAPIAlert(a, am.marker.Status(a.Fingerprint()), receivers)
res = append(res, alert)
}
am.reloadConfigMtx.RUnlock()
if err != nil {
am.logger.Error("failed to iterate through the alerts", "err", err)
return nil, fmt.Errorf("%s: %w", err.Error(), ErrGetAlertsInternal)
}
sort.Slice(res, func(i, j int) bool {
return *res[i].Fingerprint < *res[j].Fingerprint
})
return res, nil
}
func (am *Alertmanager) GetAlertGroups(active, silenced, inhibited bool, filter []string, receivers string) (apimodels.AlertGroups, error) {
matchers, err := parseFilter(filter)
if err != nil {
am.logger.Error("msg", "failed to parse matchers", "err", err)
return nil, fmt.Errorf("%s: %w", err.Error(), ErrGetAlertGroupsBadPayload)
}
receiverFilter, err := parseReceivers(receivers)
if err != nil {
am.logger.Error("msg", "failed to compile receiver regex", "err", err)
return nil, fmt.Errorf("%s: %w", err.Error(), ErrGetAlertGroupsBadPayload)
}
rf := func(receiverFilter *regexp.Regexp) func(r *dispatch.Route) bool {
return func(r *dispatch.Route) bool {
receiver := r.RouteOpts.Receiver
if receiverFilter != nil && !receiverFilter.MatchString(receiver) {
return false
}
return true
}
}(receiverFilter)
af := am.alertFilter(matchers, silenced, inhibited, active)
alertGroups, allReceivers := am.dispatcher.Groups(rf, af)
res := make(apimodels.AlertGroups, 0, len(alertGroups))
for _, alertGroup := range alertGroups {
ag := &apimodels.AlertGroup{
Receiver: &apimodels.Receiver{Name: &alertGroup.Receiver},
Labels: v2.ModelLabelSetToAPILabelSet(alertGroup.Labels),
Alerts: make([]*apimodels.GettableAlert, 0, len(alertGroup.Alerts)),
}
for _, alert := range alertGroup.Alerts {
fp := alert.Fingerprint()
receivers := allReceivers[fp]
status := am.marker.Status(fp)
apiAlert := v2.AlertToOpenAPIAlert(alert, status, receivers)
ag.Alerts = append(ag.Alerts, apiAlert)
}
res = append(res, ag)
}
return res, nil
}
func (am *Alertmanager) alertFilter(matchers []*labels.Matcher, silenced, inhibited, active bool) func(a *types.Alert, now time.Time) bool {
return func(a *types.Alert, now time.Time) bool {
if !a.EndsAt.IsZero() && a.EndsAt.Before(now) {
return false
}
// Set alert's current status based on its label set.
am.silencer.Mutes(a.Labels)
am.inhibitor.Mutes(a.Labels)
// Get alert's current status after seeing if it is suppressed.
status := am.marker.Status(a.Fingerprint())
if !active && status.State == types.AlertStateActive {
return false
}
if !silenced && len(status.SilencedBy) != 0 {
return false
}
if !inhibited && len(status.InhibitedBy) != 0 {
return false
}
return alertMatchesFilterLabels(&a.Alert, matchers)
}
}
func alertMatchesFilterLabels(a *prometheus_model.Alert, matchers []*labels.Matcher) bool {
sms := make(map[string]string)
for name, value := range a.Labels {
sms[string(name)] = string(value)
}
return matchFilterLabels(matchers, sms)
}
func matchFilterLabels(matchers []*labels.Matcher, sms map[string]string) bool {
for _, m := range matchers {
v, prs := sms[m.Name]
switch m.Type {
case labels.MatchNotRegexp, labels.MatchNotEqual:
if m.Value == "" && prs {
continue
}
if !m.Matches(v) {
return false
}
default:
if m.Value == "" && !prs {
continue
}
if !m.Matches(v) {
return false
}
}
}
return true
}
func parseReceivers(receivers string) (*regexp.Regexp, error) {
if receivers == "" {
return nil, nil
}
return regexp.Compile("^(?:" + receivers + ")$")
}
func parseFilter(filter []string) ([]*labels.Matcher, error) {
matchers := make([]*labels.Matcher, 0, len(filter))
for _, matcherString := range filter {
matcher, err := labels.ParseMatcher(matcherString)
if err != nil {
return nil, err
}
matchers = append(matchers, matcher)
}
return matchers, nil
}
func receiversMatchFilter(receivers []string, filter *regexp.Regexp) bool {
for _, r := range receivers {
if filter.MatchString(r) {
return true
}
}
return false
}