grafana/pkg/services/ngalert/api/api_prometheus.go
Owen Diehl e37a780e14
Inhouse alerting api (#33129)
* init

* autogens AM route

* POST dashboards/db spec

* POST alert-notifications spec

* fix description

* re inits vendor, updates grafana to master

* go mod updates

* alerting routes

* renames to receivers

* prometheus endpoints

* align config endpoint with cortex, include templates

* Change grafana receiver type

* Update receivers.go

* rename struct to stop swagger thrashing

* add rules API

* index html

* standalone swagger ui html page

* Update README.md

* Expose GrafanaManagedAlert properties

* Some fixes

- /api/v1/rules/{Namespace} should return a map
- update ExtendedUpsertAlertDefinitionCommand properties

* am alerts routes

* rename prom swagger section for clarity, remove example endpoints

* Add missing json and yaml tags

* folder perms

* make folders POST again

* fix grafana receiver type

* rename fodler->namespace for perms

* make ruler json again

* PR fixes

* silences

* fix Ok -> Ack

* Add id to POST /api/v1/silences (#9)

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* Add POST /api/v1/alerts (#10)

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>

* fix silences

* Add testing endpoints

* removes grpc replace directives

* [wip] starts validation

* pkg cleanup

* go mod tidy

* ignores vendor dir

* Change response type for Cortex/Loki alerts

* receiver unmarshaling tests

* ability to split routes between AM & Grafana

* api marshaling & validation

* begins work on routing lib

* [hack] ignores embedded field in generation

* path specific datasource for alerting

* align endpoint names with cloud

* single route per Alerting config

* removes unused routing pkg

* regens spec

* adds datasource param to ruler/prom route paths

* Modifications for supporting migration

* Apply suggestions from code review

* hack for cleaning circular refs in swagger definition

* generates files

* minor fixes for prom endpoints

* decorate prom apis with required: true where applicable

* Revert "generates files"

This reverts commit ef7e975584.

* removes server autogen

* Update imported structs from ngalert

* Fix listing rules response

* Update github.com/prometheus/common dependency

* Update get silence response

* Update get silences response

* adds ruler validation & backend switching

* Fix GET /alertmanager/{DatasourceId}/config/api/v1/alerts response

* Distinct gettable and postable grafana receivers

* Remove permissions routes

* Latest JSON specs

* Fix testing routes

* inline yaml annotation on apirulenode

* yaml test & yamlv3 + comments

* Fix yaml annotations for embedded type

* Rename DatasourceId path parameter

* Implement Backend.String()

* backend zero value is a real backend

* exports DiscoveryBase

* Fix GO initialisms

* Silences: Use PostableSilence as the base struct for creating silences

* Use type alias instead of struct embedding

* More fixes to alertmanager silencing routes

* post and spec JSONs

* Split rule config to postable/gettable

* Fix empty POST /silences payload

Recreating the generated JSON specs fixes the issue
without further modifications

* better yaml unmarshaling for nested yaml docs in cortex-am configs

* regens spec

* re-adds config.receivers

* omitempty to align with prometheus API behavior

* Prefix routes with /api

* Update Alertmanager models

* Make adjustments to follow the Alertmanager API

* ruler: add for and annotations to grafana alert (#45)

* Modify testing API routes

* Fix grafana rule for field type

* Move PostableUserConfig validation to this library

* Fix PostableUserConfig YAML encoding/decoding

* Use common fields for grafana and lotex rules

* Add namespace id in GettableGrafanaRule

* Apply suggestions from code review

* fixup

* more changes

* Apply suggestions from code review

* aligns structure pre merge

* fix new imports & tests

* updates tooling readme

* goimports

* lint

* more linting!!

* revive lint

Co-authored-by: Sofia Papagiannaki <papagian@gmail.com>
Co-authored-by: Domas <domasx2@gmail.com>
Co-authored-by: Sofia Papagiannaki <papagian@users.noreply.github.com>
Co-authored-by: Ganesh Vernekar <15064823+codesome@users.noreply.github.com>
Co-authored-by: gotjosh <josue@grafana.com>
Co-authored-by: David Parrott <stomp.box.yo@gmail.com>
Co-authored-by: Kyle Brandt <kyle@grafana.com>
2021-04-19 14:26:04 -04:00

160 lines
5.8 KiB
Go

package api
import (
"fmt"
"net/http"
"time"
apiv1 "github.com/prometheus/client_golang/api/prometheus/v1"
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/store"
"github.com/grafana/grafana/pkg/api/response"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/models"
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
"github.com/grafana/grafana/pkg/services/ngalert/state"
)
type PrometheusSrv struct {
log log.Logger
stateTracker *state.StateTracker
store store.RuleStore
}
func (srv PrometheusSrv) RouteGetAlertStatuses(c *models.ReqContext) response.Response {
alertResponse := apimodels.AlertResponse{
DiscoveryBase: apimodels.DiscoveryBase{
Status: "success",
},
Data: apimodels.AlertDiscovery{
Alerts: []*apimodels.Alert{},
},
}
for _, alertState := range srv.stateTracker.GetAll() {
startsAt := alertState.StartsAt
alertResponse.Data.Alerts = append(alertResponse.Data.Alerts, &apimodels.Alert{
Labels: map[string]string(alertState.Labels),
Annotations: map[string]string{}, //TODO: Once annotations are added to the evaluation result, set them here
State: alertState.State.String(),
ActiveAt: &startsAt,
Value: "", //TODO: once the result of the evaluation is added to the evaluation result, set it here
})
}
return response.JSON(http.StatusOK, alertResponse)
}
func (srv PrometheusSrv) RouteGetRuleStatuses(c *models.ReqContext) response.Response {
ruleResponse := apimodels.RuleResponse{
DiscoveryBase: apimodels.DiscoveryBase{
Status: "success",
},
Data: apimodels.RuleDiscovery{},
}
ruleGroupQuery := ngmodels.ListOrgRuleGroupsQuery{
OrgID: c.SignedInUser.OrgId,
}
if err := srv.store.GetOrgRuleGroups(&ruleGroupQuery); err != nil {
ruleResponse.DiscoveryBase.Status = "error"
ruleResponse.DiscoveryBase.Error = fmt.Sprintf("failure getting rule groups: %s", err.Error())
ruleResponse.DiscoveryBase.ErrorType = apiv1.ErrServer
return response.JSON(http.StatusInternalServerError, ruleResponse)
}
for _, r := range ruleGroupQuery.Result {
if len(r) < 3 {
continue
}
groupId, namespaceUID, namespace := r[0], r[1], r[2]
alertRuleQuery := ngmodels.ListRuleGroupAlertRulesQuery{OrgID: c.SignedInUser.OrgId, NamespaceUID: namespaceUID, RuleGroup: groupId}
if err := srv.store.GetRuleGroupAlertRules(&alertRuleQuery); err != nil {
ruleResponse.DiscoveryBase.Status = "error"
ruleResponse.DiscoveryBase.Error = fmt.Sprintf("failure getting rules for group %s: %s", groupId, err.Error())
ruleResponse.DiscoveryBase.ErrorType = apiv1.ErrServer
return response.JSON(http.StatusInternalServerError, ruleResponse)
}
newGroup := &apimodels.RuleGroup{
Name: groupId,
// This doesn't make sense in our architecture
// so we use this field for passing to the frontend the namespace
File: namespace,
LastEvaluation: time.Time{},
EvaluationTime: 0, // TODO: see if we are able to pass this along with evaluation results
}
for _, rule := range alertRuleQuery.Result {
instanceQuery := ngmodels.ListAlertInstancesQuery{
DefinitionOrgID: c.SignedInUser.OrgId,
DefinitionUID: rule.UID,
}
if err := srv.store.ListAlertInstances(&instanceQuery); err != nil {
ruleResponse.DiscoveryBase.Status = "error"
ruleResponse.DiscoveryBase.Error = fmt.Sprintf("failure getting alerts for rule %s: %s", rule.UID, err.Error())
ruleResponse.DiscoveryBase.ErrorType = apiv1.ErrServer
return response.JSON(http.StatusInternalServerError, ruleResponse)
}
alertingRule := apimodels.AlertingRule{
State: "inactive",
Name: rule.Title,
Query: rule.DataToString(), // TODO: don't escape <>& etc
Duration: rule.For.Seconds(),
Annotations: rule.Annotations,
}
newRule := apimodels.Rule{
Name: rule.Title,
Labels: nil, // TODO: NG AlertRule does not have labels but does have annotations
Health: "ok", // TODO: update this in the future when error and noData states are being evaluated and set
Type: apiv1.RuleTypeAlerting,
LastEvaluation: time.Time{}, // TODO: set this to be rule evaluation time once it is being set
EvaluationTime: 0, // TODO: set this once we are saving it or adding it to evaluation results
}
for _, instance := range instanceQuery.Result {
activeAt := instance.CurrentStateSince
alert := &apimodels.Alert{
Labels: map[string]string(instance.Labels),
Annotations: nil, // TODO: set these once they are added to evaluation results
State: translateInstanceState(instance.CurrentState),
ActiveAt: &activeAt,
Value: "", // TODO: set this once it is added to the evaluation results
}
if instance.LastEvalTime.After(newRule.LastEvaluation) {
newRule.LastEvaluation = instance.LastEvalTime
newGroup.LastEvaluation = instance.LastEvalTime
}
switch alert.State {
case "pending":
if alertingRule.State == "inactive" {
alertingRule.State = "pending"
}
case "firing":
alertingRule.State = "firing"
}
alertingRule.Alerts = append(alertingRule.Alerts, alert)
}
alertingRule.Rule = newRule
newGroup.Rules = append(newGroup.Rules, alertingRule)
newGroup.Interval = float64(rule.IntervalSeconds)
}
ruleResponse.Data.RuleGroups = append(ruleResponse.Data.RuleGroups, newGroup)
}
return response.JSON(http.StatusOK, ruleResponse)
}
func translateInstanceState(state ngmodels.InstanceStateType) string {
switch {
case state == ngmodels.InstanceStateFiring:
return "firing"
case state == ngmodels.InstanceStateNormal:
return "inactive"
case state == ngmodels.InstanceStatePending:
return "pending"
default:
return "inactive"
}
}