grafana/pkg/services/ngalert/api/api_prometheus.go

181 lines
5.8 KiB
Go
Raw Normal View History

package api
import (
"encoding/json"
"fmt"
"net/http"
"time"
"github.com/grafana/grafana/pkg/services/ngalert/eval"
apiv1 "github.com/prometheus/client_golang/api/prometheus/v1"
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/store"
"github.com/grafana/grafana/pkg/api/response"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/models"
Inhouse alerting api (#33129) * init * autogens AM route * POST dashboards/db spec * POST alert-notifications spec * fix description * re inits vendor, updates grafana to master * go mod updates * alerting routes * renames to receivers * prometheus endpoints * align config endpoint with cortex, include templates * Change grafana receiver type * Update receivers.go * rename struct to stop swagger thrashing * add rules API * index html * standalone swagger ui html page * Update README.md * Expose GrafanaManagedAlert properties * Some fixes - /api/v1/rules/{Namespace} should return a map - update ExtendedUpsertAlertDefinitionCommand properties * am alerts routes * rename prom swagger section for clarity, remove example endpoints * Add missing json and yaml tags * folder perms * make folders POST again * fix grafana receiver type * rename fodler->namespace for perms * make ruler json again * PR fixes * silences * fix Ok -> Ack * Add id to POST /api/v1/silences (#9) Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in> * Add POST /api/v1/alerts (#10) Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in> * fix silences * Add testing endpoints * removes grpc replace directives * [wip] starts validation * pkg cleanup * go mod tidy * ignores vendor dir * Change response type for Cortex/Loki alerts * receiver unmarshaling tests * ability to split routes between AM & Grafana * api marshaling & validation * begins work on routing lib * [hack] ignores embedded field in generation * path specific datasource for alerting * align endpoint names with cloud * single route per Alerting config * removes unused routing pkg * regens spec * adds datasource param to ruler/prom route paths * Modifications for supporting migration * Apply suggestions from code review * hack for cleaning circular refs in swagger definition * generates files * minor fixes for prom endpoints * decorate prom apis with required: true where applicable * Revert "generates files" This reverts commit ef7e97558477d79bcad416e043b04dbd04a2c8f7. * removes server autogen * Update imported structs from ngalert * Fix listing rules response * Update github.com/prometheus/common dependency * Update get silence response * Update get silences response * adds ruler validation & backend switching * Fix GET /alertmanager/{DatasourceId}/config/api/v1/alerts response * Distinct gettable and postable grafana receivers * Remove permissions routes * Latest JSON specs * Fix testing routes * inline yaml annotation on apirulenode * yaml test & yamlv3 + comments * Fix yaml annotations for embedded type * Rename DatasourceId path parameter * Implement Backend.String() * backend zero value is a real backend * exports DiscoveryBase * Fix GO initialisms * Silences: Use PostableSilence as the base struct for creating silences * Use type alias instead of struct embedding * More fixes to alertmanager silencing routes * post and spec JSONs * Split rule config to postable/gettable * Fix empty POST /silences payload Recreating the generated JSON specs fixes the issue without further modifications * better yaml unmarshaling for nested yaml docs in cortex-am configs * regens spec * re-adds config.receivers * omitempty to align with prometheus API behavior * Prefix routes with /api * Update Alertmanager models * Make adjustments to follow the Alertmanager API * ruler: add for and annotations to grafana alert (#45) * Modify testing API routes * Fix grafana rule for field type * Move PostableUserConfig validation to this library * Fix PostableUserConfig YAML encoding/decoding * Use common fields for grafana and lotex rules * Add namespace id in GettableGrafanaRule * Apply suggestions from code review * fixup * more changes * Apply suggestions from code review * aligns structure pre merge * fix new imports & tests * updates tooling readme * goimports * lint * more linting!! * revive lint Co-authored-by: Sofia Papagiannaki <papagian@gmail.com> Co-authored-by: Domas <domasx2@gmail.com> Co-authored-by: Sofia Papagiannaki <papagian@users.noreply.github.com> Co-authored-by: Ganesh Vernekar <15064823+codesome@users.noreply.github.com> Co-authored-by: gotjosh <josue@grafana.com> Co-authored-by: David Parrott <stomp.box.yo@gmail.com> Co-authored-by: Kyle Brandt <kyle@grafana.com>
2021-04-19 13:26:04 -05:00
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
"github.com/grafana/grafana/pkg/services/ngalert/state"
)
type PrometheusSrv struct {
log log.Logger
manager *state.Manager
store store.RuleStore
}
func (srv PrometheusSrv) RouteGetAlertStatuses(c *models.ReqContext) response.Response {
alertResponse := apimodels.AlertResponse{
DiscoveryBase: apimodels.DiscoveryBase{
Status: "success",
},
Data: apimodels.AlertDiscovery{
Alerts: []*apimodels.Alert{},
},
}
for _, alertState := range srv.manager.GetAll(c.OrgId) {
startsAt := alertState.StartsAt
valString := ""
if len(alertState.Results) > 0 && alertState.State == eval.Alerting {
valString = alertState.Results[0].EvaluationString
}
alertResponse.Data.Alerts = append(alertResponse.Data.Alerts, &apimodels.Alert{
Labels: map[string]string(alertState.Labels),
Annotations: map[string]string{}, //TODO: Once annotations are added to the evaluation result, set them here
State: alertState.State.String(),
ActiveAt: &startsAt,
Value: valString,
})
}
return response.JSON(http.StatusOK, alertResponse)
}
func (srv PrometheusSrv) RouteGetRuleStatuses(c *models.ReqContext) response.Response {
ruleResponse := apimodels.RuleResponse{
DiscoveryBase: apimodels.DiscoveryBase{
Status: "success",
},
Data: apimodels.RuleDiscovery{
RuleGroups: []*apimodels.RuleGroup{},
},
}
namespaceMap, err := srv.store.GetNamespaces(c.Req.Context(), c.OrgId, c.SignedInUser)
if err != nil {
return ErrResp(http.StatusInternalServerError, err, "failed to get namespaces visible to the user")
}
namespaceUIDs := make([]string, len(namespaceMap))
for k := range namespaceMap {
namespaceUIDs = append(namespaceUIDs, k)
}
ruleGroupQuery := ngmodels.ListOrgRuleGroupsQuery{
OrgID: c.SignedInUser.OrgId,
NamespaceUIDs: namespaceUIDs,
}
if err := srv.store.GetOrgRuleGroups(&ruleGroupQuery); err != nil {
ruleResponse.DiscoveryBase.Status = "error"
ruleResponse.DiscoveryBase.Error = fmt.Sprintf("failure getting rule groups: %s", err.Error())
ruleResponse.DiscoveryBase.ErrorType = apiv1.ErrServer
return response.JSON(http.StatusInternalServerError, ruleResponse)
}
for _, r := range ruleGroupQuery.Result {
if len(r) < 3 {
continue
}
groupId, namespaceUID, namespace := r[0], r[1], r[2]
alertRuleQuery := ngmodels.ListRuleGroupAlertRulesQuery{OrgID: c.SignedInUser.OrgId, NamespaceUID: namespaceUID, RuleGroup: groupId}
if err := srv.store.GetRuleGroupAlertRules(&alertRuleQuery); err != nil {
ruleResponse.DiscoveryBase.Status = "error"
ruleResponse.DiscoveryBase.Error = fmt.Sprintf("failure getting rules for group %s: %s", groupId, err.Error())
ruleResponse.DiscoveryBase.ErrorType = apiv1.ErrServer
return response.JSON(http.StatusInternalServerError, ruleResponse)
}
newGroup := &apimodels.RuleGroup{
Name: groupId,
// This doesn't make sense in our architecture
Alerting: set query in rules response (#33010) * set query in rules response * Theme: tweaking dark theme colors (#33007) * Library Panels: Add library panel tab to share modal (#32953) * Explore: Scroll split panes in Explore independently (#32978) * Change default prometheus to latest and prometheus v1 to prometheus1 * Update README * Remove prometheus1 block as not used * Explore: Separatae scrolling in split view * Update snapshot * Allow skip migrations in tests via environment variable (#32958) * Dashboard: Fix issue where Slack notifications won't link to users (#32861) * DashboardPage: refactored styles from sass to emotion (#32955) * DashboardPage: refactored styles from sass to emotion * refactored dashboardPage component to be alot easier to read and understand * more refactoring... * more cleaning... * fixes frontend test * fixes frontend test- I hope * fixes frontend test- I hope * moves dashboard scss styles back to it's standalone file * GraphNG: use theme font family and size for axis labels (#33009) * GraphNG: use theme font family and size for axis labels * fix test * AlertingNG: Slack notification channel (#32675) * AlertingNG: Slack notification channel Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * Add tests Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * Fix review comments Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * Fix review comments and small refactoring Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * GraphNG: stacking (#30749) * First iteration * Dev dash * Re-use StackingMode type * Fix ts and api issues * Stacking work resurected * Fix overrides * Correct values in tooltip and updated test dashboard * Update dev dashboard * Apply correct bands for stacking * Merge fix * Update snapshot * Revert go.sum * Handle null values correctyl and make filleBelowTo and stacking mutual exclusive * Snapshots update * Graph->Time series stacking migration * Review comments * Indicate overrides in StandardEditorContext * Change stacking UI editor, migrate stacking to object option * Small refactor, fix for hiding series and dev dashboard * VizLegend: sets a min and max value of the seriesCount control in Storybook (#33022) * Alerting: Filter rules list (#32818) * Chore: Reduces strict errors (#33012) * Chore: reduces strict error in OptionPicker tests * Chore: reduces strict errors in FormDropdownCtrl * Chore: reduces has no initializer and is not definitely assigned in the constructor errors * Chore: reduces has no initializer and is not definitely assigned in the constructor errors * Chore: lowers strict count limit * Tests: updates snapshots * Tests: updates snapshots * Chore: updates after PR comments * Refactor: removes throw and changes signature for DashboardSrv.getCurrent * [Alerting]: Several modifications in alert rules (#32983) * [Alerting]: Use common properties for all rules * Add Labels in rules * Fix update ruleGroup API Return 400 Bad Request response when the request contains a UID that does not exist * Check permissions and return namespace id * Apply suggestions from code review Co-authored-by: gotjosh <josue@grafana.com> * WIP (#33025) * Chore: Bump strict error count limit (#33035) * set query in rules response Co-authored-by: Torkel Ödegaard <torkel@grafana.org> Co-authored-by: kay delaney <45561153+kaydelaney@users.noreply.github.com> Co-authored-by: Ivana Huckova <30407135+ivanahuckova@users.noreply.github.com> Co-authored-by: Dafydd <72009875+dafydd-t@users.noreply.github.com> Co-authored-by: n-wbrown <n-wbrown@users.noreply.github.com> Co-authored-by: Uchechukwu Obasi <obasiuche62@gmail.com> Co-authored-by: Leon Sorokin <leeoniya@gmail.com> Co-authored-by: Ganesh Vernekar <15064823+codesome@users.noreply.github.com> Co-authored-by: Dominik Prokop <dominik.prokop@grafana.com> Co-authored-by: Nathan Rodman <nathanrodman@gmail.com> Co-authored-by: Hugo Häggmark <hugo.haggmark@grafana.com> Co-authored-by: Sofia Papagiannaki <papagian@users.noreply.github.com> Co-authored-by: gotjosh <josue@grafana.com> Co-authored-by: Marcus Efraimsson <marcus.efraimsson@gmail.com>
2021-04-15 15:23:16 -05:00
// so we use this field for passing to the frontend the namespace
File: namespace,
LastEvaluation: time.Time{},
EvaluationTime: 0, // TODO: see if we are able to pass this along with evaluation results
}
for _, rule := range alertRuleQuery.Result {
var queryStr string
encodedQuery, err := json.Marshal(rule.Data)
if err != nil {
queryStr = err.Error()
} else {
queryStr = string(encodedQuery)
}
alertingRule := apimodels.AlertingRule{
State: "inactive",
Name: rule.Title,
Query: queryStr,
Duration: rule.For.Seconds(),
Annotations: rule.Annotations,
}
newRule := apimodels.Rule{
Name: rule.Title,
Labels: rule.Labels,
Health: "ok",
Type: apiv1.RuleTypeAlerting,
LastEvaluation: time.Time{},
}
for _, alertState := range srv.manager.GetStatesForRuleUID(c.OrgId, rule.UID) {
activeAt := alertState.StartsAt
valString := ""
if len(alertState.Results) > 0 && alertState.State == eval.Alerting {
valString = alertState.Results[0].EvaluationString
}
alert := &apimodels.Alert{
Labels: map[string]string(alertState.Labels),
Annotations: alertState.Annotations,
State: alertState.State.String(),
ActiveAt: &activeAt,
Value: valString, // TODO: set this once it is added to the evaluation results
}
if alertState.LastEvaluationTime.After(newRule.LastEvaluation) {
newRule.LastEvaluation = alertState.LastEvaluationTime
newGroup.LastEvaluation = alertState.LastEvaluationTime
}
newRule.EvaluationTime = alertState.EvaluationDuration.Seconds()
switch alertState.State {
case eval.Normal:
case eval.Pending:
if alertingRule.State == "inactive" {
alertingRule.State = "pending"
}
case eval.Alerting:
alertingRule.State = "firing"
case eval.Error:
newRule.Health = "error"
case eval.NoData:
newRule.Health = "nodata"
}
if alertState.Error != nil {
newRule.LastError = alertState.Error.Error()
newRule.Health = "error"
}
alertingRule.Alerts = append(alertingRule.Alerts, alert)
}
alertingRule.Rule = newRule
newGroup.Rules = append(newGroup.Rules, alertingRule)
newGroup.Interval = float64(rule.IntervalSeconds)
}
ruleResponse.Data.RuleGroups = append(ruleResponse.Data.RuleGroups, newGroup)
}
return response.JSON(http.StatusOK, ruleResponse)
}