2021-04-05 17:05:39 -05:00
package api
import (
2021-04-27 15:15:00 -05:00
"encoding/json"
2021-10-04 10:33:55 -05:00
"errors"
2021-04-13 16:38:09 -05:00
"fmt"
2021-04-05 17:05:39 -05:00
"net/http"
2022-04-05 13:36:42 -05:00
"sort"
2021-10-04 10:33:55 -05:00
"strconv"
"strings"
2021-04-13 16:38:09 -05:00
"time"
2023-04-17 11:45:06 -05:00
"github.com/prometheus/alertmanager/pkg/labels"
2023-01-30 02:55:35 -06:00
apiv1 "github.com/prometheus/client_golang/api/prometheus/v1"
2021-04-05 17:05:39 -05:00
"github.com/grafana/grafana/pkg/api/response"
"github.com/grafana/grafana/pkg/infra/log"
2022-04-11 16:37:44 -05:00
"github.com/grafana/grafana/pkg/services/accesscontrol"
2023-01-27 01:50:36 -06:00
contextmodel "github.com/grafana/grafana/pkg/services/contexthandler/model"
2022-11-11 07:28:24 -06:00
"github.com/grafana/grafana/pkg/services/folder"
2021-04-19 13:26:04 -05:00
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
2022-03-16 11:04:19 -05:00
"github.com/grafana/grafana/pkg/services/ngalert/eval"
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
2021-04-05 17:05:39 -05:00
"github.com/grafana/grafana/pkg/services/ngalert/state"
2023-04-17 11:45:06 -05:00
"github.com/grafana/grafana/pkg/util"
2021-04-05 17:05:39 -05:00
)
type PrometheusSrv struct {
2021-04-23 14:32:25 -05:00
log log . Logger
2022-03-09 12:20:29 -06:00
manager state . AlertInstanceManager
2022-09-27 08:56:30 -05:00
store RuleStore
2022-04-11 16:37:44 -05:00
ac accesscontrol . AccessControl
2021-04-05 17:05:39 -05:00
}
2022-03-16 11:04:19 -05:00
// queryIncludeInternalLabels is the name of the boolean query parameter that
// controls whether internal labels are kept in responses. The handlers in this
// file default it to false, in which case internal labels are stripped.
const queryIncludeInternalLabels = "includeInternalLabels"
2023-01-27 01:50:36 -06:00
func ( srv PrometheusSrv ) RouteGetAlertStatuses ( c * contextmodel . ReqContext ) response . Response {
2021-04-05 17:05:39 -05:00
alertResponse := apimodels . AlertResponse {
DiscoveryBase : apimodels . DiscoveryBase {
Status : "success" ,
} ,
Data : apimodels . AlertDiscovery {
Alerts : [ ] * apimodels . Alert { } ,
} ,
}
2022-03-16 11:04:19 -05:00
var labelOptions [ ] ngmodels . LabelOption
if ! c . QueryBoolWithDefault ( queryIncludeInternalLabels , false ) {
labelOptions = append ( labelOptions , ngmodels . WithoutInternalLabels ( ) )
}
2022-08-11 06:28:55 -05:00
for _ , alertState := range srv . manager . GetAll ( c . OrgID ) {
2021-04-05 17:05:39 -05:00
startsAt := alertState . StartsAt
2021-05-18 08:12:39 -05:00
valString := ""
2022-04-05 13:36:42 -05:00
if alertState . State == eval . Alerting || alertState . State == eval . Pending {
valString = formatValues ( alertState )
2021-05-18 08:12:39 -05:00
}
2022-03-09 12:20:29 -06:00
2021-04-05 17:05:39 -05:00
alertResponse . Data . Alerts = append ( alertResponse . Data . Alerts , & apimodels . Alert {
2022-03-16 11:04:19 -05:00
Labels : alertState . GetLabels ( labelOptions ... ) ,
2022-03-09 12:20:29 -06:00
Annotations : alertState . Annotations ,
2022-05-23 03:49:49 -05:00
// TODO: or should we make this two fields? Using one field lets the
// frontend use the same logic for parsing text on annotations and this.
2022-11-04 10:39:26 -05:00
State : state . FormatStateAndReason ( alertState . State , alertState . StateReason ) ,
2022-05-23 03:49:49 -05:00
ActiveAt : & startsAt ,
Value : valString ,
2021-04-05 17:05:39 -05:00
} )
}
2022-03-09 12:20:29 -06:00
2021-04-05 17:05:39 -05:00
return response . JSON ( http . StatusOK , alertResponse )
}
2022-04-05 13:36:42 -05:00
// formatValues renders the last evaluation values of the condition of
// alertState as a display string.
//
//   - no values: the state's LastEvaluationString is returned as is;
//   - one value: only the number is returned, without its key;
//   - several values: "key: number" pairs joined with ", ".
//
// Numbers are formatted in exponent notation with the smallest precision that
// represents them exactly.
func formatValues(alertState *state.State) string {
	values := alertState.GetLastEvaluationValuesForCondition()

	if len(values) == 0 {
		return alertState.LastEvaluationString
	}

	if len(values) == 1 {
		// Ranging is the only way to reach the single entry of a map.
		for _, only := range values {
			return strconv.FormatFloat(only, 'e', -1, 64)
		}
	}

	pairs := make([]string, 0, len(values))
	for name, number := range values {
		pairs = append(pairs, fmt.Sprintf("%s: %s", name, strconv.FormatFloat(number, 'e', -1, 64)))
	}
	// Map iteration order is not stable, so sort the formatted pairs to get a
	// consistent (lexicographic) ordering, e.g. A0, A1, A10, A11, A3, etc.
	sort.Strings(pairs)
	return strings.Join(pairs, ", ")
}
2021-10-04 10:33:55 -05:00
// getPanelIDFromRequest reads the "panel_id" query parameter of r.
//
// It returns 0 and no error when the parameter is missing or contains only
// whitespace. Otherwise the trimmed value is parsed as a base-10 int64 and any
// parse error is returned to the caller.
func getPanelIDFromRequest(r *http.Request) (int64, error) {
	raw := strings.TrimSpace(r.URL.Query().Get("panel_id"))
	if raw == "" {
		return 0, nil
	}
	return strconv.ParseInt(raw, 10, 64)
}
2023-04-17 11:45:06 -05:00
// getMatchersFromRequest decodes every "matcher" query parameter of r as a
// JSON-encoded label matcher.
//
// It returns nil matchers and an error as soon as one value fails to decode or
// decodes to a matcher with an empty name. A request without "matcher"
// parameters yields nil matchers and no error.
func getMatchersFromRequest(r *http.Request) (labels.Matchers, error) {
	var parsed labels.Matchers
	for _, raw := range r.URL.Query()["matcher"] {
		// Allocate a fresh matcher per value; the slice stores pointers.
		matcher := new(labels.Matcher)
		if err := json.Unmarshal([]byte(raw), matcher); err != nil {
			return nil, err
		}
		if matcher.Name == "" {
			return nil, errors.New("bad matcher: the name cannot be blank")
		}
		parsed = append(parsed, matcher)
	}
	return parsed, nil
}
// getStatesFromRequest translates every "state" query parameter of r into an
// evaluation state. Matching is case-insensitive and accepts both the Grafana
// and the Prometheus spelling where they differ: "normal"/"inactive" and
// "alerting"/"firing", plus "pending", "nodata" and "error".
//
// On the first unrecognized value it returns the states parsed so far together
// with an error naming the lower-cased value.
func getStatesFromRequest(r *http.Request) ([]eval.State, error) {
	var parsed []eval.State
	for _, raw := range r.URL.Query()["state"] {
		name := strings.ToLower(raw)

		var mapped eval.State
		switch name {
		case "inactive", "normal":
			mapped = eval.Normal
		case "firing", "alerting":
			mapped = eval.Alerting
		case "pending":
			mapped = eval.Pending
		case "nodata":
			mapped = eval.NoData
		// nolint:goconst
		case "error":
			mapped = eval.Error
		default:
			return parsed, fmt.Errorf("unknown state '%s'", name)
		}
		parsed = append(parsed, mapped)
	}
	return parsed, nil
}
2023-01-27 01:50:36 -06:00
func ( srv PrometheusSrv ) RouteGetRuleStatuses ( c * contextmodel . ReqContext ) response . Response {
2022-04-11 09:54:29 -05:00
dashboardUID := c . Query ( "dashboard_uid" )
panelID , err := getPanelIDFromRequest ( c . Req )
if err != nil {
return ErrResp ( http . StatusBadRequest , err , "invalid panel_id" )
}
if dashboardUID == "" && panelID != 0 {
return ErrResp ( http . StatusBadRequest , errors . New ( "panel_id must be set with dashboard_uid" ) , "" )
}
2023-04-17 11:45:06 -05:00
limitGroups := c . QueryInt64WithDefault ( "limit" , - 1 )
limitRulesPerGroup := c . QueryInt64WithDefault ( "limit_rules" , - 1 )
limitAlertsPerRule := c . QueryInt64WithDefault ( "limit_alerts" , - 1 )
matchers , err := getMatchersFromRequest ( c . Req )
if err != nil {
return ErrResp ( http . StatusBadRequest , err , "" )
}
withStates , err := getStatesFromRequest ( c . Req )
if err != nil {
return ErrResp ( http . StatusBadRequest , err , "" )
}
withStatesFast := make ( map [ eval . State ] struct { } )
for _ , state := range withStates {
withStatesFast [ state ] = struct { } { }
}
2021-04-13 16:38:09 -05:00
ruleResponse := apimodels . RuleResponse {
DiscoveryBase : apimodels . DiscoveryBase {
Status : "success" ,
} ,
2021-04-23 13:47:52 -05:00
Data : apimodels . RuleDiscovery {
2023-04-17 11:45:06 -05:00
RuleGroups : [ ] apimodels . RuleGroup { } ,
2021-04-23 13:47:52 -05:00
} ,
2021-04-13 16:38:09 -05:00
}
2022-03-16 11:04:19 -05:00
var labelOptions [ ] ngmodels . LabelOption
if ! c . QueryBoolWithDefault ( queryIncludeInternalLabels , false ) {
labelOptions = append ( labelOptions , ngmodels . WithoutInternalLabels ( ) )
}
2022-08-11 06:28:55 -05:00
namespaceMap , err := srv . store . GetUserVisibleNamespaces ( c . Req . Context ( ) , c . OrgID , c . SignedInUser )
2021-07-22 01:53:14 -05:00
if err != nil {
return ErrResp ( http . StatusInternalServerError , err , "failed to get namespaces visible to the user" )
}
2021-11-08 07:26:08 -06:00
if len ( namespaceMap ) == 0 {
2022-06-07 12:54:23 -05:00
srv . log . Debug ( "user does not have access to any namespaces" )
2021-11-08 07:26:08 -06:00
return response . JSON ( http . StatusOK , ruleResponse )
}
2021-07-22 01:53:14 -05:00
namespaceUIDs := make ( [ ] string , len ( namespaceMap ) )
for k := range namespaceMap {
namespaceUIDs = append ( namespaceUIDs , k )
}
2022-04-11 09:54:29 -05:00
alertRuleQuery := ngmodels . ListAlertRulesQuery {
2022-08-11 06:28:55 -05:00
OrgID : c . SignedInUser . OrgID ,
2021-07-22 01:53:14 -05:00
NamespaceUIDs : namespaceUIDs ,
2021-10-04 10:33:55 -05:00
DashboardUID : dashboardUID ,
PanelID : panelID ,
2021-04-13 16:38:09 -05:00
}
2023-03-28 03:34:35 -05:00
ruleList , err := srv . store . ListAlertRules ( c . Req . Context ( ) , & alertRuleQuery )
if err != nil {
2021-10-01 08:39:04 -05:00
ruleResponse . DiscoveryBase . Status = "error"
ruleResponse . DiscoveryBase . Error = fmt . Sprintf ( "failure getting rules: %s" , err . Error ( ) )
ruleResponse . DiscoveryBase . ErrorType = apiv1 . ErrServer
return response . JSON ( http . StatusInternalServerError , ruleResponse )
}
2022-04-11 16:37:44 -05:00
hasAccess := func ( evaluator accesscontrol . Evaluator ) bool {
2023-05-30 08:39:09 -05:00
return accesscontrol . HasAccess ( srv . ac , c ) ( evaluator )
2022-04-11 16:37:44 -05:00
}
2021-10-01 08:39:04 -05:00
2023-04-17 11:45:06 -05:00
// Group rules together by Namespace and Rule Group. Rules are also grouped by Org ID,
// but in this API all rules belong to the same organization.
2022-05-16 14:45:45 -05:00
groupedRules := make ( map [ ngmodels . AlertRuleGroupKey ] [ ] * ngmodels . AlertRule )
2023-03-28 03:34:35 -05:00
for _ , rule := range ruleList {
2023-04-17 11:45:06 -05:00
groupKey := rule . GetGroupKey ( )
ruleGroup := groupedRules [ groupKey ]
ruleGroup = append ( ruleGroup , rule )
groupedRules [ groupKey ] = ruleGroup
}
// Sort the rules in each rule group by index. We do this at the end instead of
// after each append to avoid having to sort each group multiple times.
for _ , groupRules := range groupedRules {
ngmodels . AlertRulesBy ( ngmodels . AlertRulesByIndex ) . Sort ( groupRules )
2022-05-16 14:45:45 -05:00
}
2023-04-17 11:45:06 -05:00
rulesTotals := make ( map [ string ] int64 , len ( groupedRules ) )
2022-05-16 14:45:45 -05:00
for groupKey , rules := range groupedRules {
folder := namespaceMap [ groupKey . NamespaceUID ]
if folder == nil {
srv . log . Warn ( "query returned rules that belong to folder the user does not have access to. All rules that belong to that namespace will not be added to the response" , "folder_uid" , groupKey . NamespaceUID )
continue
2021-10-01 08:39:04 -05:00
}
2022-06-01 09:23:54 -05:00
if ! authorizeAccessToRuleGroup ( rules , hasAccess ) {
continue
}
2023-04-17 11:45:06 -05:00
ruleGroup , totals := srv . toRuleGroup ( groupKey , folder , rules , limitAlertsPerRule , withStatesFast , matchers , labelOptions )
ruleGroup . Totals = totals
for k , v := range totals {
rulesTotals [ k ] += v
}
if len ( withStates ) > 0 {
// Filtering is weird but firing, pending, and normal filters also need to be
// applied to the rule. Others such as nodata and error should have no effect.
// This is to match the current behavior in the UI.
filteredRules := make ( [ ] apimodels . AlertingRule , 0 , len ( ruleGroup . Rules ) )
for _ , rule := range ruleGroup . Rules {
var state * eval . State
switch rule . State {
case "normal" , "inactive" :
state = util . Pointer ( eval . Normal )
case "alerting" , "firing" :
state = util . Pointer ( eval . Alerting )
case "pending" :
state = util . Pointer ( eval . Pending )
}
if state != nil {
if _ , ok := withStatesFast [ * state ] ; ok {
filteredRules = append ( filteredRules , rule )
}
}
}
ruleGroup . Rules = filteredRules
}
if limitRulesPerGroup > - 1 && int64 ( len ( ruleGroup . Rules ) ) > limitRulesPerGroup {
ruleGroup . Rules = ruleGroup . Rules [ 0 : limitRulesPerGroup ]
}
ruleResponse . Data . RuleGroups = append ( ruleResponse . Data . RuleGroups , * ruleGroup )
2022-05-16 14:45:45 -05:00
}
2023-04-17 11:45:06 -05:00
ruleResponse . Data . Totals = rulesTotals
// Sort Rule Groups before checking limits
apimodels . RuleGroupsBy ( apimodels . RuleGroupsByFileAndName ) . Sort ( ruleResponse . Data . RuleGroups )
if limitGroups > - 1 && int64 ( len ( ruleResponse . Data . RuleGroups ) ) >= limitGroups {
ruleResponse . Data . RuleGroups = ruleResponse . Data . RuleGroups [ 0 : limitGroups ]
}
2022-05-16 14:45:45 -05:00
return response . JSON ( http . StatusOK , ruleResponse )
}
2023-04-17 11:45:06 -05:00
// matchersMatch reports whether every matcher accepts the value stored in
// labels under the matcher's name; a missing label is matched against the
// empty string. With no matchers it returns true.
//
// This is the same as matchers.Matches but avoids the need to create a LabelSet.
func matchersMatch(matchers []*labels.Matcher, labels map[string]string) bool {
	for i := range matchers {
		if matcher := matchers[i]; !matcher.Matches(labels[matcher.Name]) {
			return false
		}
	}
	return true
}
func ( srv PrometheusSrv ) toRuleGroup ( groupKey ngmodels . AlertRuleGroupKey , folder * folder . Folder , rules [ ] * ngmodels . AlertRule , limitAlerts int64 , withStates map [ eval . State ] struct { } , matchers labels . Matchers , labelOptions [ ] ngmodels . LabelOption ) ( * apimodels . RuleGroup , map [ string ] int64 ) {
2022-05-16 14:45:45 -05:00
newGroup := & apimodels . RuleGroup {
2023-04-17 11:45:06 -05:00
Name : groupKey . RuleGroup ,
// file is what Prometheus uses for provisioning, we replace it with namespace which is the folder in Grafana.
File : folder . Title ,
2022-05-16 14:45:45 -05:00
}
2023-04-17 11:45:06 -05:00
rulesTotals := make ( map [ string ] int64 , len ( rules ) )
2022-06-22 09:52:46 -05:00
ngmodels . RulesGroup ( rules ) . SortByGroupIndex ( )
2022-05-16 14:45:45 -05:00
for _ , rule := range rules {
2021-10-01 08:39:04 -05:00
alertingRule := apimodels . AlertingRule {
State : "inactive" ,
Name : rule . Title ,
2022-03-14 05:39:20 -05:00
Query : ruleToQuery ( srv . log , rule ) ,
2021-10-01 08:39:04 -05:00
Duration : rule . For . Seconds ( ) ,
Annotations : rule . Annotations ,
}
newRule := apimodels . Rule {
Name : rule . Title ,
2022-03-16 11:04:19 -05:00
Labels : rule . GetLabels ( labelOptions ... ) ,
2021-10-01 08:39:04 -05:00
Health : "ok" ,
Type : apiv1 . RuleTypeAlerting ,
LastEvaluation : time . Time { } ,
}
2023-04-17 11:45:06 -05:00
states := srv . manager . GetStatesForRuleUID ( rule . OrgID , rule . UID )
totals := make ( map [ string ] int64 )
2023-04-21 03:35:12 -05:00
totalsFiltered := make ( map [ string ] int64 )
2023-04-17 11:45:06 -05:00
for _ , alertState := range states {
2021-10-01 08:39:04 -05:00
activeAt := alertState . StartsAt
valString := ""
2022-04-05 13:36:42 -05:00
if alertState . State == eval . Alerting || alertState . State == eval . Pending {
valString = formatValues ( alertState )
2021-04-27 15:15:00 -05:00
}
2023-04-21 03:35:12 -05:00
stateKey := strings . ToLower ( alertState . State . String ( ) )
totals [ stateKey ] += 1
2023-04-17 11:45:06 -05:00
// Do not add error twice when execution error state is Error
if alertState . Error != nil && rule . ExecErrState != ngmodels . ErrorErrState {
totals [ "error" ] += 1
}
alert := apimodels . Alert {
2022-03-16 11:04:19 -05:00
Labels : alertState . GetLabels ( labelOptions ... ) ,
2021-10-01 08:39:04 -05:00
Annotations : alertState . Annotations ,
2022-05-23 03:49:49 -05:00
// TODO: or should we make this two fields? Using one field lets the
// frontend use the same logic for parsing text on annotations and this.
2022-11-04 10:39:26 -05:00
State : state . FormatStateAndReason ( alertState . State , alertState . StateReason ) ,
2022-05-23 03:49:49 -05:00
ActiveAt : & activeAt ,
Value : valString ,
2021-04-13 16:38:09 -05:00
}
2021-10-01 08:39:04 -05:00
if alertState . LastEvaluationTime . After ( newRule . LastEvaluation ) {
newRule . LastEvaluation = alertState . LastEvaluationTime
2021-04-13 16:38:09 -05:00
}
2021-04-21 11:30:03 -05:00
2021-10-01 08:39:04 -05:00
newRule . EvaluationTime = alertState . EvaluationDuration . Seconds ( )
2021-05-04 12:08:12 -05:00
2021-10-01 08:39:04 -05:00
switch alertState . State {
case eval . Normal :
case eval . Pending :
if alertingRule . State == "inactive" {
alertingRule . State = "pending"
2021-05-04 12:08:12 -05:00
}
2021-10-01 08:39:04 -05:00
case eval . Alerting :
2023-04-17 11:45:06 -05:00
if alertingRule . ActiveAt == nil || alertingRule . ActiveAt . After ( activeAt ) {
alertingRule . ActiveAt = & activeAt
}
2021-10-01 08:39:04 -05:00
alertingRule . State = "firing"
case eval . Error :
newRule . Health = "error"
case eval . NoData :
newRule . Health = "nodata"
2021-04-13 16:38:09 -05:00
}
2021-04-21 11:30:03 -05:00
2021-10-01 08:39:04 -05:00
if alertState . Error != nil {
newRule . LastError = alertState . Error . Error ( )
newRule . Health = "error"
}
2022-03-14 05:39:20 -05:00
2023-04-17 11:45:06 -05:00
if len ( withStates ) > 0 {
if _ , ok := withStates [ alertState . State ] ; ! ok {
continue
}
}
if ! matchersMatch ( matchers , alertState . Labels ) {
continue
}
2023-04-21 03:35:12 -05:00
totalsFiltered [ stateKey ] += 1
// Do not add error twice when execution error state is Error
if alertState . Error != nil && rule . ExecErrState != ngmodels . ErrorErrState {
totalsFiltered [ "error" ] += 1
}
2021-10-01 08:39:04 -05:00
alertingRule . Alerts = append ( alertingRule . Alerts , alert )
2021-04-13 16:38:09 -05:00
}
2021-10-01 08:39:04 -05:00
2023-04-17 11:45:06 -05:00
if alertingRule . State != "" {
rulesTotals [ alertingRule . State ] += 1
}
if newRule . Health == "error" || newRule . Health == "nodata" {
rulesTotals [ newRule . Health ] += 1
}
apimodels . AlertsBy ( apimodels . AlertsByImportance ) . Sort ( alertingRule . Alerts )
if limitAlerts > - 1 && int64 ( len ( alertingRule . Alerts ) ) > limitAlerts {
alertingRule . Alerts = alertingRule . Alerts [ 0 : limitAlerts ]
}
2021-10-01 08:39:04 -05:00
alertingRule . Rule = newRule
2023-04-17 11:45:06 -05:00
alertingRule . Totals = totals
2023-04-21 03:35:12 -05:00
alertingRule . TotalsFiltered = totalsFiltered
2021-10-01 08:39:04 -05:00
newGroup . Rules = append ( newGroup . Rules , alertingRule )
newGroup . Interval = float64 ( rule . IntervalSeconds )
2022-05-16 14:45:45 -05:00
// TODO yuri. Change that when scheduler will process alerts in groups
2022-04-11 09:54:29 -05:00
newGroup . EvaluationTime = newRule . EvaluationTime
newGroup . LastEvaluation = newRule . LastEvaluation
2021-04-13 16:38:09 -05:00
}
2023-04-17 11:45:06 -05:00
return newGroup , rulesTotals
2021-04-13 16:38:09 -05:00
}
2022-03-14 05:39:20 -05:00
// ruleToQuery attempts to extract the datasource queries from the alert query model.
// Returns the whole JSON model as a string if it fails to extract a minimum of 1 query.
func ruleToQuery ( logger log . Logger , rule * ngmodels . AlertRule ) string {
var queryErr error
var queries [ ] string
for _ , q := range rule . Data {
q , err := q . GetQuery ( )
if err != nil {
// If we can't find the query simply omit it, and try the rest.
// Even single query alerts would have 2 `AlertQuery`, one for the query and one for the condition.
if errors . Is ( err , ngmodels . ErrNoQuery ) {
continue
}
// For any other type of error, it is unexpected abort and return the whole JSON.
2022-10-19 16:36:54 -05:00
logger . Debug ( "failed to parse a query" , "error" , err )
2022-03-14 05:39:20 -05:00
queryErr = err
break
}
queries = append ( queries , q )
}
// If we were able to extract at least one query without failure use it.
if queryErr == nil && len ( queries ) > 0 {
return strings . Join ( queries , " | " )
}
return encodedQueriesOrError ( rule . Data )
}
// encodedQueriesOrError encodes the rule query data as JSON and returns it as
// a string. If encoding fails, the text of the encoding error is returned
// instead.
func encodedQueriesOrError(rules []ngmodels.AlertQuery) string {
	payload, err := json.Marshal(rules)
	if err != nil {
		return err.Error()
	}
	return string(payload)
}