Storage Api: Add metrics (#85316)

* Storage server runs own instrumentation server if its the sole target. Starts adding some sample metrics for now.

* adds metric for failed optimistic locks

* refactors metrics registration to own method on service for testability. Adds tests.

* Register sql storage server metrics from within the service

* fixes test

* troubleshooting drone test failures. Maybe timing when starting instrumentation server?

* Waits until instrumentation server has started. Updates tests.

* defer wont get called unless theres an error. removing.

* wait for instrumentation server to be running

* linter - close res body

* use port 3000 for metrics and removes test metric inc() call

* fixes test - updates port

* refactors module server to provide an instrumentation server module when there is no ALL or CORE target provided and running as single target

* make instrumentation server a dependency of all modules that do not run their own http server

* adds module server test

* adds tests for instrumentation service and removes old tests that aren't needed

* ignore error in test

* uses helper to start and run service

* when running wait on ctx done or http server err

* wait for http server

* removes println

* updates module server test to be integration test

* require no error in goroutine

* skips integration test when GRAFANA_TEST_DB not defined

* move http server start into start, verify returned content

* make test error when run fails

* try waiting longer and see if drone tests pass

* update integration test mysql creds to match drone

* go back to only waiting half second

* debug log drone mysql connection string

* use same db connection config as drone

* try using same hostname as drone

* cant use localhost as mysql hostname in drone tests. Need to parse it from the cfg db connection string

---------

Co-authored-by: Dan Cech <dcech@grafana.com>
This commit is contained in:
owensmallwood
2024-04-08 08:35:01 -06:00
committed by GitHub
parent 3d9a0c8398
commit 37d39de36d
8 changed files with 277 additions and 10 deletions

View File

@@ -4,8 +4,7 @@ import (
"context"
"github.com/grafana/dskit/services"
"github.com/prometheus/client_golang/prometheus"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/infra/tracing"
"github.com/grafana/grafana/pkg/modules"
"github.com/grafana/grafana/pkg/registry"
@@ -17,6 +16,7 @@ import (
"github.com/grafana/grafana/pkg/services/store/entity/grpc"
"github.com/grafana/grafana/pkg/services/store/entity/sqlstash"
"github.com/grafana/grafana/pkg/setting"
"github.com/prometheus/client_golang/prometheus"
)
var (
@@ -51,11 +51,14 @@ type service struct {
tracing *tracing.TracingService
authenticator interceptors.Authenticator
log log.Logger
}
func ProvideService(
cfg *setting.Cfg,
features featuremgmt.FeatureToggles,
log log.Logger,
) (*service, error) {
tracingCfg, err := tracing.ProvideTracingConfig(cfg)
if err != nil {
@@ -76,6 +79,7 @@ func ProvideService(
stopCh: make(chan struct{}),
authenticator: authn,
tracing: tracing,
log: log,
}
// This will be used when running as a dskit service

View File

@@ -0,0 +1,41 @@
package sqlstash
import (
"sync"
"github.com/prometheus/client_golang/prometheus"
)
var (
once sync.Once
StorageServerMetrics *StorageApiMetrics
)
type StorageApiMetrics struct {
OptimisticLockFailed *prometheus.CounterVec
}
func NewStorageMetrics() *StorageApiMetrics {
once.Do(func() {
StorageServerMetrics = &StorageApiMetrics{
OptimisticLockFailed: prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "storage_server",
Name: "optimistic_lock_failed",
Help: "count of optimistic locks failed",
},
[]string{"action"},
),
}
})
return StorageServerMetrics
}
func (s *StorageApiMetrics) Collect(ch chan<- prometheus.Metric) {
s.OptimisticLockFailed.Collect(ch)
}
func (s *StorageApiMetrics) Describe(ch chan<- *prometheus.Desc) {
s.OptimisticLockFailed.Describe(ch)
}

View File

@@ -14,7 +14,6 @@ import (
"github.com/bwmarrin/snowflake"
"github.com/google/uuid"
folder "github.com/grafana/grafana/pkg/apis/folder/v0alpha1"
"github.com/grafana/grafana/pkg/infra/appcontext"
"github.com/grafana/grafana/pkg/infra/log"
@@ -23,6 +22,7 @@ import (
"github.com/grafana/grafana/pkg/services/store"
"github.com/grafana/grafana/pkg/services/store/entity"
"github.com/grafana/grafana/pkg/services/store/entity/db"
"github.com/prometheus/client_golang/prometheus"
)
const entityTable = "entity"
@@ -38,6 +38,10 @@ func ProvideSQLEntityServer(db db.EntityDBInterface /*, cfg *setting.Cfg */) (en
ctx: context.Background(),
}
if err := prometheus.Register(NewStorageMetrics()); err != nil {
entityServer.log.Warn("error registering storage server metrics", "error", err)
}
return entityServer, nil
}
@@ -514,6 +518,7 @@ func (s *sqlEntityServer) Update(ctx context.Context, r *entity.UpdateEntityRequ
// Optimistic locking
if r.PreviousVersion > 0 && r.PreviousVersion != current.ResourceVersion {
StorageServerMetrics.OptimisticLockFailed.WithLabelValues("update").Inc()
return fmt.Errorf("optimistic lock failed")
}
@@ -759,6 +764,7 @@ func (s *sqlEntityServer) Delete(ctx context.Context, r *entity.DeleteEntityRequ
if r.PreviousVersion > 0 && r.PreviousVersion != rsp.Entity.ResourceVersion {
rsp.Status = entity.DeleteEntityResponse_ERROR
StorageServerMetrics.OptimisticLockFailed.WithLabelValues("delete").Inc()
return fmt.Errorf("optimistic lock failed")
}