Mirror of https://github.com/grafana/grafana.git (synced 2025-02-25 18:55:37 -06:00)
SearchV2: add tracing to background jobs (#55250)
* searchv2: add tracing to background jobs
* searchv2: lint
* searchv2: lint
* searchv2: fix context passing
* searchv2: add init org index span
* searchv2: add traceid to logs
* searchv2: add db count to logs
This commit is contained in:
parent a14bd4055f
commit 03af63d52e
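The change applies one pattern throughout the searchV2 background jobs: each iteration (partial index update, build signal, full re-index) gets its own span from the injected tracer, the span's context is passed into the work, and the trace ID (plus, behind a feature flag, a DB call count) is appended to the log lines. A rough standalone sketch of that pattern follows, written against plain OpenTelemetry rather than Grafana's internal tracing.Tracer wrapper; the reIndexTick function is illustrative only, not code from this commit.

package main

import (
	"context"
	"log"

	"go.opentelemetry.io/otel"
	"go.opentelemetry.io/otel/attribute"
	"go.opentelemetry.io/otel/trace"
)

// reIndexTick mirrors the shape of the change: one span per background-job
// iteration, the span's context handed to the work, and the trace ID attached
// to the log line so the log entry can be correlated with the trace.
func reIndexTick(ctx context.Context, orgID int64) {
	tracer := otel.Tracer("searchV2")

	tickCtx, span := tracer.Start(ctx, "searchV2 full reindex timer")
	defer span.End()
	span.SetAttributes(attribute.Int64("org_id", orgID))

	// ... re-indexing work would run here with tickCtx ...

	// With a real tracer provider registered this is the ID used to jump from
	// the log line to the trace; with the default no-op provider it is zeroes.
	traceID := trace.SpanContextFromContext(tickCtx).TraceID().String()
	log.Printf("Full re-indexing finished orgId=%d traceID=%s", orgID, traceID)
}

func main() {
	reIndexTick(context.Background(), 1)
}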
@@ -103,7 +103,7 @@ func TestIntegrationPluginManager_Run(t *testing.T) {
 	pg := postgres.ProvideService(cfg)
 	my := mysql.ProvideService(cfg, hcp)
 	ms := mssql.ProvideService(cfg)
-	sv2 := searchV2.ProvideService(cfg, sqlstore.InitTestDB(t), nil, nil, nil)
+	sv2 := searchV2.ProvideService(cfg, sqlstore.InitTestDB(t), nil, nil, tracing.InitializeTracerForTest(), featuremgmt.WithFeatures(), nil)
 	graf := grafanads.ProvideService(cfg, sv2, nil)
 
 	coreRegistry := coreplugin.ProvideCoreRegistry(am, cw, cm, es, grap, idb, lk, otsdb, pr, tmpo, td, pg, my, ms, graf)
@@ -9,10 +9,12 @@ import (
 	"github.com/grafana/grafana-plugin-sdk-go/backend"
 	"github.com/grafana/grafana-plugin-sdk-go/data"
 	"github.com/grafana/grafana-plugin-sdk-go/experimental"
+	"github.com/grafana/grafana/pkg/infra/tracing"
 	ac "github.com/grafana/grafana/pkg/services/accesscontrol"
 	accesscontrolmock "github.com/grafana/grafana/pkg/services/accesscontrol/mock"
 	"github.com/grafana/grafana/pkg/services/dashboards"
 	"github.com/grafana/grafana/pkg/services/datasources"
+	"github.com/grafana/grafana/pkg/services/featuremgmt"
 	"github.com/grafana/grafana/pkg/services/user"
 	"github.com/stretchr/testify/require"
 )
@@ -80,7 +82,7 @@ var (
 )
 
 func service(t *testing.T) *StandardSearchService {
-	service, ok := ProvideService(nil, nil, nil, accesscontrolmock.New(), nil).(*StandardSearchService)
+	service, ok := ProvideService(nil, nil, nil, accesscontrolmock.New(), tracing.InitializeTracerForTest(), featuremgmt.WithFeatures(), nil).(*StandardSearchService)
 	require.True(t, ok)
 	return service
 }
@@ -14,10 +14,13 @@ import (
 	"time"
 
 	"github.com/grafana/grafana/pkg/infra/log"
+	"github.com/grafana/grafana/pkg/infra/tracing"
+	"github.com/grafana/grafana/pkg/services/featuremgmt"
 	"github.com/grafana/grafana/pkg/services/searchV2/dslookup"
 	"github.com/grafana/grafana/pkg/services/searchV2/extract"
 	"github.com/grafana/grafana/pkg/services/sqlstore"
 	"github.com/grafana/grafana/pkg/services/store"
+	"go.opentelemetry.io/otel/attribute"
 
 	"github.com/blugelabs/bluge"
 )
@@ -93,9 +96,11 @@ type searchIndex struct {
 	extender DocumentExtender
 	folderIdLookup folderUIDLookup
 	syncCh chan chan struct{}
+	tracer tracing.Tracer
+	features featuremgmt.FeatureToggles
 }
 
-func newSearchIndex(dashLoader dashboardLoader, evStore eventStore, extender DocumentExtender, folderIDs folderUIDLookup) *searchIndex {
+func newSearchIndex(dashLoader dashboardLoader, evStore eventStore, extender DocumentExtender, folderIDs folderUIDLookup, tracer tracing.Tracer, features featuremgmt.FeatureToggles) *searchIndex {
 	return &searchIndex{
 		loader: dashLoader,
 		eventStore: evStore,
@@ -106,6 +111,8 @@ func newSearchIndex(dashLoader dashboardLoader, evStore eventStore, extender Doc
 		extender: extender,
 		folderIdLookup: folderIDs,
 		syncCh: make(chan chan struct{}),
+		tracer: tracer,
+		features: features,
 	}
 }
 
@@ -209,17 +216,22 @@ func (i *searchIndex) run(ctx context.Context, orgIDs []int64, reIndexSignalCh c
 			close(doneCh)
 		case <-partialUpdateTimer.C:
 			// Periodically apply updates collected in entity events table.
-			lastEventID = i.applyIndexUpdates(ctx, lastEventID)
+			partialIndexUpdateCtx, span := i.tracer.Start(ctx, "searchV2 partial update timer")
+			lastEventID = i.applyIndexUpdates(partialIndexUpdateCtx, lastEventID)
+			span.End()
 			partialUpdateTimer.Reset(partialUpdateInterval)
 		case <-reIndexSignalCh:
 			// External systems may trigger re-indexing, at this moment provisioning does this.
 			i.logger.Info("Full re-indexing due to external signal")
 			fullReIndexTimer.Reset(0)
 		case signal := <-i.buildSignals:
+			buildSignalCtx, span := i.tracer.Start(ctx, "searchV2 build signal")
+
 			// When search read request meets new not-indexed org we build index for it.
 			i.mu.RLock()
 			_, ok := i.perOrgIndex[signal.orgID]
 			if ok {
+				span.End()
 				// Index for org already exists, do nothing.
 				i.mu.RUnlock()
 				close(signal.done)
@@ -232,14 +244,17 @@ func (i *searchIndex) run(ctx context.Context, orgIDs []int64, reIndexSignalCh c
 			// branch.
 			fullReIndexTimer.Stop()
 			go func() {
+				defer span.End()
 				// We need semaphore here since asynchronous re-indexing may be in progress already.
 				asyncReIndexSemaphore <- struct{}{}
 				defer func() { <-asyncReIndexSemaphore }()
-				_, err = i.buildOrgIndex(ctx, signal.orgID)
+				_, err = i.buildOrgIndex(buildSignalCtx, signal.orgID)
 				signal.done <- err
 				reIndexDoneCh <- lastIndexedEventID
 			}()
 		case <-fullReIndexTimer.C:
+			fullReindexCtx, span := i.tracer.Start(ctx, "searchV2 full reindex timer")
+
 			// Periodically rebuild indexes since we could miss updates. At this moment we are issuing
 			// entity events non-atomically (outside of transaction) and do not cover all possible dashboard
 			// change places, so periodic re-indexing fixes possibly broken state. But ideally we should
@@ -247,6 +262,7 @@ func (i *searchIndex) run(ctx context.Context, orgIDs []int64, reIndexSignalCh c
 			// is to use DB triggers, see https://github.com/grafana/grafana/pull/47712.
 			lastIndexedEventID := lastEventID
 			go func() {
+				defer span.End()
 				// Do full re-index asynchronously to avoid blocking index synchronization
 				// on read for a long time.
 
@@ -255,9 +271,9 @@ func (i *searchIndex) run(ctx context.Context, orgIDs []int64, reIndexSignalCh c
 				defer func() { <-asyncReIndexSemaphore }()
 
 				started := time.Now()
-				i.logger.Info("Start re-indexing")
-				i.reIndexFromScratch(ctx)
-				i.logger.Info("Full re-indexing finished", "fullReIndexElapsed", time.Since(started))
+				i.logger.Info("Start re-indexing", i.withCtxData(fullReindexCtx)...)
+				i.reIndexFromScratch(fullReindexCtx)
+				i.logger.Info("Full re-indexing finished", i.withCtxData(fullReindexCtx, "fullReIndexElapsed", time.Since(started))...)
 				reIndexDoneCh <- lastIndexedEventID
 			}()
 		case lastIndexedEventID := <-reIndexDoneCh:
@@ -439,6 +455,8 @@ func (i *searchIndex) reportSizeOfIndexDiskBackup(orgID int64) {
 func (i *searchIndex) buildOrgIndex(ctx context.Context, orgID int64) (int, error) {
 	started := time.Now()
 	ctx, cancel := context.WithTimeout(ctx, time.Minute)
+	ctx = log.InitCounter(ctx)
+
 	defer cancel()
 
 	i.logger.Info("Start building org index", "orgId", orgID)
@@ -450,7 +468,14 @@ func (i *searchIndex) buildOrgIndex(ctx context.Context, orgID int64) (int, erro
 	i.logger.Info("Finish loading org dashboards", "elapsed", orgSearchIndexLoadTime, "orgId", orgID)
 
 	dashboardExtender := i.extender.GetDashboardExtender(orgID)
+
+	_, initOrgIndexSpan := i.tracer.Start(ctx, "searchV2 init org index")
+	initOrgIndexSpan.SetAttributes("org_id", orgID, attribute.Key("org_id").Int64(orgID))
+
 	index, err := initOrgIndex(dashboards, i.logger, dashboardExtender)
+
+	initOrgIndexSpan.End()
+
 	if err != nil {
 		return 0, fmt.Errorf("error initializing index: %w", err)
 	}
@@ -458,11 +483,11 @@ func (i *searchIndex) buildOrgIndex(ctx context.Context, orgID int64) (int, erro
 	orgSearchIndexBuildTime := orgSearchIndexTotalTime - orgSearchIndexLoadTime
 
 	i.logger.Info("Re-indexed dashboards for organization",
-		"orgId", orgID,
+		i.withCtxData(ctx, "orgId", orgID,
 		"orgSearchIndexLoadTime", orgSearchIndexLoadTime,
 		"orgSearchIndexBuildTime", orgSearchIndexBuildTime,
 		"orgSearchIndexTotalTime", orgSearchIndexTotalTime,
-		"orgSearchDashboardCount", len(dashboards))
+		"orgSearchDashboardCount", len(dashboards))...)
 
 	i.mu.Lock()
 	if oldIndex, ok := i.perOrgIndex[orgID]; ok {
@@ -539,8 +564,22 @@ func (i *searchIndex) reIndexFromScratch(ctx context.Context) {
 	}
 }
 
+func (i *searchIndex) withCtxData(ctx context.Context, params ...interface{}) []interface{} {
+	traceID := tracing.TraceIDFromContext(ctx, false)
+	if traceID != "" {
+		params = append(params, "traceID", traceID)
+	}
+
+	if i.features.IsEnabled(featuremgmt.FlagDatabaseMetrics) {
+		params = append(params, "db_call_count", log.TotalDBCallCount(ctx))
+	}
+
+	return params
+}
+
 func (i *searchIndex) applyIndexUpdates(ctx context.Context, lastEventID int64) int64 {
-	events, err := i.eventStore.GetAllEventsAfter(context.Background(), lastEventID)
+	ctx = log.InitCounter(ctx)
+	events, err := i.eventStore.GetAllEventsAfter(ctx, lastEventID)
 	if err != nil {
 		i.logger.Error("can't load events", "error", err)
 		return lastEventID
@@ -557,7 +596,7 @@ func (i *searchIndex) applyIndexUpdates(ctx context.Context, lastEventID int64)
 		}
 		lastEventID = e.Id
 	}
-	i.logger.Info("Index updates applied", "indexEventsAppliedElapsed", time.Since(started), "numEvents", len(events))
+	i.logger.Info("Index updates applied", i.withCtxData(ctx, "indexEventsAppliedElapsed", time.Since(started), "numEvents", len(events))...)
 	return lastEventID
 }
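The withCtxData helper added above keeps each logger call a one-liner while appending context-derived fields: the trace ID whenever one is present, and the DB call count when the FlagDatabaseMetrics feature toggle is enabled. A minimal standalone sketch of the same idea, re-implemented here with the OpenTelemetry trace package instead of Grafana's tracing and log helpers, and without the feature-flag/DB-counter part:

package main

import (
	"context"
	"log"

	"go.opentelemetry.io/otel/trace"
)

// withCtxData appends context-derived fields (here just the trace ID) to a
// logger's key/value parameters, mirroring the helper added in this commit.
func withCtxData(ctx context.Context, params ...interface{}) []interface{} {
	sc := trace.SpanContextFromContext(ctx)
	if sc.HasTraceID() {
		params = append(params, "traceID", sc.TraceID().String())
	}
	return params
}

func main() {
	ctx := context.Background()
	// Mirrors i.logger.Info("Index updates applied", i.withCtxData(ctx, "numEvents", 3)...)
	log.Println(withCtxData(ctx, "numEvents", 3)...)
}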
@@ -8,6 +8,8 @@ import (
 
 	"github.com/grafana/grafana-plugin-sdk-go/backend"
 	"github.com/grafana/grafana-plugin-sdk-go/data"
+	"github.com/grafana/grafana/pkg/infra/tracing"
+	"github.com/grafana/grafana/pkg/services/featuremgmt"
 
 	"github.com/grafana/grafana/pkg/infra/log"
 	"github.com/grafana/grafana/pkg/services/searchV2/extract"
@@ -60,11 +62,7 @@ func initTestIndexFromDashesExtended(t *testing.T, dashboards []dashboard, exten
 	dashboardLoader := &testDashboardLoader{
 		dashboards: dashboards,
 	}
-	index := newSearchIndex(
-		dashboardLoader,
-		&store.MockEntityEventsService{},
-		extender,
-		func(ctx context.Context, folderId int64) (string, error) { return "x", nil })
+	index := newSearchIndex(dashboardLoader, &store.MockEntityEventsService{}, extender, func(ctx context.Context, folderId int64) (string, error) { return "x", nil }, tracing.InitializeTracerForTest(), featuremgmt.WithFeatures())
 	require.NotNil(t, index)
 	numDashboards, err := index.buildOrgIndex(context.Background(), testOrgID)
 	require.NoError(t, err)
@@ -7,6 +7,7 @@ import (
 	"time"
 
 	"github.com/grafana/grafana/pkg/infra/log"
+	"github.com/grafana/grafana/pkg/infra/tracing"
 	"github.com/grafana/grafana/pkg/models"
 	"github.com/grafana/grafana/pkg/registry"
 	"github.com/grafana/grafana/pkg/services/accesscontrol"
@@ -69,8 +70,7 @@ func (s *StandardSearchService) IsReady(ctx context.Context, orgId int64) IsSear
 	return s.dashboardIndex.isInitialized(ctx, orgId)
 }
 
-func ProvideService(cfg *setting.Cfg, sql *sqlstore.SQLStore, entityEventStore store.EntityEventsService,
-	ac accesscontrol.Service, orgService org.Service) SearchService {
+func ProvideService(cfg *setting.Cfg, sql *sqlstore.SQLStore, entityEventStore store.EntityEventsService, ac accesscontrol.Service, tracer tracing.Tracer, features featuremgmt.FeatureToggles, orgService org.Service) SearchService {
 	extender := &NoopExtender{}
 	s := &StandardSearchService{
 		cfg: cfg,
@@ -85,6 +85,8 @@ func ProvideService(cfg *setting.Cfg, sql *sqlstore.SQLStore, entityEventStore s
 			entityEventStore,
 			extender.GetDocumentExtender(),
 			newFolderIDLookup(sql),
+			tracer,
+			features,
 		),
 		logger: log.New("searchV2"),
 		extender: extender,