Search: Fix indexing - re-index after initial provisioning (#50959)

2025-02-25 18:55:37 -06:00 · 2022-06-27 18:11:08 +03:00
parent ee3f4f1709
commit 120c92b950
7 changed files with 58 additions and 4 deletions
--- a/pkg/services/searchV2/index.go
+++ b/pkg/services/searchV2/index.go
@@ -101,7 +101,7 @@ func (i *dashboardIndex) sync(ctx context.Context) error {
 	}
 }

-func (i *dashboardIndex) run(ctx context.Context, orgIDs []int64) error {
+func (i *dashboardIndex) run(ctx context.Context, orgIDs []int64, reIndexSignalCh chan struct{}) error {
 	reIndexInterval := 5 * time.Minute
 	fullReIndexTimer := time.NewTimer(reIndexInterval)
 	defer fullReIndexTimer.Stop()
@@ -124,18 +124,28 @@ func (i *dashboardIndex) run(ctx context.Context, orgIDs []int64) error {
 		return err
 	}

+	// This semaphore channel allows limiting concurrent async re-indexing routines to 1.
+	asyncReIndexSemaphore := make(chan struct{}, 1)
+
 	// Channel to handle signals about asynchronous full re-indexing completion.
 	reIndexDoneCh := make(chan int64, 1)

 	for {
 		select {
 		case doneCh := <-i.syncCh:
+			// Executed on search read requests to make sure index is consistent.
 			lastEventID = i.applyIndexUpdates(ctx, lastEventID)
 			close(doneCh)
 		case <-partialUpdateTimer.C:
+			// Periodically apply updates collected in entity events table.
 			lastEventID = i.applyIndexUpdates(ctx, lastEventID)
 			partialUpdateTimer.Reset(partialUpdateInterval)
+		case <-reIndexSignalCh:
+			// External systems may trigger re-indexing, at this moment provisioning does this.
+			i.logger.Info("Full re-indexing due to external signal")
+			fullReIndexTimer.Reset(0)
 		case signal := <-i.buildSignals:
+			// When search read request meets new not-indexed org we build index for it.
 			i.mu.RLock()
 			_, ok := i.perOrgWriter[signal.orgID]
 			if ok {
@@ -151,15 +161,28 @@ func (i *dashboardIndex) run(ctx context.Context, orgIDs []int64) error {
 			// branch.
 			fullReIndexTimer.Stop()
 			go func() {
+				// We need semaphore here since asynchronous re-indexing may be in progress already.
+				asyncReIndexSemaphore <- struct{}{}
+				defer func() { <-asyncReIndexSemaphore }()
 				_, err = i.buildOrgIndex(ctx, signal.orgID)
 				signal.done <- err
 				reIndexDoneCh <- lastIndexedEventID
 			}()
 		case <-fullReIndexTimer.C:
+			// Periodically rebuild indexes since we could miss updates. At this moment we are issuing
+			// entity events non-atomically (outside of transaction) and do not cover all possible dashboard
+			// change places, so periodic re-indexing fixes possibly broken state. But ideally we should
+			// come to an approach which does not require periodic re-indexing at all. One possible way
+			// is to use DB triggers, see https://github.com/grafana/grafana/pull/47712.
 			lastIndexedEventID := lastEventID
 			go func() {
 				// Do full re-index asynchronously to avoid blocking index synchronization
 				// on read for a long time.
+
+				// We need semaphore here since re-indexing due to build signal may be in progress already.
+				asyncReIndexSemaphore <- struct{}{}
+				defer func() { <-asyncReIndexSemaphore }()
+
 				started := time.Now()
 				i.logger.Info("Start re-indexing")
 				i.reIndexFromScratch(ctx)