Storage: Add command line tool to migrate legacy dashboards (and folders) to unified storage (#99199)

Ryan McKinley 2025-02-11 20:57:46 +03:00 committed by GitHub
parent b6ea06f259
commit a5355fd66c
36 changed files with 3569 additions and 614 deletions

5
go.mod
View File

@ -241,6 +241,7 @@ require (
github.com/AzureAD/microsoft-authentication-library-for-go v1.3.2 // indirect
github.com/FZambia/eagle v0.1.0 // indirect
github.com/HdrHistogram/hdrhistogram-go v1.1.2 // indirect
github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c // indirect
github.com/Masterminds/goutils v1.1.1 // indirect
github.com/Masterminds/squirrel v1.5.4 // indirect
github.com/NYTimes/gziphandler v1.1.1 // indirect
@ -250,6 +251,7 @@ require (
github.com/alecthomas/units v0.0.0-20240927000941-0f3dac36c52b // indirect
github.com/alicebob/gopher-json v0.0.0-20200520072559-a9ecdc9d1d3a // indirect
github.com/antlr4-go/antlr/v4 v4.13.1 // indirect
github.com/apache/thrift v0.21.0 // indirect
github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect
github.com/apparentlymart/go-textseg/v13 v13.0.0 // indirect
github.com/armon/go-metrics v0.4.1 // indirect
@ -397,6 +399,7 @@ require (
github.com/jpillora/backoff v1.0.0 // indirect
github.com/jszwedko/go-datemath v0.1.1-0.20230526204004-640a500621d6 // indirect
github.com/jtolds/gls v4.20.0+incompatible // indirect
github.com/klauspost/asmfmt v1.3.2 // indirect
github.com/klauspost/compress v1.17.11 // indirect
github.com/klauspost/cpuid/v2 v2.2.9 // indirect
github.com/kylelemons/godebug v1.1.0 // indirect
@ -416,6 +419,8 @@ require (
github.com/mdlayher/vsock v1.2.1 // indirect
github.com/mfridman/interpolate v0.0.2 // indirect
github.com/miekg/dns v1.1.62 // indirect
github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 // indirect
github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 // indirect
github.com/mitchellh/copystructure v1.2.0 // indirect
github.com/mitchellh/go-homedir v1.1.0 // indirect
github.com/mitchellh/go-wordwrap v1.0.1 // indirect

1
go.sum
View File

@ -708,6 +708,7 @@ github.com/FZambia/eagle v0.1.0 h1:9gyX6x+xjoIfglgyPTcYm7dvY7FJ93us1QY5De4CyXA=
github.com/FZambia/eagle v0.1.0/go.mod h1:YjGSPVkQTNcVLfzEUQJNgW9ScPR0K4u/Ky0yeFa4oDA=
github.com/HdrHistogram/hdrhistogram-go v1.1.2 h1:5IcZpTvzydCQeHzK4Ef/D5rrSqwxob0t8PQPMybUNFM=
github.com/HdrHistogram/hdrhistogram-go v1.1.2/go.mod h1:yDgFjdqOqDEKOvasDdhWNXYg9BVp4O+o5f6V/ehm6Oo=
github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c h1:RGWPOewvKIROun94nF7v2cua9qP+thov/7M50KEoeSU=
github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c/go.mod h1:X0CRv0ky0k6m906ixxpzmDRLvX58TFUKS2eePweuyxk=
github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI=
github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU=

View File

@ -144,6 +144,11 @@ var adminCommands = []*cli.Command{
Usage: "Migrates passwords from unsecured fields to secure_json_data field. Return ok unless there is an error. Safe to execute multiple times.",
Action: runDbCommand(datamigrations.EncryptDatasourcePasswords),
},
{
Name: "to-unified-storage",
Usage: "Migrates classic SQL data into unified storage",
Action: runDbCommand(datamigrations.ToUnifiedStorage),
},
},
},
{

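With this registration, the migration would be invoked from the Grafana CLI roughly as `grafana-cli admin data-migration to-unified-storage` (the `admin data-migration` prefix is inferred from the surrounding command tree and may differ); it then walks through the interactive prompts implemented in `ToUnifiedStorage` below.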
View File

@ -0,0 +1,70 @@
package datamigrations
import (
"context"
"path/filepath"
"github.com/grafana/grafana/pkg/services/provisioning"
"github.com/grafana/grafana/pkg/services/provisioning/dashboards"
)
var (
_ provisioning.ProvisioningService = (*stubProvisioning)(nil)
)
func newStubProvisioning(path string) (provisioning.ProvisioningService, error) {
cfgs, err := dashboards.ReadDashboardConfig(filepath.Join(path, "dashboards"))
if err != nil {
return nil, err
}
stub := &stubProvisioning{
path: make(map[string]string),
}
for _, cfg := range cfgs {
stub.path[cfg.Name] = cfg.Options["path"].(string)
}
return stub, nil
}
type stubProvisioning struct {
path map[string]string // name > options.path
}
// GetAllowUIUpdatesFromConfig implements provisioning.ProvisioningService.
func (s *stubProvisioning) GetAllowUIUpdatesFromConfig(name string) bool {
return false
}
func (s *stubProvisioning) GetDashboardProvisionerResolvedPath(name string) string {
return s.path[name]
}
// ProvisionAlerting implements provisioning.ProvisioningService.
func (s *stubProvisioning) ProvisionAlerting(ctx context.Context) error {
panic("unimplemented")
}
// ProvisionDashboards implements provisioning.ProvisioningService.
func (s *stubProvisioning) ProvisionDashboards(ctx context.Context) error {
panic("unimplemented")
}
// ProvisionDatasources implements provisioning.ProvisioningService.
func (s *stubProvisioning) ProvisionDatasources(ctx context.Context) error {
panic("unimplemented")
}
// ProvisionPlugins implements provisioning.ProvisioningService.
func (s *stubProvisioning) ProvisionPlugins(ctx context.Context) error {
panic("unimplemented")
}
// Run implements provisioning.ProvisioningService.
func (s *stubProvisioning) Run(ctx context.Context) error {
panic("unimplemented")
}
// RunInitProvisioners implements provisioning.ProvisioningService.
func (s *stubProvisioning) RunInitProvisioners(ctx context.Context) error {
panic("unimplemented")
}

View File

@ -0,0 +1,207 @@
package datamigrations
import (
"context"
"encoding/json"
"fmt"
"os"
"strings"
"time"
"github.com/prometheus/client_golang/prometheus"
"k8s.io/apimachinery/pkg/runtime/schema"
dashboard "github.com/grafana/grafana/pkg/apis/dashboard"
folders "github.com/grafana/grafana/pkg/apis/folder/v0alpha1"
authlib "github.com/grafana/authlib/types"
"github.com/grafana/grafana/pkg/apimachinery/identity"
"github.com/grafana/grafana/pkg/cmd/grafana-cli/utils"
"github.com/grafana/grafana/pkg/infra/db"
"github.com/grafana/grafana/pkg/infra/tracing"
"github.com/grafana/grafana/pkg/registry/apis/dashboard/legacy"
"github.com/grafana/grafana/pkg/services/featuremgmt"
"github.com/grafana/grafana/pkg/setting"
"github.com/grafana/grafana/pkg/storage/legacysql"
"github.com/grafana/grafana/pkg/storage/unified"
"github.com/grafana/grafana/pkg/storage/unified/parquet"
"github.com/grafana/grafana/pkg/storage/unified/resource"
)
// ToUnifiedStorage converts dashboards+folders into unified storage
func ToUnifiedStorage(c utils.CommandLine, cfg *setting.Cfg, sqlStore db.DB) error {
namespace := "default" // TODO... from command line
ns, err := authlib.ParseNamespace(namespace)
if err != nil {
return err
}
ctx := identity.WithServiceIdentityContext(context.Background(), ns.OrgID)
start := time.Now()
last := time.Now()
opts := legacy.MigrateOptions{
Namespace: namespace,
Resources: []schema.GroupResource{
{Group: folders.GROUP, Resource: folders.RESOURCE},
{Group: dashboard.GROUP, Resource: dashboard.DASHBOARD_RESOURCE},
{Group: dashboard.GROUP, Resource: dashboard.LIBRARY_PANEL_RESOURCE},
},
LargeObjects: nil, // TODO... from config
Progress: func(count int, msg string) {
if count < 1 || time.Since(last) > time.Second {
fmt.Printf("[%4d] %s\n", count, msg)
last = time.Now()
}
},
}
provisioning, err := newStubProvisioning(cfg.ProvisioningPath)
if err != nil {
return err
}
migrator := legacy.NewDashboardAccess(
legacysql.NewDatabaseProvider(sqlStore),
authlib.OrgNamespaceFormatter,
nil, provisioning, false,
)
yes, err := promptYesNo(fmt.Sprintf("Count legacy resources for namespace: %s?", opts.Namespace))
if err != nil {
return err
}
if yes {
opts.OnlyCount = true
rsp, err := migrator.Migrate(ctx, opts)
if err != nil {
return err
}
fmt.Printf("Counting DONE: %s\n", time.Since(start))
if rsp != nil {
jj, _ := json.MarshalIndent(rsp, "", " ")
fmt.Printf("%s\n", string(jj))
}
}
opts.OnlyCount = false
opts.WithHistory, err = promptYesNo("Include history in exports?")
if err != nil {
return err
}
yes, err = promptYesNo("Export legacy resources to parquet file?")
if err != nil {
return err
}
if yes {
file, err := os.CreateTemp(cfg.DataPath, "grafana-export-*.parquet")
if err != nil {
return err
}
start = time.Now()
last = time.Now()
opts.Store, err = newParquetClient(file)
if err != nil {
return err
}
rsp, err := migrator.Migrate(ctx, opts)
if err != nil {
return err
}
fmt.Printf("Parquet export DONE: %s\n", time.Since(start))
if rsp != nil {
jj, _ := json.MarshalIndent(rsp, "", " ")
fmt.Printf("%s\n", string(jj))
}
fmt.Printf("File: %s\n", file.Name())
}
yes, err = promptYesNo("Export legacy resources to unified storage?")
if err != nil {
return err
}
if yes {
client, err := newUnifiedClient(cfg, sqlStore)
if err != nil {
return err
}
// Check the stats (eventually compare)
req := &resource.ResourceStatsRequest{
Namespace: opts.Namespace,
}
for _, r := range opts.Resources {
req.Kinds = append(req.Kinds, fmt.Sprintf("%s/%s", r.Group, r.Resource))
}
stats, err := client.GetStats(ctx, req)
if err != nil {
return err
}
if stats != nil {
fmt.Printf("Existing resources in unified storage:\n")
jj, _ := json.MarshalIndent(stats, "", " ")
fmt.Printf("%s\n", string(jj))
}
yes, err = promptYesNo("Would you like to continue? (existing resources will be replaced)")
if err != nil {
return err
}
if yes {
start = time.Now()
last = time.Now()
opts.Store = client
opts.BlobStore = client
rsp, err := migrator.Migrate(ctx, opts)
if err != nil {
return err
}
fmt.Printf("Unified storage export: %s\n", time.Since(start))
if rsp != nil {
jj, _ := json.MarshalIndent(rsp, "", " ")
fmt.Printf("%s\n", string(jj))
}
}
}
return nil
}
func promptYesNo(prompt string) (bool, error) {
line := ""
for {
fmt.Printf("%s (Y/N) >", prompt)
_, err := fmt.Scanln(&line)
if err != nil && err.Error() != "unexpected newline" {
return false, err
}
switch strings.ToLower(line) {
case "y", "yes":
return true, nil
case "n", "no":
return false, nil
}
}
}
func newUnifiedClient(cfg *setting.Cfg, sqlStore db.DB) (resource.ResourceClient, error) {
return unified.ProvideUnifiedStorageClient(cfg,
featuremgmt.WithFeatures(), // none??
sqlStore,
tracing.NewNoopTracerService(),
prometheus.NewPedanticRegistry(),
authlib.FixedAccessClient(true), // always true!
nil, // document supplier
)
}
func newParquetClient(file *os.File) (resource.BatchStoreClient, error) {
writer, err := parquet.NewParquetWriter(file)
if err != nil {
return nil, err
}
client := parquet.NewBatchResourceWriterClient(writer)
return client, nil
}

View File

@ -0,0 +1,29 @@
package datamigrations
import (
"context"
"fmt"
"testing"
"github.com/stretchr/testify/require"
"github.com/grafana/grafana/pkg/infra/db"
)
func TestUnifiedStorageCommand(t *testing.T) {
// setup datasources with password, basic_auth and none
store := db.InitTestDB(t)
err := store.WithDbSession(context.Background(), func(sess *db.Session) error {
unistoreMigrationTest(t, sess, store)
return nil
})
require.NoError(t, err)
}
func unistoreMigrationTest(t *testing.T, session *db.Session, sqlstore db.DB) {
// empty stats
t.Run("get stats", func(t *testing.T) {
fmt.Printf("TODO... add folders and check that they migrate\n")
})
}

View File

@ -87,5 +87,10 @@ func (d *directResourceClient) Update(ctx context.Context, in *resource.UpdateRe
// Watch implements ResourceClient.
func (d *directResourceClient) Watch(ctx context.Context, in *resource.WatchRequest, opts ...grpc.CallOption) (resource.ResourceStore_WatchClient, error) {
return nil, fmt.Errorf("watch not supported with direct resource client")
}
// BatchProcess implements resource.ResourceClient.
func (d *directResourceClient) BatchProcess(ctx context.Context, opts ...grpc.CallOption) (resource.BatchStore_BatchProcessClient, error) {
return nil, fmt.Errorf("BatchProcess not supported with direct resource client")
}

View File

@ -0,0 +1,411 @@
package legacy
import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"google.golang.org/grpc/metadata"
"k8s.io/apimachinery/pkg/runtime/schema"
authlib "github.com/grafana/authlib/types"
"github.com/grafana/grafana/pkg/apimachinery/utils"
dashboard "github.com/grafana/grafana/pkg/apis/dashboard"
folders "github.com/grafana/grafana/pkg/apis/folder/v0alpha1"
"github.com/grafana/grafana/pkg/services/sqlstore"
"github.com/grafana/grafana/pkg/storage/unified/apistore"
"github.com/grafana/grafana/pkg/storage/unified/resource"
)
type MigrateOptions struct {
Namespace string
Store resource.BatchStoreClient
Writer resource.BatchResourceWriter
LargeObjects apistore.LargeObjectSupport
BlobStore resource.BlobStoreClient
Resources []schema.GroupResource
WithHistory bool // only applies to dashboards
OnlyCount bool // just count the values
Progress func(count int, msg string)
}
// Read from legacy and write into unified storage
type LegacyMigrator interface {
Migrate(ctx context.Context, opts MigrateOptions) (*resource.BatchResponse, error)
}
type BlobStoreInfo struct {
Count int64
Size int64
}
// migrate function -- works for a single kind
type migrator = func(ctx context.Context, orgId int64, opts MigrateOptions, stream resource.BatchStore_BatchProcessClient) (*BlobStoreInfo, error)
func (a *dashboardSqlAccess) Migrate(ctx context.Context, opts MigrateOptions) (*resource.BatchResponse, error) {
info, err := authlib.ParseNamespace(opts.Namespace)
if err != nil {
return nil, err
}
// Migrate everything
if len(opts.Resources) < 1 {
return nil, fmt.Errorf("missing resource selector")
}
migrators := []migrator{}
settings := resource.BatchSettings{
RebuildCollection: true,
SkipValidation: true,
}
for _, res := range opts.Resources {
switch fmt.Sprintf("%s/%s", res.Group, res.Resource) {
case "folder.grafana.app/folders":
migrators = append(migrators, a.migrateFolders)
settings.Collection = append(settings.Collection, &resource.ResourceKey{
Namespace: opts.Namespace,
Group: folders.GROUP,
Resource: folders.RESOURCE,
})
case "dashboard.grafana.app/librarypanels":
migrators = append(migrators, a.migratePanels)
settings.Collection = append(settings.Collection, &resource.ResourceKey{
Namespace: opts.Namespace,
Group: dashboard.GROUP,
Resource: dashboard.LIBRARY_PANEL_RESOURCE,
})
case "dashboard.grafana.app/dashboards":
migrators = append(migrators, a.migrateDashboards)
settings.Collection = append(settings.Collection, &resource.ResourceKey{
Namespace: opts.Namespace,
Group: dashboard.GROUP,
Resource: dashboard.DASHBOARD_RESOURCE,
})
default:
return nil, fmt.Errorf("unsupported resource: %s", res)
}
}
if opts.OnlyCount {
return a.countValues(ctx, opts)
}
ctx = metadata.NewOutgoingContext(ctx, settings.ToMD())
stream, err := opts.Store.BatchProcess(ctx)
if err != nil {
return nil, err
}
// Now run each migration
blobStore := BlobStoreInfo{}
for _, m := range migrators {
blobs, err := m(ctx, info.OrgID, opts, stream)
if err != nil {
return nil, err
}
if blobs != nil {
blobStore.Count += blobs.Count
blobStore.Size += blobs.Size
}
}
fmt.Printf("BLOBS: %+v\n", blobStore)
return stream.CloseAndRecv()
}
func (a *dashboardSqlAccess) countValues(ctx context.Context, opts MigrateOptions) (*resource.BatchResponse, error) {
sql, err := a.sql(ctx)
if err != nil {
return nil, err
}
ns, err := authlib.ParseNamespace(opts.Namespace)
if err != nil {
return nil, err
}
orgId := ns.OrgID
rsp := &resource.BatchResponse{}
err = sql.DB.WithDbSession(ctx, func(sess *sqlstore.DBSession) error {
for _, res := range opts.Resources {
switch fmt.Sprintf("%s/%s", res.Group, res.Resource) {
case "folder.grafana.app/folders":
summary := &resource.BatchResponse_Summary{}
summary.Group = folders.GROUP
summary.Resource = folders.RESOURCE
_, err = sess.SQL("SELECT COUNT(*) FROM "+sql.Table("dashboard")+
" WHERE is_folder=TRUE AND org_id=?", orgId).Get(&summary.Count)
rsp.Summary = append(rsp.Summary, summary)
case "dashboard.grafana.app/librarypanels":
summary := &resource.BatchResponse_Summary{}
summary.Group = dashboard.GROUP
summary.Resource = dashboard.LIBRARY_PANEL_RESOURCE
_, err = sess.SQL("SELECT COUNT(*) FROM "+sql.Table("library_element")+
" WHERE org_id=?", orgId).Get(&summary.Count)
rsp.Summary = append(rsp.Summary, summary)
case "dashboard.grafana.app/dashboards":
summary := &resource.BatchResponse_Summary{}
summary.Group = dashboard.GROUP
summary.Resource = dashboard.DASHBOARD_RESOURCE
rsp.Summary = append(rsp.Summary, summary)
_, err = sess.SQL("SELECT COUNT(*) FROM "+sql.Table("dashboard")+
" WHERE is_folder=FALSE AND org_id=?", orgId).Get(&summary.Count)
if err != nil {
return err
}
// Also count history
_, err = sess.SQL(`SELECT COUNT(*)
FROM `+sql.Table("dashboard_version")+` as dv
JOIN `+sql.Table("dashboard")+` as dd
ON dd.id = dv.dashboard_id
WHERE org_id=?`, orgId).Get(&summary.History)
}
if err != nil {
return err
}
}
return nil
})
return rsp, nil
}
func (a *dashboardSqlAccess) migrateDashboards(ctx context.Context, orgId int64, opts MigrateOptions, stream resource.BatchStore_BatchProcessClient) (*BlobStoreInfo, error) {
query := &DashboardQuery{
OrgID: orgId,
Limit: 100000000,
GetHistory: opts.WithHistory, // include history
}
blobs := &BlobStoreInfo{}
sql, err := a.sql(ctx)
if err != nil {
return blobs, err
}
opts.Progress(-1, "migrating dashboards...")
rows, err := a.getRows(ctx, sql, query)
if rows != nil {
defer func() {
_ = rows.Close()
}()
}
if err != nil {
return blobs, err
}
large := opts.LargeObjects
// Now send each dashboard
for i := 1; rows.Next(); i++ {
dash := rows.row.Dash
dash.APIVersion = fmt.Sprintf("%s/v0alpha1", dashboard.GROUP) // << eventually v0
dash.SetNamespace(opts.Namespace)
dash.SetResourceVersion("") // it will be filled in by the backend
body, err := json.Marshal(dash)
if err != nil {
err = fmt.Errorf("error reading json from: %s // %w", rows.row.Dash.Name, err)
return blobs, err
}
req := &resource.BatchRequest{
Key: &resource.ResourceKey{
Namespace: opts.Namespace,
Group: dashboard.GROUP,
Resource: dashboard.DASHBOARD_RESOURCE,
Name: rows.Name(),
},
Value: body,
Folder: rows.row.FolderUID,
Action: resource.BatchRequest_ADDED,
}
if dash.Generation > 1 {
req.Action = resource.BatchRequest_MODIFIED
} else if dash.Generation < 0 {
req.Action = resource.BatchRequest_DELETED
}
// With large object support
if large != nil && len(body) > large.Threshold() {
obj, err := utils.MetaAccessor(dash)
if err != nil {
return blobs, err
}
opts.Progress(i, fmt.Sprintf("[v:%d] %s Large object (%d)", dash.Generation, dash.Name, len(body)))
err = large.Deconstruct(ctx, req.Key, opts.BlobStore, obj, req.Value)
if err != nil {
return blobs, err
}
// The smaller version (most of spec removed)
req.Value, err = json.Marshal(dash)
if err != nil {
return blobs, err
}
blobs.Count++
blobs.Size += int64(len(body))
}
opts.Progress(i, fmt.Sprintf("[v:%2d] %s (size:%d / %d|%d)", dash.Generation, dash.Name, len(req.Value), i, rows.count))
err = stream.Send(req)
if err != nil {
if errors.Is(err, io.EOF) {
opts.Progress(i, fmt.Sprintf("stream EOF/cancelled. index=%d", i))
err = nil
}
return blobs, err
}
}
if len(rows.rejected) > 0 {
for _, row := range rows.rejected {
id := row.Dash.Labels[utils.LabelKeyDeprecatedInternalID]
fmt.Printf("REJECTED: %s / %s\n", id, row.Dash.Name)
opts.Progress(-2, fmt.Sprintf("rejected: id:%s, uid:%s", id, row.Dash.Name))
}
}
if rows.Error() != nil {
return blobs, rows.Error()
}
opts.Progress(-2, fmt.Sprintf("finished dashboards... (%d)", rows.count))
return blobs, err
}
func (a *dashboardSqlAccess) migrateFolders(ctx context.Context, orgId int64, opts MigrateOptions, stream resource.BatchStore_BatchProcessClient) (*BlobStoreInfo, error) {
query := &DashboardQuery{
OrgID: orgId,
Limit: 100000000,
GetFolders: true,
}
sql, err := a.sql(ctx)
if err != nil {
return nil, err
}
opts.Progress(-1, "migrating folders...")
rows, err := a.getRows(ctx, sql, query)
if rows != nil {
defer func() {
_ = rows.Close()
}()
}
if err != nil {
return nil, err
}
// Now send each dashboard
for i := 1; rows.Next(); i++ {
dash := rows.row.Dash
dash.APIVersion = "folder.grafana.app/v0alpha1"
dash.Kind = "Folder"
dash.SetNamespace(opts.Namespace)
dash.SetResourceVersion("") // it will be filled in by the backend
spec := map[string]any{
"title": dash.Spec.Object["title"],
}
description := dash.Spec.Object["description"]
if description != nil {
spec["description"] = description
}
dash.Spec.Object = spec
body, err := json.Marshal(dash)
if err != nil {
return nil, err
}
req := &resource.BatchRequest{
Key: &resource.ResourceKey{
Namespace: opts.Namespace,
Group: "folder.grafana.app",
Resource: "folders",
Name: rows.Name(),
},
Value: body,
Folder: rows.row.FolderUID,
Action: resource.BatchRequest_ADDED,
}
if dash.Generation > 1 {
req.Action = resource.BatchRequest_MODIFIED
} else if dash.Generation < 0 {
req.Action = resource.BatchRequest_DELETED
}
opts.Progress(i, fmt.Sprintf("[v:%d] %s (%d)", dash.Generation, dash.Name, len(req.Value)))
err = stream.Send(req)
if err != nil {
if errors.Is(err, io.EOF) {
err = nil
}
return nil, err
}
}
if rows.Error() != nil {
return nil, rows.Error()
}
opts.Progress(-2, fmt.Sprintf("finished folders... (%d)", rows.count))
return nil, err
}
func (a *dashboardSqlAccess) migratePanels(ctx context.Context, orgId int64, opts MigrateOptions, stream resource.BatchStore_BatchProcessClient) (*BlobStoreInfo, error) {
opts.Progress(-1, "migrating library panels...")
panels, err := a.GetLibraryPanels(ctx, LibraryPanelQuery{
OrgID: orgId,
Limit: 1000000,
})
if err != nil {
return nil, err
}
for i, panel := range panels.Items {
meta, err := utils.MetaAccessor(&panel)
if err != nil {
return nil, err
}
body, err := json.Marshal(panel)
if err != nil {
return nil, err
}
req := &resource.BatchRequest{
Key: &resource.ResourceKey{
Namespace: opts.Namespace,
Group: dashboard.GROUP,
Resource: dashboard.LIBRARY_PANEL_RESOURCE,
Name: panel.Name,
},
Value: body,
Folder: meta.GetFolder(),
Action: resource.BatchRequest_ADDED,
}
if panel.Generation > 1 {
req.Action = resource.BatchRequest_MODIFIED
}
opts.Progress(i, fmt.Sprintf("[v:%d] %s (%d)", i, meta.GetName(), len(req.Value)))
err = stream.Send(req)
if err != nil {
if errors.Is(err, io.EOF) {
err = nil
}
return nil, err
}
}
opts.Progress(-2, fmt.Sprintf("finished panels... (%d)", len(panels.Items)))
return nil, nil
}

View File

@ -49,6 +49,7 @@ type LibraryPanelQuery struct {
type DashboardAccess interface {
resource.StorageBackend
resource.ResourceIndexServer
LegacyMigrator
GetDashboard(ctx context.Context, orgId int64, uid string, version int64) (*dashboard.Dashboard, int64, error)
SaveDashboard(ctx context.Context, orgId int64, dash *dashboard.Dashboard) (*dashboard.Dashboard, bool, error)

View File

@ -529,6 +529,9 @@ func (r resourceClientMock) List(ctx context.Context, in *resource.ListRequest,
func (r resourceClientMock) Watch(ctx context.Context, in *resource.WatchRequest, opts ...grpc.CallOption) (resource.ResourceStore_WatchClient, error) {
return nil, nil
}
func (r resourceClientMock) BatchProcess(ctx context.Context, opts ...grpc.CallOption) (resource.BatchStore_BatchProcessClient, error) {
return nil, nil
}
func (r resourceClientMock) Search(ctx context.Context, in *resource.ResourceSearchRequest, opts ...grpc.CallOption) (*resource.ResourceSearchResponse, error) {
if len(in.Options.Labels) > 0 &&
in.Options.Labels[0].Key == utils.LabelKeyDeprecatedInternalID &&

View File

@ -47,6 +47,7 @@ require (
github.com/Azure/go-autorest/autorest/to v0.4.0 // indirect
github.com/AzureAD/microsoft-authentication-library-for-go v1.3.2 // indirect
github.com/BurntSushi/toml v1.4.0 // indirect
github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c // indirect
github.com/Masterminds/goutils v1.1.1 // indirect
github.com/Masterminds/semver/v3 v3.3.0 // indirect
github.com/Masterminds/sprig/v3 v3.3.0 // indirect
@ -57,8 +58,10 @@ require (
github.com/VividCortex/mysqlerr v0.0.0-20170204212430-6c6b55f8796f // indirect
github.com/Yiling-J/theine-go v0.6.0 // indirect
github.com/alecthomas/units v0.0.0-20240927000941-0f3dac36c52b // indirect
github.com/andybalholm/brotli v1.1.1 // indirect
github.com/antlr4-go/antlr/v4 v4.13.1 // indirect
github.com/apache/arrow-go/v18 v18.0.1-0.20241212180703-82be143d7c30 // indirect
github.com/apache/thrift v0.21.0 // indirect
github.com/armon/go-metrics v0.4.1 // indirect
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect
github.com/at-wat/mqtt-go v0.19.4 // indirect
@ -218,6 +221,7 @@ require (
github.com/jpillora/backoff v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/jszwedko/go-datemath v0.1.1-0.20230526204004-640a500621d6 // indirect
github.com/klauspost/asmfmt v1.3.2 // indirect
github.com/klauspost/compress v1.17.11 // indirect
github.com/klauspost/cpuid/v2 v2.2.9 // indirect
github.com/kylelemons/godebug v1.1.0 // indirect
@ -238,6 +242,8 @@ require (
github.com/mdlayher/vsock v1.2.1 // indirect
github.com/mfridman/interpolate v0.0.2 // indirect
github.com/miekg/dns v1.1.62 // indirect
github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 // indirect
github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 // indirect
github.com/mitchellh/copystructure v1.2.0 // indirect
github.com/mitchellh/go-homedir v1.1.0 // indirect
github.com/mitchellh/mapstructure v1.5.1-0.20231216201459-8508981c8b6c // indirect

View File

@ -87,6 +87,8 @@ github.com/DATA-DOG/go-sqlmock v1.5.2/go.mod h1:88MAG/4G7SMwSE3CeA0ZKzrT5CiOU3OJ
github.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ=
github.com/HdrHistogram/hdrhistogram-go v1.1.2 h1:5IcZpTvzydCQeHzK4Ef/D5rrSqwxob0t8PQPMybUNFM=
github.com/HdrHistogram/hdrhistogram-go v1.1.2/go.mod h1:yDgFjdqOqDEKOvasDdhWNXYg9BVp4O+o5f6V/ehm6Oo=
github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c h1:RGWPOewvKIROun94nF7v2cua9qP+thov/7M50KEoeSU=
github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c/go.mod h1:X0CRv0ky0k6m906ixxpzmDRLvX58TFUKS2eePweuyxk=
github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI=
github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU=
github.com/Masterminds/semver v1.5.0 h1:H65muMkzWKEuNDnfl9d70GUjFniHKHRbFPGBuZ3QEww=
@ -1052,6 +1054,8 @@ github.com/xdg/scram v0.0.0-20180814205039-7eeb5667e42c/go.mod h1:lB8K/P019DLNhe
github.com/xdg/stringprep v1.0.0/go.mod h1:Jhud4/sHMO4oL310DaZAKk9ZaJ08SJfe+sJh0HrGL1Y=
github.com/xiang90/probing v0.0.0-20221125231312-a49e3df8f510 h1:S2dVYn90KE98chqDkyE9Z4N61UnQd+KOfgp5Iu53llk=
github.com/xiang90/probing v0.0.0-20221125231312-a49e3df8f510/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU=
github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU=
github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E=
github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=

View File

@ -0,0 +1,6 @@
# Parquet Support
This package implements a limited parquet backend that is currently only useful
as a pass-through buffer while batch writing values.
Eventually this package could evolve into a full storage backend.
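
For orientation, a minimal round-trip sketch using only the APIs added in this package (`NewParquetWriter`, `Write`, `CloseWithResults`, `NewParquetReader`); the key, group/resource names, and JSON payload below are placeholder values:

```go
package main

import (
	"context"
	"fmt"
	"os"

	"github.com/grafana/grafana/pkg/storage/unified/parquet"
	"github.com/grafana/grafana/pkg/storage/unified/resource"
)

func main() {
	file, err := os.CreateTemp("", "export-*.parquet")
	if err != nil {
		panic(err)
	}
	defer func() { _ = os.Remove(file.Name()) }()

	// Write a single (illustrative) resource as raw JSON bytes.
	writer, err := parquet.NewParquetWriter(file)
	if err != nil {
		panic(err)
	}
	key := &resource.ResourceKey{
		Namespace: "default",
		Group:     "dashboard.grafana.app",
		Resource:  "dashboards",
		Name:      "example",
	}
	value := []byte(`{"apiVersion":"dashboard.grafana.app/v0alpha1","kind":"Dashboard",` +
		`"metadata":{"name":"example","namespace":"default"},"spec":{"title":"hello"}}`)
	if err := writer.Write(context.Background(), key, value); err != nil {
		panic(err)
	}
	rsp, err := writer.CloseWithResults()
	if err != nil {
		panic(err)
	}
	fmt.Println("processed:", rsp.Processed)

	// Read the rows back as a stream of BatchRequests.
	iter, err := parquet.NewParquetReader(file.Name(), 100)
	if err != nil {
		panic(err)
	}
	for iter.Next() {
		fmt.Println(iter.Request().Key.SearchID())
	}
}
```

In the migration command, the same writer is wired in through `newParquetClient`, so the parquet file acts purely as a buffer between the legacy reader and whatever consumes the batch stream.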

View File

@ -0,0 +1,78 @@
package parquet
import (
"context"
"errors"
"fmt"
"google.golang.org/grpc"
"google.golang.org/grpc/metadata"
"github.com/grafana/grafana/pkg/storage/unified/resource"
)
var (
_ resource.BatchStoreClient = (*writerClient)(nil)
_ resource.BatchStore_BatchProcessClient = (*writerClient)(nil)
errUnimplemented = errors.New("not implemented (BatchResourceWriter as BatchStoreClient shim)")
)
type writerClient struct {
writer resource.BatchResourceWriter
ctx context.Context
}
// NewBatchResourceWriterClient wraps a BatchResourceWriter so that it can be used as a BatchStoreClient
func NewBatchResourceWriterClient(writer resource.BatchResourceWriter) *writerClient {
return &writerClient{writer: writer}
}
// Send implements resource.ResourceStore_BatchProcessClient.
func (w *writerClient) Send(req *resource.BatchRequest) error {
return w.writer.Write(w.ctx, req.Key, req.Value)
}
// BatchProcess implements resource.ResourceStoreClient.
func (w *writerClient) BatchProcess(ctx context.Context, opts ...grpc.CallOption) (resource.BatchStore_BatchProcessClient, error) {
if w.ctx != nil {
return nil, fmt.Errorf("only one batch request supported")
}
w.ctx = ctx
return w, nil
}
// CloseAndRecv implements resource.ResourceStore_BatchProcessClient.
func (w *writerClient) CloseAndRecv() (*resource.BatchResponse, error) {
return w.writer.CloseWithResults()
}
// CloseSend implements resource.ResourceStore_BatchProcessClient.
func (w *writerClient) CloseSend() error {
return w.writer.Close()
}
// Context implements resource.ResourceStore_BatchProcessClient.
func (w *writerClient) Context() context.Context {
return w.ctx
}
// Header implements resource.ResourceStore_BatchProcessClient.
func (w *writerClient) Header() (metadata.MD, error) {
return nil, errUnimplemented
}
// RecvMsg implements resource.ResourceStore_BatchProcessClient.
func (w *writerClient) RecvMsg(m any) error {
return errUnimplemented
}
// SendMsg implements resource.ResourceStore_BatchProcessClient.
func (w *writerClient) SendMsg(m any) error {
return errUnimplemented
}
// Trailer implements resource.ResourceStore_BatchProcessClient.
func (w *writerClient) Trailer() metadata.MD {
return nil
}

View File

@ -0,0 +1,264 @@
package parquet
import (
"fmt"
"github.com/apache/arrow-go/v18/parquet"
"github.com/apache/arrow-go/v18/parquet/file"
"github.com/grafana/grafana/pkg/storage/unified/resource"
)
var (
_ resource.BatchRequestIterator = (*parquetReader)(nil)
)
func NewParquetReader(inputPath string, batchSize int64) (resource.BatchRequestIterator, error) {
return newResourceReader(inputPath, batchSize)
}
type parquetReader struct {
reader *file.Reader
namespace *stringColumn
group *stringColumn
resource *stringColumn
name *stringColumn
value *stringColumn
folder *stringColumn
action *int32Column
columns []columnBuffer
batchSize int64
defLevels []int16
repLevels []int16
// how many we already read
bufferSize int
bufferIndex int
rowGroupIDX int
req *resource.BatchRequest
err error
}
// Next implements resource.BatchRequestIterator.
func (r *parquetReader) Next() bool {
r.req = nil
for r.err == nil && r.reader != nil {
if r.bufferIndex >= r.bufferSize && r.value.reader.HasNext() {
r.bufferIndex = 0
r.err = r.readBatch()
if r.err != nil {
return false
}
r.bufferSize = r.value.count
}
if r.bufferSize > r.bufferIndex {
i := r.bufferIndex
r.bufferIndex++
r.req = &resource.BatchRequest{
Key: &resource.ResourceKey{
Group: r.group.buffer[i].String(),
Resource: r.resource.buffer[i].String(),
Namespace: r.namespace.buffer[i].String(),
Name: r.name.buffer[i].String(),
},
Action: resource.BatchRequest_Action(r.action.buffer[i]),
Value: r.value.buffer[i].Bytes(),
Folder: r.folder.buffer[i].String(),
}
return true
}
r.rowGroupIDX++
if r.rowGroupIDX >= r.reader.NumRowGroups() {
_ = r.reader.Close()
r.reader = nil
return false
}
r.err = r.open(r.reader.RowGroup(r.rowGroupIDX))
}
return false
}
// Request implements resource.BatchRequestIterator.
func (r *parquetReader) Request() *resource.BatchRequest {
return r.req
}
// RollbackRequested implements resource.BatchRequestIterator.
func (r *parquetReader) RollbackRequested() bool {
return r.err != nil
}
func newResourceReader(inputPath string, batchSize int64) (*parquetReader, error) {
rdr, err := file.OpenParquetFile(inputPath, true)
if err != nil {
return nil, err
}
schema := rdr.MetaData().Schema
makeColumn := func(name string) *stringColumn {
index := schema.ColumnIndexByName(name)
if index < 0 {
err = fmt.Errorf("missing column: %s", name)
}
return &stringColumn{
index: index,
buffer: make([]parquet.ByteArray, batchSize),
}
}
reader := &parquetReader{
reader: rdr,
namespace: makeColumn("namespace"),
group: makeColumn("group"),
resource: makeColumn("resource"),
name: makeColumn("name"),
value: makeColumn("value"),
folder: makeColumn("folder"),
action: &int32Column{
index: schema.ColumnIndexByName("action"),
buffer: make([]int32, batchSize),
},
batchSize: batchSize,
defLevels: make([]int16, batchSize),
repLevels: make([]int16, batchSize),
}
if err != nil {
_ = rdr.Close()
return nil, err
}
reader.columns = []columnBuffer{
reader.namespace,
reader.group,
reader.resource,
reader.name,
reader.folder,
reader.action,
reader.value,
}
// Empty file, close and return
if rdr.NumRowGroups() < 1 {
err = rdr.Close()
reader.reader = nil
return reader, err
}
err = reader.open(rdr.RowGroup(0))
if err != nil {
_ = rdr.Close()
return nil, err
}
// get the first batch
err = reader.readBatch()
if err != nil {
_ = rdr.Close()
return nil, err
}
return reader, nil
}
func (r *parquetReader) open(rgr *file.RowGroupReader) error {
for _, c := range r.columns {
err := c.open(rgr)
if err != nil {
return err
}
}
return nil
}
func (r *parquetReader) readBatch() error {
r.bufferIndex = 0
r.bufferSize = 0
for i, c := range r.columns {
count, err := c.batch(r.batchSize, r.defLevels, r.repLevels)
if err != nil {
return err
}
if i > 0 && r.bufferSize != count {
return fmt.Errorf("expecting the same size for all columns")
}
r.bufferSize = count
}
return nil
}
//-------------------------------
// Column support
//-------------------------------
type columnBuffer interface {
open(rgr *file.RowGroupReader) error
batch(batchSize int64, defLevels []int16, repLevels []int16) (int, error)
}
type stringColumn struct {
index int // within the schema
reader *file.ByteArrayColumnChunkReader
buffer []parquet.ByteArray
count int // the active count
}
func (c *stringColumn) open(rgr *file.RowGroupReader) error {
tmp, err := rgr.Column(c.index)
if err != nil {
return err
}
var ok bool
c.reader, ok = tmp.(*file.ByteArrayColumnChunkReader)
if !ok {
return fmt.Errorf("expected resource strings")
}
return nil
}
func (c *stringColumn) batch(batchSize int64, defLevels []int16, repLevels []int16) (int, error) {
_, count, err := c.reader.ReadBatch(batchSize, c.buffer, defLevels, repLevels)
c.count = count
return count, err
}
type int32Column struct {
index int // within the schema
reader *file.Int32ColumnChunkReader
buffer []int32
count int // the active count
}
func (c *int32Column) open(rgr *file.RowGroupReader) error {
tmp, err := rgr.Column(c.index)
if err != nil {
return err
}
var ok bool
c.reader, ok = tmp.(*file.Int32ColumnChunkReader)
if !ok {
return fmt.Errorf("expected int32 column")
}
return nil
}
func (c *int32Column) batch(batchSize int64, defLevels []int16, repLevels []int16) (int, error) {
_, count, err := c.reader.ReadBatch(batchSize, c.buffer, defLevels, repLevels)
c.count = count
return count, err
}
//-------------------------------
// Column support
//-------------------------------

View File

@ -0,0 +1,125 @@
package parquet
import (
"context"
"os"
"testing"
"github.com/stretchr/testify/require"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"github.com/grafana/grafana/pkg/apimachinery/utils"
"github.com/grafana/grafana/pkg/storage/unified/resource"
)
func TestParquetWriteThenRead(t *testing.T) {
t.Run("read-write-couple-rows", func(t *testing.T) {
file, err := os.CreateTemp(t.TempDir(), "temp-*.parquet")
require.NoError(t, err)
defer func() { _ = os.Remove(file.Name()) }()
writer, err := NewParquetWriter(file)
require.NoError(t, err)
ctx := context.Background()
require.NoError(t, writer.Write(toKeyAndBytes(ctx, "ggg", "rrr", &unstructured.Unstructured{
Object: map[string]any{
"metadata": map[string]any{
"namespace": "ns",
"name": "aaa",
"resourceVersion": "1234",
"annotations": map[string]string{
utils.AnnoKeyFolder: "xyz",
},
},
"spec": map[string]any{
"hello": "first",
},
},
})))
require.NoError(t, writer.Write(toKeyAndBytes(ctx, "ggg", "rrr", &unstructured.Unstructured{
Object: map[string]any{
"metadata": map[string]any{
"namespace": "ns",
"name": "bbb",
"resourceVersion": "5678",
"generation": -999, // deleted action
},
"spec": map[string]any{
"hello": "second",
},
},
})))
require.NoError(t, writer.Write(toKeyAndBytes(ctx, "ggg", "rrr", &unstructured.Unstructured{
Object: map[string]any{
"metadata": map[string]any{
"namespace": "ns",
"name": "ccc",
"resourceVersion": "789",
"generation": 3, // modified action
},
"spec": map[string]any{
"hello": "thirt",
},
},
})))
res, err := writer.CloseWithResults()
require.NoError(t, err)
require.Equal(t, int64(3), res.Processed)
var keys []string
reader, err := newResourceReader(file.Name(), 20)
require.NoError(t, err)
for reader.Next() {
req := reader.Request()
keys = append(keys, req.Key.SearchID())
}
// Verify that we read all values
require.Equal(t, []string{
"rrr/ns/ggg/aaa",
"rrr/ns/ggg/bbb",
"rrr/ns/ggg/ccc",
}, keys)
})
t.Run("read-write-empty-db", func(t *testing.T) {
file, err := os.CreateTemp(t.TempDir(), "temp-*.parquet")
require.NoError(t, err)
defer func() { _ = os.Remove(file.Name()) }()
writer, err := NewParquetWriter(file)
require.NoError(t, err)
err = writer.Close()
require.NoError(t, err)
var keys []string
reader, err := newResourceReader(file.Name(), 20)
require.NoError(t, err)
for reader.Next() {
req := reader.Request()
keys = append(keys, req.Key.SearchID())
}
require.NoError(t, reader.err)
require.Empty(t, keys)
})
}
func toKeyAndBytes(ctx context.Context, group string, res string, obj *unstructured.Unstructured) (context.Context, *resource.ResourceKey, []byte) {
if obj.GetKind() == "" {
obj.SetKind(res)
}
if obj.GetAPIVersion() == "" {
obj.SetAPIVersion(group + "/vXyz")
}
data, _ := obj.MarshalJSON()
return ctx, &resource.ResourceKey{
Namespace: obj.GetNamespace(),
Resource: res,
Group: group,
Name: obj.GetName(),
}, data
}

View File

@ -0,0 +1,209 @@
package parquet
import (
"context"
"io"
"github.com/apache/arrow-go/v18/arrow"
"github.com/apache/arrow-go/v18/arrow/array"
"github.com/apache/arrow-go/v18/arrow/memory"
"github.com/apache/arrow-go/v18/parquet"
"github.com/apache/arrow-go/v18/parquet/compress"
"github.com/apache/arrow-go/v18/parquet/pqarrow"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"github.com/grafana/grafana-app-sdk/logging"
"github.com/grafana/grafana/pkg/apimachinery/utils"
"github.com/grafana/grafana/pkg/storage/unified/resource"
)
var (
_ resource.BatchResourceWriter = (*parquetWriter)(nil)
)
// Write resources into a parquet file
func NewParquetWriter(f io.Writer) (*parquetWriter, error) {
w := &parquetWriter{
pool: memory.DefaultAllocator,
schema: newSchema(nil),
buffer: 1024 * 10 * 100 * 10, // 10MB
logger: logging.DefaultLogger.With("logger", "parquet.writer"),
rsp: &resource.BatchResponse{},
summary: make(map[string]*resource.BatchResponse_Summary),
}
props := parquet.NewWriterProperties(
parquet.WithCompression(compress.Codecs.Brotli),
)
writer, err := pqarrow.NewFileWriter(w.schema, f, props, pqarrow.DefaultWriterProps())
if err != nil {
return nil, err
}
w.writer = writer
return w, w.init()
}
// ProcessBatch implements resource.BatchProcessingBackend.
func (w *parquetWriter) ProcessBatch(ctx context.Context, setting resource.BatchSettings, iter resource.BatchRequestIterator) *resource.BatchResponse {
defer func() { _ = w.Close() }()
var err error
for iter.Next() {
if iter.RollbackRequested() {
break
}
req := iter.Request()
err = w.Write(ctx, req.Key, req.Value)
if err != nil {
break
}
}
rsp, err := w.CloseWithResults()
if err != nil {
w.logger.Warn("error closing parquet file", "err", err)
}
if rsp == nil {
rsp = &resource.BatchResponse{}
}
if err != nil {
rsp.Error = resource.AsErrorResult(err)
}
return rsp
}
type parquetWriter struct {
pool memory.Allocator
buffer int
wrote int
schema *arrow.Schema
writer *pqarrow.FileWriter
logger logging.Logger
rv *array.Int64Builder
namespace *array.StringBuilder
group *array.StringBuilder
resource *array.StringBuilder
name *array.StringBuilder
folder *array.StringBuilder
action *array.Int8Builder
value *array.StringBuilder
rsp *resource.BatchResponse
summary map[string]*resource.BatchResponse_Summary
}
func (w *parquetWriter) CloseWithResults() (*resource.BatchResponse, error) {
err := w.Close()
return w.rsp, err
}
func (w *parquetWriter) Close() error {
if w.rv.Len() > 0 {
_ = w.flush()
}
w.logger.Info("close")
return w.writer.Close()
}
// writes the current buffer to parquet and re-inits the arrow buffer
func (w *parquetWriter) flush() error {
w.logger.Info("flush", "count", w.rv.Len())
rec := array.NewRecord(w.schema, []arrow.Array{
w.rv.NewArray(),
w.namespace.NewArray(),
w.group.NewArray(),
w.resource.NewArray(),
w.name.NewArray(),
w.folder.NewArray(),
w.action.NewArray(),
w.value.NewArray(),
}, int64(w.rv.Len()))
defer rec.Release()
err := w.writer.Write(rec)
if err != nil {
return err
}
return w.init()
}
func (w *parquetWriter) init() error {
w.rv = array.NewInt64Builder(w.pool)
w.namespace = array.NewStringBuilder(w.pool)
w.group = array.NewStringBuilder(w.pool)
w.resource = array.NewStringBuilder(w.pool)
w.name = array.NewStringBuilder(w.pool)
w.folder = array.NewStringBuilder(w.pool)
w.action = array.NewInt8Builder(w.pool)
w.value = array.NewStringBuilder(w.pool)
w.wrote = 0
return nil
}
func (w *parquetWriter) Write(ctx context.Context, key *resource.ResourceKey, value []byte) error {
w.rsp.Processed++
obj := &unstructured.Unstructured{}
err := obj.UnmarshalJSON(value)
if err != nil {
return err
}
meta, err := utils.MetaAccessor(obj)
if err != nil {
return err
}
rv, _ := meta.GetResourceVersionInt64() // it can be empty
w.rv.Append(rv)
w.namespace.Append(key.Namespace)
w.group.Append(key.Group)
w.resource.Append(key.Resource)
w.name.Append(key.Name)
w.folder.Append(meta.GetFolder())
w.value.Append(string(value))
var action resource.WatchEvent_Type
switch meta.GetGeneration() {
case 0, 1:
action = resource.WatchEvent_ADDED
case utils.DeletedGeneration:
action = resource.WatchEvent_DELETED
default:
action = resource.WatchEvent_MODIFIED
}
w.action.Append(int8(action))
w.wrote = w.wrote + len(value)
if w.wrote > w.buffer {
w.logger.Info("buffer full", "buffer", w.wrote, "max", w.buffer)
return w.flush()
}
summary := w.summary[key.BatchID()]
if summary == nil {
summary = &resource.BatchResponse_Summary{
Namespace: key.Namespace,
Group: key.Group,
Resource: key.Resource,
}
w.summary[key.BatchID()] = summary
w.rsp.Summary = append(w.rsp.Summary, summary)
}
summary.Count++
return nil
}
func newSchema(metadata *arrow.Metadata) *arrow.Schema {
return arrow.NewSchema([]arrow.Field{
{Name: "resource_version", Type: &arrow.Int64Type{}, Nullable: false},
{Name: "group", Type: &arrow.StringType{}, Nullable: false},
{Name: "resource", Type: &arrow.StringType{}, Nullable: false},
{Name: "namespace", Type: &arrow.StringType{}, Nullable: false},
{Name: "name", Type: &arrow.StringType{}, Nullable: false},
{Name: "folder", Type: &arrow.StringType{}, Nullable: false},
{Name: "action", Type: &arrow.Int8Type{}, Nullable: false}, // 1,2,3
{Name: "value", Type: &arrow.StringType{}, Nullable: false},
}, metadata)
}

View File

@ -0,0 +1,297 @@
package resource
import (
"context"
"errors"
"fmt"
"io"
"net/http"
"google.golang.org/grpc/metadata"
authlib "github.com/grafana/authlib/types"
"github.com/grafana/grafana/pkg/apimachinery/utils"
)
const grpcMetaKeyCollection = "x-gf-batch-collection"
const grpcMetaKeyRebuildCollection = "x-gf-batch-rebuild-collection"
const grpcMetaKeySkipValidation = "x-gf-batch-skip-validation"
func grpcMetaValueIsTrue(vals []string) bool {
return len(vals) == 1 && vals[0] == "true"
}
type BatchRequestIterator interface {
Next() bool
// The next event we should process
Request() *BatchRequest
// Rollback requested
RollbackRequested() bool
}
type BatchProcessingBackend interface {
ProcessBatch(ctx context.Context, setting BatchSettings, iter BatchRequestIterator) *BatchResponse
}
type BatchResourceWriter interface {
io.Closer
Write(ctx context.Context, key *ResourceKey, value []byte) error
// Called when finished writing
CloseWithResults() (*BatchResponse, error)
}
type BatchSettings struct {
// All requests will be within this namespace/group/resource
Collection []*ResourceKey
// The batch will include everything from the collection
// - all existing values will be removed/replaced if the batch completes successfully
RebuildCollection bool
// The byte[] payload and folder has already been validated - no need to decode and verify
SkipValidation bool
}
func (x *BatchSettings) ToMD() metadata.MD {
md := make(metadata.MD)
if len(x.Collection) > 0 {
for _, v := range x.Collection {
md[grpcMetaKeyCollection] = append(md[grpcMetaKeyCollection], v.SearchID())
}
}
if x.RebuildCollection {
md[grpcMetaKeyRebuildCollection] = []string{"true"}
}
if x.SkipValidation {
md[grpcMetaKeySkipValidation] = []string{"true"}
}
return md
}
func NewBatchSettings(md metadata.MD) (BatchSettings, error) {
settings := BatchSettings{}
for k, v := range md {
switch k {
case grpcMetaKeyCollection:
for _, c := range v {
key := &ResourceKey{}
err := key.ReadSearchID(c)
if err != nil {
return settings, fmt.Errorf("error reading collection metadata: %s / %w", c, err)
}
settings.Collection = append(settings.Collection, key)
}
case grpcMetaKeyRebuildCollection:
settings.RebuildCollection = grpcMetaValueIsTrue(v)
case grpcMetaKeySkipValidation:
settings.SkipValidation = grpcMetaValueIsTrue(v)
}
}
return settings, nil
}
// BatchWrite implements ResourceServer.
// All requests must be to the same NAMESPACE/GROUP/RESOURCE
func (s *server) BatchProcess(stream BatchStore_BatchProcessServer) error {
ctx := stream.Context()
user, ok := authlib.AuthInfoFrom(ctx)
if !ok || user == nil {
return stream.SendAndClose(&BatchResponse{
Error: &ErrorResult{
Message: "no user found in context",
Code: http.StatusUnauthorized,
},
})
}
md, ok := metadata.FromIncomingContext(ctx)
if !ok {
return stream.SendAndClose(&BatchResponse{
Error: &ErrorResult{
Message: "unable to read metadata gRPC request",
Code: http.StatusPreconditionFailed,
},
})
}
runner := &batchRunner{
checker: make(map[string]authlib.ItemChecker), // Can create
stream: stream,
}
settings, err := NewBatchSettings(md)
if err != nil {
return stream.SendAndClose(&BatchResponse{
Error: &ErrorResult{
Message: "error reading settings",
Reason: err.Error(),
Code: http.StatusPreconditionFailed,
},
})
}
if len(settings.Collection) < 1 {
return stream.SendAndClose(&BatchResponse{
Error: &ErrorResult{
Message: "Missing target collection(s) in request header",
Code: http.StatusBadRequest,
},
})
}
// HACK!!! always allow everything!!!!!!
access := authlib.FixedAccessClient(true)
if settings.RebuildCollection {
for _, k := range settings.Collection {
// Can we delete the whole collection
rsp, err := access.Check(ctx, user, authlib.CheckRequest{
Namespace: k.Namespace,
Group: k.Group,
Resource: k.Resource,
Verb: utils.VerbDeleteCollection,
})
if err != nil || !rsp.Allowed {
return stream.SendAndClose(&BatchResponse{
Error: &ErrorResult{
Message: fmt.Sprintf("Requester must be able to: %s", utils.VerbDeleteCollection),
Code: http.StatusForbidden,
},
})
}
// This will be called for each request -- with the folder ID
runner.checker[k.BatchID()], err = access.Compile(ctx, user, authlib.ListRequest{
Namespace: k.Namespace,
Group: k.Group,
Resource: k.Resource,
Verb: utils.VerbCreate,
})
if err != nil {
return stream.SendAndClose(&BatchResponse{
Error: &ErrorResult{
Message: "Unable to check `create` permission",
Code: http.StatusForbidden,
},
})
}
}
} else {
return stream.SendAndClose(&BatchResponse{
Error: &ErrorResult{
Message: "Batch currently only supports RebuildCollection",
Code: http.StatusBadRequest,
},
})
}
backend, ok := s.backend.(BatchProcessingBackend)
if !ok {
return stream.SendAndClose(&BatchResponse{
Error: &ErrorResult{
Message: "The server backend does not support batch processing",
Code: http.StatusNotImplemented,
},
})
}
// BatchProcess requests
rsp := backend.ProcessBatch(ctx, settings, runner)
if rsp == nil {
rsp = &BatchResponse{
Error: &ErrorResult{
Code: http.StatusInternalServerError,
Message: "Nothing returned from process batch",
},
}
}
if runner.err != nil {
rsp.Error = AsErrorResult(runner.err)
}
if rsp.Error == nil && s.search != nil {
// Rebuild any changed indexes
for _, summary := range rsp.Summary {
_, _, err := s.search.build(ctx, NamespacedResource{
Namespace: summary.Namespace,
Group: summary.Group,
Resource: summary.Resource,
}, summary.Count, summary.ResourceVersion)
if err != nil {
s.log.Warn("error building search index after batch load", "err", err)
rsp.Error = &ErrorResult{
Code: http.StatusInternalServerError,
Message: "err building search index: " + summary.Resource,
Reason: err.Error(),
}
}
}
}
return stream.SendAndClose(rsp)
}
var (
_ BatchRequestIterator = (*batchRunner)(nil)
)
type batchRunner struct {
stream BatchStore_BatchProcessServer
rollback bool
request *BatchRequest
err error
checker map[string]authlib.ItemChecker
}
// Next implements BatchRequestIterator.
func (b *batchRunner) Next() bool {
if b.rollback {
return true
}
b.request, b.err = b.stream.Recv()
if errors.Is(b.err, io.EOF) {
b.err = nil
b.rollback = false
b.request = nil
return false
}
if b.err != nil {
b.rollback = true
return true
}
if b.request != nil {
key := b.request.Key
k := key.BatchID()
checker, ok := b.checker[k]
if !ok {
b.err = fmt.Errorf("missing access control for: %s", k)
b.rollback = true
} else if !checker(key.Namespace, key.Name, b.request.Folder) {
b.err = fmt.Errorf("not allowed to create resource")
b.rollback = true
}
return true
}
return false
}
// Request implements BatchRequestIterator.
func (b *batchRunner) Request() *BatchRequest {
if b.rollback {
return nil
}
return b.request
}
// RollbackRequested implements BatchRequestIterator.
func (b *batchRunner) RollbackRequested() bool {
if b.rollback {
b.rollback = false // break iterator
return true
}
return false
}
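
To make the streaming contract above concrete, here is a minimal client-side sketch assembled from the pieces added in this commit (`BatchSettings.ToMD`, the generated `BatchStoreClient`, and `CloseAndRecv`); the namespace, group, resource, and payload are placeholders and error handling is trimmed:

```go
package example

import (
	"context"

	"google.golang.org/grpc/metadata"

	"github.com/grafana/grafana/pkg/storage/unified/resource"
)

// batchWriteOne streams a single resource through BatchProcess. The client is
// assumed to implement resource.BatchStoreClient (for example the client
// returned by ProvideUnifiedStorageClient in the migration command).
func batchWriteOne(ctx context.Context, client resource.BatchStoreClient, value []byte) (*resource.BatchResponse, error) {
	key := &resource.ResourceKey{
		Namespace: "default",
		Group:     "dashboard.grafana.app",
		Resource:  "dashboards",
		Name:      "example",
	}

	// The target collections and flags travel as gRPC metadata, not in the stream.
	settings := resource.BatchSettings{
		RebuildCollection: true, // currently the only supported mode
		Collection: []*resource.ResourceKey{{
			Namespace: key.Namespace,
			Group:     key.Group,
			Resource:  key.Resource,
		}},
	}
	ctx = metadata.NewOutgoingContext(ctx, settings.ToMD())

	stream, err := client.BatchProcess(ctx)
	if err != nil {
		return nil, err
	}
	if err := stream.Send(&resource.BatchRequest{
		Key:    key,
		Action: resource.BatchRequest_ADDED,
		Value:  value,
	}); err != nil {
		return nil, err
	}
	// Per-collection summaries are only returned once the stream is closed.
	return stream.CloseAndRecv()
}
```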

View File

@ -25,6 +25,7 @@ type ResourceClient interface {
ResourceStoreClient
ResourceIndexClient
RepositoryIndexClient
BatchStoreClient
BlobStoreClient
DiagnosticsClient
}
@ -34,6 +35,7 @@ type resourceClient struct {
ResourceStoreClient
ResourceIndexClient
RepositoryIndexClient
BatchStoreClient
BlobStoreClient
DiagnosticsClient
}
@ -44,6 +46,7 @@ func NewLegacyResourceClient(channel *grpc.ClientConn) ResourceClient {
ResourceStoreClient: NewResourceStoreClient(cc),
ResourceIndexClient: NewResourceIndexClient(cc),
RepositoryIndexClient: NewRepositoryIndexClient(cc),
BatchStoreClient: NewBatchStoreClient(cc),
BlobStoreClient: NewBlobStoreClient(cc),
DiagnosticsClient: NewDiagnosticsClient(cc),
}
@ -59,6 +62,7 @@ func NewLocalResourceClient(server ResourceServer) ResourceClient {
&ResourceIndex_ServiceDesc,
&RepositoryIndex_ServiceDesc,
&BlobStore_ServiceDesc,
&BatchStore_ServiceDesc,
&Diagnostics_ServiceDesc,
} {
channel.RegisterService(
@ -82,6 +86,7 @@ func NewLocalResourceClient(server ResourceServer) ResourceClient {
ResourceStoreClient: NewResourceStoreClient(cc),
ResourceIndexClient: NewResourceIndexClient(cc),
RepositoryIndexClient: NewRepositoryIndexClient(cc),
BatchStoreClient: NewBatchStoreClient(cc),
BlobStoreClient: NewBlobStoreClient(cc),
DiagnosticsClient: NewDiagnosticsClient(cc),
}
@ -101,10 +106,12 @@ func NewGRPCResourceClient(tracer tracing.Tracer, conn *grpc.ClientConn) (Resour
cc := grpchan.InterceptClientConn(conn, clientInt.UnaryClientInterceptor, clientInt.StreamClientInterceptor)
return &resourceClient{
ResourceStoreClient: NewResourceStoreClient(cc),
ResourceIndexClient: NewResourceIndexClient(cc),
BlobStoreClient: NewBlobStoreClient(cc),
BatchStoreClient: NewBatchStoreClient(cc),
RepositoryIndexClient: NewRepositoryIndexClient(cc),
DiagnosticsClient: NewDiagnosticsClient(cc),
}, nil
}
@ -126,10 +133,12 @@ func NewCloudResourceClient(tracer tracing.Tracer, conn *grpc.ClientConn, cfg au
cc := grpchan.InterceptClientConn(conn, clientInt.UnaryClientInterceptor, clientInt.StreamClientInterceptor)
return &resourceClient{
ResourceStoreClient: NewResourceStoreClient(cc),
ResourceIndexClient: NewResourceIndexClient(cc),
BlobStoreClient: NewBlobStoreClient(cc),
BatchStoreClient: NewBatchStoreClient(cc),
RepositoryIndexClient: NewRepositoryIndexClient(cc),
DiagnosticsClient: NewDiagnosticsClient(cc),
}, nil
}

View File

@ -48,24 +48,43 @@ func (x *ResourceKey) SearchID() string {
sb.WriteString(x.Group)
sb.WriteString("/")
sb.WriteString(x.Resource)
if x.Name != "" {
sb.WriteString("/")
sb.WriteString(x.Name)
}
return sb.String()
}
func (x *ResourceKey) ReadSearchID(v string) error {
parts := strings.Split(v, "/")
if len(parts) < 3 {
return fmt.Errorf("invalid search id (expecting at least 2 slashes)")
}
}
x.Namespace = parts[0]
x.Group = parts[1]
x.Resource = parts[2]
if len(parts) > 3 {
x.Name = parts[3]
}
if x.Namespace == clusterNamespace {
x.Namespace = ""
}
return nil
}
// The namespace/group/resource
func (x *ResourceKey) BatchID() string {
var sb strings.Builder
if x.Namespace == "" {
sb.WriteString(clusterNamespace)
} else {
sb.WriteString(x.Namespace)
}
sb.WriteString("/")
sb.WriteString(x.Group)
sb.WriteString("/")
sb.WriteString(x.Resource)
return sb.String()
}
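
A small illustrative sketch of the two identifier formats (placeholder values): `SearchID` keeps the resource name, while `BatchID` drops it so that every request for a collection maps to the same access-check key:

```go
package example

import (
	"fmt"

	"github.com/grafana/grafana/pkg/storage/unified/resource"
)

func printKeyIDs() {
	key := &resource.ResourceKey{
		Namespace: "default",
		Group:     "dashboard.grafana.app",
		Resource:  "dashboards",
		Name:      "example",
	}
	fmt.Println(key.SearchID()) // default/dashboard.grafana.app/dashboards/example
	fmt.Println(key.BatchID())  // default/dashboard.grafana.app/dashboards
}
```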

View File

@ -33,13 +33,20 @@ func TestSearchIDKeys(t *testing.T) {
Resource: "resource",
Name: "name",
}},
{input: "/group/resource/",
expected: &ResourceKey{
Namespace: "",
Group: "group",
Resource: "resource",
Name: "",
}},
{input: "default/group/resource",
expected: &ResourceKey{
Namespace: "default",
Group: "group",
Resource: "resource",
Name: "",
}},
{input: "**cluster**/group/resource/aaa", // cluster namespace
expected: &ResourceKey{
Namespace: "",

File diff suppressed because it is too large

View File

@ -308,7 +308,7 @@ message WatchEvent {
// Timestamp the event was sent
int64 timestamp = 1;
// The event type
Type type = 2;
// Resource version for the object
@ -318,6 +318,65 @@ message WatchEvent {
Resource previous = 4;
}
message BatchRequest {
enum Action {
// will be an error
UNKNOWN = 0;
// Matches Watch event enum
ADDED = 1;
MODIFIED = 2;
DELETED = 3;
}
// NOTE everything in the same stream must share the same Namespace/Group/Resource
ResourceKey key = 1;
// Requested action
Action action = 2;
// The resource value
bytes value = 3;
// The folder the resource belongs to
string folder = 4;
}
message BatchResponse {
message Summary {
string namespace = 1;
string group = 2;
string resource = 3;
int64 count = 4;
int64 history = 5;
int64 resource_version = 6; // The max saved RV
// The previous count
int64 previous_count = 7;
int64 previous_history = 8;
}
// Collect a few invalid messages
message Rejected {
ResourceKey key = 1;
BatchRequest.Action action = 2;
string error = 3;
}
// Error details
ErrorResult error = 1;
// Total events processed
int64 processed = 2;
// Summary status for the processed values
repeated Summary summary = 3;
// Rejected
repeated Rejected rejected = 4;
}
// Get statistics across multiple resources
// For these queries, we do not need authorization to see the actual values
message ResourceStatsRequest {
@ -754,6 +813,13 @@ service ResourceStore {
rpc Watch(WatchRequest) returns (stream WatchEvent);
}
service BatchStore {
// Write multiple resources to the same Namespace/Group/Resource
// Events will not be sent until the stream is complete
// Only the *create* permission is checked
rpc BatchProcess(stream BatchRequest) returns (BatchResponse);
}
// Unlike the ResourceStore, this service can be exposed to clients directly
// It should be implemented with efficient indexes and does not need read-after-write semantics
service ResourceIndex {

View File

@ -385,6 +385,135 @@ var ResourceStore_ServiceDesc = grpc.ServiceDesc{
Metadata: "resource.proto",
}
const (
BatchStore_BatchProcess_FullMethodName = "/resource.BatchStore/BatchProcess"
)
// BatchStoreClient is the client API for BatchStore service.
//
// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
type BatchStoreClient interface {
// Write multiple resources to the same Namespace/Group/Resource
// Events will not be sent until the stream is complete
// Only the *create* permission is checked
BatchProcess(ctx context.Context, opts ...grpc.CallOption) (BatchStore_BatchProcessClient, error)
}
type batchStoreClient struct {
cc grpc.ClientConnInterface
}
func NewBatchStoreClient(cc grpc.ClientConnInterface) BatchStoreClient {
return &batchStoreClient{cc}
}
func (c *batchStoreClient) BatchProcess(ctx context.Context, opts ...grpc.CallOption) (BatchStore_BatchProcessClient, error) {
cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
stream, err := c.cc.NewStream(ctx, &BatchStore_ServiceDesc.Streams[0], BatchStore_BatchProcess_FullMethodName, cOpts...)
if err != nil {
return nil, err
}
x := &batchStoreBatchProcessClient{ClientStream: stream}
return x, nil
}
type BatchStore_BatchProcessClient interface {
Send(*BatchRequest) error
CloseAndRecv() (*BatchResponse, error)
grpc.ClientStream
}
type batchStoreBatchProcessClient struct {
grpc.ClientStream
}
func (x *batchStoreBatchProcessClient) Send(m *BatchRequest) error {
return x.ClientStream.SendMsg(m)
}
func (x *batchStoreBatchProcessClient) CloseAndRecv() (*BatchResponse, error) {
if err := x.ClientStream.CloseSend(); err != nil {
return nil, err
}
m := new(BatchResponse)
if err := x.ClientStream.RecvMsg(m); err != nil {
return nil, err
}
return m, nil
}
// BatchStoreServer is the server API for BatchStore service.
// All implementations should embed UnimplementedBatchStoreServer
// for forward compatibility
type BatchStoreServer interface {
// Write multiple resources to the same Namespace/Group/Resource
// Events will not be sent until the stream is complete
// Only the *create* permission is checked
BatchProcess(BatchStore_BatchProcessServer) error
}
// UnimplementedBatchStoreServer should be embedded to have forward compatible implementations.
type UnimplementedBatchStoreServer struct {
}
func (UnimplementedBatchStoreServer) BatchProcess(BatchStore_BatchProcessServer) error {
return status.Errorf(codes.Unimplemented, "method BatchProcess not implemented")
}
// UnsafeBatchStoreServer may be embedded to opt out of forward compatibility for this service.
// Use of this interface is not recommended, as added methods to BatchStoreServer will
// result in compilation errors.
type UnsafeBatchStoreServer interface {
mustEmbedUnimplementedBatchStoreServer()
}
func RegisterBatchStoreServer(s grpc.ServiceRegistrar, srv BatchStoreServer) {
s.RegisterService(&BatchStore_ServiceDesc, srv)
}
func _BatchStore_BatchProcess_Handler(srv interface{}, stream grpc.ServerStream) error {
return srv.(BatchStoreServer).BatchProcess(&batchStoreBatchProcessServer{ServerStream: stream})
}
type BatchStore_BatchProcessServer interface {
SendAndClose(*BatchResponse) error
Recv() (*BatchRequest, error)
grpc.ServerStream
}
type batchStoreBatchProcessServer struct {
grpc.ServerStream
}
func (x *batchStoreBatchProcessServer) SendAndClose(m *BatchResponse) error {
return x.ServerStream.SendMsg(m)
}
func (x *batchStoreBatchProcessServer) Recv() (*BatchRequest, error) {
m := new(BatchRequest)
if err := x.ServerStream.RecvMsg(m); err != nil {
return nil, err
}
return m, nil
}
// BatchStore_ServiceDesc is the grpc.ServiceDesc for BatchStore service.
// It's only intended for direct use with grpc.RegisterService,
// and not to be introspected or modified (even as a copy)
var BatchStore_ServiceDesc = grpc.ServiceDesc{
ServiceName: "resource.BatchStore",
HandlerType: (*BatchStoreServer)(nil),
Methods: []grpc.MethodDesc{},
Streams: []grpc.StreamDesc{
{
StreamName: "BatchProcess",
Handler: _BatchStore_BatchProcess_Handler,
ClientStreams: true,
},
},
Metadata: "resource.proto",
}
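// Example (not part of the generated code): a minimal client-side sketch of
// driving the stream above. It assumes only an established client connection;
// every other name (NewBatchStoreClient, BatchProcess, Send, CloseAndRecv,
// BatchRequest, BatchResponse) comes from this file and the proto definitions.
func exampleBatchProcess(ctx context.Context, cc grpc.ClientConnInterface, key *ResourceKey, value []byte) (*BatchResponse, error) {
	client := NewBatchStoreClient(cc)
	stream, err := client.BatchProcess(ctx)
	if err != nil {
		return nil, err
	}
	// Every request in one stream must share the same Namespace/Group/Resource.
	if err := stream.Send(&BatchRequest{
		Key:    key,
		Action: BatchRequest_ADDED,
		Value:  value,
		Folder: "general", // hypothetical folder name
	}); err != nil {
		return nil, err
	}
	// Summaries and rejected items are only returned once the stream is closed.
	return stream.CloseAndRecv()
}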
const (
ResourceIndex_Search_FullMethodName = "/resource.ResourceIndex/Search"
ResourceIndex_GetStats_FullMethodName = "/resource.ResourceIndex/GetStats"

View File

@ -107,6 +107,9 @@ func newSearchSupport(opts SearchOptions, storage StorageBackend, access types.A
if opts.Backend == nil {
return nil, nil
}
if tracer == nil {
return nil, fmt.Errorf("missing tracer")
}
if opts.WorkerThreads < 1 {
opts.WorkerThreads = 1
@ -384,6 +387,11 @@ func (s *searchSupport) init(ctx context.Context) error {
for {
v := <-events
// Skip events during batch updates
if v.PreviousRV < 0 {
continue
}
s.handleEvent(watchctx, v)
}
}()
@ -496,7 +504,7 @@ func (s *searchSupport) getOrCreateIndex(ctx context.Context, key NamespacedReso
if idx == nil {
idx, _, err = s.build(ctx, key, 10, 0) // unknown size and RV
if err != nil {
return nil, err
return nil, fmt.Errorf("error building search index, %w", err)
}
if idx == nil {
return nil, fmt.Errorf("nil index after build")
@ -541,7 +549,8 @@ func (s *searchSupport) build(ctx context.Context, nsr NamespacedResource, size
// Convert it to an indexable document
doc, err := builder.BuildDocument(ctx, key, iter.ResourceVersion(), iter.Value())
if err != nil {
return err
s.log.Error("error building search document", "key", key.SearchID(), "err", err)
continue
}
// And finally write it to the index

View File

@ -26,6 +26,7 @@ import (
// ResourceServer implements all gRPC services
type ResourceServer interface {
ResourceStoreServer
BatchStoreServer
ResourceIndexServer
RepositoryIndexServer
BlobStoreServer
@ -260,7 +261,7 @@ func NewResourceServer(opts ResourceServerOptions) (ResourceServer, error) {
err := s.Init(ctx)
if err != nil {
s.log.Error("error initializing resource server", "error", err)
s.log.Error("resource server init failed", "error", err)
return nil, err
}
@ -314,7 +315,7 @@ func (s *server) Init(ctx context.Context) error {
}
if s.initErr != nil {
s.log.Error("error initializing resource server", "error", s.initErr)
s.log.Error("error running resource server init", "error", s.initErr)
}
})
return s.initErr
@ -921,6 +922,12 @@ func (s *server) initWatcher() error {
for {
// pipe all events
v := <-events
// Skip events during batch updates
if v.PreviousRV < 0 {
continue
}
s.log.Debug("Server. Streaming Event", "type", v.Type, "previousRV", v.PreviousRV, "group", v.Key.Group, "namespace", v.Key.Namespace, "resource", v.Key.Resource, "name", v.Key.Name)
s.mostRecentRV.Store(v.ResourceVersion)
out <- v

View File

@ -59,6 +59,7 @@ func NewBackend(opts BackendOptions) (Backend, error) {
tracer: opts.Tracer,
dbProvider: opts.DBProvider,
pollingInterval: pollingInterval,
batchLock: &batchLock{running: make(map[string]bool)},
}, nil
}
@ -77,6 +78,7 @@ type backend struct {
dbProvider db.DBProvider
db db.DB
dialect sqltemplate.Dialect
batchLock *batchLock
// watch streaming
//stream chan *resource.WatchEvent
@ -701,7 +703,7 @@ func (b *backend) WatchWriteEvents(ctx context.Context) (<-chan *resource.Writte
// Get the latest RV
since, err := b.listLatestRVs(ctx)
if err != nil {
return nil, fmt.Errorf("get the latest resource version: %w", err)
return nil, fmt.Errorf("watch, get latest resource version: %w", err)
}
// Start the poller
stream := make(chan *resource.WrittenEvent)
@ -713,17 +715,23 @@ func (b *backend) poller(ctx context.Context, since groupResourceRV, stream chan
t := time.NewTicker(b.pollingInterval)
defer close(stream)
defer t.Stop()
isSQLite := b.dialect.DialectName() == "sqlite"
for {
select {
case <-b.done:
return
case <-t.C:
// Block polling during import to avoid database locked issues
if isSQLite && b.batchLock.Active() {
continue
}
ctx, span := b.tracer.Start(ctx, tracePrefix+"poller")
// List the latest RVs
grv, err := b.listLatestRVs(ctx)
if err != nil {
b.log.Error("get the latest resource version", "err", err)
b.log.Error("poller get latest resource version", "err", err)
t.Reset(b.pollingInterval)
continue
}

View File

@ -0,0 +1,338 @@
package sql
import (
"context"
"fmt"
"net/http"
"os"
"sync"
"time"
"github.com/google/uuid"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"github.com/grafana/grafana-app-sdk/logging"
"github.com/grafana/grafana/pkg/apimachinery/utils"
"github.com/grafana/grafana/pkg/storage/unified/parquet"
"github.com/grafana/grafana/pkg/storage/unified/resource"
"github.com/grafana/grafana/pkg/storage/unified/sql/db"
"github.com/grafana/grafana/pkg/storage/unified/sql/dbutil"
"github.com/grafana/grafana/pkg/storage/unified/sql/sqltemplate"
)
var (
_ resource.BatchProcessingBackend = (*backend)(nil)
)
type batchRV struct {
max int64
counter int64
}
func newBatchRV() *batchRV {
t := time.Now().Truncate(time.Second * 10)
return &batchRV{
max: (t.UnixMicro() / 10000000) * 10000000,
counter: 0,
}
}
func (x *batchRV) next(obj metav1.Object) int64 {
ts := obj.GetCreationTimestamp().UnixMicro()
anno := obj.GetAnnotations()
if anno != nil {
v := anno[utils.AnnoKeyUpdatedTimestamp]
t, err := time.Parse(time.RFC3339, v)
if err == nil {
ts = t.UnixMicro()
}
}
if ts > x.max || ts < 10000000 {
ts = x.max
}
x.counter++
return (ts/10000000)*10000000 + x.counter
}
type batchLock struct {
running map[string]bool
mu sync.Mutex
}
func (x *batchLock) Start(keys []*resource.ResourceKey) error {
x.mu.Lock()
defer x.mu.Unlock()
// First verify that it is not already running
ids := make([]string, len(keys))
for i, k := range keys {
id := k.BatchID()
if x.running[id] {
return &apierrors.StatusError{ErrStatus: metav1.Status{
Code: http.StatusPreconditionFailed,
Message: "batch export is already running",
}}
}
ids[i] = id
}
// Then add the keys to the lock
for _, k := range ids {
x.running[k] = true
}
return nil
}
func (x *batchLock) Finish(keys []*resource.ResourceKey) {
x.mu.Lock()
defer x.mu.Unlock()
for _, k := range keys {
delete(x.running, k.BatchID())
}
}
func (x *batchLock) Active() bool {
x.mu.Lock()
defer x.mu.Unlock()
return len(x.running) > 0
}
func (b *backend) ProcessBatch(ctx context.Context, setting resource.BatchSettings, iter resource.BatchRequestIterator) *resource.BatchResponse {
err := b.batchLock.Start(setting.Collection)
if err != nil {
return &resource.BatchResponse{
Error: resource.AsErrorResult(err),
}
}
defer b.batchLock.Finish(setting.Collection)
// For SQLite we first write the batch to a parquet file, then read it back
if b.dialect.DialectName() == "sqlite" {
file, err := os.CreateTemp("", "grafana-batch-export-*.parquet")
if err != nil {
return &resource.BatchResponse{
Error: resource.AsErrorResult(err),
}
}
writer, err := parquet.NewParquetWriter(file)
if err != nil {
return &resource.BatchResponse{
Error: resource.AsErrorResult(err),
}
}
// write batch to parquet
rsp := writer.ProcessBatch(ctx, setting, iter)
if rsp.Error != nil {
return rsp
}
b.log.Info("using parquet buffer", "parquet", file)
// Replace the iterator with one from parquet
iter, err = parquet.NewParquetReader(file.Name(), 50)
if err != nil {
return &resource.BatchResponse{
Error: resource.AsErrorResult(err),
}
}
}
return b.processBatch(ctx, setting, iter)
}
// internal batch process
func (b *backend) processBatch(ctx context.Context, setting resource.BatchSettings, iter resource.BatchRequestIterator) *resource.BatchResponse {
rsp := &resource.BatchResponse{}
err := b.db.WithTx(ctx, ReadCommitted, func(ctx context.Context, tx db.Tx) error {
rollbackWithError := func(err error) error {
txerr := tx.Rollback()
if txerr != nil {
b.log.Warn("rollback", "error", txerr)
} else {
b.log.Info("rollback")
}
return err
}
batch := &batchWorker{
ctx: ctx,
tx: tx,
dialect: b.dialect,
logger: logging.FromContext(ctx),
}
// Calculate the RV based on incoming request timestamps
rv := newBatchRV()
summaries := make(map[string]*resource.BatchResponse_Summary, len(setting.Collection)*4)
// First clear everything in the transaction
if setting.RebuildCollection {
for _, key := range setting.Collection {
summary, err := batch.deleteCollection(key)
if err != nil {
return rollbackWithError(err)
}
summaries[key.BatchID()] = summary
rsp.Summary = append(rsp.Summary, summary)
}
}
obj := &unstructured.Unstructured{}
// Write each event into the history
for iter.Next() {
if iter.RollbackRequested() {
return rollbackWithError(nil)
}
req := iter.Request()
if req == nil {
return rollbackWithError(fmt.Errorf("missing request"))
}
rsp.Processed++
if req.Action == resource.BatchRequest_UNKNOWN {
rsp.Rejected = append(rsp.Rejected, &resource.BatchResponse_Rejected{
Key: req.Key,
Action: req.Action,
Error: "unknown action",
})
continue
}
err := obj.UnmarshalJSON(req.Value)
if err != nil {
rsp.Rejected = append(rsp.Rejected, &resource.BatchResponse_Rejected{
Key: req.Key,
Action: req.Action,
Error: "unable to unmarshal json",
})
continue
}
// Write the event to history
if _, err := dbutil.Exec(ctx, tx, sqlResourceHistoryInsert, sqlResourceRequest{
SQLTemplate: sqltemplate.New(b.dialect),
WriteEvent: resource.WriteEvent{
Key: req.Key,
Type: resource.WatchEvent_Type(req.Action),
Value: req.Value,
PreviousRV: -1, // Used for WATCH, but we want to skip watch events
},
Folder: req.Folder,
GUID: uuid.NewString(),
ResourceVersion: rv.next(obj),
}); err != nil {
return rollbackWithError(fmt.Errorf("insert into resource history: %w", err))
}
}
// Now update the resource table from history
for _, key := range setting.Collection {
k := fmt.Sprintf("%s/%s/%s", key.Namespace, key.Group, key.Resource)
summary := summaries[k]
if summary == nil {
return rollbackWithError(fmt.Errorf("missing summary key for: %s", k))
}
err := batch.syncCollection(key, summary)
if err != nil {
return err
}
// Make sure the collection RV is above our last written event
_, err = b.resourceVersionAtomicInc(ctx, tx, key)
if err != nil {
b.log.Warn("error increasing RV", "error", err)
}
}
return nil
})
if err != nil {
rsp.Error = resource.AsErrorResult(err)
}
return rsp
}
type batchWorker struct {
ctx context.Context
tx db.ContextExecer
dialect sqltemplate.Dialect
logger logging.Logger
}
// This will remove everything from the `resource` and `resource_history` table for a given namespace/group/resource
func (w *batchWorker) deleteCollection(key *resource.ResourceKey) (*resource.BatchResponse_Summary, error) {
summary := &resource.BatchResponse_Summary{
Namespace: key.Namespace,
Group: key.Group,
Resource: key.Resource,
}
// First delete history
res, err := dbutil.Exec(w.ctx, w.tx, sqlResourceHistoryDelete, &sqlResourceHistoryDeleteRequest{
SQLTemplate: sqltemplate.New(w.dialect),
Namespace: key.Namespace,
Group: key.Group,
Resource: key.Resource,
})
if err != nil {
return nil, err
}
summary.PreviousHistory, err = res.RowsAffected()
if err != nil {
return nil, err
}
// Next delete the active resource table
res, err = dbutil.Exec(w.ctx, w.tx, sqlResourceDelete, &sqlResourceRequest{
SQLTemplate: sqltemplate.New(w.dialect),
WriteEvent: resource.WriteEvent{
Key: key,
},
})
if err != nil {
return nil, err
}
summary.PreviousCount, err = res.RowsAffected()
return summary, err
}
// Copy the latest value from history into the active resource table
func (w *batchWorker) syncCollection(key *resource.ResourceKey, summary *resource.BatchResponse_Summary) error {
w.logger.Info("synchronize collection", "key", key.BatchID())
_, err := dbutil.Exec(w.ctx, w.tx, sqlResourceInsertFromHistory, &sqlResourceInsertFromHistoryRequest{
SQLTemplate: sqltemplate.New(w.dialect),
Key: key,
})
if err != nil {
return err
}
w.logger.Info("get stats (still in transaction)", "key", key.BatchID())
rows, err := dbutil.QueryRows(w.ctx, w.tx, sqlResourceStats, &sqlStatsRequest{
SQLTemplate: sqltemplate.New(w.dialect),
Namespace: key.Namespace,
Group: key.Group,
Resource: key.Resource,
})
if err != nil {
return err
}
if rows != nil {
defer func() {
_ = rows.Close()
}()
}
if rows.Next() {
row := resource.ResourceStats{}
return rows.Scan(&row.Namespace, &row.Group, &row.Resource,
&summary.Count,
&summary.ResourceVersion)
}
return err
}
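The resource-version scheme above (newBatchRV and next) floors timestamps to 10-second buckets, since 10,000,000 microseconds is 10 seconds, and then appends a per-batch counter in the freed low digits, so versions within one import stay strictly increasing. A hedged numeric illustration with a made-up timestamp:

package main

import "fmt"

func main() {
	// Hypothetical "updated" time in microseconds (not taken from the commit).
	ts := int64(1_739_300_005_123_456)
	// Integer division floors the value to a 10-second boundary...
	bucket := (ts / 10000000) * 10000000 // 1_739_300_000_000_000
	// ...leaving the low seven digits free for the per-batch counter.
	fmt.Println(bucket + 1) // first resource_version handed out for this bucket
}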

View File

@ -0,0 +1,24 @@
package sql
import (
"testing"
"github.com/stretchr/testify/require"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
)
func TestBatch(t *testing.T) {
t.Parallel()
t.Run("rv iterator", func(t *testing.T) {
t.Parallel()
rv := newBatchRV()
v0 := rv.next(&unstructured.Unstructured{})
v1 := rv.next(&unstructured.Unstructured{})
v2 := rv.next(&unstructured.Unstructured{})
require.True(t, v0 > 1000)
require.Equal(t, int64(1), v1-v0)
require.Equal(t, int64(1), v2-v1)
})
}
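A test for the batch lock could follow the same pattern; a hedged sketch that only relies on the Start, Finish and Active methods shown in batch.go (it would also need the unified resource package imported into this test file):

t.Run("batch lock", func(t *testing.T) {
	t.Parallel()
	lock := &batchLock{running: make(map[string]bool)}
	keys := []*resource.ResourceKey{{
		Namespace: "default",
		Group:     "dashboard.grafana.app",
		Resource:  "dashboards",
	}}
	require.NoError(t, lock.Start(keys))
	require.True(t, lock.Active())
	require.Error(t, lock.Start(keys)) // the same collection cannot be started twice
	lock.Finish(keys)
	require.False(t, lock.Active())
})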

View File

@ -0,0 +1,52 @@
INSERT INTO {{ .Ident "resource" }}
SELECT
kv.{{ .Ident "guid" }},
kv.{{ .Ident "resource_version" }},
kv.{{ .Ident "group" }},
kv.{{ .Ident "resource" }},
kv.{{ .Ident "namespace" }},
kv.{{ .Ident "name" }},
kv.{{ .Ident "value" }},
kv.{{ .Ident "action" }},
kv.{{ .Ident "label_set" }},
kv.{{ .Ident "previous_resource_version" }},
kv.{{ .Ident "folder" }}
FROM {{ .Ident "resource_history" }} AS kv
INNER JOIN (
SELECT {{ .Ident "namespace" }}, {{ .Ident "group" }}, {{ .Ident "resource" }}, {{ .Ident "name" }}, max({{ .Ident "resource_version" }}) AS {{ .Ident "resource_version" }}
FROM {{ .Ident "resource_history" }} AS mkv
WHERE 1 = 1
{{ if .Key.Namespace }}
AND {{ .Ident "namespace" }} = {{ .Arg .Key.Namespace }}
{{ end }}
{{ if .Key.Group }}
AND {{ .Ident "group" }} = {{ .Arg .Key.Group }}
{{ end }}
{{ if .Key.Resource }}
AND {{ .Ident "resource" }} = {{ .Arg .Key.Resource }}
{{ end }}
{{ if .Key.Name }}
AND {{ .Ident "name" }} = {{ .Arg .Key.Name }}
{{ end }}
GROUP BY mkv.{{ .Ident "namespace" }}, mkv.{{ .Ident "group" }}, mkv.{{ .Ident "resource" }}, mkv.{{ .Ident "name" }}
) AS maxkv
ON maxkv.{{ .Ident "resource_version" }} = kv.{{ .Ident "resource_version" }}
AND maxkv.{{ .Ident "namespace" }} = kv.{{ .Ident "namespace" }}
AND maxkv.{{ .Ident "group" }} = kv.{{ .Ident "group" }}
AND maxkv.{{ .Ident "resource" }} = kv.{{ .Ident "resource" }}
AND maxkv.{{ .Ident "name" }} = kv.{{ .Ident "name" }}
WHERE kv.{{ .Ident "action" }} != 3
{{ if .Key.Namespace }}
AND kv.{{ .Ident "namespace" }} = {{ .Arg .Key.Namespace }}
{{ end }}
{{ if .Key.Group }}
AND kv.{{ .Ident "group" }} = {{ .Arg .Key.Group }}
{{ end }}
{{ if .Key.Resource }}
AND kv.{{ .Ident "resource" }} = {{ .Arg .Key.Resource }}
{{ end }}
{{ if .Key.Name }}
AND kv.{{ .Ident "name" }} = {{ .Arg .Key.Name }}
{{ end }}
ORDER BY kv.{{ .Ident "resource_version" }} ASC
;

View File

@ -44,6 +44,7 @@ var (
sqlResourceHistoryPoll = mustTemplate("resource_history_poll.sql")
sqlResourceHistoryGet = mustTemplate("resource_history_get.sql")
sqlResourceHistoryDelete = mustTemplate("resource_history_delete.sql")
sqlResourceInsertFromHistory = mustTemplate("resource_insert_from_history.sql")
// sqlResourceLabelsInsert = mustTemplate("resource_labels_insert.sql")
sqlResourceVersionGet = mustTemplate("resource_version_get.sql")
@ -83,6 +84,18 @@ func (r sqlResourceRequest) Validate() error {
return nil // TODO
}
type sqlResourceInsertFromHistoryRequest struct {
sqltemplate.SQLTemplate
Key *resource.ResourceKey
}
func (r sqlResourceInsertFromHistoryRequest) Validate() error {
if r.Key == nil {
return fmt.Errorf("missing key")
}
return nil
}
type sqlStatsRequest struct {
sqltemplate.SQLTemplate
Namespace string

View File

@ -385,5 +385,18 @@ func TestUnifiedStorageQueries(t *testing.T) {
},
},
},
sqlResourceInsertFromHistory: {
{
Name: "update",
Data: &sqlResourceInsertFromHistoryRequest{
SQLTemplate: mocks.NewTestingSQLTemplate(),
Key: &resource.ResourceKey{
Namespace: "default",
Group: "dashboard.grafana.app",
Resource: "dashboards",
},
},
},
},
}})
}

View File

@ -126,7 +126,9 @@ func (s *service) start(ctx context.Context) error {
srv := s.handler.GetServer()
resource.RegisterResourceStoreServer(srv, server)
resource.RegisterBatchStoreServer(srv, server)
resource.RegisterResourceIndexServer(srv, server)
resource.RegisterRepositoryIndexServer(srv, server)
resource.RegisterBlobStoreServer(srv, server)
resource.RegisterDiagnosticsServer(srv, server)
grpc_health_v1.RegisterHealthServer(srv, healthService)

View File

@ -0,0 +1,34 @@
INSERT INTO `resource`
SELECT
kv.`guid`,
kv.`resource_version`,
kv.`group`,
kv.`resource`,
kv.`namespace`,
kv.`name`,
kv.`value`,
kv.`action`,
kv.`label_set`,
kv.`previous_resource_version`,
kv.`folder`
FROM `resource_history` AS kv
INNER JOIN (
SELECT `namespace`, `group`, `resource`, `name`, max(`resource_version`) AS `resource_version`
FROM `resource_history` AS mkv
WHERE 1 = 1
AND `namespace` = 'default'
AND `group` = 'dashboard.grafana.app'
AND `resource` = 'dashboards'
GROUP BY mkv.`namespace`, mkv.`group`, mkv.`resource`, mkv.`name`
) AS maxkv
ON maxkv.`resource_version` = kv.`resource_version`
AND maxkv.`namespace` = kv.`namespace`
AND maxkv.`group` = kv.`group`
AND maxkv.`resource` = kv.`resource`
AND maxkv.`name` = kv.`name`
WHERE kv.`action` != 3
AND kv.`namespace` = 'default'
AND kv.`group` = 'dashboard.grafana.app'
AND kv.`resource` = 'dashboards'
ORDER BY kv.`resource_version` ASC
;

View File

@ -0,0 +1,34 @@
INSERT INTO "resource"
SELECT
kv."guid",
kv."resource_version",
kv."group",
kv."resource",
kv."namespace",
kv."name",
kv."value",
kv."action",
kv."label_set",
kv."previous_resource_version",
kv."folder"
FROM "resource_history" AS kv
INNER JOIN (
SELECT "namespace", "group", "resource", "name", max("resource_version") AS "resource_version"
FROM "resource_history" AS mkv
WHERE 1 = 1
AND "namespace" = 'default'
AND "group" = 'dashboard.grafana.app'
AND "resource" = 'dashboards'
GROUP BY mkv."namespace", mkv."group", mkv."resource", mkv."name"
) AS maxkv
ON maxkv."resource_version" = kv."resource_version"
AND maxkv."namespace" = kv."namespace"
AND maxkv."group" = kv."group"
AND maxkv."resource" = kv."resource"
AND maxkv."name" = kv."name"
WHERE kv."action" != 3
AND kv."namespace" = 'default'
AND kv."group" = 'dashboard.grafana.app'
AND kv."resource" = 'dashboards'
ORDER BY kv."resource_version" ASC
;

View File

@ -0,0 +1,34 @@
INSERT INTO "resource"
SELECT
kv."guid",
kv."resource_version",
kv."group",
kv."resource",
kv."namespace",
kv."name",
kv."value",
kv."action",
kv."label_set",
kv."previous_resource_version",
kv."folder"
FROM "resource_history" AS kv
INNER JOIN (
SELECT "namespace", "group", "resource", "name", max("resource_version") AS "resource_version"
FROM "resource_history" AS mkv
WHERE 1 = 1
AND "namespace" = 'default'
AND "group" = 'dashboard.grafana.app'
AND "resource" = 'dashboards'
GROUP BY mkv."namespace", mkv."group", mkv."resource", mkv."name"
) AS maxkv
ON maxkv."resource_version" = kv."resource_version"
AND maxkv."namespace" = kv."namespace"
AND maxkv."group" = kv."group"
AND maxkv."resource" = kv."resource"
AND maxkv."name" = kv."name"
WHERE kv."action" != 3
AND kv."namespace" = 'default'
AND kv."group" = 'dashboard.grafana.app'
AND kv."resource" = 'dashboards'
ORDER BY kv."resource_version" ASC
;