SQLStore: Create utility for bulk inserts (#57632)

* Add batchsize to dialects

* Some intermediary progress

* Reflection-based implementation

* Validate batch size

* Implement a few simple tests

* Fix linter error

* Add debug message when falling back to default batch size
This commit is contained in:
Alexander Weaver 2022-11-01 13:24:32 -05:00 committed by GitHub
parent d8a4a7c90d
commit ea8c5ce929
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 172 additions and 0 deletions

View File

@ -0,0 +1,60 @@
package sqlstore
import (
"fmt"
"reflect"
"github.com/grafana/grafana/pkg/services/sqlstore/migrator"
)
// DefaultBatchSize is the fallback number of records per batch, used when a
// caller supplies a non-positive batch size (see normalizeBulkSettings).
const DefaultBatchSize = 1000
// BulkOpSettings holds tuning knobs for bulk database operations.
type BulkOpSettings struct {
	// BatchSize is the maximum number of records processed per statement.
	// Non-positive values are replaced with DefaultBatchSize.
	BatchSize int
}
// NativeSettingsForDialect builds bulk-operation settings using the batch
// size natively recommended by the given SQL dialect.
func NativeSettingsForDialect(d migrator.Dialect) BulkOpSettings {
	settings := BulkOpSettings{}
	settings.BatchSize = d.BatchSize()
	return settings
}
// normalizeBulkSettings sanitizes bulk-operation settings, substituting
// DefaultBatchSize for any non-positive batch size.
func normalizeBulkSettings(s BulkOpSettings) BulkOpSettings {
	if s.BatchSize >= 1 {
		return s
	}
	sessionLogger.Debug("Invalid batch size, falling back to the default", "requested", s.BatchSize, "actual", DefaultBatchSize)
	s.BatchSize = DefaultBatchSize
	return s
}
// BulkInsert inserts the given records into table in batches, returning the
// total number of rows the database reported as inserted. On error, the
// count reflects only the batches that completed before the failure.
func (sess *DBSession) BulkInsert(table interface{}, recordsSlice interface{}, opts BulkOpSettings) (int64, error) {
	var total int64
	insertBatch := func(batch interface{}) error {
		affected, err := sess.Table(table).InsertMulti(batch)
		total += affected
		return err
	}
	err := InBatches(recordsSlice, opts, insertBatch)
	return total, err
}
// InBatches splits items (which must be a slice, or a pointer to one) into
// chunks of at most opts.BatchSize elements and invokes fn once per chunk,
// stopping at the first error.
func InBatches(items interface{}, opts BulkOpSettings, fn func(batch interface{}) error) error {
	opts = normalizeBulkSettings(opts)
	slice := reflect.Indirect(reflect.ValueOf(items))
	if slice.Kind() != reflect.Slice {
		return fmt.Errorf("need a slice of objects in order to batch")
	}

	total := slice.Len()
	for start := 0; start < total; start += opts.BatchSize {
		end := start + opts.BatchSize
		if end > total {
			end = total
		}
		if err := fn(slice.Slice(start, end).Interface()); err != nil {
			return err
		}
	}
	return nil
}

View File

@ -0,0 +1,97 @@
package sqlstore
import (
"context"
"testing"
"github.com/stretchr/testify/require"
)
// bulkTestItem is a minimal table model used to exercise bulk inserts
// against a real database in the integration tests below.
type bulkTestItem struct {
	ID    int64
	Value string `xorm:"varchar(10)"`
}
// TestBatching covers the pure batching helper InBatches: empty input,
// invalid (non-positive) batch sizes, non-slice input, and chunk counts
// when the batch size does not evenly divide the input length.
func TestBatching(t *testing.T) {
	t.Run("InBatches", func(t *testing.T) {
		t.Run("calls fn 0 times if items is empty", func(t *testing.T) {
			var calls int
			fn := func(batch interface{}) error { calls += 1; return nil }
			opts := BulkOpSettings{BatchSize: DefaultBatchSize}

			err := InBatches([]int{}, opts, fn)

			require.NoError(t, err)
			require.Zero(t, calls)
		})

		t.Run("succeeds if batch size is nonpositive", func(t *testing.T) {
			var calls int
			fn := func(batch interface{}) error { calls += 1; return nil }
			// BUG FIX: this subtest previously passed DefaultBatchSize (a
			// positive value), so the nonpositive fallback path was never
			// exercised. Use a genuinely nonpositive size; normalization
			// falls back to DefaultBatchSize, so 3 items fit in one batch.
			opts := BulkOpSettings{BatchSize: -1}

			err := InBatches([]int{1, 2, 3}, opts, fn)

			require.NoError(t, err)
			require.Equal(t, 1, calls)
		})

		t.Run("rejects if items is not a slice", func(t *testing.T) {
			var calls int
			fn := func(batch interface{}) error { calls += 1; return nil }
			opts := BulkOpSettings{BatchSize: DefaultBatchSize}

			err := InBatches("lol", opts, fn)

			require.Error(t, err)
			// fn must never run when the input is rejected up front.
			require.Zero(t, calls)
		})

		t.Run("calls expected number of times when batch size does not evenly divide length", func(t *testing.T) {
			var calls int
			fn := func(batch interface{}) error { calls += 1; return nil }
			opts := BulkOpSettings{BatchSize: 5}
			vals := make([]int, 93)

			err := InBatches(vals, opts, fn)

			require.NoError(t, err)
			// ceil(93 / 5) = 19: eighteen full batches plus one of 3.
			require.Equal(t, 19, calls)
		})
	})
}
// TestBulkOps runs BulkInsert against a real test database (integration
// test; skipped under -short).
func TestBulkOps(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping integration test")
	}
	db := InitTestDB(t)
	require.NoError(t, db.engine.Sync(new(bulkTestItem)))

	t.Run("insert several records", func(t *testing.T) {
		const recordCount = 45
		records := make([]bulkTestItem, recordCount)
		settings := NativeSettingsForDialect(db.GetDialect())
		settings.BatchSize = 10

		var inserted int64
		err := db.WithDbSession(context.Background(), func(sess *DBSession) error {
			var insertErr error
			inserted, insertErr = sess.BulkInsert(bulkTestItem{}, records, settings)
			return insertErr
		})

		require.NoError(t, err)
		require.Equal(t, int64(recordCount), inserted)
		assertTableCount(t, db, bulkTestItem{}, recordCount)
	})
}
// assertTableCount asserts that the given table model contains exactly
// expCount rows.
func assertTableCount(t *testing.T, db *SQLStore, table interface{}, expCount int64) {
	t.Helper()
	err := db.WithDbSession(context.Background(), func(sess *DBSession) error {
		// BUG FIX: count the caller-supplied table rather than the
		// hard-coded bulkTestItem{} — otherwise this helper silently
		// checks the wrong table for any other model.
		total, err := sess.Table(table).Count()
		if err != nil {
			// Fail on the query error itself instead of producing a
			// confusing zero-vs-expected count mismatch.
			return err
		}
		require.Equal(t, expCount, total)
		return nil
	})
	require.NoError(t, err)
}

View File

@ -26,6 +26,7 @@ type Dialect interface {
Default(col *Column) string
BooleanStr(bool) string
DateTimeFunc(string) string
BatchSize() int
OrderBy(order string) string

View File

@ -44,6 +44,10 @@ func (db *MySQLDialect) BooleanStr(value bool) string {
return "0"
}
// BatchSize returns the number of rows to include in a single bulk
// operation for MySQL.
func (db *MySQLDialect) BatchSize() int {
	const mysqlBulkBatchSize = 1000
	return mysqlBulkBatchSize
}
func (db *MySQLDialect) SQLType(c *Column) string {
var res string
switch c.Type {

View File

@ -45,6 +45,10 @@ func (db *PostgresDialect) BooleanStr(value bool) string {
return strconv.FormatBool(value)
}
// BatchSize returns the number of rows to include in a single bulk
// operation for Postgres.
func (db *PostgresDialect) BatchSize() int {
	const postgresBulkBatchSize = 1000
	return postgresBulkBatchSize
}
func (db *PostgresDialect) Default(col *Column) string {
if col.Type == DB_Bool {
if col.Default == "0" {

View File

@ -40,6 +40,12 @@ func (db *SQLite3) BooleanStr(value bool) string {
return "0"
}
// BatchSize returns the number of rows to include in a single bulk
// operation for SQLite. A deliberately small batch keeps the per-statement
// bound-parameter count low.
// NOTE(review): the "100" figure below looks wrong — SQLite's documented
// default SQLITE_MAX_VARIABLE_NUMBER is 999 (32766 since 3.32). The small
// batch size is still a safe conservative choice, but confirm the intended
// limit before relying on this comment.
func (db *SQLite3) BatchSize() int {
	// SQLite has a maximum parameter count per statement of 100.
	// So, we use a small batch size to support write operations.
	return 10
}
// DateTimeFunc wraps the given SQL expression in SQLite's datetime()
// function.
func (db *SQLite3) DateTimeFunc(value string) string {
	wrapped := "datetime(" + value + ")"
	return wrapped
}