Search PoC: Indexmapping per resource and transform to intermediate IndexedResource (#94906)

* Transforms raw US resource into an intermediate IndexableResource and indexes that. Pulls index mapping code out into different file. For now, we will hardcode which spec fields are indexed, per resource.

* Fixes a few bugs with field casing and timestamps not being formatted right (or not existing).

* adds readme section for using search with US

* extracts to function to transform from search hit to IndexedResource

* get folders when building index
This commit is contained in:
owensmallwood 2024-10-21 09:34:41 -06:00 committed by GitHub
parent 40f0a72db0
commit fb9cfd0d1b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 243 additions and 82 deletions

View File

@ -270,3 +270,24 @@ go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
```
- make changes in `.proto` file
- to compile all protobuf files in the repository run `make protobuf` at its top level
## Setting up search (EXPERIMENTAL)
Unified storage now exposes an **experimental** search API. It can be used to search for specific resources, or to filter/query resources.
To enable it, add the following to your `custom.ini` under the `[feature_toggles]` section:
```ini
[feature_toggles]
unifiedStorageSearch = true
```
To access the api through Grafana, go to Explore -> Query Type -> Search.
The query needs to be a valid [Bleve query string](https://blevesearch.com/docs/Query-String-Query/).
Some example queries are:
- `*` - returns all objects
- `Kind:Playlist` - returns all playlists
- `Spec.interval:5m` - returns all objects with the `Spec.interval` field set to 5m
- `+Kind:Playlist +Spec.title:p4` - returns all playlists with the title matching "p4"
- `*foo*` - returns all objects containing "foo" in any field
- `CreatedAt:>="2024-10-17"` - returns all objects created on or after 2024-10-17
- `+CreatedAt:>="2024-10-17" +Kind:Playlist` - returns all playlists created on or after 2024-10-17

View File

@ -6,14 +6,10 @@ import (
"fmt"
golog "log"
"os"
"strings"
"github.com/blevesearch/bleve/v2"
"github.com/blevesearch/bleve/v2/analysis/lang/en"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/google/uuid"
"github.com/grafana/grafana/pkg/infra/log"
"golang.org/x/exp/slices"
)
type Shard struct {
@ -52,12 +48,13 @@ func (i *Index) IndexBatch(list *ListResponse, kind string) error {
}
i.log.Debug("initial indexing resources batch", "count", len(list.Items), "kind", kind, "tenant", tenant(res))
var jsonDoc interface{}
err = json.Unmarshal(obj.Value, &jsonDoc)
// Transform the raw resource into a more generic indexable resource
indexableResource, err := NewIndexedResource(obj.Value)
if err != nil {
return err
}
err = shard.batch.Index(res.Metadata.Uid, jsonDoc)
err = shard.batch.Index(res.Metadata.Uid, indexableResource)
if err != nil {
return err
}
@ -115,12 +112,12 @@ func (i *Index) Index(ctx context.Context, data *Data) error {
if err != nil {
return err
}
var jsonDoc interface{}
err = json.Unmarshal(data.Value.Value, &jsonDoc)
// Transform the raw resource into a more generic indexable resource
indexableResource, err := NewIndexedResource(data.Value.Value)
if err != nil {
return err
}
err = shard.index.Index(res.Metadata.Uid, jsonDoc)
err = shard.index.Index(res.Metadata.Uid, indexableResource)
if err != nil {
return err
}
@ -139,7 +136,7 @@ func (i *Index) Delete(ctx context.Context, uid string, key *ResourceKey) error
return nil
}
func (i *Index) Search(ctx context.Context, tenant string, query string, limit int, offset int) ([]SearchSummary, error) {
func (i *Index) Search(ctx context.Context, tenant string, query string, limit int, offset int) ([]IndexedResource, error) {
if tenant == "" {
tenant = "default"
}
@ -153,6 +150,9 @@ func (i *Index) Search(ctx context.Context, tenant string, query string, limit i
}
i.log.Info("got index for tenant", "tenant", tenant, "docCount", docCount)
fields, _ := shard.index.Fields()
i.log.Debug("indexed fields", "fields", fields)
// use 10 as a default limit for now
if limit <= 0 {
limit = 10
@ -173,30 +173,10 @@ func (i *Index) Search(ctx context.Context, tenant string, query string, limit i
i.log.Info("got search results", "hits", hits)
results := make([]SearchSummary, len(hits))
results := make([]IndexedResource, len(hits))
for resKey, hit := range hits {
searchSummary := SearchSummary{}
// add common fields to search results
searchSummary.Kind = hit.Fields["kind"].(string)
searchSummary.Metadata.CreationTimestamp = hit.Fields["metadata.creationTimestamp"].(string)
searchSummary.Metadata.Uid = hit.Fields["metadata.uid"].(string)
// add allowed indexed spec fields to search results
specResult := map[string]interface{}{}
for k, v := range hit.Fields {
if strings.HasPrefix(k, "spec.") {
mappedFields := specFieldMappings(searchSummary.Kind)
// should only include spec fields we care about in search results
if slices.Contains(mappedFields, k) {
specKey := strings.TrimPrefix(k, "spec.")
specResult[specKey] = v
}
}
searchSummary.Spec = specResult
}
results[resKey] = searchSummary
ir := IndexedResource{}.FromSearchHit(hit)
results[resKey] = ir
}
return results, nil
@ -242,43 +222,6 @@ func createFileIndex() (bleve.Index, string, error) {
return index, indexPath, err
}
// createIndexMappings builds one index mapping that is used as the default
// mapping for every document. It maps:
//   - "kind" as an English-analyzed text field,
//   - "metadata.creationTimestamp" as a datetime field and "metadata.uid" as a
//     text field (no other metadata fields, since the metadata mapping is not dynamic),
//   - everything under "spec" dynamically.
func createIndexMappings() *mapping.IndexMappingImpl {
// Create mapping for the creationTimestamp field in the metadata
creationTimestampFieldMapping := bleve.NewDateTimeFieldMapping()
uidMapping := bleve.NewTextFieldMapping()
metaMapping := bleve.NewDocumentMapping()
metaMapping.AddFieldMappingsAt("creationTimestamp", creationTimestampFieldMapping)
metaMapping.AddFieldMappingsAt("uid", uidMapping)
// Only the two metadata fields added above are mapped.
metaMapping.Dynamic = false
metaMapping.Enabled = true
// Spec is different for all resources, so we create a dynamic mapping for it to index all fields (for now)
specMapping := bleve.NewDocumentMapping()
specMapping.Dynamic = true
specMapping.Enabled = true
// Create the top-level object mapping and attach the metadata and spec sub-document mappings
objectMapping := bleve.NewDocumentMapping()
objectMapping.AddSubDocumentMapping("metadata", metaMapping)
objectMapping.AddSubDocumentMapping("spec", specMapping)
// NOTE: Dynamic is set to true here but is overwritten with false further down,
// so the top-level mapping ends up non-dynamic.
objectMapping.Dynamic = true
objectMapping.Enabled = true
// a generic reusable mapping for english text
englishTextFieldMapping := bleve.NewTextFieldMapping()
englishTextFieldMapping.Analyzer = en.AnalyzerName
// Map top level fields - just kind for now
objectMapping.AddFieldMappingsAt("kind", englishTextFieldMapping)
// Final value for the top-level mapping: only explicitly mapped fields are handled.
objectMapping.Dynamic = false
// Create the index mapping and use the object mapping for all documents
indexMapping := bleve.NewIndexMapping()
indexMapping.DefaultMapping = objectMapping
return indexMapping
}
func getResource(data []byte) (*Resource, error) {
res := &Resource{}
err := json.Unmarshal(data, res)
@ -316,17 +259,11 @@ func fetchResourceTypes() []*ListOptions {
Group: "playlist.grafana.app",
Resource: "playlists",
},
}, &ListOptions{
Key: &ResourceKey{
Group: "folder.grafana.app",
Resource: "folders",
},
})
return items
}
// specFieldMappings returns the fully qualified spec field names ("spec.<name>")
// that are allowed in search results for the given kind.
// Kinds without an entry yield a nil slice.
func specFieldMappings(kind string) []string {
	switch kind {
	case "Playlist":
		return []string{
			"spec.title",
			"spec.interval",
		}
	}
	return nil
}

View File

@ -0,0 +1,203 @@
package resource
import (
"strings"
"github.com/blevesearch/bleve/v2"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/grafana/grafana/pkg/apimachinery/utils"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
)
// IndexedResource is the flattened representation of a resource that is handed
// to the search index and rebuilt from search hits.
// NewIndexedResource creates one from raw resource JSON; FromSearchHit fills one
// from the fields of a search hit. The Go field names double as the index field
// names (e.g. "Kind", "CreatedAt", "Spec.<field>").
type IndexedResource struct {
// Group is the API group from the resource's group/version/kind.
Group string
// Namespace is the resource's namespace.
Namespace string
// Kind is the resource kind; the index mapping uses it as the type field to
// select the per-kind document mapping.
Kind string
// Name is the resource's name.
Name string
// Title is the title found on the resource by the meta accessor.
Title string
// CreatedAt is the creation timestamp formatted as "2006-01-02T15:04:05Z";
// it is mapped as a datetime field.
CreatedAt string
// CreatedBy identifies who created the resource.
CreatedBy string
// UpdatedAt is the last-updated timestamp in the same format as CreatedAt.
// NewIndexedResource falls back to CreatedAt when the resource has no updated timestamp.
UpdatedAt string
// UpdatedBy identifies who last updated the resource.
UpdatedBy string
// FolderId is mapped as a text field in the index.
// NOTE(review): NewIndexedResource does not populate it — confirm whether that is intended.
FolderId string
// Spec holds the resource's spec as returned by the meta accessor when indexing.
// When rebuilt from a search hit it is a map[string]interface{} keyed by spec field name.
Spec any
}
// FromSearchHit fills the resource from the fields of a bleve search hit and
// returns the populated copy. The method has a value receiver, so the caller's
// value is not modified; use the return value.
//
// Top-level fields are read from hit.Fields under their index names ("Kind",
// "Name", "Namespace", "Group", "CreatedAt", "CreatedBy", "UpdatedAt",
// "UpdatedBy", "Title"). A field that is missing from the hit, or whose value
// is not a string, becomes "" rather than causing a panic on a failed type
// assertion. "FolderId" is copied only when the hit carries it as a string.
//
// Every hit field named "Spec.<key>" is collected into Spec as a
// map[string]interface{} keyed by <key>. Spec is always set to a non-nil map.
//
// A nil hit returns the receiver unchanged.
func (ir IndexedResource) FromSearchHit(hit *search.DocumentMatch) IndexedResource {
	if hit == nil {
		return ir
	}

	// stringField reads a hit field as a string; absent or non-string values yield "".
	stringField := func(name string) string {
		s, _ := hit.Fields[name].(string)
		return s
	}

	ir.Kind = stringField("Kind")
	ir.Name = stringField("Name")
	ir.Namespace = stringField("Namespace")
	ir.Group = stringField("Group")
	ir.CreatedAt = stringField("CreatedAt")
	ir.CreatedBy = stringField("CreatedBy")
	ir.UpdatedAt = stringField("UpdatedAt")
	ir.UpdatedBy = stringField("UpdatedBy")
	ir.Title = stringField("Title")

	// FolderId is part of the index mapping; keep the receiver's value when the
	// hit does not provide one.
	if folderID, ok := hit.Fields["FolderId"].(string); ok {
		ir.FolderId = folderID
	}

	// add indexed spec fields to search results
	specResult := make(map[string]interface{})
	for k, v := range hit.Fields {
		if strings.HasPrefix(k, "Spec.") {
			specResult[strings.TrimPrefix(k, "Spec.")] = v
		}
	}
	ir.Spec = specResult

	return ir
}
// NewIndexedResource creates a new IndexedResource from a raw resource.
// rawResource is the raw json for the resource from unified storage.
//
// The JSON is decoded as an unstructured Kubernetes object and read through the
// Grafana meta accessor. CreatedAt and UpdatedAt are converted to UTC and
// formatted as "2006-01-02T15:04:05Z"; when the resource has no updated
// timestamp, UpdatedAt falls back to CreatedAt.
//
// An error is returned when the JSON cannot be decoded, the meta accessor cannot
// be created, the updated timestamp is invalid, or the spec cannot be read.
// FolderId is not populated here.
func NewIndexedResource(rawResource []byte) (*IndexedResource, error) {
	// The layout ends in a literal "Z", so times must be converted to UTC before
	// formatting; otherwise a local or offset time would be labelled as UTC.
	const timestampLayout = "2006-01-02T15:04:05Z"

	k8sObj := unstructured.Unstructured{}
	if err := k8sObj.UnmarshalJSON(rawResource); err != nil {
		return nil, err
	}

	meta, err := utils.MetaAccessor(&k8sObj)
	if err != nil {
		return nil, err
	}

	gvk := meta.GetGroupVersionKind()

	ir := &IndexedResource{
		Name:      meta.GetName(),
		Title:     meta.FindTitle(""),
		Namespace: meta.GetNamespace(),
		Group:     gvk.Group,
		Kind:      gvk.Kind,
		CreatedAt: meta.GetCreationTimestamp().Time.UTC().Format(timestampLayout),
		CreatedBy: meta.GetCreatedBy(),
	}

	updatedAt, err := meta.GetUpdatedTimestamp()
	if err != nil {
		return nil, err
	}
	if updatedAt != nil {
		ir.UpdatedAt = updatedAt.UTC().Format(timestampLayout)
	} else {
		// Never-updated resources are treated as last updated at creation time.
		ir.UpdatedAt = ir.CreatedAt
	}
	ir.UpdatedBy = meta.GetUpdatedBy()

	spec, err := meta.GetSpec()
	if err != nil {
		return nil, err
	}
	ir.Spec = spec

	return ir, nil
}
// createIndexMappings builds the bleve index mapping with one document mapping
// per resource kind returned by getSpecObjectMappings.
func createIndexMappings() *mapping.IndexMappingImpl {
	im := bleve.NewIndexMapping()

	// The document's "Kind" field selects which per-kind document mapping applies.
	im.TypeField = "Kind"

	// Register a document mapping for every kind we know spec fields for.
	for kind := range getSpecObjectMappings() {
		im.AddDocumentMapping(kind, createIndexMappingForKind(kind))
	}

	return im
}
// createIndexMappingForKind builds the document mapping for one resource kind:
// the common top-level fields plus the kind-specific "Spec" sub-document.
// The mapping is not dynamic, so only explicitly listed fields are mapped.
func createIndexMappingForKind(resourceKind string) *mapping.DocumentMapping {
	docMapping := bleve.NewDocumentMapping()
	docMapping.Dynamic = false // only map fields that we have explicitly defined

	// Spec differs per resource, so its mapping is generated from the kind.
	docMapping.AddSubDocumentMapping("Spec", createSpecObjectMapping(resourceKind))

	// Top-level text fields; each gets its own field mapping instance.
	textFields := []string{
		"Group",
		"Namespace",
		"Kind",
		"Name",
		"Title",
		"CreatedBy",
		"UpdatedBy",
		"FolderId",
	}
	for _, name := range textFields {
		docMapping.AddFieldMappingsAt(name, bleve.NewTextFieldMapping())
	}

	// Top-level timestamp fields.
	for _, name := range []string{"CreatedAt", "UpdatedAt"} {
		docMapping.AddFieldMappingsAt(name, bleve.NewDateTimeFieldMapping())
	}

	return docMapping
}
// SpecFieldMapping describes a single field under a resource's spec that
// should be indexed, and how it should be indexed.
type SpecFieldMapping struct {
// Field is the name of the field within the spec.
Field string
// Type selects the kind of field mapping created for the field:
// "string" (text), "int"/"int64"/"float64" (numeric), "bool" (boolean) or
// "time" (datetime). Any other value results in no mapping for the field.
Type string
}
// getSpecObjectMappings returns, per resource kind, the spec fields that get indexed.
// The list is hardcoded for now; eventually resource owners are expected to
// declare the indexed fields on the resources themselves.
func getSpecObjectMappings() map[string][]SpecFieldMapping {
	return map[string][]SpecFieldMapping{
		"Playlist": {
			{Field: "interval", Type: "string"},
			{Field: "title", Type: "string"},
		},
		"Folder": {
			{Field: "title", Type: "string"},
			{Field: "description", Type: "string"},
		},
	}
}
// createSpecObjectMapping builds the non-dynamic "Spec" sub-document mapping for
// the given kind from the fields listed in getSpecObjectMappings.
// Kinds without an entry get an empty mapping; fields with an unsupported type
// are left unmapped.
func createSpecObjectMapping(kind string) *mapping.DocumentMapping {
	specDoc := bleve.NewDocumentMapping()
	specDoc.Dynamic = false

	for _, field := range getSpecObjectMappings()[kind] {
		// Pick the bleve field mapping that matches the declared field type.
		var fieldMapping *mapping.FieldMapping
		switch field.Type {
		case "string":
			fieldMapping = bleve.NewTextFieldMapping()
		case "int", "int64", "float64":
			fieldMapping = bleve.NewNumericFieldMapping()
		case "bool":
			fieldMapping = bleve.NewBooleanFieldMapping()
		case "time":
			fieldMapping = bleve.NewDateTimeFieldMapping()
		default:
			// TODO support indexing arrays and nested fields
			// We are only indexing top level string,int, and bool fields within spec for now. Arrays or nested fields are not yet supported.
			continue
		}
		specDoc.AddFieldMappingsAt(field.Field, fieldMapping)
	}

	return specDoc
}