Search: Add substring matcher (#54813)

* search: bring back substring search

* lint fix

* search: bring back ngram

* search: bring back ngram

* search: more tests
This commit is contained in:
Artur Wierzbicki 2022-09-08 12:10:30 +02:00 committed by GitHub
parent b4e3c89f60
commit 1353177e15
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 424 additions and 152 deletions

View File

@ -24,7 +24,6 @@ const (
documentFieldName = "name"
documentFieldName_sort = "name_sort"
documentFieldName_ngram = "name_ngram"
documentFieldDescription = "description"
documentFieldLocation = "location" // parent path
documentFieldPanelType = "panel_type"
documentFieldTransformer = "transformer"
@ -232,14 +231,11 @@ func newSearchDocument(uid string, name string, descr string, url string) *bluge
doc.AddField(bluge.NewTextField(documentFieldName_ngram, name).WithAnalyzer(ngramIndexAnalyzer))
// Don't add a field for empty names
sortStr := strings.Trim(strings.ToUpper(name), " ")
sortStr := formatForNameSortField(name)
if len(sortStr) > 0 {
doc.AddField(bluge.NewKeywordField(documentFieldName_sort, sortStr).Sortable())
}
}
if descr != "" {
doc.AddField(bluge.NewTextField(documentFieldDescription, descr).SearchTermPositions())
}
if url != "" {
doc.AddField(bluge.NewKeywordField(documentFieldURL, url).StoreValue())
}
@ -435,21 +431,19 @@ func doSearchQuery(
fullQuery.AddShould(bluge.NewMatchAllQuery())
}
} else {
// The actual se
bq := bluge.NewBooleanQuery().
AddShould(bluge.NewMatchQuery(q.Query).SetField(documentFieldName).SetBoost(6)).
AddShould(bluge.NewMatchQuery(q.Query).SetField(documentFieldDescription).SetBoost(3)).
AddShould(bluge.NewMatchQuery(q.Query).
bq := bluge.NewBooleanQuery()
bq.AddShould(NewSubstringQuery(formatForNameSortField(q.Query)).
SetField(documentFieldName_sort).
SetBoost(6))
if shouldUseNgram(q) {
bq.AddShould(bluge.NewMatchQuery(q.Query).
SetField(documentFieldName_ngram).
SetOperator(bluge.MatchQueryOperatorAnd). // all terms must match
SetAnalyzer(ngramQueryAnalyzer).SetBoost(1))
}
if len(q.Query) > 4 {
bq.AddShould(bluge.NewFuzzyQuery(q.Query).SetField(documentFieldName)).SetBoost(1.5)
}
if len(q.Query) > ngramEdgeFilterMaxLength && !strings.Contains(q.Query, " ") {
bq.AddShould(bluge.NewPrefixQuery(strings.ToLower(q.Query)).SetField(documentFieldName)).SetBoost(6)
}
fullQuery.AddMust(bq)
}
@ -664,6 +658,25 @@ func doSearchQuery(
return response
}
// shouldUseNgram reports whether the ngram match clause is worth adding for
// the given query.
//
// It returns false only when the query is longer than
// ngramEdgeFilterMaxLength bytes and at least one of its
// whitespace-separated tokens is itself longer than that limit; in every
// other case it returns true.
func shouldUseNgram(q DashboardQuery) bool {
	if len(q.Query) <= ngramEdgeFilterMaxLength {
		// The whole query fits within the limit, so every token does too.
		return true
	}

	for _, token := range strings.Fields(q.Query) {
		// ngram will never match if at least one input token exceeds the max
		// token length, as all tokens must match simultaneously with the
		// `bluge.MatchQueryOperatorAnd` operator.
		if len(token) > ngramEdgeFilterMaxLength {
			return false
		}
	}

	return true
}
// formatForNameSortField normalizes a name for the name-sort field: the name
// is upper-cased and then leading/trailing space characters (U+0020 only) are
// stripped. Other whitespace, such as tabs, is left untouched.
func formatForNameSortField(name string) string {
	upper := strings.ToUpper(name)
	return strings.Trim(upper, " ")
}
func getLocationLookupInfo(ctx context.Context, reader *bluge.Reader, uids map[string]bool) map[string]locationItem {
res := make(map[string]locationItem, len(uids))
bq := bluge.NewBooleanQuery()

View File

@ -683,6 +683,54 @@ func TestDashboardIndex_MultiTermPrefixMatch(t *testing.T) {
),
query: "Prometheus stat",
},
{
dashboards: dashboardsWithTitles(
"Loki Tests - Bar Gauge 2",
"Loki 2.0",
"Loki 2.0 Stats",
"Loki 20.0",
"Loki Second Word",
"Loki Stats",
"dynamic (2)",
"Loki histogram",
"Loki histogram2",
"roci-simple-2",
"x not y",
),
query: "Loki 2.",
},
{
dashboards: dashboardsWithTitles(
"Loki Tests - Bar Gauge 2",
"Loki 2.0",
"Loki 2.0 Stats",
"Loki 20.0",
"Loki Second Word",
"Loki Stats",
"dynamic (2)",
"Loki histogram",
"Loki histogram2",
"roci-simple-2",
"x not y",
),
query: "Lok",
},
{
dashboards: dashboardsWithTitles(
"Loki Tests - Bar Gauge 2",
"Loki 2.0",
"Loki 2.0 Stats",
"Loki 20.0",
"Loki Second Word",
"Loki Stats",
"dynamic (2)",
"Loki histogram",
"Loki histogram2",
"roci-simple-2",
"x not y",
),
query: "Loki stats",
},
}
for i, tt := range tests {

View File

@ -0,0 +1,102 @@
// based on https://github.com/blugelabs/bluge/blob/57414197005148539c5dc5db8ab581594969df79/query.go#L1407-L1482, license:
// Copyright (c) 2020 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searchV2
import (
"strings"
"github.com/blugelabs/bluge/search"
"github.com/blugelabs/bluge/search/searcher"
"github.com/blugelabs/bluge/search/similarity"
)
// boost is an optional score multiplier for a query.
type boost float64

// Value returns the multiplier as a float64. A nil receiver means no boost
// was configured, in which case the neutral value 1 is returned.
func (b *boost) Value() float64 {
	if b != nil {
		return float64(*b)
	}
	return 1
}
// SubstringQuery describes a search for a literal piece of text inside a
// field. Its Searcher method escapes the text and wraps it as
// `.*<escaped text>.*` before handing it to a regexp searcher.
type SubstringQuery struct {
// substring is the literal text to look for (set by NewSubstringQuery).
substring string
// field is the name of the field to search; when empty, Searcher falls back
// to options.DefaultSearchField.
field string
// boost is the optional score multiplier; nil is treated as 1 by boost.Value.
boost *boost
// scorer is passed through to the searcher as-is. NOTE(review): nothing in
// the visible code assigns it, so it appears to always be nil — confirm.
scorer search.Scorer
}
// NewSubstringQuery creates a SubstringQuery looking for the given literal
// text. Field and boost are left unset; configure them with SetField and
// SetBoost.
func NewSubstringQuery(wildcard string) *SubstringQuery {
	q := new(SubstringQuery)
	q.substring = wildcard
	return q
}
// Wildcard returns the literal substring this query searches for, exactly as
// it was passed to NewSubstringQuery (no regexp escaping applied).
func (q *SubstringQuery) Wildcard() string {
	return q.substring
}
// SetBoost stores b as the query's score multiplier and returns the query so
// that calls can be chained.
func (q *SubstringQuery) SetBoost(b float64) *SubstringQuery {
	q.boost = new(boost)
	*q.boost = boost(b)
	return q
}
// Boost returns the configured score multiplier, or 1 when SetBoost has not
// been called (see boost.Value).
func (q *SubstringQuery) Boost() float64 {
	return q.boost.Value()
}
// SetField sets the name of the field to search and returns the query so
// that calls can be chained.
func (q *SubstringQuery) SetField(f string) *SubstringQuery {
	q.field = f
	return q
}
// Field returns the field name set via SetField; it is empty when no field
// was configured.
func (q *SubstringQuery) Field() string {
	return q.field
}
// regexpEscaper backslash-escapes every regexp metacharacter in a substring
// so that the text is matched literally once it is embedded in a regular
// expression.
//
// All keys are single bytes, so each input byte is rewritten at most once and
// the backslashes inserted by one rule are never re-escaped by another.
var regexpEscaper = strings.NewReplacer(
	// characters in the substring that must
	// be escaped in the regexp
	"+", `\+`,
	"*", `\*`,
	// `?` is a quantifier as well; left unescaped, a query such as "what?"
	// would also match "wha", and "??" could form a lazy quantifier.
	"?", `\?`,
	"(", `\(`,
	")", `\)`,
	"^", `\^`,
	"$", `\$`,
	".", `\.`,
	"{", `\{`,
	"}", `\}`,
	"[", `\[`,
	"]", `\]`,
	`|`, `\|`,
	`\`, `\\`)
// Searcher builds the searcher that executes this query against reader i.
//
// The substring is escaped with regexpEscaper and wrapped as `.*<text>.*`, so
// the resulting regexp accepts any value containing the text. The query's own
// field is used when set; otherwise options.DefaultSearchField is used. Any
// error comes from searcher.NewRegexpStringSearcher.
func (q *SubstringQuery) Searcher(i search.Reader, options search.SearcherOptions) (search.Searcher, error) {
	targetField := options.DefaultSearchField
	if q.field != "" {
		targetField = q.field
	}

	pattern := ".*" + regexpEscaper.Replace(q.substring) + ".*"

	return searcher.NewRegexpStringSearcher(
		i,
		pattern,
		targetField,
		q.boost.Value(),
		q.scorer,
		similarity.NewCompositeSumScorer(),
		options,
	)
}
// Validate always returns nil: nothing is checked up front.
func (q *SubstringQuery) Validate() error {
	// real validation delayed until searcher constructor
	return nil
}

View File

@ -2,23 +2,15 @@
//
// Frame[0]
// Name: ordering frame
// Dimensions: 2 Fields by 10 Rows
// +---------------------------+--------------------+
// | Name: name | Name: score |
// | Labels: | Labels: |
// | Type: []string | Type: []float64 |
// +---------------------------+--------------------+
// | dynamic (2) | 61.26983678470653 |
// | roci-simple-2 | 52.417145624935095 |
// | Prometheus 2.0 | 45.193422538475374 |
// | Prometheus 20.0 | 45.193422538475374 |
// | Prometheus Stats | 45.193422538475374 |
// | prometheus histogram | 45.193422538475374 |
// | prometheus histogram2 | 45.193422538475374 |
// | Panel Tests - Bar Gauge 2 | 40.74215154954845 |
// | Prometheus 2.0 Stats | 38.70210379766075 |
// | Prometheus Second Word | 38.70210379766075 |
// +---------------------------+--------------------+
// Dimensions: 2 Fields by 2 Rows
// +----------------------+-------------------+
// | Name: name | Name: score |
// | Labels: | Labels: |
// | Type: []string | Type: []float64 |
// +----------------------+-------------------+
// | Prometheus 2.0 | 7.621131552585596 |
// | Prometheus 2.0 Stats | 7.621131552585596 |
// +----------------------+-------------------+
//
//
// 🌟 This was machine generated. Do not edit. 🌟
@ -47,28 +39,12 @@
"data": {
"values": [
[
"dynamic (2)",
"roci-simple-2",
"Prometheus 2.0",
"Prometheus 20.0",
"Prometheus Stats",
"prometheus histogram",
"prometheus histogram2",
"Panel Tests - Bar Gauge 2",
"Prometheus 2.0 Stats",
"Prometheus Second Word"
"Prometheus 2.0 Stats"
],
[
61.26983678470653,
52.417145624935095,
45.193422538475374,
45.193422538475374,
45.193422538475374,
45.193422538475374,
45.193422538475374,
40.74215154954845,
38.70210379766075,
38.70210379766075
7.621131552585596,
7.621131552585596
]
]
}

View File

@ -3,16 +3,16 @@
// Frame[0]
// Name: ordering frame
// Dimensions: 2 Fields by 4 Rows
// +----------------------+--------------------+
// | Name: name | Name: score |
// | Labels: | Labels: |
// | Type: []string | Type: []float64 |
// +----------------------+--------------------+
// | Prometheus 2.0 | 2.861163199700355 |
// | Prometheus 20.0 | 2.795309067950216 |
// | Prometheus Stats | 2.6766576425310045 |
// | Prometheus 2.0 Stats | 2.5727172425512226 |
// +----------------------+--------------------+
// +----------------------+-------------------+
// | Name: name | Name: score |
// | Labels: | Labels: |
// | Type: []string | Type: []float64 |
// +----------------------+-------------------+
// | Prometheus 2.0 | 9.502378631081775 |
// | Prometheus 20.0 | 9.458475876581684 |
// | Prometheus Stats | 9.379374926302209 |
// | Prometheus 2.0 Stats | 9.310081326315688 |
// +----------------------+-------------------+
//
//
// 🌟 This was machine generated. Do not edit. 🌟
@ -47,10 +47,10 @@
"Prometheus 2.0 Stats"
],
[
2.861163199700355,
2.795309067950216,
2.6766576425310045,
2.5727172425512226
9.502378631081775,
9.458475876581684,
9.379374926302209,
9.310081326315688
]
]
}

View File

@ -2,20 +2,14 @@
//
// Frame[0]
// Name: ordering frame
// Dimensions: 2 Fields by 7 Rows
// +------------------------+--------------------+
// | Name: name | Name: score |
// | Labels: | Labels: |
// | Type: []string | Type: []float64 |
// +------------------------+--------------------+
// | Prometheus 2.0 | 45.193422538475374 |
// | Prometheus 20.0 | 45.193422538475374 |
// | Prometheus Stats | 45.193422538475374 |
// | prometheus histogram | 45.193422538475374 |
// | prometheus histogram2 | 45.193422538475374 |
// | Prometheus 2.0 Stats | 38.70210379766075 |
// | Prometheus Second Word | 38.70210379766075 |
// +------------------------+--------------------+
// Dimensions: 2 Fields by 1 Rows
// +------------------+-------------------+
// | Name: name | Name: score |
// | Labels: | Labels: |
// | Type: []string | Type: []float64 |
// +------------------+-------------------+
// | Prometheus Stats | 7.621131552585596 |
// +------------------+-------------------+
//
//
// 🌟 This was machine generated. Do not edit. 🌟
@ -44,22 +38,10 @@
"data": {
"values": [
[
"Prometheus 2.0",
"Prometheus 20.0",
"Prometheus Stats",
"prometheus histogram",
"prometheus histogram2",
"Prometheus 2.0 Stats",
"Prometheus Second Word"
"Prometheus Stats"
],
[
45.193422538475374,
45.193422538475374,
45.193422538475374,
45.193422538475374,
45.193422538475374,
38.70210379766075,
38.70210379766075
7.621131552585596
]
]
}

View File

@ -0,0 +1,62 @@
// 🌟 This was machine generated. Do not edit. 🌟
//
// Frame[0]
// Name: ordering frame
// Dimensions: 2 Fields by 5 Rows
// +--------------------------+--------------------+
// | Name: name | Name: score |
// | Labels: | Labels: |
// | Type: []string | Type: []float64 |
// +--------------------------+--------------------+
// | Loki 2.0 | 9.273036652923247 |
// | Loki 2.0 Stats | 8.951742733604135 |
// | Loki 20.0 | 2.57580523764178 |
// | Loki histogram2 | 2.2807887502617943 |
// | Loki Tests - Bar Gauge 2 | 2.045832444623899 |
// +--------------------------+--------------------+
//
//
// 🌟 This was machine generated. Do not edit. 🌟
{
"frames": [
{
"schema": {
"name": "ordering frame",
"fields": [
{
"name": "name",
"type": "string",
"typeInfo": {
"frame": "string"
}
},
{
"name": "score",
"type": "number",
"typeInfo": {
"frame": "float64"
}
}
]
},
"data": {
"values": [
[
"Loki 2.0",
"Loki 2.0 Stats",
"Loki 20.0",
"Loki histogram2",
"Loki Tests - Bar Gauge 2"
],
[
9.273036652923247,
8.951742733604135,
2.57580523764178,
2.2807887502617943,
2.045832444623899
]
]
}
}
]
}

View File

@ -0,0 +1,71 @@
// 🌟 This was machine generated. Do not edit. 🌟
//
// Frame[0]
// Name: ordering frame
// Dimensions: 2 Fields by 8 Rows
// +--------------------------+-------------------+
// | Name: name | Name: score |
// | Labels: | Labels: |
// | Type: []string | Type: []float64 |
// +--------------------------+-------------------+
// | Loki 2.0 | 8.386497572003142 |
// | Loki 20.0 | 8.351238737232393 |
// | Loki Stats | 8.289644692681875 |
// | Loki 2.0 Stats | 8.237634633059454 |
// | Loki histogram | 8.237634633059454 |
// | Loki histogram2 | 8.214550743132483 |
// | Loki Second Word | 8.173207674966303 |
// | Loki Tests - Bar Gauge 2 | 8.105690026892566 |
// +--------------------------+-------------------+
//
//
// 🌟 This was machine generated. Do not edit. 🌟
{
"frames": [
{
"schema": {
"name": "ordering frame",
"fields": [
{
"name": "name",
"type": "string",
"typeInfo": {
"frame": "string"
}
},
{
"name": "score",
"type": "number",
"typeInfo": {
"frame": "float64"
}
}
]
},
"data": {
"values": [
[
"Loki 2.0",
"Loki 20.0",
"Loki Stats",
"Loki 2.0 Stats",
"Loki histogram",
"Loki histogram2",
"Loki Second Word",
"Loki Tests - Bar Gauge 2"
],
[
8.386497572003142,
8.351238737232393,
8.289644692681875,
8.237634633059454,
8.237634633059454,
8.214550743132483,
8.173207674966303,
8.105690026892566
]
]
}
}
]
}

View File

@ -0,0 +1,53 @@
// 🌟 This was machine generated. Do not edit. 🌟
//
// Frame[0]
// Name: ordering frame
// Dimensions: 2 Fields by 2 Rows
// +----------------+-------------------+
// | Name: name | Name: score |
// | Labels: | Labels: |
// | Type: []string | Type: []float64 |
// +----------------+-------------------+
// | Loki Stats | 9.397899591158676 |
// | Loki 2.0 Stats | 2.638536183312778 |
// +----------------+-------------------+
//
//
// 🌟 This was machine generated. Do not edit. 🌟
{
"frames": [
{
"schema": {
"name": "ordering frame",
"fields": [
{
"name": "name",
"type": "string",
"typeInfo": {
"frame": "string"
}
},
{
"name": "score",
"type": "number",
"typeInfo": {
"frame": "float64"
}
}
]
},
"data": {
"values": [
[
"Loki Stats",
"Loki 2.0 Stats"
],
[
9.397899591158676,
2.638536183312778
]
]
}
}
]
}

View File

@ -3,19 +3,17 @@
// Frame[0] {
// "type": "search-results",
// "custom": {
// "count": 2
// "count": 0
// }
// }
// Name: Query results
// Dimensions: 8 Fields by 2 Rows
// +----------------+----------------+----------------------------------------------------------------------+------------------+----------------+--------------------------+-------------------------+----------------+
// | Name: kind | Name: uid | Name: name | Name: panel_type | Name: url | Name: tags | Name: ds_uid | Name: location |
// | Labels: | Labels: | Labels: | Labels: | Labels: | Labels: | Labels: | Labels: |
// | Type: []string | Type: []string | Type: []string | Type: []string | Type: []string | Type: []*json.RawMessage | Type: []json.RawMessage | Type: []string |
// +----------------+----------------+----------------------------------------------------------------------+------------------+----------------+--------------------------+-------------------------+----------------+
// | dashboard | 2 | A secret is powerful when it is empty (Umberto Eco) | | /pfix/d/2/ | null | [] | |
// | dashboard | 1 | Three can keep a secret, if two of them are dead (Benjamin Franklin) | | /pfix/d/1/ | null | [] | |
// +----------------+----------------+----------------------------------------------------------------------+------------------+----------------+--------------------------+-------------------------+----------------+
// Dimensions: 8 Fields by 0 Rows
// +----------------+----------------+----------------+------------------+----------------+--------------------------+-------------------------+----------------+
// | Name: kind | Name: uid | Name: name | Name: panel_type | Name: url | Name: tags | Name: ds_uid | Name: location |
// | Labels: | Labels: | Labels: | Labels: | Labels: | Labels: | Labels: | Labels: |
// | Type: []string | Type: []string | Type: []string | Type: []string | Type: []string | Type: []*json.RawMessage | Type: []json.RawMessage | Type: []string |
// +----------------+----------------+----------------+------------------+----------------+--------------------------+-------------------------+----------------+
// +----------------+----------------+----------------+------------------+----------------+--------------------------+-------------------------+----------------+
//
//
// 🌟 This was machine generated. Do not edit. 🌟
@ -27,7 +25,7 @@
"meta": {
"type": "search-results",
"custom": {
"count": 2
"count": 0
}
},
"fields": [
@ -100,38 +98,14 @@
},
"data": {
"values": [
[
"dashboard",
"dashboard"
],
[
"2",
"1"
],
[
"A secret is powerful when it is empty (Umberto Eco)",
"Three can keep a secret, if two of them are dead (Benjamin Franklin)"
],
[
"",
""
],
[
"/pfix/d/2/",
"/pfix/d/1/"
],
[
null,
null
],
[
[],
[]
],
[
"",
""
]
[],
[],
[],
[],
[],
[],
[],
[]
]
}
}

View File

@ -3,18 +3,17 @@
// Frame[0] {
// "type": "search-results",
// "custom": {
// "count": 2
// "count": 1
// }
// }
// Name: Query results
// Dimensions: 8 Fields by 2 Rows
// Dimensions: 8 Fields by 1 Rows
// +----------------+----------------+----------------------------------------------------------------------+------------------+----------------+--------------------------+-------------------------+----------------+
// | Name: kind | Name: uid | Name: name | Name: panel_type | Name: url | Name: tags | Name: ds_uid | Name: location |
// | Labels: | Labels: | Labels: | Labels: | Labels: | Labels: | Labels: | Labels: |
// | Type: []string | Type: []string | Type: []string | Type: []string | Type: []string | Type: []*json.RawMessage | Type: []json.RawMessage | Type: []string |
// +----------------+----------------+----------------------------------------------------------------------+------------------+----------------+--------------------------+-------------------------+----------------+
// | dashboard | 1 | Three can keep a secret, if two of them are dead (Benjamin Franklin) | | /pfix/d/1/ | null | [] | |
// | dashboard | 2 | A secret is powerful when it is empty (Umberto Eco) | | /pfix/d/2/ | null | [] | |
// +----------------+----------------+----------------------------------------------------------------------+------------------+----------------+--------------------------+-------------------------+----------------+
//
//
@ -27,7 +26,7 @@
"meta": {
"type": "search-results",
"custom": {
"count": 2
"count": 1
}
},
"fields": [
@ -101,35 +100,27 @@
"data": {
"values": [
[
"dashboard",
"dashboard"
],
[
"1",
"2"
"1"
],
[
"Three can keep a secret, if two of them are dead (Benjamin Franklin)",
"A secret is powerful when it is empty (Umberto Eco)"
"Three can keep a secret, if two of them are dead (Benjamin Franklin)"
],
[
"",
""
],
[
"/pfix/d/1/",
"/pfix/d/2/"
"/pfix/d/1/"
],
[
null,
null
],
[
[],
[]
],
[
"",
""
]
]