Elasticsearch: Add log highlight processing on backend (#63924)

* Elasticsearch: Add highlight for logs

* Fix running of queries trough backend only when shouldRunTroughBackend

* Fix incorrect json in test string

* Address feedback

* Keep order of words same
This commit is contained in:
Ivana Huckova 2023-03-09 13:34:53 +01:00 committed by GitHub
parent cb8e01214d
commit bd0fa79542
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 206 additions and 24 deletions

View File

@ -6,9 +6,9 @@ import (
)
const (
highlightPreTagsString = "@HIGHLIGHT@"
highlightPostTagsString = "@/HIGHLIGHT@"
highlightFragmentSize = 2147483647
HighlightPreTagsString = "@HIGHLIGHT@"
HighlightPostTagsString = "@/HIGHLIGHT@"
HighlightFragmentSize = 2147483647
)
// SearchRequestBuilder represents a builder which can build a search request
@ -103,9 +103,9 @@ func (b *SearchRequestBuilder) AddHighlight() *SearchRequestBuilder {
"fields": map[string]interface{}{
"*": map[string]interface{}{},
},
"pre_tags": []string{highlightPreTagsString},
"post_tags": []string{highlightPostTagsString},
"fragment_size": highlightFragmentSize,
"pre_tags": []string{HighlightPreTagsString},
"post_tags": []string{HighlightPostTagsString},
"fragment_size": HighlightFragmentSize,
}
return b
}

View File

@ -3,6 +3,7 @@ package elasticsearch
import (
"encoding/json"
"errors"
"fmt"
"regexp"
"sort"
"strconv"
@ -36,6 +37,8 @@ const (
logsType = "logs"
)
var searchWordsRegex = regexp.MustCompile(regexp.QuoteMeta(es.HighlightPreTagsString) + `(.*?)` + regexp.QuoteMeta(es.HighlightPostTagsString))
func parseResponse(responses []*es.SearchResponse, targets []*Query, configuredFields es.ConfiguredFields) (*backend.QueryDataResponse, error) {
result := backend.QueryDataResponse{
Responses: backend.Responses{},
@ -94,6 +97,7 @@ func parseResponse(responses []*es.SearchResponse, targets []*Query, configuredF
func processLogsResponse(res *es.SearchResponse, target *Query, configuredFields es.ConfiguredFields, queryRes *backend.DataResponse) error {
propNames := make(map[string]bool)
docs := make([]map[string]interface{}, len(res.Hits.Hits))
searchWords := make(map[string]bool)
for hitIdx, hit := range res.Hits.Hits {
var flattened map[string]interface{}
@ -121,7 +125,22 @@ func processLogsResponse(res *es.SearchResponse, target *Query, configuredFields
for key := range doc {
propNames[key] = true
}
// TODO: Implement highlighting
// Process highlight to searchWords
if highlights, ok := doc["highlight"].(map[string]interface{}); ok {
for _, highlight := range highlights {
if highlightList, ok := highlight.([]interface{}); ok {
for _, highlightValue := range highlightList {
str := fmt.Sprintf("%v", highlightValue)
matches := searchWordsRegex.FindAllStringSubmatch(str, -1)
for _, v := range matches {
searchWords[v[1]] = true
}
}
}
}
}
docs[hitIdx] = doc
}
@ -132,6 +151,7 @@ func processLogsResponse(res *es.SearchResponse, target *Query, configuredFields
frames := data.Frames{}
frame := data.NewFrame("", fields...)
setPreferredVisType(frame, "logs")
setSearchWords(frame, searchWords)
frames = append(frames, frame)
queryRes.Frames = frames
@ -1073,3 +1093,25 @@ func setPreferredVisType(frame *data.Frame, visType data.VisType) {
frame.Meta.PreferredVisualization = visType
}
func setSearchWords(frame *data.Frame, searchWords map[string]bool) {
i := 0
searchWordsList := make([]string, len(searchWords))
for searchWord := range searchWords {
searchWordsList[i] = searchWord
i++
}
sort.Strings(searchWordsList)
if frame.Meta == nil {
frame.Meta = &data.FrameMeta{}
}
if frame.Meta.Custom == nil {
frame.Meta.Custom = map[string]interface{}{}
}
frame.Meta.Custom = map[string]interface{}{
"searchWords": searchWordsList,
}
}

View File

@ -1164,6 +1164,77 @@ func TestResponseParser(t *testing.T) {
require.Equal(t, data.FieldTypeNullableString, frame.Fields[15].Type())
})
t.Run("Log query with highlight", func(t *testing.T) {
targets := map[string]string{
"A": `{
"metrics": [{ "type": "logs" }]
}`,
}
response := `{
"responses":[
{
"hits":{
"total":{
"value":109,
"relation":"eq"
},
"max_score":null,
"hits":[
{
"_index":"logs-2023.02.08",
"_id":"GB2UMYYBfCQ-FCMjayJa",
"_score":null,
"highlight": {
"line": [
"@HIGHLIGHT@hello@/HIGHLIGHT@, i am a @HIGHLIGHT@message@/HIGHLIGHT@"
],
"duplicated": ["@HIGHLIGHT@hello@/HIGHLIGHT@"]
},
"_source":{
"@timestamp":"2023-02-08T15:10:55.830Z",
"line":"log text [479231733]"
}
},
{
"_index":"logs-2023.02.08",
"_id":"GB2UMYYBfCQ-FCMjayJa",
"_score":null,
"highlight": {
"line": [
"@HIGHLIGHT@hello@/HIGHLIGHT@, i am a @HIGHLIGHT@message@/HIGHLIGHT@"
],
"duplicated": ["@HIGHLIGHT@hello@/HIGHLIGHT@"]
},
"_source":{
"@timestamp":"2023-02-08T15:10:55.830Z",
"line":"log text [479231733]"
}
}
]
},
"status":200
}
]
}`
result, err := parseTestResponse(targets, response)
require.NoError(t, err)
require.Len(t, result.Responses, 1)
queryRes := result.Responses["A"]
require.NotNil(t, queryRes)
dataframes := queryRes.Frames
require.Len(t, dataframes, 1)
frame := dataframes[0]
customMeta := frame.Meta.Custom
require.Equal(t, map[string]interface{}{
"searchWords": []string{"hello", "message"},
}, customMeta)
})
t.Run("Raw document query", func(t *testing.T) {
targets := map[string]string{
"A": `{

View File

@ -5,21 +5,27 @@
// 0,
// 0
// ],
// "custom": {
// "searchWords": [
// "hello",
// "message"
// ]
// },
// "preferredVisualisationType": "logs"
// }
// Name:
// Dimensions: 17 Fields by 5 Rows
// +-----------------------------------+---------------------------+----------------------+-----------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+-----------------+------------------+--------------------+--------------------------+---------------+-----------------+-----------------+-----------------------------------------+------------------------------------+---------------------------------------------------------------------------------+--------------------------+
// | Name: testtime | Name: line | Name: _id | Name: _index | Name: _source | Name: _type | Name: abc | Name: counter | Name: float | Name: highlight | Name: is_true | Name: label | Name: level | Name: location | Name: nested_field.internal.nested | Name: shapes | Name: sort |
// | Labels: | Labels: | Labels: | Labels: | Labels: | Labels: | Labels: | Labels: | Labels: | Labels: | Labels: | Labels: | Labels: | Labels: | Labels: | Labels: | Labels: |
// | Type: []*time.Time | Type: []*string | Type: []*string | Type: []*string | Type: []*json.RawMessage | Type: []*json.RawMessage | Type: []*string | Type: []*float64 | Type: []*float64 | Type: []*json.RawMessage | Type: []*bool | Type: []*string | Type: []*string | Type: []*string | Type: []*string | Type: []*json.RawMessage | Type: []*json.RawMessage |
// +-----------------------------------+---------------------------+----------------------+-----------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+-----------------+------------------+--------------------+--------------------------+---------------+-----------------+-----------------+-----------------------------------------+------------------------------------+---------------------------------------------------------------------------------+--------------------------+
// | 2023-02-09 14:40:01.475 +0000 UTC | log text [106619125] | g2aeNoYB7vaC3bq-ezfK | logs-2023.02.09 | {"abc":null,"counter":81,"float":10.911972180833306,"is_true":true,"label":"val3","line":"log text [106619125]","location":"-42.73465234425797, -14.097854057104112","lvl":"info","nested_field.internal.nested":"value1","shapes":[{"type":"triangle"},{"type":"triangle"},{"type":"triangle"},{"type":"square"}],"testtime":"2023-02-09T14:40:01.475Z"} | null | null | 81 | 10.911972180833306 | null | true | val3 | info | -42.73465234425797, -14.097854057104112 | value1 | [{"type":"triangle"},{"type":"triangle"},{"type":"triangle"},{"type":"square"}] | [1675953601475,4] |
// | 2023-02-09 14:40:00.513 +0000 UTC | log text with [781660944] | gmaeNoYB7vaC3bq-eDcN | logs-2023.02.09 | {"abc":null,"counter":80,"float":62.94120607636795,"is_true":false,"label":"val3","line":"log text with [781660944]","location":"42.07571917624318, 15.95725088484611","lvl":"error","nested_field.internal.nested":"value2","shapes":[{"type":"triangle"},{"type":"square"}],"testtime":"2023-02-09T14:40:00.513Z"} | null | null | 80 | 62.94120607636795 | null | false | val3 | error | 42.07571917624318, 15.95725088484611 | value2 | [{"type":"triangle"},{"type":"square"}] | [1675953600513,7] |
// | 2023-02-09 14:39:59.556 +0000 UTC | log text [894867430] | gWaeNoYB7vaC3bq-dDdL | logs-2023.02.09 | {"abc":"def","counter":79,"float":53.323706427230455,"is_true":true,"label":"val1","line":"log text [894867430]","location":"-38.27341566189766, -23.66739642570781","lvl":"info","nested_field.internal.nested":"value3","shapes":[{"type":"triangle"},{"type":"square"}],"testtime":"2023-02-09T14:39:59.556Z"} | null | def | 79 | 53.323706427230455 | null | true | val1 | info | -38.27341566189766, -23.66739642570781 | value3 | [{"type":"triangle"},{"type":"square"}] | [1675953599556,10] |
// | 2023-02-09 14:39:58.608 +0000 UTC | log text [478598889] | gGaeNoYB7vaC3bq-cDeY | logs-2023.02.09 | {"abc":"def","counter":78,"float":82.72012623471589,"is_true":false,"label":"val1","line":"log text [478598889]","location":"12.373240290451287, 43.265493464362024","lvl":"info","nested_field.internal.nested":"value4","shapes":[{"type":"triangle"},{"type":"triangle"},{"type":"triangle"},{"type":"square"}],"testtime":"2023-02-09T14:39:58.608Z"} | null | def | 78 | 82.72012623471589 | null | false | val1 | info | 12.373240290451287, 43.265493464362024 | value4 | [{"type":"triangle"},{"type":"triangle"},{"type":"triangle"},{"type":"square"}] | [1675953598608,15] |
// | 2023-02-09 14:39:57.665 +0000 UTC | log text [526995818] | f2aeNoYB7vaC3bq-bDf7 | logs-2023.02.09 | {"abc":"def","counter":77,"float":35.05784443331803,"is_true":false,"label":"val3","line":"log text [526995818]","location":"-31.524344042228194, -32.11254790120572","lvl":"info","nested_field.internal.nested":"value5","shapes":[{"type":"triangle"},{"type":"triangle"},{"type":"triangle"},{"type":"square"}],"testtime":"2023-02-09T14:39:57.665Z"} | null | def | 77 | 35.05784443331803 | null | false | val3 | info | -31.524344042228194, -32.11254790120572 | value5 | [{"type":"triangle"},{"type":"triangle"},{"type":"triangle"},{"type":"square"}] | [1675953597665,20] |
// +-----------------------------------+---------------------------+----------------------+-----------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+-----------------+------------------+--------------------+--------------------------+---------------+-----------------+-----------------+-----------------------------------------+------------------------------------+---------------------------------------------------------------------------------+--------------------------+
// +-----------------------------------+---------------------------+----------------------+-----------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+-----------------+------------------+--------------------+--------------------------------------------------------------------------------------------------------------------------------+---------------+-----------------+-----------------+-----------------------------------------+------------------------------------+---------------------------------------------------------------------------------+--------------------------+
// | Name: testtime | Name: line | Name: _id | Name: _index | Name: _source | Name: _type | Name: abc | Name: counter | Name: float | Name: highlight | Name: is_true | Name: label | Name: level | Name: location | Name: nested_field.internal.nested | Name: shapes | Name: sort |
// | Labels: | Labels: | Labels: | Labels: | Labels: | Labels: | Labels: | Labels: | Labels: | Labels: | Labels: | Labels: | Labels: | Labels: | Labels: | Labels: | Labels: |
// | Type: []*time.Time | Type: []*string | Type: []*string | Type: []*string | Type: []*json.RawMessage | Type: []*json.RawMessage | Type: []*string | Type: []*float64 | Type: []*float64 | Type: []*json.RawMessage | Type: []*bool | Type: []*string | Type: []*string | Type: []*string | Type: []*string | Type: []*json.RawMessage | Type: []*json.RawMessage |
// +-----------------------------------+---------------------------+----------------------+-----------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+-----------------+------------------+--------------------+--------------------------------------------------------------------------------------------------------------------------------+---------------+-----------------+-----------------+-----------------------------------------+------------------------------------+---------------------------------------------------------------------------------+--------------------------+
// | 2023-02-09 14:40:01.475 +0000 UTC | log text [106619125] | g2aeNoYB7vaC3bq-ezfK | logs-2023.02.09 | {"abc":null,"counter":81,"float":10.911972180833306,"is_true":true,"label":"val3","line":"log text [106619125]","location":"-42.73465234425797, -14.097854057104112","lvl":"info","nested_field.internal.nested":"value1","shapes":[{"type":"triangle"},{"type":"triangle"},{"type":"triangle"},{"type":"square"}],"testtime":"2023-02-09T14:40:01.475Z"} | null | null | 81 | 10.911972180833306 | {"duplicated":["@HIGHLIGHT@hello@/HIGHLIGHT@"],"line":["@HIGHLIGHT@hello@/HIGHLIGHT@, i am a @HIGHLIGHT@message@/HIGHLIGHT@"]} | true | val3 | info | -42.73465234425797, -14.097854057104112 | value1 | [{"type":"triangle"},{"type":"triangle"},{"type":"triangle"},{"type":"square"}] | [1675953601475,4] |
// | 2023-02-09 14:40:00.513 +0000 UTC | log text with [781660944] | gmaeNoYB7vaC3bq-eDcN | logs-2023.02.09 | {"abc":null,"counter":80,"float":62.94120607636795,"is_true":false,"label":"val3","line":"log text with [781660944]","location":"42.07571917624318, 15.95725088484611","lvl":"error","nested_field.internal.nested":"value2","shapes":[{"type":"triangle"},{"type":"square"}],"testtime":"2023-02-09T14:40:00.513Z"} | null | null | 80 | 62.94120607636795 | {"duplicated":["@HIGHLIGHT@hello@/HIGHLIGHT@"],"line":["@HIGHLIGHT@hello@/HIGHLIGHT@, i am a @HIGHLIGHT@message@/HIGHLIGHT@"]} | false | val3 | error | 42.07571917624318, 15.95725088484611 | value2 | [{"type":"triangle"},{"type":"square"}] | [1675953600513,7] |
// | 2023-02-09 14:39:59.556 +0000 UTC | log text [894867430] | gWaeNoYB7vaC3bq-dDdL | logs-2023.02.09 | {"abc":"def","counter":79,"float":53.323706427230455,"is_true":true,"label":"val1","line":"log text [894867430]","location":"-38.27341566189766, -23.66739642570781","lvl":"info","nested_field.internal.nested":"value3","shapes":[{"type":"triangle"},{"type":"square"}],"testtime":"2023-02-09T14:39:59.556Z"} | null | def | 79 | 53.323706427230455 | {"duplicated":["@HIGHLIGHT@hello@/HIGHLIGHT@"],"line":["@HIGHLIGHT@hello@/HIGHLIGHT@, i am a @HIGHLIGHT@message@/HIGHLIGHT@"]} | true | val1 | info | -38.27341566189766, -23.66739642570781 | value3 | [{"type":"triangle"},{"type":"square"}] | [1675953599556,10] |
// | 2023-02-09 14:39:58.608 +0000 UTC | log text [478598889] | gGaeNoYB7vaC3bq-cDeY | logs-2023.02.09 | {"abc":"def","counter":78,"float":82.72012623471589,"is_true":false,"label":"val1","line":"log text [478598889]","location":"12.373240290451287, 43.265493464362024","lvl":"info","nested_field.internal.nested":"value4","shapes":[{"type":"triangle"},{"type":"triangle"},{"type":"triangle"},{"type":"square"}],"testtime":"2023-02-09T14:39:58.608Z"} | null | def | 78 | 82.72012623471589 | {"duplicated":["@HIGHLIGHT@hello@/HIGHLIGHT@"],"line":["@HIGHLIGHT@hello@/HIGHLIGHT@, i am a @HIGHLIGHT@message@/HIGHLIGHT@"]} | false | val1 | info | 12.373240290451287, 43.265493464362024 | value4 | [{"type":"triangle"},{"type":"triangle"},{"type":"triangle"},{"type":"square"}] | [1675953598608,15] |
// | 2023-02-09 14:39:57.665 +0000 UTC | log text [526995818] | f2aeNoYB7vaC3bq-bDf7 | logs-2023.02.09 | {"abc":"def","counter":77,"float":35.05784443331803,"is_true":false,"label":"val3","line":"log text [526995818]","location":"-31.524344042228194, -32.11254790120572","lvl":"info","nested_field.internal.nested":"value5","shapes":[{"type":"triangle"},{"type":"triangle"},{"type":"triangle"},{"type":"square"}],"testtime":"2023-02-09T14:39:57.665Z"} | null | def | 77 | 35.05784443331803 | {"duplicated":["@HIGHLIGHT@hello@/HIGHLIGHT@"],"line":["@HIGHLIGHT@hello@/HIGHLIGHT@, i am a @HIGHLIGHT@message@/HIGHLIGHT@"]} | false | val3 | info | -31.524344042228194, -32.11254790120572 | value5 | [{"type":"triangle"},{"type":"triangle"},{"type":"triangle"},{"type":"square"}] | [1675953597665,20] |
// +-----------------------------------+---------------------------+----------------------+-----------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+-----------------+------------------+--------------------+--------------------------------------------------------------------------------------------------------------------------------+---------------+-----------------+-----------------+-----------------------------------------+------------------------------------+---------------------------------------------------------------------------------+--------------------------+
//
//
// 🌟 This was machine generated. Do not edit. 🌟
@ -33,6 +39,12 @@
0,
0
],
"custom": {
"searchWords": [
"hello",
"message"
]
},
"preferredVisualisationType": "logs"
},
"fields": [
@ -404,11 +416,46 @@
35.05784443331803
],
[
null,
null,
null,
null,
null
{
"duplicated": [
"@HIGHLIGHT@hello@/HIGHLIGHT@"
],
"line": [
"@HIGHLIGHT@hello@/HIGHLIGHT@, i am a @HIGHLIGHT@message@/HIGHLIGHT@"
]
},
{
"duplicated": [
"@HIGHLIGHT@hello@/HIGHLIGHT@"
],
"line": [
"@HIGHLIGHT@hello@/HIGHLIGHT@, i am a @HIGHLIGHT@message@/HIGHLIGHT@"
]
},
{
"duplicated": [
"@HIGHLIGHT@hello@/HIGHLIGHT@"
],
"line": [
"@HIGHLIGHT@hello@/HIGHLIGHT@, i am a @HIGHLIGHT@message@/HIGHLIGHT@"
]
},
{
"duplicated": [
"@HIGHLIGHT@hello@/HIGHLIGHT@"
],
"line": [
"@HIGHLIGHT@hello@/HIGHLIGHT@, i am a @HIGHLIGHT@message@/HIGHLIGHT@"
]
},
{
"duplicated": [
"@HIGHLIGHT@hello@/HIGHLIGHT@"
],
"line": [
"@HIGHLIGHT@hello@/HIGHLIGHT@, i am a @HIGHLIGHT@message@/HIGHLIGHT@"
]
}
],
[
true,

View File

@ -21,6 +21,10 @@
"_index": "logs-2023.02.09",
"_id": "g2aeNoYB7vaC3bq-ezfK",
"_score": null,
"highlight": {
"line": ["@HIGHLIGHT@hello@/HIGHLIGHT@, i am a @HIGHLIGHT@message@/HIGHLIGHT@"],
"duplicated": ["@HIGHLIGHT@hello@/HIGHLIGHT@"]
},
"_source": {
"abc": null,
"is_true": true,
@ -60,6 +64,10 @@
"_index": "logs-2023.02.09",
"_id": "gmaeNoYB7vaC3bq-eDcN",
"_score": null,
"highlight": {
"line": ["@HIGHLIGHT@hello@/HIGHLIGHT@, i am a @HIGHLIGHT@message@/HIGHLIGHT@"],
"duplicated": ["@HIGHLIGHT@hello@/HIGHLIGHT@"]
},
"_source": {
"abc": null,
"is_true": false,
@ -93,6 +101,10 @@
"_index": "logs-2023.02.09",
"_id": "gWaeNoYB7vaC3bq-dDdL",
"_score": null,
"highlight": {
"line": ["@HIGHLIGHT@hello@/HIGHLIGHT@, i am a @HIGHLIGHT@message@/HIGHLIGHT@"],
"duplicated": ["@HIGHLIGHT@hello@/HIGHLIGHT@"]
},
"_source": {
"abc": "def",
"is_true": true,
@ -126,6 +138,10 @@
"_index": "logs-2023.02.09",
"_id": "gGaeNoYB7vaC3bq-cDeY",
"_score": null,
"highlight": {
"line": ["@HIGHLIGHT@hello@/HIGHLIGHT@, i am a @HIGHLIGHT@message@/HIGHLIGHT@"],
"duplicated": ["@HIGHLIGHT@hello@/HIGHLIGHT@"]
},
"_source": {
"abc": "def",
"is_true": false,
@ -165,6 +181,10 @@
"_index": "logs-2023.02.09",
"_id": "f2aeNoYB7vaC3bq-bDf7",
"_score": null,
"highlight": {
"line": ["@HIGHLIGHT@hello@/HIGHLIGHT@, i am a @HIGHLIGHT@message@/HIGHLIGHT@"],
"duplicated": ["@HIGHLIGHT@hello@/HIGHLIGHT@"]
},
"_source": {
"abc": "def",
"is_true": false,

View File

@ -421,7 +421,9 @@ export function logSeriesToLogsModel(logSeries: DataFrame[], queries: DataQuery[
const hasUnescapedContent = !!message.match(/\\n|\\t|\\r/);
const searchWords = series.meta && series.meta.searchWords ? series.meta.searchWords : [];
// Data sources that set up searchWords on backend use meta.custom.searchWords
// Data sources that set up searchWords trough frontend can use meta.searchWords
const searchWords = series.meta?.custom?.searchWords ?? series.meta?.searchWords ?? [];
const entry = hasAnsi ? ansicolor.strip(message) : message;
const labels = getLabelsForFrameRow(info, j);