FIX: Reduce the number of terms injected for a host lexeme.

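Search already does prefix matching, so there is no need to inject the
leading segment of a host as its own extra term (e.g. `test` or `discourse`
for `test.discourse.org`); a prefix query still matches the longer lexemes
`test.discourse.org` and `discourse.org`.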

Before:
```
"'discourse':10,11 'discourse.org':10,11 'org':10,11 'test':8A,10,11 'test.discourse.org':10,11 'titl':4A 'uncategor':9B"
```

After:
```
"'discourse.org':10,11 'org':10,11 'test':8A 'test.discourse.org':10,11 'titl':4A 'uncategor':9B"
```
This commit is contained in:
Guo Xiang Tan 2020-07-27 15:22:54 +08:00
parent 0f53ad58c2
commit 15e9057ec5
No known key found for this signature in database
GPG Key ID: FBD110179AAC1F20
2 changed files with 9 additions and 5 deletions

View File

@ -65,7 +65,7 @@ class SearchIndexer
break if count >= 10 # Safeguard here to prevent infinite loop when a term has many dots break if count >= 10 # Safeguard here to prevent infinite loop when a term has many dots
term, _, remaining = lexeme.partition(".") term, _, remaining = lexeme.partition(".")
break if remaining.blank? break if remaining.blank?
array << "'#{term}':#{positions} '#{remaining}':#{positions}" array << "'#{remaining}':#{positions}"
lexeme = remaining lexeme = remaining
end end
end end

View File

@ -108,7 +108,8 @@ describe SearchIndexer do
end end
describe '.index' do describe '.index' do
let(:post) { Fabricate(:post) } let(:topic) { Fabricate(:topic, title: "this is a title that I am testing") }
let(:post) { Fabricate(:post, topic: topic) }
it 'should index posts correctly' do it 'should index posts correctly' do
expect { post }.to change { PostSearchData.count }.by(1) expect { post }.to change { PostSearchData.count }.by(1)
@ -128,7 +129,7 @@ describe SearchIndexer do
end end
it "should not tokenize urls and duplicate title and href in <a>" do it "should not tokenize urls and duplicate title and href in <a>" do
post = Fabricate(:post, raw: <<~RAW) post.update!(raw: <<~RAW)
https://meta.discourse.org/some.png https://meta.discourse.org/some.png
RAW RAW
@ -139,10 +140,13 @@ describe SearchIndexer do
expect(post.post_search_data.raw_data).to eq( expect(post.post_search_data.raw_data).to eq(
"https://meta.discourse.org/some.png" "https://meta.discourse.org/some.png"
) )
expect(post.post_search_data.search_data).to eq(
"'/some.png':12 'discourse.org':11 'meta.discourse.org':11 'meta.discourse.org/some.png':10 'org':11 'test':8A 'titl':4A 'uncategor':9B"
)
end end
it 'should not tokenize versions' do it 'should not tokenize versions' do
post.topic.update!(title: "this is a title that I am testing")
post.update!(raw: '123.223') post.update!(raw: '123.223')
expect(post.post_search_data.search_data).to eq( expect(post.post_search_data.search_data).to eq(
@ -174,7 +178,7 @@ describe SearchIndexer do
) )
expect(post.post_search_data.search_data).to eq( expect(post.post_search_data.search_data).to eq(
"'/xyz=1':14,17 'abc':13,16 'abc.net':13,16 'abc.net/xyz=1':12,15 'au':10 'awesom':6B 'b':11 'categori':7B 'cnn':9 'cnn.com':9 'com':9,10 'com.au':10 'net':13,16 'stuff':10 'stuff.com.au':10 'test':4A 'topic':5A" "'/xyz=1':14,17 'abc.net':13,16 'abc.net/xyz=1':12,15 'au':10 'awesom':6B 'b':11 'categori':7B 'cnn.com':9 'com':9 'com.au':10 'net':13,16 'stuff.com.au':10 'test':4A 'topic':5A"
) )
end end