mirror of
https://github.com/discourse/discourse.git
synced 2025-02-25 18:55:32 -06:00
FIX: Reduce number of terms injected for host lexeme.
We do prefix matching in search, so there is no need to inject the extra terms.

Before:
```
"'discourse':10,11 'discourse.org':10,11 'org':10,11 'test':8A,10,11 'test.discourse.org':10,11 'titl':4A 'uncategor':9B"
```

After:
```
"'discourse.org':10,11 'org':10,11 'test':8A 'test.discourse.org':10,11 'titl':4A 'uncategor':9B"
```
This commit is contained in:
parent
0f53ad58c2
commit
15e9057ec5
@ -65,7 +65,7 @@ class SearchIndexer
|
|||||||
break if count >= 10 # Safeguard here to prevent infinite loop when a term has many dots
|
break if count >= 10 # Safeguard here to prevent infinite loop when a term has many dots
|
||||||
term, _, remaining = lexeme.partition(".")
|
term, _, remaining = lexeme.partition(".")
|
||||||
break if remaining.blank?
|
break if remaining.blank?
|
||||||
array << "'#{term}':#{positions} '#{remaining}':#{positions}"
|
array << "'#{remaining}':#{positions}"
|
||||||
lexeme = remaining
|
lexeme = remaining
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -108,7 +108,8 @@ describe SearchIndexer do
|
|||||||
end
|
end
|
||||||
|
|
||||||
describe '.index' do
|
describe '.index' do
|
||||||
let(:post) { Fabricate(:post) }
|
let(:topic) { Fabricate(:topic, title: "this is a title that I am testing") }
|
||||||
|
let(:post) { Fabricate(:post, topic: topic) }
|
||||||
|
|
||||||
it 'should index posts correctly' do
|
it 'should index posts correctly' do
|
||||||
expect { post }.to change { PostSearchData.count }.by(1)
|
expect { post }.to change { PostSearchData.count }.by(1)
|
||||||
@ -128,7 +129,7 @@ describe SearchIndexer do
|
|||||||
end
|
end
|
||||||
|
|
||||||
it "should not tokenize urls and duplicate title and href in <a>" do
|
it "should not tokenize urls and duplicate title and href in <a>" do
|
||||||
post = Fabricate(:post, raw: <<~RAW)
|
post.update!(raw: <<~RAW)
|
||||||
https://meta.discourse.org/some.png
|
https://meta.discourse.org/some.png
|
||||||
RAW
|
RAW
|
||||||
|
|
||||||
@ -139,10 +140,13 @@ describe SearchIndexer do
|
|||||||
expect(post.post_search_data.raw_data).to eq(
|
expect(post.post_search_data.raw_data).to eq(
|
||||||
"https://meta.discourse.org/some.png"
|
"https://meta.discourse.org/some.png"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
expect(post.post_search_data.search_data).to eq(
|
||||||
|
"'/some.png':12 'discourse.org':11 'meta.discourse.org':11 'meta.discourse.org/some.png':10 'org':11 'test':8A 'titl':4A 'uncategor':9B"
|
||||||
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
it 'should not tokenize versions' do
|
it 'should not tokenize versions' do
|
||||||
post.topic.update!(title: "this is a title that I am testing")
|
|
||||||
post.update!(raw: '123.223')
|
post.update!(raw: '123.223')
|
||||||
|
|
||||||
expect(post.post_search_data.search_data).to eq(
|
expect(post.post_search_data.search_data).to eq(
|
||||||
@ -174,7 +178,7 @@ describe SearchIndexer do
|
|||||||
)
|
)
|
||||||
|
|
||||||
expect(post.post_search_data.search_data).to eq(
|
expect(post.post_search_data.search_data).to eq(
|
||||||
"'/xyz=1':14,17 'abc':13,16 'abc.net':13,16 'abc.net/xyz=1':12,15 'au':10 'awesom':6B 'b':11 'categori':7B 'cnn':9 'cnn.com':9 'com':9,10 'com.au':10 'net':13,16 'stuff':10 'stuff.com.au':10 'test':4A 'topic':5A"
|
"'/xyz=1':14,17 'abc.net':13,16 'abc.net/xyz=1':12,15 'au':10 'awesom':6B 'b':11 'categori':7B 'cnn.com':9 'com':9 'com.au':10 'net':13,16 'stuff.com.au':10 'test':4A 'topic':5A"
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user