From 15e9057ec5142762cb7bfabebfd430bff7853821 Mon Sep 17 00:00:00 2001
From: Guo Xiang Tan
Date: Mon, 27 Jul 2020 15:22:54 +0800
Subject: [PATCH] FIX: Reduce number of terms injected for host lexeme.

We do prefix matching in search so there is no need to inject the extra terms.

Before:

```
"'discourse':10,11 'discourse.org':10,11 'org':10,11 'test':8A,10,11 'test.discourse.org':10,11 'titl':4A 'uncategor':9B"
```

After:

```
"'discourse.org':10,11 'org':10,11 'test':8A 'test.discourse.org':10,11 'titl':4A 'uncategor':9B"
```
---
 app/services/search_indexer.rb       |  2 +-
 spec/services/search_indexer_spec.rb | 12 ++++++++----
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/app/services/search_indexer.rb b/app/services/search_indexer.rb
index acd343a00bc..cb60237ab94 100644
--- a/app/services/search_indexer.rb
+++ b/app/services/search_indexer.rb
@@ -65,7 +65,7 @@ class SearchIndexer break if count >= 10 # Safeguard here to prevent infinite loop when a term has many dots term, _, remaining = lexeme.partition(".") break if remaining.blank?
- array << "'#{term}':#{positions} '#{remaining}':#{positions}" + array << "'#{remaining}':#{positions}" lexeme = remaining end end diff --git a/spec/services/search_indexer_spec.rb b/spec/services/search_indexer_spec.rb index e1af96d0f65..f55b183e864 100644 --- a/spec/services/search_indexer_spec.rb +++ b/spec/services/search_indexer_spec.rb @@ -108,7 +108,8 @@ describe SearchIndexer do end describe '.index' do - let(:post) { Fabricate(:post) } + let(:topic) { Fabricate(:topic, title: "this is a title that I am testing") } + let(:post) { Fabricate(:post, topic: topic) } it 'should index posts correctly' do expect { post }.to change { PostSearchData.count }.by(1) @@ -128,7 +129,7 @@ describe SearchIndexer do end it "should not tokenize urls and duplicate title and href in " do - post = Fabricate(:post, raw: <<~RAW) + post.update!(raw: <<~RAW) https://meta.discourse.org/some.png RAW @@ -139,10 +140,13 @@ describe SearchIndexer do expect(post.post_search_data.raw_data).to eq( "https://meta.discourse.org/some.png" ) + + expect(post.post_search_data.search_data).to eq( + "'/some.png':12 'discourse.org':11 'meta.discourse.org':11 'meta.discourse.org/some.png':10 'org':11 'test':8A 'titl':4A 'uncategor':9B" + ) end it 'should not tokenize versions' do - post.topic.update!(title: "this is a title that I am testing") post.update!(raw: '123.223') expect(post.post_search_data.search_data).to eq( @@ -174,7 +178,7 @@ describe SearchIndexer do ) expect(post.post_search_data.search_data).to eq( - "'/xyz=1':14,17 'abc':13,16 'abc.net':13,16 'abc.net/xyz=1':12,15 'au':10 'awesom':6B 'b':11 'categori':7B 'cnn':9 'cnn.com':9 'com':9,10 'com.au':10 'net':13,16 'stuff':10 'stuff.com.au':10 'test':4A 'topic':5A" + "'/xyz=1':14,17 'abc.net':13,16 'abc.net/xyz=1':12,15 'au':10 'awesom':6B 'b':11 'categori':7B 'cnn.com':9 'com':9 'com.au':10 'net':13,16 'stuff.com.au':10 'test':4A 'topic':5A" ) end