From 108c231d1cabfd6429c50765ab54649d6cfcca79 Mon Sep 17 00:00:00 2001
From: Guo Xiang Tan
Date: Mon, 8 Apr 2019 16:51:39 +0800
Subject: [PATCH] FIX: Clean up `topic_search_data` of trashed topics.

This keeps the index and table smaller.
---
Review notes (text in this section is not part of the commit message):

* CLEANUP_GRACE_PERIOD now holds a duration (`1.week`) rather than
  `1.week.ago`. A constant initialised with `1.week.ago` is evaluated once,
  when the class body is loaded, so the cutoff would stay pinned to the
  process start time instead of moving forward on every run of the job.
  Each use site calls `.ago` so the cutoff is computed when the job (or the
  spec) actually executes, which is what the inline `1.week.ago` did before.
* Line structure and indentation of this patch were restored from the hunk
  headers; the `index` blob ids below were not regenerated.

 app/jobs/scheduled/reindex_search.rb | 18 +++++++++++++++++-
 spec/jobs/reindex_search_spec.rb     | 24 +++++++++++++++++++++---
 2 files changed, 38 insertions(+), 4 deletions(-)

diff --git a/app/jobs/scheduled/reindex_search.rb b/app/jobs/scheduled/reindex_search.rb
index 5a1c784e20a..802379dff06 100644
--- a/app/jobs/scheduled/reindex_search.rb
+++ b/app/jobs/scheduled/reindex_search.rb
@@ -3,6 +3,8 @@ module Jobs
   class ReindexSearch < Jobs::Scheduled
     every 2.hours
 
+    CLEANUP_GRACE_PERIOD = 1.week
+
     def execute(args)
       rebuild_problem_topics
       rebuild_problem_posts
@@ -10,6 +12,7 @@ module Jobs
       rebuild_problem_users
       rebuild_problem_tags
       clean_post_search_data
+      clean_topic_search_data
     end
 
     def rebuild_problem_categories(limit: 500)
@@ -67,7 +70,7 @@ module Jobs
         .where("p.raw = ''")
         .delete_all
 
-      DB.exec(<<~SQL, deleted_at: 1.week.ago)
+      DB.exec(<<~SQL, deleted_at: CLEANUP_GRACE_PERIOD.ago)
         DELETE FROM post_search_data
         WHERE post_id IN (
           SELECT post_id
@@ -80,6 +83,19 @@ module Jobs
       SQL
     end
 
+    def clean_topic_search_data
+      DB.exec(<<~SQL, deleted_at: CLEANUP_GRACE_PERIOD.ago)
+        DELETE FROM topic_search_data
+        WHERE topic_id IN (
+          SELECT topic_id
+          FROM topic_search_data
+          INNER JOIN topics ON topic_search_data.topic_id = topics.id
+          WHERE topics.deleted_at IS NOT NULL
+          AND topics.deleted_at <= :deleted_at
+        )
+      SQL
+    end
+
     def load_problem_post_ids(limit)
       params = {
         locale: SiteSetting.default_locale,
diff --git a/spec/jobs/reindex_search_spec.rb b/spec/jobs/reindex_search_spec.rb
index bc6b3f6efa5..a3edcb97293 100644
--- a/spec/jobs/reindex_search_spec.rb
+++ b/spec/jobs/reindex_search_spec.rb
@@ -83,6 +83,24 @@ describe Jobs::ReindexSearch do
   end
 
   describe '#execute' do
+    it "should clean up topic_search_data of trashed topics" do
+      topic = Fabricate(:post).topic
+      topic2 = Fabricate(:post).topic
+
+      [topic, topic2].each { |t| SearchIndexer.index(t, force: true) }
+
+      freeze_time(described_class::CLEANUP_GRACE_PERIOD.ago) do
+        topic.trash!
+      end
+
+      expect { subject.execute({}) }.to change { TopicSearchData.count }.by(-1)
+      expect(Topic.pluck(:id)).to contain_exactly(topic2.id)
+
+      expect(TopicSearchData.pluck(:topic_id)).to contain_exactly(
+        topic2.topic_search_data.topic_id
+      )
+    end
+
     it(
       "should clean up post_search_data of posts with empty raw or posts from " \
       "trashed topics"
@@ -96,18 +114,18 @@ describe Jobs::ReindexSearch do
       post3.topic.trash!
       post4 = nil
 
-      freeze_time(1.week.ago) do
+      freeze_time(described_class::CLEANUP_GRACE_PERIOD.ago) do
         post4 = Fabricate(:post)
         post4.topic.trash!
       end
 
       expect { subject.execute({}) }.to change { PostSearchData.count }.by(-2)
 
-      expect(Post.all.pluck(:id)).to contain_exactly(
+      expect(Post.pluck(:id)).to contain_exactly(
         post.id, post2.id, post3.id, post4.id
       )
 
-      expect(PostSearchData.all.pluck(:post_id)).to contain_exactly(
+      expect(PostSearchData.pluck(:post_id)).to contain_exactly(
         post.post_search_data.post_id, post3.post_search_data.post_id
       )
     end