PERF: remove avg_time calculations and regular jobs from posts and topics

After careful analysis of large datasets, it became apparent that avg_time
had no impact whatsoever on "best of" topic scoring. Calculating avg_time
was a very costly operation, especially on large databases.

We have some longer-term plans to introduce other read-time-based weighting
into our scoring for "best of" and "top" topics, but in the interim, to stop
a large amount of work that is not achieving any value, we are removing the
jobs.

Column removal will follow once we decide on a new replacement metric.
This commit is contained in:
Sam Saffron
2019-05-06 15:58:49 +10:00
parent ba3cf7109c
commit f8eddd40ad
12 changed files with 0 additions and 198 deletions

View File

@@ -1,14 +0,0 @@
module Jobs
  # Scheduled job that refreshes the stored avg_time values on posts and topics.
  class CalculateAvgTime < Jobs::Scheduled
    # PERF: the calculations run a huge geometric mean, are hard to optimise
    # and can become exceedingly expensive, so the job only runs once a day.
    every 1.day

    # Updates the average times: posts first, then topics. Each model gets its
    # own freshly evaluated "two days ago" cutoff, and the value returned is
    # the result of the last (topic) calculation.
    def execute(args)
      [Post, Topic].map { |model| model.calculate_avg_time(2.days.ago) }.last
    end
  end
end

View File

@@ -8,8 +8,6 @@ module Jobs
every 1.week
def execute(args)
Post.calculate_avg_time
Topic.calculate_avg_time
ScoreCalculator.new.calculate
MiniScheduler::Stat.purge_old
Draft.cleanup!

View File

@@ -674,37 +674,6 @@ class Post < ActiveRecord::Base
end
# Recomputes posts.avg_time from the rows in post_timings.
#
# For every (topic_id, post_number) pair the subquery takes the geometric mean
# of msecs, computed as round(exp(avg(ln(msecs)))), and the outer UPDATE stores
# that value divided by 1000 in posts.avg_time.
#
# min_topic_age - optional timestamp; when given, only timings belonging to
#                 topics whose bumped_at is later than this value are
#                 aggregated. When nil, no topic filter is added.
def self.calculate_avg_time(min_topic_age = nil)
# NOTE(review): retry_lock_error is defined outside this view; presumably it
# re-runs the block when the database reports a lock error -- confirm.
retry_lock_error do
# Timings recorded by the post's own author are left out of the mean
# (p2.user_id <> post_timings.user_id), and msecs values that are not positive
# contribute 0 instead of ln(msecs). The /*where*/ and /*where2*/ markers are
# presumably where the builder injects the conditions added below.
builder = DB.build("UPDATE posts
SET avg_time = (x.gmean / 1000)
FROM (SELECT post_timings.topic_id,
post_timings.post_number,
round(exp(avg(CASE WHEN msecs > 0 THEN ln(msecs) ELSE 0 END))) AS gmean
FROM post_timings
INNER JOIN posts AS p2
ON p2.post_number = post_timings.post_number
AND p2.topic_id = post_timings.topic_id
AND p2.user_id <> post_timings.user_id
/*where2*/
GROUP BY post_timings.topic_id, post_timings.post_number) AS x
/*where*/")
# Match each aggregate row to its post and skip posts whose stored avg_time
# already equals the new value, so only rows that change are written.
builder.where("x.topic_id = posts.topic_id
AND x.post_number = posts.post_number
AND (posts.avg_time <> (x.gmean / 1000)::int OR posts.avg_time IS NULL)")
if min_topic_age
# Narrow the aggregation itself (inner query) to recently bumped topics.
builder.where2("p2.topic_id IN (SELECT id FROM topics where bumped_at > :bumped_at)",
bumped_at: min_topic_age)
end
builder.exec
end
end
before_save do
self.last_editor_id ||= user_id

View File

@@ -671,31 +671,6 @@ class Topic < ActiveRecord::Base
SQL
end
# Recomputes topics.avg_time as the geometric mean of the avg_time values of
# the topic's posts.
#
# The subquery groups posts by topic_id, considers only posts whose avg_time is
# greater than zero, and computes round(exp(avg(ln(avg_time)))).
#
# min_topic_age - optional timestamp; when given, only topics whose bumped_at
#                 is later than this value are updated.
def self.calculate_avg_time(min_topic_age = nil)
# /*where*/ is presumably where the builder injects the conditions added below.
builder = DB.build <<~SQL
UPDATE topics
SET avg_time = x.gmean
FROM (SELECT topic_id,
round(exp(avg(ln(avg_time)))) AS gmean
FROM posts
WHERE avg_time > 0 AND avg_time IS NOT NULL
GROUP BY topic_id) AS x
/*where*/
SQL
# Match each aggregate row to its topic and skip topics whose stored avg_time
# already equals the new value, so only rows that change are written.
builder.where <<~SQL
x.topic_id = topics.id AND
(topics.avg_time <> x.gmean OR topics.avg_time IS NULL)
SQL
if min_topic_age
# Limits which topics are updated; the subquery above carries no
# corresponding filter and still aggregates posts from every topic.
builder.where("topics.bumped_at > :bumped_at", bumped_at: min_topic_age)
end
builder.exec
end
def changed_to_category(new_category)
return true if new_category.blank? || Category.exists?(topic_id: id)
return false if new_category.id == SiteSetting.uncategorized_category_id && !SiteSetting.allow_uncategorized_topics

View File

@@ -23,7 +23,6 @@ class PostSerializer < BasicPostSerializer
:reply_count,
:reply_to_post_number,
:quote_count,
:avg_time,
:incoming_link_count,
:reads,
:score,