From 5f896ae8f785ab03e87ed336ce94f61ba6718147 Mon Sep 17 00:00:00 2001 From: Sam Saffron Date: Fri, 5 Apr 2019 12:44:36 +1100 Subject: [PATCH] PERF: Keep track of when a users first unread is This optimisation avoids large scans joining the topics table with the topic_users table. Previously when a user carried a lot of read state we would have to join the entire read state with the topics table. This operation would slow down home page and every topic page. The more read state you accumulated the larger the impact. The optimisation helps people who clean up unread, however if you carry unread from years ago it will only have minimal impact. --- app/models/post_timing.rb | 19 +++++ app/models/topic_tracking_state.rb | 10 ++- app/models/user_stat.rb | 72 +++++++++++++++++++ ...24053_add_first_unread_at_to_user_stats.rb | 30 ++++++++ lib/topic_query.rb | 4 ++ spec/models/user_stat_spec.rb | 16 +++++ spec/requests/topics_controller_spec.rb | 7 ++ 7 files changed, 157 insertions(+), 1 deletion(-) create mode 100644 db/migrate/20190402024053_add_first_unread_at_to_user_stats.rb diff --git a/app/models/post_timing.rb b/app/models/post_timing.rb index 82f7a024812..0de539a6d37 100644 --- a/app/models/post_timing.rb +++ b/app/models/post_timing.rb @@ -80,6 +80,10 @@ class PostTiming < ActiveRecord::Base highest_seen_post_number: last_read, last_read_post_number: last_read ) + + if !topic.private_message? + set_minimum_first_unread!(user_id: user.id, date: topic.updated_at) + end end end @@ -92,9 +96,24 @@ class PostTiming < ActiveRecord::Base TopicUser .where('user_id = ? and topic_id in (?)', user_id, topic_ids) .delete_all + + date = Topic.listable_topics.where(id: topic_ids).minimum(:updated_at) + + if date + set_minimum_first_unread!(user_id: user_id, date: date) + end end end + def self.set_minimum_first_unread!(user_id:, date:) + DB.exec(<<~SQL, date: date, user_id: user_id) + UPDATE user_stats + SET first_unread_at = :date + WHERE first_unread_at > :date AND + user_id = :user_id + SQL + end + MAX_READ_TIME_PER_BATCH = 60 * 1000.0 def self.process_timings(current_user, topic_id, topic_time, timings, opts = {}) diff --git a/app/models/topic_tracking_state.rb b/app/models/topic_tracking_state.rb index 27965ecf4f5..d56dda72858 100644 --- a/app/models/topic_tracking_state.rb +++ b/app/models/topic_tracking_state.rb @@ -172,7 +172,7 @@ class TopicTrackingState # sql = report_raw_sql(topic_id: topic_id, skip_unread: true, skip_order: true, staff: user.staff?) sql << "\nUNION ALL\n\n" - sql << report_raw_sql(topic_id: topic_id, skip_new: true, skip_order: true, staff: user.staff?) + sql << report_raw_sql(topic_id: topic_id, skip_new: true, skip_order: true, staff: user.staff?, filter_old_unread: true) DB.query( sql, @@ -195,6 +195,13 @@ class TopicTrackingState .gsub("-999", ":user_id") end + filter_old_unread = + if opts && opts[:filter_old_unread] + " topics.updated_at >= us.first_unread_at AND " + else + "" + end + new = if opts && opts[:skip_new] "1=0" @@ -221,6 +228,7 @@ class TopicTrackingState JOIN categories c ON c.id = topics.category_id LEFT JOIN topic_users tu ON tu.topic_id = topics.id AND tu.user_id = u.id WHERE u.id = :user_id AND + #{filter_old_unread} topics.archetype <> 'private_message' AND ((#{unread}) OR (#{new})) AND (topics.visible OR u.admin OR u.moderator) AND diff --git a/app/models/user_stat.rb b/app/models/user_stat.rb index c4806af20b7..48940f948a2 100644 --- a/app/models/user_stat.rb +++ b/app/models/user_stat.rb @@ -7,6 +7,77 @@ class UserStat < ActiveRecord::Base def self.ensure_consistency!(last_seen = 1.hour.ago) reset_bounce_scores update_view_counts(last_seen) + update_first_unread(last_seen) + end + + def self.update_first_unread(last_seen, limit: 10_000) + DB.exec(<<~SQL, min_date: last_seen, limit: limit) + UPDATE user_stats us + SET first_unread_at = Y.min_date + FROM ( + SELECT u1.id user_id, + X.min min_date + FROM users u1 + LEFT JOIN + (SELECT u.id AS user_id, + min(topics.updated_at) min + FROM users u + LEFT JOIN topic_users tu ON tu.user_id = u.id + LEFT JOIN topics ON tu.topic_id = topics.id + JOIN user_stats AS us ON us.user_id = u.id + JOIN user_options AS uo ON uo.user_id = u.id + JOIN categories c ON c.id = topics.category_id + WHERE u.id IN ( + SELECT id + FROM users + WHERE last_seen_at IS NOT NULL + AND last_seen_at > :min_date + ORDER BY last_seen_at DESC + LIMIT :limit + ) + AND topics.archetype <> 'private_message' + AND (("topics"."deleted_at" IS NULL + AND tu.last_read_post_number < CASE + WHEN u.admin + OR u.moderator THEN topics.highest_staff_post_number + ELSE topics.highest_post_number + END + AND COALESCE(tu.notification_level, 1) >= 2) + OR (1=0)) + AND (topics.visible + OR u.admin + OR u.moderator) + AND topics.deleted_at IS NULL + AND (NOT c.read_restricted + OR u.admin + OR category_id IN + (SELECT c2.id + FROM categories c2 + JOIN category_groups cg ON cg.category_id = c2.id + JOIN group_users gu ON gu.user_id = u.id + AND cg.group_id = gu.group_id + WHERE c2.read_restricted )) + AND NOT EXISTS + (SELECT 1 + FROM category_users cu + WHERE last_read_post_number IS NULL + AND cu.user_id = u.id + AND cu.category_id = topics.category_id + AND cu.notification_level = 0) + GROUP BY u.id, + u.username) AS X ON X.user_id = u1.id + WHERE u1.id IN + ( + SELECT id + FROM users + WHERE last_seen_at IS NOT NULL + AND last_seen_at > :min_date + ORDER BY last_seen_at DESC + LIMIT :limit + ) + ) Y + WHERE Y.user_id = us.user_id + SQL end def self.reset_bounce_scores @@ -133,4 +204,5 @@ end # flags_agreed :integer default(0), not null # flags_disagreed :integer default(0), not null # flags_ignored :integer default(0), not null +# first_unread_at :datetime # diff --git a/db/migrate/20190402024053_add_first_unread_at_to_user_stats.rb b/db/migrate/20190402024053_add_first_unread_at_to_user_stats.rb new file mode 100644 index 00000000000..e89b93a4fe1 --- /dev/null +++ b/db/migrate/20190402024053_add_first_unread_at_to_user_stats.rb @@ -0,0 +1,30 @@ +class AddFirstUnreadAtToUserStats < ActiveRecord::Migration[5.2] + disable_ddl_transaction! + + def up + # so we can rerun this if the index creation fails out of ddl + if !column_exists?(:user_stats, :first_unread_at) + add_column :user_stats, :first_unread_at, :datetime, null: false, default: -> { 'CURRENT_TIMESTAMP' } + end + + execute <<~SQL + UPDATE user_stats us + SET first_unread_at = u.created_at + FROM users u + WHERE u.id = us.user_id + SQL + + # this is quite a big index to carry, but we need it to optimise home page initial load + # by covering all these columns we are able to quickly retrieve the set of topics that were + # updated in the last N days. We perform a ranged lookup and selectivity may vary a lot + add_index :topics, + [:updated_at, :visible, :highest_staff_post_number, :highest_post_number, :category_id, :created_at, :id], + algorithm: :concurrently, + name: 'index_topics_on_updated_at_public', + where: "(topics.archetype <> 'private_message') AND (topics.deleted_at IS NULL)" + end + + def down + raise ActiveRecord::IrreversibleMigration + end +end diff --git a/lib/topic_query.rb b/lib/topic_query.rb index e1934f85093..9fb949627a0 100644 --- a/lib/topic_query.rb +++ b/lib/topic_query.rb @@ -469,6 +469,10 @@ class TopicQuery staff: @user&.staff?) .order('CASE WHEN topics.user_id = tu.user_id THEN 1 ELSE 2 END') + if @user + result = result.where("topics.updated_at >= (SELECT first_unread_at FROM user_stats WHERE user_id = ?)", @user.id) + end + self.class.results_filter_callbacks.each do |filter_callback| result = filter_callback.call(:unread, result, @user, options) end diff --git a/spec/models/user_stat_spec.rb b/spec/models/user_stat_spec.rb index be844c015d1..cff85f51dc4 100644 --- a/spec/models/user_stat_spec.rb +++ b/spec/models/user_stat_spec.rb @@ -72,6 +72,22 @@ describe UserStat do end end + describe 'ensure consistency!' do + it 'can update first unread' do + post = create_post + + freeze_time 10.minutes.from_now + create_post(topic_id: post.topic_id) + + post.user.update!(last_seen_at: Time.now) + + UserStat.ensure_consistency! + + post.user.user_stat.reload + expect(post.user.user_stat.first_unread_at).to eq_time(Time.now) + end + end + describe 'update_time_read!' do let(:user) { Fabricate(:user) } let(:stat) { user.user_stat } diff --git a/spec/requests/topics_controller_spec.rb b/spec/requests/topics_controller_spec.rb index 66e2c5923d7..6cbec67ade4 100644 --- a/spec/requests/topics_controller_spec.rb +++ b/spec/requests/topics_controller_spec.rb @@ -723,6 +723,8 @@ RSpec.describe TopicsController do context 'for last post only' do it 'should allow you to retain topic timing but remove last post only' do + freeze_time + post1 = create_post topic = post1.topic @@ -731,6 +733,8 @@ RSpec.describe TopicsController do PostTiming.create!(topic: topic, user: user, post_number: 1, msecs: 100) PostTiming.create!(topic: topic, user: user, post_number: 2, msecs: 100) + user.user_stat.update!(first_unread_at: Time.now + 1.week) + TopicUser.create!( topic: topic, user: user, @@ -747,6 +751,9 @@ RSpec.describe TopicsController do expect(TopicUser.where(topic: topic, user: user, last_read_post_number: 1, highest_seen_post_number: 1).exists?).to eq(true) + user.user_stat.reload + expect(user.user_stat.first_unread_at).to eq_time(topic.updated_at) + PostDestroyer.new(Fabricate(:admin), post2).destroy delete "/t/#{topic.id}/timings.json?last=1"