mirror of
https://github.com/discourse/discourse.git
synced 2025-02-25 18:55:32 -06:00
PERF: optimise backfilling of topic_id (#13545)
Relying on large offsets can have uneven performance on huge table, new implementation recovers more cleanly and avoids double updates
This commit is contained in:
parent
a6b9289477
commit
14a0247301
@ -5,27 +5,22 @@ class BackfillEmailLogTopicId < ActiveRecord::Migration[6.1]
|
|||||||
BATCH_SIZE = 30_000
|
BATCH_SIZE = 30_000
|
||||||
|
|
||||||
def up
|
def up
|
||||||
offset = 0
|
|
||||||
email_log_count = DB.query_single("SELECT COUNT(*) FROM email_logs").first
|
|
||||||
|
|
||||||
loop do
|
loop do
|
||||||
DB.exec(<<~SQL, offset: offset, batch_size: BATCH_SIZE)
|
count = DB.exec(<<~SQL, batch_size: BATCH_SIZE)
|
||||||
WITH cte AS (
|
WITH cte AS (
|
||||||
SELECT post_id
|
SELECT l1.id, p1.topic_id
|
||||||
FROM email_logs
|
FROM email_logs l1
|
||||||
ORDER BY id
|
INNER JOIN posts p1 ON p1.id = l1.post_id
|
||||||
|
WHERE l1.topic_id IS NULL AND p1.topic_id IS NOT NULL
|
||||||
LIMIT :batch_size
|
LIMIT :batch_size
|
||||||
OFFSET :offset
|
|
||||||
)
|
)
|
||||||
UPDATE email_logs
|
UPDATE email_logs
|
||||||
SET topic_id = posts.topic_id
|
SET topic_id = cte.topic_id
|
||||||
FROM cte
|
FROM cte
|
||||||
INNER JOIN posts ON posts.id = cte.post_id
|
WHERE email_logs.id = cte.id
|
||||||
WHERE email_logs.post_id = cte.post_id
|
|
||||||
SQL
|
SQL
|
||||||
|
|
||||||
offset += BATCH_SIZE
|
break if count == 0
|
||||||
break if offset > (email_log_count + BATCH_SIZE * 2)
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user