mirror of
https://github.com/discourse/discourse.git
synced 2025-02-20 11:48:26 -06:00
PERF: avoid race conditions when creating topic links
Previously the code was very prone to race conditions, leading to odd failures in production. It was rewritten in raw SQL to avoid cases where rows conflict on insert. There is no clean way in ActiveRecord to do "insert, on conflict do nothing, and return the existing id". This also increases test coverage: previously we were not directly testing the code responsible for crawling external sites.
This commit is contained in:
parent
ca8d11635c
commit
7f841dc21f
@ -140,9 +140,12 @@ class TopicLink < ActiveRecord::Base
|
|||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
# Crawl a link's title after it's saved
|
def self.crawl_link_title(topic_link_id)
|
||||||
|
Jobs.enqueue(:crawl_topic_link, topic_link_id: topic_link_id)
|
||||||
|
end
|
||||||
|
|
||||||
def crawl_link_title
|
def crawl_link_title
|
||||||
Jobs.enqueue(:crawl_topic_link, topic_link_id: id)
|
TopicLink.crawl_link_title(id)
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.duplicate_lookup(topic)
|
def self.duplicate_lookup(topic)
|
||||||
@ -167,6 +170,97 @@ class TopicLink < ActiveRecord::Base
|
|||||||
|
|
||||||
private
|
private
|
||||||
|
|
||||||
|
# This pattern is used to create topic links very efficiently with minimal
|
||||||
|
# errors under heavy concurrent use
|
||||||
|
#
|
||||||
|
# It avoids a SELECT to find out if the record is there and minimizes all
|
||||||
|
# the work it needs to do in case a record is missing
|
||||||
|
#
|
||||||
|
# It handles calling the required callback and has parity with the Rails implementation
|
||||||
|
#
|
||||||
|
# Usually we would rely on ActiveRecord but in this case we have had lots of churn
|
||||||
|
# around creation of topic links, leading to hard-to-debug log messages in production
|
||||||
|
#
|
||||||
|
def self.safe_create_topic_link(
|
||||||
|
post_id:,
|
||||||
|
user_id:,
|
||||||
|
topic_id:,
|
||||||
|
url:,
|
||||||
|
domain: nil,
|
||||||
|
internal: false,
|
||||||
|
link_topic_id: nil,
|
||||||
|
link_post_id: nil,
|
||||||
|
quote: false,
|
||||||
|
extension: nil,
|
||||||
|
reflection: false
|
||||||
|
)
|
||||||
|
|
||||||
|
domain ||= Discourse.current_hostname
|
||||||
|
|
||||||
|
sql = <<~SQL
|
||||||
|
WITH new_row AS(
|
||||||
|
INSERT INTO topic_links(
|
||||||
|
post_id,
|
||||||
|
user_id,
|
||||||
|
topic_id,
|
||||||
|
url,
|
||||||
|
domain,
|
||||||
|
internal,
|
||||||
|
link_topic_id,
|
||||||
|
link_post_id,
|
||||||
|
quote,
|
||||||
|
extension,
|
||||||
|
reflection,
|
||||||
|
created_at,
|
||||||
|
updated_at
|
||||||
|
) VALUES (
|
||||||
|
:post_id,
|
||||||
|
:user_id,
|
||||||
|
:topic_id,
|
||||||
|
:url,
|
||||||
|
:domain,
|
||||||
|
:internal,
|
||||||
|
:link_topic_id,
|
||||||
|
:link_post_id,
|
||||||
|
:quote,
|
||||||
|
:extension,
|
||||||
|
:reflection,
|
||||||
|
:now,
|
||||||
|
:now
|
||||||
|
)
|
||||||
|
ON CONFLICT DO NOTHING
|
||||||
|
RETURNING id
|
||||||
|
)
|
||||||
|
SELECT COALESCE(
|
||||||
|
(SELECT id FROM new_row),
|
||||||
|
(SELECT id FROM topic_links WHERE post_id = :post_id AND topic_id = :topic_id AND url = :url)
|
||||||
|
), (SELECT id FROM new_row) IS NOT NULL
|
||||||
|
SQL
|
||||||
|
|
||||||
|
topic_link_id, new_record = DB.query_single(sql,
|
||||||
|
post_id: post_id,
|
||||||
|
user_id: user_id,
|
||||||
|
topic_id: topic_id,
|
||||||
|
url: url,
|
||||||
|
domain: domain,
|
||||||
|
internal: internal,
|
||||||
|
link_topic_id: link_topic_id,
|
||||||
|
link_post_id: link_post_id,
|
||||||
|
quote: quote,
|
||||||
|
extension: extension,
|
||||||
|
reflection: reflection,
|
||||||
|
now: Time.now
|
||||||
|
)
|
||||||
|
|
||||||
|
if new_record
|
||||||
|
DB.after_commit do
|
||||||
|
crawl_link_title(topic_link_id)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
topic_link_id
|
||||||
|
end
|
||||||
|
|
||||||
def self.ensure_entry_for(post, link, parsed)
|
def self.ensure_entry_for(post, link, parsed)
|
||||||
url = link.url
|
url = link.url
|
||||||
internal = false
|
internal = false
|
||||||
@ -210,25 +304,20 @@ class TopicLink < ActiveRecord::Base
|
|||||||
url = url[0...TopicLink.max_url_length]
|
url = url[0...TopicLink.max_url_length]
|
||||||
return nil if parsed && parsed.host && parsed.host.length > TopicLink.max_domain_length
|
return nil if parsed && parsed.host && parsed.host.length > TopicLink.max_domain_length
|
||||||
|
|
||||||
unless TopicLink.exists?(topic_id: post.topic_id, post_id: post.id, url: url)
|
file_extension = File.extname(parsed.path)[1..10].downcase unless parsed.path.nil? || File.extname(parsed.path).empty?
|
||||||
file_extension = File.extname(parsed.path)[1..10].downcase unless parsed.path.nil? || File.extname(parsed.path).empty?
|
|
||||||
begin
|
safe_create_topic_link(
|
||||||
TopicLink.create(
|
post_id: post.id,
|
||||||
post_id: post.id,
|
user_id: post.user_id,
|
||||||
user_id: post.user_id,
|
topic_id: post.topic_id,
|
||||||
topic_id: post.topic_id,
|
url: url,
|
||||||
url: url,
|
domain: parsed.host,
|
||||||
domain: parsed.host || Discourse.current_hostname,
|
internal: internal,
|
||||||
internal: internal,
|
link_topic_id: topic&.id,
|
||||||
link_topic_id: topic&.id,
|
link_post_id: reflected_post.try(:id),
|
||||||
link_post_id: reflected_post.try(:id),
|
quote: link.is_quote,
|
||||||
quote: link.is_quote,
|
extension: file_extension,
|
||||||
extension: file_extension
|
)
|
||||||
)
|
|
||||||
rescue ActiveRecord::RecordNotUnique
|
|
||||||
# it's fine
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
reflected_id = nil
|
reflected_id = nil
|
||||||
|
|
||||||
@ -236,24 +325,19 @@ class TopicLink < ActiveRecord::Base
|
|||||||
if topic && post.topic && topic.archetype != 'private_message' && post.topic.archetype != 'private_message' && post.topic.visible?
|
if topic && post.topic && topic.archetype != 'private_message' && post.topic.archetype != 'private_message' && post.topic.visible?
|
||||||
prefix = Discourse.base_url_no_prefix
|
prefix = Discourse.base_url_no_prefix
|
||||||
reflected_url = "#{prefix}#{post.topic.relative_url(post.post_number)}"
|
reflected_url = "#{prefix}#{post.topic.relative_url(post.post_number)}"
|
||||||
tl = TopicLink.find_by(topic_id: topic&.id,
|
|
||||||
post_id: reflected_post&.id,
|
|
||||||
url: reflected_url)
|
|
||||||
|
|
||||||
unless tl
|
reflected_id = safe_create_topic_link(
|
||||||
tl = TopicLink.create(user_id: post.user_id,
|
user_id: post.user_id,
|
||||||
topic_id: topic&.id,
|
topic_id: topic&.id,
|
||||||
post_id: reflected_post&.id,
|
post_id: reflected_post&.id,
|
||||||
url: reflected_url,
|
url: reflected_url,
|
||||||
domain: Discourse.current_hostname,
|
domain: Discourse.current_hostname,
|
||||||
reflection: true,
|
reflection: true,
|
||||||
internal: true,
|
internal: true,
|
||||||
link_topic_id: post.topic_id,
|
link_topic_id: post.topic_id,
|
||||||
link_post_id: post.id)
|
link_post_id: post.id
|
||||||
|
)
|
||||||
|
|
||||||
end
|
|
||||||
|
|
||||||
reflected_id = tl.id if tl.persisted?
|
|
||||||
end
|
end
|
||||||
|
|
||||||
[url, reflected_id]
|
[url, reflected_id]
|
||||||
|
@ -28,31 +28,49 @@ describe TopicLink do
|
|||||||
end
|
end
|
||||||
|
|
||||||
describe 'external links' do
|
describe 'external links' do
|
||||||
fab!(:post2) do
|
it 'correctly handles links' do
|
||||||
Fabricate(:post, raw: <<~RAW, user: user, topic: topic)
|
|
||||||
|
non_png = "https://b.com/#{SecureRandom.hex}"
|
||||||
|
|
||||||
|
# prepare a title for one of the links
|
||||||
|
stub_request(:get, non_png).
|
||||||
|
with(headers: {
|
||||||
|
'Accept' => '*/*',
|
||||||
|
'Accept-Encoding' => 'gzip',
|
||||||
|
'Host' => 'b.com',
|
||||||
|
}).
|
||||||
|
to_return(status: 200, body: "<html><head><title>amazing</title></head></html>", headers: {})
|
||||||
|
|
||||||
|
# so we run crawl_topic_links
|
||||||
|
Jobs.run_immediately!
|
||||||
|
|
||||||
|
png_title = "#{SecureRandom.hex}.png"
|
||||||
|
png = "https://awesome.com/#{png_title}"
|
||||||
|
|
||||||
|
post = Fabricate(:post, raw: <<~RAW, user: user, topic: topic)
|
||||||
http://a.com/
|
http://a.com/
|
||||||
https://b.com/b
|
#{non_png}
|
||||||
http://#{'a' * 200}.com/invalid
|
http://#{'a' * 200}.com/invalid
|
||||||
//b.com/#{'a' * 500}
|
//b.com/#{'a' * 500}
|
||||||
|
#{png}
|
||||||
RAW
|
RAW
|
||||||
end
|
|
||||||
|
|
||||||
before do
|
TopicLink.extract_from(post)
|
||||||
TopicLink.extract_from(post2)
|
|
||||||
end
|
# we have a special rule for images title where we pull them out of the filename
|
||||||
|
expect(topic.topic_links.where(url: png).pluck(:title).first).to eq(png_title)
|
||||||
|
expect(topic.topic_links.where(url: non_png).pluck(:title).first).to eq("amazing")
|
||||||
|
|
||||||
it 'works' do
|
|
||||||
expect(topic.topic_links.pluck(:url)).to contain_exactly(
|
expect(topic.topic_links.pluck(:url)).to contain_exactly(
|
||||||
|
png,
|
||||||
|
non_png,
|
||||||
"http://a.com/",
|
"http://a.com/",
|
||||||
"https://b.com/b",
|
|
||||||
"//b.com/#{'a' * 500}"[0...TopicLink.max_url_length]
|
"//b.com/#{'a' * 500}"[0...TopicLink.max_url_length]
|
||||||
)
|
)
|
||||||
end
|
|
||||||
|
|
||||||
it "doesn't reset them when rebaking" do
|
|
||||||
old_ids = topic.topic_links.pluck(:id)
|
old_ids = topic.topic_links.pluck(:id)
|
||||||
|
|
||||||
TopicLink.extract_from(post2)
|
TopicLink.extract_from(post)
|
||||||
|
|
||||||
new_ids = topic.topic_links.pluck(:id)
|
new_ids = topic.topic_links.pluck(:id)
|
||||||
|
|
||||||
@ -107,15 +125,17 @@ describe TopicLink do
|
|||||||
# this is subtle, but we had a bug where the second time
|
# this is subtle, but we had a bug where the second time
|
||||||
# TopicLink.extract_from was called a reflection was nuked
|
# TopicLink.extract_from was called a reflection was nuked
|
||||||
2.times do
|
2.times do
|
||||||
topic.reload
|
|
||||||
TopicLink.extract_from(linked_post)
|
TopicLink.extract_from(linked_post)
|
||||||
|
|
||||||
|
topic.reload
|
||||||
|
other_topic.reload
|
||||||
|
|
||||||
link = topic.topic_links.first
|
link = topic.topic_links.first
|
||||||
expect(link).to be_present
|
expect(link).to be_present
|
||||||
expect(link).to be_internal
|
expect(link).to be_internal
|
||||||
expect(link.url).to eq(url)
|
expect(link.url).to eq(url)
|
||||||
expect(link.domain).to eq(test_uri.host)
|
expect(link.domain).to eq(test_uri.host)
|
||||||
link.link_topic_id == other_topic.id
|
expect(link.link_topic_id). to eq(other_topic.id)
|
||||||
expect(link).not_to be_reflection
|
expect(link).not_to be_reflection
|
||||||
|
|
||||||
reflection = other_topic.topic_links.first
|
reflection = other_topic.topic_links.first
|
||||||
|
Loading…
Reference in New Issue
Block a user