FEATURE: Cache embed contents in the database (#25133)

* FEATURE: Cache embed contents in the database

This will be useful for features that rely on the semantic content of topics, such as the many AI features.



Co-authored-by: Roman Rizzi <rizziromanalejandro@gmail.com>
This commit is contained in:
Rafael dos Santos Silva
2024-01-05 10:09:31 -03:00
committed by GitHub
parent ac4d90b3a6
commit 13735f35fb
3 changed files with 64 additions and 10 deletions

View File

@@ -3,10 +3,13 @@
class TopicEmbed < ActiveRecord::Base
include Trashable
EMBED_CONTENT_CACHE_MAX_LENGTH = 32_000
belongs_to :topic
belongs_to :post
validates_presence_of :embed_url
validates_uniqueness_of :embed_url
validates :embed_content_cache, length: { maximum: EMBED_CONTENT_CACHE_MAX_LENGTH }
before_validation(on: :create) do
unless (
@@ -43,6 +46,7 @@ class TopicEmbed < ActiveRecord::Base
def self.import(user, url, title, contents, category_id: nil, cook_method: nil, tags: nil)
return unless url =~ %r{\Ahttps?\://}
original_contents = contents.dup.truncate(EMBED_CONTENT_CACHE_MAX_LENGTH)
contents = first_paragraph_from(contents) if SiteSetting.embed_truncate && cook_method.nil?
contents ||= ""
contents = contents.dup << imported_from_html(url)
@@ -77,6 +81,7 @@ class TopicEmbed < ActiveRecord::Base
}
post = PostCreator.create(user, create_args)
post.topic.topic_embed.update!(embed_content_cache: original_contents)
end
else
absolutize_urls(url, contents)
@@ -101,7 +106,7 @@ class TopicEmbed < ActiveRecord::Base
changes[:title] = title if title.present?
post.revise(user, changes, skip_validations: true, bypass_rate_limiter: true)
embed.update!(content_sha1: content_sha1)
embed.update!(content_sha1: content_sha1, embed_content_cache: original_contents)
end
end
end
@@ -296,6 +301,11 @@ class TopicEmbed < ActiveRecord::Base
response = TopicEmbed.find_remote(url)
body = response.body
if post&.topic&.topic_embed && body.present?
post.topic.topic_embed.update!(
embed_content_cache: body.truncate(EMBED_CONTENT_CACHE_MAX_LENGTH),
)
end
body << TopicEmbed.imported_from_html(url)
body
end
@@ -306,15 +316,16 @@ end
#
# Table name: topic_embeds
#
# id :integer not null, primary key
# topic_id :integer not null
# post_id :integer not null
# embed_url :string(1000) not null
# content_sha1 :string(40)
# created_at :datetime not null
# updated_at :datetime not null
# deleted_at :datetime
# deleted_by_id :integer
# id :integer not null, primary key
# topic_id :integer not null
# post_id :integer not null
# embed_url :string(1000) not null
# content_sha1 :string(40)
# created_at :datetime not null
# updated_at :datetime not null
# deleted_at :datetime
# deleted_by_id :integer
# embed_content_cache :text
#
# Indexes
#