diff --git a/app/jobs/regular/process_post.rb b/app/jobs/regular/process_post.rb
index b573f0683c8..60f444fe981 100644
--- a/app/jobs/regular/process_post.rb
+++ b/app/jobs/regular/process_post.rb
@@ -40,6 +40,8 @@ module Jobs
end
end
+ enqueue_pull_hotlinked_images(post) unless args[:skip_pull_hotlinked_images]
+
if !post.user&.staff? && !post.user&.staged?
s = post.raw
s << " #{post.topic.title}" if post.post_number == 1
@@ -60,6 +62,11 @@ module Jobs
TopicLink.extract_from(post)
QuotedPost.extract_from(post)
end
+
+ def enqueue_pull_hotlinked_images(post)
+ Jobs.cancel_scheduled_job(:pull_hotlinked_images, post_id: post.id)
+ Jobs.enqueue(:pull_hotlinked_images, post_id: post.id)
+ end
end
end
diff --git a/app/jobs/regular/pull_hotlinked_images.rb b/app/jobs/regular/pull_hotlinked_images.rb
index ba3389843ea..6b5d6d2ddbd 100644
--- a/app/jobs/regular/pull_hotlinked_images.rb
+++ b/app/jobs/regular/pull_hotlinked_images.rb
@@ -10,16 +10,13 @@ module Jobs
end
def execute(args)
+ disable_if_low_on_disk_space
+
@post_id = args[:post_id]
raise Discourse::InvalidParameters.new(:post_id) if @post_id.blank?
post = Post.find_by(id: @post_id)
- return if post.blank?
- return if post.topic.blank?
- return if post.cook_method == Post.cook_methods[:raw_html]
-
- raw = post.raw.dup
- start_raw = raw.dup
+ return if post.nil? || post.topic.nil?
hotlinked_map = post.post_hotlinked_media.map { |r| [r.url, r] }.to_h
@@ -55,30 +52,23 @@ module Jobs
changed_hotlink_records = true
hotlink_record.save!
end
-
- # have we successfully downloaded that file?
- if upload = hotlink_record&.upload
- raw = replace_in_raw(original_src: original_src, upload: upload, raw: raw)
- end
rescue => e
raise e if Rails.env.test?
log(:error, "Failed to pull hotlinked image (#{download_src}) post: #{@post_id}\n" + e.message + "\n" + e.backtrace.join("\n"))
end
- # If post changed while we were downloading images, never apply edits
- post.reload
- post_changed_elsewhere = (start_raw != post.raw)
- raw_changed_here = (raw != post.raw)
-
- if !post_changed_elsewhere && raw_changed_here
- changes = { raw: raw, edit_reason: I18n.t("upload.edit_reason") }
- post.revise(Discourse.system_user, changes, bypass_bump: true, skip_staff_log: true)
- elsif changed_hotlink_records
+ if changed_hotlink_records
post.trigger_post_process(
bypass_bump: true,
skip_pull_hotlinked_images: true # Avoid an infinite loop of job scheduling
)
end
+
+ if hotlinked_map.size > 0
+ Jobs.cancel_scheduled_job(:update_hotlinked_raw, post_id: post.id)
+ update_raw_delay = SiteSetting.editing_grace_period + 1
+ Jobs.enqueue_in(update_raw_delay, :update_hotlinked_raw, post_id: post.id)
+ end
end
def download(src)
@@ -137,45 +127,6 @@ module Jobs
end
end
- def replace_in_raw(original_src:, raw:, upload:)
- raw = raw.dup
- escaped_src = Regexp.escape(original_src)
-
- replace_raw = ->(match, match_src, replacement, _index) {
- if normalize_src(original_src) == normalize_src(match_src)
- replacement =
- if replacement.include?(InlineUploads::PLACEHOLDER)
- replacement.sub(InlineUploads::PLACEHOLDER, upload.short_url)
- elsif replacement.include?(InlineUploads::PATH_PLACEHOLDER)
- replacement.sub(InlineUploads::PATH_PLACEHOLDER, upload.short_path)
- end
-
- raw = raw.gsub(
- match,
- replacement
- )
- end
- }
-
- # there are 6 ways to insert an image in a post
- # HTML tag -
- InlineUploads.match_img(raw, external_src: true, &replace_raw)
-
- # BBCode tag - [img]http://...[/img]
- InlineUploads.match_bbcode_img(raw, external_src: true, &replace_raw)
-
- # Markdown linked image - [](http://...)
- # Markdown inline - 
- # Markdown inline - 
- # Markdown inline - 
- InlineUploads.match_md_inline_img(raw, external_src: true, &replace_raw)
-
- # Direct link
- raw.gsub!(/^#{escaped_src}(\s?)$/) { "#{$1}" }
-
- raw
- end
-
def extract_images_from(html)
doc = Nokogiri::HTML5::fragment(html)
@@ -239,6 +190,30 @@ module Jobs
def normalize_src(src)
PostHotlinkedMedia.normalize_src(src)
end
+
+ def disable_if_low_on_disk_space
+ return if Discourse.store.external?
+ return if !SiteSetting.download_remote_images_to_local
+ return if available_disk_space >= SiteSetting.download_remote_images_threshold
+
+ SiteSetting.download_remote_images_to_local = false
+
+ # log the site setting change
+ reason = I18n.t("disable_remote_images_download_reason")
+ staff_action_logger = StaffActionLogger.new(Discourse.system_user)
+ staff_action_logger.log_site_setting_change("download_remote_images_to_local", true, false, details: reason)
+
+ # also send a private message to the site contact user notify_about_low_disk_space
+ notify_about_low_disk_space
+ end
+
+ def notify_about_low_disk_space
+ SystemMessage.create_from_system_user(Discourse.site_contact_user, :download_remote_images_disabled)
+ end
+
+ def available_disk_space
+ 100 - DiskSpace.percent_free("#{Rails.root}/public/uploads")
+ end
end
end
diff --git a/app/jobs/regular/pull_user_profile_hotlinked_images.rb b/app/jobs/regular/pull_user_profile_hotlinked_images.rb
index ae87a9043d7..66280f2f7eb 100644
--- a/app/jobs/regular/pull_user_profile_hotlinked_images.rb
+++ b/app/jobs/regular/pull_user_profile_hotlinked_images.rb
@@ -30,16 +30,16 @@ module Jobs
rescue ImageBrokenError
broken_image_urls << normalized_src
end
-
- # have we successfully downloaded that file?
- if upload = downloaded_images[normalized_src]
- user_profile.bio_raw = replace_in_raw(original_src: original_src, upload: upload, raw: user_profile.bio_raw)
- end
rescue => e
raise e if Rails.env.test?
log(:error, "Failed to pull hotlinked image (#{download_src}) user: #{@user_id}\n" + e.message + "\n" + e.backtrace.join("\n"))
end
+ user_profile.bio_raw = InlineUploads.replace_hotlinked_image_urls(raw: user_profile.bio_raw) do |match_src|
+ normalized_match_src = PostHotlinkedMedia.normalize_src(match_src)
+ downloaded_images[normalized_match_src]
+ end
+
user_profile.skip_pull_hotlinked_image = true
user_profile.save!
end
diff --git a/app/jobs/regular/update_hotlinked_raw.rb b/app/jobs/regular/update_hotlinked_raw.rb
new file mode 100644
index 00000000000..b938a1ffaae
--- /dev/null
+++ b/app/jobs/regular/update_hotlinked_raw.rb
@@ -0,0 +1,28 @@
+# frozen_string_literal: true
+
+module Jobs
+ class UpdateHotlinkedRaw < ::Jobs::Base
+ sidekiq_options queue: 'low'
+
+ def execute(args)
+ @post_id = args[:post_id]
+ raise Discourse::InvalidParameters.new(:post_id) if @post_id.blank?
+
+ post = Post.find_by(id: @post_id)
+ return if post.nil?
+ return if post.cook_method == Post.cook_methods[:raw_html]
+
+ hotlinked_map = post.post_hotlinked_media.preload(:upload).map { |r| [r.url, r] }.to_h
+
+ raw = InlineUploads.replace_hotlinked_image_urls(raw: post.raw) do |match_src|
+ normalized_match_src = PostHotlinkedMedia.normalize_src(match_src)
+ hotlinked_map[normalized_match_src]&.upload
+ end
+
+ if post.raw != raw
+ changes = { raw: raw, edit_reason: I18n.t("upload.edit_reason") }
+ post.revise(Discourse.system_user, changes, bypass_bump: true, skip_staff_log: true)
+ end
+ end
+ end
+end
diff --git a/app/services/inline_uploads.rb b/app/services/inline_uploads.rb
index 55a162a8ea4..b2519f6331f 100644
--- a/app/services/inline_uploads.rb
+++ b/app/services/inline_uploads.rb
@@ -214,6 +214,40 @@ class InlineUploads
end
end
+ def self.replace_hotlinked_image_urls(raw:, &blk)
+ replace = Proc.new do |match, match_src, replacement, _index|
+ upload = blk.call(match_src)
+ next if !upload
+
+ replacement =
+ if replacement.include?(InlineUploads::PLACEHOLDER)
+ replacement.sub(InlineUploads::PLACEHOLDER, upload.short_url)
+ elsif replacement.include?(InlineUploads::PATH_PLACEHOLDER)
+ replacement.sub(InlineUploads::PATH_PLACEHOLDER, upload.short_path)
+ end
+
+ raw = raw.gsub(
+ match,
+ replacement
+ )
+ end
+
+ # there are 6 ways to insert an image in a post
+ # HTML tag -
+ InlineUploads.match_img(raw, external_src: true, &replace)
+
+ # BBCode tag - [img]http://...[/img]
+ InlineUploads.match_bbcode_img(raw, external_src: true, &replace)
+
+ # Markdown linked image - [](http://...)
+ # Markdown inline - 
+ # Markdown inline - 
+ # Markdown inline - 
+ InlineUploads.match_md_inline_img(raw, external_src: true, &replace)
+
+ raw
+ end
+
def self.matched_uploads(node)
upload_path = Discourse.store.upload_path
base_url = Discourse.base_url.sub(/https?:\/\//, "(https?://)")
diff --git a/lib/cooked_post_processor.rb b/lib/cooked_post_processor.rb
index 8444a4768bb..189c048dcf3 100644
--- a/lib/cooked_post_processor.rb
+++ b/lib/cooked_post_processor.rb
@@ -47,7 +47,6 @@ class CookedPostProcessor
remove_user_ids
update_post_image
enforce_nofollow
- pull_hotlinked_images
grant_badges
@post.link_post_uploads(fragments: @doc)
DiscourseEvent.trigger(:post_process_cooked, @doc, @post)
@@ -361,41 +360,6 @@ class CookedPostProcessor
PrettyText.add_rel_attributes_to_user_content(@doc, add_nofollow)
end
- def pull_hotlinked_images
- return if @opts[:skip_pull_hotlinked_images]
- # have we enough disk space?
- disable_if_low_on_disk_space # But still enqueue the job
- # make sure no other job is scheduled
- Jobs.cancel_scheduled_job(:pull_hotlinked_images, post_id: @post.id)
- # schedule the job
- delay = SiteSetting.editing_grace_period + 1
- Jobs.enqueue_in(delay.seconds.to_i, :pull_hotlinked_images, post_id: @post.id)
- end
-
- def disable_if_low_on_disk_space
- return if Discourse.store.external?
- return if !SiteSetting.download_remote_images_to_local
- return if available_disk_space >= SiteSetting.download_remote_images_threshold
-
- SiteSetting.download_remote_images_to_local = false
-
- # log the site setting change
- reason = I18n.t("disable_remote_images_download_reason")
- staff_action_logger = StaffActionLogger.new(Discourse.system_user)
- staff_action_logger.log_site_setting_change("download_remote_images_to_local", true, false, details: reason)
-
- # also send a private message to the site contact user notify_about_low_disk_space
- notify_about_low_disk_space
- end
-
- def notify_about_low_disk_space
- SystemMessage.create_from_system_user(Discourse.site_contact_user, :download_remote_images_disabled)
- end
-
- def available_disk_space
- 100 - DiskSpace.percent_free("#{Rails.root}/public/uploads")
- end
-
private
def post_process_images
diff --git a/spec/jobs/process_post_spec.rb b/spec/jobs/process_post_spec.rb
index fc20d8f5083..0fb49e684b9 100644
--- a/spec/jobs/process_post_spec.rb
+++ b/spec/jobs/process_post_spec.rb
@@ -88,4 +88,35 @@ describe Jobs::ProcessPost do
expect(post.topic.reload.excerpt).to eq("Some OP content")
end
end
+
+ context "#enqueue_pull_hotlinked_images" do
+ fab!(:post) { Fabricate(:post, created_at: 20.days.ago) }
+ let(:job) { Jobs::ProcessPost.new }
+
+ it "runs even when download_remote_images_to_local is disabled" do
+ # We want to run it to pull hotlinked optimized images
+ SiteSetting.download_remote_images_to_local = false
+ expect_enqueued_with(job: :pull_hotlinked_images, args: { post_id: post.id }) do
+ job.execute({ post_id: post.id })
+ end
+ end
+
+ context "when download_remote_images_to_local? is enabled" do
+ before do
+ SiteSetting.download_remote_images_to_local = true
+ end
+
+ it "enqueues" do
+ expect_enqueued_with(job: :pull_hotlinked_images, args: { post_id: post.id }) do
+ job.execute({ post_id: post.id })
+ end
+ end
+
+ it "does not run when requested to skip" do
+ job.execute({ post_id: post.id, skip_pull_hotlinked_images: true })
+ expect(Jobs::PullHotlinkedImages.jobs.size).to eq(0)
+ end
+ end
+
+ end
end
diff --git a/spec/jobs/pull_hotlinked_images_spec.rb b/spec/jobs/pull_hotlinked_images_spec.rb
index 416d9a80ab4..c7be60aea6d 100644
--- a/spec/jobs/pull_hotlinked_images_spec.rb
+++ b/spec/jobs/pull_hotlinked_images_spec.rb
@@ -67,6 +67,21 @@ describe Jobs::PullHotlinkedImages do
expect(post.reload.raw).to eq("
")
end
+ it 'enqueues raw replacement job with a delay' do
+ Jobs.run_later!
+
+ post = Fabricate(:post, raw: "
")
+ stub_image_size
+
+ freeze_time
+ Jobs.expects(:cancel_scheduled_job).with(:update_hotlinked_raw, post_id: post.id).once
+ delay = SiteSetting.editing_grace_period + 1
+
+ expect_enqueued_with(job: :update_hotlinked_raw, args: { post_id: post.id }, at: Time.zone.now + delay.seconds) do
+ Jobs::PullHotlinkedImages.new.execute(post_id: post.id)
+ end
+ end
+
it 'removes downloaded images when they are no longer needed' do
post = Fabricate(:post, raw: "
")
stub_image_size
@@ -196,14 +211,14 @@ describe Jobs::PullHotlinkedImages do
expect(post.uploads).to contain_exactly(upload)
end
- it "skips raw_html posts" do
+ it "skips editing raw for raw_html posts" do
raw = "
"
post = Fabricate(:post, raw: raw, cook_method: Post.cook_methods[:raw_html])
stub_image_size
expect do
post.rebake!
post.reload
- end.not_to change { Upload.count }
+ end.to change { Upload.count }.by(1)
expect(post.raw).to eq(raw)
end
@@ -556,6 +571,43 @@ describe Jobs::PullHotlinkedImages do
end
end
+ context "#disable_if_low_on_disk_space" do
+ fab!(:post) { Fabricate(:post, created_at: 20.days.ago) }
+ let(:job) { Jobs::PullHotlinkedImages.new }
+
+ before do
+ SiteSetting.download_remote_images_to_local = true
+ SiteSetting.download_remote_images_threshold = 20
+ job.stubs(:available_disk_space).returns(50)
+ end
+
+ it "does nothing when there's enough disk space" do
+ SiteSetting.expects(:download_remote_images_to_local=).never
+ job.execute({ post_id: post.id })
+ end
+
+ context "when there's not enough disk space" do
+
+ before { SiteSetting.download_remote_images_threshold = 75 }
+
+ it "disables download_remote_images_threshold and send a notification to the admin" do
+ StaffActionLogger.any_instance.expects(:log_site_setting_change).once
+ SystemMessage.expects(:create_from_system_user).with(Discourse.site_contact_user, :download_remote_images_disabled).once
+ job.execute({ post_id: post.id })
+
+ expect(SiteSetting.download_remote_images_to_local).to eq(false)
+ end
+
+ it "doesn't disable download_remote_images_to_local if site uses S3" do
+ setup_s3
+ job.execute({ post_id: post.id })
+
+ expect(SiteSetting.download_remote_images_to_local).to eq(true)
+ end
+
+ end
+ end
+
def stub_s3(upload)
stub_upload(upload)
stub_request(:get, "https:" + upload.url).to_return(status: 200, body: file_from_fixtures("smallest.png"))
diff --git a/spec/lib/cooked_post_processor_spec.rb b/spec/lib/cooked_post_processor_spec.rb
index 8f4a2b925b3..d626cc5f5c3 100644
--- a/spec/lib/cooked_post_processor_spec.rb
+++ b/spec/lib/cooked_post_processor_spec.rb
@@ -21,7 +21,6 @@ describe CookedPostProcessor do
cpp.expects(:post_process_oneboxes).in_sequence(post_process)
cpp.expects(:post_process_images).in_sequence(post_process)
cpp.expects(:optimize_urls).in_sequence(post_process)
- cpp.expects(:pull_hotlinked_images).in_sequence(post_process)
cpp.post_process
expect(PostUpload.exists?(post: post, upload: upload)).to eq(true)
@@ -1543,102 +1542,6 @@ describe CookedPostProcessor do
end
end
- context "#pull_hotlinked_images" do
-
- let(:post) { build(:post, created_at: 20.days.ago) }
- let(:cpp) { CookedPostProcessor.new(post) }
-
- before { cpp.stubs(:available_disk_space).returns(90) }
-
- it "runs even when download_remote_images_to_local is disabled" do
- # We want to run it to pull hotlinked optimized images
- SiteSetting.download_remote_images_to_local = false
- expect { cpp.pull_hotlinked_images }.
- to change { Jobs::PullHotlinkedImages.jobs.count }.by 1
- end
-
- context "when download_remote_images_to_local? is enabled" do
- before do
- SiteSetting.download_remote_images_to_local = true
- end
-
- it "disables download_remote_images if there is not enough disk space" do
- cpp.expects(:available_disk_space).returns(5)
- cpp.pull_hotlinked_images
- expect(SiteSetting.download_remote_images_to_local).to eq(false)
- end
-
- it "does not run when requested to skip" do
- CookedPostProcessor.new(post, skip_pull_hotlinked_images: true).pull_hotlinked_images
- expect(Jobs::PullHotlinkedImages.jobs.size).to eq(0)
- end
-
- context "and there is enough disk space" do
- before { cpp.expects(:disable_if_low_on_disk_space).at_least_once }
-
- context "and the post has been updated by an actual user" do
-
- before { post.id = 42 }
-
- it "ensures only one job is scheduled right after the editing_grace_period" do
- freeze_time
-
- Jobs.expects(:cancel_scheduled_job).with(:pull_hotlinked_images, post_id: post.id).once
-
- delay = SiteSetting.editing_grace_period + 1
-
- expect_enqueued_with(job: :pull_hotlinked_images, args: { post_id: post.id }, at: Time.zone.now + delay.seconds) do
- cpp.pull_hotlinked_images
- end
- end
-
- end
-
- end
-
- end
-
- end
-
- context "#disable_if_low_on_disk_space" do
-
- let(:post) { build(:post, created_at: 20.days.ago) }
- let(:cpp) { CookedPostProcessor.new(post) }
-
- before do
- SiteSetting.download_remote_images_to_local = true
- SiteSetting.download_remote_images_threshold = 20
- cpp.stubs(:available_disk_space).returns(50)
- end
-
- it "does nothing when there's enough disk space" do
- SiteSetting.expects(:download_remote_images_to_local=).never
- cpp.disable_if_low_on_disk_space
- end
-
- context "when there's not enough disk space" do
-
- before { SiteSetting.download_remote_images_threshold = 75 }
-
- it "disables download_remote_images_threshold and send a notification to the admin" do
- StaffActionLogger.any_instance.expects(:log_site_setting_change).once
- SystemMessage.expects(:create_from_system_user).with(Discourse.site_contact_user, :download_remote_images_disabled).once
- cpp.disable_if_low_on_disk_space
-
- expect(SiteSetting.download_remote_images_to_local).to eq(false)
- end
-
- it "doesn't disable download_remote_images_to_local if site uses S3" do
- setup_s3
- cpp.disable_if_low_on_disk_space
-
- expect(SiteSetting.download_remote_images_to_local).to eq(true)
- end
-
- end
-
- end
-
context "#is_a_hyperlink?" do
let(:post) { build(:post) }