diff --git a/app/jobs/regular/process_post.rb b/app/jobs/regular/process_post.rb index b573f0683c8..60f444fe981 100644 --- a/app/jobs/regular/process_post.rb +++ b/app/jobs/regular/process_post.rb @@ -40,6 +40,8 @@ module Jobs end end + enqueue_pull_hotlinked_images(post) unless args[:skip_pull_hotlinked_images] + if !post.user&.staff? && !post.user&.staged? s = post.raw s << " #{post.topic.title}" if post.post_number == 1 @@ -60,6 +62,11 @@ module Jobs TopicLink.extract_from(post) QuotedPost.extract_from(post) end + + def enqueue_pull_hotlinked_images(post) + Jobs.cancel_scheduled_job(:pull_hotlinked_images, post_id: post.id) + Jobs.enqueue(:pull_hotlinked_images, post_id: post.id) + end end end diff --git a/app/jobs/regular/pull_hotlinked_images.rb b/app/jobs/regular/pull_hotlinked_images.rb index ba3389843ea..6b5d6d2ddbd 100644 --- a/app/jobs/regular/pull_hotlinked_images.rb +++ b/app/jobs/regular/pull_hotlinked_images.rb @@ -10,16 +10,13 @@ module Jobs end def execute(args) + disable_if_low_on_disk_space + @post_id = args[:post_id] raise Discourse::InvalidParameters.new(:post_id) if @post_id.blank? post = Post.find_by(id: @post_id) - return if post.blank? - return if post.topic.blank? - return if post.cook_method == Post.cook_methods[:raw_html] - - raw = post.raw.dup - start_raw = raw.dup + return if post.nil? || post.topic.nil? hotlinked_map = post.post_hotlinked_media.map { |r| [r.url, r] }.to_h @@ -55,30 +52,23 @@ module Jobs changed_hotlink_records = true hotlink_record.save! end - - # have we successfully downloaded that file? - if upload = hotlink_record&.upload - raw = replace_in_raw(original_src: original_src, upload: upload, raw: raw) - end rescue => e raise e if Rails.env.test? log(:error, "Failed to pull hotlinked image (#{download_src}) post: #{@post_id}\n" + e.message + "\n" + e.backtrace.join("\n")) end - # If post changed while we were downloading images, never apply edits - post.reload - post_changed_elsewhere = (start_raw != post.raw) - raw_changed_here = (raw != post.raw) - - if !post_changed_elsewhere && raw_changed_here - changes = { raw: raw, edit_reason: I18n.t("upload.edit_reason") } - post.revise(Discourse.system_user, changes, bypass_bump: true, skip_staff_log: true) - elsif changed_hotlink_records + if changed_hotlink_records post.trigger_post_process( bypass_bump: true, skip_pull_hotlinked_images: true # Avoid an infinite loop of job scheduling ) end + + if hotlinked_map.size > 0 + Jobs.cancel_scheduled_job(:update_hotlinked_raw, post_id: post.id) + update_raw_delay = SiteSetting.editing_grace_period + 1 + Jobs.enqueue_in(update_raw_delay, :update_hotlinked_raw, post_id: post.id) + end end def download(src) @@ -137,45 +127,6 @@ module Jobs end end - def replace_in_raw(original_src:, raw:, upload:) - raw = raw.dup - escaped_src = Regexp.escape(original_src) - - replace_raw = ->(match, match_src, replacement, _index) { - if normalize_src(original_src) == normalize_src(match_src) - replacement = - if replacement.include?(InlineUploads::PLACEHOLDER) - replacement.sub(InlineUploads::PLACEHOLDER, upload.short_url) - elsif replacement.include?(InlineUploads::PATH_PLACEHOLDER) - replacement.sub(InlineUploads::PATH_PLACEHOLDER, upload.short_path) - end - - raw = raw.gsub( - match, - replacement - ) - end - } - - # there are 6 ways to insert an image in a post - # HTML tag - - InlineUploads.match_img(raw, external_src: true, &replace_raw) - - # BBCode tag - [img]http://...[/img] - InlineUploads.match_bbcode_img(raw, external_src: true, &replace_raw) - - # Markdown linked image - [![alt](http://...)](http://...) - # Markdown inline - ![alt](http://...) - # Markdown inline - ![](http://... "image title") - # Markdown inline - ![alt](http://... "image title") - InlineUploads.match_md_inline_img(raw, external_src: true, &replace_raw) - - # Direct link - raw.gsub!(/^#{escaped_src}(\s?)$/) { "![](#{upload.short_url})#{$1}" } - - raw - end - def extract_images_from(html) doc = Nokogiri::HTML5::fragment(html) @@ -239,6 +190,30 @@ module Jobs def normalize_src(src) PostHotlinkedMedia.normalize_src(src) end + + def disable_if_low_on_disk_space + return if Discourse.store.external? + return if !SiteSetting.download_remote_images_to_local + return if available_disk_space >= SiteSetting.download_remote_images_threshold + + SiteSetting.download_remote_images_to_local = false + + # log the site setting change + reason = I18n.t("disable_remote_images_download_reason") + staff_action_logger = StaffActionLogger.new(Discourse.system_user) + staff_action_logger.log_site_setting_change("download_remote_images_to_local", true, false, details: reason) + + # also send a private message to the site contact user notify_about_low_disk_space + notify_about_low_disk_space + end + + def notify_about_low_disk_space + SystemMessage.create_from_system_user(Discourse.site_contact_user, :download_remote_images_disabled) + end + + def available_disk_space + 100 - DiskSpace.percent_free("#{Rails.root}/public/uploads") + end end end diff --git a/app/jobs/regular/pull_user_profile_hotlinked_images.rb b/app/jobs/regular/pull_user_profile_hotlinked_images.rb index ae87a9043d7..66280f2f7eb 100644 --- a/app/jobs/regular/pull_user_profile_hotlinked_images.rb +++ b/app/jobs/regular/pull_user_profile_hotlinked_images.rb @@ -30,16 +30,16 @@ module Jobs rescue ImageBrokenError broken_image_urls << normalized_src end - - # have we successfully downloaded that file? - if upload = downloaded_images[normalized_src] - user_profile.bio_raw = replace_in_raw(original_src: original_src, upload: upload, raw: user_profile.bio_raw) - end rescue => e raise e if Rails.env.test? log(:error, "Failed to pull hotlinked image (#{download_src}) user: #{@user_id}\n" + e.message + "\n" + e.backtrace.join("\n")) end + user_profile.bio_raw = InlineUploads.replace_hotlinked_image_urls(raw: user_profile.bio_raw) do |match_src| + normalized_match_src = PostHotlinkedMedia.normalize_src(match_src) + downloaded_images[normalized_match_src] + end + user_profile.skip_pull_hotlinked_image = true user_profile.save! end diff --git a/app/jobs/regular/update_hotlinked_raw.rb b/app/jobs/regular/update_hotlinked_raw.rb new file mode 100644 index 00000000000..b938a1ffaae --- /dev/null +++ b/app/jobs/regular/update_hotlinked_raw.rb @@ -0,0 +1,28 @@ +# frozen_string_literal: true + +module Jobs + class UpdateHotlinkedRaw < ::Jobs::Base + sidekiq_options queue: 'low' + + def execute(args) + @post_id = args[:post_id] + raise Discourse::InvalidParameters.new(:post_id) if @post_id.blank? + + post = Post.find_by(id: @post_id) + return if post.nil? + return if post.cook_method == Post.cook_methods[:raw_html] + + hotlinked_map = post.post_hotlinked_media.preload(:upload).map { |r| [r.url, r] }.to_h + + raw = InlineUploads.replace_hotlinked_image_urls(raw: post.raw) do |match_src| + normalized_match_src = PostHotlinkedMedia.normalize_src(match_src) + hotlinked_map[normalized_match_src]&.upload + end + + if post.raw != raw + changes = { raw: raw, edit_reason: I18n.t("upload.edit_reason") } + post.revise(Discourse.system_user, changes, bypass_bump: true, skip_staff_log: true) + end + end + end +end diff --git a/app/services/inline_uploads.rb b/app/services/inline_uploads.rb index 55a162a8ea4..b2519f6331f 100644 --- a/app/services/inline_uploads.rb +++ b/app/services/inline_uploads.rb @@ -214,6 +214,40 @@ class InlineUploads end end + def self.replace_hotlinked_image_urls(raw:, &blk) + replace = Proc.new do |match, match_src, replacement, _index| + upload = blk.call(match_src) + next if !upload + + replacement = + if replacement.include?(InlineUploads::PLACEHOLDER) + replacement.sub(InlineUploads::PLACEHOLDER, upload.short_url) + elsif replacement.include?(InlineUploads::PATH_PLACEHOLDER) + replacement.sub(InlineUploads::PATH_PLACEHOLDER, upload.short_path) + end + + raw = raw.gsub( + match, + replacement + ) + end + + # there are 6 ways to insert an image in a post + # HTML tag - + InlineUploads.match_img(raw, external_src: true, &replace) + + # BBCode tag - [img]http://...[/img] + InlineUploads.match_bbcode_img(raw, external_src: true, &replace) + + # Markdown linked image - [![alt](http://...)](http://...) + # Markdown inline - ![alt](http://...) + # Markdown inline - ![](http://... "image title") + # Markdown inline - ![alt](http://... "image title") + InlineUploads.match_md_inline_img(raw, external_src: true, &replace) + + raw + end + def self.matched_uploads(node) upload_path = Discourse.store.upload_path base_url = Discourse.base_url.sub(/https?:\/\//, "(https?://)") diff --git a/lib/cooked_post_processor.rb b/lib/cooked_post_processor.rb index 8444a4768bb..189c048dcf3 100644 --- a/lib/cooked_post_processor.rb +++ b/lib/cooked_post_processor.rb @@ -47,7 +47,6 @@ class CookedPostProcessor remove_user_ids update_post_image enforce_nofollow - pull_hotlinked_images grant_badges @post.link_post_uploads(fragments: @doc) DiscourseEvent.trigger(:post_process_cooked, @doc, @post) @@ -361,41 +360,6 @@ class CookedPostProcessor PrettyText.add_rel_attributes_to_user_content(@doc, add_nofollow) end - def pull_hotlinked_images - return if @opts[:skip_pull_hotlinked_images] - # have we enough disk space? - disable_if_low_on_disk_space # But still enqueue the job - # make sure no other job is scheduled - Jobs.cancel_scheduled_job(:pull_hotlinked_images, post_id: @post.id) - # schedule the job - delay = SiteSetting.editing_grace_period + 1 - Jobs.enqueue_in(delay.seconds.to_i, :pull_hotlinked_images, post_id: @post.id) - end - - def disable_if_low_on_disk_space - return if Discourse.store.external? - return if !SiteSetting.download_remote_images_to_local - return if available_disk_space >= SiteSetting.download_remote_images_threshold - - SiteSetting.download_remote_images_to_local = false - - # log the site setting change - reason = I18n.t("disable_remote_images_download_reason") - staff_action_logger = StaffActionLogger.new(Discourse.system_user) - staff_action_logger.log_site_setting_change("download_remote_images_to_local", true, false, details: reason) - - # also send a private message to the site contact user notify_about_low_disk_space - notify_about_low_disk_space - end - - def notify_about_low_disk_space - SystemMessage.create_from_system_user(Discourse.site_contact_user, :download_remote_images_disabled) - end - - def available_disk_space - 100 - DiskSpace.percent_free("#{Rails.root}/public/uploads") - end - private def post_process_images diff --git a/spec/jobs/process_post_spec.rb b/spec/jobs/process_post_spec.rb index fc20d8f5083..0fb49e684b9 100644 --- a/spec/jobs/process_post_spec.rb +++ b/spec/jobs/process_post_spec.rb @@ -88,4 +88,35 @@ describe Jobs::ProcessPost do expect(post.topic.reload.excerpt).to eq("Some OP content") end end + + context "#enqueue_pull_hotlinked_images" do + fab!(:post) { Fabricate(:post, created_at: 20.days.ago) } + let(:job) { Jobs::ProcessPost.new } + + it "runs even when download_remote_images_to_local is disabled" do + # We want to run it to pull hotlinked optimized images + SiteSetting.download_remote_images_to_local = false + expect_enqueued_with(job: :pull_hotlinked_images, args: { post_id: post.id }) do + job.execute({ post_id: post.id }) + end + end + + context "when download_remote_images_to_local? is enabled" do + before do + SiteSetting.download_remote_images_to_local = true + end + + it "enqueues" do + expect_enqueued_with(job: :pull_hotlinked_images, args: { post_id: post.id }) do + job.execute({ post_id: post.id }) + end + end + + it "does not run when requested to skip" do + job.execute({ post_id: post.id, skip_pull_hotlinked_images: true }) + expect(Jobs::PullHotlinkedImages.jobs.size).to eq(0) + end + end + + end end diff --git a/spec/jobs/pull_hotlinked_images_spec.rb b/spec/jobs/pull_hotlinked_images_spec.rb index 416d9a80ab4..c7be60aea6d 100644 --- a/spec/jobs/pull_hotlinked_images_spec.rb +++ b/spec/jobs/pull_hotlinked_images_spec.rb @@ -67,6 +67,21 @@ describe Jobs::PullHotlinkedImages do expect(post.reload.raw).to eq("") end + it 'enqueues raw replacement job with a delay' do + Jobs.run_later! + + post = Fabricate(:post, raw: "") + stub_image_size + + freeze_time + Jobs.expects(:cancel_scheduled_job).with(:update_hotlinked_raw, post_id: post.id).once + delay = SiteSetting.editing_grace_period + 1 + + expect_enqueued_with(job: :update_hotlinked_raw, args: { post_id: post.id }, at: Time.zone.now + delay.seconds) do + Jobs::PullHotlinkedImages.new.execute(post_id: post.id) + end + end + it 'removes downloaded images when they are no longer needed' do post = Fabricate(:post, raw: "") stub_image_size @@ -196,14 +211,14 @@ describe Jobs::PullHotlinkedImages do expect(post.uploads).to contain_exactly(upload) end - it "skips raw_html posts" do + it "skips editing raw for raw_html posts" do raw = "" post = Fabricate(:post, raw: raw, cook_method: Post.cook_methods[:raw_html]) stub_image_size expect do post.rebake! post.reload - end.not_to change { Upload.count } + end.to change { Upload.count }.by(1) expect(post.raw).to eq(raw) end @@ -556,6 +571,43 @@ describe Jobs::PullHotlinkedImages do end end + context "#disable_if_low_on_disk_space" do + fab!(:post) { Fabricate(:post, created_at: 20.days.ago) } + let(:job) { Jobs::PullHotlinkedImages.new } + + before do + SiteSetting.download_remote_images_to_local = true + SiteSetting.download_remote_images_threshold = 20 + job.stubs(:available_disk_space).returns(50) + end + + it "does nothing when there's enough disk space" do + SiteSetting.expects(:download_remote_images_to_local=).never + job.execute({ post_id: post.id }) + end + + context "when there's not enough disk space" do + + before { SiteSetting.download_remote_images_threshold = 75 } + + it "disables download_remote_images_threshold and send a notification to the admin" do + StaffActionLogger.any_instance.expects(:log_site_setting_change).once + SystemMessage.expects(:create_from_system_user).with(Discourse.site_contact_user, :download_remote_images_disabled).once + job.execute({ post_id: post.id }) + + expect(SiteSetting.download_remote_images_to_local).to eq(false) + end + + it "doesn't disable download_remote_images_to_local if site uses S3" do + setup_s3 + job.execute({ post_id: post.id }) + + expect(SiteSetting.download_remote_images_to_local).to eq(true) + end + + end + end + def stub_s3(upload) stub_upload(upload) stub_request(:get, "https:" + upload.url).to_return(status: 200, body: file_from_fixtures("smallest.png")) diff --git a/spec/lib/cooked_post_processor_spec.rb b/spec/lib/cooked_post_processor_spec.rb index 8f4a2b925b3..d626cc5f5c3 100644 --- a/spec/lib/cooked_post_processor_spec.rb +++ b/spec/lib/cooked_post_processor_spec.rb @@ -21,7 +21,6 @@ describe CookedPostProcessor do cpp.expects(:post_process_oneboxes).in_sequence(post_process) cpp.expects(:post_process_images).in_sequence(post_process) cpp.expects(:optimize_urls).in_sequence(post_process) - cpp.expects(:pull_hotlinked_images).in_sequence(post_process) cpp.post_process expect(PostUpload.exists?(post: post, upload: upload)).to eq(true) @@ -1543,102 +1542,6 @@ describe CookedPostProcessor do end end - context "#pull_hotlinked_images" do - - let(:post) { build(:post, created_at: 20.days.ago) } - let(:cpp) { CookedPostProcessor.new(post) } - - before { cpp.stubs(:available_disk_space).returns(90) } - - it "runs even when download_remote_images_to_local is disabled" do - # We want to run it to pull hotlinked optimized images - SiteSetting.download_remote_images_to_local = false - expect { cpp.pull_hotlinked_images }. - to change { Jobs::PullHotlinkedImages.jobs.count }.by 1 - end - - context "when download_remote_images_to_local? is enabled" do - before do - SiteSetting.download_remote_images_to_local = true - end - - it "disables download_remote_images if there is not enough disk space" do - cpp.expects(:available_disk_space).returns(5) - cpp.pull_hotlinked_images - expect(SiteSetting.download_remote_images_to_local).to eq(false) - end - - it "does not run when requested to skip" do - CookedPostProcessor.new(post, skip_pull_hotlinked_images: true).pull_hotlinked_images - expect(Jobs::PullHotlinkedImages.jobs.size).to eq(0) - end - - context "and there is enough disk space" do - before { cpp.expects(:disable_if_low_on_disk_space).at_least_once } - - context "and the post has been updated by an actual user" do - - before { post.id = 42 } - - it "ensures only one job is scheduled right after the editing_grace_period" do - freeze_time - - Jobs.expects(:cancel_scheduled_job).with(:pull_hotlinked_images, post_id: post.id).once - - delay = SiteSetting.editing_grace_period + 1 - - expect_enqueued_with(job: :pull_hotlinked_images, args: { post_id: post.id }, at: Time.zone.now + delay.seconds) do - cpp.pull_hotlinked_images - end - end - - end - - end - - end - - end - - context "#disable_if_low_on_disk_space" do - - let(:post) { build(:post, created_at: 20.days.ago) } - let(:cpp) { CookedPostProcessor.new(post) } - - before do - SiteSetting.download_remote_images_to_local = true - SiteSetting.download_remote_images_threshold = 20 - cpp.stubs(:available_disk_space).returns(50) - end - - it "does nothing when there's enough disk space" do - SiteSetting.expects(:download_remote_images_to_local=).never - cpp.disable_if_low_on_disk_space - end - - context "when there's not enough disk space" do - - before { SiteSetting.download_remote_images_threshold = 75 } - - it "disables download_remote_images_threshold and send a notification to the admin" do - StaffActionLogger.any_instance.expects(:log_site_setting_change).once - SystemMessage.expects(:create_from_system_user).with(Discourse.site_contact_user, :download_remote_images_disabled).once - cpp.disable_if_low_on_disk_space - - expect(SiteSetting.download_remote_images_to_local).to eq(false) - end - - it "doesn't disable download_remote_images_to_local if site uses S3" do - setup_s3 - cpp.disable_if_low_on_disk_space - - expect(SiteSetting.download_remote_images_to_local).to eq(true) - end - - end - - end - context "#is_a_hyperlink?" do let(:post) { build(:post) }