diff --git a/app/jobs/regular/pull_hotlinked_images.rb b/app/jobs/regular/pull_hotlinked_images.rb index 5cd69b4ca21..252c3790523 100644 --- a/app/jobs/regular/pull_hotlinked_images.rb +++ b/app/jobs/regular/pull_hotlinked_images.rb @@ -56,7 +56,7 @@ module Jobs src = original_src = node['src'] || node['href'] src = "#{SiteSetting.force_https ? "https" : "http"}:#{src}" if src.start_with?("//") - if should_download_image?(src) + if should_download_image?(src, post) begin # have we already downloaded that file? schemeless_src = remove_scheme(original_src) @@ -89,6 +89,7 @@ module Jobs has_new_broken_image = true end end + # have we successfully downloaded that file? if downloaded_urls[src].present? escaped_src = Regexp.escape(original_src) @@ -163,7 +164,7 @@ module Jobs doc.css(".lightbox img[src]") end - def should_download_image?(src) + def should_download_image?(src, post = nil) # make sure we actually have a url return false unless src.present? @@ -174,7 +175,15 @@ module Jobs # Someone could hotlink a file from a different site on the same CDN, # so check whether we have it in this database - return !Upload.get_from_url(src) + # + # if the upload already exists and is attached to a different post, + # or the original_sha1 is missing meaning it was created before secure + # media was enabled, then we definitely want to redownload again otherwise + # we end up reusing existing uploads which may be linked to many posts + # already. + upload = Upload.consider_for_reuse(Upload.get_from_url(src), post) + + return !upload.present? end # Don't download non-local images unless site setting enabled diff --git a/app/models/upload.rb b/app/models/upload.rb index 20820ba0164..cd374a4efd4 100644 --- a/app/models/upload.rb +++ b/app/models/upload.rb @@ -121,6 +121,12 @@ class Upload < ActiveRecord::Base self.class.short_path(sha1: self.sha1, extension: self.extension) end + def self.consider_for_reuse(upload, post) + return upload if !SiteSetting.secure_media? || upload.blank? || post.blank? + return nil if upload.access_control_post_id != post.id || upload.original_sha1.blank? + upload + end + def self.secure_media_url?(url) url.include?(SECURE_MEDIA_ROUTE) end diff --git a/lib/cooked_post_processor.rb b/lib/cooked_post_processor.rb index 1bb64e648d0..938c8fbeb19 100644 --- a/lib/cooked_post_processor.rb +++ b/lib/cooked_post_processor.rb @@ -330,7 +330,13 @@ class CookedPostProcessor img["height"] = height end - if upload = Upload.get_from_url(src) + # if the upload already exists and is attached to a different post, + # or the original_sha1 is missing meaning it was created before secure + # media was enabled. we want to re-thumbnail and re-optimize in this case + # to avoid using uploads linked to many other posts + upload = Upload.consider_for_reuse(Upload.get_from_url(src), @post) + + if upload.present? upload.create_thumbnail!(width, height, crop: crop) each_responsive_ratio do |ratio| @@ -351,7 +357,9 @@ class CookedPostProcessor add_lightbox!(img, original_width, original_height, upload, cropped: crop) end - optimize_image!(img, upload, cropped: crop) if upload + if upload.present? + optimize_image!(img, upload, cropped: crop) + end end def loading_image(upload) diff --git a/spec/components/cooked_post_processor_spec.rb b/spec/components/cooked_post_processor_spec.rb index 8905905ba82..4a9165d5b6e 100644 --- a/spec/components/cooked_post_processor_spec.rb +++ b/spec/components/cooked_post_processor_spec.rb @@ -504,6 +504,7 @@ describe CookedPostProcessor do end context "s3_uploads" do + let(:upload) { Fabricate(:secure_upload_s3) } before do s3_setup stored_path = Discourse.store.get_path_for_upload(upload) @@ -516,13 +517,8 @@ describe CookedPostProcessor do ) stub_request(:get, /#{SiteSetting.s3_upload_bucket}\.s3\.amazonaws\.com/) - OptimizedImage.expects(:resize).returns(true) - FileStore::BaseStore.any_instance.expects(:get_depth_for).returns(0) - Discourse.store.class.any_instance.expects(:has_been_uploaded?).at_least_once.returns(true) - SiteSetting.login_required = true SiteSetting.secure_media = true - upload.update_column(:secure, true) end let(:optimized_size) { "600x500" } @@ -531,15 +527,43 @@ describe CookedPostProcessor do Fabricate(:post, raw: "![large.png|#{optimized_size}](#{upload.short_url})") end - it "handles secure images with the correct lightbox link href" do - cpp.post_process - - expect(cpp.html).to match_html <<~HTML + let(:cooked_html) do + <<~HTML

HTML end + + context "when the upload is attached to the correct post" do + before do + OptimizedImage.expects(:resize).returns(true) + FileStore::BaseStore.any_instance.expects(:get_depth_for).returns(0) + Discourse.store.class.any_instance.expects(:has_been_uploaded?).at_least_once.returns(true) + upload.update(secure: true, access_control_post: post) + end + + it "handles secure images with the correct lightbox link href" do + cpp.post_process + + expect(cpp.html).to match_html cooked_html + end + end + + context "when the upload is attached to a different post" do + before do + FastImage.size(upload.url) + upload.update(secure: true, access_control_post: Fabricate(:post)) + end + + it "does not create thumbnails or optimize images" do + CookedPostProcessor.any_instance.expects(:optimize_image!).never + Upload.any_instance.expects(:create_thumbnail!).never + cpp.post_process + + expect(cpp.html).not_to match_html cooked_html + end + end end end diff --git a/spec/fabricators/upload_fabricator.rb b/spec/fabricators/upload_fabricator.rb index d9686ff1274..01d05d9da55 100644 --- a/spec/fabricators/upload_fabricator.rb +++ b/spec/fabricators/upload_fabricator.rb @@ -51,3 +51,9 @@ Fabricator(:upload_s3, from: :upload) do end end end + +Fabricator(:secure_upload_s3, from: :upload_s3) do + secure { true } + sha1 { SecureRandom.hex(20) } + original_sha1 { sequence(:sha1) { |n| Digest::SHA1.hexdigest(n.to_s) } } +end diff --git a/spec/jobs/pull_hotlinked_images_spec.rb b/spec/jobs/pull_hotlinked_images_spec.rb index 3371a73ae4f..40406744509 100644 --- a/spec/jobs/pull_hotlinked_images_spec.rb +++ b/spec/jobs/pull_hotlinked_images_spec.rb @@ -125,17 +125,56 @@ describe Jobs::PullHotlinkedImages do expect(post.uploads).to contain_exactly(upload) end - context "when secure media enabled" do + context "when secure media enabled for an upload that has already been downloaded and exists" do it "doesnt redownload the secure upload" do enable_secure_media - upload = Fabricate(:upload_s3, secure: true) + upload = Fabricate(:secure_upload_s3, secure: true) stub_s3(upload) url = Upload.secure_media_url_from_upload_url(upload.url) url = Discourse.base_url + url post = Fabricate(:post, raw: "") + upload.update(access_control_post: post) expect { Jobs::PullHotlinkedImages.new.execute(post_id: post.id) } .not_to change { Upload.count } end + + context "when the upload original_sha1 is missing" do + it "redownloads the upload" do + enable_secure_media + upload = Fabricate(:upload_s3, secure: true) + stub_s3(upload) + Upload.stubs(:signed_url_from_secure_media_url).returns(upload.url) + url = Upload.secure_media_url_from_upload_url(upload.url) + url = Discourse.base_url + url + post = Fabricate(:post, raw: "") + upload.update(access_control_post: post) + FileStore::S3Store.any_instance.stubs(:store_upload).returns(upload.url) + + # without this we get an infinite hang... + Post.any_instance.stubs(:trigger_post_process) + expect { Jobs::PullHotlinkedImages.new.execute(post_id: post.id) } + .to change { Upload.count }.by(1) + end + end + + context "when the upload access_control_post is different to the current post" do + it "redownloads the upload" do + enable_secure_media + upload = Fabricate(:secure_upload_s3, secure: true) + stub_s3(upload) + Upload.stubs(:signed_url_from_secure_media_url).returns(upload.url) + url = Upload.secure_media_url_from_upload_url(upload.url) + url = Discourse.base_url + url + post = Fabricate(:post, raw: "") + upload.update(access_control_post: Fabricate(:post)) + FileStore::S3Store.any_instance.stubs(:store_upload).returns(upload.url) + + # without this we get an infinite hang... + Post.any_instance.stubs(:trigger_post_process) + expect { Jobs::PullHotlinkedImages.new.execute(post_id: post.id) } + .to change { Upload.count }.by(1) + end + end end it 'replaces markdown image' do @@ -376,5 +415,7 @@ describe Jobs::PullHotlinkedImages do :put, "https://#{SiteSetting.s3_upload_bucket}.s3.amazonaws.com/original/1X/#{upload.sha1}.#{upload.extension}?acl" ) + stub_request(:get, "https:" + upload.url).to_return(status: 200, body: file_from_fixtures("smallest.png")) + # stub_request(:get, /#{SiteSetting.s3_upload_bucket}\.s3\.amazonaws\.com/) end end diff --git a/spec/models/topic_converter_spec.rb b/spec/models/topic_converter_spec.rb index 7d5606561ff..a816600034b 100644 --- a/spec/models/topic_converter_spec.rb +++ b/spec/models/topic_converter_spec.rb @@ -131,6 +131,11 @@ describe TopicConverter do public_reply.link_post_uploads public_reply.update_uploads_secure_status + # we need to do this otherwise we get infinite loop shenanigans, as the pull hotlinked + # images is never satisfied and keeps calling cooked post processor which in turn runs + # pull hotlinked images + Jobs::PullHotlinkedImages.stubs(:new).returns(Helpers::StubbedJob.new) + expect(public_reply.uploads[0].secure).to eq(false) public_topic.convert_to_private_message(admin) expect(public_topic.reload.posts.find(public_reply.id).uploads[0].secure).to eq(true) diff --git a/spec/models/upload_spec.rb b/spec/models/upload_spec.rb index 5bf3df79973..fed2faf1fa8 100644 --- a/spec/models/upload_spec.rb +++ b/spec/models/upload_spec.rb @@ -288,6 +288,53 @@ describe Upload do end end + describe ".consider_for_reuse" do + let(:post) { Fabricate(:post) } + let(:upload) { Fabricate(:upload) } + + it "returns nil when the provided upload is blank" do + expect(Upload.consider_for_reuse(nil, post)).to eq(nil) + end + + it "returns the upload when secure media is disabled" do + expect(Upload.consider_for_reuse(upload, post)).to eq(upload) + end + + context "when secure media enabled" do + before do + enable_secure_media + end + + context "when the upload access control post is != to the provided post" do + before do + upload.update(access_control_post_id: Fabricate(:post).id) + end + + it "returns nil" do + expect(Upload.consider_for_reuse(upload, post)).to eq(nil) + end + end + + context "when the upload original_sha1 is blank (pre-secure-media upload)" do + before do + upload.update(original_sha1: nil, access_control_post: post) + end + + it "returns nil" do + expect(Upload.consider_for_reuse(upload, post)).to eq(nil) + end + end + + context "when the upload original_sha1 is present and access control post is correct" do + let(:upload) { Fabricate(:secure_upload_s3, access_control_post: post) } + + it "returns the upload" do + expect(Upload.consider_for_reuse(upload, post)).to eq(upload) + end + end + end + end + describe '.update_secure_status' do it "respects the secure_override_value parameter if provided" do upload.update!(secure: true) @@ -344,18 +391,7 @@ describe Upload do context "secure media enabled" do before do - SiteSetting.enable_s3_uploads = true - SiteSetting.s3_upload_bucket = "s3-upload-bucket" - SiteSetting.s3_access_key_id = "some key" - SiteSetting.s3_secret_access_key = "some secrets3_region key" - SiteSetting.secure_media = true - - stub_request(:head, "https://#{SiteSetting.s3_upload_bucket}.s3.amazonaws.com/") - - stub_request( - :put, - "https://#{SiteSetting.s3_upload_bucket}.s3.amazonaws.com/original/1X/#{upload.sha1}.#{upload.extension}?acl" - ) + enable_secure_media end it 'does not mark an image upload as not secure when there is no access control post id, to avoid unintentional exposure' do @@ -399,4 +435,19 @@ describe Upload do expect(upload2.reload.extension).to eq("png") end end + + def enable_secure_media + SiteSetting.enable_s3_uploads = true + SiteSetting.s3_upload_bucket = "s3-upload-bucket" + SiteSetting.s3_access_key_id = "some key" + SiteSetting.s3_secret_access_key = "some secrets3_region key" + SiteSetting.secure_media = true + + stub_request(:head, "https://#{SiteSetting.s3_upload_bucket}.s3.amazonaws.com/") + + stub_request( + :put, + "https://#{SiteSetting.s3_upload_bucket}.s3.amazonaws.com/original/1X/#{upload.sha1}.#{upload.extension}?acl" + ) + end end diff --git a/spec/support/helpers.rb b/spec/support/helpers.rb index efffb42785f..e2e278bb91e 100644 --- a/spec/support/helpers.rb +++ b/spec/support/helpers.rb @@ -147,4 +147,9 @@ module Helpers ActionController::Base.config.relative_url_root = old_root end end + + class StubbedJob + def initialize; end + def perform(args); end + end end