From ab3bda6cd0d46c8809074637a392516543b506b7 Mon Sep 17 00:00:00 2001 From: Martin Brennan Date: Wed, 29 Jan 2020 10:11:38 +1000 Subject: [PATCH] FIX: Mitigate issue where legacy pre-secure hotlinked media would not be redownloaded (#8802) Basically, say you had already downloaded a certain image from a certain URL using pull_hotlinked_images and the onebox. The upload would be stored by its sha as an upload record. Whenever you linked to the same URL again in a post (e.g. in our case an og:image on review.discourse) we would would reuse the original upload record because of the sha1. However when you turned on secure media this could cause problems as the first post that uses that upload after secure media is enabled will set the access control post for the upload to the new post. Then if the post is deleted every single onebox/link to that same image URL will fail forever with 403 as the secure-media-uploads URL fails if the access control post has been deleted. To fix this when cooking posts and pulling hotlinked images, we only allow using an original upload by URL if its access control post matches the current post, and if the original_sha1 is filled in, meaning it was uploaded AFTER secure media was enabled. otherwise we just redownload the media again to be safe, as the URL will always be new then. --- app/jobs/regular/pull_hotlinked_images.rb | 15 +++- app/models/upload.rb | 6 ++ lib/cooked_post_processor.rb | 12 ++- spec/components/cooked_post_processor_spec.rb | 42 ++++++++--- spec/fabricators/upload_fabricator.rb | 6 ++ spec/jobs/pull_hotlinked_images_spec.rb | 45 ++++++++++- spec/models/topic_converter_spec.rb | 5 ++ spec/models/upload_spec.rb | 75 ++++++++++++++++--- spec/support/helpers.rb | 5 ++ 9 files changed, 183 insertions(+), 28 deletions(-) diff --git a/app/jobs/regular/pull_hotlinked_images.rb b/app/jobs/regular/pull_hotlinked_images.rb index 5cd69b4ca21..252c3790523 100644 --- a/app/jobs/regular/pull_hotlinked_images.rb +++ b/app/jobs/regular/pull_hotlinked_images.rb @@ -56,7 +56,7 @@ module Jobs src = original_src = node['src'] || node['href'] src = "#{SiteSetting.force_https ? "https" : "http"}:#{src}" if src.start_with?("//") - if should_download_image?(src) + if should_download_image?(src, post) begin # have we already downloaded that file? schemeless_src = remove_scheme(original_src) @@ -89,6 +89,7 @@ module Jobs has_new_broken_image = true end end + # have we successfully downloaded that file? if downloaded_urls[src].present? escaped_src = Regexp.escape(original_src) @@ -163,7 +164,7 @@ module Jobs doc.css(".lightbox img[src]") end - def should_download_image?(src) + def should_download_image?(src, post = nil) # make sure we actually have a url return false unless src.present? @@ -174,7 +175,15 @@ module Jobs # Someone could hotlink a file from a different site on the same CDN, # so check whether we have it in this database - return !Upload.get_from_url(src) + # + # if the upload already exists and is attached to a different post, + # or the original_sha1 is missing meaning it was created before secure + # media was enabled, then we definitely want to redownload again otherwise + # we end up reusing existing uploads which may be linked to many posts + # already. + upload = Upload.consider_for_reuse(Upload.get_from_url(src), post) + + return !upload.present? end # Don't download non-local images unless site setting enabled diff --git a/app/models/upload.rb b/app/models/upload.rb index 20820ba0164..cd374a4efd4 100644 --- a/app/models/upload.rb +++ b/app/models/upload.rb @@ -121,6 +121,12 @@ class Upload < ActiveRecord::Base self.class.short_path(sha1: self.sha1, extension: self.extension) end + def self.consider_for_reuse(upload, post) + return upload if !SiteSetting.secure_media? || upload.blank? || post.blank? + return nil if upload.access_control_post_id != post.id || upload.original_sha1.blank? + upload + end + def self.secure_media_url?(url) url.include?(SECURE_MEDIA_ROUTE) end diff --git a/lib/cooked_post_processor.rb b/lib/cooked_post_processor.rb index 1bb64e648d0..938c8fbeb19 100644 --- a/lib/cooked_post_processor.rb +++ b/lib/cooked_post_processor.rb @@ -330,7 +330,13 @@ class CookedPostProcessor img["height"] = height end - if upload = Upload.get_from_url(src) + # if the upload already exists and is attached to a different post, + # or the original_sha1 is missing meaning it was created before secure + # media was enabled. we want to re-thumbnail and re-optimize in this case + # to avoid using uploads linked to many other posts + upload = Upload.consider_for_reuse(Upload.get_from_url(src), @post) + + if upload.present? upload.create_thumbnail!(width, height, crop: crop) each_responsive_ratio do |ratio| @@ -351,7 +357,9 @@ class CookedPostProcessor add_lightbox!(img, original_width, original_height, upload, cropped: crop) end - optimize_image!(img, upload, cropped: crop) if upload + if upload.present? + optimize_image!(img, upload, cropped: crop) + end end def loading_image(upload) diff --git a/spec/components/cooked_post_processor_spec.rb b/spec/components/cooked_post_processor_spec.rb index 8905905ba82..4a9165d5b6e 100644 --- a/spec/components/cooked_post_processor_spec.rb +++ b/spec/components/cooked_post_processor_spec.rb @@ -504,6 +504,7 @@ describe CookedPostProcessor do end context "s3_uploads" do + let(:upload) { Fabricate(:secure_upload_s3) } before do s3_setup stored_path = Discourse.store.get_path_for_upload(upload) @@ -516,13 +517,8 @@ describe CookedPostProcessor do ) stub_request(:get, /#{SiteSetting.s3_upload_bucket}\.s3\.amazonaws\.com/) - OptimizedImage.expects(:resize).returns(true) - FileStore::BaseStore.any_instance.expects(:get_depth_for).returns(0) - Discourse.store.class.any_instance.expects(:has_been_uploaded?).at_least_once.returns(true) - SiteSetting.login_required = true SiteSetting.secure_media = true - upload.update_column(:secure, true) end let(:optimized_size) { "600x500" } @@ -531,15 +527,43 @@ describe CookedPostProcessor do Fabricate(:post, raw: "![large.png|#{optimized_size}](#{upload.short_url})") end - it "handles secure images with the correct lightbox link href" do - cpp.post_process - - expect(cpp.html).to match_html <<~HTML + let(:cooked_html) do + <<~HTML

HTML end + + context "when the upload is attached to the correct post" do + before do + OptimizedImage.expects(:resize).returns(true) + FileStore::BaseStore.any_instance.expects(:get_depth_for).returns(0) + Discourse.store.class.any_instance.expects(:has_been_uploaded?).at_least_once.returns(true) + upload.update(secure: true, access_control_post: post) + end + + it "handles secure images with the correct lightbox link href" do + cpp.post_process + + expect(cpp.html).to match_html cooked_html + end + end + + context "when the upload is attached to a different post" do + before do + FastImage.size(upload.url) + upload.update(secure: true, access_control_post: Fabricate(:post)) + end + + it "does not create thumbnails or optimize images" do + CookedPostProcessor.any_instance.expects(:optimize_image!).never + Upload.any_instance.expects(:create_thumbnail!).never + cpp.post_process + + expect(cpp.html).not_to match_html cooked_html + end + end end end diff --git a/spec/fabricators/upload_fabricator.rb b/spec/fabricators/upload_fabricator.rb index d9686ff1274..01d05d9da55 100644 --- a/spec/fabricators/upload_fabricator.rb +++ b/spec/fabricators/upload_fabricator.rb @@ -51,3 +51,9 @@ Fabricator(:upload_s3, from: :upload) do end end end + +Fabricator(:secure_upload_s3, from: :upload_s3) do + secure { true } + sha1 { SecureRandom.hex(20) } + original_sha1 { sequence(:sha1) { |n| Digest::SHA1.hexdigest(n.to_s) } } +end diff --git a/spec/jobs/pull_hotlinked_images_spec.rb b/spec/jobs/pull_hotlinked_images_spec.rb index 3371a73ae4f..40406744509 100644 --- a/spec/jobs/pull_hotlinked_images_spec.rb +++ b/spec/jobs/pull_hotlinked_images_spec.rb @@ -125,17 +125,56 @@ describe Jobs::PullHotlinkedImages do expect(post.uploads).to contain_exactly(upload) end - context "when secure media enabled" do + context "when secure media enabled for an upload that has already been downloaded and exists" do it "doesnt redownload the secure upload" do enable_secure_media - upload = Fabricate(:upload_s3, secure: true) + upload = Fabricate(:secure_upload_s3, secure: true) stub_s3(upload) url = Upload.secure_media_url_from_upload_url(upload.url) url = Discourse.base_url + url post = Fabricate(:post, raw: "") + upload.update(access_control_post: post) expect { Jobs::PullHotlinkedImages.new.execute(post_id: post.id) } .not_to change { Upload.count } end + + context "when the upload original_sha1 is missing" do + it "redownloads the upload" do + enable_secure_media + upload = Fabricate(:upload_s3, secure: true) + stub_s3(upload) + Upload.stubs(:signed_url_from_secure_media_url).returns(upload.url) + url = Upload.secure_media_url_from_upload_url(upload.url) + url = Discourse.base_url + url + post = Fabricate(:post, raw: "") + upload.update(access_control_post: post) + FileStore::S3Store.any_instance.stubs(:store_upload).returns(upload.url) + + # without this we get an infinite hang... + Post.any_instance.stubs(:trigger_post_process) + expect { Jobs::PullHotlinkedImages.new.execute(post_id: post.id) } + .to change { Upload.count }.by(1) + end + end + + context "when the upload access_control_post is different to the current post" do + it "redownloads the upload" do + enable_secure_media + upload = Fabricate(:secure_upload_s3, secure: true) + stub_s3(upload) + Upload.stubs(:signed_url_from_secure_media_url).returns(upload.url) + url = Upload.secure_media_url_from_upload_url(upload.url) + url = Discourse.base_url + url + post = Fabricate(:post, raw: "") + upload.update(access_control_post: Fabricate(:post)) + FileStore::S3Store.any_instance.stubs(:store_upload).returns(upload.url) + + # without this we get an infinite hang... + Post.any_instance.stubs(:trigger_post_process) + expect { Jobs::PullHotlinkedImages.new.execute(post_id: post.id) } + .to change { Upload.count }.by(1) + end + end end it 'replaces markdown image' do @@ -376,5 +415,7 @@ describe Jobs::PullHotlinkedImages do :put, "https://#{SiteSetting.s3_upload_bucket}.s3.amazonaws.com/original/1X/#{upload.sha1}.#{upload.extension}?acl" ) + stub_request(:get, "https:" + upload.url).to_return(status: 200, body: file_from_fixtures("smallest.png")) + # stub_request(:get, /#{SiteSetting.s3_upload_bucket}\.s3\.amazonaws\.com/) end end diff --git a/spec/models/topic_converter_spec.rb b/spec/models/topic_converter_spec.rb index 7d5606561ff..a816600034b 100644 --- a/spec/models/topic_converter_spec.rb +++ b/spec/models/topic_converter_spec.rb @@ -131,6 +131,11 @@ describe TopicConverter do public_reply.link_post_uploads public_reply.update_uploads_secure_status + # we need to do this otherwise we get infinite loop shenanigans, as the pull hotlinked + # images is never satisfied and keeps calling cooked post processor which in turn runs + # pull hotlinked images + Jobs::PullHotlinkedImages.stubs(:new).returns(Helpers::StubbedJob.new) + expect(public_reply.uploads[0].secure).to eq(false) public_topic.convert_to_private_message(admin) expect(public_topic.reload.posts.find(public_reply.id).uploads[0].secure).to eq(true) diff --git a/spec/models/upload_spec.rb b/spec/models/upload_spec.rb index 5bf3df79973..fed2faf1fa8 100644 --- a/spec/models/upload_spec.rb +++ b/spec/models/upload_spec.rb @@ -288,6 +288,53 @@ describe Upload do end end + describe ".consider_for_reuse" do + let(:post) { Fabricate(:post) } + let(:upload) { Fabricate(:upload) } + + it "returns nil when the provided upload is blank" do + expect(Upload.consider_for_reuse(nil, post)).to eq(nil) + end + + it "returns the upload when secure media is disabled" do + expect(Upload.consider_for_reuse(upload, post)).to eq(upload) + end + + context "when secure media enabled" do + before do + enable_secure_media + end + + context "when the upload access control post is != to the provided post" do + before do + upload.update(access_control_post_id: Fabricate(:post).id) + end + + it "returns nil" do + expect(Upload.consider_for_reuse(upload, post)).to eq(nil) + end + end + + context "when the upload original_sha1 is blank (pre-secure-media upload)" do + before do + upload.update(original_sha1: nil, access_control_post: post) + end + + it "returns nil" do + expect(Upload.consider_for_reuse(upload, post)).to eq(nil) + end + end + + context "when the upload original_sha1 is present and access control post is correct" do + let(:upload) { Fabricate(:secure_upload_s3, access_control_post: post) } + + it "returns the upload" do + expect(Upload.consider_for_reuse(upload, post)).to eq(upload) + end + end + end + end + describe '.update_secure_status' do it "respects the secure_override_value parameter if provided" do upload.update!(secure: true) @@ -344,18 +391,7 @@ describe Upload do context "secure media enabled" do before do - SiteSetting.enable_s3_uploads = true - SiteSetting.s3_upload_bucket = "s3-upload-bucket" - SiteSetting.s3_access_key_id = "some key" - SiteSetting.s3_secret_access_key = "some secrets3_region key" - SiteSetting.secure_media = true - - stub_request(:head, "https://#{SiteSetting.s3_upload_bucket}.s3.amazonaws.com/") - - stub_request( - :put, - "https://#{SiteSetting.s3_upload_bucket}.s3.amazonaws.com/original/1X/#{upload.sha1}.#{upload.extension}?acl" - ) + enable_secure_media end it 'does not mark an image upload as not secure when there is no access control post id, to avoid unintentional exposure' do @@ -399,4 +435,19 @@ describe Upload do expect(upload2.reload.extension).to eq("png") end end + + def enable_secure_media + SiteSetting.enable_s3_uploads = true + SiteSetting.s3_upload_bucket = "s3-upload-bucket" + SiteSetting.s3_access_key_id = "some key" + SiteSetting.s3_secret_access_key = "some secrets3_region key" + SiteSetting.secure_media = true + + stub_request(:head, "https://#{SiteSetting.s3_upload_bucket}.s3.amazonaws.com/") + + stub_request( + :put, + "https://#{SiteSetting.s3_upload_bucket}.s3.amazonaws.com/original/1X/#{upload.sha1}.#{upload.extension}?acl" + ) + end end diff --git a/spec/support/helpers.rb b/spec/support/helpers.rb index efffb42785f..e2e278bb91e 100644 --- a/spec/support/helpers.rb +++ b/spec/support/helpers.rb @@ -147,4 +147,9 @@ module Helpers ActionController::Base.config.relative_url_root = old_root end end + + class StubbedJob + def initialize; end + def perform(args); end + end end