From ab3bda6cd0d46c8809074637a392516543b506b7 Mon Sep 17 00:00:00 2001
From: Martin Brennan
Date: Wed, 29 Jan 2020 10:11:38 +1000
Subject: [PATCH] FIX: Mitigate issue where legacy pre-secure hotlinked media
would not be redownloaded (#8802)
Basically, say you had already downloaded a certain image from a certain URL
using pull_hotlinked_images and the onebox. The upload would be stored
by its sha as an upload record. Whenever you linked to the same URL again
in a post (e.g. in our case an og:image on review.discourse) we would
would reuse the original upload record because of the sha1.
However when you turned on secure media this could cause problems as
the first post that uses that upload after secure media is enabled
will set the access control post for the upload to the new post.
Then if the post is deleted every single onebox/link to that same image
URL will fail forever with 403 as the secure-media-uploads URL fails
if the access control post has been deleted.
To fix this when cooking posts and pulling hotlinked images, we only
allow using an original upload by URL if its access control post
matches the current post, and if the original_sha1 is filled in,
meaning it was uploaded AFTER secure media was enabled. otherwise
we just redownload the media again to be safe, as the URL will always
be new then.
---
app/jobs/regular/pull_hotlinked_images.rb | 15 +++-
app/models/upload.rb | 6 ++
lib/cooked_post_processor.rb | 12 ++-
spec/components/cooked_post_processor_spec.rb | 42 ++++++++---
spec/fabricators/upload_fabricator.rb | 6 ++
spec/jobs/pull_hotlinked_images_spec.rb | 45 ++++++++++-
spec/models/topic_converter_spec.rb | 5 ++
spec/models/upload_spec.rb | 75 ++++++++++++++++---
spec/support/helpers.rb | 5 ++
9 files changed, 183 insertions(+), 28 deletions(-)
diff --git a/app/jobs/regular/pull_hotlinked_images.rb b/app/jobs/regular/pull_hotlinked_images.rb
index 5cd69b4ca21..252c3790523 100644
--- a/app/jobs/regular/pull_hotlinked_images.rb
+++ b/app/jobs/regular/pull_hotlinked_images.rb
@@ -56,7 +56,7 @@ module Jobs
src = original_src = node['src'] || node['href']
src = "#{SiteSetting.force_https ? "https" : "http"}:#{src}" if src.start_with?("//")
- if should_download_image?(src)
+ if should_download_image?(src, post)
begin
# have we already downloaded that file?
schemeless_src = remove_scheme(original_src)
@@ -89,6 +89,7 @@ module Jobs
has_new_broken_image = true
end
end
+
# have we successfully downloaded that file?
if downloaded_urls[src].present?
escaped_src = Regexp.escape(original_src)
@@ -163,7 +164,7 @@ module Jobs
doc.css(".lightbox img[src]")
end
- def should_download_image?(src)
+ def should_download_image?(src, post = nil)
# make sure we actually have a url
return false unless src.present?
@@ -174,7 +175,15 @@ module Jobs
# Someone could hotlink a file from a different site on the same CDN,
# so check whether we have it in this database
- return !Upload.get_from_url(src)
+ #
+ # if the upload already exists and is attached to a different post,
+ # or the original_sha1 is missing meaning it was created before secure
+ # media was enabled, then we definitely want to redownload again otherwise
+ # we end up reusing existing uploads which may be linked to many posts
+ # already.
+ upload = Upload.consider_for_reuse(Upload.get_from_url(src), post)
+
+ return !upload.present?
end
# Don't download non-local images unless site setting enabled
diff --git a/app/models/upload.rb b/app/models/upload.rb
index 20820ba0164..cd374a4efd4 100644
--- a/app/models/upload.rb
+++ b/app/models/upload.rb
@@ -121,6 +121,12 @@ class Upload < ActiveRecord::Base
self.class.short_path(sha1: self.sha1, extension: self.extension)
end
+ def self.consider_for_reuse(upload, post)
+ return upload if !SiteSetting.secure_media? || upload.blank? || post.blank?
+ return nil if upload.access_control_post_id != post.id || upload.original_sha1.blank?
+ upload
+ end
+
def self.secure_media_url?(url)
url.include?(SECURE_MEDIA_ROUTE)
end
diff --git a/lib/cooked_post_processor.rb b/lib/cooked_post_processor.rb
index 1bb64e648d0..938c8fbeb19 100644
--- a/lib/cooked_post_processor.rb
+++ b/lib/cooked_post_processor.rb
@@ -330,7 +330,13 @@ class CookedPostProcessor
img["height"] = height
end
- if upload = Upload.get_from_url(src)
+ # if the upload already exists and is attached to a different post,
+ # or the original_sha1 is missing meaning it was created before secure
+ # media was enabled. we want to re-thumbnail and re-optimize in this case
+ # to avoid using uploads linked to many other posts
+ upload = Upload.consider_for_reuse(Upload.get_from_url(src), @post)
+
+ if upload.present?
upload.create_thumbnail!(width, height, crop: crop)
each_responsive_ratio do |ratio|
@@ -351,7 +357,9 @@ class CookedPostProcessor
add_lightbox!(img, original_width, original_height, upload, cropped: crop)
end
- optimize_image!(img, upload, cropped: crop) if upload
+ if upload.present?
+ optimize_image!(img, upload, cropped: crop)
+ end
end
def loading_image(upload)
diff --git a/spec/components/cooked_post_processor_spec.rb b/spec/components/cooked_post_processor_spec.rb
index 8905905ba82..4a9165d5b6e 100644
--- a/spec/components/cooked_post_processor_spec.rb
+++ b/spec/components/cooked_post_processor_spec.rb
@@ -504,6 +504,7 @@ describe CookedPostProcessor do
end
context "s3_uploads" do
+ let(:upload) { Fabricate(:secure_upload_s3) }
before do
s3_setup
stored_path = Discourse.store.get_path_for_upload(upload)
@@ -516,13 +517,8 @@ describe CookedPostProcessor do
)
stub_request(:get, /#{SiteSetting.s3_upload_bucket}\.s3\.amazonaws\.com/)
- OptimizedImage.expects(:resize).returns(true)
- FileStore::BaseStore.any_instance.expects(:get_depth_for).returns(0)
- Discourse.store.class.any_instance.expects(:has_been_uploaded?).at_least_once.returns(true)
-
SiteSetting.login_required = true
SiteSetting.secure_media = true
- upload.update_column(:secure, true)
end
let(:optimized_size) { "600x500" }
@@ -531,15 +527,43 @@ describe CookedPostProcessor do
Fabricate(:post, raw: "![large.png|#{optimized_size}](#{upload.short_url})")
end
- it "handles secure images with the correct lightbox link href" do
- cpp.post_process
-
- expect(cpp.html).to match_html <<~HTML
+ let(:cooked_html) do
+ <<~HTML
HTML
end
+
+ context "when the upload is attached to the correct post" do
+ before do
+ OptimizedImage.expects(:resize).returns(true)
+ FileStore::BaseStore.any_instance.expects(:get_depth_for).returns(0)
+ Discourse.store.class.any_instance.expects(:has_been_uploaded?).at_least_once.returns(true)
+ upload.update(secure: true, access_control_post: post)
+ end
+
+ it "handles secure images with the correct lightbox link href" do
+ cpp.post_process
+
+ expect(cpp.html).to match_html cooked_html
+ end
+ end
+
+ context "when the upload is attached to a different post" do
+ before do
+ FastImage.size(upload.url)
+ upload.update(secure: true, access_control_post: Fabricate(:post))
+ end
+
+ it "does not create thumbnails or optimize images" do
+ CookedPostProcessor.any_instance.expects(:optimize_image!).never
+ Upload.any_instance.expects(:create_thumbnail!).never
+ cpp.post_process
+
+ expect(cpp.html).not_to match_html cooked_html
+ end
+ end
end
end
diff --git a/spec/fabricators/upload_fabricator.rb b/spec/fabricators/upload_fabricator.rb
index d9686ff1274..01d05d9da55 100644
--- a/spec/fabricators/upload_fabricator.rb
+++ b/spec/fabricators/upload_fabricator.rb
@@ -51,3 +51,9 @@ Fabricator(:upload_s3, from: :upload) do
end
end
end
+
+Fabricator(:secure_upload_s3, from: :upload_s3) do
+ secure { true }
+ sha1 { SecureRandom.hex(20) }
+ original_sha1 { sequence(:sha1) { |n| Digest::SHA1.hexdigest(n.to_s) } }
+end
diff --git a/spec/jobs/pull_hotlinked_images_spec.rb b/spec/jobs/pull_hotlinked_images_spec.rb
index 3371a73ae4f..40406744509 100644
--- a/spec/jobs/pull_hotlinked_images_spec.rb
+++ b/spec/jobs/pull_hotlinked_images_spec.rb
@@ -125,17 +125,56 @@ describe Jobs::PullHotlinkedImages do
expect(post.uploads).to contain_exactly(upload)
end
- context "when secure media enabled" do
+ context "when secure media enabled for an upload that has already been downloaded and exists" do
it "doesnt redownload the secure upload" do
enable_secure_media
- upload = Fabricate(:upload_s3, secure: true)
+ upload = Fabricate(:secure_upload_s3, secure: true)
stub_s3(upload)
url = Upload.secure_media_url_from_upload_url(upload.url)
url = Discourse.base_url + url
post = Fabricate(:post, raw: "")
+ upload.update(access_control_post: post)
expect { Jobs::PullHotlinkedImages.new.execute(post_id: post.id) }
.not_to change { Upload.count }
end
+
+ context "when the upload original_sha1 is missing" do
+ it "redownloads the upload" do
+ enable_secure_media
+ upload = Fabricate(:upload_s3, secure: true)
+ stub_s3(upload)
+ Upload.stubs(:signed_url_from_secure_media_url).returns(upload.url)
+ url = Upload.secure_media_url_from_upload_url(upload.url)
+ url = Discourse.base_url + url
+ post = Fabricate(:post, raw: "")
+ upload.update(access_control_post: post)
+ FileStore::S3Store.any_instance.stubs(:store_upload).returns(upload.url)
+
+ # without this we get an infinite hang...
+ Post.any_instance.stubs(:trigger_post_process)
+ expect { Jobs::PullHotlinkedImages.new.execute(post_id: post.id) }
+ .to change { Upload.count }.by(1)
+ end
+ end
+
+ context "when the upload access_control_post is different to the current post" do
+ it "redownloads the upload" do
+ enable_secure_media
+ upload = Fabricate(:secure_upload_s3, secure: true)
+ stub_s3(upload)
+ Upload.stubs(:signed_url_from_secure_media_url).returns(upload.url)
+ url = Upload.secure_media_url_from_upload_url(upload.url)
+ url = Discourse.base_url + url
+ post = Fabricate(:post, raw: "")
+ upload.update(access_control_post: Fabricate(:post))
+ FileStore::S3Store.any_instance.stubs(:store_upload).returns(upload.url)
+
+ # without this we get an infinite hang...
+ Post.any_instance.stubs(:trigger_post_process)
+ expect { Jobs::PullHotlinkedImages.new.execute(post_id: post.id) }
+ .to change { Upload.count }.by(1)
+ end
+ end
end
it 'replaces markdown image' do
@@ -376,5 +415,7 @@ describe Jobs::PullHotlinkedImages do
:put,
"https://#{SiteSetting.s3_upload_bucket}.s3.amazonaws.com/original/1X/#{upload.sha1}.#{upload.extension}?acl"
)
+ stub_request(:get, "https:" + upload.url).to_return(status: 200, body: file_from_fixtures("smallest.png"))
+ # stub_request(:get, /#{SiteSetting.s3_upload_bucket}\.s3\.amazonaws\.com/)
end
end
diff --git a/spec/models/topic_converter_spec.rb b/spec/models/topic_converter_spec.rb
index 7d5606561ff..a816600034b 100644
--- a/spec/models/topic_converter_spec.rb
+++ b/spec/models/topic_converter_spec.rb
@@ -131,6 +131,11 @@ describe TopicConverter do
public_reply.link_post_uploads
public_reply.update_uploads_secure_status
+ # we need to do this otherwise we get infinite loop shenanigans, as the pull hotlinked
+ # images is never satisfied and keeps calling cooked post processor which in turn runs
+ # pull hotlinked images
+ Jobs::PullHotlinkedImages.stubs(:new).returns(Helpers::StubbedJob.new)
+
expect(public_reply.uploads[0].secure).to eq(false)
public_topic.convert_to_private_message(admin)
expect(public_topic.reload.posts.find(public_reply.id).uploads[0].secure).to eq(true)
diff --git a/spec/models/upload_spec.rb b/spec/models/upload_spec.rb
index 5bf3df79973..fed2faf1fa8 100644
--- a/spec/models/upload_spec.rb
+++ b/spec/models/upload_spec.rb
@@ -288,6 +288,53 @@ describe Upload do
end
end
+ describe ".consider_for_reuse" do
+ let(:post) { Fabricate(:post) }
+ let(:upload) { Fabricate(:upload) }
+
+ it "returns nil when the provided upload is blank" do
+ expect(Upload.consider_for_reuse(nil, post)).to eq(nil)
+ end
+
+ it "returns the upload when secure media is disabled" do
+ expect(Upload.consider_for_reuse(upload, post)).to eq(upload)
+ end
+
+ context "when secure media enabled" do
+ before do
+ enable_secure_media
+ end
+
+ context "when the upload access control post is != to the provided post" do
+ before do
+ upload.update(access_control_post_id: Fabricate(:post).id)
+ end
+
+ it "returns nil" do
+ expect(Upload.consider_for_reuse(upload, post)).to eq(nil)
+ end
+ end
+
+ context "when the upload original_sha1 is blank (pre-secure-media upload)" do
+ before do
+ upload.update(original_sha1: nil, access_control_post: post)
+ end
+
+ it "returns nil" do
+ expect(Upload.consider_for_reuse(upload, post)).to eq(nil)
+ end
+ end
+
+ context "when the upload original_sha1 is present and access control post is correct" do
+ let(:upload) { Fabricate(:secure_upload_s3, access_control_post: post) }
+
+ it "returns the upload" do
+ expect(Upload.consider_for_reuse(upload, post)).to eq(upload)
+ end
+ end
+ end
+ end
+
describe '.update_secure_status' do
it "respects the secure_override_value parameter if provided" do
upload.update!(secure: true)
@@ -344,18 +391,7 @@ describe Upload do
context "secure media enabled" do
before do
- SiteSetting.enable_s3_uploads = true
- SiteSetting.s3_upload_bucket = "s3-upload-bucket"
- SiteSetting.s3_access_key_id = "some key"
- SiteSetting.s3_secret_access_key = "some secrets3_region key"
- SiteSetting.secure_media = true
-
- stub_request(:head, "https://#{SiteSetting.s3_upload_bucket}.s3.amazonaws.com/")
-
- stub_request(
- :put,
- "https://#{SiteSetting.s3_upload_bucket}.s3.amazonaws.com/original/1X/#{upload.sha1}.#{upload.extension}?acl"
- )
+ enable_secure_media
end
it 'does not mark an image upload as not secure when there is no access control post id, to avoid unintentional exposure' do
@@ -399,4 +435,19 @@ describe Upload do
expect(upload2.reload.extension).to eq("png")
end
end
+
+ def enable_secure_media
+ SiteSetting.enable_s3_uploads = true
+ SiteSetting.s3_upload_bucket = "s3-upload-bucket"
+ SiteSetting.s3_access_key_id = "some key"
+ SiteSetting.s3_secret_access_key = "some secrets3_region key"
+ SiteSetting.secure_media = true
+
+ stub_request(:head, "https://#{SiteSetting.s3_upload_bucket}.s3.amazonaws.com/")
+
+ stub_request(
+ :put,
+ "https://#{SiteSetting.s3_upload_bucket}.s3.amazonaws.com/original/1X/#{upload.sha1}.#{upload.extension}?acl"
+ )
+ end
end
diff --git a/spec/support/helpers.rb b/spec/support/helpers.rb
index efffb42785f..e2e278bb91e 100644
--- a/spec/support/helpers.rb
+++ b/spec/support/helpers.rb
@@ -147,4 +147,9 @@ module Helpers
ActionController::Base.config.relative_url_root = old_root
end
end
+
+ class StubbedJob
+ def initialize; end
+ def perform(args); end
+ end
end