FIX: Mitigate issue where legacy pre-secure hotlinked media would not be redownloaded (#8802)

Basically, say you had already downloaded a certain image from a certain URL
using pull_hotlinked_images and the onebox. The upload would be stored
by its sha as an upload record. Whenever you linked to the same URL again
in a post (e.g. in our case an og:image on review.discourse) we would
would reuse the original upload record because of the sha1.

However when you turned on secure media this could cause problems as
the first post that uses that upload after secure media is enabled
will set the access control post for the upload to the new post.
Then if the post is deleted every single onebox/link to that same image
URL will fail forever with 403 as the secure-media-uploads URL fails
if the access control post has been deleted.

To fix this when cooking posts and pulling hotlinked images, we only
allow using an original upload by URL if its access control post
matches the current post, and if the original_sha1 is filled in,
meaning it was uploaded AFTER secure media was enabled. otherwise
we just redownload the media again to be safe, as the URL will always
be new then.
This commit is contained in:
Martin Brennan 2020-01-29 10:11:38 +10:00 committed by GitHub
parent 20e7fb1c95
commit ab3bda6cd0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 183 additions and 28 deletions

View File

@ -56,7 +56,7 @@ module Jobs
src = original_src = node['src'] || node['href'] src = original_src = node['src'] || node['href']
src = "#{SiteSetting.force_https ? "https" : "http"}:#{src}" if src.start_with?("//") src = "#{SiteSetting.force_https ? "https" : "http"}:#{src}" if src.start_with?("//")
if should_download_image?(src) if should_download_image?(src, post)
begin begin
# have we already downloaded that file? # have we already downloaded that file?
schemeless_src = remove_scheme(original_src) schemeless_src = remove_scheme(original_src)
@ -89,6 +89,7 @@ module Jobs
has_new_broken_image = true has_new_broken_image = true
end end
end end
# have we successfully downloaded that file? # have we successfully downloaded that file?
if downloaded_urls[src].present? if downloaded_urls[src].present?
escaped_src = Regexp.escape(original_src) escaped_src = Regexp.escape(original_src)
@ -163,7 +164,7 @@ module Jobs
doc.css(".lightbox img[src]") doc.css(".lightbox img[src]")
end end
def should_download_image?(src) def should_download_image?(src, post = nil)
# make sure we actually have a url # make sure we actually have a url
return false unless src.present? return false unless src.present?
@ -174,7 +175,15 @@ module Jobs
# Someone could hotlink a file from a different site on the same CDN, # Someone could hotlink a file from a different site on the same CDN,
# so check whether we have it in this database # so check whether we have it in this database
return !Upload.get_from_url(src) #
# if the upload already exists and is attached to a different post,
# or the original_sha1 is missing meaning it was created before secure
# media was enabled, then we definitely want to redownload again otherwise
# we end up reusing existing uploads which may be linked to many posts
# already.
upload = Upload.consider_for_reuse(Upload.get_from_url(src), post)
return !upload.present?
end end
# Don't download non-local images unless site setting enabled # Don't download non-local images unless site setting enabled

View File

@ -121,6 +121,12 @@ class Upload < ActiveRecord::Base
self.class.short_path(sha1: self.sha1, extension: self.extension) self.class.short_path(sha1: self.sha1, extension: self.extension)
end end
def self.consider_for_reuse(upload, post)
return upload if !SiteSetting.secure_media? || upload.blank? || post.blank?
return nil if upload.access_control_post_id != post.id || upload.original_sha1.blank?
upload
end
def self.secure_media_url?(url) def self.secure_media_url?(url)
url.include?(SECURE_MEDIA_ROUTE) url.include?(SECURE_MEDIA_ROUTE)
end end

View File

@ -330,7 +330,13 @@ class CookedPostProcessor
img["height"] = height img["height"] = height
end end
if upload = Upload.get_from_url(src) # if the upload already exists and is attached to a different post,
# or the original_sha1 is missing meaning it was created before secure
# media was enabled. we want to re-thumbnail and re-optimize in this case
# to avoid using uploads linked to many other posts
upload = Upload.consider_for_reuse(Upload.get_from_url(src), @post)
if upload.present?
upload.create_thumbnail!(width, height, crop: crop) upload.create_thumbnail!(width, height, crop: crop)
each_responsive_ratio do |ratio| each_responsive_ratio do |ratio|
@ -351,7 +357,9 @@ class CookedPostProcessor
add_lightbox!(img, original_width, original_height, upload, cropped: crop) add_lightbox!(img, original_width, original_height, upload, cropped: crop)
end end
optimize_image!(img, upload, cropped: crop) if upload if upload.present?
optimize_image!(img, upload, cropped: crop)
end
end end
def loading_image(upload) def loading_image(upload)

View File

@ -504,6 +504,7 @@ describe CookedPostProcessor do
end end
context "s3_uploads" do context "s3_uploads" do
let(:upload) { Fabricate(:secure_upload_s3) }
before do before do
s3_setup s3_setup
stored_path = Discourse.store.get_path_for_upload(upload) stored_path = Discourse.store.get_path_for_upload(upload)
@ -516,13 +517,8 @@ describe CookedPostProcessor do
) )
stub_request(:get, /#{SiteSetting.s3_upload_bucket}\.s3\.amazonaws\.com/) stub_request(:get, /#{SiteSetting.s3_upload_bucket}\.s3\.amazonaws\.com/)
OptimizedImage.expects(:resize).returns(true)
FileStore::BaseStore.any_instance.expects(:get_depth_for).returns(0)
Discourse.store.class.any_instance.expects(:has_been_uploaded?).at_least_once.returns(true)
SiteSetting.login_required = true SiteSetting.login_required = true
SiteSetting.secure_media = true SiteSetting.secure_media = true
upload.update_column(:secure, true)
end end
let(:optimized_size) { "600x500" } let(:optimized_size) { "600x500" }
@ -531,15 +527,43 @@ describe CookedPostProcessor do
Fabricate(:post, raw: "![large.png|#{optimized_size}](#{upload.short_url})") Fabricate(:post, raw: "![large.png|#{optimized_size}](#{upload.short_url})")
end end
it "handles secure images with the correct lightbox link href" do let(:cooked_html) do
cpp.post_process <<~HTML
expect(cpp.html).to match_html <<~HTML
<p><div class="lightbox-wrapper"><a class="lightbox" href="//test.localhost/secure-media-uploads/original/1X/#{upload.sha1}.png" data-download-href="//test.localhost/uploads/short-url/#{upload.base62_sha1}.unknown?dl=1" title="large.png"><img src="" alt="large.png" data-base62-sha1="#{upload.base62_sha1}" width="600" height="500"><div class="meta"> <p><div class="lightbox-wrapper"><a class="lightbox" href="//test.localhost/secure-media-uploads/original/1X/#{upload.sha1}.png" data-download-href="//test.localhost/uploads/short-url/#{upload.base62_sha1}.unknown?dl=1" title="large.png"><img src="" alt="large.png" data-base62-sha1="#{upload.base62_sha1}" width="600" height="500"><div class="meta">
<svg class="fa d-icon d-icon-far-image svg-icon" aria-hidden="true"><use xlink:href="#far-image"></use></svg><span class="filename">large.png</span><span class="informations">1750×2000 1.21 KB</span><svg class="fa d-icon d-icon-discourse-expand svg-icon" aria-hidden="true"><use xlink:href="#discourse-expand"></use></svg> <svg class="fa d-icon d-icon-far-image svg-icon" aria-hidden="true"><use xlink:href="#far-image"></use></svg><span class="filename">large.png</span><span class="informations">1750×2000 1.21 KB</span><svg class="fa d-icon d-icon-discourse-expand svg-icon" aria-hidden="true"><use xlink:href="#discourse-expand"></use></svg>
</div></a></div></p> </div></a></div></p>
HTML HTML
end end
context "when the upload is attached to the correct post" do
before do
OptimizedImage.expects(:resize).returns(true)
FileStore::BaseStore.any_instance.expects(:get_depth_for).returns(0)
Discourse.store.class.any_instance.expects(:has_been_uploaded?).at_least_once.returns(true)
upload.update(secure: true, access_control_post: post)
end
it "handles secure images with the correct lightbox link href" do
cpp.post_process
expect(cpp.html).to match_html cooked_html
end
end
context "when the upload is attached to a different post" do
before do
FastImage.size(upload.url)
upload.update(secure: true, access_control_post: Fabricate(:post))
end
it "does not create thumbnails or optimize images" do
CookedPostProcessor.any_instance.expects(:optimize_image!).never
Upload.any_instance.expects(:create_thumbnail!).never
cpp.post_process
expect(cpp.html).not_to match_html cooked_html
end
end
end end
end end

View File

@ -51,3 +51,9 @@ Fabricator(:upload_s3, from: :upload) do
end end
end end
end end
Fabricator(:secure_upload_s3, from: :upload_s3) do
secure { true }
sha1 { SecureRandom.hex(20) }
original_sha1 { sequence(:sha1) { |n| Digest::SHA1.hexdigest(n.to_s) } }
end

View File

@ -125,17 +125,56 @@ describe Jobs::PullHotlinkedImages do
expect(post.uploads).to contain_exactly(upload) expect(post.uploads).to contain_exactly(upload)
end end
context "when secure media enabled" do context "when secure media enabled for an upload that has already been downloaded and exists" do
it "doesnt redownload the secure upload" do it "doesnt redownload the secure upload" do
enable_secure_media enable_secure_media
upload = Fabricate(:upload_s3, secure: true) upload = Fabricate(:secure_upload_s3, secure: true)
stub_s3(upload) stub_s3(upload)
url = Upload.secure_media_url_from_upload_url(upload.url) url = Upload.secure_media_url_from_upload_url(upload.url)
url = Discourse.base_url + url url = Discourse.base_url + url
post = Fabricate(:post, raw: "<img src='#{url}'>") post = Fabricate(:post, raw: "<img src='#{url}'>")
upload.update(access_control_post: post)
expect { Jobs::PullHotlinkedImages.new.execute(post_id: post.id) } expect { Jobs::PullHotlinkedImages.new.execute(post_id: post.id) }
.not_to change { Upload.count } .not_to change { Upload.count }
end end
context "when the upload original_sha1 is missing" do
it "redownloads the upload" do
enable_secure_media
upload = Fabricate(:upload_s3, secure: true)
stub_s3(upload)
Upload.stubs(:signed_url_from_secure_media_url).returns(upload.url)
url = Upload.secure_media_url_from_upload_url(upload.url)
url = Discourse.base_url + url
post = Fabricate(:post, raw: "<img src='#{url}'>")
upload.update(access_control_post: post)
FileStore::S3Store.any_instance.stubs(:store_upload).returns(upload.url)
# without this we get an infinite hang...
Post.any_instance.stubs(:trigger_post_process)
expect { Jobs::PullHotlinkedImages.new.execute(post_id: post.id) }
.to change { Upload.count }.by(1)
end
end
context "when the upload access_control_post is different to the current post" do
it "redownloads the upload" do
enable_secure_media
upload = Fabricate(:secure_upload_s3, secure: true)
stub_s3(upload)
Upload.stubs(:signed_url_from_secure_media_url).returns(upload.url)
url = Upload.secure_media_url_from_upload_url(upload.url)
url = Discourse.base_url + url
post = Fabricate(:post, raw: "<img src='#{url}'>")
upload.update(access_control_post: Fabricate(:post))
FileStore::S3Store.any_instance.stubs(:store_upload).returns(upload.url)
# without this we get an infinite hang...
Post.any_instance.stubs(:trigger_post_process)
expect { Jobs::PullHotlinkedImages.new.execute(post_id: post.id) }
.to change { Upload.count }.by(1)
end
end
end end
it 'replaces markdown image' do it 'replaces markdown image' do
@ -376,5 +415,7 @@ describe Jobs::PullHotlinkedImages do
:put, :put,
"https://#{SiteSetting.s3_upload_bucket}.s3.amazonaws.com/original/1X/#{upload.sha1}.#{upload.extension}?acl" "https://#{SiteSetting.s3_upload_bucket}.s3.amazonaws.com/original/1X/#{upload.sha1}.#{upload.extension}?acl"
) )
stub_request(:get, "https:" + upload.url).to_return(status: 200, body: file_from_fixtures("smallest.png"))
# stub_request(:get, /#{SiteSetting.s3_upload_bucket}\.s3\.amazonaws\.com/)
end end
end end

View File

@ -131,6 +131,11 @@ describe TopicConverter do
public_reply.link_post_uploads public_reply.link_post_uploads
public_reply.update_uploads_secure_status public_reply.update_uploads_secure_status
# we need to do this otherwise we get infinite loop shenanigans, as the pull hotlinked
# images is never satisfied and keeps calling cooked post processor which in turn runs
# pull hotlinked images
Jobs::PullHotlinkedImages.stubs(:new).returns(Helpers::StubbedJob.new)
expect(public_reply.uploads[0].secure).to eq(false) expect(public_reply.uploads[0].secure).to eq(false)
public_topic.convert_to_private_message(admin) public_topic.convert_to_private_message(admin)
expect(public_topic.reload.posts.find(public_reply.id).uploads[0].secure).to eq(true) expect(public_topic.reload.posts.find(public_reply.id).uploads[0].secure).to eq(true)

View File

@ -288,6 +288,53 @@ describe Upload do
end end
end end
describe ".consider_for_reuse" do
let(:post) { Fabricate(:post) }
let(:upload) { Fabricate(:upload) }
it "returns nil when the provided upload is blank" do
expect(Upload.consider_for_reuse(nil, post)).to eq(nil)
end
it "returns the upload when secure media is disabled" do
expect(Upload.consider_for_reuse(upload, post)).to eq(upload)
end
context "when secure media enabled" do
before do
enable_secure_media
end
context "when the upload access control post is != to the provided post" do
before do
upload.update(access_control_post_id: Fabricate(:post).id)
end
it "returns nil" do
expect(Upload.consider_for_reuse(upload, post)).to eq(nil)
end
end
context "when the upload original_sha1 is blank (pre-secure-media upload)" do
before do
upload.update(original_sha1: nil, access_control_post: post)
end
it "returns nil" do
expect(Upload.consider_for_reuse(upload, post)).to eq(nil)
end
end
context "when the upload original_sha1 is present and access control post is correct" do
let(:upload) { Fabricate(:secure_upload_s3, access_control_post: post) }
it "returns the upload" do
expect(Upload.consider_for_reuse(upload, post)).to eq(upload)
end
end
end
end
describe '.update_secure_status' do describe '.update_secure_status' do
it "respects the secure_override_value parameter if provided" do it "respects the secure_override_value parameter if provided" do
upload.update!(secure: true) upload.update!(secure: true)
@ -344,18 +391,7 @@ describe Upload do
context "secure media enabled" do context "secure media enabled" do
before do before do
SiteSetting.enable_s3_uploads = true enable_secure_media
SiteSetting.s3_upload_bucket = "s3-upload-bucket"
SiteSetting.s3_access_key_id = "some key"
SiteSetting.s3_secret_access_key = "some secrets3_region key"
SiteSetting.secure_media = true
stub_request(:head, "https://#{SiteSetting.s3_upload_bucket}.s3.amazonaws.com/")
stub_request(
:put,
"https://#{SiteSetting.s3_upload_bucket}.s3.amazonaws.com/original/1X/#{upload.sha1}.#{upload.extension}?acl"
)
end end
it 'does not mark an image upload as not secure when there is no access control post id, to avoid unintentional exposure' do it 'does not mark an image upload as not secure when there is no access control post id, to avoid unintentional exposure' do
@ -399,4 +435,19 @@ describe Upload do
expect(upload2.reload.extension).to eq("png") expect(upload2.reload.extension).to eq("png")
end end
end end
def enable_secure_media
SiteSetting.enable_s3_uploads = true
SiteSetting.s3_upload_bucket = "s3-upload-bucket"
SiteSetting.s3_access_key_id = "some key"
SiteSetting.s3_secret_access_key = "some secrets3_region key"
SiteSetting.secure_media = true
stub_request(:head, "https://#{SiteSetting.s3_upload_bucket}.s3.amazonaws.com/")
stub_request(
:put,
"https://#{SiteSetting.s3_upload_bucket}.s3.amazonaws.com/original/1X/#{upload.sha1}.#{upload.extension}?acl"
)
end
end end

View File

@ -147,4 +147,9 @@ module Helpers
ActionController::Base.config.relative_url_root = old_root ActionController::Base.config.relative_url_root = old_root
end end
end end
class StubbedJob
def initialize; end
def perform(args); end
end
end end