DEV: Upload and secure media retroactive rake task improvements (#9027)

* Add uploads:sync_s3_acls rake task to ensure the ACLs in S3 are the correct (public-read or private) setting based on upload security

* Improved uploads:disable_secure_media to be more efficient and provide better messages to the user.

* Rename uploads:ensure_correct_acl task to uploads:secure_upload_analyse_and_update as it does more than check the ACL

* Many improvements to uploads:secure_upload_analyse_and_update

* Make sure that upload.access_control_post is unscoped so deleted posts are still fetched, because they still affect the security of the upload.

* Add escape hatch for capture_stdout in the form of RAILS_ENABLE_TEST_STDOUT. If provided the capture_stdout code will be ignored, so you can see the output if you need.
This commit is contained in:
Martin Brennan
2020-03-03 10:03:58 +11:00
committed by GitHub
parent 8a696a4ffc
commit 0388653a4d
7 changed files with 271 additions and 62 deletions

View File

@@ -14,6 +14,12 @@ class Upload < ActiveRecord::Base
belongs_to :user belongs_to :user
belongs_to :access_control_post, class_name: 'Post' belongs_to :access_control_post, class_name: 'Post'
# when we access this post we don't care if the post
# is deleted
def access_control_post
Post.unscoped { super }
end
has_many :post_uploads, dependent: :destroy has_many :post_uploads, dependent: :destroy
has_many :posts, through: :post_uploads has_many :posts, through: :post_uploads

View File

@@ -615,6 +615,21 @@ task "uploads:recover" => :environment do
end end
end end
task "uploads:sync_s3_acls" => :environment do
RailsMultisite::ConnectionManagement.each_connection do |db|
unless Discourse.store.external?
puts "This task only works for external storage."
exit 1
end
puts "CAUTION: This task may take a long time to complete!"
puts "-" * 30
puts "Uploads marked as secure will get a private ACL, and uploads marked as not secure will get a public ACL."
adjust_acls(Upload.find_each(batch_size: 100))
puts "", "Upload ACL sync complete!"
end
end
task "uploads:disable_secure_media" => :environment do task "uploads:disable_secure_media" => :environment do
RailsMultisite::ConnectionManagement.each_connection do |db| RailsMultisite::ConnectionManagement.each_connection do |db|
unless Discourse.store.external? unless Discourse.store.external?
@@ -628,43 +643,56 @@ task "uploads:disable_secure_media" => :environment do
secure_uploads = Upload.includes(:posts).where(secure: true) secure_uploads = Upload.includes(:posts).where(secure: true)
secure_upload_count = secure_uploads.count secure_upload_count = secure_uploads.count
uploads_to_adjust_acl_for = []
posts_to_rebake = {}
i = 0 i = 0
secure_uploads.find_each(batch_size: 20).each do |upload| secure_uploads.find_each(batch_size: 20).each do |upload|
Upload.transaction do uploads_to_adjust_acl_for << upload
upload.secure = false
RakeHelpers.print_status_with_label("Updating ACL for upload #{upload.id}.......", i, secure_upload_count) upload.posts.each do |post|
Discourse.store.update_upload_ACL(upload) # don't want unnecessary double-ups
next if posts_to_rebake.key?(post.id)
RakeHelpers.print_status_with_label("Rebaking posts for upload #{upload.id}.......", i, secure_upload_count) posts_to_rebake[post.id] = post
upload.posts.each(&:rebake!)
upload.save
i += 1
end end
i += 1
end end
puts "", "Marking #{secure_upload_count} uploads as not secure.", ""
secure_uploads.update_all(secure: false)
adjust_acls(uploads_to_adjust_acl_for)
post_rebake_errors = rebake_upload_posts(posts_to_rebake)
log_rebake_errors(post_rebake_errors)
RakeHelpers.print_status_with_label("Rebaking and updating complete! ", i, secure_upload_count) RakeHelpers.print_status_with_label("Rebaking and updating complete! ", i, secure_upload_count)
puts ""
end end
puts "Secure media is now disabled!", "" puts "", "Secure media is now disabled!", ""
end
# Renamed to uploads:secure_upload_analyse_and_update
task "uploads:ensure_correct_acl" => :environment do
puts "This task has been deprecated, run uploads:secure_upload_analyse_and_update task instead."
exit 1
end end
## ##
# Run this task whenever the secure_media or login_required # Run this task whenever the secure_media or login_required
# settings are changed for a Discourse instance to update # settings are changed for a Discourse instance to update
# the upload secure flag and S3 upload ACLs. # the upload secure flag and S3 upload ACLs. Any uploads that
task "uploads:ensure_correct_acl" => :environment do # have their secure status changed will have all associated posts
# rebaked.
task "uploads:secure_upload_analyse_and_update" => :environment do
RailsMultisite::ConnectionManagement.each_connection do |db| RailsMultisite::ConnectionManagement.each_connection do |db|
unless Discourse.store.external? unless Discourse.store.external?
puts "This task only works for external storage." puts "This task only works for external storage."
exit 1 exit 1
end end
puts "Ensuring correct ACL for uploads in #{db}...", "" puts "Analyzing security for uploads in #{db}...", ""
upload_ids_to_mark_as_secure, upload_ids_to_mark_as_not_secure, posts_to_rebake, uploads_to_adjust_acl_for = nil
Upload.transaction do Upload.transaction do
mark_secure_in_loop_because_no_login_required = false mark_secure_in_loop_because_no_login_required = false
@@ -677,19 +705,23 @@ task "uploads:ensure_correct_acl" => :environment do
update_uploads_access_control_post update_uploads_access_control_post
end end
# First of all only get relevant uploads (supported media). # Get all uploads in the database, including optimized images. Both media (images, videos,
# # etc) along with attachments (pdfs, txt, etc.) must be loaded because all can be marked as
# Also only get uploads that are not for a theme or a site setting, so only # secure based on site settings.
# get post related uploads. uploads_to_update = Upload.includes(:posts, :optimized_images).joins(:post_uploads)
uploads_with_supported_media = Upload.includes(:posts, :access_control_post, :optimized_images).where(
"LOWER(original_filename) SIMILAR TO '%\.(jpg|jpeg|png|gif|svg|ico|mp3|ogg|wav|m4a|mov|mp4|webm|ogv)'"
).joins(:post_uploads)
puts "There are #{uploads_with_supported_media.count} upload(s) with supported media that could be marked secure.", "" # we do this to avoid a heavier post query, and to make sure we only
# get unique posts AND include deleted posts (unscoped)
unique_access_control_posts = Post.unscoped.select(:id, :topic_id).includes(topic: :category).where(id: uploads_to_update.pluck(:access_control_post_id).uniq)
uploads_to_update.each do |upload|
upload.access_control_post = unique_access_control_posts.find { |post| post.id == upload.access_control_post_id }
end
puts "There are #{uploads_to_update.count} upload(s) that could be marked secure.", ""
# Simply mark all these uploads as secure if login_required because no anons will be able to access them # Simply mark all these uploads as secure if login_required because no anons will be able to access them
if SiteSetting.login_required? if SiteSetting.login_required?
mark_all_as_secure_login_required(uploads_with_supported_media) mark_secure_in_loop_because_no_login_required = false
else else
# If NOT login_required, then we have to go for the other slower flow, where in the loop # If NOT login_required, then we have to go for the other slower flow, where in the loop
@@ -698,24 +730,50 @@ task "uploads:ensure_correct_acl" => :environment do
puts "Marking posts as secure in the next step because login_required is false." puts "Marking posts as secure in the next step because login_required is false."
end end
puts "", "Determining which of #{uploads_with_supported_media.count} upload posts need to be marked secure and be rebaked.", "" puts "", "Analysing which of #{uploads_to_update.count} uploads need to be marked secure and be rebaked.", ""
upload_ids_to_mark_as_secure, posts_to_rebake = determine_upload_security_and_posts_to_rebake( upload_ids_to_mark_as_secure,
uploads_with_supported_media, mark_secure_in_loop_because_no_login_required upload_ids_to_mark_as_not_secure,
posts_to_rebake,
uploads_to_adjust_acl_for = determine_upload_security_and_posts_to_rebake(
uploads_to_update, mark_secure_in_loop_because_no_login_required
) )
mark_specific_uploads_as_secure_no_login_required(upload_ids_to_mark_as_secure) if !SiteSetting.login_required?
update_specific_upload_security_no_login_required(upload_ids_to_mark_as_secure, upload_ids_to_mark_as_not_secure)
post_rebake_errors = rebake_upload_posts(posts_to_rebake) else
log_rebake_errors(post_rebake_errors) mark_all_as_secure_login_required(uploads_to_update)
end
end end
# Enqueue rebakes AFTER upload transaction complete, so there is no race condition
# between updating the DB and the rebakes occurring.
post_rebake_errors = rebake_upload_posts(posts_to_rebake)
log_rebake_errors(post_rebake_errors)
# Also do this AFTER upload transaction complete so we don't end up with any
# errors leaving ACLs in a bad state (the ACL sync task can be run to fix any
# outliers at any time).
adjust_acls(uploads_to_adjust_acl_for)
end end
puts "", "Done" puts "", "", "Done!"
end end
def mark_all_as_secure_login_required(uploads_with_supported_media) def adjust_acls(uploads_to_adjust_acl_for)
puts "Marking #{uploads_with_supported_media.count} upload(s) as secure because login_required is true.", "" total_count = uploads_to_adjust_acl_for.respond_to?(:length) ? uploads_to_adjust_acl_for.length : uploads_to_adjust_acl_for.count
uploads_with_supported_media.update_all(secure: true) puts "", "Updating ACL for #{total_count} uploads.", ""
i = 0
uploads_to_adjust_acl_for.each do |upload|
RakeHelpers.print_status_with_label("Updating ACL for upload.......", i, total_count)
Discourse.store.update_upload_ACL(upload)
i += 1
end
RakeHelpers.print_status_with_label("Updaing ACLs complete! ", i, total_count)
end
def mark_all_as_secure_login_required(uploads_to_update)
puts "Marking #{uploads_to_update.count} upload(s) as secure because login_required is true.", ""
uploads_to_update.update_all(secure: true)
puts "Finished marking upload(s) as secure." puts "Finished marking upload(s) as secure."
end end
@@ -727,11 +785,16 @@ def log_rebake_errors(rebake_errors)
end end
end end
def mark_specific_uploads_as_secure_no_login_required(upload_ids_to_mark_as_secure) def update_specific_upload_security_no_login_required(upload_ids_to_mark_as_secure, upload_ids_to_mark_as_not_secure)
return if upload_ids_to_mark_as_secure.empty? if upload_ids_to_mark_as_secure.any?
puts "Marking #{upload_ids_to_mark_as_secure.length} uploads as secure because UploadSecurity determined them to be secure." puts "Marking #{upload_ids_to_mark_as_secure.length} uploads as secure because UploadSecurity determined them to be secure."
Upload.where(id: upload_ids_to_mark_as_secure).update_all(secure: true) Upload.where(id: upload_ids_to_mark_as_secure).update_all(secure: true)
puts "Finished marking uploads as secure." end
if upload_ids_to_mark_as_not_secure.any?
puts "Marking #{upload_ids_to_mark_as_not_secure.length} uploads as not secure because UploadSecurity determined them to be not secure."
Upload.where(id: upload_ids_to_mark_as_not_secure).update_all(secure: false)
end
puts "Finished updating upload security."
end end
def update_uploads_access_control_post def update_uploads_access_control_post
@@ -752,12 +815,13 @@ def update_uploads_access_control_post
end end
def rebake_upload_posts(posts_to_rebake) def rebake_upload_posts(posts_to_rebake)
posts_to_rebake = posts_to_rebake.values
post_rebake_errors = [] post_rebake_errors = []
puts "", "Rebaking #{posts_to_rebake.length} posts with affected uploads.", "" puts "", "Rebaking #{posts_to_rebake.length} posts with affected uploads.", ""
begin begin
i = 0 i = 0
posts_to_rebake.each do |post| posts_to_rebake.each do |post|
RakeHelpers.print_status_with_label("Determining which uploads to mark secure and rebake.....", i, posts_to_rebake.length) RakeHelpers.print_status_with_label("Rebaking posts.....", i, posts_to_rebake.length)
post.rebake! post.rebake!
i += 1 i += 1
end end
@@ -770,32 +834,55 @@ def rebake_upload_posts(posts_to_rebake)
post_rebake_errors post_rebake_errors
end end
def determine_upload_security_and_posts_to_rebake(uploads_with_supported_media, mark_secure_in_loop_because_no_login_required) def determine_upload_security_and_posts_to_rebake(uploads_to_update, mark_secure_in_loop_because_no_login_required)
upload_ids_to_mark_as_secure = [] upload_ids_to_mark_as_secure = []
posts_to_rebake = [] upload_ids_to_mark_as_not_secure = []
uploads_to_adjust_acl_for = []
posts_to_rebake = {}
i = 0 i = 0
uploads_with_supported_media.find_each(batch_size: 50) do |upload_with_supported_media| uploads_to_update.find_each(batch_size: 50) do |upload_to_update|
RakeHelpers.print_status_with_label("Updating ACL for upload.......", i, uploads_with_supported_media.count)
# we just need to determine the post security here so the ACL is set to the correct thing, # we just need to determine the post security here so the ACL is set to the correct thing,
# because the update_upload_ACL method uses upload.secure? # because the update_upload_ACL method uses upload.secure?
upload_with_supported_media.secure = UploadSecurity.new(upload_with_supported_media).should_be_secure? original_update_secure_status = upload_to_update.secure
Discourse.store.update_upload_ACL(upload_with_supported_media) upload_to_update.secure = UploadSecurity.new(upload_to_update).should_be_secure?
RakeHelpers.print_status_with_label("Determining which uploads to mark secure and rebake.....", i, uploads_with_supported_media.count) # no point changing ACLs or rebaking or doing any such shenanigans
upload_with_supported_media.posts.each { |post| posts_to_rebake << post } # when the secure status hasn't even changed!
if original_update_secure_status == upload_to_update.secure
i += 1
next
end
if mark_secure_in_loop_because_no_login_required && upload_with_supported_media.secure? # we only want to update the acl later once the secure status
upload_ids_to_mark_as_secure << upload_with_supported_media.id # has been saved in the DB; otherwise if there is a later failure
# we get stuck with an incorrect ACL in S3
uploads_to_adjust_acl_for << upload_to_update
RakeHelpers.print_status_with_label("Analysing which upload posts to rebake.....", i, uploads_to_update.count)
upload_to_update.posts.each do |post|
# don't want unnecessary double-ups
next if posts_to_rebake.key?(post.id)
posts_to_rebake[post.id] = post
end
# some uploads will be marked as not secure here.
# we need to address this with upload_ids_to_mark_as_not_secure
# e.g. turning off SiteSetting.login_required
if mark_secure_in_loop_because_no_login_required
if upload_to_update.secure?
upload_ids_to_mark_as_secure << upload_to_update.id
else
upload_ids_to_mark_as_not_secure << upload_to_update.id
end
end end
i += 1 i += 1
end end
RakeHelpers.print_status_with_label("Determination complete! ", i, uploads_with_supported_media.count) RakeHelpers.print_status_with_label("Analysis complete! ", i, uploads_to_update.count)
puts "" puts ""
[upload_ids_to_mark_as_secure, posts_to_rebake] [upload_ids_to_mark_as_secure, upload_ids_to_mark_as_not_secure, posts_to_rebake, uploads_to_adjust_acl_for]
end end
def inline_uploads(post) def inline_uploads(post)

View File

@@ -57,9 +57,6 @@ class UploadSecurity
# if there is no access control post id and the upload is currently secure, we # if there is no access control post id and the upload is currently secure, we
# do not want to make it un-secure to avoid unintentionally exposing it # do not want to make it un-secure to avoid unintentionally exposing it
def access_control_post_has_secure_media? def access_control_post_has_secure_media?
# if the post is deleted the access_control_post will be blank...
# TODO: deal with this in a better way
return false if @upload.access_control_post.blank?
@upload.access_control_post.with_secure_media? @upload.access_control_post.with_secure_media?
end end

View File

@@ -24,9 +24,9 @@ describe Jobs::UpdatePrivateUploadsAcl do
before do before do
SiteSetting.login_required = true SiteSetting.login_required = true
SiteSetting.prevent_anons_from_downloading_files = true SiteSetting.prevent_anons_from_downloading_files = true
SiteSetting::Upload.stubs(:enable_s3_uploads).returns(true)
Discourse.stubs(:store).returns(stub(external?: false)) Discourse.stubs(:store).returns(stub(external?: false))
SiteSetting.stubs(:secure_media?).returns(true) enable_s3_uploads([upload])
SiteSetting.secure_media = true
end end
it "changes the upload to secure" do it "changes the upload to secure" do
@@ -35,4 +35,20 @@ describe Jobs::UpdatePrivateUploadsAcl do
end end
end end
end end
def enable_s3_uploads(uploads)
SiteSetting.enable_s3_uploads = true
SiteSetting.s3_upload_bucket = "s3-upload-bucket"
SiteSetting.s3_access_key_id = "some key"
SiteSetting.s3_secret_access_key = "some secrets3_region key"
stub_request(:head, "https://#{SiteSetting.s3_upload_bucket}.s3.amazonaws.com/")
uploads.each do |upload|
stub_request(
:put,
"https://#{SiteSetting.s3_upload_bucket}.s3.amazonaws.com/original/1X/#{upload.sha1}.#{upload.extension}?acl"
)
end
end
end end

View File

@@ -97,11 +97,20 @@ RSpec.describe UploadSecurity do
context "when the access control post has_secure_media?" do context "when the access control post has_secure_media?" do
before do before do
upload.update(access_control_post: post_in_secure_context) upload.update(access_control_post_id: post_in_secure_context.id)
end end
it "returns true" do it "returns true" do
expect(subject.should_be_secure?).to eq(true) expect(subject.should_be_secure?).to eq(true)
end end
context "when the post is deleted" do
before do
post_in_secure_context.trash!
end
it "still determines whether the post has secure media; returns true" do
expect(subject.should_be_secure?).to eq(true)
end
end
end end
context "when uploading in the composer" do context "when uploading in the composer" do

View File

@@ -130,6 +130,10 @@ module Helpers
def capture_stdout def capture_stdout
old_stdout = $stdout old_stdout = $stdout
if ENV['RAILS_ENABLE_TEST_STDOUT']
yield
return
end
io = StringIO.new io = StringIO.new
$stdout = io $stdout = io
yield yield

View File

@@ -6,9 +6,10 @@ RSpec.describe "tasks/uploads" do
before do before do
Rake::Task.clear Rake::Task.clear
Discourse::Application.load_tasks Discourse::Application.load_tasks
SiteSetting.authorized_extensions += "|pdf"
end end
describe "uploads:ensure_correct_acl" do describe "uploads:secure_upload_analyse_and_update" do
let!(:uploads) do let!(:uploads) do
[ [
multi_post_upload1, multi_post_upload1,
@@ -19,6 +20,7 @@ RSpec.describe "tasks/uploads" do
let(:multi_post_upload1) { Fabricate(:upload_s3) } let(:multi_post_upload1) { Fabricate(:upload_s3) }
let(:upload1) { Fabricate(:upload_s3) } let(:upload1) { Fabricate(:upload_s3) }
let(:upload2) { Fabricate(:upload_s3) } let(:upload2) { Fabricate(:upload_s3) }
let(:upload3) { Fabricate(:upload_s3, original_filename: 'test.pdf') }
let!(:post1) { Fabricate(:post) } let!(:post1) { Fabricate(:post) }
let!(:post2) { Fabricate(:post) } let!(:post2) { Fabricate(:post) }
@@ -29,11 +31,12 @@ RSpec.describe "tasks/uploads" do
PostUpload.create(post: post2, upload: multi_post_upload1) PostUpload.create(post: post2, upload: multi_post_upload1)
PostUpload.create(post: post2, upload: upload1) PostUpload.create(post: post2, upload: upload1)
PostUpload.create(post: post3, upload: upload2) PostUpload.create(post: post3, upload: upload2)
PostUpload.create(post: post3, upload: upload3)
end end
def invoke_task def invoke_task
capture_stdout do capture_stdout do
Rake::Task['uploads:ensure_correct_acl'].invoke Rake::Task['uploads:secure_upload_analyse_and_update'].invoke
end end
end end
@@ -59,6 +62,7 @@ RSpec.describe "tasks/uploads" do
expect(multi_post_upload1.reload.access_control_post).to eq(post1) expect(multi_post_upload1.reload.access_control_post).to eq(post1)
expect(upload1.reload.access_control_post).to eq(post2) expect(upload1.reload.access_control_post).to eq(post2)
expect(upload2.reload.access_control_post).to eq(post3) expect(upload2.reload.access_control_post).to eq(post3)
expect(upload3.reload.access_control_post).to eq(post3)
end end
it "sets the upload in the read restricted topic category to secure" do it "sets the upload in the read restricted topic category to secure" do
@@ -66,6 +70,7 @@ RSpec.describe "tasks/uploads" do
invoke_task invoke_task
expect(upload2.reload.secure).to eq(true) expect(upload2.reload.secure).to eq(true)
expect(upload1.reload.secure).to eq(false) expect(upload1.reload.secure).to eq(false)
expect(upload3.reload.secure).to eq(false)
end end
it "sets the upload in the PM topic to secure" do it "sets the upload in the PM topic to secure" do
@@ -86,10 +91,95 @@ RSpec.describe "tasks/uploads" do
expect(post2.reload.baked_at).not_to eq(post2_baked) expect(post2.reload.baked_at).not_to eq(post2_baked)
expect(post3.reload.baked_at).not_to eq(post3_baked) expect(post3.reload.baked_at).not_to eq(post3_baked)
end end
context "for an upload that is already secure and does not need to change" do
before do
post3.topic.update(archetype: 'private_message', category: nil)
upload2.update(access_control_post: post3)
upload2.update_secure_status
end
it "does not rebake the associated post" do
post3_baked = post3.baked_at.to_s
invoke_task
expect(post3.reload.baked_at.to_s).to eq(post3_baked)
end
it "does not attempt to update the acl" do
Discourse.store.expects(:update_upload_ACL).with(upload2).never
invoke_task
end
end
context "for an upload that is already secure and is changing to not secure" do
it "changes the upload to not secure and updates the ACL" do
upload_to_mark_not_secure = Fabricate(:upload_s3, secure: true)
post_for_upload = Fabricate(:post)
PostUpload.create(post: post_for_upload, upload: upload_to_mark_not_secure)
enable_s3_uploads(uploads.concat([upload_to_mark_not_secure]))
invoke_task
expect(upload_to_mark_not_secure.reload.secure).to eq(false)
end
end
end end
end end
end end
describe "uploads:disable_secure_media" do
def invoke_task
capture_stdout do
Rake::Task['uploads:disable_secure_media'].invoke
end
end
before do
enable_s3_uploads(uploads)
SiteSetting.secure_media = true
PostUpload.create(post: post1, upload: upload1)
PostUpload.create(post: post1, upload: upload2)
PostUpload.create(post: post2, upload: upload3)
PostUpload.create(post: post2, upload: upload4)
end
let!(:uploads) do
[
upload1, upload2, upload3, upload4, upload5
]
end
let(:post1) { Fabricate(:post) }
let(:post2) { Fabricate(:post) }
let(:upload1) { Fabricate(:upload_s3, secure: true, access_control_post: post1) }
let(:upload2) { Fabricate(:upload_s3, secure: true, access_control_post: post1) }
let(:upload3) { Fabricate(:upload_s3, secure: true, access_control_post: post2) }
let(:upload4) { Fabricate(:upload_s3, secure: true, access_control_post: post2) }
let(:upload5) { Fabricate(:upload_s3, secure: false) }
it "disables the secure media setting" do
invoke_task
expect(SiteSetting.secure_media).to eq(false)
end
it "updates all secure uploads to secure: false" do
invoke_task
[upload1, upload2, upload3, upload4].each do |upl|
expect(upl.reload.secure).to eq(false)
end
end
it "rebakes the associated posts" do
baked_post1 = post1.baked_at
baked_post2 = post2.baked_at
invoke_task
expect(post1.reload.baked_at).not_to eq(baked_post1)
expect(post2.reload.baked_at).not_to eq(baked_post2)
end
it "updates the affected ACLs" do
FileStore::S3Store.any_instance.expects(:update_upload_ACL).times(4)
invoke_task
end
end
def enable_s3_uploads(uploads) def enable_s3_uploads(uploads)
SiteSetting.enable_s3_uploads = true SiteSetting.enable_s3_uploads = true
SiteSetting.s3_upload_bucket = "s3-upload-bucket" SiteSetting.s3_upload_bucket = "s3-upload-bucket"