# frozen_string_literal: true

require 'rails_helper'

describe UploadsController do
  fab!(:user) { Fabricate(:user) }

  describe '#create' do
    it 'requires you to be logged in' do
      post "/uploads.json"
      expect(response.status).to eq(403)
    end

    context 'logged in' do
      before do
        sign_in(user)
      end

      let(:logo_file) { file_from_fixtures("logo.png") }
      let(:logo_filename) { File.basename(logo_file) }

      let(:logo) { Rack::Test::UploadedFile.new(logo_file) }
      let(:fake_jpg) { Rack::Test::UploadedFile.new(file_from_fixtures("fake.jpg")) }
      let(:text_file) { Rack::Test::UploadedFile.new(File.new("#{Rails.root}/LICENSE.txt")) }

      it 'expects a type or upload_type' do
        post "/uploads.json", params: { file: logo }
        expect(response.status).to eq(400)

        post "/uploads.json", params: { file: Rack::Test::UploadedFile.new(logo_file), type: "avatar" }
        expect(response.status).to eq 200

        post "/uploads.json", params: { file: Rack::Test::UploadedFile.new(logo_file), upload_type: "avatar" }
        expect(response.status).to eq 200
      end

      it 'is successful with an image' do
        post "/uploads.json", params: { file: logo, type: "avatar" }
        expect(response.status).to eq 200
        expect(response.parsed_body["id"]).to be_present
        expect(Jobs::CreateAvatarThumbnails.jobs.size).to eq(1)
      end

      it 'returns "raw" url for site settings' do
        set_cdn_url "https://awesome.com"

        upload = UploadCreator.new(logo_file, "logo.png").create_for(-1)
        logo = Rack::Test::UploadedFile.new(file_from_fixtures("logo.png"))

        post "/uploads.json", params: { file: logo, type: "site_setting", for_site_setting: "true" }
        expect(response.status).to eq 200
        expect(response.parsed_body["url"]).to eq(upload.url)
      end

      it 'returns cdn url' do
        set_cdn_url "https://awesome.com"
        post "/uploads.json", params: { file: logo, type: "composer" }
        expect(response.status).to eq 200
        expect(response.parsed_body["url"]).to start_with("https://awesome.com/uploads/default/")
      end

      it 'is successful with an attachment' do
        SiteSetting.authorized_extensions = "*"

        post "/uploads.json", params: { file: text_file, type: "composer" }
        expect(response.status).to eq 200

        expect(Jobs::CreateAvatarThumbnails.jobs.size).to eq(0)
        id = response.parsed_body["id"]
        expect(id).to be
      end

      it 'is successful with api' do
        SiteSetting.authorized_extensions = "*"
        api_key = Fabricate(:api_key, user: user).key

        url = "http://example.com/image.png"
        png = File.read(Rails.root + "spec/fixtures/images/logo.png")

        stub_request(:get, url).to_return(status: 200, body: png)

        post "/uploads.json", params: { url: url, type: "avatar" }, headers: {
          HTTP_API_KEY: api_key,
          HTTP_API_USERNAME: user.username.downcase
        }

        json = response.parsed_body

        expect(response.status).to eq(200)
        expect(Jobs::CreateAvatarThumbnails.jobs.size).to eq(1)
        expect(json["id"]).to be_present
        expect(json["short_url"]).to eq("upload://qUm0DGR49PAZshIi7HxMd3cAlzn.png")
      end

      it 'correctly sets retain_hours for admins' do
        sign_in(Fabricate(:admin))

        post "/uploads.json", params: {
          file: logo,
          retain_hours: 100,
          type: "profile_background",
        }

        id = response.parsed_body["id"]
        expect(Jobs::CreateAvatarThumbnails.jobs.size).to eq(0)
        expect(Upload.find(id).retain_hours).to eq(100)
      end

      it 'requires a file' do
        post "/uploads.json", params: { type: "composer" }

        expect(Jobs::CreateAvatarThumbnails.jobs.size).to eq(0)
        message = response.parsed_body

        expect(response.status).to eq 422
        expect(message["errors"]).to contain_exactly(I18n.t("upload.file_missing"))
      end

      it 'properly returns errors' do
        SiteSetting.authorized_extensions = "*"
        SiteSetting.max_attachment_size_kb = 1

        post "/uploads.json", params: { file: text_file, type: "avatar" }

        expect(response.status).to eq(422)
        expect(Jobs::CreateAvatarThumbnails.jobs.size).to eq(0)
        errors = response.parsed_body["errors"]
        expect(errors.first).to eq(I18n.t("upload.attachments.too_large_humanized", max_size: "1 KB"))
      end

      it 'ensures allow_uploaded_avatars is enabled when uploading an avatar' do
        SiteSetting.allow_uploaded_avatars = 'disabled'
        post "/uploads.json", params: { file: logo, type: "avatar" }
        expect(response.status).to eq(422)
      end

      it 'ensures discourse_connect_overrides_avatar is not enabled when uploading an avatar' do
        SiteSetting.discourse_connect_overrides_avatar = true
        post "/uploads.json", params: { file: logo, type: "avatar" }
        expect(response.status).to eq(422)
      end

      it 'always allows admins to upload avatars' do
        sign_in(Fabricate(:admin))
        SiteSetting.allow_uploaded_avatars = 'disabled'

        post "/uploads.json", params: { file: logo, type: "avatar" }
        expect(response.status).to eq(200)
      end

      it 'allows staff to upload any file in PM' do
        SiteSetting.authorized_extensions = "jpg"
        SiteSetting.allow_staff_to_upload_any_file_in_pm = true
        user.update_columns(moderator: true)

        post "/uploads.json", params: {
          file: text_file,
          type: "composer",
          for_private_message: "true",
        }

        expect(response.status).to eq(200)
        id = response.parsed_body["id"]
        expect(Upload.last.id).to eq(id)
      end

      it 'allows staff to upload supported images for site settings' do
        SiteSetting.authorized_extensions = ''
        user.update!(admin: true)

        post "/uploads.json", params: {
          file: logo,
          type: "site_setting",
          for_site_setting: "true",
        }

        expect(response.status).to eq(200)
        id = response.parsed_body["id"]

        upload = Upload.last

        expect(upload.id).to eq(id)
        expect(upload.original_filename).to eq(logo_filename)
      end

      it 'respects `authorized_extensions_for_staff` setting when staff upload file' do
        SiteSetting.authorized_extensions = ""
        SiteSetting.authorized_extensions_for_staff = "*"
        user.update_columns(moderator: true)

        post "/uploads.json", params: {
          file: text_file,
          type: "composer",
        }

        expect(response.status).to eq(200)
        data = response.parsed_body
        expect(data["id"]).to be_present
      end

      it 'ignores `authorized_extensions_for_staff` setting when non-staff upload file' do
        SiteSetting.authorized_extensions = ""
        SiteSetting.authorized_extensions_for_staff = "*"

        post "/uploads.json", params: {
          file: text_file,
          type: "composer",
        }

        data = response.parsed_body
        expect(data["errors"].first).to eq(I18n.t("upload.unauthorized", authorized_extensions: ''))
      end

      it 'returns an error when it could not determine the dimensions of an image' do
        post "/uploads.json", params: { file: fake_jpg, type: "composer" }

        expect(response.status).to eq(422)
        expect(Jobs::CreateAvatarThumbnails.jobs.size).to eq(0)
        message = response.parsed_body["errors"]
        expect(message).to contain_exactly(I18n.t("upload.images.size_not_found"))
      end
    end
  end
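
  # Helper shared by the specs below: signs in the fixture user, uploads the
  # given fixture file through the composer upload type, and returns the
  # resulting Upload record looked up from the response URL.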
  def upload_file(file, folder = "images")
    fake_logo = Rack::Test::UploadedFile.new(file_from_fixtures(file, folder))
    SiteSetting.authorized_extensions = "*"
    sign_in(user)

    post "/uploads.json", params: {
      file: fake_logo,
      type: "composer",
    }

    expect(response.status).to eq(200)

    url = response.parsed_body["url"]
    upload = Upload.get_from_url(url)
    upload
  end

  describe '#show' do
    let(:site) { "default" }
    let(:sha) { Digest::SHA1.hexdigest("discourse") }

    context "when using external storage" do
      fab!(:upload) { upload_file("small.pdf", "pdf") }

      before do
        setup_s3
      end

      it "returns 404" do
        upload = Fabricate(:upload_s3)
        get "/uploads/#{site}/#{upload.sha1}.#{upload.extension}"
        expect(response.response_code).to eq(404)
      end

      it "returns upload if url not migrated" do
        get "/uploads/#{site}/#{upload.sha1}.#{upload.extension}"
        expect(response.status).to eq(200)
      end
    end

    it "returns 404 when the upload doesn't exist" do
      get "/uploads/#{site}/#{sha}.pdf"
      expect(response.status).to eq(404)
    end

    it "returns 404 when the path is nil" do
      upload = upload_file("logo.png")
      upload.update_column(:url, "invalid-url")

      get "/uploads/#{site}/#{upload.sha1}.#{upload.extension}"
      expect(response.status).to eq(404)
    end

    it 'uses send_file' do
      upload = upload_file("logo.png")
      get "/uploads/#{site}/#{upload.sha1}.#{upload.extension}"
      expect(response.status).to eq(200)
      expect(response.headers["Content-Disposition"])
        .to eq(%Q|attachment; filename="#{upload.original_filename}"; filename*=UTF-8''#{upload.original_filename}|)
    end

    it 'returns 200 when js file' do
      ActionDispatch::FileHandler.any_instance.stubs(:match?).returns(false)
      upload = upload_file("test.js", "themes")
      get upload.url
      expect(response.status).to eq(200)
    end

    it "handles image without extension" do
      SiteSetting.authorized_extensions = "*"
      upload = upload_file("image_no_extension")

      get "/uploads/#{site}/#{upload.sha1}.json"
      expect(response.status).to eq(200)
      expect(response.headers["Content-Disposition"])
        .to eq(%Q|attachment; filename="#{upload.original_filename}"; filename*=UTF-8''#{upload.original_filename}|)
    end

    it "handles file without extension" do
      SiteSetting.authorized_extensions = "*"
      upload = upload_file("not_an_image")

      get "/uploads/#{site}/#{upload.sha1}.json"
      expect(response.status).to eq(200)
      expect(response.headers["Content-Disposition"])
        .to eq(%Q|attachment; filename="#{upload.original_filename}"; filename*=UTF-8''#{upload.original_filename}|)
    end

    context "prevent anons from downloading files" do
      it "returns 404 when an anonymous user tries to download a file" do
        upload = upload_file("small.pdf", "pdf")
        delete "/session/#{user.username}.json"

        SiteSetting.prevent_anons_from_downloading_files = true
        get "/uploads/#{site}/#{upload.sha1}.#{upload.extension}"
        expect(response.status).to eq(404)
      end
    end
  end
describe "#show_short" do
it 'inlines only supported image files' do
upload = upload_file("smallest.png")
get upload.short_path, params: { inline: true }
expect(response.header['Content-Type']).to eq('image/png')
expect(response.header['Content-Disposition']).to include('inline;')
upload.update!(original_filename: "test.xml")
get upload.short_path, params: { inline: true }
expect(response.header['Content-Type']).to eq('application/xml')
expect(response.header['Content-Disposition']).to include('attachment;')
end
describe "local store" do
fab!(:image_upload) { upload_file("smallest.png") }
it "returns the right response" do
get image_upload.short_path
expect(response.status).to eq(200)
expect(response.headers["Content-Disposition"])
.to include("attachment; filename=\"#{image_upload.original_filename}\"")
end
it "returns the right response when `inline` param is given" do
get "#{image_upload.short_path}?inline=1"
expect(response.status).to eq(200)
expect(response.headers["Content-Disposition"])
.to include("inline; filename=\"#{image_upload.original_filename}\"")
end
it "returns the right response when base62 param is invalid " do
get "/uploads/short-url/12345.png"
expect(response.status).to eq(404)
end
it "returns uploads with underscore in extension correctly" do
fake_upload = upload_file("fake.not_image")
get fake_upload.short_path
expect(response.status).to eq(200)
end
it "returns uploads with a dash and uppercase in extension correctly" do
fake_upload = upload_file("fake.long-FileExtension")
get fake_upload.short_path
expect(response.status).to eq(200)
end
it "returns the right response when anon tries to download a file " \
"when prevent_anons_from_downloading_files is true" do
delete "/session/#{user.username}.json"
SiteSetting.prevent_anons_from_downloading_files = true
get image_upload.short_path
expect(response.status).to eq(404)
end
end
describe "s3 store" do
let(:upload) { Fabricate(:upload_s3) }
before do
setup_s3
end
it "should redirect to the s3 URL" do
get upload.short_path
expect(response).to redirect_to(upload.url)
end
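
      # With secure media enabled, the short path should redirect to a
      # time-limited presigned S3 URL instead of the plain object URL.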
      context "when upload is secure and secure media enabled" do
        before do
          SiteSetting.secure_media = true
          upload.update(secure: true)
        end

        it "redirects to the signed_url_for_path" do
          sign_in(user)
          freeze_time

          get upload.short_path
          expect(response).to redirect_to(Discourse.store.signed_url_for_path(Discourse.store.get_path_for_upload(upload)))
          expect(response.header['Location']).not_to include('response-content-disposition=attachment')
        end

        it "respects the force download (dl) param" do
          sign_in(user)
          freeze_time

          get upload.short_path, params: { dl: '1' }
          expect(response.header['Location']).to include('response-content-disposition=attachment')
        end

        it "has the correct caching header" do
          sign_in(user)
          get upload.short_path

          expected_max_age = S3Helper::DOWNLOAD_URL_EXPIRES_AFTER_SECONDS - UploadsController::SECURE_REDIRECT_GRACE_SECONDS
          expect(expected_max_age).to be > 0 # Sanity check that the constants haven't been set to broken values

          expect(response.headers["Cache-Control"]).to eq("max-age=#{expected_max_age}, private")
        end

        it "raises invalid access if the user cannot access the upload access control post" do
          sign_in(user)

          post = Fabricate(:post)
          post.topic.change_category_to_id(Fabricate(:private_category, group: Fabricate(:group)).id)
          upload.update(access_control_post: post)

          get upload.short_path
          expect(response.code).to eq("403")
        end
      end
    end
  end
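
  # The /secure-media-uploads route: 404s on the local store, and on S3 either
  # redirects to a presigned URL or enforces the upload's access control post.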
describe "#show_secure" do
describe "local store" do
fab!(:image_upload) { upload_file("smallest.png") }
it "does not return secure media when using local store" do
secure_url = image_upload.url.sub("/uploads", "/secure-media-uploads")
get secure_url
expect(response.status).to eq(404)
end
end
describe "s3 store" do
let(:upload) { Fabricate(:upload_s3) }
FEATURE: Secure media allowing duplicated uploads with category-level privacy and post-based access rules (#8664) ### General Changes and Duplication * We now consider a post `with_secure_media?` if it is in a read-restricted category. * When uploading we now set an upload's secure status straight away. * When uploading if `SiteSetting.secure_media` is enabled, we do not check to see if the upload already exists using the `sha1` digest of the upload. The `sha1` column of the upload is filled with a `SecureRandom.hex(20)` value which is the same length as `Upload::SHA1_LENGTH`. The `original_sha1` column is filled with the _real_ sha1 digest of the file. * Whether an upload `should_be_secure?` is now determined by whether the `access_control_post` is `with_secure_media?` (if there is no access control post then we leave the secure status as is). * When serializing the upload, we now cook the URL if the upload is secure. This is so it shows up correctly in the composer preview, because we set secure status on upload. ### Viewing Secure Media * The secure-media-upload URL will take the post that the upload is attached to into account via `Guardian.can_see?` for access permissions * If there is no `access_control_post` then we just deliver the media. This should be a rare occurrance and shouldn't cause issues as the `access_control_post` is set when `link_post_uploads` is called via `CookedPostProcessor` ### Removed We no longer do any of these because we do not reuse uploads by sha1 if secure media is enabled. * We no longer have a way to prevent cross-posting of a secure upload from a private context to a public context. * We no longer have to set `secure: false` for uploads when uploading for a theme component.
2020-01-15 21:50:27 -06:00
let(:secure_url) { upload.url.sub(SiteSetting.Upload.absolute_base_url, "/secure-media-uploads") }
before do
setup_s3
SiteSetting.authorized_extensions = "*"
SiteSetting.secure_media = true
end
it "should return 404 for anonymous requests requests" do
        get secure_url
        expect(response.status).to eq(404)
      end

      it "should return signed url for legitimate request" do
        sign_in(user)
        get secure_url
        expect(response.status).to eq(302)
        expect(response.redirect_url).to match("Amz-Expires")
      end

      it "should return secure media URL when looking up urls" do
        upload.update_column(:secure, true)
        sign_in(user)

        post "/uploads/lookup-urls.json", params: { short_urls: [upload.short_url] }
        expect(response.status).to eq(200)

        result = response.parsed_body
        expect(result[0]["url"]).to match("secure-media-uploads")
      end

      context "when the upload cannot be found from the URL" do
        it "returns a 404" do
          sign_in(user)
          upload.update(sha1: 'test')

          get secure_url
          expect(response.status).to eq(404)
        end
      end

      context "when the access_control_post_id has been set for the upload" do
        let(:post) { Fabricate(:post) }
        let!(:private_category) { Fabricate(:private_category, group: Fabricate(:group)) }

        before do
          sign_in(user)
          upload.update(access_control_post_id: post.id)
        end

        context "when the user has access to the post via guardian" do
          it "should return signed url for legitimate request" do
            sign_in(user)
            get secure_url
            expect(response.status).to eq(302)
            expect(response.redirect_url).to match("Amz-Expires")
          end
        end

        context "when the user does not have access to the post via guardian" do
          before do
            post.topic.change_category_to_id(private_category.id)
          end

          it "returns a 403" do
            sign_in(user)
            get secure_url
            expect(response.status).to eq(403)
          end
        end
      end
context "when the upload is an attachment file" do
before do
upload.update(original_filename: 'test.pdf')
end
it "redirects to the signed_url_for_path" do
sign_in(user)
get secure_url
expect(response.status).to eq(302)
expect(response.redirect_url).to match("Amz-Expires")
end
context "when the user does not have access to the access control post via guardian" do
let(:post) { Fabricate(:post) }
let!(:private_category) { Fabricate(:private_category, group: Fabricate(:group)) }
before do
post.topic.change_category_to_id(private_category.id)
upload.update(access_control_post_id: post.id)
end
it "returns a 403" do
sign_in(user)
get secure_url
expect(response.status).to eq(403)
end
end
context "when the prevent_anons_from_downloading_files setting is enabled and the user is anon" do
before do
SiteSetting.prevent_anons_from_downloading_files = true
end
it "returns a 404" do
delete "/session/#{user.username}.json"
get secure_url
expect(response.status).to eq(404)
end
end
end
context "when secure media is disabled" do
before do
SiteSetting.secure_media = false
end
context "if the upload is secure false, meaning the ACL is probably public" do
before do
upload.update(secure: false)
end
it "should redirect to the regular show route" do
secure_url = upload.url.sub(SiteSetting.Upload.absolute_base_url, "/secure-media-uploads")
sign_in(user)
get secure_url
expect(response.status).to eq(302)
expect(response.redirect_url).to eq(Discourse.store.cdn_url(upload.url))
end
end
context "if the upload is secure true, meaning the ACL is probably private" do
before do
upload.update(secure: true)
end
it "should redirect to the presigned URL still otherwise we will get a 403" do
secure_url = upload.url.sub(SiteSetting.Upload.absolute_base_url, "/secure-media-uploads")
sign_in(user)
get secure_url
expect(response.status).to eq(302)
expect(response.redirect_url).to match("Amz-Expires")
end
end
end
end
end
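
  # lookup-urls resolves short upload URLs (upload://...) to their full paths,
  # returning secure-media-uploads URLs when secure media applies.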
  describe '#lookup_urls' do
    it 'can look up long urls' do
      sign_in(user)
      upload = Fabricate(:upload)

      post "/uploads/lookup-urls.json", params: { short_urls: [upload.short_url] }
      expect(response.status).to eq(200)

      result = response.parsed_body
      expect(result[0]["url"]).to eq(upload.url)
      expect(result[0]["short_path"]).to eq(upload.short_path)
    end

    describe 'secure media' do
      let(:upload) { Fabricate(:upload_s3, secure: true) }

      before do
        setup_s3
        SiteSetting.authorized_extensions = "pdf|png"
        SiteSetting.secure_media = true
      end

      it 'returns secure url for a secure media upload' do
        sign_in(user)

        post "/uploads/lookup-urls.json", params: { short_urls: [upload.short_url] }
        expect(response.status).to eq(200)

        result = response.parsed_body
        expect(result[0]["url"]).to match("/secure-media-uploads")
        expect(result[0]["short_path"]).to eq(upload.short_path)
      end

      it 'returns secure urls for non-media uploads' do
        upload.update!(original_filename: "not-an-image.pdf", extension: "pdf")
        sign_in(user)

        post "/uploads/lookup-urls.json", params: { short_urls: [upload.short_url] }
        expect(response.status).to eq(200)

        result = response.parsed_body
        expect(result[0]["url"]).to match("/secure-media-uploads")
        expect(result[0]["short_path"]).to eq(upload.short_path)
      end
    end
  end
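
  # lookup-metadata returns filename and dimension details for an upload URL
  # and requires a signed-in user.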
  describe '#metadata' do
    fab!(:upload) { Fabricate(:upload) }

    describe 'when url is missing' do
      it 'should return the right response' do
        post "/uploads/lookup-metadata.json"
        expect(response.status).to eq(403)
      end
    end

    describe 'when not signed in' do
      it 'should return the right response' do
        post "/uploads/lookup-metadata.json", params: { url: upload.url }
        expect(response.status).to eq(403)
      end
    end

    describe 'when signed in' do
      before do
        sign_in(user)
      end

      describe 'when url is invalid' do
        it 'should return the right response' do
          post "/uploads/lookup-metadata.json", params: { url: 'abc' }
          expect(response.status).to eq(404)
        end
      end

      it "should return the right response" do
        post "/uploads/lookup-metadata.json", params: { url: upload.url }
        expect(response.status).to eq(200)

        result = response.parsed_body
        expect(result["original_filename"]).to eq(upload.original_filename)
        expect(result["width"]).to eq(upload.width)
        expect(result["height"]).to eq(upload.height)
        expect(result["human_filesize"]).to eq(upload.human_filesize)
      end
    end
  end
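
  # Direct S3 uploads: the client asks for a presigned PUT URL, and the server
  # records an ExternalUploadStub tracking the temporary S3 object until the
  # upload is completed.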
describe "#generate_presigned_put" do
context "when the store is external" do
before do
sign_in(user)
FEATURE: Initial implementation of direct S3 uploads with uppy and stubs (#13787) This adds a few different things to allow for direct S3 uploads using uppy. **These changes are still not the default.** There are hidden `enable_experimental_image_uploader` and `enable_direct_s3_uploads` settings that must be turned on for any of this code to be used, and even if they are turned on only the User Card Background for the user profile actually uses uppy-image-uploader. A new `ExternalUploadStub` model and database table is introduced in this pull request. This is used to keep track of uploads that are uploaded to a temporary location in S3 with the direct to S3 code, and they are eventually deleted a) when the direct upload is completed and b) after a certain time period of not being used. ### Starting a direct S3 upload When an S3 direct upload is initiated with uppy, we first request a presigned PUT URL from the new `generate-presigned-put` endpoint in `UploadsController`. This generates an S3 key in the `temp` folder inside the correct bucket path, along with any metadata from the clientside (e.g. the SHA1 checksum described below). This will also create an `ExternalUploadStub` and store the details of the temp object key and the file being uploaded. Once the clientside has this URL, uppy will upload the file direct to S3 using the presigned URL. Once the upload is complete we go to the next stage. ### Completing a direct S3 upload Once the upload to S3 is done we call the new `complete-external-upload` route with the unique identifier of the `ExternalUploadStub` created earlier. Only the user who made the stub can complete the external upload. One of two paths is followed via the `ExternalUploadManager`. 1. If the object in S3 is too large (currently 100mb defined by `ExternalUploadManager::DOWNLOAD_LIMIT`) we do not download and generate the SHA1 for that file. Instead we create the `Upload` record via `UploadCreator` and simply copy it to its final destination on S3 then delete the initial temp file. Several modifications to `UploadCreator` have been made to accommodate this. 2. If the object in S3 is small enough, we download it. When the temporary S3 file is downloaded, we compare the SHA1 checksum generated by the browser with the actual SHA1 checksum of the file generated by ruby. The browser SHA1 checksum is stored on the object in S3 with metadata, and is generated via the `UppyChecksum` plugin. Keep in mind that some browsers will not generate this due to compatibility or other issues. We then follow the normal `UploadCreator` path with one exception. To cut down on having to re-upload the file again, if there are no changes (such as resizing etc) to the file in `UploadCreator` we follow the same copy + delete temp path that we do for files that are too large. 3. Finally we return the serialized upload record back to the client There are several errors that could happen that are handled by `UploadsController` as well. Also in this PR is some refactoring of `displayErrorForUpload` to handle both uppy and jquery file uploader errors.
2021-07-27 17:42:25 -05:00
SiteSetting.enable_direct_s3_uploads = true
setup_s3
end
it "errors if the correct params are not provided" do
post "/uploads/generate-presigned-put.json", params: { file_name: "test.png" }
expect(response.status).to eq(400)
post "/uploads/generate-presigned-put.json", params: { type: "card_background" }
expect(response.status).to eq(400)
end
it "generates a presigned URL and creates an external upload stub" do
FEATURE: Uppy direct S3 multipart uploads in composer (#14051) This pull request introduces the endpoints required, and the JavaScript functionality in the `ComposerUppyUpload` mixin, for direct S3 multipart uploads. There are four new endpoints in the uploads controller: * `create-multipart.json` - Creates the multipart upload in S3 along with an `ExternalUploadStub` record, storing information about the file in the same way as `generate-presigned-put.json` does for regular direct S3 uploads * `batch-presign-multipart-parts.json` - Takes a list of part numbers and the unique identifier for an `ExternalUploadStub` record, and generates the presigned URLs for those parts if the multipart upload still exists and if the user has permission to access that upload * `complete-multipart.json` - Completes the multipart upload in S3. Needs the full list of part numbers and their associated ETags which are returned when the part is uploaded to the presigned URL above. Only works if the user has permission to access the associated `ExternalUploadStub` record and the multipart upload still exists. After we confirm the upload is complete in S3, we go through the regular `UploadCreator` flow, the same as `complete-external-upload.json`, and promote the temporary upload S3 into a full `Upload` record, moving it to its final destination. * `abort-multipart.json` - Aborts the multipart upload on S3 and destroys the `ExternalUploadStub` record if the user has permission to access that upload. Also added are a few new columns to `ExternalUploadStub`: * multipart - Whether or not this is a multipart upload * external_upload_identifier - The "upload ID" for an S3 multipart upload * filesize - The size of the file when the `create-multipart.json` or `generate-presigned-put.json` is called. This is used for validation. When the user completes a direct S3 upload, either regular or multipart, we take the `filesize` that was captured when the `ExternalUploadStub` was first created and compare it with the final `Content-Length` size of the file where it is stored in S3. Then, if the two do not match, we throw an error, delete the file on S3, and ban the user from uploading files for N (default 5) minutes. This would only happen if the user uploads a different file than what they first specified, or in the case of multipart uploads uploaded larger chunks than needed. This is done to prevent abuse of S3 storage by bad actors. Also included in this PR is an update to vendor/uppy.js. This has been built locally from the latest uppy source at https://github.com/transloadit/uppy/commit/d613b849a6591083f8a0968aa8d66537e231bbcd. This must be done so that I can get my multipart upload changes into Discourse. When the Uppy team cuts a proper release, we can bump the package.json versions instead.
2021-08-24 17:46:54 -05:00
post "/uploads/generate-presigned-put.json", params: {
file_name: "test.png", type: "card_background", file_size: 1024
}
FEATURE: Initial implementation of direct S3 uploads with uppy and stubs (#13787) This adds a few different things to allow for direct S3 uploads using uppy. **These changes are still not the default.** There are hidden `enable_experimental_image_uploader` and `enable_direct_s3_uploads` settings that must be turned on for any of this code to be used, and even if they are turned on only the User Card Background for the user profile actually uses uppy-image-uploader. A new `ExternalUploadStub` model and database table is introduced in this pull request. This is used to keep track of uploads that are uploaded to a temporary location in S3 with the direct to S3 code, and they are eventually deleted a) when the direct upload is completed and b) after a certain time period of not being used. ### Starting a direct S3 upload When an S3 direct upload is initiated with uppy, we first request a presigned PUT URL from the new `generate-presigned-put` endpoint in `UploadsController`. This generates an S3 key in the `temp` folder inside the correct bucket path, along with any metadata from the clientside (e.g. the SHA1 checksum described below). This will also create an `ExternalUploadStub` and store the details of the temp object key and the file being uploaded. Once the clientside has this URL, uppy will upload the file direct to S3 using the presigned URL. Once the upload is complete we go to the next stage. ### Completing a direct S3 upload Once the upload to S3 is done we call the new `complete-external-upload` route with the unique identifier of the `ExternalUploadStub` created earlier. Only the user who made the stub can complete the external upload. One of two paths is followed via the `ExternalUploadManager`. 1. If the object in S3 is too large (currently 100mb defined by `ExternalUploadManager::DOWNLOAD_LIMIT`) we do not download and generate the SHA1 for that file. Instead we create the `Upload` record via `UploadCreator` and simply copy it to its final destination on S3 then delete the initial temp file. Several modifications to `UploadCreator` have been made to accommodate this. 2. If the object in S3 is small enough, we download it. When the temporary S3 file is downloaded, we compare the SHA1 checksum generated by the browser with the actual SHA1 checksum of the file generated by ruby. The browser SHA1 checksum is stored on the object in S3 with metadata, and is generated via the `UppyChecksum` plugin. Keep in mind that some browsers will not generate this due to compatibility or other issues. We then follow the normal `UploadCreator` path with one exception. To cut down on having to re-upload the file again, if there are no changes (such as resizing etc) to the file in `UploadCreator` we follow the same copy + delete temp path that we do for files that are too large. 3. Finally we return the serialized upload record back to the client There are several errors that could happen that are handled by `UploadsController` as well. Also in this PR is some refactoring of `displayErrorForUpload` to handle both uppy and jquery file uploader errors.
2021-07-27 17:42:25 -05:00
expect(response.status).to eq(200)
result = response.parsed_body
external_upload_stub = ExternalUploadStub.where(
unique_identifier: result["unique_identifier"],
original_filename: "test.png",
created_by: user,
FEATURE: Uppy direct S3 multipart uploads in composer (#14051) This pull request introduces the endpoints required, and the JavaScript functionality in the `ComposerUppyUpload` mixin, for direct S3 multipart uploads. There are four new endpoints in the uploads controller: * `create-multipart.json` - Creates the multipart upload in S3 along with an `ExternalUploadStub` record, storing information about the file in the same way as `generate-presigned-put.json` does for regular direct S3 uploads * `batch-presign-multipart-parts.json` - Takes a list of part numbers and the unique identifier for an `ExternalUploadStub` record, and generates the presigned URLs for those parts if the multipart upload still exists and if the user has permission to access that upload * `complete-multipart.json` - Completes the multipart upload in S3. Needs the full list of part numbers and their associated ETags which are returned when the part is uploaded to the presigned URL above. Only works if the user has permission to access the associated `ExternalUploadStub` record and the multipart upload still exists. After we confirm the upload is complete in S3, we go through the regular `UploadCreator` flow, the same as `complete-external-upload.json`, and promote the temporary upload S3 into a full `Upload` record, moving it to its final destination. * `abort-multipart.json` - Aborts the multipart upload on S3 and destroys the `ExternalUploadStub` record if the user has permission to access that upload. Also added are a few new columns to `ExternalUploadStub`: * multipart - Whether or not this is a multipart upload * external_upload_identifier - The "upload ID" for an S3 multipart upload * filesize - The size of the file when the `create-multipart.json` or `generate-presigned-put.json` is called. This is used for validation. When the user completes a direct S3 upload, either regular or multipart, we take the `filesize` that was captured when the `ExternalUploadStub` was first created and compare it with the final `Content-Length` size of the file where it is stored in S3. Then, if the two do not match, we throw an error, delete the file on S3, and ban the user from uploading files for N (default 5) minutes. This would only happen if the user uploads a different file than what they first specified, or in the case of multipart uploads uploaded larger chunks than needed. This is done to prevent abuse of S3 storage by bad actors. Also included in this PR is an update to vendor/uppy.js. This has been built locally from the latest uppy source at https://github.com/transloadit/uppy/commit/d613b849a6591083f8a0968aa8d66537e231bbcd. This must be done so that I can get my multipart upload changes into Discourse. When the Uppy team cuts a proper release, we can bump the package.json versions instead.
2021-08-24 17:46:54 -05:00
upload_type: "card_background",
filesize: 1024
)
expect(external_upload_stub.exists?).to eq(true)
expect(result["key"]).to include(FileStore::S3Store::TEMPORARY_UPLOAD_PREFIX)
expect(result["url"]).to include(FileStore::S3Store::TEMPORARY_UPLOAD_PREFIX)
expect(result["url"]).to include("Amz-Expires")
end
it "includes accepted metadata in the presigned url when provided" do
post "/uploads/generate-presigned-put.json", **{
params: {
file_name: "test.png",
file_size: 1024,
type: "card_background",
metadata: {
"sha1-checksum" => "testing",
"blah" => "wontbeincluded"
}
}
}
expect(response.status).to eq(200)
result = response.parsed_body
expect(result['url']).to include("&x-amz-meta-sha1-checksum=testing")
expect(result['url']).not_to include("&x-amz-meta-blah=wontbeincluded")
end
it "rate limits" do
RateLimiter.enable
RateLimiter.clear_all!
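# the rate limit constant is stubbed down to one request per minute so the second request below trips the limiter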
stub_const(UploadsController, "PRESIGNED_PUT_RATE_LIMIT_PER_MINUTE", 1) do
post "/uploads/generate-presigned-put.json", params: { file_name: "test.png", type: "card_background", file_size: 1024 }
post "/uploads/generate-presigned-put.json", params: { file_name: "test.png", type: "card_background", file_size: 1024 }
end
expect(response.status).to eq(429)
end
end
context "when the store is not external" do
before do
sign_in(user)
end
it "returns 404" do
post "/uploads/generate-presigned-put.json", params: { file_name: "test.png", type: "card_background", file_size: 1024 }
expect(response.status).to eq(404)
end
end
end
describe "#create_multipart" do
context "when the store is external" do
let(:mock_multipart_upload_id) { "ibZBv_75gd9r8lH_gqXatLdxMVpAlj6CFTR.OwyF3953YdwbcQnMA2BLGn8Lx12fQNICtMw5KyteFeHw.Sjng--" }
before do
sign_in(user)
SiteSetting.enable_direct_s3_uploads = true
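# setup_s3 is a spec helper that switches the default store to a test S3 bucket; WebMock intercepts any actual requests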
setup_s3
FileStore::S3Store.any_instance.stubs(:temporary_upload_path).returns(
"uploads/default/test_0/temp/28fccf8259bbe75b873a2bd2564b778c/test.png"
)
end
it "errors if the correct params are not provided" do
post "/uploads/create-multipart.json", params: { file_name: "test.png" }
expect(response.status).to eq(400)
post "/uploads/create-multipart.json", params: { upload_type: "composer" }
expect(response.status).to eq(400)
end
it "returns 422 when the create request errors" do
FileStore::S3Store.any_instance.stubs(:create_multipart).raises(Aws::S3::Errors::ServiceError.new({}, "test"))
post "/uploads/create-multipart.json", **{
params: {
file_name: "test.png",
file_size: 1024,
upload_type: "composer",
}
}
expect(response.status).to eq(422)
end
it "returns 422 when the file is an attachment and it's too big" do
SiteSetting.max_attachment_size_kb = 1024
post "/uploads/create-multipart.json", **{
params: {
file_name: "test.zip",
file_size: 9999999,
upload_type: "composer",
}
}
expect(response.status).to eq(422)
expect(response.body).to include(I18n.t("upload.attachments.too_large_humanized", max_size: "1 MB"))
end
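# stubs S3's InitiateMultipartUpload endpoint with the XML response the client expects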
def stub_create_multipart_request
create_multipart_result = <<~BODY
<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n
<InitiateMultipartUploadResult>
<Bucket>s3-upload-bucket</Bucket>
<Key>uploads/default/test_0/temp/28fccf8259bbe75b873a2bd2564b778c/test.png</Key>
<UploadId>#{mock_multipart_upload_id}</UploadId>
</InitiateMultipartUploadResult>
BODY
stub_request(
:post,
"https://s3-upload-bucket.s3.us-west-1.amazonaws.com/uploads/default/test_0/temp/28fccf8259bbe75b873a2bd2564b778c/test.png?uploads"
).to_return({ status: 200, body: create_multipart_result })
end
it "creates a multipart upload and creates an external upload stub that is marked as multipart" do
stub_create_multipart_request
post "/uploads/create-multipart.json", **{
params: {
file_name: "test.png",
file_size: 1024,
upload_type: "composer",
}
}
expect(response.status).to eq(200)
result = response.parsed_body
external_upload_stub = ExternalUploadStub.where(
unique_identifier: result["unique_identifier"],
original_filename: "test.png",
created_by: user,
upload_type: "composer",
key: result["key"],
external_upload_identifier: mock_multipart_upload_id,
multipart: true,
filesize: 1024
)
expect(external_upload_stub.exists?).to eq(true)
expect(result["key"]).to include(FileStore::S3Store::TEMPORARY_UPLOAD_PREFIX)
expect(result["external_upload_identifier"]).to eq(mock_multipart_upload_id)
expect(result["key"]).to eq(external_upload_stub.last.key)
end
it "includes accepted metadata when calling the store to create_multipart, but only allowed keys" do
stub_create_multipart_request
FileStore::S3Store.any_instance.expects(:create_multipart).with(
"test.png", "image/png", metadata: { "sha1-checksum" => "testing" }
).returns({ key: "test" })
post "/uploads/create-multipart.json", **{
params: {
file_name: "test.png",
file_size: 1024,
upload_type: "composer",
metadata: {
"sha1-checksum" => "testing",
"blah" => "wontbeincluded"
}
}
}
expect(response.status).to eq(200)
end
it "rate limits" do
RateLimiter.enable
RateLimiter.clear_all!
stub_create_multipart_request
stub_const(UploadsController, "CREATE_MULTIPART_RATE_LIMIT_PER_MINUTE", 1) do
post "/uploads/create-multipart.json", params: {
file_name: "test.png",
upload_type: "composer",
file_size: 1024
}
expect(response.status).to eq(200)
post "/uploads/create-multipart.json", params: {
file_name: "test.png",
upload_type: "composer",
file_size: 1024
}
expect(response.status).to eq(429)
end
end
end
context "when the store is not external" do
before do
sign_in(user)
end
it "returns 404" do
post "/uploads/create-multipart.json", params: {
file_name: "test.png",
upload_type: "composer",
file_size: 1024
}
expect(response.status).to eq(404)
end
end
end
describe "#batch_presign_multipart_parts" do
fab!(:mock_multipart_upload_id) { "ibZBv_75gd9r8lH_gqXatLdxMVpAlj6CFTR.OwyF3953YdwbcQnMA2BLGn8Lx12fQNICtMw5KyteFeHw.Sjng--" }
fab!(:external_upload_stub) do
Fabricate(:image_external_upload_stub, created_by: user, multipart: true, external_upload_identifier: mock_multipart_upload_id)
end
context "when the store is external" do
before do
sign_in(user)
SiteSetting.enable_direct_s3_uploads = true
setup_s3
end
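# stubs S3's ListParts endpoint, which the controller calls to check the multipart upload still exists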
def stub_list_multipart_request
list_multipart_result = <<~BODY
<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n
<ListPartsResult>
<Bucket>s3-upload-bucket</Bucket>
<Key>#{external_upload_stub.key}</Key>
<UploadId>#{mock_multipart_upload_id}</UploadId>
<PartNumberMarker>0</PartNumberMarker>
<NextPartNumberMarker>0</NextPartNumberMarker>
<MaxParts>1</MaxParts>
<IsTruncated>false</IsTruncated>
<Part>
<ETag>test</ETag>
<LastModified>#{Time.zone.now}</LastModified>
<PartNumber>1</PartNumber>
<Size>#{5.megabytes}</Size>
</Part>
<Initiator>
<DisplayName>test-upload-user</DisplayName>
<ID>arn:aws:iam::123:user/test-upload-user</ID>
</Initiator>
<Owner>
<DisplayName></DisplayName>
<ID>12345</ID>
</Owner>
<StorageClass>STANDARD</StorageClass>
</ListPartsResult>
BODY
stub_request(:get, "https://s3-upload-bucket.s3.us-west-1.amazonaws.com/#{external_upload_stub.key}?uploadId=#{mock_multipart_upload_id}").to_return({ status: 200, body: list_multipart_result })
end
it "errors if the correct params are not provided" do
post "/uploads/batch-presign-multipart-parts.json", params: {}
expect(response.status).to eq(400)
end
it "errors if the part_numbers do not contain numbers between 1 and 10000" do
post "/uploads/batch-presign-multipart-parts.json", params: {
unique_identifier: external_upload_stub.unique_identifier,
part_numbers: [-1, 0, 1, 2, 3, 4]
}
expect(response.status).to eq(400)
expect(response.body).to include("You supplied invalid parameters to the request: Each part number should be between 1 and 10000")
post "/uploads/batch-presign-multipart-parts.json", params: {
unique_identifier: external_upload_stub.unique_identifier,
part_numbers: [3, 4, "blah"]
}
expect(response.status).to eq(400)
expect(response.body).to include("You supplied invalid parameters to the request: Each part number should be between 1 and 10000")
end
it "returns 404 when the upload stub does not exist" do
post "/uploads/batch-presign-multipart-parts.json", params: {
unique_identifier: "unknown",
part_numbers: [1, 2, 3]
}
expect(response.status).to eq(404)
end
it "returns 404 when the upload stub does not belong to the user" do
external_upload_stub.update!(created_by: Fabricate(:user))
post "/uploads/batch-presign-multipart-parts.json", params: {
unique_identifier: external_upload_stub.unique_identifier,
part_numbers: [1, 2, 3]
}
expect(response.status).to eq(404)
end
it "returns 404 when the multipart upload does not exist" do
FileStore::S3Store.any_instance.stubs(:list_multipart_parts).raises(Aws::S3::Errors::NoSuchUpload.new("test", "test"))
post "/uploads/batch-presign-multipart-parts.json", params: {
unique_identifier: external_upload_stub.unique_identifier,
part_numbers: [1, 2, 3]
}
expect(response.status).to eq(404)
end
it "returns an object with the presigned URLs with the part numbers as keys" do
stub_list_multipart_request
post "/uploads/batch-presign-multipart-parts.json", params: {
unique_identifier: external_upload_stub.unique_identifier,
part_numbers: [2, 3, 4]
}
expect(response.status).to eq(200)
result = response.parsed_body
expect(result["presigned_urls"].keys).to eq(["2", "3", "4"])
expect(result["presigned_urls"]["2"]).to include("?partNumber=2&uploadId=#{mock_multipart_upload_id}")
expect(result["presigned_urls"]["3"]).to include("?partNumber=3&uploadId=#{mock_multipart_upload_id}")
expect(result["presigned_urls"]["4"]).to include("?partNumber=4&uploadId=#{mock_multipart_upload_id}")
end
it "rate limits" do
RateLimiter.enable
RateLimiter.clear_all!
stub_const(UploadsController, "BATCH_PRESIGN_RATE_LIMIT_PER_MINUTE", 1) do
stub_list_multipart_request
post "/uploads/batch-presign-multipart-parts.json", params: {
unique_identifier: external_upload_stub.unique_identifier,
part_numbers: [1, 2, 3]
}
expect(response.status).to eq(200)
post "/uploads/batch-presign-multipart-parts.json", params: {
unique_identifier: external_upload_stub.unique_identifier,
part_numbers: [1, 2, 3]
}
expect(response.status).to eq(429)
end
end
end
context "when the store is not external" do
before do
sign_in(user)
end
it "returns 404" do
post "/uploads/batch-presign-multipart-parts.json", params: {
unique_identifier: external_upload_stub.unique_identifier,
part_numbers: [1, 2, 3]
}
expect(response.status).to eq(404)
end
end
end
describe "#complete_multipart" do
let(:upload_base_url) { "https://#{SiteSetting.s3_upload_bucket}.s3.#{SiteSetting.s3_region}.amazonaws.com" }
let(:mock_multipart_upload_id) { "ibZBv_75gd9r8lH_gqXatLdxMVpAlj6CFTR.OwyF3953YdwbcQnMA2BLGn8Lx12fQNICtMw5KyteFeHw.Sjng--" }
let!(:external_upload_stub) do
Fabricate(:image_external_upload_stub, created_by: user, multipart: true, external_upload_identifier: mock_multipart_upload_id)
end
context "when the store is external" do
before do
sign_in(user)
SiteSetting.enable_direct_s3_uploads = true
setup_s3
end
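# same ListParts stub as in the batch-presign specs; complete-multipart also verifies the upload exists before completing it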
def stub_list_multipart_request
list_multipart_result = <<~BODY
<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n
<ListPartsResult>
<Bucket>s3-upload-bucket</Bucket>
<Key>#{external_upload_stub.key}</Key>
<UploadId>#{mock_multipart_upload_id}</UploadId>
<PartNumberMarker>0</PartNumberMarker>
<NextPartNumberMarker>0</NextPartNumberMarker>
<MaxParts>1</MaxParts>
<IsTruncated>false</IsTruncated>
<Part>
<ETag>test</ETag>
<LastModified>#{Time.zone.now}</LastModified>
<PartNumber>1</PartNumber>
<Size>#{5.megabytes}</Size>
</Part>
<Initiator>
<DisplayName>test-upload-user</DisplayName>
<ID>arn:aws:iam::123:user/test-upload-user</ID>
</Initiator>
<Owner>
<DisplayName></DisplayName>
<ID>12345</ID>
</Owner>
<StorageClass>STANDARD</StorageClass>
</ListPartsResult>
BODY
stub_request(:get, "#{upload_base_url}/#{external_upload_stub.key}?uploadId=#{mock_multipart_upload_id}").to_return({ status: 200, body: list_multipart_result })
end
it "errors if the correct params are not provided" do
post "/uploads/complete-multipart.json", params: {}
expect(response.status).to eq(400)
end
it "errors if the part_numbers do not contain numbers between 1 and 10000" do
stub_list_multipart_request
post "/uploads/complete-multipart.json", params: {
unique_identifier: external_upload_stub.unique_identifier,
parts: [{ part_number: -1, etag: "test1" }]
}
expect(response.status).to eq(400)
expect(response.body).to include("You supplied invalid parameters to the request: Each part number should be between 1 and 10000")
post "/uploads/complete-multipart.json", params: {
unique_identifier: external_upload_stub.unique_identifier,
parts: [{ part_number: 20001, etag: "test1" }]
}
expect(response.status).to eq(400)
expect(response.body).to include("You supplied invalid parameters to the request: Each part number should be between 1 and 10000")
post "/uploads/complete-multipart.json", params: {
unique_identifier: external_upload_stub.unique_identifier,
parts: [{ part_number: "blah", etag: "test1" }]
}
expect(response.status).to eq(400)
expect(response.body).to include("You supplied invalid parameters to the request: Each part number should be between 1 and 10000")
end
it "errors if any of the parts objects have missing values" do
stub_list_multipart_request
post "/uploads/complete-multipart.json", params: {
unique_identifier: external_upload_stub.unique_identifier,
parts: [{ part_number: 1 }]
}
expect(response.status).to eq(400)
expect(response.body).to include("All parts must have an etag")
end
it "returns 404 when the upload stub does not exist" do
post "/uploads/complete-multipart.json", params: {
unique_identifier: "unknown",
parts: [{ part_number: 1, etag: "test1" }]
}
expect(response.status).to eq(404)
end
it "returns 422 when the complete request errors" do
FileStore::S3Store.any_instance.stubs(:complete_multipart).raises(Aws::S3::Errors::ServiceError.new({}, "test"))
stub_list_multipart_request
post "/uploads/complete-multipart.json", params: {
unique_identifier: external_upload_stub.unique_identifier,
parts: [{ part_number: 1, etag: "test1" }]
}
expect(response.status).to eq(422)
end
it "returns 404 when the upload stub does not belong to the user" do
external_upload_stub.update!(created_by: Fabricate(:user))
post "/uploads/complete-multipart.json", params: {
unique_identifier: external_upload_stub.unique_identifier,
parts: [{ part_number: 1, etag: "test1" }]
}
expect(response.status).to eq(404)
end
it "returns 404 when the multipart upload does not exist" do
FileStore::S3Store.any_instance.stubs(:list_multipart_parts).raises(Aws::S3::Errors::NoSuchUpload.new("test", "test"))
post "/uploads/complete-multipart.json", params: {
unique_identifier: external_upload_stub.unique_identifier,
parts: [{ part_number: 1, etag: "test1" }]
}
expect(response.status).to eq(404)
end
it "completes the multipart upload, creates the Upload record, and returns a serialized Upload record" do
temp_location = "#{upload_base_url}/#{external_upload_stub.key}"
stub_list_multipart_request
stub_request(
:post,
"#{temp_location}?uploadId=#{external_upload_stub.external_upload_identifier}"
).with(
body: "<CompleteMultipartUpload xmlns=\"http://s3.amazonaws.com/doc/2006-03-01/\"><Part><ETag>test1</ETag><PartNumber>1</PartNumber></Part><Part><ETag>test2</ETag><PartNumber>2</PartNumber></Part></CompleteMultipartUpload>"
).to_return(status: 200, body: <<~XML)
<?xml version="1.0" encoding="UTF-8"?>
<CompleteMultipartUploadResult>
<Location>#{temp_location}</Location>
<Bucket>s3-upload-bucket</Bucket>
<Key>#{external_upload_stub.key}</Key>
<ETag>testfinal</ETag>
</CompleteMultipartUploadResult>
XML
# all of the ExternalUploadManager functionality is already tested with S3
# stubs in its own spec, so we can just stub the response here
upload = Fabricate(:upload)
ExternalUploadManager.any_instance.stubs(:promote_to_upload!).returns(upload)
post "/uploads/complete-multipart.json", params: {
unique_identifier: external_upload_stub.unique_identifier,
parts: [{ part_number: 1, etag: "test1" }, { part_number: 2, etag: "test2" }]
}
expect(response.status).to eq(200)
result = response.parsed_body
expect(result["upload"]).to eq(JSON.parse(UploadSerializer.new(upload).to_json)["upload"])
end
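
      # The endpoint is rate limited per minute; stub_const temporarily lowers
      # the cap to 1 so the second request inside the block trips the limiter.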
it "rate limits" do
RateLimiter.enable
RateLimiter.clear_all!
stub_const(UploadsController, "COMPLETE_MULTIPART_RATE_LIMIT_PER_MINUTE", 1) do
post "/uploads/complete-multipart.json", params: {
unique_identifier: "blah",
parts: [{ part_number: 1, etag: "test1" }, { part_number: 2, etag: "test2" }]
}
post "/uploads/complete-multipart.json", params: {
unique_identifier: "blah",
parts: [{ part_number: 1, etag: "test1" }, { part_number: 2, etag: "test2" }]
}
end
expect(response.status).to eq(429)
end
end
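
    # When direct S3 uploads are not enabled, the multipart endpoints are not
    # available, so the request 404s regardless of the params.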
context "when the store is not external" do
before do
sign_in(user)
end
it "returns 404" do
post "/uploads/complete-multipart.json", params: {
unique_identifier: external_upload_stub.external_upload_identifier,
parts: [
{
part_number: 1,
etag: "test1"
},
{
part_number: 2,
etag: "test2"
}
]
}
expect(response.status).to eq(404)
end
end
end
describe "#abort_multipart" do
let(:upload_base_url) { "https://#{SiteSetting.s3_upload_bucket}.s3.#{SiteSetting.s3_region}.amazonaws.com" }
let(:mock_multipart_upload_id) { "ibZBv_75gd9r8lH_gqXatLdxMVpAlj6CFTR.OwyF3953YdwbcQnMA2BLGn8Lx12fQNICtMw5KyteFeHw.Sjng--" }
let!(:external_upload_stub) do
Fabricate(:image_external_upload_stub, created_by: user, multipart: true, external_upload_identifier: mock_multipart_upload_id)
end
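
    # The fixed identifier above mimics the opaque upload ID format S3 returns
    # from CreateMultipartUpload.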
context "when the store is external" do
before do
sign_in(user)
SiteSetting.enable_direct_s3_uploads = true
setup_s3
end
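
      # S3 aborts a multipart upload via a DELETE request on the object key
      # with the uploadId query parameter; stubbing it keeps the spec offline.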
def stub_abort_request
temp_location = "#{upload_base_url}/#{external_upload_stub.key}"
stub_request(
:delete,
"#{temp_location}?uploadId=#{external_upload_stub.external_upload_identifier}"
).to_return(status: 200, body: "")
end
it "errors if the correct params are not provided" do
post "/uploads/abort-multipart.json", params: {}
expect(response.status).to eq(400)
end
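
      # An unknown identifier is assumed to belong to an already-aborted
      # upload, so the endpoint succeeds without calling S3.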
it "returns 200 when the stub does not exist, assumes it has already been deleted" do
FileStore::S3Store.any_instance.expects(:abort_multipart).never
post "/uploads/abort-multipart.json", params: {
external_upload_identifier: "unknown",
}
expect(response.status).to eq(200)
end
it "returns 404 when the upload stub does not belong to the user" do
external_upload_stub.update!(created_by: Fabricate(:user))
post "/uploads/abort-multipart.json", params: {
external_upload_identifier: external_upload_stub.external_upload_identifier
}
expect(response.status).to eq(404)
end
it "aborts the multipart upload and deletes the stub" do
stub_abort_request
post "/uploads/abort-multipart.json", params: {
external_upload_identifier: external_upload_stub.external_upload_identifier
}
expect(response.status).to eq(200)
expect(ExternalUploadStub.exists?(id: external_upload_stub.id)).to eq(false)
end
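
      # Aws::S3::Errors::ServiceError is the base class for S3 API errors, so
      # any S3-side failure during the abort call surfaces as a 422.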
it "returns 422 when the abort request errors" do
FileStore::S3Store.any_instance.stubs(:abort_multipart).raises(Aws::S3::Errors::ServiceError.new({}, "test"))
post "/uploads/abort-multipart.json", params: {
external_upload_identifier: external_upload_stub.external_upload_identifier
}
expect(response.status).to eq(422)
end
end
context "when the store is not external" do
before do
sign_in(user)
end
it "returns 404" do
post "/uploads/complete-multipart.json", params: {
unique_identifier: external_upload_stub.external_upload_identifier,
parts: [
{
part_number: 1,
etag: "test1"
},
{
part_number: 2,
etag: "test2"
}
]
}
FEATURE: Initial implementation of direct S3 uploads with uppy and stubs (#13787) This adds a few different things to allow for direct S3 uploads using uppy. **These changes are still not the default.** There are hidden `enable_experimental_image_uploader` and `enable_direct_s3_uploads` settings that must be turned on for any of this code to be used, and even if they are turned on only the User Card Background for the user profile actually uses uppy-image-uploader. A new `ExternalUploadStub` model and database table is introduced in this pull request. This is used to keep track of uploads that are uploaded to a temporary location in S3 with the direct to S3 code, and they are eventually deleted a) when the direct upload is completed and b) after a certain time period of not being used. ### Starting a direct S3 upload When an S3 direct upload is initiated with uppy, we first request a presigned PUT URL from the new `generate-presigned-put` endpoint in `UploadsController`. This generates an S3 key in the `temp` folder inside the correct bucket path, along with any metadata from the clientside (e.g. the SHA1 checksum described below). This will also create an `ExternalUploadStub` and store the details of the temp object key and the file being uploaded. Once the clientside has this URL, uppy will upload the file direct to S3 using the presigned URL. Once the upload is complete we go to the next stage. ### Completing a direct S3 upload Once the upload to S3 is done we call the new `complete-external-upload` route with the unique identifier of the `ExternalUploadStub` created earlier. Only the user who made the stub can complete the external upload. One of two paths is followed via the `ExternalUploadManager`. 1. If the object in S3 is too large (currently 100mb defined by `ExternalUploadManager::DOWNLOAD_LIMIT`) we do not download and generate the SHA1 for that file. Instead we create the `Upload` record via `UploadCreator` and simply copy it to its final destination on S3 then delete the initial temp file. Several modifications to `UploadCreator` have been made to accommodate this. 2. If the object in S3 is small enough, we download it. When the temporary S3 file is downloaded, we compare the SHA1 checksum generated by the browser with the actual SHA1 checksum of the file generated by ruby. The browser SHA1 checksum is stored on the object in S3 with metadata, and is generated via the `UppyChecksum` plugin. Keep in mind that some browsers will not generate this due to compatibility or other issues. We then follow the normal `UploadCreator` path with one exception. To cut down on having to re-upload the file again, if there are no changes (such as resizing etc) to the file in `UploadCreator` we follow the same copy + delete temp path that we do for files that are too large. 3. Finally we return the serialized upload record back to the client There are several errors that could happen that are handled by `UploadsController` as well. Also in this PR is some refactoring of `displayErrorForUpload` to handle both uppy and jquery file uploader errors.
2021-07-27 17:42:25 -05:00
expect(response.status).to eq(404)
end
end
end
describe "#complete_external_upload" do
before do
sign_in(user)
end
context "when the store is external" do
fab!(:external_upload_stub) { Fabricate(:image_external_upload_stub, created_by: user) }
let(:upload) { Fabricate(:upload) }
before do
SiteSetting.enable_direct_s3_uploads = true
SiteSetting.enable_upload_debug_mode = true
setup_s3
end
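
      # Debug mode should surface the underlying error in the server logs;
      # clients still only receive the generic failure message tested below.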
it "returns 404 when the upload stub does not exist" do
post "/uploads/complete-external-upload.json", params: { unique_identifier: "unknown" }
expect(response.status).to eq(404)
end
it "returns 404 when the upload stub does not belong to the user" do
external_upload_stub.update!(created_by: Fabricate(:user))
post "/uploads/complete-external-upload.json", params: { unique_identifier: external_upload_stub.unique_identifier }
expect(response.status).to eq(404)
end
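
      # Each failure raised by ExternalUploadManager#promote_to_upload! is
      # deliberately flattened into the generic upload.failed message so that
      # S3 and checksum details are not leaked to the client.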
it "handles ChecksumMismatchError" do
ExternalUploadManager.any_instance.stubs(:promote_to_upload!).raises(ExternalUploadManager::ChecksumMismatchError)
post "/uploads/complete-external-upload.json", params: { unique_identifier: external_upload_stub.unique_identifier }
expect(response.status).to eq(422)
expect(response.parsed_body["errors"].first).to eq(I18n.t("upload.failed"))
end
it "handles SizeMismatchError" do
ExternalUploadManager.any_instance.stubs(:promote_to_upload!).raises(ExternalUploadManager::SizeMismatchError.new("expected: 10, actual: 1000"))
post "/uploads/complete-external-upload.json", params: { unique_identifier: external_upload_stub.unique_identifier }
expect(response.status).to eq(422)
expect(response.parsed_body["errors"].first).to eq(I18n.t("upload.failed"))
end
it "handles CannotPromoteError" do
ExternalUploadManager.any_instance.stubs(:promote_to_upload!).raises(ExternalUploadManager::CannotPromoteError)
post "/uploads/complete-external-upload.json", params: { unique_identifier: external_upload_stub.unique_identifier }
expect(response.status).to eq(422)
expect(response.parsed_body["errors"].first).to eq(I18n.t("upload.failed"))
end
it "handles DownloadFailedError and Aws::S3::Errors::NotFound" do
ExternalUploadManager.any_instance.stubs(:promote_to_upload!).raises(ExternalUploadManager::DownloadFailedError)
post "/uploads/complete-external-upload.json", params: { unique_identifier: external_upload_stub.unique_identifier }
expect(response.status).to eq(422)
expect(response.parsed_body["errors"].first).to eq(I18n.t("upload.failed"))
ExternalUploadManager.any_instance.stubs(:promote_to_upload!).raises(Aws::S3::Errors::NotFound.new("error", "not found"))
post "/uploads/complete-external-upload.json", params: { unique_identifier: external_upload_stub.unique_identifier }
expect(response.status).to eq(422)
expect(response.parsed_body["errors"].first).to eq(I18n.t("upload.failed"))
end
it "handles a generic upload failure" do
ExternalUploadManager.any_instance.stubs(:promote_to_upload!).raises(StandardError)
post "/uploads/complete-external-upload.json", params: { unique_identifier: external_upload_stub.unique_identifier }
expect(response.status).to eq(422)
expect(response.parsed_body["errors"].first).to eq(I18n.t("upload.failed"))
end
it "handles validation errors on the upload" do
upload.errors.add(:base, "test error")
ExternalUploadManager.any_instance.stubs(:promote_to_upload!).returns(upload)
post "/uploads/complete-external-upload.json", params: { unique_identifier: external_upload_stub.unique_identifier }
expect(response.status).to eq(422)
expect(response.parsed_body["errors"]).to eq(["test error"])
end
it "deletes the stub and returns the serialized upload when complete" do
ExternalUploadManager.any_instance.stubs(:promote_to_upload!).returns(upload)
post "/uploads/complete-external-upload.json", params: { unique_identifier: external_upload_stub.unique_identifier }
expect(ExternalUploadStub.exists?(id: external_upload_stub.id)).to eq(false)
expect(response.status).to eq(200)
expect(response.parsed_body).to eq(UploadsController.serialize_upload(upload))
end
end
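
    # Without an external store configured the endpoint is not available, so
    # any request responds with 404.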
context "when the store is not external" do
it "returns 404" do
post "/uploads/generate-presigned-put.json", params: { file_name: "test.png", type: "card_background" }
expect(response.status).to eq(404)
end
end
end
end