FEATURE: Direct S3 multipart uploads for backups (#14736)

This PR introduces a new `enable_experimental_backup_uploads` site setting (default false and hidden), which when enabled alongside `enable_direct_s3_uploads` will allow for direct S3 multipart uploads of backup .tar.gz files.

To make multipart external uploads work with both the S3BackupStore and the S3Store, I've had to move several methods out of S3Store and into S3Helper, including:

* presigned_url
* create_multipart
* abort_multipart
* complete_multipart
* presign_multipart_part
* list_multipart_parts

Then, S3Store and S3BackupStore either delegate directly to S3Helper or have their own special methods to call S3Helper for these methods. FileStore.temporary_upload_path has also removed its dependence on upload_path, and can now be used interchangeably between the stores. A similar change was made in the frontend as well, moving the multipart related JS code out of ComposerUppyUpload and into a mixin of its own, so it can also be used by UppyUploadMixin.

Some changes to ExternalUploadManager had to be made here as well. The backup direct uploads do not need an Upload record made for them in the database, so they can be moved to their final S3 resting place when completing the multipart upload.

This changeset is not perfect; it introduces some special cases in UploadController to handle backups that was previously in BackupController, because UploadController is where the multipart routes are located. A subsequent pull request will pull these routes into a module or some other sharing pattern, along with hooks, so the backup controller and the upload controller (and any future controllers that may need them) can include these routes in a nicer way.
This commit is contained in:
Martin Brennan
2021-11-11 08:25:31 +10:00
committed by GitHub
parent d4e35f50c2
commit e4350bb966
22 changed files with 725 additions and 356 deletions

View File

@@ -12,6 +12,9 @@ export default Controller.extend({
uploadLabel: i18n("admin.backups.upload.label"),
backupLocation: setting("backup_location"),
localBackupStorage: equal("backupLocation", "local"),
enableExperimentalBackupUploader: setting(
"enable_experimental_backup_uploader"
),
@discourseComputed("status.allowRestore", "status.isOperationRunning")
restoreTitle(allowRestore, isOperationRunning) {

View File

@@ -8,7 +8,11 @@
title="admin.backups.upload.title"
class="btn-default"}}
{{else}}
{{backup-uploader done=(route-action "remoteUploadSuccess")}}
{{#if enableExperimentalBackupUploader}}
{{uppy-backup-uploader done=(route-action "remoteUploadSuccess")}}
{{else}}
{{backup-uploader done=(route-action "remoteUploadSuccess")}}
{{/if}}
{{/if}}
{{#if site.isReadOnly}}

View File

@@ -0,0 +1,25 @@
import Component from "@ember/component";
import I18n from "I18n";
import UppyUploadMixin from "discourse/mixins/uppy-upload";
import discourseComputed from "discourse-common/utils/decorators";
export default Component.extend(UppyUploadMixin, {
tagName: "span",
type: "backup",
useMultipartUploadsIfAvailable: true,
@discourseComputed("uploading", "uploadProgress")
uploadButtonText(uploading, progress) {
return uploading
? I18n.t("admin.backups.upload.uploading_progress", { progress })
: I18n.t("admin.backups.upload.label");
},
validateUploadedFilesOptions() {
return { skipValidation: true };
},
uploadDone() {
this.done();
},
});

View File

@@ -1,13 +1,11 @@
import Mixin from "@ember/object/mixin";
import { Promise } from "rsvp";
import ExtendableUploader from "discourse/mixins/extendable-uploader";
import { ajax } from "discourse/lib/ajax";
import UppyS3Multipart from "discourse/mixins/uppy-s3-multipart";
import { deepMerge } from "discourse-common/lib/object";
import UppyChecksum from "discourse/lib/uppy-checksum-plugin";
import Uppy from "@uppy/core";
import DropTarget from "@uppy/drop-target";
import XHRUpload from "@uppy/xhr-upload";
import AwsS3Multipart from "@uppy/aws-s3-multipart";
import { warn } from "@ember/debug";
import I18n from "I18n";
import getURL from "discourse-common/lib/get-url";
@@ -33,7 +31,7 @@ import { cacheShortUploadUrl } from "pretty-text/upload-short-url";
// and the most important _bindUploadTarget which handles all the main upload
// functionality and event binding.
//
export default Mixin.create(ExtendableUploader, {
export default Mixin.create(ExtendableUploader, UppyS3Multipart, {
@observes("composerModel.uploadCancelled")
_cancelUpload() {
if (!this.get("composerModel.uploadCancelled")) {
@@ -390,158 +388,6 @@ export default Mixin.create(ExtendableUploader, {
});
},
_useS3MultipartUploads() {
const self = this;
const retryDelays = [0, 1000, 3000, 5000];
this._uppyInstance.use(AwsS3Multipart, {
// controls how many simultaneous _chunks_ are uploaded, not files,
// which in turn controls the minimum number of chunks presigned
// in each batch (limit / 2)
//
// the default, and minimum, chunk size is 5mb. we can control the
// chunk size via getChunkSize(file), so we may want to increase
// the chunk size for larger files
limit: 10,
retryDelays,
createMultipartUpload(file) {
self._uppyInstance.emit("create-multipart", file.id);
const data = {
file_name: file.name,
file_size: file.size,
upload_type: file.meta.upload_type,
metadata: file.meta,
};
// the sha1 checksum is set by the UppyChecksum plugin, except
// for in cases where the browser does not support the required
// crypto mechanisms or an error occurs. it is an additional layer
// of security, and not required.
if (file.meta.sha1_checksum) {
data.metadata = { "sha1-checksum": file.meta.sha1_checksum };
}
return ajax("/uploads/create-multipart.json", {
type: "POST",
data,
// uppy is inconsistent, an error here fires the upload-error event
}).then((responseData) => {
self._uppyInstance.emit("create-multipart-success", file.id);
file.meta.unique_identifier = responseData.unique_identifier;
return {
uploadId: responseData.external_upload_identifier,
key: responseData.key,
};
});
},
prepareUploadParts(file, partData) {
if (file.preparePartsRetryAttempts === undefined) {
file.preparePartsRetryAttempts = 0;
}
return ajax("/uploads/batch-presign-multipart-parts.json", {
type: "POST",
data: {
part_numbers: partData.partNumbers,
unique_identifier: file.meta.unique_identifier,
},
})
.then((data) => {
if (file.preparePartsRetryAttempts) {
delete file.preparePartsRetryAttempts;
self._consoleDebug(
`[uppy] Retrying batch fetch for ${file.id} was successful, continuing.`
);
}
return { presignedUrls: data.presigned_urls };
})
.catch((err) => {
const status = err.jqXHR.status;
// it is kind of ugly to have to track the retry attempts for
// the file based on the retry delays, but uppy's `retryable`
// function expects the rejected Promise data to be structured
// _just so_, and provides no interface for us to tell how many
// times the upload has been retried (which it tracks internally)
//
// if we exceed the attempts then there is no way that uppy will
// retry the upload once again, so in that case the alert can
// be safely shown to the user that their upload has failed.
if (file.preparePartsRetryAttempts < retryDelays.length) {
file.preparePartsRetryAttempts += 1;
const attemptsLeft =
retryDelays.length - file.preparePartsRetryAttempts + 1;
self._consoleDebug(
`[uppy] Fetching a batch of upload part URLs for ${file.id} failed with status ${status}, retrying ${attemptsLeft} more times...`
);
return Promise.reject({ source: { status } });
} else {
self._consoleDebug(
`[uppy] Fetching a batch of upload part URLs for ${file.id} failed too many times, throwing error.`
);
// uppy is inconsistent, an error here does not fire the upload-error event
self._handleUploadError(file, err);
}
});
},
completeMultipartUpload(file, data) {
self._uppyInstance.emit("complete-multipart", file.id);
const parts = data.parts.map((part) => {
return { part_number: part.PartNumber, etag: part.ETag };
});
return ajax("/uploads/complete-multipart.json", {
type: "POST",
contentType: "application/json",
data: JSON.stringify({
parts,
unique_identifier: file.meta.unique_identifier,
pasted: file.meta.pasted,
for_private_message: file.meta.for_private_message,
}),
// uppy is inconsistent, an error here fires the upload-error event
}).then((responseData) => {
self._uppyInstance.emit("complete-multipart-success", file.id);
return responseData;
});
},
abortMultipartUpload(file, { key, uploadId }) {
// if the user cancels the upload before the key and uploadId
// are stored from the createMultipartUpload response then they
// will not be set, and we don't have to abort the upload because
// it will not exist yet
if (!key || !uploadId) {
return;
}
// this gives us a chance to inspect the upload stub before
// it is deleted from external storage by aborting the multipart
// upload; see also ExternalUploadManager
if (file.meta.error && self.siteSettings.enable_upload_debug_mode) {
return;
}
return ajax("/uploads/abort-multipart.json", {
type: "POST",
data: {
external_upload_identifier: uploadId,
},
// uppy is inconsistent, an error here does not fire the upload-error event
}).catch((err) => {
self._handleUploadError(file, err);
});
},
// we will need a listParts function at some point when we want to
// resume multipart uploads; this is used by uppy to figure out
// what parts are uploaded and which still need to be
});
},
_reset() {
this._uppyInstance?.reset();
this.setProperties({

View File

@@ -0,0 +1,159 @@
import Mixin from "@ember/object/mixin";
import { Promise } from "rsvp";
import { ajax } from "discourse/lib/ajax";
import AwsS3Multipart from "@uppy/aws-s3-multipart";
export default Mixin.create({
_useS3MultipartUploads() {
this.set("usingS3MultipartUploads", true);
const self = this;
const retryDelays = [0, 1000, 3000, 5000];
this._uppyInstance.use(AwsS3Multipart, {
// controls how many simultaneous _chunks_ are uploaded, not files,
// which in turn controls the minimum number of chunks presigned
// in each batch (limit / 2)
//
// the default, and minimum, chunk size is 5mb. we can control the
// chunk size via getChunkSize(file), so we may want to increase
// the chunk size for larger files
limit: 10,
retryDelays,
createMultipartUpload(file) {
self._uppyInstance.emit("create-multipart", file.id);
const data = {
file_name: file.name,
file_size: file.size,
upload_type: file.meta.upload_type,
metadata: file.meta,
};
// the sha1 checksum is set by the UppyChecksum plugin, except
// for in cases where the browser does not support the required
// crypto mechanisms or an error occurs. it is an additional layer
// of security, and not required.
if (file.meta.sha1_checksum) {
data.metadata = { "sha1-checksum": file.meta.sha1_checksum };
}
return ajax("/uploads/create-multipart.json", {
type: "POST",
data,
// uppy is inconsistent, an error here fires the upload-error event
}).then((responseData) => {
self._uppyInstance.emit("create-multipart-success", file.id);
file.meta.unique_identifier = responseData.unique_identifier;
return {
uploadId: responseData.external_upload_identifier,
key: responseData.key,
};
});
},
prepareUploadParts(file, partData) {
if (file.preparePartsRetryAttempts === undefined) {
file.preparePartsRetryAttempts = 0;
}
return ajax("/uploads/batch-presign-multipart-parts.json", {
type: "POST",
data: {
part_numbers: partData.partNumbers,
unique_identifier: file.meta.unique_identifier,
},
})
.then((data) => {
if (file.preparePartsRetryAttempts) {
delete file.preparePartsRetryAttempts;
self._consoleDebug(
`[uppy] Retrying batch fetch for ${file.id} was successful, continuing.`
);
}
return { presignedUrls: data.presigned_urls };
})
.catch((err) => {
const status = err.jqXHR.status;
// it is kind of ugly to have to track the retry attempts for
// the file based on the retry delays, but uppy's `retryable`
// function expects the rejected Promise data to be structured
// _just so_, and provides no interface for us to tell how many
// times the upload has been retried (which it tracks internally)
//
// if we exceed the attempts then there is no way that uppy will
// retry the upload once again, so in that case the alert can
// be safely shown to the user that their upload has failed.
if (file.preparePartsRetryAttempts < retryDelays.length) {
file.preparePartsRetryAttempts += 1;
const attemptsLeft =
retryDelays.length - file.preparePartsRetryAttempts + 1;
self._consoleDebug(
`[uppy] Fetching a batch of upload part URLs for ${file.id} failed with status ${status}, retrying ${attemptsLeft} more times...`
);
return Promise.reject({ source: { status } });
} else {
self._consoleDebug(
`[uppy] Fetching a batch of upload part URLs for ${file.id} failed too many times, throwing error.`
);
// uppy is inconsistent, an error here does not fire the upload-error event
self._handleUploadError(file, err);
}
});
},
completeMultipartUpload(file, data) {
self._uppyInstance.emit("complete-multipart", file.id);
const parts = data.parts.map((part) => {
return { part_number: part.PartNumber, etag: part.ETag };
});
return ajax("/uploads/complete-multipart.json", {
type: "POST",
contentType: "application/json",
data: JSON.stringify({
parts,
unique_identifier: file.meta.unique_identifier,
pasted: file.meta.pasted,
for_private_message: file.meta.for_private_message,
}),
// uppy is inconsistent, an error here fires the upload-error event
}).then((responseData) => {
self._uppyInstance.emit("complete-multipart-success", file.id);
return responseData;
});
},
abortMultipartUpload(file, { key, uploadId }) {
// if the user cancels the upload before the key and uploadId
// are stored from the createMultipartUpload response then they
// will not be set, and we don't have to abort the upload because
// it will not exist yet
if (!key || !uploadId) {
return;
}
// this gives us a chance to inspect the upload stub before
// it is deleted from external storage by aborting the multipart
// upload; see also ExternalUploadManager
if (file.meta.error && self.siteSettings.enable_upload_debug_mode) {
return;
}
return ajax("/uploads/abort-multipart.json", {
type: "POST",
data: {
external_upload_identifier: uploadId,
},
// uppy is inconsistent, an error here does not fire the upload-error event
}).catch((err) => {
self._handleUploadError(file, err);
});
},
// we will need a listParts function at some point when we want to
// resume multipart uploads; this is used by uppy to figure out
// what parts are uploaded and which still need to be
});
},
});

View File

@@ -13,12 +13,13 @@ import DropTarget from "@uppy/drop-target";
import XHRUpload from "@uppy/xhr-upload";
import AwsS3 from "@uppy/aws-s3";
import UppyChecksum from "discourse/lib/uppy-checksum-plugin";
import UppyS3Multipart from "discourse/mixins/uppy-s3-multipart";
import { on } from "discourse-common/utils/decorators";
import { warn } from "@ember/debug";
export const HUGE_FILE_THRESHOLD_BYTES = 104_857_600; // 100MB
export default Mixin.create({
export default Mixin.create(UppyS3Multipart, {
uploading: false,
uploadProgress: 0,
_uppyInstance: null,
@@ -26,13 +27,6 @@ export default Mixin.create({
_inProgressUploads: 0,
id: null,
// TODO (martin): currently used for backups to turn on auto upload and PUT/XML requests
// and for emojis to do sequential uploads, when we get to replacing those
// with uppy make sure this is used when initializing uppy
uploadOptions() {
return {};
},
uploadDone() {
warn("You should implement `uploadDone`", {
id: "discourse.upload.missing-upload-done",
@@ -170,7 +164,7 @@ export default Mixin.create({
});
this._uppyInstance.on("upload-error", (file, error, response) => {
displayErrorForUpload(response, this.siteSettings, file.name);
displayErrorForUpload(response || error, this.siteSettings, file.name);
this._reset();
});
@@ -184,7 +178,11 @@ export default Mixin.create({
this.siteSettings.enable_direct_s3_uploads &&
!this.preventDirectS3Uploads
) {
this._useS3Uploads();
if (this.useMultipartUploadsIfAvailable) {
this._useS3MultipartUploads();
} else {
this._useS3Uploads();
}
} else {
this._useXHRUploads();
}

View File

@@ -1,4 +1,4 @@
<label class="btn" disabled={{uploading}} title={{i18n "admin.backups.upload.title"}}>
{{d-icon "upload"}}&nbsp;{{uploadButtonText}}
{{d-icon "upload"}}{{uploadButtonText}}
<input class="hidden-upload-field" disabled={{uploading}} type="file" accept=".gz">
</label>

View File

@@ -0,0 +1,4 @@
<label class="btn" disabled={{uploading}} title={{i18n "admin.backups.upload.title"}}>
{{d-icon "upload"}}{{uploadButtonText}}
<input class="hidden-upload-field" disabled={{uploading}} type="file" accept=".gz">
</label>