fix(fs/S3): support 50GB+ files (#5242)

This commit is contained in:
Nicolas Raynaud
2020-09-27 20:49:41 +02:00
committed by GitHub
parent 511a04dad5
commit 4bb702fe89
3 changed files with 71 additions and 37 deletions

View File

@@ -6,6 +6,12 @@ import { createChecksumStream } from './checksum'
// endpoints https://docs.aws.amazon.com/general/latest/gr/s3.html
// limits: https://docs.aws.amazon.com/AmazonS3/latest/dev/qfacts.html
const MIN_PART_SIZE = 1024 * 1024 * 5 // 5MB
const MAX_PART_SIZE = 1024 * 1024 * 1024 * 5 // 5GB
const MAX_PARTS_COUNT = 10000
const MAX_OBJECT_SIZE = 1024 * 1024 * 1024 * 1024 * 5 // 5TB
const IDEAL_FRAGMENT_SIZE = Math.ceil(MAX_OBJECT_SIZE / MAX_PARTS_COUNT) // the smallest fragment size that still allows a 5TB upload in 10000 fragments, about 524MB
export default class S3Handler extends RemoteHandlerAbstract {
constructor(remote, _opts) {
super(remote)
@@ -44,10 +50,13 @@ export default class S3Handler extends RemoteHandlerAbstract {
input.on('error', forwardError)
inputStream = checksumStream
}
const upload = this._s3.upload({
...this._createParams(path),
Body: inputStream,
})
const upload = this._s3.upload(
{
...this._createParams(path),
Body: inputStream,
},
{ partSize: IDEAL_FRAGMENT_SIZE }
)
await upload.promise()
if (checksum) {
const checksum = await inputStream.checksum
@@ -125,14 +134,13 @@ export default class S3Handler extends RemoteHandlerAbstract {
}
async _write(file, buffer, position) {
const MIN_FRAGMENT_SIZE = 1024 * 1024 * 5 // 5Mo
if (typeof file !== 'string') {
file = file.fd
}
const uploadParams = this._createParams(file)
const fileSize = +(await this._s3.headObject(uploadParams).promise())
.ContentLength
if (fileSize < MIN_FRAGMENT_SIZE) {
if (fileSize < MIN_PART_SIZE) {
const resultBuffer = Buffer.alloc(
Math.max(fileSize, position + buffer.length)
)
@@ -146,7 +154,7 @@ export default class S3Handler extends RemoteHandlerAbstract {
return { buffer, bytesWritten: buffer.length }
} else {
// using this trick: https://stackoverflow.com/a/38089437/72637
// multipart fragments have a minimum size of 5Mo unless they are last
// multipart fragments have a minimum size of 5MB and a max of 5GB unless they are last
// splitting the file in 3 parts: [prefix, edit, suffix]
// if `prefix` is bigger than 5Mo, it will be sourced from uploadPartCopy()
// otherwise it will be downloaded, concatenated to `edit`
@@ -164,9 +172,11 @@ export default class S3Handler extends RemoteHandlerAbstract {
let editBuffer = buffer
let editBufferOffset = position
let partNumber = 1
const prefixRange = `bytes=0-${prefixSize - 1}`
if (prefixSize < MIN_FRAGMENT_SIZE) {
const downloadParams = { ...uploadParams, Range: prefixRange }
if (prefixSize < MIN_PART_SIZE) {
const downloadParams = {
...uploadParams,
Range: `bytes=0-${prefixSize - 1}`,
}
const prefixBuffer =
prefixSize > 0
? (await this._s3.getObject(downloadParams).promise()).Body
@@ -174,26 +184,38 @@ export default class S3Handler extends RemoteHandlerAbstract {
editBuffer = Buffer.concat([prefixBuffer, buffer])
editBufferOffset = 0
} else {
const copyPrefixParams = {
...multipartParams,
PartNumber: partNumber++,
CopySource: `/${this._bucket}/${this._dir + file}`,
CopySourceRange: prefixRange,
const fragmentsCount = Math.ceil(prefixSize / MAX_PART_SIZE)
const prefixFragmentSize = Math.ceil(prefixSize / fragmentsCount)
const lastFragmentSize =
prefixFragmentSize * fragmentsCount - prefixSize
let prefixPosition = 0
for (let i = 0; i < fragmentsCount; i++) {
const copyPrefixParams = {
...multipartParams,
PartNumber: partNumber++,
CopySource: `/${this._bucket}/${this._dir + file}`,
CopySourceRange: `bytes=${prefixPosition}-${
prefixPosition + prefixFragmentSize - 1
}`,
}
const prefixPart = (
await this._s3.uploadPartCopy(copyPrefixParams).promise()
).CopyPartResult
parts.push({
ETag: prefixPart.ETag,
PartNumber: copyPrefixParams.PartNumber,
})
prefixPosition += prefixFragmentSize
}
if (lastFragmentSize) {
}
const prefixPart = (
await this._s3.uploadPartCopy(copyPrefixParams).promise()
).CopyPartResult
parts.push({
ETag: prefixPart.ETag,
PartNumber: copyPrefixParams.PartNumber,
})
}
if (hasSuffix && editBuffer.length < MIN_FRAGMENT_SIZE) {
if (hasSuffix && editBuffer.length < MIN_PART_SIZE) {
// the edit fragment is too short and is not the last fragment
// let's steal from the suffix fragment to reach the minimum size
// the suffix might be too short and itself entirely absorbed in the edit fragment, making it the last one.
const complementSize = Math.min(
MIN_FRAGMENT_SIZE - editBuffer.length,
MIN_PART_SIZE - editBuffer.length,
suffixSize
)
const complementOffset = editBufferOffset + editBuffer.length
@@ -217,20 +239,29 @@ export default class S3Handler extends RemoteHandlerAbstract {
const editPart = await this._s3.uploadPart(editParams).promise()
parts.push({ ETag: editPart.ETag, PartNumber: editParams.PartNumber })
if (hasSuffix) {
const suffixRange = `bytes=${suffixOffset}-${fileSize - 1}`
const copySuffixParams = {
...multipartParams,
PartNumber: partNumber++,
CopySource: `/${this._bucket}/${this._dir + file}`,
CopySourceRange: suffixRange,
const suffixFragments = Math.ceil(suffixSize / MAX_PART_SIZE)
const suffixFragmentsSize = Math.ceil(suffixSize / suffixFragments)
let suffixFragmentOffset = suffixOffset
for (let i = 0; i < suffixFragments; i++) {
const fragmentEnd = suffixFragmentOffset + suffixFragmentsSize
const suffixRange = `bytes=${suffixFragmentOffset}-${
Math.min(fileSize, fragmentEnd) - 1
}`
const copySuffixParams = {
...multipartParams,
PartNumber: partNumber++,
CopySource: `/${this._bucket}/${this._dir + file}`,
CopySourceRange: suffixRange,
}
const suffixPart = (
await this._s3.uploadPartCopy(copySuffixParams).promise()
).CopyPartResult
parts.push({
ETag: suffixPart.ETag,
PartNumber: copySuffixParams.PartNumber,
})
suffixFragmentOffset = fragmentEnd
}
const suffixPart = (
await this._s3.uploadPartCopy(copySuffixParams).promise()
).CopyPartResult
parts.push({
ETag: suffixPart.ETag,
PartNumber: copySuffixParams.PartNumber,
})
}
await this._s3
.completeMultipartUpload({

View File

@@ -21,6 +21,7 @@
- [Import VMDK] Fix `No position specified for vmdisk1` error (PR [#5255](https://github.com/vatesfr/xen-orchestra/pull/5255))
- [API] Fix `this.removeSubjectFromResourceSet is not a function` error on calling `resourceSet.removeSubject` via `xo-cli` [#5265](https://github.com/vatesfr/xen-orchestra/issues/5265) (PR [#5266](https://github.com/vatesfr/xen-orchestra/pull/5266))
- [Import OVA] Fix frozen UI when dropping a big OVA on the page (PR [#5274](https://github.com/vatesfr/xen-orchestra/pull/5274))
- [Remotes/S3] Fix S3 backup of 50GB+ files [#5197](https://github.com/vatesfr/xen-orchestra/issues/5197) (PR [#5242](https://github.com/vatesfr/xen-orchestra/pull/5242))
### Packages to release
@@ -39,6 +40,7 @@
>
> In case of conflict, the highest (lowest in previous list) `$version` wins.
- @xen-orchestra/fs patch
- xo-vmdk-to-vhd patch
- xo-web minor
- xo-server patch

View File

@@ -40,6 +40,7 @@
"jest": {
"collectCoverage": true,
"moduleNameMapper": {
"^.": "./src",
"^(@vates/[^/]+)": "$1/src",
"^(@xen-orchestra/[^/]+)": "$1/src",
"^(value-matcher)": "$1/src",