fix(import/ova): speed up the import of gzipped vmdk disks nested in .ova (#5275)

This is the follow-up to #5085.

Avoid unzipping the entire file from the beginning before each read.
The test case went from 10 minutes down to 26 seconds.

When reading a block from the gzipped file, we keep the current decompression state in memory. If the next read happens at an offset greater than that of the previous read, we simply carry on decompressing the file until we reach the desired position.

The previous code would decompress from the start of the file for every read operation.
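
The reader now behaves like a resumable, forward-only gunzip cursor. A minimal sketch of the idea (illustration only, assuming a pako-style streaming inflater; `createGzipReader` and `readCompressed` are made-up names, not the helpers used in this patch):

```js
import pako from 'pako'

const GZIP_CHUNK_SIZE = 4 * 1024 * 1024

// readCompressed(offset, length) is assumed to resolve to raw gzip bytes
// starting at `offset` in the compressed file.
function createGzipReader(readCompressed) {
  let inflater = new pako.Inflate()
  let inflatedPos = 0 // decompressed bytes already walked past
  let compressedPos = 0 // compressed bytes already fed to the inflater

  return async function read(start, end) {
    if (start < inflatedPos) {
      // a gzip stream cannot rewind: going backwards forces a full restart
      inflater = new pako.Inflate()
      inflatedPos = 0
      compressedPos = 0
    }
    const chunks = []
    while (inflatedPos < end) {
      // carry on from where the previous read stopped instead of from byte 0
      inflater.push(await readCompressed(compressedPos, GZIP_CHUNK_SIZE), pako.Z_SYNC_FLUSH)
      compressedPos += GZIP_CHUNK_SIZE
      let chunk = inflater.result // bytes produced by this push
      const chunkEnd = inflatedPos + chunk.length
      if (chunkEnd > start) {
        // keep only the part overlapping [start, end)
        chunk = chunk.subarray(Math.max(0, start - inflatedPos), Math.min(chunk.length, end - inflatedPos))
        chunks.push(chunk)
      }
      inflatedPos = chunkEnd
    }
    const result = new Uint8Array(end - start)
    let index = 0
    chunks.forEach(c => {
      result.set(c, index)
      index += c.length
    })
    return result.buffer
  }
}
```

The actual code below goes a bit further: it keeps the inflater's last result chunk around (tracked through `strm.total_in` / `strm.total_out`), so a read that starts inside the previously decompressed chunk can be served without resetting the stream.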
Nicolas Raynaud 2020-09-28 15:42:55 +02:00 committed by GitHub
parent f755365e23
commit aed09b152a
2 changed files with 49 additions and 36 deletions


@@ -22,6 +22,7 @@
- [API] Fix `this.removeSubjectFromResourceSet is not a function` error on calling `resourceSet.removeSubject` via `xo-cli` [#5265](https://github.com/vatesfr/xen-orchestra/issues/5265) (PR [#5266](https://github.com/vatesfr/xen-orchestra/pull/5266))
- [Import OVA] Fix frozen UI when dropping a big OVA on the page (PR [#5274](https://github.com/vatesfr/xen-orchestra/pull/5274))
- [Remotes/S3] Fix S3 backup of 50GB+ files [#5197](https://github.com/vatesfr/xen-orchestra/issues/5197) (PR[ #5242](https://github.com/vatesfr/xen-orchestra/pull/5242) )
- [Import OVA] Improve import speed of embedded gzipped VMDK disks (PR [#5275](https://github.com/vatesfr/xen-orchestra/pull/5275))
- [Remotes] Fix editing bucket and directory for S3 remotes [#5233](https://github.com/vatesfr/xen-orchestra/issues/5233) (PR [5276](https://github.com/vatesfr/xen-orchestra/pull/5276))
### Packages to release


@@ -1,3 +1,4 @@
import assert from 'assert'
import find from 'lodash/find'
import forEach from 'lodash/forEach'
import pako from 'pako'
@@ -95,7 +96,8 @@ function parseTarHeader(header, stringDeserializer) {
const sizeBuffer = header.slice(124, 124 + 12)
// size encoding: https://codeistry.wordpress.com/2014/08/14/how-to-parse-a-tar-file/
let fileSize = 0
// If the leading byte is 0x80 (128), the non-leading bytes of the field are concatenated in big-endian order,
// with the result being a positive number expressed in binary form.
//
// Source: https://www.gnu.org/software/tar/manual/html_node/Extensions.html
if (new Uint8Array(sizeBuffer)[0] === 128) {
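
For illustration, a minimal sketch of that size-field decoding (assuming a 12-byte `sizeBuffer` as above; `parseTarSize` is a hypothetical name, the real logic is inline in `parseTarHeader`):

```js
// decode the 12-byte tar size field (sketch)
function parseTarSize(sizeBuffer) {
  const bytes = new Uint8Array(sizeBuffer)
  if (bytes[0] === 128) {
    // GNU base-256 extension: skip the 0x80 marker byte,
    // concatenate the remaining bytes big-endian
    let fileSize = 0
    for (let i = 1; i < bytes.length; i++) {
      fileSize = fileSize * 256 + bytes[i]
    }
    return fileSize
  }
  // otherwise the field is an octal ASCII string (NUL/space padded)
  return parseInt(String.fromCharCode(...bytes), 8)
}
```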
@@ -218,41 +220,6 @@ async function parseOVF(fileFragment, stringDeserializer) {
const GZIP_CHUNK_SIZE = 4 * 1024 * 1024
async function parseGzipFromStart(start, end, fileSlice) {
let currentDeflatedPos = 0
let currentInflatedPos = 0
const inflate = new pako.Inflate()
const chunks = []
while (currentInflatedPos < end) {
const slice = fileSlice.slice(
currentDeflatedPos,
currentDeflatedPos + GZIP_CHUNK_SIZE
)
const compressed = await slice.read()
inflate.push(compressed, pako.Z_SYNC_FLUSH)
let chunk = inflate.result
const inflatedChunkEnd = currentInflatedPos + chunk.length
if (inflatedChunkEnd > start) {
if (currentInflatedPos < start) {
chunk = chunk.slice(start - currentInflatedPos)
}
if (inflatedChunkEnd > end) {
chunk = chunk.slice(0, -(inflatedChunkEnd - end))
}
chunks.push(chunk)
}
currentInflatedPos = inflatedChunkEnd
currentDeflatedPos += GZIP_CHUNK_SIZE
}
const resultBuffer = new Uint8Array(sum(chunks.map(c => c.length)))
let index = 0
chunks.forEach(c => {
resultBuffer.set(c, index)
index += c.length
})
return resultBuffer.buffer
}
// start and end are negative numbers
// used with streamOptimized format where only the footer has the directory address filled
async function parseGzipFromEnd(start, end, fileSlice, header) {
@@ -335,7 +302,52 @@ export async function parseOVAFile(
}
if (!skipVmdk && header.fileName.toLowerCase().endsWith('.vmdk.gz')) {
const fileSlice = parsableFile.slice(offset, offset + header.fileSize)
let forwardsInflater = new pako.Inflate()
const readFile = async (start, end) => {
// if the next read is further down the stream than the previous one, reuse the previous zlib stream
async function parseGzipFromStart(start, end, fileSlice) {
const chunks = []
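// resultStart(): offset, in inflated bytes, of the first byte of the inflater's last result chunk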
const resultStart = () =>
forwardsInflater.strm.total_out - forwardsInflater.result.length
if (forwardsInflater.result != null && start < resultStart()) {
// the block we are reading starts before the last decompressed chunk, reset stream
forwardsInflater = new pako.Inflate()
}
let isLast = false
while (true) {
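// total_out counts the inflated bytes produced so far; once it passes `start`, the last result chunk overlaps the requested range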
if (forwardsInflater.strm.total_out > start) {
let chunk = forwardsInflater.result
if (resultStart() < start) {
chunk = chunk.slice(start - resultStart())
}
if (forwardsInflater.strm.total_out > end) {
chunk = chunk.slice(0, -(forwardsInflater.strm.total_out - end))
isLast = true
}
chunks.push(chunk)
}
if (isLast) {
// don't move the stream forwards if we took our last chunk
// gives the next read operation an opportunity to read from the same position
break
}
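// feed the next compressed chunk to the inflater, starting where it stopped (total_in counts compressed bytes consumed so far)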
const slice = fileSlice.slice(
forwardsInflater.strm.total_in,
forwardsInflater.strm.total_in + GZIP_CHUNK_SIZE
)
forwardsInflater.push(await slice.read(), pako.Z_SYNC_FLUSH)
}
const resultBuffer = new Uint8Array(sum(chunks.map(c => c.length)))
let index = 0
chunks.forEach(c => {
resultBuffer.set(c, index)
index += c.length
})
assert.strictEqual(resultBuffer.buffer.byteLength, end - start)
return resultBuffer.buffer
}
if (start === end) {
return new Uint8Array(0)
}