fix(import/ova): speed up the import of gzipped vmdk disks nested in .ova (#5275)

This is the follow-up to #5085.

Avoid unzipping the entire file from the beginning before each read.
The test case went from 10 minutes down to 26 seconds.

When reading a block from the gzipped file, we keep the current decompression state in memory. If the next read happens at an offset greater than that of the previous read, we simply carry on decompressing the file until we reach the desired position.

The previous code would decompress from the start of the file for every read operation.
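
The reader now behaves like a resumable, forward-only gunzip cursor. A minimal sketch of the idea (illustration only, assuming a pako-style streaming inflater; `createGzipReader` and `readCompressed` are made-up names, not the helpers used in this patch):

```js
import pako from 'pako'

const GZIP_CHUNK_SIZE = 4 * 1024 * 1024

// readCompressed(offset, length) is assumed to resolve to raw gzip bytes
// starting at `offset` in the compressed file.
function createGzipReader(readCompressed) {
  let inflater = new pako.Inflate()
  let inflatedPos = 0 // decompressed bytes already walked past
  let compressedPos = 0 // compressed bytes already fed to the inflater

  return async function read(start, end) {
    if (start < inflatedPos) {
      // a gzip stream cannot rewind: going backwards forces a full restart
      inflater = new pako.Inflate()
      inflatedPos = 0
      compressedPos = 0
    }
    const chunks = []
    while (inflatedPos < end) {
      // carry on from where the previous read stopped instead of from byte 0
      inflater.push(await readCompressed(compressedPos, GZIP_CHUNK_SIZE), pako.Z_SYNC_FLUSH)
      compressedPos += GZIP_CHUNK_SIZE
      let chunk = inflater.result // bytes produced by this push
      const chunkEnd = inflatedPos + chunk.length
      if (chunkEnd > start) {
        // keep only the part overlapping [start, end)
        chunk = chunk.subarray(Math.max(0, start - inflatedPos), Math.min(chunk.length, end - inflatedPos))
        chunks.push(chunk)
      }
      inflatedPos = chunkEnd
    }
    const result = new Uint8Array(end - start)
    let index = 0
    chunks.forEach(c => {
      result.set(c, index)
      index += c.length
    })
    return result.buffer
  }
}
```

The actual code below goes a bit further: it keeps the inflater's last result chunk around (tracked through `strm.total_in` / `strm.total_out`), so a read that starts inside the previously decompressed chunk can be served without resetting the stream.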
Nicolas Raynaud 2020-09-28 15:42:55 +02:00 committed by GitHub
parent f755365e23
commit aed09b152a
2 changed files with 49 additions and 36 deletions


@@ -22,6 +22,7 @@
- [API] Fix `this.removeSubjectFromResourceSet is not a function` error on calling `resourceSet.removeSubject` via `xo-cli` [#5265](https://github.com/vatesfr/xen-orchestra/issues/5265) (PR [#5266](https://github.com/vatesfr/xen-orchestra/pull/5266))
- [Import OVA] Fix frozen UI when dropping a big OVA on the page (PR [#5274](https://github.com/vatesfr/xen-orchestra/pull/5274))
- [Remotes/S3] Fix S3 backup of 50GB+ files [#5197](https://github.com/vatesfr/xen-orchestra/issues/5197) (PR[ #5242](https://github.com/vatesfr/xen-orchestra/pull/5242) )
- [Import OVA] Improve import speed of embedded gzipped VMDK disks (PR [#5275](https://github.com/vatesfr/xen-orchestra/pull/5275))
- [Remotes] Fix editing bucket and directory for S3 remotes [#5233](https://github.com/vatesfr/xen-orchestra/issues/5233) (PR [5276](https://github.com/vatesfr/xen-orchestra/pull/5276))
### Packages to release


@@ -1,3 +1,4 @@
import assert from 'assert'
import find from 'lodash/find'
import forEach from 'lodash/forEach'
import pako from 'pako'
@@ -95,7 +96,8 @@ function parseTarHeader(header, stringDeserializer) {
const sizeBuffer = header.slice(124, 124 + 12)
// size encoding: https://codeistry.wordpress.com/2014/08/14/how-to-parse-a-tar-file/
let fileSize = 0
// If the leading byte is 0x80 (128), the non-leading bytes of the field are concatenated in big-endian order,
// with the result being a positive number expressed in binary form.
//
// Source: https://www.gnu.org/software/tar/manual/html_node/Extensions.html
if (new Uint8Array(sizeBuffer)[0] === 128) {
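
For illustration, a minimal sketch of that size-field decoding (assuming a 12-byte `sizeBuffer` as above; `parseTarSize` is a hypothetical name, the real logic is inline in `parseTarHeader`):

```js
// decode the 12-byte tar size field (sketch)
function parseTarSize(sizeBuffer) {
  const bytes = new Uint8Array(sizeBuffer)
  if (bytes[0] === 128) {
    // GNU base-256 extension: skip the 0x80 marker byte,
    // concatenate the remaining bytes big-endian
    let fileSize = 0
    for (let i = 1; i < bytes.length; i++) {
      fileSize = fileSize * 256 + bytes[i]
    }
    return fileSize
  }
  // otherwise the field is an octal ASCII string (NUL/space padded)
  return parseInt(String.fromCharCode(...bytes), 8)
}
```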
@@ -218,41 +220,6 @@ async function parseOVF(fileFragment, stringDeserializer) {
const GZIP_CHUNK_SIZE = 4 * 1024 * 1024
async function parseGzipFromStart(start, end, fileSlice) {
let currentDeflatedPos = 0
let currentInflatedPos = 0
const inflate = new pako.Inflate()
const chunks = []
while (currentInflatedPos < end) {
const slice = fileSlice.slice(
currentDeflatedPos,
currentDeflatedPos + GZIP_CHUNK_SIZE
)
const compressed = await slice.read()
inflate.push(compressed, pako.Z_SYNC_FLUSH)
let chunk = inflate.result
const inflatedChunkEnd = currentInflatedPos + chunk.length
if (inflatedChunkEnd > start) {
if (currentInflatedPos < start) {
chunk = chunk.slice(start - currentInflatedPos)
}
if (inflatedChunkEnd > end) {
chunk = chunk.slice(0, -(inflatedChunkEnd - end))
}
chunks.push(chunk)
}
currentInflatedPos = inflatedChunkEnd
currentDeflatedPos += GZIP_CHUNK_SIZE
}
const resultBuffer = new Uint8Array(sum(chunks.map(c => c.length)))
let index = 0
chunks.forEach(c => {
resultBuffer.set(c, index)
index += c.length
})
return resultBuffer.buffer
}
// start and end are negative numbers
// used with streamOptimized format where only the footer has the directory address filled
async function parseGzipFromEnd(start, end, fileSlice, header) {
@@ -335,7 +302,52 @@ export async function parseOVAFile(
}
if (!skipVmdk && header.fileName.toLowerCase().endsWith('.vmdk.gz')) {
const fileSlice = parsableFile.slice(offset, offset + header.fileSize)
let forwardsInflater = new pako.Inflate()
const readFile = async (start, end) => {
// if the next read is further down the stream than the previous one, reuse the previous zlib stream
async function parseGzipFromStart(start, end, fileSlice) {
const chunks = []
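// resultStart(): offset, in inflated bytes, of the first byte of the inflater's last result chunk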
const resultStart = () =>
forwardsInflater.strm.total_out - forwardsInflater.result.length
if (forwardsInflater.result != null && start < resultStart()) {
// the block we are reading starts before the last decompressed chunk, reset stream
forwardsInflater = new pako.Inflate()
}
let isLast = false
while (true) {
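// total_out counts the inflated bytes produced so far; once it passes `start`, the last result chunk overlaps the requested range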
if (forwardsInflater.strm.total_out > start) {
let chunk = forwardsInflater.result
if (resultStart() < start) {
chunk = chunk.slice(start - resultStart())
}
if (forwardsInflater.strm.total_out > end) {
chunk = chunk.slice(0, -(forwardsInflater.strm.total_out - end))
isLast = true
}
chunks.push(chunk)
}
if (isLast) {
// don't move the stream forwards if we took our last chunk
// gives the next read operation an opportunity to read from the same position
break
}
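// feed the next compressed chunk to the inflater, starting where it stopped (total_in counts compressed bytes consumed so far)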
const slice = fileSlice.slice(
forwardsInflater.strm.total_in,
forwardsInflater.strm.total_in + GZIP_CHUNK_SIZE
)
forwardsInflater.push(await slice.read(), pako.Z_SYNC_FLUSH)
}
const resultBuffer = new Uint8Array(sum(chunks.map(c => c.length)))
let index = 0
chunks.forEach(c => {
resultBuffer.set(c, index)
index += c.length
})
assert.strictEqual(resultBuffer.buffer.byteLength, end - start)
return resultBuffer.buffer
}
if (start === end) {
return new Uint8Array(0)
}