From 152cf09b7e8aed79d998a6de6d2abd219044d83a Mon Sep 17 00:00:00 2001
From: Florent BEAUCHAMP
Date: Thu, 27 Jul 2023 17:15:29 +0200
Subject: [PATCH] feat(vmware-explorer): handle sesparse files (#6909)

---
 .../vmware-explorer/VhdEsxiSeSparse.mjs       | 316 ++++++++++--------
 .../vmware-explorer/openDeltaVmdkAsVhd.mjs    |   7 +-
 CHANGELOG.unreleased.md                       |   1 +
 3 files changed, 173 insertions(+), 151 deletions(-)

diff --git a/@xen-orchestra/vmware-explorer/VhdEsxiSeSparse.mjs b/@xen-orchestra/vmware-explorer/VhdEsxiSeSparse.mjs
index dfbbe09c7..e877fbc3e 100644
--- a/@xen-orchestra/vmware-explorer/VhdEsxiSeSparse.mjs
+++ b/@xen-orchestra/vmware-explorer/VhdEsxiSeSparse.mjs
@@ -1,18 +1,54 @@
 import _computeGeometryForSize from 'vhd-lib/_computeGeometryForSize.js'
 import { createFooter, createHeader } from 'vhd-lib/_createFooterHeader.js'
-import { FOOTER_SIZE } from 'vhd-lib/_constants.js'
+import { DISK_TYPES, FOOTER_SIZE } from 'vhd-lib/_constants.js'
 import { notEqual, strictEqual } from 'node:assert'
 import { unpackFooter, unpackHeader } from 'vhd-lib/Vhd/_utils.js'
 import { VhdAbstract } from 'vhd-lib'
 
-// from https://github.com/qemu/qemu/commit/98eb9733f4cf2eeab6d12db7e758665d2fd5367b#
+// one big difference with the other flavors of VMDK: the grain tables are pre-allocated but not used in grain order,
+// so we have to read the grain directory to know where to find each grain table
 
-function readInt64(buffer, index) {
-  const n = buffer.readBigInt64LE(index * 8 /* size of an int64 in bytes */)
-  if (n > Number.MAX_SAFE_INTEGER) {
+const SE_SPARSE_DIR_NON_ALLOCATED = 0
+const SE_SPARSE_DIR_ALLOCATED = 1
+
+const SE_SPARSE_GRAIN_NON_ALLOCATED = 0 // check in parent
+const SE_SPARSE_GRAIN_UNMAPPED = 1 // grain has been unmapped, but the index of the previous grain is still readable for reclamation
+const SE_SPARSE_GRAIN_ZERO = 2
+const SE_SPARSE_GRAIN_ALLOCATED = 3
+
+const VHD_BLOCK_SIZE_BYTES = 2 * 1024 * 1024
+const GRAIN_SIZE_BYTES = 4 * 1024
+const GRAIN_TABLE_COUNT = 4 * 1024
+
+const ones = n => (1n << BigInt(n)) - 1n
+
+function asNumber(n) {
+  if (n > Number.MAX_SAFE_INTEGER)
     throw new Error(`can't handle ${n} ${Number.MAX_SAFE_INTEGER} ${n & 0x00000000ffffffffn}`)
-  }
-  return +n
+  return Number(n)
+}
+
+const readInt64 = (buffer, index) => asNumber(buffer.readBigInt64LE(index * 8))
+
+/**
+ * @returns {{topNibble: number, low60: bigint}} topNibble is the top 4 bits of the 64-bit entry, low60 is the remaining 60 bits
+ */
+function readTaggedEntry(buffer, index) {
+  const entry = buffer.readBigInt64LE(index * 8)
+  return { topNibble: Number(entry >> 60n), low60: entry & ones(60) }
+}
+
+function readSeSparseDir(buffer, index) {
+  const { topNibble, low60 } = readTaggedEntry(buffer, index)
+  return { type: topNibble, tableIndex: asNumber(low60) }
+}
+
+function readSeSparseTable(buffer, index) {
+  const { topNibble, low60 } = readTaggedEntry(buffer, index)
+  // https://lists.gnu.org/archive/html/qemu-block/2019-06/msg00934.html
+  const topIndexPart = low60 >> 48n // bring the top 12 bits down
+  const bottomIndexPart = (low60 & ones(48)) << 12n // bring the bottom 48 bits up
+  return { type: topNibble, grainIndex: asNumber(bottomIndexPart | topIndexPart) }
 }
 
 export default class VhdEsxiSeSparse extends VhdAbstract {
@@ -25,27 +61,22 @@ export default class VhdEsxiSeSparse extends VhdAbstract {
   #header
   #footer
 
-  #grainDirectory
-  // as we will read all grain with data with load everything in memory
-  // in theory , that can be 512MB of data for a 2TB fully allocated
-  // but our use case is to transfer a relatively small diff
-  // and random access is expensive in HTTP, and migration is a one time cors
-  // so let's go with naive approach, and future me will have to handle a more
-  // clever approach if necessary
-  // grain at zero won't be stored
+  #grainIndex // Map blockId => array of 512 grain entries: a byte offset (> 0) or a negated grain type (<= 0)
 
-  #grainMap = new Map()
-
-  #grainSize
-  #grainTableSize
-  #grainTableOffset
-  #grainOffset
+  #grainDirOffsetBytes
+  #grainDirSizeBytes
+  #grainTableOffsetBytes
+  #grainOffsetBytes
 
   static async open(esxi, datastore, path, parentVhd, opts) {
     const vhd = new VhdEsxiSeSparse(esxi, datastore, path, parentVhd, opts)
     await vhd.readHeaderAndFooter()
     return vhd
   }
+
+  get path() {
+    return this.#path
+  }
 
   constructor(esxi, datastore, path, parentVhd, { lookMissingBlockInParent = true } = {}) {
     super()
     this.#esxi = esxi
@@ -63,156 +94,149 @@ export default class VhdEsxiSeSparse extends VhdAbstract {
     return this.#footer
   }
 
-  async #readGrain(start, length = 4 * 1024) {
-    return (await this.#esxi.download(this.#datastore, this.#path, `${start}-${start + length - 1}`)).buffer()
-  }
-
   containsBlock(blockId) {
-    notEqual(this.#grainDirectory, undefined, "bat must be loaded to use contain blocks'")
-
-    // a grain table is 4096 entries of 4KB
-    // a grain table cover 8 vhd blocks
-    // grain table always exists in sespars
-
-    // depending on the paramters we also look into the parent data
+    notEqual(this.#grainIndex, undefined, 'the BAT must be loaded before using containsBlock')
     return (
-      this.#grainDirectory.readInt32LE(blockId * 4) !== 0 ||
+      this.#grainIndex.get(blockId) !== undefined ||
       (this.#lookMissingBlockInParent && this.#parentVhd.containsBlock(blockId))
     )
   }
 
-  async #read(start, end) {
-    return (await this.#esxi.download(this.#datastore, this.#path, `${start}-${end}`)).buffer()
+  async #read(start, length) {
+    const buffer = await (
+      await this.#esxi.download(this.#datastore, this.#path, `${start}-${start + length - 1}`)
+    ).buffer()
+    strictEqual(buffer.length, length)
+    return buffer
   }
 
   async readHeaderAndFooter() {
-    const buffer = await this.#read(0, 2048)
-    strictEqual(buffer.readBigInt64LE(0), 0xcafebaben)
+    const vmdkHeaderBuffer = await this.#read(0, 2048)
 
-    strictEqual(readInt64(buffer, 1), 0x200000001) // version 2.1
+    strictEqual(vmdkHeaderBuffer.readBigInt64LE(0), 0xcafebaben)
+    strictEqual(readInt64(vmdkHeaderBuffer, 1), 0x200000001) // version 2.1
 
-    const capacity = readInt64(buffer, 2)
-    const grain_size = readInt64(buffer, 3)
+    this.#grainDirOffsetBytes = readInt64(vmdkHeaderBuffer, 16) * 512
+    // console.log('grainDirOffsetBytes', this.#grainDirOffsetBytes)
+    this.#grainDirSizeBytes = readInt64(vmdkHeaderBuffer, 17) * 512
+    // console.log('grainDirSizeBytes', this.#grainDirSizeBytes)
 
-    const grain_tables_offset = readInt64(buffer, 18)
-    const grain_tables_size = readInt64(buffer, 19)
-    this.#grainOffset = readInt64(buffer, 24)
+    const grainSizeSectors = readInt64(vmdkHeaderBuffer, 3)
+    const grainSizeBytes = grainSizeSectors * 512 // 8 sectors = 4KB by default
+    strictEqual(grainSizeBytes, GRAIN_SIZE_BYTES) // we only support the default grain size
 
-    this.#grainSize = grain_size * 512 // 8 sectors / 4KB default
-    this.#grainTableOffset = grain_tables_offset * 512
-    this.#grainTableSize = grain_tables_size * 512
+    this.#grainTableOffsetBytes = readInt64(vmdkHeaderBuffer, 18) * 512
+    // console.log('grainTableOffsetBytes', this.#grainTableOffsetBytes)
 
-    const size = capacity * grain_size * 512
-    this.#header = unpackHeader(createHeader(Math.ceil(size / (4096 * 512))))
-    const geometry = _computeGeometryForSize(size)
-    const actualSize = geometry.actualSize
+    const grainTableCount = (readInt64(vmdkHeaderBuffer, 4) * 512) / 8 // count is the number of 64-bit entries in each table
+    // console.log('grainTableCount', grainTableCount)
+    strictEqual(grainTableCount, GRAIN_TABLE_COUNT) // we only support tables of 4096 entries (the default)
+
+    this.#grainOffsetBytes = readInt64(vmdkHeaderBuffer, 24) * 512
+    // console.log('grainOffsetBytes', this.#grainOffsetBytes)
+
+    const sizeBytes = readInt64(vmdkHeaderBuffer, 2) * 512
+    // console.log('sizeBytes', sizeBytes)
+
+    const nbBlocks = Math.ceil(sizeBytes / VHD_BLOCK_SIZE_BYTES)
+    this.#header = unpackHeader(createHeader(nbBlocks))
+    const geometry = _computeGeometryForSize(sizeBytes)
     this.#footer = unpackFooter(
-      createFooter(actualSize, Math.floor(Date.now() / 1000), geometry, FOOTER_SIZE, this.#parentVhd.footer.diskType)
+      createFooter(sizeBytes, Math.floor(Date.now() / 1000), geometry, FOOTER_SIZE, DISK_TYPES.DYNAMIC)
     )
   }
 
   async readBlockAllocationTable() {
-    const CHUNK_SIZE = 64 * 512
+    this.#grainIndex = new Map()
 
-    strictEqual(this.#grainTableSize % CHUNK_SIZE, 0)
-
-    for (let chunkIndex = 0, grainIndex = 0; chunkIndex < this.#grainTableSize / CHUNK_SIZE; chunkIndex++) {
-      process.stdin.write('.')
-      const start = chunkIndex * CHUNK_SIZE + this.#grainTableOffset
-      const end = start + 4096 * 8 - 1
-      const buffer = await this.#read(start, end)
-      for (let indexInChunk = 0; indexInChunk < 4096; indexInChunk++) {
-        const entry = buffer.readBigInt64LE(indexInChunk * 8)
-        switch (entry) {
-          case 0n: // not allocated, go to parent
-            break
-          case 1n: // unmapped
-            break
-        }
-        if (entry > 3n) {
-          this.#grainMap.set(grainIndex)
-          grainIndex++
-        }
-      }
-    }
-
-    // read grain directory and the grain tables
-    const nbBlocks = this.header.maxTableEntries
-    this.#grainDirectory = await this.#read(2048 /* header length */, 2048 + nbBlocks * 4 - 1)
-  }
-
-  // we're lucky : a grain address can address exacty a full block
-  async readBlock(blockId) {
-    notEqual(this.#grainDirectory, undefined, 'grainDirectory is not loaded')
-    const sectorOffset = this.#grainDirectory.readInt32LE(blockId * 4)
-
-    const buffer = (await this.#parentVhd.readBlock(blockId)).buffer
-
-    if (sectorOffset === 0) {
-      strictEqual(this.#lookMissingBlockInParent, true, "shouldn't have empty block in a delta alone")
-      return {
-        id: blockId,
-        bitmap: buffer.slice(0, 512),
-        data: buffer.slice(512),
-        buffer,
-      }
-    }
-    const offset = sectorOffset * 512
-
-    const graintable = await this.#read(offset, offset + 4096 * 4 /* grain table length */ - 1)
-
-    strictEqual(graintable.length, 4096 * 4)
-    // we have no guaranty that data are order or contiguous
-    // let's construct ranges to limit the number of queries
-    let rangeStart, offsetStart, offsetEnd
-
-    const changeRange = async (index, offset) => {
-      if (offsetStart !== undefined) {
-        // if there was a
-        if (offset === offsetEnd) {
-          offsetEnd++
-          return
-        }
-        const grains = await this.#read(offsetStart * 512, offsetEnd * 512 - 1)
-        grains.copy(buffer, (rangeStart + 1) /* block bitmap */ * 512)
-      }
-      if (offset) {
-        // we're at the beginning of a range present in the file
-        rangeStart = index
-        offsetStart = offset
-        offsetEnd = offset + 1
-      } else {
-        // we're at the beginning of a range from the parent or empty
-        rangeStart = undefined
-        offsetStart = undefined
-        offsetEnd = undefined
-      }
-    }
-
-    for (let i = 0; i < graintable.length / 4; i++) {
-      const grainOffset = graintable.readInt32LE(i * 4)
-      if (grainOffset === 0) {
-        await changeRange()
-        // from parent
+    const tableSizeBytes = GRAIN_TABLE_COUNT * 8
+    const grainDirBuffer = await this.#read(this.#grainDirOffsetBytes, this.#grainDirSizeBytes)
+    // read the grain dir (first level)
+    for (let grainDirIndex = 0; grainDirIndex < grainDirBuffer.length / 8; grainDirIndex++) {
+      const { type: grainDirType, tableIndex } = readSeSparseDir(grainDirBuffer, grainDirIndex)
+      if (grainDirType === SE_SPARSE_DIR_NON_ALLOCATED) {
+        // no grain table allocated at all in this grain dir
        continue
      }
-      if (grainOffset === 1) {
-        await changeRange()
-        // this is a emptied grain, no data, don't look into parent
-        buffer.fill(0, (i + 1) /* block bitmap */ * 512)
+      strictEqual(grainDirType, SE_SPARSE_DIR_ALLOCATED)
+      // read the corresponding grain table (second level)
+      const grainTableBuffer = await this.#read(
+        this.#grainTableOffsetBytes + tableIndex * tableSizeBytes,
+        tableSizeBytes
+      )
+      // entries are a byte offset if > 0, or a negated grain type if <= 0
+      let grainOffsets = []
+      let blockId = grainDirIndex * 8
+
+      const addGrain = val => {
+        grainOffsets.push(val)
+        // 4096 grains of 4KB per dir entry => 16MB per grain dir entry
+        // 1 VHD block = 2MB
+        // 512 grains => 1 block
+        // 8 blocks per dir entry
+        if (grainOffsets.length === 512) {
+          this.#grainIndex.set(blockId, grainOffsets)
+          grainOffsets = []
+          blockId++
+        }
       }
-      if (grainOffset > 1) {
-        // non empty grain
-        await changeRange(i, grainOffset)
+      for (let grainTableIndex = 0; grainTableIndex < grainTableBuffer.length / 8; grainTableIndex++) {
+        const { type: grainType, grainIndex } = readSeSparseTable(grainTableBuffer, grainTableIndex)
+        if (grainType === SE_SPARSE_GRAIN_ALLOCATED) {
+          // the grain index fits in a 32-bit integer for VMDKs smaller than 2TB
+          const offsetByte = grainIndex * GRAIN_SIZE_BYTES + this.#grainOffsetBytes
+          addGrain(offsetByte)
+        } else {
+          // negate the type to distinguish it from an offset
+          // (a real offset is never zero, so there is no ambiguity)
+          addGrain(-grainType)
+        }
-      }
-    }
-    await changeRange()
-    return {
-      id: blockId,
-      bitmap: buffer.slice(0, 512),
-      data: buffer.slice(512),
-      buffer,
+      }
+      strictEqual(grainOffsets.length, 0)
     }
   }
+
+  async readBlock(blockId) {
+    let changed = false
+    const parentBlock = await this.#parentVhd.readBlock(blockId)
+    const parentBuffer = parentBlock.buffer
+    const grainOffsets = this.#grainIndex.get(blockId) // may be undefined if this block is missing from the child and lookMissingBlockInParent is true
+    const EMPTY_GRAIN = Buffer.alloc(GRAIN_SIZE_BYTES, 0)
+    for (const index in grainOffsets) {
+      const value = grainOffsets[index]
+      let data
+      if (value > 0) {
+        // a positive value is the byte offset of an SE_SPARSE_GRAIN_ALLOCATED grain
+        data = await this.#read(value, GRAIN_SIZE_BYTES)
+      } else {
+        // back to the real grain type
+        const type = value * -1
+        switch (type) {
+          case SE_SPARSE_GRAIN_ZERO:
+          case SE_SPARSE_GRAIN_UNMAPPED:
+            data = EMPTY_GRAIN
+            break
+          case SE_SPARSE_GRAIN_NON_ALLOCATED:
+            /* from parent */
+            break
+          default:
+            throw new Error(`can't handle grain type ${type}`)
+        }
+      }
+      if (data) {
+        changed = true
+        data.copy(parentBuffer, index * GRAIN_SIZE_BYTES + 512 /* block bitmap */)
+      }
+    }
+    // no need to copy anything if all the data comes from the parent
+    return changed
+      ? {
+          id: blockId,
+          bitmap: parentBuffer.slice(0, 512),
+          data: parentBuffer.slice(512),
+          buffer: parentBuffer,
+        }
+      : parentBlock
+  }
 }
diff --git a/@xen-orchestra/vmware-explorer/openDeltaVmdkAsVhd.mjs b/@xen-orchestra/vmware-explorer/openDeltaVmdkAsVhd.mjs
index 765dbe717..81bf2939b 100644
--- a/@xen-orchestra/vmware-explorer/openDeltaVmdkAsVhd.mjs
+++ b/@xen-orchestra/vmware-explorer/openDeltaVmdkAsVhd.mjs
@@ -1,13 +1,10 @@
+import VHDEsxiSeSparse from './VhdEsxiSeSparse.mjs'
 import VhdEsxiCowd from './VhdEsxiCowd.mjs'
-// import VhdEsxiSeSparse from "./VhdEsxiSeSparse.mjs";
 
 export default async function openDeltaVmdkasVhd(esxi, datastore, path, parentVhd, opts) {
   let vhd
   if (path.endsWith('-sesparse.vmdk')) {
-    throw new Error(
-      `sesparse VMDK reading is not functional yet ${path}. For now, this VM can only be migrated if it doesn't have any snapshots and if it is halted.`
-    )
-    // vhd = new VhdEsxiSeSparse(esxi, datastore, path, parentVhd, opts)
+    vhd = new VHDEsxiSeSparse(esxi, datastore, path, parentVhd, opts)
   } else {
     if (path.endsWith('-delta.vmdk')) {
       vhd = new VhdEsxiCowd(esxi, datastore, path, parentVhd, opts)
diff --git a/CHANGELOG.unreleased.md b/CHANGELOG.unreleased.md
index 128a4254f..969890a0c 100644
--- a/CHANGELOG.unreleased.md
+++ b/CHANGELOG.unreleased.md
@@ -8,6 +8,7 @@
 > Users must be able to say: “Nice enhancement, I'm eager to test it”
 
 - [Backup/Restore] Button to open the raw log in the REST API (PR [#6936](https://github.com/vatesfr/xen-orchestra/pull/6936))
+- [Import/From VMWare] Support ESXi 6.5+ with snapshots (PR [#6909](https://github.com/vatesfr/xen-orchestra/pull/6909))
 - [Netbox] New major version. BREAKING: in order for this new version to work, you need to assign the type `virtualization > vminterface` to the custom field `UUID` in your Netbox instance. [See documentation](https://xen-orchestra.com/docs/advanced.html#netbox). [#6038](https://github.com/vatesfr/xen-orchestra/issues/6038) [#6135](https://github.com/vatesfr/xen-orchestra/issues/6135) [#6024](https://github.com/vatesfr/xen-orchestra/issues/6024) [#6036](https://github.com/vatesfr/xen-orchestra/issues/6036) [Forum#6070](https://xcp-ng.org/forum/topic/6070) [Forum#6149](https://xcp-ng.org/forum/topic/6149) [Forum#6332](https://xcp-ng.org/forum/topic/6332) [Forum#6902](https://xcp-ng.org/forum/topic/6902) (PR [#6950](https://github.com/vatesfr/xen-orchestra/pull/6950))
   - Synchronize VM description
   - Synchronize VM platform
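
Notes on the format handled above (illustrative sketches, not part of the patch):

The 64-bit entries decoded by readTaggedEntry() and readSeSparseTable() pack the grain type in the top nibble, and store the grain index with its bottom 48 bits and top 12 bits swapped. A minimal standalone sketch of this decoding; the encode() helper and the sample values are hypothetical, only decode() mirrors the patch:

    const ones = n => (1n << BigInt(n)) - 1n

    // hypothetical encoder, for illustration only (grainIndex is a BigInt)
    const encode = (type, grainIndex) =>
      (BigInt(type) << 60n) | ((grainIndex & ones(12)) << 48n) | (grainIndex >> 12n)

    // mirrors readTaggedEntry() + readSeSparseTable()
    function decode(entry) {
      const type = Number(entry >> 60n) // top nibble
      const low60 = entry & ones(60)
      // the stored low 48 bits are the top of the index, the stored top 12 bits are its bottom
      const grainIndex = Number(((low60 & ones(48)) << 12n) | (low60 >> 48n))
      return { type, grainIndex }
    }

    console.log(decode(encode(3, 0x123456n))) // { type: 3, grainIndex: 1193046 } (= 0x123456)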
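
readHeaderAndFooter() only consumes a few fields of the 2048-byte sesparse header; every on-disk value is a little-endian int64 read at index * 8 and expressed in 512-byte sectors. A sketch of the layout as the patch uses it; the field names are descriptive, not from an official spec, and this int64 helper skips the MAX_SAFE_INTEGER guard for brevity:

    const SECTOR = 512
    const int64 = (buf, i) => Number(buf.readBigInt64LE(i * 8))

    function parseSeSparseHeader(buf /* first 2048 bytes of the file */) {
      return {
        magic: buf.readBigInt64LE(0), // 0xcafebaben
        version: buf.readBigInt64LE(8), // 0x200000001n, i.e. version 2.1
        capacityBytes: int64(buf, 2) * SECTOR, // virtual disk size
        grainSizeBytes: int64(buf, 3) * SECTOR, // 8 sectors = 4KB, the only supported value
        grainTableSizeBytes: int64(buf, 4) * SECTOR, // 64 sectors = 4096 8-byte entries
        grainDirOffsetBytes: int64(buf, 16) * SECTOR,
        grainDirSizeBytes: int64(buf, 17) * SECTOR,
        grainTableOffsetBytes: int64(buf, 18) * SECTOR,
        grainOffsetBytes: int64(buf, 24) * SECTOR, // where the grain data itself starts
      }
    }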
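
The arithmetic behind readBlockAllocationTable(): one grain directory entry points at one 4096-entry grain table; at 4KB per grain that is 16MB of virtual disk, i.e. exactly 8 VHD blocks of 2MB, each covered by 512 consecutive grain entries. A worked sketch of the index math under the constants the patch enforces (the sample grain number is arbitrary):

    const GRAIN_SIZE_BYTES = 4 * 1024
    const GRAIN_TABLE_COUNT = 4 * 1024
    const VHD_BLOCK_SIZE_BYTES = 2 * 1024 * 1024
    const GRAINS_PER_BLOCK = VHD_BLOCK_SIZE_BYTES / GRAIN_SIZE_BYTES // 512
    const BLOCKS_PER_DIR_ENTRY = GRAIN_TABLE_COUNT / GRAINS_PER_BLOCK // 8

    const grainNumber = 123456
    const grainDirIndex = Math.floor(grainNumber / GRAIN_TABLE_COUNT) // 30: which grain table
    const grainTableIndex = grainNumber % GRAIN_TABLE_COUNT // 576: entry inside that table
    const blockId = Math.floor(grainNumber / GRAINS_PER_BLOCK) // 241: VHD block it lands in
    const indexInBlock = grainNumber % GRAINS_PER_BLOCK // 64: grain inside that block
    console.log(blockId === grainDirIndex * BLOCKS_PER_DIR_ENTRY + Math.floor(grainTableIndex / GRAINS_PER_BLOCK)) // true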
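
readBlock() merges at grain granularity: it starts from the parent's block (a 512-byte bitmap followed by 2MB of data) and overwrites only the grains the delta actually carries, leaving SE_SPARSE_GRAIN_NON_ALLOCATED grains to the parent. A simplified sketch of that merge loop; mergeBlock() and grainAt() are hypothetical stand-ins for the method's inline #read() and grain-type handling:

    const GRAIN_SIZE_BYTES = 4 * 1024

    // parentBuffer: 512-byte bitmap + 2MB of data; grainEntries: array from #grainIndex, possibly undefined
    async function mergeBlock(parentBuffer, grainEntries, grainAt) {
      let changed = false
      // for...in tolerates an undefined grainEntries, as in the patch
      for (const i in grainEntries) {
        const data = await grainAt(grainEntries[i]) // a 4KB Buffer, or undefined to keep the parent grain
        if (data !== undefined) {
          changed = true
          data.copy(parentBuffer, i * GRAIN_SIZE_BYTES + 512 /* skip the block bitmap */)
        }
      }
      return changed // false => the block can be served straight from the parent
    }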
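
With the sesparse implementation wired in, openDeltaVmdkAsVhd() picks the reader from the file name suffix. A hedged usage sketch; the esxi client, the datastore name, the path and parentVhd are placeholders:

    import openDeltaVmdkAsVhd from './openDeltaVmdkAsVhd.mjs'

    // esxi, parentVhd and both the datastore and path arguments are placeholders
    const vhd = await openDeltaVmdkAsVhd(esxi, 'datastore1', 'vm/vm-000001-sesparse.vmdk', parentVhd)
    await vhd.readBlockAllocationTable() // builds the grain index, required before containsBlock/readBlock
    if (vhd.containsBlock(0)) {
      const { data } = await vhd.readBlock(0) // 2MB of data, merged with the parent
    }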