From c955da9bc6d26e7b7db3a84bc79589dc629c4494 Mon Sep 17 00:00:00 2001 From: Julien Fontanet Date: Fri, 11 Oct 2019 14:48:37 +0200 Subject: [PATCH] feat(backups-cli): lowlevel tool to help with backups (#4556) --- .eslintrc.js | 2 +- @xen-orchestra/backups-cli/index.js | 369 ++++++++++++++++++ @xen-orchestra/backups-cli/package.json | 27 ++ packages/vhd-lib/src/createSyntheticStream.js | 19 +- yarn.lock | 14 + 5 files changed, 424 insertions(+), 7 deletions(-) create mode 100755 @xen-orchestra/backups-cli/index.js create mode 100644 @xen-orchestra/backups-cli/package.json diff --git a/.eslintrc.js b/.eslintrc.js index 74996ff2b..8acfb728d 100644 --- a/.eslintrc.js +++ b/.eslintrc.js @@ -21,7 +21,7 @@ module.exports = { overrides: [ { - files: ['cli.js', '*-cli.js', 'packages/*cli*/**/*.js'], + files: ['cli.js', '*-cli.js', '**/*cli*/**/*.js'], rules: { 'no-console': 'off', }, diff --git a/@xen-orchestra/backups-cli/index.js b/@xen-orchestra/backups-cli/index.js new file mode 100755 index 000000000..4ea7ba4ef --- /dev/null +++ b/@xen-orchestra/backups-cli/index.js @@ -0,0 +1,369 @@ +#!/usr/bin/env node + +const args = process.argv.slice(2) + +if ( + args.length === 0 || + /^(?:-h|--help)$/.test(args[0]) || + args[0] !== 'clean-vms' +) { + console.log('Usage: xo-backups clean-vms [--force] xo-vm-backups/*') + // eslint-disable-next-line no-process-exit + return process.exit(1) +} + +// remove `clean-vms` arg which is the only available command ATM +args.splice(0, 1) + +// only act (ie delete files) if `--force` is present +const force = args[0] === '--force' +if (force) { + args.splice(0, 1) +} + +// ----------------------------------------------------------------------------- + +const assert = require('assert') +const lockfile = require('proper-lockfile') +const { default: Vhd } = require('vhd-lib') +const { curryRight, flatten } = require('lodash') +const { dirname, resolve } = require('path') +const { DISK_TYPE_DIFFERENCING } = require('vhd-lib/dist/_constants') +const { pipe, promisifyAll } = require('promise-toolbox') + +const fs = promisifyAll(require('fs')) +const handler = require('@xen-orchestra/fs').getHandler({ url: 'file://' }) + +// ----------------------------------------------------------------------------- + +const asyncMap = curryRight((iterable, fn) => + Promise.all( + Array.isArray(iterable) ? iterable.map(fn) : Array.from(iterable, fn) + ) +) + +const filter = (...args) => thisArg => thisArg.filter(...args) + +// TODO: better check? 

// our heuristic is not good enough, there have been some false positives
// (detected as invalid by us but valid according to `tar` and imported
// successfully), either:
// - these files were normal but the check is incorrect
// - these files were invalid but without data loss
// - these files were invalid but with silent data loss
//
// FIXME: the heuristic does not work if the XVA is compressed, we need to
// implement a specific test for it
//
// maybe read the end of the file, looking for an entry named
// /^Ref:\d+\/\d+\.checksum$/, and validate the tar structure from there
//
// https://github.com/npm/node-tar/issues/234#issuecomment-538190295
const isValidTar = async path => {
  try {
    const fd = await fs.open(path, 'r')
    try {
      const { size } = await fs.fstat(fd)
      if (size <= 1024 || size % 512 !== 0) {
        return false
      }

      // a valid tar archive ends with two 512-byte blocks of zeros
      const buf = Buffer.allocUnsafe(1024)
      assert.strictEqual(
        await fs.read(fd, buf, 0, buf.length, size - buf.length),
        buf.length
      )
      return buf.every(_ => _ === 0)
    } finally {
      fs.close(fd).catch(noop)
    }
  } catch (error) {
    // never throw: log and report as valid to avoid side effects
    console.error('isValidTar', path, error)
    return true
  }
}

const noop = Function.prototype

const readDir = path =>
  fs.readdir(path).then(entries => {
    entries.forEach((entry, i) => {
      entries[i] = `${path}/${entry}`
    })

    return entries
  })

// -----------------------------------------------------------------------------

// chain is an array of VHDs, from child to parent
//
// the whole chain will be merged into parent, parent will be renamed to child
// and all the others will be deleted
async function mergeVhdChain(chain) {
  assert(chain.length >= 2)

  const child = chain[0]
  const parent = chain[chain.length - 1]
  const children = chain.slice(0, -1).reverse()

  console.warn('Unused parents of VHD', child)
  chain
    .slice(1)
    .reverse()
    .forEach(parent => {
      console.warn(' ', parent)
    })
  force && console.warn('  merging…')
  console.warn('')
  if (force) {
    // `mergeVhd` does not work with a stream, either
    // - make it accept a stream
    // - or create a synthetic VHD which is not a stream
    return console.warn('TODO: implement merge')
    // await mergeVhd(
    //   handler,
    //   parent,
    //   handler,
    //   children.length === 1
    //     ? child
    //     : await createSyntheticStream(handler, children)
    // )
  }

  await Promise.all([
    force && fs.rename(parent, child),
    asyncMap(children.slice(0, -1), child => {
      console.warn('Unused VHD', child)
      force && console.warn('  deleting…')
      console.warn('')
      return force && handler.unlink(child)
    }),
  ])
}

const listVhds = pipe([
  vmDir => vmDir + '/vdis',
  readDir,
  asyncMap(readDir),
  flatten,
  asyncMap(readDir),
  flatten,
  filter(_ => _.endsWith('.vhd')),
])

async function handleVm(vmDir) {
  const vhds = new Set()
  const vhdParents = { __proto__: null }
  const vhdChildren = { __proto__: null }

  // remove broken VHDs
  await asyncMap(await listVhds(vmDir), async path => {
    try {
      const vhd = new Vhd(handler, path)
      await vhd.readHeaderAndFooter()
      vhds.add(path)
      if (vhd.footer.diskType === DISK_TYPE_DIFFERENCING) {
        const parent = resolve(dirname(path), vhd.header.parentUnicodeName)
        vhdParents[path] = parent
        if (parent in vhdChildren) {
          const error = new Error(
            'this script does not support multiple VHD children'
          )
          error.parent = parent
          error.child1 = vhdChildren[parent]
          error.child2 = path
          throw error // should we throw?
+ } + vhdChildren[parent] = path + } + } catch (error) { + console.warn('Error while checking VHD', path) + console.warn(' ', error) + if (error != null && error.code === 'ERR_ASSERTION') { + force && console.warn(' deleting…') + console.warn('') + force && (await handler.unlink(path)) + } + } + }) + + // remove VHDs with missing ancestors + { + const deletions = [] + + // return true if the VHD has been deleted or is missing + const deleteIfOrphan = vhd => { + const parent = vhdParents[vhd] + if (parent === undefined) { + return + } + + // no longer needs to be checked + delete vhdParents[vhd] + + deleteIfOrphan(parent) + + if (!vhds.has(parent)) { + vhds.delete(vhd) + + console.warn('Error while checking VHD', vhd) + console.warn(' missing parent', parent) + force && console.warn(' deleting…') + console.warn('') + force && deletions.push(handler.unlink(vhd)) + } + } + + // > A property that is deleted before it has been visited will not be + // > visited later. + // > + // > -- https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/for...in#Deleted_added_or_modified_properties + for (const child in vhdParents) { + deleteIfOrphan(child) + } + + await Promise.all(deletions) + } + + const [jsons, xvas] = await readDir(vmDir).then(entries => [ + entries.filter(_ => _.endsWith('.json')), + new Set(entries.filter(_ => _.endsWith('.xva'))), + ]) + + await asyncMap(xvas, async path => { + // check is not good enough to delete the file, the best we can do is report + // it + if (!(await isValidTar(path))) { + console.warn('Potential broken XVA', path) + console.warn('') + } + }) + + const unusedVhds = new Set(vhds) + const unusedXvas = new Set(xvas) + + // compile the list of unused XVAs and VHDs, and remove backup metadata which + // reference a missing XVA/VHD + await asyncMap(jsons, async json => { + const metadata = JSON.parse(await fs.readFile(json)) + const { mode } = metadata + if (mode === 'full') { + const linkedXva = resolve(vmDir, metadata.xva) + + if (xvas.has(linkedXva)) { + unusedXvas.delete(linkedXva) + } else { + console.warn('Error while checking backup', json) + console.warn(' missing file', linkedXva) + force && console.warn(' deleting…') + console.warn('') + force && (await handler.unlink(json)) + } + } else if (mode === 'delta') { + const linkedVhds = (() => { + const { vhds } = metadata + return Object.keys(vhds).map(key => resolve(vmDir, vhds[key])) + })() + + // FIXME: find better approach by keeping as much of the backup as + // possible (existing disks) even if one disk is missing + if (linkedVhds.every(_ => vhds.has(_))) { + linkedVhds.forEach(_ => unusedVhds.delete(_)) + } else { + console.warn('Error while checking backup', json) + const missingVhds = linkedVhds.filter(_ => !vhds.has(_)) + console.warn( + ' %i/%i missing VHDs', + missingVhds.length, + linkedVhds.length + ) + missingVhds.forEach(vhd => { + console.warn(' ', vhd) + }) + force && console.warn(' deleting…') + console.warn('') + force && (await handler.unlink(json)) + } + } + }) + + // TODO: parallelize by vm/job/vdi + const unusedVhdsDeletion = [] + { + // VHD chains (as list from child to ancestor) to merge indexed by last + // ancestor + const vhdChainsToMerge = { __proto__: null } + + const toCheck = new Set(unusedVhds) + + const getUsedChildChainOrDelete = vhd => { + if (vhd in vhdChainsToMerge) { + const chain = vhdChainsToMerge[vhd] + delete vhdChainsToMerge[vhd] + return chain + } + + if (!unusedVhds.has(vhd)) { + return [vhd] + } + + // no longer needs to be checked + 
      toCheck.delete(vhd)

      const child = vhdChildren[vhd]
      if (child !== undefined) {
        const chain = getUsedChildChainOrDelete(child)
        if (chain !== undefined) {
          chain.push(vhd)
          return chain
        }
      }

      console.warn('Unused VHD', vhd)
      force && console.warn('  deleting…')
      console.warn('')
      force && unusedVhdsDeletion.push(handler.unlink(vhd))
    }

    toCheck.forEach(vhd => {
      vhdChainsToMerge[vhd] = getUsedChildChainOrDelete(vhd)
    })

    Object.keys(vhdChainsToMerge).forEach(key => {
      const chain = vhdChainsToMerge[key]
      if (chain !== undefined) {
        unusedVhdsDeletion.push(mergeVhdChain(chain))
      }
    })
  }

  await Promise.all([
    // spread the array so that Promise.all waits for each individual deletion
    ...unusedVhdsDeletion,
    asyncMap(unusedXvas, path => {
      console.warn('Unused XVA', path)
      force && console.warn('  deleting…')
      console.warn('')
      return force && handler.unlink(path)
    }),
  ])
}

// -----------------------------------------------------------------------------

asyncMap(args, async vmDir => {
  vmDir = resolve(vmDir)

  // TODO: implement this in `xo-server`, not easy because `proper-lockfile`
  // is not compatible with `@xen-orchestra/fs`
  const release = await lockfile.lock(vmDir)
  try {
    await handleVm(vmDir)
  } catch (error) {
    console.error('handleVm', vmDir, error)
  } finally {
    await release()
  }
}).catch(error => console.error('main', error))
diff --git a/@xen-orchestra/backups-cli/package.json b/@xen-orchestra/backups-cli/package.json
new file mode 100644
index 000000000..c1b8e76e7
--- /dev/null
+++ b/@xen-orchestra/backups-cli/package.json
@@ -0,0 +1,27 @@
+{
+  "bin": {
+    "xo-backups": "index.js"
+  },
+  "bugs": "https://github.com/vatesfr/xen-orchestra/issues",
+  "dependencies": {
+    "@xen-orchestra/fs": "^0.10.1",
+    "lodash": "^4.17.15",
+    "promise-toolbox": "^0.13.0",
+    "proper-lockfile": "^4.1.1",
+    "vhd-lib": "^0.7.0"
+  },
+  "engines": {
+    "node": ">=8.16.1"
+  },
+  "homepage": "https://github.com/vatesfr/xen-orchestra/tree/master/@xen-orchestra/backups-cli",
+  "name": "@xen-orchestra/backups-cli",
+  "repository": {
+    "directory": "@xen-orchestra/backups-cli",
+    "type": "git",
+    "url": "https://github.com/vatesfr/xen-orchestra.git"
+  },
+  "scripts": {
+    "postversion": "npm publish --access public"
+  },
+  "version": "0.0.0"
+}
diff --git a/packages/vhd-lib/src/createSyntheticStream.js b/packages/vhd-lib/src/createSyntheticStream.js
index beaffd172..8b5a3871c 100644
--- a/packages/vhd-lib/src/createSyntheticStream.js
+++ b/packages/vhd-lib/src/createSyntheticStream.js
@@ -13,7 +13,7 @@ import {
 import { fuFooter, fuHeader, checksumStruct } from './_structs'
 import { test as mapTestBit } from './_bitmap'
 
-export default async function createSyntheticStream(handler, path) {
+export default async function createSyntheticStream(handler, paths) {
   const fds = []
   const cleanup = () => {
     for (let i = 0, n = fds.length; i < n; ++i) {
@@ -24,7 +24,7 @@
   }
   try {
     const vhds = []
-    while (true) {
+    const open = async path => {
       const fd = await handler.openFile(path, 'r')
       fds.push(fd)
       const vhd = new Vhd(handler, fd)
@@ -32,11 +32,18 @@
       await vhd.readHeaderAndFooter()
       await vhd.readBlockAllocationTable()
 
-      if (vhd.footer.diskType === DISK_TYPE_DYNAMIC) {
-        break
+      return vhd
+    }
+    if (typeof paths === 'string') {
+      let path = paths
+      let vhd
+      while ((vhd = await open(path)).footer.diskType !== DISK_TYPE_DYNAMIC) {
+        path = resolveRelativeFromFile(path, vhd.header.parentUnicodeName)
+      }
+    } else {
+      for 
(const path of paths) { + await open(path) } - - path = resolveRelativeFromFile(path, vhd.header.parentUnicodeName) } const nVhds = vhds.length diff --git a/yarn.lock b/yarn.lock index 295786470..3b8d6c8d2 100644 --- a/yarn.lock +++ b/yarn.lock @@ -11165,6 +11165,15 @@ prop-types@^15.5.10, prop-types@^15.5.4, prop-types@^15.5.6, prop-types@^15.5.7, object-assign "^4.1.1" react-is "^16.8.1" +proper-lockfile@^4.1.1: + version "4.1.1" + resolved "https://registry.yarnpkg.com/proper-lockfile/-/proper-lockfile-4.1.1.tgz#284cf9db9e30a90e647afad69deb7cb06881262c" + integrity sha512-1w6rxXodisVpn7QYvLk706mzprPTAPCYAqxMvctmPN3ekuRk/kuGkGc82pangZiAt4R3lwSuUzheTTn0/Yb7Zg== + dependencies: + graceful-fs "^4.1.11" + retry "^0.12.0" + signal-exit "^3.0.2" + protocol-buffers-encodings@^1.1.0: version "1.1.0" resolved "https://registry.yarnpkg.com/protocol-buffers-encodings/-/protocol-buffers-encodings-1.1.0.tgz#f3905631106669b85381bad47a336add7d206873" @@ -12408,6 +12417,11 @@ ret@~0.1.10: resolved "https://registry.yarnpkg.com/ret/-/ret-0.1.15.tgz#b8a4825d5bdb1fc3f6f53c2bc33f81388681c7bc" integrity sha512-TTlYpa+OL+vMMNG24xSlQGEJ3B/RzEfUlLct7b5G/ytav+wPrplCpVMFuwzXbkecJrb6IYo1iFb0S9v37754mg== +retry@^0.12.0: + version "0.12.0" + resolved "https://registry.yarnpkg.com/retry/-/retry-0.12.0.tgz#1b42a6266a21f07421d1b0b54b7dc167b01c013b" + integrity sha1-G0KmJmoh8HQh0bC1S33BZ7AcATs= + reusify@^1.0.0: version "1.0.4" resolved "https://registry.yarnpkg.com/reusify/-/reusify-1.0.4.tgz#90da382b1e126efc02146e90845a88db12925d76"
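
--
Notes (not part of the patch):

1. Usage. `clean-vms` is the only command for now and it defaults to a dry
run: problems are only reported. Deletions (and, once implemented, merges)
only happen when `--force` is passed:

    xo-backups clean-vms xo-vm-backups/*
    xo-backups clean-vms --force xo-vm-backups/*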
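2. The curried `asyncMap` is what keeps the `listVhds` pipeline point-free.
A minimal sketch of how the two call forms relate, assuming only `lodash`:

    const { curryRight } = require('lodash')

    const asyncMap = curryRight((iterable, fn) =>
      Promise.all(
        Array.isArray(iterable) ? iterable.map(fn) : Array.from(iterable, fn)
      )
    )

    // fully applied: (iterable, fn), as in `asyncMap(args, ...)`
    asyncMap([1, 2], async n => n * 2).then(console.log) // [2, 4]

    // partially applied from the right, as in `asyncMap(readDir)` inside
    // pipe(): the callback is fixed, the iterable comes later
    const double = asyncMap(async n => n * 2)
    double([1, 2]).then(console.log) // [2, 4]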
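3. Regarding the `isValidTar` FIXME: a stricter check could let node-tar walk
the whole archive, which should also cope with gzip-compressed XVAs that the
trailing-zero-blocks heuristic cannot handle. This is only a sketch under the
assumption that adding `tar` (node-tar) as a dependency is acceptable; it
reads the entire file, so it is much slower than the current check:

    const tar = require('tar')

    // tar.t() returns a promise when `file` is given; with `strict: true`
    // it rejects on a malformed archive instead of merely emitting warnings
    const isValidTarStrict = file =>
      tar
        .t({ file, strict: true, onentry: Function.prototype })
        .then(() => true, () => false)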
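4. `createSyntheticStream` now accepts either a single path or a list of
paths. A usage sketch, assuming `vhd-lib` re-exports the function and using
hypothetical /tmp paths; the array form is presumably ordered from child to
ancestor, mirroring the chains built by `mergeVhdChain`:

    const { getHandler } = require('@xen-orchestra/fs')
    const { createSyntheticStream } = require('vhd-lib')

    async function example() {
      const handler = getHandler({ url: 'file://' })

      // as before: a single path, ancestors are discovered by following
      // parentUnicodeName until a dynamic (base) VHD is reached
      const fromChain = await createSyntheticStream(handler, '/tmp/child.vhd')

      // new: an explicit list of paths, no filesystem traversal
      const fromList = await createSyntheticStream(handler, [
        '/tmp/child.vhd',
        '/tmp/parent.vhd',
      ])

      return { fromChain, fromList }
    }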