feat(@xen-orchestra/backups#RemoteAdapter): ability to clean broken backups (#5684)

badrAZ 2021-03-24 10:00:47 +01:00 committed by GitHub
parent 0bd09896f3
commit 20f4c952fe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 299 additions and 326 deletions
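For context, a minimal usage sketch of the `RemoteAdapter#cleanVm` API introduced by this commit (the VM directory is a placeholder; the calls mirror the updated CLI command in the first diff below):

const { getHandler } = require('@xen-orchestra/fs')
const { RemoteAdapter } = require('@xen-orchestra/backups/RemoteAdapter')

async function main() {
  // same handler construction as in the CLI below
  const adapter = new RemoteAdapter(getHandler({ url: 'file://' }))

  // reports problems found in a VM backup directory; `remove` deletes
  // unused/broken files and `merge` merges unused VHD chains
  await adapter.cleanVm('/xo-vm-backups/<vm-uuid>', {
    remove: true,
    merge: true,
    onLog: log => console.warn(log),
  })
}

main().catch(console.error)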

View File

@@ -1,312 +1,16 @@
#!/usr/bin/env node
// assigned when options are parsed by the main function
let merge, remove
// -----------------------------------------------------------------------------
const assert = require('assert')
const asyncMap = require('lodash/curryRight')(require('@xen-orchestra/async-map').asyncMap)
const flatten = require('lodash/flatten')
const getopts = require('getopts')
const limitConcurrency = require('limit-concurrency-decorator').default
const lockfile = require('proper-lockfile')
const pipe = require('promise-toolbox/pipe')
const { default: Vhd, mergeVhd } = require('vhd-lib')
const { dirname, resolve } = require('path')
const { DISK_TYPE_DIFFERENCING } = require('vhd-lib/dist/_constants')
const { isValidXva } = require('@xen-orchestra/backups/isValidXva')
const { RemoteAdapter } = require('@xen-orchestra/backups/RemoteAdapter')
const { resolve } = require('path')
const fs = require('../_fs')
const handler = require('@xen-orchestra/fs').getHandler({ url: 'file://' })
// -----------------------------------------------------------------------------
// chain is an array of VHDs from child to parent
//
// the whole chain will be merged into parent, parent will be renamed to child
// and all the others will be deleted
const mergeVhdChain = limitConcurrency(1)(async function mergeVhdChain(chain) {
assert(chain.length >= 2)
let child = chain[0]
const parent = chain[chain.length - 1]
const children = chain.slice(0, -1).reverse()
console.warn('Unused parents of VHD', child)
chain
.slice(1)
.reverse()
.forEach(parent => {
console.warn(' ', parent)
})
merge && console.warn(' merging…')
console.warn('')
if (merge) {
// `mergeVhd` does not work with a stream, either
// - make it accept a stream
// - or create synthetic VHD which is not a stream
if (children.length !== 1) {
console.warn('TODO: implement merging multiple children')
children.length = 1
child = children[0]
}
let done, total
const handle = setInterval(() => {
if (done !== undefined) {
console.log('merging %s: %s/%s', child, done, total)
}
}, 10e3)
await mergeVhd(
handler,
parent,
handler,
child,
// children.length === 1
// ? child
// : await createSyntheticStream(handler, children),
{
onProgress({ done: d, total: t }) {
done = d
total = t
},
}
)
clearInterval(handle)
}
await Promise.all([
remove && fs.rename(parent, child),
asyncMap(children.slice(0, -1), child => {
console.warn('Unused VHD', child)
remove && console.warn(' deleting…')
console.warn('')
return remove && handler.unlink(child)
}),
])
})
const listVhds = pipe([
vmDir => vmDir + '/vdis',
fs.readdir2,
asyncMap(fs.readdir2),
flatten,
asyncMap(fs.readdir2),
flatten,
_ => _.filter(_ => _.endsWith('.vhd')),
])
async function handleVm(vmDir) {
const vhds = new Set()
const vhdParents = { __proto__: null }
const vhdChildren = { __proto__: null }
// remove broken VHDs
await asyncMap(await listVhds(vmDir), async path => {
try {
const vhd = new Vhd(handler, path)
await vhd.readHeaderAndFooter()
vhds.add(path)
if (vhd.footer.diskType === DISK_TYPE_DIFFERENCING) {
const parent = resolve(dirname(path), vhd.header.parentUnicodeName)
vhdParents[path] = parent
if (parent in vhdChildren) {
const error = new Error('this script does not support multiple VHD children')
error.parent = parent
error.child1 = vhdChildren[parent]
error.child2 = path
throw error // should we throw?
}
vhdChildren[parent] = path
}
} catch (error) {
console.warn('Error while checking VHD', path)
console.warn(' ', error)
if (error != null && error.code === 'ERR_ASSERTION') {
remove && console.warn(' deleting…')
console.warn('')
remove && (await handler.unlink(path))
}
}
})
// remove VHDs with missing ancestors
{
const deletions = []
// return true if the VHD has been deleted or is missing
const deleteIfOrphan = vhd => {
const parent = vhdParents[vhd]
if (parent === undefined) {
return
}
// no longer needs to be checked
delete vhdParents[vhd]
deleteIfOrphan(parent)
if (!vhds.has(parent)) {
vhds.delete(vhd)
console.warn('Error while checking VHD', vhd)
console.warn(' missing parent', parent)
remove && console.warn(' deleting…')
console.warn('')
remove && deletions.push(handler.unlink(vhd))
}
}
// > A property that is deleted before it has been visited will not be
// > visited later.
// >
// > -- https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/for...in#Deleted_added_or_modified_properties
for (const child in vhdParents) {
deleteIfOrphan(child)
}
await Promise.all(deletions)
}
const [jsons, xvas, xvaSums] = await fs
.readdir2(vmDir)
.then(entries => [
entries.filter(_ => _.endsWith('.json')),
new Set(entries.filter(_ => _.endsWith('.xva'))),
entries.filter(_ => _.endsWith('.xva.checksum')),
])
await asyncMap(xvas, async path => {
// the check is not good enough to delete the file; the best we can do is
// report it
if (!(await isValidXva(path))) {
console.warn('Potential broken XVA', path)
console.warn('')
}
})
const unusedVhds = new Set(vhds)
const unusedXvas = new Set(xvas)
// compile the list of unused XVAs and VHDs, and remove backup metadata which
// reference a missing XVA/VHD
await asyncMap(jsons, async json => {
const metadata = JSON.parse(await fs.readFile(json))
const { mode } = metadata
if (mode === 'full') {
const linkedXva = resolve(vmDir, metadata.xva)
if (xvas.has(linkedXva)) {
unusedXvas.delete(linkedXva)
} else {
console.warn('Error while checking backup', json)
console.warn(' missing file', linkedXva)
remove && console.warn(' deleting…')
console.warn('')
remove && (await handler.unlink(json))
}
} else if (mode === 'delta') {
const linkedVhds = (() => {
const { vhds } = metadata
return Object.keys(vhds).map(key => resolve(vmDir, vhds[key]))
})()
// FIXME: find better approach by keeping as much of the backup as
// possible (existing disks) even if one disk is missing
if (linkedVhds.every(_ => vhds.has(_))) {
linkedVhds.forEach(_ => unusedVhds.delete(_))
} else {
console.warn('Error while checking backup', json)
const missingVhds = linkedVhds.filter(_ => !vhds.has(_))
console.warn(' %i/%i missing VHDs', missingVhds.length, linkedVhds.length)
missingVhds.forEach(vhd => {
console.warn(' ', vhd)
})
remove && console.warn(' deleting…')
console.warn('')
remove && (await handler.unlink(json))
}
}
})
// TODO: parallelize by vm/job/vdi
const unusedVhdsDeletion = []
{
// VHD chains (as list from child to ancestor) to merge indexed by last
// ancestor
const vhdChainsToMerge = { __proto__: null }
const toCheck = new Set(unusedVhds)
const getUsedChildChainOrDelete = vhd => {
if (vhd in vhdChainsToMerge) {
const chain = vhdChainsToMerge[vhd]
delete vhdChainsToMerge[vhd]
return chain
}
if (!unusedVhds.has(vhd)) {
return [vhd]
}
// no longer needs to be checked
toCheck.delete(vhd)
const child = vhdChildren[vhd]
if (child !== undefined) {
const chain = getUsedChildChainOrDelete(child)
if (chain !== undefined) {
chain.push(vhd)
return chain
}
}
console.warn('Unused VHD', vhd)
remove && console.warn(' deleting…')
console.warn('')
remove && unusedVhdsDeletion.push(handler.unlink(vhd))
}
toCheck.forEach(vhd => {
vhdChainsToMerge[vhd] = getUsedChildChainOrDelete(vhd)
})
Object.keys(vhdChainsToMerge).forEach(key => {
const chain = vhdChainsToMerge[key]
if (chain !== undefined) {
unusedVhdsDeletion.push(mergeVhdChain(chain))
}
})
}
await Promise.all([
unusedVhdsDeletion,
asyncMap(unusedXvas, path => {
console.warn('Unused XVA', path)
remove && console.warn(' deleting…')
console.warn('')
return remove && handler.unlink(path)
}),
asyncMap(xvaSums, path => {
// no need to handle checksums for XVAs deleted by the script; they will be handled by `unlink()`
if (!xvas.has(path.slice(0, -'.checksum'.length))) {
console.warn('Unused XVA checksum', path)
remove && console.warn(' deleting…')
console.warn('')
return remove && handler.unlink(path)
}
}),
])
}
// -----------------------------------------------------------------------------
const adapter = new RemoteAdapter(require('@xen-orchestra/fs').getHandler({ url: 'file://' }))
module.exports = async function main(args) {
const opts = getopts(args, {
const { _, remove, merge } = getopts(args, {
alias: {
remove: 'r',
merge: 'm',
@@ -318,19 +22,12 @@ module.exports = async function main(args) {
},
})
;({ remove, merge } = opts)
await asyncMap(opts._, async vmDir => {
await asyncMap(_, async vmDir => {
vmDir = resolve(vmDir)
// TODO: implement this in `xo-server`, not easy because not compatible with
// `@xen-orchestra/fs`.
const release = await lockfile.lock(vmDir)
try {
await handleVm(vmDir)
await adapter.cleanVm(vmDir, { remove, merge, onLog: log => console.warn(log) })
} catch (error) {
console.error('handleVm', vmDir, error)
} finally {
await release()
console.error('adapter.cleanVm', vmDir, error)
}
})
}
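A hedged invocation sketch of the refactored command (the require path is hypothetical, since the diff does not name its files, and the VM directory is a placeholder):

// hypothetical module path, for illustration only
const cleanVms = require('@xen-orchestra/backups-cli/commands/clean-vms')

// without flags this is a dry run that only reports problems;
// --remove/--merge (aliases -r/-m) actually fix them
cleanVms(['--remove', '--merge', 'xo-vm-backups/<vm-uuid>']).catch(console.error)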

View File

@@ -11,10 +11,8 @@
"@xen-orchestra/fs": "^0.13.1",
"filenamify": "^4.1.0",
"getopts": "^2.2.5",
"limit-concurrency-decorator": "^0.4.0",
"lodash": "^4.17.15",
"promise-toolbox": "^0.17.0",
"proper-lockfile": "^4.1.1",
"vhd-lib": "^1.0.0"
},
"engines": {

View File

@@ -13,7 +13,9 @@ const { readdir, stat } = require('fs-extra')
const { ZipFile } = require('yazl')
const { BACKUP_DIR } = require('./_getVmBackupDir')
const { cleanVm } = require('./_cleanVm')
const { getTmpDir } = require('./_getTmpDir')
const { isMetadataFile, isVhdFile } = require('./_backupType')
const { listPartitions, LVM_PARTITION_TYPE } = require('./_listPartitions')
const { lvs, pvs } = require('./_lvm')
@@ -27,9 +29,6 @@ const { warn } = createLogger('xo:backups:RemoteAdapter')
const compareTimestamp = (a, b) => a.timestamp - b.timestamp
const isMetadataFile = filename => filename.endsWith('.json')
const isVhdFile = filename => filename.endsWith('.vhd')
const noop = Function.prototype
const resolveRelativeFromFile = (file, path) => resolve('/', dirname(file), path).slice(1)
@@ -66,8 +65,8 @@ const debounceResourceFactory = factory =>
return this._debounceResource(factory.apply(this, arguments))
}
exports.RemoteAdapter = class RemoteAdapter {
constructor(handler, { debounceResource, dirMode }) {
class RemoteAdapter {
constructor(handler, { debounceResource = res => res, dirMode } = {}) {
this._debounceResource = debounceResource
this._dirMode = dirMode
this._handler = handler
@@ -551,3 +550,9 @@ exports.RemoteAdapter = class RemoteAdapter {
return Object.defineProperty(JSON.parse(await this._handler.readFile(path)), '_filename', { value: path })
}
}
RemoteAdapter.prototype.cleanVm = function (vmDir) {
return Disposable.use(this._handler.lock(vmDir), () => cleanVm.apply(this, arguments))
}
exports.RemoteAdapter = RemoteAdapter
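A minimal sketch of the disposable-lock pattern used by this wrapper, assuming promise-toolbox's `Disposable` API: `use()` resolves the disposable, runs the callback while the resource is held, and disposes it afterwards even if the callback throws.

const Disposable = require('promise-toolbox/Disposable')

// the lock on vmDir is acquired before fn starts and is always released,
// even when fn rejects; this replaces proper-lockfile in the CLI command
function withVmLock(handler, vmDir, fn) {
  return Disposable.use(handler.lock(vmDir), () => fn())
}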

View File

@@ -0,0 +1,4 @@
exports.isMetadataFile = filename => filename.endsWith('.json')
exports.isVhdFile = filename => filename.endsWith('.vhd')
exports.isXvaFile = filename => filename.endsWith('.xva')
exports.isXvaSumFile = filename => filename.endsWith('.xva.checksum')

View File

@@ -0,0 +1,277 @@
const assert = require('assert')
const limitConcurrency = require('limit-concurrency-decorator').default
const { asyncMap } = require('@xen-orchestra/async-map')
const { default: Vhd, mergeVhd } = require('vhd-lib')
const { dirname, resolve } = require('path')
const { DISK_TYPE_DIFFERENCING } = require('vhd-lib/dist/_constants')
const { isMetadataFile, isVhdFile, isXvaFile, isXvaSumFile } = require('./_backupType')
const { isValidXva } = require('./isValidXva')
// chain is an array of VHDs from child to parent
//
// the whole chain will be merged into parent, parent will be renamed to child
// and all the others will be deleted
const mergeVhdChain = limitConcurrency(1)(async function mergeVhdChain(chain, { handler, onLog, remove, merge }) {
assert(chain.length >= 2)
let child = chain[0]
const parent = chain[chain.length - 1]
const children = chain.slice(0, -1).reverse()
chain
.slice(1)
.reverse()
.forEach(parent => {
onLog(`the parent ${parent} of the child ${child} is unused`)
})
if (merge) {
// `mergeVhd` does not work with a stream, either
// - make it accept a stream
// - or create synthetic VHD which is not a stream
if (children.length !== 1) {
// TODO: implement merging multiple children
children.length = 1
child = children[0]
}
let done, total
const handle = setInterval(() => {
if (done !== undefined) {
onLog(`merging ${child}: ${done}/${total}`)
}
}, 10e3)
await mergeVhd(
handler,
parent,
handler,
child,
// children.length === 1
// ? child
// : await createSyntheticStream(handler, children),
{
onProgress({ done: d, total: t }) {
done = d
total = t
},
}
)
clearInterval(handle)
}
await Promise.all([
remove && handler.rename(parent, child),
asyncMap(children.slice(0, -1), child => {
onLog(`the VHD ${child} is unused`)
return remove && handler.unlink(child)
}),
])
})
const noop = Function.prototype
exports.cleanVm = async function cleanVm(vmDir, { remove, merge, onLog = noop }) {
const handler = this._handler
const vhds = new Set()
const vhdParents = { __proto__: null }
const vhdChildren = { __proto__: null }
// remove broken VHDs
await asyncMap(
await handler.list(`${vmDir}/vdis`, {
filter: isVhdFile,
prependDir: true,
}),
async path => {
try {
const vhd = new Vhd(handler, path)
await vhd.readHeaderAndFooter()
vhds.add(path)
if (vhd.footer.diskType === DISK_TYPE_DIFFERENCING) {
const parent = resolve(dirname(path), vhd.header.parentUnicodeName)
vhdParents[path] = parent
if (parent in vhdChildren) {
const error = new Error('this script does not support multiple VHD children')
error.parent = parent
error.child1 = vhdChildren[parent]
error.child2 = path
throw error // should we throw?
}
vhdChildren[parent] = path
}
} catch (error) {
onLog(`error while checking the VHD with path ${path}`)
if (error?.code === 'ERR_ASSERTION' && remove) {
await handler.unlink(path)
}
}
}
)
// remove VHDs with missing ancestors
{
const deletions = []
// return true if the VHD has been deleted or is missing
const deleteIfOrphan = vhd => {
const parent = vhdParents[vhd]
if (parent === undefined) {
return
}
// no longer needs to be checked
delete vhdParents[vhd]
deleteIfOrphan(parent)
if (!vhds.has(parent)) {
vhds.delete(vhd)
onLog(`the parent ${parent} of the VHD ${vhd} is missing`)
if (remove) {
deletions.push(handler.unlink(vhd))
}
}
}
// > A property that is deleted before it has been visited will not be
// > visited later.
// >
// > -- https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/for...in#Deleted_added_or_modified_properties
for (const child in vhdParents) {
deleteIfOrphan(child)
}
await Promise.all(deletions)
}
const jsons = []
const xvas = new Set()
const xvaSums = []
const entries = await handler.list(vmDir, {
prependDir: true,
})
entries.forEach(path => {
if (isMetadataFile(path)) {
jsons.push(path)
} else if (isXvaFile(path)) {
xvas.add(path)
} else if (isXvaSumFile(path)) {
xvaSums.push(path)
}
})
await asyncMap(xvas, async path => {
// the check is not good enough to delete the file; the best we can do is
// report it
if (!(await isValidXva(path))) {
onLog(`the XVA with path ${path} is potentially broken`)
}
})
const unusedVhds = new Set(vhds)
const unusedXvas = new Set(xvas)
// compile the list of unused XVAs and VHDs, and remove backup metadata which
// reference a missing XVA/VHD
await asyncMap(jsons, async json => {
const metadata = JSON.parse(await handler.readFile(json))
const { mode } = metadata
if (mode === 'full') {
const linkedXva = resolve(vmDir, metadata.xva)
if (xvas.has(linkedXva)) {
unusedXvas.delete(linkedXva)
} else {
onLog(`the XVA linked to the metadata ${json} is missing`)
if (remove) {
await handler.unlink(json)
}
}
} else if (mode === 'delta') {
const linkedVhds = (() => {
const { vhds } = metadata
return Object.keys(vhds).map(key => resolve(vmDir, vhds[key]))
})()
// FIXME: find better approach by keeping as much of the backup as
// possible (existing disks) even if one disk is missing
if (linkedVhds.every(_ => vhds.has(_))) {
linkedVhds.forEach(_ => unusedVhds.delete(_))
} else {
onLog(`some VHDs linked to the metadata ${json} are missing`)
if (remove) {
await handler.unlink(json)
}
}
}
})
// TODO: parallelize by vm/job/vdi
const unusedVhdsDeletion = []
{
// VHD chains (as list from child to ancestor) to merge indexed by last
// ancestor
const vhdChainsToMerge = { __proto__: null }
const toCheck = new Set(unusedVhds)
const getUsedChildChainOrDelete = vhd => {
if (vhd in vhdChainsToMerge) {
const chain = vhdChainsToMerge[vhd]
delete vhdChainsToMerge[vhd]
return chain
}
if (!unusedVhds.has(vhd)) {
return [vhd]
}
// no longer needs to be checked
toCheck.delete(vhd)
const child = vhdChildren[vhd]
if (child !== undefined) {
const chain = getUsedChildChainOrDelete(child)
if (chain !== undefined) {
chain.push(vhd)
return chain
}
}
onLog(`the VHD ${vhd} is unused`)
if (remove) {
unusedVhdsDeletion.push(handler.unlink(vhd))
}
}
toCheck.forEach(vhd => {
vhdChainsToMerge[vhd] = getUsedChildChainOrDelete(vhd)
})
Object.keys(vhdChainsToMerge).forEach(key => {
const chain = vhdChainsToMerge[key]
if (chain !== undefined) {
unusedVhdsDeletion.push(mergeVhdChain(chain, { onLog, remove, merge }))
}
})
}
await Promise.all([
unusedVhdsDeletion,
asyncMap(unusedXvas, path => {
onLog(`the XVA ${path} is unused`)
return remove && handler.unlink(path)
}),
asyncMap(xvaSums, path => {
// no need to handle checksums for XVAs deleted by the script; they will be handled by `unlink()`
if (!xvas.has(path.slice(0, -'.checksum'.length))) {
onLog(`the XVA checksum ${path} is unused`)
return remove && handler.unlink(path)
}
}),
])
}
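To make the chain handling concrete, a hedged walk-through of `mergeVhdChain` for the simplest case, a two-element chain (file names invented for illustration):

// chain = ['vdis/<uuid>/child.vhd', 'vdis/<uuid>/parent.vhd']
// with { merge: true, remove: true }, the steps are roughly:
//
// 1. mergeVhd(handler, parent, handler, child)
//    writes the child's changed blocks down into the parent
// 2. handler.rename(parent, child)
//    the merged parent takes over the child's path, so any VHD that
//    referenced the child as its parent still resolves correctly
//
// longer chains currently merge only the child closest to the parent
// (see the 'TODO: implement merging multiple children' above)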

View File

@@ -39,5 +39,6 @@
- @xen-orchestra/fs minor
- @xen-orchestra/xapi minor
- @xen-orchestra/backups minor
- @xen-orchestra/backups-cli minor
- xo-server minor
- xo-web patch

View File

@@ -14968,15 +14968,6 @@ prop-types@^15.5.10, prop-types@^15.5.4, prop-types@^15.5.6, prop-types@^15.5.7,
object-assign "^4.1.1"
react-is "^16.8.1"
proper-lockfile@^4.1.1:
version "4.1.1"
resolved "https://registry.yarnpkg.com/proper-lockfile/-/proper-lockfile-4.1.1.tgz#284cf9db9e30a90e647afad69deb7cb06881262c"
integrity sha512-1w6rxXodisVpn7QYvLk706mzprPTAPCYAqxMvctmPN3ekuRk/kuGkGc82pangZiAt4R3lwSuUzheTTn0/Yb7Zg==
dependencies:
graceful-fs "^4.1.11"
retry "^0.12.0"
signal-exit "^3.0.2"
proper-lockfile@^4.1.2:
version "4.1.2"
resolved "https://registry.yarnpkg.com/proper-lockfile/-/proper-lockfile-4.1.2.tgz#c8b9de2af6b2f1601067f98e01ac66baa223141f"