feat(backups,xo-server): scheduled health check (#6228)

This commit is contained in:
Florent BEAUCHAMP
2022-05-24 14:16:48 +02:00
committed by GitHub
parent b75ca2700b
commit 231f09de12
7 changed files with 176 additions and 41 deletions

View File

@@ -36,6 +36,8 @@ const DEFAULT_VM_SETTINGS = {
deleteFirst: false,
exportRetention: 0,
fullInterval: 0,
healthCheckSr: undefined,
healthCheckVmsWithTags: [],
maxMergedDeltasPerRun: 2,
offlineBackup: false,
offlineSnapshot: false,
@@ -215,6 +217,7 @@ exports.Backup = class Backup {
const schedule = this._schedule
const config = this._config
const settings = this._settings
await Disposable.use(
Disposable.all(
extractIdsFromSimplePattern(job.srs).map(id =>
@@ -242,15 +245,14 @@ exports.Backup = class Backup {
})
)
),
async (srs, remoteAdapters) => {
() => settings.healthCheckSr !== undefined && this._getRecord('SR', settings.healthCheckSr),
async (srs, remoteAdapters, healthCheckSr) => {
// remove adapters that failed (already handled)
remoteAdapters = remoteAdapters.filter(_ => _ !== undefined)
// remove srs that failed (already handled)
srs = srs.filter(_ => _ !== undefined)
const settings = this._settings
if (remoteAdapters.length === 0 && srs.length === 0 && settings.snapshotRetention === 0) {
return
}
@@ -271,6 +273,7 @@ exports.Backup = class Backup {
baseSettings,
config,
getSnapshotNameLabel,
healthCheckSr,
job,
remoteAdapters,
schedule,

View File

@@ -0,0 +1,64 @@
'use strict'
const { Task } = require('./Task')
exports.HealthCheckVmBackup = class HealthCheckVmBackup {
#xapi
#restoredVm
constructor({ restoredVm, xapi }) {
this.#restoredVm = restoredVm
this.#xapi = xapi
}
async run() {
return Task.run(
{
name: 'vmstart',
},
async () => {
let restoredVm = this.#restoredVm
const xapi = this.#xapi
const restoredId = restoredVm.uuid
// remove vifs
await Promise.all(restoredVm.$VIFs.map(vif => xapi.callAsync('VIF.destroy', vif.$ref)))
const start = new Date()
// start Vm
await xapi.callAsync(
'VM.start',
restoredVm.$ref,
false, // Start paused?
false // Skip pre-boot checks?
)
const started = new Date()
const timeout = 10 * 60 * 1000
const startDuration = started - start
let remainingTimeout = timeout - startDuration
if (remainingTimeout < 0) {
throw new Error(`VM ${restoredId} not started after ${timeout / 1000} second`)
}
// wait for the 'Running' event to be really stored in local xapi object cache
restoredVm = await xapi.waitObjectState(restoredVm.$ref, vm => vm.power_state === 'Running', {
timeout: remainingTimeout,
})
const running = new Date()
remainingTimeout -= running - started
if (remainingTimeout < 0) {
throw new Error(`local xapi did not get Runnig state for VM ${restoredId} after ${timeout / 1000} second`)
}
// wait for the guest tool version to be defined
await xapi.waitObjectState(restoredVm.guest_metrics, gm => gm?.PV_drivers_version?.major !== undefined, {
timeout: remainingTimeout,
})
}
)
}
}

View File

@@ -45,7 +45,18 @@ const forkDeltaExport = deltaExport =>
})
class VmBackup {
constructor({ baseSettings, config, getSnapshotNameLabel, job, remoteAdapters, schedule, settings, srs, vm }) {
constructor({
config,
getSnapshotNameLabel,
healthCheckSr,
job,
remoteAdapters,
remotes,
schedule,
settings,
srs,
vm,
}) {
if (vm.other_config['xo:backup:job'] === job.id && 'start' in vm.blocked_operations) {
// don't match replicated VMs created by this very job otherwise they
// will be replicated again and again
@@ -68,6 +79,7 @@ class VmBackup {
this._fullVdisRequired = undefined
this._getSnapshotNameLabel = getSnapshotNameLabel
this._isDelta = job.mode === 'delta'
this._healthCheckSr = healthCheckSr
this._jobId = job.id
this._jobSnapshots = undefined
this._xapi = vm.$xapi
@@ -94,7 +106,6 @@ class VmBackup {
: [FullBackupWriter, FullReplicationWriter]
const allSettings = job.settings
Object.keys(remoteAdapters).forEach(remoteId => {
const targetSettings = {
...settings,
@@ -396,6 +407,24 @@ class VmBackup {
this._fullVdisRequired = fullVdisRequired
}
async _healthCheck() {
const settings = this._settings
if (this._healthCheckSr === undefined) {
return
}
// check if current VM has tags
const { tags } = this.vm
const intersect = settings.healthCheckVmsWithTags.some(t => tags.includes(t))
if (settings.healthCheckVmsWithTags.length !== 0 && !intersect) {
return
}
await this._callWriters(writer => writer.healthCheck(this._healthCheckSr), 'writer.healthCheck()')
}
async run($defer) {
const settings = this._settings
assert(
@@ -405,7 +434,9 @@ class VmBackup {
await this._callWriters(async writer => {
await writer.beforeBackup()
$defer(() => writer.afterBackup())
$defer(async () => {
await writer.afterBackup()
})
}, 'writer.beforeBackup()')
await this._fetchJobSnapshots()
@@ -441,6 +472,7 @@ class VmBackup {
await this._fetchJobSnapshots()
await this._removeUnusedSnapshots()
}
await this._healthCheck()
}
}
exports.VmBackup = VmBackup

View File

@@ -77,6 +77,14 @@ job.start(data: { mode: Mode, reportWhen: ReportWhen })
│ │ │ ├─ task.warning(message: string)
│ │ │ └─ task.end(result: { size: number })
│ │ │
│ │ │ // in case there is a healthcheck scheduled for this vm in this job
│ │ ├─ task.start(message: 'health check')
│ │ │ ├─ task.start(message: 'transfer')
│ │ │ │ └─ task.end(result: { size: number })
│ │ │ ├─ task.start(message: 'vmstart')
│ │ │ │ └─ task.end
│ │ │ └─ task.end
│ │ │
│ │ │ // in case of full backup, DR and CR
│ │ ├─ task.start(message: 'clean')
│ │ │ ├─ task.warning(message: string)
@@ -195,6 +203,7 @@ Settings are described in [`@xen-orchestra/backups/Backup.js](https://github.com
- `prepare({ isFull })`
- `transfer({ timestamp, deltaExport, sizeContainers })`
- `cleanup()`
- `healthCheck(sr)`
- **Full**
- `run({ timestamp, sizeContainer, stream })`
- `afterBackup()`

View File

@@ -19,6 +19,8 @@ const { AbstractDeltaWriter } = require('./_AbstractDeltaWriter.js')
const { checkVhd } = require('./_checkVhd.js')
const { packUuid } = require('./_packUuid.js')
const { Disposable } = require('promise-toolbox')
const { HealthCheckVmBackup } = require('../HealthCheckVmBackup.js')
const { ImportVmBackup } = require('../ImportVmBackup.js')
const { warn } = createLogger('xo:backups:DeltaBackupWriter')
@@ -69,6 +71,35 @@ exports.DeltaBackupWriter = class DeltaBackupWriter extends MixinBackupWriter(Ab
return this._cleanVm({ merge: true })
}
healthCheck(sr) {
return Task.run(
{
name: 'health check',
},
async () => {
const xapi = sr.$xapi
const srUuid = sr.uuid
const adapter = this._adapter
const metadata = await adapter.readVmBackupMetadata(this._metadataFileName)
const { id: restoredId } = await new ImportVmBackup({
adapter,
metadata,
srUuid,
xapi,
}).run()
const restoredVm = xapi.getObject(restoredId)
try {
await new HealthCheckVmBackup({
restoredVm,
xapi,
}).run()
} finally {
await xapi.VM_destroy(restoredVm.$ref)
}
}
)
}
prepare({ isFull }) {
// create the task related to this export and ensure all methods are called in this context
const task = new Task({
@@ -80,7 +111,9 @@ exports.DeltaBackupWriter = class DeltaBackupWriter extends MixinBackupWriter(Ab
},
})
this.transfer = task.wrapFn(this.transfer)
this.cleanup = task.wrapFn(this.cleanup, true)
this.healthCheck = task.wrapFn(this.healthCheck)
this.cleanup = task.wrapFn(this.cleanup)
this.afterBackup = task.wrapFn(this.afterBackup, true)
return task.run(() => this._prepare())
}
@@ -156,7 +189,7 @@ exports.DeltaBackupWriter = class DeltaBackupWriter extends MixinBackupWriter(Ab
}/${adapter.getVhdFileName(basename)}`
)
const metadataFilename = `${backupDir}/${basename}.json`
const metadataFilename = (this._metadataFileName = `${backupDir}/${basename}.json`)
const metadataContent = {
jobId,
mode: job.mode,

View File

@@ -9,4 +9,6 @@ exports.AbstractWriter = class AbstractWriter {
beforeBackup() {}
afterBackup() {}
healthCheck(sr) {}
}

View File

@@ -8,6 +8,7 @@ import { createLogger } from '@xen-orchestra/log'
import { createPredicate } from 'value-matcher'
import { decorateWith } from '@vates/decorate-with'
import { formatVmBackups } from '@xen-orchestra/backups/formatVmBackups.js'
import { HealthCheckVmBackup } from '@xen-orchestra/backups/HealthCheckVmBackup.js'
import { ImportVmBackup } from '@xen-orchestra/backups/ImportVmBackup.js'
import { invalidParameters } from 'xo-common/api-errors.js'
import { runBackupWorker } from '@xen-orchestra/backups/runBackupWorker.js'
@@ -185,6 +186,11 @@ export default class BackupNg {
vmIds.forEach(handleRecord)
unboxIdsFromPattern(job.srs).forEach(handleRecord)
// add xapi specific to the healthcheck SR if needed
if (job.settings[schedule.id].healthCheckSr !== undefined) {
handleRecord(job.settings[schedule.id].healthCheckSr)
}
const remotes = {}
const xapis = {}
await waitAll([
@@ -538,40 +544,26 @@ export default class BackupNg {
return backupsByVmByRemote
}
async checkVmBackupNg(id, srId, settings) {
let restoredVm, xapi
try {
const restoredId = await this.importVmBackupNg(id, srId, settings)
const app = this._app
xapi = app.getXapi(srId)
restoredVm = xapi.getObject(restoredId)
async checkVmBackupNg(backupId, srId, settings) {
await Task.run(
{
name: 'health check',
},
async () => {
const app = this._app
const xapi = app.getXapi(srId)
const restoredId = await this.importVmBackupNg(backupId, srId, settings)
// remove vifs
await Promise.all(restoredVm.$VIFs.map(vif => xapi._deleteVif(vif)))
const start = new Date()
// start Vm
await xapi.startVm(restoredId)
const started = new Date()
const timeout = 10 * 60 * 1000
const startDuration = started - start
if (startDuration >= timeout) {
throw new Error(`VM ${restoredId} not started after ${timeout / 1000} second`)
const restoredVm = xapi.getObject(restoredId)
try {
await new HealthCheckVmBackup({
restoredVm,
xapi,
}).run()
} finally {
await xapi.VM_destroy(restoredVm.$ref)
}
}
let remainingTimeout = timeout - startDuration
restoredVm = await xapi.waitObjectState(restoredVm.$ref, vm => vm.power_state === 'Running', {
timeout: remainingTimeout,
})
const running = new Date()
remainingTimeout -= running - started
await xapi.waitObjectState(restoredVm.guest_metrics, gm => gm?.PV_drivers_version?.major !== undefined, {
timeout: remainingTimeout,
})
} finally {
restoredVm !== undefined && xapi !== undefined && (await xapi.VM_destroy(restoredVm.$ref))
}
)
}
}