feat(backups,xo-server): scheduled health check (#6228)
This commit is contained in:
committed by
GitHub
parent
b75ca2700b
commit
231f09de12
@@ -36,6 +36,8 @@ const DEFAULT_VM_SETTINGS = {
|
||||
deleteFirst: false,
|
||||
exportRetention: 0,
|
||||
fullInterval: 0,
|
||||
healthCheckSr: undefined,
|
||||
healthCheckVmsWithTags: [],
|
||||
maxMergedDeltasPerRun: 2,
|
||||
offlineBackup: false,
|
||||
offlineSnapshot: false,
|
||||
@@ -215,6 +217,7 @@ exports.Backup = class Backup {
|
||||
const schedule = this._schedule
|
||||
|
||||
const config = this._config
|
||||
const settings = this._settings
|
||||
await Disposable.use(
|
||||
Disposable.all(
|
||||
extractIdsFromSimplePattern(job.srs).map(id =>
|
||||
@@ -242,15 +245,14 @@ exports.Backup = class Backup {
|
||||
})
|
||||
)
|
||||
),
|
||||
async (srs, remoteAdapters) => {
|
||||
() => settings.healthCheckSr !== undefined && this._getRecord('SR', settings.healthCheckSr),
|
||||
async (srs, remoteAdapters, healthCheckSr) => {
|
||||
// remove adapters that failed (already handled)
|
||||
remoteAdapters = remoteAdapters.filter(_ => _ !== undefined)
|
||||
|
||||
// remove srs that failed (already handled)
|
||||
srs = srs.filter(_ => _ !== undefined)
|
||||
|
||||
const settings = this._settings
|
||||
|
||||
if (remoteAdapters.length === 0 && srs.length === 0 && settings.snapshotRetention === 0) {
|
||||
return
|
||||
}
|
||||
@@ -271,6 +273,7 @@ exports.Backup = class Backup {
|
||||
baseSettings,
|
||||
config,
|
||||
getSnapshotNameLabel,
|
||||
healthCheckSr,
|
||||
job,
|
||||
remoteAdapters,
|
||||
schedule,
|
||||
|
||||
64
@xen-orchestra/backups/HealthCheckVmBackup.js
Normal file
64
@xen-orchestra/backups/HealthCheckVmBackup.js
Normal file
@@ -0,0 +1,64 @@
|
||||
'use strict'
|
||||
|
||||
const { Task } = require('./Task')
|
||||
|
||||
exports.HealthCheckVmBackup = class HealthCheckVmBackup {
|
||||
#xapi
|
||||
#restoredVm
|
||||
|
||||
constructor({ restoredVm, xapi }) {
|
||||
this.#restoredVm = restoredVm
|
||||
this.#xapi = xapi
|
||||
}
|
||||
|
||||
async run() {
|
||||
return Task.run(
|
||||
{
|
||||
name: 'vmstart',
|
||||
},
|
||||
async () => {
|
||||
let restoredVm = this.#restoredVm
|
||||
const xapi = this.#xapi
|
||||
const restoredId = restoredVm.uuid
|
||||
|
||||
// remove vifs
|
||||
await Promise.all(restoredVm.$VIFs.map(vif => xapi.callAsync('VIF.destroy', vif.$ref)))
|
||||
|
||||
const start = new Date()
|
||||
// start Vm
|
||||
|
||||
await xapi.callAsync(
|
||||
'VM.start',
|
||||
restoredVm.$ref,
|
||||
false, // Start paused?
|
||||
false // Skip pre-boot checks?
|
||||
)
|
||||
const started = new Date()
|
||||
const timeout = 10 * 60 * 1000
|
||||
const startDuration = started - start
|
||||
|
||||
let remainingTimeout = timeout - startDuration
|
||||
|
||||
if (remainingTimeout < 0) {
|
||||
throw new Error(`VM ${restoredId} not started after ${timeout / 1000} second`)
|
||||
}
|
||||
|
||||
// wait for the 'Running' event to be really stored in local xapi object cache
|
||||
restoredVm = await xapi.waitObjectState(restoredVm.$ref, vm => vm.power_state === 'Running', {
|
||||
timeout: remainingTimeout,
|
||||
})
|
||||
|
||||
const running = new Date()
|
||||
remainingTimeout -= running - started
|
||||
|
||||
if (remainingTimeout < 0) {
|
||||
throw new Error(`local xapi did not get Runnig state for VM ${restoredId} after ${timeout / 1000} second`)
|
||||
}
|
||||
// wait for the guest tool version to be defined
|
||||
await xapi.waitObjectState(restoredVm.guest_metrics, gm => gm?.PV_drivers_version?.major !== undefined, {
|
||||
timeout: remainingTimeout,
|
||||
})
|
||||
}
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -45,7 +45,18 @@ const forkDeltaExport = deltaExport =>
|
||||
})
|
||||
|
||||
class VmBackup {
|
||||
constructor({ baseSettings, config, getSnapshotNameLabel, job, remoteAdapters, schedule, settings, srs, vm }) {
|
||||
constructor({
|
||||
config,
|
||||
getSnapshotNameLabel,
|
||||
healthCheckSr,
|
||||
job,
|
||||
remoteAdapters,
|
||||
remotes,
|
||||
schedule,
|
||||
settings,
|
||||
srs,
|
||||
vm,
|
||||
}) {
|
||||
if (vm.other_config['xo:backup:job'] === job.id && 'start' in vm.blocked_operations) {
|
||||
// don't match replicated VMs created by this very job otherwise they
|
||||
// will be replicated again and again
|
||||
@@ -68,6 +79,7 @@ class VmBackup {
|
||||
this._fullVdisRequired = undefined
|
||||
this._getSnapshotNameLabel = getSnapshotNameLabel
|
||||
this._isDelta = job.mode === 'delta'
|
||||
this._healthCheckSr = healthCheckSr
|
||||
this._jobId = job.id
|
||||
this._jobSnapshots = undefined
|
||||
this._xapi = vm.$xapi
|
||||
@@ -94,7 +106,6 @@ class VmBackup {
|
||||
: [FullBackupWriter, FullReplicationWriter]
|
||||
|
||||
const allSettings = job.settings
|
||||
|
||||
Object.keys(remoteAdapters).forEach(remoteId => {
|
||||
const targetSettings = {
|
||||
...settings,
|
||||
@@ -396,6 +407,24 @@ class VmBackup {
|
||||
this._fullVdisRequired = fullVdisRequired
|
||||
}
|
||||
|
||||
async _healthCheck() {
|
||||
const settings = this._settings
|
||||
|
||||
if (this._healthCheckSr === undefined) {
|
||||
return
|
||||
}
|
||||
|
||||
// check if current VM has tags
|
||||
const { tags } = this.vm
|
||||
const intersect = settings.healthCheckVmsWithTags.some(t => tags.includes(t))
|
||||
|
||||
if (settings.healthCheckVmsWithTags.length !== 0 && !intersect) {
|
||||
return
|
||||
}
|
||||
|
||||
await this._callWriters(writer => writer.healthCheck(this._healthCheckSr), 'writer.healthCheck()')
|
||||
}
|
||||
|
||||
async run($defer) {
|
||||
const settings = this._settings
|
||||
assert(
|
||||
@@ -405,7 +434,9 @@ class VmBackup {
|
||||
|
||||
await this._callWriters(async writer => {
|
||||
await writer.beforeBackup()
|
||||
$defer(() => writer.afterBackup())
|
||||
$defer(async () => {
|
||||
await writer.afterBackup()
|
||||
})
|
||||
}, 'writer.beforeBackup()')
|
||||
|
||||
await this._fetchJobSnapshots()
|
||||
@@ -441,6 +472,7 @@ class VmBackup {
|
||||
await this._fetchJobSnapshots()
|
||||
await this._removeUnusedSnapshots()
|
||||
}
|
||||
await this._healthCheck()
|
||||
}
|
||||
}
|
||||
exports.VmBackup = VmBackup
|
||||
|
||||
@@ -77,6 +77,14 @@ job.start(data: { mode: Mode, reportWhen: ReportWhen })
|
||||
│ │ │ ├─ task.warning(message: string)
|
||||
│ │ │ └─ task.end(result: { size: number })
|
||||
│ │ │
|
||||
│ │ │ // in case there is a healthcheck scheduled for this vm in this job
|
||||
│ │ ├─ task.start(message: 'health check')
|
||||
│ │ │ ├─ task.start(message: 'transfer')
|
||||
│ │ │ │ └─ task.end(result: { size: number })
|
||||
│ │ │ ├─ task.start(message: 'vmstart')
|
||||
│ │ │ │ └─ task.end
|
||||
│ │ │ └─ task.end
|
||||
│ │ │
|
||||
│ │ │ // in case of full backup, DR and CR
|
||||
│ │ ├─ task.start(message: 'clean')
|
||||
│ │ │ ├─ task.warning(message: string)
|
||||
@@ -195,6 +203,7 @@ Settings are described in [`@xen-orchestra/backups/Backup.js](https://github.com
|
||||
- `prepare({ isFull })`
|
||||
- `transfer({ timestamp, deltaExport, sizeContainers })`
|
||||
- `cleanup()`
|
||||
- `healthCheck(sr)`
|
||||
- **Full**
|
||||
- `run({ timestamp, sizeContainer, stream })`
|
||||
- `afterBackup()`
|
||||
|
||||
@@ -19,6 +19,8 @@ const { AbstractDeltaWriter } = require('./_AbstractDeltaWriter.js')
|
||||
const { checkVhd } = require('./_checkVhd.js')
|
||||
const { packUuid } = require('./_packUuid.js')
|
||||
const { Disposable } = require('promise-toolbox')
|
||||
const { HealthCheckVmBackup } = require('../HealthCheckVmBackup.js')
|
||||
const { ImportVmBackup } = require('../ImportVmBackup.js')
|
||||
|
||||
const { warn } = createLogger('xo:backups:DeltaBackupWriter')
|
||||
|
||||
@@ -69,6 +71,35 @@ exports.DeltaBackupWriter = class DeltaBackupWriter extends MixinBackupWriter(Ab
|
||||
return this._cleanVm({ merge: true })
|
||||
}
|
||||
|
||||
healthCheck(sr) {
|
||||
return Task.run(
|
||||
{
|
||||
name: 'health check',
|
||||
},
|
||||
async () => {
|
||||
const xapi = sr.$xapi
|
||||
const srUuid = sr.uuid
|
||||
const adapter = this._adapter
|
||||
const metadata = await adapter.readVmBackupMetadata(this._metadataFileName)
|
||||
const { id: restoredId } = await new ImportVmBackup({
|
||||
adapter,
|
||||
metadata,
|
||||
srUuid,
|
||||
xapi,
|
||||
}).run()
|
||||
const restoredVm = xapi.getObject(restoredId)
|
||||
try {
|
||||
await new HealthCheckVmBackup({
|
||||
restoredVm,
|
||||
xapi,
|
||||
}).run()
|
||||
} finally {
|
||||
await xapi.VM_destroy(restoredVm.$ref)
|
||||
}
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
prepare({ isFull }) {
|
||||
// create the task related to this export and ensure all methods are called in this context
|
||||
const task = new Task({
|
||||
@@ -80,7 +111,9 @@ exports.DeltaBackupWriter = class DeltaBackupWriter extends MixinBackupWriter(Ab
|
||||
},
|
||||
})
|
||||
this.transfer = task.wrapFn(this.transfer)
|
||||
this.cleanup = task.wrapFn(this.cleanup, true)
|
||||
this.healthCheck = task.wrapFn(this.healthCheck)
|
||||
this.cleanup = task.wrapFn(this.cleanup)
|
||||
this.afterBackup = task.wrapFn(this.afterBackup, true)
|
||||
|
||||
return task.run(() => this._prepare())
|
||||
}
|
||||
@@ -156,7 +189,7 @@ exports.DeltaBackupWriter = class DeltaBackupWriter extends MixinBackupWriter(Ab
|
||||
}/${adapter.getVhdFileName(basename)}`
|
||||
)
|
||||
|
||||
const metadataFilename = `${backupDir}/${basename}.json`
|
||||
const metadataFilename = (this._metadataFileName = `${backupDir}/${basename}.json`)
|
||||
const metadataContent = {
|
||||
jobId,
|
||||
mode: job.mode,
|
||||
|
||||
@@ -9,4 +9,6 @@ exports.AbstractWriter = class AbstractWriter {
|
||||
beforeBackup() {}
|
||||
|
||||
afterBackup() {}
|
||||
|
||||
healthCheck(sr) {}
|
||||
}
|
||||
|
||||
@@ -8,6 +8,7 @@ import { createLogger } from '@xen-orchestra/log'
|
||||
import { createPredicate } from 'value-matcher'
|
||||
import { decorateWith } from '@vates/decorate-with'
|
||||
import { formatVmBackups } from '@xen-orchestra/backups/formatVmBackups.js'
|
||||
import { HealthCheckVmBackup } from '@xen-orchestra/backups/HealthCheckVmBackup.js'
|
||||
import { ImportVmBackup } from '@xen-orchestra/backups/ImportVmBackup.js'
|
||||
import { invalidParameters } from 'xo-common/api-errors.js'
|
||||
import { runBackupWorker } from '@xen-orchestra/backups/runBackupWorker.js'
|
||||
@@ -185,6 +186,11 @@ export default class BackupNg {
|
||||
vmIds.forEach(handleRecord)
|
||||
unboxIdsFromPattern(job.srs).forEach(handleRecord)
|
||||
|
||||
// add xapi specific to the healthcheck SR if needed
|
||||
if (job.settings[schedule.id].healthCheckSr !== undefined) {
|
||||
handleRecord(job.settings[schedule.id].healthCheckSr)
|
||||
}
|
||||
|
||||
const remotes = {}
|
||||
const xapis = {}
|
||||
await waitAll([
|
||||
@@ -538,40 +544,26 @@ export default class BackupNg {
|
||||
return backupsByVmByRemote
|
||||
}
|
||||
|
||||
async checkVmBackupNg(id, srId, settings) {
|
||||
let restoredVm, xapi
|
||||
try {
|
||||
const restoredId = await this.importVmBackupNg(id, srId, settings)
|
||||
const app = this._app
|
||||
xapi = app.getXapi(srId)
|
||||
restoredVm = xapi.getObject(restoredId)
|
||||
async checkVmBackupNg(backupId, srId, settings) {
|
||||
await Task.run(
|
||||
{
|
||||
name: 'health check',
|
||||
},
|
||||
async () => {
|
||||
const app = this._app
|
||||
const xapi = app.getXapi(srId)
|
||||
const restoredId = await this.importVmBackupNg(backupId, srId, settings)
|
||||
|
||||
// remove vifs
|
||||
await Promise.all(restoredVm.$VIFs.map(vif => xapi._deleteVif(vif)))
|
||||
|
||||
const start = new Date()
|
||||
// start Vm
|
||||
await xapi.startVm(restoredId)
|
||||
const started = new Date()
|
||||
const timeout = 10 * 60 * 1000
|
||||
const startDuration = started - start
|
||||
|
||||
if (startDuration >= timeout) {
|
||||
throw new Error(`VM ${restoredId} not started after ${timeout / 1000} second`)
|
||||
const restoredVm = xapi.getObject(restoredId)
|
||||
try {
|
||||
await new HealthCheckVmBackup({
|
||||
restoredVm,
|
||||
xapi,
|
||||
}).run()
|
||||
} finally {
|
||||
await xapi.VM_destroy(restoredVm.$ref)
|
||||
}
|
||||
}
|
||||
|
||||
let remainingTimeout = timeout - startDuration
|
||||
|
||||
restoredVm = await xapi.waitObjectState(restoredVm.$ref, vm => vm.power_state === 'Running', {
|
||||
timeout: remainingTimeout,
|
||||
})
|
||||
const running = new Date()
|
||||
remainingTimeout -= running - started
|
||||
await xapi.waitObjectState(restoredVm.guest_metrics, gm => gm?.PV_drivers_version?.major !== undefined, {
|
||||
timeout: remainingTimeout,
|
||||
})
|
||||
} finally {
|
||||
restoredVm !== undefined && xapi !== undefined && (await xapi.VM_destroy(restoredVm.$ref))
|
||||
}
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user