feat(load-balancer): improve migration (perf mode) regarding memory and cpu usage

- ensure we optimize CPU first instead of free memory
- use low threshold now to forbid bad migration based on cpu usage
- add a tolerance on the VM CPU usage to migrate VM with the most memory used
- do not migrate if we create an unbalanced configuration (only if high thresholds are not reached)
- change factors to take into account the new algorithm
This commit is contained in:
Ronan Abhamon
2021-04-14 17:41:40 +02:00
committed by Julien Fontanet
parent 8ae432554e
commit 4008934bbb
3 changed files with 105 additions and 58 deletions

View File

@@ -15,6 +15,7 @@
- [XOA] Notify user when proxies need to be upgraded (PR [#5717](https://github.com/vatesfr/xen-orchestra/pull/5717))
- [Host/network] Identify the management network [#5731](https://github.com/vatesfr/xen-orchestra/issues/5731) (PR [#5743](https://github.com/vatesfr/xen-orchestra/pull/5743))
- [Backup/S3] Support for HTTP protocol and choice of region (PR [#5658](https://github.com/vatesfr/xen-orchestra/pull/5658))
- [Host/Load-balancer] Improve migration (perf mode) regarding memory and cpu usage (PR [#5734](https://github.com/vatesfr/xen-orchestra/pull/5734))
### Bug fixes

View File

@@ -1,19 +1,12 @@
import { filter, find } from 'lodash'
import { filter } from 'lodash'
import Plan from './plan'
import { debug } from './utils'
// Compare a list of objects and give the best.
function searchBestObject(objects, fun) {
let object = objects[0]
for (let i = 1; i < objects.length; i++) {
if (fun(object, objects[i]) > 0) {
object = objects[i]
}
}
return object
// Approximate equality between two numbers.
//
// `a` and `b` are considered equal when their distance does not exceed
// `epsi` times the smaller of their magnitudes (relative tolerance), or
// when both magnitudes are themselves at most `epsi` (values near zero).
function epsiEqual(a, b, epsi = 0.001) {
  const magnitudeA = Math.abs(a)
  const magnitudeB = Math.abs(b)

  // Relative check, scaled by the smaller operand.
  if (Math.abs(a - b) <= Math.min(magnitudeA, magnitudeB) * epsi) {
    return true
  }

  // Fallback for two values that are both close to zero.
  return magnitudeA <= epsi && magnitudeB <= epsi
}
// ===================================================================
@@ -55,14 +48,7 @@ export default class PerformancePlan extends Plan {
}
const { averages, toOptimize } = results
toOptimize.sort((a, b) => {
a = averages[a.id]
b = averages[b.id]
return b.cpu - a.cpu || a.memoryFree - b.memoryFree
})
toOptimize.sort((a, b) => -this._sortHosts(a, b))
for (const exceededHost of toOptimize) {
const { id } = exceededHost
@@ -79,51 +65,68 @@ export default class PerformancePlan extends Plan {
}
}
_getThresholdState(averages) {
return {
cpu: averages.cpu >= this._thresholds.cpu.high,
mem: averages.memoryFree <= this._thresholds.memoryFree.high,
}
}
_sortHosts(aAverages, bAverages) {
const aState = this._getThresholdState(aAverages)
const bState = this._getThresholdState(bAverages)
// A. Same state.
if (aState.mem === bState.mem && aState.cpu === bState.cpu) {
if (epsiEqual(aAverages.cpu, bAverages.cpu)) {
return bAverages.memoryFree - aAverages.memoryFree
}
return aAverages.cpu - bAverages.cpu
}
// B. No limit reached on A OR both limits reached on B.
if ((!aState.mem && !aState.cpu) || (bState.mem && bState.cpu)) {
return -1
}
// C. No limit reached on B OR both limits reached on A.
if ((!bState.mem && !bState.cpu) || (aState.mem && aState.cpu)) {
return 1
}
// D. If only one limit is reached on A AND B, we prefer to migrate on the host with the lowest CPU usage.
return !aState.cpu ? -1 : 1
}
async _optimize({ exceededHost, hosts, hostsAverages }) {
const vms = filter(this._getAllRunningVms(), vm => vm.$container === exceededHost.id)
const vmsAverages = await this._getVmsAverages(vms, { [exceededHost.id]: exceededHost })
// Sort vms by cpu usage. (lower to higher)
vms.sort((a, b) => vmsAverages[b.id].cpu - vmsAverages[a.id].cpu)
// Sort vms by cpu usage. (higher to lower) + use memory otherwise.
vms.sort((a, b) => {
const aAverages = vmsAverages[a.id]
const bAverages = vmsAverages[b.id]
// We use a tolerance to migrate VM with the most memory used.
if (epsiEqual(aAverages.cpu, bAverages.cpu, 3)) {
return bAverages.memory - aAverages.memory
}
return bAverages.cpu - aAverages.cpu
})
const exceededAverages = hostsAverages[exceededHost.id]
const promises = []
const xapiSrc = this.xo.getXapi(exceededHost)
let optimizationsCount = 0
const searchFunction = (a, b) => hostsAverages[b.id].cpu - hostsAverages[a.id].cpu
let optimizationCount = 0
for (const vm of vms) {
debug(`Trying to migrate ${vm.id}...`)
// Search host with lower cpu usage in the same pool first. In other pool if necessary.
let destination = searchBestObject(
find(hosts, host => host.$poolId === vm.$poolId),
searchFunction
)
if (!destination) {
debug('No destination host found in the current VM pool. Trying in all pools.')
destination = searchBestObject(hosts, searchFunction)
}
const destinationAverages = hostsAverages[destination.id]
const vmAverages = vmsAverages[vm.id]
debug(`Trying to migrate VM (${vm.id}) to Host (${destination.id}) from Host (${exceededHost.id})...`)
// Unable to move the vm.
// Stop migration if we are below low threshold.
if (
exceededAverages.cpu - vmAverages.cpu < destinationAverages.cpu + vmAverages.cpu ||
destinationAverages.memoryFree < vmAverages.memory
exceededAverages.cpu <= this._thresholds.cpu.low &&
exceededAverages.memoryFree >= this._thresholds.memoryFree.low
) {
debug(`Cannot migrate VM (${vm.id}) to Host (${destination.id}).`)
debug(
`Src Host CPU=${exceededAverages.cpu}, Dest Host CPU=${destinationAverages.cpu}, VM CPU=${vmAverages.cpu}`
)
debug(`Dest Host free RAM=${destinationAverages.memoryFree}, VM used RAM=${vmAverages.memory})`)
continue
return
}
if (!vm.xenTools) {
@@ -145,6 +148,49 @@ export default class PerformancePlan extends Plan {
}
}
hosts.sort((a, b) => {
if (a.$poolId !== b.$poolId) {
// Use host in the same pool first. In other pool if necessary.
if (a.$poolId === vm.$poolId) {
return -1
}
if (b.$poolId === vm.$poolId) {
return 1
}
}
return this._sortHosts(hostsAverages[a.id], hostsAverages[b.id])
})
const destination = hosts[0]
const destinationAverages = hostsAverages[destination.id]
const vmAverages = vmsAverages[vm.id]
// Unable to move the vm.
// Because the performance mode is focused on the CPU usage, we can't migrate if the low threshold
// is reached on the destination.
// It's not the same idea regarding the memory usage, we can migrate if the low threshold is reached,
// but we avoid the migration in the critical (high) threshold case.
const state = this._getThresholdState(exceededAverages)
if (
destinationAverages.cpu + vmAverages.cpu >= this._thresholds.cpu.low ||
destinationAverages.memoryFree - vmAverages.memory <= this._thresholds.cpu.high ||
(!state.cpu &&
!state.memory &&
(exceededAverages.cpu - vmAverages.cpu < destinationAverages.cpu + vmAverages.cpu ||
exceededAverages.memoryFree + vmAverages.memory > destinationAverages.memoryFree - vmAverages.memory))
) {
debug(`Cannot migrate VM (${vm.id}) to Host (${destination.id}).`)
debug(
`Src Host CPU=${exceededAverages.cpu}, Dest Host CPU=${destinationAverages.cpu}, VM CPU=${vmAverages.cpu}`
)
debug(
`Src Host free RAM=${exceededAverages.memoryFree}, Dest Host free RAM=${destinationAverages.memoryFree}, VM used RAM=${vmAverages.memory})`
)
continue
}
exceededAverages.cpu -= vmAverages.cpu
destinationAverages.cpu += vmAverages.cpu
@@ -152,12 +198,12 @@ export default class PerformancePlan extends Plan {
destinationAverages.memoryFree -= vmAverages.memory
debug(`Migrate VM (${vm.id}) to Host (${destination.id}) from Host (${exceededHost.id}).`)
optimizationsCount++
optimizationCount++
promises.push(xapiSrc.migrateVm(vm._xapiId, this.xo.getXapi(destination), destination._xapiId))
}
await Promise.all(promises)
debug(`Performance mode: ${optimizationsCount} optimizations for Host (${exceededHost.id}).`)
debug(`Performance mode: ${optimizationCount} optimizations for Host (${exceededHost.id}).`)
}
}

View File

@@ -9,14 +9,14 @@ const MINUTES_OF_HISTORICAL_DATA = 30
export const DEFAULT_CRITICAL_THRESHOLD_CPU = 90.0
// Memory threshold in MB.
export const DEFAULT_CRITICAL_THRESHOLD_MEMORY_FREE = 64.0
export const DEFAULT_CRITICAL_THRESHOLD_MEMORY_FREE = 1000.0
// Thresholds factors.
const HIGH_THRESHOLD_FACTOR = 0.85
const LOW_THRESHOLD_FACTOR = 0.25
const LOW_THRESHOLD_FACTOR = 0.65
const HIGH_THRESHOLD_MEMORY_FREE_FACTOR = 1.25
const LOW_THRESHOLD_MEMORY_FREE_FACTOR = 20.0
const HIGH_THRESHOLD_MEMORY_FREE_FACTOR = 1.2
const LOW_THRESHOLD_MEMORY_FREE_FACTOR = 1.5
const numberOrDefault = (value, def) => (value >= 0 ? value : def)