feat(load-balancer): improve migration (perf mode) regarding memory and cpu usage
- ensure we optimize CPU first instead of free memory
- use low threshold now to forbid bad migration based on cpu usage
- add a tolerance on the VM CPU usage to migrate VM with the most memory used
- do not migrate if we create an unbalanced configuration (only if high thresholds are not reached)
- change factors to take into account the new algorithm
This commit is contained in:
committed by
Julien Fontanet
parent
8ae432554e
commit
4008934bbb
@@ -15,6 +15,7 @@
|
||||
- [XOA] Notify user when proxies need to be upgraded (PR [#5717](https://github.com/vatesfr/xen-orchestra/pull/5717))
|
||||
- [Host/network] Identify the management network [#5731](https://github.com/vatesfr/xen-orchestra/issues/5731) (PR [#5743](https://github.com/vatesfr/xen-orchestra/pull/5743))
|
||||
- [Backup/S3] Support for HTTP protocol and choice of region (PR [#5658](https://github.com/vatesfr/xen-orchestra/pull/5658))
|
||||
- [Host/Load-balancer] Improve migration (perf mode) regarding memory and cpu usage (PR [#5734](https://github.com/vatesfr/xen-orchestra/pull/5734))
|
||||
|
||||
### Bug fixes
|
||||
|
||||
|
||||
@@ -1,19 +1,12 @@
|
||||
import { filter, find } from 'lodash'
|
||||
import { filter } from 'lodash'
|
||||
|
||||
import Plan from './plan'
|
||||
import { debug } from './utils'
|
||||
|
||||
// Compare a list of objects and give the best.
|
||||
// Compare a list of objects with `fun` and return the best one.
// `fun(best, candidate) > 0` means `candidate` beats the current best.
// Note: a non-array input simply yields `objects[0]` (undefined for plain
// objects), because the loop never runs when `length` is undefined.
function searchBestObject(objects, fun) {
  let best = objects[0]

  for (let i = 1; i < objects.length; ++i) {
    const candidate = objects[i]
    if (fun(best, candidate) > 0) {
      best = candidate
    }
  }

  return best
}
// Returns true when `a` and `b` are close enough: either their difference is
// within a relative tolerance of `epsi` (scaled by the smaller magnitude), or
// both values are within `epsi` of zero (covers the case where the relative
// scale collapses to ~0).
function epsiEqual(a, b, epsi = 0.001) {
  const magnitudeA = Math.abs(a)
  const magnitudeB = Math.abs(b)
  const bothNearZero = magnitudeA <= epsi && magnitudeB <= epsi
  return bothNearZero || Math.abs(a - b) <= Math.min(magnitudeA, magnitudeB) * epsi
}
|
||||
|
||||
// ===================================================================
|
||||
@@ -55,14 +48,7 @@ export default class PerformancePlan extends Plan {
|
||||
}
|
||||
|
||||
const { averages, toOptimize } = results
|
||||
|
||||
toOptimize.sort((a, b) => {
|
||||
a = averages[a.id]
|
||||
b = averages[b.id]
|
||||
|
||||
return b.cpu - a.cpu || a.memoryFree - b.memoryFree
|
||||
})
|
||||
|
||||
toOptimize.sort((a, b) => -this._sortHosts(a, b))
|
||||
for (const exceededHost of toOptimize) {
|
||||
const { id } = exceededHost
|
||||
|
||||
@@ -79,51 +65,68 @@ export default class PerformancePlan extends Plan {
|
||||
}
|
||||
}
|
||||
|
||||
_getThresholdState(averages) {
|
||||
return {
|
||||
cpu: averages.cpu >= this._thresholds.cpu.high,
|
||||
mem: averages.memoryFree <= this._thresholds.memoryFree.high,
|
||||
}
|
||||
}
|
||||
|
||||
_sortHosts(aAverages, bAverages) {
|
||||
const aState = this._getThresholdState(aAverages)
|
||||
const bState = this._getThresholdState(bAverages)
|
||||
|
||||
// A. Same state.
|
||||
if (aState.mem === bState.mem && aState.cpu === bState.cpu) {
|
||||
if (epsiEqual(aAverages.cpu, bAverages.cpu)) {
|
||||
return bAverages.memoryFree - aAverages.memoryFree
|
||||
}
|
||||
return aAverages.cpu - bAverages.cpu
|
||||
}
|
||||
|
||||
// B. No limit reached on A OR both limits reached on B.
|
||||
if ((!aState.mem && !aState.cpu) || (bState.mem && bState.cpu)) {
|
||||
return -1
|
||||
}
|
||||
|
||||
// C. No limit reached on B OR both limits reached on A.
|
||||
if ((!bState.mem && !bState.cpu) || (aState.mem && aState.cpu)) {
|
||||
return 1
|
||||
}
|
||||
|
||||
// D. If only one limit is reached on A AND B, we prefer to migrate on the host with the lowest CPU usage.
|
||||
return !aState.cpu ? -1 : 1
|
||||
}
|
||||
|
||||
async _optimize({ exceededHost, hosts, hostsAverages }) {
|
||||
const vms = filter(this._getAllRunningVms(), vm => vm.$container === exceededHost.id)
|
||||
const vmsAverages = await this._getVmsAverages(vms, { [exceededHost.id]: exceededHost })
|
||||
|
||||
// Sort vms by cpu usage. (lower to higher)
|
||||
vms.sort((a, b) => vmsAverages[b.id].cpu - vmsAverages[a.id].cpu)
|
||||
// Sort vms by cpu usage. (higher to lower) + use memory otherwise.
|
||||
vms.sort((a, b) => {
|
||||
const aAverages = vmsAverages[a.id]
|
||||
const bAverages = vmsAverages[b.id]
|
||||
|
||||
// We use a tolerance to migrate VM with the most memory used.
|
||||
if (epsiEqual(aAverages.cpu, bAverages.cpu, 3)) {
|
||||
return bAverages.memory - aAverages.memory
|
||||
}
|
||||
return bAverages.cpu - aAverages.cpu
|
||||
})
|
||||
|
||||
const exceededAverages = hostsAverages[exceededHost.id]
|
||||
const promises = []
|
||||
|
||||
const xapiSrc = this.xo.getXapi(exceededHost)
|
||||
let optimizationsCount = 0
|
||||
|
||||
const searchFunction = (a, b) => hostsAverages[b.id].cpu - hostsAverages[a.id].cpu
|
||||
let optimizationCount = 0
|
||||
|
||||
for (const vm of vms) {
|
||||
debug(`Trying to migrate ${vm.id}...`)
|
||||
|
||||
// Search host with lower cpu usage in the same pool first. In other pool if necessary.
|
||||
let destination = searchBestObject(
|
||||
find(hosts, host => host.$poolId === vm.$poolId),
|
||||
searchFunction
|
||||
)
|
||||
|
||||
if (!destination) {
|
||||
debug('No destination host found in the current VM pool. Trying in all pools.')
|
||||
destination = searchBestObject(hosts, searchFunction)
|
||||
}
|
||||
|
||||
const destinationAverages = hostsAverages[destination.id]
|
||||
const vmAverages = vmsAverages[vm.id]
|
||||
|
||||
debug(`Trying to migrate VM (${vm.id}) to Host (${destination.id}) from Host (${exceededHost.id})...`)
|
||||
|
||||
// Unable to move the vm.
|
||||
// Stop migration if we are below low threshold.
|
||||
if (
|
||||
exceededAverages.cpu - vmAverages.cpu < destinationAverages.cpu + vmAverages.cpu ||
|
||||
destinationAverages.memoryFree < vmAverages.memory
|
||||
exceededAverages.cpu <= this._thresholds.cpu.low &&
|
||||
exceededAverages.memoryFree >= this._thresholds.memoryFree.low
|
||||
) {
|
||||
debug(`Cannot migrate VM (${vm.id}) to Host (${destination.id}).`)
|
||||
debug(
|
||||
`Src Host CPU=${exceededAverages.cpu}, Dest Host CPU=${destinationAverages.cpu}, VM CPU=${vmAverages.cpu}`
|
||||
)
|
||||
debug(`Dest Host free RAM=${destinationAverages.memoryFree}, VM used RAM=${vmAverages.memory})`)
|
||||
continue
|
||||
return
|
||||
}
|
||||
|
||||
if (!vm.xenTools) {
|
||||
@@ -145,6 +148,49 @@ export default class PerformancePlan extends Plan {
|
||||
}
|
||||
}
|
||||
|
||||
hosts.sort((a, b) => {
|
||||
if (a.$poolId !== b.$poolId) {
|
||||
// Use host in the same pool first. In other pool if necessary.
|
||||
if (a.$poolId === vm.$poolId) {
|
||||
return -1
|
||||
}
|
||||
if (b.$poolId === vm.$poolId) {
|
||||
return 1
|
||||
}
|
||||
}
|
||||
|
||||
return this._sortHosts(hostsAverages[a.id], hostsAverages[b.id])
|
||||
})
|
||||
|
||||
const destination = hosts[0]
|
||||
|
||||
const destinationAverages = hostsAverages[destination.id]
|
||||
const vmAverages = vmsAverages[vm.id]
|
||||
|
||||
// Unable to move the vm.
|
||||
// Because the performance mode is focused on the CPU usage, we can't migrate if the low threshold
|
||||
// is reached on the destination.
|
||||
// It's not the same idea regarding the memory usage, we can migrate if the low threshold is reached,
|
||||
// but we avoid the migration in the critical (high) threshold case.
|
||||
// Which high thresholds the source (exceeded) host currently trips; used
// below to decide whether the balance-preserving extra check applies.
const state = this._getThresholdState(exceededAverages)
|
||||
if (
|
||||
// Destination would reach the low CPU threshold after receiving the VM.
destinationAverages.cpu + vmAverages.cpu >= this._thresholds.cpu.low ||
|
||||
// Destination would not keep enough free memory after receiving the VM.
// NOTE(review): this compares free memory against `this._thresholds.cpu.high`
// (a CPU percentage) — should it be `this._thresholds.memoryFree.high`? TODO confirm.
destinationAverages.memoryFree - vmAverages.memory <= this._thresholds.cpu.high ||
|
||||
// When no high threshold is tripped on the source, refuse migrations that
// would leave the destination worse off than the source (unbalanced result).
(!state.cpu &&
|
||||
// NOTE(review): `_getThresholdState` returns a `mem` property, not `memory`,
// so `state.memory` is always undefined and `!state.memory` is always true —
// should this be `!state.mem`? TODO confirm.
!state.memory &&
|
||||
(exceededAverages.cpu - vmAverages.cpu < destinationAverages.cpu + vmAverages.cpu ||
|
||||
exceededAverages.memoryFree + vmAverages.memory > destinationAverages.memoryFree - vmAverages.memory))
|
||||
) {
|
||||
debug(`Cannot migrate VM (${vm.id}) to Host (${destination.id}).`)
|
||||
debug(
|
||||
`Src Host CPU=${exceededAverages.cpu}, Dest Host CPU=${destinationAverages.cpu}, VM CPU=${vmAverages.cpu}`
|
||||
)
|
||||
debug(
|
||||
// NOTE(review): the trailing ')' inside this log string looks like a typo.
`Src Host free RAM=${exceededAverages.memoryFree}, Dest Host free RAM=${destinationAverages.memoryFree}, VM used RAM=${vmAverages.memory})`
|
||||
)
|
||||
continue
|
||||
}
|
||||
|
||||
exceededAverages.cpu -= vmAverages.cpu
|
||||
destinationAverages.cpu += vmAverages.cpu
|
||||
|
||||
@@ -152,12 +198,12 @@ export default class PerformancePlan extends Plan {
|
||||
destinationAverages.memoryFree -= vmAverages.memory
|
||||
|
||||
debug(`Migrate VM (${vm.id}) to Host (${destination.id}) from Host (${exceededHost.id}).`)
|
||||
optimizationsCount++
|
||||
optimizationCount++
|
||||
|
||||
promises.push(xapiSrc.migrateVm(vm._xapiId, this.xo.getXapi(destination), destination._xapiId))
|
||||
}
|
||||
|
||||
await Promise.all(promises)
|
||||
debug(`Performance mode: ${optimizationsCount} optimizations for Host (${exceededHost.id}).`)
|
||||
debug(`Performance mode: ${optimizationCount} optimizations for Host (${exceededHost.id}).`)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,14 +9,14 @@ const MINUTES_OF_HISTORICAL_DATA = 30
|
||||
export const DEFAULT_CRITICAL_THRESHOLD_CPU = 90.0
|
||||
|
||||
// Memory threshold in MB.
|
||||
export const DEFAULT_CRITICAL_THRESHOLD_MEMORY_FREE = 64.0
|
||||
export const DEFAULT_CRITICAL_THRESHOLD_MEMORY_FREE = 1000.0
|
||||
|
||||
// Thresholds factors.
|
||||
const HIGH_THRESHOLD_FACTOR = 0.85
|
||||
const LOW_THRESHOLD_FACTOR = 0.25
|
||||
const LOW_THRESHOLD_FACTOR = 0.65
|
||||
|
||||
const HIGH_THRESHOLD_MEMORY_FREE_FACTOR = 1.25
|
||||
const LOW_THRESHOLD_MEMORY_FREE_FACTOR = 20.0
|
||||
const HIGH_THRESHOLD_MEMORY_FREE_FACTOR = 1.2
|
||||
const LOW_THRESHOLD_MEMORY_FREE_FACTOR = 1.5
|
||||
|
||||
// Returns `value` when it compares as a non-negative number, otherwise `def`.
// `NaN`, `undefined`, and negative values all fall back to the default.
const numberOrDefault = function (value, def) {
  if (value >= 0) {
    return value
  }
  return def
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user