feat(load-balancer): add new anti-affinity mode (#5652)

Fixes #5600
This commit is contained in:
Ronan Abhamon
2021-03-30 17:25:41 +02:00
committed by GitHub
parent efdd196441
commit 9ef05b8afe
6 changed files with 365 additions and 40 deletions

View File

@@ -5,6 +5,8 @@
### Enhancements
- [Host/Load-balancer] Add a new anti-affinity mode (PR [#5652](https://github.com/vatesfr/xen-orchestra/pull/5652))
### Bug fixes
### Packages to release
@@ -26,5 +28,6 @@
- @xen-orchestra/xapi minor
- @xen-orchestra/backups minor
- xo-server-load-balancer minor
- xo-server patch
- xo-web minor

View File

@@ -15,13 +15,19 @@ export default class DensityPlan extends Plan {
}
async execute() {
const results = await this._findHostsToOptimize()
await this._processAntiAffinity()
const hosts = this._getHosts()
const results = await this._getHostStatsAverages({
hosts,
toOptimizeOnly: true,
})
if (!results) {
return
}
const { hosts, toOptimize } = results
const { toOptimize } = results
let { averages: hostsAverages } = results
@@ -94,14 +100,26 @@ export default class DensityPlan extends Plan {
debug(`Try to optimize Host (${hostId}).`)
const vms = await this._getVms(hostId)
const vmsAverages = await this._getVmsAverages(vms, host)
const vms = filter(this._getAllRunningVms(), vm => vm.$container === hostId)
const vmsAverages = await this._getVmsAverages(vms, { [host.id]: host })
for (const vm of vms) {
if (!vm.xenTools) {
debug(`VM (${vm.id}) of Host (${hostId}) does not support pool migration.`)
return
}
for (const tag of vm.tags) {
// TODO: Improve this piece of code. We could compute variance to check if the VM
// is migratable. But the code must be rewritten:
// - All VMs, hosts and stats must be fetched at one place.
// - It's necessary to maintain a dictionary of tags for each host.
// - ...
if (this._antiAffinityTags.includes(tag)) {
debug(`VM (${vm.id}) of Host (${hostId}) cannot be migrated. It contains anti-affinity tag '${tag}'.`)
return
}
}
}
// Sort vms by amount of memory. (+ -> -)

View File

@@ -3,6 +3,7 @@ import { intersection, uniq } from 'lodash'
import DensityPlan from './density-plan'
import PerformancePlan from './performance-plan'
import SimplePlan from './simple-plan'
import { DEFAULT_CRITICAL_THRESHOLD_CPU, DEFAULT_CRITICAL_THRESHOLD_MEMORY_FREE } from './plan'
import { EXECUTION_DELAY, debug } from './utils'
@@ -33,7 +34,7 @@ export const configurationSchema = {
},
mode: {
enum: ['Performance mode', 'Density mode'],
enum: ['Performance mode', 'Density mode', 'Simple mode'],
title: 'Mode',
},
@@ -75,6 +76,17 @@ export const configurationSchema = {
$type: 'Host',
},
},
antiAffinityTags: {
type: 'array',
title: 'Anti-affinity tags',
description: 'list of VM tags to force place VMs on different hosts',
items: {
type: 'string',
$type: 'Tag',
},
},
},
required: ['name', 'mode', 'pools'],
@@ -130,11 +142,15 @@ class LoadBalancerPlugin {
}
this._poolIds = this._poolIds.concat(pools)
this._plans.push(
mode === PERFORMANCE_MODE
? new PerformancePlan(this.xo, name, pools, options)
: new DensityPlan(this.xo, name, pools, options)
)
let plan
if (mode === PERFORMANCE_MODE) {
plan = new PerformancePlan(this.xo, name, pools, options)
} else if (mode === DENSITY_MODE) {
plan = new DensityPlan(this.xo, name, pools, options)
} else {
plan = new SimplePlan(this.xo, name, pools, options)
}
this._plans.push(plan)
}
_executePlans() {

View File

@@ -42,14 +42,19 @@ export default class PerformancePlan extends Plan {
console.error(error)
}
const results = await this._findHostsToOptimize()
await this._processAntiAffinity()
const hosts = this._getHosts()
const results = await this._getHostStatsAverages({
hosts,
toOptimizeOnly: true,
})
if (!results) {
return
}
const { averages, toOptimize } = results
const { hosts } = results
toOptimize.sort((a, b) => {
a = averages[a.id]
@@ -75,8 +80,8 @@ export default class PerformancePlan extends Plan {
}
async _optimize({ exceededHost, hosts, hostsAverages }) {
const vms = await this._getVms(exceededHost.id)
const vmsAverages = await this._getVmsAverages(vms, exceededHost)
const vms = filter(this._getAllRunningVms(), vm => vm.$container === exceededHost.id)
const vmsAverages = await this._getVmsAverages(vms, { [exceededHost.id]: exceededHost })
// Sort vms by cpu usage. (lower to higher)
vms.sort((a, b) => vmsAverages[b.id].cpu - vmsAverages[a.id].cpu)
@@ -121,6 +126,25 @@ export default class PerformancePlan extends Plan {
continue
}
if (!vm.xenTools) {
debug(`VM (${vm.id}) of Host (${exceededHost.id}) does not support pool migration.`)
continue
}
for (const tag of vm.tags) {
// TODO: Improve this piece of code. We could compute variance to check if the VM
// is migratable. But the code must be rewritten:
// - All VMs, hosts and stats must be fetched at one place.
// - It's necessary to maintain a dictionary of tags for each host.
// - ...
if (this._antiAffinityTags.includes(tag)) {
debug(
`VM (${vm.id}) of Host (${exceededHost.id}) cannot be migrated. It contains anti-affinity tag '${tag}'.`
)
continue
}
}
exceededAverages.cpu -= vmAverages.cpu
destinationAverages.cpu += vmAverages.cpu

View File

@@ -1,4 +1,5 @@
import { filter, includes, map as mapToArray, size } from 'lodash'
import { filter, groupBy, includes, isEmpty, keyBy, map as mapToArray, maxBy, minBy, size, sortBy } from 'lodash'
import { inspect } from 'util'
import { EXECUTION_DELAY, debug } from './utils'
@@ -94,7 +95,7 @@ function setRealCpuAverageOfVms(vms, vmsAverages, nCpus) {
// ===================================================================
export default class Plan {
constructor(xo, name, poolIds, { excludedHosts, thresholds } = {}) {
constructor(xo, name, poolIds, { excludedHosts, thresholds, antiAffinityTags } = {}) {
this.xo = xo
this._name = name
this._poolIds = poolIds
@@ -107,6 +108,7 @@ export default class Plan {
critical: numberOrDefault(thresholds && thresholds.memoryFree, DEFAULT_CRITICAL_THRESHOLD_MEMORY_FREE) * 1024,
},
}
this._antiAffinityTags = antiAffinityTags
for (const key in this._thresholds) {
const attr = this._thresholds[key]
@@ -130,36 +132,35 @@ export default class Plan {
// Get hosts to optimize.
// ===================================================================
async _findHostsToOptimize() {
const hosts = this._getHosts()
async _getHostStatsAverages({ hosts, toOptimizeOnly = false }) {
const hostsStats = await this._getHostsStats(hosts, 'minutes')
// Check if a resource's utilization exceeds threshold.
const avgNow = computeResourcesAverage(hosts, hostsStats, EXECUTION_DELAY)
let toOptimize = this._checkResourcesThresholds(hosts, avgNow)
// No resource's utilization problem.
if (toOptimize.length === 0) {
debug('No hosts to optimize.')
return
let toOptimize
if (toOptimizeOnly) {
// Check if a resource utilization exceeds threshold.
toOptimize = this._checkResourcesThresholds(hosts, avgNow)
if (toOptimize.length === 0) {
debug('No hosts to optimize.')
return
}
}
// Check in the last 30 min interval with ratio.
const avgBefore = computeResourcesAverage(hosts, hostsStats, MINUTES_OF_HISTORICAL_DATA)
const avgWithRatio = computeResourcesAverageWithWeight(avgNow, avgBefore, 0.75)
toOptimize = this._checkResourcesThresholds(toOptimize, avgWithRatio)
// No resource's utilization problem.
if (toOptimize.length === 0) {
debug('No hosts to optimize.')
return
if (toOptimizeOnly) {
// Check in the last 30 min interval with ratio.
toOptimize = this._checkResourcesThresholds(toOptimize, avgWithRatio)
if (toOptimize.length === 0) {
debug('No hosts to optimize.')
return
}
}
return {
toOptimize,
averages: avgWithRatio,
hosts,
}
}
@@ -197,11 +198,8 @@ export default class Plan {
)
}
async _getVms(hostId) {
return filter(
this.xo.getObjects(),
object => object.type === 'VM' && object.power_state === 'Running' && object.$container === hostId
)
_getAllRunningVms() {
return filter(this.xo.getObjects(), object => object.type === 'VM' && object.power_state === 'Running')
}
// ===================================================================
@@ -244,7 +242,7 @@ export default class Plan {
return vmsStats
}
async _getVmsAverages(vms, host) {
async _getVmsAverages(vms, hosts) {
const vmsStats = await this._getVmsStats(vms, 'minutes')
const vmsAverages = computeResourcesAverageWithWeight(
computeResourcesAverage(vms, vmsStats, EXECUTION_DELAY),
@@ -253,8 +251,265 @@ export default class Plan {
)
// Compute real CPU usage. Virtual CPUs to real CPUs.
setRealCpuAverageOfVms(vms, vmsAverages, host.CPUs.cpu_count)
for (const [hostId, hostVms] of Object.entries(groupBy(vms, '$container'))) {
setRealCpuAverageOfVms(hostVms, vmsAverages, hosts[hostId].CPUs.cpu_count)
}
return vmsAverages
}
// ===================================================================
// Anti-affinity helpers
// ===================================================================
async _processAntiAffinity() {
if (!this._antiAffinityTags.length) {
return
}
const allHosts = await this._getHosts()
if (allHosts.length <= 1) {
return
}
const idToHost = keyBy(allHosts, 'id')
const allVms = filter(this._getAllRunningVms(), vm => vm.$container in idToHost)
const taggedHosts = this._getAntiAffinityTaggedHosts(allHosts, allVms)
// 1. Check if we must migrate VMs...
const tagsDiff = {}
for (const watchedTag of this._antiAffinityTags) {
const getCount = fn => fn(taggedHosts.hosts, host => host.tags[watchedTag]).tags[watchedTag]
const diff = getCount(maxBy) - getCount(minBy)
if (diff > 1) {
tagsDiff[watchedTag] = diff - 1
}
}
if (isEmpty(tagsDiff)) {
return
}
// 2. Migrate!
debug('Try to apply anti-affinity policy.')
debug(`VM tag count per host: ${inspect(taggedHosts, { depth: null })}.`)
debug(`Tags diff: ${inspect(tagsDiff, { depth: null })}.`)
const vmsAverages = await this._getVmsAverages(allVms, idToHost)
const { averages: hostsAverages } = await this._getHostStatsAverages({ hosts: allHosts })
debug(`Hosts averages: ${inspect(hostsAverages, { depth: null })}.`)
const promises = []
for (const tag in tagsDiff) {
promises.push(...this._processAntiAffinityTag({ tag, vmsAverages, hostsAverages, taggedHosts, idToHost }))
}
// 3. Done!
debug(`VM tag count per host after migration: ${inspect(taggedHosts, { depth: null })}.`)
return Promise.all(promises)
}
// Schedules the migrations needed to spread the VMs carrying `tag` across hosts.
//
// Parameters (single object):
// - tag: the anti-affinity tag to balance.
// - vmsAverages: per VM id averages; `cpu` and `memory` are read.
// - hostsAverages: per host id averages; `memoryFree` is read, `cpu` and
//   `memoryFree` of destination hosts are updated in place.
// - taggedHosts: structure whose `hosts` entries expose `id`, `poolId`, `tags`
//   and `vms`; tag counters and `vms` are updated in place.
// - idToHost: host objects indexed by id, used to start the migrations.
//
// Returns the array of promises of the migrations which were started. The
// migrations are not awaited here.
_processAntiAffinityTag({ tag, vmsAverages, hostsAverages, taggedHosts, idToHost }) {
const promises = []
// Each pass of this loop schedules at most one migration, then recomputes the
// candidates because the counters have changed. The loop is only left through
// one of the `return promises` below.
while (true) {
// 1. Find source host from which to migrate.
// Only hosts with more than one VM carrying the tag are candidates.
// `sortBy` orders ascending, the best candidate is therefore the last one.
const sources = sortBy(
filter(taggedHosts.hosts, host => host.tags[tag] > 1),
[
host => host.tags[tag],
// Find host with the most memory used. Don't forget the "-". ;)
host => -hostsAverages[host.id].memoryFree,
]
)
// Walk the sources from the last (most tagged VMs) to the first.
// `sourceIndex` is one past the inspected element: reaching 0 means that every
// source has been tried (or that there was none).
for (let sourceIndex = sources.length; sourceIndex >= 0; --sourceIndex) {
if (sourceIndex === 0) {
return promises // Nothing to migrate or we can't.
}
const sourceHost = sources[sourceIndex - 1]
// 2. Find destination host.
// A destination must hold at least two tagged VMs fewer than the source,
// otherwise moving one VM would not reduce the gap between both hosts.
const destinations = sortBy(
filter(taggedHosts.hosts, host => host.id !== sourceHost.id && host.tags[tag] + 1 < sourceHost.tags[tag]),
[
host => host.tags[tag],
// Ideally it would be interesting to migrate in the same pool.
host => host.poolId !== sourceHost.poolId,
// Find host with the least memory used. Don't forget the "-". ;)
host => -hostsAverages[host.id].memoryFree,
]
)
if (!destinations.length) {
return promises // Cannot find a valid destination.
}
// Build VM list to migrate.
// We try to migrate VMs with the targeted tag.
const sourceVms = filter(sourceHost.vms, vm => vm.tags.includes(tag))
let destinationHost
let vm
// Try the destinations in order of preference and stop at the first one for
// which a VM can be selected.
for (const destination of destinations) {
destinationHost = destination
debug(`Host candidate: ${sourceHost.id} -> ${destinationHost.id}.`)
// Keep only the VMs whose average memory fits in the free memory of the destination.
const vms = filter(sourceVms, vm => hostsAverages[destinationHost.id].memoryFree >= vmsAverages[vm.id].memory)
debug(
`Tagged VM ("${tag}") candidates to migrate from host ${sourceHost.id}: ${inspect(mapToArray(vms, 'id'))}.`
)
vm = this._getAntiAffinityVmToMigrate({
vms,
vmsAverages,
hostsAverages,
taggedHosts,
sourceHost,
destinationHost,
})
if (vm) {
break
}
}
if (!vm) {
continue // If we can't find a VM to migrate, we must try with another source!
}
debug(`Migrate VM (${vm.id}) to Host (${destinationHost.id}) from Host (${sourceHost.id}).`)
// 3. Update tags and averages.
// This update can change the source host for the next migration.
// Every watched tag of the VM is moved, not only the one being processed.
// Note: this inner `tag` shadows the `tag` parameter.
for (const tag of vm.tags) {
if (this._antiAffinityTags.includes(tag)) {
sourceHost.tags[tag]--
destinationHost.tags[tag]++
}
}
// Only the averages of the destination are adjusted, those of the source
// host are left untouched.
const destinationAverages = hostsAverages[destinationHost.id]
const vmAverages = vmsAverages[vm.id]
destinationAverages.cpu += vmAverages.cpu
destinationAverages.memoryFree -= vmAverages.memory
// The VM is no longer a candidate on the source host.
delete sourceHost.vms[vm.id]
// 4. Migrate.
// The migration is started right away; its promise is collected for the caller.
const destination = idToHost[destinationHost.id]
promises.push(
this.xo
.getXapi(idToHost[sourceHost.id])
.migrateVm(vm._xapiId, this.xo.getXapi(destination), destination._xapiId)
)
break // Continue with the same tag, the source can be different.
}
}
}
_getAntiAffinityTaggedHosts(hosts, vms) {
const tagCount = {}
for (const tag of this._antiAffinityTags) {
tagCount[tag] = 0
}
const taggedHosts = {}
for (const host of hosts) {
const tags = {}
for (const tag of this._antiAffinityTags) {
tags[tag] = 0
}
const taggedHost = (taggedHosts[host.id] = {
id: host.id,
poolId: host.$poolId,
tags,
vms: {},
})
// Hide properties when util.inspect is used.
Object.defineProperties(taggedHost, {
poolId: { enumerable: false },
vms: { enumerable: false }
})
}
for (const vm of vms) {
const hostId = vm.$container
if (!(hostId in taggedHosts)) {
continue
}
const taggedHost = taggedHosts[hostId]
for (const tag of vm.tags) {
if (this._antiAffinityTags.includes(tag)) {
tagCount[tag]++
taggedHost.tags[tag]++
taggedHost.vms[vm.id] = vm
}
}
}
return { tagCount, hosts: Object.values(taggedHosts) }
}
_computeAntiAffinityVariance(taggedHosts) {
// See: https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
let variance = 0
const { hosts } = taggedHosts
for (const tag in taggedHosts.tagCount) {
const k = hosts[0].tags[tag]
let ex = 0
let ex2 = 0
for (const host of hosts) {
const x = host.tags[tag]
const diff = x - k
ex += diff
ex2 += diff * diff
}
const n = hosts.length
variance += (ex2 - (ex * ex) / n) / n
}
return variance
}
_getAntiAffinityVmToMigrate({ vms, vmsAverages, hostsAverages, taggedHosts, sourceHost, destinationHost }) {
let bestVariance = this._computeAntiAffinityVariance(taggedHosts)
let bestVm
for (const vm of vms) {
const vmTags = filter(vm.tags, tag => this._antiAffinityTags.includes(tag))
for (const tag of vmTags) {
sourceHost.tags[tag]--
destinationHost.tags[tag]++
}
const variance = this._computeAntiAffinityVariance(taggedHosts)
for (const tag of vmTags) {
sourceHost.tags[tag]++
destinationHost.tags[tag]--
}
if (variance < bestVariance) {
if (vm.xenTools) {
bestVariance = variance
bestVm = vm
} else {
debug(`VM (${vm.id}) of Host (${sourceHost.id}) does not support pool migration.`)
}
}
}
return bestVm
}
}

View File

@@ -0,0 +1,9 @@
import Plan from './plan'
// ===================================================================
// Plan whose execution consists solely of the anti-affinity pass: unlike the
// other plans visible in this change, no host statistics are inspected here.
export default class SimplePlan extends Plan {
// Runs `_processAntiAffinity()` (not defined in this class, expected to come
// from `Plan`) and waits for it. The resolved value is discarded: the returned
// promise resolves with `undefined`.
async execute() {
await this._processAntiAffinity()
}
}