mirror of
https://github.com/libvirt/libvirt.git
synced 2025-02-25 18:55:26 -06:00
qemu_domain: add a PPC64 memLockLimit helper
There is a lot of documentation in the comments about how PPC64 handles passthrough VFIO devices when calculating the @memLockLimit, and more will be added with the PPC64 NVLink2 support code. Let's remove the PPC64 code from the body of qemuDomainGetMemLockLimitBytes() and put it into a helper function. This will simplify the flow of qemuDomainGetMemLockLimitBytes(), which handles all the other platforms, and improve the readability of the PPC64 specifics. Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com> Reviewed-by: Erik Skultety <eskultet@redhat.com>
This commit is contained in:
parent
cf7c521287
commit
7a686fd2ea
@ -10343,6 +10343,97 @@ qemuDomainUpdateCurrentMemorySize(virDomainObjPtr vm)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* getPPC64MemLockLimitBytes:
|
||||||
|
* @def: domain definition
|
||||||
|
*
|
||||||
|
* A PPC64 helper that calculates the memory locking limit in order for
|
||||||
|
* the guest to operate properly.
|
||||||
|
*/
|
||||||
|
static unsigned long long
|
||||||
|
getPPC64MemLockLimitBytes(virDomainDefPtr def)
|
||||||
|
{
|
||||||
|
unsigned long long memKB = 0;
|
||||||
|
unsigned long long baseLimit = 0;
|
||||||
|
unsigned long long memory = 0;
|
||||||
|
unsigned long long maxMemory = 0;
|
||||||
|
unsigned long long passthroughLimit = 0;
|
||||||
|
size_t i, nPCIHostBridges = 0;
|
||||||
|
bool usesVFIO = false;
|
||||||
|
|
||||||
|
for (i = 0; i < def->ncontrollers; i++) {
|
||||||
|
virDomainControllerDefPtr cont = def->controllers[i];
|
||||||
|
|
||||||
|
if (!virDomainControllerIsPSeriesPHB(cont))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
nPCIHostBridges++;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < def->nhostdevs; i++) {
|
||||||
|
virDomainHostdevDefPtr dev = def->hostdevs[i];
|
||||||
|
|
||||||
|
if (dev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS &&
|
||||||
|
dev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI &&
|
||||||
|
dev->source.subsys.u.pci.backend == VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO) {
|
||||||
|
usesVFIO = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
memory = virDomainDefGetMemoryTotal(def);
|
||||||
|
|
||||||
|
if (def->mem.max_memory)
|
||||||
|
maxMemory = def->mem.max_memory;
|
||||||
|
else
|
||||||
|
maxMemory = memory;
|
||||||
|
|
||||||
|
/* baseLimit := maxMemory / 128 (a)
|
||||||
|
* + 4 MiB * #PHBs + 8 MiB (b)
|
||||||
|
*
|
||||||
|
* (a) is the hash table
|
||||||
|
*
|
||||||
|
* (b) is accounting for the 32-bit DMA window - it could be either the
|
||||||
|
* KVM accelerated TCE tables for emulated devices, or the VFIO
|
||||||
|
* userspace view. The 4 MiB per-PHB (including the default one) covers
|
||||||
|
* a 2GiB DMA window: default is 1GiB, but it's possible it'll be
|
||||||
|
* increased to help performance. The 8 MiB extra should be plenty for
|
||||||
|
* the TCE table index for any reasonable number of PHBs and several
|
||||||
|
* spapr-vlan or spapr-vscsi devices (512kB + a tiny bit each) */
|
||||||
|
baseLimit = maxMemory / 128 +
|
||||||
|
4096 * nPCIHostBridges +
|
||||||
|
8192;
|
||||||
|
|
||||||
|
/* passthroughLimit := max( 2 GiB * #PHBs, (c)
|
||||||
|
* memory (d)
|
||||||
|
* + memory * 1/512 * #PHBs + 8 MiB ) (e)
|
||||||
|
*
|
||||||
|
* (c) is the pre-DDW VFIO DMA window accounting. We're allowing 2 GiB
|
||||||
|
* rather than 1 GiB
|
||||||
|
*
|
||||||
|
* (d) is the with-DDW (and memory pre-registration and related
|
||||||
|
* features) DMA window accounting - assuming that we only account RAM
|
||||||
|
* once, even if mapped to multiple PHBs
|
||||||
|
*
|
||||||
|
* (e) is the with-DDW userspace view and overhead for the 64-bit DMA
|
||||||
|
* window. This is based a bit on expected guest behaviour, but there
|
||||||
|
* really isn't a way to completely avoid that. We assume the guest
|
||||||
|
* requests a 64-bit DMA window (per PHB) just big enough to map all
|
||||||
|
* its RAM. 4 kiB page size gives the 1/512; it will be less with 64
|
||||||
|
* kiB pages, less still if the guest is mapped with hugepages (unlike
|
||||||
|
* the default 32-bit DMA window, DDW windows can use large IOMMU
|
||||||
|
* pages). 8 MiB is for second and further level overheads, like (b) */
|
||||||
|
if (usesVFIO)
|
||||||
|
passthroughLimit = MAX(2 * 1024 * 1024 * nPCIHostBridges,
|
||||||
|
memory +
|
||||||
|
memory / 512 * nPCIHostBridges + 8192);
|
||||||
|
|
||||||
|
memKB = baseLimit + passthroughLimit;
|
||||||
|
|
||||||
|
return memKB << 10;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* qemuDomainGetMemLockLimitBytes:
|
* qemuDomainGetMemLockLimitBytes:
|
||||||
* @def: domain definition
|
* @def: domain definition
|
||||||
@ -10374,84 +10465,8 @@ qemuDomainGetMemLockLimitBytes(virDomainDefPtr def)
|
|||||||
if (def->mem.locked)
|
if (def->mem.locked)
|
||||||
return VIR_DOMAIN_MEMORY_PARAM_UNLIMITED;
|
return VIR_DOMAIN_MEMORY_PARAM_UNLIMITED;
|
||||||
|
|
||||||
if (ARCH_IS_PPC64(def->os.arch) && def->virtType == VIR_DOMAIN_VIRT_KVM) {
|
if (ARCH_IS_PPC64(def->os.arch) && def->virtType == VIR_DOMAIN_VIRT_KVM)
|
||||||
unsigned long long maxMemory;
|
return getPPC64MemLockLimitBytes(def);
|
||||||
unsigned long long memory;
|
|
||||||
unsigned long long baseLimit;
|
|
||||||
unsigned long long passthroughLimit = 0;
|
|
||||||
size_t nPCIHostBridges = 0;
|
|
||||||
bool usesVFIO = false;
|
|
||||||
|
|
||||||
for (i = 0; i < def->ncontrollers; i++) {
|
|
||||||
virDomainControllerDefPtr cont = def->controllers[i];
|
|
||||||
|
|
||||||
if (!virDomainControllerIsPSeriesPHB(cont))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
nPCIHostBridges++;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i = 0; i < def->nhostdevs; i++) {
|
|
||||||
virDomainHostdevDefPtr dev = def->hostdevs[i];
|
|
||||||
|
|
||||||
if (dev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS &&
|
|
||||||
dev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI &&
|
|
||||||
dev->source.subsys.u.pci.backend == VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO) {
|
|
||||||
usesVFIO = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
memory = virDomainDefGetMemoryTotal(def);
|
|
||||||
|
|
||||||
if (def->mem.max_memory)
|
|
||||||
maxMemory = def->mem.max_memory;
|
|
||||||
else
|
|
||||||
maxMemory = memory;
|
|
||||||
|
|
||||||
/* baseLimit := maxMemory / 128 (a)
|
|
||||||
* + 4 MiB * #PHBs + 8 MiB (b)
|
|
||||||
*
|
|
||||||
* (a) is the hash table
|
|
||||||
*
|
|
||||||
* (b) is accounting for the 32-bit DMA window - it could be either the
|
|
||||||
* KVM accelerated TCE tables for emulated devices, or the VFIO
|
|
||||||
* userspace view. The 4 MiB per-PHB (including the default one) covers
|
|
||||||
* a 2GiB DMA window: default is 1GiB, but it's possible it'll be
|
|
||||||
* increased to help performance. The 8 MiB extra should be plenty for
|
|
||||||
* the TCE table index for any reasonable number of PHBs and several
|
|
||||||
* spapr-vlan or spapr-vscsi devices (512kB + a tiny bit each) */
|
|
||||||
baseLimit = maxMemory / 128 +
|
|
||||||
4096 * nPCIHostBridges +
|
|
||||||
8192;
|
|
||||||
|
|
||||||
/* passthroughLimit := max( 2 GiB * #PHBs, (c)
|
|
||||||
* memory (d)
|
|
||||||
* + memory * 1/512 * #PHBs + 8 MiB ) (e)
|
|
||||||
*
|
|
||||||
* (c) is the pre-DDW VFIO DMA window accounting. We're allowing 2 GiB
|
|
||||||
* rather than 1 GiB
|
|
||||||
*
|
|
||||||
* (d) is the with-DDW (and memory pre-registration and related
|
|
||||||
* features) DMA window accounting - assuming that we only account RAM
|
|
||||||
* once, even if mapped to multiple PHBs
|
|
||||||
*
|
|
||||||
* (e) is the with-DDW userspace view and overhead for the 64-bit DMA
|
|
||||||
* window. This is based a bit on expected guest behaviour, but there
|
|
||||||
* really isn't a way to completely avoid that. We assume the guest
|
|
||||||
* requests a 64-bit DMA window (per PHB) just big enough to map all
|
|
||||||
* its RAM. 4 kiB page size gives the 1/512; it will be less with 64
|
|
||||||
* kiB pages, less still if the guest is mapped with hugepages (unlike
|
|
||||||
* the default 32-bit DMA window, DDW windows can use large IOMMU
|
|
||||||
* pages). 8 MiB is for second and further level overheads, like (b) */
|
|
||||||
if (usesVFIO)
|
|
||||||
passthroughLimit = MAX(2 * 1024 * 1024 * nPCIHostBridges,
|
|
||||||
memory +
|
|
||||||
memory / 512 * nPCIHostBridges + 8192);
|
|
||||||
|
|
||||||
memKB = baseLimit + passthroughLimit;
|
|
||||||
goto done;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* For device passthrough using VFIO the guest memory and MMIO memory
|
/* For device passthrough using VFIO the guest memory and MMIO memory
|
||||||
* regions need to be locked persistent in order to allow DMA.
|
* regions need to be locked persistent in order to allow DMA.
|
||||||
|
Loading…
Reference in New Issue
Block a user