Allow NUMA nodes without vCPUs

QEMU allows creating NUMA nodes that have memory only.
Such memory-only nodes are important for HMAT (the ACPI Heterogeneous Memory Attribute Table).

With the check done in qemuValidateDomainDef() for QEMU 2.7 or newer
(checked via QEMU_CAPS_NUMA), we can be sure that the vCPUs are
fully assigned to NUMA nodes in the domain XML.

Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
This commit is contained in:
Michal Privoznik 2020-05-27 13:42:22 +02:00
parent 1050c6beb1
commit a26f61ee0c
11 changed files with 149 additions and 56 deletions

View File

@ -1840,6 +1840,8 @@
consistent across qemu and libvirt versions. consistent across qemu and libvirt versions.
<code>memory</code> specifies the node memory <code>memory</code> specifies the node memory
in kibibytes (i.e. blocks of 1024 bytes). in kibibytes (i.e. blocks of 1024 bytes).
<span class="since">Since 6.6.0</span> the <code>cpus</code> attribute
is optional and, if omitted, a CPU-less NUMA node is created.
<span class="since">Since 1.2.11</span> one can use an additional <a <span class="since">Since 1.2.11</span> one can use an additional <a
href="#elementsMemoryAllocation"><code>unit</code></a> attribute to href="#elementsMemoryAllocation"><code>unit</code></a> attribute to
define units in which <code>memory</code> is specified. define units in which <code>memory</code> is specified.

View File

@ -115,9 +115,11 @@
<ref name="unsignedInt"/> <ref name="unsignedInt"/>
</attribute> </attribute>
</optional> </optional>
<optional>
<attribute name="cpus"> <attribute name="cpus">
<ref name="cpuset"/> <ref name="cpuset"/>
</attribute> </attribute>
</optional>
<attribute name="memory"> <attribute name="memory">
<ref name="memoryKB"/> <ref name="memoryKB"/>
</attribute> </attribute>

View File

@ -889,32 +889,28 @@ virDomainNumaDefParseXML(virDomainNumaPtr def,
} }
VIR_FREE(tmp); VIR_FREE(tmp);
if (def->mem_nodes[cur_cell].cpumask) { if (def->mem_nodes[cur_cell].mem) {
virReportError(VIR_ERR_XML_ERROR, virReportError(VIR_ERR_XML_ERROR,
_("Duplicate NUMA cell info for cell id '%u'"), _("Duplicate NUMA cell info for cell id '%u'"),
cur_cell); cur_cell);
goto cleanup; goto cleanup;
} }
if (!(tmp = virXMLPropString(nodes[i], "cpus"))) { if ((tmp = virXMLPropString(nodes[i], "cpus"))) {
virReportError(VIR_ERR_XML_ERROR, "%s", g_autoptr(virBitmap) cpumask = NULL;
_("Missing 'cpus' attribute in NUMA cell"));
goto cleanup;
}
if (virBitmapParse(tmp, &def->mem_nodes[cur_cell].cpumask, if (virBitmapParse(tmp, &cpumask, VIR_DOMAIN_CPUMASK_LEN) < 0)
VIR_DOMAIN_CPUMASK_LEN) < 0)
goto cleanup; goto cleanup;
if (virBitmapIsAllClear(def->mem_nodes[cur_cell].cpumask)) { if (!virBitmapIsAllClear(cpumask))
virReportError(VIR_ERR_CONFIG_UNSUPPORTED, def->mem_nodes[cur_cell].cpumask = g_steal_pointer(&cpumask);
_("NUMA cell %d has no vCPUs assigned"), cur_cell);
goto cleanup;
}
VIR_FREE(tmp); VIR_FREE(tmp);
}
for (j = 0; j < n; j++) { for (j = 0; j < n; j++) {
if (j == cur_cell || !def->mem_nodes[j].cpumask) if (j == cur_cell ||
!def->mem_nodes[j].cpumask ||
!def->mem_nodes[cur_cell].cpumask)
continue; continue;
if (virBitmapOverlaps(def->mem_nodes[j].cpumask, if (virBitmapOverlaps(def->mem_nodes[j].cpumask,
@ -975,7 +971,6 @@ virDomainNumaDefFormatXML(virBufferPtr buf,
{ {
virDomainMemoryAccess memAccess; virDomainMemoryAccess memAccess;
virTristateBool discard; virTristateBool discard;
char *cpustr;
size_t ncells = virDomainNumaGetNodeCount(def); size_t ncells = virDomainNumaGetNodeCount(def);
size_t i; size_t i;
@ -985,17 +980,22 @@ virDomainNumaDefFormatXML(virBufferPtr buf,
virBufferAddLit(buf, "<numa>\n"); virBufferAddLit(buf, "<numa>\n");
virBufferAdjustIndent(buf, 2); virBufferAdjustIndent(buf, 2);
for (i = 0; i < ncells; i++) { for (i = 0; i < ncells; i++) {
virBitmapPtr cpumask = virDomainNumaGetNodeCpumask(def, i);
int ndistances; int ndistances;
memAccess = virDomainNumaGetNodeMemoryAccessMode(def, i); memAccess = virDomainNumaGetNodeMemoryAccessMode(def, i);
discard = virDomainNumaGetNodeDiscard(def, i); discard = virDomainNumaGetNodeDiscard(def, i);
if (!(cpustr = virBitmapFormat(virDomainNumaGetNodeCpumask(def, i))))
return -1;
virBufferAddLit(buf, "<cell"); virBufferAddLit(buf, "<cell");
virBufferAsprintf(buf, " id='%zu'", i); virBufferAsprintf(buf, " id='%zu'", i);
if (cpumask) {
g_autofree char *cpustr = virBitmapFormat(cpumask);
if (!cpustr)
return -1;
virBufferAsprintf(buf, " cpus='%s'", cpustr); virBufferAsprintf(buf, " cpus='%s'", cpustr);
}
virBufferAsprintf(buf, " memory='%llu'", virBufferAsprintf(buf, " memory='%llu'",
virDomainNumaGetNodeMemorySize(def, i)); virDomainNumaGetNodeMemorySize(def, i));
virBufferAddLit(buf, " unit='KiB'"); virBufferAddLit(buf, " unit='KiB'");
@ -1031,8 +1031,6 @@ virDomainNumaDefFormatXML(virBufferPtr buf,
virBufferAdjustIndent(buf, -2); virBufferAdjustIndent(buf, -2);
virBufferAddLit(buf, "</cell>\n"); virBufferAddLit(buf, "</cell>\n");
} }
VIR_FREE(cpustr);
} }
virBufferAdjustIndent(buf, -2); virBufferAdjustIndent(buf, -2);
virBufferAddLit(buf, "</numa>\n"); virBufferAddLit(buf, "</numa>\n");
@ -1047,8 +1045,12 @@ virDomainNumaGetCPUCountTotal(virDomainNumaPtr numa)
size_t i; size_t i;
unsigned int ret = 0; unsigned int ret = 0;
for (i = 0; i < numa->nmem_nodes; i++) for (i = 0; i < numa->nmem_nodes; i++) {
ret += virBitmapCountBits(virDomainNumaGetNodeCpumask(numa, i)); virBitmapPtr cpumask = virDomainNumaGetNodeCpumask(numa, i);
if (cpumask)
ret += virBitmapCountBits(cpumask);
}
return ret; return ret;
} }
@ -1060,12 +1062,15 @@ virDomainNumaGetMaxCPUID(virDomainNumaPtr numa)
unsigned int ret = 0; unsigned int ret = 0;
for (i = 0; i < numa->nmem_nodes; i++) { for (i = 0; i < numa->nmem_nodes; i++) {
virBitmapPtr cpumask = virDomainNumaGetNodeCpumask(numa, i);
int bit; int bit;
bit = virBitmapLastSetBit(virDomainNumaGetNodeCpumask(numa, i)); if (cpumask) {
bit = virBitmapLastSetBit(cpumask);
if (bit > ret) if (bit > ret)
ret = bit; ret = bit;
} }
}
return ret; return ret;
} }

View File

@ -1444,19 +1444,21 @@ xenFormatXLVnuma(virConfValuePtr list,
{ {
int ret = -1; int ret = -1;
size_t i; size_t i;
virBuffer buf = VIR_BUFFER_INITIALIZER; virBuffer buf = VIR_BUFFER_INITIALIZER;
virConfValuePtr numaVnode, tmp; virConfValuePtr numaVnode, tmp;
virBitmapPtr cpumask = virDomainNumaGetNodeCpumask(numa, node);
size_t nodeSize = virDomainNumaGetNodeMemorySize(numa, node) / 1024; size_t nodeSize = virDomainNumaGetNodeMemorySize(numa, node) / 1024;
char *nodeVcpus = virBitmapFormat(virDomainNumaGetNodeCpumask(numa, node)); g_autofree char *nodeVcpus = NULL;
if (VIR_ALLOC(numaVnode) < 0) if (!cpumask ||
VIR_ALLOC(numaVnode) < 0)
goto cleanup; goto cleanup;
numaVnode->type = VIR_CONF_LIST; numaVnode->type = VIR_CONF_LIST;
numaVnode->list = NULL; numaVnode->list = NULL;
nodeVcpus = virBitmapFormat(cpumask);
/* pnode */ /* pnode */
virBufferAsprintf(&buf, "pnode=%zu", node); virBufferAsprintf(&buf, "pnode=%zu", node);
xenFormatXLVnode(numaVnode, &buf); xenFormatXLVnode(numaVnode, &buf);

View File

@ -7124,8 +7124,6 @@ qemuBuildNumaCommandLine(virQEMUDriverConfigPtr cfg,
size_t i, j; size_t i, j;
virQEMUCapsPtr qemuCaps = priv->qemuCaps; virQEMUCapsPtr qemuCaps = priv->qemuCaps;
g_auto(virBuffer) buf = VIR_BUFFER_INITIALIZER; g_auto(virBuffer) buf = VIR_BUFFER_INITIALIZER;
char *cpumask = NULL;
char *tmpmask = NULL;
char *next = NULL; char *next = NULL;
virBufferPtr nodeBackends = NULL; virBufferPtr nodeBackends = NULL;
bool needBackend = false; bool needBackend = false;
@ -7170,9 +7168,7 @@ qemuBuildNumaCommandLine(virQEMUDriverConfigPtr cfg,
goto cleanup; goto cleanup;
for (i = 0; i < ncells; i++) { for (i = 0; i < ncells; i++) {
VIR_FREE(cpumask); virBitmapPtr cpumask = virDomainNumaGetNodeCpumask(def->numa, i);
if (!(cpumask = virBitmapFormat(virDomainNumaGetNodeCpumask(def->numa, i))))
goto cleanup;
if (needBackend) { if (needBackend) {
virCommandAddArg(cmd, "-object"); virCommandAddArg(cmd, "-object");
@ -7182,12 +7178,20 @@ qemuBuildNumaCommandLine(virQEMUDriverConfigPtr cfg,
virCommandAddArg(cmd, "-numa"); virCommandAddArg(cmd, "-numa");
virBufferAsprintf(&buf, "node,nodeid=%zu", i); virBufferAsprintf(&buf, "node,nodeid=%zu", i);
for (tmpmask = cpumask; tmpmask; tmpmask = next) { if (cpumask) {
g_autofree char *cpumaskStr = NULL;
char *tmpmask;
if (!(cpumaskStr = virBitmapFormat(cpumask)))
goto cleanup;
for (tmpmask = cpumaskStr; tmpmask; tmpmask = next) {
if ((next = strchr(tmpmask, ','))) if ((next = strchr(tmpmask, ',')))
*(next++) = '\0'; *(next++) = '\0';
virBufferAddLit(&buf, ",cpus="); virBufferAddLit(&buf, ",cpus=");
virBufferAdd(&buf, tmpmask, -1); virBufferAdd(&buf, tmpmask, -1);
} }
}
if (needBackend) if (needBackend)
virBufferAsprintf(&buf, ",memdev=ram-node%zu", i); virBufferAsprintf(&buf, ",memdev=ram-node%zu", i);
@ -7217,8 +7221,6 @@ qemuBuildNumaCommandLine(virQEMUDriverConfigPtr cfg,
ret = 0; ret = 0;
cleanup: cleanup:
VIR_FREE(cpumask);
if (nodeBackends) { if (nodeBackends) {
for (i = 0; i < ncells; i++) for (i = 0; i < ncells; i++)
virBufferFreeAndReset(&nodeBackends[i]); virBufferFreeAndReset(&nodeBackends[i]);

View File

@ -654,7 +654,7 @@ qemuValidateDomainDefNuma(const virDomainDef *def,
} }
for (i = 0; i < ncells; i++) { for (i = 0; i < ncells; i++) {
g_autofree char * cpumask = NULL; virBitmapPtr cpumask = virDomainNumaGetNodeCpumask(def->numa, i);
if (!hasMemoryCap && if (!hasMemoryCap &&
virDomainNumaGetNodeMemoryAccessMode(def->numa, i)) { virDomainNumaGetNodeMemoryAccessMode(def->numa, i)) {
@ -664,17 +664,19 @@ qemuValidateDomainDefNuma(const virDomainDef *def,
return -1; return -1;
} }
if (!(cpumask = virBitmapFormat(virDomainNumaGetNodeCpumask(def->numa, i)))) if (cpumask) {
g_autofree char * cpumaskStr = NULL;
if (!(cpumaskStr = virBitmapFormat(cpumask)))
return -1; return -1;
if (strchr(cpumask, ',') && if (strchr(cpumaskStr, ',') &&
!virQEMUCapsGet(qemuCaps, QEMU_CAPS_NUMA)) { !virQEMUCapsGet(qemuCaps, QEMU_CAPS_NUMA)) {
virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
_("disjoint NUMA cpu ranges are not supported " _("disjoint NUMA cpu ranges are not supported "
"with this QEMU")); "with this QEMU"));
return -1; return -1;
} }
}
} }
if (virDomainNumaNodesDistancesAreBeingSet(def->numa) && if (virDomainNumaNodesDistancesAreBeingSet(def->numa) &&

View File

@ -0,0 +1,33 @@
LC_ALL=C \
PATH=/bin \
HOME=/tmp/lib/domain--1-QEMUGuest \
USER=test \
LOGNAME=test \
XDG_DATA_HOME=/tmp/lib/domain--1-QEMUGuest/.local/share \
XDG_CACHE_HOME=/tmp/lib/domain--1-QEMUGuest/.cache \
XDG_CONFIG_HOME=/tmp/lib/domain--1-QEMUGuest/.config \
QEMU_AUDIO_DRV=none \
/usr/bin/qemu-system-x86_64 \
-name QEMUGuest \
-S \
-machine pc,accel=tcg,usb=off,dump-guest-core=off \
-m 12288 \
-realtime mlock=off \
-smp 12,sockets=12,cores=1,threads=1 \
-numa node,nodeid=0,cpus=0-3,mem=2048 \
-numa node,nodeid=1,cpus=4-7,mem=2048 \
-numa node,nodeid=2,cpus=8-11,mem=2048 \
-numa node,nodeid=3,mem=2048 \
-numa node,nodeid=4,mem=2048 \
-numa node,nodeid=5,mem=2048 \
-uuid c7a5fdb2-cdaf-9455-926a-d65c16db1809 \
-display none \
-no-user-config \
-nodefaults \
-chardev socket,id=charmonitor,path=/tmp/lib/domain--1-QEMUGuest/monitor.sock,\
server,nowait \
-mon chardev=charmonitor,id=monitor,mode=control \
-rtc base=utc \
-no-shutdown \
-usb \
-device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3

View File

@ -0,0 +1,42 @@
<domain type='qemu'>
<name>QEMUGuest</name>
<uuid>c7a5fdb2-cdaf-9455-926a-d65c16db1809</uuid>
<memory unit='KiB'>12582912</memory>
<currentMemory unit='KiB'>12582912</currentMemory>
<vcpu placement='static'>12</vcpu>
<os>
<type arch='x86_64' machine='pc'>hvm</type>
<boot dev='hd'/>
</os>
<features>
<acpi/>
<apic/>
<pae/>
</features>
<cpu>
<numa>
<cell id='0' cpus='0-3' memory='2097152' unit='KiB'/>
<cell id='1' cpus='4-7' memory='2097152' unit='KiB'/>
<cell id='2' cpus='8-11' memory='2097152' unit='KiB'/>
<cell id='3' memory='2097152' unit='KiB'/>
<cell id='4' memory='2097152' unit='KiB'/>
<cell id='5' memory='2097152' unit='KiB'/>
</numa>
</cpu>
<clock offset='utc'/>
<on_poweroff>destroy</on_poweroff>
<on_reboot>restart</on_reboot>
<on_crash>restart</on_crash>
<devices>
<emulator>/usr/bin/qemu-system-x86_64</emulator>
<controller type='usb' index='0'>
<address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x2'/>
</controller>
<controller type='pci' index='0' model='pci-root'/>
<input type='mouse' bus='ps2'/>
<input type='keyboard' bus='ps2'/>
<memballoon model='virtio'>
<address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/>
</memballoon>
</devices>
</domain>

View File

@ -1945,6 +1945,7 @@ mymain(void)
DO_TEST_PARSE_ERROR("numatune-memnode-no-memory", NONE); DO_TEST_PARSE_ERROR("numatune-memnode-no-memory", NONE);
DO_TEST("numatune-distances", QEMU_CAPS_NUMA, QEMU_CAPS_NUMA_DIST); DO_TEST("numatune-distances", QEMU_CAPS_NUMA, QEMU_CAPS_NUMA_DIST);
DO_TEST("numatune-no-vcpu", NONE);
DO_TEST("numatune-auto-nodeset-invalid", NONE); DO_TEST("numatune-auto-nodeset-invalid", NONE);
DO_TEST("numatune-auto-prefer", QEMU_CAPS_OBJECT_MEMORY_RAM, DO_TEST("numatune-auto-prefer", QEMU_CAPS_OBJECT_MEMORY_RAM,

View File

@ -0,0 +1 @@
../qemuxml2argvdata/numatune-no-vcpu.xml

View File

@ -1126,6 +1126,7 @@ mymain(void)
DO_TEST("numatune-memnode", QEMU_CAPS_NUMA, QEMU_CAPS_OBJECT_MEMORY_FILE); DO_TEST("numatune-memnode", QEMU_CAPS_NUMA, QEMU_CAPS_OBJECT_MEMORY_FILE);
DO_TEST("numatune-memnode-no-memory", QEMU_CAPS_OBJECT_MEMORY_FILE); DO_TEST("numatune-memnode-no-memory", QEMU_CAPS_OBJECT_MEMORY_FILE);
DO_TEST("numatune-distances", QEMU_CAPS_NUMA, QEMU_CAPS_NUMA_DIST); DO_TEST("numatune-distances", QEMU_CAPS_NUMA, QEMU_CAPS_NUMA_DIST);
DO_TEST("numatune-no-vcpu", QEMU_CAPS_NUMA);
DO_TEST("bios-nvram", NONE); DO_TEST("bios-nvram", NONE);
DO_TEST("bios-nvram-os-interleave", NONE); DO_TEST("bios-nvram-os-interleave", NONE);