On Thu, Dec 03, 2020 at 13:36:23 +0100, Michal Privoznik wrote: > QEMU gained this new virtio-mem model. It's similar to pc-dimm > in a sense that guest uses it as memory, but in a sense very > different from it as it can dynamically change allocation, > without need for hotplug. More specifically, the device has two > attributes more (it has more of course, but these two are > important here): > > 1) block-size - the granularity of the device. You can imagine > the device being divided into blocks, each 'block-size' long. > > 2) requested-size - the portion of the device that is in use by > the guest. > > And it all works like this: at guest startup/hotplug both > block-size and requested-size are specified. When sysadmin wants > to give some more memory to the guest, or take some back, they > change the 'requested-size' attribute which is propagated to the > guest where virtio-mem module takes corresponding action. > This means, that 'requested-size' must be a whole number product > of 'block-size' and of course has to be in rage [0, max-size] > (including). The 'max-size' is yet another attribute but if not > set it's "inherited" from corresponding memory-backend-* object. > > Therefore, two new elements are introduced under <target/>, to > reflect these attributes: > > <memory model='virtio'> > <target> > <size unit='KiB'>4194304</size> > <node>0</node> > <block unit='KiB'>2048</block> > <requested unit='KiB'>524288</requested> > </target> > <address type='pci' domain='0x0000' bus='0x00' slot='0x05' function='0x0'/> > </memory> > > The intent here is that <requested/> will be allowed to change > via virDomainUpdateDeviceFlags() API. > > Note, QEMU does inform us about success of allocation via an > event - this is covered in next patches. > > Signed-off-by: Michal Privoznik <mprivozn@xxxxxxxxxx> > --- > docs/formatdomain.rst | 22 ++++ > docs/schemas/domaincommon.rng | 10 ++ > src/conf/domain_conf.c | 103 ++++++++++++++++-- > src/conf/domain_conf.h | 5 + > .../memory-hotplug-virtio-mem.xml | 78 +++++++++++++ > ...emory-hotplug-virtio-mem.x86_64-latest.xml | 1 + > tests/qemuxml2xmltest.c | 1 + > 7 files changed, 213 insertions(+), 7 deletions(-) > create mode 100644 tests/qemuxml2argvdata/memory-hotplug-virtio-mem.xml > create mode 120000 tests/qemuxml2xmloutdata/memory-hotplug-virtio-mem.x86_64-latest.xml > > diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst > index ca6bc0432e..3990728939 100644 > --- a/docs/formatdomain.rst > +++ b/docs/formatdomain.rst > @@ -7187,6 +7187,17 @@ Example: usage of the memory devices > </label> > </target> > </memory> > + <memory model='virtio'> > + <source> > + <path>/tmp/virtio_mem</path> > + </source> > + <target> > + <size unit='KiB'>1048576</size> > + <node>0</node> > + <block unit='KiB'>2048</block> > + <requested unit='KiB'>524288</requested> > + </target> > + </memory> > <memory model='virtio' access='shared'> > <source> > <path>/tmp/virtio_pmem</path> > @@ -7300,6 +7311,17 @@ Example: usage of the memory devices > so other backend types should use the ``readonly`` element. :since:`Since > 5.0.0` > > + ``block`` > + The size of an individual block, granularity of division of memory module. > + Must be power of two and at least equal to size of a transparent hugepage > + (2MiB on x84_64). The default is hypervisor dependant. This is valid for > + ``virtio`` model only and mutually exclusive with ``pmem``. > + > + ``requested`` > + The total size of blocks exposed to the guest. Must respect ``block`` > + granularity. This is valid for ``virtio`` model only and mutually > + exclusive with ``pmem``. Docs don't mention interactions of <size> and <requested>. Is size the actual size? In such case you'll need to clear it on startup and populate it with the actual size later. The issue with <pmem> was mentioned earlier. [...] > diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c > index 935bea1804..0551f6f266 100644 > --- a/src/conf/domain_conf.c > +++ b/src/conf/domain_conf.c > @@ -6764,10 +6764,23 @@ virDomainMemoryDefValidate(const virDomainMemoryDef *mem, > return -1; > } > } else { > - /* TODO: plain virtio-mem behaves differently then virtio-pmem */ > - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", > - _("virtio-mem is not supported yet. <pmem/> is required")); > - return -1; > + if (mem->requestedsize > mem->size) { > + virReportError(VIR_ERR_XML_DETAIL, "%s", > + _("requested size must be smaller than @size")); > + return -1; > + } > + > + if (!VIR_IS_POW2(mem->blocksize)) { > + virReportError(VIR_ERR_XML_DETAIL, "%s", > + _("block size must be a power of two")); > + return -1; > + } Docs state that blocksize must be also a multiple of page size. > + > + if (mem->requestedsize % mem->blocksize != 0) { > + virReportError(VIR_ERR_XML_DETAIL, "%s", > + _("requested size must be an integer multiple of block size")); > + return -1; > + } > } > break; > > @@ -16774,9 +16787,25 @@ virDomainMemorySourceDefParseXML(xmlNodePtr node, > case VIR_DOMAIN_MEMORY_MODEL_VIRTIO: > def->s.virtio.path = virXPathString("string(./path)", ctxt); > > - if (virXPathBoolean("boolean(./pmem)", ctxt)) > + if (virXPathBoolean("boolean(./pmem)", ctxt)) { > def->s.virtio.pmem = true; > + } else { > + if (virDomainParseMemory("./pagesize", "./pagesize/@unit", ctxt, > + &def->s.virtio.pagesize, false, false) < 0) > + return -1; > > + if ((nodemask = virXPathString("string(./nodemask)", ctxt))) { > + if (virBitmapParse(nodemask, &def->s.virtio.sourceNodes, > + VIR_DOMAIN_CPUMASK_LEN) < 0) > + return -1; > + > + if (virBitmapIsAllClear(def->s.virtio.sourceNodes)) { > + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, > + _("Invalid value of 'nodemask': %s"), nodemask); > + return -1; Move this to virDomainMemoryDefValidate too. > + } > + } > + } > break; > > case VIR_DOMAIN_MEMORY_MODEL_NONE: [...] > @@ -16831,6 +16861,27 @@ virDomainMemoryTargetDefParseXML(xmlNodePtr node, > > if (virXPathBoolean("boolean(./readonly)", ctxt)) > def->readonly = true; > + > + break; > + > + case VIR_DOMAIN_MEMORY_MODEL_VIRTIO: > + if (!def->s.virtio.pmem) { > + if (virDomainParseMemory("./block", "./block/@unit", ctxt, > + &def->blocksize, false, false) < 0) > + return -1; > + > + if (virDomainParseMemory("./requested", "./requested/@unit", ctxt, > + &def->requestedsize, false, false) < 0) > + return -1; > + } If size is current size it should be optional. > + > + break; > + > + case VIR_DOMAIN_MEMORY_MODEL_NONE: > + case VIR_DOMAIN_MEMORY_MODEL_DIMM: > + case VIR_DOMAIN_MEMORY_MODEL_LAST: > + /* nada */ > + break; > } > > return 0; > @@ -18649,7 +18700,9 @@ virDomainMemoryFindByDefInternal(virDomainDefPtr def, > /* target info -> always present */ > if (tmp->model != mem->model || > tmp->targetNode != mem->targetNode || > - tmp->size != mem->size) > + tmp->size != mem->size || If size is the current size and being actively updated, this lookup might not actually work if it's updated between checking and updating. > + tmp->blocksize != mem->blocksize || > + tmp->requestedsize != mem->requestedsize) > continue; > > switch (mem->model) { [...] > diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h > index efaa4c5473..f16dc0a029 100644 > --- a/src/conf/domain_conf.h > +++ b/src/conf/domain_conf.h > @@ -2327,8 +2327,11 @@ struct _virDomainMemoryDef { > bool pmem; > } nvdimm; /* VIR_DOMAIN_MEMORY_MODEL_NVDIMM */ > struct { > + // nodemask + hugepages + no prealloc Leftovers? > char *path; /* Required for pmem, otherwise optional */ > bool pmem; > + virBitmapPtr sourceNodes; > + unsigned long long pagesize; /* kibibytes */ > } virtio; /* VIR_DOMAIN_MEMORY_MODEL_VIRTIO */ > } s; >