On Fri, 6 Nov 2015 16:31:43 +0800 Xiao Guangrong <guangrong.xiao@xxxxxxxxxxxxxxx> wrote: > > > On 11/05/2015 10:49 PM, Igor Mammedov wrote: > > On Thu, 5 Nov 2015 21:33:39 +0800 > > Xiao Guangrong <guangrong.xiao@xxxxxxxxxxxxxxx> wrote: > > > >> > >> > >> On 11/05/2015 09:03 PM, Igor Mammedov wrote: > >>> On Thu, 5 Nov 2015 18:15:31 +0800 > >>> Xiao Guangrong <guangrong.xiao@xxxxxxxxxxxxxxx> wrote: > >>> > >>>> > >>>> > >>>> On 11/05/2015 05:58 PM, Igor Mammedov wrote: > >>>>> On Mon, 2 Nov 2015 17:13:27 +0800 > >>>>> Xiao Guangrong <guangrong.xiao@xxxxxxxxxxxxxxx> wrote: > >>>>> > >>>>>> A page staring from 0xFF00000 and IO port 0x0a18 - 0xa1b in guest are > >>>>> ^^ missing one 0??? > >>>>> > >>>>>> reserved for NVDIMM ACPI emulation, refer to docs/specs/acpi_nvdimm.txt > >>>>>> for detailed design > >>>>>> > >>>>>> A parameter, 'nvdimm-support', is introduced for PIIX4_PM and ICH9-LPC > >>>>>> that controls if nvdimm support is enabled, it is true on default and > >>>>>> it is false on 2.4 and its earlier version to keep compatibility > >>>>>> > >>>>>> Signed-off-by: Xiao Guangrong <guangrong.xiao@xxxxxxxxxxxxxxx> > >>>>> [...] > >>>>> > >>>>>> @@ -33,6 +33,15 @@ > >>>>>> */ > >>>>>> #define MIN_NAMESPACE_LABEL_SIZE (128UL << 10) > >>>>>> > >>>>>> +/* > >>>>>> + * A page staring from 0xFF00000 and IO port 0x0a18 - 0xa1b in guest are > >>>>> ^^^ missing 0 or value in define below has an extra 0 > >>>>> > >>>>>> + * reserved for NVDIMM ACPI emulation, refer to docs/specs/acpi_nvdimm.txt > >>>>>> + * for detailed design. > >>>>>> + */ > >>>>>> +#define NVDIMM_ACPI_MEM_BASE 0xFF000000ULL > >>>>> it still maps RAM at arbitrary place, > >>>>> that's the reason why VMGenID patches hasn't been merged for > >>>>> more than several months. > >>>>> I'm not against of using (more exactly I'm for it) direct mapping > >>>>> but we should reach consensus when and how to use it first. > >>>>> > >>>>> I'd wouldn't use addresses below 4G as it may be used firmware or > >>>>> legacy hardware and I won't bet that 0xFF000000ULL won't conflict > >>>>> with anything. > >>>>> An alternative place to allocate reserve from could be high memory. > >>>>> For pc we have "reserved-memory-end" which currently makes sure > >>>>> that hotpluggable memory range isn't used by firmware. > >>>>> > >>>>> How about making API that allows to map additional memory > >>>>> ranges before reserved-memory-end and pushes it up as mappings are > >>>>> added. [...] > > Really a good study case to me, i tried your patch and moved the 64 bit > staffs to the private method, it worked. :) > > Igor, is this the API you want? Lets get ack from Michael on the idea of RAM mapping before "reserved-memory-end" first. If he rejects it then there isn't any other way except of switching to MMIO instead. > > diff --git a/hw/i386/pc.c b/hw/i386/pc.c > index 6bf569a..aba29df 100644 > --- a/hw/i386/pc.c > +++ b/hw/i386/pc.c > @@ -1291,6 +1291,38 @@ FWCfgState *xen_load_linux(PCMachineState *pcms, > return fw_cfg; > } > > +static void pc_reserve_high_memory_init(PCMachineState *pcms, > + uint64_t base, uint64_t align) > +{ > + pcms->reserve_high_memory.current_addr = ROUND_UP(base, align); > +} > + > +static uint64_t > +pc_reserve_high_memory_end(PCMachineState *pcms, int64_t align) > +{ > + return ROUND_UP(pcms->reserve_high_memory.current_addr, align); > +} > + > +uint64_t pc_reserve_high_memory(PCMachineState *pcms, uint64_t size, > + int64_t align, Error **errp) > +{ > + uint64_t end_addr, current_addr = pcms->reserve_high_memory.current_addr; > + > + if (!current_addr) { > + error_setg(errp, "reserved high memory is not initialized."); > + return 0; > + } > + > + end_addr = pc_reserve_high_memory_end(pcms, align) + size; > + if (current_addr > end_addr) { > + error_setg(errp, "reserved high memory is not enough."); > + return 0; > + } > + > + pcms->reserve_high_memory.current_addr = end_addr; > + return end_addr; > +} > + > FWCfgState *pc_memory_init(PCMachineState *pcms, > MemoryRegion *system_memory, > MemoryRegion *rom_memory, > @@ -1379,6 +1411,8 @@ FWCfgState *pc_memory_init(PCMachineState *pcms, > "hotplug-memory", hotplug_mem_size); > memory_region_add_subregion(system_memory, pcms->hotplug_memory.base, > &pcms->hotplug_memory.mr); > + pc_reserve_high_memory_init(pcms, pcms->hotplug_memory.base + > + hotplug_mem_size, 1ULL << 30); > } > > /* Initialize PC system firmware */ > @@ -1403,7 +1437,7 @@ FWCfgState *pc_memory_init(PCMachineState *pcms, > uint64_t res_mem_end = pcms->hotplug_memory.base; > > if (!pcmc->broken_reserved_end) { > - res_mem_end += memory_region_size(&pcms->hotplug_memory.mr); > + res_mem_end = pc_reserve_high_memory_end(pcms, 0x1ULL << 30); > } > *val = cpu_to_le64(ROUND_UP(res_mem_end, 0x1ULL << 30)); > fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, sizeof(*val)); > diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h > index 47162cf..fae3fea 100644 > --- a/include/hw/i386/pc.h > +++ b/include/hw/i386/pc.h > @@ -20,6 +20,11 @@ > > #define HPET_INTCAP "hpet-intcap" > > +struct PCReserveHighMemory { > + uint64_t current_addr; > +}; > +typedef struct PCReserveHighMemory PCReserveHighMemory; > + > /** > * PCMachineState: > * @acpi_dev: link to ACPI PM device that performs ACPI hotplug handling > @@ -41,6 +46,7 @@ struct PCMachineState { > OnOffAuto smm; > bool enforce_aligned_dimm; > ram_addr_t below_4g_mem_size, above_4g_mem_size; > + PCReserveHighMemory reserve_high_memory; > }; > > #define PC_MACHINE_ACPI_DEVICE_PROP "acpi-device" > @@ -175,6 +181,9 @@ PcGuestInfo *pc_guest_info_init(PCMachineState *pcms); > > void pc_set_legacy_acpi_data_size(void); > > +uint64_t pc_reserve_high_memory(PCMachineState *pcms, uint64_t size, > + int64_t align, Error **errp); > + > #define PCI_HOST_PROP_PCI_HOLE_START "pci-hole-start" > #define PCI_HOST_PROP_PCI_HOLE_END "pci-hole-end" > #define PCI_HOST_PROP_PCI_HOLE64_START "pci-hole64-start" > > > > > >>>> > >>>> The luck thing is, the ACPI part is not ABI, we can move it to the high > >>>> memory if QEMU supports XSDT is ready in future development. > >>> But mapped control region is ABI and we can't change it if we find out later > >>> that it breaks something. > >> > >> But the ACPI code is completely built by QEMU, which is transparent to guest > >> and guest should not depend on it, no? > > unfortunately no, think about cross-version migration. > > It makes sense. Stupid me. :( > > > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html