From 3038c84daa5ae2983ad3b711f6fefdd40234d506 Mon Sep 17 00:00:00 2001 From: "Edgar E. Iglesias" Date: Wed, 23 Jul 2025 11:29:15 +0200 Subject: [PATCH 1/5] xen: mapcache: Assert mapcache existence Assert that the mapcache was created prior to being used. Signed-off-by: Edgar E. Iglesias Reviewed-by: Stefano Stabellini --- hw/xen/xen-mapcache.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/hw/xen/xen-mapcache.c b/hw/xen/xen-mapcache.c index 11115f6084..517e5171b7 100644 --- a/hw/xen/xen-mapcache.c +++ b/hw/xen/xen-mapcache.c @@ -464,6 +464,8 @@ uint8_t *xen_map_cache(MemoryRegion *mr, MapCache *mc = mapcache; uint8_t *p; + assert(mapcache); + if (grant) { mc = is_write ? mapcache_grants_rw : mapcache_grants_ro; } @@ -530,6 +532,8 @@ ram_addr_t xen_ram_addr_from_mapcache(void *ptr) { ram_addr_t addr; + assert(mapcache); + addr = xen_ram_addr_from_mapcache_single(mapcache, ptr); if (addr == RAM_ADDR_INVALID) { addr = xen_ram_addr_from_mapcache_single(mapcache_grants_ro, ptr); @@ -652,6 +656,8 @@ static void xen_invalidate_map_cache_entry_bh(void *opaque) void coroutine_mixed_fn xen_invalidate_map_cache_entry(uint8_t *buffer) { + assert(mapcache); + if (qemu_in_coroutine()) { XenMapCacheData data = { .co = qemu_coroutine_self(), @@ -709,6 +715,8 @@ static void xen_invalidate_map_cache_single(MapCache *mc) void xen_invalidate_map_cache(void) { + assert(mapcache); + /* Flush pending AIO before destroying the mapcache */ bdrv_drain_all(); @@ -776,6 +784,8 @@ uint8_t *xen_replace_cache_entry(hwaddr old_phys_addr, { uint8_t *p; + assert(mapcache); + mapcache_lock(mapcache); p = xen_replace_cache_entry_unlocked(mapcache, old_phys_addr, new_phys_addr, size); From ded5f9a90925212b64da3bbad1651a1a455c14ea Mon Sep 17 00:00:00 2001 From: "Edgar E. Iglesias" Date: Wed, 23 Jul 2025 11:41:26 +0200 Subject: [PATCH 2/5] xen: mapcache: Add function to check if the mapcache is enabled Add function to check if the mapcache is enabled. No functional change. 
Signed-off-by: Edgar E. Iglesias Reviewed-by: Stefano Stabellini --- hw/xen/xen-mapcache.c | 6 ++++++ hw/xen/xen_stubs.c | 5 +++++ include/system/xen-mapcache.h | 1 + 3 files changed, 12 insertions(+) diff --git a/hw/xen/xen-mapcache.c b/hw/xen/xen-mapcache.c index 517e5171b7..a3c162cd4c 100644 --- a/hw/xen/xen-mapcache.c +++ b/hw/xen/xen-mapcache.c @@ -80,6 +80,12 @@ static MapCache *mapcache_grants_ro; static MapCache *mapcache_grants_rw; static xengnttab_handle *xen_region_gnttabdev; +bool xen_map_cache_enabled(void) +{ + /* Map cache enabled implies xen_enabled(). */ + return xen_enabled() && mapcache; +} + static inline void mapcache_lock(MapCache *mc) { qemu_mutex_lock(&mc->lock); diff --git a/hw/xen/xen_stubs.c b/hw/xen/xen_stubs.c index 5e565df392..a39efb5062 100644 --- a/hw/xen/xen_stubs.c +++ b/hw/xen/xen_stubs.c @@ -29,6 +29,11 @@ bool xen_mr_is_memory(MemoryRegion *mr) g_assert_not_reached(); } +bool xen_map_cache_enabled(void) +{ + return false; +} + void xen_invalidate_map_cache_entry(uint8_t *buffer) { g_assert_not_reached(); diff --git a/include/system/xen-mapcache.h b/include/system/xen-mapcache.h index fa2cff38d1..c46190dd0c 100644 --- a/include/system/xen-mapcache.h +++ b/include/system/xen-mapcache.h @@ -13,6 +13,7 @@ typedef hwaddr (*phys_offset_to_gaddr_t)(hwaddr phys_offset, ram_addr_t size); +bool xen_map_cache_enabled(void); void xen_map_cache_init(phys_offset_to_gaddr_t f, void *opaque); uint8_t *xen_map_cache(MemoryRegion *mr, hwaddr phys_addr, hwaddr size, From c1d78ba63cf520841635594049ecf437b432b791 Mon Sep 17 00:00:00 2001 From: "Edgar E. Iglesias" Date: Wed, 23 Jul 2025 11:56:51 +0200 Subject: [PATCH 3/5] physmem: xen: Conditionalize use of the mapcache Conditionalize use of the mapcache. This is in preparation to optionally disable the mapcache at runtime. Signed-off-by: Edgar E. 
Iglesias Acked-by: Peter Xu Reviewed-by: Stefano Stabellini --- system/physmem.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/system/physmem.c b/system/physmem.c index e5ff26acec..7eecd81877 100644 --- a/system/physmem.c +++ b/system/physmem.c @@ -578,7 +578,9 @@ MemoryRegion *flatview_translate(FlatView *fv, hwaddr addr, hwaddr *xlat, is_write, true, &as, attrs); mr = section.mr; - if (xen_enabled() && memory_access_is_direct(mr, is_write, attrs)) { + if (xen_map_cache_enabled() && + memory_access_is_direct(mr, is_write, attrs)) { + /* mapcache: Next page may be unmapped or in a different bucket/VA. */ hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr; *plen = MIN(page, *plen); } @@ -2577,7 +2579,7 @@ static void reclaim_ramblock(RAMBlock *block) { if (block->flags & RAM_PREALLOC) { ; - } else if (xen_enabled()) { + } else if (xen_map_cache_enabled()) { xen_invalidate_map_cache_entry(block->host); #if !defined(_WIN32) && !defined(EMSCRIPTEN) } else if (block->fd >= 0) { @@ -2736,7 +2738,7 @@ static void *qemu_ram_ptr_length(RAMBlock *block, ram_addr_t addr, len = *size; } - if (xen_enabled() && block->host == NULL) { + if (xen_map_cache_enabled() && block->host == NULL) { /* We need to check if the requested address is in the RAM * because we don't want to map the entire memory in QEMU. * In that case just map the requested area. 
@@ -2785,7 +2787,7 @@ RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset, RAMBlock *block; uint8_t *host = ptr; - if (xen_enabled()) { + if (xen_map_cache_enabled()) { ram_addr_t ram_addr; RCU_READ_LOCK_GUARD(); ram_addr = xen_ram_addr_from_mapcache(ptr); @@ -3787,7 +3789,7 @@ void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len, if (is_write) { invalidate_and_set_dirty(mr, addr1, access_len); } - if (xen_enabled()) { + if (xen_map_cache_enabled()) { xen_invalidate_map_cache_entry(buffer); } memory_region_unref(mr); @@ -3898,7 +3900,7 @@ void address_space_cache_destroy(MemoryRegionCache *cache) return; } - if (xen_enabled()) { + if (xen_map_cache_enabled()) { xen_invalidate_map_cache_entry(cache->ptr); } memory_region_unref(cache->mrs.mr); From ad0c53fb5bab27f7344de9c8f90004e283d0ab69 Mon Sep 17 00:00:00 2001 From: "Edgar E. Iglesias" Date: Wed, 23 Jul 2025 12:06:18 +0200 Subject: [PATCH 4/5] hw/xen-hvm: Add a mapcache arg to xen_register_ioreq() Add a mapcache argument to xen_register_ioreq() allowing the caller to optionally disable the mapcache. All callers still call it with mapcache = true so there's no functional change yet. Signed-off-by: Edgar E. 
Iglesias Reviewed-by: Stefano Stabellini --- hw/i386/xen/xen-hvm.c | 2 +- hw/xen/xen-hvm-common.c | 18 ++++++++++++------ hw/xen/xen-pvh-common.c | 3 ++- include/hw/xen/xen-hvm-common.h | 3 ++- 4 files changed, 17 insertions(+), 9 deletions(-) diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c index a6e1683885..c7bfcaae8e 100644 --- a/hw/i386/xen/xen-hvm.c +++ b/hw/i386/xen/xen-hvm.c @@ -622,7 +622,7 @@ void xen_hvm_init_pc(PCMachineState *pcms, MemoryRegion **ram_memory) xen_register_ioreq(state, max_cpus, HVM_IOREQSRV_BUFIOREQ_ATOMIC, - &xen_memory_listener); + &xen_memory_listener, true); xen_is_stubdomain = xen_check_stubdomain(state->xenstore); diff --git a/hw/xen/xen-hvm-common.c b/hw/xen/xen-hvm-common.c index 59c73dfaeb..8c825588ae 100644 --- a/hw/xen/xen-hvm-common.c +++ b/hw/xen/xen-hvm-common.c @@ -473,7 +473,9 @@ static void handle_ioreq(XenIOState *state, ioreq_t *req) case IOREQ_TYPE_TIMEOFFSET: break; case IOREQ_TYPE_INVALIDATE: - xen_invalidate_map_cache(); + if (xen_map_cache_enabled()) { + xen_invalidate_map_cache(); + } break; case IOREQ_TYPE_PCI_CONFIG: cpu_ioreq_config(state, req); @@ -823,7 +825,8 @@ void xen_shutdown_fatal_error(const char *fmt, ...) 
static void xen_do_ioreq_register(XenIOState *state, unsigned int max_cpus, - const MemoryListener *xen_memory_listener) + const MemoryListener *xen_memory_listener, + bool mapcache) { int i, rc; @@ -874,11 +877,13 @@ static void xen_do_ioreq_register(XenIOState *state, state->bufioreq_local_port = rc; } /* Init RAM management */ + if (mapcache) { #ifdef XEN_COMPAT_PHYSMAP - xen_map_cache_init(xen_phys_offset_to_gaddr, state); + xen_map_cache_init(xen_phys_offset_to_gaddr, state); #else - xen_map_cache_init(NULL, state); + xen_map_cache_init(NULL, state); #endif + } qemu_add_vm_change_state_handler(xen_hvm_change_state_handler, state); @@ -901,7 +906,8 @@ err: void xen_register_ioreq(XenIOState *state, unsigned int max_cpus, uint8_t handle_bufioreq, - const MemoryListener *xen_memory_listener) + const MemoryListener *xen_memory_listener, + bool mapcache) { int rc; @@ -922,7 +928,7 @@ void xen_register_ioreq(XenIOState *state, unsigned int max_cpus, state->has_bufioreq = handle_bufioreq != HVM_IOREQSRV_BUFIOREQ_OFF; rc = xen_create_ioreq_server(xen_domid, handle_bufioreq, &state->ioservid); if (!rc) { - xen_do_ioreq_register(state, max_cpus, xen_memory_listener); + xen_do_ioreq_register(state, max_cpus, xen_memory_listener, mapcache); } else { warn_report("xen: failed to create ioreq server"); } diff --git a/hw/xen/xen-pvh-common.c b/hw/xen/xen-pvh-common.c index 1381310fc7..4fade017bb 100644 --- a/hw/xen/xen-pvh-common.c +++ b/hw/xen/xen-pvh-common.c @@ -202,7 +202,8 @@ static void xen_pvh_init(MachineState *ms) xen_pvh_init_ram(s, sysmem); xen_register_ioreq(&s->ioreq, ms->smp.max_cpus, xpc->handle_bufioreq, - &xen_memory_listener); + &xen_memory_listener, + true); if (s->cfg.virtio_mmio_num) { xen_create_virtio_mmio_devices(s); diff --git a/include/hw/xen/xen-hvm-common.h b/include/hw/xen/xen-hvm-common.h index e1beca062f..d177ff14ea 100644 --- a/include/hw/xen/xen-hvm-common.h +++ b/include/hw/xen/xen-hvm-common.h @@ -91,7 +91,8 @@ void 
xen_device_unrealize(DeviceListener *listener, DeviceState *dev); void xen_hvm_change_state_handler(void *opaque, bool running, RunState rstate); void xen_register_ioreq(XenIOState *state, unsigned int max_cpus, uint8_t handle_bufioreq, - const MemoryListener *xen_memory_listener); + const MemoryListener *xen_memory_listener, + bool mapcache); void cpu_ioreq_pio(ioreq_t *req); #endif /* HW_XEN_HVM_COMMON_H */ From 83d1d9a49e8eb970f87508ae4ce0f222084df365 Mon Sep 17 00:00:00 2001 From: "Edgar E. Iglesias" Date: Wed, 23 Jul 2025 14:02:17 +0200 Subject: [PATCH 5/5] hw/xen: xenpvh: Add prop to enable/disable the mapcache Add a machine property to enable/disable the mapcache. Default it to enabled for backwards compatibility. Signed-off-by: Edgar E. Iglesias Reviewed-by: Stefano Stabellini --- hw/xen/xen-pvh-common.c | 133 +++++++++++++++++++++++++++++--- include/hw/xen/xen-pvh-common.h | 2 + 2 files changed, 124 insertions(+), 11 deletions(-) diff --git a/hw/xen/xen-pvh-common.c b/hw/xen/xen-pvh-common.c index 4fade017bb..cca37202ff 100644 --- a/hw/xen/xen-pvh-common.c +++ b/hw/xen/xen-pvh-common.c @@ -29,6 +29,69 @@ static const MemoryListener xen_memory_listener = { .priority = MEMORY_LISTENER_PRIORITY_ACCEL, }; +/* + * Map foreign RAM in bounded chunks so we don't build a PFN array for the + * entire guest size (which can be huge for large guests). We reserve a VA + * range once and then MAP_FIXED each chunk into place. 
+ */ +#define XEN_PVH_MAP_CHUNK_PAGES 65535 + +static void *xen_map_guest_ram(XenPVHMachineState *s, + uint64_t addr, uint64_t size) +{ + size_t total_pages = size >> XC_PAGE_SHIFT; + size_t chunk_pages = MIN(XEN_PVH_MAP_CHUNK_PAGES, total_pages); + g_autofree xen_pfn_t *pfns = NULL; + void *base = NULL; + size_t offset; + + if (!total_pages) { + goto done; + } + + base = mmap(NULL, size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (base == MAP_FAILED) { + base = NULL; + goto done; + } + + pfns = g_new0(xen_pfn_t, chunk_pages); + if (!pfns) { + munmap(base, size); + base = NULL; + goto done; + } + + for (offset = 0; offset < total_pages; offset += chunk_pages) { + size_t num_pages = MIN(chunk_pages, total_pages - offset); + void *mapped; + size_t i; + + for (i = 0; i < num_pages; i++) { + pfns[i] = (addr >> XC_PAGE_SHIFT) + offset + i; + } + + mapped = xenforeignmemory_map2( + xen_fmem, xen_domid, + (uint8_t *)base + (offset << XC_PAGE_SHIFT), + PROT_READ | PROT_WRITE, MAP_FIXED, + num_pages, pfns, NULL); + if (!mapped) { + munmap(base, size); + base = NULL; + goto done; + } + } +done: + if (!base) { + /* We can't recover from this. 
*/ + error_report("FATAL: Failed to foreign-map %" PRIx64 " - %" PRIx64, + addr, addr + size); + exit(EXIT_FAILURE); + } + return base; +} + static void xen_pvh_init_ram(XenPVHMachineState *s, MemoryRegion *sysmem) { @@ -45,22 +108,42 @@ static void xen_pvh_init_ram(XenPVHMachineState *s, block_len = s->cfg.ram_high.base + ram_size[1]; } - memory_region_init_ram(&xen_memory, NULL, "xen.ram", block_len, - &error_fatal); + if (s->cfg.mapcache) { + memory_region_init_ram(&xen_memory, NULL, "xen.ram", + block_len, &error_fatal); + memory_region_init_alias(&s->ram.low, NULL, "xen.ram.lo", &xen_memory, + s->cfg.ram_low.base, ram_size[0]); + if (ram_size[1] > 0) { + memory_region_init_alias(&s->ram.high, NULL, "xen.ram.hi", + &xen_memory, + s->cfg.ram_high.base, ram_size[1]); + } + } else { + void *p; + + p = xen_map_guest_ram(s, s->cfg.ram_low.base, ram_size[0]); + memory_region_init_ram_ptr(&s->ram.low, NULL, "xen.ram.lo", + ram_size[0], p); + if (ram_size[1] > 0) { + p = xen_map_guest_ram(s, s->cfg.ram_high.base, ram_size[1]); + memory_region_init_ram_ptr(&s->ram.high, NULL, "xen.ram.hi", + ram_size[1], p); + } + } - memory_region_init_alias(&s->ram.low, NULL, "xen.ram.lo", &xen_memory, - s->cfg.ram_low.base, ram_size[0]); + /* Map them onto QEMU's address-space. */ memory_region_add_subregion(sysmem, s->cfg.ram_low.base, &s->ram.low); if (ram_size[1] > 0) { - memory_region_init_alias(&s->ram.high, NULL, "xen.ram.hi", &xen_memory, - s->cfg.ram_high.base, ram_size[1]); memory_region_add_subregion(sysmem, s->cfg.ram_high.base, &s->ram.high); } - /* Setup support for grants. */ - memory_region_init_ram(&xen_grants, NULL, "xen.grants", block_len, - &error_fatal); - memory_region_add_subregion(sysmem, XEN_GRANT_ADDR_OFF, &xen_grants); + /* Grants are only supported when the mapcache is on. */ + if (s->cfg.mapcache) { + /* Setup support for grants. 
*/ + memory_region_init_ram(&xen_grants, NULL, "xen.grants", block_len, + &error_fatal); + memory_region_add_subregion(sysmem, XEN_GRANT_ADDR_OFF, &xen_grants); + } } static void xen_set_irq(void *opaque, int irq, int level) @@ -203,7 +286,7 @@ static void xen_pvh_init(MachineState *ms) xen_register_ioreq(&s->ioreq, ms->smp.max_cpus, xpc->handle_bufioreq, &xen_memory_listener, - true); + s->cfg.mapcache); if (s->cfg.virtio_mmio_num) { xen_create_virtio_mmio_devices(s); @@ -285,6 +368,20 @@ XEN_PVH_PROP_MEMMAP(pci_ecam) XEN_PVH_PROP_MEMMAP(pci_mmio) XEN_PVH_PROP_MEMMAP(pci_mmio_high) +static void xen_pvh_set_mapcache(Object *obj, bool value, Error **errp) +{ + XenPVHMachineState *xp = XEN_PVH_MACHINE(obj); + + xp->cfg.mapcache = value; +} + +static bool xen_pvh_get_mapcache(Object *obj, Error **errp) +{ + XenPVHMachineState *xp = XEN_PVH_MACHINE(obj); + + return xp->cfg.mapcache; +} + static void xen_pvh_set_pci_intx_irq_base(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) @@ -338,6 +435,12 @@ do { \ OC_MEMMAP_PROP_SIZE(c, prop_name, name); \ } while (0) + object_class_property_add_bool(oc, "mapcache", xen_pvh_get_mapcache, + xen_pvh_set_mapcache); + object_class_property_set_description(oc, "mapcache", + "Set on/off to enable/disable the " + "mapcache"); + /* * We provide memmap properties to allow Xen to move things to other * addresses for example when users need to accomodate the memory-map @@ -377,6 +480,13 @@ do { \ #endif } +static void xen_pvh_instance_init(Object *obj) +{ + XenPVHMachineState *xp = XEN_PVH_MACHINE(obj); + + xp->cfg.mapcache = true; +} + static void xen_pvh_class_init(ObjectClass *oc, const void *data) { MachineClass *mc = MACHINE_CLASS(oc); @@ -395,6 +505,7 @@ static const TypeInfo xen_pvh_info = { .parent = TYPE_MACHINE, .abstract = true, .instance_size = sizeof(XenPVHMachineState), + .instance_init = xen_pvh_instance_init, .class_size = sizeof(XenPVHMachineClass), .class_init = xen_pvh_class_init, }; diff --git 
a/include/hw/xen/xen-pvh-common.h b/include/hw/xen/xen-pvh-common.h index 0ed07c5694..0209b798f3 100644 --- a/include/hw/xen/xen-pvh-common.h +++ b/include/hw/xen/xen-pvh-common.h @@ -84,6 +84,8 @@ struct XenPVHMachineState { /* PCI */ MemMapEntry pci_ecam, pci_mmio, pci_mmio_high; uint32_t pci_intx_irq_base; + + bool mapcache; } cfg; };