multifd: Add COLO support

Like in the normal ram_load() path, put the received pages into the colo cache and mark the pages in the bitmap so that they will be flushed to the guest later. Multifd with COLO is useful to reduce the VM pause time during checkpointing for latency sensitive workloads. In such workloads the worst-case latency is especially important. Also, this is already worth it for the precopy phase as it helps with converging. Moreover, multifd migration is the preferred way to do migration nowadays and this allows to use multifd compression with COLO. Benchmark: Cluster nodes - Intel Xenon E5-2630 v3 - 48Gb RAM - 10G Ethernet Guest - Windows Server 2016 - 6Gb RAM - 4 cores Workload - Upload a file to the guest with SMB to simulate moderate memory dirtying - Measure the memory transfer time portion of each checkpoint - 600ms COLO checkpoint interval Results Plain idle mean: 4.50ms 99per: 10.33ms load mean: 24.30ms 99per: 78.05ms Multifd-4 idle mean: 6.48ms 99per: 10.41ms load mean: 14.12ms 99per: 31.27ms Evaluation While multifd has slightly higher latency when the guest idles, it is 10ms faster under load and more importantly it's worst case latency is less than 1/2 of plain under load as can be seen in the 99. Percentile. Co-authored-by: Juan Quintela <quintela@redhat.com> [farosas: changed SoB to coauthored as Juan doesn't own that email address anymore] Reviewed-by: Fabiano Rosas <farosas@suse.de> Reviewed-by: Peter Xu <peterx@redhat.com> Signed-off-by: Lukas Straub <lukasstraub2@web.de> Link: https://lore.kernel.org/qemu-devel/20260302-colo_unit_test_multifd-v11-8-d653fb3b1d80@web.de [removed license boilerplate] Signed-off-by: Fabiano Rosas <farosas@suse.de>
1 month ago · dd67c149e2
7 changed files with 87 additions and 3 deletions
--- a/1
+++ b/1
@ -3883,6 +3883,7 @@ COLO Framework
 M: Lukas Straub <lukasstraub2@web.de>
 S: Maintained
 F: migration/colo*
+F: migration/multifd-colo.*
 F: include/migration/colo.h
 F: include/migration/failover.h
 F: docs/COLO-FT.txt
--- a/migration/meson.build
+++ b/migration/meson.build
@ -39,7 +39,7 @@ system_ss.add(files(
 ), gnutls, zlib)

 if get_option('replication').allowed()
-  system_ss.add(files('colo-failover.c', 'colo.c'))
+  system_ss.add(files('colo-failover.c', 'colo.c', 'multifd-colo.c'))
 else
  system_ss.add(files('colo-stubs.c'))
 endif
--- a/migration/multifd-colo.c
+++ b/migration/multifd-colo.c
@ -0,0 +1,41 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * multifd colo implementation
+ *
+ * Copyright (c) Lukas Straub <lukasstraub2@web.de>
+ */
+
+#include "qemu/osdep.h"
+#include "multifd.h"
+#include "multifd-colo.h"
+#include "migration/colo.h"
+#include "system/ramblock.h"
+
+void multifd_colo_prepare_recv(MultiFDRecvParams *p)
+{
+    /*
+     * While we're still in precopy state (not yet in colo state), we copy
+     * received pages to both guest and cache. No need to set dirty bits,
+     * since guest and cache memory are in sync.
+     */
+    if (migration_incoming_in_colo_state()) {
+        colo_record_bitmap(p->block, p->normal, p->normal_num);
+        colo_record_bitmap(p->block, p->zero, p->zero_num);
+    }
+}
+
+void multifd_colo_process_recv(MultiFDRecvParams *p)
+{
+    if (!migration_incoming_in_colo_state()) {
+        for (int i = 0; i < p->normal_num; i++) {
+            void *guest = p->block->host + p->normal[i];
+            void *cache = p->host + p->normal[i];
+            memcpy(guest, cache, multifd_ram_page_size());
+        }
+        for (int i = 0; i < p->zero_num; i++) {
+            void *guest = p->block->host + p->zero[i];
+            memset(guest, 0, multifd_ram_page_size());
+        }
+    }
+}
--- a/migration/multifd-colo.h
+++ b/migration/multifd-colo.h
@ -0,0 +1,23 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * multifd colo header
+ *
+ * Copyright (c) Lukas Straub <lukasstraub2@web.de>
+ */
+
+#ifndef QEMU_MIGRATION_MULTIFD_COLO_H
+#define QEMU_MIGRATION_MULTIFD_COLO_H
+
+#ifdef CONFIG_REPLICATION
+
+void multifd_colo_prepare_recv(MultiFDRecvParams *p);
+void multifd_colo_process_recv(MultiFDRecvParams *p);
+
+#else
+
+static inline void multifd_colo_prepare_recv(MultiFDRecvParams *p) {}
+static inline void multifd_colo_process_recv(MultiFDRecvParams *p) {}
+
+#endif
+#endif
--- a/migration/multifd-nocomp.c
+++ b/migration/multifd-nocomp.c
@ -16,6 +16,7 @@
 #include "file.h"
 #include "migration-stats.h"
 #include "multifd.h"
+#include "multifd-colo.h"
 #include "options.h"
 #include "migration.h"
 #include "qapi/error.h"
@ -269,7 +270,6 @@ int multifd_ram_unfill_packet(MultiFDRecvParams *p, Error **errp)
        return -1;
    }

-    p->host = p->block->host;
    for (i = 0; i < p->normal_num; i++) {
        uint64_t offset = be64_to_cpu(packet->offset[i]);

@ -294,6 +294,14 @@ int multifd_ram_unfill_packet(MultiFDRecvParams *p, Error **errp)
        p->zero[i] = offset;
    }

+    if (migrate_colo()) {
+        multifd_colo_prepare_recv(p);
+        assert(p->block->colo_cache);
+        p->host = p->block->colo_cache;
+    } else {
+        p->host = p->block->host;
+    }
+
    return 0;
 }

--- a/migration/multifd.c
+++ b/migration/multifd.c
@ -29,6 +29,7 @@
 #include "qemu-file.h"
 #include "trace.h"
 #include "multifd.h"
+#include "multifd-colo.h"
 #include "options.h"
 #include "qemu/yank.h"
 #include "io/channel-file.h"
@ -1258,6 +1259,13 @@ static int multifd_ram_state_recv(MultiFDRecvParams *p, Error **errp)
    int ret;

    ret = multifd_recv_state->ops->recv(p, errp);
+    if (ret != 0) {
+        return ret;
+    }
+
+    if (migrate_colo()) {
+        multifd_colo_process_recv(p);
+    }

    return ret;
 }
--- a/migration/multifd.h
+++ b/migration/multifd.h
@ -279,7 +279,10 @@ typedef struct {
    uint64_t packets_recved;
    /* ramblock */
    RAMBlock *block;
-    /* ramblock host address */
+    /*
+     * Normally, it points to ramblock's host address.  When COLO
+     * is enabled, it points to the mirror cache for the ramblock.
+     */
    uint8_t *host;
    /* buffers to recv */
    struct iovec *iov;