Browse Source

colo: Reuse the return path from migration on primary and secondary side

Use the return-path capability with colo and reuse the opened return path
file on both primary and secondary side.

This fixes a crash in colo where migration_cancel() races with colo closing
s->rp_state.from_dst_file.

Signed-off-by: Lukas Straub <lukasstraub2@web.de>
Reviewed-by: Peter Xu <peterx@redhat.com>
Link: https://lore.kernel.org/qemu-devel/20260302-colo_unit_test_multifd-v11-21-d653fb3b1d80@web.de
Signed-off-by: Fabiano Rosas <farosas@suse.de>
master
Lukas Straub 4 weeks ago
committed by Fabiano Rosas
parent
commit
161e603975
  1. 4
      docs/system/qemu-colo.rst
  2. 26
      migration/colo.c
  3. 10
      migration/options.c
  4. 1
      tests/qtest/migration/colo-tests.c
  5. 13
      tests/qtest/migration/framework.c

4
docs/system/qemu-colo.rst

@ -227,7 +227,7 @@ any IP's here, except for the ``$primary_ip`` variable::
**3.** On Secondary VM's QEMU monitor, issue command:: **3.** On Secondary VM's QEMU monitor, issue command::
{"execute":"qmp_capabilities"} {"execute":"qmp_capabilities"}
{"execute": "migrate-set-capabilities", "arguments": {"capabilities": [ {"capability": "x-colo", "state": true } ] } } {"execute": "migrate-set-capabilities", "arguments": {"capabilities": [ {"capability": "return-path", "state": true }, {"capability": "x-colo", "state": true } ] } }
{"execute": "nbd-server-start", "arguments": {"addr": {"type": "inet", "data": {"host": "0.0.0.0", "port": "9999"} } } } {"execute": "nbd-server-start", "arguments": {"addr": {"type": "inet", "data": {"host": "0.0.0.0", "port": "9999"} } } }
{"execute": "nbd-server-add", "arguments": {"device": "parent0", "writable": true } } {"execute": "nbd-server-add", "arguments": {"device": "parent0", "writable": true } }
@ -244,7 +244,7 @@ Note:
{"execute":"qmp_capabilities"} {"execute":"qmp_capabilities"}
{"execute": "blockdev-add", "arguments": {"driver": "nbd", "node-name": "nbd0", "server": {"type": "inet", "host": "127.0.0.2", "port": "9999"}, "export": "parent0", "detect-zeroes": "on"} } {"execute": "blockdev-add", "arguments": {"driver": "nbd", "node-name": "nbd0", "server": {"type": "inet", "host": "127.0.0.2", "port": "9999"}, "export": "parent0", "detect-zeroes": "on"} }
{"execute": "x-blockdev-change", "arguments":{"parent": "colo-disk0", "node": "nbd0" } } {"execute": "x-blockdev-change", "arguments":{"parent": "colo-disk0", "node": "nbd0" } }
{"execute": "migrate-set-capabilities", "arguments": {"capabilities": [ {"capability": "x-colo", "state": true } ] } } {"execute": "migrate-set-capabilities", "arguments": {"capabilities": [ {"capability": "return-path", "state": true }, {"capability": "x-colo", "state": true } ] } }
{"execute": "migrate", "arguments": {"uri": "tcp:127.0.0.2:9998" } } {"execute": "migrate", "arguments": {"uri": "tcp:127.0.0.2:9998" } }
Note: Note:

26
migration/colo.c

@ -539,6 +539,8 @@ static void colo_process_checkpoint(MigrationState *s)
Error *local_err = NULL; Error *local_err = NULL;
int ret; int ret;
assert(s->rp_state.from_dst_file);
assert(!s->rp_state.rp_thread_created);
if (get_colo_mode() != COLO_MODE_PRIMARY) { if (get_colo_mode() != COLO_MODE_PRIMARY) {
error_report("COLO mode must be COLO_MODE_PRIMARY"); error_report("COLO mode must be COLO_MODE_PRIMARY");
return; return;
@ -546,12 +548,6 @@ static void colo_process_checkpoint(MigrationState *s)
failover_init_state(); failover_init_state();
s->rp_state.from_dst_file = qemu_file_get_return_path(s->to_dst_file);
if (!s->rp_state.from_dst_file) {
error_report("Open QEMUFile from_dst_file failed");
goto out;
}
packets_compare_notifier.notify = colo_compare_notify_checkpoint; packets_compare_notifier.notify = colo_compare_notify_checkpoint;
colo_compare_register_notifier(&packets_compare_notifier); colo_compare_register_notifier(&packets_compare_notifier);
@ -636,16 +632,6 @@ out:
colo_compare_unregister_notifier(&packets_compare_notifier); colo_compare_unregister_notifier(&packets_compare_notifier);
timer_free(s->colo_delay_timer); timer_free(s->colo_delay_timer);
qemu_event_destroy(&s->colo_checkpoint_event); qemu_event_destroy(&s->colo_checkpoint_event);
/*
* Must be called after failover BH is completed,
* Or the failover BH may shutdown the wrong fd that
* re-used by other threads after we release here.
*/
if (s->rp_state.from_dst_file) {
qemu_fclose(s->rp_state.from_dst_file);
s->rp_state.from_dst_file = NULL;
}
} }
void migrate_start_colo_process(MigrationState *s) void migrate_start_colo_process(MigrationState *s)
@ -838,6 +824,7 @@ static void *colo_process_incoming_thread(void *opaque)
migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
MIGRATION_STATUS_COLO); MIGRATION_STATUS_COLO);
assert(mis->to_src_file);
if (get_colo_mode() != COLO_MODE_SECONDARY) { if (get_colo_mode() != COLO_MODE_SECONDARY) {
error_report("COLO mode must be COLO_MODE_SECONDARY"); error_report("COLO mode must be COLO_MODE_SECONDARY");
return NULL; return NULL;
@ -854,7 +841,6 @@ static void *colo_process_incoming_thread(void *opaque)
failover_init_state(); failover_init_state();
mis->to_src_file = qemu_file_get_return_path(mis->from_src_file);
/* /*
* Note: the communication between Primary side and Secondary side * Note: the communication between Primary side and Secondary side
* should be sequential, we set the fd to unblocked in migration incoming * should be sequential, we set the fd to unblocked in migration incoming
@ -866,6 +852,12 @@ static void *colo_process_incoming_thread(void *opaque)
goto out; goto out;
} }
/*
* rp thread still running on primary side, shut it down to go into
* colo state.
*/
migrate_send_rp_shut(mis, 0);
colo_incoming_start_dirty_log(); colo_incoming_start_dirty_log();
bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE); bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE);

10
migration/options.c

@ -575,7 +575,15 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp)
ERRP_GUARD(); ERRP_GUARD();
MigrationIncomingState *mis = migration_incoming_get_current(); MigrationIncomingState *mis = migration_incoming_get_current();
#ifndef CONFIG_REPLICATION #ifdef CONFIG_REPLICATION
if (new_caps[MIGRATION_CAPABILITY_X_COLO]) {
if (!new_caps[MIGRATION_CAPABILITY_RETURN_PATH]) {
error_setg(errp, "Capability 'x-colo' requires capability "
"'return-path'");
return false;
}
}
#else
if (new_caps[MIGRATION_CAPABILITY_X_COLO]) { if (new_caps[MIGRATION_CAPABILITY_X_COLO]) {
error_setg(errp, "QEMU compiled without replication module" error_setg(errp, "QEMU compiled without replication module"
" can't enable COLO"); " can't enable COLO");

1
tests/qtest/migration/colo-tests.c

@ -42,6 +42,7 @@ static int test_colo_common(MigrateCommon *args,
* used in production. * used in production.
*/ */
args->start.oob = true; args->start.oob = true;
args->start.caps[MIGRATION_CAPABILITY_RETURN_PATH] = true;
args->start.caps[MIGRATION_CAPABILITY_X_COLO] = true; args->start.caps[MIGRATION_CAPABILITY_X_COLO] = true;
if (migrate_start(&from, &to, args->listen_uri, &args->start)) { if (migrate_start(&from, &to, args->listen_uri, &args->start)) {

13
tests/qtest/migration/framework.c

@ -216,6 +216,19 @@ static void migrate_start_set_capabilities(QTestState *from, QTestState *to,
* MigrationCapability_lookup and MIGRATION_CAPABILITY_ constants * MigrationCapability_lookup and MIGRATION_CAPABILITY_ constants
* are from qapi-types-migration.h. * are from qapi-types-migration.h.
*/ */
/*
* Enable return path first, since other features depend on it.
*/
if (args->caps[MIGRATION_CAPABILITY_RETURN_PATH]) {
if (from) {
migrate_set_capability(from, "return-path", true);
}
if (to) {
migrate_set_capability(to, "return-path", true);
}
}
for (uint8_t i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { for (uint8_t i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
if (!args->caps[i]) { if (!args->caps[i]) {
continue; continue;

Loading…
Cancel
Save