From 37c74b1dbb205506dc17cfc9cb3ba3ae7e207986 Mon Sep 17 00:00:00 2001 From: Lukas Straub Date: Mon, 2 Mar 2026 12:45:33 +0100 Subject: [PATCH] multifd: Fix hang if send thread errors during sync When a send thread encounters an error (as is the case with yank), it sets multifd_send_state->exiting and the other threads exit too. This races with multifd_send_sync_main(), which then hangs at qemu_sem_wait(&p->sem_sync) (line 647) because it waits for threads that have already exited. Fix this by kicking the semaphores when exiting the send threads. I encountered this hang when stress testing the colo unit test, though I was unable to write a migration test that reliably hits this. Reviewed-by: Peter Xu Signed-off-by: Lukas Straub Link: https://lore.kernel.org/qemu-devel/20260302-colo_unit_test_multifd-v11-18-d653fb3b1d80@web.de Signed-off-by: Fabiano Rosas --- migration/multifd.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/migration/multifd.c b/migration/multifd.c index 2193088996..8b9ed84805 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -772,9 +772,14 @@ out: assert(local_err); trace_multifd_send_error(p->id); multifd_send_error_propagate(local_err); - multifd_send_kick_main(p); } + /* + * Always kick the main thread: The main thread might wait on this thread + * while another thread encounters an error and signals this thread to exit. + */ + multifd_send_kick_main(p); + rcu_unregister_thread(); trace_multifd_send_thread_end(p->id, p->packets_sent);