QEMU main repository: Please see https://www.qemu.org/docs/master/devel/submitting-a-patch.html for how to submit changes to QEMU. Pull Requests are ignored. Please only use release tarballs from the QEMU website. http://www.qemu.org
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

919 lines
20 KiB

/*
* QEMU System Emulator
*
* Copyright (c) 2003-2008 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "monitor/monitor.h"
#include "qemu/coroutine-tls.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-machine.h"
#include "qapi/qapi-commands-misc.h"
#include "qapi/qapi-events-run-state.h"
#include "qapi/qmp/qerror.h"
#include "exec/gdbstub.h"
#include "accel/accel-cpu-ops.h"
#include "system/hw_accel.h"
#include "exec/cpu-common.h"
#include "qemu/thread.h"
#include "qemu/main-loop.h"
#include "qemu/plugin.h"
#include "system/cpus.h"
#include "qemu/guest-random.h"
#include "hw/core/nmi.h"
#include "system/replay.h"
#include "system/runstate.h"
#include "system/cpu-timers.h"
#include "system/whpx.h"
#include "hw/core/boards.h"
#include "hw/core/hw-error.h"
#include "trace.h"
#ifdef CONFIG_LINUX
#include <sys/prctl.h>
#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif
#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif
#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif
#endif /* CONFIG_LINUX */
system/cpus: rename qemu_mutex_lock_iothread() to bql_lock() The Big QEMU Lock (BQL) has many names and they are confusing. The actual QemuMutex variable is called qemu_global_mutex but it's commonly referred to as the BQL in discussions and some code comments. The locking APIs, however, are called qemu_mutex_lock_iothread() and qemu_mutex_unlock_iothread(). The "iothread" name is historic and comes from when the main thread was split into into KVM vcpu threads and the "iothread" (now called the main loop thread). I have contributed to the confusion myself by introducing a separate --object iothread, a separate concept unrelated to the BQL. The "iothread" name is no longer appropriate for the BQL. Rename the locking APIs to: - void bql_lock(void) - void bql_unlock(void) - bool bql_locked(void) There are more APIs with "iothread" in their names. Subsequent patches will rename them. There are also comments and documentation that will be updated in later patches. Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Paul Durrant <paul@xen.org> Acked-by: Fabiano Rosas <farosas@suse.de> Acked-by: David Woodhouse <dwmw@amazon.co.uk> Reviewed-by: Cédric Le Goater <clg@kaod.org> Acked-by: Peter Xu <peterx@redhat.com> Acked-by: Eric Farman <farman@linux.ibm.com> Reviewed-by: Harsh Prateek Bora <harshpb@linux.ibm.com> Acked-by: Hyman Huang <yong.huang@smartx.com> Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com> Message-id: 20240102153529.486531-2-stefanha@redhat.com Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2 years ago
/* The Big QEMU Lock (BQL) */
static QemuMutex bql;
/*
* The chosen accelerator is supposed to register this.
*/
static const AccelOpsClass *cpus_accel;
bool cpu_is_stopped(CPUState *cpu)
{
return cpu->stopped || !runstate_is_running();
}
bool cpu_work_list_empty(CPUState *cpu)
{
return QSIMPLEQ_EMPTY_ATOMIC(&cpu->work_list);
}
bool cpu_thread_is_idle(CPUState *cpu)
{
if (cpu->stop || !cpu_work_list_empty(cpu)) {
return false;
}
if (cpu_is_stopped(cpu)) {
return true;
}
if (!cpu->halted || cpu_has_work(cpu)) {
return false;
}
if (cpus_accel->cpu_thread_is_idle) {
return cpus_accel->cpu_thread_is_idle(cpu);
}
return true;
}
bool all_cpu_threads_idle(void)
{
CPUState *cpu;
CPU_FOREACH(cpu) {
if (!cpu_thread_is_idle(cpu)) {
return false;
}
}
return true;
}
/***********************************************************/
void hw_error(const char *fmt, ...)
{
va_list ap;
CPUState *cpu;
va_start(ap, fmt);
fprintf(stderr, "qemu: hardware error: ");
vfprintf(stderr, fmt, ap);
fprintf(stderr, "\n");
CPU_FOREACH(cpu) {
fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
cpu_dump_state(cpu, stderr, CPU_DUMP_FPU);
}
va_end(ap);
abort();
}
void cpu_synchronize_all_states(void)
{
CPUState *cpu;
CPU_FOREACH(cpu) {
cpu_synchronize_state(cpu);
}
}
void cpu_synchronize_all_post_reset(void)
{
CPUState *cpu;
CPU_FOREACH(cpu) {
cpu_synchronize_post_reset(cpu);
}
}
void cpu_synchronize_all_post_init(void)
{
CPUState *cpu;
CPU_FOREACH(cpu) {
cpu_synchronize_post_init(cpu);
}
}
void cpu_synchronize_all_pre_loadvm(void)
{
CPUState *cpu;
CPU_FOREACH(cpu) {
cpu_synchronize_pre_loadvm(cpu);
}
}
void cpu_synchronize_state(CPUState *cpu)
{
if (cpus_accel->synchronize_state) {
cpus_accel->synchronize_state(cpu);
}
}
void cpu_synchronize_post_reset(CPUState *cpu)
{
if (cpus_accel->synchronize_post_reset) {
cpus_accel->synchronize_post_reset(cpu);
}
}
void cpu_synchronize_post_init(CPUState *cpu)
{
if (cpus_accel->synchronize_post_init) {
cpus_accel->synchronize_post_init(cpu);
}
}
void cpu_synchronize_pre_loadvm(CPUState *cpu)
{
if (cpus_accel->synchronize_pre_loadvm) {
cpus_accel->synchronize_pre_loadvm(cpu);
}
}
bool cpus_are_resettable(void)
{
if (cpus_accel->cpus_are_resettable) {
return cpus_accel->cpus_are_resettable();
}
return true;
}
void cpu_exec_reset_hold(CPUState *cpu)
{
if (cpus_accel->cpu_reset_hold) {
cpus_accel->cpu_reset_hold(cpu);
}
}
int64_t cpus_get_virtual_clock(void)
{
/*
* XXX
*
* need to check that cpus_accel is not NULL, because qcow2 calls
* qemu_get_clock_ns(CLOCK_VIRTUAL) without any accel initialized and
* with ticks disabled in some io-tests:
* 030 040 041 060 099 120 127 140 156 161 172 181 191 192 195 203 229 249 256 267
*
* is this expected?
*
* XXX
*/
if (cpus_accel && cpus_accel->get_virtual_clock) {
return cpus_accel->get_virtual_clock();
}
return cpu_get_clock();
}
/*
* Signal the new virtual time to the accelerator. This is only needed
* by accelerators that need to track the changes as we warp time.
*/
void cpus_set_virtual_clock(int64_t new_time)
{
if (cpus_accel && cpus_accel->set_virtual_clock) {
cpus_accel->set_virtual_clock(new_time);
}
}
/*
* return the time elapsed in VM between vm_start and vm_stop. Unless
* icount is active, cpus_get_elapsed_ticks() uses units of the host CPU cycle
* counter.
*/
int64_t cpus_get_elapsed_ticks(void)
{
if (cpus_accel->get_elapsed_ticks) {
return cpus_accel->get_elapsed_ticks();
}
return cpu_get_ticks();
}
void cpu_set_interrupt(CPUState *cpu, int mask)
{
/* Pairs with cpu_test_interrupt(). */
qatomic_or(&cpu->interrupt_request, mask);
}
void generic_handle_interrupt(CPUState *cpu, int mask)
{
cpu_set_interrupt(cpu, mask);
if (!qemu_cpu_is_self(cpu)) {
qemu_cpu_kick(cpu);
}
}
void cpu_interrupt(CPUState *cpu, int mask)
{
g_assert(bql_locked());
cpus_accel->handle_interrupt(cpu, mask);
}
/*
* True if the vm was previously suspended, and has not been woken or reset.
*/
static int vm_was_suspended;
void vm_set_suspended(bool suspended)
{
vm_was_suspended = suspended;
}
bool vm_get_suspended(void)
{
return vm_was_suspended;
}
vl: introduce vm_shutdown() Commit 00d09fdbbae5f7864ce754913efc84c12fdf9f1a ("vl: pause vcpus before stopping iothreads") and commit dce8921b2baaf95974af8176406881872067adfa ("iothread: Stop threads before main() quits") tried to work around the fact that emulation was still active during termination by stopping iothreads. They suffer from race conditions: 1. virtio_scsi_handle_cmd_vq() racing with iothread_stop_all() hits the virtio_scsi_ctx_check() assertion failure because the BDS AioContext has been modified by iothread_stop_all(). 2. Guest vq kick racing with main loop termination leaves a readable ioeventfd that is handled by the next aio_poll() when external clients are enabled again, resulting in unwanted emulation activity. This patch obsoletes those commits by fully disabling emulation activity when vcpus are stopped. Use the new vm_shutdown() function instead of pause_all_vcpus() so that vm change state handlers are invoked too. Virtio devices will now stop their ioeventfds, preventing further emulation activity after vm_stop(). Note that vm_stop(RUN_STATE_SHUTDOWN) cannot be used because it emits a QMP STOP event that may affect existing clients. It is no longer necessary to call replay_disable_events() directly since vm_shutdown() does so already. Drop iothread_stop_all() since it is no longer used. Cc: Fam Zheng <famz@redhat.com> Cc: Kevin Wolf <kwolf@redhat.com> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Fam Zheng <famz@redhat.com> Acked-by: Paolo Bonzini <pbonzini@redhat.com> Message-id: 20180307144205.20619-5-stefanha@redhat.com Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8 years ago
static int do_vm_stop(RunState state, bool send_stop)
{
int ret = 0;
cpus: stop vm in suspended runstate Currently, a vm in the suspended state is not completely stopped. The VCPUs have been paused, but the cpu clock still runs, and runstate notifiers for the transition to stopped have not been called. This causes problems for live migration. Stale cpu timers_state is saved to the migration stream, causing time errors in the guest when it wakes from suspend, and state that would have been modified by runstate notifiers is wrong. Modify vm_stop to completely stop the vm if the current state is suspended, transition to RUN_STATE_PAUSED, and remember that the machine was suspended. Modify vm_start to restore the suspended state. This affects all callers of vm_stop and vm_start, notably, the qapi stop and cont commands: old behavior: RUN_STATE_SUSPENDED --> stop --> RUN_STATE_SUSPENDED new behavior: RUN_STATE_SUSPENDED --> stop --> RUN_STATE_PAUSED RUN_STATE_PAUSED --> cont --> RUN_STATE_SUSPENDED For example: (qemu) info status VM status: paused (suspended) (qemu) stop (qemu) info status VM status: paused (qemu) system_wakeup Error: Unable to wake up: guest is not in suspended state (qemu) cont (qemu) info status VM status: paused (suspended) (qemu) system_wakeup (qemu) info status VM status: running Suggested-by: Peter Xu <peterx@redhat.com> Signed-off-by: Steve Sistare <steven.sistare@oracle.com> Reviewed-by: Peter Xu <peterx@redhat.com> Link: https://lore.kernel.org/r/1704312341-66640-3-git-send-email-steven.sistare@oracle.com Signed-off-by: Peter Xu <peterx@redhat.com>
2 years ago
RunState oldstate = runstate_get();
cpus: stop vm in suspended runstate Currently, a vm in the suspended state is not completely stopped. The VCPUs have been paused, but the cpu clock still runs, and runstate notifiers for the transition to stopped have not been called. This causes problems for live migration. Stale cpu timers_state is saved to the migration stream, causing time errors in the guest when it wakes from suspend, and state that would have been modified by runstate notifiers is wrong. Modify vm_stop to completely stop the vm if the current state is suspended, transition to RUN_STATE_PAUSED, and remember that the machine was suspended. Modify vm_start to restore the suspended state. This affects all callers of vm_stop and vm_start, notably, the qapi stop and cont commands: old behavior: RUN_STATE_SUSPENDED --> stop --> RUN_STATE_SUSPENDED new behavior: RUN_STATE_SUSPENDED --> stop --> RUN_STATE_PAUSED RUN_STATE_PAUSED --> cont --> RUN_STATE_SUSPENDED For example: (qemu) info status VM status: paused (suspended) (qemu) stop (qemu) info status VM status: paused (qemu) system_wakeup Error: Unable to wake up: guest is not in suspended state (qemu) cont (qemu) info status VM status: paused (suspended) (qemu) system_wakeup (qemu) info status VM status: running Suggested-by: Peter Xu <peterx@redhat.com> Signed-off-by: Steve Sistare <steven.sistare@oracle.com> Reviewed-by: Peter Xu <peterx@redhat.com> Link: https://lore.kernel.org/r/1704312341-66640-3-git-send-email-steven.sistare@oracle.com Signed-off-by: Peter Xu <peterx@redhat.com>
2 years ago
if (runstate_is_live(oldstate)) {
vm_was_suspended = (oldstate == RUN_STATE_SUSPENDED);
runstate_set(state);
cpu_disable_ticks();
cpus: stop vm in suspended runstate Currently, a vm in the suspended state is not completely stopped. The VCPUs have been paused, but the cpu clock still runs, and runstate notifiers for the transition to stopped have not been called. This causes problems for live migration. Stale cpu timers_state is saved to the migration stream, causing time errors in the guest when it wakes from suspend, and state that would have been modified by runstate notifiers is wrong. Modify vm_stop to completely stop the vm if the current state is suspended, transition to RUN_STATE_PAUSED, and remember that the machine was suspended. Modify vm_start to restore the suspended state. This affects all callers of vm_stop and vm_start, notably, the qapi stop and cont commands: old behavior: RUN_STATE_SUSPENDED --> stop --> RUN_STATE_SUSPENDED new behavior: RUN_STATE_SUSPENDED --> stop --> RUN_STATE_PAUSED RUN_STATE_PAUSED --> cont --> RUN_STATE_SUSPENDED For example: (qemu) info status VM status: paused (suspended) (qemu) stop (qemu) info status VM status: paused (qemu) system_wakeup Error: Unable to wake up: guest is not in suspended state (qemu) cont (qemu) info status VM status: paused (suspended) (qemu) system_wakeup (qemu) info status VM status: running Suggested-by: Peter Xu <peterx@redhat.com> Signed-off-by: Steve Sistare <steven.sistare@oracle.com> Reviewed-by: Peter Xu <peterx@redhat.com> Link: https://lore.kernel.org/r/1704312341-66640-3-git-send-email-steven.sistare@oracle.com Signed-off-by: Peter Xu <peterx@redhat.com>
2 years ago
if (oldstate == RUN_STATE_RUNNING) {
pause_all_vcpus();
}
ret = vm_state_notify(0, state);
vl: introduce vm_shutdown() Commit 00d09fdbbae5f7864ce754913efc84c12fdf9f1a ("vl: pause vcpus before stopping iothreads") and commit dce8921b2baaf95974af8176406881872067adfa ("iothread: Stop threads before main() quits") tried to work around the fact that emulation was still active during termination by stopping iothreads. They suffer from race conditions: 1. virtio_scsi_handle_cmd_vq() racing with iothread_stop_all() hits the virtio_scsi_ctx_check() assertion failure because the BDS AioContext has been modified by iothread_stop_all(). 2. Guest vq kick racing with main loop termination leaves a readable ioeventfd that is handled by the next aio_poll() when external clients are enabled again, resulting in unwanted emulation activity. This patch obsoletes those commits by fully disabling emulation activity when vcpus are stopped. Use the new vm_shutdown() function instead of pause_all_vcpus() so that vm change state handlers are invoked too. Virtio devices will now stop their ioeventfds, preventing further emulation activity after vm_stop(). Note that vm_stop(RUN_STATE_SHUTDOWN) cannot be used because it emits a QMP STOP event that may affect existing clients. It is no longer necessary to call replay_disable_events() directly since vm_shutdown() does so already. Drop iothread_stop_all() since it is no longer used. Cc: Fam Zheng <famz@redhat.com> Cc: Kevin Wolf <kwolf@redhat.com> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Fam Zheng <famz@redhat.com> Acked-by: Paolo Bonzini <pbonzini@redhat.com> Message-id: 20180307144205.20619-5-stefanha@redhat.com Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8 years ago
if (send_stop) {
qapi_event_send_stop();
vl: introduce vm_shutdown() Commit 00d09fdbbae5f7864ce754913efc84c12fdf9f1a ("vl: pause vcpus before stopping iothreads") and commit dce8921b2baaf95974af8176406881872067adfa ("iothread: Stop threads before main() quits") tried to work around the fact that emulation was still active during termination by stopping iothreads. They suffer from race conditions: 1. virtio_scsi_handle_cmd_vq() racing with iothread_stop_all() hits the virtio_scsi_ctx_check() assertion failure because the BDS AioContext has been modified by iothread_stop_all(). 2. Guest vq kick racing with main loop termination leaves a readable ioeventfd that is handled by the next aio_poll() when external clients are enabled again, resulting in unwanted emulation activity. This patch obsoletes those commits by fully disabling emulation activity when vcpus are stopped. Use the new vm_shutdown() function instead of pause_all_vcpus() so that vm change state handlers are invoked too. Virtio devices will now stop their ioeventfds, preventing further emulation activity after vm_stop(). Note that vm_stop(RUN_STATE_SHUTDOWN) cannot be used because it emits a QMP STOP event that may affect existing clients. It is no longer necessary to call replay_disable_events() directly since vm_shutdown() does so already. Drop iothread_stop_all() since it is no longer used. Cc: Fam Zheng <famz@redhat.com> Cc: Kevin Wolf <kwolf@redhat.com> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Fam Zheng <famz@redhat.com> Acked-by: Paolo Bonzini <pbonzini@redhat.com> Message-id: 20180307144205.20619-5-stefanha@redhat.com Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8 years ago
}
}
bdrv_drain_all();
/*
* Even if vm_state_notify() return failure,
* it would be better to flush as before.
*/
ret |= bdrv_flush_all();
trace_vm_stop_flush_all(ret);
return ret;
}
vl: introduce vm_shutdown() Commit 00d09fdbbae5f7864ce754913efc84c12fdf9f1a ("vl: pause vcpus before stopping iothreads") and commit dce8921b2baaf95974af8176406881872067adfa ("iothread: Stop threads before main() quits") tried to work around the fact that emulation was still active during termination by stopping iothreads. They suffer from race conditions: 1. virtio_scsi_handle_cmd_vq() racing with iothread_stop_all() hits the virtio_scsi_ctx_check() assertion failure because the BDS AioContext has been modified by iothread_stop_all(). 2. Guest vq kick racing with main loop termination leaves a readable ioeventfd that is handled by the next aio_poll() when external clients are enabled again, resulting in unwanted emulation activity. This patch obsoletes those commits by fully disabling emulation activity when vcpus are stopped. Use the new vm_shutdown() function instead of pause_all_vcpus() so that vm change state handlers are invoked too. Virtio devices will now stop their ioeventfds, preventing further emulation activity after vm_stop(). Note that vm_stop(RUN_STATE_SHUTDOWN) cannot be used because it emits a QMP STOP event that may affect existing clients. It is no longer necessary to call replay_disable_events() directly since vm_shutdown() does so already. Drop iothread_stop_all() since it is no longer used. Cc: Fam Zheng <famz@redhat.com> Cc: Kevin Wolf <kwolf@redhat.com> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Fam Zheng <famz@redhat.com> Acked-by: Paolo Bonzini <pbonzini@redhat.com> Message-id: 20180307144205.20619-5-stefanha@redhat.com Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8 years ago
/* Special vm_stop() variant for terminating the process. Historically clients
* did not expect a QMP STOP event and so we need to retain compatibility.
*/
int vm_shutdown(void)
{
return do_vm_stop(RUN_STATE_SHUTDOWN, false);
}
bool cpu_can_run(CPUState *cpu)
{
if (cpu->stop) {
return false;
}
if (cpu_is_stopped(cpu)) {
return false;
}
return true;
}
void cpu_handle_guest_debug(CPUState *cpu)
{
if (replay_running_debug()) {
if (!cpu->singlestep_enabled) {
/*
* Report about the breakpoint and
* make a single step to skip it
*/
replay_breakpoint();
cpu_single_step(cpu, SSTEP_ENABLE);
} else {
cpu_single_step(cpu, 0);
}
} else {
gdb_set_stop_cpu(cpu);
qemu_system_debug_request();
cpu->stopped = true;
}
}
#ifdef CONFIG_LINUX
static void sigbus_reraise(void)
{
sigset_t set;
struct sigaction action;
memset(&action, 0, sizeof(action));
action.sa_handler = SIG_DFL;
if (!sigaction(SIGBUS, &action, NULL)) {
raise(SIGBUS);
sigemptyset(&set);
sigaddset(&set, SIGBUS);
pthread_sigmask(SIG_UNBLOCK, &set, NULL);
}
perror("Failed to re-raise SIGBUS!");
abort();
}
static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
{
if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
sigbus_reraise();
}
if (current_cpu) {
/* Called asynchronously in VCPU thread. */
if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
sigbus_reraise();
}
} else {
/* Called synchronously (via signalfd) in main thread. */
if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
sigbus_reraise();
}
}
}
static void qemu_init_sigbus(void)
{
struct sigaction action;
/*
* ALERT: when modifying this, take care that SIGBUS forwarding in
* qemu_prealloc_mem() will continue working as expected.
*/
memset(&action, 0, sizeof(action));
action.sa_flags = SA_SIGINFO;
action.sa_sigaction = sigbus_handler;
sigaction(SIGBUS, &action, NULL);
prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}
#else /* !CONFIG_LINUX */
static void qemu_init_sigbus(void)
{
}
#endif /* !CONFIG_LINUX */
static QemuThread io_thread;
/* cpu creation */
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;
void qemu_init_cpu_loop(void)
{
qemu_init_sigbus();
qemu_cond_init(&qemu_cpu_cond);
qemu_cond_init(&qemu_pause_cond);
system/cpus: rename qemu_mutex_lock_iothread() to bql_lock() The Big QEMU Lock (BQL) has many names and they are confusing. The actual QemuMutex variable is called qemu_global_mutex but it's commonly referred to as the BQL in discussions and some code comments. The locking APIs, however, are called qemu_mutex_lock_iothread() and qemu_mutex_unlock_iothread(). The "iothread" name is historic and comes from when the main thread was split into into KVM vcpu threads and the "iothread" (now called the main loop thread). I have contributed to the confusion myself by introducing a separate --object iothread, a separate concept unrelated to the BQL. The "iothread" name is no longer appropriate for the BQL. Rename the locking APIs to: - void bql_lock(void) - void bql_unlock(void) - bool bql_locked(void) There are more APIs with "iothread" in their names. Subsequent patches will rename them. There are also comments and documentation that will be updated in later patches. Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Paul Durrant <paul@xen.org> Acked-by: Fabiano Rosas <farosas@suse.de> Acked-by: David Woodhouse <dwmw@amazon.co.uk> Reviewed-by: Cédric Le Goater <clg@kaod.org> Acked-by: Peter Xu <peterx@redhat.com> Acked-by: Eric Farman <farman@linux.ibm.com> Reviewed-by: Harsh Prateek Bora <harshpb@linux.ibm.com> Acked-by: Hyman Huang <yong.huang@smartx.com> Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com> Message-id: 20240102153529.486531-2-stefanha@redhat.com Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2 years ago
qemu_mutex_init(&bql);
qemu_thread_get_self(&io_thread);
}
void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
{
system/cpus: rename qemu_mutex_lock_iothread() to bql_lock() The Big QEMU Lock (BQL) has many names and they are confusing. The actual QemuMutex variable is called qemu_global_mutex but it's commonly referred to as the BQL in discussions and some code comments. The locking APIs, however, are called qemu_mutex_lock_iothread() and qemu_mutex_unlock_iothread(). The "iothread" name is historic and comes from when the main thread was split into into KVM vcpu threads and the "iothread" (now called the main loop thread). I have contributed to the confusion myself by introducing a separate --object iothread, a separate concept unrelated to the BQL. The "iothread" name is no longer appropriate for the BQL. Rename the locking APIs to: - void bql_lock(void) - void bql_unlock(void) - bool bql_locked(void) There are more APIs with "iothread" in their names. Subsequent patches will rename them. There are also comments and documentation that will be updated in later patches. Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Paul Durrant <paul@xen.org> Acked-by: Fabiano Rosas <farosas@suse.de> Acked-by: David Woodhouse <dwmw@amazon.co.uk> Reviewed-by: Cédric Le Goater <clg@kaod.org> Acked-by: Peter Xu <peterx@redhat.com> Acked-by: Eric Farman <farman@linux.ibm.com> Reviewed-by: Harsh Prateek Bora <harshpb@linux.ibm.com> Acked-by: Hyman Huang <yong.huang@smartx.com> Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com> Message-id: 20240102153529.486531-2-stefanha@redhat.com Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2 years ago
do_run_on_cpu(cpu, func, data, &bql);
}
static void qemu_cpu_stop(CPUState *cpu, bool exit)
{
g_assert(qemu_cpu_is_self(cpu));
cpu->stop = false;
cpu->stopped = true;
if (exit) {
cpu_exit(cpu);
}
qemu_cond_broadcast(&qemu_pause_cond);
}
void qemu_process_cpu_events_common(CPUState *cpu)
{
qatomic_set_mb(&cpu->thread_kicked, false);
if (cpu->stop) {
qemu_cpu_stop(cpu, false);
}
process_queued_cpu_work(cpu);
}
void qemu_process_cpu_events(CPUState *cpu)
{
bool slept = false;
cpus: clear exit_request in qemu_process_cpu_events Make the code common to all accelerators: after seeing cpu->exit_request set to true, accelerator code needs to reach qemu_process_cpu_events_common(). So for the common cases where they use qemu_process_cpu_events(), go ahead and clear it in there. Note that the cheap qatomic_set() is enough because at this point the thread has taken the BQL; qatomic_set_mb() is not needed. In particular, this is the ordering of the communication between I/O and vCPU threads is always the same. In the I/O thread: (a) store other memory locations that will be checked if cpu->exit_request or cpu->interrupt_request is 1 (for example cpu->stop or cpu->work_list for cpu->exit_request) (b) cpu_exit(): store-release cpu->exit_request, or (b) cpu_interrupt(): store-release cpu->interrupt_request >>> at this point, cpu->halt_cond is broadcast and the BQL released (c) do the accelerator-specific kick (e.g. write icount_decr for TCG, pthread_kill for KVM, etc.) In the vCPU thread instead the opposite order is respected: (c) the accelerator's execution loop exits thanks to the kick (b) then the inner execution loop checks cpu->interrupt_request and cpu->exit_request. If needed cpu->interrupt_request is converted into cpu->exit_request when work is needed outside the execution loop. (a) then the other memory locations are checked. Some may need to be read under the BQL, but the vCPU thread may also take other locks (e.g. for queued work items) or none at all. qatomic_set_mb() would only be needed if the halt sleep was done outside the BQL (though in that case, cpu->exit_request probably would be replaced by a QemuEvent or something like that). Reviewed-by: Igor Mammedov <imammedo@redhat.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
8 months ago
qatomic_set(&cpu->exit_request, false);
while (cpu_thread_is_idle(cpu)) {
if (!slept) {
slept = true;
qemu_plugin_vcpu_idle_cb(cpu);
}
system/cpus: rename qemu_mutex_lock_iothread() to bql_lock() The Big QEMU Lock (BQL) has many names and they are confusing. The actual QemuMutex variable is called qemu_global_mutex but it's commonly referred to as the BQL in discussions and some code comments. The locking APIs, however, are called qemu_mutex_lock_iothread() and qemu_mutex_unlock_iothread(). The "iothread" name is historic and comes from when the main thread was split into into KVM vcpu threads and the "iothread" (now called the main loop thread). I have contributed to the confusion myself by introducing a separate --object iothread, a separate concept unrelated to the BQL. The "iothread" name is no longer appropriate for the BQL. Rename the locking APIs to: - void bql_lock(void) - void bql_unlock(void) - bool bql_locked(void) There are more APIs with "iothread" in their names. Subsequent patches will rename them. There are also comments and documentation that will be updated in later patches. Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Paul Durrant <paul@xen.org> Acked-by: Fabiano Rosas <farosas@suse.de> Acked-by: David Woodhouse <dwmw@amazon.co.uk> Reviewed-by: Cédric Le Goater <clg@kaod.org> Acked-by: Peter Xu <peterx@redhat.com> Acked-by: Eric Farman <farman@linux.ibm.com> Reviewed-by: Harsh Prateek Bora <harshpb@linux.ibm.com> Acked-by: Hyman Huang <yong.huang@smartx.com> Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com> Message-id: 20240102153529.486531-2-stefanha@redhat.com Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2 years ago
qemu_cond_wait(cpu->halt_cond, &bql);
}
if (slept) {
qemu_plugin_vcpu_resume_cb(cpu);
}
qemu_process_cpu_events_common(cpu);
i386: hvf: add code base from Google's QEMU repository This file begins tracking the files that will be the code base for HVF support in QEMU. This code base is part of Google's QEMU version of their Android emulator, and can be found at https://android.googlesource.com/platform/external/qemu/+/emu-master-dev This code is based on Veertu Inc's vdhh (Veertu Desktop Hosted Hypervisor), found at https://github.com/veertuinc/vdhh. Everything is appropriately licensed under GPL v2-or-later, except for the code inside x86_task.c and x86_task.h, which, deriving from KVM (the Linux kernel), is licensed GPL v2-only. This code base already implements a very great deal of functionality, although Google's version removed from Vertuu's the support for APIC page and hyperv-related stuff. According to the Android Emulator Release Notes, Revision 26.1.3 (August 2017), "Hypervisor.framework is now enabled by default on macOS for 32-bit x86 images to improve performance and macOS compatibility", although we better use with caution for, as the same Revision warns us, "If you experience issues with it specifically, please file a bug report...". The code hasn't seen much update in the last 5 months, so I think that we can further develop the code with occasional visiting Google's repository to see if there has been any update. On top of Google's code, the following changes were made: - add code to the configure script to support the --enable-hvf argument. If the OS is Darwin, it checks for presence of HVF in the system. The patch also adds strings related to HVF in the file qemu-options.hx. QEMU will only support the modern syntax style '-M accel=hvf' no enable hvf; the legacy '-enable-hvf' will not be supported. - fix styling issues - add glue code to cpus.c - move HVFX86EmulatorState field to CPUX86State, changing the the emulation functions to have a parameter with signature 'CPUX86State *' instead of 'CPUState *' so we don't have to get the 'env'. Signed-off-by: Sergio Andres Gomez Del Real <Sergio.G.DelReal@gmail.com> Message-Id: <20170913090522.4022-2-Sergio.G.DelReal@gmail.com> Message-Id: <20170913090522.4022-3-Sergio.G.DelReal@gmail.com> Message-Id: <20170913090522.4022-5-Sergio.G.DelReal@gmail.com> Message-Id: <20170913090522.4022-6-Sergio.G.DelReal@gmail.com> Message-Id: <20170905035457.3753-7-Sergio.G.DelReal@gmail.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
9 years ago
}
void cpus_kick_thread(CPUState *cpu)
{
if (qatomic_read(&cpu->thread_kicked)) {
return;
}
qatomic_set(&cpu->thread_kicked, true);
#ifndef _WIN32
int err = pthread_kill(cpu->thread->thread, SIG_IPI);
if (err && err != ESRCH) {
fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
exit(1);
}
#else
qemu_sem_post(&cpu->sem);
#endif
}
void qemu_cpu_kick(CPUState *cpu)
{
qemu_cond_broadcast(cpu->halt_cond);
if (cpus_accel->kick_vcpu_thread) {
cpus_accel->kick_vcpu_thread(cpu);
} else { /* default */
cpus_kick_thread(cpu);
}
}
void qemu_cpu_kick_self(void)
{
assert(current_cpu);
cpus_kick_thread(current_cpu);
}
bool qemu_cpu_is_self(CPUState *cpu)
{
return qemu_thread_is_self(cpu->thread);
}
bool qemu_in_vcpu_thread(void)
{
return current_cpu && qemu_cpu_is_self(current_cpu);
}
system/cpus: rename qemu_mutex_lock_iothread() to bql_lock() The Big QEMU Lock (BQL) has many names and they are confusing. The actual QemuMutex variable is called qemu_global_mutex but it's commonly referred to as the BQL in discussions and some code comments. The locking APIs, however, are called qemu_mutex_lock_iothread() and qemu_mutex_unlock_iothread(). The "iothread" name is historic and comes from when the main thread was split into into KVM vcpu threads and the "iothread" (now called the main loop thread). I have contributed to the confusion myself by introducing a separate --object iothread, a separate concept unrelated to the BQL. The "iothread" name is no longer appropriate for the BQL. Rename the locking APIs to: - void bql_lock(void) - void bql_unlock(void) - bool bql_locked(void) There are more APIs with "iothread" in their names. Subsequent patches will rename them. There are also comments and documentation that will be updated in later patches. Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Paul Durrant <paul@xen.org> Acked-by: Fabiano Rosas <farosas@suse.de> Acked-by: David Woodhouse <dwmw@amazon.co.uk> Reviewed-by: Cédric Le Goater <clg@kaod.org> Acked-by: Peter Xu <peterx@redhat.com> Acked-by: Eric Farman <farman@linux.ibm.com> Reviewed-by: Harsh Prateek Bora <harshpb@linux.ibm.com> Acked-by: Hyman Huang <yong.huang@smartx.com> Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com> Message-id: 20240102153529.486531-2-stefanha@redhat.com Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2 years ago
QEMU_DEFINE_STATIC_CO_TLS(bool, bql_locked)
bool mutex_is_bql(QemuMutex *mutex)
{
return mutex == &bql;
}
void bql_update_status(bool locked)
{
/* This function should only be used when an update happened.. */
assert(bql_locked() != locked);
set_bql_locked(locked);
}
static uint32_t bql_unlock_blocked;
void bql_block_unlock(bool increase)
{
uint32_t new_value;
assert(bql_locked());
/* check for overflow! */
new_value = bql_unlock_blocked + increase - !increase;
assert((new_value > bql_unlock_blocked) == increase);
bql_unlock_blocked = new_value;
}
system/cpus: rename qemu_mutex_lock_iothread() to bql_lock() The Big QEMU Lock (BQL) has many names and they are confusing. The actual QemuMutex variable is called qemu_global_mutex but it's commonly referred to as the BQL in discussions and some code comments. The locking APIs, however, are called qemu_mutex_lock_iothread() and qemu_mutex_unlock_iothread(). The "iothread" name is historic and comes from when the main thread was split into into KVM vcpu threads and the "iothread" (now called the main loop thread). I have contributed to the confusion myself by introducing a separate --object iothread, a separate concept unrelated to the BQL. The "iothread" name is no longer appropriate for the BQL. Rename the locking APIs to: - void bql_lock(void) - void bql_unlock(void) - bool bql_locked(void) There are more APIs with "iothread" in their names. Subsequent patches will rename them. There are also comments and documentation that will be updated in later patches. Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Paul Durrant <paul@xen.org> Acked-by: Fabiano Rosas <farosas@suse.de> Acked-by: David Woodhouse <dwmw@amazon.co.uk> Reviewed-by: Cédric Le Goater <clg@kaod.org> Acked-by: Peter Xu <peterx@redhat.com> Acked-by: Eric Farman <farman@linux.ibm.com> Reviewed-by: Harsh Prateek Bora <harshpb@linux.ibm.com> Acked-by: Hyman Huang <yong.huang@smartx.com> Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com> Message-id: 20240102153529.486531-2-stefanha@redhat.com Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2 years ago
bool bql_locked(void)
{
system/cpus: rename qemu_mutex_lock_iothread() to bql_lock() The Big QEMU Lock (BQL) has many names and they are confusing. The actual QemuMutex variable is called qemu_global_mutex but it's commonly referred to as the BQL in discussions and some code comments. The locking APIs, however, are called qemu_mutex_lock_iothread() and qemu_mutex_unlock_iothread(). The "iothread" name is historic and comes from when the main thread was split into into KVM vcpu threads and the "iothread" (now called the main loop thread). I have contributed to the confusion myself by introducing a separate --object iothread, a separate concept unrelated to the BQL. The "iothread" name is no longer appropriate for the BQL. Rename the locking APIs to: - void bql_lock(void) - void bql_unlock(void) - bool bql_locked(void) There are more APIs with "iothread" in their names. Subsequent patches will rename them. There are also comments and documentation that will be updated in later patches. Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Paul Durrant <paul@xen.org> Acked-by: Fabiano Rosas <farosas@suse.de> Acked-by: David Woodhouse <dwmw@amazon.co.uk> Reviewed-by: Cédric Le Goater <clg@kaod.org> Acked-by: Peter Xu <peterx@redhat.com> Acked-by: Eric Farman <farman@linux.ibm.com> Reviewed-by: Harsh Prateek Bora <harshpb@linux.ibm.com> Acked-by: Hyman Huang <yong.huang@smartx.com> Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com> Message-id: 20240102153529.486531-2-stefanha@redhat.com Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2 years ago
return get_bql_locked();
}
bool qemu_in_main_thread(void)
{
system/cpus: rename qemu_mutex_lock_iothread() to bql_lock() The Big QEMU Lock (BQL) has many names and they are confusing. The actual QemuMutex variable is called qemu_global_mutex but it's commonly referred to as the BQL in discussions and some code comments. The locking APIs, however, are called qemu_mutex_lock_iothread() and qemu_mutex_unlock_iothread(). The "iothread" name is historic and comes from when the main thread was split into into KVM vcpu threads and the "iothread" (now called the main loop thread). I have contributed to the confusion myself by introducing a separate --object iothread, a separate concept unrelated to the BQL. The "iothread" name is no longer appropriate for the BQL. Rename the locking APIs to: - void bql_lock(void) - void bql_unlock(void) - bool bql_locked(void) There are more APIs with "iothread" in their names. Subsequent patches will rename them. There are also comments and documentation that will be updated in later patches. Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Paul Durrant <paul@xen.org> Acked-by: Fabiano Rosas <farosas@suse.de> Acked-by: David Woodhouse <dwmw@amazon.co.uk> Reviewed-by: Cédric Le Goater <clg@kaod.org> Acked-by: Peter Xu <peterx@redhat.com> Acked-by: Eric Farman <farman@linux.ibm.com> Reviewed-by: Harsh Prateek Bora <harshpb@linux.ibm.com> Acked-by: Hyman Huang <yong.huang@smartx.com> Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com> Message-id: 20240102153529.486531-2-stefanha@redhat.com Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2 years ago
return bql_locked();
}
void rust_bql_mock_lock(void)
{
error_report("This function should be used only from tests");
abort();
}
/*
* The BQL is taken from so many places that it is worth profiling the
* callers directly, instead of funneling them all through a single function.
*/
system/cpus: rename qemu_mutex_lock_iothread() to bql_lock() The Big QEMU Lock (BQL) has many names and they are confusing. The actual QemuMutex variable is called qemu_global_mutex but it's commonly referred to as the BQL in discussions and some code comments. The locking APIs, however, are called qemu_mutex_lock_iothread() and qemu_mutex_unlock_iothread(). The "iothread" name is historic and comes from when the main thread was split into into KVM vcpu threads and the "iothread" (now called the main loop thread). I have contributed to the confusion myself by introducing a separate --object iothread, a separate concept unrelated to the BQL. The "iothread" name is no longer appropriate for the BQL. Rename the locking APIs to: - void bql_lock(void) - void bql_unlock(void) - bool bql_locked(void) There are more APIs with "iothread" in their names. Subsequent patches will rename them. There are also comments and documentation that will be updated in later patches. Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Paul Durrant <paul@xen.org> Acked-by: Fabiano Rosas <farosas@suse.de> Acked-by: David Woodhouse <dwmw@amazon.co.uk> Reviewed-by: Cédric Le Goater <clg@kaod.org> Acked-by: Peter Xu <peterx@redhat.com> Acked-by: Eric Farman <farman@linux.ibm.com> Reviewed-by: Harsh Prateek Bora <harshpb@linux.ibm.com> Acked-by: Hyman Huang <yong.huang@smartx.com> Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com> Message-id: 20240102153529.486531-2-stefanha@redhat.com Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2 years ago
void bql_lock_impl(const char *file, int line)
{
system/cpus: rename qemu_mutex_lock_iothread() to bql_lock() The Big QEMU Lock (BQL) has many names and they are confusing. The actual QemuMutex variable is called qemu_global_mutex but it's commonly referred to as the BQL in discussions and some code comments. The locking APIs, however, are called qemu_mutex_lock_iothread() and qemu_mutex_unlock_iothread(). The "iothread" name is historic and comes from when the main thread was split into into KVM vcpu threads and the "iothread" (now called the main loop thread). I have contributed to the confusion myself by introducing a separate --object iothread, a separate concept unrelated to the BQL. The "iothread" name is no longer appropriate for the BQL. Rename the locking APIs to: - void bql_lock(void) - void bql_unlock(void) - bool bql_locked(void) There are more APIs with "iothread" in their names. Subsequent patches will rename them. There are also comments and documentation that will be updated in later patches. Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Paul Durrant <paul@xen.org> Acked-by: Fabiano Rosas <farosas@suse.de> Acked-by: David Woodhouse <dwmw@amazon.co.uk> Reviewed-by: Cédric Le Goater <clg@kaod.org> Acked-by: Peter Xu <peterx@redhat.com> Acked-by: Eric Farman <farman@linux.ibm.com> Reviewed-by: Harsh Prateek Bora <harshpb@linux.ibm.com> Acked-by: Hyman Huang <yong.huang@smartx.com> Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com> Message-id: 20240102153529.486531-2-stefanha@redhat.com Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2 years ago
QemuMutexLockFunc bql_lock_fn = qatomic_read(&bql_mutex_lock_func);
system/cpus: rename qemu_mutex_lock_iothread() to bql_lock() The Big QEMU Lock (BQL) has many names and they are confusing. The actual QemuMutex variable is called qemu_global_mutex but it's commonly referred to as the BQL in discussions and some code comments. The locking APIs, however, are called qemu_mutex_lock_iothread() and qemu_mutex_unlock_iothread(). The "iothread" name is historic and comes from when the main thread was split into into KVM vcpu threads and the "iothread" (now called the main loop thread). I have contributed to the confusion myself by introducing a separate --object iothread, a separate concept unrelated to the BQL. The "iothread" name is no longer appropriate for the BQL. Rename the locking APIs to: - void bql_lock(void) - void bql_unlock(void) - bool bql_locked(void) There are more APIs with "iothread" in their names. Subsequent patches will rename them. There are also comments and documentation that will be updated in later patches. Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Paul Durrant <paul@xen.org> Acked-by: Fabiano Rosas <farosas@suse.de> Acked-by: David Woodhouse <dwmw@amazon.co.uk> Reviewed-by: Cédric Le Goater <clg@kaod.org> Acked-by: Peter Xu <peterx@redhat.com> Acked-by: Eric Farman <farman@linux.ibm.com> Reviewed-by: Harsh Prateek Bora <harshpb@linux.ibm.com> Acked-by: Hyman Huang <yong.huang@smartx.com> Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com> Message-id: 20240102153529.486531-2-stefanha@redhat.com Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2 years ago
g_assert(!bql_locked());
bql_lock_fn(&bql, file, line);
}
system/cpus: rename qemu_mutex_lock_iothread() to bql_lock() The Big QEMU Lock (BQL) has many names and they are confusing. The actual QemuMutex variable is called qemu_global_mutex but it's commonly referred to as the BQL in discussions and some code comments. The locking APIs, however, are called qemu_mutex_lock_iothread() and qemu_mutex_unlock_iothread(). The "iothread" name is historic and comes from when the main thread was split into into KVM vcpu threads and the "iothread" (now called the main loop thread). I have contributed to the confusion myself by introducing a separate --object iothread, a separate concept unrelated to the BQL. The "iothread" name is no longer appropriate for the BQL. Rename the locking APIs to: - void bql_lock(void) - void bql_unlock(void) - bool bql_locked(void) There are more APIs with "iothread" in their names. Subsequent patches will rename them. There are also comments and documentation that will be updated in later patches. Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Paul Durrant <paul@xen.org> Acked-by: Fabiano Rosas <farosas@suse.de> Acked-by: David Woodhouse <dwmw@amazon.co.uk> Reviewed-by: Cédric Le Goater <clg@kaod.org> Acked-by: Peter Xu <peterx@redhat.com> Acked-by: Eric Farman <farman@linux.ibm.com> Reviewed-by: Harsh Prateek Bora <harshpb@linux.ibm.com> Acked-by: Hyman Huang <yong.huang@smartx.com> Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com> Message-id: 20240102153529.486531-2-stefanha@redhat.com Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2 years ago
void bql_unlock(void)
{
system/cpus: rename qemu_mutex_lock_iothread() to bql_lock() The Big QEMU Lock (BQL) has many names and they are confusing. The actual QemuMutex variable is called qemu_global_mutex but it's commonly referred to as the BQL in discussions and some code comments. The locking APIs, however, are called qemu_mutex_lock_iothread() and qemu_mutex_unlock_iothread(). The "iothread" name is historic and comes from when the main thread was split into into KVM vcpu threads and the "iothread" (now called the main loop thread). I have contributed to the confusion myself by introducing a separate --object iothread, a separate concept unrelated to the BQL. The "iothread" name is no longer appropriate for the BQL. Rename the locking APIs to: - void bql_lock(void) - void bql_unlock(void) - bool bql_locked(void) There are more APIs with "iothread" in their names. Subsequent patches will rename them. There are also comments and documentation that will be updated in later patches. Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Paul Durrant <paul@xen.org> Acked-by: Fabiano Rosas <farosas@suse.de> Acked-by: David Woodhouse <dwmw@amazon.co.uk> Reviewed-by: Cédric Le Goater <clg@kaod.org> Acked-by: Peter Xu <peterx@redhat.com> Acked-by: Eric Farman <farman@linux.ibm.com> Reviewed-by: Harsh Prateek Bora <harshpb@linux.ibm.com> Acked-by: Hyman Huang <yong.huang@smartx.com> Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com> Message-id: 20240102153529.486531-2-stefanha@redhat.com Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2 years ago
g_assert(bql_locked());
g_assert(!bql_unlock_blocked);
system/cpus: rename qemu_mutex_lock_iothread() to bql_lock() The Big QEMU Lock (BQL) has many names and they are confusing. The actual QemuMutex variable is called qemu_global_mutex but it's commonly referred to as the BQL in discussions and some code comments. The locking APIs, however, are called qemu_mutex_lock_iothread() and qemu_mutex_unlock_iothread(). The "iothread" name is historic and comes from when the main thread was split into into KVM vcpu threads and the "iothread" (now called the main loop thread). I have contributed to the confusion myself by introducing a separate --object iothread, a separate concept unrelated to the BQL. The "iothread" name is no longer appropriate for the BQL. Rename the locking APIs to: - void bql_lock(void) - void bql_unlock(void) - bool bql_locked(void) There are more APIs with "iothread" in their names. Subsequent patches will rename them. There are also comments and documentation that will be updated in later patches. Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Paul Durrant <paul@xen.org> Acked-by: Fabiano Rosas <farosas@suse.de> Acked-by: David Woodhouse <dwmw@amazon.co.uk> Reviewed-by: Cédric Le Goater <clg@kaod.org> Acked-by: Peter Xu <peterx@redhat.com> Acked-by: Eric Farman <farman@linux.ibm.com> Reviewed-by: Harsh Prateek Bora <harshpb@linux.ibm.com> Acked-by: Hyman Huang <yong.huang@smartx.com> Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com> Message-id: 20240102153529.486531-2-stefanha@redhat.com Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2 years ago
qemu_mutex_unlock(&bql);
}
void qemu_cond_wait_bql(QemuCond *cond)
{
system/cpus: rename qemu_mutex_lock_iothread() to bql_lock() The Big QEMU Lock (BQL) has many names and they are confusing. The actual QemuMutex variable is called qemu_global_mutex but it's commonly referred to as the BQL in discussions and some code comments. The locking APIs, however, are called qemu_mutex_lock_iothread() and qemu_mutex_unlock_iothread(). The "iothread" name is historic and comes from when the main thread was split into into KVM vcpu threads and the "iothread" (now called the main loop thread). I have contributed to the confusion myself by introducing a separate --object iothread, a separate concept unrelated to the BQL. The "iothread" name is no longer appropriate for the BQL. Rename the locking APIs to: - void bql_lock(void) - void bql_unlock(void) - bool bql_locked(void) There are more APIs with "iothread" in their names. Subsequent patches will rename them. There are also comments and documentation that will be updated in later patches. Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Paul Durrant <paul@xen.org> Acked-by: Fabiano Rosas <farosas@suse.de> Acked-by: David Woodhouse <dwmw@amazon.co.uk> Reviewed-by: Cédric Le Goater <clg@kaod.org> Acked-by: Peter Xu <peterx@redhat.com> Acked-by: Eric Farman <farman@linux.ibm.com> Reviewed-by: Harsh Prateek Bora <harshpb@linux.ibm.com> Acked-by: Hyman Huang <yong.huang@smartx.com> Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com> Message-id: 20240102153529.486531-2-stefanha@redhat.com Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2 years ago
qemu_cond_wait(cond, &bql);
}
void qemu_cond_timedwait_bql(QemuCond *cond, int ms)
{
system/cpus: rename qemu_mutex_lock_iothread() to bql_lock() The Big QEMU Lock (BQL) has many names and they are confusing. The actual QemuMutex variable is called qemu_global_mutex but it's commonly referred to as the BQL in discussions and some code comments. The locking APIs, however, are called qemu_mutex_lock_iothread() and qemu_mutex_unlock_iothread(). The "iothread" name is historic and comes from when the main thread was split into into KVM vcpu threads and the "iothread" (now called the main loop thread). I have contributed to the confusion myself by introducing a separate --object iothread, a separate concept unrelated to the BQL. The "iothread" name is no longer appropriate for the BQL. Rename the locking APIs to: - void bql_lock(void) - void bql_unlock(void) - bool bql_locked(void) There are more APIs with "iothread" in their names. Subsequent patches will rename them. There are also comments and documentation that will be updated in later patches. Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Paul Durrant <paul@xen.org> Acked-by: Fabiano Rosas <farosas@suse.de> Acked-by: David Woodhouse <dwmw@amazon.co.uk> Reviewed-by: Cédric Le Goater <clg@kaod.org> Acked-by: Peter Xu <peterx@redhat.com> Acked-by: Eric Farman <farman@linux.ibm.com> Reviewed-by: Harsh Prateek Bora <harshpb@linux.ibm.com> Acked-by: Hyman Huang <yong.huang@smartx.com> Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com> Message-id: 20240102153529.486531-2-stefanha@redhat.com Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2 years ago
qemu_cond_timedwait(cond, &bql, ms);
}
/* signal CPU creation */
void cpu_thread_signal_created(CPUState *cpu)
{
cpu->created = true;
qemu_cond_signal(&qemu_cpu_cond);
}
/* signal CPU destruction */
void cpu_thread_signal_destroyed(CPUState *cpu)
{
cpu->created = false;
qemu_cond_signal(&qemu_cpu_cond);
}
void cpu_pause(CPUState *cpu)
{
if (qemu_cpu_is_self(cpu)) {
qemu_cpu_stop(cpu, true);
} else {
cpu->stop = true;
cpu_exit(cpu);
}
}
void cpu_resume(CPUState *cpu)
{
cpu->stop = false;
cpu->stopped = false;
qemu_cpu_kick(cpu);
}
static bool all_vcpus_paused(void)
{
CPUState *cpu;
CPU_FOREACH(cpu) {
if (!cpu->stopped) {
return false;
}
}
return true;
}
void pause_all_vcpus(void)
{
CPUState *cpu;
qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
CPU_FOREACH(cpu) {
cpu_pause(cpu);
}
/* We need to drop the replay_lock so any vCPU threads woken up
* can finish their replay tasks
*/
replay_mutex_unlock();
while (!all_vcpus_paused()) {
system/cpus: rename qemu_mutex_lock_iothread() to bql_lock() The Big QEMU Lock (BQL) has many names and they are confusing. The actual QemuMutex variable is called qemu_global_mutex but it's commonly referred to as the BQL in discussions and some code comments. The locking APIs, however, are called qemu_mutex_lock_iothread() and qemu_mutex_unlock_iothread(). The "iothread" name is historic and comes from when the main thread was split into into KVM vcpu threads and the "iothread" (now called the main loop thread). I have contributed to the confusion myself by introducing a separate --object iothread, a separate concept unrelated to the BQL. The "iothread" name is no longer appropriate for the BQL. Rename the locking APIs to: - void bql_lock(void) - void bql_unlock(void) - bool bql_locked(void) There are more APIs with "iothread" in their names. Subsequent patches will rename them. There are also comments and documentation that will be updated in later patches. Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Paul Durrant <paul@xen.org> Acked-by: Fabiano Rosas <farosas@suse.de> Acked-by: David Woodhouse <dwmw@amazon.co.uk> Reviewed-by: Cédric Le Goater <clg@kaod.org> Acked-by: Peter Xu <peterx@redhat.com> Acked-by: Eric Farman <farman@linux.ibm.com> Reviewed-by: Harsh Prateek Bora <harshpb@linux.ibm.com> Acked-by: Hyman Huang <yong.huang@smartx.com> Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com> Message-id: 20240102153529.486531-2-stefanha@redhat.com Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2 years ago
qemu_cond_wait(&qemu_pause_cond, &bql);
/* FIXME: is this needed? */
CPU_FOREACH(cpu) {
qemu_cpu_kick(cpu);
}
}
system/cpus: rename qemu_mutex_lock_iothread() to bql_lock() The Big QEMU Lock (BQL) has many names and they are confusing. The actual QemuMutex variable is called qemu_global_mutex but it's commonly referred to as the BQL in discussions and some code comments. The locking APIs, however, are called qemu_mutex_lock_iothread() and qemu_mutex_unlock_iothread(). The "iothread" name is historic and comes from when the main thread was split into into KVM vcpu threads and the "iothread" (now called the main loop thread). I have contributed to the confusion myself by introducing a separate --object iothread, a separate concept unrelated to the BQL. The "iothread" name is no longer appropriate for the BQL. Rename the locking APIs to: - void bql_lock(void) - void bql_unlock(void) - bool bql_locked(void) There are more APIs with "iothread" in their names. Subsequent patches will rename them. There are also comments and documentation that will be updated in later patches. Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Paul Durrant <paul@xen.org> Acked-by: Fabiano Rosas <farosas@suse.de> Acked-by: David Woodhouse <dwmw@amazon.co.uk> Reviewed-by: Cédric Le Goater <clg@kaod.org> Acked-by: Peter Xu <peterx@redhat.com> Acked-by: Eric Farman <farman@linux.ibm.com> Reviewed-by: Harsh Prateek Bora <harshpb@linux.ibm.com> Acked-by: Hyman Huang <yong.huang@smartx.com> Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com> Message-id: 20240102153529.486531-2-stefanha@redhat.com Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2 years ago
bql_unlock();
replay_mutex_lock();
system/cpus: rename qemu_mutex_lock_iothread() to bql_lock() The Big QEMU Lock (BQL) has many names and they are confusing. The actual QemuMutex variable is called qemu_global_mutex but it's commonly referred to as the BQL in discussions and some code comments. The locking APIs, however, are called qemu_mutex_lock_iothread() and qemu_mutex_unlock_iothread(). The "iothread" name is historic and comes from when the main thread was split into into KVM vcpu threads and the "iothread" (now called the main loop thread). I have contributed to the confusion myself by introducing a separate --object iothread, a separate concept unrelated to the BQL. The "iothread" name is no longer appropriate for the BQL. Rename the locking APIs to: - void bql_lock(void) - void bql_unlock(void) - bool bql_locked(void) There are more APIs with "iothread" in their names. Subsequent patches will rename them. There are also comments and documentation that will be updated in later patches. Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Paul Durrant <paul@xen.org> Acked-by: Fabiano Rosas <farosas@suse.de> Acked-by: David Woodhouse <dwmw@amazon.co.uk> Reviewed-by: Cédric Le Goater <clg@kaod.org> Acked-by: Peter Xu <peterx@redhat.com> Acked-by: Eric Farman <farman@linux.ibm.com> Reviewed-by: Harsh Prateek Bora <harshpb@linux.ibm.com> Acked-by: Hyman Huang <yong.huang@smartx.com> Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com> Message-id: 20240102153529.486531-2-stefanha@redhat.com Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2 years ago
bql_lock();
}
void resume_all_vcpus(void)
{
CPUState *cpu;
if (!runstate_is_running()) {
return;
}
qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
CPU_FOREACH(cpu) {
cpu_resume(cpu);
}
}
void cpu_remove_sync(CPUState *cpu)
{
cpu->stop = true;
cpu->unplug = true;
cpu_exit(cpu);
system/cpus: rename qemu_mutex_lock_iothread() to bql_lock() The Big QEMU Lock (BQL) has many names and they are confusing. The actual QemuMutex variable is called qemu_global_mutex but it's commonly referred to as the BQL in discussions and some code comments. The locking APIs, however, are called qemu_mutex_lock_iothread() and qemu_mutex_unlock_iothread(). The "iothread" name is historic and comes from when the main thread was split into into KVM vcpu threads and the "iothread" (now called the main loop thread). I have contributed to the confusion myself by introducing a separate --object iothread, a separate concept unrelated to the BQL. The "iothread" name is no longer appropriate for the BQL. Rename the locking APIs to: - void bql_lock(void) - void bql_unlock(void) - bool bql_locked(void) There are more APIs with "iothread" in their names. Subsequent patches will rename them. There are also comments and documentation that will be updated in later patches. Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Paul Durrant <paul@xen.org> Acked-by: Fabiano Rosas <farosas@suse.de> Acked-by: David Woodhouse <dwmw@amazon.co.uk> Reviewed-by: Cédric Le Goater <clg@kaod.org> Acked-by: Peter Xu <peterx@redhat.com> Acked-by: Eric Farman <farman@linux.ibm.com> Reviewed-by: Harsh Prateek Bora <harshpb@linux.ibm.com> Acked-by: Hyman Huang <yong.huang@smartx.com> Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com> Message-id: 20240102153529.486531-2-stefanha@redhat.com Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2 years ago
bql_unlock();
qemu_thread_join(cpu->thread);
system/cpus: rename qemu_mutex_lock_iothread() to bql_lock() The Big QEMU Lock (BQL) has many names and they are confusing. The actual QemuMutex variable is called qemu_global_mutex but it's commonly referred to as the BQL in discussions and some code comments. The locking APIs, however, are called qemu_mutex_lock_iothread() and qemu_mutex_unlock_iothread(). The "iothread" name is historic and comes from when the main thread was split into into KVM vcpu threads and the "iothread" (now called the main loop thread). I have contributed to the confusion myself by introducing a separate --object iothread, a separate concept unrelated to the BQL. The "iothread" name is no longer appropriate for the BQL. Rename the locking APIs to: - void bql_lock(void) - void bql_unlock(void) - bool bql_locked(void) There are more APIs with "iothread" in their names. Subsequent patches will rename them. There are also comments and documentation that will be updated in later patches. Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Paul Durrant <paul@xen.org> Acked-by: Fabiano Rosas <farosas@suse.de> Acked-by: David Woodhouse <dwmw@amazon.co.uk> Reviewed-by: Cédric Le Goater <clg@kaod.org> Acked-by: Peter Xu <peterx@redhat.com> Acked-by: Eric Farman <farman@linux.ibm.com> Reviewed-by: Harsh Prateek Bora <harshpb@linux.ibm.com> Acked-by: Hyman Huang <yong.huang@smartx.com> Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com> Message-id: 20240102153529.486531-2-stefanha@redhat.com Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2 years ago
bql_lock();
}
void cpus_register_accel(const AccelOpsClass *ops)
{
assert(ops != NULL);
assert(ops->create_vcpu_thread != NULL); /* mandatory */
assert(ops->handle_interrupt);
cpus_accel = ops;
}
const AccelOpsClass *cpus_get_accel(void)
{
/* broken if we call this early */
assert(cpus_accel);
return cpus_accel;
}
void qemu_init_vcpu(CPUState *cpu)
{
MachineState *ms = MACHINE(qdev_get_machine());
cpu->nr_threads = ms->smp.threads;
cpu->stopped = true;
cpu->random_seed = qemu_guest_random_seed_thread_part1();
if (!cpu->as) {
/* If the target cpu hasn't set up any address spaces itself,
* give it the default one.
*/
cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
}
/* accelerators all implement the AccelOpsClass */
g_assert(cpus_accel != NULL && cpus_accel->create_vcpu_thread != NULL);
cpus_accel->create_vcpu_thread(cpu);
while (!cpu->created) {
system/cpus: rename qemu_mutex_lock_iothread() to bql_lock() The Big QEMU Lock (BQL) has many names and they are confusing. The actual QemuMutex variable is called qemu_global_mutex but it's commonly referred to as the BQL in discussions and some code comments. The locking APIs, however, are called qemu_mutex_lock_iothread() and qemu_mutex_unlock_iothread(). The "iothread" name is historic and comes from when the main thread was split into into KVM vcpu threads and the "iothread" (now called the main loop thread). I have contributed to the confusion myself by introducing a separate --object iothread, a separate concept unrelated to the BQL. The "iothread" name is no longer appropriate for the BQL. Rename the locking APIs to: - void bql_lock(void) - void bql_unlock(void) - bool bql_locked(void) There are more APIs with "iothread" in their names. Subsequent patches will rename them. There are also comments and documentation that will be updated in later patches. Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Paul Durrant <paul@xen.org> Acked-by: Fabiano Rosas <farosas@suse.de> Acked-by: David Woodhouse <dwmw@amazon.co.uk> Reviewed-by: Cédric Le Goater <clg@kaod.org> Acked-by: Peter Xu <peterx@redhat.com> Acked-by: Eric Farman <farman@linux.ibm.com> Reviewed-by: Harsh Prateek Bora <harshpb@linux.ibm.com> Acked-by: Hyman Huang <yong.huang@smartx.com> Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com> Message-id: 20240102153529.486531-2-stefanha@redhat.com Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2 years ago
qemu_cond_wait(&qemu_cpu_cond, &bql);
}
}
void cpu_stop_current(void)
{
if (current_cpu) {
current_cpu->stop = true;
cpu_exit(current_cpu);
}
}
int vm_stop(RunState state)
{
if (qemu_in_vcpu_thread()) {
qemu_system_vmstop_request_prepare();
qemu_system_vmstop_request(state);
/*
* FIXME: should not return to device code in case
* vm_stop() has been requested.
*/
cpu_stop_current();
return 0;
}
vl: introduce vm_shutdown() Commit 00d09fdbbae5f7864ce754913efc84c12fdf9f1a ("vl: pause vcpus before stopping iothreads") and commit dce8921b2baaf95974af8176406881872067adfa ("iothread: Stop threads before main() quits") tried to work around the fact that emulation was still active during termination by stopping iothreads. They suffer from race conditions: 1. virtio_scsi_handle_cmd_vq() racing with iothread_stop_all() hits the virtio_scsi_ctx_check() assertion failure because the BDS AioContext has been modified by iothread_stop_all(). 2. Guest vq kick racing with main loop termination leaves a readable ioeventfd that is handled by the next aio_poll() when external clients are enabled again, resulting in unwanted emulation activity. This patch obsoletes those commits by fully disabling emulation activity when vcpus are stopped. Use the new vm_shutdown() function instead of pause_all_vcpus() so that vm change state handlers are invoked too. Virtio devices will now stop their ioeventfds, preventing further emulation activity after vm_stop(). Note that vm_stop(RUN_STATE_SHUTDOWN) cannot be used because it emits a QMP STOP event that may affect existing clients. It is no longer necessary to call replay_disable_events() directly since vm_shutdown() does so already. Drop iothread_stop_all() since it is no longer used. Cc: Fam Zheng <famz@redhat.com> Cc: Kevin Wolf <kwolf@redhat.com> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Fam Zheng <famz@redhat.com> Acked-by: Paolo Bonzini <pbonzini@redhat.com> Message-id: 20180307144205.20619-5-stefanha@redhat.com Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8 years ago
return do_vm_stop(state, true);
}
/**
* Prepare for (re)starting the VM.
cpus: stop vm in suspended runstate Currently, a vm in the suspended state is not completely stopped. The VCPUs have been paused, but the cpu clock still runs, and runstate notifiers for the transition to stopped have not been called. This causes problems for live migration. Stale cpu timers_state is saved to the migration stream, causing time errors in the guest when it wakes from suspend, and state that would have been modified by runstate notifiers is wrong. Modify vm_stop to completely stop the vm if the current state is suspended, transition to RUN_STATE_PAUSED, and remember that the machine was suspended. Modify vm_start to restore the suspended state. This affects all callers of vm_stop and vm_start, notably, the qapi stop and cont commands: old behavior: RUN_STATE_SUSPENDED --> stop --> RUN_STATE_SUSPENDED new behavior: RUN_STATE_SUSPENDED --> stop --> RUN_STATE_PAUSED RUN_STATE_PAUSED --> cont --> RUN_STATE_SUSPENDED For example: (qemu) info status VM status: paused (suspended) (qemu) stop (qemu) info status VM status: paused (qemu) system_wakeup Error: Unable to wake up: guest is not in suspended state (qemu) cont (qemu) info status VM status: paused (suspended) (qemu) system_wakeup (qemu) info status VM status: running Suggested-by: Peter Xu <peterx@redhat.com> Signed-off-by: Steve Sistare <steven.sistare@oracle.com> Reviewed-by: Peter Xu <peterx@redhat.com> Link: https://lore.kernel.org/r/1704312341-66640-3-git-send-email-steven.sistare@oracle.com Signed-off-by: Peter Xu <peterx@redhat.com>
2 years ago
* Returns 0 if the vCPUs should be restarted, -1 on an error condition,
* and 1 otherwise.
*/
int vm_prepare_start(bool step_pending)
{
cpus: stop vm in suspended runstate Currently, a vm in the suspended state is not completely stopped. The VCPUs have been paused, but the cpu clock still runs, and runstate notifiers for the transition to stopped have not been called. This causes problems for live migration. Stale cpu timers_state is saved to the migration stream, causing time errors in the guest when it wakes from suspend, and state that would have been modified by runstate notifiers is wrong. Modify vm_stop to completely stop the vm if the current state is suspended, transition to RUN_STATE_PAUSED, and remember that the machine was suspended. Modify vm_start to restore the suspended state. This affects all callers of vm_stop and vm_start, notably, the qapi stop and cont commands: old behavior: RUN_STATE_SUSPENDED --> stop --> RUN_STATE_SUSPENDED new behavior: RUN_STATE_SUSPENDED --> stop --> RUN_STATE_PAUSED RUN_STATE_PAUSED --> cont --> RUN_STATE_SUSPENDED For example: (qemu) info status VM status: paused (suspended) (qemu) stop (qemu) info status VM status: paused (qemu) system_wakeup Error: Unable to wake up: guest is not in suspended state (qemu) cont (qemu) info status VM status: paused (suspended) (qemu) system_wakeup (qemu) info status VM status: running Suggested-by: Peter Xu <peterx@redhat.com> Signed-off-by: Steve Sistare <steven.sistare@oracle.com> Reviewed-by: Peter Xu <peterx@redhat.com> Link: https://lore.kernel.org/r/1704312341-66640-3-git-send-email-steven.sistare@oracle.com Signed-off-by: Peter Xu <peterx@redhat.com>
2 years ago
int ret = vm_was_suspended ? 1 : 0;
RunState state = vm_was_suspended ? RUN_STATE_SUSPENDED : RUN_STATE_RUNNING;
RunState requested;
qemu_vmstop_requested(&requested);
if (runstate_is_running() && requested == RUN_STATE__MAX) {
return -1;
}
/* Ensure that a STOP/RESUME pair of events is emitted if a
* vmstop request was pending. The BLOCK_IO_ERROR event, for
* example, according to documentation is always followed by
* the STOP event.
*/
if (runstate_is_running()) {
qapi_event_send_stop();
qapi_event_send_resume();
cpus: Fix event order on resume of stopped guest When resume of a stopped guest immediately runs into block device errors, the BLOCK_IO_ERROR event is sent before the RESUME event. Reproducer: 1. Create a scratch image $ dd if=/dev/zero of=scratch.img bs=1M count=100 Size doesn't actually matter. 2. Prepare blkdebug configuration: $ cat >blkdebug.conf <<EOF [inject-error] event = "write_aio" errno = "5" EOF Note that errno 5 is EIO. 3. Run a guest with an additional scratch disk, i.e. with additional arguments -drive if=none,id=scratch-drive,format=raw,werror=stop,file=blkdebug:blkdebug.conf:scratch.img -device virtio-blk-pci,id=scratch,drive=scratch-drive The blkdebug part makes all writes to the scratch drive fail with EIO. The werror=stop pauses the guest on write errors. 4. Connect to the QMP socket e.g. like this: $ socat UNIX:/your/qmp/socket READLINE,history=$HOME/.qmp_history,prompt='QMP> ' Issue QMP command 'qmp_capabilities': QMP> { "execute": "qmp_capabilities" } 5. Boot the guest. 6. In the guest, write to the scratch disk, e.g. like this: # dd if=/dev/zero of=/dev/vdb count=1 Do double-check the device specified with of= is actually the scratch device! 7. Issue QMP command 'cont': QMP> { "execute": "cont" } After step 6, I get a BLOCK_IO_ERROR event followed by a STOP event. Good. After step 7, I get BLOCK_IO_ERROR, then RESUME, then STOP. Not so good; I'd expect RESUME, then BLOCK_IO_ERROR, then STOP. The funny event order confuses libvirt: virsh -r domstate DOMAIN --reason reports "paused (unknown)" rather than "paused (I/O error)". The culprit is vm_prepare_start(). /* Ensure that a STOP/RESUME pair of events is emitted if a * vmstop request was pending. The BLOCK_IO_ERROR event, for * example, according to documentation is always followed by * the STOP event. */ if (runstate_is_running()) { qapi_event_send_stop(&error_abort); res = -1; } else { replay_enable_events(); cpu_enable_ticks(); runstate_set(RUN_STATE_RUNNING); vm_state_notify(1, RUN_STATE_RUNNING); } /* We are sending this now, but the CPUs will be resumed shortly later */ qapi_event_send_resume(&error_abort); return res; When resuming a stopped guest, we take the else branch before we get to sending RESUME. vm_state_notify() runs virtio_vmstate_change(), among other things. This restarts I/O, triggering the BLOCK_IO_ERROR event. Reshuffle vm_prepare_start() to send the RESUME event earlier. Fixes RHBZ 1566153. Cc: Paolo Bonzini <pbonzini@redhat.com> Signed-off-by: Markus Armbruster <armbru@redhat.com> Message-Id: <20180423084518.2426-1-armbru@redhat.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
8 years ago
return -1;
}
/*
* WHPX accelerator needs to know whether we are going to step
* any CPUs, before starting the first one.
*/
accel_pre_resume(MACHINE(qdev_get_machine()), step_pending);
/* We are sending this now, but the CPUs will be resumed shortly later */
qapi_event_send_resume();
cpus: Fix event order on resume of stopped guest When resume of a stopped guest immediately runs into block device errors, the BLOCK_IO_ERROR event is sent before the RESUME event. Reproducer: 1. Create a scratch image $ dd if=/dev/zero of=scratch.img bs=1M count=100 Size doesn't actually matter. 2. Prepare blkdebug configuration: $ cat >blkdebug.conf <<EOF [inject-error] event = "write_aio" errno = "5" EOF Note that errno 5 is EIO. 3. Run a guest with an additional scratch disk, i.e. with additional arguments -drive if=none,id=scratch-drive,format=raw,werror=stop,file=blkdebug:blkdebug.conf:scratch.img -device virtio-blk-pci,id=scratch,drive=scratch-drive The blkdebug part makes all writes to the scratch drive fail with EIO. The werror=stop pauses the guest on write errors. 4. Connect to the QMP socket e.g. like this: $ socat UNIX:/your/qmp/socket READLINE,history=$HOME/.qmp_history,prompt='QMP> ' Issue QMP command 'qmp_capabilities': QMP> { "execute": "qmp_capabilities" } 5. Boot the guest. 6. In the guest, write to the scratch disk, e.g. like this: # dd if=/dev/zero of=/dev/vdb count=1 Do double-check the device specified with of= is actually the scratch device! 7. Issue QMP command 'cont': QMP> { "execute": "cont" } After step 6, I get a BLOCK_IO_ERROR event followed by a STOP event. Good. After step 7, I get BLOCK_IO_ERROR, then RESUME, then STOP. Not so good; I'd expect RESUME, then BLOCK_IO_ERROR, then STOP. The funny event order confuses libvirt: virsh -r domstate DOMAIN --reason reports "paused (unknown)" rather than "paused (I/O error)". The culprit is vm_prepare_start(). /* Ensure that a STOP/RESUME pair of events is emitted if a * vmstop request was pending. The BLOCK_IO_ERROR event, for * example, according to documentation is always followed by * the STOP event. */ if (runstate_is_running()) { qapi_event_send_stop(&error_abort); res = -1; } else { replay_enable_events(); cpu_enable_ticks(); runstate_set(RUN_STATE_RUNNING); vm_state_notify(1, RUN_STATE_RUNNING); } /* We are sending this now, but the CPUs will be resumed shortly later */ qapi_event_send_resume(&error_abort); return res; When resuming a stopped guest, we take the else branch before we get to sending RESUME. vm_state_notify() runs virtio_vmstate_change(), among other things. This restarts I/O, triggering the BLOCK_IO_ERROR event. Reshuffle vm_prepare_start() to send the RESUME event earlier. Fixes RHBZ 1566153. Cc: Paolo Bonzini <pbonzini@redhat.com> Signed-off-by: Markus Armbruster <armbru@redhat.com> Message-Id: <20180423084518.2426-1-armbru@redhat.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
8 years ago
cpu_enable_ticks();
cpus: stop vm in suspended runstate Currently, a vm in the suspended state is not completely stopped. The VCPUs have been paused, but the cpu clock still runs, and runstate notifiers for the transition to stopped have not been called. This causes problems for live migration. Stale cpu timers_state is saved to the migration stream, causing time errors in the guest when it wakes from suspend, and state that would have been modified by runstate notifiers is wrong. Modify vm_stop to completely stop the vm if the current state is suspended, transition to RUN_STATE_PAUSED, and remember that the machine was suspended. Modify vm_start to restore the suspended state. This affects all callers of vm_stop and vm_start, notably, the qapi stop and cont commands: old behavior: RUN_STATE_SUSPENDED --> stop --> RUN_STATE_SUSPENDED new behavior: RUN_STATE_SUSPENDED --> stop --> RUN_STATE_PAUSED RUN_STATE_PAUSED --> cont --> RUN_STATE_SUSPENDED For example: (qemu) info status VM status: paused (suspended) (qemu) stop (qemu) info status VM status: paused (qemu) system_wakeup Error: Unable to wake up: guest is not in suspended state (qemu) cont (qemu) info status VM status: paused (suspended) (qemu) system_wakeup (qemu) info status VM status: running Suggested-by: Peter Xu <peterx@redhat.com> Signed-off-by: Steve Sistare <steven.sistare@oracle.com> Reviewed-by: Peter Xu <peterx@redhat.com> Link: https://lore.kernel.org/r/1704312341-66640-3-git-send-email-steven.sistare@oracle.com Signed-off-by: Peter Xu <peterx@redhat.com>
2 years ago
runstate_set(state);
vm_state_notify(1, state);
vm_was_suspended = false;
return ret;
}
void vm_start(void)
{
if (!vm_prepare_start(false)) {
resume_all_vcpus();
}
}
void vm_resume(RunState state)
{
if (runstate_is_live(state)) {
vm_start();
} else {
runstate_set(state);
}
}
/* does a state transition even if the VM is already stopped,
current state is forgotten forever */
int vm_stop_force_state(RunState state)
{
cpus: stop vm in suspended runstate Currently, a vm in the suspended state is not completely stopped. The VCPUs have been paused, but the cpu clock still runs, and runstate notifiers for the transition to stopped have not been called. This causes problems for live migration. Stale cpu timers_state is saved to the migration stream, causing time errors in the guest when it wakes from suspend, and state that would have been modified by runstate notifiers is wrong. Modify vm_stop to completely stop the vm if the current state is suspended, transition to RUN_STATE_PAUSED, and remember that the machine was suspended. Modify vm_start to restore the suspended state. This affects all callers of vm_stop and vm_start, notably, the qapi stop and cont commands: old behavior: RUN_STATE_SUSPENDED --> stop --> RUN_STATE_SUSPENDED new behavior: RUN_STATE_SUSPENDED --> stop --> RUN_STATE_PAUSED RUN_STATE_PAUSED --> cont --> RUN_STATE_SUSPENDED For example: (qemu) info status VM status: paused (suspended) (qemu) stop (qemu) info status VM status: paused (qemu) system_wakeup Error: Unable to wake up: guest is not in suspended state (qemu) cont (qemu) info status VM status: paused (suspended) (qemu) system_wakeup (qemu) info status VM status: running Suggested-by: Peter Xu <peterx@redhat.com> Signed-off-by: Steve Sistare <steven.sistare@oracle.com> Reviewed-by: Peter Xu <peterx@redhat.com> Link: https://lore.kernel.org/r/1704312341-66640-3-git-send-email-steven.sistare@oracle.com Signed-off-by: Peter Xu <peterx@redhat.com>
2 years ago
if (runstate_is_live(runstate_get())) {
return vm_stop(state);
} else {
int ret;
runstate_set(state);
bdrv_drain_all();
/* Make sure to return an error if the flush in a previous vm_stop()
* failed. */
ret = bdrv_flush_all();
trace_vm_stop_flush_all(ret);
return ret;
}
}
void qmp_memsave(uint64_t addr, uint64_t size, const char *filename,
bool has_cpu, int64_t cpu_index, Error **errp)
{
FILE *f;
uint64_t l;
CPUState *cpu;
uint8_t buf[1024];
uint64_t orig_addr = addr, orig_size = size;
if (!has_cpu) {
cpu_index = 0;
}
cpu = qemu_get_cpu(cpu_index);
if (cpu == NULL) {
error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
"a CPU number");
return;
}
f = fopen(filename, "wb");
if (!f) {
error_setg_file_open(errp, errno, filename);
return;
}
while (size != 0) {
l = sizeof(buf);
if (l > size)
l = size;
if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRIu64
" specified", orig_addr, orig_size);
goto exit;
}
if (fwrite(buf, 1, l, f) != l) {
error_setg(errp, "writing memory to '%s' failed",
filename);
goto exit;
}
addr += l;
size -= l;
}
exit:
fclose(f);
}
void qmp_pmemsave(uint64_t addr, uint64_t size, const char *filename,
Error **errp)
{
FILE *f;
uint64_t l;
uint8_t buf[1024];
f = fopen(filename, "wb");
if (!f) {
error_setg_file_open(errp, errno, filename);
return;
}
while (size != 0) {
l = sizeof(buf);
if (l > size)
l = size;
cpu_physical_memory_read(addr, buf, l);
if (fwrite(buf, 1, l, f) != l) {
error_setg(errp, "writing memory to '%s' failed",
filename);
goto exit;
}
addr += l;
size -= l;
}
exit:
fclose(f);
}
void qmp_inject_nmi(Error **errp)
{
nmi_monitor_handle(monitor_get_cpu_index(monitor_cur()), errp);
}