@ -91,6 +91,7 @@ struct KVMState
int many_ioeventfds ;
int intx_set_mask ;
bool sync_mmu ;
bool manual_dirty_log_protect ;
/* The man page (and POSIX) say ioctl numbers are signed int, but
 * they're not.  Linux, glibc and *BSD all treat ioctl numbers as
 * unsigned, and treating them as signed here can break things */
@ -560,6 +561,159 @@ out:
return ret ;
}
/*
 * Alignment requirement for KVM_CLEAR_DIRTY_LOG - 64 pages.
 *
 * KVM_CLEAR_LOG_SHIFT: log2 of the page-count granularity (1 << 6 == 64).
 * KVM_CLEAR_LOG_ALIGN: the same granularity expressed in bytes, i.e. the
 *                      host page size shifted left by KVM_CLEAR_LOG_SHIFT.
 * KVM_CLEAR_LOG_MASK:  arithmetic negation of the alignment; AND-ing a byte
 *                      offset with it is used to round the offset down to a
 *                      KVM_CLEAR_LOG_ALIGN boundary.
 *                      NOTE(review): this relies on
 *                      qemu_real_host_page_size being an unsigned power of
 *                      two - confirm against its declaration.
 */
# define KVM_CLEAR_LOG_SHIFT 6
# define KVM_CLEAR_LOG_ALIGN (qemu_real_host_page_size << KVM_CLEAR_LOG_SHIFT)
# define KVM_CLEAR_LOG_MASK (-KVM_CLEAR_LOG_ALIGN)
/**
 * kvm_physical_log_clear - Clear the kernel's dirty bitmap for range
 *
 * NOTE: this will be a no-op if we haven't enabled manual dirty log
 * protection in the host kernel because in that case this operation
 * will be done within log_sync().
 *
 * The requested range is widened as needed to meet the alignment rules
 * of KVM_CLEAR_DIRTY_LOG, but only bits that belong to the caller's
 * range are ever passed to the kernel as "to be cleared".
 *
 * @kml:     the kvm memory listener
 * @section: the memory range to clear dirty bitmap
 *
 * Returns 0 on success or when there is nothing to do, otherwise the
 * negative value reported by kvm_vm_ioctl() for the failed
 * KVM_CLEAR_DIRTY_LOG request.
 */
static int kvm_physical_log_clear(KVMMemoryListener *kml,
                                  MemoryRegionSection *section)
{
    KVMState *s = kvm_state;
    struct kvm_clear_dirty_log d;
    uint64_t start, end, bmap_start, start_delta, bmap_npages, size;
    unsigned long *bmap_clear = NULL, psize = qemu_real_host_page_size;
    KVMSlot *mem = NULL;
    int ret, i;

    if (!s->manual_dirty_log_protect) {
        /* No need to do explicit clear */
        return 0;
    }

    start = section->offset_within_address_space;
    size = int128_get64(section->size);

    if (!size) {
        /* Nothing more we can do... */
        return 0;
    }

    kvm_slots_lock(kml);

    /* Find any possible slot that covers the section */
    for (i = 0; i < s->nr_slots; i++) {
        mem = &kml->slots[i];
        if (mem->start_addr <= start &&
            start + size <= mem->start_addr + mem->memory_size) {
            break;
        }
    }

    /*
     * We should always find one memslot until this point, otherwise
     * there could be something wrong from the upper layer
     */
    assert(mem && i != s->nr_slots);

    /*
     * We need to extend either the start or the size or both to
     * satisfy the KVM interface requirement.  Firstly, do the start
     * page alignment on 64 host pages
     */
    bmap_start = (start - mem->start_addr) & KVM_CLEAR_LOG_MASK;
    /* Bytes between the aligned-down start and the requested start */
    start_delta = start - mem->start_addr - bmap_start;
    /* From here on bmap_start is a page index within the memslot */
    bmap_start /= psize;

    /*
     * The kernel interface has restriction on the size too, that either:
     *
     * (1) the size is 64 host pages aligned (just like the start), or
     * (2) the size fills up until the end of the KVM memslot.
     */
    bmap_npages = DIV_ROUND_UP(size + start_delta, KVM_CLEAR_LOG_ALIGN)
        << KVM_CLEAR_LOG_SHIFT;
    end = mem->memory_size / psize;
    if (bmap_npages > end - bmap_start) {
        bmap_npages = end - bmap_start;
    }
    /* Convert the leading gap from bytes to pages as well */
    start_delta /= psize;

    /*
     * Prepare the bitmap to clear dirty bits.  Here we must guarantee
     * that we won't clear any unknown dirty bits otherwise we might
     * accidentally clear some set bits which are not yet synced from
     * the kernel into QEMU's bitmap, then we'll lose track of the
     * guest modifications upon those pages (which can directly lead
     * to guest data loss or panic after migration).
     *
     * Layout of the KVMSlot.dirty_bmap:
     *
     *                   |<-------- bmap_npages -----------..>|
     *                                                     [1]
     *                     start_delta         size
     *  |----------------|-------------|------------------|------------|
     *  ^                ^             ^                               ^
     *  |                |             |                               |
     * start          bmap_start     (start)                         end
     * of memslot                                             of memslot
     *
     * [1] bmap_npages can be aligned to either 64 pages or the end of slot
     */

    assert(bmap_start % BITS_PER_LONG == 0);
    /* We should never do log_clear before log_sync */
    assert(mem->dirty_bmap);
    if (start_delta) {
        /* Slow path - we need to manipulate a temp bitmap */
        bmap_clear = bitmap_new(bmap_npages);
        bitmap_copy_with_src_offset(bmap_clear, mem->dirty_bmap,
                                    bmap_start, start_delta + size / psize);
        /*
         * We need to fill the holes at start because that was not
         * specified by the caller and we extended the bitmap only for
         * 64 pages alignment
         */
        bitmap_clear(bmap_clear, 0, start_delta);
        d.dirty_bitmap = bmap_clear;
    } else {
        /* Fast path - start address aligns well with BITS_PER_LONG */
        d.dirty_bitmap = mem->dirty_bmap + BIT_WORD(bmap_start);
    }

    d.first_page = bmap_start;
    /* It should never overflow.  If it happens, say something */
    assert(bmap_npages <= UINT32_MAX);
    d.num_pages = bmap_npages;
    d.slot = mem->slot | (kml->as_id << 16);

    /*
     * kvm_vm_ioctl() hands back a negative errno value on failure, not
     * the raw -1 of ioctl(2).  Comparing against -1 would therefore only
     * catch EPERM and let every other failure pass as success, and
     * reading errno afterwards would not be reliable.  Treat any
     * negative return as the error code.
     */
    ret = kvm_vm_ioctl(s, KVM_CLEAR_DIRTY_LOG, &d);
    if (ret < 0) {
        error_report(" %s: KVM_CLEAR_DIRTY_LOG failed, slot=%d, "
                     " start=0x% " PRIx64 " , size=0x% " PRIx32 " , errno=%d " ,
                     __func__, d.slot, (uint64_t)d.first_page,
                     (uint32_t)d.num_pages, ret);
    } else {
        ret = 0;
        trace_kvm_clear_dirty_log(d.slot, d.first_page, d.num_pages);
    }

    /*
     * After we have updated the remote dirty bitmap, we update the
     * cached bitmap as well for the memslot, then if another user
     * clears the same region we know we shouldn't clear it again on
     * the remote otherwise it's data loss as well.
     */
    bitmap_clear(mem->dirty_bmap, bmap_start + start_delta,
                 size / psize);
    /* This handles the NULL case well */
    g_free(bmap_clear);

    kvm_slots_unlock(kml);

    return ret;
}
static void kvm_coalesce_mmio_region ( MemoryListener * listener ,
MemoryRegionSection * secion ,
hwaddr start , hwaddr size )
@ -894,6 +1048,22 @@ static void kvm_log_sync(MemoryListener *listener,
}
}
/*
 * Dirty-log clear callback for a KVM memory listener.
 *
 * Recovers the KVMMemoryListener that embeds @listener and asks
 * kvm_physical_log_clear() to clear the dirty log for @section.  A
 * negative result is treated as fatal: the failure is reported (once)
 * and the process aborts.
 */
static void kvm_log_clear(MemoryListener *listener,
                          MemoryRegionSection *section)
{
    KVMMemoryListener *kml;

    kml = container_of(listener, KVMMemoryListener, listener);

    if (kvm_physical_log_clear(kml, section) >= 0) {
        /* Cleared (or nothing to do) */
        return;
    }

    error_report_once(" %s: kvm log clear failed: mr=%s "
                      " offset=% " HWADDR_PRIx " size=% " PRIx64 , __func__,
                      section->mr->name, section->offset_within_region,
                      int128_get64(section->size));
    abort();
}
static void kvm_mem_ioeventfd_add ( MemoryListener * listener ,
MemoryRegionSection * section ,
bool match_data , uint64_t data ,
@ -985,6 +1155,7 @@ void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml,
kml - > listener . log_start = kvm_log_start ;
kml - > listener . log_stop = kvm_log_stop ;
kml - > listener . log_sync = kvm_log_sync ;
kml - > listener . log_clear = kvm_log_clear ;
kml - > listener . priority = 10 ;
memory_listener_register ( & kml - > listener , as ) ;
@ -1709,6 +1880,17 @@ static int kvm_init(MachineState *ms)
s - > coalesced_pio = s - > coalesced_mmio & &
kvm_check_extension ( s , KVM_CAP_COALESCED_PIO ) ;
s - > manual_dirty_log_protect =
kvm_check_extension ( s , KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 ) ;
if ( s - > manual_dirty_log_protect ) {
ret = kvm_vm_enable_cap ( s , KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 , 0 , 1 ) ;
if ( ret ) {
warn_report ( " Trying to enable KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 "
" but failed. Falling back to the legacy mode. " ) ;
s - > manual_dirty_log_protect = false ;
}
}
# ifdef KVM_CAP_VCPU_EVENTS
s - > vcpu_events = kvm_check_extension ( s , KVM_CAP_VCPU_EVENTS ) ;
# endif