@@ -46,8 +46,10 @@
 #include "qemu/osdep.h"
 #include <poll.h>
 #include "qapi/error.h"
+#include "qemu/defer-call.h"
 #include "qemu/rcu_queue.h"
 #include "aio-posix.h"
+#include "trace.h"
 
 enum {
     FDMON_IO_URING_ENTRIES = 128, /* sq/cq ring size */
@@ -76,8 +78,8 @@ static inline int pfd_events_from_poll(int poll_events)
 }
 
 /*
- * Returns an sqe for submitting a request. Only be called within
- * fdmon_io_uring_wait().
+ * Returns an sqe for submitting a request. Only called from the AioContext
+ * thread.
  */
 static struct io_uring_sqe *get_sqe(AioContext *ctx)
 {
@@ -168,23 +170,46 @@ static void fdmon_io_uring_update(AioContext *ctx,
     }
 }
 
+static void fdmon_io_uring_add_sqe(AioContext *ctx,
+        void (*prep_sqe)(struct io_uring_sqe *sqe, void *opaque),
+        void *opaque, CqeHandler *cqe_handler)
+{
+    struct io_uring_sqe *sqe = get_sqe(ctx);
+
+    prep_sqe(sqe, opaque);
+    io_uring_sqe_set_data(sqe, cqe_handler);
+
+    trace_fdmon_io_uring_add_sqe(ctx, opaque, sqe->opcode, sqe->fd, sqe->off,
+                                 cqe_handler);
+}
+
+static void fdmon_special_cqe_handler(CqeHandler *cqe_handler)
+{
+    /*
+     * This is an empty function that is never called. It is used as a function
+     * pointer to distinguish it from ordinary cqe handlers.
+     */
+}
+
 static void add_poll_add_sqe(AioContext *ctx, AioHandler *node)
 {
     struct io_uring_sqe *sqe = get_sqe(ctx);
     int events = poll_events_from_pfd(node->pfd.events);
 
     io_uring_prep_poll_add(sqe, node->pfd.fd, events);
-    io_uring_sqe_set_data(sqe, node);
+    node->internal_cqe_handler.cb = fdmon_special_cqe_handler;
+    io_uring_sqe_set_data(sqe, &node->internal_cqe_handler);
 }
 
 static void add_poll_remove_sqe(AioContext *ctx, AioHandler *node)
 {
     struct io_uring_sqe *sqe = get_sqe(ctx);
+    CqeHandler *cqe_handler = &node->internal_cqe_handler;
 
 #ifdef LIBURING_HAVE_DATA64
-    io_uring_prep_poll_remove(sqe, (uintptr_t)node);
+    io_uring_prep_poll_remove(sqe, (uintptr_t)cqe_handler);
 #else
-    io_uring_prep_poll_remove(sqe, node);
+    io_uring_prep_poll_remove(sqe, cqe_handler);
 #endif
     io_uring_sqe_set_data(sqe, NULL);
 }
@@ -219,19 +244,13 @@ static void fill_sq_ring(AioContext *ctx)
     }
 }
 
-/* Returns true if a handler became ready */
-static bool process_cqe(AioContext *ctx,
-                        AioHandlerList *ready_list,
-                        struct io_uring_cqe *cqe)
+static bool process_cqe_aio_handler(AioContext *ctx,
+                                    AioHandlerList *ready_list,
+                                    AioHandler *node,
+                                    struct io_uring_cqe *cqe)
 {
-    AioHandler *node = io_uring_cqe_get_data(cqe);
     unsigned flags;
 
-    /* poll_timeout and poll_remove have a zero user_data field */
-    if (!node) {
-        return false;
-    }
-
     /*
      * Deletion can only happen when IORING_OP_POLL_ADD completes. If we race
      * with enqueue() here then we can safely clear the FDMON_IO_URING_REMOVE
@@ -255,6 +274,35 @@ static bool process_cqe(AioContext *ctx,
     return true;
 }
 
+/* Returns true if a handler became ready */
+static bool process_cqe(AioContext *ctx,
+                        AioHandlerList *ready_list,
+                        struct io_uring_cqe *cqe)
+{
+    CqeHandler *cqe_handler = io_uring_cqe_get_data(cqe);
+
+    /* poll_timeout and poll_remove have a zero user_data field */
+    if (!cqe_handler) {
+        return false;
+    }
+
+    /*
+     * Special handling for AioHandler cqes. They need ready_list and have a
+     * return value.
+     */
+    if (cqe_handler->cb == fdmon_special_cqe_handler) {
+        AioHandler *node = container_of(cqe_handler, AioHandler,
+                                        internal_cqe_handler);
+        return process_cqe_aio_handler(ctx, ready_list, node, cqe);
+    }
+
+    cqe_handler->cqe = *cqe;
+
+    /* Handlers are invoked later by fdmon_io_uring_dispatch() */
+    QSIMPLEQ_INSERT_TAIL(&ctx->cqe_handler_ready_list, cqe_handler, next);
+    return false;
+}
+
 static int process_cq_ring(AioContext *ctx, AioHandlerList *ready_list)
 {
     struct io_uring *ring = &ctx->fdmon_io_uring;
@@ -299,6 +347,32 @@ static bool fdmon_io_uring_gsource_check(AioContext *ctx)
     return g_source_query_unix_fd(&ctx->source, tag) & G_IO_IN;
 }
 
+/* Dispatch CQE handlers that are ready */
+static bool fdmon_io_uring_dispatch(AioContext *ctx)
+{
+    CqeHandlerSimpleQ *ready_list = &ctx->cqe_handler_ready_list;
+    bool progress = false;
+
+    /* Handlers may use defer_call() to coalesce frequent operations */
+    defer_call_begin();
+
+    while (!QSIMPLEQ_EMPTY(ready_list)) {
+        CqeHandler *cqe_handler = QSIMPLEQ_FIRST(ready_list);
+
+        QSIMPLEQ_REMOVE_HEAD(ready_list, next);
+
+        trace_fdmon_io_uring_cqe_handler(ctx, cqe_handler,
+                                         cqe_handler->cqe.res);
+
+        cqe_handler->cb(cqe_handler);
+        progress = true;
+    }
+
+    defer_call_end();
+
+    return progress;
+}
+
 /* This is where CQEs are processed in the glib event loop */
 static void fdmon_io_uring_gsource_dispatch(AioContext *ctx,
                                             AioHandlerList *ready_list)
@@ -371,9 +445,11 @@ static const FDMonOps fdmon_io_uring_ops = {
     .update = fdmon_io_uring_update,
     .wait = fdmon_io_uring_wait,
     .need_wait = fdmon_io_uring_need_wait,
+    .dispatch = fdmon_io_uring_dispatch,
     .gsource_prepare = fdmon_io_uring_gsource_prepare,
     .gsource_check = fdmon_io_uring_gsource_check,
     .gsource_dispatch = fdmon_io_uring_gsource_dispatch,
+    .add_sqe = fdmon_io_uring_add_sqe,
 };
 
 bool fdmon_io_uring_setup(AioContext *ctx, Error **errp)
@@ -389,6 +465,7 @@ bool fdmon_io_uring_setup(AioContext *ctx, Error **errp)
     }
 
     QSLIST_INIT(&ctx->submit_list);
+    QSIMPLEQ_INIT(&ctx->cqe_handler_ready_list);
     ctx->fdmon_ops = &fdmon_io_uring_ops;
     ctx->io_uring_fd_tag = g_source_add_unix_fd(&ctx->source,
             ctx->fdmon_io_uring.ring_fd, G_IO_IN);
@@ -425,6 +502,8 @@ void fdmon_io_uring_destroy(AioContext *ctx)
     g_source_remove_unix_fd(&ctx->source, ctx->io_uring_fd_tag);
     ctx->io_uring_fd_tag = NULL;
 
+    assert(QSIMPLEQ_EMPTY(&ctx->cqe_handler_ready_list));
+
    qemu_lockcnt_lock(&ctx->list_lock);
    fdmon_poll_downgrade(ctx);
    qemu_lockcnt_unlock(&ctx->list_lock);
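
For reference, a minimal usage sketch of the new ->add_sqe()/CqeHandler
interface added by this patch. This is not part of the diff: the aio_add_sqe()
wrapper (assumed here to forward to FDMonOps->add_sqe with the same signature
as fdmon_io_uring_add_sqe()), the ReadOp structure, and the helper names are
illustrative assumptions. The pattern the patch enables is: embed a CqeHandler
in a per-request struct, set its cb, and keep the struct alive until the
completion callback runs in fdmon_io_uring_dispatch().

#include "qemu/osdep.h"
#include "qemu/error-report.h"
#include "block/aio.h"

typedef struct {
    CqeHandler cqe_handler; /* must remain alive until the cqe is delivered */
    int fd;
    void *buf;
    size_t len;
} ReadOp;

/* Fills in the sqe; called in the AioContext thread before submission */
static void read_op_prep_sqe(struct io_uring_sqe *sqe, void *opaque)
{
    ReadOp *op = opaque;

    io_uring_prep_read(sqe, op->fd, op->buf, op->len, 0);
}

/* Invoked by fdmon_io_uring_dispatch() after the cqe has been copied */
static void read_op_cb(CqeHandler *cqe_handler)
{
    ReadOp *op = container_of(cqe_handler, ReadOp, cqe_handler);

    if (cqe_handler->cqe.res < 0) {
        /* io_uring reports errors as negative errno values in res */
        error_report("read failed: %s", strerror(-cqe_handler->cqe.res));
    }

    g_free(op);
}

static void submit_read(AioContext *ctx, int fd, void *buf, size_t len)
{
    ReadOp *op = g_new0(ReadOp, 1);

    op->fd = fd;
    op->buf = buf;
    op->len = len;
    op->cqe_handler.cb = read_op_cb;

    /* Hypothetical wrapper; the real entry point may differ */
    aio_add_sqe(ctx, read_op_prep_sqe, op, &op->cqe_handler);
}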