libblkio (https://gitlab.com/libblkio/libblkio/) is a library for high-performance disk I/O. It currently supports io_uring, virtio-blk-vhost-user, and virtio-blk-vhost-vdpa, with additional drivers under development.

One of the reasons for developing libblkio is that other applications besides QEMU can use it. This will be particularly useful for virtio-blk-vhost-user, which applications may wish to use for connecting to qemu-storage-daemon.

libblkio also gives us an opportunity to develop in Rust behind a C API that is easy to consume from QEMU.

This commit adds io_uring, nvme-io_uring, virtio-blk-vhost-user, and virtio-blk-vhost-vdpa BlockDrivers to QEMU using libblkio. It will be easy to add other libblkio drivers since they will share the majority of code.

For now I/O buffers are copied through bounce buffers if the libblkio driver requires it. Later commits add an optimization for pre-registering guest RAM to avoid bounce buffers.

The syntax is:

  --blockdev io_uring,node-name=drive0,filename=test.img,readonly=on|off,cache.direct=on|off
  --blockdev nvme-io_uring,node-name=drive0,filename=/dev/ng0n1,readonly=on|off,cache.direct=on
  --blockdev virtio-blk-vhost-vdpa,node-name=drive0,path=/dev/vdpa...,readonly=on|off,cache.direct=on
  --blockdev virtio-blk-vhost-user,node-name=drive0,path=vhost-user-blk.sock,readonly=on|off,cache.direct=on

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Acked-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Message-id: 20221013185908.1297568-3-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
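As an illustration of the qemu-storage-daemon pairing mentioned above, the two ends could be wired up roughly like this. The socket path and image name are placeholders, the qemu-storage-daemon options come from its existing documentation rather than from this commit, and the remaining guest options (including the shareable memory backend that vhost-user devices require) are elided:

  # Export a disk image over vhost-user-blk with qemu-storage-daemon
  qemu-storage-daemon \
      --blockdev file,node-name=file0,filename=test.img \
      --export vhost-user-blk,id=export0,node-name=file0,writable=on,addr.type=unix,addr.path=vhost-user-blk.sock

  # Connect to it from QEMU through the new virtio-blk-vhost-user driver
  qemu-system-x86_64 \
      --blockdev virtio-blk-vhost-user,node-name=drive0,path=vhost-user-blk.sock,cache.direct=on \
      --device virtio-blk-pci,drive=drive0 \
      ...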
8 changed files with 928 additions and 4 deletions
@@ -0,0 +1,831 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
/*
 * libblkio BlockDriver
 *
 * Copyright Red Hat, Inc.
 *
 * Author:
 *   Stefan Hajnoczi <stefanha@redhat.com>
 */

#include "qemu/osdep.h"
#include <blkio.h>
#include "block/block_int.h"
#include "qapi/error.h"
#include "qapi/qmp/qdict.h"
#include "qemu/module.h"

/*
 * Keep the QEMU BlockDriver names identical to the libblkio driver names.
 * Using macros instead of typing out the string literals avoids typos.
 */
#define DRIVER_IO_URING "io_uring"
#define DRIVER_NVME_IO_URING "nvme-io_uring"
#define DRIVER_VIRTIO_BLK_VHOST_USER "virtio-blk-vhost-user"
#define DRIVER_VIRTIO_BLK_VHOST_VDPA "virtio-blk-vhost-vdpa"

/*
 * Allocated bounce buffers are kept in a list sorted by buffer address.
 */
typedef struct BlkioBounceBuf {
    QLIST_ENTRY(BlkioBounceBuf) next;

    /* The bounce buffer */
    struct iovec buf;
} BlkioBounceBuf;

typedef struct {
    /*
     * libblkio is not thread-safe so this lock protects ->blkio and
     * ->blkioq.
     */
    QemuMutex blkio_lock;
    struct blkio *blkio;
    struct blkioq *blkioq; /* make this multi-queue in the future... */
    int completion_fd;

    /*
     * Polling fetches the next completion into this field.
     *
     * No lock is necessary since only one thread calls aio_poll() and invokes
     * fd and poll handlers.
     */
    struct blkio_completion poll_completion;

    /*
     * Protects ->bounce_pool, ->bounce_bufs, ->bounce_available.
     *
     * Lock ordering: ->bounce_lock before ->blkio_lock.
     */
    CoMutex bounce_lock;

    /* Bounce buffer pool */
    struct blkio_mem_region bounce_pool;

    /* Sorted list of allocated bounce buffers */
    QLIST_HEAD(, BlkioBounceBuf) bounce_bufs;

    /* Queue for coroutines waiting for bounce buffer space */
    CoQueue bounce_available;

    /* The value of the "mem-region-alignment" property */
    size_t mem_region_alignment;

    /* Can we skip adding/deleting blkio_mem_regions? */
    bool needs_mem_regions;
} BDRVBlkioState;

/* Called with s->bounce_lock held */
static int blkio_resize_bounce_pool(BDRVBlkioState *s, int64_t bytes)
{
    /* There can be no allocated bounce buffers during resize */
    assert(QLIST_EMPTY(&s->bounce_bufs));

    /* Pad size to reduce frequency of resize calls */
    bytes += 128 * 1024;

    WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
        int ret;

        if (s->bounce_pool.addr) {
            blkio_unmap_mem_region(s->blkio, &s->bounce_pool);
            blkio_free_mem_region(s->blkio, &s->bounce_pool);
            memset(&s->bounce_pool, 0, sizeof(s->bounce_pool));
        }

        /* Automatically freed when s->blkio is destroyed */
        ret = blkio_alloc_mem_region(s->blkio, &s->bounce_pool, bytes);
        if (ret < 0) {
            return ret;
        }

        ret = blkio_map_mem_region(s->blkio, &s->bounce_pool);
        if (ret < 0) {
            blkio_free_mem_region(s->blkio, &s->bounce_pool);
            memset(&s->bounce_pool, 0, sizeof(s->bounce_pool));
            return ret;
        }
    }

    return 0;
}

/* Called with s->bounce_lock held */
static bool
blkio_do_alloc_bounce_buffer(BDRVBlkioState *s, BlkioBounceBuf *bounce,
                             int64_t bytes)
{
    void *addr = s->bounce_pool.addr;
    BlkioBounceBuf *cur = NULL;
    BlkioBounceBuf *prev = NULL;
    ptrdiff_t space;

    /*
     * This is just a linear search over the holes between requests. An
     * efficient allocator would be nice.
     */
    QLIST_FOREACH(cur, &s->bounce_bufs, next) {
        space = cur->buf.iov_base - addr;
        if (bytes <= space) {
            QLIST_INSERT_BEFORE(cur, bounce, next);
            bounce->buf.iov_base = addr;
            bounce->buf.iov_len = bytes;
            return true;
        }

        addr = cur->buf.iov_base + cur->buf.iov_len;
        prev = cur;
    }

    /* Is there space after the last request? */
    space = s->bounce_pool.addr + s->bounce_pool.len - addr;
    if (bytes > space) {
        return false;
    }
    if (prev) {
        QLIST_INSERT_AFTER(prev, bounce, next);
    } else {
        QLIST_INSERT_HEAD(&s->bounce_bufs, bounce, next);
    }
    bounce->buf.iov_base = addr;
    bounce->buf.iov_len = bytes;
    return true;
}

static int coroutine_fn
blkio_alloc_bounce_buffer(BDRVBlkioState *s, BlkioBounceBuf *bounce,
                          int64_t bytes)
{
    /*
     * Ensure fairness: first time around we join the back of the queue,
     * subsequently we join the front so we don't lose our place.
     */
    CoQueueWaitFlags wait_flags = 0;

    QEMU_LOCK_GUARD(&s->bounce_lock);

    /* Ensure fairness: don't even try if other requests are already waiting */
    if (!qemu_co_queue_empty(&s->bounce_available)) {
        qemu_co_queue_wait_flags(&s->bounce_available, &s->bounce_lock,
                                 wait_flags);
        wait_flags = CO_QUEUE_WAIT_FRONT;
    }

    while (true) {
        if (blkio_do_alloc_bounce_buffer(s, bounce, bytes)) {
            /* Kick the next queued request since there may be space */
            qemu_co_queue_next(&s->bounce_available);
            return 0;
        }

        /*
         * If there are no in-flight requests then the pool was simply too
         * small.
         */
        if (QLIST_EMPTY(&s->bounce_bufs)) {
            bool ok;
            int ret;

            ret = blkio_resize_bounce_pool(s, bytes);
            if (ret < 0) {
                /* Kick the next queued request since that may fail too */
                qemu_co_queue_next(&s->bounce_available);
                return ret;
            }

            ok = blkio_do_alloc_bounce_buffer(s, bounce, bytes);
            assert(ok); /* must have space this time */
            return 0;
        }

        qemu_co_queue_wait_flags(&s->bounce_available, &s->bounce_lock,
                                 wait_flags);
        wait_flags = CO_QUEUE_WAIT_FRONT;
    }
}

static void coroutine_fn blkio_free_bounce_buffer(BDRVBlkioState *s,
                                                  BlkioBounceBuf *bounce)
{
    QEMU_LOCK_GUARD(&s->bounce_lock);

    QLIST_REMOVE(bounce, next);

    /* Wake up waiting coroutines since space may now be available */
    qemu_co_queue_next(&s->bounce_available);
}

/* For async to .bdrv_co_*() conversion */
typedef struct {
    Coroutine *coroutine;
    int ret;
} BlkioCoData;

static void blkio_completion_fd_read(void *opaque)
{
    BlockDriverState *bs = opaque;
    BDRVBlkioState *s = bs->opaque;
    uint64_t val;
    int ret;

    /* Polling may have already fetched a completion */
    if (s->poll_completion.user_data != NULL) {
        BlkioCoData *cod = s->poll_completion.user_data;
        cod->ret = s->poll_completion.ret;

        /* Clear it in case aio_co_wake() enters a nested event loop */
        s->poll_completion.user_data = NULL;

        aio_co_wake(cod->coroutine);
    }

    /* Reset completion fd status */
    ret = read(s->completion_fd, &val, sizeof(val));

    /* Ignore errors, there's nothing we can do */
    (void)ret;

    /*
     * Reading one completion at a time makes nested event loop re-entrancy
     * simple. Change this loop to get multiple completions in one go if it
     * becomes a performance bottleneck.
     */
    while (true) {
        struct blkio_completion completion;

        WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
            ret = blkioq_do_io(s->blkioq, &completion, 0, 1, NULL);
        }
        if (ret != 1) {
            break;
        }

        BlkioCoData *cod = completion.user_data;
        cod->ret = completion.ret;
        aio_co_wake(cod->coroutine);
    }
}

static bool blkio_completion_fd_poll(void *opaque)
{
    BlockDriverState *bs = opaque;
    BDRVBlkioState *s = bs->opaque;
    int ret;

    /* Just in case we already fetched a completion */
    if (s->poll_completion.user_data != NULL) {
        return true;
    }

    WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
        ret = blkioq_do_io(s->blkioq, &s->poll_completion, 0, 1, NULL);
    }
    return ret == 1;
}

static void blkio_completion_fd_poll_ready(void *opaque)
{
    blkio_completion_fd_read(opaque);
}

static void blkio_attach_aio_context(BlockDriverState *bs,
                                     AioContext *new_context)
{
    BDRVBlkioState *s = bs->opaque;

    aio_set_fd_handler(new_context,
                       s->completion_fd,
                       false,
                       blkio_completion_fd_read,
                       NULL,
                       blkio_completion_fd_poll,
                       blkio_completion_fd_poll_ready,
                       bs);
}

static void blkio_detach_aio_context(BlockDriverState *bs)
{
    BDRVBlkioState *s = bs->opaque;

    aio_set_fd_handler(bdrv_get_aio_context(bs),
                       s->completion_fd,
                       false, NULL, NULL, NULL, NULL, NULL);
}

/* Call with s->blkio_lock held to submit I/O after enqueuing a new request */
static void blkio_submit_io(BlockDriverState *bs)
{
    if (qatomic_read(&bs->io_plugged) == 0) {
        BDRVBlkioState *s = bs->opaque;

        blkioq_do_io(s->blkioq, NULL, 0, 0, NULL);
    }
}

static int coroutine_fn
blkio_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
{
    BDRVBlkioState *s = bs->opaque;
    BlkioCoData cod = {
        .coroutine = qemu_coroutine_self(),
    };

    WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
        blkioq_discard(s->blkioq, offset, bytes, &cod, 0);
        blkio_submit_io(bs);
    }

    qemu_coroutine_yield();
    return cod.ret;
}

static int coroutine_fn
blkio_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
                QEMUIOVector *qiov, BdrvRequestFlags flags)
{
    BlkioCoData cod = {
        .coroutine = qemu_coroutine_self(),
    };
    BDRVBlkioState *s = bs->opaque;
    bool use_bounce_buffer = s->needs_mem_regions;
    BlkioBounceBuf bounce;
    struct iovec *iov = qiov->iov;
    int iovcnt = qiov->niov;

    if (use_bounce_buffer) {
        int ret = blkio_alloc_bounce_buffer(s, &bounce, bytes);
        if (ret < 0) {
            return ret;
        }

        iov = &bounce.buf;
        iovcnt = 1;
    }

    WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
        blkioq_readv(s->blkioq, offset, iov, iovcnt, &cod, 0);
        blkio_submit_io(bs);
    }

    qemu_coroutine_yield();

    if (use_bounce_buffer) {
        if (cod.ret == 0) {
            qemu_iovec_from_buf(qiov, 0,
                                bounce.buf.iov_base,
                                bounce.buf.iov_len);
        }

        blkio_free_bounce_buffer(s, &bounce);
    }

    return cod.ret;
}

static int coroutine_fn blkio_co_pwritev(BlockDriverState *bs, int64_t offset,
        int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags)
{
    uint32_t blkio_flags = (flags & BDRV_REQ_FUA) ? BLKIO_REQ_FUA : 0;
    BlkioCoData cod = {
        .coroutine = qemu_coroutine_self(),
    };
    BDRVBlkioState *s = bs->opaque;
    bool use_bounce_buffer = s->needs_mem_regions;
    BlkioBounceBuf bounce;
    struct iovec *iov = qiov->iov;
    int iovcnt = qiov->niov;

    if (use_bounce_buffer) {
        int ret = blkio_alloc_bounce_buffer(s, &bounce, bytes);
        if (ret < 0) {
            return ret;
        }

        qemu_iovec_to_buf(qiov, 0, bounce.buf.iov_base, bytes);
        iov = &bounce.buf;
        iovcnt = 1;
    }

    WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
        blkioq_writev(s->blkioq, offset, iov, iovcnt, &cod, blkio_flags);
        blkio_submit_io(bs);
    }

    qemu_coroutine_yield();

    if (use_bounce_buffer) {
        blkio_free_bounce_buffer(s, &bounce);
    }

    return cod.ret;
}

static int coroutine_fn blkio_co_flush(BlockDriverState *bs)
{
    BDRVBlkioState *s = bs->opaque;
    BlkioCoData cod = {
        .coroutine = qemu_coroutine_self(),
    };

    WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
        blkioq_flush(s->blkioq, &cod, 0);
        blkio_submit_io(bs);
    }

    qemu_coroutine_yield();
    return cod.ret;
}

static int coroutine_fn blkio_co_pwrite_zeroes(BlockDriverState *bs,
    int64_t offset, int64_t bytes, BdrvRequestFlags flags)
{
    BDRVBlkioState *s = bs->opaque;
    BlkioCoData cod = {
        .coroutine = qemu_coroutine_self(),
    };
    uint32_t blkio_flags = 0;

    if (flags & BDRV_REQ_FUA) {
        blkio_flags |= BLKIO_REQ_FUA;
    }
    if (!(flags & BDRV_REQ_MAY_UNMAP)) {
        blkio_flags |= BLKIO_REQ_NO_UNMAP;
    }
    if (flags & BDRV_REQ_NO_FALLBACK) {
        blkio_flags |= BLKIO_REQ_NO_FALLBACK;
    }

    WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
        blkioq_write_zeroes(s->blkioq, offset, bytes, &cod, blkio_flags);
        blkio_submit_io(bs);
    }

    qemu_coroutine_yield();
    return cod.ret;
}

static void blkio_io_unplug(BlockDriverState *bs)
{
    BDRVBlkioState *s = bs->opaque;

    WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
        blkio_submit_io(bs);
    }
}

static int blkio_io_uring_open(BlockDriverState *bs, QDict *options, int flags,
                               Error **errp)
{
    const char *filename = qdict_get_str(options, "filename");
    BDRVBlkioState *s = bs->opaque;
    int ret;

    ret = blkio_set_str(s->blkio, "path", filename);
    qdict_del(options, "filename");
    if (ret < 0) {
        error_setg_errno(errp, -ret, "failed to set path: %s",
                         blkio_get_error_msg());
        return ret;
    }

    if (flags & BDRV_O_NOCACHE) {
        ret = blkio_set_bool(s->blkio, "direct", true);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "failed to set direct: %s",
                             blkio_get_error_msg());
            return ret;
        }
    }

    return 0;
}

static int blkio_nvme_io_uring(BlockDriverState *bs, QDict *options, int flags,
                               Error **errp)
{
    const char *filename = qdict_get_str(options, "filename");
    BDRVBlkioState *s = bs->opaque;
    int ret;

    ret = blkio_set_str(s->blkio, "path", filename);
    qdict_del(options, "filename");
    if (ret < 0) {
        error_setg_errno(errp, -ret, "failed to set path: %s",
                         blkio_get_error_msg());
        return ret;
    }

    if (!(flags & BDRV_O_NOCACHE)) {
        error_setg(errp, "cache.direct=off is not supported");
        return -EINVAL;
    }

    return 0;
}

static int blkio_virtio_blk_common_open(BlockDriverState *bs,
        QDict *options, int flags, Error **errp)
{
    const char *path = qdict_get_try_str(options, "path");
    BDRVBlkioState *s = bs->opaque;
    int ret;

    if (!path) {
        error_setg(errp, "missing 'path' option");
        return -EINVAL;
    }

    ret = blkio_set_str(s->blkio, "path", path);
    qdict_del(options, "path");
    if (ret < 0) {
        error_setg_errno(errp, -ret, "failed to set path: %s",
                         blkio_get_error_msg());
        return ret;
    }

    if (!(flags & BDRV_O_NOCACHE)) {
        error_setg(errp, "cache.direct=off is not supported");
        return -EINVAL;
    }
    return 0;
}

static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags,
                           Error **errp)
{
    const char *blkio_driver = bs->drv->protocol_name;
    BDRVBlkioState *s = bs->opaque;
    int ret;

    ret = blkio_create(blkio_driver, &s->blkio);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "blkio_create failed: %s",
                         blkio_get_error_msg());
        return ret;
    }

    if (strcmp(blkio_driver, DRIVER_IO_URING) == 0) {
        ret = blkio_io_uring_open(bs, options, flags, errp);
    } else if (strcmp(blkio_driver, DRIVER_NVME_IO_URING) == 0) {
        ret = blkio_nvme_io_uring(bs, options, flags, errp);
    } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VHOST_USER) == 0) {
        ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
    } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VHOST_VDPA) == 0) {
        ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
    } else {
        g_assert_not_reached();
    }
    if (ret < 0) {
        blkio_destroy(&s->blkio);
        return ret;
    }

    if (!(flags & BDRV_O_RDWR)) {
        ret = blkio_set_bool(s->blkio, "read-only", true);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "failed to set read-only: %s",
                             blkio_get_error_msg());
            blkio_destroy(&s->blkio);
            return ret;
        }
    }

    ret = blkio_connect(s->blkio);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "blkio_connect failed: %s",
                         blkio_get_error_msg());
        blkio_destroy(&s->blkio);
        return ret;
    }

    ret = blkio_get_bool(s->blkio,
                         "needs-mem-regions",
                         &s->needs_mem_regions);
    if (ret < 0) {
        error_setg_errno(errp, -ret,
                         "failed to get needs-mem-regions: %s",
                         blkio_get_error_msg());
        blkio_destroy(&s->blkio);
        return ret;
    }

    ret = blkio_get_uint64(s->blkio,
                           "mem-region-alignment",
                           &s->mem_region_alignment);
    if (ret < 0) {
        error_setg_errno(errp, -ret,
                         "failed to get mem-region-alignment: %s",
                         blkio_get_error_msg());
        blkio_destroy(&s->blkio);
        return ret;
    }

    ret = blkio_start(s->blkio);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "blkio_start failed: %s",
                         blkio_get_error_msg());
        blkio_destroy(&s->blkio);
        return ret;
    }

    bs->supported_write_flags = BDRV_REQ_FUA;
    bs->supported_zero_flags = BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP |
                               BDRV_REQ_NO_FALLBACK;

    qemu_mutex_init(&s->blkio_lock);
    qemu_co_mutex_init(&s->bounce_lock);
    qemu_co_queue_init(&s->bounce_available);
    QLIST_INIT(&s->bounce_bufs);
    s->blkioq = blkio_get_queue(s->blkio, 0);
    s->completion_fd = blkioq_get_completion_fd(s->blkioq);

    blkio_attach_aio_context(bs, bdrv_get_aio_context(bs));
    return 0;
}

static void blkio_close(BlockDriverState *bs)
{
    BDRVBlkioState *s = bs->opaque;

    /* There is no destroy() API for s->bounce_lock */

    qemu_mutex_destroy(&s->blkio_lock);
    blkio_detach_aio_context(bs);
    blkio_destroy(&s->blkio);
}

static int64_t blkio_getlength(BlockDriverState *bs)
{
    BDRVBlkioState *s = bs->opaque;
    uint64_t capacity;
    int ret;

    WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
        ret = blkio_get_uint64(s->blkio, "capacity", &capacity);
    }
    if (ret < 0) {
        return -ret;
    }

    return capacity;
}

static int blkio_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
    return 0;
}

static void blkio_refresh_limits(BlockDriverState *bs, Error **errp)
{
    BDRVBlkioState *s = bs->opaque;
    QEMU_LOCK_GUARD(&s->blkio_lock);
    int value;
    int ret;

    ret = blkio_get_int(s->blkio, "request-alignment", &value);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "failed to get \"request-alignment\": %s",
                         blkio_get_error_msg());
        return;
    }
    bs->bl.request_alignment = value;
    if (bs->bl.request_alignment < 1 ||
        bs->bl.request_alignment >= INT_MAX ||
        !is_power_of_2(bs->bl.request_alignment)) {
        error_setg(errp, "invalid \"request-alignment\" value %" PRIu32 ", "
                   "must be a power of 2 less than INT_MAX",
                   bs->bl.request_alignment);
        return;
    }

    ret = blkio_get_int(s->blkio, "optimal-io-size", &value);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "failed to get \"optimal-io-size\": %s",
                         blkio_get_error_msg());
        return;
    }
    bs->bl.opt_transfer = value;
    if (bs->bl.opt_transfer > INT_MAX ||
        (bs->bl.opt_transfer % bs->bl.request_alignment)) {
        error_setg(errp, "invalid \"optimal-io-size\" value %" PRIu32 ", must "
                   "be a multiple of %" PRIu32, bs->bl.opt_transfer,
                   bs->bl.request_alignment);
        return;
    }

    ret = blkio_get_int(s->blkio, "max-transfer", &value);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "failed to get \"max-transfer\": %s",
                         blkio_get_error_msg());
        return;
    }
    bs->bl.max_transfer = value;
    if ((bs->bl.max_transfer % bs->bl.request_alignment) ||
        (bs->bl.opt_transfer && (bs->bl.max_transfer % bs->bl.opt_transfer))) {
        error_setg(errp, "invalid \"max-transfer\" value %" PRIu32 ", must be "
                   "a multiple of %" PRIu32 " and %" PRIu32 " (if non-zero)",
                   bs->bl.max_transfer, bs->bl.request_alignment,
                   bs->bl.opt_transfer);
        return;
    }

    ret = blkio_get_int(s->blkio, "buf-alignment", &value);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "failed to get \"buf-alignment\": %s",
                         blkio_get_error_msg());
        return;
    }
    if (value < 1) {
        error_setg(errp, "invalid \"buf-alignment\" value %d, must be "
                   "positive", value);
        return;
    }
    bs->bl.min_mem_alignment = value;

    ret = blkio_get_int(s->blkio, "optimal-buf-alignment", &value);
    if (ret < 0) {
        error_setg_errno(errp, -ret,
                         "failed to get \"optimal-buf-alignment\": %s",
                         blkio_get_error_msg());
        return;
    }
    if (value < 1) {
        error_setg(errp, "invalid \"optimal-buf-alignment\" value %d, "
                   "must be positive", value);
        return;
    }
    bs->bl.opt_mem_alignment = value;

    ret = blkio_get_int(s->blkio, "max-segments", &value);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "failed to get \"max-segments\": %s",
                         blkio_get_error_msg());
        return;
    }
    if (value < 1) {
        error_setg(errp, "invalid \"max-segments\" value %d, must be positive",
                   value);
        return;
    }
    bs->bl.max_iov = value;
}

/*
 * TODO
 * Missing libblkio APIs:
 * - block_status
 * - co_invalidate_cache
 *
 * Out of scope?
 * - create
 * - truncate
 */

#define BLKIO_DRIVER(name, ...) \
    { \
        .format_name = name, \
        .protocol_name = name, \
        .instance_size = sizeof(BDRVBlkioState), \
        .bdrv_file_open = blkio_file_open, \
        .bdrv_close = blkio_close, \
        .bdrv_getlength = blkio_getlength, \
        .bdrv_get_info = blkio_get_info, \
        .bdrv_attach_aio_context = blkio_attach_aio_context, \
        .bdrv_detach_aio_context = blkio_detach_aio_context, \
        .bdrv_co_pdiscard = blkio_co_pdiscard, \
        .bdrv_co_preadv = blkio_co_preadv, \
        .bdrv_co_pwritev = blkio_co_pwritev, \
        .bdrv_co_flush_to_disk = blkio_co_flush, \
        .bdrv_co_pwrite_zeroes = blkio_co_pwrite_zeroes, \
        .bdrv_io_unplug = blkio_io_unplug, \
        .bdrv_refresh_limits = blkio_refresh_limits, \
        __VA_ARGS__ \
    }

static BlockDriver bdrv_io_uring = BLKIO_DRIVER(
    DRIVER_IO_URING,
    .bdrv_needs_filename = true,
);

static BlockDriver bdrv_nvme_io_uring = BLKIO_DRIVER(
    DRIVER_NVME_IO_URING,
    .bdrv_needs_filename = true,
);

static BlockDriver bdrv_virtio_blk_vhost_user = BLKIO_DRIVER(
    DRIVER_VIRTIO_BLK_VHOST_USER
);

static BlockDriver bdrv_virtio_blk_vhost_vdpa = BLKIO_DRIVER(
    DRIVER_VIRTIO_BLK_VHOST_VDPA
);

static void bdrv_blkio_init(void)
{
    bdrv_register(&bdrv_io_uring);
    bdrv_register(&bdrv_nvme_io_uring);
    bdrv_register(&bdrv_virtio_blk_vhost_user);
    bdrv_register(&bdrv_virtio_blk_vhost_vdpa);
}

block_init(bdrv_blkio_init);
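The commit message notes that one motivation for libblkio is that applications other than QEMU can use it. As a rough illustration, the standalone sketch below drives the same libblkio calls the driver above relies on (blkio_create(), blkio_set_str(), blkio_connect(), blkio_start(), blkio_alloc_mem_region(), blkio_map_mem_region(), blkio_get_queue(), blkioq_readv(), blkioq_do_io()); it is not part of this commit, the file name and transfer size are illustrative, and error checking is omitted for brevity:

/* Hypothetical standalone libblkio user, not part of this commit */
#include <blkio.h>
#include <stdio.h>
#include <sys/uio.h>

int main(void)
{
    struct blkio *b;
    struct blkioq *q;
    struct blkio_mem_region region;
    struct blkio_completion completion;

    blkio_create("io_uring", &b);
    blkio_set_str(b, "path", "test.img");   /* illustrative file name */
    blkio_connect(b);
    blkio_start(b);

    /*
     * Allocate and map the data buffer through libblkio so the example also
     * works for drivers that report needs-mem-regions=true.
     */
    blkio_alloc_mem_region(b, &region, 4096);
    blkio_map_mem_region(b, &region);

    q = blkio_get_queue(b, 0);

    struct iovec iov = {
        .iov_base = region.addr,
        .iov_len = 4096,
    };
    blkioq_readv(q, 0, &iov, 1, NULL, 0);

    /* Submit and wait for one completion instead of using the completion fd */
    blkioq_do_io(q, &completion, 1, 1, NULL);
    printf("read returned %d\n", completion.ret);

    blkio_destroy(&b);
    return 0;
}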