Browse Source

block: move drain outside of bdrv_root_attach_child()

This is part of resolving the deadlock mentioned in commit "block:
move draining out of bdrv_change_aio_context() and mark GRAPH_RDLOCK".

The function bdrv_root_attach_child() runs under the graph lock, so it
is not allowed to drain. It is called by:
1. blk_insert_bs(), where a drained section is introduced.
2. block_job_add_bdrv(), which holds the graph lock itself.

block_job_add_bdrv() is called by:
1. mirror_start_job()
2. stream_start()
3. commit_start()
4. backup_job_create()
5. block_job_create()
6. In the test_blockjob_common_drain_node() unit test

In all callers, a drained section is introduced.

Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250530151125.955508-13-f.ebner@proxmox.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
pull/287/head
Fiona Ebner 10 months ago
committed by Kevin Wolf
parent
commit
ffdcd081f5
  1. 4
      block.c
  2. 2
      block/backup.c
  3. 2
      block/block-backend.c
  4. 4
      block/commit.c
  5. 5
      block/mirror.c
  6. 4
      block/stream.c
  7. 4
      blockjob.c
  8. 2
      include/block/blockjob.h
  9. 2
      tests/unit/test-bdrv-drain.c

4
block.c

@ -3228,6 +3228,8 @@ bdrv_attach_child_noperm(BlockDriverState *parent_bs,
*
* On failure NULL is returned, errp is set and the reference to
* child_bs is also dropped.
*
* All block nodes must be drained.
*/
BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
const char *child_name,
@ -3242,7 +3244,6 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
GLOBAL_STATE_CODE();
bdrv_drain_all_begin();
child = bdrv_attach_child_common(child_bs, child_name, child_class,
child_role, perm, shared_perm, opaque,
tran, errp);
@ -3255,7 +3256,6 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
out:
tran_finalize(tran, ret);
bdrv_drain_all_end();
bdrv_schedule_unref(child_bs);

2
block/backup.c

@ -498,10 +498,12 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
block_copy_set_speed(bcs, speed);
/* Required permissions are taken by copy-before-write filter target */
bdrv_drain_all_begin();
bdrv_graph_wrlock();
block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
&error_abort);
bdrv_graph_wrunlock();
bdrv_drain_all_end();
return &job->common;

2
block/block-backend.c

@ -904,6 +904,7 @@ int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
GLOBAL_STATE_CODE();
bdrv_ref(bs);
bdrv_drain_all_begin();
bdrv_graph_wrlock();
if ((bs->open_flags & BDRV_O_INACTIVE) && blk_can_inactivate(blk)) {
@ -919,6 +920,7 @@ int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
perm, shared_perm, blk, errp);
bdrv_graph_wrunlock();
bdrv_drain_all_end();
if (blk->root == NULL) {
return -EPERM;
}

4
block/commit.c

@ -392,6 +392,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
* this is the responsibility of the interface (i.e. whoever calls
* commit_start()).
*/
bdrv_drain_all_begin();
bdrv_graph_wrlock();
s->base_overlay = bdrv_find_overlay(top, base);
assert(s->base_overlay);
@ -424,18 +425,21 @@ void commit_start(const char *job_id, BlockDriverState *bs,
iter_shared_perms, errp);
if (ret < 0) {
bdrv_graph_wrunlock();
bdrv_drain_all_end();
goto fail;
}
}
if (bdrv_freeze_backing_chain(commit_top_bs, base, errp) < 0) {
bdrv_graph_wrunlock();
bdrv_drain_all_end();
goto fail;
}
s->chain_frozen = true;
ret = block_job_add_bdrv(&s->common, "base", base, 0, BLK_PERM_ALL, errp);
bdrv_graph_wrunlock();
bdrv_drain_all_end();
if (ret < 0) {
goto fail;

5
block/mirror.c

@ -2014,6 +2014,7 @@ static BlockJob *mirror_start_job(
*/
bdrv_disable_dirty_bitmap(s->dirty_bitmap);
bdrv_drain_all_begin();
bdrv_graph_wrlock();
ret = block_job_add_bdrv(&s->common, "source", bs, 0,
BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE |
@ -2021,6 +2022,7 @@ static BlockJob *mirror_start_job(
errp);
if (ret < 0) {
bdrv_graph_wrunlock();
bdrv_drain_all_end();
goto fail;
}
@ -2066,16 +2068,19 @@ static BlockJob *mirror_start_job(
iter_shared_perms, errp);
if (ret < 0) {
bdrv_graph_wrunlock();
bdrv_drain_all_end();
goto fail;
}
}
if (bdrv_freeze_backing_chain(mirror_top_bs, target, errp) < 0) {
bdrv_graph_wrunlock();
bdrv_drain_all_end();
goto fail;
}
}
bdrv_graph_wrunlock();
bdrv_drain_all_end();
QTAILQ_INIT(&s->ops_in_flight);

4
block/stream.c

@ -371,10 +371,12 @@ void stream_start(const char *job_id, BlockDriverState *bs,
* already have our own plans. Also don't allow resize as the image size is
* queried only at the job start and then cached.
*/
bdrv_drain_all_begin();
bdrv_graph_wrlock();
if (block_job_add_bdrv(&s->common, "active node", bs, 0,
basic_flags | BLK_PERM_WRITE, errp)) {
bdrv_graph_wrunlock();
bdrv_drain_all_end();
goto fail;
}
@ -395,10 +397,12 @@ void stream_start(const char *job_id, BlockDriverState *bs,
basic_flags, errp);
if (ret < 0) {
bdrv_graph_wrunlock();
bdrv_drain_all_end();
goto fail;
}
}
bdrv_graph_wrunlock();
bdrv_drain_all_end();
s->base_overlay = base_overlay;
s->above_base = above_base;

4
blockjob.c

@ -496,6 +496,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
int ret;
GLOBAL_STATE_CODE();
bdrv_drain_all_begin();
bdrv_graph_wrlock();
if (job_id == NULL && !(flags & JOB_INTERNAL)) {
@ -506,6 +507,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
flags, cb, opaque, errp);
if (job == NULL) {
bdrv_graph_wrunlock();
bdrv_drain_all_end();
return NULL;
}
@ -544,10 +546,12 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
}
bdrv_graph_wrunlock();
bdrv_drain_all_end();
return job;
fail:
bdrv_graph_wrunlock();
bdrv_drain_all_end();
job_early_fail(&job->job);
return NULL;
}

2
include/block/blockjob.h

@ -137,6 +137,8 @@ BlockJob *block_job_get_locked(const char *id);
* Add @bs to the list of BlockDriverState that are involved in
* @job. This means that all operations will be blocked on @bs while
* @job exists.
*
* All block nodes must be drained.
*/
int GRAPH_WRLOCK
block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,

2
tests/unit/test-bdrv-drain.c

@ -772,9 +772,11 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type,
tjob->bs = src;
job = &tjob->common;
bdrv_drain_all_begin();
bdrv_graph_wrlock();
block_job_add_bdrv(job, "target", target, 0, BLK_PERM_ALL, &error_abort);
bdrv_graph_wrunlock();
bdrv_drain_all_end();
switch (result) {
case TEST_JOB_SUCCESS:

Loading…
Cancel
Save