From 31b737b19dca4d50758f5e9834d27b683102f2f1 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Tue, 3 Jun 2025 14:59:05 +0200 Subject: [PATCH 1/3] hw/nvme: fix namespace attachment Commit 6ccca4b6bb9f ("hw/nvme: rework csi handling") introduced a bug in Namespace Attachment, causing it to a) not allow a controller to attach namespaces to other controllers b) assert if a valid non-attached namespace is detached This fixes both issues. Fixes: 6ccca4b6bb9f ("hw/nvme: rework csi handling") Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2976 Reviewed-by: Jesper Wendel Devantier Signed-off-by: Klaus Jensen --- hw/nvme/ctrl.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index e764ec7683..6c06d7f8f9 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -6816,7 +6816,7 @@ static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req) switch (sel) { case NVME_NS_ATTACHMENT_ATTACH: - if (nvme_ns(n, nsid)) { + if (nvme_ns(ctrl, nsid)) { return NVME_NS_ALREADY_ATTACHED | NVME_DNR; } @@ -6824,7 +6824,7 @@ static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req) return NVME_NS_PRIVATE | NVME_DNR; } - if (!nvme_csi_supported(n, ns->csi)) { + if (!nvme_csi_supported(ctrl, ns->csi)) { return NVME_IOCS_NOT_SUPPORTED | NVME_DNR; } @@ -6834,6 +6834,10 @@ static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req) break; case NVME_NS_ATTACHMENT_DETACH: + if (!nvme_ns(ctrl, nsid)) { + return NVME_NS_NOT_ATTACHED | NVME_DNR; + } + nvme_detach_ns(ctrl, ns); nvme_update_dsm_limits(ctrl, NULL); From bc0c24fdb157b014932a5012fe4ccbeba7617f17 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Tue, 3 Jun 2025 14:59:06 +0200 Subject: [PATCH 2/3] hw/nvme: revert CMIC behavior Commit cd59f50ab017 ("hw/nvme: always initialize a subsystem") causes the controller to always set the CMIC.MCTRS ("Multiple Controllers") bit. While spec-compliant, this is a deviation from the previous behavior where this was only set if an nvme-subsys device was explicitly created (to configure a subsystem with multiple controllers/namespaces). Revert the behavior to only set CMIC.MCTRS if an nvme-subsys device is created explicitly. Reported-by: Alan Adamson Fixes: cd59f50ab017 ("hw/nvme: always initialize a subsystem") Reviewed-by: Alan Adamson Tested-by: Alan Adamson Signed-off-by: Klaus Jensen --- hw/nvme/ctrl.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 6c06d7f8f9..fa48412ef4 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -8780,7 +8780,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) uint8_t *pci_conf = pci_dev->config; uint64_t cap = ldq_le_p(&n->bar.cap); NvmeSecCtrlEntry *sctrl = nvme_sctrl(n); - uint32_t ctratt; + uint32_t ctratt = le32_to_cpu(id->ctratt); uint16_t oacs; memcpy(n->cse.acs, nvme_cse_acs_default, sizeof(n->cse.acs)); @@ -8798,10 +8798,11 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) id->oaes = cpu_to_le32(NVME_OAES_NS_ATTR); - ctratt = NVME_CTRATT_ELBAS; + ctratt |= NVME_CTRATT_ELBAS; if (n->params.ctratt.mem) { ctratt |= NVME_CTRATT_MEM; } + id->ctratt = cpu_to_le32(ctratt); id->rab = 6; @@ -8884,17 +8885,6 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) id->psd[0].enlat = cpu_to_le32(0x10); id->psd[0].exlat = cpu_to_le32(0x4); - id->cmic |= NVME_CMIC_MULTI_CTRL; - ctratt |= NVME_CTRATT_ENDGRPS; - - id->endgidmax = cpu_to_le16(0x1); - - if (n->subsys->endgrp.fdp.enabled) { - ctratt |= NVME_CTRATT_FDPS; - } - - id->ctratt = cpu_to_le32(ctratt); - NVME_CAP_SET_MQES(cap, n->params.mqes); NVME_CAP_SET_CQR(cap, 1); NVME_CAP_SET_TO(cap, 0xf); @@ -8927,6 +8917,20 @@ static int nvme_init_subsys(NvmeCtrl *n, Error **errp) } n->subsys = NVME_SUBSYS(dev); + } else { + NvmeIdCtrl *id = &n->id_ctrl; + uint32_t ctratt = le32_to_cpu(id->ctratt); + + id->cmic |= NVME_CMIC_MULTI_CTRL; + ctratt |= NVME_CTRATT_ENDGRPS; + + id->endgidmax = cpu_to_le16(0x1); + + if (n->subsys->endgrp.fdp.enabled) { + ctratt |= NVME_CTRATT_FDPS; + } + + id->ctratt = cpu_to_le32(ctratt); } cntlid = nvme_subsys_register_ctrl(n, errp); From 53493c1f836f4dda90a6b5f2fb3d9264918c6871 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 1 Aug 2025 07:24:57 -0700 Subject: [PATCH 3/3] hw/nvme: cap MDTS value for internal limitation The emulated device had let the user set whatever max transfers size they wanted, including no limit. However the device does have an internal limit of 1024 segments. NVMe doesn't report max segments, though. This is implicitly inferred based on the MDTS and MPSMIN values. IOV_MAX is currently 1024 which 4k PRPs can exceed with 2MB transfers. Don't allow MDTS values that can exceed this, otherwise users risk seeing "internal error" status to their otherwise protocol compliant commands. Signed-off-by: Keith Busch Signed-off-by: Klaus Jensen --- hw/nvme/ctrl.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index fa48412ef4..f5ee6bf260 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -8339,6 +8339,11 @@ static bool nvme_check_params(NvmeCtrl *n, Error **errp) host_memory_backend_set_mapped(n->pmr.dev, true); } + if (!n->params.mdts || ((1 << n->params.mdts) + 1) > IOV_MAX) { + error_setg(errp, "mdts exceeds IOV_MAX"); + return false; + } + if (n->params.zasl > n->params.mdts) { error_setg(errp, "zoned.zasl (Zone Append Size Limit) must be less " "than or equal to mdts (Maximum Data Transfer Size)");