Merge tag 'nvme-next-pull-request' of https://gitlab.com/birkelund/qemu into staging
hw/nvme patches
# -----BEGIN PGP SIGNATURE-----
#
# iQEzBAABCgAdFiEEUigzqnXi3OaiR2bATeGvMW1PDekFAmaQHpQACgkQTeGvMW1P
# DemukQf+Pqcq75cflBqIyVN84/0eThJxmpoTP0ynGNMKJp+K+oecb5pdgTeDI3Kh
# esDOjL8m849r5LFjrjmySrTX8znHPFXdBdqCaOp/MZlgz3NML1guB5EYsizZJ+L6
# K4IRLE/8gzfZHY4yWGmUBuL1VBs8XZV0bXYYlA0xKlO638O0KgVQ/2YpC/44l93J
# rEnefSeXIi+/tCYEaX7t2dA+Qfm/qUrcEZBgvhCREi8t8hTzKGHsl2LVKrsFdA5I
# QZtTFcqeoJThtzWmxGKqbfFb/qeirBlCfhvTEmUWXlS1z9VNzy0ZuqA2l0Sy05ls
# eARbl+JnvV6ic6PikZd8dMSrILjNkQ==
# =dLKH
# -----END PGP SIGNATURE-----
# gpg: Signature made Thu 11 Jul 2024 11:04:04 AM PDT
# gpg: using RSA key 522833AA75E2DCE6A24766C04DE1AF316D4F0DE9
# gpg: Good signature from "Klaus Jensen <its@irrelevant.dk>" [unknown]
# gpg: aka "Klaus Jensen <k.jensen@samsung.com>" [unknown]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg: There is no indication that the signature belongs to the owner.
# Primary key fingerprint: DDCA 4D9C 9EF9 31CC 3468 4272 63D5 6FC5 E55D A838
# Subkey fingerprint: 5228 33AA 75E2 DCE6 A247 66C0 4DE1 AF31 6D4F 0DE9
* tag 'nvme-next-pull-request' of https://gitlab.com/birkelund/qemu:
hw/nvme: Expand VI/VQ resource to uint32
hw/nvme: Allocate sec-ctrl-list as a dynamic array
hw/nvme: separate identify data for sec. ctrl list
hw/nvme: add Identify Endurance Group List
hw/nvme: fix BAR size mismatch of SR-IOV VF
hw/nvme: fix number of PIDs for FDP RUH update
hw/nvme: Add support for setting the MQES for the NVMe emulation
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 066389e..5b1b0ca 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -219,7 +219,6 @@
#define NVME_TEMPERATURE_CRITICAL 0x175
#define NVME_NUM_FW_SLOTS 1
#define NVME_DEFAULT_MAX_ZA_SIZE (128 * KiB)
-#define NVME_MAX_VFS 127
#define NVME_VF_RES_GRANULARITY 1
#define NVME_VF_OFFSET 0x1
#define NVME_VF_STRIDE 1
@@ -4352,7 +4351,7 @@
NvmeNamespace *ns = req->ns;
uint32_t cdw10 = le32_to_cpu(cmd->cdw10);
uint16_t ret = NVME_SUCCESS;
- uint32_t npid = (cdw10 >> 1) + 1;
+ uint32_t npid = (cdw10 >> 16) + 1;
unsigned int i = 0;
g_autofree uint16_t *pids = NULL;
uint32_t maxnpid;
@@ -5480,14 +5479,14 @@
NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
uint16_t pri_ctrl_id = le16_to_cpu(n->pri_ctrl_cap.cntlid);
uint16_t min_id = le16_to_cpu(c->ctrlid);
- uint8_t num_sec_ctrl = n->sec_ctrl_list.numcntl;
+ uint8_t num_sec_ctrl = n->nr_sec_ctrls;
NvmeSecCtrlList list = {0};
uint8_t i;
for (i = 0; i < num_sec_ctrl; i++) {
- if (n->sec_ctrl_list.sec[i].scid >= min_id) {
- list.numcntl = num_sec_ctrl - i;
- memcpy(&list.sec, n->sec_ctrl_list.sec + i,
+ if (n->sec_ctrl_list[i].scid >= min_id) {
+ list.numcntl = MIN(num_sec_ctrl - i, 127);
+ memcpy(&list.sec, n->sec_ctrl_list + i,
list.numcntl * sizeof(NvmeSecCtrlEntry));
break;
}
@@ -5629,6 +5628,26 @@
return nvme_c2h(n, list, data_len, req);
}
+static uint16_t nvme_endurance_group_list(NvmeCtrl *n, NvmeRequest *req)
+{
+ uint16_t list[NVME_CONTROLLER_LIST_SIZE] = {};
+ uint16_t *nr_ids = &list[0];
+ uint16_t *ids = &list[1];
+ uint16_t endgid = le32_to_cpu(req->cmd.cdw11) & 0xffff;
+
+ /*
+ * The current nvme-subsys only supports Endurance Group #1.
+ */
+ if (!endgid) {
+ *nr_ids = 1;
+ ids[0] = 1;
+ } else {
+ *nr_ids = 0;
+ }
+
+ return nvme_c2h(n, list, sizeof(list), req);
+}
+
static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeRequest *req)
{
NvmeNamespace *ns;
@@ -5744,6 +5763,8 @@
return nvme_identify_nslist(n, req, false);
case NVME_ID_CNS_CS_NS_ACTIVE_LIST:
return nvme_identify_nslist_csi(n, req, true);
+ case NVME_ID_CNS_ENDURANCE_GROUP_LIST:
+ return nvme_endurance_group_list(n, req);
case NVME_ID_CNS_CS_NS_PRESENT_LIST:
return nvme_identify_nslist_csi(n, req, false);
case NVME_ID_CNS_NS_DESCR_LIST:
@@ -7122,8 +7143,8 @@
if (n->params.sriov_max_vfs) {
if (!pci_is_vf(pci_dev)) {
- for (i = 0; i < n->sec_ctrl_list.numcntl; i++) {
- sctrl = &n->sec_ctrl_list.sec[i];
+ for (i = 0; i < n->nr_sec_ctrls; i++) {
+ sctrl = &n->sec_ctrl_list[i];
nvme_virt_set_state(n, le16_to_cpu(sctrl->scid), false);
}
}
@@ -7805,6 +7826,11 @@
return false;
}
+ if (params->mqes < 1) {
+ error_setg(errp, "mqes property cannot be less than 1");
+ return false;
+ }
+
if (n->pmr.dev) {
if (params->msix_exclusive_bar) {
error_setg(errp, "not enough BARs available to enable PMR");
@@ -7842,12 +7868,6 @@
return false;
}
- if (params->sriov_max_vfs > NVME_MAX_VFS) {
- error_setg(errp, "sriov_max_vfs must be between 0 and %d",
- NVME_MAX_VFS);
- return false;
- }
-
if (params->cmb_size_mb) {
error_setg(errp, "CMB is not supported with SR-IOV");
return false;
@@ -7912,7 +7932,7 @@
static void nvme_init_state(NvmeCtrl *n)
{
NvmePriCtrlCap *cap = &n->pri_ctrl_cap;
- NvmeSecCtrlList *list = &n->sec_ctrl_list;
+ NvmeSecCtrlEntry *list = n->sec_ctrl_list;
NvmeSecCtrlEntry *sctrl;
PCIDevice *pci = PCI_DEVICE(n);
uint8_t max_vfs;
@@ -7937,9 +7957,9 @@
n->aer_reqs = g_new0(NvmeRequest *, n->params.aerl + 1);
QTAILQ_INIT(&n->aer_queue);
- list->numcntl = max_vfs;
+ n->nr_sec_ctrls = max_vfs;
for (i = 0; i < max_vfs; i++) {
- sctrl = &list->sec[i];
+ sctrl = &list[i];
sctrl->pcid = cpu_to_le16(n->cntlid);
sctrl->vfn = cpu_to_le16(i + 1);
}
@@ -8099,6 +8119,7 @@
uint8_t *pci_conf = pci_dev->config;
uint64_t bar_size;
unsigned msix_table_offset = 0, msix_pba_offset = 0;
+ unsigned nr_vectors;
int ret;
pci_conf[PCI_INTERRUPT_PIN] = 1;
@@ -8131,9 +8152,19 @@
assert(n->params.msix_qsize >= 1);
/* add one to max_ioqpairs to account for the admin queue pair */
- bar_size = nvme_mbar_size(n->params.max_ioqpairs + 1,
- n->params.msix_qsize, &msix_table_offset,
- &msix_pba_offset);
+ if (!pci_is_vf(pci_dev)) {
+ nr_vectors = n->params.msix_qsize;
+ bar_size = nvme_mbar_size(n->params.max_ioqpairs + 1,
+ nr_vectors, &msix_table_offset,
+ &msix_pba_offset);
+ } else {
+ NvmeCtrl *pn = NVME(pcie_sriov_get_pf(pci_dev));
+ NvmePriCtrlCap *cap = &pn->pri_ctrl_cap;
+
+ nr_vectors = le16_to_cpu(cap->vifrsm);
+ bar_size = nvme_mbar_size(le16_to_cpu(cap->vqfrsm), nr_vectors,
+ &msix_table_offset, &msix_pba_offset);
+ }
memory_region_init(&n->bar0, OBJECT(n), "nvme-bar0", bar_size);
memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme",
@@ -8147,7 +8178,7 @@
PCI_BASE_ADDRESS_MEM_TYPE_64, &n->bar0);
}
- ret = msix_init(pci_dev, n->params.msix_qsize,
+ ret = msix_init(pci_dev, nr_vectors,
&n->bar0, 0, msix_table_offset,
&n->bar0, 0, msix_pba_offset, 0, errp);
}
@@ -8289,7 +8320,7 @@
id->ctratt = cpu_to_le32(ctratt);
- NVME_CAP_SET_MQES(cap, 0x7ff);
+ NVME_CAP_SET_MQES(cap, n->params.mqes);
NVME_CAP_SET_CQR(cap, 1);
NVME_CAP_SET_TO(cap, 0xf);
NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_NVM);
@@ -8448,17 +8479,18 @@
DEFINE_PROP_UINT8("zoned.zasl", NvmeCtrl, params.zasl, 0),
DEFINE_PROP_BOOL("zoned.auto_transition", NvmeCtrl,
params.auto_transition_zones, true),
- DEFINE_PROP_UINT8("sriov_max_vfs", NvmeCtrl, params.sriov_max_vfs, 0),
+ DEFINE_PROP_UINT16("sriov_max_vfs", NvmeCtrl, params.sriov_max_vfs, 0),
DEFINE_PROP_UINT16("sriov_vq_flexible", NvmeCtrl,
params.sriov_vq_flexible, 0),
DEFINE_PROP_UINT16("sriov_vi_flexible", NvmeCtrl,
params.sriov_vi_flexible, 0),
- DEFINE_PROP_UINT8("sriov_max_vi_per_vf", NvmeCtrl,
- params.sriov_max_vi_per_vf, 0),
- DEFINE_PROP_UINT8("sriov_max_vq_per_vf", NvmeCtrl,
- params.sriov_max_vq_per_vf, 0),
+ DEFINE_PROP_UINT32("sriov_max_vi_per_vf", NvmeCtrl,
+ params.sriov_max_vi_per_vf, 0),
+ DEFINE_PROP_UINT32("sriov_max_vq_per_vf", NvmeCtrl,
+ params.sriov_max_vq_per_vf, 0),
DEFINE_PROP_BOOL("msix-exclusive-bar", NvmeCtrl, params.msix_exclusive_bar,
false),
+ DEFINE_PROP_UINT16("mqes", NvmeCtrl, params.mqes, 0x7ff),
DEFINE_PROP_END_OF_LIST(),
};
@@ -8520,7 +8552,7 @@
int i;
for (i = pcie_sriov_num_vfs(dev); i < old_num_vfs; i++) {
- sctrl = &n->sec_ctrl_list.sec[i];
+ sctrl = &n->sec_ctrl_list[i];
nvme_virt_set_state(n, le16_to_cpu(sctrl->scid), false);
}
}
diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
index bed8191..7819857 100644
--- a/hw/nvme/nvme.h
+++ b/hw/nvme/nvme.h
@@ -521,6 +521,7 @@
uint32_t num_queues; /* deprecated since 5.1 */
uint32_t max_ioqpairs;
uint16_t msix_qsize;
+ uint16_t mqes;
uint32_t cmb_size_mb;
uint8_t aerl;
uint32_t aer_max_queued;
@@ -531,11 +532,11 @@
bool auto_transition_zones;
bool legacy_cmb;
bool ioeventfd;
- uint8_t sriov_max_vfs;
+ uint16_t sriov_max_vfs;
uint16_t sriov_vq_flexible;
uint16_t sriov_vi_flexible;
- uint8_t sriov_max_vq_per_vf;
- uint8_t sriov_max_vi_per_vf;
+ uint32_t sriov_max_vq_per_vf;
+ uint32_t sriov_max_vi_per_vf;
bool msix_exclusive_bar;
} NvmeParams;
@@ -612,7 +613,8 @@
} features;
NvmePriCtrlCap pri_ctrl_cap;
- NvmeSecCtrlList sec_ctrl_list;
+ uint32_t nr_sec_ctrls;
+ NvmeSecCtrlEntry *sec_ctrl_list;
struct {
uint16_t vqrfap;
uint16_t virfap;
@@ -662,7 +664,7 @@
NvmeCtrl *pf = NVME(pcie_sriov_get_pf(pci_dev));
if (pci_is_vf(pci_dev)) {
- return &pf->sec_ctrl_list.sec[pcie_sriov_vf_number(pci_dev)];
+ return &pf->sec_ctrl_list[pcie_sriov_vf_number(pci_dev)];
}
return NULL;
@@ -671,12 +673,12 @@
static inline NvmeSecCtrlEntry *nvme_sctrl_for_cntlid(NvmeCtrl *n,
uint16_t cntlid)
{
- NvmeSecCtrlList *list = &n->sec_ctrl_list;
+ NvmeSecCtrlEntry *list = n->sec_ctrl_list;
uint8_t i;
- for (i = 0; i < list->numcntl; i++) {
- if (le16_to_cpu(list->sec[i].scid) == cntlid) {
- return &list->sec[i];
+ for (i = 0; i < n->nr_sec_ctrls; i++) {
+ if (le16_to_cpu(list[i].scid) == cntlid) {
+ return &list[i];
}
}
diff --git a/hw/nvme/subsys.c b/hw/nvme/subsys.c
index d30bb8b..77deaf2 100644
--- a/hw/nvme/subsys.c
+++ b/hw/nvme/subsys.c
@@ -17,13 +17,13 @@
static int nvme_subsys_reserve_cntlids(NvmeCtrl *n, int start, int num)
{
NvmeSubsystem *subsys = n->subsys;
- NvmeSecCtrlList *list = &n->sec_ctrl_list;
+ NvmeSecCtrlEntry *list = n->sec_ctrl_list;
NvmeSecCtrlEntry *sctrl;
int i, cnt = 0;
for (i = start; i < ARRAY_SIZE(subsys->ctrls) && cnt < num; i++) {
if (!subsys->ctrls[i]) {
- sctrl = &list->sec[cnt];
+ sctrl = &list[cnt];
sctrl->scid = cpu_to_le16(i);
subsys->ctrls[i] = SUBSYS_SLOT_RSVD;
cnt++;
@@ -36,12 +36,12 @@
static void nvme_subsys_unreserve_cntlids(NvmeCtrl *n)
{
NvmeSubsystem *subsys = n->subsys;
- NvmeSecCtrlList *list = &n->sec_ctrl_list;
+ NvmeSecCtrlEntry *list = n->sec_ctrl_list;
NvmeSecCtrlEntry *sctrl;
int i, cntlid;
for (i = 0; i < n->params.sriov_max_vfs; i++) {
- sctrl = &list->sec[i];
+ sctrl = &list[i];
cntlid = le16_to_cpu(sctrl->scid);
if (cntlid) {
@@ -61,6 +61,8 @@
if (pci_is_vf(&n->parent_obj)) {
cntlid = le16_to_cpu(sctrl->scid);
} else {
+ n->sec_ctrl_list = g_new0(NvmeSecCtrlEntry, num_vfs);
+
for (cntlid = 0; cntlid < ARRAY_SIZE(subsys->ctrls); cntlid++) {
if (!subsys->ctrls[cntlid]) {
break;
diff --git a/include/block/nvme.h b/include/block/nvme.h
index bb231d0..7c77d38 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -1074,6 +1074,7 @@
NVME_ID_CNS_CTRL_LIST = 0x13,
NVME_ID_CNS_PRIMARY_CTRL_CAP = 0x14,
NVME_ID_CNS_SECONDARY_CTRL_LIST = 0x15,
+ NVME_ID_CNS_ENDURANCE_GROUP_LIST = 0x19,
NVME_ID_CNS_CS_NS_PRESENT_LIST = 0x1a,
NVME_ID_CNS_CS_NS_PRESENT = 0x1b,
NVME_ID_CNS_IO_COMMAND_SET = 0x1c,