On Tue, May 6, 2025 at 5:31 AM Kanchan Joshi <joshi.k@xxxxxxxxxxx> wrote:
>
> From: Keith Busch <kbusch@xxxxxxxxxx>
>
> Register the device data placement limits if supported. This is just
> registering the limits with the block layer. Nothing beyond reporting
> these attributes is happening in this patch.
>
> Reviewed-by: Hannes Reinecke <hare@xxxxxxx>
> Reviewed-by: Nitesh Shetty <nj.shetty@xxxxxxxxxxx>
> Reviewed-by: Christoph Hellwig <hch@xxxxxx>
> Signed-off-by: Keith Busch <kbusch@xxxxxxxxxx>
> Signed-off-by: Kanchan Joshi <joshi.k@xxxxxxxxxxx>
> ---
>  drivers/nvme/host/core.c | 144 +++++++++++++++++++++++++++++++++++++++
>  drivers/nvme/host/nvme.h |   2 +
>  2 files changed, 146 insertions(+)
>
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index dd71b4c2b7b7..f25e03ff03df 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -38,6 +38,8 @@ struct nvme_ns_info {
>         u32 nsid;
>         __le32 anagrpid;
>         u8 pi_offset;
> +       u16 endgid;
> +       u64 runs;
>         bool is_shared;
>         bool is_readonly;
>         bool is_ready;
> @@ -1611,6 +1613,7 @@ static int nvme_ns_info_from_identify(struct nvme_ctrl *ctrl,
>                 info->is_shared = id->nmic & NVME_NS_NMIC_SHARED;
>                 info->is_readonly = id->nsattr & NVME_NS_ATTR_RO;
>                 info->is_ready = true;
> +               info->endgid = le16_to_cpu(id->endgid);
>                 if (ctrl->quirks & NVME_QUIRK_BOGUS_NID) {
>                         dev_info(ctrl->device,
>                                 "Ignoring bogus Namespace Identifiers\n");
> @@ -1651,6 +1654,7 @@ static int nvme_ns_info_from_id_cs_indep(struct nvme_ctrl *ctrl,
>                 info->is_ready = id->nstat & NVME_NSTAT_NRDY;
>                 info->is_rotational = id->nsfeat & NVME_NS_ROTATIONAL;
>                 info->no_vwc = id->nsfeat & NVME_NS_VWC_NOT_PRESENT;
> +               info->endgid = le16_to_cpu(id->endgid);
>         }
>         kfree(id);
>         return ret;
> @@ -2155,6 +2159,132 @@ static int nvme_update_ns_info_generic(struct nvme_ns *ns,
>         return ret;
>  }
>
> +static int nvme_query_fdp_granularity(struct nvme_ctrl *ctrl,
> +                                     struct nvme_ns_info *info, u8 fdp_idx)
> +{
> +       struct nvme_fdp_config_log hdr, *h;
> +       struct nvme_fdp_config_desc *desc;
> +       size_t size = sizeof(hdr);
> +       void *log, *end;
> +       int i, n, ret;
> +
> +       ret = nvme_get_log_lsi(ctrl, 0, NVME_LOG_FDP_CONFIGS, 0,
> +                              NVME_CSI_NVM, &hdr, size, 0, info->endgid);
> +       if (ret) {
> +               dev_warn(ctrl->device,
> +                        "FDP configs log header status:0x%x endgid:%d\n", ret,
> +                        info->endgid);
> +               return ret;
> +       }
> +
> +       size = le32_to_cpu(hdr.sze);
> +       if (size > PAGE_SIZE * MAX_ORDER_NR_PAGES) {
> +               dev_warn(ctrl->device, "FDP config size too large:%zu\n",
> +                        size);
> +               return 0;
> +       }
> +
> +       h = kvmalloc(size, GFP_KERNEL);
> +       if (!h)
> +               return -ENOMEM;
> +
> +       ret = nvme_get_log_lsi(ctrl, 0, NVME_LOG_FDP_CONFIGS, 0,
> +                              NVME_CSI_NVM, h, size, 0, info->endgid);
> +       if (ret) {
> +               dev_warn(ctrl->device,
> +                        "FDP configs log status:0x%x endgid:%d\n", ret,
> +                        info->endgid);
> +               goto out;
> +       }
> +
> +       n = le16_to_cpu(h->numfdpc) + 1;
> +       if (fdp_idx > n) {
> +               dev_warn(ctrl->device, "FDP index:%d out of range:%d\n",
> +                        fdp_idx, n);
> +               /* Proceed without registering FDP streams */
> +               ret = 0;
> +               goto out;
> +       }
> +
> +       log = h + 1;
> +       desc = log;
> +       end = log + size - sizeof(*h);
> +       for (i = 0; i < fdp_idx; i++) {
> +               log += le16_to_cpu(desc->dsze);
> +               desc = log;
> +               if (log >= end) {
> +                       dev_warn(ctrl->device,
> +                                "FDP invalid config descriptor list\n");
> +                       ret = 0;
> +                       goto out;
> +               }
> +       }
> +
> +       if (le32_to_cpu(desc->nrg) > 1) {
> +               dev_warn(ctrl->device, "FDP NRG > 1 not supported\n");
> +               ret = 0;
> +               goto out;
> +       }
> +
> +       info->runs = le64_to_cpu(desc->runs);
> +out:
> +       kvfree(h);
> +       return ret;
> +}
> +
> +static int nvme_query_fdp_info(struct nvme_ns *ns, struct nvme_ns_info *info)
> +{
> +       struct nvme_ns_head *head = ns->head;
> +       struct nvme_ctrl *ctrl = ns->ctrl;
> +       struct nvme_fdp_ruh_status *ruhs;
> +       struct nvme_fdp_config fdp;
> +       struct nvme_command c = {};
> +       size_t size;
> +       int ret;
> +
> +       /*
> +        * The FDP configuration is static for the lifetime of the namespace,
> +        * so return immediately if we've already registered this namespace's
> +        * streams.
> +        */
> +       if (head->nr_plids)
> +               return 0;
> +
> +       ret = nvme_get_features(ctrl, NVME_FEAT_FDP, info->endgid, NULL, 0,
> +                               &fdp);
> +       if (ret) {
> +               dev_warn(ctrl->device, "FDP get feature status:0x%x\n", ret);
> +               return ret;
> +       }
> +
> +       if (!(fdp.flags & FDPCFG_FDPE))
> +               return 0;
> +
> +       ret = nvme_query_fdp_granularity(ctrl, info, fdp.fdpcidx);
> +       if (!info->runs)
> +               return ret;
> +
> +       size = struct_size(ruhs, ruhsd, S8_MAX - 1);
> +       ruhs = kzalloc(size, GFP_KERNEL);
> +       if (!ruhs)
> +               return -ENOMEM;
> +
> +       c.imr.opcode = nvme_cmd_io_mgmt_recv;
> +       c.imr.nsid = cpu_to_le32(head->ns_id);
> +       c.imr.mo = NVME_IO_MGMT_RECV_MO_RUHS;
> +       c.imr.numd = cpu_to_le32(nvme_bytes_to_numd(size));
> +       ret = nvme_submit_sync_cmd(ns->queue, &c, ruhs, size);
> +       if (ret) {
> +               dev_warn(ctrl->device, "FDP io-mgmt status:0x%x\n", ret);
> +               goto free;
> +       }
> +
> +       head->nr_plids = le16_to_cpu(ruhs->nruhsd);
> +free:
> +       kfree(ruhs);
> +       return ret;
> +}
> +
>  static int nvme_update_ns_info_block(struct nvme_ns *ns,
>                 struct nvme_ns_info *info)
>  {
> @@ -2192,6 +2322,12 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
>                 goto out;
>         }
>
> +       if (ns->ctrl->ctratt & NVME_CTRL_ATTR_FDPS) {
> +               ret = nvme_query_fdp_info(ns, info);
> +               if (ret < 0)
> +                       goto out;
> +       }
> +
>         lim = queue_limits_start_update(ns->disk->queue);
>
>         memflags = blk_mq_freeze_queue(ns->disk->queue);
> @@ -2225,6 +2361,12 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
>         if (!nvme_init_integrity(ns->head, &lim, info))
>                 capacity = 0;
>
> +       lim.max_write_streams = ns->head->nr_plids;
> +       if (lim.max_write_streams)
> +               lim.write_stream_granularity = max(info->runs, U32_MAX);

What is the purpose of this max(..., U32_MAX)? Should it be min() instead?

Best,
Caleb

> +       else
> +               lim.write_stream_granularity = 0;
> +
>         ret = queue_limits_commit_update(ns->disk->queue, &lim);
>         if (ret) {
>                 blk_mq_unfreeze_queue(ns->disk->queue, memflags);
> @@ -2328,6 +2470,8 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info)
>                         ns->head->disk->flags |= GENHD_FL_HIDDEN;
>                 else
>                         nvme_init_integrity(ns->head, &lim, info);
> +               lim.max_write_streams = ns_lim->max_write_streams;
> +               lim.write_stream_granularity = ns_lim->write_stream_granularity;
>                 ret = queue_limits_commit_update(ns->head->disk->queue, &lim);
>
>                 set_capacity_and_notify(ns->head->disk, get_capacity(ns->disk));
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index aedb734283b8..3e14daa4ed3e 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -496,6 +496,8 @@ struct nvme_ns_head {
>         struct device           cdev_device;
>
>         struct gendisk          *disk;
> +
> +       u16                     nr_plids;
>  #ifdef CONFIG_NVME_MULTIPATH
>         struct bio_list         requeue_list;
>         spinlock_t              requeue_lock;
> --
> 2.25.1
>
>
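
For reference, a minimal standalone sketch of the max()/min() difference raised
above, using a hypothetical RUNS value and plain-C stand-ins for the kernel's
min()/max() macros (just an illustration, not the patch's code):

    #include <stdio.h>
    #include <stdint.h>

    /* plain-C stand-ins for the kernel's min()/max() helpers */
    #define MIN(a, b) ((a) < (b) ? (a) : (b))
    #define MAX(a, b) ((a) > (b) ? (a) : (b))

    int main(void)
    {
            uint64_t runs = 64 * 1024;      /* hypothetical 64 KiB RUNS */

            /* max() can only raise the value: result is never below U32_MAX (~4 GiB) */
            printf("max(runs, U32_MAX) = %llu\n",
                   (unsigned long long)MAX(runs, UINT32_MAX));

            /* min() instead clamps an oversized 64-bit RUNS into the 32-bit limit */
            printf("min(runs, U32_MAX) = %llu\n",
                   (unsigned long long)MIN(runs, UINT32_MAX));
            return 0;
    }

With max() a small reported RUNS is replaced by U32_MAX, whereas min() keeps it
and only caps values that don't fit, which seems to be what registering the
granularity limit intends.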