On Tue, May 6, 2025 at 5:31 AM Kanchan Joshi <joshi.k@xxxxxxxxxxx> wrote:
>
> From: Keith Busch <kbusch@xxxxxxxxxx>
>
> Register the device data placement limits if supported. This is just
> registering the limits with the block layer. Nothing beyond reporting
> these attributes is happening in this patch.
>
> Reviewed-by: Hannes Reinecke <hare@xxxxxxx>
> Reviewed-by: Nitesh Shetty <nj.shetty@xxxxxxxxxxx>
> Reviewed-by: Christoph Hellwig <hch@xxxxxx>
> Signed-off-by: Keith Busch <kbusch@xxxxxxxxxx>
> Signed-off-by: Kanchan Joshi <joshi.k@xxxxxxxxxxx>
> ---
>  drivers/nvme/host/core.c | 144 +++++++++++++++++++++++++++++++++++++++
>  drivers/nvme/host/nvme.h |   2 +
>  2 files changed, 146 insertions(+)
>
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index dd71b4c2b7b7..f25e03ff03df 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -38,6 +38,8 @@ struct nvme_ns_info {
>         u32 nsid;
>         __le32 anagrpid;
>         u8 pi_offset;
> +       u16 endgid;
> +       u64 runs;
>         bool is_shared;
>         bool is_readonly;
>         bool is_ready;
> @@ -1611,6 +1613,7 @@ static int nvme_ns_info_from_identify(struct nvme_ctrl *ctrl,
>                 info->is_shared = id->nmic & NVME_NS_NMIC_SHARED;
>                 info->is_readonly = id->nsattr & NVME_NS_ATTR_RO;
>                 info->is_ready = true;
> +               info->endgid = le16_to_cpu(id->endgid);
>                 if (ctrl->quirks & NVME_QUIRK_BOGUS_NID) {
>                         dev_info(ctrl->device,
>                                 "Ignoring bogus Namespace Identifiers\n");
> @@ -1651,6 +1654,7 @@ static int nvme_ns_info_from_id_cs_indep(struct nvme_ctrl *ctrl,
>                 info->is_ready = id->nstat & NVME_NSTAT_NRDY;
>                 info->is_rotational = id->nsfeat & NVME_NS_ROTATIONAL;
>                 info->no_vwc = id->nsfeat & NVME_NS_VWC_NOT_PRESENT;
> +               info->endgid = le16_to_cpu(id->endgid);
>         }
>         kfree(id);
>         return ret;
> @@ -2155,6 +2159,132 @@ static int nvme_update_ns_info_generic(struct nvme_ns *ns,
>         return ret;
>  }
>
> +static int nvme_query_fdp_granularity(struct nvme_ctrl *ctrl,
> +                                     struct nvme_ns_info *info, u8 fdp_idx)
> +{
> +       struct nvme_fdp_config_log hdr, *h;
> +       struct nvme_fdp_config_desc *desc;
> +       size_t size = sizeof(hdr);
> +       void *log, *end;
> +       int i, n, ret;
> +
> +       ret = nvme_get_log_lsi(ctrl, 0, NVME_LOG_FDP_CONFIGS, 0,
> +                              NVME_CSI_NVM, &hdr, size, 0, info->endgid);
> +       if (ret) {
> +               dev_warn(ctrl->device,
> +                        "FDP configs log header status:0x%x endgid:%d\n", ret,
> +                        info->endgid);
> +               return ret;
> +       }
> +
> +       size = le32_to_cpu(hdr.sze);
> +       if (size > PAGE_SIZE * MAX_ORDER_NR_PAGES) {
> +               dev_warn(ctrl->device, "FDP config size too large:%zu\n",
> +                        size);
> +               return 0;
> +       }
> +
> +       h = kvmalloc(size, GFP_KERNEL);
> +       if (!h)
> +               return -ENOMEM;
> +
> +       ret = nvme_get_log_lsi(ctrl, 0, NVME_LOG_FDP_CONFIGS, 0,
> +                              NVME_CSI_NVM, h, size, 0, info->endgid);
> +       if (ret) {
> +               dev_warn(ctrl->device,
> +                        "FDP configs log status:0x%x endgid:%d\n", ret,
> +                        info->endgid);
> +               goto out;
> +       }
> +
> +       n = le16_to_cpu(h->numfdpc) + 1;
> +       if (fdp_idx > n) {
> +               dev_warn(ctrl->device, "FDP index:%d out of range:%d\n",
> +                        fdp_idx, n);
> +               /* Proceed without registering FDP streams */
> +               ret = 0;
> +               goto out;
> +       }
> +
> +       log = h + 1;
> +       desc = log;
> +       end = log + size - sizeof(*h);
> +       for (i = 0; i < fdp_idx; i++) {
> +               log += le16_to_cpu(desc->dsze);
> +               desc = log;
> +               if (log >= end) {
> +                       dev_warn(ctrl->device,
> +                                "FDP invalid config descriptor list\n");
> +                       ret = 0;
> +                       goto out;
> +               }
> +       }
> +
> +       if (le32_to_cpu(desc->nrg) > 1) {
> +               dev_warn(ctrl->device, "FDP NRG > 1 not supported\n");
> +               ret = 0;
> +               goto out;
> +       }
> +
> +       info->runs = le64_to_cpu(desc->runs);
> +out:
> +       kvfree(h);
> +       return ret;
> +}
> +
> +static int nvme_query_fdp_info(struct nvme_ns *ns, struct nvme_ns_info *info)
> +{
> +       struct nvme_ns_head *head = ns->head;
> +       struct nvme_ctrl *ctrl = ns->ctrl;
> +       struct nvme_fdp_ruh_status *ruhs;
> +       struct nvme_fdp_config fdp;
> +       struct nvme_command c = {};
> +       size_t size;
> +       int ret;
> +
> +       /*
> +        * The FDP configuration is static for the lifetime of the namespace,
> +        * so return immediately if we've already registered this namespace's
> +        * streams.
> +        */
> +       if (head->nr_plids)
> +               return 0;
> +
> +       ret = nvme_get_features(ctrl, NVME_FEAT_FDP, info->endgid, NULL, 0,
> +                               &fdp);
> +       if (ret) {
> +               dev_warn(ctrl->device, "FDP get feature status:0x%x\n", ret);
> +               return ret;
> +       }
> +
> +       if (!(fdp.flags & FDPCFG_FDPE))
> +               return 0;
> +
> +       ret = nvme_query_fdp_granularity(ctrl, info, fdp.fdpcidx);
> +       if (!info->runs)
> +               return ret;
> +
> +       size = struct_size(ruhs, ruhsd, S8_MAX - 1);
> +       ruhs = kzalloc(size, GFP_KERNEL);
> +       if (!ruhs)
> +               return -ENOMEM;
> +
> +       c.imr.opcode = nvme_cmd_io_mgmt_recv;
> +       c.imr.nsid = cpu_to_le32(head->ns_id);
> +       c.imr.mo = NVME_IO_MGMT_RECV_MO_RUHS;
> +       c.imr.numd = cpu_to_le32(nvme_bytes_to_numd(size));
> +       ret = nvme_submit_sync_cmd(ns->queue, &c, ruhs, size);
> +       if (ret) {
> +               dev_warn(ctrl->device, "FDP io-mgmt status:0x%x\n", ret);
> +               goto free;
> +       }
> +
> +       head->nr_plids = le16_to_cpu(ruhs->nruhsd);
> +free:
> +       kfree(ruhs);
> +       return ret;
> +}
> +
>  static int nvme_update_ns_info_block(struct nvme_ns *ns,
>                 struct nvme_ns_info *info)
>  {
> @@ -2192,6 +2322,12 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
>                 goto out;
>         }
>
> +       if (ns->ctrl->ctratt & NVME_CTRL_ATTR_FDPS) {
> +               ret = nvme_query_fdp_info(ns, info);
> +               if (ret < 0)
> +                       goto out;
> +       }
> +
>         lim = queue_limits_start_update(ns->disk->queue);
>
>         memflags = blk_mq_freeze_queue(ns->disk->queue);
> @@ -2225,6 +2361,12 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
>         if (!nvme_init_integrity(ns->head, &lim, info))
>                 capacity = 0;
>
> +       lim.max_write_streams = ns->head->nr_plids;
> +       if (lim.max_write_streams)
> +               lim.write_stream_granularity = max(info->runs, U32_MAX);

What is the purpose of this max(..., U32_MAX)? Should it be min() instead?

Best,
Caleb

> +       else
> +               lim.write_stream_granularity = 0;
> +
>         ret = queue_limits_commit_update(ns->disk->queue, &lim);
>         if (ret) {
>                 blk_mq_unfreeze_queue(ns->disk->queue, memflags);
> @@ -2328,6 +2470,8 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info)
>                         ns->head->disk->flags |= GENHD_FL_HIDDEN;
>                 else
>                         nvme_init_integrity(ns->head, &lim, info);
> +               lim.max_write_streams = ns_lim->max_write_streams;
> +               lim.write_stream_granularity = ns_lim->write_stream_granularity;
>                 ret = queue_limits_commit_update(ns->head->disk->queue, &lim);
>
>                 set_capacity_and_notify(ns->head->disk, get_capacity(ns->disk));
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index aedb734283b8..3e14daa4ed3e 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -496,6 +496,8 @@ struct nvme_ns_head {
>         struct device           cdev_device;
>
>         struct gendisk          *disk;
> +
> +       u16                     nr_plids;
>  #ifdef CONFIG_NVME_MULTIPATH
>         struct bio_list         requeue_list;
>         spinlock_t              requeue_lock;
> --
> 2.25.1
>
>
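
For reference, a minimal standalone sketch of the max()/min() difference raised
above, using a hypothetical RUNS value and plain-C stand-ins for the kernel's
min()/max() macros (just an illustration, not the patch's code):

    #include <stdio.h>
    #include <stdint.h>

    /* plain-C stand-ins for the kernel's min()/max() helpers */
    #define MIN(a, b) ((a) < (b) ? (a) : (b))
    #define MAX(a, b) ((a) > (b) ? (a) : (b))

    int main(void)
    {
            uint64_t runs = 64 * 1024;      /* hypothetical 64 KiB RUNS */

            /* max() can only raise the value: result is never below U32_MAX (~4 GiB) */
            printf("max(runs, U32_MAX) = %llu\n",
                   (unsigned long long)MAX(runs, UINT32_MAX));

            /* min() instead clamps an oversized 64-bit RUNS into the 32-bit limit */
            printf("min(runs, U32_MAX) = %llu\n",
                   (unsigned long long)MIN(runs, UINT32_MAX));
            return 0;
    }

With max() a small reported RUNS is replaced by U32_MAX, whereas min() keeps it
and only caps values that don't fit, which seems to be what registering the
granularity limit intends.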