Re: [PATCH v2 2/2] ceph/inode: drop extra reference from ceph_get_reply_dir() in ceph_fill_trace()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Sep 4, 2025 at 12:12 PM Alex Markuze <amarkuze@xxxxxxxxxx> wrote:
>
> ceph_get_reply_dir() may return a different, referenced inode when r_parent is stale and the parent directory lock is not held.
> ceph_fill_trace() used that inode but failed to drop the reference when it differed from req->r_parent, leaking an inode reference.
>
> Keep the directory inode in a local and iput() it at function end if it does not match req->r_parent.
>
> Signed-off-by: Alex Markuze <amarkuze@xxxxxxxxxx>
> ---
>  fs/ceph/inode.c | 30 +++++++++++++++++++-----------
>  1 file changed, 19 insertions(+), 11 deletions(-)
>
> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
> index 470ee595ecf2..cffa2cd7b530 100644
> --- a/fs/ceph/inode.c
> +++ b/fs/ceph/inode.c
> @@ -1584,6 +1584,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
>         struct ceph_vino tvino, dvino;
>         struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb);
>         struct ceph_client *cl = fsc->client;
> +       struct inode *parent_dir = NULL;
>         int err = 0;
>
>         doutc(cl, "%p is_dentry %d is_target %d\n", req,
> @@ -1601,9 +1602,13 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
>                  * r_parent may be stale, in cases when R_PARENT_LOCKED is not set,
>                  * so we need to get the correct inode
>                  */
> -               struct inode *dir = ceph_get_reply_dir(sb, req->r_parent, rinfo);
> -               if (dir) {
> -                       err = ceph_fill_inode(dir, NULL, &rinfo->diri,
> +               parent_dir = ceph_get_reply_dir(sb, req->r_parent, rinfo);
> +               if (unlikely(IS_ERR(parent_dir))) {
> +                       err = PTR_ERR(parent_dir);
> +                       goto done;
> +               }
> +               if (parent_dir) {
> +                       err = ceph_fill_inode(parent_dir, NULL, &rinfo->diri,
>                                               rinfo->dirfrag, session, -1,
>                                               &req->r_caps_reservation);
>                         if (err < 0)
> @@ -1612,14 +1617,14 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
>                         WARN_ON_ONCE(1);
>                 }
>
> -               if (dir && req->r_op == CEPH_MDS_OP_LOOKUPNAME &&
> +               if (parent_dir && req->r_op == CEPH_MDS_OP_LOOKUPNAME &&
>                     test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags) &&
>                     !test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
>                         bool is_nokey = false;
>                         struct qstr dname;
>                         struct dentry *dn, *parent;
>                         struct fscrypt_str oname = FSTR_INIT(NULL, 0);
> -                       struct ceph_fname fname = { .dir        = dir,
> +                       struct ceph_fname fname = { .dir        = parent_dir,
>                                                     .name       = rinfo->dname,
>                                                     .ctext      = rinfo->altname,
>                                                     .name_len   = rinfo->dname_len,
> @@ -1628,10 +1633,10 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
>                         BUG_ON(!rinfo->head->is_target);
>                         BUG_ON(req->r_dentry);
>
> -                       parent = d_find_any_alias(dir);
> +                       parent = d_find_any_alias(parent_dir);
>                         BUG_ON(!parent);
>
> -                       err = ceph_fname_alloc_buffer(dir, &oname);
> +                       err = ceph_fname_alloc_buffer(parent_dir, &oname);
>                         if (err < 0) {
>                                 dput(parent);
>                                 goto done;
> @@ -1640,7 +1645,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
>                         err = ceph_fname_to_usr(&fname, NULL, &oname, &is_nokey);
>                         if (err < 0) {
>                                 dput(parent);
> -                               ceph_fname_free_buffer(dir, &oname);
> +                               ceph_fname_free_buffer(parent_dir, &oname);
>                                 goto done;
>                         }
>                         dname.name = oname.name;
> @@ -1659,7 +1664,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
>                                       dname.len, dname.name, dn);
>                                 if (!dn) {
>                                         dput(parent);
> -                                       ceph_fname_free_buffer(dir, &oname);
> +                                       ceph_fname_free_buffer(parent_dir, &oname);
>                                         err = -ENOMEM;
>                                         goto done;
>                                 }
> @@ -1674,12 +1679,12 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
>                                     ceph_snap(d_inode(dn)) != tvino.snap)) {
>                                 doutc(cl, " dn %p points to wrong inode %p\n",
>                                       dn, d_inode(dn));
> -                               ceph_dir_clear_ordered(dir);
> +                               ceph_dir_clear_ordered(parent_dir);
>                                 d_delete(dn);
>                                 dput(dn);
>                                 goto retry_lookup;
>                         }
> -                       ceph_fname_free_buffer(dir, &oname);
> +                       ceph_fname_free_buffer(parent_dir, &oname);
>
>                         req->r_dentry = dn;
>                         dput(parent);
> @@ -1869,6 +1874,9 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
>                                             &dvino, ptvino);
>         }
>  done:
> +       /* Drop extra ref from ceph_get_reply_dir() if it returned a new inode */
> +       if (unlikely(!IS_ERR_OR_NULL(parent_dir) && parent_dir != req->r_parent))
> +               iput(parent_dir);
>         doutc(cl, "done err=%d\n", err);
>         return err;
>  }
> --
> 2.34.1
>

Folded into "ceph: fix client race condition where r_parent becomes stale
before sending message" along with the following kerneldoc fixup

  * ceph_mdsc_build_path - build a path string to a given dentry
  * @dentry: dentry to which path should be built
- * @plen: returned length of string
- * @pbase: returned base inode number
+ * @path_info: output path, length, base ino+snap, and freepath ownership flag
  * @for_wire: is this path going to be sent to the MDS?
  *

and queued up for 6.17-rc6.

Thanks,

                Ilya





[Index of Archives]     [CEPH Users]     [Ceph Large]     [Ceph Dev]     [Information on CEPH]     [Linux BTRFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux