On Thu, 2025-09-04 at 10:12 +0000, Alex Markuze wrote: > ceph_get_reply_dir() may return a different, referenced inode when r_parent is stale and the parent directory lock is not held. > ceph_fill_trace() used that inode but failed to drop the reference when it differed from req->r_parent, leaking an inode reference. > > Keep the directory inode in a local and iput() it at function end if it does not match req->r_parent. I assume that you mean "in a local variable"? > > Signed-off-by: Alex Markuze <amarkuze@xxxxxxxxxx> > --- > fs/ceph/inode.c | 30 +++++++++++++++++++----------- > 1 file changed, 19 insertions(+), 11 deletions(-) > > diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c > index 470ee595ecf2..cffa2cd7b530 100644 > --- a/fs/ceph/inode.c > +++ b/fs/ceph/inode.c > @@ -1584,6 +1584,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) > struct ceph_vino tvino, dvino; > struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); > struct ceph_client *cl = fsc->client; > + struct inode *parent_dir = NULL; > int err = 0; > > doutc(cl, "%p is_dentry %d is_target %d\n", req, > @@ -1601,9 +1602,13 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) > * r_parent may be stale, in cases when R_PARENT_LOCKED is not set, > * so we need to get the correct inode > */ > - struct inode *dir = ceph_get_reply_dir(sb, req->r_parent, rinfo); > - if (dir) { > - err = ceph_fill_inode(dir, NULL, &rinfo->diri, > + parent_dir = ceph_get_reply_dir(sb, req->r_parent, rinfo); > + if (unlikely(IS_ERR(parent_dir))) { > + err = PTR_ERR(parent_dir); > + goto done; > + } > + if (parent_dir) { > + err = ceph_fill_inode(parent_dir, NULL, &rinfo->diri, > rinfo->dirfrag, session, -1, > &req->r_caps_reservation); > if (err < 0) > @@ -1612,14 +1617,14 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) > WARN_ON_ONCE(1); > } > > - if (dir && req->r_op == CEPH_MDS_OP_LOOKUPNAME && > + if (parent_dir && req->r_op == CEPH_MDS_OP_LOOKUPNAME && > test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags) && > !test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) { > bool is_nokey = false; > struct qstr dname; > struct dentry *dn, *parent; > struct fscrypt_str oname = FSTR_INIT(NULL, 0); > - struct ceph_fname fname = { .dir = dir, > + struct ceph_fname fname = { .dir = parent_dir, > .name = rinfo->dname, > .ctext = rinfo->altname, > .name_len = rinfo->dname_len, > @@ -1628,10 +1633,10 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) > BUG_ON(!rinfo->head->is_target); > BUG_ON(req->r_dentry); > > - parent = d_find_any_alias(dir); > + parent = d_find_any_alias(parent_dir); > BUG_ON(!parent); > > - err = ceph_fname_alloc_buffer(dir, &oname); > + err = ceph_fname_alloc_buffer(parent_dir, &oname); > if (err < 0) { > dput(parent); > goto done; > @@ -1640,7 +1645,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) > err = ceph_fname_to_usr(&fname, NULL, &oname, &is_nokey); > if (err < 0) { > dput(parent); > - ceph_fname_free_buffer(dir, &oname); > + ceph_fname_free_buffer(parent_dir, &oname); > goto done; > } > dname.name = oname.name; > @@ -1659,7 +1664,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) > dname.len, dname.name, dn); > if (!dn) { > dput(parent); > - ceph_fname_free_buffer(dir, &oname); > + ceph_fname_free_buffer(parent_dir, &oname); > err = -ENOMEM; > goto done; > } > @@ -1674,12 +1679,12 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) > ceph_snap(d_inode(dn)) != tvino.snap)) { > doutc(cl, " dn %p points to wrong inode %p\n", > dn, d_inode(dn)); > - ceph_dir_clear_ordered(dir); > + ceph_dir_clear_ordered(parent_dir); > d_delete(dn); > dput(dn); > goto retry_lookup; > } > - ceph_fname_free_buffer(dir, &oname); > + ceph_fname_free_buffer(parent_dir, &oname); > > req->r_dentry = dn; > dput(parent); > @@ -1869,6 +1874,9 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) > &dvino, ptvino); > } > done: > + /* Drop extra ref from ceph_get_reply_dir() if it returned a new inode */ > + if (unlikely(!IS_ERR_OR_NULL(parent_dir) && parent_dir != req->r_parent)) The '&&' implies to check both conditions. However, if we have parent_dir equal to NULL or some error code, then it doesn't make sense to check the second condition. I don't think that it could introduce some issue here. But, potentially, it could be the reason of rarely triggered and non-trivial issue. What do you think? Do I have point here? What if we can do like this: parent_dir = parent_dir != req->r_parent ? parent_dir : NULL; if (parent_dir) iput(parent_dir); > + iput(parent_dir); Potentially, we could jump here if we have the error. > doutc(cl, "done err=%d\n", err); > return err; > } Mostly, looks good. Reviewed-by: Viacheslav Dubeyko <Slava.Dubeyko@xxxxxxx> Thanks, Slava.