On Wed, 2025-07-30 at 15:18 +0000, Alex Markuze wrote: > Add validation to ensure the cached parent directory inode matches the > directory info in MDS replies. This prevents client-side race conditions > where concurrent operations (e.g. rename) cause r_parent to become stale > between request initiation and reply processing, which could lead to > applying state changes to incorrect directory inodes. > --- > fs/ceph/mds_client.c | 67 +++++++++++++++++++++++++++++++------------- > 1 file changed, 47 insertions(+), 20 deletions(-) > > diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c > index 8d9fc5e18b17..a164783fc1e1 100644 > --- a/fs/ceph/mds_client.c > +++ b/fs/ceph/mds_client.c > @@ -2853,7 +2853,7 @@ char *ceph_mdsc_build_path(struct ceph_mds_client *mdsc, struct dentry *dentry, > > static int build_dentry_path(struct ceph_mds_client *mdsc, struct dentry *dentry, > struct inode *dir, const char **ppath, int *ppathlen, > - u64 *pino, bool *pfreepath, bool parent_locked) > + struct ceph_vino *pvino, bool *pfreepath, bool parent_locked) We have too much arguments here. It looks like we need to introduce some structure. > { > char *path; > > @@ -2862,23 +2862,29 @@ static int build_dentry_path(struct ceph_mds_client *mdsc, struct dentry *dentry > dir = d_inode_rcu(dentry->d_parent); > if (dir && parent_locked && ceph_snap(dir) == CEPH_NOSNAP && > !IS_ENCRYPTED(dir)) { > - *pino = ceph_ino(dir); > + pvino->ino = ceph_ino(dir); > + pvino->snap = ceph_snap(dir); > rcu_read_unlock(); > *ppath = dentry->d_name.name; > *ppathlen = dentry->d_name.len; > return 0; > } > rcu_read_unlock(); > - path = ceph_mdsc_build_path(mdsc, dentry, ppathlen, pino, 1); > + path = ceph_mdsc_build_path(mdsc, dentry, ppathlen, &pvino->ino, 1); > if (IS_ERR(path)) > return PTR_ERR(path); > *ppath = path; > *pfreepath = true; > + /* For paths built by ceph_mdsc_build_path, we need to get snap from dentry */ I believe the multi-line comment could be good here. > + if (dentry && d_inode(dentry)) Could dentry be really NULL here? > + pvino->snap = ceph_snap(d_inode(dentry)); > + else > + pvino->snap = CEPH_NOSNAP; > return 0; > } > > static int build_inode_path(struct inode *inode, > - const char **ppath, int *ppathlen, u64 *pino, > + const char **ppath, int *ppathlen, struct ceph_vino *pvino, > bool *pfreepath) We have too much arguments here too. > { > struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); > @@ -2886,17 +2892,19 @@ static int build_inode_path(struct inode *inode, > char *path; > > if (ceph_snap(inode) == CEPH_NOSNAP) { > - *pino = ceph_ino(inode); > + pvino->ino = ceph_ino(inode); > + pvino->snap = ceph_snap(inode); > *ppathlen = 0; > return 0; > } > dentry = d_find_alias(inode); > - path = ceph_mdsc_build_path(mdsc, dentry, ppathlen, pino, 1); > + path = ceph_mdsc_build_path(mdsc, dentry, ppathlen, &pvino->ino, 1); > dput(dentry); > if (IS_ERR(path)) > return PTR_ERR(path); > *ppath = path; > *pfreepath = true; > + pvino->snap = ceph_snap(inode); > return 0; > } > > @@ -2907,7 +2915,7 @@ static int build_inode_path(struct inode *inode, > static int set_request_path_attr(struct ceph_mds_client *mdsc, struct inode *rinode, > struct dentry *rdentry, struct inode *rdiri, > const char *rpath, u64 rino, const char **ppath, > - int *pathlen, u64 *ino, bool *freepath, > + int *pathlen, struct ceph_vino *p_vino, bool *freepath, > bool parent_locked) Ditto. :) Thanks, Slava. > { > struct ceph_client *cl = mdsc->fsc->client; > @@ -2915,16 +2923,17 @@ static int set_request_path_attr(struct ceph_mds_client *mdsc, struct inode *rin > int r = 0; > > if (rinode) { > - r = build_inode_path(rinode, ppath, pathlen, ino, freepath); > + r = build_inode_path(rinode, ppath, pathlen, p_vino, freepath); > boutc(cl, " inode %p %llx.%llx\n", rinode, ceph_ino(rinode), > - ceph_snap(rinode)); > + ceph_snap(rinode)); > } else if (rdentry) { > - r = build_dentry_path(mdsc, rdentry, rdiri, ppath, pathlen, ino, > - freepath, parent_locked); > + r = build_dentry_path(mdsc, rdentry, rdiri, ppath, pathlen, p_vino, > + freepath, parent_locked); > CEPH_SAN_STRNCPY(result_str, sizeof(result_str), *ppath, *pathlen); > - boutc(cl, " dentry %p %llx/%s\n", rdentry, *ino, result_str); > + boutc(cl, " dentry %p %llx/%s\n", rdentry, p_vino->ino, result_str); > } else if (rpath || rino) { > - *ino = rino; > + p_vino->ino = rino; > + p_vino->snap = CEPH_NOSNAP; > *ppath = rpath; > *pathlen = rpath ? strlen(rpath) : 0; > CEPH_SAN_STRNCPY(result_str, sizeof(result_str), rpath, *pathlen); > @@ -3007,7 +3016,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, > struct ceph_mds_request_head_legacy *lhead; > const char *path1 = NULL; > const char *path2 = NULL; > - u64 ino1 = 0, ino2 = 0; > + struct ceph_vino vino1 = {0}, vino2 = {0}; > int pathlen1 = 0, pathlen2 = 0; > bool freepath1 = false, freepath2 = false; > struct dentry *old_dentry = NULL; > @@ -3019,17 +3028,35 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, > u16 request_head_version = mds_supported_head_version(session); > kuid_t caller_fsuid = req->r_cred->fsuid; > kgid_t caller_fsgid = req->r_cred->fsgid; > + bool parent_locked = test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); > > ret = set_request_path_attr(mdsc, req->r_inode, req->r_dentry, > req->r_parent, req->r_path1, req->r_ino1.ino, > - &path1, &pathlen1, &ino1, &freepath1, > - test_bit(CEPH_MDS_R_PARENT_LOCKED, > - &req->r_req_flags)); > + &path1, &pathlen1, &vino1, &freepath1, > + parent_locked); > if (ret < 0) { > msg = ERR_PTR(ret); > goto out; > } > > + /* > + * When the parent directory's i_rwsem is *not* locked, req->r_parent may > + * have become stale (e.g. after a concurrent rename) between the time the > + * dentry was looked up and now. If we detect that the stored r_parent > + * does not match the inode number we just encoded for the request, switch > + * to the correct inode so that the MDS receives a valid parent reference. > + */ > + if (!parent_locked && > + req->r_parent && vino1.ino && ceph_ino(req->r_parent) != vino1.ino) { > + struct inode *correct_dir = ceph_get_inode(mdsc->fsc->sb, vino1, NULL); > + if (!IS_ERR(correct_dir)) { > + WARN(1, "ceph: r_parent mismatch (had %llx wanted %llx) - updating\n", > + ceph_ino(req->r_parent), vino1.ino); > + iput(req->r_parent); > + req->r_parent = correct_dir; > + } > + } > + > /* If r_old_dentry is set, then assume that its parent is locked */ > if (req->r_old_dentry && > !(req->r_old_dentry->d_flags & DCACHE_DISCONNECTED)) > @@ -3037,7 +3064,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, > ret = set_request_path_attr(mdsc, NULL, old_dentry, > req->r_old_dentry_dir, > req->r_path2, req->r_ino2.ino, > - &path2, &pathlen2, &ino2, &freepath2, true); > + &path2, &pathlen2, &vino2, &freepath2, true); > if (ret < 0) { > msg = ERR_PTR(ret); > goto out_free1; > @@ -3191,8 +3218,8 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, > lhead->ino = cpu_to_le64(req->r_deleg_ino); > lhead->args = req->r_args; > > - ceph_encode_filepath(&p, end, ino1, path1); > - ceph_encode_filepath(&p, end, ino2, path2); > + ceph_encode_filepath(&p, end, vino1.ino, path1); > + ceph_encode_filepath(&p, end, vino2.ino, path2); > > /* make note of release offset, in case we need to replay */ > req->r_request_release_offset = p - msg->front.iov_base;