On Sat, 17 May 2025, Anna Schumaker wrote: > From: Anna Schumaker <anna.schumaker@xxxxxxxxxx> > > The PTR_ERR_OR_ZERO() macro uses IS_ERR(), which checks if an error > value is a valid Linux error code. It does not take into account NFS > error codes, which are well out of the range of MAX_ERRNO. So if > _nfs4_proc_mkdir() returns -NFS4ERR_DELAY (which xfstests generic/477 was > able to consistently hit while running against a Hammerspace server), > PTR_ERR_OR_ZERO() will happily say "no, that's not an error", so we > propagate it up to the VFS who then tries to dput() it. > > Naturally, the kernel doesn't like this: > > [ 247.669307] BUG: unable to handle page fault for address: ffffffffffffd968 > [ 247.690824] RIP: 0010:lockref_put_return+0x67/0x130 > [ 247.719037] Call Trace: > [ 247.719446] <TASK> > [ 247.719806] ? __pfx_lockref_put_return+0x10/0x10 > [ 247.720538] ? _raw_spin_unlock+0x15/0x30 > [ 247.721173] ? dput+0x179/0x490 > [ 247.721682] ? vfs_mkdir+0x475/0x780 > [ 247.722259] dput+0x30/0x490 > [ 247.722730] do_mkdirat+0x158/0x310 > [ 247.723292] ? __pfx_do_mkdirat+0x10/0x10 > [ 247.723928] __x64_sys_mkdir+0xd3/0x160 > [ 247.724531] do_syscall_64+0x4b/0x120 > [ 247.725131] entry_SYSCALL_64_after_hwframe+0x76/0x7e > [ 247.725914] RIP: 0033:0x7fe0e22f3ddb > > While I was in the area, I noticed that we're discarding any errors left > unhandled by nfs4_handle_exception(). This patch fixes both of these > issues. > > Fixes: 8376583b84a1 ("nfs: change mkdir inode_operation to return alternate dentry if needed.") > Signed-off-by: Anna Schumaker <anna.schumaker@xxxxxxxxxx> > --- > fs/nfs/nfs4proc.c | 6 +++++- > 1 file changed, 5 insertions(+), 1 deletion(-) > > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c > index c7e068b563ff..306dade146e6 100644 > --- a/fs/nfs/nfs4proc.c > +++ b/fs/nfs/nfs4proc.c > @@ -5274,13 +5274,17 @@ static struct dentry *nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, > sattr->ia_mode &= ~current_umask(); > do { > alias = _nfs4_proc_mkdir(dir, dentry, sattr, label); > - err = PTR_ERR_OR_ZERO(alias); > + err = PTR_ERR(alias); > + if (err > 0) > + err = 0; This would be problematic on a 32 bit machine with more than 2GB of memory ... or maybe on any 32bit machine as I think kernel addresses are always negative. The largest nfs error code is a little over 12000. We could maybe change MAX_ERRNO to 13000, but as that is more than one page it might not work. The best solution would be to separate the error from the pointer while the error might exceed MAX_ERRNO. Something like this? NeilBrown diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 970f28dbf253..feebca84b980 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5155,13 +5155,15 @@ static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_ } static struct dentry *nfs4_do_mkdir(struct inode *dir, struct dentry *dentry, - struct nfs4_createdata *data) + struct nfs4_createdata *data, int *statusp) { - int status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &data->msg, + struct dentry *ret; + + *statusp = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &data->msg, &data->arg.seq_args, &data->res.seq_res, 1); - if (status) - return ERR_PTR(status); + if (*statusp) + return NULL; spin_lock(&dir->i_lock); /* Creating a directory bumps nlink in the parent */ @@ -5170,7 +5172,11 @@ static struct dentry *nfs4_do_mkdir(struct inode *dir, struct dentry *dentry, data->res.fattr->time_start, NFS_INO_INVALID_DATA); spin_unlock(&dir->i_lock); - return nfs_add_or_obtain(dentry, data->res.fh, data->res.fattr); + ret = nfs_add_or_obtain(dentry, data->res.fh, data->res.fattr); + if (!IS_ERR(ret)) + return ret; + *statusp = PTR_ERR(ret); + return NULL; } static void nfs4_free_createdata(struct nfs4_createdata *data) @@ -5231,17 +5237,18 @@ static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, static struct dentry *_nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr, - struct nfs4_label *label) + struct nfs4_label *label, int *statusp) { struct nfs4_createdata *data; - struct dentry *ret = ERR_PTR(-ENOMEM); + struct dentry *ret = NULL; + *statusp = -ENOMEM; data = nfs4_alloc_createdata(dir, &dentry->d_name, sattr, NF4DIR); if (data == NULL) goto out; data->arg.label = label; - ret = nfs4_do_mkdir(dir, dentry, data); + ret = nfs4_do_mkdir(dir, dentry, data, statusp); nfs4_free_createdata(data); out: @@ -5264,11 +5271,12 @@ static struct dentry *nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, if (!(server->attr_bitmask[2] & FATTR4_WORD2_MODE_UMASK)) sattr->ia_mode &= ~current_umask(); do { - alias = _nfs4_proc_mkdir(dir, dentry, sattr, label); - err = PTR_ERR_OR_ZERO(alias); + alias = _nfs4_proc_mkdir(dir, dentry, sattr, label, &err); trace_nfs4_mkdir(dir, &dentry->d_name, err); - err = nfs4_handle_exception(NFS_SERVER(dir), err, - &exception); + if (err) + alias = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir), + err, + &exception)); } while (exception.retry); nfs4_label_release_security(label);