On Thu, 2025-03-06 at 11:31 -0800, Dai Ngo wrote: > RFC8881, section 9.1.2 says: > > "In the case of READ, the server may perform the corresponding > check on the access mode, or it may choose to allow READ for > OPEN4_SHARE_ACCESS_WRITE, to accommodate clients whose WRITE > implementation may unavoidably do reads (e.g., due to buffer cache > constraints)." > > and in section 10.4.1: > "Similarly, when closing a file opened for OPEN4_SHARE_ACCESS_WRITE/ > OPEN4_SHARE_ACCESS_BOTH and if an OPEN_DELEGATE_WRITE delegation > is in effect" > > This patch allow READ using write delegation stateid granted on OPENs > with OPEN4_SHARE_ACCESS_WRITE only, to accommodate clients whose WRITE > implementation may unavoidably do (e.g., due to buffer cache > constraints). > > For write delegation granted for OPEN with OPEN4_SHARE_ACCESS_WRITE > a new nfsd_file and a struct file are allocated to use for reads. > The nfsd_file is freed when the file is closed by release_all_access. > > Suggested-by: Chuck Lever <chuck.lever@xxxxxxxxxx> > Signed-off-by: Dai Ngo <dai.ngo@xxxxxxxxxx> > Reviewed-by: Jeff Layton <jlayton@xxxxxxxxxx> > --- > fs/nfsd/nfs4state.c | 75 ++++++++++++++++++++++++++++----------------- > 1 file changed, 47 insertions(+), 28 deletions(-) > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c > index 0f97f2c62b3a..295415fda985 100644 > --- a/fs/nfsd/nfs4state.c > +++ b/fs/nfsd/nfs4state.c > @@ -633,18 +633,6 @@ find_readable_file(struct nfs4_file *f) > return ret; > } > > -static struct nfsd_file * > -find_rw_file(struct nfs4_file *f) > -{ > - struct nfsd_file *ret; > - > - spin_lock(&f->fi_lock); > - ret = nfsd_file_get(f->fi_fds[O_RDWR]); > - spin_unlock(&f->fi_lock); > - > - return ret; > -} > - > struct nfsd_file * > find_any_file(struct nfs4_file *f) > { > @@ -5987,14 +5975,19 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, > * "An OPEN_DELEGATE_WRITE delegation allows the client to handle, > * on its own, all opens." > * > - * Furthermore the client can use a write delegation for most READ > - * operations as well, so we require a O_RDWR file here. > + * Furthermore, section 9.1.2 says: > + * > + * "In the case of READ, the server may perform the corresponding > + * check on the access mode, or it may choose to allow READ for > + * OPEN4_SHARE_ACCESS_WRITE, to accommodate clients whose WRITE > + * implementation may unavoidably do reads (e.g., due to buffer > + * cache constraints)." > * > - * Offer a write delegation in the case of a BOTH open, and ensure > - * we get the O_RDWR descriptor. > + * We choose to offer a write delegation for OPEN with the > + * OPEN4_SHARE_ACCESS_WRITE access mode to accommodate such clients. > */ > - if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH) == NFS4_SHARE_ACCESS_BOTH) { > - nf = find_rw_file(fp); > + if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) { > + nf = find_writeable_file(fp); > dl_type = deleg_ts ? OPEN_DELEGATE_WRITE_ATTRS_DELEG : OPEN_DELEGATE_WRITE; > } > > @@ -6116,7 +6109,7 @@ static bool > nfs4_delegation_stat(struct nfs4_delegation *dp, struct svc_fh *currentfh, > struct kstat *stat) > { > - struct nfsd_file *nf = find_rw_file(dp->dl_stid.sc_file); > + struct nfsd_file *nf = find_writeable_file(dp->dl_stid.sc_file); > struct path path; > int rc; > > @@ -6134,6 +6127,33 @@ nfs4_delegation_stat(struct nfs4_delegation *dp, struct svc_fh *currentfh, > return rc == 0; > } > > +/* > + * Add NFS4_SHARE_ACCESS_READ to the write delegation granted on OPEN > + * with NFS4_SHARE_ACCESS_WRITE by allocating separate nfsd_file and > + * struct file to be used for read with delegation stateid. > + * > + */ > +static bool > +nfsd4_add_rdaccess_to_wrdeleg(struct svc_rqst *rqstp, struct nfsd4_open *open, > + struct svc_fh *fh, struct nfs4_ol_stateid *stp) > +{ > + struct nfs4_file *fp; > + struct nfsd_file *nf = NULL; > + > + if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH) == > + NFS4_SHARE_ACCESS_WRITE) { > + if (nfsd_file_acquire_opened(rqstp, fh, NFSD_MAY_READ, NULL, &nf)) > + return (false); > + fp = stp->st_stid.sc_file; > + spin_lock(&fp->fi_lock); > + __nfs4_file_get_access(fp, NFS4_SHARE_ACCESS_READ); > + fp = stp->st_stid.sc_file; > + fp->fi_fds[O_RDONLY] = nf; > + spin_unlock(&fp->fi_lock); > + } > + return true; > +} > + > /* > * The Linux NFS server does not offer write delegations to NFSv4.0 > * clients in order to avoid conflicts between write delegations and > @@ -6159,8 +6179,9 @@ nfs4_delegation_stat(struct nfs4_delegation *dp, struct svc_fh *currentfh, > * open or lock state. > */ > static void > -nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, > - struct svc_fh *currentfh) > +nfs4_open_delegation(struct svc_rqst *rqstp, struct nfsd4_open *open, > + struct nfs4_ol_stateid *stp, struct svc_fh *currentfh, > + struct svc_fh *fh) > { > bool deleg_ts = open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_DELEG_TIMESTAMPS; > struct nfs4_openowner *oo = openowner(stp->st_stateowner); > @@ -6205,7 +6226,8 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, > memcpy(&open->op_delegate_stateid, &dp->dl_stid.sc_stateid, sizeof(dp->dl_stid.sc_stateid)); > > if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) { > - if (!nfs4_delegation_stat(dp, currentfh, &stat)) { > + if (!nfsd4_add_rdaccess_to_wrdeleg(rqstp, open, fh, stp) || > + !nfs4_delegation_stat(dp, currentfh, &stat)) { > nfs4_put_stid(&dp->dl_stid); > destroy_delegation(dp); > goto out_no_deleg; > @@ -6361,7 +6383,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf > * Attempt to hand out a delegation. No error return, because the > * OPEN succeeds even if we fail. > */ > - nfs4_open_delegation(open, stp, &resp->cstate.current_fh); > + nfs4_open_delegation(rqstp, open, stp, > + &resp->cstate.current_fh, current_fh); > > /* > * If there is an existing open stateid, it must be updated and > @@ -7063,7 +7086,7 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, > return_revoked = true; > if (typemask & SC_TYPE_DELEG) > /* Always allow REVOKED for DELEG so we can > - * retturn the appropriate error. > + * return the appropriate error. > */ > statusmask |= SC_STATUS_REVOKED; > > @@ -7106,10 +7129,6 @@ nfs4_find_file(struct nfs4_stid *s, int flags) > > switch (s->sc_type) { > case SC_TYPE_DELEG: > - spin_lock(&s->sc_file->fi_lock); > - ret = nfsd_file_get(s->sc_file->fi_deleg_file); > - spin_unlock(&s->sc_file->fi_lock); > - break; > case SC_TYPE_OPEN: > case SC_TYPE_LOCK: > if (flags & RD_STATE) I'm seeing a nfsd_file leak in chuck's nfsd-next tree and a bisect landed on this patch. The reproducer is: 1/ set up an exported fs on a server (I used xfs, but it probably doesn't matter). 2/ mount up the export on a client using v4.2 3/ Run this fio script in the directory: ----------------8<--------------------- [global] name=fio-seq-write filename=fio-seq-write rw=write bs=1M direct=0 numjobs=1 time_based runtime=60 [file1] size=50G ioengine=io_uring iodepth=16 ----------------8<--------------------- When it completes, shut down the nfs server. You'll see warnings like this: BUG nfsd_file (Tainted: G B W ): Objects remaining on __kmem_cache_shutdown() Dai, can you take a look? Thanks, -- Jeff Layton <jlayton@xxxxxxxxxx>