Hi Trond, Thanks for your comments. In this change, I also tried to make LAYOUTCOMMIT call nfs4_layout_refresh_old_stateid to update the stateid when receiving NFS4ERR_OLD_STATEID. However, the update is skipped because pnfs_mark_matching_lsegs_return returns -EBUSY. I’m wondering why the stateid update is skipped in this case, and what the impact would be if we ignored the -EBUSY error and proceeded with the update. Thanks, On Tue, Sep 2, 2025 at 2:09 PM Trond Myklebust <trondmy@xxxxxxxxxx> wrote: > > On Tue, 2025-09-02 at 18:30 +0000, Haihua Yang wrote: > > [You don't often get email from yanghh@xxxxxxxxx. Learn why this is > > important at https://aka.ms/LearnAboutSenderIdentification ] > > > > 1, fix an issue that client may send LAYOUTRETURN before LAYOUTCOMMIT > > 2, update layout stateid when layoutcommit receiving > > NFS4ERR_OLD_STATEID > > Please read Documentation/process/submitting-patches.rst > Specifically, please read the section "Describe your changes" > carefully. > > > --- > > fs/nfs/callback_proc.c | 2 +- > > fs/nfs/nfs4proc.c | 10 +++++++++- > > fs/nfs/pnfs.c | 28 +++++++++++++++++----------- > > fs/nfs/pnfs.h | 2 +- > > 4 files changed, 28 insertions(+), 14 deletions(-) > > > > diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c > > index 8397c43358bd..1e6e4a7a3f15 100644 > > --- a/fs/nfs/callback_proc.c > > +++ b/fs/nfs/callback_proc.c > > @@ -287,7 +287,7 @@ static u32 initiate_file_draining(struct > > nfs_client *clp, > > pnfs_set_layout_stateid(lo, &args->cbl_stateid, NULL, true); > > switch (pnfs_mark_matching_lsegs_return(lo, &free_me_list, > > &args->cbl_range, > > - be32_to_cpu(args- > > >cbl_stateid.seqid))) { > > + be32_to_cpu(args->cbl_stateid.seqid), > > true)) { > > case 0: > > case -EBUSY: > > /* There are layout segments that need to be returned > > */ > > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c > > index 7d2b67e06cc3..46a1bc1f31ee 100644 > > --- a/fs/nfs/nfs4proc.c > > +++ b/fs/nfs/nfs4proc.c > > @@ -10255,6 +10255,7 @@ nfs4_layoutcommit_done(struct rpc_task *task, > > void *calldata) > > { > > struct nfs4_layoutcommit_data *data = calldata; > > struct nfs_server *server = NFS_SERVER(data->args.inode); > > + struct pnfs_layout_range dst_range; > > > > if (!nfs41_sequence_done(task, &data->res.seq_res)) > > return; > > @@ -10268,6 +10269,13 @@ nfs4_layoutcommit_done(struct rpc_task > > *task, void *calldata) > > break; > > case 0: > > break; > > + case -NFS4ERR_OLD_STATEID: > > + if (data->inode) { > > + nfs4_layout_refresh_old_stateid(&data- > > >args.stateid, > > + &dst_range, > > + data->inode); > > + } > > + fallthrough; > > default: > > if (nfs4_async_handle_error(task, server, NULL, NULL) > > == -EAGAIN) { > > rpc_restart_call_prepare(task); > > @@ -10319,8 +10327,8 @@ nfs4_proc_layoutcommit(struct > > nfs4_layoutcommit_data *data, bool sync) > > data->args.lastbytewritten, > > data->args.inode->i_ino); > > > > + data->inode = nfs_igrab_and_active(data->args.inode); > > if (!sync) { > > - data->inode = nfs_igrab_and_active(data->args.inode); > > if (data->inode == NULL) { > > nfs4_layoutcommit_release(data); > > return -EAGAIN; > > diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c > > index a3135b5af7ee..aaa3719b1957 100644 > > --- a/fs/nfs/pnfs.c > > +++ b/fs/nfs/pnfs.c > > @@ -432,7 +432,7 @@ bool nfs4_layout_refresh_old_stateid(nfs4_stateid > > *dst, > > goto out; > > } > > /* Try to update the seqid to the most recent */ > > - err = pnfs_mark_matching_lsegs_return(lo, &head, > > &range, 0); > > + err = pnfs_mark_matching_lsegs_return(lo, &head, > > &range, 0, true); > > if (err != -EBUSY) { > > dst->seqid = lo->plh_stateid.seqid; > > *dst_range = range; > > @@ -484,7 +484,7 @@ static int pnfs_mark_layout_stateid_return(struct > > pnfs_layout_hdr *lo, > > .length = NFS4_MAX_UINT64, > > }; > > > > - return pnfs_mark_matching_lsegs_return(lo, lseg_list, &range, > > seq); > > + return pnfs_mark_matching_lsegs_return(lo, lseg_list, &range, > > seq, false); > > } > > > > static int > > @@ -522,7 +522,7 @@ pnfs_layout_io_set_failed(struct pnfs_layout_hdr > > *lo, u32 iomode) > > > > spin_lock(&inode->i_lock); > > pnfs_layout_set_fail_bit(lo, > > pnfs_iomode_to_fail_bit(iomode)); > > - pnfs_mark_matching_lsegs_return(lo, &head, &range, 0); > > + pnfs_mark_matching_lsegs_return(lo, &head, &range, 0, false); > > spin_unlock(&inode->i_lock); > > pnfs_free_lseg_list(&head); > > dprintk("%s Setting layout IOMODE_%s fail bit\n", __func__, > > @@ -1459,7 +1459,7 @@ _pnfs_return_layout(struct inode *ino) > > } > > valid_layout = pnfs_layout_is_valid(lo); > > pnfs_clear_layoutcommit(ino, &tmp_list); > > - pnfs_mark_matching_lsegs_return(lo, &tmp_list, &range, 0); > > + pnfs_mark_matching_lsegs_return(lo, &tmp_list, &range, 0, > > false); > > > > if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) > > NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo, > > &range); > > @@ -2583,7 +2583,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) > > .iomode = IOMODE_ANY, > > .length = NFS4_MAX_UINT64, > > }; > > - pnfs_mark_matching_lsegs_return(lo, &free_me, &range, > > 0); > > + pnfs_mark_matching_lsegs_return(lo, &free_me, &range, > > 0, false); > > goto out_forget; > > } else { > > /* We have a completely new layout */ > > @@ -2628,7 +2628,7 @@ int > > pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, > > struct list_head *tmp_list, > > const struct pnfs_layout_range > > *return_range, > > - u32 seq) > > + u32 seq, bool committing) > > { > > struct pnfs_layout_segment *lseg, *next; > > struct nfs_server *server = NFS_SERVER(lo->plh_inode); > > @@ -2658,12 +2658,18 @@ pnfs_mark_matching_lsegs_return(struct > > pnfs_layout_hdr *lo, > > } > > > > if (remaining) { > > - pnfs_set_plh_return_info(lo, return_range->iomode, > > seq); > > - return -EBUSY; > > + if (!committing) { > > + pnfs_set_plh_return_info(lo, return_range- > > >iomode, seq); > > + return -EBUSY; > > + } else { > > + return 0; > > + } > > NACK. > > This change would mean that we can have layout segments on the list lo- > >plh_return_segs without even the NFS_LAYOUT_RETURN_REQUESTED flag > being set. > > It also badly breaks pnfs_layout_need_return(). > > I see no reason why we should need to add this 'committing' flag > argument to pnfs_mark_matching_lsegs_return(). I suggest rather > modifying pnfs_prepare_layoutreturn() to check for > pnfs_layoutcommit_outstanding(). > > > } > > > > if (!list_empty(&lo->plh_return_segs)) { > > - pnfs_set_plh_return_info(lo, return_range->iomode, > > seq); > > + if (!committing) { > > + pnfs_set_plh_return_info(lo, return_range- > > >iomode, seq); > > + } > > return 0; > > } > > > > @@ -2689,7 +2695,7 @@ pnfs_mark_layout_for_return(struct inode > > *inode, > > * segments at hand when sending layoutreturn. See > > pnfs_put_lseg() > > * for how it works. > > */ > > - if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, > > range, 0) != -EBUSY) { > > + if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, > > range, 0, false) != -EBUSY) { > > const struct cred *cred; > > nfs4_stateid stateid; > > enum pnfs_iomode iomode; > > @@ -2804,7 +2810,7 @@ static int > > pnfs_layout_return_unused_byserver(struct nfs_server *server, > > pnfs_get_layout_hdr(lo); > > pnfs_set_plh_return_info(lo, range->iomode, 0); > > if (pnfs_mark_matching_lsegs_return(lo, &lo- > > >plh_return_segs, > > - range, 0) != 0 || > > + range, 0, false) > > != 0 || > > !pnfs_prepare_layoutreturn(lo, &stateid, &cred, > > &iomode)) { > > spin_unlock(&inode->i_lock); > > rcu_read_unlock(); > > diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h > > index 91ff877185c8..33a7a09477b2 100644 > > --- a/fs/nfs/pnfs.h > > +++ b/fs/nfs/pnfs.h > > @@ -300,7 +300,7 @@ int pnfs_mark_matching_lsegs_invalid(struct > > pnfs_layout_hdr *lo, > > int pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, > > struct list_head *tmp_list, > > const struct pnfs_layout_range > > *recall_range, > > - u32 seq); > > + u32 seq, bool committing); > > int pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo, > > struct list_head *lseg_list); > > bool pnfs_roc(struct inode *ino, > > -- > > 2.51.0.87.g1fa68948c3.dirty > > > A. > -- > Trond Myklebust Linux NFS client maintainer, Hammerspace > trondmy@xxxxxxxxxx, trond.myklebust@xxxxxxxxxxxxxxx