[PATCH RFT] nfsd: provide locking for v4_end_grace

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Writing to v4_end_grace can race with server shutdown and result in
memory being accessed after it was freed - reclaim_str_hashtbl in
particular.

We cannot hold nfsd_mutex across the nfsd4_end_grace() call as that is
held while client_tracking_op->init() is called and that can wait for
an upcall to nfsdcltrack which can write to v4_end_grace, resulting in a
deadlock.

nfsd4_end_grace() is also called by the landromat work queue and this
doesn't require locking as server shutdown will stop the work and wait
for it before freeing anything that nfsd4_end_grace() might access.

However, we must be sure that writing to v4_end_grace doesn't restart
the work item after shutdown has already waited for it.  For this we can
use disable_delayed_work_sync() instead of cancel_delayed_work_sync().

So this patch adds a nfsd_net field "grace_end_forced", sets that when
v4_end_grace is written, and schedules the laundromat (providing it
hasn't been disabled).  This field bypasses other checks for whether the
grace period has finished.  The delayed work is disabled before
nfsd4_client_tracking_exit() is call to shutdown client tracking.

This resolves a race which can result in use-after-free.

Note that disable_delayed_work_sync() was added in v6.10.  To backport
to an earlier kernel without that interface the exclusion could be
provided by some spinlock that was released in the shutdown path
after ->nfsd_serv is set to NULL.  It would need to be taken before
the test on nfsd_serv in write_v4_end_grace() and released after
nfsd4_force_end_grace() is called.

Reported-by: Li Lingfeng <lilingfeng3@xxxxxxxxxx>
Cc: stable@xxxxxxxxxxxxxxx
Signed-off-by: NeilBrown <neil@xxxxxxxxxx>
---
 fs/nfsd/netns.h     |  1 +
 fs/nfsd/nfs4state.c | 19 ++++++++++++++++---
 fs/nfsd/nfsctl.c    |  7 ++++++-
 fs/nfsd/state.h     |  2 +-
 4 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 3e2d0fde80a7..d83c68872c4c 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -66,6 +66,7 @@ struct nfsd_net {
 
 	struct lock_manager nfsd4_manager;
 	bool grace_ended;
+	bool grace_end_forced;
 	time64_t boot_time;
 
 	struct dentry *nfsd_client_dir;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index d5694987f86f..b34f157334e6 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -84,7 +84,7 @@ static u64 current_sessionid = 1;
 /* forward declarations */
 static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner);
 static void nfs4_free_ol_stateid(struct nfs4_stid *stid);
-void nfsd4_end_grace(struct nfsd_net *nn);
+static void nfsd4_end_grace(struct nfsd_net *nn);
 static void _free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps);
 static void nfsd4_file_hash_remove(struct nfs4_file *fi);
 static void deleg_reaper(struct nfsd_net *nn);
@@ -6458,7 +6458,7 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	return nfs_ok;
 }
 
-void
+static void
 nfsd4_end_grace(struct nfsd_net *nn)
 {
 	/* do nothing if grace period already ended */
@@ -6491,6 +6491,16 @@ nfsd4_end_grace(struct nfsd_net *nn)
 	 */
 }
 
+void
+nfsd4_force_end_grace(struct nfsd_net *nn)
+{
+	nn->grace_end_forced = true;
+	/* This is a no-op after nfs4_state_shutdown_net() has called
+	 * disable_delayed_work_sync()
+	 */
+	mod_delayed_work(laundry_wq, &nn->laundromat_work, 0);
+}
+
 /*
  * If we've waited a lease period but there are still clients trying to
  * reclaim, wait a little longer to give them a chance to finish.
@@ -6500,6 +6510,8 @@ static bool clients_still_reclaiming(struct nfsd_net *nn)
 	time64_t double_grace_period_end = nn->boot_time +
 					   2 * nn->nfsd4_lease;
 
+	if (nn->grace_end_forced)
+		return false;
 	if (nn->track_reclaim_completes &&
 			atomic_read(&nn->nr_reclaim_complete) ==
 			nn->reclaim_str_hashtbl_size)
@@ -8807,6 +8819,7 @@ static int nfs4_state_create_net(struct net *net)
 	nn->unconf_name_tree = RB_ROOT;
 	nn->boot_time = ktime_get_real_seconds();
 	nn->grace_ended = false;
+	nn->grace_end_forced = false;
 	nn->nfsd4_manager.block_opens = true;
 	INIT_LIST_HEAD(&nn->nfsd4_manager.list);
 	INIT_LIST_HEAD(&nn->client_lru);
@@ -8935,7 +8948,7 @@ nfs4_state_shutdown_net(struct net *net)
 
 	shrinker_free(nn->nfsd_client_shrinker);
 	cancel_work_sync(&nn->nfsd_shrinker_work);
-	cancel_delayed_work_sync(&nn->laundromat_work);
+	disable_delayed_work_sync(&nn->laundromat_work);
 	locks_end_grace(&nn->nfsd4_manager);
 
 	INIT_LIST_HEAD(&reaplist);
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 3f3e9f6c4250..a9e6c2a155da 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1082,10 +1082,15 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size)
 		case 'Y':
 		case 'y':
 		case '1':
+			/* This test ensures we don't try to
+			 * end grace before the server has been started,
+			 * but doesn't guarantee we don't end grace
+			 * while the server is being shut down.
+			 */
 			if (!nn->nfsd_serv)
 				return -EBUSY;
 			trace_nfsd_end_grace(netns(file));
-			nfsd4_end_grace(nn);
+			nfsd4_force_end_grace(nn);
 			break;
 		default:
 			return -EINVAL;
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 1995bca158b8..e2bea9908fa8 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -836,7 +836,7 @@ static inline void nfsd4_revoke_states(struct net *net, struct super_block *sb)
 #endif
 
 /* grace period management */
-void nfsd4_end_grace(struct nfsd_net *nn);
+void nfsd4_force_end_grace(struct nfsd_net *nn);
 
 /* nfs4recover operations */
 extern int nfsd4_client_tracking_init(struct net *net);
-- 
2.49.0






[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux