Hi Jeff,
Following our discussion, my colleague Yang Erkun proposed an alternative
solution: using the nfsd_mutex to prevent concurrency between
initialization/destruction and usage of client_tracking_ops.
Both our previous approaches (delayed pointer assignment and the
initialization flag) still leave a window where the issue could occur.
For example, after validating client_tracking_ops as non-NULL but before
invoking its callbacks, the pointer could be set to NULL during teardown.
I've prototyped the nfsd_mutex approach and confirmed it resolves the
problem. What are your thoughts on this solution?
Best regards,
Li Lingfeng
在 2025/5/13 20:34, Jeff Layton 写道:
On Tue, 2025-05-13 at 15:43 +0800, Li Lingfeng wrote:
Checking whether tracking callbacks can be called based on whether
nn->client_tracking_ops is NULL may lead to callbacks being invoked
before tracking initialization completes, causing resource access
violations (UAF, NULL pointer dereference). Examples:
1) nfsd4_client_tracking_init
// set nn->client_tracking_ops
nfsd4_cld_tracking_init
nfs4_cld_state_init
nn->reclaim_str_hashtbl = kmalloc_array
... // error path, goto err
nfs4_cld_state_shutdown
kfree(nn->reclaim_str_hashtbl)
write_v4_end_grace
nfsd4_end_grace
nfsd4_record_grace_done
nfsd4_cld_grace_done
nfs4_release_reclaim
nn->reclaim_str_hashtbl[i]
// UAF
// clear nn->client_tracking_ops
2) nfsd4_client_tracking_init
// set nn->client_tracking_ops
nfsd4_cld_tracking_init
write_v4_end_grace
nfsd4_end_grace
nfsd4_record_grace_done
nfsd4_cld_grace_done
alloc_cld_upcall
cn = nn->cld_net
spin_lock // cn->cn_lock
// NULL deref
// error path, skip init pipe
__nfsd4_init_cld_pipe
cn = kzalloc
nn->cld_net = cn
// clear nn->client_tracking_ops
After nfsd mounts, users can trigger grace_done callbacks via
/proc/fs/nfsd/v4_end_grace. If resources are uninitialized or freed
in error paths, this causes access violations.
Instead of adding locks for specific resources(e.g., reclaim_str_hashtbl),
introducing a flag to indicate whether tracking initialization has
completed and checking this flag before invoking callbacks may be better.
Fixes: 52e19c09a183 ("nfsd: make reclaim_str_hashtbl allocated per net")
Signed-off-by: Li Lingfeng <lilingfeng3@xxxxxxxxxx>
---
fs/nfsd/netns.h | 1 +
fs/nfsd/nfs4recover.c | 13 +++++++++----
2 files changed, 10 insertions(+), 4 deletions(-)
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 3e2d0fde80a7..dbd782d6b063 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -113,6 +113,7 @@ struct nfsd_net {
struct file *rec_file;
bool in_grace;
+ bool client_tracking_init_done;
const struct nfsd4_client_tracking_ops *client_tracking_ops;
time64_t nfsd4_lease;
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index c1d9bd07285f..6c27c1252c0e 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -2096,7 +2096,11 @@ nfsd4_client_tracking_init(struct net *net)
pr_warn("NFSD: Unable to initialize client recovery tracking! (%d)\n", status);
pr_warn("NFSD: Is nfsdcld running? If not, enable CONFIG_NFSD_LEGACY_CLIENT_TRACKING.\n");
nn->client_tracking_ops = NULL;
+ nn->client_tracking_init_done = false;
+ } else {
+ nn->client_tracking_init_done = true;
}
+
The problem seems real (theoretically at least), but I'm not a fan of
this fix.
If the problem is as you say, then why not just delay the setting of
the client_tracking_ops until there is a method that works. IOW, set a
temporary variable with an ops pointer and only assign
client_tracking_ops at the end of the function/
Would that also fix this issue?
return status;
}
@@ -2105,6 +2109,7 @@ nfsd4_client_tracking_exit(struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ nn->client_tracking_init_done = false;
if (nn->client_tracking_ops) {
if (nn->client_tracking_ops->exit)
nn->client_tracking_ops->exit(net);
@@ -2117,7 +2122,7 @@ nfsd4_client_record_create(struct nfs4_client *clp)
{
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
- if (nn->client_tracking_ops)
+ if (nn->client_tracking_ops && nn->client_tracking_init_done)
nn->client_tracking_ops->create(clp);
}
@@ -2126,7 +2131,7 @@ nfsd4_client_record_remove(struct nfs4_client *clp)
{
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
- if (nn->client_tracking_ops)
+ if (nn->client_tracking_ops && nn->client_tracking_init_done)
nn->client_tracking_ops->remove(clp);
}
@@ -2135,7 +2140,7 @@ nfsd4_client_record_check(struct nfs4_client *clp)
{
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
- if (nn->client_tracking_ops)
+ if (nn->client_tracking_ops && nn->client_tracking_init_done)
return nn->client_tracking_ops->check(clp);
return -EOPNOTSUPP;
@@ -2144,7 +2149,7 @@ nfsd4_client_record_check(struct nfs4_client *clp)
void
nfsd4_record_grace_done(struct nfsd_net *nn)
{
- if (nn->client_tracking_ops)
+ if (nn->client_tracking_ops && nn->client_tracking_init_done)
nn->client_tracking_ops->grace_done(nn);
}