We have seen crashes where the RPC client has been freed, while a gss_auth still has a pointer to it. Subsequently, GSS attempts to send a NULL call, resulting in a use-after-free in xprt_iter_get_next. Fix that by having GSS take a reference on the RPC client, in gss_create_new, and release it in gss_free. The crashes we've seen have been on a v5.4-based kernel. They are not reproducible on demand, happening once every few months under heavy load. The crashing thread is an rpc_async_release worker: xprt_iter_ops (net/sunrpc/xprtmultipath.c:184:43) xprt_iter_get_next (net/sunrpc/xprtmultipath.c:527:35) rpc_task_get_next_xprt (net/sunrpc/clnt.c:1062:9) rpc_task_set_transport (net/sunrpc/clnt.c:1073:19) rpc_task_set_transport (net/sunrpc/clnt.c:1066:6) rpc_task_set_client (net/sunrpc/clnt.c:1081:3) rpc_run_task (net/sunrpc/clnt.c:1133:2) rpc_call_null_helper (net/sunrpc/clnt.c:2766:9) rpc_call_null (net/sunrpc/clnt.c:2771:9) gss_send_destroy_context (net/sunrpc/auth_gss/auth_gss.c:1274:10) gss_destroy_cred (net/sunrpc/auth_gss/auth_gss.c:1341:3) put_rpccred (net/sunrpc/auth.c:758:2) put_rpccred (net/sunrpc/auth.c:733:1) __put_nfs_open_context (fs/nfs/inode.c:1010:2) put_nfs_open_context (fs/nfs/inode.c:1017:2) nfs_pgio_data_destroy (fs/nfs/pagelist.c:562:3) nfs_pgio_header_free (fs/nfs/pagelist.c:573:2) nfs_read_completion (fs/nfs/read.c:200:2) nfs_pgio_release (fs/nfs/pagelist.c:699:2) rpc_release_calldata (net/sunrpc/sched.c:890:3) rpc_free_task (net/sunrpc/sched.c:1171:2) rpc_async_release (net/sunrpc/sched.c:1183:2) process_one_work (kernel/workqueue.c:2295:2) worker_thread (kernel/workqueue.c:2448:4) kthread (kernel/kthread.c:296:9) ret_from_fork+0x2b/0x36 (arch/x86/entry/entry_64.S:358) Immediately before __put_nfs_open_context calls put_rpccred, above, it calls nfs_sb_deactive, which frees the RPC client: rpc_free_client+189 [sunrpc] rpc_release_client+98 [sunrpc] rpc_shutdown_client+98 [sunrpc] nfs_free_client+123 [nfs] nfs_put_client+217 [nfs] nfs_free_server+81 [nfs] nfs_kill_super+49 [nfs] deactivate_locked_super+58 deactivate_super+89 nfs_sb_deactive+36 [nfs] After the RPC client is freed here, __put_nfs_open_context calls put_rpccred, which wants to destroy the cred, since its refcnt is now zero. Since the cred is not marked as UPTODATE, gss_send_destroy_context needs to send a NULL call to the server, to get it to release all state for this context. For this NULL call, we need an RPC client with an associated xprt; whilst looking for one, we trip over the freed xpi, that we freed earlier from nfs_sb_deactive. Ensuring that the RPC client refcnt is incremented whilst gss_auth has a pointer to it would ensure that the client can't be freed until gss_auth has been freed. Whilst the above occurred on a v5.4 kernel, I don't see anything obvious that would stop this happening to current code. Cc: stable@xxxxxxxxxxxxxxx Signed-off-by: Calum Mackay <calum.mackay@xxxxxxxxxx> --- net/sunrpc/auth_gss/auth_gss.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 5c095cb8cb20..8c2cc92c6cd6 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1026,6 +1026,13 @@ gss_create_new(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt) if (!try_module_get(THIS_MODULE)) return ERR_PTR(err); + + /* + * Make sure the RPC client can't be freed while gss_auth has + * a link to it + */ + refcount_inc(&clnt->cl_count); + if (!(gss_auth = kmalloc(sizeof(*gss_auth), GFP_KERNEL))) goto out_dec; INIT_HLIST_NODE(&gss_auth->hash); @@ -1098,6 +1105,8 @@ gss_create_new(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt) kfree(gss_auth->target_name); kfree(gss_auth); out_dec: + if (refcount_dec_and_test(&clnt->cl_count)) + rpc_free_client(clnt); module_put(THIS_MODULE); trace_rpcgss_createauth(flavor, err); return ERR_PTR(err); @@ -1106,13 +1115,18 @@ gss_create_new(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt) static void gss_free(struct gss_auth *gss_auth) { + struct rpc_clnt *clnt = gss_auth->client; + gss_pipe_free(gss_auth->gss_pipe[0]); gss_pipe_free(gss_auth->gss_pipe[1]); gss_mech_put(gss_auth->mech); put_net_track(gss_auth->net, &gss_auth->ns_tracker); kfree(gss_auth->target_name); - kfree(gss_auth); + + if (clnt != NULL && refcount_dec_and_test(&clnt->cl_count)) + rpc_free_client(clnt); + module_put(THIS_MODULE); } -- 2.47.3