With this patch applied, I get the following panic just from doing
"systemctl start nfs-server" followed by "systemctl stop nfs-server":

[   69.705986] Unable to handle kernel paging request at virtual address dfff800000000001
[   69.706706] KASAN: null-ptr-deref in range [0x0000000000000008-0x000000000000000f]
[   69.707299] Mem abort info:
[   69.707514]   ESR = 0x0000000096000005
[   69.707787]   EC = 0x25: DABT (current EL), IL = 32 bits
[   69.708186]   SET = 0, FnV = 0
[   69.708415]   EA = 0, S1PTW = 0
[   69.708637]   FSC = 0x05: level 1 translation fault
[   69.708986] Data abort info:
[   69.709203]   ISV = 0, ISS = 0x00000005, ISS2 = 0x00000000
[   69.709604]   CM = 0, WnR = 0, TnD = 0, TagAccess = 0
[   69.709986]   GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
[   69.710345] [dfff800000000001] address between user and kernel address ranges
[   69.710842] Internal error: Oops: 0000000096000005 [#1] SMP
[   69.711207] Modules linked in: rpcrdma rdma_cm iw_cm ib_cm ib_core nfsd nfs_acl lockd grace nfs_localio ext4 crc16 mbcache jbd2 overlay isofs uinput snd_seq_dummy snd_hrtimer qrtr rfkill vfat fat uvcvideo videobuf2_vmalloc videobuf2_memops snd_hda_codec_generic uvc videobuf2_v4l2 videobuf2_common e1000e snd_hda_intel videodev snd_intel_dspcfg snd_hda_codec mc snd_hda_core snd_hwdep snd_seq snd_seq_device snd_pcm snd_timer snd soundcore sg auth_rpcgss loop dm_multipath sunrpc dm_mod nfnetlink vsock_loopback vmw_vsock_virtio_transport_common vmw_vsock_vmci_transport vmw_vmci vsock xfs nvme sr_mod nvme_core cdrom ghash_ce nvme_keyring nvme_auth vmwgfx drm_ttm_helper ttm fuse
[   69.715331] CPU: 2 UID: 0 PID: 204 Comm: kworker/2:1 Kdump: loaded Not tainted 6.17.0-rc4+ #3 PREEMPT(voluntary)
[   69.716036] Hardware name: VMware, Inc. VMware20,1/VBSA, BIOS VMW201.00V.24006586.BA64.2406042154 06/04/2024
[   69.716751] Workqueue: events __svc_rdma_free [rpcrdma]
[   69.717100] pstate: 01400005 (nzcv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--)
[   69.717483] pc : xas_start+0x74/0x260
[   69.717756] lr : xas_load+0x18/0x208
[   69.717972] sp : ffff800088287910
[   69.718182] x29: ffff800088287910 x28: 000000000000002c x27: ffff000098ca3c78
[   69.718625] x26: 1ffff00011050f62 x25: ffff800088287a08 x24: ffffffffffffffff
[   69.719067] x23: 0000000000000000 x22: ffff800088287a18 x21: 1ffff00011050f43
[   69.719504] x20: 0000000000000000 x19: ffff800088287a00 x18: 0000000000000000
[   69.719928] x17: 302e3a3a06000000 x16: 3670637404000000 x15: 03000000a3860100
[   69.720362] x14: 0000000000000000 x13: 0000003001000000 x12: ffff600015a36265
[   69.720800] x11: 1fffe00015a36264 x10: ffff600015a36264 x9 : ffff800082878150
[   69.721241] x8 : ffff700011050f3c x7 : ffff8000882879e0 x6 : 00000000f3f3f3f3
[   69.721674] x5 : 0000000000000000 x4 : dfff800000000000 x3 : 1fffe00013201657
[   69.722112] x2 : 0000000000000001 x1 : dfff800000000000 x0 : 0000000000000008
[   69.722567] Call trace:
[   69.722733]  xas_start+0x74/0x260 (P)
[   69.722996]  xas_load+0x18/0x208
[   69.723193]  xas_find+0x288/0x440
[   69.723385]  xa_find+0x160/0x1a0
[   69.723572]  rpcrdma_pool_destroy+0xb0/0x480 [rpcrdma]
[   69.724164]  svc_rdma_recv_ctxts_destroy+0x54/0x78 [rpcrdma]
[   69.724550]  __svc_rdma_free+0xd0/0x288 [rpcrdma]
[   69.724842]  process_one_work+0x584/0x1050
[   69.725093]  worker_thread+0x4c4/0xc80
[   69.725295]  kthread+0x2f8/0x398
[   69.725468]  ret_from_fork+0x10/0x20
[   69.725666] Code: d2d00001 f2fbffe1 91002280 d343fc02 (38e16841)
[   69.725988] SMP: stopping secondary CPUs
[   69.727548] Starting crashdump kernel...
[   69.727781] Bye!
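The faulting address is the KASAN shadow for offsets 0x8-0xf off a NULL
pointer, so my guess (an untested assumption, I haven't dug in further)
is that rpcrdma_pool_destroy() is walking &pool->rp_xa with pool ==
NULL: sc_recv_pool is created only in svc_rdma_post_recvs(), but
svc_rdma_recv_ctxts_destroy() now calls rpcrdma_pool_destroy()
unconditionally -- and a transport that never posted Receives (the
listener, for instance) tears down with sc_recv_pool still NULL. A
naive guard along these lines makes the start/stop cycle survive here,
though it may just be papering over whatever the real fix should be:

@@ svc_rdma_recv_ctxts_destroy @@
 	while ((node = llist_del_first(&rdma->sc_recv_ctxts))) {
 		ctxt = llist_entry(node, struct svc_rdma_recv_ctxt, rc_node);
 		kfree(ctxt);
 	}
-	rpcrdma_pool_destroy(rdma->sc_recv_pool);
+	/* sc_recv_pool exists only if svc_rdma_post_recvs() ran */
+	if (rdma->sc_recv_pool)
+		rpcrdma_pool_destroy(rdma->sc_recv_pool);
 }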
On Thu, Aug 21, 2025 at 11:35 AM Chuck Lever <cel@xxxxxxxxxx> wrote:
>
> From: Chuck Lever <chuck.lever@xxxxxxxxxx>
>
> Reduce the per-connection footprint in the host's and RNIC's memory
> management TLBs by combining groups of a connection's Receive
> buffers into fewer IOVAs.
>
> I don't have a good way to measure whether this approach is
> effective.
>
> Signed-off-by: Chuck Lever <chuck.lever@xxxxxxxxxx>
> ---
>  include/linux/sunrpc/svc_rdma.h         |   3 +
>  include/trace/events/rpcrdma.h          |  99 ++++++++++
>  net/sunrpc/xprtrdma/Makefile            |   2 +-
>  net/sunrpc/xprtrdma/pool.c              | 241 ++++++++++++++++++++++++
>  net/sunrpc/xprtrdma/pool.h              |  20 ++
>  net/sunrpc/xprtrdma/svc_rdma_recvfrom.c |  45 ++---
>  6 files changed, 380 insertions(+), 30 deletions(-)
>  create mode 100644 net/sunrpc/xprtrdma/pool.c
>  create mode 100644 net/sunrpc/xprtrdma/pool.h
>
> Changes since v2:
> - Allocate the shard buffer with alloc_pages instead of kmalloc
> - Simplify the synopsis of rpcrdma_pool_create
> - rpcrdma_pool_buffer_alloc now initializes the RECV's ib_sge
> - Added a "sync_for_cpu" API
>
> Changes since v1:
> - Rename "chunks" to "shards" -- RPC/RDMA already has chunks
> - Replace pool's list of shards with an xarray
> - Implement bitmap-based shard free space management
> - Implement some naive observability
>
> diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
> index 22704c2e5b9b..b4f3c01f1b94 100644
> --- a/include/linux/sunrpc/svc_rdma.h
> +++ b/include/linux/sunrpc/svc_rdma.h
> @@ -73,6 +73,8 @@ extern struct percpu_counter svcrdma_stat_recv;
>  extern struct percpu_counter svcrdma_stat_sq_starve;
>  extern struct percpu_counter svcrdma_stat_write;
>
> +struct rpcrdma_pool;
> +
>  struct svcxprt_rdma {
>  	struct svc_xprt sc_xprt;	/* SVC transport structure */
>  	struct rdma_cm_id *sc_cm_id;	/* RDMA connection id */
> @@ -112,6 +114,7 @@ struct svcxprt_rdma {
>  	unsigned long sc_flags;
>  	struct work_struct sc_work;
>
> +	struct rpcrdma_pool *sc_recv_pool;
>  	struct llist_head sc_recv_ctxts;
>
>  	atomic_t sc_completion_ids;
> diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
> index e6a72646c507..8bc713082c1a 100644
> --- a/include/trace/events/rpcrdma.h
> +++ b/include/trace/events/rpcrdma.h
> @@ -2336,6 +2336,105 @@ DECLARE_EVENT_CLASS(rpcrdma_client_register_class,
>  DEFINE_CLIENT_REGISTER_EVENT(rpcrdma_client_register);
>  DEFINE_CLIENT_REGISTER_EVENT(rpcrdma_client_unregister);
>
> +TRACE_EVENT(rpcrdma_pool_create,
> +	TP_PROTO(
> +		unsigned int poolid,
> +		size_t bufsize
> +	),
> +
> +	TP_ARGS(poolid, bufsize),
> +
> +	TP_STRUCT__entry(
> +		__field(unsigned int, poolid)
> +		__field(size_t, bufsize)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->poolid = poolid;
> +		__entry->bufsize = bufsize;
> +	),
> +
> +	TP_printk("poolid=%u bufsize=%zu bytes",
> +		__entry->poolid, __entry->bufsize
> +	)
> +);
> +
> +TRACE_EVENT(rpcrdma_pool_destroy,
> +	TP_PROTO(
> +		unsigned int poolid
> +	),
> +
> +	TP_ARGS(poolid),
> +
> +	TP_STRUCT__entry(
> +		__field(unsigned int, poolid)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->poolid = poolid;
> +	),
> +
> +	TP_printk("poolid=%u",
> +		__entry->poolid
> +	)
> +);
> +
> +DECLARE_EVENT_CLASS(rpcrdma_pool_shard_class,
> +	TP_PROTO(
> +		unsigned int poolid,
> +		u32 shardid
> +	),
> +
> +	TP_ARGS(poolid, shardid),
> +
> +	TP_STRUCT__entry(
> +		__field(unsigned int, poolid)
> +		__field(u32, shardid)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->poolid = poolid;
> +		__entry->shardid = shardid;
> +	),
> +
> +	TP_printk("poolid=%u shardid=%u",
> +		__entry->poolid, __entry->shardid
> +	)
> +);
> +
> +#define DEFINE_RPCRDMA_POOL_SHARD_EVENT(name) \
> +	DEFINE_EVENT(rpcrdma_pool_shard_class, name, \
> +		TP_PROTO( \
> +			unsigned int poolid, \
> +			u32 shardid \
> +		), \
> +		TP_ARGS(poolid, shardid))
> +
> +DEFINE_RPCRDMA_POOL_SHARD_EVENT(rpcrdma_pool_shard_new);
> +DEFINE_RPCRDMA_POOL_SHARD_EVENT(rpcrdma_pool_shard_free);
> +
> +TRACE_EVENT(rpcrdma_pool_buffer,
> +	TP_PROTO(
> +		unsigned int poolid,
> +		const void *buffer
> +	),
> +
> +	TP_ARGS(poolid, buffer),
> +
> +	TP_STRUCT__entry(
> +		__field(unsigned int, poolid)
> +		__field(const void *, buffer)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->poolid = poolid;
> +		__entry->buffer = buffer;
> +	),
> +
> +	TP_printk("poolid=%u buffer=%p",
> +		__entry->poolid, __entry->buffer
> +	)
> +);
> +
>  #endif /* _TRACE_RPCRDMA_H */
>
>  #include <trace/define_trace.h>
> diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile
> index 3232aa23cdb4..f69456dffe87 100644
> --- a/net/sunrpc/xprtrdma/Makefile
> +++ b/net/sunrpc/xprtrdma/Makefile
> @@ -1,7 +1,7 @@
>  # SPDX-License-Identifier: GPL-2.0
>  obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o
>
> -rpcrdma-y := transport.o rpc_rdma.o verbs.o frwr_ops.o ib_client.o \
> +rpcrdma-y := transport.o rpc_rdma.o verbs.o frwr_ops.o ib_client.o pool.o \
>  	svc_rdma.o svc_rdma_backchannel.o svc_rdma_transport.o \
>  	svc_rdma_sendto.o svc_rdma_recvfrom.o svc_rdma_rw.o \
>  	svc_rdma_pcl.o module.o
> diff --git a/net/sunrpc/xprtrdma/pool.c b/net/sunrpc/xprtrdma/pool.c
> new file mode 100644
> index 000000000000..87404f1fc5bc
> --- /dev/null
> +++ b/net/sunrpc/xprtrdma/pool.c
> @@ -0,0 +1,241 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) 2025, Oracle and/or its affiliates.
> + *
> + * Pools for RPC-over-RDMA Receive buffers.
> + *
> + * A buffer pool attempts to conserve both the number of DMA mappings
> + * and the device's IOVA space by collecting small buffers together
> + * into a shard that has a single DMA mapping.
> + *
> + * API Contract:
> + * - Buffers contained in one rpcrdma_pool instance are the same
> + *   size (rp_bufsize), no larger than RPCRDMA_MAX_INLINE_THRESH
> + * - Buffers in one rpcrdma_pool instance are automatically released
> + *   when the pool instance is destroyed
> + *
> + * Future work:
> + * - Manage pool resources by reference count
> + */
> +
> +#include <linux/list.h>
> +#include <linux/xarray.h>
> +#include <linux/sunrpc/svc_rdma.h>
> +
> +#include <rdma/ib_verbs.h>
> +
> +#include "xprt_rdma.h"
> +#include "pool.h"
> +
> +#include <trace/events/rpcrdma.h>
> +
> +/*
> + * An idr would give near perfect pool ID uniqueness, but for
> + * the moment the pool ID is used only for observability, not
> + * correctness.
> + */
> +static atomic_t rpcrdma_pool_id;
> +
> +struct rpcrdma_pool {
> +	struct xarray	rp_xa;
> +	struct ib_pd	*rp_pd;
> +	size_t		rp_shardsize;	// in bytes
> +	size_t		rp_bufsize;	// in bytes
> +	unsigned int	rp_bufs_per_shard;
> +	unsigned int	rp_pool_id;
> +};
> +
> +struct rpcrdma_pool_shard {
> +	struct page	*pc_pages;
> +	u8		*pc_cpu_addr;
> +	u64		pc_mapped_addr;
> +	unsigned long	*pc_bitmap;
> +};
> +
> +/*
> + * For good NUMA awareness, ensure that the shard is allocated on
> + * the NUMA node that the underlying device is affined to.
> + *
> + * For the shard buffer, we really want alloc_pages_node rather
> + * than kmalloc_node.
> + */
> +static struct rpcrdma_pool_shard *
> +rpcrdma_pool_shard_alloc(struct rpcrdma_pool *pool, gfp_t flags)
> +{
> +	struct ib_device *device = pool->rp_pd->device;
> +	int numa_node = ibdev_to_node(device);
> +	struct rpcrdma_pool_shard *shard;
> +	size_t bmap_size;
> +
> +	shard = kmalloc_node(sizeof(*shard), flags, numa_node);
> +	if (!shard)
> +		goto fail;
> +
> +	bmap_size = BITS_TO_LONGS(pool->rp_bufs_per_shard) * sizeof(unsigned long);
> +	shard->pc_bitmap = kzalloc(bmap_size, flags);
> +	if (!shard->pc_bitmap)
> +		goto free_shard;
> +
> +	shard->pc_pages = alloc_pages_node(numa_node, flags,
> +					   get_order(pool->rp_shardsize));
> +	if (!shard->pc_pages)
> +		goto free_bitmap;
> +
> +	shard->pc_cpu_addr = page_address(shard->pc_pages);
> +	shard->pc_mapped_addr = ib_dma_map_single(device, shard->pc_cpu_addr,
> +						  pool->rp_shardsize,
> +						  DMA_FROM_DEVICE);
> +	if (ib_dma_mapping_error(device, shard->pc_mapped_addr))
> +		goto free_iobuf;
> +
> +	return shard;
> +
> +free_iobuf:
> +	__free_pages(shard->pc_pages, get_order(pool->rp_shardsize));
> +free_bitmap:
> +	kfree(shard->pc_bitmap);
> +free_shard:
> +	kfree(shard);
> +fail:
> +	return NULL;
> +}
> +
> +static void
> +rpcrdma_pool_shard_free(struct rpcrdma_pool *pool,
> +			struct rpcrdma_pool_shard *shard)
> +{
> +	ib_dma_unmap_single(pool->rp_pd->device, shard->pc_mapped_addr,
> +			    pool->rp_shardsize, DMA_FROM_DEVICE);
> +
> +	__free_pages(shard->pc_pages, get_order(pool->rp_shardsize));
> +	kfree(shard->pc_bitmap);
> +	kfree(shard);
> +}
> +
> +/**
> + * rpcrdma_pool_create - Allocate an rpcrdma_pool instance
> + * @pd: RDMA protection domain to be used for the pool's buffers
> + * @bufsize: Size, in bytes, of all buffers in the pool
> + * @flags: GFP flags to be used during pool creation
> + *
> + * Returns a pointer to an opaque rpcrdma_pool instance, or NULL. If
> + * a pool instance is returned, caller must free the instance using
> + * rpcrdma_pool_destroy().
> + */
> +struct rpcrdma_pool *rpcrdma_pool_create(struct ib_pd *pd, size_t bufsize,
> +					 gfp_t flags)
> +{
> +	struct rpcrdma_pool *pool;
> +
> +	pool = kmalloc(sizeof(*pool), flags);
> +	if (!pool)
> +		return NULL;
> +
> +	xa_init_flags(&pool->rp_xa, XA_FLAGS_ALLOC);
> +	pool->rp_pd = pd;
> +	pool->rp_shardsize = RPCRDMA_MAX_INLINE_THRESH;
> +	pool->rp_bufsize = bufsize;
> +	pool->rp_bufs_per_shard = pool->rp_shardsize / pool->rp_bufsize;
> +	pool->rp_pool_id = atomic_inc_return(&rpcrdma_pool_id);
> +
> +	trace_rpcrdma_pool_create(pool->rp_pool_id, pool->rp_bufsize);
> +	return pool;
> +}
> +
> +/**
> + * rpcrdma_pool_destroy - Release resources owned by @pool
> + * @pool: buffer pool instance that will no longer be used
> + *
> + * This call releases all buffers in @pool that were allocated
> + * via rpcrdma_pool_buffer_alloc().
> + */
> +void
> +rpcrdma_pool_destroy(struct rpcrdma_pool *pool)
> +{
> +	struct rpcrdma_pool_shard *shard;
> +	unsigned long index;
> +
> +	trace_rpcrdma_pool_destroy(pool->rp_pool_id);
> +
> +	xa_for_each(&pool->rp_xa, index, shard) {
> +		trace_rpcrdma_pool_shard_free(pool->rp_pool_id, index);
> +		xa_erase(&pool->rp_xa, index);
> +		rpcrdma_pool_shard_free(pool, shard);
> +	}
> +
> +	xa_destroy(&pool->rp_xa);
> +	kfree(pool);
> +}
> +
> +/**
> + * rpcrdma_pool_buffer_alloc - Allocate a buffer from @pool
> + * @pool: buffer pool from which to allocate the buffer
> + * @flags: GFP flags used during this allocation
> + * @cpu_addr: CPU address of the buffer
> + * @sge: OUT: an initialized scatter-gather entry
> + *
> + * Return values:
> + *   %true: @cpu_addr and @sge are filled in with a DMA-mapped buffer
> + *   %false: No buffer is available
> + *
> + * When rpcrdma_pool_buffer_alloc() is successful, the returned
> + * buffer is freed automatically when the buffer pool is released
> + * by rpcrdma_pool_destroy().
> + */
> +bool
> +rpcrdma_pool_buffer_alloc(struct rpcrdma_pool *pool, gfp_t flags,
> +			  void **cpu_addr, struct ib_sge *sge)
> +{
> +	struct rpcrdma_pool_shard *shard;
> +	u64 returned_mapped_addr;
> +	void *returned_cpu_addr;
> +	unsigned long index;
> +	u32 id;
> +
> +	xa_for_each(&pool->rp_xa, index, shard) {
> +		unsigned int i;
> +
> +		returned_cpu_addr = shard->pc_cpu_addr;
> +		returned_mapped_addr = shard->pc_mapped_addr;
> +		for (i = 0; i < pool->rp_bufs_per_shard; i++) {
> +			if (!test_and_set_bit(i, shard->pc_bitmap)) {
> +				returned_cpu_addr += i * pool->rp_bufsize;
> +				returned_mapped_addr += i * pool->rp_bufsize;
> +				goto out;
> +			}
> +		}
> +	}
> +
> +	shard = rpcrdma_pool_shard_alloc(pool, flags);
> +	if (!shard)
> +		return false;
> +	set_bit(0, shard->pc_bitmap);
> +	returned_cpu_addr = shard->pc_cpu_addr;
> +	returned_mapped_addr = shard->pc_mapped_addr;
> +
> +	if (xa_alloc(&pool->rp_xa, &id, shard, xa_limit_16b, flags) != 0) {
> +		rpcrdma_pool_shard_free(pool, shard);
> +		return false;
> +	}
> +	trace_rpcrdma_pool_shard_new(pool->rp_pool_id, id);
> +
> +out:
> +	*cpu_addr = returned_cpu_addr;
> +	sge->addr = returned_mapped_addr;
> +	sge->length = pool->rp_bufsize;
> +	sge->lkey = pool->rp_pd->local_dma_lkey;
> +
> +	trace_rpcrdma_pool_buffer(pool->rp_pool_id, returned_cpu_addr);
> +	return true;
> +}
> +
> +/**
> + * rpcrdma_pool_buffer_sync - Sync the contents of a pool buffer after I/O
> + * @pool: buffer pool to which the buffer belongs
> + * @sge: SGE containing the DMA-mapped buffer address and length
> + */
> +void rpcrdma_pool_buffer_sync(struct rpcrdma_pool *pool, struct ib_sge *sge)
> +{
> +	ib_dma_sync_single_for_cpu(pool->rp_pd->device, sge->addr,
> +				   sge->length, DMA_FROM_DEVICE);
> +}
> diff --git a/net/sunrpc/xprtrdma/pool.h b/net/sunrpc/xprtrdma/pool.h
> new file mode 100644
> index 000000000000..9c8ec8723884
> --- /dev/null
> +++ b/net/sunrpc/xprtrdma/pool.h
> @@ -0,0 +1,20 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (c) 2025, Oracle and/or its affiliates.
> + *
> + * Pools for RDMA Receive buffers.
> + */
> +
> +#ifndef RPCRDMA_POOL_H
> +#define RPCRDMA_POOL_H
> +
> +struct rpcrdma_pool;
> +
> +struct rpcrdma_pool *rpcrdma_pool_create(struct ib_pd *pd, size_t bufsize,
> +					 gfp_t flags);
> +void rpcrdma_pool_destroy(struct rpcrdma_pool *pool);
> +bool rpcrdma_pool_buffer_alloc(struct rpcrdma_pool *pool, gfp_t flags,
> +			       void **cpu_addr, struct ib_sge *sge);
> +void rpcrdma_pool_buffer_sync(struct rpcrdma_pool *pool, struct ib_sge *sge);
> +
> +#endif /* RPCRDMA_POOL_H */
> diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
> index e7e4a39ca6c6..704f6d5fa3e6 100644
> --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
> +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
> @@ -104,9 +104,9 @@
>  #include <linux/sunrpc/svc_rdma.h>
>
>  #include "xprt_rdma.h"
> -#include <trace/events/rpcrdma.h>
> +#include "pool.h"
>
> -static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc);
> +#include <trace/events/rpcrdma.h>
>
>  static inline struct svc_rdma_recv_ctxt *
>  svc_rdma_next_recv_ctxt(struct list_head *list)
> @@ -115,14 +115,14 @@ svc_rdma_next_recv_ctxt(struct list_head *list)
>  			rc_list);
>  }
>
> +static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc);
> +
>  static struct svc_rdma_recv_ctxt *
>  svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma)
>  {
>  	int node = ibdev_to_node(rdma->sc_cm_id->device);
>  	struct svc_rdma_recv_ctxt *ctxt;
>  	unsigned long pages;
> -	dma_addr_t addr;
> -	void *buffer;
>
>  	pages = svc_serv_maxpages(rdma->sc_xprt.xpt_server);
>  	ctxt = kzalloc_node(struct_size(ctxt, rc_pages, pages),
> @@ -130,13 +130,10 @@ svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma)
>  	if (!ctxt)
>  		goto fail0;
>  	ctxt->rc_maxpages = pages;
> -	buffer = kmalloc_node(rdma->sc_max_req_size, GFP_KERNEL, node);
> -	if (!buffer)
> +
> +	if (!rpcrdma_pool_buffer_alloc(rdma->sc_recv_pool, GFP_KERNEL,
> +				       &ctxt->rc_recv_buf, &ctxt->rc_recv_sge))
>  		goto fail1;
> -	addr = ib_dma_map_single(rdma->sc_pd->device, buffer,
> -				 rdma->sc_max_req_size, DMA_FROM_DEVICE);
> -	if (ib_dma_mapping_error(rdma->sc_pd->device, addr))
> -		goto fail2;
>
>  	svc_rdma_recv_cid_init(rdma, &ctxt->rc_cid);
>  	pcl_init(&ctxt->rc_call_pcl);
> @@ -149,30 +146,15 @@ svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma)
>  	ctxt->rc_recv_wr.sg_list = &ctxt->rc_recv_sge;
>  	ctxt->rc_recv_wr.num_sge = 1;
>  	ctxt->rc_cqe.done = svc_rdma_wc_receive;
> -	ctxt->rc_recv_sge.addr = addr;
> -	ctxt->rc_recv_sge.length = rdma->sc_max_req_size;
> -	ctxt->rc_recv_sge.lkey = rdma->sc_pd->local_dma_lkey;
> -	ctxt->rc_recv_buf = buffer;
>  	svc_rdma_cc_init(rdma, &ctxt->rc_cc);
>  	return ctxt;
>
> -fail2:
> -	kfree(buffer);
>  fail1:
>  	kfree(ctxt);
>  fail0:
>  	return NULL;
>  }
>
> -static void svc_rdma_recv_ctxt_destroy(struct svcxprt_rdma *rdma,
> -				       struct svc_rdma_recv_ctxt *ctxt)
> -{
> -	ib_dma_unmap_single(rdma->sc_pd->device, ctxt->rc_recv_sge.addr,
> -			    ctxt->rc_recv_sge.length, DMA_FROM_DEVICE);
> -	kfree(ctxt->rc_recv_buf);
> -	kfree(ctxt);
> -}
> -
>  /**
>   * svc_rdma_recv_ctxts_destroy - Release all recv_ctxt's for an xprt
>   * @rdma: svcxprt_rdma being torn down
> @@ -185,8 +167,9 @@ void svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma)
>
>  	while ((node = llist_del_first(&rdma->sc_recv_ctxts))) {
>  		ctxt = llist_entry(node, struct svc_rdma_recv_ctxt, rc_node);
> -		svc_rdma_recv_ctxt_destroy(rdma, ctxt);
> +		kfree(ctxt);
>  	}
> +	rpcrdma_pool_destroy(rdma->sc_recv_pool);
>  }
>
>  /**
> @@ -307,6 +290,12 @@ bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma)
>  {
>  	unsigned int total;
>
> +	rdma->sc_recv_pool = rpcrdma_pool_create(rdma->sc_pd,
> +						 rdma->sc_max_req_size,
> +						 GFP_KERNEL);
> +	if (!rdma->sc_recv_pool)
> +		return false;
> +
>  	/* For each credit, allocate enough recv_ctxts for one
>  	 * posted Receive and one RPC in process.
>  	 */
> @@ -962,9 +951,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
>  		return 0;
>
>  	percpu_counter_inc(&svcrdma_stat_recv);
> -	ib_dma_sync_single_for_cpu(rdma_xprt->sc_pd->device,
> -				   ctxt->rc_recv_sge.addr, ctxt->rc_byte_len,
> -				   DMA_FROM_DEVICE);
> +	rpcrdma_pool_buffer_sync(rdma_xprt->sc_recv_pool, &ctxt->rc_recv_sge);
>  	svc_rdma_build_arg_xdr(rqstp, ctxt);
>
>  	ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg, ctxt);
> --
> 2.50.0
>
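Separately, a small nit while I was in here: the open-coded scan in
rpcrdma_pool_buffer_alloc() probes every bit with test_and_set_bit().
Something like the sketch below could jump straight to a candidate
slot with find_first_zero_bit() and only claim it atomically. Just a
sketch, assuming allocators can race, so a lost claim rescans the
shard:

	xa_for_each(&pool->rp_xa, index, shard) {
		unsigned int i;

		/* Find a candidate free slot, then claim it atomically;
		 * rescan if another allocator claimed it first.
		 */
		do {
			i = find_first_zero_bit(shard->pc_bitmap,
						pool->rp_bufs_per_shard);
			if (i >= pool->rp_bufs_per_shard)
				break;	/* shard is full, try the next one */
		} while (test_and_set_bit(i, shard->pc_bitmap));

		if (i < pool->rp_bufs_per_shard) {
			returned_cpu_addr = shard->pc_cpu_addr + i * pool->rp_bufsize;
			returned_mapped_addr = shard->pc_mapped_addr + i * pool->rp_bufsize;
			goto out;
		}
	}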