On Tue, Jul 22, 2025 at 2:58 PM Bernd Schubert <bschubert@xxxxxxx> wrote:
>
> Add per-CPU and per-NUMA node bitmasks to track which
> io-uring queues are available for new requests.
>
> - Global queue availability (avail_q_mask)
> - Per-NUMA node queue availability (per_numa_avail_q_mask)
> - Global queue registration (registered_q_mask)
> - Per-NUMA node queue registration (numa_registered_q_mask)
>
> Note that these bitmasks are not lock protected, accessing them
> will not be absolutely accurate. Goal is to determine which
> queues are aproximately idle and might be better suited for
> a request.
>
> Signed-off-by: Bernd Schubert <bschubert@xxxxxxx>
> ---
>  fs/fuse/dev_uring.c   | 99 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  fs/fuse/dev_uring_i.h | 18 ++++++++++
>  2 files changed, 117 insertions(+)
>
> diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c
> index 0f5ab27dacb66c9f5f10eac2713d9bd3eb4c26da..c2bc20848bc54541ede9286562177994e7ca5879 100644
> --- a/fs/fuse/dev_uring.c
> +++ b/fs/fuse/dev_uring.c
> +static int fuse_ring_create_q_masks(struct fuse_ring *ring)
> +{
> +        if (!zalloc_cpumask_var(&ring->avail_q_mask, GFP_KERNEL_ACCOUNT))
> +                return -ENOMEM;
>
> +        if (!zalloc_cpumask_var(&ring->registered_q_mask, GFP_KERNEL_ACCOUNT))
> +                return -ENOMEM;
>
> +        ring->per_numa_avail_q_mask = kcalloc(ring->nr_numa_nodes,
> +                                              sizeof(struct cpumask *),

nit: sizeof(cpumask_var_t) since per_numa_avail_q_mask gets defined in
the struct as a cpumask_var_t?

> +                                              GFP_KERNEL_ACCOUNT);
> +        if (!ring->per_numa_avail_q_mask)
> +                return -ENOMEM;
> +        for (int node = 0; node < ring->nr_numa_nodes; node++)

nit: afaik, the general convention has the int declared at the top of
the function instead of inside the loop scope

> +                if (!zalloc_cpumask_var(&ring->per_numa_avail_q_mask[node],
> +                                        GFP_KERNEL_ACCOUNT))
> @@ -472,11 +539,18 @@ void fuse_uring_stop_queues(struct fuse_ring *ring)
>
>          for (qid = 0; qid < ring->max_nr_queues; qid++) {
>                  struct fuse_ring_queue *queue = READ_ONCE(ring->queues[qid]);
> +                int node;
>
>                  if (!queue)
>                          continue;
>
>                  fuse_uring_teardown_entries(queue);
> +
> +                node = queue->numa_node;
> +                cpumask_clear_cpu(qid, ring->registered_q_mask);
> +                cpumask_clear_cpu(qid, ring->avail_q_mask);
> +                cpumask_clear_cpu(qid, ring->numa_registered_q_mask[node]);
> +                cpumask_clear_cpu(qid, ring->per_numa_avail_q_mask[node]);

Would it be more efficient to clear them all at once (eg with
cpumask_clear()) outside the loop instead of clearing them bit by bit
here?

>          }
>
>          if (atomic_read(&ring->queue_refs) > 0) {
> @@ -744,9 +818,18 @@ static int fuse_uring_send_next_to_ring(struct fuse_ring_ent *ent,
>  static void fuse_uring_ent_avail(struct fuse_ring_ent *ent,
>                                   struct fuse_ring_queue *queue)
>  {
> +        struct fuse_ring *ring = queue->ring;
> +        int node = queue->numa_node;
> +
>          WARN_ON_ONCE(!ent->cmd);
>          list_move(&ent->list, &queue->ent_avail_queue);
>          ent->state = FRRS_AVAILABLE;
> +
> +        if (list_is_singular(&queue->ent_avail_queue) &&

Did you mean to include "list_is_singular()" here? I think even if
queue->ent_avail_queue has more than one entry on it, we still need to
take this branch, in case queue->nr_reqs was previously at or above
FUSE_URING_QUEUE_THRESHOLD?

> +            queue->nr_reqs <= FUSE_URING_QUEUE_THRESHOLD) {

Should this be <? Afaict, if queue->nr_reqs == FUSE_URING_QUEUE_THRESHOLD
then the queue is considered full (and no longer available)?
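
Something like the below (completely untested, just to illustrate what
I mean, and assuming FUSE_URING_QUEUE_THRESHOLD is the count at which a
queue stops being considered available) is roughly what I would have
expected here:

        /*
         * Mark the queue available whenever it is below the threshold,
         * even if ent_avail_queue already had entries on it;
         * cpumask_set_cpu() on an already-set bit is a no-op, so
         * re-setting is harmless.
         */
        if (queue->nr_reqs < FUSE_URING_QUEUE_THRESHOLD) {
                cpumask_set_cpu(queue->qid, ring->avail_q_mask);
                cpumask_set_cpu(queue->qid, ring->per_numa_avail_q_mask[node]);
        }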
> +                cpumask_set_cpu(queue->qid, ring->avail_q_mask);
> +                cpumask_set_cpu(queue->qid, ring->per_numa_avail_q_mask[node]);
> +        }
>  }
>
>  /* Used to find the request on SQE commit */
> @@ -769,6 +852,8 @@ static void fuse_uring_add_req_to_ring_ent(struct fuse_ring_ent *ent,
>                                             struct fuse_req *req)
>  {
>          struct fuse_ring_queue *queue = ent->queue;
> +        struct fuse_ring *ring = queue->ring;
> +        int node = queue->numa_node;
>
>          lockdep_assert_held(&queue->lock);
>
> @@ -783,6 +868,16 @@ static void fuse_uring_add_req_to_ring_ent(struct fuse_ring_ent *ent,
>          ent->state = FRRS_FUSE_REQ;
>          list_move_tail(&ent->list, &queue->ent_w_req_queue);
>          fuse_uring_add_to_pq(ent, req);
> +
> +        /*
> +         * If there are no more available entries, mark the queue as unavailable
> +         * in both global and per-NUMA node masks
> +         */
> +        if (list_empty(&queue->ent_avail_queue)) {
> +                cpumask_clear_cpu(queue->qid, ring->avail_q_mask);
> +                cpumask_clear_cpu(queue->qid,
> +                                  ring->per_numa_avail_q_mask[node]);
> +        }
>  }
>
>  /* Fetch the next fuse request if available */
> @@ -982,6 +1077,7 @@ static void fuse_uring_do_register(struct fuse_ring_ent *ent,
>          struct fuse_ring *ring = queue->ring;
>          struct fuse_conn *fc = ring->fc;
>          struct fuse_iqueue *fiq = &fc->iq;
> +        int node = queue->numa_node;
>
>          fuse_uring_prepare_cancel(cmd, issue_flags, ent);
>
> @@ -990,6 +1086,9 @@ static void fuse_uring_do_register(struct fuse_ring_ent *ent,
>          fuse_uring_ent_avail(ent, queue);
>          spin_unlock(&queue->lock);
>
> +        cpumask_set_cpu(queue->qid, ring->registered_q_mask);
> +        cpumask_set_cpu(queue->qid, ring->numa_registered_q_mask[node]);
> +
>          if (!ring->ready) {
>                  bool ready = is_ring_ready(ring, queue->qid);
>
> diff --git a/fs/fuse/dev_uring_i.h b/fs/fuse/dev_uring_i.h
> index 708412294982566919122a1a0d7f741217c763ce..0457dbc6737c8876dd7a7d4c9c724da05e553e6a 100644
> --- a/fs/fuse/dev_uring_i.h
> +++ b/fs/fuse/dev_uring_i.h
> @@ -125,6 +131,18 @@ struct fuse_ring {
>           */
>          unsigned int stop_debug_log : 1;
>
> +        /* Tracks which queues are available (empty) globally */

nit: is "(empty)" accurate here? afaict, the queue is available as long
as it has <= FUSE_URING_QUEUE_THRESHOLD requests (eg even if it's not
empty)?

> +        cpumask_var_t avail_q_mask;

Should avail_q_mask also get updated accordingly when requests are
queued (eg fuse_uring_queue_fuse_req(), fuse_uring_queue_bq_req()) or
completed by userspace (eg fuse_uring_req_end()), when queue->nr_reqs
crosses FUSE_URING_QUEUE_THRESHOLD?

Thanks,
Joanne

> +
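
(Rough, untested sketch of the queueing-side update I was asking about
above, reusing the masks/names from this patch and assuming
queue->nr_reqs has already been incremented under queue->lock when the
request is queued:)

        /* eg in fuse_uring_queue_fuse_req() / fuse_uring_queue_bq_req():
         * once the queue reaches the threshold, drop it from the
         * global and per-NUMA availability masks.
         */
        if (queue->nr_reqs >= FUSE_URING_QUEUE_THRESHOLD) {
                cpumask_clear_cpu(queue->qid, ring->avail_q_mask);
                cpumask_clear_cpu(queue->qid,
                                  ring->per_numa_avail_q_mask[queue->numa_node]);
        }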