[PATCH 5/5] fuse: {io-uring} Allow reduced number of ring queues

Currently, FUSE io-uring requires all queues to be registered before
the ring becomes ready, which can result in excessive memory usage on
systems with many cores.

This patch introduces a static queue mapping system that allows FUSE
io-uring to operate with a reduced number of registered queues by:

1. Adding a queue_mapping array to track which registered queue each
   CPU should use
2. Replacing the is_ring_ready() check with immediate queue mapping
   as soon as any queue is registered
3. Implementing fuse_uring_map_queues() to create CPU-to-queue mappings
   that prefer NUMA-local queues when available
4. Updating fuse_uring_get_queue() to use the static mapping instead
   of direct CPU-to-queue correspondence

The mapping prioritizes NUMA locality by first attempting to map CPUs
to queues on the same NUMA node, falling back to any registered queue
if the local node has none.
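
To illustrate, the resulting policy is roughly equivalent to the
stand-alone sketch below. This is plain user-space C, not kernel code:
the topology, the set of registered queues and map_qid() (a stand-in
for fuse_uring_map_qid(), assumed here to spread CPUs evenly across
the set bits of a mask) are made up for the example.

  #include <stdio.h>

  #define NR_CPUS 8

  /* made-up topology: CPUs 0-3 on node 0, CPUs 4-7 on node 1 */
  static const int cpu_node[NR_CPUS]   = { 0, 0, 0, 0, 1, 1, 1, 1 };

  /* assume only queues 1 and 2 were registered, both on node 0 */
  static const int registered[NR_CPUS] = { 0, 1, 1, 0, 0, 0, 0, 0 };

  static int queue_mapping[NR_CPUS];

  /*
   * Stand-in for fuse_uring_map_qid(): pick the (cpu % nr)-th queue
   * set in mask, so CPUs are spread across the available queues.
   */
  static int map_qid(int cpu, const int *mask)
  {
          int nr = 0, seen = 0, i;

          for (i = 0; i < NR_CPUS; i++)
                  nr += mask[i];
          if (!nr)
                  return -1;

          for (i = 0; i < NR_CPUS; i++)
                  if (mask[i] && seen++ == cpu % nr)
                          return i;
          return -1;
  }

  int main(void)
  {
          int node_mask[NR_CPUS];
          int cpu, qid, i;

          for (cpu = 0; cpu < NR_CPUS; cpu++) {
                  /* registered queues on this CPU's own NUMA node */
                  for (i = 0; i < NR_CPUS; i++)
                          node_mask[i] = registered[i] &&
                                         cpu_node[i] == cpu_node[cpu];

                  /* prefer a NUMA-local queue, else any registered one */
                  qid = map_qid(cpu, node_mask);
                  if (qid < 0)
                          qid = map_qid(cpu, registered);

                  queue_mapping[cpu] = qid;
                  printf("cpu %d (node %d) -> qid %d\n",
                         cpu, cpu_node[cpu], qid);
          }

          return 0;
  }

CPUs 0-3 land on the node-local queues 1 and 2; CPUs 4-7 have no local
queue and fall back to the same two queues via the global registered
mask, which is the fallback described above.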

Signed-off-by: Bernd Schubert <bschubert@xxxxxxx>
---
 fs/fuse/dev_uring.c   | 112 ++++++++++++++++++++++++++++++--------------------
 fs/fuse/dev_uring_i.h |   3 ++
 2 files changed, 71 insertions(+), 44 deletions(-)

diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c
index 624f856388e0867f3c3caed6771e61babd076645..8d16880cb0eb9b252dd6b6cf565011c3787ad1d0 100644
--- a/fs/fuse/dev_uring.c
+++ b/fs/fuse/dev_uring.c
@@ -238,6 +238,7 @@ void fuse_uring_destruct(struct fuse_conn *fc)
 
 	fuse_ring_destruct_q_masks(ring);
 	kfree(ring->queues);
+	kfree(ring->queue_mapping);
 	kfree(ring);
 	fc->ring = NULL;
 }
@@ -303,6 +304,12 @@ static struct fuse_ring *fuse_uring_create(struct fuse_conn *fc)
 	if (err)
 		goto out_err;
 
+	err = -ENOMEM;
+	ring->queue_mapping =
+		kcalloc(nr_queues, sizeof(int), GFP_KERNEL_ACCOUNT);
+	if (!ring->queue_mapping)
+		goto out_err;
+
 	spin_lock(&fc->lock);
 	if (fc->ring) {
 		/* race, another thread created the ring in the meantime */
@@ -324,6 +331,7 @@ static struct fuse_ring *fuse_uring_create(struct fuse_conn *fc)
 out_err:
 	fuse_ring_destruct_q_masks(ring);
 	kfree(ring->queues);
+	kfree(ring->queue_mapping);
 	kfree(ring);
 	return res;
 }
@@ -1040,31 +1048,6 @@ static int fuse_uring_commit_fetch(struct io_uring_cmd *cmd, int issue_flags,
 	return 0;
 }
 
-static bool is_ring_ready(struct fuse_ring *ring, int current_qid)
-{
-	int qid;
-	struct fuse_ring_queue *queue;
-	bool ready = true;
-
-	for (qid = 0; qid < ring->max_nr_queues && ready; qid++) {
-		if (current_qid == qid)
-			continue;
-
-		queue = ring->queues[qid];
-		if (!queue) {
-			ready = false;
-			break;
-		}
-
-		spin_lock(&queue->lock);
-		if (list_empty(&queue->ent_avail_queue))
-			ready = false;
-		spin_unlock(&queue->lock);
-	}
-
-	return ready;
-}
-
 static int fuse_uring_map_qid(int qid, const struct cpumask *mask)
 {
 	int nr_queues = cpumask_weight(mask);
@@ -1082,6 +1065,41 @@ static int fuse_uring_map_qid(int qid, const struct cpumask *mask)
 	return -1;
 }
 
+static int fuse_uring_map_queues(struct fuse_ring *ring)
+{
+	int qid, mapped_qid, node;
+
+	for (qid = 0; qid < ring->max_nr_queues; qid++) {
+		node = cpu_to_node(qid);
+		if (WARN_ON_ONCE(node >= ring->nr_numa_nodes) || node < 0)
+			return -EINVAL;
+
+		/* First try to find a registered queue on the same NUMA node */
+		mapped_qid = fuse_uring_map_qid(
+			qid, ring->numa_registered_q_mask[node]);
+		if (mapped_qid < 0) {
+			/*
+			 * No registered queue on this NUMA node,
+			 * use any registered queue
+			 */
+			mapped_qid = fuse_uring_map_qid(
+				qid, ring->registered_q_mask);
+			if (WARN_ON_ONCE(mapped_qid < 0))
+				return -EINVAL;
+		}
+
+		if (WARN_ON_ONCE(!ring->queues[mapped_qid])) {
+			pr_err("qid=%d mapped_qid=%d not created\n", qid,
+			       mapped_qid);
+			return -EINVAL;
+		}
+
+		WRITE_ONCE(ring->queue_mapping[qid], mapped_qid);
+	}
+
+	return 0;
+}
+
 /*
  * fuse_uring_req_fetch command handling
  */
@@ -1094,6 +1112,7 @@ static void fuse_uring_do_register(struct fuse_ring_ent *ent,
 	struct fuse_conn *fc = ring->fc;
 	struct fuse_iqueue *fiq = &fc->iq;
 	int node = queue->numa_node;
+	int err;
 
 	fuse_uring_prepare_cancel(cmd, issue_flags, ent);
 
@@ -1105,14 +1124,14 @@ static void fuse_uring_do_register(struct fuse_ring_ent *ent,
 	cpumask_set_cpu(queue->qid, ring->registered_q_mask);
 	cpumask_set_cpu(queue->qid, ring->numa_registered_q_mask[node]);
 
-	if (!ring->ready) {
-		bool ready = is_ring_ready(ring, queue->qid);
+	err = fuse_uring_map_queues(ring);
+	if (err)
+		return;
 
-		if (ready) {
-			WRITE_ONCE(fiq->ops, &fuse_io_uring_ops);
-			WRITE_ONCE(ring->ready, true);
-			wake_up_all(&fc->blocked_waitq);
-		}
+	if (!ring->ready) {
+		WRITE_ONCE(fiq->ops, &fuse_io_uring_ops);
+		WRITE_ONCE(ring->ready, true);
+		wake_up_all(&fc->blocked_waitq);
 	}
 }
 
@@ -1365,25 +1384,27 @@ fuse_uring_get_first_queue(struct fuse_ring *ring, const struct cpumask *mask)
  */
 static struct fuse_ring_queue *fuse_uring_get_queue(struct fuse_ring *ring)
 {
-	unsigned int qid;
-	struct fuse_ring_queue *queue, *local_queue;
+	unsigned int mapped_qid;
+	struct fuse_ring_queue *queue;
 	int local_node;
 	struct cpumask *mask;
+	unsigned int core = task_cpu(current);
 
-	qid = task_cpu(current);
-	if (WARN_ONCE(qid >= ring->max_nr_queues,
-		      "Core number (%u) exceeds nr queues (%zu)\n", qid,
-		      ring->max_nr_queues))
-		qid = 0;
-	local_node = cpu_to_node(qid);
+	local_node = cpu_to_node(core);
+	if (WARN_ON_ONCE(local_node >= ring->nr_numa_nodes) || local_node < 0)
+		local_node = 0;
 
-	local_queue = queue = ring->queues[qid];
-	if (WARN_ONCE(!queue, "Missing queue for qid %d\n", qid))
-		return NULL;
+	if (WARN_ON_ONCE(core >= ring->max_nr_queues))
+		core = 0;
 
+	mapped_qid = READ_ONCE(ring->queue_mapping[core]);
+	queue = ring->queues[mapped_qid];
+
+	/* First check if current CPU's queue is available */
 	if (queue->nr_reqs <= FUSE_URING_QUEUE_THRESHOLD)
 		return queue;
 
+	/* Second check if there are any available queues on the local node */
 	mask = ring->per_numa_avail_q_mask[local_node];
 	queue = fuse_uring_get_first_queue(ring, mask);
 	if (queue)
@@ -1394,7 +1415,10 @@ static struct fuse_ring_queue *fuse_uring_get_queue(struct fuse_ring *ring)
 	if (queue)
 		return queue;
 
-	return local_queue;
+	/* no better queue available, use the mapped queue */
+	queue = ring->queues[mapped_qid];
+
+	return queue;
 }
 
 static void fuse_uring_dispatch_ent(struct fuse_ring_ent *ent)
diff --git a/fs/fuse/dev_uring_i.h b/fs/fuse/dev_uring_i.h
index 0457dbc6737c8876dd7a7d4c9c724da05e553e6a..e72b83471cbfc2e911273966f3715305ca10e9ef 100644
--- a/fs/fuse/dev_uring_i.h
+++ b/fs/fuse/dev_uring_i.h
@@ -153,6 +153,9 @@ struct fuse_ring {
 
 	atomic_t queue_refs;
 
+	/* static queue mapping */
+	int *queue_mapping;
+
 	bool ready;
 };
 

-- 
2.43.0