Re: [PATCH 3/3] scsi: core: Improve IOPS in case of host-wide tags

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 9/10/25 23:32, Bart Van Assche wrote:
The SCSI core uses the budget map to enforce the cmd_per_lun limit.
That limit cannot be exceeded if host->cmd_per_lun >= host->can_queue
and if the host tag set is shared across all hardware queues.
Since scsi_mq_get_budget() shows up in all CPU profiles for fast SCSI
devices, do not allocate a budget map if cmd_per_lun >= can_queue and
if the host tag set is shared across all hardware queues.

On my UFS 4 test setup this patch improves IOPS by 1% and reduces the
time spent in scsi_mq_get_budget() from 0.22% to 0.01%.

Cc: Jens Axboe <axboe@xxxxxxxxx>
Cc: Christoph Hellwig <hch@xxxxxxxxxxxxx>
Cc: Ming Lei <ming.lei@xxxxxxxxxx>
Cc: John Garry <john.g.garry@xxxxxxxxxx>
Signed-off-by: Bart Van Assche <bvanassche@xxxxxxx>
---
  drivers/scsi/scsi.c        |  7 ++++-
  drivers/scsi/scsi_lib.c    | 60 +++++++++++++++++++++++++++++++++-----
  drivers/scsi/scsi_scan.c   | 11 ++++++-
  include/scsi/scsi_device.h |  5 +---
  4 files changed, 70 insertions(+), 13 deletions(-)

That is actually a valid point.
There are devices which set 'cmd_per_lun' to the same value
as 'can_queue', rendering the budget map a bit pointless.
But calling blk_mq_all_tag_iter() is more expensive than a simple
sbitmap_weight(), so the improvement isn't _that_ big
(as demonstrated by the performance increase of just 1%).

diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 9a0f467264b3..06066b694d8a 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -216,6 +216,8 @@ int scsi_device_max_queue_depth(struct scsi_device *sdev)
   */
  int scsi_change_queue_depth(struct scsi_device *sdev, int depth)
  {
+	struct Scsi_Host *shost = sdev->host;
+
  	depth = min_t(int, depth, scsi_device_max_queue_depth(sdev));
if (depth > 0) {
@@ -226,7 +228,10 @@ int scsi_change_queue_depth(struct scsi_device *sdev, int depth)
  	if (sdev->request_queue)
  		blk_set_queue_depth(sdev->request_queue, depth);
- sbitmap_resize(&sdev->budget_map, sdev->queue_depth);
+	if (shost->host_tagset && depth >= shost->can_queue)
+		sbitmap_free(&sdev->budget_map);
+	else
+		sbitmap_resize(&sdev->budget_map, sdev->queue_depth);
return sdev->queue_depth;
  }
I would make this static, and only allocate a budget_map if the
'cmd_per_lun' setting is smaller than the 'can_queue' setting.

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 0c65ecfedfbd..c546514d1049 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -396,7 +396,8 @@ void scsi_device_unbusy(struct scsi_device *sdev, struct scsi_cmnd *cmd)
  	if (starget->can_queue > 0)
  		atomic_dec(&starget->target_busy);
- sbitmap_put(&sdev->budget_map, cmd->budget_token);
+	if (sdev->budget_map.map)
+		sbitmap_put(&sdev->budget_map, cmd->budget_token);
  	cmd->budget_token = -1;
  }
@@ -445,6 +446,47 @@ static void scsi_single_lun_run(struct scsi_device *current_sdev)
  	spin_unlock_irqrestore(shost->host_lock, flags);
  }
+struct sdev_in_flight_data {
+	const struct scsi_device *sdev;
+	int count;
+};
+
+static bool scsi_device_check_in_flight(struct request *rq, void *data)
+{
+	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
+	struct sdev_in_flight_data *sifd = data;
+
+	if (cmd->device == sifd->sdev)
+		sifd->count++;
+
+	return true;
+}
+
+/**
+ * scsi_device_busy() - Number of commands allocated for a SCSI device
+ * @sdev: SCSI device.
+ *
+ * Note: There is a subtle difference between this function and
+ * scsi_host_busy(). scsi_host_busy() counts the number of commands that have
+ * been started. This function counts the number of commands that have been
+ * allocated. At least the UFS driver depends on this function counting commands

But then please don't name the callback 'scsi_device_check_in_flight',
as 'in flight' means 'commands which have been started'.
Please name it 'scsi_device_check_allocated' to make the distinction
clear.

+ * that have already been allocated but that have not yet been started.
+ */
+int scsi_device_busy(const struct scsi_device *sdev)
+{
+	struct sdev_in_flight_data sifd = { .sdev = sdev };
+	struct blk_mq_tag_set *set = &sdev->host->tag_set;
+
+	if (sdev->budget_map.map)
+		return sbitmap_weight(&sdev->budget_map);
+	if (WARN_ON_ONCE(!set->shared_tags))
+		return 0;

One wonders: what would happen if you simply returned '0' here
when there is only one LUN?

+	blk_mq_all_tag_iter(set->shared_tags, scsi_device_check_in_flight,
+			    &sifd);
+	return sifd.count;
+}
+EXPORT_SYMBOL(scsi_device_busy);
+
  static inline bool scsi_device_is_busy(struct scsi_device *sdev)
  {
  	if (scsi_device_busy(sdev) >= sdev->queue_depth)
@@ -1358,11 +1400,13 @@ scsi_device_state_check(struct scsi_device *sdev, struct request *req)
  static inline int scsi_dev_queue_ready(struct request_queue *q,
  				  struct scsi_device *sdev)
  {
-	int token;
+	int token = INT_MAX;
- token = sbitmap_get(&sdev->budget_map);
-	if (token < 0)
-		return -1;
+	if (sdev->budget_map.map) {
+		token = sbitmap_get(&sdev->budget_map);
+		if (token < 0)
+			return -1;
+	}
if (!atomic_read(&sdev->device_blocked))
  		return token;
@@ -1373,7 +1417,8 @@ static inline int scsi_dev_queue_ready(struct request_queue *q,
  	 */
  	if (scsi_device_busy(sdev) > 1 ||
  	    atomic_dec_return(&sdev->device_blocked) > 0) {
-		sbitmap_put(&sdev->budget_map, token);
+		if (sdev->budget_map.map)
+			sbitmap_put(&sdev->budget_map, token);
  		return -1;
  	}
@@ -1749,7 +1794,8 @@ static void scsi_mq_put_budget(struct request_queue *q, int budget_token)
  {
  	struct scsi_device *sdev = q->queuedata;
- sbitmap_put(&sdev->budget_map, budget_token);
+	if (sdev->budget_map.map)
+		sbitmap_put(&sdev->budget_map, budget_token);
  }
/*
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 3c6e089e80c3..6f2d0bf0e3ec 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -218,6 +218,7 @@ static void scsi_unlock_floptical(struct scsi_device *sdev,
  static int scsi_realloc_sdev_budget_map(struct scsi_device *sdev,
  					unsigned int depth)
  {
+	struct Scsi_Host *shost = sdev->host;
  	int new_shift = sbitmap_calculate_shift(depth);
  	bool need_alloc = !sdev->budget_map.map;
  	bool need_free = false;
@@ -225,6 +226,13 @@ static int scsi_realloc_sdev_budget_map(struct scsi_device *sdev,
  	int ret;
  	struct sbitmap sb_backup;
+ if (shost->host_tagset && depth >= shost->can_queue) {
+		memflags = blk_mq_freeze_queue(sdev->request_queue);
+		sbitmap_free(&sb_backup);

What are you freeing here?
The sbitmap was never allocated, so you should be able to simply
return 0 here...

+		blk_mq_unfreeze_queue(sdev->request_queue, memflags);
+		return 0;
+	}
+
  	depth = min_t(unsigned int, depth, scsi_device_max_queue_depth(sdev));
/*
@@ -1112,7 +1120,8 @@ static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result,
  	scsi_cdl_check(sdev);
sdev->max_queue_depth = sdev->queue_depth;
-	WARN_ON_ONCE(sdev->max_queue_depth > sdev->budget_map.depth);
+	WARN_ON_ONCE(sdev->budget_map.map &&
+		     sdev->max_queue_depth > sdev->budget_map.depth);
  	sdev->sdev_bflags = *bflags;
/*
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 6d6500148c4b..3c7a95fa9b67 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -687,10 +687,7 @@ static inline int scsi_device_supports_vpd(struct scsi_device *sdev)
  	return 0;
  }
-static inline int scsi_device_busy(struct scsi_device *sdev)
-{
-	return sbitmap_weight(&sdev->budget_map);
-}
+int scsi_device_busy(const struct scsi_device *sdev);
/* Macros to access the UNIT ATTENTION counters */
  #define scsi_get_ua_new_media_ctr(sdev) \

Cheers,

Hannes
--
Dr. Hannes Reinecke                  Kernel Storage Architect
hare@xxxxxxx                                +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich




[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux