Hi Ming,

I'm attempting to backport the fix for this issue to the 6.14-stable
branch. Greg Kroah-Hartman has already applied d6aa0c178bf8 ("ublk:
call ublk_dispatch_req() for handling UBLK_U_IO_NEED_GET_DATA"), but
was unable to apply f40139fde527 cleanly.

I created the patch below. It applies and compiles, but when I rerun
the scenario I get several hung tasks waiting on ub->mutex, which is
held by the following task:

jared@nvme195:~$ sudo cat /proc/230/stack
[<0>] msleep+0x2b/0x50
[<0>] ublk_wait_tagset_rqs_idle+0x3d/0xa0 [ublk_drv]
[<0>] ublk_nosrv_work+0x189/0x1b0 [ublk_drv]
[<0>] process_one_work+0x17b/0x3d0
[<0>] worker_thread+0x2de/0x410
[<0>] kthread+0xfe/0x230
[<0>] ret_from_fork+0x47/0x70
[<0>] ret_from_fork_asm+0x1a/0x30

I presume that 6.14-stable is missing a commit that calls
__blk_mq_end_request() for the requests that are not cancelled by
ublk_cancel_cmd().

Can you see a small tweak that would solve this without having to
backport all the ublk changes from 6.15 to 6.14-stable?

Thanks for the help.

Regards,
Jared

---
 drivers/block/ublk_drv.c | 27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index ab06a7a064fb..7d937168b245 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -1595,14 +1595,31 @@ static bool ublk_abort_requests(struct ublk_device *ub, struct ublk_queue *ubq)
 	return !was_canceled;
 }
 
-static void ublk_cancel_cmd(struct ublk_queue *ubq, struct ublk_io *io,
+static void ublk_cancel_cmd(struct ublk_queue *ubq, unsigned tag,
 		unsigned int issue_flags)
 {
+	struct ublk_io *io = &ubq->ios[tag];
+	struct ublk_device *ub = ubq->dev;
+	struct request *req;
 	bool done;
 
 	if (!(io->flags & UBLK_IO_FLAG_ACTIVE))
 		return;
 
+	/*
+	 * Don't try to cancel this command if the request is started for
+	 * avoiding race between io_uring_cmd_done() and
+	 * io_uring_cmd_complete_in_task().
+	 *
+	 * Either the started request will be aborted via __ublk_abort_rq(),
+	 * then this uring_cmd is canceled next time, or it will be done in
+	 * task work function ublk_dispatch_req() because io_uring guarantees
+	 * that ublk_dispatch_req() is always called
+	 */
+	req = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], tag);
+	if (req && blk_mq_request_started(req))
+		return;
+
 	spin_lock(&ubq->cancel_lock);
 	done = !!(io->flags & UBLK_IO_FLAG_CANCELED);
 	if (!done)
@@ -1625,7 +1642,6 @@ static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd,
 	struct task_struct *task;
 	struct ublk_device *ub;
 	bool need_schedule;
-	struct ublk_io *io;
 
 	if (WARN_ON_ONCE(!ubq))
 		return;
@@ -1640,9 +1656,8 @@ static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd,
 	ub = ubq->dev;
 	need_schedule = ublk_abort_requests(ub, ubq);
 
-	io = &ubq->ios[pdu->tag];
-	WARN_ON_ONCE(io->cmd != cmd);
-	ublk_cancel_cmd(ubq, io, issue_flags);
+	WARN_ON_ONCE(ubq->ios[pdu->tag].cmd != cmd);
+	ublk_cancel_cmd(ubq, pdu->tag, issue_flags);
 
 	if (need_schedule) {
 		schedule_work(&ub->nosrv_work);
@@ -1659,7 +1674,7 @@ static void ublk_cancel_queue(struct ublk_queue *ubq)
 	int i;
 
 	for (i = 0; i < ubq->q_depth; i++)
-		ublk_cancel_cmd(ubq, &ubq->ios[i], IO_URING_F_UNLOCKED);
+		ublk_cancel_cmd(ubq, i, IO_URING_F_UNLOCKED);
 }
 
 /* Cancel all pending commands, must be called after del_gendisk() returns */
--
2.43.0
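
P.S. To make my presumption above concrete, here is a rough sketch of
the kind of small tweak I had in mind. It is not code taken from 6.15:
the helper name ublk_fail_started_reqs(), the BLK_STS_IOERR status, and
the idea of running it from the nosrv path once the queue is known to
be dying are all my guesses. I'm also not sure whether
blk_mq_end_request() or __blk_mq_end_request() is the right entry point
here, and I haven't tested this:

/*
 * Hypothetical helper (my sketch, untested): end any requests that
 * were already started and therefore skipped by the new check in
 * ublk_cancel_cmd(), so that ublk_wait_tagset_rqs_idle() can observe
 * the tagset going idle instead of msleep()ing on them forever.
 */
static void ublk_fail_started_reqs(struct ublk_device *ub,
				   struct ublk_queue *ubq)
{
	int i;

	for (i = 0; i < ubq->q_depth; i++) {
		struct request *req;

		req = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], i);
		/* Only touch requests that started but never completed */
		if (req && blk_mq_request_started(req) &&
		    !blk_mq_request_completed(req))
			blk_mq_end_request(req, BLK_STS_IOERR);
	}
}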