ublk_cancel_cmd() calls io_uring_cmd_done() to complete the uring_cmd, but
task work may already have been scheduled via io_uring_cmd_complete_in_task()
for dispatching the request, in which case completing the command from both
paths can trigger a kernel crash. Fix it by not trying to cancel the command
if a ublk block request is already being dispatched to this slot.

Reported-by: Jared Holzman <jholzman@xxxxxxxxxx>
Closes: https://lore.kernel.org/linux-block/d2179120-171b-47ba-b664-23242981ef19@xxxxxxxxxx/
Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx>
---
 drivers/block/ublk_drv.c | 37 +++++++++++++++++++++++++++++++------
 1 file changed, 31 insertions(+), 6 deletions(-)

diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index c4d4be4f6fbd..fbfb5b815c8d 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -1334,6 +1334,12 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
 	if (res != BLK_STS_OK)
 		return res;
 
+	/*
+	 * Order the write to rq->state in blk_mq_start_request() against
+	 * the read of ubq->canceling; see the comment in ublk_cancel_cmd()
+	 * wrt. the paired barrier.
+	 */
+	smp_mb();
 	/*
 	 * ->canceling has to be handled after ->force_abort and ->fail_io
 	 * is dealt with, otherwise this request may not be failed in case
@@ -1683,14 +1689,35 @@ static void ublk_start_cancel(struct ublk_queue *ubq)
 	ublk_put_disk(disk);
 }
 
-static void ublk_cancel_cmd(struct ublk_queue *ubq, struct ublk_io *io,
+static void ublk_cancel_cmd(struct ublk_queue *ubq, unsigned tag,
 			    unsigned int issue_flags)
 {
+	struct ublk_io *io = &ubq->ios[tag];
+	struct ublk_device *ub = ubq->dev;
+	struct request *req;
 	bool done;
 
 	if (!(io->flags & UBLK_IO_FLAG_ACTIVE))
 		return;
 
+	/*
+	 * Don't try to cancel this command if the request has been
+	 * started, to avoid a race between io_uring_cmd_done() and
+	 * io_uring_cmd_complete_in_task().
+	 *
+	 * A memory barrier is implied in ublk_start_cancel(), ordering
+	 * the WRITE to ubq->canceling against the READ of the request
+	 * state via blk_mq_request_started().
+	 *
+	 * If the request is observed as not started here, ublk_queue_rq()
+	 * must observe ubq->canceling, so the request is aborted there and
+	 * this uring_cmd won't be used. Otherwise, this uring_cmd will
+	 * eventually be completed from the dispatch code path.
+	 */
+	req = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], tag);
+	if (req && blk_mq_request_started(req))
+		return;
+
 	spin_lock(&ubq->cancel_lock);
 	done = !!(io->flags & UBLK_IO_FLAG_CANCELED);
 	if (!done)
@@ -1722,7 +1749,6 @@ static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd,
 	struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
 	struct ublk_queue *ubq = pdu->ubq;
 	struct task_struct *task;
-	struct ublk_io *io;
 
 	if (WARN_ON_ONCE(!ubq))
 		return;
@@ -1737,9 +1763,8 @@ static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd,
 	if (!ubq->canceling)
 		ublk_start_cancel(ubq);
 
-	io = &ubq->ios[pdu->tag];
-	WARN_ON_ONCE(io->cmd != cmd);
-	ublk_cancel_cmd(ubq, io, issue_flags);
+	WARN_ON_ONCE(ubq->ios[pdu->tag].cmd != cmd);
+	ublk_cancel_cmd(ubq, pdu->tag, issue_flags);
 }
 
 static inline bool ublk_queue_ready(struct ublk_queue *ubq)
@@ -1752,7 +1777,7 @@ static void ublk_cancel_queue(struct ublk_queue *ubq)
 	int i;
 
 	for (i = 0; i < ubq->q_depth; i++)
-		ublk_cancel_cmd(ubq, &ubq->ios[i], IO_URING_F_UNLOCKED);
+		ublk_cancel_cmd(ubq, i, IO_URING_F_UNLOCKED);
 }
 
 /* Cancel all pending commands, must be called after del_gendisk() returns */
-- 
2.47.0
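
For reviewers, a minimal user-space sketch of the store-load pairing the fix
relies on; it is not kernel code and not part of the patch. The names
dispatch_side/cancel_side and the use of C11 atomics and pthreads are
illustrative assumptions; in the driver the dispatch role is played by
blk_mq_start_request() + smp_mb() + the ubq->canceling check in
ublk_queue_rq(), and the cancel role by the barrier implied in
ublk_start_cancel() + the blk_mq_request_started() check in
ublk_cancel_cmd(). With both fences present, at least one side must observe
the other's write, so a started request is never completed from both paths.

/*
 * Standalone illustration (not kernel code) of the store-load barrier
 * pairing described above. Build with: gcc -std=c11 -pthread demo.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int started;    /* stand-in for rq->state set by blk_mq_start_request() */
static atomic_int canceling;  /* stand-in for ubq->canceling */

static void *dispatch_side(void *arg)
{
	(void)arg;
	atomic_store_explicit(&started, 1, memory_order_relaxed);
	/* plays the role of smp_mb() in ublk_queue_rq() */
	atomic_thread_fence(memory_order_seq_cst);
	if (atomic_load_explicit(&canceling, memory_order_relaxed))
		puts("dispatch: saw canceling, abort the request");
	else
		puts("dispatch: no cancel observed, complete via task work");
	return NULL;
}

static void *cancel_side(void *arg)
{
	(void)arg;
	atomic_store_explicit(&canceling, 1, memory_order_relaxed);
	/* plays the role of the barrier implied in ublk_start_cancel() */
	atomic_thread_fence(memory_order_seq_cst);
	if (atomic_load_explicit(&started, memory_order_relaxed))
		puts("cancel: request already started, leave the uring_cmd alone");
	else
		puts("cancel: request not started, safe to complete the uring_cmd");
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, dispatch_side, NULL);
	pthread_create(&b, NULL, cancel_side, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}

Whatever the interleaving, the outcome where the dispatch side schedules task
work while the cancel side also completes the command cannot occur, which is
exactly the double completion the patch is closing off.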