On 22/04/2025 16:42, Ming Lei wrote: > On Tue, Apr 22, 2025 at 02:43:06PM +0300, Jared Holzman wrote: >> On 15/04/2025 15:56, Ming Lei wrote: >>> On Tue, Apr 15, 2025 at 10:58:37AM +0000, Guy Eisenberg wrote: > > ... > >> >> Hi Ming, >> >> Unfortunately your patch did not solve the issue, it is still happening (6.14 Kernel) >> >> I believe the issue is that ublk_cancel_cmd() is calling io_uring_cmd_done() on a uring_cmd that is currently scheduled as a task work by io_uring_cmd_complete_in_task() >> >> I reproduced with the patch below and saw the warning I added shortly before the crash. The dmesg log is attached. >> >> I'm not sure how to solve the issue though. Unless we wait for the task work to complete in ublk_cancel cmd. I can't see any way to cancel the task work >> >> Would appreciate your assistance, >> >> Regards, >> >> Jared >> >> diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c >> index ca9a67b5b537..d9f544206b36 100644 >> --- a/drivers/block/ublk_drv.c >> +++ b/drivers/block/ublk_drv.c >> @@ -72,6 +72,10 @@ >> (UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD | \ >> UBLK_PARAM_TYPE_DEVT | UBLK_PARAM_TYPE_ZONED) >> >> +#ifndef IORING_URING_CMD_TW_SCHED >> + #define IORING_URING_CMD_TW_SCHED (1U << 31) >> +#endif >> + >> struct ublk_rq_data { >> struct llist_node node; >> >> @@ -1236,6 +1240,7 @@ static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd, unsigned issue_flags) >> struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); >> struct ublk_queue *ubq = pdu->ubq; >> >> + cmd->flags &= ~IORING_URING_CMD_TW_SCHED; >> ublk_forward_io_cmds(ubq, issue_flags); >> } >> >> @@ -1245,7 +1250,7 @@ static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq) >> >> if (llist_add(&data->node, &ubq->io_cmds)) { >> struct ublk_io *io = &ubq->ios[rq->tag]; >> - >> + io->cmd->flags |= IORING_URING_CMD_TW_SCHED; >> io_uring_cmd_complete_in_task(io->cmd, ublk_rq_task_work_cb); >> } >> } >> @@ -1498,8 +1503,10 @@ static void ublk_cancel_cmd(struct ublk_queue *ubq, struct ublk_io *io, >> io->flags |= UBLK_IO_FLAG_CANCELED; >> spin_unlock(&ubq->cancel_lock); >> >> - if (!done) >> + if (!done) { >> + WARN_ON_ONCE(io->cmd->flags & IORING_URING_CMD_TW_SCHED); >> io_uring_cmd_done(io->cmd, UBLK_IO_RES_ABORT, 0, issue_flags); >> + } >> } >> >> /* >> @@ -1925,6 +1932,7 @@ static inline int ublk_ch_uring_cmd_local(struct io_uring_cmd *cmd, >> static void ublk_ch_uring_cmd_cb(struct io_uring_cmd *cmd, >> unsigned int issue_flags) >> { >> + cmd->flags &= ~IORING_URING_CMD_TW_SCHED; >> ublk_ch_uring_cmd_local(cmd, issue_flags); >> } >> >> @@ -1937,6 +1945,7 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) >> >> /* well-implemented server won't run into unlocked */ >> if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) { >> + cmd->flags |= IORING_URING_CMD_TW_SCHED; >> io_uring_cmd_complete_in_task(cmd, ublk_ch_uring_cmd_cb); >> return -EIOCBQUEUED; >> } >> diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h >> index abd0c8bd950b..3ac2ef7bd99a 100644 >> --- a/include/linux/io_uring/cmd.h >> +++ b/include/linux/io_uring/cmd.h >> @@ -7,6 +7,7 @@ >> >> /* only top 8 bits of sqe->uring_cmd_flags for kernel internal use */ >> #define IORING_URING_CMD_CANCELABLE (1U << 30) >> +#define IORING_URING_CMD_TW_SCHED (1U << 31) >> >> struct io_uring_cmd { >> struct file *file; >> > > Nice debug patch! > > Your patch and the dmesg log has shown the race between io_uring_cmd_complete_in_task() > and io_uring_cmd_done() <- ublk_cancel_cmd(). > > In theory, io_uring should have the knowledge to cover it, but I guess it > might be a bit hard. > > I will try to cook a ublk fix tomorrow for you to test. > > Thanks, > Ming > That would be great. Much appreciated! Regards, Jared