On Mon, Apr 14, 2025 at 01:58:47PM -0600, Uday Shankar wrote: > On Mon, Apr 14, 2025 at 07:25:43PM +0800, Ming Lei wrote: > > From: Uday Shankar <ushankar@xxxxxxxxxxxxxxx> > > > > Most uring_cmds issued against ublk character devices are serialized > > because each command affects only one queue, and there is an early check > > which only allows a single task (the queue's ubq_daemon) to issue > > uring_cmds against that queue. However, this mechanism does not work for > > FETCH_REQs, since they are expected before ubq_daemon is set. Since > > FETCH_REQs are only used at initialization and not in the fast path, > > serialize them using the per-ublk-device mutex. This fixes a number of > > data races that were previously possible if a badly behaved ublk server > > decided to issue multiple FETCH_REQs against the same qid/tag > > concurrently. > > > > Reviewed-by: Ming Lei <ming.lei@xxxxxxxxxx> > > Reported-by: Caleb Sander Mateos <csander@xxxxxxxxxxxxxxx> > > Signed-off-by: Uday Shankar <ushankar@xxxxxxxxxxxxxxx> > > Thanks for picking this up. Can you use the following patch instead? It > has two changes compared to [1]: > > - Factor FETCH command into its own function > - Return -EAGAIN for non-blocking dispatch because we are taking a > mutex. > > [1] https://lore.kernel.org/linux-block/20250410-ublk_task_per_io-v3-1-b811e8f4554a@xxxxxxxxxxxxxxx/ > > diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c > index 2fd05c1bd30b..8efb7668ab2c 100644 > --- a/drivers/block/ublk_drv.c > +++ b/drivers/block/ublk_drv.c > @@ -1809,8 +1809,8 @@ static void ublk_nosrv_work(struct work_struct *work) > > /* device can only be started after all IOs are ready */ > static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq) > + __must_hold(&ub->mutex) > { > - mutex_lock(&ub->mutex); > ubq->nr_io_ready++; > if (ublk_queue_ready(ubq)) { > ubq->ubq_daemon = current; > @@ -1822,7 +1822,6 @@ static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq) > } > if (ub->nr_queues_ready == ub->dev_info.nr_hw_queues) > complete_all(&ub->completion); > - mutex_unlock(&ub->mutex); > } > > static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id, > @@ -1906,6 +1905,52 @@ static int ublk_unregister_io_buf(struct io_uring_cmd *cmd, > return io_buffer_unregister_bvec(cmd, index, issue_flags); > } > > +static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_device *ub, > + struct ublk_queue *ubq, struct ublk_io *io, > + const struct ublksrv_io_cmd *ub_cmd, > + unsigned int issue_flags) > +{ > + int ret = 0; > + > + if (issue_flags & IO_URING_F_NONBLOCK) > + return -EAGAIN; > + > + mutex_lock(&ub->mutex); > + /* UBLK_IO_FETCH_REQ is only allowed before queue is setup */ > + if (ublk_queue_ready(ubq)) { > + ret = -EBUSY; > + goto out; > + } > + > + /* allow each command to be FETCHed at most once */ > + if (io->flags & UBLK_IO_FLAG_ACTIVE) { > + ret = -EINVAL; > + goto out; > + } > + > + WARN_ON_ONCE(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV); > + > + if (ublk_need_map_io(ubq)) { > + /* > + * FETCH_RQ has to provide IO buffer if NEED GET > + * DATA is not enabled > + */ > + if (!ub_cmd->addr && !ublk_need_get_data(ubq)) > + goto out; > + } else if (ub_cmd->addr) { > + /* User copy requires addr to be unset */ > + ret = -EINVAL; > + goto out; > + } > + > + ublk_fill_io_cmd(io, cmd, ub_cmd->addr); > + ublk_mark_io_ready(ub, ubq); > + > +out: > + mutex_unlock(&ub->mutex); > + return ret; > +} > + > static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, > unsigned int issue_flags, > const struct ublksrv_io_cmd *ub_cmd) > @@ -1962,34 +2007,7 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, > case UBLK_IO_UNREGISTER_IO_BUF: > return ublk_unregister_io_buf(cmd, ub_cmd->addr, issue_flags); > case UBLK_IO_FETCH_REQ: > - /* UBLK_IO_FETCH_REQ is only allowed before queue is setup */ > - if (ublk_queue_ready(ubq)) { > - ret = -EBUSY; > - goto out; > - } > - /* > - * The io is being handled by server, so COMMIT_RQ is expected > - * instead of FETCH_REQ > - */ > - if (io->flags & UBLK_IO_FLAG_OWNED_BY_SRV) > - goto out; > - > - if (ublk_need_map_io(ubq)) { > - /* > - * FETCH_RQ has to provide IO buffer if NEED GET > - * DATA is not enabled > - */ > - if (!ub_cmd->addr && !ublk_need_get_data(ubq)) > - goto out; > - } else if (ub_cmd->addr) { > - /* User copy requires addr to be unset */ > - ret = -EINVAL; > - goto out; > - } > - > - ublk_fill_io_cmd(io, cmd, ub_cmd->addr); > - ublk_mark_io_ready(ub, ubq); > - break; > + return ublk_fetch(cmd, ub, ubq, io, ub_cmd, issue_flags); One more bug here, this skips the ublk_prep_cancel(cmd, issue_flags, ubq, tag); return -EIOCBQUEUED; that is after the switch statement. > case UBLK_IO_COMMIT_AND_FETCH_REQ: > req = blk_mq_tag_to_rq(ub->tag_set.tags[ub_cmd->q_id], tag); > >