On Tue, Jul 1, 2025 at 2:29 PM Michael S. Tsirkin <mst@xxxxxxxxxx> wrote: > > On Mon, Jun 16, 2025 at 04:25:11PM +0800, Jason Wang wrote: > > This patch introduces virtqueue ops which is a set of the callbacks > > that will be called for different queue layout or features. This would > > help to avoid branches for split/packed and will ease the future > > implementation like in order. > > > > Note that in order to eliminate the indirect calls this patch uses > > global array of const ops to allow compiler to avoid indirect > > branches. > > > > Tested with CONFIG_MITIGATION_RETPOLINE, no performance differences > > were noticed. > > > > Suggested-by: Michael S. Tsirkin <mst@xxxxxxxxxx> > > Signed-off-by: Jason Wang <jasowang@xxxxxxxxxx> > > --- > > drivers/virtio/virtio_ring.c | 172 ++++++++++++++++++++++++++--------- > > 1 file changed, 129 insertions(+), 43 deletions(-) > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c > > index b14bbb4d6713..af32d1a1a1db 100644 > > --- a/drivers/virtio/virtio_ring.c > > +++ b/drivers/virtio/virtio_ring.c > > @@ -67,6 +67,12 @@ > > #define LAST_ADD_TIME_INVALID(vq) > > #endif > > > > +enum vq_layout { > > + SPLIT = 0, > > + PACKED, > > + VQ_TYPE_MAX, > > +}; > > + > > struct vring_desc_state_split { > > void *data; /* Data for callback. 
*/ > > > > @@ -159,12 +165,28 @@ struct vring_virtqueue_packed { > > size_t event_size_in_bytes; > > }; > > > > +struct vring_virtqueue; > > + > > +struct virtqueue_ops { > > + int (*add)(struct vring_virtqueue *_vq, struct scatterlist *sgs[], > > + unsigned int total_sg, unsigned int out_sgs, > > + unsigned int in_sgs, void *data, > > + void *ctx, bool premapped, gfp_t gfp); > > + void *(*get)(struct vring_virtqueue *vq, unsigned int *len, void **ctx); > > + bool (*kick_prepare)(struct vring_virtqueue *vq); > > + void (*disable_cb)(struct vring_virtqueue *vq); > > + bool (*enable_cb_delayed)(struct vring_virtqueue *vq); > > + unsigned int (*enable_cb_prepare)(struct vring_virtqueue *vq); > > + bool (*poll)(const struct vring_virtqueue *vq, u16 last_used_idx); > > + void *(*detach_unused_buf)(struct vring_virtqueue *vq); > > + bool (*more_used)(const struct vring_virtqueue *vq); > > + int (*resize)(struct vring_virtqueue *vq, u32 num); > > + void (*reset)(struct vring_virtqueue *vq); > > +}; > > + > > struct vring_virtqueue { > > struct virtqueue vq; > > > > - /* Is this a packed ring? */ > > - bool packed_ring; > > - > > /* Is DMA API used? */ > > bool use_dma_api; > > > > @@ -180,6 +202,8 @@ struct vring_virtqueue { > > /* Host publishes avail event idx */ > > bool event; > > > > + enum vq_layout layout; > > + > > /* Head of free buffer list. */ > > unsigned int free_head; > > /* Number we've added since last sync. */ > > @@ -232,6 +256,12 @@ static void vring_free(struct virtqueue *_vq); > > > > #define to_vvq(_vq) container_of_const(_vq, struct vring_virtqueue, vq) > > > > + > > Why two empty lines? Let me fix that. > > > +static inline bool virtqueue_is_packed(const struct vring_virtqueue *vq) > > +{ > > + return vq->layout == PACKED; > > +} > > + > > So why still have this? > Let's replace all uses with a per layout > callback. None of them seem to be on the data path. 
> Well, with one exception - there is I actually started with that, but the problem is that a lot of code would be duplicated, which needs more refactoring and would end up as a pretty giant series: +static const struct virtqueue_ops split_in_order_ops = { + .add = virtqueue_add_split, + .get = virtqueue_get_buf_ctx_split_in_order, + .kick_prepare = virtqueue_kick_prepare_split, + .disable_cb = virtqueue_disable_cb_split, + .enable_cb_delayed = virtqueue_enable_cb_delayed_split, + .enable_cb_prepare = virtqueue_enable_cb_prepare_split, + .poll = virtqueue_poll_split, + .detach_unused_buf = virtqueue_detach_unused_buf_split, + .more_used = more_used_split_in_order, + .resize = virtqueue_resize_split, + .reset = virtqueue_reset_split, +}; So I started with the functions that have sufficient differences: virtqueue_get_buf_ctx_split_in_order more_used_split_in_order and left the rest for the future, if we see a performance improvement. > > @@ -2921,7 +3006,7 @@ u32 vring_notification_data(struct virtqueue *_vq) > > struct vring_virtqueue *vq = to_vvq(_vq); > > u16 next; > > > > - if (vq->packed_ring) > > + if (virtqueue_is_packed(vq)) > > next = (vq->packed.next_avail_idx & > > ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR))) | > > vq->packed.avail_wrap_counter << > > I think it's the only one? Are we trying to save a function call there? > I am fine to keep it as in this patch, or if we do change it, > maybe with a separate patch, so if there is a regression we can bisect > more easily. > Yes. > > > > For example, for the chunk below, we could have: > .init_last_used > > having said that, this patchset is already big, so let us do it with a separate patch - > but if so, the way to split would be in patch 1 to just leave vq->packed_ring > in place, gradually replace call sites, and in patch N drop > vq->packed_ring. Ok, let me do that. 
> > > > static bool virtqueue_use_indirect(const struct vring_virtqueue *vq, > > unsigned int total_sg) > > { > > @@ -422,7 +452,7 @@ static void virtqueue_init(struct vring_virtqueue *vq, u32 num) > > { > > vq->vq.num_free = num; > > > > - if (vq->packed_ring) > > + if (virtqueue_is_packed(vq)) > > vq->last_used_idx = 0 | (1 << VRING_PACKED_EVENT_F_WRAP_CTR); > > else > > vq->last_used_idx = 0; > > @@ -1116,6 +1146,8 @@ static int vring_alloc_queue_split(struct vring_virtqueue_split *vring_split, > > return 0; > > } > > > > +static const struct virtqueue_ops split_ops; > > + > > static struct virtqueue *__vring_new_virtqueue_split(unsigned int index, > > struct vring_virtqueue_split *vring_split, > > struct virtio_device *vdev, > > @@ -1133,7 +1165,7 @@ static struct virtqueue *__vring_new_virtqueue_split(unsigned int index, > > if (!vq) > > return NULL; > > > > - vq->packed_ring = false; > > + vq->layout = SPLIT; > > vq->vq.callback = callback; > > vq->vq.vdev = vdev; > > vq->vq.name = name; > > @@ -2076,6 +2108,8 @@ static void virtqueue_reset_packed(struct vring_virtqueue *vq) > > virtqueue_vring_init_packed(&vq->packed, !!vq->vq.callback); > > } > > > > +static const struct virtqueue_ops packed_ops; > > + > > static struct virtqueue *__vring_new_virtqueue_packed(unsigned int index, > > struct vring_virtqueue_packed *vring_packed, > > struct virtio_device *vdev, > > @@ -2106,7 +2140,7 @@ static struct virtqueue *__vring_new_virtqueue_packed(unsigned int index, > > #else > > vq->broken = false; > > #endif > > - vq->packed_ring = true; > > + vq->layout = PACKED; > > vq->dma_dev = dma_dev; > > vq->use_dma_api = vring_use_dma_api(vdev); > > > > @@ -2194,6 +2228,39 @@ static int virtqueue_resize_packed(struct vring_virtqueue *vq, u32 num) > > return -ENOMEM; > > } > > > > +static const struct virtqueue_ops split_ops = { > > + .add = virtqueue_add_split, > > + .get = virtqueue_get_buf_ctx_split, > > + .kick_prepare = virtqueue_kick_prepare_split, > > + .disable_cb = 
virtqueue_disable_cb_split, > > + .enable_cb_delayed = virtqueue_enable_cb_delayed_split, > > + .enable_cb_prepare = virtqueue_enable_cb_prepare_split, > > + .poll = virtqueue_poll_split, > > + .detach_unused_buf = virtqueue_detach_unused_buf_split, > > + .more_used = more_used_split, > > + .resize = virtqueue_resize_split, > > + .reset = virtqueue_reset_split, > > +}; > > + > > +static const struct virtqueue_ops packed_ops = { > > + .add = virtqueue_add_packed, > > + .get = virtqueue_get_buf_ctx_packed, > > + .kick_prepare = virtqueue_kick_prepare_packed, > > + .disable_cb = virtqueue_disable_cb_packed, > > + .enable_cb_delayed = virtqueue_enable_cb_delayed_packed, > > + .enable_cb_prepare = virtqueue_enable_cb_prepare_packed, > > + .poll = virtqueue_poll_packed, > > + .detach_unused_buf = virtqueue_detach_unused_buf_packed, > > + .more_used = more_used_packed, > > + .resize = virtqueue_resize_packed, > > + .reset = virtqueue_reset_packed, > > +}; > > + > > +static const struct virtqueue_ops *const all_ops[VQ_TYPE_MAX] = { > > + [SPLIT] = &split_ops, > > + [PACKED] = &packed_ops > > +}; > > + > > static int virtqueue_disable_and_recycle(struct virtqueue *_vq, > > void (*recycle)(struct virtqueue *vq, void *buf)) > > { > > @@ -2236,6 +2303,38 @@ static int virtqueue_enable_after_reset(struct virtqueue *_vq) > > * Generic functions and exported symbols. > > */ > > > > +#define VIRTQUEUE_CALL(vq, op, ...) \ > > + ({ \ > > + typeof(all_ops[SPLIT]->op(vq, ##__VA_ARGS__)) ret; \ > > we need an empty line here, after the declaration. Ok. Thanks