Will Deacon <will@xxxxxxxxxx> writes: > On Sun, May 25, 2025 at 01:19:15PM +0530, Aneesh Kumar K.V (Arm) wrote: >> This also allocates a stage1 bypass and stage2 translate table. > > Please write your commit messages as per Linux kernel guidelines. > >> Signed-off-by: Aneesh Kumar K.V (Arm) <aneesh.kumar@xxxxxxxxxx> >> --- >> builtin-run.c | 2 + >> include/kvm/kvm-config.h | 1 + >> vfio/core.c | 4 +- >> vfio/iommufd.c | 115 ++++++++++++++++++++++++++++++++++++++- > > [...] > >> 4 files changed, 119 insertions(+), 3 deletions(-) >> diff --git a/vfio/iommufd.c b/vfio/iommufd.c >> index 742550705746..39870320e4ac 100644 >> --- a/vfio/iommufd.c >> +++ b/vfio/iommufd.c >> @@ -108,6 +108,116 @@ err_out: >> return ret; >> } >> >> +static int iommufd_alloc_s1bypass_hwpt(struct vfio_device *vdev) >> +{ >> + int ret; >> + unsigned long dev_num; >> + unsigned long guest_bdf; >> + struct vfio_device_bind_iommufd bind; >> + struct vfio_device_attach_iommufd_pt attach_data; >> + struct iommu_hwpt_alloc alloc_hwpt; >> + struct iommu_viommu_alloc alloc_viommu; >> + struct iommu_hwpt_arm_smmuv3 bypass_ste; >> + struct iommu_vdevice_alloc alloc_vdev; >> + >> + bind.argsz = sizeof(bind); >> + bind.flags = 0; >> + bind.iommufd = iommu_fd; >> + >> + /* now bind the iommufd */ >> + if (ioctl(vdev->fd, VFIO_DEVICE_BIND_IOMMUFD, &bind)) { >> + ret = -errno; >> + vfio_dev_err(vdev, "failed to get info"); >> + goto err_out; >> + } >> + >> + alloc_hwpt.size = sizeof(struct iommu_hwpt_alloc); >> + alloc_hwpt.flags = IOMMU_HWPT_ALLOC_NEST_PARENT; >> + alloc_hwpt.dev_id = bind.out_devid; >> + alloc_hwpt.pt_id = ioas_id; >> + alloc_hwpt.data_type = IOMMU_HWPT_DATA_NONE; >> + alloc_hwpt.data_len = 0; >> + alloc_hwpt.data_uptr = 0; >> + >> + if (ioctl(iommu_fd, IOMMU_HWPT_ALLOC, &alloc_hwpt)) { >> + ret = -errno; >> + pr_err("Failed to allocate HWPT"); >> + goto err_out; >> + } >> + >> + attach_data.argsz = sizeof(attach_data); >> + attach_data.flags = 0; >> + attach_data.pt_id = alloc_hwpt.out_hwpt_id; >> + >> + if (ioctl(vdev->fd, VFIO_DEVICE_ATTACH_IOMMUFD_PT, &attach_data)) { >> + ret = -errno; >> + vfio_dev_err(vdev, "failed to attach to IOAS "); >> + goto err_out; >> + } >> + >> + alloc_viommu.size = sizeof(alloc_viommu); >> + alloc_viommu.flags = 0; >> + alloc_viommu.type = IOMMU_VIOMMU_TYPE_ARM_SMMUV3; >> + alloc_viommu.dev_id = bind.out_devid; >> + alloc_viommu.hwpt_id = alloc_hwpt.out_hwpt_id; >> + >> + if (ioctl(iommu_fd, IOMMU_VIOMMU_ALLOC, &alloc_viommu)) { >> + ret = -errno; >> + vfio_dev_err(vdev, "failed to allocate VIOMMU %d", ret); >> + goto err_out; >> + } >> +#define STRTAB_STE_0_V (1UL << 0) >> +#define STRTAB_STE_0_CFG_S2_TRANS 6 >> +#define STRTAB_STE_0_CFG_S1_TRANS 5 >> +#define STRTAB_STE_0_CFG_BYPASS 4 >> + >> + /* set up virtual ste as bypass ste */ >> + bypass_ste.ste[0] = STRTAB_STE_0_V | (STRTAB_STE_0_CFG_BYPASS << 1); >> + bypass_ste.ste[1] = 0x0UL; >> + >> + alloc_hwpt.size = sizeof(struct iommu_hwpt_alloc); >> + alloc_hwpt.flags = 0; >> + alloc_hwpt.dev_id = bind.out_devid; >> + alloc_hwpt.pt_id = alloc_viommu.out_viommu_id; >> + alloc_hwpt.data_type = IOMMU_HWPT_DATA_ARM_SMMUV3; >> + alloc_hwpt.data_len = sizeof(bypass_ste); >> + alloc_hwpt.data_uptr = (unsigned long)&bypass_ste; >> + >> + if (ioctl(iommu_fd, IOMMU_HWPT_ALLOC, &alloc_hwpt)) { >> + ret = -errno; >> + pr_err("Failed to allocate S1 bypass HWPT %d", ret); >> + goto err_out; >> + } >> + >> + alloc_vdev.size = sizeof(alloc_vdev), >> + alloc_vdev.viommu_id = alloc_viommu.out_viommu_id; >> + alloc_vdev.dev_id = bind.out_devid; >> + >> + dev_num = vdev->dev_hdr.dev_num; >> + /* kvmtool only do 0 domain, 0 bus and 0 function devices. */ >> + guest_bdf = (0ULL << 32) | (0 << 16) | dev_num << 11 | (0 << 8); > > I don't understand this. Shouldn't the BDF correspond to the virtual > configuration space? That's not allocated until later, but just going > with 0 isn't going to work. > > What am I missing? > As I understand it, kvmtool supports only bus 0 and does not allow multifunction devices. Based on that, I derived the guest BDF as follows (correcting what was wrong in the original patch): guest_bdf = (0ULL << 16) | (0 << 8) | dev_num << 3 | (0 << 0); Are you suggesting that this approach is incorrect, and that we can use a bus number other than 0?