[RFC PATCH kvmtool 07/10] vfio/iommufd: Add basic iommufd support

This uses a stage-1 translate, stage-2 bypass IOMMU configuration.

Add a new --iommufd option. When it is set, kvmtool opens /dev/iommu,
allocates an IOAS, binds each VFIO device cdev to the iommufd, allocates
a hardware page table on that IOAS and attaches the device to it, and
finally maps all guest RAM into the IOAS so that IOVA == GPA. Without
the option, the existing VFIO group/container path is used.

Signed-off-by: Aneesh Kumar K.V (Arm) <aneesh.kumar@xxxxxxxxxx>
---
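Note for reviewers (not part of the change): below is a condensed,
standalone sketch of the uAPI sequence that vfio/iommufd.c drives, in the
order this patch issues it. The cdev path (/dev/vfio/devices/vfio0), the
buffer and the IOVA are placeholders, error handling is omitted, and it
assumes kernel headers that expose the VFIO cdev and iommufd interfaces.

#include <fcntl.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

#include <linux/iommufd.h>
#include <linux/vfio.h>

int main(void)
{
	int iommufd = open("/dev/iommu", O_RDWR);

	/* Placeholder cdev path; the real node is /dev/vfio/devices/vfioN. */
	int devfd = open("/dev/vfio/devices/vfio0", O_RDWR);

	/* 1. Allocate an IOAS that will hold the guest IOVA mappings. */
	struct iommu_ioas_alloc alloc = { .size = sizeof(alloc) };
	ioctl(iommufd, IOMMU_IOAS_ALLOC, &alloc);

	/* 2. Bind the VFIO device cdev to the iommufd context. */
	struct vfio_device_bind_iommufd bind = {
		.argsz = sizeof(bind),
		.iommufd = iommufd,
	};
	ioctl(devfd, VFIO_DEVICE_BIND_IOMMUFD, &bind);

	/* 3. Allocate a HWPT on the IOAS and attach the device to it. */
	struct iommu_hwpt_alloc hwpt = {
		.size = sizeof(hwpt),
		.dev_id = bind.out_devid,
		.pt_id = alloc.out_ioas_id,
		.data_type = IOMMU_HWPT_DATA_NONE,
	};
	ioctl(iommufd, IOMMU_HWPT_ALLOC, &hwpt);

	struct vfio_device_attach_iommufd_pt attach = {
		.argsz = sizeof(attach),
		.pt_id = hwpt.out_hwpt_id,
	};
	ioctl(devfd, VFIO_DEVICE_ATTACH_IOMMUFD_PT, &attach);

	/* 4. Map "guest RAM" so that IOVA == GPA (placeholder buffer/IOVA). */
	void *buf = mmap(NULL, 0x200000, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	struct iommu_ioas_map map = {
		.size = sizeof(map),
		.flags = IOMMU_IOAS_MAP_READABLE | IOMMU_IOAS_MAP_WRITEABLE |
			 IOMMU_IOAS_MAP_FIXED_IOVA,
		.ioas_id = alloc.out_ioas_id,
		.user_va = (__u64)(uintptr_t)buf,
		.iova = 0x40000000,
		.length = 0x200000,
	};
	ioctl(iommufd, IOMMU_IOAS_MAP, &map);

	return 0;
}

With the series applied, the new path is selected with something like
"lkvm run --vfio-pci 0000:01:00.0 --iommufd" (the PCI address is only an
example); without --iommufd kvmtool keeps using the legacy VFIO
group/container path.
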
 Makefile                 |   1 +
 builtin-run.c            |   1 +
 include/kvm/kvm-config.h |   1 +
 include/kvm/vfio.h       |   2 +
 vfio/core.c              |   5 +
 vfio/iommufd.c           | 368 +++++++++++++++++++++++++++++++++++++++
 6 files changed, 378 insertions(+)
 create mode 100644 vfio/iommufd.c

diff --git a/Makefile b/Makefile
index 8b2720f73386..740b95c7c3c3 100644
--- a/Makefile
+++ b/Makefile
@@ -64,6 +64,7 @@ OBJS	+= mmio.o
 OBJS	+= pci.o
 OBJS	+= term.o
 OBJS	+= vfio/core.o
+OBJS	+= vfio/iommufd.o
 OBJS	+= vfio/pci.o
 OBJS	+= vfio/legacy.o
 OBJS	+= virtio/blk.o
diff --git a/builtin-run.c b/builtin-run.c
index 81f255f911b3..39198f9bc0d6 100644
--- a/builtin-run.c
+++ b/builtin-run.c
@@ -262,6 +262,7 @@ static int loglevel_parser(const struct option *opt, const char *arg, int unset)
 	OPT_CALLBACK('\0', "vfio-pci", NULL, "[domain:]bus:dev.fn",	\
 		     "Assign a PCI device to the virtual machine",	\
 		     vfio_device_parser, kvm),				\
+	OPT_BOOLEAN('\0', "iommufd", &(cfg)->iommufd, "Use iommufd interface"),	\
 									\
 	OPT_GROUP("Debug options:"),					\
 	OPT_CALLBACK_NOOPT('\0', "debug", kvm, NULL,			\
diff --git a/include/kvm/kvm-config.h b/include/kvm/kvm-config.h
index 592b035785c9..632eaf84b7eb 100644
--- a/include/kvm/kvm-config.h
+++ b/include/kvm/kvm-config.h
@@ -65,6 +65,7 @@ struct kvm_config {
 	bool ioport_debug;
 	bool mmio_debug;
 	int virtio_transport;
+	bool iommufd;
 };
 
 #endif
diff --git a/include/kvm/vfio.h b/include/kvm/vfio.h
index fed692b0f265..37a2b5ac3dad 100644
--- a/include/kvm/vfio.h
+++ b/include/kvm/vfio.h
@@ -128,6 +128,8 @@ void vfio_pci_teardown_device(struct kvm *kvm, struct vfio_device *vdev);
 
 extern int (*dma_map_mem_range)(struct kvm *kvm, __u64 host_addr, __u64 iova, __u64 size);
 extern int (*dma_unmap_mem_range)(struct kvm *kvm, __u64 iova, __u64 size);
+int iommufd__init(struct kvm *kvm);
+int iommufd__exit(struct kvm *kvm);
 
 struct kvm_mem_bank;
 int vfio_map_mem_bank(struct kvm *kvm, struct kvm_mem_bank *bank, void *data);
diff --git a/vfio/core.c b/vfio/core.c
index 32a8e0fe67c0..0b1796c54ffd 100644
--- a/vfio/core.c
+++ b/vfio/core.c
@@ -373,6 +373,8 @@ static int vfio__init(struct kvm *kvm)
 	}
 	kvm_vfio_device = device.fd;
 
+	if (kvm->cfg.iommufd)
+		return iommufd__init(kvm);
 	return legacy_vfio__init(kvm);
 }
 dev_base_init(vfio__init);
@@ -393,6 +395,9 @@ static int vfio__exit(struct kvm *kvm)
 
 	free(kvm->cfg.vfio_devices);
 
+	if (kvm->cfg.iommufd)
+		return iommufd__exit(kvm);
+
 	return legacy_vfio__exit(kvm);
 }
 dev_base_exit(vfio__exit);
diff --git a/vfio/iommufd.c b/vfio/iommufd.c
new file mode 100644
index 000000000000..3728a06cb318
--- /dev/null
+++ b/vfio/iommufd.c
@@ -0,0 +1,368 @@
+#include <sys/types.h>
+#include <dirent.h>
+
+#include "kvm/kvm.h"
+#include <linux/iommufd.h>
+#include <linux/list.h>
+
+#define VFIO_DEV_DIR		"/dev/vfio"
+#define VFIO_DEV_NODE		VFIO_DEV_DIR "/devices/"
+#define IOMMU_DEV		"/dev/iommu"
+
+static int iommu_fd;
+static int ioas_id;
+
+static int __iommufd_configure_device(struct kvm *kvm, struct vfio_device *vdev)
+{
+	int ret;
+
+	vdev->info.argsz = sizeof(vdev->info);
+	if (ioctl(vdev->fd, VFIO_DEVICE_GET_INFO, &vdev->info)) {
+		ret = -errno;
+		vfio_dev_err(vdev, "failed to get info");
+		goto err_close_device;
+	}
+
+	if (vdev->info.flags & VFIO_DEVICE_FLAGS_RESET &&
+	    ioctl(vdev->fd, VFIO_DEVICE_RESET) < 0)
+		vfio_dev_warn(vdev, "failed to reset device");
+
+	vdev->regions = calloc(vdev->info.num_regions, sizeof(*vdev->regions));
+	if (!vdev->regions) {
+		ret = -ENOMEM;
+		goto err_close_device;
+	}
+
+	/* Now for the bus-specific initialization... */
+	switch (vdev->params->type) {
+	case VFIO_DEVICE_PCI:
+		BUG_ON(!(vdev->info.flags & VFIO_DEVICE_FLAGS_PCI));
+		ret = vfio_pci_setup_device(kvm, vdev);
+		break;
+	default:
+		BUG_ON(1);
+		ret = -EINVAL;
+	}
+
+	if (ret)
+		goto err_free_regions;
+
+	vfio_dev_info(vdev, "assigned to device number 0x%x",
+		      vdev->dev_hdr.dev_num);
+
+	return 0;
+
+err_free_regions:
+	free(vdev->regions);
+err_close_device:
+	close(vdev->fd);
+
+	return ret;
+}
+
+static int iommufd_configure_device(struct kvm *kvm, struct vfio_device *vdev)
+{
+	int ret;
+	DIR *dir = NULL;
+	struct dirent *dir_ent;
+	bool found_dev = false;
+	char pci_dev_path[PATH_MAX];
+	char vfio_dev_path[PATH_MAX];
+	struct iommu_hwpt_alloc alloc_hwpt;
+	struct vfio_device_bind_iommufd bind;
+	struct vfio_device_attach_iommufd_pt attach_data;
+
+	ret = snprintf(pci_dev_path, PATH_MAX, "%s/vfio-dev/", vdev->sysfs_path);
+	if (ret < 0 || ret == PATH_MAX)
+		return -EINVAL;
+
+	dir = opendir(pci_dev_path);
+	if (!dir)
+		return -EINVAL;
+
+	while ((dir_ent = readdir(dir))) {
+		if (!strncmp(dir_ent->d_name, "vfio", 4)) {
+			ret = snprintf(vfio_dev_path, PATH_MAX, VFIO_DEV_NODE "%s", dir_ent->d_name);
+			if (ret < 0 || ret == PATH_MAX) {
+				ret = -EINVAL;
+				goto err_close_dir;
+			}
+			found_dev = true;
+			break;
+		}
+	}
+	if (!found_dev) {
+		ret = -ENODEV;
+		goto err_close_dir;
+	}
+
+	vdev->fd = open(vfio_dev_path, O_RDWR);
+	if (vdev->fd == -1) {
+		ret = -errno;
+		pr_err("Failed to open %s", vfio_dev_path);
+		goto err_close_dir;
+	}
+
+	struct kvm_device_attr attr = {
+		.group = KVM_DEV_VFIO_FILE,
+		.attr = KVM_DEV_VFIO_FILE_ADD,
+		.addr = (__u64)&vdev->fd,
+	};
+
+	if (ioctl(kvm_vfio_device, KVM_SET_DEVICE_ATTR, &attr)) {
+		ret = -errno;
+		pr_err("Failed KVM_SET_DEVICE_ATTR for KVM_DEV_VFIO_FILE");
+		goto err_close_device;
+	}
+
+	bind.argsz = sizeof(bind);
+	bind.flags = 0;
+	bind.iommufd = iommu_fd;
+
+	/* now bind the iommufd */
+	if (ioctl(vdev->fd, VFIO_DEVICE_BIND_IOMMUFD, &bind)) {
+		ret = -errno;
+		vfio_dev_err(vdev, "failed to get info");
+		goto err_close_device;
+	}
+
+	alloc_hwpt.size = sizeof(struct iommu_hwpt_alloc);
+	alloc_hwpt.flags = 0;
+	alloc_hwpt.dev_id = bind.out_devid;
+	alloc_hwpt.pt_id = ioas_id;
+	alloc_hwpt.data_type = IOMMU_HWPT_DATA_NONE;
+	alloc_hwpt.data_len = 0;
+	alloc_hwpt.data_uptr = 0;
+
+	if (ioctl(iommu_fd, IOMMU_HWPT_ALLOC, &alloc_hwpt)) {
+		ret = -errno;
+		pr_err("Failed to allocate HWPT");
+		goto err_close_device;
+	}
+
+	attach_data.argsz = sizeof(attach_data);
+	attach_data.flags = 0;
+	attach_data.pt_id = alloc_hwpt.out_hwpt_id;
+
+	if (ioctl(vdev->fd, VFIO_DEVICE_ATTACH_IOMMUFD_PT, &attach_data)) {
+		ret = -errno;
+		vfio_dev_err(vdev, "failed to attach to IOAS ");
+		goto err_close_device;
+	}
+
+	closedir(dir);
+	return __iommufd_configure_device(kvm, vdev);
+
+err_close_device:
+	close(vdev->fd);
+err_close_dir:
+	closedir(dir);
+	return ret;
+}
+
+static int iommufd_configure_devices(struct kvm *kvm)
+{
+	int i, ret;
+
+	for (i = 0; i < kvm->cfg.num_vfio_devices; ++i) {
+		ret = iommufd_configure_device(kvm, &vfio_devices[i]);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int iommufd_create_ioas(struct kvm *kvm)
+{
+	int ret;
+	struct iommu_ioas_alloc alloc_data;
+	iommu_fd = open(IOMMU_DEV, O_RDWR);
+	if (iommu_fd == -1) {
+		ret = -errno;
+		pr_err("Failed to open %s", IOMMU_DEV);
+		return ret;
+	}
+
+	alloc_data.size = sizeof(alloc_data);
+	alloc_data.flags = 0;
+
+	if (ioctl(iommu_fd, IOMMU_IOAS_ALLOC, &alloc_data)) {
+		ret = -errno;
+		pr_err("Failed to alloc IOAS");
+		goto err_close_device;
+	}
+	ioas_id = alloc_data.out_ioas_id;
+	return 0;
+
+err_close_device:
+	close(iommu_fd);
+	return ret;
+}
+
+static int vfio_device_init(struct kvm *kvm, struct vfio_device *vdev)
+{
+	int ret, dirfd;
+	char *group_name;
+	unsigned long group_id;
+	char dev_path[PATH_MAX];
+	struct vfio_group *group = NULL;
+
+	ret = snprintf(dev_path, PATH_MAX, "/sys/bus/%s/devices/%s",
+		       vdev->params->bus, vdev->params->name);
+	if (ret < 0 || ret == PATH_MAX)
+		return -EINVAL;
+
+	vdev->sysfs_path = strndup(dev_path, PATH_MAX);
+	if (!vdev->sysfs_path)
+		return -ENOMEM;
+
+	/* Find IOMMU group for this device */
+	dirfd = open(vdev->sysfs_path, O_DIRECTORY | O_PATH | O_RDONLY);
+	if (dirfd < 0) {
+		vfio_dev_err(vdev, "failed to open '%s'", vdev->sysfs_path);
+		return -errno;
+	}
+
+	ret = readlinkat(dirfd, "iommu_group", dev_path, PATH_MAX);
+	if (ret < 0) {
+		vfio_dev_err(vdev, "no iommu_group");
+		goto out_close;
+	}
+	if (ret == PATH_MAX) {
+		ret = -ENOMEM;
+		goto out_close;
+	}
+
+	dev_path[ret] = '\0';
+	group_name = basename(dev_path);
+	errno = 0;
+	group_id = strtoul(group_name, NULL, 10);
+	if (errno) {
+		ret = -errno;
+		goto out_close;
+	}
+
+	list_for_each_entry(group, &vfio_groups, list) {
+		if (group->id == group_id) {
+			group->refs++;
+			break;
+		}
+	}
+	if (&group->list == &vfio_groups) {
+		group = calloc(1, sizeof(*group));
+		if (!group) {
+			ret = -ENOMEM;
+			goto out_close;
+		}
+		group->id	= group_id;
+		group->refs	= 1;
+		/* no group fd for iommufd */
+		group->fd	= -1;
+		list_add(&group->list, &vfio_groups);
+	}
+	vdev->group = group;
+	ret = 0;
+
+out_close:
+	close(dirfd);
+	return ret;
+}
+
+static int iommufd_map_mem_range(struct kvm *kvm, __u64 host_addr, __u64 iova, __u64 size)
+{
+	int ret = 0;
+	struct iommu_ioas_map dma_map;
+
+	dma_map.size = sizeof(dma_map);
+	dma_map.flags = IOMMU_IOAS_MAP_READABLE | IOMMU_IOAS_MAP_WRITEABLE |
+			IOMMU_IOAS_MAP_FIXED_IOVA;
+	dma_map.ioas_id = ioas_id;
+	dma_map.__reserved = 0;
+	dma_map.user_va = host_addr;
+	dma_map.iova = iova;
+	dma_map.length = size;
+
+	/* Map the guest memory for DMA (i.e. provide isolation) */
+	if (ioctl(iommu_fd, IOMMU_IOAS_MAP, &dma_map)) {
+		ret = -errno;
+		pr_err("Failed to map 0x%llx -> 0x%llx (%u) for DMA",
+		       dma_map.iova, dma_map.user_va, dma_map.size);
+	}
+
+	return ret;
+}
+
+static int iommufd_unmap_mem_range(struct kvm *kvm, __u64 iova, __u64 size)
+{
+	int ret = 0;
+	struct iommu_ioas_unmap dma_unmap;
+
+	dma_unmap.size = sizeof(dma_unmap);
+	dma_unmap.ioas_id = ioas_id;
+	dma_unmap.iova = iova;
+	dma_unmap.length = size;
+
+	if (ioctl(iommu_fd, IOMMU_IOAS_UNMAP, &dma_unmap)) {
+		ret = -errno;
+		if (ret != -ENOENT)
+			pr_err("Failed to unmap 0x%llx - size (%u) for DMA %d",
+			       dma_unmap.iova, dma_unmap.size, ret);
+	}
+
+	return ret;
+}
+
+static int iommufd_map_mem_bank(struct kvm *kvm, struct kvm_mem_bank *bank, void *data)
+{
+	return iommufd_map_mem_range(kvm, (u64)bank->host_addr, bank->guest_phys_addr, bank->size);
+}
+
+static int iommufd_configure_reserved_mem(struct kvm *kvm)
+{
+	int ret;
+	struct vfio_group *group;
+
+	list_for_each_entry(group, &vfio_groups, list) {
+		ret = vfio_configure_reserved_regions(kvm, group);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+int iommufd__init(struct kvm *kvm)
+{
+	int ret, i;
+
+	for (i = 0; i < kvm->cfg.num_vfio_devices; ++i) {
+		vfio_devices[i].params = &kvm->cfg.vfio_devices[i];
+
+		ret = vfio_device_init(kvm, &vfio_devices[i]);
+		if (ret)
+			return ret;
+	}
+
+	ret = iommufd_create_ioas(kvm);
+	if (ret)
+		return ret;
+
+	ret = iommufd_configure_devices(kvm);
+	if (ret)
+		return ret;
+
+	ret = iommufd_configure_reserved_mem(kvm);
+	if (ret)
+		return ret;
+
+	dma_map_mem_range = iommufd_map_mem_range;
+	dma_unmap_mem_range = iommufd_unmap_mem_range;
+	/* Now map the full memory */
+	return kvm__for_each_mem_bank(kvm, KVM_MEM_TYPE_RAM, iommufd_map_mem_bank,
+				      NULL);
+}
+
+int iommufd__exit(struct kvm *kvm)
+{
+	return 0;
+}
-- 
2.43.0




