[RFC v2 05/14] vfio/nvidia-vgpu: allocate vGPU FB memory when creating vGPUs

Zhi Wang <zhiw@xxxxxxxxxx> · Wed, 3 Sep 2025 15:11:02 -0700

Creating a vGPU requires allocating a portion of the FB memory from NVKM.
The amount of FB memory a vGPU requires is defined by its vGPU type.

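Get the required FB memory size from the vGPU type (re-calculated from the
total FB size when ECC is enabled), allocate the FB memory from NVKM when
creating a vGPU, and free it when the vGPU is destroyed. The VMMU segment
size and the ECC status are queried from GSP RM for this purpose.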

Signed-off-by: Zhi Wang <zhiw@xxxxxxxxxx>
---
 drivers/vfio/pci/nvidia-vgpu/debug.h          |  5 ++
 .../vfio/pci/nvidia-vgpu/include/nvrm/ecc.h   | 45 ++++++++++++
 .../vfio/pci/nvidia-vgpu/include/nvrm/vmmu.h  | 39 +++++++++++
 drivers/vfio/pci/nvidia-vgpu/pf.h             |  8 +++
 drivers/vfio/pci/nvidia-vgpu/vgpu.c           | 70 +++++++++++++++++++
 drivers/vfio/pci/nvidia-vgpu/vgpu_mgr.c       | 56 ++++++++++++++-
 drivers/vfio/pci/nvidia-vgpu/vgpu_mgr.h       |  8 +++
 7 files changed, 229 insertions(+), 2 deletions(-)
 create mode 100644 drivers/vfio/pci/nvidia-vgpu/include/nvrm/ecc.h
 create mode 100644 drivers/vfio/pci/nvidia-vgpu/include/nvrm/vmmu.h

diff --git a/drivers/vfio/pci/nvidia-vgpu/debug.h b/drivers/vfio/pci/nvidia-vgpu/debug.h
index 7cf92c9060ae..db9288752384 100644
--- a/drivers/vfio/pci/nvidia-vgpu/debug.h
+++ b/drivers/vfio/pci/nvidia-vgpu/debug.h
@@ -17,4 +17,9 @@
 	pci_dbg(__v->pdev, "nvidia-vgpu %d: "f, __v->info.id, ##a); \
 })
 
+#define vgpu_error(v, f, a...) ({ \
+	typeof(v) __v = (v); \
+	pci_err(__v->pdev, "nvidia-vgpu %d: "f, __v->info.id, ##a); \
+}) /* logs via pci_err() on the vGPU's pdev, prefixing the message with the vGPU id */
+
 #endif
diff --git a/drivers/vfio/pci/nvidia-vgpu/include/nvrm/ecc.h b/drivers/vfio/pci/nvidia-vgpu/include/nvrm/ecc.h
new file mode 100644
index 000000000000..d2a8316a0f12
--- /dev/null
+++ b/drivers/vfio/pci/nvidia-vgpu/include/nvrm/ecc.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 NVIDIA Corporation
+ */
+#ifndef __NVRM_ECC_H__
+#define __NVRM_ECC_H__
+
+#include <nvrm/nvtypes.h>
+
+/* Excerpt of RM headers from https://github.com/NVIDIA/open-gpu-kernel-modules/tree/570.124.04 */
+
+typedef struct NV2080_CTRL_GPU_QUERY_ECC_EXCEPTION_STATUS {
+    NV_DECLARE_ALIGNED(NvU64 count, 8); /* 64-bit count, declared with 8-byte alignment */
+} NV2080_CTRL_GPU_QUERY_ECC_EXCEPTION_STATUS;
+
+typedef struct NV2080_CTRL_GPU_QUERY_ECC_UNIT_STATUS {
+    NvBool enabled; /* get_ecc_status() reads this field for units[0] */
+    NvBool scrubComplete;
+    NvBool supported;
+    NV_DECLARE_ALIGNED(NV2080_CTRL_GPU_QUERY_ECC_EXCEPTION_STATUS dbe, 8); /* presumably double-bit errors; confirm */
+    NV_DECLARE_ALIGNED(NV2080_CTRL_GPU_QUERY_ECC_EXCEPTION_STATUS dbeNonResettable, 8);
+    NV_DECLARE_ALIGNED(NV2080_CTRL_GPU_QUERY_ECC_EXCEPTION_STATUS sbe, 8); /* presumably single-bit errors; confirm */
+    NV_DECLARE_ALIGNED(NV2080_CTRL_GPU_QUERY_ECC_EXCEPTION_STATUS sbeNonResettable, 8);
+} NV2080_CTRL_GPU_QUERY_ECC_UNIT_STATUS;
+
+typedef struct NV0080_CTRL_GR_ROUTE_INFO {
+    NvU32 flags;
+    NV_DECLARE_ALIGNED(NvU64 route, 8);
+} NV0080_CTRL_GR_ROUTE_INFO; /* embedded as grRouteInfo in the ECC status params */
+
+typedef NV0080_CTRL_GR_ROUTE_INFO NV2080_CTRL_GR_ROUTE_INFO; /* NV2080 alias of the same layout */
+
+#define NV2080_CTRL_GPU_ECC_UNIT_COUNT (0x00000024U) /* 36: number of entries in units[] */
+
+#define NV2080_CTRL_CMD_GPU_QUERY_ECC_STATUS (0x2080012fU) /* issued by get_ecc_status() */
+
+typedef struct NV2080_CTRL_GPU_QUERY_ECC_STATUS_PARAMS {
+    NV_DECLARE_ALIGNED(NV2080_CTRL_GPU_QUERY_ECC_UNIT_STATUS units[NV2080_CTRL_GPU_ECC_UNIT_COUNT], 8);
+    NvBool bFatalPoisonError;
+    NvU8   uncorrectableError;
+    NvU32  flags;
+    NV_DECLARE_ALIGNED(NV2080_CTRL_GR_ROUTE_INFO grRouteInfo, 8);
+} NV2080_CTRL_GPU_QUERY_ECC_STATUS_PARAMS; /* buffer type used with NV2080_CTRL_CMD_GPU_QUERY_ECC_STATUS */
+
+#endif
diff --git a/drivers/vfio/pci/nvidia-vgpu/include/nvrm/vmmu.h b/drivers/vfio/pci/nvidia-vgpu/include/nvrm/vmmu.h
new file mode 100644
index 000000000000..fb1f100deac4
--- /dev/null
+++ b/drivers/vfio/pci/nvidia-vgpu/include/nvrm/vmmu.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: MIT */
+
+/* Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. */
+
+#ifndef __NVRM_VMMU_H__
+#define __NVRM_VMMU_H__
+
+#include <nvrm/nvtypes.h>
+
+/* Excerpt of RM headers from https://github.com/NVIDIA/open-gpu-kernel-modules/tree/570.124.04 */
+
+/*
+ * NV2080_CTRL_CMD_GPU_GET_VMMU_SEGMENT_SIZE
+ *
+ * This command returns the VMMU segment size
+ *
+ *   vmmuSegmentSize
+ *     Output parameter.
+ *     Returns the VMMU segment size (in bytes)
+ *
+ * Possible status values returned are:
+ *   NV_OK
+ *   NV_ERR_NOT_SUPPORTED
+ */
+#define NV2080_CTRL_CMD_GPU_GET_VMMU_SEGMENT_SIZE  (0x2080017eU) /* finn: Evaluated from "(FINN_NV20_SUBDEVICE_0_GPU_INTERFACE_ID << 8) | NV2080_CTRL_GPU_GET_VMMU_SEGMENT_SIZE_PARAMS_MESSAGE_ID" */
+
+#define NV2080_CTRL_GPU_GET_VMMU_SEGMENT_SIZE_PARAMS_MESSAGE_ID (0x7EU) /* low byte of command id 0x2080017e */
+
+typedef struct NV2080_CTRL_GPU_GET_VMMU_SEGMENT_SIZE_PARAMS {
+	NV_DECLARE_ALIGNED(NvU64 vmmuSegmentSize, 8); /* output, in bytes */
+} NV2080_CTRL_GPU_GET_VMMU_SEGMENT_SIZE_PARAMS;
+
+#define NV2080_CTRL_GPU_VMMU_SEGMENT_SIZE_32MB     0x02000000U /* byte counts; each is a power of two */
+#define NV2080_CTRL_GPU_VMMU_SEGMENT_SIZE_64MB     0x04000000U
+#define NV2080_CTRL_GPU_VMMU_SEGMENT_SIZE_128MB    0x08000000U
+#define NV2080_CTRL_GPU_VMMU_SEGMENT_SIZE_256MB    0x10000000U
+#define NV2080_CTRL_GPU_VMMU_SEGMENT_SIZE_512MB    0x20000000U
+
+#endif
diff --git a/drivers/vfio/pci/nvidia-vgpu/pf.h b/drivers/vfio/pci/nvidia-vgpu/pf.h
index b8008d8ee434..ce2728ce969b 100644
--- a/drivers/vfio/pci/nvidia-vgpu/pf.h
+++ b/drivers/vfio/pci/nvidia-vgpu/pf.h
@@ -95,4 +95,12 @@ static inline int nvidia_vgpu_mgr_init_handle(struct pci_dev *pdev,
 	__m->handle.ops->free_chids(__m->handle.pf_drvdata, o, s); \
 })
 
+#define nvidia_vgpu_mgr_alloc_fbmem(m, info) ({\
+	typeof(m) __m = (m); \
+	__m->handle.ops->alloc_fbmem(__m->handle.pf_drvdata, info); \
+}) /* evaluates to the alloc_fbmem() op's result, which setup_fbmem_heap() tests with IS_ERR() */
+
+#define nvidia_vgpu_mgr_free_fbmem(m, h) \
+	({ typeof(m) __m = (m); __m->handle.ops->free_fbmem(h); }) /* the op takes only the handle h */
+
 #endif
diff --git a/drivers/vfio/pci/nvidia-vgpu/vgpu.c b/drivers/vfio/pci/nvidia-vgpu/vgpu.c
index 52b946469043..7025c7e2b9ac 100644
--- a/drivers/vfio/pci/nvidia-vgpu/vgpu.c
+++ b/drivers/vfio/pci/nvidia-vgpu/vgpu.c
@@ -105,7 +105,70 @@ static int setup_chids(struct nvidia_vgpu *vgpu)
 
 	vgpu_debug(vgpu, "alloc guest channel offset %u size %u\n", chid->chid_offset,
 		   chid->num_chid);
+	return 0;
+}
+
+/* Hand the vGPU's FB memory back through the manager ops and drop the stale handle. */
+static void clean_fbmem_heap(struct nvidia_vgpu *vgpu)
+{
+	struct nvidia_vgpu_mem *mem = vgpu->fbmem_heap;
+
+	vgpu_debug(vgpu, "free guest FB memory, offset 0x%llx size 0x%llx\n",
+		   mem->addr, mem->size);
+	nvidia_vgpu_mgr_free_fbmem(vgpu->vgpu_mgr, mem);
+	vgpu->fbmem_heap = NULL;
+}
+
+/* Size the vGPU's FB allocation; with ECC on, re-derive the per-instance share. */
+static int get_alloc_fbmem_size(struct nvidia_vgpu *vgpu, u64 *size)
+{
+	struct nvidia_vgpu_mgr *vgpu_mgr = vgpu->vgpu_mgr;
+	struct nvidia_vgpu_type *type = vgpu->info.vgpu_type;
+	u64 fb_length, overhead;
+
+	if (!vgpu_mgr->ecc_enabled) {
+		*size = type->fb_length;
+		return 0;
+	}
+	if (!type->ecc_supported) {
+		vgpu_error(vgpu, "ECC is enabled. vGPU type %s doesn't support ECC!\n",
+			   type->vgpu_type_name);
+		return -ENODEV;
+	}
 
+	/* -ENOSPC: a zero max_instance or reservations that consume the whole share. */
+	overhead = type->fb_reservation + type->gsp_heap_size;
+	fb_length = ALIGN(vgpu_mgr->total_fbmem_size, vgpu_mgr->vmmu_segment_size);
+	fb_length = type->max_instance ? fb_length / type->max_instance : 0;
+	if (fb_length <= overhead)
+		return -ENOSPC;
+	fb_length = min(type->fb_length, fb_length - overhead);
+	*size = ALIGN_DOWN(fb_length, vgpu_mgr->vmmu_segment_size);
+	return 0;
+}
+
+/* Allocate the vGPU's FB memory via the manager ops, aligned to the VMMU segment size. */
+static int setup_fbmem_heap(struct nvidia_vgpu *vgpu)
+{
+	struct nvidia_vgpu_mgr *vgpu_mgr = vgpu->vgpu_mgr;
+	struct nvidia_vgpu_alloc_fbmem_info info = {
+		.align = vgpu_mgr->vmmu_segment_size,
+	};
+	struct nvidia_vgpu_mem *mem;
+	int ret;
+
+	ret = get_alloc_fbmem_size(vgpu, &info.size);
+	if (ret)
+		return ret;
+
+	vgpu_debug(vgpu, "alloc guest FB memory, size 0x%llx\n", info.size);
+
+	mem = nvidia_vgpu_mgr_alloc_fbmem(vgpu_mgr, &info);
+	if (IS_ERR(mem))
+		return PTR_ERR(mem);
+
+	vgpu->fbmem_heap = mem;
+	vgpu_debug(vgpu, "guest FB memory offset 0x%llx size 0x%llx\n", mem->addr, mem->size);
 	return 0;
 }
 
@@ -120,6 +183,7 @@ int nvidia_vgpu_mgr_destroy_vgpu(struct nvidia_vgpu *vgpu)
 	if (!atomic_cmpxchg(&vgpu->status, 1, 0))
 		return -ENODEV;
 
+	clean_fbmem_heap(vgpu);
 	clean_chids(vgpu);
 	unregister_vgpu(vgpu);
 
@@ -164,12 +228,18 @@ int nvidia_vgpu_mgr_create_vgpu(struct nvidia_vgpu *vgpu)
 	if (ret)
 		goto err_setup_chids;
 
+	ret = setup_fbmem_heap(vgpu);
+	if (ret)
+		goto err_setup_fbmem_heap;
+
 	atomic_set(&vgpu->status, 1);
 
 	vgpu_debug(vgpu, "created\n");
 
 	return 0;
 
+err_setup_fbmem_heap:
+	clean_chids(vgpu);
 err_setup_chids:
 	unregister_vgpu(vgpu);
 
diff --git a/drivers/vfio/pci/nvidia-vgpu/vgpu_mgr.c b/drivers/vfio/pci/nvidia-vgpu/vgpu_mgr.c
index 8565bb881fda..e8b670308b21 100644
--- a/drivers/vfio/pci/nvidia-vgpu/vgpu_mgr.c
+++ b/drivers/vfio/pci/nvidia-vgpu/vgpu_mgr.c
@@ -6,6 +6,9 @@
 #include "debug.h"
 #include "vgpu_mgr.h"
 
+#include <nvrm/vmmu.h>
+#include <nvrm/ecc.h>
+
 static void clean_vgpu_mgr(struct nvidia_vgpu_mgr *vgpu_mgr)
 {
 	if (vgpu_mgr->use_chid_alloc_bitmap) {
@@ -104,6 +107,39 @@ static void attach_vgpu_mgr(struct nvidia_vgpu_mgr *vgpu_mgr,
 	handle_data->vfio.pf_detach_handle_fn = pf_detach_handle_fn;
 }
 
+/* Fetch the VMMU segment size from GSP RM; ALIGN() users need a non-zero power of two. */
+static int get_vmmu_segment_size(struct nvidia_vgpu_mgr *vgpu_mgr)
+{
+	NV2080_CTRL_GPU_GET_VMMU_SEGMENT_SIZE_PARAMS *ctrl;
+	u64 seg;
+
+	ctrl = nvidia_vgpu_mgr_rm_ctrl_rd(vgpu_mgr, &vgpu_mgr->gsp_client,
+					  NV2080_CTRL_CMD_GPU_GET_VMMU_SEGMENT_SIZE,
+					  sizeof(*ctrl));
+	if (IS_ERR(ctrl))
+		return PTR_ERR(ctrl);
+	seg = ctrl->vmmuSegmentSize;
+	nvidia_vgpu_mgr_rm_ctrl_done(vgpu_mgr, &vgpu_mgr->gsp_client, ctrl);
+	vgpu_mgr->vmmu_segment_size = seg;
+	return (seg && !(seg & (seg - 1))) ? 0 : -EINVAL;
+}
+
+/* Cache whether ECC is enabled, as reported by GSP RM for ECC unit 0. */
+static int get_ecc_status(struct nvidia_vgpu_mgr *vgpu_mgr)
+{
+	NV2080_CTRL_GPU_QUERY_ECC_STATUS_PARAMS *params;
+
+	params = nvidia_vgpu_mgr_rm_ctrl_rd(vgpu_mgr, &vgpu_mgr->gsp_client,
+					    NV2080_CTRL_CMD_GPU_QUERY_ECC_STATUS,
+					    sizeof(*params));
+	if (IS_ERR(params))
+		return PTR_ERR(params);
+	/* NOTE(review): only unit 0 is consulted; which ECC unit that is should be confirmed. */
+	vgpu_mgr->ecc_enabled = params->units[0].enabled;
+	nvidia_vgpu_mgr_rm_ctrl_done(vgpu_mgr, &vgpu_mgr->gsp_client, params);
+	return 0;
+}
+
 static int setup_chid_alloc_bitmap(struct nvidia_vgpu_mgr *vgpu_mgr)
 {
 	if (WARN_ON(!vgpu_mgr->use_chid_alloc_bitmap))
@@ -120,11 +156,27 @@ static int setup_chid_alloc_bitmap(struct nvidia_vgpu_mgr *vgpu_mgr)
 
 static int init_vgpu_mgr(struct nvidia_vgpu_mgr *vgpu_mgr)
 {
+	int ret;
+
+	ret = get_vmmu_segment_size(vgpu_mgr);
+	if (ret)
+		return ret;
+	/* The ECC state selects how get_alloc_fbmem_size() sizes each vGPU's FB memory. */
+	ret = get_ecc_status(vgpu_mgr);
+	if (ret)
+		return ret;
+	/* Both values above were read via RM control calls on the GSP client. */
+	vgpu_mgr_debug(vgpu_mgr, "[GSP RM] VMMU segment size: 0x%llx\n",
+		       vgpu_mgr->vmmu_segment_size);
+	vgpu_mgr_debug(vgpu_mgr, "[GSP RM] ECC enabled: %d\n", vgpu_mgr->ecc_enabled);
+
 	vgpu_mgr->total_avail_chids = nvidia_vgpu_mgr_get_avail_chids(vgpu_mgr);
 	vgpu_mgr->total_fbmem_size = nvidia_vgpu_mgr_get_total_fbmem_size(vgpu_mgr);
 
-	vgpu_mgr_debug(vgpu_mgr, "total avail chids %u\n", vgpu_mgr->total_avail_chids);
-	vgpu_mgr_debug(vgpu_mgr, "total fbmem size 0x%llx\n", vgpu_mgr->total_fbmem_size);
+	vgpu_mgr_debug(vgpu_mgr, "[core driver] total avail chids %u\n",
+		       vgpu_mgr->total_avail_chids);
+	vgpu_mgr_debug(vgpu_mgr, "[core driver] total fbmem size 0x%llx\n",
+		       vgpu_mgr->total_fbmem_size);
 
 	return vgpu_mgr->use_chid_alloc_bitmap ? setup_chid_alloc_bitmap(vgpu_mgr) : 0;
 }
diff --git a/drivers/vfio/pci/nvidia-vgpu/vgpu_mgr.h b/drivers/vfio/pci/nvidia-vgpu/vgpu_mgr.h
index 5a7a6103a677..356779404cc2 100644
--- a/drivers/vfio/pci/nvidia-vgpu/vgpu_mgr.h
+++ b/drivers/vfio/pci/nvidia-vgpu/vgpu_mgr.h
@@ -59,6 +59,7 @@ struct nvidia_vgpu_chid {
  * @info: vGPU info
  * @vgpu_mgr: pointer to vGPU manager
  * @chid: vGPU channel IDs
+ * @fbmem_heap: allocated FB memory for the vGPU
  */
 struct nvidia_vgpu {
 	/* Per-vGPU lock */
@@ -71,6 +72,7 @@ struct nvidia_vgpu {
 	struct nvidia_vgpu_mgr *vgpu_mgr;
 
 	struct nvidia_vgpu_chid chid;
+	struct nvidia_vgpu_mem *fbmem_heap;
 };
 
 /**
@@ -80,6 +82,8 @@ struct nvidia_vgpu {
  * @handle: the driver handle
  * @total_avail_chids: total available channel IDs
  * @total_fbmem_size: total FB memory size
+ * @vmmu_segment_size: VMMU segment size
+ * @ecc_enabled: ECC is enabled in the GPU
  * @vgpu_major: vGPU major version
  * @vgpu_minor: vGPU minor version
  * @vgpu_list_lock: lock to protect vGPU list
@@ -99,6 +103,10 @@ struct nvidia_vgpu_mgr {
 	u32 total_avail_chids;
 	u64 total_fbmem_size;
 
+	/* GSP RM configurations */
+	u64 vmmu_segment_size;
+	bool ecc_enabled;
+
 	u64 vgpu_major;
 	u64 vgpu_minor;
 
-- 
2.34.1