From: Ankit Agrawal <ankita@xxxxxxxxxx> It is possible for some system memory pages on the EGM to have uncorrectable ECC errors. A list of pages known to have such errors (referred to as retired pages) is maintained by the Host UEFI. The Host UEFI populates this list in a reserved region. It communicates the SPA of this region through an ACPI DSDT property. The nvgrace-egm module is responsible for storing the list of retired page offsets so that it can be made available to usermode processes. The module: 1. Gets the reserved memory region SPA and maps it to fetch the list of bad pages. 2. Calculates the retired page offsets in the EGM and stores them. Signed-off-by: Ankit Agrawal <ankita@xxxxxxxxxx> --- drivers/vfio/pci/nvgrace-gpu/egm.c | 81 ++++++++++++++++++++++++++ drivers/vfio/pci/nvgrace-gpu/egm_dev.c | 32 ++++++++-- drivers/vfio/pci/nvgrace-gpu/egm_dev.h | 5 +- drivers/vfio/pci/nvgrace-gpu/main.c | 8 ++- include/linux/nvgrace-egm.h | 2 + 5 files changed, 118 insertions(+), 10 deletions(-) diff --git a/drivers/vfio/pci/nvgrace-gpu/egm.c b/drivers/vfio/pci/nvgrace-gpu/egm.c index bf1241ed1d60..7a026b4d98f7 100644 --- a/drivers/vfio/pci/nvgrace-gpu/egm.c +++ b/drivers/vfio/pci/nvgrace-gpu/egm.c @@ -8,6 +8,11 @@ #define MAX_EGM_NODES 4 +struct h_node { + unsigned long mem_offset; + struct hlist_node node; +}; + static dev_t dev; static struct class *class; static DEFINE_XARRAY(egm_chardevs); @@ -16,6 +21,7 @@ struct chardev { struct device device; struct cdev cdev; atomic_t open_count; + DECLARE_HASHTABLE(htbl, 0x10); }; static struct nvgrace_egm_dev * @@ -145,20 +151,86 @@ static void del_egm_chardev(struct chardev *egm_chardev) put_device(&egm_chardev->device); } +static void cleanup_retired_pages(struct chardev *egm_chardev) +{ + struct h_node *cur_page; + unsigned long bkt; + struct hlist_node *temp_node; + + hash_for_each_safe(egm_chardev->htbl, bkt, temp_node, cur_page, node) { + hash_del(&cur_page->node); + kvfree(cur_page); + } +} + +static int 
nvgrace_egm_fetch_retired_pages(struct nvgrace_egm_dev *egm_dev, + struct chardev *egm_chardev) +{ + u64 count; + void *memaddr; + int index, ret = 0; + + memaddr = memremap(egm_dev->retiredpagesphys, PAGE_SIZE, MEMREMAP_WB); + if (!memaddr) + return -ENOMEM; + + count = *(u64 *)memaddr; + + for (index = 0; index < count; index++) { + struct h_node *retired_page; + + /* + * Since the EGM is linearly mapped, the offset in the + * carveout is the same offset in the VM system memory. + * + * Calculate the offset to communicate to the usermode + * apps. + */ + retired_page = kvzalloc(sizeof(*retired_page), GFP_KERNEL); + if (!retired_page) { + ret = -ENOMEM; + break; + } + + retired_page->mem_offset = *((u64 *)memaddr + index + 1) - + egm_dev->egmphys; + hash_add(egm_chardev->htbl, &retired_page->node, + retired_page->mem_offset); + } + + memunmap(memaddr); + + if (ret) + cleanup_retired_pages(egm_chardev); + + return ret; +} + static int egm_driver_probe(struct auxiliary_device *aux_dev, const struct auxiliary_device_id *id) { struct nvgrace_egm_dev *egm_dev = container_of(aux_dev, struct nvgrace_egm_dev, aux_dev); struct chardev *egm_chardev; + int ret; egm_chardev = setup_egm_chardev(egm_dev); if (!egm_chardev) return -EINVAL; + hash_init(egm_chardev->htbl); + + ret = nvgrace_egm_fetch_retired_pages(egm_dev, egm_chardev); + if (ret) + goto error_exit; + xa_store(&egm_chardevs, egm_dev->egmpxm, egm_chardev, GFP_KERNEL); return 0; + +error_exit: + del_egm_chardev(egm_chardev); + return ret; } static void egm_driver_remove(struct auxiliary_device *aux_dev) @@ -166,10 +238,19 @@ static void egm_driver_remove(struct auxiliary_device *aux_dev) struct nvgrace_egm_dev *egm_dev = container_of(aux_dev, struct nvgrace_egm_dev, aux_dev); struct chardev *egm_chardev = xa_erase(&egm_chardevs, egm_dev->egmpxm); + struct h_node *cur_page; + unsigned long bkt; + struct hlist_node *temp_node; if (!egm_chardev) return; + hash_for_each_safe(egm_chardev->htbl, bkt, temp_node, cur_page, 
node) { + hash_del(&cur_page->node); + kvfree(cur_page); + } + + cleanup_retired_pages(egm_chardev); del_egm_chardev(egm_chardev); } diff --git a/drivers/vfio/pci/nvgrace-gpu/egm_dev.c b/drivers/vfio/pci/nvgrace-gpu/egm_dev.c index ca50bc1f67a0..b8e143542bce 100644 --- a/drivers/vfio/pci/nvgrace-gpu/egm_dev.c +++ b/drivers/vfio/pci/nvgrace-gpu/egm_dev.c @@ -18,22 +18,41 @@ int nvgrace_gpu_has_egm_property(struct pci_dev *pdev, u64 *pegmpxm) } int nvgrace_gpu_fetch_egm_property(struct pci_dev *pdev, u64 *pegmphys, - u64 *pegmlength) + u64 *pegmlength, u64 *pretiredpagesphys) { int ret; /* - * The memory information is present in the system ACPI tables as DSD - * properties nvidia,egm-base-pa and nvidia,egm-size. + * The EGM memory information is present in the system ACPI tables + * as DSD properties nvidia,egm-base-pa and nvidia,egm-size. */ ret = device_property_read_u64(&pdev->dev, "nvidia,egm-size", pegmlength); if (ret) - return ret; + goto error_exit; ret = device_property_read_u64(&pdev->dev, "nvidia,egm-base-pa", pegmphys); + if (ret) + goto error_exit; + + /* + * SBIOS puts the list of retired pages on a region. The region + * SPA is exposed as "nvidia,egm-retired-pages-data-base". 
+ */ + ret = device_property_read_u64(&pdev->dev, + "nvidia,egm-retired-pages-data-base", + pretiredpagesphys); + if (ret) + goto error_exit; + + /* Catch firmware bug and avoid a crash */ + if (*pretiredpagesphys == 0) { + dev_err(&pdev->dev, "Retired pages region is not setup\n"); + ret = -EINVAL; + } +error_exit: return ret; } @@ -74,7 +93,8 @@ static void nvgrace_gpu_release_aux_device(struct device *device) struct nvgrace_egm_dev * nvgrace_gpu_create_aux_device(struct pci_dev *pdev, const char *name, - u64 egmphys, u64 egmlength, u64 egmpxm) + u64 egmphys, u64 egmlength, u64 egmpxm, + u64 retiredpagesphys) { struct nvgrace_egm_dev *egm_dev; int ret; @@ -86,6 +106,8 @@ nvgrace_gpu_create_aux_device(struct pci_dev *pdev, const char *name, egm_dev->egmpxm = egmpxm; egm_dev->egmphys = egmphys; egm_dev->egmlength = egmlength; + egm_dev->retiredpagesphys = retiredpagesphys; + INIT_LIST_HEAD(&egm_dev->gpus); egm_dev->aux_dev.id = egmpxm; diff --git a/drivers/vfio/pci/nvgrace-gpu/egm_dev.h b/drivers/vfio/pci/nvgrace-gpu/egm_dev.h index 2e1612445898..2f329a05685d 100644 --- a/drivers/vfio/pci/nvgrace-gpu/egm_dev.h +++ b/drivers/vfio/pci/nvgrace-gpu/egm_dev.h @@ -16,8 +16,9 @@ void remove_gpu(struct nvgrace_egm_dev *egm_dev, struct pci_dev *pdev); struct nvgrace_egm_dev * nvgrace_gpu_create_aux_device(struct pci_dev *pdev, const char *name, - u64 egmphys, u64 egmlength, u64 egmpxm); + u64 egmphys, u64 egmlength, u64 egmpxm, + u64 retiredpagesphys); int nvgrace_gpu_fetch_egm_property(struct pci_dev *pdev, u64 *pegmphys, - u64 *pegmlength); + u64 *pegmlength, u64 *pretiredpagesphys); #endif /* EGM_DEV_H */ diff --git a/drivers/vfio/pci/nvgrace-gpu/main.c b/drivers/vfio/pci/nvgrace-gpu/main.c index b1ccd1ac2e0a..534dc3ee6113 100644 --- a/drivers/vfio/pci/nvgrace-gpu/main.c +++ b/drivers/vfio/pci/nvgrace-gpu/main.c @@ -67,7 +67,7 @@ static struct list_head egm_dev_list; static int nvgrace_gpu_create_egm_aux_device(struct pci_dev *pdev) { struct nvgrace_egm_dev_entry 
*egm_entry = NULL; - u64 egmphys, egmlength, egmpxm; + u64 egmphys, egmlength, egmpxm, retiredpagesphys; int ret = 0; bool is_new_region = false; @@ -80,7 +80,8 @@ static int nvgrace_gpu_create_egm_aux_device(struct pci_dev *pdev) if (nvgrace_gpu_has_egm_property(pdev, &egmpxm)) goto exit; - ret = nvgrace_gpu_fetch_egm_property(pdev, &egmphys, &egmlength); + ret = nvgrace_gpu_fetch_egm_property(pdev, &egmphys, &egmlength, + &retiredpagesphys); if (ret) goto exit; @@ -103,7 +104,8 @@ static int nvgrace_gpu_create_egm_aux_device(struct pci_dev *pdev) egm_entry->egm_dev = nvgrace_gpu_create_aux_device(pdev, NVGRACE_EGM_DEV_NAME, - egmphys, egmlength, egmpxm); + egmphys, egmlength, egmpxm, + retiredpagesphys); if (!egm_entry->egm_dev) { ret = -EINVAL; goto free_egm_entry; diff --git a/include/linux/nvgrace-egm.h b/include/linux/nvgrace-egm.h index a66906753267..197255c2a3b7 100644 --- a/include/linux/nvgrace-egm.h +++ b/include/linux/nvgrace-egm.h @@ -7,6 +7,7 @@ #define NVGRACE_EGM_H #include <linux/auxiliary_bus.h> +#include <linux/hashtable.h> #define NVGRACE_EGM_DEV_NAME "egm" @@ -19,6 +20,7 @@ struct nvgrace_egm_dev { struct auxiliary_device aux_dev; phys_addr_t egmphys; size_t egmlength; + phys_addr_t retiredpagesphys; u64 egmpxm; struct list_head gpus; }; -- 2.34.1