On 03-09-2025 19:09, Nilawar, Badal wrote:
On 24-06-2025 15:58, Poosa, Karthik wrote:On 29-05-2025 16:46, Badal Nilawar wrote:Instead of indefinite retry, can you retries with count, ~3 times, after which we can return failure.Add the API xe_pm_vrsr_enable to initialize the VRSR feature, requesting AUX power limit and PERST# assertion delay. V2: Add retry mechanism while requesting AUX power limit Signed-off-by: Badal Nilawar <badal.nilawar@xxxxxxxxx> Signed-off-by: Anshuman Gupta <anshuman.gupta@xxxxxxxxx> --- drivers/gpu/drm/xe/xe_device_types.h | 1 + drivers/gpu/drm/xe/xe_pcode_api.h | 7 ++drivers/gpu/drm/xe/xe_pm.c | 105 ++++++++++++++++++++++++++-drivers/gpu/drm/xe/xe_pm.h | 1 + 4 files changed, 113 insertions(+), 1 deletion(-)diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.hindex 3a15b3a252fd..5f9a1a358468 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -7,6 +7,7 @@ #define _XE_DEVICE_TYPES_H_ #include <linux/pci.h> +#include <linux/pci-acpi.h> #include <drm/drm_device.h> #include <drm/drm_file.h>diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.hindex 127d4d26c4cf..f54ef03799d4 100644 --- a/drivers/gpu/drm/xe/xe_pcode_api.h +++ b/drivers/gpu/drm/xe/xe_pcode_api.h @@ -43,6 +43,13 @@#define POWER_SETUP_I1_SHIFT 6 /* 10.6 fixed point format */#define POWER_SETUP_I1_DATA_MASK REG_GENMASK(15, 0) +#define PCODE_D3_VRAM_SELF_REFRESH 0x71 +#define PCODE_D3_VRSR_SC_DISABLE 0x0 +#define PCODE_D3_VRSR_SC_ENABLE 0x1 +#define PCODE_D3_VRSR_SC_AUX_PL_AND_PERST_DELAY 0x2 +#define POWER_D3_VRSR_PERST_MASK REG_GENMASK(31, 16) +#define POWER_D3_VRSR_AUX_PL_MASK REG_GENMASK(15, 0) + #define PCODE_FREQUENCY_CONFIG 0x6e /* Frequency Config Sub Commands (param1) */ #define PCODE_MBOX_FC_SC_READ_FUSED_P0 0x0 diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index c9395e62d21d..278f2eeeaab6 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -5,6 +5,7 @@ #include "xe_pm.h" +#include <linux/delay.h> #include <linux/fault-inject.h> #include <linux/pm_runtime.h> #include <linux/suspend.h> @@ -23,6 +24,7 @@ #include "xe_guc.h" #include "xe_irq.h" #include "xe_mmio.h" +#include "xe_pcode_api.h" #include "xe_pcode.h" #include "xe_pxp.h" #include "xe_trace.h"@@ -260,6 +262,107 @@ static bool xe_pm_vrsr_capable(struct xe_device *xe)return val & VRAM_SR_SUPPORTED; } +static int pci_acpi_aux_power_setup(struct xe_device *xe) +{ + struct xe_tile *root_tile = xe_device_get_root_tile(xe); + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + struct pci_dev *root_pdev; + int ret; + u32 uval; + u32 aux_pwr_limit; + u32 retry_interval; + u32 perst_delay; + + root_pdev = pcie_find_root_port(pdev); + if (!root_pdev) + return -EINVAL; ++ ret = xe_pcode_read(root_tile, PCODE_MBOX(PCODE_D3_VRAM_SELF_REFRESH,+ PCODE_D3_VRSR_SC_AUX_PL_AND_PERST_DELAY, 0), + &uval, NULL); + + if (ret) + return ret; + + aux_pwr_limit = REG_FIELD_GET(POWER_D3_VRSR_AUX_PL_MASK, uval); + perst_delay = REG_FIELD_GET(POWER_D3_VRSR_PERST_MASK, uval); + + drm_dbg(&xe->drm, "Aux Power limit = %d mW\n", aux_pwr_limit);+ drm_dbg(&xe->drm, "PERST# Assertion delay = %d microseconds\n", perst_delay);+ +retry:+ ret = pci_acpi_request_d3cold_aux_power(root_pdev, aux_pwr_limit, &retry_interval);+ + if (ret == -EAGAIN) {+ drm_warn(&xe->drm, "D3cold Aux Power request needs retry interval: %d seconds\n",+ retry_interval); + msleep(retry_interval * 1000); + goto retry;Retry count is not needed because as per PCI firmware specifications, section 4.6.10 Rev 3.3. Firmware is not permitted to return a value in this range more than once for each _DSM instance (located within the ACPI namespace of a single downstream port DeviceObject), unless there is a subsequent invocation of this function before the previously returned retry interval has expired.
okay, so -EGAIN shall be returned only once from the firmware, right ?
+ } + + if (ret) + return ret; + + ret = pci_acpi_add_perst_assertion_delay(root_pdev, perst_delay); + + return ret; +} + +static void xe_pm_vrsr_init(struct xe_device *xe) +{ + int ret; + + /* Check if platform support D3Cold VRSR */This can be rephrased to, "Check if Xe should support VRSR for the platform."Here with has_vrsr we are checking if DGFX platform support VRSR or not. If supported, VRSR will be enabled in Xe kmd.I think let's keep the comment as it is.
"platform support" here is confusing, as there is another check below in xe_pm_vrsr_capable for actual platform support.
I think we may not need need has_vrsr at all ! xe_pm_vrsr_capable should suffice.
+ if (!xe->info.has_vrsr)Can you add a comment here also, viz "Check if GPU firmware supports VRSR"+ return; +Sure, I will add.+ if (!xe_pm_vrsr_capable(xe)) + return; + + /*+ * If the VRSR initialization fails, the device will proceed with the regular+ * D3Cold flow + */ + ret = pci_acpi_aux_power_setup(xe); + if (ret) { + drm_info(&xe->drm, "VRSR capable: No\n"); + return; + } + + xe->d3cold.vrsr_capable = true; + drm_info(&xe->drm, "VRSR capable: Yes\n"); +} + +/** + * xe_pm_vrsr_enable - Enable VRAM self refreshas this function does both enable & disable VRSR, this can be renamed to xe_pm_set_vrsr(xe, flag)IMO, _vrsr_enable() is a more suitable name than _set_vrsr. Since the subsequent patches don't involve disabling VRSR, I can remove the flag and retain only the code for enabling it. Note that VRSR is internally disabled by pcode upon exiting D3Cold. Therefore, in subsequent patches, VRSR is re-enabled each time before entering D3Cold.Thanks, Badal
No need to change the implementation, let the enable flag be there for future usage, just update description to
This function enables/disables the VRSR feature on the GPU.
+ * @xe: The xe device. + * @enable: true: Enable, false: Disable + * + * This function enables the VRSR feature in D3Cold path. + * + * Return: It returns 0 on success and errno on failure. + */ +int xe_pm_vrsr_enable(struct xe_device *xe, bool enable) +{ + struct xe_tile *root_tile = xe_device_get_root_tile(xe); + int ret; + u32 uval = 0; + + if (!xe->d3cold.vrsr_capable) + return -ENXIO; + + drm_dbg(&xe->drm, "%s VRSR\n", enable ? "Enabling" : "Disabling"); + + if (enable)+ ret = xe_pcode_write(root_tile, PCODE_MBOX(PCODE_D3_VRAM_SELF_REFRESH,+ PCODE_D3_VRSR_SC_ENABLE, 0), uval); + else+ ret = xe_pcode_write(root_tile, PCODE_MBOX(PCODE_D3_VRAM_SELF_REFRESH,+ PCODE_D3_VRSR_SC_DISABLE, 0), uval); + + return ret; +} + static void xe_pm_runtime_init(struct xe_device *xe) { struct device *dev = xe->drm.dev; @@ -374,7 +477,7 @@ int xe_pm_init(struct xe_device *xe) err = xe_pm_set_vram_threshold(xe, vram_threshold); if (err) goto err_unregister; - xe->d3cold.vrsr_capable = xe_pm_vrsr_capable(xe); + xe_pm_vrsr_init(xe); } xe_pm_runtime_init(xe); diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h index 59678b310e55..ba550281b130 100644 --- a/drivers/gpu/drm/xe/xe_pm.h +++ b/drivers/gpu/drm/xe/xe_pm.h @@ -35,4 +35,5 @@ bool xe_rpm_reclaim_safe(const struct xe_device *xe); struct task_struct *xe_pm_read_callback_task(struct xe_device *xe); int xe_pm_module_init(void); +int xe_pm_vrsr_enable(struct xe_device *xe, bool enable); #endif