Re: [Bug 219984] New: [BISECTED] High power usage since 'PCI/ASPM: Correct LTR_L1.2_THRESHOLD computation'

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Sun, Apr 13, 2025 at 12:59:07PM +0100, Sergey Dolgov wrote:
> Dear Bjorn,
> 
> I've compiled pciutils-3.13.0 with MinGW and run it with WinDBG, and
> it managed to display how Windows sets LTR1.2_Threshold. The trick is
> that it does NOT! I've removed all the actual reprogramming in
> aspm_calc_l12_info to see the devices' default thresholds, and they've
> come out exactly the same as in Windows (81920ns for all devices
> except 0 for NVIDIA, but it should have its own dynamic power
> management). I've collected these logs also after booting the original
> 6.14.0 kernel to make sure the devices haven't just kept the values
> set by Windows - nope, the devices (or maybe BIOS/Firmware) do reset
> LTR_L1.2_THRESHOLD to 81920ns every reboot.
> 
> I tried to read latencies from PCI_EXT_CAP_ID_LTR (see the new
> function pcie_log_runtime_ltr in l12debug.patch), but it bails out at
> "No LTR capability found" for all devices.
> 
> The idle power usage has increased slightly to 0.916 W due to about
> 80% PC10 residency and 60-70% SYS%LPI residency (in contrast to 90%+
> PC10 and 80%+ SYS%LPI with pre 7afeb84d14ea thresholds), but that's
> still much more power efficient than the mainline kernel.
> 
> So the question is why the OS has to reprogram T_POWER_ON, CMRT and
> LTR_L1.2_THRESHOLD at all, instead of trusting the device defaults? Is
> there any statistics on broken devices reporting bogus values, and
> frequently running out of buffer?

Great question.  I think you're right that if the BIOS has programmed
L1.2 settings, it's very unlikely that the OS can do a better job.

This is really ugly, but since I won't have more time for this until
Monday, could you try the patch below?  (Replace the previous debug
patch with this one)


diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index 29fcb0689a91..e6e9d6faa028 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -563,6 +563,27 @@ static void encode_l12_threshold(u32 threshold_us, u32 *scale, u32 *value)
 	}
 }
 
+static u64 decode_l12_threshold(u32 scale, u32 value)
+{
+	u32 mpy;
+
+	/*
+	 * LTR_L1.2_THRESHOLD_Value ("value") is a 10-bit field with max
+	 * value of 0x3ff.
+	 */
+	switch (scale) {
+	case 0: mpy = 1; break;
+	case 1: mpy = 32; break;
+	case 2: mpy = 1024; break;
+	case 3: mpy = 32768; break;
+	case 4: mpy = 1048576; break;
+	case 5: mpy = 33554432; break;
+	default: mpy = 0;
+	}
+
+	return (u64) value * mpy;
+}
+
 static void pcie_aspm_check_latency(struct pci_dev *endpoint)
 {
 	u32 latency, encoding, lnkcap_up, lnkcap_dw;
@@ -641,18 +662,29 @@ static void aspm_calc_l12_info(struct pcie_link_state *link,
 	u32 ctl1 = 0, ctl2 = 0;
 	u32 pctl1, pctl2, cctl1, cctl2;
 	u32 pl1_2_enables, cl1_2_enables;
+	u32 cmrt;
+	u32 p_ltr_val, p_ltr_scale;
+	u32 c_ltr_val, c_ltr_scale;
+	u64 p_ltr_threshold, c_ltr_threshold;
 
 	/* Choose the greater of the two Port Common_Mode_Restore_Times */
 	val1 = FIELD_GET(PCI_L1SS_CAP_CM_RESTORE_TIME, parent_l1ss_cap);
 	val2 = FIELD_GET(PCI_L1SS_CAP_CM_RESTORE_TIME, child_l1ss_cap);
 	t_common_mode = max(val1, val2);
 
+	pci_info(child, "cap: parent CMRT %#04x child CMRT %#04x\n", val1, val2);
+
 	/* Choose the greater of the two Port T_POWER_ON times */
 	val1   = FIELD_GET(PCI_L1SS_CAP_P_PWR_ON_VALUE, parent_l1ss_cap);
 	scale1 = FIELD_GET(PCI_L1SS_CAP_P_PWR_ON_SCALE, parent_l1ss_cap);
 	val2   = FIELD_GET(PCI_L1SS_CAP_P_PWR_ON_VALUE, child_l1ss_cap);
 	scale2 = FIELD_GET(PCI_L1SS_CAP_P_PWR_ON_SCALE, child_l1ss_cap);
 
+	pci_info(child, "cap: parent T_POWER_ON %#06x usec (val %#04x scale %x)\n",
+		 calc_l12_pwron(parent, scale1, val1), val1, scale1);
+	pci_info(child, "cap: child  T_POWER_ON %#06x usec (val %#04x scale %x)\n",
+		 calc_l12_pwron(child, scale2, val2), val2, scale2);
+
 	if (calc_l12_pwron(parent, scale1, val1) >
 	    calc_l12_pwron(child, scale2, val2)) {
 		ctl2 |= FIELD_PREP(PCI_L1SS_CTL2_T_PWR_ON_SCALE, scale1) |
@@ -675,7 +707,11 @@ static void aspm_calc_l12_info(struct pcie_link_state *link,
 	 * least 4us.
 	 */
 	l1_2_threshold = 2 + 4 + t_common_mode + t_power_on;
+	pci_info(child, "t_common_mode %#04x t_power_on %#04x l1_2_threshold %#06x\n",
+		 t_common_mode, t_power_on, l1_2_threshold);
 	encode_l12_threshold(l1_2_threshold, &scale, &value);
+	pci_info(child, "encoded LTR_L1.2_THRESHOLD value %#06x scale %x\n",
+		 value, scale);
 	ctl1 |= FIELD_PREP(PCI_L1SS_CTL1_CM_RESTORE_TIME, t_common_mode) |
 		FIELD_PREP(PCI_L1SS_CTL1_LTR_L12_TH_VALUE, value) |
 		FIELD_PREP(PCI_L1SS_CTL1_LTR_L12_TH_SCALE, scale);
@@ -686,6 +722,28 @@ static void aspm_calc_l12_info(struct pcie_link_state *link,
 	pci_read_config_dword(child, child->l1ss + PCI_L1SS_CTL1, &cctl1);
 	pci_read_config_dword(child, child->l1ss + PCI_L1SS_CTL2, &cctl2);
 
+	pci_info(child, "L1SS_CTL1 %#08x (parent %#010x child %#010x)\n",
+		 ctl1, pctl1, cctl1);
+	pci_info(child, "L1SS_CTL2 %#08x (parent %#010x child %#010x)\n",
+		 ctl2, pctl2, cctl2);
+
+	cmrt = FIELD_GET(PCI_L1SS_CTL1_CM_RESTORE_TIME, pctl1);
+	p_ltr_val = FIELD_GET(PCI_L1SS_CTL1_LTR_L12_TH_VALUE, pctl1);
+	p_ltr_scale = FIELD_GET(PCI_L1SS_CTL1_LTR_L12_TH_SCALE, pctl1);
+	p_ltr_threshold = decode_l12_threshold(p_ltr_scale, p_ltr_val);
+	c_ltr_val = FIELD_GET(PCI_L1SS_CTL1_LTR_L12_TH_VALUE, cctl1);
+	c_ltr_scale = FIELD_GET(PCI_L1SS_CTL1_LTR_L12_TH_SCALE, cctl1);
+	c_ltr_threshold = decode_l12_threshold(c_ltr_scale, c_ltr_val);
+	if (cmrt || p_ltr_threshold || c_ltr_threshold) {
+		pci_info(child, "cur: parent CMRT %d usec\n", cmrt);
+		pci_info(child, "cur: parent LTR_L1.2_THRESHOLD value %#06x scale %x decoded %#012llx ns\n",
+			 p_ltr_val, p_ltr_scale, p_ltr_threshold);
+		pci_info(child, "cur: child  LTR_L1.2_THRESHOLD value %#06x scale %x decoded %#012llx ns\n",
+			 c_ltr_val, c_ltr_scale, c_ltr_threshold);
+		pci_info(child, "skipping L1.2 config\n");
+		return;
+	}
+
 	if (ctl1 == pctl1 && ctl1 == cctl1 &&
 	    ctl2 == pctl2 && ctl2 == cctl2)
 		return;
@@ -703,14 +761,27 @@ static void aspm_calc_l12_info(struct pcie_link_state *link,
 					       PCI_L1SS_CTL1_L1_2_MASK, 0);
 	}
 
+	pci_read_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1, &pctl1);
+	pci_read_config_dword(child, child->l1ss + PCI_L1SS_CTL1, &cctl1);
+	pci_info(child, "L1SS_CTL1 parent %#08x child %#08x (L1.2 disabled)\n",
+		 pctl1, cctl1);
+
 	/* Program T_POWER_ON times in both ports */
 	pci_write_config_dword(parent, parent->l1ss + PCI_L1SS_CTL2, ctl2);
 	pci_write_config_dword(child, child->l1ss + PCI_L1SS_CTL2, ctl2);
 
+	pci_read_config_dword(parent, parent->l1ss + PCI_L1SS_CTL2, &pctl2);
+	pci_read_config_dword(child, child->l1ss + PCI_L1SS_CTL2, &cctl2);
+	pci_info(child, "L1SS_CTL2 parent %#08x child %#08x (T_POWER_ON updated)\n",
+		 pctl2, cctl2);
+
 	/* Program Common_Mode_Restore_Time in upstream device */
 	pci_clear_and_set_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1,
 				       PCI_L1SS_CTL1_CM_RESTORE_TIME, ctl1);
 
+	pci_read_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1, &pctl1);
+	pci_info(child, "L1SS_CTL1 parent %#08x (CMRT updated)\n", pctl1);
+
 	/* Program LTR_L1.2_THRESHOLD time in both ports */
 	pci_clear_and_set_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1,
 				       PCI_L1SS_CTL1_LTR_L12_TH_VALUE |
@@ -721,6 +792,11 @@ static void aspm_calc_l12_info(struct pcie_link_state *link,
 				       PCI_L1SS_CTL1_LTR_L12_TH_SCALE,
 				       ctl1);
 
+	pci_read_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1, &pctl1);
+	pci_read_config_dword(child, child->l1ss + PCI_L1SS_CTL1, &cctl1);
+	pci_info(child, "L1SS_CTL1 parent %#08x child %#08x (LTR_L1.2_THRESHOLD updated)\n",
+		 pctl1, cctl1);
+
 	if (pl1_2_enables || cl1_2_enables) {
 		pci_clear_and_set_config_dword(parent,
 					       parent->l1ss + PCI_L1SS_CTL1, 0,
@@ -729,6 +805,11 @@ static void aspm_calc_l12_info(struct pcie_link_state *link,
 					       child->l1ss + PCI_L1SS_CTL1, 0,
 					       cl1_2_enables);
 	}
+
+	pci_read_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1, &pctl1);
+	pci_read_config_dword(child, child->l1ss + PCI_L1SS_CTL1, &cctl1);
+	pci_info(child, "L1SS_CTL1 parent %#08x child %#08x (L1.2 restored)\n",
+		 pctl1, cctl1);
 }
 
 static void aspm_l1ss_init(struct pcie_link_state *link)




[Index of Archives]     [DMA Engine]     [Linux Coverity]     [Linux USB]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [Greybus]

  Powered by Linux