On Mon, 2025-06-23 at 14:27 +0100, David Woodhouse wrote:
> From: David Woodhouse <dwmw@xxxxxxxxxxxx>
>
> We observed systems going dark on kexec, due to corruption of the new
> kernel's text (and sometimes the initrd). This was eventually determined
> to be caused by the vLPI pending tables used by the GIC in the previous
> kernel, which were not being quiesced properly.

FWIW this is a previous hack we attempted which *didn't* work. (For illustration only; ignore the syscore .kexec hook. We addressed that differently in the end with https://lore.kernel.org/kexec/20231213064004.2419447-1-jgowans@xxxxxxxxxx/ )

At the point where the its_kexec() hook in this patch has completed, we poisoned the (ex-) vLPI pending tables and then scanned for corruption in them. We saw the same characteristic pattern of corruption which had been breaking the next kernel after kexec: 32 bytes copied from offset 0 to offset 32 in a page, followed by bytes 0, 1, 32, 33, 34, 35 being zeroed.

Adding a few milliseconds of sleep before the poisoning was enough to make the problem go away. So is the patch which calls unmap_all_vpes() for every kvm.

Of course, if the GIC were behind an IOMMU as all DMA-capable devices should be, this might never have happened...

diff --git a/drivers/irqchip/irq-gic-common.h b/drivers/irqchip/irq-gic-common.h index f407cce9ecaa..a4fde376d214 100644 --- a/drivers/irqchip/irq-gic-common.h +++ b/drivers/irqchip/irq-gic-common.h @@ -19,6 +19,12 @@ struct gic_quirk { u32 mask; }; +struct redist_region { + void __iomem *redist_base; + phys_addr_t phys_base; + bool single_redist; +}; + int gic_configure_irq(unsigned int irq, unsigned int type, void __iomem *base, void (*sync_access)(void)); void gic_dist_config(void __iomem *base, int gic_irqs, @@ -33,4 +39,6 @@ void gic_enable_of_quirks(const struct device_node *np, #define RDIST_FLAGS_RD_TABLES_PREALLOCATED (1 << 1) #define RDIST_FLAGS_FORCE_NON_SHAREABLE (1 << 2) +int gic_iterate_rdists(int (*fn)(struct redist_region *, void __iomem *)); + #endif /* _IRQ_GIC_COMMON_H */ diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 638f7eb033ad..d106b6ccca8b 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -4902,6 +4902,51 @@ static void its_enable_quirks(struct its_node *its) its_quirks, its); } +static int disable_vpes(struct redist_region *region, void __iomem *ptr) +{ + u64 typer; + u64 val; + + typer = gic_read_typer(ptr + GICR_TYPER); + + if (!((typer & GICR_TYPER_VLPIS) && (typer & GICR_TYPER_RVPEID))) + return 1; + + /* Deactivate any present vPE */ + its_clear_vpend_valid(ptr + SZ_128K, 0, GICR_VPENDBASER_PendingLast); + + /* Mark the VPE table as invalid */ + val = gicr_read_vpropbaser(ptr + SZ_128K + GICR_VPROPBASER); + val &= ~GICR_VPROPBASER_4_1_VALID; + gicr_write_vpropbaser(val, ptr + SZ_128K + GICR_VPROPBASER); + + /* Disable next redistributor */ + return 1; +} + +static int its_kexec(void) +{ + int err = 0, err_return = 0; + struct its_node *its; + + raw_spin_lock(&its_lock); + + list_for_each_entry(its, &its_nodes, entry) { + err = its_force_quiescent(its->base); + if (err) { + pr_err("ITS@%pa: failed to quiesce: %d\n", + &its->phys_base, err); + err_return = 
-EBUSY; + } + } + + gic_iterate_rdists(disable_vpes); + + raw_spin_unlock(&its_lock); + + return err_return; +} + static int its_save_disable(void) { struct its_node *its; @@ -5001,6 +5046,7 @@ static void its_restore_enable(void) static struct syscore_ops its_syscore_ops = { .suspend = its_save_disable, .resume = its_restore_enable, + .kexec = its_kexec, }; static void __init __iomem *its_map_one(struct resource *res, int *err) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 50143de1791d..2014c5a75a6e 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -46,12 +46,6 @@ #define GIC_IRQ_TYPE_PARTITION (GIC_IRQ_TYPE_LPI + 1) -struct redist_region { - void __iomem *redist_base; - phys_addr_t phys_base; - bool single_redist; -}; - struct gic_chip_data { struct fwnode_handle *fwnode; phys_addr_t dist_phys_base; @@ -968,7 +962,7 @@ static void __init gic_dist_init(void) gic_write_irouter(affinity, base + GICD_IROUTERnE + i * 8); } -static int gic_iterate_rdists(int (*fn)(struct redist_region *, void __iomem *)) +int gic_iterate_rdists(int (*fn)(struct redist_region *, void __iomem *)) { int ret = -ENODEV; int i;
Attachment:
smime.p7s
Description: S/MIME cryptographic signature