Hi Thomas, On 3/21/2025 9:05 PM, Neeraj Upadhyay wrote: ... > >>> diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c >>> index 72fa4bb78f0a..e0c9505e05f8 100644 >>> --- a/arch/x86/kernel/apic/vector.c >>> +++ b/arch/x86/kernel/apic/vector.c >>> @@ -174,6 +174,8 @@ static void apic_update_vector(struct irq_data *irqd, unsigned int newvec, >>> apicd->prev_cpu = apicd->cpu; >>> WARN_ON_ONCE(apicd->cpu == newcpu); >>> } else { >>> + if (apic->update_vector) >>> + apic->update_vector(apicd->cpu, apicd->vector, false); >>> irq_matrix_free(vector_matrix, apicd->cpu, apicd->vector, >>> managed); >>> } >>> @@ -183,6 +185,8 @@ static void apic_update_vector(struct irq_data *irqd, unsigned int newvec, >>> apicd->cpu = newcpu; >>> BUG_ON(!IS_ERR_OR_NULL(per_cpu(vector_irq, newcpu)[newvec])); >>> per_cpu(vector_irq, newcpu)[newvec] = desc; >>> + if (apic->update_vector) >>> + apic->update_vector(apicd->cpu, apicd->vector, true); >> >> A trivial >> >> static inline void apic_update_vector(....) >> { >> if (apic->update_vector) >> .... >> } >> >> would be too easy to read and add not enough line count, right? >> > > Yes. > As apic_update_vector() is already defined, I used apic_drv_update_vector(). >>> static void vector_assign_managed_shutdown(struct irq_data *irqd) >>> @@ -528,11 +532,15 @@ static bool vector_configure_legacy(unsigned int virq, struct irq_data *irqd, >>> if (irqd_is_activated(irqd)) { >>> trace_vector_setup(virq, true, 0); >>> apic_update_irq_cfg(irqd, apicd->vector, apicd->cpu); >>> + if (apic->update_vector) >>> + apic->update_vector(apicd->cpu, apicd->vector, true); >>> } else { >>> /* Release the vector */ >>> apicd->can_reserve = true; >>> irqd_set_can_reserve(irqd); >>> clear_irq_vector(irqd); >>> + if (apic->update_vector) >>> + apic->update_vector(apicd->cpu, apicd->vector, false); >>> realloc = true; >> >> This is as incomplete as it gets. None of the other code paths which >> invoke clear_irq_vector() nor those which invoke free_moved_vector() are >> mopping up the leftovers in the backing page. >> >> And no, you don't sprinkle this nonsense all over the call sites. There >> is only a very limited number of functions which are involed in setting >> up and tearing down a vector. Doing this at the call sites is a >> guarantee for missing out as you demonstrated. >> > > This is the part where I was looking for guidance. As ALLOWED_IRR (which > tells if Hypervisor is allowed to inject a vector to guest vCPU) is per > CPU, intent was to call it at places where vector's CPU affinity changes. > I surely have missed cleaning up ALLOWED_IRR on previously affined CPU. > I will follow your suggestion to do it during setup/teardown of vector (need > to figure out those functions) and configure it for all CPUs in those > functions. > I updated it like below. Can you share your opinion on this, if this looks fine or I got it wrong? diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 736f62812f5c..fef6faffeed1 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -139,6 +139,46 @@ static void apic_update_irq_cfg(struct irq_data *irqd, unsigned int vector, apicd->hw_irq_cfg.dest_apicid); } +static inline void apic_drv_update_vector(unsigned int cpu, unsigned int vector, bool set) +{ + if (apic->update_vector) + apic->update_vector(cpu, vector, set); +} + +static int irq_alloc_vector(const struct cpumask *dest, bool resvd, unsigned int *cpu) +{ + int vector; + + vector = irq_matrix_alloc(vector_matrix, dest, resvd, cpu); + + if (vector < 0) + return vector; + + apic_drv_update_vector(*cpu, vector, true); + + return vector; +} + +static int irq_alloc_managed_vector(unsigned int *cpu) +{ + int vector; + + vector = irq_matrix_alloc_managed(vector_matrix, vector_searchmask, cpu); + + if (vector < 0) + return vector; + + apic_drv_update_vector(*cpu, vector, true); + + return vector; +} + +static void irq_free_vector(unsigned int cpu, unsigned int vector, bool managed) +{ + apic_drv_update_vector(cpu, vector, false); + irq_matrix_free(vector_matrix, cpu, vector, managed); +} + static void apic_update_vector(struct irq_data *irqd, unsigned int newvec, unsigned int newcpu) { @@ -174,8 +214,7 @@ static void apic_update_vector(struct irq_data *irqd, unsigned int newvec, apicd->prev_cpu = apicd->cpu; WARN_ON_ONCE(apicd->cpu == newcpu); } else { - irq_matrix_free(vector_matrix, apicd->cpu, apicd->vector, - managed); + irq_free_vector(apicd->cpu, apicd->vector, managed); } setnew: @@ -256,7 +295,7 @@ assign_vector_locked(struct irq_data *irqd, const struct cpumask *dest) if (apicd->move_in_progress || !hlist_unhashed(&apicd->clist)) return -EBUSY; - vector = irq_matrix_alloc(vector_matrix, dest, resvd, &cpu); + vector = irq_alloc_vector(dest, resvd, &cpu); trace_vector_alloc(irqd->irq, vector, resvd, vector); if (vector < 0) return vector; @@ -332,8 +371,7 @@ assign_managed_vector(struct irq_data *irqd, const struct cpumask *dest) /* set_affinity might call here for nothing */ if (apicd->vector && cpumask_test_cpu(apicd->cpu, vector_searchmask)) return 0; - vector = irq_matrix_alloc_managed(vector_matrix, vector_searchmask, - &cpu); + vector = irq_alloc_managed_vector(&cpu); trace_vector_alloc_managed(irqd->irq, vector, vector); if (vector < 0) return vector; @@ -357,7 +395,7 @@ static void clear_irq_vector(struct irq_data *irqd) apicd->prev_cpu); per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_SHUTDOWN; - irq_matrix_free(vector_matrix, apicd->cpu, vector, managed); + irq_free_vector(apicd->cpu, vector, managed); apicd->vector = 0; /* Clean up move in progress */ @@ -366,7 +404,7 @@ static void clear_irq_vector(struct irq_data *irqd) return; per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_SHUTDOWN; - irq_matrix_free(vector_matrix, apicd->prev_cpu, vector, managed); + irq_free_vector(apicd->prev_cpu, vector, managed); apicd->prev_vector = 0; apicd->move_in_progress = 0; hlist_del_init(&apicd->clist); @@ -528,6 +566,7 @@ static bool vector_configure_legacy(unsigned int virq, struct irq_data *irqd, if (irqd_is_activated(irqd)) { trace_vector_setup(virq, true, 0); apic_update_irq_cfg(irqd, apicd->vector, apicd->cpu); + apic_drv_update_vector(apicd->cpu, apicd->vector, true); } else { /* Release the vector */ apicd->can_reserve = true; @@ -949,7 +988,7 @@ static void free_moved_vector(struct apic_chip_data *apicd) * affinity mask comes online. */ trace_vector_free_moved(apicd->irq, cpu, vector, managed); - irq_matrix_free(vector_matrix, cpu, vector, managed); + irq_free_vector(cpu, vector, managed); per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED; hlist_del_init(&apicd->clist); apicd->prev_vector = 0; ... > >> Als I have no idea what SAVIC_ALLOWED_IRR_OFFSET means. Whether it's >> something from the datashit or a made up thing does not matter. It's >> patently non-informative. >> > > Ok, I had tried to give some details in the cover letter. These APIC > regs are at offset APIC_IRR(n) + 4 and are used by guest to configure the > interrupt vectors which can be injected by Hypervisor to Guest. > > >> Again: >> >> struct apic_page { >> union { >> u32 regs[NR_APIC_REGS]; >> u8 bytes[PAGE_SIZE]; >> }; >> }; >> >> struct apic_page *ap = this_cpu_ptr(apic_page); >> unsigned long *sirr; >> >> /* >> * apic_page.regs[SAVIC_ALLOWED_IRR_OFFSET...] is an array of >> * consecutive 32-bit registers, which represents a vector bitmap. >> */ >> sirr = (unsigned long *) &ap->regs[SAVIC_ALLOWED_IRR_OFFSET]; >> if (set) >> set_bit(sirr, vector); >> else >> clear_bit(sirr, vector); >> >> See how code suddenly becomes self explaining, obvious and >> comprehensible? >> > > Yes, thank you! > After checking more on this, set_bit(vector, ) cannot be used directly here, as 32-bit registers are not consecutive. Each register is aligned at 16 byte boundary. So, I changed it to below: --- a/arch/x86/kernel/apic/x2apic_savic.c +++ b/arch/x86/kernel/apic/x2apic_savic.c @@ -19,6 +19,26 @@ /* APIC_EILVTn(3) is the last defined APIC register. */ #define NR_APIC_REGS (APIC_EILVTn(4) >> 2) +/* + * APIC registers such as APIC_IRR, APIC_ISR, ... are mapped as + * 32-bit registers and are aligned at 16-byte boundary. For + * example, APIC_IRR registers mapping looks like below: + * + * #Offset #bits Description + * 0x200 31:0 vectors 0-31 + * 0x210 31:0 vectors 32-63 + * ... + * 0x270 31:0 vectors 224-255 + * + * VEC_BIT_POS gives the bit position of a vector in the APIC + * reg containing its state. + */ +#define VEC_BIT_POS(v) ((v) & (32 - 1)) +/* + * VEC_REG_OFF gives the relative (from the start offset of that APIC + * register) offset of the APIC register containing state for a vector. + */ +#define VEC_REG_OFF(v) (((v) >> 5) << 4) struct apic_page { union { @@ -185,6 +205,35 @@ static void x2apic_savic_send_IPI_mask_allbutself(const struct cpumask *mask, in __send_IPI_mask(mask, vector, APIC_DEST_ALLBUT); } +static void x2apic_savic_update_vector(unsigned int cpu, unsigned int vector, bool set) +{ + struct apic_page *ap = per_cpu_ptr(apic_backing_page, cpu); + unsigned long *sirr; + int vec_bit; + int reg_off; + + /* + * ALLOWED_IRR registers are mapped in the apic_page at below byte + * offsets. Each register is a 32-bit register aligned at 16-byte + * boundary. + * + * #Offset #bits Description + * SAVIC_ALLOWED_IRR_OFFSET 31:0 Guest allowed vectors 0-31 + * "" + 0x10 31:0 Guest allowed vectors 32-63 + * ... + * "" + 0x70 31:0 Guest allowed vectors 224-255 + * + */ + reg_off = SAVIC_ALLOWED_IRR_OFFSET + VEC_REG_OFF(vector); + sirr = (unsigned long *) &ap->regs[reg_off >> 2]; + vec_bit = VEC_BIT_POS(vector); + + if (set) + set_bit(vec_bit, sirr); + else + clear_bit(vec_bit, sirr); +} + static void init_backing_page(void *backing_page) { u32 apic_id; @@ -272,6 +321,8 @@ static struct apic apic_x2apic_savic __ro_after_init = { .eoi = native_apic_msr_eoi, .icr_read = native_x2apic_icr_read, .icr_write = native_x2apic_icr_write, + + .update_vector = x2apic_savic_update_vector, }; - Neeraj > - Neeraj > >> Thanks, >> >> tglx >