Re: [RFC v2 05/17] x86/apic: Add update_vector callback for Secure AVIC

Neeraj Upadhyay <Neeraj.Upadhyay@xxxxxxx> · Tue, 25 Mar 2025 17:40:45 +0530

Hi Thomas,

On 3/21/2025 9:05 PM, Neeraj Upadhyay wrote:
...

> 
>>> diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
>>> index 72fa4bb78f0a..e0c9505e05f8 100644
>>> --- a/arch/x86/kernel/apic/vector.c
>>> +++ b/arch/x86/kernel/apic/vector.c
>>> @@ -174,6 +174,8 @@ static void apic_update_vector(struct irq_data *irqd, unsigned int newvec,
>>>  		apicd->prev_cpu = apicd->cpu;
>>>  		WARN_ON_ONCE(apicd->cpu == newcpu);
>>>  	} else {
>>> +		if (apic->update_vector)
>>> +			apic->update_vector(apicd->cpu, apicd->vector, false);
>>>  		irq_matrix_free(vector_matrix, apicd->cpu, apicd->vector,
>>>  				managed);
>>>  	}
>>> @@ -183,6 +185,8 @@ static void apic_update_vector(struct irq_data *irqd, unsigned int newvec,
>>>  	apicd->cpu = newcpu;
>>>  	BUG_ON(!IS_ERR_OR_NULL(per_cpu(vector_irq, newcpu)[newvec]));
>>>  	per_cpu(vector_irq, newcpu)[newvec] = desc;
>>> +	if (apic->update_vector)
>>> +		apic->update_vector(apicd->cpu, apicd->vector, true);
>>
>> A trivial
>>
>> static inline void apic_update_vector(....)
>> {
>>         if (apic->update_vector)
>>            ....
>> }
>>
>> would be too easy to read and add not enough line count, right?
>>
> 
> Yes.
>

As apic_update_vector() is already defined, I used apic_drv_update_vector().
 
>>>  static void vector_assign_managed_shutdown(struct irq_data *irqd)
>>> @@ -528,11 +532,15 @@ static bool vector_configure_legacy(unsigned int virq, struct irq_data *irqd,
>>>  	if (irqd_is_activated(irqd)) {
>>>  		trace_vector_setup(virq, true, 0);
>>>  		apic_update_irq_cfg(irqd, apicd->vector, apicd->cpu);
>>> +		if (apic->update_vector)
>>> +			apic->update_vector(apicd->cpu, apicd->vector, true);
>>>  	} else {
>>>  		/* Release the vector */
>>>  		apicd->can_reserve = true;
>>>  		irqd_set_can_reserve(irqd);
>>>  		clear_irq_vector(irqd);
>>> +		if (apic->update_vector)
>>> +			apic->update_vector(apicd->cpu, apicd->vector, false);
>>>  		realloc = true;
>>
>> This is as incomplete as it gets. None of the other code paths which
>> invoke clear_irq_vector() nor those which invoke free_moved_vector() are
>> mopping up the leftovers in the backing page.
>>
>> And no, you don't sprinkle this nonsense all over the call sites. There
>> is only a very limited number of functions which are involed in setting
>> up and tearing down a vector. Doing this at the call sites is a
>> guarantee for missing out as you demonstrated.
>>
> 
> This is the part where I was looking for guidance. As ALLOWED_IRR (which
> tells if Hypervisor is allowed to inject a vector to guest vCPU) is per
> CPU, intent was to call it at places where vector's CPU affinity changes.
> I surely have missed cleaning up ALLOWED_IRR on previously affined CPU.
> I will follow your suggestion to do it during setup/teardown of vector (need
> to figure out those functions) and configure it for all CPUs in those
> functions.
> 

I updated it like below. Can you share your opinion on this, if this
looks fine or I got it wrong?

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 736f62812f5c..fef6faffeed1 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -139,6 +139,46 @@ static void apic_update_irq_cfg(struct irq_data *irqd, unsigned int vector,
                            apicd->hw_irq_cfg.dest_apicid);
 }

+static inline void apic_drv_update_vector(unsigned int cpu, unsigned int vector, bool set)
+{
+       if (apic->update_vector)
+               apic->update_vector(cpu, vector, set);
+}
+
+static int irq_alloc_vector(const struct cpumask *dest, bool resvd, unsigned int *cpu)
+{
+       int vector;
+
+       vector = irq_matrix_alloc(vector_matrix, dest, resvd, cpu);
+
+       if (vector < 0)
+               return vector;
+
+       apic_drv_update_vector(*cpu, vector, true);
+
+       return vector;
+}
+
+static int irq_alloc_managed_vector(unsigned int *cpu)
+{
+       int vector;
+
+       vector = irq_matrix_alloc_managed(vector_matrix, vector_searchmask, cpu);
+
+       if (vector < 0)
+               return vector;
+
+       apic_drv_update_vector(*cpu, vector, true);
+
+       return vector;
+}
+
+static void irq_free_vector(unsigned int cpu, unsigned int vector, bool managed)
+{
+       apic_drv_update_vector(cpu, vector, false);
+       irq_matrix_free(vector_matrix, cpu, vector, managed);
+}
+
 static void apic_update_vector(struct irq_data *irqd, unsigned int newvec,
                               unsigned int newcpu)
 {
@@ -174,8 +214,7 @@ static void apic_update_vector(struct irq_data *irqd, unsigned int newvec,
                apicd->prev_cpu = apicd->cpu;
                WARN_ON_ONCE(apicd->cpu == newcpu);
        } else {
-               irq_matrix_free(vector_matrix, apicd->cpu, apicd->vector,
-                               managed);
+               irq_free_vector(apicd->cpu, apicd->vector, managed);
        }

 setnew:
@@ -256,7 +295,7 @@ assign_vector_locked(struct irq_data *irqd, const struct cpumask *dest)
        if (apicd->move_in_progress || !hlist_unhashed(&apicd->clist))
                return -EBUSY;

-       vector = irq_matrix_alloc(vector_matrix, dest, resvd, &cpu);
+       vector = irq_alloc_vector(dest, resvd, &cpu);
        trace_vector_alloc(irqd->irq, vector, resvd, vector);
        if (vector < 0)
                return vector;
@@ -332,8 +371,7 @@ assign_managed_vector(struct irq_data *irqd, const struct cpumask *dest)
        /* set_affinity might call here for nothing */
        if (apicd->vector && cpumask_test_cpu(apicd->cpu, vector_searchmask))
                return 0;
-       vector = irq_matrix_alloc_managed(vector_matrix, vector_searchmask,
-                                         &cpu);
+       vector = irq_alloc_managed_vector(&cpu);
        trace_vector_alloc_managed(irqd->irq, vector, vector);
        if (vector < 0)
                return vector;
@@ -357,7 +395,7 @@ static void clear_irq_vector(struct irq_data *irqd)
                           apicd->prev_cpu);

        per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_SHUTDOWN;
-       irq_matrix_free(vector_matrix, apicd->cpu, vector, managed);
+       irq_free_vector(apicd->cpu, vector, managed);
        apicd->vector = 0;

        /* Clean up move in progress */
@@ -366,7 +404,7 @@ static void clear_irq_vector(struct irq_data *irqd)
                return;

        per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_SHUTDOWN;
-       irq_matrix_free(vector_matrix, apicd->prev_cpu, vector, managed);
+       irq_free_vector(apicd->prev_cpu, vector, managed);
        apicd->prev_vector = 0;
        apicd->move_in_progress = 0;
        hlist_del_init(&apicd->clist);
@@ -528,6 +566,7 @@ static bool vector_configure_legacy(unsigned int virq, struct irq_data *irqd,
        if (irqd_is_activated(irqd)) {
                trace_vector_setup(virq, true, 0);
                apic_update_irq_cfg(irqd, apicd->vector, apicd->cpu);
+               apic_drv_update_vector(apicd->cpu, apicd->vector, true);
        } else {
                /* Release the vector */
                apicd->can_reserve = true;
@@ -949,7 +988,7 @@ static void free_moved_vector(struct apic_chip_data *apicd)
         *    affinity mask comes online.
         */
        trace_vector_free_moved(apicd->irq, cpu, vector, managed);
-       irq_matrix_free(vector_matrix, cpu, vector, managed);
+       irq_free_vector(cpu, vector, managed);
        per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
        hlist_del_init(&apicd->clist);
        apicd->prev_vector = 0;


...

>  
>> Als I have no idea what SAVIC_ALLOWED_IRR_OFFSET means. Whether it's
>> something from the datashit or a made up thing does not matter. It's
>> patently non-informative.
>>
> 
> Ok, I had tried to give some details in the cover letter. These APIC
> regs are at offset APIC_IRR(n) + 4 and are used by guest to configure the
> interrupt vectors which can be injected by Hypervisor to Guest.
> 
> 
>> Again:
>>
>> struct apic_page {
>> 	union {
>> 		u32	regs[NR_APIC_REGS];
>> 		u8	bytes[PAGE_SIZE];
>> 	};
>> };                
>>
>>        struct apic_page *ap = this_cpu_ptr(apic_page);
>>        unsigned long *sirr;
>>
>>        /*
>>         * apic_page.regs[SAVIC_ALLOWED_IRR_OFFSET...] is an array of
>>         * consecutive 32-bit registers, which represents a vector bitmap.
>>         */
>>         sirr = (unsigned long *) &ap->regs[SAVIC_ALLOWED_IRR_OFFSET];
>>         if (set)
>>         	set_bit(sirr, vector);
>>         else
>>         	clear_bit(sirr, vector);
>>
>> See how code suddenly becomes self explaining, obvious and
>> comprehensible?
>>
> 
> Yes, thank you!
> 

After checking more on this, set_bit(vector, ) cannot be used directly  here, as
32-bit registers are not consecutive. Each register is aligned at 16 byte
boundary. So, I changed it to below:

--- a/arch/x86/kernel/apic/x2apic_savic.c
+++ b/arch/x86/kernel/apic/x2apic_savic.c
@@ -19,6 +19,26 @@

 /* APIC_EILVTn(3) is the last defined APIC register. */
 #define NR_APIC_REGS   (APIC_EILVTn(4) >> 2)
+/*
+ * APIC registers such as APIC_IRR, APIC_ISR, ... are mapped as
+ * 32-bit registers and are aligned at 16-byte boundary. For
+ * example, APIC_IRR registers mapping looks like below:
+ *
+ * #Offset    #bits         Description
+ *  0x200      31:0         vectors 0-31
+ *  0x210      31:0         vectors 32-63
+ *  ...
+ *  0x270      31:0         vectors 224-255
+ *
+ * VEC_BIT_POS gives the bit position of a vector in the APIC
+ * reg containing its state.
+ */
+#define VEC_BIT_POS(v) ((v) & (32 - 1))
+/*
+ * VEC_REG_OFF gives the relative (from the start offset of that APIC
+ * register) offset of the APIC register containing state for a vector.
+ */
+#define VEC_REG_OFF(v) (((v) >> 5) << 4)

 struct apic_page {
        union {
@@ -185,6 +205,35 @@ static void x2apic_savic_send_IPI_mask_allbutself(const struct cpumask *mask, in
        __send_IPI_mask(mask, vector, APIC_DEST_ALLBUT);
 }

+static void x2apic_savic_update_vector(unsigned int cpu, unsigned int vector, bool set)
+{
+       struct apic_page *ap = per_cpu_ptr(apic_backing_page, cpu);
+       unsigned long *sirr;
+       int vec_bit;
+       int reg_off;
+
+       /*
+        * ALLOWED_IRR registers are mapped in the apic_page at below byte
+        * offsets. Each register is a 32-bit register aligned at 16-byte
+        * boundary.
+        *
+        * #Offset                    #bits     Description
+        * SAVIC_ALLOWED_IRR_OFFSET   31:0      Guest allowed vectors 0-31
+        * "" + 0x10                  31:0      Guest allowed vectors 32-63
+        * ...
+        * "" + 0x70                  31:0      Guest allowed vectors 224-255
+        *
+        */
+       reg_off = SAVIC_ALLOWED_IRR_OFFSET + VEC_REG_OFF(vector);
+       sirr = (unsigned long *) &ap->regs[reg_off >> 2];
+       vec_bit = VEC_BIT_POS(vector);
+
+       if (set)
+               set_bit(vec_bit, sirr);
+       else
+               clear_bit(vec_bit, sirr);
+}
+
 static void init_backing_page(void *backing_page)
 {
        u32 apic_id;
@@ -272,6 +321,8 @@ static struct apic apic_x2apic_savic __ro_after_init = {
        .eoi                            = native_apic_msr_eoi,
        .icr_read                       = native_x2apic_icr_read,
        .icr_write                      = native_x2apic_icr_write,
+
+       .update_vector                  = x2apic_savic_update_vector,
 };


- Neeraj

> - Neeraj
> 
>> Thanks,
>>
>>         tglx
>