On Wed, 2025-03-26 at 19:36 +0000, Yosry Ahmed wrote:
> Following changes will track ASID to vCPU mappings for all ASIDs, not
> just SEV ASIDs. Using per-CPU arrays with the maximum possible number of
> ASIDs would be too expensive.

Maybe add a word or two to explain that currently the # of SEV ASIDs is
small, but the # of all ASIDs is relatively large (a 16-bit number or so)?

> Use xarrays to generalize tracking the
> mappings instead. The logic is also mostly moved outside the SEV code to
> allow future changes to reuse it for normal SVM VMs.
>
> Storing into an xarray is more expensive than reading/writing to an
> array, but is only done on vCPU load and should be mostly uncontended.
> Also, the size of the xarray should be O(# of VMs), so it is not
> expected to be huge. In fact, the xarray will probably use less memory
> than the normal array even for SEV on machines that only run a few VMs.
>
> When a new ASID is allocated, reserve an entry for it on all xarrays on
> all CPUs. This allows the memory allocations to happen in a more relaxed
> context (allowing reclaim and accounting), and failures to be handled at
> VM creation time. However, entries will be allocated even on CPUs that
> never run the VM.
>
> The alternative is relying on on-demand GFP_ATOMIC allocations with
> xa_store() on vCPU load. These allocations are more likely to fail and
> more difficult to handle since vCPU load cannot fail. Flushing the TLB
> if the xa_store() fails is probably sufficient handling, but
> preallocations are easier to reason about.
>
> Signed-off-by: Yosry Ahmed <yosry.ahmed@xxxxxxxxx>
> ---
>  arch/x86/kvm/svm/sev.c | 25 ++++-----------------
>  arch/x86/kvm/svm/svm.c | 50 +++++++++++++++++++++++++++++++-----------
>  arch/x86/kvm/svm/svm.h |  7 +++---
>  3 files changed, 44 insertions(+), 38 deletions(-)
>
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index 1742f51d4c194..c11da3259c089 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -211,6 +211,9 @@ static int sev_asid_new(struct kvm_sev_info *sev)
>  		goto e_uncharge;
>  	}
>
> +	if (!svm_register_asid(asid))
> +		goto e_uncharge;
> +
>  	__set_bit(asid, sev_asid_bitmap);
>
>  	mutex_unlock(&sev_bitmap_lock);
> @@ -231,18 +234,10 @@ unsigned int sev_get_asid(struct kvm *kvm)
>
>  static void sev_asid_free(struct kvm_sev_info *sev)
>  {
> -	struct svm_cpu_data *sd;
> -	int cpu;
> +	svm_unregister_asid(sev->asid);
>
>  	mutex_lock(&sev_bitmap_lock);
> -
>  	__set_bit(sev->asid, sev_reclaim_asid_bitmap);
> -
> -	for_each_possible_cpu(cpu) {
> -		sd = per_cpu_ptr(&svm_data, cpu);
> -		sd->sev_vcpus[sev->asid] = NULL;
> -	}
> -
>  	mutex_unlock(&sev_bitmap_lock);
>
>  	sev_misc_cg_uncharge(sev);
> @@ -3076,18 +3071,6 @@ void sev_hardware_unsetup(void)
>  	misc_cg_set_capacity(MISC_CG_RES_SEV_ES, 0);
>  }
>
> -int sev_cpu_init(struct svm_cpu_data *sd)
> -{
> -	if (!sev_enabled)
> -		return 0;
> -
> -	sd->sev_vcpus = kcalloc(nr_asids, sizeof(void *), GFP_KERNEL);
> -	if (!sd->sev_vcpus)
> -		return -ENOMEM;
> -
> -	return 0;
> -}
> -
>  /*
>   * Pages used by hardware to hold guest encrypted state must be flushed before
>   * returning them to the system.
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index ce67112732e8c..b740114a9d9bc 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -694,7 +694,7 @@ static void svm_cpu_uninit(int cpu)
>  	if (!sd->save_area)
>  		return;
>
> -	kfree(sd->sev_vcpus);
> +	xa_destroy(&sd->asid_vcpu);
>  	__free_page(__sme_pa_to_page(sd->save_area_pa));
>  	sd->save_area_pa = 0;
>  	sd->save_area = NULL;
> @@ -711,18 +711,11 @@ static int svm_cpu_init(int cpu)
>  	if (!save_area_page)
>  		return ret;
>
> -	ret = sev_cpu_init(sd);
> -	if (ret)
> -		goto free_save_area;
> +	xa_init(&sd->asid_vcpu);
>
>  	sd->save_area = page_address(save_area_page);
>  	sd->save_area_pa = __sme_page_pa(save_area_page);
>  	return 0;
> -
> -free_save_area:
> -	__free_page(save_area_page);
> -	return ret;
> -
>  }
>
>  static void set_dr_intercepts(struct vcpu_svm *svm)
> @@ -1557,6 +1550,7 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>  	unsigned int asid;
>  	struct vcpu_svm *svm = to_svm(vcpu);
>  	struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu);
> +	struct kvm_vcpu *prev;
>
>  	if (vcpu->scheduled_out && !kvm_pause_in_guest(vcpu->kvm))
>  		shrink_ple_window(vcpu);
> @@ -1573,13 +1567,13 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>  	if (sev_guest(vcpu->kvm)) {
>  		/*
>  		 * Flush the TLB when a different vCPU using the same ASID is
> -		 * run on the same CPU.
> +		 * run on the same CPU. xa_store() should always succeed because
> +		 * the entry is reserved when the ASID is allocated.
>  		 */
>  		asid = sev_get_asid(vcpu->kvm);
> -		if (sd->sev_vcpus[asid] != vcpu) {
> -			sd->sev_vcpus[asid] = vcpu;
> +		prev = xa_store(&sd->asid_vcpu, asid, vcpu, GFP_ATOMIC);
> +		if (prev != vcpu || WARN_ON_ONCE(xa_err(prev)))

Tiny nitpick: I would have preferred to have WARN_ON_ONCE(xa_err(prev)) come
first in the above condition, because in theory we shouldn't use a value
before we know it's not an error, but in practice this doesn't really matter.

>  			kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
> -		}
>  	}
>  }
>
> @@ -5047,6 +5041,36 @@ static void svm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
>  	sev_vcpu_deliver_sipi_vector(vcpu, vector);
>  }
>
> +void svm_unregister_asid(unsigned int asid)
> +{
> +	struct svm_cpu_data *sd;
> +	int cpu;
> +
> +	for_each_possible_cpu(cpu) {
> +		sd = per_cpu_ptr(&svm_data, cpu);
> +		xa_erase(&sd->asid_vcpu, asid);
> +	}
> +}
> +
> +bool svm_register_asid(unsigned int asid)
> +{
> +	struct svm_cpu_data *sd;
> +	int cpu;
> +
> +	/*
> +	 * Preallocate entries on all CPUs for the ASID to avoid memory
> +	 * allocations in the vCPU load path.
> +	 */
> +	for_each_possible_cpu(cpu) {
> +		sd = per_cpu_ptr(&svm_data, cpu);
> +		if (xa_reserve(&sd->asid_vcpu, asid, GFP_KERNEL_ACCOUNT)) {
> +			svm_unregister_asid(asid);
> +			return false;
> +		}
> +	}
> +	return true;
> +}
> +
>  static void svm_vm_destroy(struct kvm *kvm)
>  {
>  	avic_vm_destroy(kvm);
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index 3ab2a424992c1..4929b96d3d700 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -340,8 +340,7 @@ struct svm_cpu_data {
>
>  	struct vmcb *current_vmcb;
>
> -	/* index = sev_asid, value = vcpu pointer */

Maybe keep the above comment?
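Something like this, perhaps (just a sketch, the exact wording is up to you):

	/* index = asid, value = vcpu pointer */
	struct xarray asid_vcpu;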
> -	struct kvm_vcpu **sev_vcpus;
> +	struct xarray asid_vcpu;
>  };
>
>  DECLARE_PER_CPU(struct svm_cpu_data, svm_data);
> @@ -655,6 +654,8 @@ void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr,
>  void svm_set_x2apic_msr_interception(struct vcpu_svm *svm, bool disable);
>  void svm_complete_interrupt_delivery(struct kvm_vcpu *vcpu, int delivery_mode,
>  				     int trig_mode, int vec);
> +bool svm_register_asid(unsigned int asid);
> +void svm_unregister_asid(unsigned int asid);
>
>  /* nested.c */
>
> @@ -793,7 +794,6 @@ void sev_vm_destroy(struct kvm *kvm);
>  void __init sev_set_cpu_caps(void);
>  void __init sev_hardware_setup(void);
>  void sev_hardware_unsetup(void);
> -int sev_cpu_init(struct svm_cpu_data *sd);
>  int sev_dev_get_attr(u32 group, u64 attr, u64 *val);
>  extern unsigned int max_sev_asid;
>  void sev_handle_rmp_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code);
> @@ -817,7 +817,6 @@ static inline void sev_vm_destroy(struct kvm *kvm) {}
>  static inline void __init sev_set_cpu_caps(void) {}
>  static inline void __init sev_hardware_setup(void) {}
>  static inline void sev_hardware_unsetup(void) {}
> -static inline int sev_cpu_init(struct svm_cpu_data *sd) { return 0; }
>  static inline int sev_dev_get_attr(u32 group, u64 attr, u64 *val) { return -ENXIO; }
>  #define max_sev_asid 0
>  static inline void sev_handle_rmp_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code) {}

Overall looks good to me.

Reviewed-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx>

Best regards,
	Maxim Levitsky