On Wed, 14 May 2025 18:34:50 +0200
Christoph Schlameuss <schlameuss@xxxxxxxxxxxxx> wrote:

> Instead of allocating a BSCA and upgrading it for PV or when adding the
> 65th cpu we can always use the ESCA.
> 
> The only downside of the change is that we will always allocate 4 pages
> for a 248 cpu ESCA instead of a single page for the BSCA per VM.
> In return we can delete a bunch of checks and special handling depending
> on the SCA type as well as the whole BSCA to ESCA conversion.
> 
> As a fallback we can still run without SCA entries when the SIGP
> interpretation facility is not available.

s/is/or ESCA are/

> 
> Signed-off-by: Christoph Schlameuss <schlameuss@xxxxxxxxxxxxx>
> ---
>  arch/s390/include/asm/kvm_host.h |   1 -
>  arch/s390/kvm/interrupt.c        |  74 ++++++------------
>  arch/s390/kvm/kvm-s390.c         | 159 ++++++---------------------------------
>  arch/s390/kvm/kvm-s390.h         |   4 +-
>  4 files changed, 47 insertions(+), 191 deletions(-)
> 
> diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
> index f51bac835260f562eaf4bbfd373a24bfdbc43834..d03e354a63d9c931522c1a1607eba8685c24527f 100644
> --- a/arch/s390/include/asm/kvm_host.h
> +++ b/arch/s390/include/asm/kvm_host.h
> @@ -631,7 +631,6 @@ struct kvm_s390_pv {
>  
>  struct kvm_arch{
>  	void *sca;
> -	int use_esca;
>  	rwlock_t sca_lock;
>  	debug_info_t *dbf;
>  	struct kvm_s390_float_interrupt float_int;
> diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
> index 60c360c18690f6b94e8483dab2c25f016451204b..f0aaa234c6c36d70dad477c53840ead1c99d33f5 100644
> --- a/arch/s390/kvm/interrupt.c
> +++ b/arch/s390/kvm/interrupt.c
> @@ -51,21 +51,12 @@ static int sca_ext_call_pending(struct kvm_vcpu *vcpu, int *src_id)
>  
>  	BUG_ON(!kvm_s390_use_sca_entries());
>  	read_lock(&vcpu->kvm->arch.sca_lock);
> -	if (vcpu->kvm->arch.use_esca) {
> -		struct esca_block *sca = vcpu->kvm->arch.sca;
> -		union esca_sigp_ctrl sigp_ctrl =
> -			sca->cpu[vcpu->vcpu_id].sigp_ctrl;
> +	struct esca_block *sca = vcpu->kvm->arch.sca;
> +	union esca_sigp_ctrl sigp_ctrl =
> +		sca->cpu[vcpu->vcpu_id].sigp_ctrl;

this ^ fits easily in one line

>  
> -		c = sigp_ctrl.c;
> -		scn = sigp_ctrl.scn;
> -	} else {
> -		struct bsca_block *sca = vcpu->kvm->arch.sca;
> -		union bsca_sigp_ctrl sigp_ctrl =
> -			sca->cpu[vcpu->vcpu_id].sigp_ctrl;
> -
> -		c = sigp_ctrl.c;
> -		scn = sigp_ctrl.scn;
> -	}
> +	c = sigp_ctrl.c;
> +	scn = sigp_ctrl.scn;
>  	read_unlock(&vcpu->kvm->arch.sca_lock);
>  
>  	if (src_id)
> @@ -80,33 +71,18 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id)
>  
>  	BUG_ON(!kvm_s390_use_sca_entries());
>  	read_lock(&vcpu->kvm->arch.sca_lock);
> -	if (vcpu->kvm->arch.use_esca) {
> -		struct esca_block *sca = vcpu->kvm->arch.sca;
> -		union esca_sigp_ctrl *sigp_ctrl =
> -			&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
> -		union esca_sigp_ctrl new_val = {0}, old_val;
> -
> -		old_val = READ_ONCE(*sigp_ctrl);
> -		new_val.scn = src_id;
> -		new_val.c = 1;
> -		old_val.c = 0;
> -
> -		expect = old_val.value;
> -		rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value);
> -	} else {
> -		struct bsca_block *sca = vcpu->kvm->arch.sca;
> -		union bsca_sigp_ctrl *sigp_ctrl =
> -			&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
> -		union bsca_sigp_ctrl new_val = {0}, old_val;
> -
> -		old_val = READ_ONCE(*sigp_ctrl);
> -		new_val.scn = src_id;
> -		new_val.c = 1;
> -		old_val.c = 0;
> -
> -		expect = old_val.value;
> -		rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value);
> -	}
> +	struct esca_block *sca = vcpu->kvm->arch.sca;
> +	union esca_sigp_ctrl *sigp_ctrl =
> +		&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);

this also fits in one line ^
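i.e. something like:

	union esca_sigp_ctrl *sigp_ctrl = &(sca->cpu[vcpu->vcpu_id].sigp_ctrl);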

> +	union esca_sigp_ctrl new_val = {0}, old_val;
> +
> +	old_val = READ_ONCE(*sigp_ctrl);
> +	new_val.scn = src_id;
> +	new_val.c = 1;
> +	old_val.c = 0;
> +
> +	expect = old_val.value;
> +	rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value);
>  	read_unlock(&vcpu->kvm->arch.sca_lock);
>  
>  	if (rc != expect) {
> @@ -123,19 +99,11 @@ static void sca_clear_ext_call(struct kvm_vcpu *vcpu)
>  		return;
>  	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_ECALL_PEND);
>  	read_lock(&vcpu->kvm->arch.sca_lock);
> -	if (vcpu->kvm->arch.use_esca) {
> -		struct esca_block *sca = vcpu->kvm->arch.sca;
> -		union esca_sigp_ctrl *sigp_ctrl =
> -			&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
> +	struct esca_block *sca = vcpu->kvm->arch.sca;
> +	union esca_sigp_ctrl *sigp_ctrl =
> +		&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);

and this ^

>  
> -		WRITE_ONCE(sigp_ctrl->value, 0);
> -	} else {
> -		struct bsca_block *sca = vcpu->kvm->arch.sca;
> -		union bsca_sigp_ctrl *sigp_ctrl =
> -			&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
> -
> -		WRITE_ONCE(sigp_ctrl->value, 0);
> -	}
> +	WRITE_ONCE(sigp_ctrl->value, 0);
>  	read_unlock(&vcpu->kvm->arch.sca_lock);
>  }
>  
> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
> index b65e4cbe67cf70a7d614607ebdd679060e7d31f4..d610447dbf2c1caa084bd98eabf19c4ca8f1e972 100644
> --- a/arch/s390/kvm/kvm-s390.c
> +++ b/arch/s390/kvm/kvm-s390.c
> @@ -271,7 +271,6 @@ debug_info_t *kvm_s390_dbf_uv;
>  /* forward declarations */
>  static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
>  			      unsigned long end);
> -static int sca_switch_to_extended(struct kvm *kvm);
>  
>  static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
>  {
> @@ -631,11 +630,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>  	case KVM_CAP_NR_VCPUS:
>  	case KVM_CAP_MAX_VCPUS:
>  	case KVM_CAP_MAX_VCPU_ID:
> -		r = KVM_S390_BSCA_CPU_SLOTS;
> +		r = KVM_S390_ESCA_CPU_SLOTS;
>  		if (!kvm_s390_use_sca_entries())
>  			r = KVM_MAX_VCPUS;
> -		else if (sclp.has_esca && sclp.has_64bscao)
> -			r = KVM_S390_ESCA_CPU_SLOTS;
>  		if (ext == KVM_CAP_NR_VCPUS)
>  			r = min_t(unsigned int, num_online_cpus(), r);
>  		else if (ext == KVM_CAP_MAX_VCPU_ID)
> @@ -1932,13 +1929,11 @@ static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
>   * Updates the Multiprocessor Topology-Change-Report bit to signal
>   * the guest with a topology change.
>   * This is only relevant if the topology facility is present.
> - *
> - * The SCA version, bsca or esca, doesn't matter as offset is the same.
>   */
>  static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val)
>  {
>  	union sca_utility new, old;
> -	struct bsca_block *sca;
> +	struct esca_block *sca;
>  
>  	read_lock(&kvm->arch.sca_lock);
>  	sca = kvm->arch.sca;
> @@ -1969,7 +1964,7 @@ static int kvm_s390_get_topo_change_indication(struct kvm *kvm,
>  		return -ENXIO;
>  
>  	read_lock(&kvm->arch.sca_lock);
> -	topo = ((struct bsca_block *)kvm->arch.sca)->utility.mtcr;
> +	topo = ((struct esca_block *)kvm->arch.sca)->utility.mtcr;
>  	read_unlock(&kvm->arch.sca_lock);
>  
>  	return put_user(topo, (u8 __user *)attr->addr);
> @@ -2668,14 +2663,6 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
>  		if (kvm_s390_pv_is_protected(kvm))
>  			break;
>  
> -		/*
> -		 * FMT 4 SIE needs esca. As we never switch back to bsca from
> -		 * esca, we need no cleanup in the error cases below
> -		 */
> -		r = sca_switch_to_extended(kvm);
> -		if (r)
> -			break;
> -
>  		r = s390_disable_cow_sharing();
>  		if (r)
>  			break;
> @@ -3316,10 +3303,7 @@ static void kvm_s390_crypto_init(struct kvm *kvm)
>  
>  static void sca_dispose(struct kvm *kvm)
>  {
> -	if (kvm->arch.use_esca)
> -		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
> -	else
> -		free_page((unsigned long)(kvm->arch.sca));
> +	free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
>  	kvm->arch.sca = NULL;
>  }
>  
> @@ -3333,10 +3317,9 @@ void kvm_arch_free_vm(struct kvm *kvm)
>  
>  int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>  {
> -	gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
> -	int i, rc;
> +	gfp_t alloc_flags = GFP_KERNEL_ACCOUNT | __GFP_ZERO;
>  	char debug_name[16];
> -	static unsigned long sca_offset;
> +	int i, rc;
>  
>  	rc = -EINVAL;
>  #ifdef CONFIG_KVM_S390_UCONTROL
> @@ -3358,17 +3341,12 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>  	if (!sclp.has_64bscao)
>  		alloc_flags |= GFP_DMA;
>  	rwlock_init(&kvm->arch.sca_lock);
> -	/* start with basic SCA */
> -	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
> -	if (!kvm->arch.sca)
> -		goto out_err;
>  	mutex_lock(&kvm_lock);
> -	sca_offset += 16;
> -	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
> -		sca_offset = 0;
> -	kvm->arch.sca = (struct bsca_block *)
> -			((char *) kvm->arch.sca + sca_offset);
> +
> +	kvm->arch.sca = alloc_pages_exact(sizeof(*kvm->arch.sca), alloc_flags);
>  	mutex_unlock(&kvm_lock);
> +	if (!kvm->arch.sca)
> +		goto out_err;
>  
>  	sprintf(debug_name, "kvm-%u", current->pid);
>  
> @@ -3550,17 +3528,10 @@ static void sca_del_vcpu(struct kvm_vcpu *vcpu)
>  	if (!kvm_s390_use_sca_entries())
>  		return;
>  	read_lock(&vcpu->kvm->arch.sca_lock);
> -	if (vcpu->kvm->arch.use_esca) {
> -		struct esca_block *sca = vcpu->kvm->arch.sca;
> -
> -		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
> -		sca->cpu[vcpu->vcpu_id].sda = 0;
> -	} else {
> -		struct bsca_block *sca = vcpu->kvm->arch.sca;
> +	struct esca_block *sca = vcpu->kvm->arch.sca;
>  
> -		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
> -		sca->cpu[vcpu->vcpu_id].sda = 0;
> -	}
> +	clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);

didn't checkpatch complain about the unnecessary space ^ in the cast?
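I'd have expected it to want something like:

	clear_bit_inv(vcpu->vcpu_id, (unsigned long *)sca->mcn);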

> +	sca->cpu[vcpu->vcpu_id].sda = 0;
>  	read_unlock(&vcpu->kvm->arch.sca_lock);
>  }
>  
> @@ -3575,105 +3546,23 @@ static void sca_add_vcpu(struct kvm_vcpu *vcpu)
>  		return;
>  	}
>  	read_lock(&vcpu->kvm->arch.sca_lock);
> -	if (vcpu->kvm->arch.use_esca) {
> -		struct esca_block *sca = vcpu->kvm->arch.sca;
> -		phys_addr_t sca_phys = virt_to_phys(sca);
> -
> -		sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block);
> -		vcpu->arch.sie_block->scaoh = sca_phys >> 32;
> -		vcpu->arch.sie_block->scaol = sca_phys & ESCA_SCAOL_MASK;
> -		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
> -		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
> -	} else {
> -		struct bsca_block *sca = vcpu->kvm->arch.sca;
> -		phys_addr_t sca_phys = virt_to_phys(sca);
> -
> -		sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block);
> -		vcpu->arch.sie_block->scaoh = sca_phys >> 32;
> -		vcpu->arch.sie_block->scaol = sca_phys;
> -		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
> -	}
> +	struct esca_block *sca = vcpu->kvm->arch.sca;
> +	phys_addr_t sca_phys = virt_to_phys(sca);
> +
> +	sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block);
> +	vcpu->arch.sie_block->scaoh = sca_phys >> 32;
> +	vcpu->arch.sie_block->scaol = sca_phys & ESCA_SCAOL_MASK;
> +	vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
> +	set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);

same here ^^^

>  	read_unlock(&vcpu->kvm->arch.sca_lock);
>  }
>  
> -/* Basic SCA to Extended SCA data copy routines */
> -static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
> -{
> -	d->sda = s->sda;
> -	d->sigp_ctrl.c = s->sigp_ctrl.c;
> -	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
> -}
> -
> -static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
> -{
> -	int i;
> -
> -	d->ipte_control = s->ipte_control;
> -	d->mcn[0] = s->mcn;
> -	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
> -		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
> -}
> -
> -static int sca_switch_to_extended(struct kvm *kvm)
> -{
> -	struct bsca_block *old_sca = kvm->arch.sca;
> -	struct esca_block *new_sca;
> -	struct kvm_vcpu *vcpu;
> -	unsigned long vcpu_idx;
> -	u32 scaol, scaoh;
> -	phys_addr_t new_sca_phys;
> -
> -	if (kvm->arch.use_esca)
> -		return 0;
> -
> -	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
> -	if (!new_sca)
> -		return -ENOMEM;
> -
> -	new_sca_phys = virt_to_phys(new_sca);
> -	scaoh = new_sca_phys >> 32;
> -	scaol = new_sca_phys & ESCA_SCAOL_MASK;
> -
> -	kvm_s390_vcpu_block_all(kvm);
> -	write_lock(&kvm->arch.sca_lock);
> -
> -	sca_copy_b_to_e(new_sca, old_sca);
> -
> -	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
> -		vcpu->arch.sie_block->scaoh = scaoh;
> -		vcpu->arch.sie_block->scaol = scaol;
> -		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
> -	}
> -	kvm->arch.sca = new_sca;
> -	kvm->arch.use_esca = 1;
> -
> -	write_unlock(&kvm->arch.sca_lock);
> -	kvm_s390_vcpu_unblock_all(kvm);
> -
> -	free_page((unsigned long)old_sca);
> -
> -	VM_EVENT(kvm, 2, "Switched to ESCA (0x%p -> 0x%p)",
> -		 old_sca, kvm->arch.sca);
> -	return 0;
> -}
> -
>  static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
>  {
> -	int rc;
> -
> -	if (!kvm_s390_use_sca_entries()) {
> -		if (id < KVM_MAX_VCPUS)
> -			return true;
> -		return false;
> -	}
> -	if (id < KVM_S390_BSCA_CPU_SLOTS)
> -		return true;
> -	if (!sclp.has_esca || !sclp.has_64bscao)
> -		return false;
> -
> -	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
> +	if (!kvm_s390_use_sca_entries())
> +		return id < KVM_MAX_VCPUS;
>  
> -	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
> +	return id < KVM_S390_ESCA_CPU_SLOTS;
>  }
>  
>  /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
> diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
> index 8d3bbb2dd8d27802bbde2a7bd1378033ad614b8e..2c8e177e4af8f2dab07fd42a904cefdea80f6855 100644
> --- a/arch/s390/kvm/kvm-s390.h
> +++ b/arch/s390/kvm/kvm-s390.h
> @@ -531,7 +531,7 @@ int kvm_s390_handle_per_event(struct kvm_vcpu *vcpu);
>  /* support for Basic/Extended SCA handling */
>  static inline union ipte_control *kvm_s390_get_ipte_control(struct kvm *kvm)
>  {
> -	struct bsca_block *sca = kvm->arch.sca; /* SCA version doesn't matter */
> +	struct esca_block *sca = kvm->arch.sca; /* SCA version doesn't matter */
>  
>  	return &sca->ipte_control;
>  }
> @@ -542,7 +542,7 @@ static inline int kvm_s390_use_sca_entries(void)
>  	 * might use the entries. By not setting the entries and keeping them
>  	 * invalid, hardware will not access them but intercept.
>  	 */
> -	return sclp.has_sigpif;
> +	return sclp.has_sigpif && sclp.has_esca;
>  }
>  void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu,
>  				     struct mcck_volatile_info *mcck_info);
> 