Re: [PATCH v3 2/3] s390/crypto: Rework protected key AES for true asynch support

On 01/04/2025 16:50, Harald Freudenberger wrote:
> This is a complete rework of the protected key AES (PAES) implementation.
> The goal of this rework is to implement the 4 modes (ecb, cbc, ctr, xts)
> in a real asynchronous fashion:
> - init(), exit() and setkey() are synchronous and don't allocate any memory.
> - the encrypt/decrypt functions first try to do the job in a synchronous
>   manner. If this fails, for example the protected key got invalid caused
>   by for example a guest suspend/resume or guest migration action, the

reword: please drop one of the "for example".

>   encrypt/decrypt is transfered to an instance of the crypto engine (see

typo: transferred 

>   below) for asynchronous processing.
>   These via crypto engine postponed requests are then handled via the
>   do_one_request() callback but may of course again run into a still

reword: please drop at least one "via". Proposal (if I understood it correctly): "These postponed requests are then handled by the crypto engine by calling the do_one_request() callback ..."

>   not converted key or the key is getting invalid. If the key is
>   still not converted, the first thread does the conversion and updates
>   the key status in the transformation context. The conversion is
>   invoked via pkey API with a new flag PKEY_XFLAG_NOMEMALLOC.
> 
> The pkey API used here - the function pkey_key2protkey() - uses
> a new version of this in-kernel-API. A new flag PKEY_XFLAG_NOMEMALLOC
> tells the PKEY layer (and subsidiary layers) that it must not allocate
> any memory causing IO operations. Note that the patches for this
> pkey/zcrypt/AP extensions are currently under review and yet not
> upstream available. SO THIS PATCH DOES NOT COMPILE YET.

As the ap-rework series is now on its way, you can remove parts of this paragraph.

> This patch together with the pkey/zcrypt/AP extensions should
> toughen the paes crypto algorithms to truly meet the requirements
> for in-kernel skcipher implementations and the usage patterns for
> the dm-crypt and dm-integrity layers.
> 
> Signed-off-by: Harald Freudenberger <freude@xxxxxxxxxxxxx>

It is very hard to review this patch. If there is any chance to split this up into smaller pieces, please do it.
This is the first part of the review, covering mainly common parts and ecb. The other modes will follow later.
See my comments below.

> ---
>  arch/s390/crypto/paes_s390.c | 1725 +++++++++++++++++++++++-----------
>  1 file changed, 1183 insertions(+), 542 deletions(-)
> 
> diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c
> index 646cbbf0678d..1d1f1a98ec4d 100644
> --- a/arch/s390/crypto/paes_s390.c
> +++ b/arch/s390/crypto/paes_s390.c
> @@ -5,7 +5,7 @@
>   * s390 implementation of the AES Cipher Algorithm with protected keys.
>   *
>   * s390 Version:
> - *   Copyright IBM Corp. 2017, 2023
> + *   Copyright IBM Corp. 2017, 2025
>   *   Author(s): Martin Schwidefsky <schwidefsky@xxxxxxxxxx>
>   *		Harald Freudenberger <freude@xxxxxxxxxx>
>   */
> @@ -13,16 +13,17 @@
>  #define KMSG_COMPONENT "paes_s390"
>  #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
>  
> -#include <crypto/aes.h>
> -#include <crypto/algapi.h>
> -#include <linux/bug.h>
> -#include <linux/err.h>
> -#include <linux/module.h>
>  #include <linux/cpufeature.h>
> +#include <linux/delay.h>
> +#include <linux/err.h>
>  #include <linux/init.h>
> +#include <linux/miscdevice.h>
> +#include <linux/module.h>
>  #include <linux/mutex.h>
>  #include <linux/spinlock.h>
> -#include <linux/delay.h>
> +#include <crypto/aes.h>
> +#include <crypto/algapi.h>
> +#include <crypto/engine.h>
>  #include <crypto/internal/skcipher.h>
>  #include <crypto/xts.h>
>  #include <asm/cpacf.h>
> @@ -44,23 +45,55 @@ static DEFINE_MUTEX(ctrblk_lock);
>  
>  static cpacf_mask_t km_functions, kmc_functions, kmctr_functions;
>  
> +static struct crypto_engine *paes_crypto_engine;
> +#define MAX_QLEN 10
> +
> +/*
> + * protected key specific stuff
> + */
> +
>  struct paes_protkey {
>  	u32 type;
>  	u32 len;
>  	u8 protkey[PXTS_256_PROTKEY_SIZE];
>  };
>  
> -struct key_blob {
> -	/*
> -	 * Small keys will be stored in the keybuf. Larger keys are
> -	 * stored in extra allocated memory. In both cases does
> -	 * key point to the memory where the key is stored.
> -	 * The code distinguishes by checking keylen against
> -	 * sizeof(keybuf). See the two following helper functions.
> -	 */
> -	u8 *key;
> -	u8 keybuf[128];
> +#define PK_STATE_NO_KEY		     0
> +#define PK_STATE_CONVERT_IN_PROGRESS 1
> +#define PK_STATE_VALID		     2

Please use an enum here.
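
Untested sketch of what I have in mind (the type name is just a proposal):

enum paes_key_state {
	PK_STATE_NO_KEY = 0,
	PK_STATE_CONVERT_IN_PROGRESS,
	PK_STATE_VALID,
};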

> +
> +struct s390_paes_ctx {
> +	/* source key material used to derive a protected key from */
> +	u8 keybuf[PAES_MAX_KEYSIZE];
> +	unsigned int keylen;
> +
> +	/* cpacf function code to use with this protected key type */
> +	long fc;
> +
> +	/* spinlock to atomic read/update all the following fields */
> +	spinlock_t pk_lock;
> +
> +	/* see PK_STATE* defines above, < 0 holds convert failure rc  */
> +	int pk_state;

I see no advantage in splitting the value range; on the contrary, it makes the status handling more complex.
I would prefer an enum for pk_state and a separate element for the conversion rc.
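
Untested sketch (the member name convert_rc is just a proposal):

	/* protected key state, see enum proposal above */
	enum paes_key_state pk_state;

	/* rc of the last failed protected key conversion */
	int convert_rc;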

> +	/* if state is valid, pk holds the protected key */
> +	struct paes_protkey pk;
> +};
> +
> +struct s390_pxts_ctx {
> +	/* source key material used to derive a protected key from */
> +	u8 keybuf[2 * PAES_MAX_KEYSIZE];
>  	unsigned int keylen;
> +
> +	/* cpacf function code to use with this protected key type */
> +	long fc;
> +
> +	/* spinlock to atomic read/update all the following fields */
> +	spinlock_t pk_lock;
> +
> +	/* see PK_STATE* defines above, < 0 holds convert failure rc  */
> +	int pk_state;

Same here.

> +	/* if state is valid, pk[] hold(s) the protected key(s) */
> +	struct paes_protkey pk[2];
>  };
>  
>  /*
> @@ -89,214 +122,344 @@ static inline u32 make_clrkey_token(const u8 *ck, size_t cklen, u8 *dest)
>  	return sizeof(*token) + cklen;
>  }
>  
> -static inline int _key_to_kb(struct key_blob *kb,
> -			     const u8 *key,
> -			     unsigned int keylen)
> +/*
> + * key_to_ctx() - Set key value into context, maybe construct
> + * a clear key token digestable by pkey from a clear key value.
> + */
> +static inline int key_to_ctx(struct s390_paes_ctx *ctx,
> +			     const u8 *key, unsigned int keylen)

The function name implies a transformation of a key into a context, not just setting a context element. What about paes_ctx_setkey()?

>  {
> +	if (keylen > sizeof(ctx->keybuf))
> +		return -EINVAL;
> +
>  	switch (keylen) {
>  	case 16:
>  	case 24:
>  	case 32:
>  		/* clear key value, prepare pkey clear key token in keybuf */
> -		memset(kb->keybuf, 0, sizeof(kb->keybuf));
> -		kb->keylen = make_clrkey_token(key, keylen, kb->keybuf);
> -		kb->key = kb->keybuf;
> +		memset(ctx->keybuf, 0, sizeof(ctx->keybuf));
> +		ctx->keylen = make_clrkey_token(key, keylen, ctx->keybuf);
>  		break;
>  	default:
>  		/* other key material, let pkey handle this */
> -		if (keylen <= sizeof(kb->keybuf))
> -			kb->key = kb->keybuf;
> -		else {
> -			kb->key = kmalloc(keylen, GFP_KERNEL);
> -			if (!kb->key)
> -				return -ENOMEM;
> -		}
> -		memcpy(kb->key, key, keylen);
> -		kb->keylen = keylen;
> +		memcpy(ctx->keybuf, key, keylen);
> +		ctx->keylen = keylen;
>  		break;
>  	}
>  
>  	return 0;
>  }
>  
> -static inline int _xts_key_to_kb(struct key_blob *kb,
> -				 const u8 *key,
> -				 unsigned int keylen)
> +/*
> + * xts_key_to_ctx() - Set key value into context, maybe construct
> + * a clear key token digestable by pkey from a clear key value.
> + */
> +static inline int xts_key_to_ctx(struct s390_pxts_ctx *ctx,
> +				 const u8 *key, unsigned int keylen)

Same here: the function name implies a transformation of a key into a context, not just setting a context element. What about pxts_ctx_setkey()?

>  {
>  	size_t cklen = keylen / 2;
>  
[...]
> +static int ecb_paes_do_crypt(struct s390_paes_ctx *ctx,
> +			     struct s390_pecb_req_ctx *req_ctx,
> +			     bool maysleep)
>  {
> -	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
> -	int rc;
> +	struct ecb_param *param = &req_ctx->param;
> +	struct skcipher_walk *walk = &req_ctx->walk;
> +	unsigned int nbytes, n, k;
> +	int pk_state, rc;
> +
> +	if (!req_ctx->param_init_done) {
> +		/* fetch and check protected key state */
> +		spin_lock_bh(&ctx->pk_lock);
> +		pk_state = ctx->pk_state;
> +		memcpy(param->key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE);

I would prefer to use the size of param->key instead of a constant value as length.
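
I.e.:

	memcpy(param->key, ctx->pk.protkey, sizeof(param->key));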

> +		spin_unlock_bh(&ctx->pk_lock);
> +		switch (pk_state) {
> +		case PK_STATE_NO_KEY:
> +			rc = -ENOKEY;
> +			goto out;
> +		case PK_STATE_CONVERT_IN_PROGRESS:
> +			rc = -EKEYEXPIRED;
> +			goto out;
> +		case PK_STATE_VALID:
> +			req_ctx->param_init_done = true;
> +			break;
> +		default:
> +			rc = pk_state < 0 ? pk_state : -EIO;
> +			goto out;
> +		}
> +	}
>  
> -	_free_kb_keybuf(&ctx->kb);
> -	rc = _key_to_kb(&ctx->kb, in_key, key_len);
> -	if (rc)
> -		return rc;
> +	rc = 0;

Modify the param block in req_ctx only if the protected key is valid.

int rc = 0;

if (!req_ctx->param_init_done) {
	/* fetch and check protected key state */
	spin_lock_bh(&ctx->pk_lock);
	switch (ctx->pk_state) {
	case PK_STATE_NO_KEY:
		rc = -ENOKEY;
		break;
	case PK_STATE_CONVERT_IN_PROGRESS:
		rc = -EKEYEXPIRED;
		break;
	case PK_STATE_VALID:
		memcpy(param->key, ctx->pk.protkey, sizeof(param->key));
		req_ctx->param_init_done = true;
		break;
	default:
		rc = ctx->pk_state < 0 ? ctx->pk_state : -EIO;
		break;
	}
	spin_unlock_bh(&ctx->pk_lock);
	if (rc)
		goto out;
}

> +
> +	/* always walk on the ... */

What does this comment mean? I'm afraid I don't get it.

> +	while ((nbytes = walk->nbytes) != 0) {
> +		/* only use complete blocks */
> +		n = nbytes & ~(AES_BLOCK_SIZE - 1);
> +		k = cpacf_km(ctx->fc | req_ctx->modifier, param,
> +			     walk->dst.virt.addr, walk->src.virt.addr, n);
> +		if (k)
> +			rc = skcipher_walk_done(walk, nbytes - k);
> +		if (k < n) {
> +			if (!maysleep) {
> +				rc = -EKEYEXPIRED;
> +				goto out;
> +			}
> +			rc = paes_convert_key(ctx);
> +			if (rc)
> +				goto out;
> +			spin_lock_bh(&ctx->pk_lock);
> +			memcpy(param->key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE);
> +			spin_unlock_bh(&ctx->pk_lock);
> +		}
> +	}
>  
> -	return __ecb_paes_set_key(ctx);
> +out:
> +	pr_debug("rc=%d\n", rc);
> +	return rc;
>  }
>  
>  static int ecb_paes_crypt(struct skcipher_request *req, unsigned long modifier)
>  {
> +	struct s390_pecb_req_ctx *req_ctx = skcipher_request_ctx(req);
>  	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
>  	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
> -	struct {
> -		u8 key[PAES_256_PROTKEY_SIZE];
> -	} param;
> -	struct skcipher_walk walk;
> -	unsigned int nbytes, n, k;
> +	struct skcipher_walk *walk = &req_ctx->walk;
>  	int rc;
>  
> -	rc = skcipher_walk_virt(&walk, req, false);
> +	/*
> +	 * First try synchronous. If this fails for any reason
> +	 * schedule this request asynchronous via crypto engine.
> +	 */
> +
> +	rc = skcipher_walk_virt(walk, req, false);
>  	if (rc)
> -		return rc;
> +		goto out;
>  
> -	spin_lock_bh(&ctx->pk_lock);
> -	memcpy(param.key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE);
> -	spin_unlock_bh(&ctx->pk_lock);
> +	req_ctx->modifier = modifier;
> +	req_ctx->param_init_done = false;
>  
> -	while ((nbytes = walk.nbytes) != 0) {
> -		/* only use complete blocks */
> -		n = nbytes & ~(AES_BLOCK_SIZE - 1);
> -		k = cpacf_km(ctx->fc | modifier, &param,
> -			     walk.dst.virt.addr, walk.src.virt.addr, n);
> -		if (k)
> -			rc = skcipher_walk_done(&walk, nbytes - k);
> -		if (k < n) {
> -			if (__paes_convert_key(ctx))
> -				return skcipher_walk_done(&walk, -EIO);
> -			spin_lock_bh(&ctx->pk_lock);
> -			memcpy(param.key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE);
> -			spin_unlock_bh(&ctx->pk_lock);
> -		}
> +	rc = ecb_paes_do_crypt(ctx, req_ctx, false);
> +	if (rc != -EKEYEXPIRED) {
> +		if (rc)
> +			skcipher_walk_done(walk, rc);
> +		goto out;
>  	}
> +
> +	rc = crypto_transfer_skcipher_request_to_engine(paes_crypto_engine, req);
> +	if (rc)
> +		goto out;
> +
> +	rc = -EINPROGRESS;
> +
> +out:
> +	if (rc != -EINPROGRESS)
> +		memzero_explicit(&req_ctx->param, sizeof(req_ctx->param));
> +	pr_debug("rc=%d\n", rc);
>  	return rc;

It took me a while to find the synchronous good-case code path. I would prefer to handle the various cases separately, either with a switch/case or with explicit checks in the main path.

rc = ecb_paes_do_crypt(ctx, req_ctx, false);
if (rc == -EKEYEXPIRED) {
	rc = crypto_transfer_skcipher_request_to_engine(paes_crypto_engine, req);
	rc = rc ?: -EINPROGRESS;
} else if (rc) {
	skcipher_walk_done(walk, rc);
}

if (rc != -EINPROGRESS)
	memzero_explicit(&req_ctx->param, sizeof(req_ctx->param));
pr_debug("rc=%d\n", rc);
return rc;

>  }
>  
> @@ -310,112 +473,242 @@ static int ecb_paes_decrypt(struct skcipher_request *req)
>  	return ecb_paes_crypt(req, CPACF_DECRYPT);
>  }
>  
> -static struct skcipher_alg ecb_paes_alg = {
> -	.base.cra_name		=	"ecb(paes)",
> -	.base.cra_driver_name	=	"ecb-paes-s390",
> -	.base.cra_priority	=	401,	/* combo: aes + ecb + 1 */
> -	.base.cra_blocksize	=	AES_BLOCK_SIZE,
> -	.base.cra_ctxsize	=	sizeof(struct s390_paes_ctx),
> -	.base.cra_module	=	THIS_MODULE,
> -	.base.cra_list		=	LIST_HEAD_INIT(ecb_paes_alg.base.cra_list),
> -	.init			=	ecb_paes_init,
> -	.exit			=	ecb_paes_exit,
> -	.min_keysize		=	PAES_MIN_KEYSIZE,
> -	.max_keysize		=	PAES_MAX_KEYSIZE,
> -	.setkey			=	ecb_paes_set_key,
> -	.encrypt		=	ecb_paes_encrypt,
> -	.decrypt		=	ecb_paes_decrypt,
> -};
> -
> -static int cbc_paes_init(struct crypto_skcipher *tfm)
> +static int ecb_paes_init(struct crypto_skcipher *tfm)
>  {
>  	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
>  
> -	ctx->kb.key = NULL;
> +	memset(ctx, 0, sizeof(*ctx));
>  	spin_lock_init(&ctx->pk_lock);
>  
> +	crypto_skcipher_set_reqsize(tfm, sizeof(struct s390_pecb_req_ctx));
> +
>  	return 0;
>  }
>  
> -static void cbc_paes_exit(struct crypto_skcipher *tfm)
> +static void ecb_paes_exit(struct crypto_skcipher *tfm)
>  {
>  	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
>  
> -	_free_kb_keybuf(&ctx->kb);
> +	memzero_explicit(ctx, sizeof(*ctx));
>  }
>  
> -static inline int __cbc_paes_set_key(struct s390_paes_ctx *ctx)
> +static int ecb_paes_do_one_request(struct crypto_engine *engine, void *areq)
>  {
> -	unsigned long fc;
> +	struct skcipher_request *req = skcipher_request_cast(areq);
> +	struct s390_pecb_req_ctx *req_ctx = skcipher_request_ctx(req);
> +	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
> +	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
> +	struct skcipher_walk *walk = &req_ctx->walk;
>  	int rc;
>  
> -	rc = __paes_convert_key(ctx);
> -	if (rc)
> -		return rc;
> -
> -	/* Pick the correct function code based on the protected key type */
> -	fc = (ctx->pk.type == PKEY_KEYTYPE_AES_128) ? CPACF_KMC_PAES_128 :
> -		(ctx->pk.type == PKEY_KEYTYPE_AES_192) ? CPACF_KMC_PAES_192 :
> -		(ctx->pk.type == PKEY_KEYTYPE_AES_256) ? CPACF_KMC_PAES_256 : 0;
> +	/* walk has already been prepared */
>  
> -	/* Check if the function code is available */
> -	ctx->fc = (fc && cpacf_test_func(&kmc_functions, fc)) ? fc : 0;
> +	rc = ecb_paes_do_crypt(ctx, req_ctx, true);
> +	if (rc != -EKEYEXPIRED) {
> +		if (rc)
> +			skcipher_walk_done(walk, rc);
> +		goto complete;
> +	}

Same here, I would prefer to reverse the logic of the error handling.
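
Analogous to the ecb_paes_crypt() proposal above (untested, and with cond_resched() instead of yield(), see below):

rc = ecb_paes_do_crypt(ctx, req_ctx, true);
if (rc == -EKEYEXPIRED) {
	/*
	 * Protected key expired, conversion is in progress.
	 * Trigger a re-schedule of this request by returning
	 * -ENOSPC ("hardware queue is full") to the crypto engine.
	 */
	cond_resched();
	pr_debug("rescheduling request\n");
	return -ENOSPC;
}
if (rc)
	skcipher_walk_done(walk, rc);

memzero_explicit(&req_ctx->param, sizeof(req_ctx->param));
pr_debug("request complete with rc=%d\n", rc);
local_bh_disable();
crypto_finalize_skcipher_request(engine, req, rc);
local_bh_enable();
return rc;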

>  
> -	return ctx->fc ? 0 : -EINVAL;
> +	/*
> +	 * Protected key expired, conversion is in process.
> +	 * Trigger a re-schedule of this request by returning
> +	 * -ENOSPC ("hardware queue is full") to the crypto engine.
> +	 * To avoid immediately re-invocation of this callback,
> +	 * tell the scheduler to voluntarily give up the CPU here.
> +	 */
> +	yield();

As mentioned by checkpatch.pl, the use of yield() should be avoided. Please use an alternative (e.g. cond_resched()).

> +	pr_debug("rescheduling request\n");
> +	return -ENOSPC;
> +
> +complete:
> +	memzero_explicit(&req_ctx->param, sizeof(req_ctx->param));
> +	pr_debug("request complete with rc=%d\n", rc);
> +	local_bh_disable();
> +	crypto_finalize_skcipher_request(engine, req, rc);
> +	local_bh_enable();
> +	return rc;
>  }

[...]

-- 
Mit freundlichen Grüßen / Kind regards
Holger Dengler
--
IBM Systems, Linux on IBM Z Development
dengler@xxxxxxxxxxxxx




