> +static bool zswap_compress(struct folio *folio, long start, unsigned int nr_pages,
> +			   struct zswap_entry *entries[], struct zswap_pool *pool,
> +			   int node_id)
>  {
>  	struct crypto_acomp_ctx *acomp_ctx;
>  	struct scatterlist input, output;
> -	int comp_ret = 0, alloc_ret = 0;
> -	unsigned int dlen = PAGE_SIZE;
> -	unsigned long handle;
> -	struct zpool *zpool;
> +	struct zpool *zpool = pool->zpool;
> +
> +	unsigned int dlens[ZSWAP_MAX_BATCH_SIZE];
> +	int errors[ZSWAP_MAX_BATCH_SIZE];
> +
> +	unsigned int nr_comps = min(nr_pages, pool->compr_batch_size);
> +	unsigned int i, j;
> +	int err;
>  	gfp_t gfp;
> -	u8 *dst;
> +
> +	gfp = GFP_NOWAIT | __GFP_NORETRY | __GFP_HIGHMEM | __GFP_MOVABLE;
>
>  	acomp_ctx = raw_cpu_ptr(pool->acomp_ctx);
>
>  	mutex_lock(&acomp_ctx->mutex);
>
> -	dst = acomp_ctx->buffers[0];
> -	sg_init_table(&input, 1);
> -	sg_set_page(&input, page, PAGE_SIZE, 0);
> -
>  	/*
> -	 * We need PAGE_SIZE * 2 here since there maybe over-compression case,
> -	 * and hardware-accelerators may won't check the dst buffer size, so
> -	 * giving the dst buffer with enough length to avoid buffer overflow.
> +	 * Note:
> +	 * [i] refers to the incoming batch space and is used to
> +	 * index into the folio pages, @entries and @errors.
>  	 */
> -	sg_init_one(&output, dst, PAGE_SIZE * 2);
> -	acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen);
> +	for (i = 0; i < nr_pages; i += nr_comps) {
> +		if (nr_comps == 1) {
> +			sg_init_table(&input, 1);
> +			sg_set_page(&input, folio_page(folio, start + i), PAGE_SIZE, 0);
>
> -	/*
> -	 * it maybe looks a little bit silly that we send an asynchronous request,
> -	 * then wait for its completion synchronously. This makes the process look
> -	 * synchronous in fact.
> -	 * Theoretically, acomp supports users send multiple acomp requests in one
> -	 * acomp instance, then get those requests done simultaneously. but in this
> -	 * case, zswap actually does store and load page by page, there is no
> -	 * existing method to send the second page before the first page is done
> -	 * in one thread doing zwap.
> -	 * but in different threads running on different cpu, we have different
> -	 * acomp instance, so multiple threads can do (de)compression in parallel.
> -	 */
> -	comp_ret = crypto_wait_req(crypto_acomp_compress(acomp_ctx->req), &acomp_ctx->wait);
> -	dlen = acomp_ctx->req->dlen;
> -	if (comp_ret)
> -		goto unlock;
> +			/*
> +			 * We need PAGE_SIZE * 2 here since there maybe over-compression case,
> +			 * and hardware-accelerators may won't check the dst buffer size, so
> +			 * giving the dst buffer with enough length to avoid buffer overflow.
> +			 */
> +			sg_init_one(&output, acomp_ctx->buffers[0], PAGE_SIZE * 2);
> +			acomp_request_set_params(acomp_ctx->req, &input,
> +						 &output, PAGE_SIZE, PAGE_SIZE);
> +
> +			errors[i] = crypto_wait_req(crypto_acomp_compress(acomp_ctx->req),
> +						    &acomp_ctx->wait);
> +			if (unlikely(errors[i]))
> +				goto compress_error;
> +
> +			dlens[i] = acomp_ctx->req->dlen;
> +		} else {
> +			struct page *pages[ZSWAP_MAX_BATCH_SIZE];
> +			unsigned int k;
> +
> +			for (k = 0; k < nr_pages; ++k)
> +				pages[k] = folio_page(folio, start + k);
> +
> +			struct swap_batch_comp_data batch_comp_data = {
> +				.pages = pages,
> +				.dsts = acomp_ctx->buffers,
> +				.dlens = dlens,
> +				.errors = errors,
> +				.nr_comps = nr_pages,
> +			};

Why would this work given that nr_pages might be larger than pool->compr_batch_size?
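To spell out the concern (hypothetical numbers, and the VM_WARN_ON_ONCE() at the end is just my suggestion, not something in this patch):

	/*
	 * Say pool->compr_batch_size == 4 and nr_pages == 8:
	 *
	 *	nr_comps = min(8, 4) == 4  ->  we take the else branch,
	 *	batch_comp_data.nr_comps = nr_pages == 8
	 *
	 * i.e. the driver would be handed an 8-page batch although it
	 * only advertised 4, and acomp_ctx->buffers[] presumably holds
	 * only compr_batch_size destination buffers. If callers
	 * guarantee this can never happen, an assertion would document
	 * the invariant:
	 */
	VM_WARN_ON_ONCE(pool->compr_batch_size > 1 &&
			nr_pages > pool->compr_batch_size);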
> +	unsigned int nr_comps = min(nr_pages, pool->compr_batch_size);

So this actually doesn't happen unless pool->compr_batch_size == 1, but the code is confusing, right?

> +
> +			acomp_ctx->req->kernel_data = &batch_comp_data;

Can you actually pass a request larger than pool->compr_batch_size to the crypto driver?

By the way, swap_batch_comp_data seems like a poor name: why should crypto drivers know anything about swap_? kernel_data isn't ideal either; maybe batch_data would be better — see the strawman below.

> +
> +			if (unlikely(crypto_acomp_compress(acomp_ctx->req)))
> +				goto compress_error;
> +		}
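On the naming, something like the below would read better to me (acomp_batch_data and req->batch_data are strawman names from my side, untested):

	/*
	 * Strawman: crypto-neutral naming, nothing swap-specific, so
	 * any batching acomp user could reuse it.
	 */
	struct acomp_batch_data {
		struct page **pages;	/* source pages to compress */
		u8 **dsts;		/* per-page destination buffers */
		unsigned int *dlens;	/* per-page compressed lengths */
		int *errors;		/* per-page error codes */
		unsigned int nr_comps;	/* number of pages in the batch */
	};

	...
	acomp_ctx->req->batch_data = &batch_data;

Thanks
Barry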