The xarray can return the previous entry at a location. Use this fact to simplify the brd code when there is no existing page at a location. This also slightly improves the handling of racy discards as we now always have a page under RCU protection by the time we are ready to copy the data. Signed-off-by: Christoph Hellwig <hch@xxxxxx> --- drivers/block/brd.c | 89 ++++++++++++++++++--------------------------- 1 file changed, 36 insertions(+), 53 deletions(-) diff --git a/drivers/block/brd.c b/drivers/block/brd.c index a3725673cf16..1857da901227 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -51,37 +51,6 @@ static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector) return xa_load(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT); } -/* - * Insert a new page for a given sector, if one does not already exist. - */ -static int brd_insert_page(struct brd_device *brd, sector_t sector, gfp_t gfp) -{ - pgoff_t idx = sector >> PAGE_SECTORS_SHIFT; - struct page *page; - int ret = 0; - - page = brd_lookup_page(brd, sector); - if (page) - return 0; - - page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM); - if (!page) - return -ENOMEM; - - xa_lock(&brd->brd_pages); - ret = __xa_insert(&brd->brd_pages, idx, page, gfp); - if (!ret) - brd->brd_nr_pages++; - xa_unlock(&brd->brd_pages); - - if (ret < 0) { - __free_page(page); - if (ret == -EBUSY) - ret = 0; - } - return ret; -} - /* - * Free all backing store pages and xarray. This must only be called when - * there are no other users of the device. 
@@ -109,41 +78,48 @@ static bool brd_rw_bvec(struct brd_device *brd, struct bio *bio) sector_t sector = bio->bi_iter.bi_sector; u32 offset = (sector & (PAGE_SECTORS - 1)) << SECTOR_SHIFT; blk_opf_t opf = bio->bi_opf; - struct page *page; + struct page *page, *ret; void *kaddr; + int err; bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset); - if (op_is_write(opf)) { - int err; - + rcu_read_lock(); + page = brd_lookup_page(brd, sector); + if (!page && op_is_write(opf)) { /* * Must use NOIO because we don't want to recurse back into the * block or filesystem layers from page reclaim. */ - err = brd_insert_page(brd, sector, - (opf & REQ_NOWAIT) ? GFP_NOWAIT : GFP_NOIO); - if (err) { - if (err == -ENOMEM && (opf & REQ_NOWAIT)) - bio_wouldblock_error(bio); - else - bio_io_error(bio); - return false; + gfp_t gfp = (opf & REQ_NOWAIT) ? GFP_NOWAIT : GFP_NOIO; + + rcu_read_unlock(); + page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM); + if (!page) { + err = -ENOMEM; + goto out_error; + } + rcu_read_lock(); + + xa_lock(&brd->brd_pages); + ret = __xa_store(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT, + page, gfp); + if (!ret) + brd->brd_nr_pages++; + xa_unlock(&brd->brd_pages); + + if (ret) { + __free_page(page); + err = xa_err(ret); + if (err < 0) + goto out_error; + page = ret; } } - rcu_read_lock(); - page = brd_lookup_page(brd, sector); - kaddr = bvec_kmap_local(&bv); if (op_is_write(opf)) { - /* - * Page can be removed by concurrent discard, it's fine to skip - * the write and user will read zero data if page does not - * exist. 
- */ - if (page) - memcpy_to_page(page, offset, kaddr, bv.bv_len); + memcpy_to_page(page, offset, kaddr, bv.bv_len); } else { if (page) memcpy_from_page(kaddr, page, offset, bv.bv_len); @@ -155,6 +131,13 @@ static bool brd_rw_bvec(struct brd_device *brd, struct bio *bio) bio_advance_iter_single(bio, &bio->bi_iter, bv.bv_len); return true; + +out_error: + if (err == -ENOMEM && (opf & REQ_NOWAIT)) + bio_wouldblock_error(bio); + else + bio_io_error(bio); + return false; } static void brd_free_one_page(struct rcu_head *head) -- 2.47.2