Hi,
在 2025/05/06 23:42, Keith Busch 写道:
On Tue, May 06, 2025 at 04:38:36PM +0200, Christoph Hellwig wrote:
+ rcu_read_lock();
+ page = brd_lookup_page(brd, sector);
+ if (!page && op_is_write(opf)) {
/*
* Must use NOIO because we don't want to recurse back into the
* block or filesystem layers from page reclaim.
*/
- err = brd_insert_page(brd, sector,
- (opf & REQ_NOWAIT) ? GFP_NOWAIT : GFP_NOIO);
- if (err) {
- if (err == -ENOMEM && (opf & REQ_NOWAIT))
- bio_wouldblock_error(bio);
- else
- bio_io_error(bio);
- return false;
+ gfp_t gfp = (opf & REQ_NOWAIT) ? GFP_NOWAIT : GFP_NOIO;
+
+ rcu_read_unlock();
+ page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM);
+ if (!page) {
+ err = -ENOMEM;
+ goto out_error;
+ }
+ rcu_read_lock();
+
+ xa_lock(&brd->brd_pages);
+ ret = __xa_store(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT,
+ page, gfp);
According to its documentation, __xa_store() on success replaces the old
entry (returned as "ret") with your new entry ("page"). I think you want to
store the new entry only if there is no old entry, so shouldn't this instead be:
ret = __xa_cmpxchg(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT,
NULL, page, gfp);
?
This looks right; the comment on xa_store() says:
After this function returns, loads from this index will return @entry.
+ if (!ret)
+ brd->brd_nr_pages++;
+ xa_unlock(&brd->brd_pages);
+
+ if (ret) {
+ __free_page(page);
+ err = xa_err(ret);
+ if (err < 0)
+ goto out_error;
+ page = ret;
}
.
BTW, can we keep the old brd_insert_page() and have it return the inserted
page directly? That change should be simpler.
Thanks,
Kuai
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index a3725673cf16..ea481422e53e 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -54,7 +54,7 @@ static struct page *brd_lookup_page(struct brd_device
*brd, sector_t sector)
/*
* Insert a new page for a given sector, if one does not already exist.
*/
-static int brd_insert_page(struct brd_device *brd, sector_t sector,
gfp_t gfp)
+static struct page *brd_insert_page(struct brd_device *brd, sector_t
sector, gfp_t gfp)
{
pgoff_t idx = sector >> PAGE_SECTORS_SHIFT;
struct page *page;
@@ -62,24 +62,30 @@ static int brd_insert_page(struct brd_device *brd,
sector_t sector, gfp_t gfp)
page = brd_lookup_page(brd, sector);
if (page)
- return 0;
+ return page;
+ rcu_read_unlock();
page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM);
- if (!page)
- return -ENOMEM;
+ if (!page) {
+ rcu_read_lock();
+ return ERR_PTR(-ENOMEM);
+ }
+ rcu_read_lock();
xa_lock(&brd->brd_pages);
ret = __xa_insert(&brd->brd_pages, idx, page, gfp);
if (!ret)
brd->brd_nr_pages++;
xa_unlock(&brd->brd_pages);
- if (ret < 0) {
- __free_page(page);
- if (ret == -EBUSY)
- ret = 0;
- }
- return ret;
+ if (likely(!ret))
+ return page;
+
+ __free_page(page);
+ if (unlikely(ret == -EBUSY))
+ return brd_lookup_page(brd, sector);
+
+ return ERR_PTR(ret);
}
/*
@@ -114,36 +120,31 @@ static bool brd_rw_bvec(struct brd_device *brd,
struct bio *bio)
bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);
+ rcu_read_lock();
if (op_is_write(opf)) {
- int err;
-
/*
* Must use NOIO because we don't want to recurse back
into the
* block or filesystem layers from page reclaim.
*/
- err = brd_insert_page(brd, sector,
+ page = brd_insert_page(brd, sector,
(opf & REQ_NOWAIT) ? GFP_NOWAIT :
GFP_NOIO);
- if (err) {
+ if (IS_ERR(page)) {
+ int err = PTR_ERR(page);
+
+ rcu_read_unlock();
if (err == -ENOMEM && (opf & REQ_NOWAIT))
bio_wouldblock_error(bio);
else
bio_io_error(bio);
return false;
}
+ } else {
+ page = brd_lookup_page(brd, sector);
}
- rcu_read_lock();
- page = brd_lookup_page(brd, sector);
-
kaddr = bvec_kmap_local(&bv);
if (op_is_write(opf)) {
- /*
- * Page can be removed by concurrent discard, it's fine
to skip
- * the write and user will read zero data if page does not
- * exist.
- */
- if (page)
- memcpy_to_page(page, offset, kaddr, bv.bv_len);
+ memcpy_to_page(page, offset, kaddr, bv.bv_len);
} else {
if (page)
memcpy_from_page(kaddr, page, offset, bv.bv_len);