Re: [PATCH] brd: use page reference to protect page lifetime

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,

在 2025/07/29 15:52, Hou Tao 写道:
Hi,

On 7/29/2025 3:09 PM, Yu Kuai wrote:
From: Yu Kuai <yukuai3@xxxxxxxxxx>

As discussed in [1], holding the RCU read lock while copying data from/to a
page is too heavy. It's better to protect only the page lookup with RCU and
then grab a reference to prevent the page from being freed by discard.

[1] https://lore.kernel.org/all/eb41cab3-5946-4fe3-a1be-843dd6fca159@xxxxxxxxx/

Signed-off-by: Yu Kuai <yukuai3@xxxxxxxxxx>
---
  drivers/block/brd.c | 56 +++++++++++++++++++++++----------------------
  1 file changed, 29 insertions(+), 27 deletions(-)

diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 0c2eabe14af3..ce311d054cab 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -44,45 +44,55 @@ struct brd_device {
  };
/*
- * Look up and return a brd's page for a given sector.
+ * Look up and return a brd's page with reference grabbed for a given sector.
   */
  static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
  {
-	return xa_load(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT);
+	struct page *page;
+
+	rcu_read_lock();
+	page = xa_load(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT);
+	if (page)
+		get_page(page);

Should this be get_page_unless_zero() instead? Er, it seems that even
get_page_unless_zero() is not enough, because the page may be reused.
Maybe we need to re-read the brd_pages xarray to ensure the page is still
there.

Yes, you're right. Following the page cache procedures, I'll switch to
using xas_load + xas_reload in the next version.

Thanks,
Kuai

+	rcu_read_unlock();
+
+	return page;
  }
/*
   * Insert a new page for a given sector, if one does not already exist.
+ * The returned page will grab reference.
   */
  static struct page *brd_insert_page(struct brd_device *brd, sector_t sector,
  		blk_opf_t opf)
-	__releases(rcu)
-	__acquires(rcu)
  {
  	gfp_t gfp = (opf & REQ_NOWAIT) ? GFP_NOWAIT : GFP_NOIO;
  	struct page *page, *ret;
- rcu_read_unlock();
  	page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM);
-	if (!page) {
-		rcu_read_lock();
+	if (!page)
  		return ERR_PTR(-ENOMEM);
-	}
xa_lock(&brd->brd_pages);
  	ret = __xa_cmpxchg(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT, NULL,
  			page, gfp);
-	rcu_read_lock();
-	if (ret) {
+	if (!ret) {
+		brd->brd_nr_pages++;
+		get_page(page);
  		xa_unlock(&brd->brd_pages);

If I understand correctly, the initial ref-count after alloc_page() is 1
instead of 0, so I think the get_page(page) here and the get_page(ret) below
are unnecessary and will lead to a memory leak.
-		__free_page(page);
-		if (xa_is_err(ret))
-			return ERR_PTR(xa_err(ret));
+		return page;
+	}
+
+	if (!xa_is_err(ret)) {
+		get_page(ret);
+		xa_unlock(&brd->brd_pages);
+		put_page(page);
  		return ret;
  	}
-	brd->brd_nr_pages++;
+
  	xa_unlock(&brd->brd_pages);
-	return page;
+	put_page(page);
+	return ERR_PTR(xa_err(ret));
  }
/*
@@ -95,7 +105,7 @@ static void brd_free_pages(struct brd_device *brd)
  	pgoff_t idx;
xa_for_each(&brd->brd_pages, idx, page) {
-		__free_page(page);
+		put_page(page);
  		cond_resched();
  	}
@@ -117,7 +127,6 @@ static bool brd_rw_bvec(struct brd_device *brd, struct bio *bio)
 
  	bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);
 
-	rcu_read_lock();
  	page = brd_lookup_page(brd, sector);
  	if (!page && op_is_write(opf)) {
  		page = brd_insert_page(brd, sector, opf);
@@ -135,13 +144,13 @@ static bool brd_rw_bvec(struct brd_device *brd, struct bio *bio)
  			memset(kaddr, 0, bv.bv_len);
  	}
  	kunmap_local(kaddr);
-	rcu_read_unlock();
bio_advance_iter_single(bio, &bio->bi_iter, bv.bv_len);
+	if (page)
+		put_page(page);
  	return true;
out_error:
-	rcu_read_unlock();
  	if (PTR_ERR(page) == -ENOMEM && (opf & REQ_NOWAIT))
  		bio_wouldblock_error(bio);
  	else
@@ -149,13 +158,6 @@ static bool brd_rw_bvec(struct brd_device *brd, struct bio *bio)
  	return false;
  }
-static void brd_free_one_page(struct rcu_head *head)
-{
-	struct page *page = container_of(head, struct page, rcu_head);
-
-	__free_page(page);
-}
-
  static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size)
  {
  	sector_t aligned_sector = round_up(sector, PAGE_SECTORS);
@@ -170,7 +172,7 @@ static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size)
  	while (aligned_sector < aligned_end && aligned_sector < rd_size * 2) {
  		page = __xa_erase(&brd->brd_pages, aligned_sector >> PAGE_SECTORS_SHIFT);
  		if (page) {
-			call_rcu(&page->rcu_head, brd_free_one_page);
+			put_page(page);
  			brd->brd_nr_pages--;
  		}
  		aligned_sector += PAGE_SECTORS;

.






[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux