Re: [PATCH] brd: avoid extra xarray lookups on first write

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,

在 2025/05/06 23:42, Keith Busch 写道:
On Tue, May 06, 2025 at 04:38:36PM +0200, Christoph Hellwig wrote:
+	rcu_read_lock();
+	page = brd_lookup_page(brd, sector);
+	if (!page && op_is_write(opf)) {
  		/*
  		 * Must use NOIO because we don't want to recurse back into the
  		 * block or filesystem layers from page reclaim.
  		 */
-		err = brd_insert_page(brd, sector,
-				(opf & REQ_NOWAIT) ? GFP_NOWAIT : GFP_NOIO);
-		if (err) {
-			if (err == -ENOMEM && (opf & REQ_NOWAIT))
-				bio_wouldblock_error(bio);
-			else
-				bio_io_error(bio);
-			return false;
+		gfp_t gfp = (opf & REQ_NOWAIT) ? GFP_NOWAIT : GFP_NOIO;
+
+		rcu_read_unlock();
+		page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM);
+		if (!page) {
+			err = -ENOMEM;
+			goto out_error;
+		}
+		rcu_read_lock();
+
+		xa_lock(&brd->brd_pages);
+		ret = __xa_store(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT,
+				page, gfp);

On success, __xa_store() says it replaces the old entry ("ret"), with
your new entry ("page"). I think you want to store the new entry only if
there is no old entry, so shouldn't this instead be:

		ret = __xa_cmpxchg(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT,
				   NULL, page, gfp);

?

Looks like this is right. From the comments on xa_store():

After this function returns, loads from this index will return @entry.

+		if (!ret)
+			brd->brd_nr_pages++;
+		xa_unlock(&brd->brd_pages);
+
+		if (ret) {
+			__free_page(page);
+			err = xa_err(ret);
+			if (err < 0)
+				goto out_error;
+			page = ret;
  		}

.


BTW, can we keep the old brd_insert_page() and have it return the inserted
page directly? That change should be simpler.

Thanks,
Kuai

diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index a3725673cf16..ea481422e53e 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -54,7 +54,7 @@ static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
 /*
  * Insert a new page for a given sector, if one does not already exist.
  */
-static int brd_insert_page(struct brd_device *brd, sector_t sector, gfp_t gfp)
+static struct page *brd_insert_page(struct brd_device *brd, sector_t sector, gfp_t gfp)
 {
        pgoff_t idx = sector >> PAGE_SECTORS_SHIFT;
        struct page *page;
@@ -62,24 +62,30 @@ static int brd_insert_page(struct brd_device *brd, sector_t sector, gfp_t gfp)

        page = brd_lookup_page(brd, sector);
        if (page)
-               return 0;
+               return page;

+       rcu_read_unlock();
        page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM);
-       if (!page)
-               return -ENOMEM;
+       if (!page) {
+               rcu_read_lock();
+               return ERR_PTR(-ENOMEM);
+       }

+       rcu_read_lock();
        xa_lock(&brd->brd_pages);
        ret = __xa_insert(&brd->brd_pages, idx, page, gfp);
        if (!ret)
                brd->brd_nr_pages++;
        xa_unlock(&brd->brd_pages);

-       if (ret < 0) {
-               __free_page(page);
-               if (ret == -EBUSY)
-                       ret = 0;
-       }
-       return ret;
+       if (likely(!ret))
+               return page;
+
+       __free_page(page);
+       if (unlikely(ret == -EBUSY))
+               return brd_lookup_page(brd, sector);
+
+       return ERR_PTR(ret);
 }

 /*
@@ -114,36 +120,31 @@ static bool brd_rw_bvec(struct brd_device *brd, struct bio *bio)

        bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);

+       rcu_read_lock();
        if (op_is_write(opf)) {
-               int err;
-
                /*
                 * Must use NOIO because we don't want to recurse back into the
                 * block or filesystem layers from page reclaim.
                 */
-               err = brd_insert_page(brd, sector,
+               page = brd_insert_page(brd, sector,
                                (opf & REQ_NOWAIT) ? GFP_NOWAIT : GFP_NOIO);
-               if (err) {
+               if (IS_ERR(page)) {
+                       int err = PTR_ERR(page);
+
+                       rcu_read_unlock();
                        if (err == -ENOMEM && (opf & REQ_NOWAIT))
                                bio_wouldblock_error(bio);
                        else
                                bio_io_error(bio);
                        return false;
                }
+       } else {
+               page = brd_lookup_page(brd, sector);
        }

-       rcu_read_lock();
-       page = brd_lookup_page(brd, sector);
-
        kaddr = bvec_kmap_local(&bv);
        if (op_is_write(opf)) {
-               /*
-                * Page can be removed by concurrent discard, it's fine to skip
-                * the write and user will read zero data if page does not
-                * exist.
-                */
-               if (page)
-                       memcpy_to_page(page, offset, kaddr, bv.bv_len);
+               memcpy_to_page(page, offset, kaddr, bv.bv_len);
        } else {
                if (page)
                        memcpy_from_page(kaddr, page, offset, bv.bv_len);





[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux