On Wed, Sep 03, 2025 at 02:53:33PM -0400, Brian Foster wrote: > On Fri, Aug 29, 2025 at 04:39:40PM -0700, Joanne Koong wrote: > > Use find_next_bit()/find_next_zero_bit() for iomap dirty bitmap > > iteration. This uses __ffs() internally and is more efficient for > > finding the next dirty or clean bit than manually iterating through the > > bitmap range testing every bit. > > > > Signed-off-by: Joanne Koong <joannelkoong@xxxxxxxxx> > > Suggested-by: Christoph Hellwig <hch@xxxxxxxxxxxxx> > > --- > > fs/iomap/buffered-io.c | 67 ++++++++++++++++++++++++++++++------------ > > 1 file changed, 48 insertions(+), 19 deletions(-) > > > > diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c > > index fd827398afd2..dc1a1f371412 100644 > > --- a/fs/iomap/buffered-io.c > > +++ b/fs/iomap/buffered-io.c > > @@ -75,13 +75,42 @@ static void iomap_set_range_uptodate(struct folio *folio, size_t off, > > folio_mark_uptodate(folio); > > } > > > > -static inline bool ifs_block_is_dirty(struct folio *folio, > > - struct iomap_folio_state *ifs, int block) > > +/** > > + * ifs_next_dirty_block - find the next dirty block in the folio > > + * @folio: The folio > > + * @start_blk: Block number to begin searching at > > + * @end_blk: Last block number (inclusive) to search > > + * > > + * If no dirty block is found, this will return end_blk + 1. 
> > + */ > > +static unsigned ifs_next_dirty_block(struct folio *folio, > > + unsigned start_blk, unsigned end_blk) > > { > > + struct iomap_folio_state *ifs = folio->private; > > struct inode *inode = folio->mapping->host; > > - unsigned int blks_per_folio = i_blocks_per_folio(inode, folio); > > + unsigned int blks = i_blocks_per_folio(inode, folio); > > + > > + return find_next_bit(ifs->state, blks + end_blk + 1, > > + blks + start_blk) - blks; > > +} > > + > > +/** > > + * ifs_next_clean_block - find the next clean block in the folio > > + * @folio: The folio > > + * @start_blk: Block number to begin searching at > > + * @end_blk: Last block number (inclusive) to search > > + * > > + * If no clean block is found, this will return end_blk + 1. > > + */ > > +static unsigned ifs_next_clean_block(struct folio *folio, > > + unsigned start_blk, unsigned end_blk) > > +{ > > + struct iomap_folio_state *ifs = folio->private; > > + struct inode *inode = folio->mapping->host; > > + unsigned int blks = i_blocks_per_folio(inode, folio); > > > > - return test_bit(block + blks_per_folio, ifs->state); > > + return find_next_zero_bit(ifs->state, blks + end_blk + 1, > > + blks + start_blk) - blks; > > } > > > > static unsigned ifs_find_dirty_range(struct folio *folio, > > @@ -92,18 +121,15 @@ static unsigned ifs_find_dirty_range(struct folio *folio, > > offset_in_folio(folio, *range_start) >> inode->i_blkbits; > > unsigned end_blk = min_not_zero( > > offset_in_folio(folio, range_end) >> inode->i_blkbits, > > - i_blocks_per_folio(inode, folio)); > > - unsigned nblks = 1; > > + i_blocks_per_folio(inode, folio)) - 1; > > + unsigned nblks; > > > > - while (!ifs_block_is_dirty(folio, ifs, start_blk)) > > - if (++start_blk == end_blk) > > - return 0; > > + start_blk = ifs_next_dirty_block(folio, start_blk, end_blk); > > + if (start_blk > end_blk) > > + return 0; > > > > - while (start_blk + nblks < end_blk) { > > - if (!ifs_block_is_dirty(folio, ifs, start_blk + nblks)) > > - break; > 
> -			nblks++;
> > -	}
> > +	nblks = ifs_next_clean_block(folio, start_blk + 1, end_blk)
> > +			- start_blk;
>
> Not a critical problem since it looks like the helper bumps end_blk, but
> something that stands out to me here as mildly annoying is that we check
> for (start > end) just above, clearly implying that start == end is
> possible, then go and pass start + 1 and end to the next call. It's not
> clear to me if that's worth changing to make end exclusive, but may be
> worth thinking about if you haven't already..

<nod> I was also wondering if there were overflow possibilities here.

> Brian
>
> >
> >  	*range_start = folio_pos(folio) + (start_blk << inode->i_blkbits);
> >  	return nblks << inode->i_blkbits;
> > @@ -1077,7 +1103,7 @@ static void iomap_write_delalloc_ifs_punch(struct inode *inode,
> >  		struct folio *folio, loff_t start_byte, loff_t end_byte,
> >  		struct iomap *iomap, iomap_punch_t punch)
> >  {
> > -	unsigned int first_blk, last_blk, i;
> > +	unsigned int first_blk, last_blk;
> >  	loff_t last_byte;
> >  	u8 blkbits = inode->i_blkbits;
> >  	struct iomap_folio_state *ifs;
> > @@ -1096,10 +1122,13 @@ static void iomap_write_delalloc_ifs_punch(struct inode *inode,
> >  			folio_pos(folio) + folio_size(folio) - 1);
> >  	first_blk = offset_in_folio(folio, start_byte) >> blkbits;
> >  	last_blk = offset_in_folio(folio, last_byte) >> blkbits;
> > -	for (i = first_blk; i <= last_blk; i++) {
> > -		if (!ifs_block_is_dirty(folio, ifs, i))
> > -			punch(inode, folio_pos(folio) + (i << blkbits),
> > -				    1 << blkbits, iomap);
> > +	while (first_blk <= last_blk) {
> > +		first_blk = ifs_next_clean_block(folio, first_blk, last_blk);
> > +		if (first_blk > last_blk)
> > +			break;

I was wondering if the loop control logic would be cleaner done as a for
loop and came up with this monstrosity:

	for (first_blk = ifs_next_clean_block(folio, first_blk, last_blk);
	     first_blk <= last_blk;
	     first_blk = ifs_next_clean_block(folio, first_blk + 1, last_blk)) {
		punch(inode, folio_pos(folio) + (first_blk
<< blkbits), 1 << blkbits, iomap);
	}

Yeah.... better living through macros?

#define for_each_clean_block(folio, blk, last_blk) \
	for ((blk) = ifs_next_clean_block((folio), (blk), (last_blk)); (blk) <= (last_blk); (blk) = ifs_next_clean_block((folio), (blk) + 1, (last_blk)))

Somewhat cleaner:

	for_each_clean_block(folio, first_blk, last_blk)
		punch(inode, folio_pos(folio) + (first_blk << blkbits),
		      1 << blkbits, iomap);

<shrug>

--D

> > +		punch(inode, folio_pos(folio) + (first_blk << blkbits),
> > +		      1 << blkbits, iomap);
> > +		first_blk++;
> >  	}
> >  }
> >
> > --
> > 2.47.3
> >
> >
> >