On Mon 23-06-25 15:32:57, Baokun Li wrote:
> Groups with no free blocks shouldn't be in any average fragment size list.
> However, when all blocks in a group are allocated(i.e., bb_fragments or
> bb_free is 0), we currently skip updating the average fragment size, which
> means the group isn't removed from its previous s_mb_avg_fragment_size[old]
> list.
> 
> This created "zombie" groups that were always skipped during traversal as
> they couldn't satisfy any block allocation requests, negatively impacting
> traversal efficiency.
> 
> Therefore, when a group becomes completely free, bb_avg_fragment_size_order
                                             ^^^ full
> is now set to -1. If the old order was not -1, a removal operation is
> performed; if the new order is not -1, an insertion is performed.
> 
> Fixes: 196e402adf2e ("ext4: improve cr 0 / cr 1 group scanning")
> CC: stable@xxxxxxxxxxxxxxx
> Signed-off-by: Baokun Li <libaokun1@xxxxxxxxxx>

Good catch! The patch looks good. Feel free to add:

Reviewed-by: Jan Kara <jack@xxxxxxx>

                                                                Honza

> ---
>  fs/ext4/mballoc.c | 36 ++++++++++++++++++------------------
>  1 file changed, 18 insertions(+), 18 deletions(-)
> 
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index 94950b07a577..e6d6c2da3c6e 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -841,30 +841,30 @@ static void
>  mb_update_avg_fragment_size(struct super_block *sb, struct ext4_group_info *grp)
>  {
>          struct ext4_sb_info *sbi = EXT4_SB(sb);
> -        int new_order;
> +        int new, old;
>  
> -        if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || grp->bb_fragments == 0)
> +        if (!test_opt2(sb, MB_OPTIMIZE_SCAN))
>                  return;
>  
> -        new_order = mb_avg_fragment_size_order(sb,
> -                        grp->bb_free / grp->bb_fragments);
> -        if (new_order == grp->bb_avg_fragment_size_order)
> +        old = grp->bb_avg_fragment_size_order;
> +        new = grp->bb_fragments == 0 ? -1 :
> +              mb_avg_fragment_size_order(sb, grp->bb_free / grp->bb_fragments);
> +        if (new == old)
>                  return;
>  
> -        if (grp->bb_avg_fragment_size_order != -1) {
> -                write_lock(&sbi->s_mb_avg_fragment_size_locks[
> -                                grp->bb_avg_fragment_size_order]);
> +        if (old >= 0) {
> +                write_lock(&sbi->s_mb_avg_fragment_size_locks[old]);
>                  list_del(&grp->bb_avg_fragment_size_node);
> -                write_unlock(&sbi->s_mb_avg_fragment_size_locks[
> -                                grp->bb_avg_fragment_size_order]);
> -        }
> -        grp->bb_avg_fragment_size_order = new_order;
> -        write_lock(&sbi->s_mb_avg_fragment_size_locks[
> -                        grp->bb_avg_fragment_size_order]);
> -        list_add_tail(&grp->bb_avg_fragment_size_node,
> -                &sbi->s_mb_avg_fragment_size[grp->bb_avg_fragment_size_order]);
> -        write_unlock(&sbi->s_mb_avg_fragment_size_locks[
> -                        grp->bb_avg_fragment_size_order]);
> +                write_unlock(&sbi->s_mb_avg_fragment_size_locks[old]);
> +        }
> +
> +        grp->bb_avg_fragment_size_order = new;
> +        if (new >= 0) {
> +                write_lock(&sbi->s_mb_avg_fragment_size_locks[new]);
> +                list_add_tail(&grp->bb_avg_fragment_size_node,
> +                              &sbi->s_mb_avg_fragment_size[new]);
> +                write_unlock(&sbi->s_mb_avg_fragment_size_locks[new]);
> +        }
>  }
>  
>  /*
> -- 
> 2.46.1
> 

-- 
Jan Kara <jack@xxxxxxxx>
SUSE Labs, CR
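
For readers unfamiliar with the mballoc lists, the following standalone C
sketch illustrates the invariant the patch enforces: a fully allocated group
(bb_fragments == 0) gets order -1 and is kept off every average-fragment-size
list, while only groups with free extents are (re)inserted. The struct, the
toy_order() bucketing, and the printf stand-ins for the locked list operations
are simplified placeholders, not the actual kernel code.

/*
 * Standalone sketch (not kernel code): mirror the remove-then-conditionally-
 * insert pattern of the fixed mb_update_avg_fragment_size().
 */
#include <stdio.h>

struct toy_group {
        int bb_free;            /* free blocks in the group */
        int bb_fragments;       /* number of free extents */
        int order;              /* list the group currently sits on, or -1 */
};

/* Rough log2 bucketing of the average free-extent size. */
static int toy_order(int avg)
{
        int order = -1;

        while (avg > 1) {
                avg >>= 1;
                order++;
        }
        return order;
}

static void toy_update(struct toy_group *grp)
{
        int old = grp->order;
        int new = grp->bb_fragments == 0 ? -1 :
                  toy_order(grp->bb_free / grp->bb_fragments);

        if (new == old)
                return;
        if (old >= 0)
                printf("remove from list %d\n", old);   /* list_del() */
        grp->order = new;
        if (new >= 0)
                printf("add to list %d\n", new);        /* list_add_tail() */
}

int main(void)
{
        struct toy_group grp = { .bb_free = 128, .bb_fragments = 4, .order = -1 };

        toy_update(&grp);       /* 128/4 = 32 -> added to some list */
        grp.bb_free = grp.bb_fragments = 0;
        toy_update(&grp);       /* fully allocated -> removed, order == -1 */
        return 0;
}

The -1 sentinel plays the same role as in the patch: "this group is on no
list", so the zombie case of the old code (full group left on its previous
list) cannot occur.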