On Mon 23-06-25 15:32:56, Baokun Li wrote: > Attempt to merge ext4_free_data with already inserted free extents prior > to adding new ones. This strategy drastically cuts down the number of > times locks are held. > > For example, if prev, new, and next extents are all mergeable, the existing > code (before this patch) requires acquiring the s_md_lock three times: > > prev merge into new and free prev // hold lock > next merge into new and free next // hold lock > insert new // hold lock > > After the patch, it only needs to be acquired once: > > new merge next and free new // no lock > next merge into prev and free prev // hold lock > > Performance test data follows: > > Test: Running will-it-scale/fallocate2 on CPU-bound containers. > Observation: Average fallocate operations per container per second. > > | Kunpeng 920 / 512GB -P80| AMD 9654 / 1536GB -P96 | > Disk: 960GB SSD |-------------------------|-------------------------| > | base | patched | base | patched | > -------------------|-------|-----------------|-------|-----------------| > mb_optimize_scan=0 | 20982 | 21157 (+0.8%) | 50629 | 50420 (-0.4%) | > mb_optimize_scan=1 | 10703 | 12896 (+20.4%) | 14856 | 17273 (+16.2%) | > > Signed-off-by: Baokun Li <libaokun1@xxxxxxxxxx> Looks good. Feel free to add: Reviewed-by: Jan Kara <jack@xxxxxxx> Honza > --- > fs/ext4/mballoc.c | 113 +++++++++++++++++++++++++++++++--------------- > 1 file changed, 76 insertions(+), 37 deletions(-) > > diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c > index 5410fb3688ee..94950b07a577 100644 > --- a/fs/ext4/mballoc.c > +++ b/fs/ext4/mballoc.c > @@ -6298,28 +6298,63 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, > * are contiguous, AND the extents were freed by the same transaction, > * AND the blocks are associated with the same group. > */ > -static void ext4_try_merge_freed_extent(struct ext4_sb_info *sbi, > - struct ext4_free_data *entry, > - struct ext4_free_data *new_entry, > - struct rb_root *entry_rb_root) > +static inline bool > +ext4_freed_extents_can_be_merged(struct ext4_free_data *entry1, > + struct ext4_free_data *entry2) > { > - if ((entry->efd_tid != new_entry->efd_tid) || > - (entry->efd_group != new_entry->efd_group)) > - return; > - if (entry->efd_start_cluster + entry->efd_count == > - new_entry->efd_start_cluster) { > - new_entry->efd_start_cluster = entry->efd_start_cluster; > - new_entry->efd_count += entry->efd_count; > - } else if (new_entry->efd_start_cluster + new_entry->efd_count == > - entry->efd_start_cluster) { > - new_entry->efd_count += entry->efd_count; > - } else > - return; > + if (entry1->efd_tid != entry2->efd_tid) > + return false; > + if (entry1->efd_start_cluster + entry1->efd_count != > + entry2->efd_start_cluster) > + return false; > + if (WARN_ON_ONCE(entry1->efd_group != entry2->efd_group)) > + return false; > + return true; > +} > + > +static inline void > +ext4_merge_freed_extents(struct ext4_sb_info *sbi, struct rb_root *root, > + struct ext4_free_data *entry1, > + struct ext4_free_data *entry2) > +{ > + entry1->efd_count += entry2->efd_count; > spin_lock(&sbi->s_md_lock); > - list_del(&entry->efd_list); > + list_del(&entry2->efd_list); > spin_unlock(&sbi->s_md_lock); > - rb_erase(&entry->efd_node, entry_rb_root); > - kmem_cache_free(ext4_free_data_cachep, entry); > + rb_erase(&entry2->efd_node, root); > + kmem_cache_free(ext4_free_data_cachep, entry2); > +} > + > +static inline void > +ext4_try_merge_freed_extent_prev(struct ext4_sb_info *sbi, struct rb_root *root, > + struct ext4_free_data *entry) > +{ > + struct ext4_free_data *prev; > + struct rb_node *node; > + > + node = rb_prev(&entry->efd_node); > + if (!node) > + return; > + > + prev = rb_entry(node, struct ext4_free_data, efd_node); > + if (ext4_freed_extents_can_be_merged(prev, entry)) > + ext4_merge_freed_extents(sbi, root, prev, entry); > +} > + > +static inline void > +ext4_try_merge_freed_extent_next(struct ext4_sb_info *sbi, struct rb_root *root, > + struct ext4_free_data *entry) > +{ > + struct ext4_free_data *next; > + struct rb_node *node; > + > + node = rb_next(&entry->efd_node); > + if (!node) > + return; > + > + next = rb_entry(node, struct ext4_free_data, efd_node); > + if (ext4_freed_extents_can_be_merged(entry, next)) > + ext4_merge_freed_extents(sbi, root, entry, next); > } > > static noinline_for_stack void > @@ -6329,11 +6364,12 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, > ext4_group_t group = e4b->bd_group; > ext4_grpblk_t cluster; > ext4_grpblk_t clusters = new_entry->efd_count; > - struct ext4_free_data *entry; > + struct ext4_free_data *entry = NULL; > struct ext4_group_info *db = e4b->bd_info; > struct super_block *sb = e4b->bd_sb; > struct ext4_sb_info *sbi = EXT4_SB(sb); > - struct rb_node **n = &db->bb_free_root.rb_node, *node; > + struct rb_root *root = &db->bb_free_root; > + struct rb_node **n = &root->rb_node; > struct rb_node *parent = NULL, *new_node; > > BUG_ON(!ext4_handle_valid(handle)); > @@ -6369,27 +6405,30 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, > } > } > > - rb_link_node(new_node, parent, n); > - rb_insert_color(new_node, &db->bb_free_root); > - > - /* Now try to see the extent can be merged to left and right */ > - node = rb_prev(new_node); > - if (node) { > - entry = rb_entry(node, struct ext4_free_data, efd_node); > - ext4_try_merge_freed_extent(sbi, entry, new_entry, > - &(db->bb_free_root)); > + atomic_add(clusters, &sbi->s_mb_free_pending); > + if (!entry) > + goto insert; > + > + /* Now try to see the extent can be merged to prev and next */ > + if (ext4_freed_extents_can_be_merged(new_entry, entry)) { > + entry->efd_start_cluster = cluster; > + entry->efd_count += new_entry->efd_count; > + kmem_cache_free(ext4_free_data_cachep, new_entry); > + ext4_try_merge_freed_extent_prev(sbi, root, entry); > + return; > } > - > - node = rb_next(new_node); > - if (node) { > - entry = rb_entry(node, struct ext4_free_data, efd_node); > - ext4_try_merge_freed_extent(sbi, entry, new_entry, > - &(db->bb_free_root)); > + if (ext4_freed_extents_can_be_merged(entry, new_entry)) { > + entry->efd_count += new_entry->efd_count; > + kmem_cache_free(ext4_free_data_cachep, new_entry); > + ext4_try_merge_freed_extent_next(sbi, root, entry); > + return; > } > +insert: > + rb_link_node(new_node, parent, n); > + rb_insert_color(new_node, root); > > spin_lock(&sbi->s_md_lock); > list_add_tail(&new_entry->efd_list, &sbi->s_freed_data_list[new_entry->efd_tid & 1]); > - atomic_add(clusters, &sbi->s_mb_free_pending); > spin_unlock(&sbi->s_md_lock); > } > > -- > 2.46.1 > -- Jan Kara <jack@xxxxxxxx> SUSE Labs, CR