The superblock is stored in the buffer_head s_sbh in struct ext2_sb_info.
Replace this buffer head with the new ext2_buffer and update the buffer
functions accordingly.

This patch also introduces new buffer cache code needed for future patches.

Signed-off-by: Catherine Hoang <catherine.hoang@xxxxxxxxxx>
---
 fs/ext2/Makefile |   2 +-
 fs/ext2/cache.c  | 403 +++++++++++++++++++++++++++++++++++++++++++++++
 fs/ext2/ext2.h   |  43 ++++++-
 fs/ext2/super.c  |  52 +++++---
 fs/ext2/xattr.c  |   2 +-
 5 files changed, 480 insertions(+), 22 deletions(-)
 create mode 100644 fs/ext2/cache.c

diff --git a/fs/ext2/Makefile b/fs/ext2/Makefile
index 8860948ef9ca..e8b38243058f 100644
--- a/fs/ext2/Makefile
+++ b/fs/ext2/Makefile
@@ -5,7 +5,7 @@
 
 obj-$(CONFIG_EXT2_FS) += ext2.o
 
-ext2-y := balloc.o dir.o file.o ialloc.o inode.o \
+ext2-y := balloc.o cache.o dir.o file.o ialloc.o inode.o \
 	  ioctl.o namei.o super.o symlink.o trace.o
 
 # For tracepoints to include our trace.h from tracepoint infrastructure
diff --git a/fs/ext2/cache.c b/fs/ext2/cache.c
new file mode 100644
index 000000000000..464c506ba1b6
--- /dev/null
+++ b/fs/ext2/cache.c
@@ -0,0 +1,403 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2025 Oracle. All rights reserved.
+ */
+
+#include "ext2.h"
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/rhashtable.h>
+#include <linux/mm.h>
+#include <linux/types.h>
+
+/* Buffers are hashed by their block number, ext2_buffer.b_block. */
+static const struct rhashtable_params buffer_cache_params = {
+	.key_len = sizeof(sector_t),
+	.key_offset = offsetof(struct ext2_buffer, b_block),
+	.head_offset = offsetof(struct ext2_buffer, b_rhash),
+	.automatic_shrinking = true,
+};
+
+/* Take the per-buffer mutex. */
+void ext2_buffer_lock(struct ext2_buffer *buf)
+{
+	mutex_lock(&buf->b_lock);
+}
+
+/* Release the per-buffer mutex. */
+void ext2_buffer_unlock(struct ext2_buffer *buf)
+{
+	mutex_unlock(&buf->b_lock);
+}
+
+/* Mark the buffer as needing writeback. */
+void ext2_buffer_set_dirty(struct ext2_buffer *buf)
+{
+	set_bit(EXT2_BUF_DIRTY_BIT, &buf->b_flags);
+}
+
+/* Nonzero if the uptodate flag is set. */
+static int ext2_buffer_uptodate(struct ext2_buffer *buf)
+{
+	return test_bit(EXT2_BUF_UPTODATE_BIT, &buf->b_flags);
+}
+
+/* Set the uptodate flag. */
+void ext2_buffer_set_uptodate(struct ext2_buffer *buf)
+{
+	set_bit(EXT2_BUF_UPTODATE_BIT, &buf->b_flags);
+}
+
+/* Clear the uptodate flag so the next ext2_read_buffer() rereads it. */
+void ext2_buffer_clear_uptodate(struct ext2_buffer *buf)
+{
+	clear_bit(EXT2_BUF_UPTODATE_BIT, &buf->b_flags);
+}
+
+/* Status recorded by the last write completion (0 or a negative errno). */
+int ext2_buffer_error(struct ext2_buffer *buf)
+{
+	return buf->b_error;
+}
+
+/* Forget a previously recorded write error. */
+void ext2_buffer_clear_error(struct ext2_buffer *buf)
+{
+	buf->b_error = 0;
+}
+
+/*
+ * Publish @new_buf in the per-superblock cache.
+ *
+ * Returns the buffer now cached for new_buf->b_block with one extra reference
+ * taken for the caller: @new_buf if it was inserted, or the buffer that was
+ * already there.  Returns an ERR_PTR() (and takes no reference) if the
+ * insertion itself failed.
+ */
+static struct ext2_buffer *ext2_insert_buffer_cache(struct super_block *sb,
+						    struct ext2_buffer *new_buf)
+{
+	struct ext2_sb_info *sbi = EXT2_SB(sb);
+	struct ext2_buffer_cache *bc = &sbi->s_buffer_cache;
+	struct rhashtable *buffer_cache = &bc->bc_hash;
+	struct ext2_buffer *old_buf;
+
+	rcu_read_lock();
+	old_buf = rhashtable_lookup_get_insert_fast(buffer_cache,
+				&new_buf->b_rhash, buffer_cache_params);
+	if (IS_ERR(old_buf)) {
+		/* Insertion failed; an error pointer must not be touched. */
+		rcu_read_unlock();
+		return old_buf;
+	}
+
+	if (old_buf) {
+		refcount_inc(&old_buf->b_refcount);
+		rcu_read_unlock();
+		return old_buf;
+	}
+
+	refcount_inc(&new_buf->b_refcount);
+	rcu_read_unlock();
+	return new_buf;
+}
+
+/*
+ * Write completion: record the I/O status, wake the waiter and release the
+ * lock taken by ext2_submit_buffer_write().
+ *
+ * NOTE(review): b_lock is a mutex acquired by the submitting task but
+ * released here from bio completion.  Mutexes are expected to be unlocked
+ * by their owner, so this hand-off likely needs another primitive (which
+ * means changing b_lock in ext2.h) - confirm before relying on it.
+ */
+static void ext2_buf_write_end_io(struct bio *bio)
+{
+	struct ext2_buffer *buf = bio->bi_private;
+	int err = blk_status_to_errno(bio->bi_status);
+
+	buf->b_error = err;
+	complete(&buf->b_complete);
+	mutex_unlock(&buf->b_lock);
+	bio_put(bio);
+}
+
+/*
+ * Synchronously read block buf->b_block into buf->b_page.
+ *
+ * Returns 0 on success, in which case the buffer is marked uptodate, or the
+ * error returned by submit_bio_wait().
+ */
+static int ext2_submit_buffer_read(struct super_block *sb,
+				   struct ext2_buffer *buf)
+{
+	struct bio_vec bio_vec;
+	struct bio bio;
+	int error = 0;
+
+	mutex_lock(&buf->b_lock);
+
+	/* Another reader may have filled the buffer while we slept. */
+	if (ext2_buffer_uptodate(buf))
+		goto out_unlock;
+
+	buf->b_size = sb->s_blocksize;
+
+	bio_init(&bio, sb->s_bdev, &bio_vec, 1, REQ_OP_READ);
+	bio.bi_iter.bi_sector = buf->b_block * (sb->s_blocksize >> 9);
+	__bio_add_page(&bio, buf->b_page, buf->b_size, 0);
+
+	error = submit_bio_wait(&bio);
+	bio_uninit(&bio);
+
+	/* Only a successful read makes the page contents valid. */
+	if (!error)
+		ext2_buffer_set_uptodate(buf);
+
+out_unlock:
+	mutex_unlock(&buf->b_lock);
+	return error;
+}
+
+/*
+ * Start an asynchronous write of @buf.  The buffer is locked here and
+ * unlocked by ext2_buf_write_end_io(); callers wait on buf->b_complete.
+ */
+static void ext2_submit_buffer_write(struct super_block *sb,
+				     struct ext2_buffer *buf)
+{
+	struct bio *bio;
+
+	bio = bio_alloc(sb->s_bdev, 1, REQ_OP_WRITE, GFP_KERNEL);
+	bio->bi_iter.bi_sector = buf->b_block * (sb->s_blocksize >> 9);
+	bio->bi_end_io = ext2_buf_write_end_io;
+	bio->bi_private = buf;
+
+	__bio_add_page(bio, buf->b_page, buf->b_size, 0);
+
+	mutex_lock(&buf->b_lock);
+	submit_bio(bio);
+}
+
+/*
+ * Wait for every write queued on @submit_list and drop the reference that
+ * ext2_sync_buffer_cache() took on each buffer.  Returns the first write
+ * error seen, or 0.
+ */
+static int ext2_sync_buffer_cache_wait(struct list_head *submit_list)
+{
+	struct ext2_buffer *buf, *n;
+	int error = 0;
+
+	list_for_each_entry_safe(buf, n, submit_list, b_list) {
+		wait_for_completion(&buf->b_complete);
+		if (!error)
+			error = buf->b_error;
+		/* Unlink so b_list never points at the caller's stack. */
+		list_del_init(&buf->b_list);
+		refcount_dec(&buf->b_refcount);
+	}
+
+	return error;
+}
+
+/*
+ * Write @buf if it is dirty and wait for the write to finish.  Returns 0 if
+ * the buffer was clean, otherwise the status recorded by the completion.
+ */
+int ext2_sync_buffer_wait(struct super_block *sb, struct ext2_buffer *buf)
+{
+	if (!test_and_clear_bit(EXT2_BUF_DIRTY_BIT, &buf->b_flags))
+		return 0;
+
+	ext2_submit_buffer_write(sb, buf);
+	wait_for_completion(&buf->b_complete);
+	return buf->b_error;
+}
+
+/*
+ * Write back every dirty buffer in the cache of @sb and wait for all of the
+ * writes.  Returns the first write error seen, or 0.
+ */
+int ext2_sync_buffer_cache(struct super_block *sb)
+{
+	struct ext2_sb_info *sbi = EXT2_SB(sb);
+	struct ext2_buffer_cache *bc = &sbi->s_buffer_cache;
+	struct rhashtable *buffer_cache = &bc->bc_hash;
+	struct rhashtable_iter iter;
+	struct ext2_buffer *buf;
+	struct blk_plug plug;
+	LIST_HEAD(submit_list);
+
+	/* Pass 1: collect and pin every dirty buffer. */
+	rhashtable_walk_enter(buffer_cache, &iter);
+	rhashtable_walk_start(&iter);
+	while ((buf = rhashtable_walk_next(&iter)) != NULL) {
+		/* Error pointers returned by the walk are skipped. */
+		if (IS_ERR(buf))
+			continue;
+		if (test_and_clear_bit(EXT2_BUF_DIRTY_BIT, &buf->b_flags)) {
+			refcount_inc(&buf->b_refcount);
+			list_add(&buf->b_list, &submit_list);
+		}
+	}
+	rhashtable_walk_stop(&iter);
+	rhashtable_walk_exit(&iter);
+
+	/* Pass 2: submit all writes under one plug, then wait for them. */
+	blk_start_plug(&plug);
+	list_for_each_entry(buf, &submit_list, b_list)
+		ext2_submit_buffer_write(sb, buf);
+	blk_finish_plug(&plug);
+
+	return ext2_sync_buffer_cache_wait(&submit_list);
+}
+
+/*
+ * Look up @block in the cache.  Returns the buffer with a reference taken
+ * for the caller, or NULL if it is not cached or could not be pinned.
+ */
+static struct ext2_buffer *ext2_lookup_buffer_cache(struct super_block *sb,
+						    sector_t block)
+{
+	struct ext2_sb_info *sbi = EXT2_SB(sb);
+	struct ext2_buffer_cache *bc = &sbi->s_buffer_cache;
+	struct rhashtable *buffer_cache = &bc->bc_hash;
+	struct ext2_buffer *found = NULL;
+
+	rcu_read_lock();
+	found = rhashtable_lookup(buffer_cache, &block, buffer_cache_params);
+	if (found && !refcount_inc_not_zero(&found->b_refcount))
+		found = NULL;
+	rcu_read_unlock();
+
+	return found;
+}
+
+/*
+ * Allocate a buffer and its backing page for @block.  The page is zeroed
+ * when the caller is not going to read it from disk (!@need_uptodate).
+ * The new buffer starts with a refcount of 1.  Returns NULL on allocation
+ * failure.
+ */
+static struct ext2_buffer *ext2_init_buffer(struct super_block *sb,
+					    sector_t block, bool need_uptodate)
+{
+	struct ext2_buffer *buf;
+	gfp_t gfp = GFP_KERNEL;
+
+	buf = kmalloc(sizeof(*buf), GFP_KERNEL);
+	if (!buf)
+		return NULL;
+
+	buf->b_block = block;
+	buf->b_size = sb->s_blocksize;
+	buf->b_flags = 0;
+	buf->b_error = 0;
+
+	mutex_init(&buf->b_lock);
+	refcount_set(&buf->b_refcount, 1);
+	init_completion(&buf->b_complete);
+	INIT_LIST_HEAD(&buf->b_list);
+
+	if (!need_uptodate)
+		gfp |= __GFP_ZERO;
+
+	buf->b_page = alloc_page(gfp);
+	if (!buf->b_page) {
+		/* Never published, so no RCU grace period is needed. */
+		kfree(buf);
+		return NULL;
+	}
+
+	buf->b_data = page_address(buf->b_page);
+
+	return buf;
+}
+
+/*
+ * Free a buffer and its page.  Also used as the per-object callback of
+ * rhashtable_free_and_destroy(); @arg is unused.
+ */
+static void ext2_destroy_buffer(void *ptr, void *arg)
+{
+	struct ext2_buffer *buf = ptr;
+
+	WARN_ON(test_bit(EXT2_BUF_DIRTY_BIT, &buf->b_flags));
+	__free_page(buf->b_page);
+	kfree(buf);
+}
+
+/* Drop a reference obtained from ext2_get_buffer()/ext2_read_buffer(). */
+void ext2_put_buffer(struct super_block *sb, struct ext2_buffer *buf)
+{
+	if (!buf)
+		return;
+
+	WARN_ON(refcount_read(&buf->b_refcount) < 1);
+	refcount_dec(&buf->b_refcount);
+}
+
+/*
+ * Return the cached buffer for @block with a reference held, creating and
+ * inserting it if necessary.  With @need_uptodate the buffer is read from
+ * disk unless it is already uptodate.  Returns an ERR_PTR() on failure, in
+ * which case no reference is left with the caller.
+ */
+static struct ext2_buffer *ext2_find_get_buffer(struct super_block *sb,
+						sector_t block,
+						bool need_uptodate)
+{
+	struct ext2_buffer *buf;
+	struct ext2_buffer *new_buf;
+	int err;
+
+	buf = ext2_lookup_buffer_cache(sb, block);
+	if (!buf) {
+		new_buf = ext2_init_buffer(sb, block, need_uptodate);
+		if (!new_buf)
+			return ERR_PTR(-ENOMEM);
+
+		buf = ext2_insert_buffer_cache(sb, new_buf);
+		/* Lost the race or failed to insert: ours is unused. */
+		if (buf != new_buf)
+			ext2_destroy_buffer(new_buf, NULL);
+		if (IS_ERR(buf))
+			return buf;
+	}
+
+	if (need_uptodate && !ext2_buffer_uptodate(buf)) {
+		err = ext2_submit_buffer_read(sb, buf);
+		if (err) {
+			/* Do not leak the reference taken above. */
+			ext2_put_buffer(sb, buf);
+			return ERR_PTR(err);
+		}
+	}
+
+	return buf;
+}
+
+/* Get the buffer for @bno without reading it from disk. */
+struct ext2_buffer *ext2_get_buffer(struct super_block *sb, sector_t bno)
+{
+	return ext2_find_get_buffer(sb, bno, false);
+}
+
+/* Get the buffer for @bno, reading it from disk if it is not uptodate. */
+struct ext2_buffer *ext2_read_buffer(struct super_block *sb, sector_t bno)
+{
+	return ext2_find_get_buffer(sb, bno, true);
+}
+
+/* Set up the hash table backing @bc.  Returns what rhashtable_init() does. */
+int ext2_init_buffer_cache(struct ext2_buffer_cache *bc)
+{
+	return rhashtable_init(&bc->bc_hash, &buffer_cache_params);
+}
+
+/* Tear down @bc, freeing every buffer still in it. */
+void ext2_destroy_buffer_cache(struct ext2_buffer_cache *bc)
+{
+	rhashtable_free_and_destroy(&bc->bc_hash, ext2_destroy_buffer, NULL);
+}
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index f38bdd46e4f7..bfed70fd6430 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -18,6 +18,7 @@
 #include <linux/rbtree.h>
 #include <linux/mm.h>
 #include <linux/highmem.h>
+#include <linux/rhashtable.h>
 
 /* XXX Here for now...
not interested in restructing headers JUST now */ @@ -61,6 +62,29 @@ struct ext2_block_alloc_info { ext2_fsblk_t last_alloc_physical_block; }; +struct ext2_buffer { + sector_t b_block; + struct rhash_head b_rhash; + struct rcu_head b_rcu; + struct page *b_page; + size_t b_size; + char *b_data; + unsigned long b_flags; + refcount_t b_refcount; + struct mutex b_lock; + struct completion b_complete; + struct list_head b_list; + int b_error; +}; + +/* ext2_buffer flags */ +#define EXT2_BUF_DIRTY_BIT 0 +#define EXT2_BUF_UPTODATE_BIT 1 + +struct ext2_buffer_cache { + struct rhashtable bc_hash; +}; + #define rsv_start rsv_window._rsv_start #define rsv_end rsv_window._rsv_end @@ -79,7 +103,7 @@ struct ext2_sb_info { unsigned long s_groups_count; /* Number of groups in the fs */ unsigned long s_overhead_last; /* Last calculated overhead */ unsigned long s_blocks_last; /* Last seen block count */ - struct buffer_head * s_sbh; /* Buffer containing the super block */ + struct ext2_buffer * s_sbuf; /* Buffer containing the super block */ struct ext2_super_block * s_es; /* Pointer to the super block in the buffer */ struct buffer_head ** s_group_desc; unsigned long s_mount_opt; @@ -116,6 +140,7 @@ struct ext2_sb_info { struct mb_cache *s_ea_block_cache; struct dax_device *s_daxdev; u64 s_dax_part_off; + struct ext2_buffer_cache s_buffer_cache; }; static inline spinlock_t * @@ -716,6 +741,22 @@ extern int ext2_should_retry_alloc(struct super_block *sb, int *retries); extern void ext2_init_block_alloc_info(struct inode *); extern void ext2_rsv_window_add(struct super_block *sb, struct ext2_reserve_window_node *rsv); +/* cache.c */ +extern void ext2_buffer_lock(struct ext2_buffer *); +extern void ext2_buffer_unlock(struct ext2_buffer *); +extern int ext2_init_buffer_cache(struct ext2_buffer_cache *); +extern void ext2_destroy_buffer_cache(struct ext2_buffer_cache *); +extern int ext2_sync_buffer_wait(struct super_block *, struct ext2_buffer *); +extern int 
ext2_sync_buffer_cache(struct super_block *); +extern struct ext2_buffer *ext2_get_buffer(struct super_block *, sector_t); +extern struct ext2_buffer *ext2_read_buffer(struct super_block *, sector_t); +extern void ext2_put_buffer(struct super_block *, struct ext2_buffer *); +extern void ext2_buffer_set_dirty(struct ext2_buffer *); +extern void ext2_buffer_set_uptodate(struct ext2_buffer *); +extern void ext2_buffer_clear_uptodate(struct ext2_buffer *); +extern int ext2_buffer_error(struct ext2_buffer *); +extern void ext2_buffer_clear_error(struct ext2_buffer *); + /* dir.c */ int ext2_add_link(struct dentry *, struct inode *); int ext2_inode_by_name(struct inode *dir, diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 37f7ce56adce..ac53f587d140 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -168,7 +168,8 @@ static void ext2_put_super (struct super_block * sb) percpu_counter_destroy(&sbi->s_freeblocks_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); - brelse (sbi->s_sbh); + ext2_put_buffer (sb, sbi->s_sbuf); + ext2_destroy_buffer_cache(&sbi->s_buffer_cache); sb->s_fs_info = NULL; kfree(sbi->s_blockgroup_lock); fs_put_dax(sbi->s_daxdev, NULL); @@ -803,7 +804,7 @@ static unsigned long descriptor_loc(struct super_block *sb, static int ext2_fill_super(struct super_block *sb, void *data, int silent) { - struct buffer_head * bh; + struct ext2_buffer * buf; struct ext2_sb_info * sbi; struct ext2_super_block * es; struct inode *root; @@ -835,6 +836,12 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) sbi->s_daxdev = fs_dax_get_by_bdev(sb->s_bdev, &sbi->s_dax_part_off, NULL, NULL); + ret = ext2_init_buffer_cache(&sbi->s_buffer_cache); + if (ret) { + ext2_msg(sb, KERN_ERR, "error: unable to create buffer cache"); + goto failed_sbi; + } + spin_lock_init(&sbi->s_lock); ret = -EINVAL; @@ -862,7 +869,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) 
logic_sb_block = sb_block; } - if (!(bh = sb_bread(sb, logic_sb_block))) { + if (IS_ERR(buf = ext2_read_buffer(sb, logic_sb_block))) { ext2_msg(sb, KERN_ERR, "error: unable to read superblock"); goto failed_sbi; } @@ -870,7 +877,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) * Note: s_es must be initialized as soon as possible because * some ext2 macro-instructions depend on its value */ - es = (struct ext2_super_block *) (((char *)bh->b_data) + offset); + es = (struct ext2_super_block *) (((char *)buf->b_data) + offset); sbi->s_es = es; sb->s_magic = le16_to_cpu(es->s_magic); @@ -966,7 +973,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) /* If the blocksize doesn't match, re-read the thing.. */ if (sb->s_blocksize != blocksize) { - brelse(bh); + ext2_buffer_clear_uptodate(buf); + ext2_put_buffer(sb, buf); if (!sb_set_blocksize(sb, blocksize)) { ext2_msg(sb, KERN_ERR, @@ -976,13 +984,13 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) logic_sb_block = (sb_block*BLOCK_SIZE) / blocksize; offset = (sb_block*BLOCK_SIZE) % blocksize; - bh = sb_bread(sb, logic_sb_block); - if(!bh) { + buf = ext2_read_buffer(sb, logic_sb_block); + if(IS_ERR(buf)) { ext2_msg(sb, KERN_ERR, "error: couldn't read" "superblock on 2nd try"); goto failed_sbi; } - es = (struct ext2_super_block *) (((char *)bh->b_data) + offset); + es = (struct ext2_super_block *) (((char *)buf->b_data) + offset); sbi->s_es = es; if (es->s_magic != cpu_to_le16(EXT2_SUPER_MAGIC)) { ext2_msg(sb, KERN_ERR, "error: magic mismatch"); @@ -1021,7 +1029,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) sbi->s_inodes_per_block; sbi->s_desc_per_block = sb->s_blocksize / sizeof (struct ext2_group_desc); - sbi->s_sbh = bh; + sbi->s_sbuf = buf; sbi->s_mount_state = le16_to_cpu(es->s_state); sbi->s_addr_per_block_bits = ilog2 (EXT2_ADDR_PER_BLOCK(sb)); @@ -1031,7 +1039,7 @@ static int 
ext2_fill_super(struct super_block *sb, void *data, int silent) if (sb->s_magic != EXT2_SUPER_MAGIC) goto cantfind_ext2; - if (sb->s_blocksize != bh->b_size) { + if (sb->s_blocksize != buf->b_size) { if (!silent) ext2_msg(sb, KERN_ERR, "error: unsupported blocksize"); goto failed_mount; @@ -1213,7 +1221,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) kvfree(sbi->s_group_desc); kfree(sbi->s_debts); failed_mount: - brelse(bh); + ext2_put_buffer(sb, buf); failed_sbi: fs_put_dax(sbi->s_daxdev, NULL); sb->s_fs_info = NULL; @@ -1224,9 +1232,9 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) static void ext2_clear_super_error(struct super_block *sb) { - struct buffer_head *sbh = EXT2_SB(sb)->s_sbh; + struct ext2_buffer *sbuf = EXT2_SB(sb)->s_sbuf; - if (buffer_write_io_error(sbh)) { + if (ext2_buffer_error(sbuf)) { /* * Oh, dear. A previous attempt to write the * superblock failed. This could happen because the @@ -1237,8 +1245,8 @@ static void ext2_clear_super_error(struct super_block *sb) */ ext2_msg(sb, KERN_ERR, "previous I/O error to superblock detected"); - clear_buffer_write_io_error(sbh); - set_buffer_uptodate(sbh); + ext2_buffer_clear_error(sbuf); + ext2_buffer_set_uptodate(sbuf); } } @@ -1252,9 +1260,9 @@ void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es, es->s_wtime = cpu_to_le32(ktime_get_real_seconds()); /* unlock before we do IO */ spin_unlock(&EXT2_SB(sb)->s_lock); - mark_buffer_dirty(EXT2_SB(sb)->s_sbh); + ext2_buffer_set_dirty(EXT2_SB(sb)->s_sbuf); if (wait) - sync_dirty_buffer(EXT2_SB(sb)->s_sbh); + ext2_sync_buffer_wait(sb, EXT2_SB(sb)->s_sbuf); } /* @@ -1271,13 +1279,19 @@ static int ext2_sync_fs(struct super_block *sb, int wait) { struct ext2_sb_info *sbi = EXT2_SB(sb); struct ext2_super_block *es = EXT2_SB(sb)->s_es; + int err = 0; /* * Write quota structures to quota file, sync_blockdev() will write * them to disk later */ - dquot_writeback_dquots(sb, -1); + err = 
dquot_writeback_dquots(sb, -1); + if (err) + goto out; + + err = ext2_sync_buffer_cache(sb); +out: spin_lock(&sbi->s_lock); if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) { ext2_debug("setting valid to 0\n"); @@ -1285,7 +1299,7 @@ static int ext2_sync_fs(struct super_block *sb, int wait) } spin_unlock(&sbi->s_lock); ext2_sync_super(sb, es, wait); - return 0; + return err; } static int ext2_freeze(struct super_block *sb) diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index c885dcc3bd0d..1eb4a8607f67 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -387,7 +387,7 @@ static void ext2_xattr_update_super_block(struct super_block *sb) ext2_update_dynamic_rev(sb); EXT2_SET_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR); spin_unlock(&EXT2_SB(sb)->s_lock); - mark_buffer_dirty(EXT2_SB(sb)->s_sbh); + ext2_buffer_set_dirty(EXT2_SB(sb)->s_sbuf); } /* -- 2.43.0