From: Davidlohr Bueso <dave@xxxxxxxxxxxx> Add semantics to enable future optimizations for buffer head noref jbd2 migration. This adds a new BH_Migrate flag which ensures we can bail on the lookup path. This should enable jbd2 to get semantics of when a buffer head is under folio migration, and should yield to it and to eventually remove the buffer_meta() check skipping current jbd2 folio migration. Suggested-by: Jan Kara <jack@xxxxxxx> Co-developed-by: Luis Chamberlain <mcgrof@xxxxxxxxxx> Signed-off-by: Davidlohr Bueso <dave@xxxxxxxxxxxx> Signed-off-by: Luis Chamberlain <mcgrof@xxxxxxxxxx> --- fs/buffer.c | 12 +++++++++++- fs/ext4/ialloc.c | 3 ++- include/linux/buffer_head.h | 1 + mm/migrate.c | 4 ++++ 4 files changed, 18 insertions(+), 2 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index 07ec57ec100e..753fc45667da 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -211,6 +211,15 @@ __find_get_block_slow(struct block_device *bdev, sector_t block, bool atomic) head = folio_buffers(folio); if (!head) goto out_unlock; + /* + * Upon a noref migration, the folio lock serializes here; + * otherwise bail. + */ + if (test_bit_acquire(BH_Migrate, &head->b_state)) { + WARN_ON(folio_locked); + goto out_unlock; + } + bh = head; do { if (!buffer_mapped(bh)) @@ -1393,7 +1402,8 @@ lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size) /* * Perform a pagecache lookup for the matching buffer. If it's there, refresh * it in the LRU and mark it as accessed. If it is not present then return - * NULL + * NULL. Atomic context callers may also return NULL if the buffer is being + * migrated; similarly the page is not marked accessed either. */ static struct buffer_head * find_get_block_common(struct block_device *bdev, sector_t block, diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 38bc8d74f4cc..e7ecc7c8a729 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -691,7 +691,8 @@ static int recently_deleted(struct super_block *sb, ext4_group_t group, int ino) if (!bh || !buffer_uptodate(bh)) /* * If the block is not in the buffer cache, then it - * must have been written out. + * must have been written out, or, most unlikely, is + * being migrated - false failure should be OK here. */ goto out; diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 8db10ca288fc..5559b906b1de 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -34,6 +34,7 @@ enum bh_state_bits { BH_Meta, /* Buffer contains metadata */ BH_Prio, /* Buffer should be submitted with REQ_PRIO */ BH_Defer_Completion, /* Defer AIO completion to workqueue */ + BH_Migrate, /* Buffer is being migrated (norefs) */ BH_PrivateStart,/* not a state bit, but the first bit available * for private allocation by other entities diff --git a/mm/migrate.c b/mm/migrate.c index 32fa72ba10b4..8fed2655f2e8 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -851,6 +851,8 @@ static int __buffer_migrate_folio(struct address_space *mapping, bool busy; bool invalidated = false; + VM_WARN_ON_ONCE(test_and_set_bit_lock(BH_Migrate, + &head->b_state)); recheck_buffers: busy = false; spin_lock(&mapping->i_private_lock); @@ -884,6 +886,8 @@ static int __buffer_migrate_folio(struct address_space *mapping, bh = bh->b_this_page; } while (bh != head); unlock_buffers: + if (check_refs) + clear_bit_unlock(BH_Migrate, &head->b_state); bh = head; do { unlock_buffer(bh); -- 2.47.2