Hi Slava,
Sorry for the late reply.
在 2025/7/11 05:36, Viacheslav Dubeyko 写道:
The hfs_find_init() method can trigger the crash
if tree pointer is NULL:
[ 45.746290][ T9787] Oops: general protection fault, probably for non-canonical address 0xdffffc0000000008: 0000 [#1] SMP KAI
[ 45.747287][ T9787] KASAN: null-ptr-deref in range [0x0000000000000040-0x0000000000000047]
[ 45.748716][ T9787] CPU: 2 UID: 0 PID: 9787 Comm: repro Not tainted 6.16.0-rc3 #10 PREEMPT(full)
[ 45.750250][ T9787] Hardware name: QEMU Ubuntu 24.04 PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
[ 45.751983][ T9787] RIP: 0010:hfs_find_init+0x86/0x230
[ 45.752834][ T9787] Code: c1 ea 03 80 3c 02 00 0f 85 9a 01 00 00 4c 8d 6b 40 48 c7 45 18 00 00 00 00 48 b8 00 00 00 00 00 fc
[ 45.755574][ T9787] RSP: 0018:ffffc90015157668 EFLAGS: 00010202
[ 45.756432][ T9787] RAX: dffffc0000000000 RBX: 0000000000000000 RCX: ffffffff819a4d09
[ 45.757457][ T9787] RDX: 0000000000000008 RSI: ffffffff819acd3a RDI: ffffc900151576e8
[ 45.758282][ T9787] RBP: ffffc900151576d0 R08: 0000000000000005 R09: 0000000000000000
[ 45.758943][ T9787] R10: 0000000080000000 R11: 0000000000000001 R12: 0000000000000004
[ 45.759619][ T9787] R13: 0000000000000040 R14: ffff88802c50814a R15: 0000000000000000
[ 45.760293][ T9787] FS: 00007ffb72734540(0000) GS:ffff8880cec64000(0000) knlGS:0000000000000000
[ 45.761050][ T9787] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 45.761606][ T9787] CR2: 00007f9bd8225000 CR3: 000000010979a000 CR4: 00000000000006f0
[ 45.762286][ T9787] Call Trace:
[ 45.762570][ T9787] <TASK>
[ 45.762824][ T9787] hfs_ext_read_extent+0x190/0x9d0
[ 45.763269][ T9787] ? submit_bio_noacct_nocheck+0x2dd/0xce0
[ 45.763766][ T9787] ? __pfx_hfs_ext_read_extent+0x10/0x10
[ 45.764250][ T9787] hfs_get_block+0x55f/0x830
[ 45.764646][ T9787] block_read_full_folio+0x36d/0x850
[ 45.765105][ T9787] ? __pfx_hfs_get_block+0x10/0x10
[ 45.765541][ T9787] ? const_folio_flags+0x5b/0x100
[ 45.765972][ T9787] ? __pfx_hfs_read_folio+0x10/0x10
[ 45.766415][ T9787] filemap_read_folio+0xbe/0x290
[ 45.766840][ T9787] ? __pfx_filemap_read_folio+0x10/0x10
[ 45.767325][ T9787] ? __filemap_get_folio+0x32b/0xbf0
[ 45.767780][ T9787] do_read_cache_folio+0x263/0x5c0
[ 45.768223][ T9787] ? __pfx_hfs_read_folio+0x10/0x10
[ 45.768666][ T9787] read_cache_page+0x5b/0x160
[ 45.769070][ T9787] hfs_btree_open+0x491/0x1740
[ 45.769481][ T9787] hfs_mdb_get+0x15e2/0x1fb0
[ 45.769877][ T9787] ? __pfx_hfs_mdb_get+0x10/0x10
[ 45.770316][ T9787] ? find_held_lock+0x2b/0x80
[ 45.770731][ T9787] ? lockdep_init_map_type+0x5c/0x280
[ 45.771200][ T9787] ? lockdep_init_map_type+0x5c/0x280
[ 45.771674][ T9787] hfs_fill_super+0x38e/0x720
[ 45.772092][ T9787] ? __pfx_hfs_fill_super+0x10/0x10
[ 45.772549][ T9787] ? snprintf+0xbe/0x100
[ 45.772931][ T9787] ? __pfx_snprintf+0x10/0x10
[ 45.773350][ T9787] ? do_raw_spin_lock+0x129/0x2b0
[ 45.773796][ T9787] ? find_held_lock+0x2b/0x80
[ 45.774215][ T9787] ? set_blocksize+0x40a/0x510
[ 45.774636][ T9787] ? sb_set_blocksize+0x176/0x1d0
[ 45.775087][ T9787] ? setup_bdev_super+0x369/0x730
[ 45.775533][ T9787] get_tree_bdev_flags+0x384/0x620
[ 45.775985][ T9787] ? __pfx_hfs_fill_super+0x10/0x10
[ 45.776453][ T9787] ? __pfx_get_tree_bdev_flags+0x10/0x10
[ 45.776950][ T9787] ? bpf_lsm_capable+0x9/0x10
[ 45.777365][ T9787] ? security_capable+0x80/0x260
[ 45.777803][ T9787] vfs_get_tree+0x8e/0x340
[ 45.778203][ T9787] path_mount+0x13de/0x2010
[ 45.778604][ T9787] ? kmem_cache_free+0x2b0/0x4c0
[ 45.779052][ T9787] ? __pfx_path_mount+0x10/0x10
[ 45.779480][ T9787] ? getname_flags.part.0+0x1c5/0x550
[ 45.779954][ T9787] ? putname+0x154/0x1a0
[ 45.780335][ T9787] __x64_sys_mount+0x27b/0x300
[ 45.780758][ T9787] ? __pfx___x64_sys_mount+0x10/0x10
[ 45.781232][ T9787] do_syscall_64+0xc9/0x480
[ 45.781631][ T9787] entry_SYSCALL_64_after_hwframe+0x77/0x7f
[ 45.782149][ T9787] RIP: 0033:0x7ffb7265b6ca
[ 45.782539][ T9787] Code: 48 8b 0d c9 17 0d 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48
[ 45.784212][ T9787] RSP: 002b:00007ffc0c10cfb8 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5
[ 45.784935][ T9787] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007ffb7265b6ca
[ 45.785626][ T9787] RDX: 0000200000000240 RSI: 0000200000000280 RDI: 00007ffc0c10d100
[ 45.786316][ T9787] RBP: 00007ffc0c10d190 R08: 00007ffc0c10d000 R09: 0000000000000000
[ 45.787011][ T9787] R10: 0000000000000048 R11: 0000000000000206 R12: 0000560246733250
[ 45.787697][ T9787] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
[ 45.788393][ T9787] </TASK>
[ 45.788665][ T9787] Modules linked in:
[ 45.789058][ T9787] ---[ end trace 0000000000000000 ]---
[ 45.789554][ T9787] RIP: 0010:hfs_find_init+0x86/0x230
[ 45.790028][ T9787] Code: c1 ea 03 80 3c 02 00 0f 85 9a 01 00 00 4c 8d 6b 40 48 c7 45 18 00 00 00 00 48 b8 00 00 00 00 00 fc
[ 45.792364][ T9787] RSP: 0018:ffffc90015157668 EFLAGS: 00010202
[ 45.793155][ T9787] RAX: dffffc0000000000 RBX: 0000000000000000 RCX: ffffffff819a4d09
[ 45.794123][ T9787] RDX: 0000000000000008 RSI: ffffffff819acd3a RDI: ffffc900151576e8
[ 45.795105][ T9787] RBP: ffffc900151576d0 R08: 0000000000000005 R09: 0000000000000000
[ 45.796135][ T9787] R10: 0000000080000000 R11: 0000000000000001 R12: 0000000000000004
[ 45.797114][ T9787] R13: 0000000000000040 R14: ffff88802c50814a R15: 0000000000000000
[ 45.798024][ T9787] FS: 00007ffb72734540(0000) GS:ffff8880cec64000(0000) knlGS:0000000000000000
[ 45.799019][ T9787] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 45.799822][ T9787] CR2: 00007f9bd8225000 CR3: 000000010979a000 CR4: 00000000000006f0
[ 45.800747][ T9787] Kernel panic - not syncing: Fatal exception
The hfs_fill_super() calls hfs_mdb_get() method that tries
to construct Extents Tree and Catalog Tree:
HFS_SB(sb)->ext_tree = hfs_btree_open(sb, HFS_EXT_CNID, hfs_ext_keycmp);
if (!HFS_SB(sb)->ext_tree) {
pr_err("unable to open extent tree\n");
goto out;
}
HFS_SB(sb)->cat_tree = hfs_btree_open(sb, HFS_CAT_CNID, hfs_cat_keycmp);
if (!HFS_SB(sb)->cat_tree) {
pr_err("unable to open catalog tree\n");
goto out;
}
However, hfs_btree_open() calls read_mapping_page() that
calls hfs_get_block(). And this method calls hfs_ext_read_extent():
static int hfs_ext_read_extent(struct inode *inode, u16 block)
{
struct hfs_find_data fd;
int res;
if (block >= HFS_I(inode)->cached_start &&
block < HFS_I(inode)->cached_start + HFS_I(inode)->cached_blocks)
return 0;
res = hfs_find_init(HFS_SB(inode->i_sb)->ext_tree, &fd);
if (!res) {
res = __hfs_ext_cache_extent(&fd, inode, block);
hfs_find_exit(&fd);
}
return res;
}
The problem here that hfs_find_init() is trying to use
HFS_SB(inode->i_sb)->ext_tree that is not initialized yet.
It will be initailized when hfs_btree_open() finishes
the execution.
The patch adds checking of tree pointer in hfs_find_init()
and it reworks the logic of hfs_btree_open() by reading
the b-tree's header directly from the volume. The read_mapping_page()
is exchanged on filemap_grab_folio() that grab the folio from
mapping. Then, sb_bread() extracts the b-tree's header
content and copy it into the folio.
Reported-by: Wenzhi Wang <wenzhi.wang@xxxxxxxxxxxx>
Signed-off-by: Viacheslav Dubeyko <slava@xxxxxxxxxxx>
cc: John Paul Adrian Glaubitz <glaubitz@xxxxxxxxxxxxxxxxxxx>
cc: Yangtao Li <frank.li@xxxxxxxx>
cc: linux-fsdevel@xxxxxxxxxxxxxxx
---
fs/hfs/bfind.c | 3 +++
fs/hfs/btree.c | 57 +++++++++++++++++++++++++++++++++++++++----------
fs/hfs/extent.c | 2 +-
fs/hfs/hfs_fs.h | 1 +
4 files changed, 51 insertions(+), 12 deletions(-)
diff --git a/fs/hfs/bfind.c b/fs/hfs/bfind.c
index ef9498a6e88a..34e9804e0f36 100644
--- a/fs/hfs/bfind.c
+++ b/fs/hfs/bfind.c
@@ -16,6 +16,9 @@ int hfs_find_init(struct hfs_btree *tree, struct hfs_find_data *fd)
{
void *ptr;
+ if (!tree || !fd)
+ return -EINVAL;
+
fd->tree = tree;
fd->bnode = NULL;
ptr = kmalloc(tree->max_key_len * 2 + 4, GFP_KERNEL);
diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c
index 2fa4b1f8cc7f..e86e1e235658 100644
--- a/fs/hfs/btree.c
+++ b/fs/hfs/btree.c
@@ -21,8 +21,12 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke
struct hfs_btree *tree;
struct hfs_btree_header_rec *head;
struct address_space *mapping;
- struct page *page;
+ struct folio *folio;
+ struct buffer_head *bh;
unsigned int size;
+ u16 dblock;
+ sector_t start_block;
+ loff_t offset;
tree = kzalloc(sizeof(*tree), GFP_KERNEL);
if (!tree)
@@ -75,12 +79,40 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke
unlock_new_inode(tree->inode);
mapping = tree->inode->i_mapping;
- page = read_mapping_page(mapping, 0, NULL);
- if (IS_ERR(page))
We need to read hfs_btree_header_rec(106 bytes) from first block,
which is much smaller than a sector(512 bytes) or a page (4k or bigger?).
Maybe we don't need to read a full page?
+ folio = filemap_grab_folio(mapping, 0);
+ if (IS_ERR(folio))
goto free_inode;
+ folio_zero_range(folio, 0, folio_size(folio));
+
+ dblock = hfs_ext_find_block(HFS_I(tree->inode)->first_extents, 0);
+ start_block = HFS_SB(sb)->fs_start + (dblock * HFS_SB(sb)->fs_div);
It's not elegant to expose such mapping logic code.
Could we have a common map func converting logic block to physical
block, xxx_get_block uses it, and here uses it?
+
+ size = folio_size(folio);
+ offset = 0;
+ while (size > 0) {
+ size_t len;
+
+ bh = sb_bread(sb, start_block);
+ if (!bh) {
+ pr_err("unable to read tree header\n");
+ goto put_folio;
+ }
+
+ len = min_t(size_t, folio_size(folio), sb->s_blocksize);
+ memcpy_to_folio(folio, offset, bh->b_data, sb->s_blocksize);
+
+ brelse(bh);
+
+ start_block++;
+ offset += len;
+ size -= len;
+ }
We don't need to read full page, and it might be a good optimization for
hfsplus too?
Could we get rid of using buffer_head?
I'm not sure weather bdev_rw_virt is a good choice to simplify it.
Thx,
Yangtao