We've seen reports from the field panicing in xfs_trans_brelse after an io error for an attribute block. sd 0:0:23:0: [sdx] tag#271 CDB: Read(16) 88 00 00 00 00 00 9b df 5e 78 00 00 00 08 00 00 critical medium error, dev sdx, sector 2615107192 op 0x0:(READ) flags 0x1000 phys_seg 1 prio class 2 XFS (sdx1): metadata I/O error in "xfs_da_read_buf+0xe1/0x140 [xfs]" at daddr 0x9bdf5678 len 8 error 61 BUG: kernel NULL pointer dereference, address: 00000000000000e0 ... RIP: 0010:xfs_trans_brelse+0xb/0xe0 [xfs] ... Call Trace: <TASK> ... ? xfs_trans_brelse+0xb/0xe0 [xfs] xfs_attr_leaf_get+0xb6/0xc0 [xfs] xfs_attr_get+0xa0/0xd0 [xfs] xfs_xattr_get+0x75/0xb0 [xfs] __vfs_getxattr+0x53/0x70 inode_doinit_use_xattr+0x63/0x180 inode_doinit_with_dentry+0x196/0x510 security_d_instantiate+0x2f/0x50 d_splice_alias+0x46/0x2b0 xfs_vn_lookup+0x8b/0xb0 [xfs] __lookup_slow+0x84/0x130 walk_component+0x158/0x1d0 path_lookupat+0x6e/0x1c0 filename_lookup+0xcf/0x1d0 vfs_statx+0x8d/0x170 vfs_fstatat+0x54/0x70 __do_sys_newfstatat+0x26/0x60 As this is specific to ENODATA test using scsi_debug instead of dmerror which returns EIO. Signed-off-by: Donald Douwsma <ddouwsma@xxxxxxxxxx> --- tests/xfs/999 | 193 ++++++++++++++++++++++++++++++++++++++++++++++ tests/xfs/999.out | 2 + 2 files changed, 195 insertions(+) create mode 100755 tests/xfs/999 create mode 100644 tests/xfs/999.out diff --git a/tests/xfs/999 b/tests/xfs/999 new file mode 100755 index 00000000..2a45eb7c --- /dev/null +++ b/tests/xfs/999 @@ -0,0 +1,193 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2025 Red Hat Inc. All Rights Reserved. +# +# FS QA Test 999 +# +# Regression test for panic following IO error when reading extended attribute blocks +# +# XFS (sda): metadata I/O error in "xfs_da_read_buf+0xe1/0x140 [xfs]" at daddr 0x78 len 8 error 61 +# BUG: kernel NULL pointer dereference, address: 00000000000000e0 +# ... +# RIP: 0010:xfs_trans_brelse+0xb/0xe0 [xfs] +# +# [53887.310123] Call Trace: +# <TASK> +# ? show_trace_log_lvl+0x1c4/0x2df +# ? show_trace_log_lvl+0x1c4/0x2df +# ? xfs_attr_leaf_get+0xb6/0xc0 [xfs] +# ? __die_body.cold+0x8/0xd +# ? page_fault_oops+0x134/0x170 +# ? xfs_trans_read_buf_map+0x133/0x300 [xfs] +# ? exc_page_fault+0x62/0x150 +# ? asm_exc_page_fault+0x22/0x30 +# ? xfs_trans_brelse+0xb/0xe0 [xfs] +# xfs_attr_leaf_get+0xb6/0xc0 [xfs] +# xfs_attr_get+0xa0/0xd0 [xfs] +# xfs_xattr_get+0x88/0xd0 [xfs] +# __vfs_getxattr+0x7b/0xb0 +# inode_doinit_use_xattr+0x63/0x180 +# inode_doinit_with_dentry+0x196/0x510 +# security_d_instantiate+0x3a/0xb0 +# d_splice_alias+0x46/0x2b0 +# xfs_vn_lookup+0x8b/0xb0 [xfs] +# __lookup_slow+0x81/0x130 +# walk_component+0x158/0x1d0 +# ? path_init+0x2c5/0x3f0 +# path_lookupat+0x6e/0x1c0 +# filename_lookup+0xcf/0x1d0 +# ? tlb_finish_mmu+0x65/0x150 +# vfs_statx+0x82/0x160 +# vfs_fstatat+0x54/0x70 +# __do_sys_newfstatat+0x26/0x60 +# do_syscall_64+0x5c/0xe0 +# +# For SELinux enabled filesystems the attribute security.selinux will be +# created before any additional attributes are added. Any attempt to open the +# file will read this entry, leading to the panic being triggered as above, via +# security_d_instantiate, rather than fgetxattr(2), to test via fgetxattr mount +# with a fixed context= +# +# Kernels prior to v4.16 don't have medium_error_start, and only return errors +# for a specific location, making scsi_debug unsuitable for checking old kernels. +# See d9da891a892a scsi: scsi_debug: Add two new parameters to scsi_debug driver +# + +. ./common/preamble +_begin_fstest auto + +_fixed_by_kernel_commit ae668cd567a6 "xfs: do not propagate ENODATA disk errors into xattr code" + +# Override the default cleanup function. +_cleanup() +{ + cd / + rm -f $tmp.* + _unmount $SCRATCH_MNT + _put_scsi_debug_dev +} + +# Import common functions. +. ./common/scsi_debug + +_require_scsi_debug +modinfo scsi_debug | grep -wq medium_error_start || _notrun "test requires scsi_debug medium_error_start" +scsi_debug_dev=$(_get_scsi_debug_dev) +test -b $scsi_debug_dev || _notrun "Failed to initialize scsi debug device" +echo "SCSI debug device $scsi_debug_dev" >>$seqres.full + +_mkfs_dev $scsi_debug_dev >> $seqres.full +_mount $scsi_debug_dev $SCRATCH_MNT >> $seqres.full + +blocksize=$(_get_block_size $SCRATCH_MNT) >> $seqres.full +echo Blocksize $blocksize >> $seqres.full + +# Use dry_run=1 when verifying the test to avoid panicking. +enable_error=1 +[ ${dry_run:-0} -eq 1 ] && enable_error=0 + +test_attr() +{ + test=$1 + testfile=$SCRATCH_MNT/$test + attr_size=$2 # bytes + error_at_block=${3:-0} + shift 3 + + value_size=$attr_size + + echo 0 > /sys/module/scsi_debug/parameters/opts + + echo -e "\nTesting : $test" >> $seqres.full + echo -e "attr size: $attr_size" >> $seqres.full + echo -e "error at block: $error_at_block\n" >> $seqres.full + + touch $testfile + + inode=$(ls -i $testfile|awk '{print $1}') + printf "inode: %d Testfile: %s\n" $inode $testfile >> $seqres.full + setfattr -n user.test_attr -v $(printf "%0*d" $value_size $value_size) $testfile + + $XFS_IO_PROG -c "bmap -al" $testfile >> $seqres.full + start_blocks=($($XFS_IO_PROG -c "bmap -al" $testfile | awk 'match($3, /[0-9]+/, a) {print a[0]}')) + echo "Attribute fork extent(s) start at ${attrblocks[*]}" >> $seqres.full + + _unmount $SCRATCH_MNT >>$seqres.full 2>&1 + + echo "Dump inode $inode details with xfs_db" >> $seqres.full + $XFS_DB_PROG -c "inode $inode" -c "print core.aformat core.naextents a" $scsi_debug_dev >> $seqres.full + inode_daddr=$($XFS_DB_PROG -c "inode $inode" -c "daddr" $scsi_debug_dev | awk '{print $4}') + echo inode daddr $inode_daddr >> $seqres.full + + + _mount $scsi_debug_dev $SCRATCH_MNT >> $seqres.full + + if [[ start_blocks[0] -ne 0 ]]; then + # Choose the block to error, currently only works with a single extent. + error_daddr=$((start_blocks[0] + error_at_block*blocksize/512)) + else + # Default to the inode daddr when no extents were found. + # Fails reading the inode during stat, so arguably useless + # even for testing the upper layers of getfattr. + error_daddr=$inode_daddr + fi + + echo "Setup scsi_debug to error when reading attributes from block" \ + "$error_at_block at daddr $error_daddr" >> $seqres.full + echo $error_daddr > /sys/module/scsi_debug/parameters/medium_error_start + echo $(($blocksize/512)) > /sys/module/scsi_debug/parameters/medium_error_count + + if [ $enable_error -eq 1 ]; then + echo Enabling error >> $seqres.full + echo 2 > /sys/module/scsi_debug/parameters/opts + fi + + grep ^ /sys/module/scsi_debug/parameters/{medium_error_start,medium_error_count,opts} >> $seqres.full + + echo "Re-read the extended attribute on $testfile, panics on IO errors" >> $seqres.full + sync # Let folk see where we failed in the results. + getfattr -d -m - $testfile >> $seqres.full 2>&1 # Panic here on failure +} + + +# TODO: Avoid assumptions about inode size using _xfs_get_inode_size and +# _xfs_get_inode_core_bytes, currently assuming 512 byte inodes. + +# aformat shortform +# Include shortform for completeness, we can only inject errors for attributes +# stored outside of the inode. +test_attr "attr_local" 1 0 + +# aformat extents +# Single leaf block, known to panic +test_attr "attr_extent_one_block" 512 0 + +# Other tests to exercise multi block extents and multi extent attribute forks. +# Before the panic was understood it seemed possible that failing on the second +# block of a multi block attribute fork was involved. Seems like it may be +# worth testing in the future. + +test_attr "attr_extent_two_blocks_1" 5000 0 +test_attr "attr_extent_two_blocks_2" 5000 1 +# test_attr "attr_extent_many_blocks" 16000 4 +# +# When using a single name=value pair to fill the space these tend to push out +# to multiple extents, rather than a single long extent, so don't yet test +# failing subsequent blocks within the first extent. +# +# i.e. +# xfs_bmap -va $testfile +# /mnt/test/testfile: +# EXT: FILE-OFFSET BLOCK-RANGE AG AG-OFFSET TOTAL +# 0: [0..7]: 96..103 0 (96..103) 8 +# 1: [8..23]: 80..95 0 (80..95) 16 + +# aformat btree +# test_attr "attr_extents" 50000 0 +# test_attr "attr_extents" 50000 $blocksize +# test_attr "attr_extents" 50000 $((blocksize+1)) + +# success, all done +echo Silence is golden +status=0 +exit diff --git a/tests/xfs/999.out b/tests/xfs/999.out new file mode 100644 index 00000000..3b276ca8 --- /dev/null +++ b/tests/xfs/999.out @@ -0,0 +1,2 @@ +QA output created by 999 +Silence is golden -- 2.47.3