On 6/9/25 07:07, Bart Van Assche wrote: > On 5/25/25 10:24 PM, Christoph Hellwig wrote: >> Umm, Bart I really expected better from you. You're ducking around >> providing a reproducer for over a week and waste multiple peoples >> time to tell us the only reproducer is your out of tree thingy >> reject upstream before? That's not really how Linux developement >> works. > > A reproducer for the blktests framework is available below. It took me > more time than I had expected to come up with a reproducer because I was > initially looking in the wrong direction (device mapper). As one can > see, the reproducer below does not involve any device mapper driver. > > Do you plan to work on a fix or do you perhaps expect me to do this? To be honest, I have never tested the block layer inline crypto feature using zoned devices. All the use cases I have with crypto are using dm-crypt. A quick look tells me that the issue is with that particular feature, especially the fallback path, as it is using work items to asynchronously handle write data encryption. Since this happens above the zone write plug, without that being aware of the sequential write constraint, problems may indeed happen. So yes, we need a fix. Can you work on one ? One additional thing: why do you need f2fs with your reproducer ? Problems should happen also doing writes directly to the block device, no ? > > Thanks, > > Bart. > > $ cat tests/zbd/014 > #!/bin/bash > # SPDX-License-Identifier: GPL-3.0+ > # Copyright (C) 2025 Google LLC > # > # Trigger multiple splitting of a bio. About the values of the size > parameters > # in this test: > # - The zone size is 4 MiB and is larger than max_sectors_kb. > # - The maximum bio size supported by the code in > block/blk-crypto-fallback.c > # is BIO_MAX_VECS * PAGE_SIZE or 1 MiB if the page size is 4 KiB. > # - max_sectors_kb has been set to 512 KiB to cause further bio splitting. > > . tests/zbd/rc > . common/null_blk > > DESCRIPTION="test multiple bio splitting" > QUICK=1 > > requires() { > _have_driver f2fs > _have_driver null_blk > _have_program fscrypt > _have_program getconf > _have_program mkfs.f2fs > for o in BLK_INLINE_ENCRYPTION_FALLBACK FS_ENCRYPTION_INLINE_CRYPT; do > if ! _check_kernel_option "$o"; then > SKIP_REASONS+=("Kernel option $o has not been enabled") > fi > done > } > > trace_block_io() { > ( > set -e > cd /sys/kernel/tracing > lsof -t trace_pipe | xargs -r kill > echo 1024 > buffer_size_kb > echo nop > current_tracer > echo 0 > tracing_on > echo > trace > echo 0 > events/enable > echo 1 > events/block/enable > echo 0 > events/block/block_dirty_buffer/enable > echo 0 > events/block/block_plug/enable > echo 0 > events/block/block_touch_buffer/enable > echo 0 > events/block/block_unplug/enable > grep -lw 1 events/block/*/enable | while read -r event_path; do > filter_path="${event_path%enable}filter" > echo "$1" > "$filter_path" > done > echo 1 > tracing_on > echo "Tracing has been enabled" >>"$FULL" > cat trace_pipe > "${FULL%.full}-block-trace.txt" > ) > } > > stop_tracing() { > kill "$1" > ( > set -e > cd /sys/kernel/tracing > echo 0 > tracing_on > echo 0 > events/enable > ) > } > > report_stats() { > local pfx=$1 before after > read -r -a before <<<"$2" > read -r -a after <<<"$3" > local reads=$((after[0]-before[0])) > local rmerge=$((after[1]-before[1])) > local writes=$((after[4]-before[4])) > local wmerge=$((after[5]-before[5])) > echo "$pfx reads: $reads rmerge: $rmerge writes: $writes wmerge: $wmerge" > } > > devno() { > IFS=: read -r maj min <"/sys/class/block/$(basename "$1")/dev" > # From <linux/kdev_t.h> MINORBITS = 20 > echo $(((maj << 20) + min)) > } > > test() { > echo "Running ${TEST_NAME}" > > local page_size > page_size=$(getconf PAGE_SIZE) || return $? > > local mount_dir="$TMPDIR/mnt" > > umount /dev/nullb1 >>"$FULL" 2>&1 > _remove_null_blk_devices > > # A small conventional block device for the F2FS metadata. > local null_blk_params=( > blocksize=4096 > discard=1 > max_sectors=$(((1 << 32) - 1)) > memory_backed=1 > size=64 # MiB > submit_queues=1 > power=1 > ) > _configure_null_blk nullb1 "${null_blk_params[@]}" > local cdev=/dev/nullb1 > > # A larger zoned block device for the data. > local null_blk_params=( > blocksize=4096 > completion_nsec=10000000 # 10 ms > irqmode=2 > max_sectors=$(((1 << 32) - 1)) > memory_backed=1 > hw_queue_depth=1 > size=1024 # MiB > submit_queues=1 > zone_size="$((page_size >> 10))" # 1024 times the page size. > zoned=1 > power=1 > ) > _configure_null_blk nullb2 "${null_blk_params[@]}" || return $? > local zdev_basename=nullb2 > local zdev=/dev/${zdev_basename} > local zdev_devno > zdev_devno=$(devno "$zdev") > > { > local > max_sectors_zdev=/sys/class/block/"${zdev_basename}"/queue/max_sectors_kb > echo $((128 * page_size)) > "${max_sectors_zdev}" > echo "${zdev_basename}: max_sectors_kb=$(<"${max_sectors_zdev}")" > > ls -ld "${cdev}" "${zdev}" > echo "${zdev_basename} settings:" > (cd "/sys/class/block/$zdev_basename/queue" && grep -vw 0 ./*) > } >>"${FULL}" 2>&1 > > trace_block_io "dev == ${zdev_devno}" & > local trace_pid=$! > while ! grep -q 'Tracing has been enabled' "$FULL"; do > sleep .1 > done > > { > mkfs.f2fs -q -O encrypt -m "${cdev}" -c "${zdev}" && > mkdir -p "${mount_dir}" && > mount -o inlinecrypt -t f2fs "${cdev}" "${mount_dir}" && > local encrypted_dir="${mount_dir}/encrypted" && > mkdir -p "${encrypted_dir}" > fscrypt setup "${mount_dir}" </dev/null > local keyfile=$TMPDIR/keyfile && > dd if=/dev/zero of="$keyfile" bs=32 count=1 status=none && > fscrypt encrypt "${encrypted_dir}" --source=raw_key \ > --name=protector --key="$keyfile" && > fscrypt status "${encrypted_dir}" && > > local before after && > read -r -a before <"/sys/class/block/${zdev_basename}/stat" && > echo "Starting IO" && > local cmd="dd if=/dev/zero of=${encrypted_dir}/file bs=64M count=15 > conv=fdatasync" && > echo "$cmd" && > eval "$cmd" && > ls -ld "${mount_dir}/encrypted/file" && > read -r -a after <"/sys/class/block/${zdev_basename}/stat" && > report_stats "zdev:" "${before[*]}" "${after[*]}" > } >>"$FULL" 2>&1 || fail=true > > umount "${mount_dir}" >>"${FULL}" 2>&1 > [ -n "${trace_pid}" ] && kill "${trace_pid}" > _exit_null_blk > > if [ -z "$fail" ]; then > echo "Test complete" > else > echo "Test failed" > return 1 > fi > } > > -- Damien Le Moal Western Digital Research