> This fell off my radar, I apologize; I was on PTO last week.
> Here is the Fedora kernel to install as mentioned
> https://people.redhat.com/loberman/customer/.fedora/
> tar hxvf fedora_kernel.tar.xz
> rpm -ivh --force --nodeps *.rpm

The Rocky 9.5 kernel is still faster than the Fedora 42 kernel.

[root@memverge4 ~]# uname -r
6.14.5-300.fc42.x86_64
[root@memverge4 ~]#
[root@memverge4 ~]# cat /etc/*release
NAME="Rocky Linux"
VERSION="9.5 (Blue Onyx)"
ID="rocky"
ID_LIKE="rhel centos fedora"
VERSION_ID="9.5"
PLATFORM_ID="platform:el9"
PRETTY_NAME="Rocky Linux 9.5 (Blue Onyx)"
ANSI_COLOR="0;32"
LOGO="fedora-logo-icon"
CPE_NAME="cpe:/o:rocky:rocky:9::baseos"
HOME_URL="https://rockylinux.org/"
VENDOR_NAME="RESF"
VENDOR_URL="https://resf.org/"
BUG_REPORT_URL="https://bugs.rockylinux.org/"
SUPPORT_END="2032-05-31"
ROCKY_SUPPORT_PRODUCT="Rocky-Linux-9"
ROCKY_SUPPORT_PRODUCT_VERSION="9.5"
REDHAT_SUPPORT_PRODUCT="Rocky Linux"
REDHAT_SUPPORT_PRODUCT_VERSION="9.5"
Rocky Linux release 9.5 (Blue Onyx)
Rocky Linux release 9.5 (Blue Onyx)
Rocky Linux release 9.5 (Blue Onyx)

Block access -

[root@memverge4 ~]# fio --name=test --rw=read --bs=256k --filename=/dev/md127 --direct=1 --numjobs=1 --iodepth=64 --exitall --group_reporting --ioengine=libaio --runtime=30 --time_based
test: (g=0): rw=read, bs=(R) 256KiB-256KiB, (W) 256KiB-256KiB, (T) 256KiB-256KiB, ioengine=libaio, iodepth=64
fio-3.40
Starting 1 process
Jobs: 1 (f=1): [R(1)][100.0%][r=34.7GiB/s][r=142k IOPS][eta 00m:00s]
test: (groupid=0, jobs=1): err= 0: pid=3566: Fri May 23 12:20:18 2025
  read: IOPS=142k, BW=34.7GiB/s (37.2GB/s)(1040GiB/30001msec)
    slat (usec): min=3, max=1065, avg= 6.68, stdev= 2.19
    clat (usec): min=75, max=2712, avg=443.75, stdev=36.12
     lat (usec): min=83, max=2835, avg=450.43, stdev=36.49

File access -

[root@memverge4 ~]# mount /dev/md127 /mnt
[root@memverge4 ~]# fio --name=test --rw=read --bs=256k --filename=/mnt/testfile --direct=1 --numjobs=1 --iodepth=64 --exitall --group_reporting --ioengine=libaio --runtime=30 --time_based
test: (g=0): rw=read, bs=(R) 256KiB-256KiB, (W) 256KiB-256KiB, (T) 256KiB-256KiB, ioengine=libaio, iodepth=64
fio-3.40
Starting 1 process
Jobs: 1 (f=1): [R(1)][100.0%][r=41.4GiB/s][r=169k IOPS][eta 00m:00s]
test: (groupid=0, jobs=1): err= 0: pid=3666: Fri May 23 12:21:33 2025
  read: IOPS=172k, BW=42.1GiB/s (45.2GB/s)(1263GiB/30001msec)
    slat (usec): min=3, max=1054, avg= 5.46, stdev= 1.81
    clat (usec): min=118, max=2500, avg=365.50, stdev=28.08
     lat (usec): min=121, max=2794, avg=370.96, stdev=28.35
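Switching between the two installed kernels for the comparison below is not spelled out in the thread; a minimal sketch using grubby, assuming both the force-installed Fedora RPMs and the el9_5 kernel are still present on the box (the vmlinuz paths are taken from the version strings above and are an assumption):

# Sketch only: pick which installed kernel boots next.
grubby --default-kernel                                           # show the current default
grubby --set-default /boot/vmlinuz-5.14.0-503.40.1.el9_5.x86_64   # boot the Rocky 9.5 kernel
# or: grubby --set-default /boot/vmlinuz-6.14.5-300.fc42.x86_64   # boot the Fedora 42 kernel
reboot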
Back to the latest 9.5 kernel (5.14.0-503.40.1.el9_5.x86_64)

Block access -

[root@memverge4 ~]# fio --name=test --rw=read --bs=256k --filename=/dev/md127 --direct=1 --numjobs=1 --iodepth=64 --exitall --group_reporting --ioengine=libaio --runtime=30 --time_based
test: (g=0): rw=read, bs=(R) 256KiB-256KiB, (W) 256KiB-256KiB, (T) 256KiB-256KiB, ioengine=libaio, iodepth=64
fio-3.40
Starting 1 process
Jobs: 1 (f=1): [R(1)][100.0%][r=70.8GiB/s][r=290k IOPS][eta 00m:00s]
test: (groupid=0, jobs=1): err= 0: pid=6121: Fri May 23 12:35:22 2025
  read: IOPS=287k, BW=70.1GiB/s (75.3GB/s)(2104GiB/30001msec)
    slat (nsec): min=1492, max=165338, avg=3029.64, stdev=1544.70
    clat (usec): min=71, max=1069, avg=219.56, stdev=21.22
     lat (usec): min=74, max=1233, avg=222.59, stdev=21.34

File access -

[root@memverge4 ~]# mount /dev/md127 /mnt
[root@memverge4 ~]# fio --name=test --rw=read --bs=256k --filename=/mnt/testfile --direct=1 --numjobs=1 --iodepth=64 --exitall --group_reporting --ioengine=libaio --runtime=30 --time_based
test: (g=0): rw=read, bs=(R) 256KiB-256KiB, (W) 256KiB-256KiB, (T) 256KiB-256KiB, ioengine=libaio, iodepth=64
fio-3.40
Starting 1 process
Jobs: 1 (f=1): [R(1)][100.0%][r=73.5GiB/s][r=301k IOPS][eta 00m:00s]
test: (groupid=0, jobs=1): err= 0: pid=6200: Fri May 23 12:36:47 2025
  read: IOPS=301k, BW=73.4GiB/s (78.8GB/s)(2201GiB/30001msec)
    slat (nsec): min=1443, max=291427, avg=2951.98, stdev=1952.66
    clat (usec): min=118, max=1449, avg=209.84, stdev=23.13
     lat (usec): min=121, max=1562, avg=212.79, stdev=23.23

Anton

On Thu, May 22, 2025 at 18:08, Laurence Oberman <loberman@xxxxxxxxxx> wrote:
>
> On Mon, 2025-05-05 at 13:39 -0400, Laurence Oberman wrote:
> > On Mon, 2025-05-05 at 09:21 -0400, Laurence Oberman wrote:
> > > On Mon, 2025-05-05 at 08:29 -0400, Laurence Oberman wrote:
> > > > On Mon, 2025-05-05 at 07:50 +1000, Dave Chinner wrote:
> > > > > [cc linux-block]
> > > > >
> > > > > [original bug report:
> > > > > https://lore.kernel.org/linux-xfs/CAAiJnjoo0--yp47UKZhbu8sNSZN6DZ-QzmZBMmtr1oC=fOOgAQ@xxxxxxxxxxxxxx/
> > > > > ]
> > > > >
> > > > > On Sun, May 04, 2025 at 10:22:58AM +0300, Anton Gavriliuk wrote:
> > > > > > > What's the comparative performance of an identical read
> > > > > > > profile directly on the raw MD raid0 device?
> > > > > >
> > > > > > Rocky 9.5 (5.14.0-503.40.1.el9_5.x86_64)
> > > > > >
> > > > > > [root@localhost ~]# df -mh /mnt
> > > > > > Filesystem      Size  Used Avail Use% Mounted on
> > > > > > /dev/md127       35T  1.3T   34T   4% /mnt
> > > > > >
> > > > > > [root@localhost ~]# fio --name=test --rw=read --bs=256k --filename=/dev/md127 --direct=1 --numjobs=1 --iodepth=64 --exitall --group_reporting --ioengine=libaio --runtime=30 --time_based
> > > > > > test: (g=0): rw=read, bs=(R) 256KiB-256KiB, (W) 256KiB-256KiB, (T) 256KiB-256KiB, ioengine=libaio, iodepth=64
> > > > > > fio-3.39-44-g19d9
> > > > > > Starting 1 process
> > > > > > Jobs: 1 (f=1): [R(1)][100.0%][r=81.4GiB/s][r=334k IOPS][eta 00m:00s]
> > > > > > test: (groupid=0, jobs=1): err= 0: pid=43189: Sun May 4 08:22:12 2025
> > > > > >   read: IOPS=363k, BW=88.5GiB/s (95.1GB/s)(2656GiB/30001msec)
> > > > > >     slat (nsec): min=971, max=312380, avg=1817.92, stdev=1367.75
> > > > > >     clat (usec): min=78, max=1351, avg=174.46, stdev=28.86
> > > > > >      lat (usec): min=80, max=1352, avg=176.27, stdev=28.81
> > > > > >
> > > > > > Fedora 42 (6.14.5-300.fc42.x86_64)
> > > > > >
> > > > > > [root@localhost anton]# df -mh /mnt
> > > > > > Filesystem      Size  Used Avail Use% Mounted on
> > > > > > /dev/md127       35T  1.3T   34T   4% /mnt
> > > > > >
> > > > > > [root@localhost ~]# fio --name=test --rw=read --bs=256k --filename=/dev/md127 --direct=1 --numjobs=1 --iodepth=64 --exitall --group_reporting --ioengine=libaio --runtime=30 --time_based
> > > > > > test: (g=0): rw=read, bs=(R) 256KiB-256KiB, (W) 256KiB-256KiB, (T) 256KiB-256KiB, ioengine=libaio, iodepth=64
> > > > > > fio-3.39-44-g19d9
> > > > > > Starting 1 process
> > > > > > Jobs: 1 (f=1): [R(1)][100.0%][r=41.0GiB/s][r=168k IOPS][eta 00m:00s]
> > > > > > test: (groupid=0, jobs=1): err= 0: pid=5685: Sun May 4 10:14:00 2025
> > > > > >   read: IOPS=168k, BW=41.0GiB/s (44.1GB/s)(1231GiB/30001msec)
> > > > > >     slat (usec): min=3, max=273, avg= 5.63, stdev= 1.48
> > > > > >     clat (usec): min=67, max=2800, avg=374.99, stdev=29.90
> > > > > >      lat (usec): min=72, max=2914, avg=380.62, stdev=30.22
> > > > > >
> > > > >
> > > > > So the MD block device shows the same read performance as the
> > > > > filesystem on top of it. That means this is a regression at the
> > > > > MD device layer or in the block/driver layers below it, i.e. it is
> > > > > not an XFS or filesystem issue at all.
> > > > >
> > > > > -Dave.
> > > >
> > > > I have a lab setup, let me see if I can also reproduce and then
> > > > trace this to see where it is spending the time
> > > >
> > >
> > > Not seeing 1/2 the bandwidth, but also significantly slower on the
> > > Fedora42 kernel.
> > > I will trace it
> > >
> > > 9.5 kernel - 5.14.0-503.40.1.el9_5.x86_64
> > >
> > > Run status group 0 (all jobs):
> > >    READ: bw=14.7GiB/s (15.8GB/s), 14.7GiB/s-14.7GiB/s (15.8GB/s-15.8GB/s), io=441GiB (473GB), run=30003-30003msec
> > >
> > > Fedora42 kernel - 6.14.5-300.fc42.x86_64
> > >
> > > Run status group 0 (all jobs):
> > >    READ: bw=10.4GiB/s (11.2GB/s), 10.4GiB/s-10.4GiB/s (11.2GB/s-11.2GB/s), io=313GiB (336GB), run=30001-30001msec
> > >
> >
> > Fedora42 kernel issue
> >
> > While my difference is not as severe, we do see consistently lower
> > performance on the Fedora kernel (6.14.5-300.fc42.x86_64).
> >
> > When I remove the software raid and run against a single NVMe we
> > converge to be much closer.
> > Also, the latest upstream does not show this regression either.
> >
> > Not sure yet what is in our Fedora kernel causing this.
> > We will work it via the Bugzilla.
> >
> > Regards
> > Laurence
> >
> > TLDR
> >
> > Fedora Kernel
> > -------------
> > [root@penguin9 blktracefedora]# uname -a
> > Linux penguin9.2 6.14.5-300.fc42.x86_64 #1 SMP PREEMPT_DYNAMIC Fri May 2 14:16:46 UTC 2025 x86_64 x86_64 x86_64 GNU/Linux
> >
> > 5 runs of fio against /dev/md1
> >
> > [root@penguin9 ~]# for i in 1 2 3 4 5
> > > do
> > > ./run_fio.sh | grep -A1 "Run status group"
> > > done
> > Run status group 0 (all jobs):
> >    READ: bw=11.3GiB/s (12.2GB/s), 11.3GiB/s-11.3GiB/s (12.2GB/s-12.2GB/s), io=679GiB (729GB), run=60001-60001msec
> > Run status group 0 (all jobs):
> >    READ: bw=11.2GiB/s (12.0GB/s), 11.2GiB/s-11.2GiB/s (12.0GB/s-12.0GB/s), io=669GiB (718GB), run=60001-60001msec
> > Run status group 0 (all jobs):
> >    READ: bw=11.4GiB/s (12.2GB/s), 11.4GiB/s-11.4GiB/s (12.2GB/s-12.2GB/s), io=682GiB (733GB), run=60001-60001msec
> > Run status group 0 (all jobs):
> >    READ: bw=11.1GiB/s (11.9GB/s), 11.1GiB/s-11.1GiB/s (11.9GB/s-11.9GB/s), io=664GiB (713GB), run=60001-60001msec
> > Run status group 0 (all jobs):
> >    READ: bw=11.3GiB/s (12.1GB/s), 11.3GiB/s-11.3GiB/s (12.1GB/s-12.1GB/s), io=678GiB (728GB), run=60001-60001msec
> >
> > RHEL9.5
> > -------
> > Linux penguin9.2 5.14.0-503.40.1.el9_5.x86_64 #1 SMP PREEMPT_DYNAMIC Thu Apr 24 08:27:29 EDT 2025 x86_64 x86_64 x86_64 GNU/Linux
> >
> > [root@penguin9 ~]# for i in 1 2 3 4 5; do ./run_fio.sh | grep -A1 "Run status group"; done
> > Run status group 0 (all jobs):
> >    READ: bw=14.9GiB/s (16.0GB/s), 14.9GiB/s-14.9GiB/s (16.0GB/s-16.0GB/s), io=894GiB (960GB), run=60003-60003msec
> > Run status group 0 (all jobs):
> >    READ: bw=14.6GiB/s (15.6GB/s), 14.6GiB/s-14.6GiB/s (15.6GB/s-15.6GB/s), io=873GiB (938GB), run=60003-60003msec
> > Run status group 0 (all jobs):
> >    READ: bw=14.9GiB/s (16.0GB/s), 14.9GiB/s-14.9GiB/s (16.0GB/s-16.0GB/s), io=892GiB (958GB), run=60003-60003msec
> > Run status group 0 (all jobs):
> >    READ: bw=14.5GiB/s (15.6GB/s), 14.5GiB/s-14.5GiB/s (15.6GB/s-15.6GB/s), io=872GiB (936GB), run=60003-60003msec
> > Run status group 0 (all jobs):
> >    READ: bw=14.7GiB/s (15.8GB/s), 14.7GiB/s-14.7GiB/s (15.8GB/s-15.8GB/s), io=884GiB (950GB), run=60003-60003msec
> >
> >
> > Remove software raid from the layers and test just on a single nvme
> > --------------------------------------------------------------------
> >
> > fio --name=test --rw=read --bs=256k --filename=/dev/nvme23n1 --direct=1 --numjobs=1 --iodepth=64 --exitall --group_reporting --ioengine=libaio --runtime=60 --time_based
> >
> > Linux penguin9.2 5.14.0-503.40.1.el9_5.x86_64 #1 SMP PREEMPT_DYNAMIC Thu Apr 24 08:27:29 EDT 2025 x86_64 x86_64 x86_64 GNU/Linux
> >
> > [root@penguin9 ~]# ./run_nvme_fio.sh
> >
> > Run status group 0 (all jobs):
> >    READ: bw=3207MiB/s (3363MB/s), 3207MiB/s-3207MiB/s (3363MB/s-3363MB/s), io=188GiB (202GB), run=60005-60005msec
> >
> >
> > Back to the Fedora kernel
> >
> > [root@penguin9 ~]# uname -a
> > Linux penguin9.2 6.14.5-300.fc42.x86_64 #1 SMP PREEMPT_DYNAMIC Fri May 2 14:16:46 UTC 2025 x86_64 x86_64 x86_64 GNU/Linux
> >
> > Within the margin of error
> >
> > Run status group 0 (all jobs):
> >    READ: bw=3061MiB/s (3210MB/s), 3061MiB/s-3061MiB/s (3210MB/s-3210MB/s), io=179GiB (193GB), run=60006-60006msec
> >
> >
> > Try recent upstream kernel
> > --------------------------
> > [root@penguin9 ~]# uname -a
> > Linux penguin9.2 6.13.0-rc7+ #2 SMP PREEMPT_DYNAMIC Mon May 5 10:59:12 EDT 2025 x86_64 x86_64 x86_64 GNU/Linux
> >
> > [root@penguin9 ~]# for i in 1 2 3 4 5; do ./run_fio.sh | grep -A1 "Run status group"; done
> > Run status group 0 (all jobs):
> >    READ: bw=14.6GiB/s (15.7GB/s), 14.6GiB/s-14.6GiB/s (15.7GB/s-15.7GB/s), io=876GiB (941GB), run=60003-60003msec
> > Run status group 0 (all jobs):
> >    READ: bw=14.8GiB/s (15.9GB/s), 14.8GiB/s-14.8GiB/s (15.9GB/s-15.9GB/s), io=891GiB (957GB), run=60003-60003msec
> > Run status group 0 (all jobs):
> >    READ: bw=14.8GiB/s (15.9GB/s), 14.8GiB/s-14.8GiB/s (15.9GB/s-15.9GB/s), io=890GiB (956GB), run=60003-60003msec
> > Run status group 0 (all jobs):
> >    READ: bw=14.5GiB/s (15.6GB/s), 14.5GiB/s-14.5GiB/s (15.6GB/s-15.6GB/s), io=871GiB (935GB), run=60003-60003msec
> >
> >
> > Update to latest upstream
> > -------------------------
> >
> > [root@penguin9 ~]# uname -a
> > Linux penguin9.2 6.15.0-rc5 #1 SMP PREEMPT_DYNAMIC Mon May 5 12:18:22 EDT 2025 x86_64 x86_64 x86_64 GNU/Linux
> >
> > Single nvme device is once again fine
> >
> > Run status group 0 (all jobs):
> >    READ: bw=3061MiB/s (3210MB/s), 3061MiB/s-3061MiB/s (3210MB/s-3210MB/s), io=179GiB (193GB), run=60006-60006msec
> >
> > [root@penguin9 ~]# for i in 1 2 3 4 5; do ./run_fio.sh | grep -A1 "Run status group"; done
> > Run status group 0 (all jobs):
> >    READ: bw=14.7GiB/s (15.7GB/s), 14.7GiB/s-14.7GiB/s (15.7GB/s-15.7GB/s), io=880GiB (945GB), run=60003-60003msec
> > Run status group 0 (all jobs):
> >    READ: bw=18.1GiB/s (19.4GB/s), 18.1GiB/s-18.1GiB/s (19.4GB/s-19.4GB/s), io=1087GiB (1167GB), run=60003-60003msec
> > Run status group 0 (all jobs):
> >    READ: bw=18.0GiB/s (19.4GB/s), 18.0GiB/s-18.0GiB/s (19.4GB/s-19.4GB/s), io=1082GiB (1162GB), run=60003-60003msec
> > Run status group 0 (all jobs):
> >    READ: bw=18.2GiB/s (19.5GB/s), 18.2GiB/s-18.2GiB/s (19.5GB/s-19.5GB/s), io=1090GiB (1170GB), run=60005-60005msec
> >
> >
>
> This fell off my radar, I apologize; I was on PTO last week.
> Here is the Fedora kernel to install as mentioned
> https://people.redhat.com/loberman/customer/.fedora/
>
> tar hxvf fedora_kernel.tar.xz
> rpm -ivh --force --nodeps *.rpm
>
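For anyone trying to reproduce the md numbers above: run_fio.sh itself was never posted in the thread. A minimal sketch, assuming it simply wraps the same fio invocation shown throughout the thread against /dev/md1 with the 60-second runtime used in the 5-run loops (the device path and runtime are assumptions):

#!/bin/bash
# run_fio.sh - hypothetical reconstruction, not the script actually used in the thread.
# Sequential 256k libaio direct reads, one job, queue depth 64, time-based for 60s.
DEV=${1:-/dev/md1}        # assumed target; the lab runs above were against /dev/md1
fio --name=test --rw=read --bs=256k --filename="$DEV" \
    --direct=1 --numjobs=1 --iodepth=64 --exitall --group_reporting \
    --ioengine=libaio --runtime=60 --time_based

Run it in a loop and grep the group summary exactly as in the thread:
for i in 1 2 3 4 5; do ./run_fio.sh | grep -A1 "Run status group"; done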