On 4/23/25 16:55, Jens Axboe wrote:
Something like this, perhaps - it'll ensure that io-wq workers get a
chance to flush out pending work, which should prevent the looping. I've
attached a basic test case. It'll issue a write that will fault, and
then try to cancel that as a way to trigger the TIF_NOTIFY_SIGNAL-based
looping.
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index d80f94346199..e18926dbf20a 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -32,6 +32,7 @@
#include <linux/swapops.h>
#include <linux/miscdevice.h>
#include <linux/uio.h>
+#include <linux/io_uring.h>
static int sysctl_unprivileged_userfaultfd __read_mostly;
@@ -376,6 +377,8 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
*/
if (current->flags & (PF_EXITING|PF_DUMPCORE))
goto out;
+ else if (current->flags & PF_IO_WORKER)
+ io_worker_fault();
assert_fault_locked(vmf);
diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
index 85fe4e6b275c..d93dd7402a28 100644
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -28,6 +28,7 @@ static inline void io_uring_free(struct task_struct *tsk)
if (tsk->io_uring)
__io_uring_free(tsk);
}
+void io_worker_fault(void);
#else
static inline void io_uring_task_cancel(void)
{
@@ -46,6 +47,9 @@ static inline bool io_is_uring_fops(struct file *file)
{
return false;
}
+static inline void io_worker_fault(void)
+{
+}
#endif
#endif
diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c
index d52069b1177b..f74bea028ec7 100644
--- a/io_uring/io-wq.c
+++ b/io_uring/io-wq.c
@@ -1438,3 +1438,13 @@ static __init int io_wq_init(void)
return 0;
}
subsys_initcall(io_wq_init);
+
+void io_worker_fault(void)
+{
+ if (test_thread_flag(TIF_NOTIFY_SIGNAL))
+ clear_notify_signal();
+ if (test_thread_flag(TIF_NOTIFY_RESUME))
+ resume_user_mode_work(NULL);
+ if (task_work_pending(current))
+ task_work_run();
Looking at the stack trace, that sounds dangerous.
iou-wrk-44588 [kernel.kallsyms] [k] io_wq_worker
iou-wrk-44588 [kernel.kallsyms] [k] io_worker_handle_work
iou-wrk-44588 [kernel.kallsyms] [k] io_wq_submit_work
iou-wrk-44588 [kernel.kallsyms] [k] io_issue_sqe
iou-wrk-44588 [kernel.kallsyms] [k] io_write
iou-wrk-44588 [kernel.kallsyms] [k] blkdev_write_iter
iou-wrk-44588 [kernel.kallsyms] [k] iomap_file_buffered_write
iou-wrk-44588 [kernel.kallsyms] [k] iomap_write_iter
iou-wrk-44588 [kernel.kallsyms] [k] fault_in_iov_iter_readable
iou-wrk-44588 [kernel.kallsyms] [k] fault_in_readable
iou-wrk-44588 [kernel.kallsyms] [k] asm_exc_page_fault
iou-wrk-44588 [kernel.kallsyms] [k] exc_page_fault
iou-wrk-44588 [kernel.kallsyms] [k] do_user_addr_fault
iou-wrk-44588 [kernel.kallsyms] [k] handle_mm_fault
iou-wrk-44588 [kernel.kallsyms] [k] hugetlb_fault
iou-wrk-44588 [kernel.kallsyms] [k] hugetlb_no_page
iou-wrk-44588 [kernel.kallsyms] [k] hugetlb_handle_userfault
iou-wrk-44588 [kernel.kallsyms] [k] handle_userfault
It might be holding a good bunch of locks, and then it's trapped
in a page fault handler. Do normal / non-PF_IO_WORKER tasks run
task_work from handle_userfault?
--
Pavel Begunkov