[RFC PATCH] fanotify: add watchdog for permission events

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This is to make it easier to debug issues with AV software, which time and
again deadlocks with no indication of where the issue comes from, so the
kernel gets blamed for the deadlock.  We then need to analyze dumps to
prove that the kernel is not in fact at fault.

With this patch a warning is printed when a permission event is received by
userspace but not answered for more than 20 seconds.

The timeout is very coarse (20-40s) but I guess it's good enough for the
purpose.

Overhead should be minimal.

Signed-off-by: Miklos Szeredi <mszeredi@xxxxxxxxxx>
---
 fs/notify/fanotify/Kconfig         |   5 ++
 fs/notify/fanotify/fanotify.h      |   6 +-
 fs/notify/fanotify/fanotify_user.c | 102 +++++++++++++++++++++++++++++
 include/linux/fsnotify_backend.h   |   4 ++
 4 files changed, 116 insertions(+), 1 deletion(-)

diff --git a/fs/notify/fanotify/Kconfig b/fs/notify/fanotify/Kconfig
index 0e36aaf379b7..eeb9c443254e 100644
--- a/fs/notify/fanotify/Kconfig
+++ b/fs/notify/fanotify/Kconfig
@@ -24,3 +24,8 @@ config FANOTIFY_ACCESS_PERMISSIONS
 	   hierarchical storage management systems.
 
 	   If unsure, say N.
+
+config FANOTIFY_PERM_WATCHDOG
+       bool "fanotify permission event watchdog"
+       depends on FANOTIFY_ACCESS_PERMISSIONS
+       default n
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index b44e70e44be6..8b60fbb9594f 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -438,10 +438,14 @@ FANOTIFY_ME(struct fanotify_event *event)
 struct fanotify_perm_event {
 	struct fanotify_event fae;
 	struct path path;
-	const loff_t *ppos;		/* optional file range info */
+	union {
+		const loff_t *ppos;	/* optional file range info */
+		pid_t pid;		/* pid of task processing the event */
+	};
 	size_t count;
 	u32 response;			/* userspace answer to the event */
 	unsigned short state;		/* state of the event */
+	unsigned short watchdog_cnt;	/* already scanned by watchdog? */
 	int fd;		/* fd we passed to userspace for this event */
 	union {
 		struct fanotify_response_info_header hdr;
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 87f861e9004f..a9a34da2c864 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -95,6 +95,96 @@ static void __init fanotify_sysctls_init(void)
 #define fanotify_sysctls_init() do { } while (0)
 #endif /* CONFIG_SYSCTL */
 
+#ifdef CONFIG_FANOTIFY_PERM_WATCHDOG
+static LIST_HEAD(perm_group_list);
+static DEFINE_SPINLOCK(perm_group_lock);
+static void perm_group_watchdog(struct work_struct *work);
+static DECLARE_DELAYED_WORK(perm_group_work, perm_group_watchdog);
+static unsigned int perm_group_timeout = 20;
+
+static void perm_group_watchdog_schedule(void)
+{
+	schedule_delayed_work(&perm_group_work, secs_to_jiffies(perm_group_timeout));
+}
+
+static void perm_group_watchdog(struct work_struct *work)
+{
+	struct fsnotify_group *group;
+	struct fanotify_perm_event *event;
+	struct task_struct *task;
+	pid_t failed_pid = 0;
+
+	guard(spinlock)(&perm_group_lock);
+	if (list_empty(&perm_group_list))
+		return;
+
+	list_for_each_entry(group, &perm_group_list, fanotify_data.perm_group) {
+		/*
+		 * Ok to test without lock, racing with an addition is
+		 * fine, will deal with it next round
+		 */
+		if (list_empty(&group->fanotify_data.access_list))
+			continue;
+
+		scoped_guard(spinlock, &group->notification_lock) {
+			list_for_each_entry(event, &group->fanotify_data.access_list, fae.fse.list) {
+				if (likely(event->watchdog_cnt == 0)) {
+					event->watchdog_cnt = 1;
+				} else if (event->watchdog_cnt == 1) {
+					/* Seen on a previous scan too: report it, but only once */
+					event->watchdog_cnt = 2;
+
+					/* Skip if this pid is the one reported most recently */
+					if (event->pid == failed_pid)
+						continue;
+
+					failed_pid = event->pid;
+					rcu_read_lock();
+					task = find_task_by_pid_ns(event->pid, &init_pid_ns);
+					pr_warn_ratelimited("PID %d (%s) failed to respond to fanotify queue for more than %u seconds\n",
+							    event->pid, task ? task->comm : NULL, perm_group_timeout);
+					rcu_read_unlock();
+				}
+			}
+		}
+	}
+	perm_group_watchdog_schedule();
+}
+
+static void fanotify_perm_watchdog_group_remove(struct fsnotify_group *group)
+{
+	if (!list_empty(&group->fanotify_data.perm_group)) {
+		/* Unlink and re-init the node so list_empty() stays a valid "not on list" test. */
+		spin_lock(&perm_group_lock);
+		list_del_init(&group->fanotify_data.perm_group);
+		spin_unlock(&perm_group_lock);
+	}
+}
+
+static void fanotify_perm_watchdog_group_add(struct fsnotify_group *group)
+{
+	spin_lock(&perm_group_lock);
+	if (list_empty(&group->fanotify_data.perm_group)) {
+		/* Not yet monitored; checked under the lock so concurrent callers add it once. */
+		if (list_empty(&perm_group_list))
+			perm_group_watchdog_schedule();
+		list_add_tail(&group->fanotify_data.perm_group, &perm_group_list);
+	}
+	spin_unlock(&perm_group_lock);
+}
+
+#else
+
+static void fanotify_perm_watchdog_group_remove(struct fsnotify_group *group)
+{
+}
+
+static void fanotify_perm_watchdog_group_add(struct fsnotify_group *group)
+{
+}
+
+#endif
+
 /*
  * All flags that may be specified in parameter event_f_flags of fanotify_init.
  *
@@ -210,6 +300,8 @@ static void fanotify_unhash_event(struct fsnotify_group *group,
 	hlist_del_init(&event->merge_list);
 }
 
+
+
 /*
  * Get an fanotify notification event if one exists and is small
  * enough to fit in "count". Return an error pointer if the count
@@ -953,6 +1045,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
 				spin_lock(&group->notification_lock);
 				list_add_tail(&event->fse.list,
 					&group->fanotify_data.access_list);
+				FANOTIFY_PERM(event)->pid = current->pid;
 				spin_unlock(&group->notification_lock);
 			}
 		}
@@ -1012,6 +1105,8 @@ static int fanotify_release(struct inode *ignored, struct file *file)
 	 */
 	fsnotify_group_stop_queueing(group);
 
+	fanotify_perm_watchdog_group_remove(group);
+
 	/*
 	 * Process all permission events on access_list and notification queue
 	 * and simulate reply from userspace.
@@ -1464,6 +1559,10 @@ static int fanotify_add_mark(struct fsnotify_group *group,
 	fsnotify_group_unlock(group);
 
 	fsnotify_put_mark(fsn_mark);
+
+	if (!ret && (mask & FANOTIFY_PERM_EVENTS))
+		fanotify_perm_watchdog_group_add(group);
+
 	return ret;
 }
 
@@ -1622,6 +1721,9 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 	group->fanotify_data.f_flags = event_f_flags;
 	init_waitqueue_head(&group->fanotify_data.access_waitq);
 	INIT_LIST_HEAD(&group->fanotify_data.access_list);
+#ifdef CONFIG_FANOTIFY_PERM_WATCHDOG
+	INIT_LIST_HEAD(&group->fanotify_data.perm_group);
+#endif
 	switch (class) {
 	case FAN_CLASS_NOTIF:
 		group->priority = FSNOTIFY_PRIO_NORMAL;
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index fc27b53c58c2..5276fb3ada34 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -272,6 +272,10 @@ struct fsnotify_group {
 			int f_flags; /* event_f_flags from fanotify_init() */
 			struct ucounts *ucounts;
 			mempool_t error_events_pool;
+#ifdef CONFIG_FANOTIFY_PERM_WATCHDOG
+			/* chained on perm_group_list */
+			struct list_head perm_group;
+#endif
 		} fanotify_data;
 #endif /* CONFIG_FANOTIFY */
 	};
-- 
2.49.0





[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux