[PATCH 3/3] [PATCH v2 3/3] fanotify: introduce event response identifier

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: ibrahimjirdeh <ibrahimjirdeh@xxxxxx>

Summary:
This adds support for responding to events via response identifier. This
prevents races if there are multiple processes backing the same fanotify
group (eg. handover of fanotify group to new instance of a backing daemon).
It is also useful for reporting pre-dir-content events without an
event->fd:
https://lore.kernel.org/linux-fsdevel/2dx3pbcnv5w75fxb2ghqtsk6gzl6cuxmd2rinzwbq7xxfjf5z7@3nqidi3mno46/.

Rather than introducing a new event identifier field and extending
fanotify_event_metadata, we have opted to overload event->fd and restrict
this functionality to use-cases which are using file handle apis
(FAN_REPORT_FID).

In terms of how response ids are allocated, we use an idr for allocation
and restrict the id range to below -255 to ensure there is no overlap with
existing fd-as-identifier usage. We can also leverage the added idr for
more efficient lookup when handling response although that is not done
in this patch.

Suggested-by: Amir Goldstein <amir73il@xxxxxxxxx>
Link: https://lore.kernel.org/linux-fsdevel/CAOQ4uxheeLXdTLLWrixnTJcxVP+BV4ViXijbvERHPenzgDMUTA@xxxxxxxxxxxxxx/
Signed-off-by: Ibrahim Jirdeh <ibrahimjirdeh@xxxxxxxx>
---
 fs/notify/fanotify/fanotify.c       |   3 +
 fs/notify/fanotify/fanotify.h       |   5 +-
 fs/notify/fanotify/fanotify_user.c  | 129 ++++++++++++++++++----------
 include/linux/fanotify.h            |   3 +-
 include/linux/fsnotify_backend.h    |   1 +
 include/uapi/linux/fanotify.h       |  11 ++-
 tools/include/uapi/linux/fanotify.h |  11 ++-
 7 files changed, 110 insertions(+), 53 deletions(-)

diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 34acb7c16e8b..d9aebd359199 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -1106,6 +1106,9 @@ static void fanotify_free_event(struct fsnotify_group *group,
 
 	event = FANOTIFY_E(fsn_event);
 	put_pid(event->pid);
+	if (fanotify_is_perm_event(event->mask) &&
+	    FAN_GROUP_FLAG(group, FAN_REPORT_RESPONSE_ID))
+		idr_remove(&group->response_idr, -FANOTIFY_PERM(event)->id);
 	switch (event->type) {
 	case FANOTIFY_EVENT_TYPE_PATH:
 		fanotify_free_path_event(event);
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index f6d25fcf8692..8d62321237d6 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -444,7 +444,10 @@ struct fanotify_perm_event {
 	size_t count;
 	u32 response;			/* userspace answer to the event */
 	unsigned short state;		/* state of the event */
-	int fd;		/* fd we passed to userspace for this event */
+	union {
+		__s32 fd;			/* fd we passed to userspace for this event */
+		__s32 id;			/* FAN_REPORT_RESPONSE_ID */
+	};
 	union {
 		struct fanotify_response_info_header hdr;
 		struct fanotify_response_info_audit_rule audit_rule;
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 19d3f2d914fe..b033f86e0db3 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -330,14 +330,16 @@ static int process_access_response(struct fsnotify_group *group,
 				   size_t info_len)
 {
 	struct fanotify_perm_event *event;
-	int fd = response_struct->fd;
+	int id = FAN_GROUP_FLAG(group, FAN_REPORT_RESPONSE_ID) ?
+			 response_struct->id :
+			 response_struct->fd;
 	u32 response = response_struct->response;
 	int errno = fanotify_get_response_errno(response);
 	int ret = info_len;
 	struct fanotify_response_info_audit_rule friar;
 
-	pr_debug("%s: group=%p fd=%d response=%x errno=%d buf=%p size=%zu\n",
-		 __func__, group, fd, response, errno, info, info_len);
+	pr_debug("%s: group=%p id=%d response=%x errno=%d buf=%p size=%zu\n",
+		 __func__, group, id, response, errno, info, info_len);
 	/*
 	 * make sure the response is valid, if invalid we do nothing and either
 	 * userspace can send a valid response or we will clean it up after the
@@ -385,19 +387,24 @@ static int process_access_response(struct fsnotify_group *group,
 		ret = process_access_response_info(info, info_len, &friar);
 		if (ret < 0)
 			return ret;
-		if (fd == FAN_NOFD)
+		if (id == FAN_NOFD)
 			return ret;
 	} else {
 		ret = 0;
 	}
+	if (FAN_GROUP_FLAG(group, FAN_REPORT_RESPONSE_ID) && id >= -255)
+		return -EINVAL;
 
-	if (fd < 0)
+	if (!FAN_GROUP_FLAG(group, FAN_REPORT_RESPONSE_ID) && id < 0)
 		return -EINVAL;
 
 	spin_lock(&group->notification_lock);
 	list_for_each_entry(event, &group->fanotify_data.access_list,
 			    fae.fse.list) {
-		if (event->fd != fd)
+		int other_id = FAN_GROUP_FLAG(group, FAN_REPORT_RESPONSE_ID) ?
+				       event->id :
+				       event->fd;
+		if (other_id != id)
 			continue;
 
 		list_del_init(&event->fae.fse.list);
@@ -765,48 +772,58 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
 	    task_tgid(current) != event->pid)
 		metadata.pid = 0;
 
-	/*
-	 * For now, fid mode is required for an unprivileged listener and
-	 * fid mode does not report fd in events.  Keep this check anyway
-	 * for safety in case fid mode requirement is relaxed in the future
-	 * to allow unprivileged listener to get events with no fd and no fid.
-	 */
-	if (!FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV) &&
-	    path && path->mnt && path->dentry) {
-		fd = create_fd(group, path, &f);
+	if (FAN_GROUP_FLAG(group, FAN_REPORT_RESPONSE_ID)) {
+		ret = idr_alloc_cyclic(&group->response_idr, event, 256, INT_MAX,
+				       GFP_NOWAIT);
+		if (ret < 0)
+			return ret;
+		metadata.id = -ret;
+	} else {
 		/*
-		 * Opening an fd from dentry can fail for several reasons.
-		 * For example, when tasks are gone and we try to open their
-		 * /proc files or we try to open a WRONLY file like in sysfs
-		 * or when trying to open a file that was deleted on the
-		 * remote network server.
-		 *
-		 * For a group with FAN_REPORT_FD_ERROR, we will send the
-		 * event with the error instead of the open fd, otherwise
-		 * Userspace may not get the error at all.
-		 * In any case, userspace will not know which file failed to
-		 * open, so add a debug print for further investigation.
+		 * For now, fid mode is required for an unprivileged listener and
+		 * fid mode does not report fd in events.  Keep this check anyway
+		 * for safety in case fid mode requirement is relaxed in the future
+		 * to allow unprivileged listener to get events with no fd and no fid.
 		 */
-		if (fd < 0) {
-			pr_debug("fanotify: create_fd(%pd2) failed err=%d\n",
-				 path->dentry, fd);
-			if (!FAN_GROUP_FLAG(group, FAN_REPORT_FD_ERROR)) {
-				/*
-				 * Historically, we've handled EOPENSTALE in a
-				 * special way and silently dropped such
-				 * events. Now we have to keep it to maintain
-				 * backward compatibility...
-				 */
-				if (fd == -EOPENSTALE)
-					fd = 0;
-				return fd;
+		if (!FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV) && path &&
+		    path->mnt && path->dentry) {
+			fd = create_fd(group, path, &f);
+			/*
+			 * Opening an fd from dentry can fail for several reasons.
+			 * For example, when tasks are gone and we try to open their
+			 * /proc files or we try to open a WRONLY file like in sysfs
+			 * or when trying to open a file that was deleted on the
+			 * remote network server.
+			 *
+			 * For a group with FAN_REPORT_FD_ERROR, we will send the
+			 * event with the error instead of the open fd, otherwise
+			 * Userspace may not get the error at all.
+			 * In any case, userspace will not know which file failed to
+			 * open, so add a debug print for further investigation.
+			 */
+			if (fd < 0) {
+				pr_debug(
+					"fanotify: create_fd(%pd2) failed err=%d\n",
+					path->dentry, fd);
+				if (!FAN_GROUP_FLAG(group,
+						    FAN_REPORT_FD_ERROR)) {
+					/*
+					 * Historically, we've handled EOPENSTALE in a
+					 * special way and silently dropped such
+					 * events. Now we have to keep it to maintain
+					 * backward compatibility...
+					 */
+					if (fd == -EOPENSTALE)
+						fd = 0;
+					return fd;
+				}
 			}
 		}
+		if (FAN_GROUP_FLAG(group, FAN_REPORT_FD_ERROR))
+			metadata.fd = fd;
+		else
+			metadata.fd = fd >= 0 ? fd : FAN_NOFD;
 	}
-	if (FAN_GROUP_FLAG(group, FAN_REPORT_FD_ERROR))
-		metadata.fd = fd;
-	else
-		metadata.fd = fd >= 0 ? fd : FAN_NOFD;
 
 	if (pidfd_mode) {
 		/*
@@ -858,8 +875,12 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
 	if (pidfd_file)
 		fd_install(pidfd, pidfd_file);
 
-	if (fanotify_is_perm_event(event->mask))
-		FANOTIFY_PERM(event)->fd = fd;
+	if (fanotify_is_perm_event(event->mask)) {
+		if (FAN_GROUP_FLAG(group, FAN_REPORT_RESPONSE_ID))
+			FANOTIFY_PERM(event)->id = metadata.id;
+		else
+			FANOTIFY_PERM(event)->fd = fd;
+	}
 
 	return metadata.event_len;
 
@@ -944,7 +965,11 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
 		if (!fanotify_is_perm_event(event->mask)) {
 			fsnotify_destroy_event(group, &event->fse);
 		} else {
-			if (ret <= 0 || FANOTIFY_PERM(event)->fd < 0) {
+			if (ret <= 0 ||
+			    ((FAN_GROUP_FLAG(group, FAN_REPORT_RESPONSE_ID) &&
+			      FANOTIFY_PERM(event)->id >= -255) ||
+			     (!FAN_GROUP_FLAG(group, FAN_REPORT_RESPONSE_ID) &&
+			      FANOTIFY_PERM(event)->fd < 0))) {
 				spin_lock(&group->notification_lock);
 				finish_permission_event(group,
 					FANOTIFY_PERM(event), FAN_DENY, NULL);
@@ -1584,6 +1609,15 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 		return -EINVAL;
 
 	/*
+     * With group that reports fid info and allows only pre-content events,
+     * user may request to get a response id instead of event->fd.
+     * FAN_REPORT_FD_ERROR does not make sense in this case.
+     */
+	if ((flags & FAN_REPORT_RESPONSE_ID) &&
+	    (!fid_mode || class == FAN_CLASS_NOTIF))
+		return -EINVAL;
+
+	/*
 	 * Child name is reported with parent fid so requires dir fid.
 	 * We can report both child fid and dir fid with or without name.
 	 */
@@ -1660,6 +1694,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 		fd = -EINVAL;
 		goto out_destroy_group;
 	}
+	idr_init(&group->response_idr);
 
 	BUILD_BUG_ON(!(FANOTIFY_ADMIN_INIT_FLAGS & FAN_UNLIMITED_QUEUE));
 	if (flags & FAN_UNLIMITED_QUEUE) {
@@ -2145,7 +2180,7 @@ static int __init fanotify_user_setup(void)
 				     FANOTIFY_DEFAULT_MAX_USER_MARKS);
 
 	BUILD_BUG_ON(FANOTIFY_INIT_FLAGS & FANOTIFY_INTERNAL_GROUP_FLAGS);
-	BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 14);
+	BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 15);
 	BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 11);
 
 	fanotify_mark_cache = KMEM_CACHE(fanotify_mark,
diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index 879cff5eccd4..4463fcfd16bb 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -38,7 +38,8 @@
 					 FAN_REPORT_PIDFD | \
 					 FAN_REPORT_FD_ERROR | \
 					 FAN_UNLIMITED_QUEUE | \
-					 FAN_UNLIMITED_MARKS)
+					 FAN_UNLIMITED_MARKS | \
+					 FAN_REPORT_RESPONSE_ID)
 
 /*
  * fanotify_init() flags that are allowed for user without CAP_SYS_ADMIN.
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 832d94d783d9..83c82331866b 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -232,6 +232,7 @@ struct fsnotify_group {
 	unsigned int max_events;		/* maximum events allowed on the list */
 	enum fsnotify_group_prio priority;	/* priority for sending events */
 	bool shutdown;		/* group is being shut down, don't queue more events */
+	struct idr response_idr; /* used for response id allocation */
 
 #define FSNOTIFY_GROUP_USER	0x01 /* user allocated group */
 #define FSNOTIFY_GROUP_DUPS	0x02 /* allow multiple marks per object */
diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
index 3201d8d6c51a..04bbbfb5f7f3 100644
--- a/include/uapi/linux/fanotify.h
+++ b/include/uapi/linux/fanotify.h
@@ -67,6 +67,7 @@
 #define FAN_REPORT_TARGET_FID	0x00001000	/* Report dirent target id  */
 #define FAN_REPORT_FD_ERROR	0x00002000	/* event->fd can report error */
 #define FAN_REPORT_MNT		0x00004000	/* Report mount events */
+#define FAN_REPORT_RESPONSE_ID		0x00008000 /* event->fd is a response id */
 
 /* Convenience macro - FAN_REPORT_NAME requires FAN_REPORT_DIR_FID */
 #define FAN_REPORT_DFID_NAME	(FAN_REPORT_DIR_FID | FAN_REPORT_NAME)
@@ -143,7 +144,10 @@ struct fanotify_event_metadata {
 	__u8 reserved;
 	__u16 metadata_len;
 	__aligned_u64 mask;
-	__s32 fd;
+	union {
+		__s32 fd;
+		__s32 id; /* FAN_REPORT_RESPONSE_ID */
+	};
 	__s32 pid;
 };
 
@@ -227,7 +231,10 @@ struct fanotify_event_info_mnt {
 #define FAN_RESPONSE_INFO_AUDIT_RULE	1
 
 struct fanotify_response {
-	__s32 fd;
+	union {
+		__s32 fd;
+		__s32 id; /* FAN_REPORT_RESPONSE_ID */
+	};
 	__u32 response;
 };
 
diff --git a/tools/include/uapi/linux/fanotify.h b/tools/include/uapi/linux/fanotify.h
index 3201d8d6c51a..04bbbfb5f7f3 100644
--- a/tools/include/uapi/linux/fanotify.h
+++ b/tools/include/uapi/linux/fanotify.h
@@ -67,6 +67,7 @@
 #define FAN_REPORT_TARGET_FID	0x00001000	/* Report dirent target id  */
 #define FAN_REPORT_FD_ERROR	0x00002000	/* event->fd can report error */
 #define FAN_REPORT_MNT		0x00004000	/* Report mount events */
+#define FAN_REPORT_RESPONSE_ID		0x00008000 /* event->fd is a response id */
 
 /* Convenience macro - FAN_REPORT_NAME requires FAN_REPORT_DIR_FID */
 #define FAN_REPORT_DFID_NAME	(FAN_REPORT_DIR_FID | FAN_REPORT_NAME)
@@ -143,7 +144,10 @@ struct fanotify_event_metadata {
 	__u8 reserved;
 	__u16 metadata_len;
 	__aligned_u64 mask;
-	__s32 fd;
+	union {
+		__s32 fd;
+		__s32 id; /* FAN_REPORT_RESPONSE_ID */
+	};
 	__s32 pid;
 };
 
@@ -227,7 +231,10 @@ struct fanotify_event_info_mnt {
 #define FAN_RESPONSE_INFO_AUDIT_RULE	1
 
 struct fanotify_response {
-	__s32 fd;
+	union {
+		__s32 fd;
+		__s32 id; /* FAN_REPORT_RESPONSE_ID */
+	};
 	__u32 response;
 };
 
-- 
2.47.1






[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux