On Fri, Jul 11, 2025 at 8:32 PM Ibrahim Jirdeh <ibrahimjirdeh@xxxxxxxx> wrote: > > This adds support for responding to events via response identifier. This > prevents races if there are multiple processes backing the same fanotify > group (eg. handover of fanotify group to new instance of a backing daemon). > It is also useful for reporting pre-dir-content events without an > event->fd: > https://lore.kernel.org/linux-fsdevel/2dx3pbcnv5w75fxb2ghqtsk6gzl6cuxmd2rinzwbq7xxfjf5z7@3nqidi3mno46/. > > Rather than introducing a new event identifier field and extending > fanotify_event_metadata, we have opted to overload event->fd and restrict > this functionality to use-cases which are using file handle apis > (FAN_REPORT_FID). > > In terms of how response ids are allocated, we use an ida for allocation > and restrict the id range to below -255 to ensure there is no overlap with > existing fd-as-identifier usage. > > Suggested-by: Amir Goldstein <amir73il@xxxxxxxxx> > Link: https://lore.kernel.org/linux-fsdevel/CAOQ4uxheeLXdTLLWrixnTJcxVP+BV4ViXijbvERHPenzgDMUTA@xxxxxxxxxxxxxx/ > Signed-off-by: Ibrahim Jirdeh <ibrahimjirdeh@xxxxxxxx> Looks nice! Reviewed-by: Amir Goldstein <amir73il@xxxxxxxxx> > --- > fs/notify/fanotify/fanotify.c | 4 ++ > fs/notify/fanotify/fanotify.h | 11 ++++- > fs/notify/fanotify/fanotify_user.c | 63 ++++++++++++++++++++--------- > include/linux/fanotify.h | 1 + > include/linux/fsnotify_backend.h | 1 + > include/uapi/linux/fanotify.h | 11 ++++- > tools/include/uapi/linux/fanotify.h | 11 ++++- > 7 files changed, 78 insertions(+), 24 deletions(-) > > diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c > index 34acb7c16e8b..307532464226 100644 > --- a/fs/notify/fanotify/fanotify.c > +++ b/fs/notify/fanotify/fanotify.c > @@ -1045,6 +1045,7 @@ static void fanotify_free_group_priv(struct fsnotify_group *group) > { > put_user_ns(group->user_ns); > kfree(group->fanotify_data.merge_hash); > + ida_destroy(&group->response_ida); > if (group->fanotify_data.ucounts) > dec_ucount(group->fanotify_data.ucounts, > UCOUNT_FANOTIFY_GROUPS); > @@ -1106,6 +1107,9 @@ static void fanotify_free_event(struct fsnotify_group *group, > > event = FANOTIFY_E(fsn_event); > put_pid(event->pid); > + if (fanotify_is_perm_event(event->mask) && > + FAN_GROUP_FLAG(group, FAN_REPORT_RESPONSE_ID)) > + ida_free(&group->response_ida, -FANOTIFY_PERM(event)->id); > switch (event->type) { > case FANOTIFY_EVENT_TYPE_PATH: > fanotify_free_path_event(event); > diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h > index f6d25fcf8692..b6a414f44acc 100644 > --- a/fs/notify/fanotify/fanotify.h > +++ b/fs/notify/fanotify/fanotify.h > @@ -444,7 +444,7 @@ struct fanotify_perm_event { > size_t count; > u32 response; /* userspace answer to the event */ > unsigned short state; /* state of the event */ > - int fd; /* fd we passed to userspace for this event */ > + int id; /* id we passed to userspace for this event */ > union { > struct fanotify_response_info_header hdr; > struct fanotify_response_info_audit_rule audit_rule; > @@ -559,3 +559,12 @@ static inline u32 fanotify_get_response_errno(int res) > { > return (res >> FAN_ERRNO_SHIFT) & FAN_ERRNO_MASK; > } > + > +static inline bool fanotify_is_valid_response_id(struct fsnotify_group *group, > + int id) > +{ > + if (FAN_GROUP_FLAG(group, FAN_REPORT_RESPONSE_ID)) > + return id < -255; > + > + return id >= 0; > +} > diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c > index 19d3f2d914fe..99af23b257f9 100644 > --- a/fs/notify/fanotify/fanotify_user.c > +++ b/fs/notify/fanotify/fanotify_user.c > @@ -330,14 +330,19 @@ static int process_access_response(struct fsnotify_group *group, > size_t info_len) > { > struct fanotify_perm_event *event; > - int fd = response_struct->fd; > + int id = response_struct->id; > u32 response = response_struct->response; > int errno = fanotify_get_response_errno(response); > int ret = info_len; > struct fanotify_response_info_audit_rule friar; > > - pr_debug("%s: group=%p fd=%d response=%x errno=%d buf=%p size=%zu\n", > - __func__, group, fd, response, errno, info, info_len); > + BUILD_BUG_ON(sizeof(response_struct->id) != > + sizeof(response_struct->fd)); > + BUILD_BUG_ON(offsetof(struct fanotify_response, id) != > + offsetof(struct fanotify_response, fd)); > + > + pr_debug("%s: group=%p id=%d response=%x errno=%d buf=%p size=%zu\n", > + __func__, group, id, response, errno, info, info_len); > /* > * make sure the response is valid, if invalid we do nothing and either > * userspace can send a valid response or we will clean it up after the > @@ -385,19 +390,18 @@ static int process_access_response(struct fsnotify_group *group, > ret = process_access_response_info(info, info_len, &friar); > if (ret < 0) > return ret; > - if (fd == FAN_NOFD) > + if (id == FAN_NOFD) > return ret; > } else { > ret = 0; > } > - > - if (fd < 0) > + if (!fanotify_is_valid_response_id(group, id)) > return -EINVAL; > > spin_lock(&group->notification_lock); > list_for_each_entry(event, &group->fanotify_data.access_list, > fae.fse.list) { > - if (event->fd != fd) > + if (event->id != id) > continue; > > list_del_init(&event->fae.fse.list); > @@ -765,14 +769,20 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, > task_tgid(current) != event->pid) > metadata.pid = 0; > > - /* > - * For now, fid mode is required for an unprivileged listener and > - * fid mode does not report fd in events. Keep this check anyway > - * for safety in case fid mode requirement is relaxed in the future > - * to allow unprivileged listener to get events with no fd and no fid. > - */ > - if (!FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV) && > - path && path->mnt && path->dentry) { > + if (FAN_GROUP_FLAG(group, FAN_REPORT_RESPONSE_ID)) { > + ret = ida_alloc_min(&group->response_ida, 256, GFP_KERNEL); > + if (ret < 0) > + return ret; > + fd = -ret; > + } else if (!FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV) && path && > + path->mnt && path->dentry) { > + /* > + * For now, fid mode and no-permission-events class are required for > + * FANOTIFY_UNPRIV listener and fid mode does not report fd in > + * non-permission notification events. Keep this check anyway for > + * safety in case fid mode requirement is relaxed in the future to > + * allow unprivileged listener to get events with no fd and no fid. > + */ > fd = create_fd(group, path, &f); > /* > * Opening an fd from dentry can fail for several reasons. > @@ -803,7 +813,11 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, > } > } > } > - if (FAN_GROUP_FLAG(group, FAN_REPORT_FD_ERROR)) > + > + BUILD_BUG_ON(sizeof(metadata.id) != sizeof(metadata.fd)); > + BUILD_BUG_ON(offsetof(struct fanotify_event_metadata, id) != > + offsetof(struct fanotify_event_metadata, fd)); > + if (FAN_GROUP_FLAG(group, FAN_REPORT_FD_ERROR | FAN_REPORT_RESPONSE_ID)) > metadata.fd = fd; > else > metadata.fd = fd >= 0 ? fd : FAN_NOFD; > @@ -859,7 +873,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, > fd_install(pidfd, pidfd_file); > > if (fanotify_is_perm_event(event->mask)) > - FANOTIFY_PERM(event)->fd = fd; > + FANOTIFY_PERM(event)->id = fd; > > return metadata.event_len; > > @@ -944,7 +958,9 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, > if (!fanotify_is_perm_event(event->mask)) { > fsnotify_destroy_event(group, &event->fse); > } else { > - if (ret <= 0 || FANOTIFY_PERM(event)->fd < 0) { > + if (ret <= 0 || > + !fanotify_is_valid_response_id( > + group, FANOTIFY_PERM(event)->id)) { > spin_lock(&group->notification_lock); > finish_permission_event(group, > FANOTIFY_PERM(event), FAN_DENY, NULL); > @@ -1583,6 +1599,14 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) > (class | fid_mode) != FAN_CLASS_PRE_CONTENT_FID) > return -EINVAL; > > + /* > + * With group that reports fid info and allows pre-content events, > + * user may request to get a response id instead of event->fd. > + */ > + if ((flags & FAN_REPORT_RESPONSE_ID) && > + (!fid_mode || class == FAN_CLASS_NOTIF)) > + return -EINVAL; > + > /* > * Child name is reported with parent fid so requires dir fid. > * We can report both child fid and dir fid with or without name. > @@ -1660,6 +1684,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) > fd = -EINVAL; > goto out_destroy_group; > } > + ida_init(&group->response_ida); > > BUILD_BUG_ON(!(FANOTIFY_ADMIN_INIT_FLAGS & FAN_UNLIMITED_QUEUE)); > if (flags & FAN_UNLIMITED_QUEUE) { > @@ -2145,7 +2170,7 @@ static int __init fanotify_user_setup(void) > FANOTIFY_DEFAULT_MAX_USER_MARKS); > > BUILD_BUG_ON(FANOTIFY_INIT_FLAGS & FANOTIFY_INTERNAL_GROUP_FLAGS); > - BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 14); > + BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 15); > BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 11); > > fanotify_mark_cache = KMEM_CACHE(fanotify_mark, > diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h > index 879cff5eccd4..85fce0a15005 100644 > --- a/include/linux/fanotify.h > +++ b/include/linux/fanotify.h > @@ -37,6 +37,7 @@ > FAN_REPORT_TID | \ > FAN_REPORT_PIDFD | \ > FAN_REPORT_FD_ERROR | \ > + FAN_REPORT_RESPONSE_ID | \ > FAN_UNLIMITED_QUEUE | \ > FAN_UNLIMITED_MARKS) > > diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h > index 832d94d783d9..27299d5ca668 100644 > --- a/include/linux/fsnotify_backend.h > +++ b/include/linux/fsnotify_backend.h > @@ -232,6 +232,7 @@ struct fsnotify_group { > unsigned int max_events; /* maximum events allowed on the list */ > enum fsnotify_group_prio priority; /* priority for sending events */ > bool shutdown; /* group is being shut down, don't queue more events */ > + struct ida response_ida; /* used for response id allocation */ > > #define FSNOTIFY_GROUP_USER 0x01 /* user allocated group */ > #define FSNOTIFY_GROUP_DUPS 0x02 /* allow multiple marks per object */ > diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h > index 28074ab3e794..e705dda14dfc 100644 > --- a/include/uapi/linux/fanotify.h > +++ b/include/uapi/linux/fanotify.h > @@ -67,6 +67,7 @@ > #define FAN_REPORT_TARGET_FID 0x00001000 /* Report dirent target id */ > #define FAN_REPORT_FD_ERROR 0x00002000 /* event->fd can report error */ > #define FAN_REPORT_MNT 0x00004000 /* Report mount events */ > +#define FAN_REPORT_RESPONSE_ID 0x00008000 /* event->fd is a response id */ > > /* Convenience macro - FAN_REPORT_NAME requires FAN_REPORT_DIR_FID */ > #define FAN_REPORT_DFID_NAME (FAN_REPORT_DIR_FID | FAN_REPORT_NAME) > @@ -144,7 +145,10 @@ struct fanotify_event_metadata { > __u8 reserved; > __u16 metadata_len; > __aligned_u64 mask; > - __s32 fd; > + union { > + __s32 fd; > + __s32 id; /* FAN_REPORT_RESPONSE_ID */ > + }; > __s32 pid; > }; > > @@ -228,7 +232,10 @@ struct fanotify_event_info_mnt { > #define FAN_RESPONSE_INFO_AUDIT_RULE 1 > > struct fanotify_response { > - __s32 fd; > + union { > + __s32 fd; > + __s32 id; /* FAN_REPORT_RESPONSE_ID */ > + }; > __u32 response; > }; > > diff --git a/tools/include/uapi/linux/fanotify.h b/tools/include/uapi/linux/fanotify.h > index e710967c7c26..6a3ada7c4abf 100644 > --- a/tools/include/uapi/linux/fanotify.h > +++ b/tools/include/uapi/linux/fanotify.h > @@ -67,6 +67,7 @@ > #define FAN_REPORT_TARGET_FID 0x00001000 /* Report dirent target id */ > #define FAN_REPORT_FD_ERROR 0x00002000 /* event->fd can report error */ > #define FAN_REPORT_MNT 0x00004000 /* Report mount events */ > +#define FAN_REPORT_RESPONSE_ID 0x00008000 /* event->fd is a response id */ > > /* Convenience macro - FAN_REPORT_NAME requires FAN_REPORT_DIR_FID */ > #define FAN_REPORT_DFID_NAME (FAN_REPORT_DIR_FID | FAN_REPORT_NAME) > @@ -141,7 +142,10 @@ struct fanotify_event_metadata { > __u8 reserved; > __u16 metadata_len; > __aligned_u64 mask; > - __s32 fd; > + union { > + __s32 fd; > + __s32 id; /* FAN_REPORT_RESPONSE_ID */ > + }; > __s32 pid; > }; > > @@ -225,7 +229,10 @@ struct fanotify_event_info_mnt { > #define FAN_RESPONSE_INFO_AUDIT_RULE 1 > > struct fanotify_response { > - __s32 fd; > + union { > + __s32 fd; > + __s32 id; /* FAN_REPORT_RESPONSE_ID */ > + }; > __u32 response; > }; > > -- > 2.47.1 >