On Thu, Jul 10, 2025 at 10:38 AM Amir Goldstein <amir73il@xxxxxxxxx> wrote: > > On Thu, Jul 10, 2025 at 6:21 AM Ibrahim Jirdeh <ibrahimjirdeh@xxxxxxxx> wrote: > > > > From: ibrahimjirdeh <ibrahimjirdeh@xxxxxx> > > > > Summary: > > Formatting issue. > > > This adds support for responding to events via response identifier. This > > prevents races if there are multiple processes backing the same fanotify > > group (eg. handover of fanotify group to new instance of a backing daemon). > > It is also useful for reporting pre-dir-content events without an > > event->fd: > > https://lore.kernel.org/linux-fsdevel/2dx3pbcnv5w75fxb2ghqtsk6gzl6cuxmd2rinzwbq7xxfjf5z7@3nqidi3mno46/. > > > > Rather than introducing a new event identifier field and extending > > fanotify_event_metadata, we have opted to overload event->fd and restrict > > this functionality to use-cases which are using file handle apis > > (FAN_REPORT_FID). > > > > In terms of how response ids are allocated, we use an idr for allocation > > and restrict the id range to below -255 to ensure there is no overlap with > > existing fd-as-identifier usage. We can also leverage the added idr for > > more efficient lookup when handling response although that is not done > > in this patch. > > > > Suggested-by: Amir Goldstein <amir73il@xxxxxxxxx> > > Link: https://lore.kernel.org/linux-fsdevel/CAOQ4uxheeLXdTLLWrixnTJcxVP+BV4ViXijbvERHPenzgDMUTA@xxxxxxxxxxxxxx/ > > Signed-off-by: Ibrahim Jirdeh <ibrahimjirdeh@xxxxxxxx> > > --- > > fs/notify/fanotify/fanotify.c | 3 + > > fs/notify/fanotify/fanotify.h | 5 +- > > fs/notify/fanotify/fanotify_user.c | 129 ++++++++++++++++++---------- > > include/linux/fanotify.h | 3 +- > > include/linux/fsnotify_backend.h | 1 + > > include/uapi/linux/fanotify.h | 11 ++- > > tools/include/uapi/linux/fanotify.h | 11 ++- > > 7 files changed, 110 insertions(+), 53 deletions(-) > > > > diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c > > index 34acb7c16e8b..d9aebd359199 100644 > > --- a/fs/notify/fanotify/fanotify.c > > +++ b/fs/notify/fanotify/fanotify.c > > @@ -1106,6 +1106,9 @@ static void fanotify_free_event(struct fsnotify_group *group, > > > > event = FANOTIFY_E(fsn_event); > > put_pid(event->pid); > > + if (fanotify_is_perm_event(event->mask) && > > + FAN_GROUP_FLAG(group, FAN_REPORT_RESPONSE_ID)) > > + idr_remove(&group->response_idr, -FANOTIFY_PERM(event)->id); > > switch (event->type) { > > case FANOTIFY_EVENT_TYPE_PATH: > > fanotify_free_path_event(event); > > diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h > > index f6d25fcf8692..8d62321237d6 100644 > > --- a/fs/notify/fanotify/fanotify.h > > +++ b/fs/notify/fanotify/fanotify.h > > @@ -444,7 +444,10 @@ struct fanotify_perm_event { > > size_t count; > > u32 response; /* userspace answer to the event */ > > unsigned short state; /* state of the event */ > > - int fd; /* fd we passed to userspace for this event */ > > + union { > > + __s32 fd; /* fd we passed to userspace for this event */ > > + __s32 id; /* FAN_REPORT_RESPONSE_ID */ > > + }; > > That's an "int" and I don't think that we need a union in this internal context. > int id; /* id we passed to userspace for this event */ > is always correct, whether the id is also an fd or not. > > > > union { > > struct fanotify_response_info_header hdr; > > struct fanotify_response_info_audit_rule audit_rule; > > diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c > > index 19d3f2d914fe..b033f86e0db3 100644 > > --- a/fs/notify/fanotify/fanotify_user.c > > +++ b/fs/notify/fanotify/fanotify_user.c > > @@ -330,14 +330,16 @@ static int process_access_response(struct fsnotify_group *group, > > size_t info_len) > > { > > struct fanotify_perm_event *event; > > - int fd = response_struct->fd; > > + int id = FAN_GROUP_FLAG(group, FAN_REPORT_RESPONSE_ID) ? > > + response_struct->id : > > + response_struct->fd; > > Not needed. Can always use int id = response_struct->id; > it is never used as an actual fd in this context. > But this function would be a good place to add: > > BUILD_BUG_ON(sizeof(response_struct->id) != sizeof(response_struct->fd)); > BUILD_BUG_ON(offsetof(struct fanotity_response, id) != offsetof(struct > fanotify_repose, fd)); > > to make sure that code can use either alternative safely. > > > u32 response = response_struct->response; > > int errno = fanotify_get_response_errno(response); > > int ret = info_len; > > struct fanotify_response_info_audit_rule friar; > > > > - pr_debug("%s: group=%p fd=%d response=%x errno=%d buf=%p size=%zu\n", > > - __func__, group, fd, response, errno, info, info_len); > > + pr_debug("%s: group=%p id=%d response=%x errno=%d buf=%p size=%zu\n", > > + __func__, group, id, response, errno, info, info_len); > > /* > > * make sure the response is valid, if invalid we do nothing and either > > * userspace can send a valid response or we will clean it up after the > > @@ -385,19 +387,24 @@ static int process_access_response(struct fsnotify_group *group, > > ret = process_access_response_info(info, info_len, &friar); > > if (ret < 0) > > return ret; > > - if (fd == FAN_NOFD) > > + if (id == FAN_NOFD) > > return ret; > > } else { > > ret = 0; > > } > > + if (FAN_GROUP_FLAG(group, FAN_REPORT_RESPONSE_ID) && id >= -255) > > + return -EINVAL; > > > > - if (fd < 0) > > + if (!FAN_GROUP_FLAG(group, FAN_REPORT_RESPONSE_ID) && id < 0) > > return -EINVAL; > > Please use helper for both cases above: > static inline fanotify_is_valid_response_id(struct fsnotify_group > *group, int id) > > > > > spin_lock(&group->notification_lock); > > list_for_each_entry(event, &group->fanotify_data.access_list, > > fae.fse.list) { > > - if (event->fd != fd) > > + int other_id = FAN_GROUP_FLAG(group, FAN_REPORT_RESPONSE_ID) ? > > + event->id : > > + event->fd; > > + if (other_id != id) > > No need. can use if (event->id != id) > > > continue; > > > > list_del_init(&event->fae.fse.list); > > @@ -765,48 +772,58 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, > > task_tgid(current) != event->pid) > > metadata.pid = 0; > > > > - /* > > - * For now, fid mode is required for an unprivileged listener and > > - * fid mode does not report fd in events. Keep this check anyway > > - * for safety in case fid mode requirement is relaxed in the future > > - * to allow unprivileged listener to get events with no fd and no fid. > > - */ > > - if (!FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV) && > > - path && path->mnt && path->dentry) { > > - fd = create_fd(group, path, &f); > > + if (FAN_GROUP_FLAG(group, FAN_REPORT_RESPONSE_ID)) { > > + ret = idr_alloc_cyclic(&group->response_idr, event, 256, INT_MAX, > > + GFP_NOWAIT); > > + if (ret < 0) > > + return ret; > > + metadata.id = -ret; > > + } else { > > Please avoid this unneeded churn and unneeded nesting: > > + if (FAN_GROUP_FLAG(group, FAN_REPORT_RESPONSE_ID)) { > ... > + } else if (!FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV) && ... > > And I would assign fd = -ret; to make the code below simpler... > > > /* > > - * Opening an fd from dentry can fail for several reasons. > > - * For example, when tasks are gone and we try to open their > > - * /proc files or we try to open a WRONLY file like in sysfs > > - * or when trying to open a file that was deleted on the > > - * remote network server. > > - * > > - * For a group with FAN_REPORT_FD_ERROR, we will send the > > - * event with the error instead of the open fd, otherwise > > - * Userspace may not get the error at all. > > - * In any case, userspace will not know which file failed to > > - * open, so add a debug print for further investigation. > > + * For now, fid mode is required for an unprivileged listener and > > + * fid mode does not report fd in events. Keep this check anyway > > + * for safety in case fid mode requirement is relaxed in the future > > + * to allow unprivileged listener to get events with no fd and no fid. > > */ > > - if (fd < 0) { > > - pr_debug("fanotify: create_fd(%pd2) failed err=%d\n", > > - path->dentry, fd); > > - if (!FAN_GROUP_FLAG(group, FAN_REPORT_FD_ERROR)) { > > - /* > > - * Historically, we've handled EOPENSTALE in a > > - * special way and silently dropped such > > - * events. Now we have to keep it to maintain > > - * backward compatibility... > > - */ > > - if (fd == -EOPENSTALE) > > - fd = 0; > > - return fd; > > + if (!FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV) && path && > > + path->mnt && path->dentry) { > > + fd = create_fd(group, path, &f); > > + /* > > + * Opening an fd from dentry can fail for several reasons. > > + * For example, when tasks are gone and we try to open their > > + * /proc files or we try to open a WRONLY file like in sysfs > > + * or when trying to open a file that was deleted on the > > + * remote network server. > > + * > > + * For a group with FAN_REPORT_FD_ERROR, we will send the > > + * event with the error instead of the open fd, otherwise > > + * Userspace may not get the error at all. > > + * In any case, userspace will not know which file failed to > > + * open, so add a debug print for further investigation. > > + */ > > + if (fd < 0) { > > + pr_debug( > > + "fanotify: create_fd(%pd2) failed err=%d\n", > > + path->dentry, fd); > > + if (!FAN_GROUP_FLAG(group, > > + FAN_REPORT_FD_ERROR)) { > > + /* > > + * Historically, we've handled EOPENSTALE in a > > + * special way and silently dropped such > > + * events. Now we have to keep it to maintain > > + * backward compatibility... > > + */ > > + if (fd == -EOPENSTALE) > > + fd = 0; > > + return fd; > > + } > > } > > } > > + if (FAN_GROUP_FLAG(group, FAN_REPORT_FD_ERROR)) > > + metadata.fd = fd; > > + else > > + metadata.fd = fd >= 0 ? fd : FAN_NOFD; > > } > > - if (FAN_GROUP_FLAG(group, FAN_REPORT_FD_ERROR)) > > - metadata.fd = fd; > > - else > > - metadata.fd = fd >= 0 ? fd : FAN_NOFD; > > > > IMO, this is shorter and nicer after assigning fd = -ret; above: > > if (FAN_GROUP_FLAG(group, FAN_REPORT_FD_ERROR | > > FAN_REPORT_RESPONSE_ID)) > metadata.fd = fd; > else > metadata.fd = fd >= 0 ? fd : FAN_NOFD; > And above this code is also a good place to place the build assertion: BUILD_BUG_ON(sizeof(metadata.id) != sizeof(metadata.fd)); BUILD_BUG_ON(offsetof(struct fanotity_event_metadata, id) != offsetof(struct fanotity_event_metadata, fd)); Which provides the justification to use the union fields interchangeably in this simplified code. Thanks, Amir.