An unprivileged user is allowed to create an fanotify group and add inode marks, but not filesystem, mntns and mount marks. Add limited support for setting up filesystem, mntns and mount marks by an unprivileged user under the following conditions: 1. User has CAP_SYS_ADMIN in the user ns where the group was created 2.a. User has CAP_SYS_ADMIN in the user ns where the filesystem was mounted (implies FS_USERNS_MOUNT) OR (in case setting up a mntns or mount mark) 2.b. User has CAP_SYS_ADMIN in the user ns associated with the mntns Signed-off-by: Amir Goldstein <amir73il@xxxxxxxxx> --- fs/notify/fanotify/fanotify.c | 1 + fs/notify/fanotify/fanotify_user.c | 36 +++++++++++++++++++++--------- include/linux/fanotify.h | 5 ++--- include/linux/fsnotify_backend.h | 1 + 4 files changed, 30 insertions(+), 13 deletions(-) diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 6d386080faf2..060d9bee34bd 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -1009,6 +1009,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask, static void fanotify_free_group_priv(struct fsnotify_group *group) { + put_user_ns(group->user_ns); kfree(group->fanotify_data.merge_hash); if (group->fanotify_data.ucounts) dec_ucount(group->fanotify_data.ucounts, diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 471c57832357..b4255b661bda 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -1499,6 +1499,7 @@ static struct hlist_head *fanotify_alloc_merge_hash(void) /* fanotify syscalls */ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) { + struct user_namespace *user_ns = current_user_ns(); struct fsnotify_group *group; int f_flags, fd; unsigned int fid_mode = flags & FANOTIFY_FID_BITS; @@ -1513,10 +1514,11 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) /* * An unprivileged user can setup an fanotify group with * limited functionality - an unprivileged group is limited to - * notification events with file handles and it cannot use - * unlimited queue/marks. + * notification events with file handles or mount ids and it + * cannot use unlimited queue/marks. */ - if ((flags & FANOTIFY_ADMIN_INIT_FLAGS) || !fid_mode) + if ((flags & FANOTIFY_ADMIN_INIT_FLAGS) || + !(flags & (FANOTIFY_FID_BITS | FAN_REPORT_MNT))) return -EPERM; /* @@ -1595,8 +1597,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) } /* Enforce groups limits per user in all containing user ns */ - group->fanotify_data.ucounts = inc_ucount(current_user_ns(), - current_euid(), + group->fanotify_data.ucounts = inc_ucount(user_ns, current_euid(), UCOUNT_FANOTIFY_GROUPS); if (!group->fanotify_data.ucounts) { fd = -EMFILE; @@ -1605,6 +1606,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) group->fanotify_data.flags = flags | internal_flags; group->memcg = get_mem_cgroup_from_mm(current->mm); + group->user_ns = get_user_ns(user_ns); group->fanotify_data.merge_hash = fanotify_alloc_merge_hash(); if (!group->fanotify_data.merge_hash) { @@ -1804,6 +1806,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, struct fsnotify_group *group; struct path path; struct fan_fsid __fsid, *fsid = NULL; + struct user_namespace *user_ns = NULL; u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS; unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; unsigned int mark_cmd = flags & FANOTIFY_MARK_CMD_BITS; @@ -1897,12 +1900,10 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, } /* - * An unprivileged user is not allowed to setup mount nor filesystem - * marks. This also includes setting up such marks by a group that - * was initialized by an unprivileged user. + * A user is allowed to setup sb/mount/mntns marks only if it is + * capable in the user ns where the group was created. */ - if ((!capable(CAP_SYS_ADMIN) || - FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV)) && + if (!ns_capable(group->user_ns, CAP_SYS_ADMIN) && mark_type != FAN_MARK_INODE) return -EPERM; @@ -1987,12 +1988,27 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, obj = inode; } else if (obj_type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) { obj = path.mnt; + user_ns = real_mount(obj)->mnt_ns->user_ns; } else if (obj_type == FSNOTIFY_OBJ_TYPE_SB) { obj = path.mnt->mnt_sb; + user_ns = path.mnt->mnt_sb->s_user_ns; } else if (obj_type == FSNOTIFY_OBJ_TYPE_MNTNS) { obj = mnt_ns_from_dentry(path.dentry); + user_ns = ((struct mnt_namespace *)obj)->user_ns; } + /* + * In addition to being capable in the user ns where group was created, + * the user also needs to be capable in the user ns associated with + * the marked filesystem (for FS_USERNS_MOUNT filesystems) or in the + * user ns associated with the mntns (when marking a mount or mntns). + * This is aligned with the required permissions to open_by_handle_at() + * a directory fid provided with the events. + */ + ret = -EPERM; + if (user_ns && !ns_capable(user_ns, CAP_SYS_ADMIN)) + goto path_put_and_out; + ret = -EINVAL; if (!obj) goto path_put_and_out; diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 3c817dc6292e..879cff5eccd4 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -38,8 +38,7 @@ FAN_REPORT_PIDFD | \ FAN_REPORT_FD_ERROR | \ FAN_UNLIMITED_QUEUE | \ - FAN_UNLIMITED_MARKS | \ - FAN_REPORT_MNT) + FAN_UNLIMITED_MARKS) /* * fanotify_init() flags that are allowed for user without CAP_SYS_ADMIN. @@ -48,7 +47,7 @@ * so one of the flags for reporting file handles is required. */ #define FANOTIFY_USER_INIT_FLAGS (FAN_CLASS_NOTIF | \ - FANOTIFY_FID_BITS | \ + FANOTIFY_FID_BITS | FAN_REPORT_MNT | \ FAN_CLOEXEC | FAN_NONBLOCK) #define FANOTIFY_INIT_FLAGS (FANOTIFY_ADMIN_INIT_FLAGS | \ diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index fc27b53c58c2..d4034ddaf392 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -250,6 +250,7 @@ struct fsnotify_group { * full */ struct mem_cgroup *memcg; /* memcg to charge allocations */ + struct user_namespace *user_ns; /* user ns where group was created */ /* groups can define private fields here or use the void *private */ union { -- 2.34.1