On Fri, Oct 29, 2021 at 3:54 PM Miklos Szeredi <miklos@xxxxxxxxxx> wrote:
>
> On Fri, Oct 29, 2021 at 01:37:49AM +0300, Amir Goldstein wrote:
> > On Thu, Oct 28, 2021 at 11:41 PM Georg Müller <georgmueller@xxxxxxx> wrote:
> > >
> > > Hi,
> > >
> > > I was trying to implement .tmpfile for overlayfs inode_operations to support O_TMPFILE.
> > >
> > > Docker with aufs supports it, but this is deprecated and removed from current docker. I now have a work-around in my code (create tmpfile+unlink), but
> > > I thought it might be a good idea to have tmpfile support in overlayfs.
> > >
> > > I was trying to do it on my own, but I have some headaches to what is necessary to achieve the goal.
> > >
> > > From my understanding, I have to find the dentry for the upper dir (or workdir) and call vfs_tmpdir() for this, but I am running from oops to oops.
> > >
> > > Is there some hint what I have to do to achieve the goal?
> > >
> >
> > You'd want to use ovl_create_object() and probably pass a tmpfile argument
> > then pass it on struct ovl_cattr to ovl_create_or_link() after that
> > it becomes more complicated. You'd need ovl_create_tempfile() like
> > ovl_create_upper().
> > You can follow xfs_generic_create() for some clues.
> > You need parts of ovl_instantiate() but not all of it - it's a mess.
>
> Here's something I prepared earlier ;)
>
> Don't know why it got stuck, quite possibly I realized some fatal flaw that I
> can't remember anymore...
>
> Seems to work though, so getting this out for review and testing.
>
You may add:
Reviewed-by: Amir Goldstein <amir73il@xxxxxxxxx>
(See one suggestion below)
and
Tested-by: Amir Goldstein <amir73il@xxxxxxxxx>
With this patch, these fstests now run and pass:
generic/004 generic/389
generic/530 and generic/531 also use O_TMPFILE, but they also ran before this
patch because they fall back to creat+unlink when O_TMPFILE fails
generic/530 passes and generic/531 OOMs on my VM with or without this patch.
No regressions observed with -g overlay/quick.
Thanks,
Amir.
>
> ---
> fs/overlayfs/dir.c | 122 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 122 insertions(+)
>
> --- a/fs/overlayfs/dir.c
> +++ b/fs/overlayfs/dir.c
> @@ -1295,6 +1295,127 @@ static int ovl_rename(struct user_namesp
> return err;
> }
>
> +static int ovl_create_upper_tmpfile(struct dentry *dentry, struct inode *inode,
> + umode_t mode)
> +{
> + struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
> + struct dentry *newdentry;
> + struct ovl_inode_params oip;
> +
> + if (!IS_POSIXACL(d_inode(upperdir)))
> + mode &= ~current_umask();
> +
> + newdentry = vfs_tmpfile(&init_user_ns, upperdir, mode, 0);
> + if (IS_ERR(newdentry))
> + return PTR_ERR(newdentry);
> +
> + oip = (struct ovl_inode_params) {
> + .upperdentry = newdentry,
> + .newinode = inode,
> + };
> +
> + ovl_dentry_set_upper_alias(dentry);
> + ovl_dentry_update_reval(dentry, newdentry,
> + DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE);
> +
> + /*
> + * ovl_obtain_alias() can be called after ovl_create_real()
> + * and before we get here, so we may get an inode from cache
> + * with the same real upperdentry that is not the inode we
> + * pre-allocated. In this case we will use the cached inode
> + * to instantiate the new dentry.
> + */
> + inode = ovl_get_inode(dentry->d_sb, &oip);
> + if (IS_ERR(inode)) {
> + dput(newdentry);
> + return PTR_ERR(inode);
> + }
> + /* d_tmpfile() expects inode to have a positive link count */
> + set_nlink(inode, 1);
> +
> + d_tmpfile(dentry, inode);
> + if (inode != oip.newinode) {
> + pr_warn_ratelimited("newly created inode found in cache (%pd2)\n",
> + dentry);
> + }
> + return 0;
> +}
> +
> +static int ovl_create_tmpfile(struct dentry *dentry, struct inode *inode,
> + umode_t mode)
> +{
> + int err;
> + const struct cred *old_cred;
> + struct cred *override_cred;
> + struct dentry *parent = dentry->d_parent;
> +
> + err = ovl_copy_up(parent);
> + if (err)
> + return err;
> +
> + old_cred = ovl_override_creds(dentry->d_sb);
> +
> + err = -ENOMEM;
> + override_cred = prepare_creds();
> + if (override_cred) {
> + override_cred->fsuid = inode->i_uid;
> + override_cred->fsgid = inode->i_gid;
> + err = security_dentry_create_files_as(dentry, mode,
> + &dentry->d_name, old_cred,
> + override_cred);
> + if (err) {
> + put_cred(override_cred);
> + goto out_revert_creds;
> + }
> + put_cred(override_creds(override_cred));
> + put_cred(override_cred);
> +
> + err = ovl_create_upper_tmpfile(dentry, inode, mode);
> + }
> +out_revert_creds:
> + revert_creds(old_cred);
> + return err;
> +}
> +
> +
> +static int ovl_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
> + struct dentry *dentry, umode_t mode)
> +{
> + int err;
> + struct inode *inode;
> +
You could add here:
+ if (!OVL_FS(dentry->d_sb)->tmpfile)
+ return -EOPNOTSUPP;
+
> + dentry->d_fsdata = ovl_alloc_entry(0);
> + if (!dentry->d_fsdata)
> + return -ENOMEM;
> +
> + err = ovl_want_write(dentry);
> + if (err)
> + goto out;
> +
> + /* Preallocate inode to be used by ovl_get_inode() */
> + err = -ENOMEM;
> + inode = ovl_new_inode(dentry->d_sb, mode, 0);
> + if (!inode)
> + goto out_drop_write;
> +
> + spin_lock(&inode->i_lock);
> + inode->i_state |= I_CREATING;
> + spin_unlock(&inode->i_lock);
> +
> + inode_init_owner(&init_user_ns, inode, dentry->d_parent->d_inode, mode);
> + mode = inode->i_mode;
> +
> + err = ovl_create_tmpfile(dentry, inode, mode);
> + /* Did we end up using the preallocated inode? */
> + if (inode != d_inode(dentry))
> + iput(inode);
> +
> +out_drop_write:
> + ovl_drop_write(dentry);
> +out:
> + return err;
> +}
> +
> const struct inode_operations ovl_dir_inode_operations = {
> .lookup = ovl_lookup,
> .mkdir = ovl_mkdir,
> @@ -1313,4 +1434,5 @@ const struct inode_operations ovl_dir_in
> .update_time = ovl_update_time,
> .fileattr_get = ovl_fileattr_get,
> .fileattr_set = ovl_fileattr_set,
> + .tmpfile = ovl_tmpfile,
> };