Re: [PATCH 1/2] fs: add a FMODE_ flag to indicate IOCB_HAS_METADATA availability

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Aug 19, 2025 at 03:34:47PM +0200, Christoph Hellwig wrote:
> On Tue, Aug 19, 2025 at 12:14:26PM +0200, Christian Brauner wrote:
> > On Tue, Aug 19, 2025 at 11:22:19AM +0200, Christoph Hellwig wrote:
> > > On Tue, Aug 19, 2025 at 11:14:41AM +0200, Christian Brauner wrote:
> > > > It kind of feels like that f_iocb_flags should be changed so that
> > > > subsystems like block can just raise some internal flags directly
> > > > instead of grabbing a f_mode flag everytime they need to make some
> > > > IOCB_* flag conditional on the file. That would mean changing the
> > > > unconditional assigment to file->f_iocb_flags to a |= to not mask flags
> > > > raised by the kernel itself.
> > > 
> > > This isn't about block.  I will be setting this for a file system
> > > operation as well and use the same io_uring code for that.  That's
> > > how I ran into the issue.
> > 
> > Yes, I get that. That's not what this is about. If IOCB_* flags keep
> > getting added that then need an additional opt-out via an FMODE_* flag
> > it's very annoying because you keep taking FMODE_* bits.
> 
> Agreed.
> 
> > The thing is
> > that it should be possible to keep that information completely contained
> > to f_iocb_flags without polluting f_mode.
> 
> I don't really understand how that would work.  The basic problem is that
> we add optional features/flags to read and write, and we need a way to
> check that they are supported and reject them without each time having
> to update all instances.  For that VFS-level code needs some way to do
> a per-instance check of available features.

I meant something like this which should effectively be the same thing
just that we move the burden of having to use two bits completely into
file->f_iocb_flags instead of wasting a file->f_mode bit:

diff --git a/block/fops.c b/block/fops.c
index ddbc69c0922b..a90f1127d035 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -689,7 +689,7 @@ static int blkdev_open(struct inode *inode, struct file *filp)
        if (bdev_can_atomic_write(bdev))
                filp->f_mode |= FMODE_CAN_ATOMIC_WRITE;
        if (blk_get_integrity(bdev->bd_disk))
-               filp->f_mode |= FMODE_HAS_METADATA;
+               filp->f_iocb_flags |= IOCB_MAY_USE_METADATA;

        ret = bdev_open(bdev, mode, filp->private_data, NULL, filp);
        if (ret)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 601d036a6c78..a40a1bf7bad5 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -149,9 +149,6 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 /* Expect random access pattern */
 #define FMODE_RANDOM           ((__force fmode_t)(1 << 12))

-/* Supports IOCB_HAS_METADATA */
-#define FMODE_HAS_METADATA     ((__force fmode_t)(1 << 13))
-
 /* File is opened with O_PATH; almost nothing can be done with it */
 #define FMODE_PATH             ((__force fmode_t)(1 << 14))

@@ -384,25 +381,27 @@ struct readahead_control;
 /* kiocb is a read or write operation submitted by fs/aio.c. */
 #define IOCB_AIO_RW            (1 << 23)
 #define IOCB_HAS_METADATA      (1 << 24)
+#define IOCB_MAY_USE_METADATA  (1 << 25)

 /* for use in trace events */
 #define TRACE_IOCB_STRINGS \
-       { IOCB_HIPRI,           "HIPRI" }, \
-       { IOCB_DSYNC,           "DSYNC" }, \
-       { IOCB_SYNC,            "SYNC" }, \
-       { IOCB_NOWAIT,          "NOWAIT" }, \
-       { IOCB_APPEND,          "APPEND" }, \
-       { IOCB_ATOMIC,          "ATOMIC" }, \
-       { IOCB_DONTCACHE,       "DONTCACHE" }, \
-       { IOCB_EVENTFD,         "EVENTFD"}, \
-       { IOCB_DIRECT,          "DIRECT" }, \
-       { IOCB_WRITE,           "WRITE" }, \
-       { IOCB_WAITQ,           "WAITQ" }, \
-       { IOCB_NOIO,            "NOIO" }, \
-       { IOCB_ALLOC_CACHE,     "ALLOC_CACHE" }, \
-       { IOCB_DIO_CALLER_COMP, "CALLER_COMP" }, \
-       { IOCB_AIO_RW,          "AIO_RW" }, \
-       { IOCB_HAS_METADATA,    "AIO_HAS_METADATA" }
+       { IOCB_HIPRI,                   "HIPRI" }, \
+       { IOCB_DSYNC,                   "DSYNC" }, \
+       { IOCB_SYNC,                    "SYNC" }, \
+       { IOCB_NOWAIT,                  "NOWAIT" }, \
+       { IOCB_APPEND,                  "APPEND" }, \
+       { IOCB_ATOMIC,                  "ATOMIC" }, \
+       { IOCB_DONTCACHE,               "DONTCACHE" }, \
+       { IOCB_EVENTFD,                 "EVENTFD"}, \
+       { IOCB_DIRECT,                  "DIRECT" }, \
+       { IOCB_WRITE,                   "WRITE" }, \
+       { IOCB_WAITQ,                   "WAITQ" }, \
+       { IOCB_NOIO,                    "NOIO" }, \
+       { IOCB_ALLOC_CACHE,             "ALLOC_CACHE" }, \
+       { IOCB_DIO_CALLER_COMP,         "CALLER_COMP" }, \
+       { IOCB_AIO_RW,                  "AIO_RW" }, \
+       { IOCB_HAS_METADATA,            "AIO_HAS_METADATA" }, \
+       { IOCB_MAY_USE_METADATA,        "AIO_MAY_USE_METADATA" }

 struct kiocb {
        struct file             *ki_filp;
@@ -3786,6 +3785,10 @@ static inline bool vma_is_fsdax(struct vm_area_struct *vma)
 static inline int iocb_flags(struct file *file)
 {
        int res = 0;
+
+       /* Retain flags that the kernel raises internally. */
+       res |= (file->f_iocb_flags & (IOCB_HAS_METADATA | IOCB_MAY_USE_METADATA));
+
        if (file->f_flags & O_APPEND)
                res |= IOCB_APPEND;
        if (file->f_flags & O_DIRECT)
diff --git a/io_uring/rw.c b/io_uring/rw.c
index af5a54b5db12..23e9103c62d4 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -886,7 +886,7 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type)
        if (req->flags & REQ_F_HAS_METADATA) {
                struct io_async_rw *io = req->async_data;

-               if (!(file->f_mode & FMODE_HAS_METADATA))
+               if (!(file->f_iocb_flags & IOCB_MAY_USE_METADATA))
                        return -EINVAL;

                /*





[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux