On Tue, Sep 02, 2025 at 04:06:08PM +0100, John Garry wrote: > On 22/08/2025 09:02, Ojaswin Mujoo wrote: > > Implement atomic write support to help fuzz atomic writes > > with fsx. > > > > Suggested-by: Ritesh Harjani (IBM) <ritesh.list@xxxxxxxxx> > > Reviewed-by: Darrick J. Wong <djwong@xxxxxxxxxx> > > Signed-off-by: Ojaswin Mujoo <ojaswin@xxxxxxxxxxxxx> > > Generally this looks ok, but I do have some comments, so please check them: > > Reviewed-by: John Garry <john.g.garry@xxxxxxxxxx> > > > --- > > ltp/fsx.c | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++--- > > 1 file changed, 110 insertions(+), 5 deletions(-) > > > > diff --git a/ltp/fsx.c b/ltp/fsx.c > > index 163b9453..1582f6d1 100644 > > --- a/ltp/fsx.c > > +++ b/ltp/fsx.c > > @@ -40,6 +40,7 @@ > > #include <liburing.h> > > #endif > > #include <sys/syscall.h> > > +#include "statx.h" > > #ifndef MAP_FILE > > # define MAP_FILE 0 > > @@ -49,6 +50,10 @@ > > #define RWF_DONTCACHE 0x80 > > #endif > > +#ifndef RWF_ATOMIC > > +#define RWF_ATOMIC 0x40 > > +#endif > > + > > #define NUMPRINTCOLUMNS 32 /* # columns of data to print on each line */ > > /* Operation flags (bitmask) */ > > @@ -110,6 +115,7 @@ enum { > > OP_READ_DONTCACHE, > > OP_WRITE, > > OP_WRITE_DONTCACHE, > > + OP_WRITE_ATOMIC, > > OP_MAPREAD, > > OP_MAPWRITE, > > OP_MAX_LITE, > > @@ -200,6 +206,11 @@ int uring = 0; > > int mark_nr = 0; > > int dontcache_io = 1; > > int hugepages = 0; /* -h flag */ > > +int do_atomic_writes = 1; /* -a flag disables */ > > + > > +/* User for atomic writes */ > > +int awu_min = 0; > > +int awu_max = 0; > > /* Stores info needed to periodically collapse hugepages */ > > struct hugepages_collapse_info { > > @@ -288,6 +299,7 @@ static const char *op_names[] = { > > [OP_READ_DONTCACHE] = "read_dontcache", > > [OP_WRITE] = "write", > > [OP_WRITE_DONTCACHE] = "write_dontcache", > > + [OP_WRITE_ATOMIC] = "write_atomic", > > [OP_MAPREAD] = "mapread", > > [OP_MAPWRITE] = "mapwrite", > > [OP_TRUNCATE] = "truncate", > > @@ -422,6 +434,7 @@ logdump(void) > > prt("\t***RRRR***"); > > break; > > case OP_WRITE_DONTCACHE: > > + case OP_WRITE_ATOMIC: > > case OP_WRITE: > > prt("WRITE 0x%x thru 0x%x\t(0x%x bytes)", > > lp->args[0], lp->args[0] + lp->args[1] - 1, > > @@ -1073,6 +1086,25 @@ update_file_size(unsigned offset, unsigned size) > > file_size = offset + size; > > } > > +static int is_power_of_2(unsigned n) { > > + return ((n & (n - 1)) == 0); > > +} > > + > > +/* > > + * Round down n to nearest power of 2. > > + * If n is already a power of 2, return n; > > + */ > > +static int rounddown_pow_of_2(int n) { > > + int i = 0; > > + > > + if (is_power_of_2(n)) > > + return n; > > + > > + for (; (1 << i) < n; i++); > > + > > + return 1 << (i - 1); > > Is this the neatest way to do this? Well it is a straigforward o(logn) way. Do you have something else in mind? > > > +} > > + > > void > > dowrite(unsigned offset, unsigned size, int flags) > > { > > @@ -1081,6 +1113,27 @@ dowrite(unsigned offset, unsigned size, int flags) > > offset -= offset % writebdy; > > if (o_direct) > > size -= size % writebdy; > > + if (flags & RWF_ATOMIC) { > > + /* atomic write len must be inbetween awu_min and awu_max */ > > in between > > > + if (size < awu_min) > > + size = awu_min; > > + if (size > awu_max) > > + size = awu_max; > > + > > + /* atomic writes need power-of-2 sizes */ > > + size = rounddown_pow_of_2(size); > > you could have: > > if (size < awu_min) > size = awu_min; > else if (size > awu_max) > size = awu_max; > else > size = rounddown_pow_of_2(size); sure I can do that > > > + > > + /* atomic writes need naturally aligned offsets */ > > + offset -= offset % size; > > + > > + /* Skip the write if we are crossing max filesize */ > > + if ((offset + size) > maxfilelen) { > > + if (!quiet && testcalls > simulatedopcount) > > + prt("skipping atomic write past maxfilelen\n"); > > + log4(OP_WRITE_ATOMIC, offset, size, FL_SKIPPED); > > + return; > > + } > > + } > > if (size == 0) { > > if (!quiet && testcalls > simulatedopcount && !o_direct) > > prt("skipping zero size write\n"); > > @@ -1088,7 +1141,10 @@ dowrite(unsigned offset, unsigned size, int flags) > > return; > > } > > - log4(OP_WRITE, offset, size, FL_NONE); > > + if (flags & RWF_ATOMIC) > > + log4(OP_WRITE_ATOMIC, offset, size, FL_NONE); > > + else > > + log4(OP_WRITE, offset, size, FL_NONE); > > gendata(original_buf, good_buf, offset, size); > > if (offset + size > file_size) { > > @@ -1108,8 +1164,9 @@ dowrite(unsigned offset, unsigned size, int flags) > > (monitorstart == -1 || > > (offset + size > monitorstart && > > (monitorend == -1 || offset <= monitorend)))))) > > - prt("%lld write\t0x%x thru\t0x%x\t(0x%x bytes)\tdontcache=%d\n", testcalls, > > - offset, offset + size - 1, size, (flags & RWF_DONTCACHE) != 0); > > + prt("%lld write\t0x%x thru\t0x%x\t(0x%x bytes)\tdontcache=%d atomic_wr=%d\n", testcalls, > > + offset, offset + size - 1, size, (flags & RWF_DONTCACHE) != 0, > > + (flags & RWF_ATOMIC) != 0); > > nit: > > !!(flags & RWF_ATOMIC) > > I find that a bit neater, but I suppose you are following the example for > RWF_DONTCACHE Yep. > > > iret = fsxwrite(fd, good_buf + offset, size, offset, flags); > > if (iret != size) { > > if (iret == -1) > > @@ -1785,6 +1842,36 @@ do_dedupe_range(unsigned offset, unsigned length, unsigned dest) > > } > > #endif > > +int test_atomic_writes(void) { > > + int ret; > > + struct statx stx; > > + > > + if (o_direct != O_DIRECT) { > > + fprintf(stderr, "main: atomic writes need O_DIRECT (-Z), " > > + "disabling!\n"); > > + return 0; > > + } > > + > > + ret = xfstests_statx(AT_FDCWD, fname, 0, STATX_WRITE_ATOMIC, &stx); > > + if (ret < 0) { > > + fprintf(stderr, "main: Statx failed with %d." > > + " Failed to determine atomic write limits, " > > + " disabling!\n", ret); > > + return 0; > > + } > > + > > + if (stx.stx_attributes & STATX_ATTR_WRITE_ATOMIC && > > + stx.stx_atomic_write_unit_min > 0) { > > + awu_min = stx.stx_atomic_write_unit_min; > > + awu_max = stx.stx_atomic_write_unit_max; > > + return 1; > > + } > > + > > + fprintf(stderr, "main: IO Stack does not support " > > + "atomic writes, disabling!\n"); > > Do we really need to spread this over multiple lines? > > Maybe that is the coding standard - I don't know. Yea, I wouldve liked to keep it on the same but looking at above fprintf()s seems like we break the strings. > > > + return 0; > > +} > > + > > #ifdef HAVE_COPY_FILE_RANGE > > int > > test_copy_range(void) > > @@ -2356,6 +2443,12 @@ have_op: > > goto out; > > } > > break; > > + case OP_WRITE_ATOMIC: > > + if (!do_atomic_writes) { > > + log4(OP_WRITE_ATOMIC, offset, size, FL_SKIPPED); > > + goto out; > > + } > > + break; > > } > > switch (op) { > > @@ -2385,6 +2478,11 @@ have_op: > > dowrite(offset, size, 0); > > break; > > + case OP_WRITE_ATOMIC: > > + TRIM_OFF_LEN(offset, size, maxfilelen); > > + dowrite(offset, size, RWF_ATOMIC); > > + break; > > + > > case OP_MAPREAD: > > TRIM_OFF_LEN(offset, size, file_size); > > domapread(offset, size); > > @@ -2511,13 +2609,14 @@ void > > usage(void) > > { > > fprintf(stdout, "usage: %s", > > - "fsx [-dfhknqxyzBEFHIJKLORWXZ0]\n\ > > + "fsx [-adfhknqxyzBEFHIJKLORWXZ0]\n\ > > [-b opnum] [-c Prob] [-g filldata] [-i logdev] [-j logid]\n\ > > [-l flen] [-m start:end] [-o oplen] [-p progressinterval]\n\ > > [-r readbdy] [-s style] [-t truncbdy] [-w writebdy]\n\ > > [-A|-U] [-D startingop] [-N numops] [-P dirpath] [-S seed]\n\ > > [--replay-ops=opsfile] [--record-ops[=opsfile]] [--duration=seconds]\n\ > > ... fname\n\ > > + -a: disable atomic writes\n\ > > -b opnum: beginning operation number (default 1)\n\ > > -c P: 1 in P chance of file close+open at each op (default infinity)\n\ > > -d: debug output for all operations\n\ > > @@ -3059,9 +3158,13 @@ main(int argc, char **argv) > > setvbuf(stdout, (char *)0, _IOLBF, 0); /* line buffered stdout */ > > while ((ch = getopt_long(argc, argv, > > - "0b:c:de:fg:hi:j:kl:m:no:p:qr:s:t:uw:xyABD:EFJKHzCILN:OP:RS:UWXZ", > > + "0ab:c:de:fg:hi:j:kl:m:no:p:qr:s:t:uw:xyABD:EFJKHzCILN:OP:RS:UWXZ", > > longopts, NULL)) != EOF) > > switch (ch) { > > + case 'a': > > + prt("main(): Atomic writes disabled\n"); > > + do_atomic_writes = 0; > > why an opt-out (and not opt-in)? That's something Darrick suggested a few iterations back so we automatically get the testing if the stack supports it. Thanks for the review. I'll incorporate the minor changes in next version. Regards, ojaswin > > > + break; > > case 'b': > > simulatedopcount = getnum(optarg, &endp); > > if (!quiet) > > @@ -3475,6 +3578,8 @@ main(int argc, char **argv) > > exchange_range_calls = test_exchange_range(); > > if (dontcache_io) > > dontcache_io = test_dontcache_io(); > > + if (do_atomic_writes) > > + do_atomic_writes = test_atomic_writes(); > > while (keep_running()) > > if (!test()) >