[RFC PATCH] xfs: add mount option for zone gc pressure

Hans Holmberg <Hans.Holmberg@xxxxxxx> · Wed, 19 Mar 2025 08:19:19 +0000

Presently we start garbage collection late - when we start running
out of free zones to backfill max_open_zones. This is a reasonable
default as it minimizes write amplification. The longer we wait,
the more blocks are invalidated and reclaim cost less in terms
of blocks to relocate.

Starting this late however introduces a risk of GC being outcompeted
by user writes. If GC can't keep up, user writes will be forced to
wait for free zones with high tail latencies as a result.

This is not a problem under normal circumstances, but if fragmentation
is bad and user write pressure is high (multiple full-throttle
writers) we will "bottom out" of free zones.

To mitigate this, introduce a gc_pressure mount option that lets the
user specify a percentage of how much of the unused space that gc
should keep available for writing. A high value will reclaim more of
the space occupied by unused blocks, creating a larger buffer against
write bursts.

This comes at a cost as write amplification is increased. To
illustrate this using a sample workload, setting gc_pressure to 60%
avoids high (500ms) max latencies while increasing write amplification
by 15%.

Signed-off-by: Hans Holmberg <hans.holmberg@xxxxxxx>
---

A patch for xfsprogs documenting the option will follow (if it makes
it beyond RFC)

 fs/xfs/xfs_mount.h      |  1 +
 fs/xfs/xfs_super.c      | 14 +++++++++++++-
 fs/xfs/xfs_zone_alloc.c |  5 +++++
 fs/xfs/xfs_zone_gc.c    | 16 ++++++++++++++--
 4 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 799b84220ebb..af595024de00 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -229,6 +229,7 @@ typedef struct xfs_mount {
 	bool			m_finobt_nores; /* no per-AG finobt resv. */
 	bool			m_update_sb;	/* sb needs update in mount */
 	unsigned int		m_max_open_zones;
+	unsigned int		m_gc_pressure;
 
 	/*
 	 * Bitsets of per-fs metadata that have been checked and/or are sick.
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index af5e63cb6a99..656f228cc3d9 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -111,7 +111,7 @@ enum {
 	Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota,
 	Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce,
 	Opt_discard, Opt_nodiscard, Opt_dax, Opt_dax_enum, Opt_max_open_zones,
-	Opt_lifetime, Opt_nolifetime,
+	Opt_lifetime, Opt_nolifetime, Opt_gc_pressure,
 };
 
 static const struct fs_parameter_spec xfs_fs_parameters[] = {
@@ -159,6 +159,7 @@ static const struct fs_parameter_spec xfs_fs_parameters[] = {
 	fsparam_u32("max_open_zones",	Opt_max_open_zones),
 	fsparam_flag("lifetime",	Opt_lifetime),
 	fsparam_flag("nolifetime",	Opt_nolifetime),
+	fsparam_u32("gc_pressure",	Opt_gc_pressure),
 	{}
 };
 
@@ -242,6 +243,9 @@ xfs_fs_show_options(
 	if (mp->m_max_open_zones)
 		seq_printf(m, ",max_open_zones=%u", mp->m_max_open_zones);
 
+	if (mp->m_gc_pressure)
+		seq_printf(m, ",gc_pressure=%u", mp->m_gc_pressure);
+
 	return 0;
 }
 
@@ -1100,6 +1104,11 @@ xfs_finish_flags(
 "nolifetime mount option only supported on zoned file systems.");
 			return -EINVAL;
 		}
+		if (mp->m_gc_pressure) {
+			xfs_warn(mp,
+"gc_pressure mount option only supported on zoned file systems.");
+			return -EINVAL;
+		}
 	}
 
 	return 0;
@@ -1500,6 +1509,9 @@ xfs_fs_parse_param(
 	case Opt_max_open_zones:
 		parsing_mp->m_max_open_zones = result.uint_32;
 		return 0;
+	case Opt_gc_pressure:
+		parsing_mp->m_gc_pressure = result.uint_32;
+		return 0;
 	case Opt_lifetime:
 		parsing_mp->m_features &= ~XFS_FEAT_NOLIFETIME;
 		return 0;
diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c
index 734b73470ef9..f75247c16600 100644
--- a/fs/xfs/xfs_zone_alloc.c
+++ b/fs/xfs/xfs_zone_alloc.c
@@ -1162,6 +1162,11 @@ xfs_mount_zones(
 		return -EFSCORRUPTED;
 	}
 
+	if (mp->m_gc_pressure > 100) {
+		xfs_notice(mp, "gc_pressure must not exceed 100 percent.");
+		return -EINVAL;
+	}
+
 	error = xfs_calc_open_zones(mp);
 	if (error)
 		return error;
diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c
index a4abaac0fbc5..002a943860f5 100644
--- a/fs/xfs/xfs_zone_gc.c
+++ b/fs/xfs/xfs_zone_gc.c
@@ -162,18 +162,30 @@ struct xfs_zone_gc_data {
 
 /*
  * We aim to keep enough zones free in stock to fully use the open zone limit
- * for data placement purposes.
+ * for data placement purposes. Additionally, the gc_pressure mount option
+ * can be set to make sure a fraction of the unused/free blocks are available
+ * for writing.
  */
 bool
 xfs_zoned_need_gc(
 	struct xfs_mount	*mp)
 {
+	s64			available, free;
+
 	if (!xfs_group_marked(mp, XG_TYPE_RTG, XFS_RTG_RECLAIMABLE))
 		return false;
-	if (xfs_estimate_freecounter(mp, XC_FREE_RTAVAILABLE) <
+
+	available = xfs_estimate_freecounter(mp, XC_FREE_RTAVAILABLE);
+
+	if (available <
 	    mp->m_groups[XG_TYPE_RTG].blocks *
 	    (mp->m_max_open_zones - XFS_OPEN_GC_ZONES))
 		return true;
+
+	free = xfs_estimate_freecounter(mp, XC_FREE_RTEXTENTS);
+	if (available < div_s64(free * mp->m_gc_pressure, 100))
+		return true;
+
 	return false;
 }
 
-- 
2.34.1