[PATCH v7 1/2] proto: add ability to populate a filesystem from a directory

Luca Di Maio <luca.dimaio1@xxxxxxxxx> · Sat, 26 Apr 2025 15:55:34 +0200

This patch implements the functionality to populate a newly created XFS
filesystem directly from an existing directory structure.

It resuses existing protofile logic, it branches if input is a
directory.

The population process steps are as follows:
  - create the root inode before populating content
  - recursively process nested directories
  - handle regular files, directories, symlinks, char devices, block
    devices, and fifos
  - preserve attributes (ownership, permissions)
  - preserve mtime timestamps from source files to maintain file history
    - possible to specify noatime=1 to use current time also for atime
    - use current time for ctime/crtime
  - preserve extended attributes and fsxattrs for all file types
  - preserve hardlinks

This functionality makes it easier to create populated filesystems
without having to write protofiles manually.
It's particularly useful for reproducible builds.

Signed-off-by: Luca Di Maio <luca.dimaio1@xxxxxxxxx>
---
 mkfs/proto.c | 653 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 mkfs/proto.h |   2 +-
 2 files changed, 646 insertions(+), 9 deletions(-)

diff --git a/mkfs/proto.c b/mkfs/proto.c
index 7f56a3d8..23f7998b 100644
--- a/mkfs/proto.c
+++ b/mkfs/proto.c
@@ -5,11 +5,17 @@
  */

 #include "libxfs.h"
+#include "xfs_inode.h"
+#include <fcntl.h>
+#include <linux/limits.h>
+#include <stdio.h>
+#include <sys/resource.h>
 #include <sys/stat.h>
 #include <sys/xattr.h>
 #include <linux/xattr.h>
 #include "libfrog/convert.h"
 #include "proto.h"
+#include <dirent.h>

 /*
  * Prototypes for internal functions.
@@ -22,6 +28,11 @@ static int newregfile(char **pp, char **fname);
 static void rtinit(xfs_mount_t *mp);
 static off_t filesize(int fd);
 static int slashes_are_spaces;
+static int noatime;
+static void populate_from_dir(struct xfs_mount *mp, struct xfs_inode *pip,
+				struct fsxattr *fsxp, char *cur_path);
+static void walk_dir(struct xfs_mount *mp, struct xfs_inode *pip,
+				struct fsxattr *fsxp, char *cur_path);

 /*
  * Use this for block reservations needed for mkfs's conditions
@@ -65,6 +76,18 @@ setup_proto(

 	if (!fname)
 		return dflt;
+
+	/*
+	 * handle directory inputs
+	 * in this case we noop and let successive
+	 * parse_proto() to handle the directory
+	 * input.
+	 */
+	if ((fd = open(fname, O_DIRECTORY)) > 0) {
+		close(fd);
+		return fname;
+	}
+
 	if ((fd = open(fname, O_RDONLY)) < 0 || (size = filesize(fd)) < 0) {
 		fprintf(stderr, _("%s: failed to open %s: %s\n"),
 			progname, fname, strerror(errno));
@@ -380,9 +403,17 @@ writeattr(

 	ret = fgetxattr(fd, attrname, valuebuf, valuelen);
 	if (ret < 0) {
-		if (errno == EOPNOTSUPP)
-			return;
-		fail(_("error collecting xattr value"), errno);
+		/*
+		 * in case of filedescriptors with O_PATH, fgetxattr() will
+		 * fail. let's try to fallback to lgetxattr() using input
+		 * path.
+		 */
+		ret = lgetxattr(fname, attrname, valuebuf, valuelen);
+		if (ret < 0) {
+			if (errno == EOPNOTSUPP)
+				return;
+			fail(_("error collecting xattr value"), errno);
+		}
 	}
 	if (ret == 0)
 		return;
@@ -426,9 +457,17 @@ writeattrs(

 	ret = flistxattr(fd, namebuf, XATTR_LIST_MAX);
 	if (ret < 0) {
-		if (errno == EOPNOTSUPP)
-			goto out_namebuf;
-		fail(_("error collecting xattr names"), errno);
+		/*
+		 * in case of filedescriptors with O_PATH, flistxattr() will
+		 * fail. let's try to fallback to llistxattr() using input
+		 * path.
+		 */
+		ret = llistxattr(fname, namebuf, XATTR_LIST_MAX);
+		if (ret < 0) {
+			if (errno == EOPNOTSUPP)
+				goto out_namebuf;
+			fail(_("error collecting xattr names"), errno);
+		}
 	}

 	p = namebuf;
@@ -934,10 +973,24 @@ parse_proto(
 	xfs_mount_t	*mp,
 	struct fsxattr	*fsx,
 	char		**pp,
-	int		proto_slashes_are_spaces)
+	int proto_slashes_are_spaces,
+	int proto_noatime)
 {
 	slashes_are_spaces = proto_slashes_are_spaces;
-	parseproto(mp, NULL, fsx, pp, NULL);
+	noatime = proto_noatime;
+
+	/*
+	 * in case of a file input, we will use the prototype file logic
+	 * else we will fallback to populate from dir.
+	 */
+	int fd;
+	if ((fd = open(*pp,  O_DIRECTORY)) < 0) {
+		parseproto(mp, NULL, fsx, pp, NULL);
+		return;
+	}
+
+	close(fd);
+	populate_from_dir(mp, NULL, fsx, *pp);
 }

 /* Create a sb-rooted metadata file. */
@@ -1171,3 +1224,587 @@ filesize(
 		return -1;
 	return stb.st_size;
 }
+
+/* Try to allow as many memfds as possible. */
+static void
+bump_max_fds(void)
+{
+	struct rlimit rlim = {};
+	int ret;
+
+	ret = getrlimit(RLIMIT_NOFILE, &rlim);
+	if (!ret) {
+		rlim.rlim_cur = rlim.rlim_max;
+		setrlimit(RLIMIT_NOFILE, &rlim);
+	}
+}
+
+static void
+writefsxattrs(
+		struct fsxattr *fsxp,
+		struct xfs_inode *ip)
+{
+	ip->i_projid = fsxp->fsx_projid;
+	ip->i_extsize = fsxp->fsx_extsize;
+	ip->i_diflags = xfs_flags2diflags(ip, fsxp->fsx_xflags);
+	if (xfs_has_v3inodes(ip->i_mount)) {
+		ip->i_diflags2 = xfs_flags2diflags2(ip, fsxp->fsx_xflags);
+		ip->i_cowextsize = fsxp->fsx_cowextsize;
+	}
+}
+
+static void
+writetimestamps(
+		struct xfs_inode *ip,
+		struct stat statbuf)
+{
+	struct timespec64 ts;
+
+	/*
+	 * Copy timestamps from source file to destination inode.
+	 * Usually reproducible archives will delete or not register
+	 * atime and ctime, for example:
+	 *    https://www.gnu.org/software/tar/manual/html_section/Reproducibility.html
+	 * hence we will only copy mtime, and let ctime/crtime be set to
+	 * current time.
+	 * atime will be copied over if noatime is false.
+	 */
+	ts.tv_sec = statbuf.st_mtim.tv_sec;
+	ts.tv_nsec = statbuf.st_mtim.tv_nsec;
+	inode_set_mtime_to_ts(VFS_I(ip), ts);
+
+	/*
+	 * in case of noatime option, we will not copy the atime
+	 * timestamp from source, but let it be set from gettimeofday()
+	 */
+	if (!noatime) {
+		ts.tv_sec = statbuf.st_atim.tv_sec;
+		ts.tv_nsec = statbuf.st_atim.tv_nsec;
+		inode_set_atime_to_ts(VFS_I(ip), ts);
+	}
+
+	return;
+}
+
+struct hardlink {
+	unsigned long i_ino;
+	struct xfs_inode *existing_ip;
+};
+
+struct hardlinks {
+	int count;
+	size_t size;
+	struct hardlink *entries;
+};
+
+/*
+ * keep track of source inodes that are from hardlinks
+ * so we can retrieve them when needed to setup in
+ * destination.
+ */
+static struct hardlinks *hardlink_tracker = { 0 };
+
+static void
+init_hardlink_tracker(void) {
+	hardlink_tracker = malloc(sizeof(struct hardlinks));
+	if (!hardlink_tracker)
+		fail(_("error allocating hardlinks tracking array"), errno);
+	memset(hardlink_tracker, 0, sizeof(struct hardlinks));
+
+	hardlink_tracker->count = 0;
+	hardlink_tracker->size = PATH_MAX;
+
+	hardlink_tracker->entries = malloc(
+			hardlink_tracker->size * sizeof(struct hardlink));
+	if (!hardlink_tracker->entries)
+		fail(_("error allocating hardlinks tracking array"), errno);
+}
+
+static void
+cleanup_hardlink_tracker(void) {
+	/*
+	 * cleanup all pending inodes, call libxfs_irele() on them
+	 * before freeing memory.
+	 */
+	for (int i = 0; i < hardlink_tracker->count; i++)
+		libxfs_irele(hardlink_tracker->entries[i].existing_ip);
+
+	free(hardlink_tracker->entries);
+	free(hardlink_tracker);
+}
+
+static struct xfs_inode*
+get_hardlink_src_inode(
+		unsigned long i_ino)
+{
+	for (int i = 0; i < hardlink_tracker->count; i++) {
+		if (hardlink_tracker->entries[i].i_ino == i_ino) {
+			return hardlink_tracker->entries[i].existing_ip;
+		}
+	}
+	return NULL;
+}
+
+static void
+track_hardlink_inode(
+		unsigned long i_ino,
+		struct xfs_inode *ip)
+{
+	if (hardlink_tracker->count >= hardlink_tracker->size) {
+		/*
+		 * double for smaller capacity.
+		 * instead grow by 25% steps for larger capacities.
+		 */
+		const size_t old_size = hardlink_tracker->size;
+		size_t new_size = old_size * 2;
+		if (old_size > 1024)
+			new_size = old_size + (old_size / 4);
+
+		struct hardlink *resized_array = realloc(
+				hardlink_tracker->entries,
+				new_size * sizeof(struct hardlink));
+		if (!resized_array) {
+			fail(_("error enlarging hardlinks tracking array"), errno);
+		}
+		memset(&resized_array[old_size],
+				0, (new_size - old_size) * sizeof(struct hardlink));
+
+		hardlink_tracker->entries = resized_array;
+		hardlink_tracker->size = new_size;
+	}
+
+	hardlink_tracker->entries[hardlink_tracker->count].i_ino = i_ino;
+	hardlink_tracker->entries[hardlink_tracker->count].existing_ip = ip;
+	hardlink_tracker->count++;
+}
+
+static int
+handle_hardlink(
+		struct xfs_mount *mp,
+		struct xfs_inode *pip,
+		struct fsxattr *fsxp,
+		int mode,
+		struct cred creds,
+		struct xfs_name xname,
+		int flags,
+		struct stat file_stat,
+		xfs_dev_t rdev,
+		int fd,
+		char *fname,
+		char *path)
+{
+	int error;
+	struct xfs_parent_args *ppargs = NULL;
+	struct xfs_inode *ip;
+	struct xfs_trans *tp;
+	tp = getres(mp, 0);
+	ppargs = newpptr(mp);
+
+	ip = get_hardlink_src_inode(file_stat.st_ino);
+	if (!ip) {
+		/*
+		 * we didn't find the hardlink inode, this means
+		 * it's the first time we see it, report error
+		 * so create_file() can continue handling the inode
+		 * as a regular file type, and later save
+		 * *ip in our buffer for future consumption.
+		 */
+		return 1;
+	}
+	/*
+	* In case the inode was already in our tracker
+	* we need to setup the hardlink and skip file
+	* copy.
+	*/
+	libxfs_trans_ijoin(tp, pip, 0);
+	libxfs_trans_ijoin(tp, ip, 0);
+
+	newdirent(mp, tp, pip, &xname, ip, ppargs);
+
+	/*
+	 * Increment the link count
+	 */
+	libxfs_bumplink(tp, ip);
+
+	/*
+	 * we won't need fd for hardlinks
+	 * so we close and reset it.
+	 */
+	if (fd >= 0)
+		close(fd);
+
+	libxfs_trans_log_inode(tp, ip, flags);
+	error = -libxfs_trans_commit(tp);
+	if (error)
+		fail(_("Error encountered creating file from prototype file"), error);
+
+	libxfs_parent_finish(mp, ppargs);
+
+	return 0;
+}
+
+static void
+create_file(
+		struct xfs_mount *mp,
+		struct xfs_inode *pip,
+		struct fsxattr *fsxp,
+		int mode,
+		struct cred creds,
+		struct xfs_name xname,
+		int flags,
+		struct stat file_stat,
+		xfs_dev_t rdev,
+		int fd,
+		char *fname,
+		char *path)
+{
+
+	int error;
+	struct xfs_parent_args *ppargs = NULL;
+	struct xfs_inode *ip;
+	struct xfs_trans *tp;
+
+	if (file_stat.st_nlink > 1) {
+		error = handle_hardlink(mp, pip, fsxp, mode, creds,
+			    xname, flags, file_stat,
+			    rdev, fd, fname, path);
+		/*
+		 * if no error is reported it means the hardlink has
+		 * been correctly found and set, so we don't need to
+		 * do anything else.
+		 */
+		if (!error)
+			return;
+		/*
+		 * if instead we have an error it means the hardlink
+		 * was not registered, so we proceed to treat it like
+		 * a regular file, and save it to our tracker later.
+		 */
+	}
+
+	tp = getres(mp, 0);
+	ppargs = newpptr(mp);
+
+	error = creatproto(&tp, pip, mode, rdev, &creds, fsxp, &ip);
+	if (error)
+		fail(_("Inode allocation failed"), error);
+	libxfs_trans_ijoin(tp, pip, 0);
+	newdirent(mp, tp, pip, &xname, ip, ppargs);
+
+	/*
+	 * copy over timestamps
+	 */
+	writetimestamps(ip, file_stat);
+	libxfs_trans_log_inode(tp, ip, flags);
+	error = -libxfs_trans_commit(tp);
+	if (error)
+		fail(_("Error encountered creating file from prototype file"), error);
+
+	libxfs_parent_finish(mp, ppargs);
+
+	/*
+	 * copy over file content, attributes,
+	 * extended attributes and timestamps
+	 *
+	 * hardlinks will be skipped as fd will
+	 * be closed before this.
+	 */
+	if (fd >= 0) {
+		writefile(ip, fname, fd);
+		writeattrs(ip, fname, fd);
+		writefsxattrs(fsxp, ip);
+		close(fd);
+	}
+
+	if (file_stat.st_nlink > 1)
+		/*
+		 * if we're here it means this is the first time we're
+		 * encountering an hardlink, so we need to store it and
+		 * skpi libxfs_irele() to keep it around.
+		 */
+		track_hardlink_inode(file_stat.st_ino, ip);
+	else
+		/*
+		 * We release the inode pointer only if we're dealing
+		 * with a regular file, we need to keep the original
+		 * inode pointer for hardlinks, they'll be released
+		 * at the end of the lifecycle when we cleanup the
+		 * hardlink_tracker.
+		 */
+		libxfs_irele(ip);
+}
+
+static void
+handle_direntry(
+		struct xfs_mount *mp,
+		struct xfs_inode *pip,
+		struct fsxattr *fsxp,
+		char *cur_path,
+		struct dirent *entry)
+{
+	char link_target[PATH_MAX];
+	char path[PATH_MAX];
+	int error;
+	int fd = -1;
+	int flags;
+	int majdev;
+	int mindev;
+	int mode;
+	off_t len;
+	struct cred creds;
+	struct stat file_stat;
+	struct xfs_name xname;
+	struct xfs_parent_args *ppargs = NULL;
+	struct xfs_inode *ip;
+	struct xfs_trans *tp;
+
+	/*
+	 * Skip "." and ".." directories to avoid looping
+	 */
+	if (strcmp(entry->d_name, ".") == 0 ||
+	    strcmp(entry->d_name, "..") == 0) {
+		return;
+	}
+
+	/*
+	 * Create the full path to the original file or directory
+	 */
+	snprintf(path, sizeof(path), "%s/%s", cur_path, entry->d_name);
+
+	if (lstat(path, &file_stat) < 0) {
+		fprintf(stderr, _("%s (error accessing)\n"), entry->d_name);
+		exit(1);
+	}
+
+	/*
+	 * symlinks will need to be opened with O_PATH to work, so we handle this
+	 * special case.
+	 */
+	int open_flags = O_NOFOLLOW | O_RDONLY | O_NOATIME;
+	if ((file_stat.st_mode & S_IFMT) == S_IFLNK) {
+		open_flags = O_NOFOLLOW | O_PATH;
+	}
+	if ((fd = open(path, open_flags)) < 0) {
+		fprintf(stderr, _("%s: cannot open %s: %s\n"), progname, path,
+			strerror(errno));
+		exit(1);
+	}
+
+	memset(&creds, 0, sizeof(creds));
+	creds.cr_uid = file_stat.st_uid;
+	creds.cr_gid = file_stat.st_gid;
+	xname.name = (unsigned char *)entry->d_name;
+	xname.len = strlen(entry->d_name);
+	xname.type = 0;
+	mode = file_stat.st_mode;
+	flags = XFS_ILOG_CORE;
+	switch (file_stat.st_mode & S_IFMT) {
+	case S_IFDIR:
+		tp = getres(mp, 0);
+		ppargs = newpptr(mp);
+		error = creatproto(&tp, pip, mode, 0, &creds, fsxp, &ip);
+		if (error)
+			fail(_("Inode allocation failed"), error);
+		libxfs_trans_ijoin(tp, pip, 0);
+		xname.type = XFS_DIR3_FT_DIR;
+		newdirent(mp, tp, pip, &xname, ip, ppargs);
+		libxfs_bumplink(tp, pip);
+		libxfs_trans_log_inode(tp, pip, XFS_ILOG_CORE);
+		newdirectory(mp, tp, ip, pip);
+
+		/*
+		 * copy over timestamps
+		 */
+		writetimestamps(ip, file_stat);
+
+		libxfs_trans_log_inode(tp, ip, flags);
+		error = -libxfs_trans_commit(tp);
+		if (error)
+			fail(_("Directory inode allocation failed."), error);
+
+		libxfs_parent_finish(mp, ppargs);
+		tp = NULL;
+
+		/*
+		 * copy over attributes
+		 */
+		writeattrs(ip, entry->d_name, fd);
+		writefsxattrs(fsxp, ip);
+		close(fd);
+
+		walk_dir(mp, ip, fsxp, path);
+
+		libxfs_irele(ip);
+		break;
+	case S_IFLNK:
+		len = readlink(path, link_target, PATH_MAX - 1);
+		tp = getres(mp, XFS_B_TO_FSB(mp, len));
+		ppargs = newpptr(mp);
+		error = creatproto(&tp, pip, mode, 0, &creds, fsxp, &ip);
+		if (error)
+			fail(_("Inode allocation failed"), error);
+		writesymlink(tp, ip, link_target, len);
+		libxfs_trans_ijoin(tp, pip, 0);
+		xname.type = XFS_DIR3_FT_SYMLINK;
+		newdirent(mp, tp, pip, &xname, ip, ppargs);
+
+		/*
+		 * copy over timestamps
+		 */
+		writetimestamps(ip, file_stat);
+
+		libxfs_trans_log_inode(tp, ip, flags);
+		error = -libxfs_trans_commit(tp);
+		if (error)
+			fail(_("Error encountered creating file from prototype file"),
+			     error);
+		libxfs_parent_finish(mp, ppargs);
+
+		/*
+		 * copy over attributes
+		 *
+		 * being a symlink we opened the filedescriptor with O_PATH
+		 * this will make flistxattr() and fgetxattr() fail, so we
+		 * will need to fallback to llistxattr() and lgetxattr(), this
+		 * will need the full path to the original file, not just the
+		 * entry name.
+		 */
+		writeattrs(ip, path, fd);
+		writefsxattrs(fsxp, ip);
+		close(fd);
+
+		libxfs_irele(ip);
+		break;
+	case S_IFREG:
+		xname.type = XFS_DIR3_FT_REG_FILE;
+		create_file(mp, pip, fsxp, mode, creds, xname, flags, file_stat,
+			    0, fd, entry->d_name, path);
+		break;
+	case S_IFCHR:
+		xname.type = XFS_DIR3_FT_CHRDEV;
+		majdev = major(file_stat.st_rdev);
+		mindev = minor(file_stat.st_rdev);
+		create_file(mp, pip, fsxp, mode, creds, xname, flags, file_stat,
+			    IRIX_MKDEV(majdev, mindev), fd, entry->d_name,
+			    path);
+		break;
+	case S_IFBLK:
+		xname.type = XFS_DIR3_FT_BLKDEV;
+		majdev = major(file_stat.st_rdev);
+		mindev = minor(file_stat.st_rdev);
+		create_file(mp, pip, fsxp, mode, creds, xname, flags, file_stat,
+			    IRIX_MKDEV(majdev, mindev), fd, entry->d_name,
+			    path);
+		break;
+	case S_IFIFO:
+		flags |= XFS_ILOG_DEV;
+		create_file(mp, pip, fsxp, mode, creds, xname, flags, file_stat,
+			    0, fd, entry->d_name, path);
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * walk_dir will recursively list files and directories
+ * and populate the mountpoint *mp with them using handle_direntry().
+ */
+static void
+walk_dir(
+		struct xfs_mount *mp,
+		struct xfs_inode *pip,
+		struct fsxattr *fsxp,
+		char *cur_path)
+{
+	DIR *dir;
+	struct dirent *entry;
+
+	/*
+	 * open input directory and iterate over all entries in it.
+	 * when another directory is found, we will recursively call
+	 * walk_dir.
+	 */
+	if ((dir = opendir(cur_path)) == NULL)
+		fail(_("cannot open input dir"), 1);
+	while ((entry = readdir(dir)) != NULL) {
+		handle_direntry(mp, pip, fsxp, cur_path, entry);
+	}
+	closedir(dir);
+}
+
+static void
+populate_from_dir(
+		struct xfs_mount *mp,
+		struct xfs_inode *pip,
+		struct fsxattr *fsxp,
+		char *cur_path)
+{
+	int error;
+	int mode;
+	struct cred creds;
+	struct xfs_inode *ip;
+	struct xfs_trans *tp;
+
+	/*
+	 * we first ensure we have the root inode
+	 */
+	memset(&creds, 0, sizeof(creds));
+	creds.cr_uid = 0;
+	creds.cr_gid = 0;
+	mode = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
+	tp = getres(mp, 0);
+	error = creatproto(&tp, pip, mode | S_IFDIR, 0, &creds, fsxp, &ip);
+	if (error)
+		fail(_("Inode allocation failed"), error);
+	pip = ip;
+	mp->m_sb.sb_rootino = ip->i_ino;
+	libxfs_log_sb(tp);
+	newdirectory(mp, tp, ip, pip);
+	libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+	error = -libxfs_trans_commit(tp);
+	if (error)
+		fail(_("Inode allocation failed"), error);
+
+	libxfs_parent_finish(mp, NULL);
+
+	/*
+	 * RT initialization.  Do this here to ensure that
+	 * the RT inodes get placed after the root inode.
+	 */
+	error = create_metadir(mp);
+	if (error)
+		fail(_("Creation of the metadata directory inode failed"), error);
+
+	rtinit(mp);
+
+	/*
+	 * by nature of walk_dir() we could be opening
+	 * a great number of fds for deeply nested directory
+	 * trees.
+	 * try to bump max fds limit.
+	 */
+	bump_max_fds();
+
+	/*
+	 * initialize the hardlinks tracker
+	 */
+	init_hardlink_tracker();
+	/*
+	 * now that we have a root inode, let's
+	 * walk the input dir and populate the partition
+	 */
+	walk_dir(mp, ip, fsxp, cur_path);
+
+	/*
+	 * cleanup hardlinks tracker
+	 */
+	cleanup_hardlink_tracker();
+
+	/*
+	 * we free up our root inode
+	 * only when we finished populating the
+	 * root filesystem
+	 */
+	libxfs_irele(ip);
+}
diff --git a/mkfs/proto.h b/mkfs/proto.h
index be1ceb45..fea416f6 100644
--- a/mkfs/proto.h
+++ b/mkfs/proto.h
@@ -8,7 +8,7 @@

 char *setup_proto(char *fname);
 void parse_proto(struct xfs_mount *mp, struct fsxattr *fsx, char **pp,
-		int proto_slashes_are_spaces);
+		int proto_slashes_are_spaces, int proto_noatime);
 void res_failed(int err);

 #endif /* MKFS_PROTO_H_ */
--
2.49.0