[RFC PATCH 06/20] ceph: add comments to metadata structures in ceph_fs.h

From: Viacheslav Dubeyko <Slava.Dubeyko@xxxxxxx>

We have a lot of declarations but not enough good
comments on them.

Claude AI generated comments for CephFS metadata structure
declarations in include/linux/ceph/*.h. These comments
have been reviewed, checked, and corrected.

This patch adds comments for struct ceph_file_layout_legacy,
struct ceph_file_layout, struct ceph_dir_layout,
struct ceph_mon_request_header, struct ceph_mon_statfs,
struct ceph_statfs, struct ceph_mon_statfs_reply,
struct ceph_mon_command, struct ceph_osd_getmap,
struct ceph_mds_getmap, struct ceph_client_mount,
struct ceph_mon_subscribe_item, struct ceph_mon_subscribe_ack,
struct ceph_mds_session_head, union ceph_mds_request_args,
union ceph_mds_request_args_ext, struct ceph_mds_request_head_legacy,
struct ceph_mds_request_head, struct ceph_mds_request_release,
struct ceph_mds_reply_head, struct ceph_frag_tree_split,
struct ceph_frag_tree_head, struct ceph_mds_reply_cap,
struct ceph_mds_reply_inode, struct ceph_mds_reply_lease,
struct ceph_mds_reply_dirfrag, struct ceph_filelock,
struct ceph_mds_caps, struct ceph_mds_cap_peer,
struct ceph_mds_cap_release, struct ceph_mds_cap_item,
struct ceph_mds_lease, struct ceph_mds_cap_reconnect,
struct ceph_mds_cap_reconnect_v1, struct ceph_mds_snaprealm_reconnect,
struct ceph_mds_snap_head, struct ceph_mds_snap_realm,
struct ceph_mds_quota in include/linux/ceph/ceph_fs.h.

Signed-off-by: Viacheslav Dubeyko <Slava.Dubeyko@xxxxxxx>
cc: Alex Markuze <amarkuze@xxxxxxxxxx>
cc: Ilya Dryomov <idryomov@xxxxxxxxx>
cc: Ceph Development <ceph-devel@xxxxxxxxxxxxxxx>
---
 include/linux/ceph/ceph_fs.h | 792 +++++++++++++++++++++++------------
 1 file changed, 532 insertions(+), 260 deletions(-)

diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index c7f2c63b3bc3..8f3452439d97 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -35,36 +35,50 @@
 #define CEPH_MAX_MON   31
 
 /*
- * legacy ceph_file_layoute
+ * Legacy file layout metadata: Wire format for older file layout structures.
+ * Describes how a file's data is striped across RADOS objects and distributed
+ * across placement groups. Maintained for backward compatibility.
  */
 struct ceph_file_layout_legacy {
-	/* file -> object mapping */
-	__le32 fl_stripe_unit;     /* stripe unit, in bytes.  must be multiple
-				      of page size. */
-	__le32 fl_stripe_count;    /* over this many objects */
-	__le32 fl_object_size;     /* until objects are this big, then move to
-				      new objects */
-	__le32 fl_cas_hash;        /* UNUSED.  0 = none; 1 = sha256 */
-
-	/* pg -> disk layout */
-	__le32 fl_object_stripe_unit;  /* UNUSED.  for per-object parity, if any */
-
-	/* object -> pg layout */
-	__le32 fl_unused;       /* unused; used to be preferred primary for pg (-1 for none) */
-	__le32 fl_pg_pool;      /* namespace, crush ruleset, rep level */
+	/* File-to-object mapping parameters */
+	/* Stripe unit size in bytes (must be page-aligned) */
+	__le32 fl_stripe_unit;
+	/* Number of objects to stripe across */
+	__le32 fl_stripe_count;
+	/* Maximum object size before creating new objects */
+	__le32 fl_object_size;
+	/* Content-addressable storage hash (unused) */
+	__le32 fl_cas_hash;
+
+	/* Placement group to disk layout */
+	/* Per-object parity stripe unit (unused) */
+	__le32 fl_object_stripe_unit;
+
+	/* Object to placement group layout */
+	/* Unused field (was preferred primary PG) */
+	__le32 fl_unused;
+	/* Pool ID for namespace, CRUSH rules, replication level */
+	__le32 fl_pg_pool;
 } __attribute__ ((packed));
 
 struct ceph_string;
 /*
- * ceph_file_layout - describe data layout for a file/inode
+ * File layout metadata: Describes how a file's data is distributed across
+ * RADOS objects within a storage pool. Controls striping, object sizing,
+ * and namespace placement for optimal performance and data distribution.
  */
 struct ceph_file_layout {
-	/* file -> object mapping */
-	u32 stripe_unit;   /* stripe unit, in bytes */
-	u32 stripe_count;  /* over this many objects */
-	u32 object_size;   /* until objects are this big */
-	s64 pool_id;        /* rados pool id */
-	struct ceph_string __rcu *pool_ns; /* rados pool namespace */
+	/* File-to-object striping parameters */
+	/* Stripe unit size in bytes */
+	u32 stripe_unit;
+	/* Number of objects to stripe data across */
+	u32 stripe_count;
+	/* Maximum size of individual RADOS objects */
+	u32 object_size;
+	/* Target RADOS pool ID */
+	s64 pool_id;
+	/* Optional pool namespace (RCU-protected string) */
+	struct ceph_string __rcu *pool_ns;
 };
 
 extern int ceph_file_layout_is_valid(const struct ceph_file_layout *layout);
@@ -75,8 +89,15 @@ extern void ceph_file_layout_to_legacy(struct ceph_file_layout *fl,
 
 #define CEPH_MIN_STRIPE_UNIT 65536
 
+/*
+ * Directory layout metadata: Describes how directory entries are distributed
+ * and hashed for efficient lookup and enumeration. Currently minimal with
+ * most fields reserved for future expansion.
+ */
 struct ceph_dir_layout {
-	__u8   dl_dir_hash;   /* see ceph_hash.h for ids */
+	/* Directory hash function ID (see ceph_hash.h) */
+	__u8   dl_dir_hash;
+	/* Reserved fields for future use */
 	__u8   dl_unused1;
 	__u16  dl_unused2;
 	__u32  dl_unused3;
@@ -172,63 +193,137 @@ enum {
 };
 
 
+/*
+ * Monitor request header metadata: Common header for all client requests
+ * to Ceph monitors. Includes version tracking and session identification
+ * for proper request sequencing and duplicate detection.
+ */
 struct ceph_mon_request_header {
+	/* Highest map version client currently has */
 	__le64 have_version;
+	/* Monitor rank for this session */
 	__le16 session_mon;
+	/* Transaction ID for this monitor session */
 	__le64 session_mon_tid;
 } __attribute__ ((packed));
 
+/*
+ * Ceph monitor statfs request structure
+ *
+ * Sent to the monitor to request filesystem statistics information.
+ * Can request stats for the entire cluster or for a specific data pool.
+ * The monitor responds with usage, capacity, and object count information.
+ */
 struct ceph_mon_statfs {
-	struct ceph_mon_request_header monhdr;
-	struct ceph_fsid fsid;
-	__u8 contains_data_pool;
-	__le64 data_pool;
+	struct ceph_mon_request_header monhdr; /* standard monitor request header */
+	struct ceph_fsid fsid;                 /* filesystem identifier */
+	__u8 contains_data_pool;               /* whether requesting pool-specific stats */
+	__le64 data_pool;                      /* specific pool ID (if contains_data_pool) */
 } __attribute__ ((packed));
 
+/*
+ * Filesystem statistics metadata: Reports storage usage and capacity
+ * information for a Ceph filesystem or pool. Used by statfs() system call.
+ */
 struct ceph_statfs {
-	__le64 kb, kb_used, kb_avail;
+	/* Total capacity in kilobytes */
+	__le64 kb;
+	/* Used space in kilobytes */
+	__le64 kb_used;
+	/* Available space in kilobytes */
+	__le64 kb_avail;
+	/* Total number of objects stored */
 	__le64 num_objects;
 } __attribute__ ((packed));
 
+/*
+ * Ceph monitor statfs reply structure
+ *
+ * Response from the monitor containing filesystem statistics information.
+ * Sent in response to a ceph_mon_statfs request, providing current usage,
+ * capacity, and object count data for the requested filesystem or pool.
+ */
 struct ceph_mon_statfs_reply {
-	struct ceph_fsid fsid;
-	__le64 version;
-	struct ceph_statfs st;
+	struct ceph_fsid fsid;         /* filesystem identifier */
+	__le64 version;                /* statistics version/timestamp */
+	struct ceph_statfs st;         /* actual filesystem statistics */
 } __attribute__ ((packed));
 
+/*
+ * Ceph monitor command structure
+ *
+ * Used to send administrative commands to the Ceph monitor. The command
+ * is specified as a text string that follows this header structure.
+ * Monitor responds with command results or error information.
+ */
 struct ceph_mon_command {
-	struct ceph_mon_request_header monhdr;
-	struct ceph_fsid fsid;
-	__le32 num_strs;         /* always 1 */
-	__le32 str_len;
-	char str[];
+	struct ceph_mon_request_header monhdr; /* standard monitor request header */
+	struct ceph_fsid fsid;                 /* filesystem identifier */
+	__le32 num_strs;                       /* number of command strings (always 1) */
+	__le32 str_len;                        /* length of command string */
+	char str[];                            /* command string (variable length) */
 } __attribute__ ((packed));
 
+/*
+ * Ceph OSD map request structure
+ *
+ * Sent to the monitor to request OSD map updates. The client specifies
+ * a starting epoch to receive incremental map updates from that point.
+ * Essential for maintaining current cluster topology and OSD status.
+ */
 struct ceph_osd_getmap {
-	struct ceph_mon_request_header monhdr;
-	struct ceph_fsid fsid;
-	__le32 start;
+	struct ceph_mon_request_header monhdr; /* standard monitor request header */
+	struct ceph_fsid fsid;                 /* filesystem identifier */
+	__le32 start;                          /* starting epoch for map updates */
 } __attribute__ ((packed));
 
+/*
+ * Ceph MDS map request structure
+ *
+ * Sent to the monitor to request MDS map updates. Contains information
+ * about active metadata servers, their states, and filesystem layout.
+ * Critical for clients to know which MDS to contact for operations.
+ */
 struct ceph_mds_getmap {
-	struct ceph_mon_request_header monhdr;
-	struct ceph_fsid fsid;
+	struct ceph_mon_request_header monhdr; /* standard monitor request header */
+	struct ceph_fsid fsid;                 /* filesystem identifier */
 } __attribute__ ((packed));
 
+/*
+ * Ceph client mount request structure
+ *
+ * Minimal structure sent to the monitor during client mount operations.
+ * Used to signal client presence and initiate the mount handshake with
+ * the monitor. Contains only the basic monitor request header.
+ */
 struct ceph_client_mount {
-	struct ceph_mon_request_header monhdr;
+	struct ceph_mon_request_header monhdr; /* standard monitor request header */
 } __attribute__ ((packed));
 
 #define CEPH_SUBSCRIBE_ONETIME    1  /* i want only 1 update after have */
 
+/*
+ * Ceph monitor subscription item
+ *
+ * Specifies subscription parameters for receiving map updates from the
+ * monitor. Used within subscription requests to indicate starting epoch
+ * and subscription behavior (one-time vs continuous updates).
+ */
 struct ceph_mon_subscribe_item {
-	__le64 start;
-	__u8 flags;
+	__le64 start;                          /* starting epoch/version for updates */
+	__u8 flags;                            /* subscription flags (CEPH_SUBSCRIBE_*) */
 } __attribute__ ((packed));
 
+/*
+ * Ceph monitor subscription acknowledgment
+ *
+ * Response from monitor confirming subscription requests. Indicates how long
+ * the subscription will remain active and confirms the filesystem ID.
+ * Used for managing subscription renewal timing.
+ */
 struct ceph_mon_subscribe_ack {
-	__le32 duration;         /* seconds */
-	struct ceph_fsid fsid;
+	__le32 duration;                       /* subscription duration in seconds */
+	struct ceph_fsid fsid;                 /* filesystem identifier */
 } __attribute__ ((packed));
 
 #define CEPH_FS_CLUSTER_ID_NONE  -1
@@ -306,11 +401,21 @@ enum {
 
 extern const char *ceph_session_op_name(int op);
 
+/*
+ * MDS session header metadata: Header for metadata server session messages.
+ * Manages the client-MDS session lifecycle including capability and lease limits.
+ */
 struct ceph_mds_session_head {
+	/* Session operation type */
 	__le32 op;
+	/* Session sequence number */
 	__le64 seq;
+	/* Message timestamp */
 	struct ceph_timespec stamp;
-	__le32 max_caps, max_leases;
+	/* Maximum capabilities client can hold */
+	__le32 max_caps;
+	/* Maximum directory entry leases */
+	__le32 max_leases;
 } __attribute__ ((packed));
 
 /* client_request */
@@ -410,78 +515,113 @@ extern const char *ceph_mds_op_name(int op);
 #define CEPH_O_DIRECTORY	00200000
 #define CEPH_O_NOFOLLOW		00400000
 
+/*
+ * Ceph MDS request arguments union
+ *
+ * Contains operation-specific arguments for different MDS operations.
+ * Each operation type has its own structure within the union, providing
+ * the specific parameters needed for that operation while sharing the
+ * same memory space efficiently.
+ */
 union ceph_mds_request_args {
+	/* Get inode attributes operation */
 	struct {
-		__le32 mask;                 /* CEPH_CAP_* */
+		__le32 mask;                 /* attribute mask (CEPH_CAP_*) */
 	} __attribute__ ((packed)) getattr;
+
+	/* Set inode attributes operation */
 	struct {
-		__le32 mode;
-		__le32 uid;
-		__le32 gid;
-		struct ceph_timespec mtime;
-		struct ceph_timespec atime;
-		__le64 size, old_size;       /* old_size needed by truncate */
-		__le32 mask;                 /* CEPH_SETATTR_* */
+		__le32 mode;                 /* file permissions */
+		__le32 uid;                  /* user ID */
+		__le32 gid;                  /* group ID */
+		struct ceph_timespec mtime;  /* modification time */
+		struct ceph_timespec atime;  /* access time */
+		__le64 size, old_size;       /* new and old file sizes */
+		__le32 mask;                 /* which attributes to set (CEPH_SETATTR_*) */
 	} __attribute__ ((packed)) setattr;
+
+	/* Read directory entries operation */
 	struct {
-		__le32 frag;                 /* which dir fragment */
-		__le32 max_entries;          /* how many dentries to grab */
-		__le32 max_bytes;
-		__le16 flags;
-		__le32 offset_hash;
+		__le32 frag;                 /* directory fragment to read */
+		__le32 max_entries;          /* maximum number of entries to return */
+		__le32 max_bytes;            /* maximum response size in bytes */
+		__le16 flags;                /* readdir operation flags */
+		__le32 offset_hash;          /* hash offset for pagination */
 	} __attribute__ ((packed)) readdir;
+
+	/* Create device node (mknod) operation */
 	struct {
-		__le32 mode;
-		__le32 rdev;
+		__le32 mode;                 /* file type and permissions */
+		__le32 rdev;                 /* device number (major/minor) */
 	} __attribute__ ((packed)) mknod;
+
+	/* Create directory (mkdir) operation */
 	struct {
-		__le32 mode;
+		__le32 mode;                 /* directory permissions */
 	} __attribute__ ((packed)) mkdir;
+
+	/* Open/create file operation */
 	struct {
-		__le32 flags;
-		__le32 mode;
-		__le32 stripe_unit;          /* layout for newly created file */
-		__le32 stripe_count;         /* ... */
-		__le32 object_size;
-		__le32 pool;
-		__le32 mask;                 /* CEPH_CAP_* */
-		__le64 old_size;
+		__le32 flags;                /* open flags (O_RDWR, O_CREAT, etc.) */
+		__le32 mode;                 /* file permissions (for creation) */
+		__le32 stripe_unit;          /* RADOS striping unit size */
+		__le32 stripe_count;         /* number of objects to stripe across */
+		__le32 object_size;          /* RADOS object size */
+		__le32 pool;                 /* RADOS pool ID */
+		__le32 mask;                 /* capability mask for new file */
+		__le64 old_size;             /* previous file size (for truncation) */
 	} __attribute__ ((packed)) open;
+
+	/* Set extended attributes operation */
 	struct {
-		__le32 flags;
-		__le32 osdmap_epoch; /* used for setting file/dir layouts */
+		__le32 flags;                /* xattr operation flags */
+		__le32 osdmap_epoch;         /* OSD map epoch for consistency */
 	} __attribute__ ((packed)) setxattr;
+
+	/* Set file/directory layout operation */
 	struct {
-		struct ceph_file_layout_legacy layout;
+		struct ceph_file_layout_legacy layout; /* striping layout */
 	} __attribute__ ((packed)) setlayout;
+
+	/* File locking operation */
 	struct {
-		__u8 rule; /* currently fcntl or flock */
-		__u8 type; /* shared, exclusive, remove*/
-		__le64 owner; /* owner of the lock */
-		__le64 pid; /* process id requesting the lock */
-		__le64 start; /* initial location to lock */
-		__le64 length; /* num bytes to lock from start */
-		__u8 wait; /* will caller wait for lock to become available? */
+		__u8 rule;                   /* lock rule (CEPH_LOCK_FCNTL/FLOCK) */
+		__u8 type;                   /* lock type (SHARED/EXCL/UNLOCK) */
+		__le64 owner;                /* lock owner identifier */
+		__le64 pid;                  /* process ID requesting the lock */
+		__le64 start;                /* byte offset where lock begins */
+		__le64 length;               /* number of bytes to lock */
+		__u8 wait;                   /* whether to wait for lock */
 	} __attribute__ ((packed)) filelock_change;
+
+	/* Lookup by inode number operation */
 	struct {
-		__le32 mask;                 /* CEPH_CAP_* */
-		__le64 snapid;
-		__le64 parent;
-		__le32 hash;
+		__le32 mask;                 /* attribute mask for returned data */
+		__le64 snapid;               /* snapshot ID */
+		__le64 parent;               /* parent inode number */
+		__le32 hash;                 /* inode hash for verification */
 	} __attribute__ ((packed)) lookupino;
 } __attribute__ ((packed));
 
+/*
+ * Ceph MDS request arguments union (extended version)
+ *
+ * This union extends the original ceph_mds_request_args with support
+ * for newer protocol features. It maintains backward compatibility
+ * while adding extended functionality like birth time support.
+ */
 union ceph_mds_request_args_ext {
-	union ceph_mds_request_args old;
+	union ceph_mds_request_args old; /* legacy argument formats */
+	/* Extended setattr arguments with birth time support */
 	struct {
-		__le32 mode;
-		__le32 uid;
-		__le32 gid;
-		struct ceph_timespec mtime;
-		struct ceph_timespec atime;
-		__le64 size, old_size;       /* old_size needed by truncate */
-		__le32 mask;                 /* CEPH_SETATTR_* */
-		struct ceph_timespec btime;
+		__le32 mode;                 /* file permissions */
+		__le32 uid;                  /* user ID */
+		__le32 gid;                  /* group ID */
+		struct ceph_timespec mtime;  /* modification time */
+		struct ceph_timespec atime;  /* access time */
+		__le64 size, old_size;       /* current and previous file sizes */
+		__le32 mask;                 /* attribute mask (CEPH_SETATTR_*) */
+		struct ceph_timespec btime;  /* birth/creation time (extended) */
 	} __attribute__ ((packed)) setattr_ext;
 };
 
@@ -489,119 +629,183 @@ union ceph_mds_request_args_ext {
 #define CEPH_MDS_FLAG_WANT_DENTRY	2 /* want dentry in reply */
 #define CEPH_MDS_FLAG_ASYNC		4 /* request is asynchronous */
 
+/*
+ * Ceph MDS request message header (legacy version)
+ *
+ * This is the original MDS request header format used before protocol
+ * version 4. It lacks the version field and extended features present
+ * in the modern header. Used for backward compatibility with older
+ * MDS servers that don't support newer protocol features.
+ */
 struct ceph_mds_request_head_legacy {
-	__le64 oldest_client_tid;
-	__le32 mdsmap_epoch;           /* on client */
-	__le32 flags;                  /* CEPH_MDS_FLAG_* */
-	__u8 num_retry, num_fwd;       /* count retry, fwd attempts */
-	__le16 num_releases;           /* # include cap/lease release records */
-	__le32 op;                     /* mds op code */
-	__le32 caller_uid, caller_gid;
-	__le64 ino;                    /* use this ino for openc, mkdir, mknod,
-					  etc. (if replaying) */
-	union ceph_mds_request_args args;
+	__le64 oldest_client_tid;      /* oldest transaction ID from client */
+	__le32 mdsmap_epoch;           /* MDS map epoch client is using */
+	__le32 flags;                  /* request flags (CEPH_MDS_FLAG_*) */
+	__u8 num_retry, num_fwd;       /* retry and forward attempt counters */
+	__le16 num_releases;           /* number of cap/lease release records */
+	__le32 op;                     /* MDS operation code to perform */
+	__le32 caller_uid, caller_gid; /* credentials of the caller */
+	__le64 ino;                    /* inode number for replay operations */
+	union ceph_mds_request_args args; /* operation-specific arguments */
 } __attribute__ ((packed));
 
 #define CEPH_MDS_REQUEST_HEAD_VERSION  3
 
+/*
+ * Ceph MDS request message header
+ *
+ * Contains the header information for all MDS request messages, including
+ * operation type, client context, retry information, and operation-specific
+ * arguments. This structure has evolved over time with different versions.
+ */
 struct ceph_mds_request_head {
-	__le16 version;                /* struct version */
-	__le64 oldest_client_tid;
-	__le32 mdsmap_epoch;           /* on client */
-	__le32 flags;                  /* CEPH_MDS_FLAG_* */
-	__u8 num_retry, num_fwd;       /* legacy count retry and fwd attempts */
-	__le16 num_releases;           /* # include cap/lease release records */
-	__le32 op;                     /* mds op code */
-	__le32 caller_uid, caller_gid;
-	__le64 ino;                    /* use this ino for openc, mkdir, mknod,
-					  etc. (if replaying) */
-	union ceph_mds_request_args_ext args;
-
-	__le32 ext_num_retry;          /* new count retry attempts */
-	__le32 ext_num_fwd;            /* new count fwd attempts */
-
-	__le32 struct_len;             /* to store size of struct ceph_mds_request_head */
-	__le32 owner_uid, owner_gid;   /* used for OPs which create inodes */
-} __attribute__ ((packed));
-
-/* cap/lease release record */
+	__le16 version;                /* header structure version */
+	__le64 oldest_client_tid;      /* oldest transaction ID from client */
+	__le32 mdsmap_epoch;           /* MDS map epoch client is using */
+	__le32 flags;                  /* request flags (CEPH_MDS_FLAG_*) */
+	__u8 num_retry, num_fwd;       /* legacy retry and forward counters */
+	__le16 num_releases;           /* number of cap/lease release records */
+	__le32 op;                     /* MDS operation code to perform */
+	__le32 caller_uid, caller_gid; /* credentials of the caller */
+	__le64 ino;                    /* inode number for replay operations */
+	union ceph_mds_request_args_ext args; /* operation-specific arguments */
+
+	__le32 ext_num_retry;          /* extended retry attempt counter */
+	__le32 ext_num_fwd;            /* extended forward attempt counter */
+
+	__le32 struct_len;             /* size of this header structure */
+	__le32 owner_uid, owner_gid;   /* ownership for inode creation operations */
+} __attribute__ ((packed));
+
+/*
+ * Ceph MDS capability/lease release record
+ *
+ * Included in MDS requests to inform the MDS about capabilities or
+ * directory leases that the client is releasing. This allows the
+ * client to proactively return unused capabilities to reduce overhead.
+ */
 struct ceph_mds_request_release {
-	__le64 ino, cap_id;            /* ino and unique cap id */
-	__le32 caps, wanted;           /* new issued, wanted */
-	__le32 seq, issue_seq, mseq;
-	__le32 dname_seq;              /* if releasing a dentry lease, a */
-	__le32 dname_len;              /* string follows. */
+	__le64 ino, cap_id;            /* inode number and capability identifier */
+	__le32 caps, wanted;           /* capabilities being released/still wanted */
+	__le32 seq, issue_seq, mseq;   /* sequence numbers for capability tracking */
+	__le32 dname_seq;              /* directory name lease sequence number */
+	__le32 dname_len;              /* length of dentry name string (follows) */
 } __attribute__ ((packed));
 
 /* client reply */
+/*
+ * Ceph MDS reply message header
+ *
+ * Contains the header information for all MDS reply messages, including
+ * operation status, result codes, and flags indicating what additional
+ * data structures follow in the message payload.
+ */
 struct ceph_mds_reply_head {
-	__le32 op;
-	__le32 result;
-	__le32 mdsmap_epoch;
-	__u8 safe;                     /* true if committed to disk */
-	__u8 is_dentry, is_target;     /* true if dentry, target inode records
-					  are included with reply */
+	__le32 op;                     /* MDS operation that was performed */
+	__le32 result;                 /* operation result code (errno) */
+	__le32 mdsmap_epoch;           /* MDS map epoch when reply was sent */
+	__u8 safe;                     /* true if operation committed to disk */
+	__u8 is_dentry, is_target;     /* flags: dentry and target inode data included */
 } __attribute__ ((packed));
 
 /* one for each node split */
+/*
+ * Ceph directory fragment tree split record
+ *
+ * Describes how a directory fragment is split into smaller fragments.
+ * Each record specifies a fragment ID and the number of bits by which
+ * it should be split to create multiple sub-fragments.
+ */
 struct ceph_frag_tree_split {
-	__le32 frag;                   /* this frag splits... */
-	__le32 by;                     /* ...by this many bits */
+	__le32 frag;                   /* fragment identifier to split */
+	__le32 by;                     /* number of bits to split by */
 } __attribute__ ((packed));
 
+/*
+ * Ceph directory fragment tree header
+ *
+ * Contains the complete fragment tree structure for a directory, describing
+ * how the directory namespace is divided among multiple fragments. Large
+ * directories are split into fragments for load distribution across MDS nodes.
+ */
 struct ceph_frag_tree_head {
-	__le32 nsplits;                /* num ceph_frag_tree_split records */
-	struct ceph_frag_tree_split splits[];
+	__le32 nsplits;                /* number of fragment split records */
+	struct ceph_frag_tree_split splits[]; /* array of split records */
 } __attribute__ ((packed));
 
 /* capability issue, for bundling with mds reply */
+/*
+ * Ceph MDS reply capability structure
+ *
+ * Contains capability information included in MDS replies, specifying
+ * what capabilities are being granted to the client for an inode.
+ */
 struct ceph_mds_reply_cap {
-	__le32 caps, wanted;           /* caps issued, wanted */
-	__le64 cap_id;
-	__le32 seq, mseq;
-	__le64 realm;                  /* snap realm */
-	__u8 flags;                    /* CEPH_CAP_FLAG_* */
+	__le32 caps, wanted;           /* capabilities issued and wanted */
+	__le64 cap_id;                 /* unique capability identifier */
+	__le32 seq, mseq;              /* sequence and migration sequence numbers */
+	__le64 realm;                  /* snapshot realm this cap belongs to */
+	__u8 flags;                    /* capability flags (CEPH_CAP_FLAG_*) */
 } __attribute__ ((packed));
 
 #define CEPH_CAP_FLAG_AUTH	(1 << 0)  /* cap is issued by auth mds */
 #define CEPH_CAP_FLAG_RELEASE	(1 << 1)  /* release the cap */
 
-/* inode record, for bundling with mds reply */
+/*
+ * Ceph MDS reply inode structure
+ *
+ * Contains complete inode metadata bundled with MDS replies. This allows
+ * the MDS to send updated inode information along with operation results
+ * to keep clients synchronized with the current inode state.
+ */
 struct ceph_mds_reply_inode {
-	__le64 ino;
-	__le64 snapid;
-	__le32 rdev;
-	__le64 version;                /* inode version */
-	__le64 xattr_version;          /* version for xattr blob */
-	struct ceph_mds_reply_cap cap; /* caps issued for this inode */
-	struct ceph_file_layout_legacy layout;
-	struct ceph_timespec ctime, mtime, atime;
-	__le32 time_warp_seq;
-	__le64 size, max_size, truncate_size;
-	__le32 truncate_seq;
-	__le32 mode, uid, gid;
-	__le32 nlink;
-	__le64 files, subdirs, rbytes, rfiles, rsubdirs;  /* dir stats */
-	struct ceph_timespec rctime;
-	struct ceph_frag_tree_head fragtree;  /* (must be at end of struct) */
+	__le64 ino;                    /* inode number */
+	__le64 snapid;                 /* snapshot ID */
+	__le32 rdev;                   /* device number for special files */
+	__le64 version;                /* inode version number */
+	__le64 xattr_version;          /* extended attributes version */
+	struct ceph_mds_reply_cap cap; /* capabilities issued for this inode */
+	struct ceph_file_layout_legacy layout; /* file striping layout */
+	struct ceph_timespec ctime, mtime, atime; /* timestamps */
+	__le32 time_warp_seq;          /* time warp sequence number */
+	__le64 size, max_size, truncate_size; /* file size information */
+	__le32 truncate_seq;           /* truncate operation sequence */
+	__le32 mode, uid, gid;         /* file permissions and ownership */
+	__le32 nlink;                  /* number of hard links */
+	__le64 files, subdirs, rbytes, rfiles, rsubdirs; /* directory statistics */
+	struct ceph_timespec rctime;   /* recursive change time */
+	struct ceph_frag_tree_head fragtree; /* fragment tree (must be at end) */
 } __attribute__ ((packed));
 /* followed by frag array, symlink string, dir layout, xattr blob */
 
-/* reply_lease follows dname, and reply_inode */
+/*
+ * Ceph MDS reply lease structure
+ *
+ * Contains directory name lease information included in MDS replies.
+ * Directory leases allow clients to cache directory entries and negative
+ * lookups to improve performance by reducing round trips to the MDS.
+ */
 struct ceph_mds_reply_lease {
-	__le16 mask;            /* lease type(s) */
-	__le32 duration_ms;     /* lease duration */
-	__le32 seq;
+	__le16 mask;            /* lease type mask (CEPH_LEASE_*) */
+	__le32 duration_ms;     /* lease duration in milliseconds */
+	__le32 seq;             /* lease sequence number */
 } __attribute__ ((packed));
 
 #define CEPH_LEASE_VALID        (1 | 2) /* old and new bit values */
 #define CEPH_LEASE_PRIMARY_LINK 4       /* primary linkage */
 
+/*
+ * Ceph MDS reply directory fragment structure
+ *
+ * Contains information about directory fragment distribution across MDS nodes.
+ * Large directories are split into fragments that can be distributed across
+ * multiple MDS nodes for load balancing and scalability.
+ */
 struct ceph_mds_reply_dirfrag {
-	__le32 frag;            /* fragment */
-	__le32 auth;            /* auth mds, if this is a delegation point */
-	__le32 ndist;           /* number of mds' this is replicated on */
-	__le32 dist[];
+	__le32 frag;            /* directory fragment identifier */
+	__le32 auth;            /* authoritative MDS for this fragment */
+	__le32 ndist;           /* number of MDS nodes this fragment is replicated on */
+	__le32 dist[];          /* array of MDS node IDs holding replicas */
 } __attribute__ ((packed));
 
 #define CEPH_LOCK_FCNTL		1
@@ -614,13 +818,20 @@ struct ceph_mds_reply_dirfrag {
 #define CEPH_LOCK_EXCL     2
 #define CEPH_LOCK_UNLOCK   4
 
+/*
+ * Ceph file lock structure
+ *
+ * Represents advisory file locks (fcntl/flock) used for coordination
+ * between clients accessing the same file. The MDS mediates these locks
+ * across the cluster to ensure consistency.
+ */
 struct ceph_filelock {
-	__le64 start;/* file offset to start lock at */
-	__le64 length; /* num bytes to lock; 0 for all following start */
-	__le64 client; /* which client holds the lock */
-	__le64 owner; /* owner the lock */
-	__le64 pid; /* process id holding the lock on the client */
-	__u8 type; /* shared lock, exclusive lock, or unlock */
+	__le64 start;   /* file byte offset where lock begins */
+	__le64 length;  /* number of bytes to lock (0 = lock to EOF) */
+	__le64 client;  /* client ID that holds the lock */
+	__le64 owner;   /* lock owner identifier (typically file pointer) */
+	__le64 pid;     /* process ID holding the lock on the client */
+	__u8 type;      /* lock type: CEPH_LOCK_SHARED/EXCL/UNLOCK */
 } __attribute__ ((packed));
 
 
@@ -762,53 +973,76 @@ extern const char *ceph_cap_op_name(int op);
 #define CEPH_CLIENT_CAPS_PENDING_CAPSNAP	(1<<2)
 
 /*
- * caps message, used for capability callbacks, acks, requests, etc.
+ * Ceph MDS capability message structure
+ *
+ * This structure represents capability-related messages exchanged between
+ * the MDS and clients. Capabilities grant permissions to perform operations
+ * on inodes and include cached metadata to reduce round trips.
  */
 struct ceph_mds_caps {
-	__le32 op;                  /* CEPH_CAP_OP_* */
-	__le64 ino, realm;
-	__le64 cap_id;
-	__le32 seq, issue_seq;
-	__le32 caps, wanted, dirty; /* latest issued/wanted/dirty */
-	__le32 migrate_seq;
-	__le64 snap_follows;
-	__le32 snap_trace_len;
-
-	/* authlock */
-	__le32 uid, gid, mode;
-
-	/* linklock */
-	__le32 nlink;
-
-	/* xattrlock */
-	__le32 xattr_len;
-	__le64 xattr_version;
-
-	/* a union of non-export and export bodies. */
-	__le64 size, max_size, truncate_size;
-	__le32 truncate_seq;
-	struct ceph_timespec mtime, atime, ctime;
-	struct ceph_file_layout_legacy layout;
-	__le32 time_warp_seq;
+	__le32 op;                  /* capability operation (CEPH_CAP_OP_*) */
+	__le64 ino, realm;          /* inode number and snapshot realm */
+	__le64 cap_id;              /* unique capability identifier */
+	__le32 seq, issue_seq;      /* sequence numbers for ordering */
+	__le32 caps, wanted, dirty; /* capability bits: granted/requested/dirty */
+	__le32 migrate_seq;         /* sequence number for cap migration */
+	__le64 snap_follows;        /* snapshot context this cap follows */
+	__le32 snap_trace_len;      /* length of snapshot trace following */
+
+	/* File ownership and permissions */
+	__le32 uid, gid, mode;      /* owner user/group ID and file mode */
+
+	/* Link count */
+	__le32 nlink;               /* number of hard links to this inode */
+
+	/* Extended attributes */
+	__le32 xattr_len;           /* length of xattr blob */
+	__le64 xattr_version;       /* version of extended attributes */
+
+	/* File data and layout (union for export/non-export operations) */
+	__le64 size, max_size, truncate_size;  /* current/max/truncate file sizes */
+	__le32 truncate_seq;        /* truncate operation sequence number */
+	struct ceph_timespec mtime, atime, ctime;  /* file timestamps */
+	struct ceph_file_layout_legacy layout;     /* file striping layout */
+	__le32 time_warp_seq;       /* sequence for time warp detection */
 } __attribute__ ((packed));
 
+/*
+ * Ceph MDS capability peer information structure
+ *
+ * This structure contains information about a capability at a peer MDS,
+ * used during capability import/export operations when capabilities are
+ * migrated between different MDS nodes.
+ */
 struct ceph_mds_cap_peer {
-	__le64 cap_id;
-	__le32 issue_seq;
-	__le32 mseq;
-	__le32 mds;
-	__u8   flags;
+	__le64 cap_id;		/* capability ID at the peer MDS */
+	__le32 issue_seq;	/* issue sequence number at peer MDS */
+	__le32 mseq;		/* migration sequence number at peer MDS */
+	__le32 mds;		/* MDS number of the peer */
+	__u8   flags;		/* capability flags at peer MDS */
 } __attribute__ ((packed));
 
-/* cap release msg head */
+/*
+ * Ceph MDS capability release message header
+ *
+ * This structure forms the header of a capability release message sent from
+ * client to MDS to inform that the client is releasing (giving up) capabilities
+ * on one or more inodes. The message contains a list of cap_item structures.
+ */
 struct ceph_mds_cap_release {
-	__le32 num;                /* number of cap_items that follow */
+	__le32 num;                /* number of ceph_mds_cap_item entries following */
 } __attribute__ ((packed));
 
+/*
+ * Ceph MDS capability release item
+ *
+ * Represents a single capability being released by the client. Multiple
+ * cap items can be batched together in a single cap release message.
+ */
 struct ceph_mds_cap_item {
-	__le64 ino;
-	__le64 cap_id;
-	__le32 migrate_seq, issue_seq;
+	__le64 ino;                /* inode number of the file */
+	__le64 cap_id;             /* unique capability identifier */
+	__le32 migrate_seq, issue_seq; /* migration and issue sequence numbers */
 } __attribute__ ((packed));
 
 #define CEPH_MDS_LEASE_REVOKE           1  /*    mds  -> client */
@@ -818,42 +1052,68 @@ struct ceph_mds_cap_item {
 
 extern const char *ceph_lease_op_name(int o);
 
-/* lease msg header */
+/*
+ * Ceph MDS lease message structure
+ *
+ * This structure represents directory name lease messages exchanged between
+ * MDS and clients. Directory leases grant clients permission to cache
+ * directory contents and negative dentries to improve performance.
+ */
 struct ceph_mds_lease {
-	__u8 action;            /* CEPH_MDS_LEASE_* */
-	__le16 mask;            /* which lease */
-	__le64 ino;
-	__le64 first, last;     /* snap range */
-	__le32 seq;
-	__le32 duration_ms;     /* duration of renewal */
+	__u8 action;            /* lease action (CEPH_MDS_LEASE_*) */
+	__le16 mask;            /* which lease type is being acted upon */
+	__le64 ino;             /* inode number of parent directory */
+	__le64 first, last;     /* snapshot range for the lease */
+	__le32 seq;             /* lease sequence number for ordering */
+	__le32 duration_ms;     /* lease duration in milliseconds (for renewals) */
 } __attribute__ ((packed));
 /* followed by a __le32+string for dname */
 
-/* client reconnect */
+/*
+ * Ceph MDS capability reconnect structure (version 2)
+ *
+ * Sent during MDS session reconnection to restore capability state
+ * after a session has been lost. This allows the client to inform
+ * the MDS about capabilities it believes it holds.
+ */
 struct ceph_mds_cap_reconnect {
-	__le64 cap_id;
-	__le32 wanted;
-	__le32 issued;
-	__le64 snaprealm;
-	__le64 pathbase;        /* base ino for our path to this ino */
-	__le32 flock_len;       /* size of flock state blob, if any */
+	__le64 cap_id;          /* unique capability identifier */
+	__le32 wanted;          /* capabilities the client wants */
+	__le32 issued;          /* capabilities the client believes are issued */
+	__le64 snaprealm;       /* snapshot realm this inode belongs to */
+	__le64 pathbase;        /* base inode number for path reconstruction */
+	__le32 flock_len;       /* size of file lock state blob following */
 } __attribute__ ((packed));
 /* followed by flock blob */
 
+/*
+ * Ceph MDS capability reconnect structure (version 1)
+ *
+ * Legacy version of the capability reconnect structure used for
+ * backwards compatibility with older MDS versions. Contains
+ * additional file metadata fields not present in version 2.
+ */
 struct ceph_mds_cap_reconnect_v1 {
-	__le64 cap_id;
-	__le32 wanted;
-	__le32 issued;
-	__le64 size;
-	struct ceph_timespec mtime, atime;
-	__le64 snaprealm;
-	__le64 pathbase;        /* base ino for our path to this ino */
+	__le64 cap_id;          /* unique capability identifier */
+	__le32 wanted;          /* capabilities the client wants */
+	__le32 issued;          /* capabilities the client believes are issued */
+	__le64 size;            /* file size */
+	struct ceph_timespec mtime, atime; /* file modification and access times */
+	__le64 snaprealm;       /* snapshot realm this inode belongs to */
+	__le64 pathbase;        /* base inode number for path reconstruction */
 } __attribute__ ((packed));
 
+/*
+ * Ceph MDS snapshot realm reconnect structure
+ *
+ * Sent during MDS session reconnection to restore snapshot realm
+ * hierarchy information. This helps the MDS reconstruct the client's
+ * view of snapshot realms after a session interruption.
+ */
 struct ceph_mds_snaprealm_reconnect {
-	__le64 ino;     /* snap realm base */
-	__le64 seq;     /* snap seq for this snap realm */
-	__le64 parent;  /* parent realm */
+	__le64 ino;     /* inode number of snapshot realm root directory */
+	__le64 seq;     /* sequence number of this snapshot realm */
+	__le64 parent;  /* inode number of parent snapshot realm */
 } __attribute__ ((packed));
 
 /*
@@ -868,44 +1128,56 @@ enum {
 
 extern const char *ceph_snap_op_name(int o);
 
-/* snap msg header */
+/*
+ * Ceph MDS snapshot message header
+ *
+ * This structure forms the header for snapshot-related messages from the MDS,
+ * containing operation type and metadata about snapshot realm operations.
+ */
 struct ceph_mds_snap_head {
-	__le32 op;                /* CEPH_SNAP_OP_* */
-	__le64 split;             /* ino to split off, if any */
-	__le32 num_split_inos;    /* # inos belonging to new child realm */
-	__le32 num_split_realms;  /* # child realms udner new child realm */
-	__le32 trace_len;         /* size of snap trace blob */
+	__le32 op;                /* snapshot operation type (CEPH_SNAP_OP_*) */
+	__le64 split;             /* inode number to split off into new realm */
+	__le32 num_split_inos;    /* number of inodes belonging to new child realm */
+	__le32 num_split_realms;  /* number of child realms under new child realm */
+	__le32 trace_len;         /* size of the snapshot trace blob following */
 } __attribute__ ((packed));
 /* followed by split ino list, then split realms, then the trace blob */
 
 /*
- * encode info about a snaprealm, as viewed by a client
+ * Ceph MDS snapshot realm information structure
+ *
+ * Encodes information about a snapshot realm as viewed by a client.
+ * A snapshot realm represents a subtree of the filesystem that shares
+ * the same snapshot history and can be snapshotted as a unit.
  */
 struct ceph_mds_snap_realm {
-	__le64 ino;           /* ino */
-	__le64 created;       /* snap: when created */
-	__le64 parent;        /* ino: parent realm */
-	__le64 parent_since;  /* snap: same parent since */
-	__le64 seq;           /* snap: version */
-	__le32 num_snaps;
-	__le32 num_prior_parent_snaps;
+	__le64 ino;           /* inode number of the realm root directory */
+	__le64 created;       /* snapshot ID when this realm was created */
+	__le64 parent;        /* inode number of parent realm (0 if root) */
+	__le64 parent_since;  /* snapshot ID since realm had same parent */
+	__le64 seq;           /* sequence number for realm version/updates */
+	__le32 num_snaps;     /* number of snapshots in this realm */
+	__le32 num_prior_parent_snaps; /* number of parent snapshots before split */
 } __attribute__ ((packed));
 /* followed by my snap list, then prior parent snap list */
 
 /*
- * quotas
+ * Ceph MDS quota information structure
+ *
+ * This structure represents quota-related metadata sent from the MDS
+ * to update directory quota limits and current usage statistics.
  */
 struct ceph_mds_quota {
-	__le64 ino;		/* ino */
-	struct ceph_timespec rctime;
-	__le64 rbytes;		/* dir stats */
-	__le64 rfiles;
-	__le64 rsubdirs;
-	__u8 struct_v;		/* compat */
-	__u8 struct_compat;
-	__le32 struct_len;
-	__le64 max_bytes;	/* quota max. bytes */
-	__le64 max_files;	/* quota max. files */
+	__le64 ino;			/* inode number of the directory */
+	struct ceph_timespec rctime;	/* recursive change time */
+	__le64 rbytes;			/* recursive bytes used in directory tree */
+	__le64 rfiles;			/* recursive file count in directory tree */
+	__le64 rsubdirs;		/* recursive subdirectory count */
+	__u8 struct_v;			/* structure version for compatibility */
+	__u8 struct_compat;		/* compatibility version */
+	__le32 struct_len;		/* length of this structure */
+	__le64 max_bytes;		/* quota limit: maximum bytes allowed */
+	__le64 max_files;		/* quota limit: maximum files allowed */
 } __attribute__ ((packed));
 
 #endif
-- 
2.51.0
