Re: [PATCH 2/4] fsck: avoid using an uninitialized variable

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Mar 27, 2025 at 12:43:47PM +0000, Johannes Schindelin via GitGitGadget wrote:

> From: Johannes Schindelin <johannes.schindelin@xxxxxx>
> 
> In `fsck_commit()`, after counting the authors of a commit, we set the
> `err` variable either when there was no author, or when there were more
> than two authors recorded. Then we access the `err` variable to figure
> out whether we should return early. But if there was exactly one author,
> that variable is still uninitialized.
> 
> Let's just initialize the variable.
> 
> This issue was pointed out by CodeQL.

Hmm, I'd think we would hit this case all the time, since commits
generally have one author. But I think it's another false positive.

The code in question is this:

          author_count = 0;
          while (buffer < buffer_end && skip_prefix(buffer, "author ", &buffer)) {
                  author_count++;
                  err = fsck_ident(&buffer, oid, OBJ_COMMIT, options);
                  if (err)
                          return err;
          }
          if (author_count < 1)
                  err = report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_AUTHOR, "invalid format - expected 'author' line");
          else if (author_count > 1)
                  err = report(options, oid, OBJ_COMMIT, FSCK_MSG_MULTIPLE_AUTHORS, "invalid format - multiple 'author' lines");
          if (err)
                  return err;

So we set "err" as soon as we find _any_ author (when we check whether
it is properly formatted via fsck_ident). And author_count will not be
incremented if we did not find one. So either we must have assigned
the result of fsck_ident(), or we will hit the "author_count < 1" case
and assign there.

It's certainly confusing, though, since "err" gets used in so many
spots. I think the whole thing would be easier to understand if we had
tighter-scoped single use variables like this:

diff --git a/fsck.c b/fsck.c
index 9fc4c25ffd..ea72b3247d 100644
--- a/fsck.c
+++ b/fsck.c
@@ -925,7 +925,6 @@ static int fsck_commit(const struct object_id *oid,
 {
 	struct object_id tree_oid, parent_oid;
 	unsigned author_count;
-	int err;
 	const char *buffer_begin = buffer;
 	const char *buffer_end = buffer + size;
 	const char *p;
@@ -941,39 +940,44 @@ static int fsck_commit(const struct object_id *oid,
 	if (buffer >= buffer_end || !skip_prefix(buffer, "tree ", &buffer))
 		return report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_TREE, "invalid format - expected 'tree' line");
 	if (parse_oid_hex(buffer, &tree_oid, &p) || *p != '\n') {
-		err = report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_TREE_SHA1, "invalid 'tree' line format - bad sha1");
+		int err = report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_TREE_SHA1, "invalid 'tree' line format - bad sha1");
 		if (err)
 			return err;
 	}
 	buffer = p + 1;
 	while (buffer < buffer_end && skip_prefix(buffer, "parent ", &buffer)) {
 		if (parse_oid_hex(buffer, &parent_oid, &p) || *p != '\n') {
-			err = report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_PARENT_SHA1, "invalid 'parent' line format - bad sha1");
+			int err = report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_PARENT_SHA1, "invalid 'parent' line format - bad sha1");
 			if (err)
 				return err;
 		}
 		buffer = p + 1;
 	}
 	author_count = 0;
 	while (buffer < buffer_end && skip_prefix(buffer, "author ", &buffer)) {
+		int err = fsck_ident(&buffer, oid, OBJ_COMMIT, options);
+		if (err)
+			return err;
 		author_count++;
-		err = fsck_ident(&buffer, oid, OBJ_COMMIT, options);
+	}
+	if (author_count < 1) {
+		int err = report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_AUTHOR, "invalid format - expected 'author' line");
+		if (err)
+			return err;
+	} else if (author_count > 1) {
+		int err = report(options, oid, OBJ_COMMIT, FSCK_MSG_MULTIPLE_AUTHORS, "invalid format - multiple 'author' lines");
 		if (err)
 			return err;
 	}
-	if (author_count < 1)
-		err = report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_AUTHOR, "invalid format - expected 'author' line");
-	else if (author_count > 1)
-		err = report(options, oid, OBJ_COMMIT, FSCK_MSG_MULTIPLE_AUTHORS, "invalid format - multiple 'author' lines");
-	if (err)
-		return err;
 	if (buffer >= buffer_end || !skip_prefix(buffer, "committer ", &buffer))
 		return report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_COMMITTER, "invalid format - expected 'committer' line");
-	err = fsck_ident(&buffer, oid, OBJ_COMMIT, options);
-	if (err)
-		return err;
+	else {
+		int err = fsck_ident(&buffer, oid, OBJ_COMMIT, options);
+		if (err)
+			return err;
+	}
 	if (memchr(buffer_begin, '\0', size)) {
-		err = report(options, oid, OBJ_COMMIT, FSCK_MSG_NUL_IN_COMMIT,
+		int err = report(options, oid, OBJ_COMMIT, FSCK_MSG_NUL_IN_COMMIT,
 			     "NUL byte in the commit object body");
 		if (err)
 			return err;

And then it is obvious that the general pattern is to propagate "err"
from individual calls (and the ones that do not stick out like sore
thumbs; are those bugs where we should keep going if the user set those
message types to warn/ignore?).

You could even wrap the pattern in a macro, though perhaps that is
getting too magical. The resulting logic is easier to follow, though, if
you can look past the macro:

diff --git a/fsck.c b/fsck.c
index ea72b3247d..8c7ac3c448 100644
--- a/fsck.c
+++ b/fsck.c
@@ -919,6 +919,12 @@ static int fsck_ident(const char **ident,
 	return 0;
 }
 
+#define MAYBE_RETURN(x) do { \
+	int err = (x); \
+	if (err) \
+		return err; \
+} while (0)
+
 static int fsck_commit(const struct object_id *oid,
 		       const char *buffer, unsigned long size,
 		       struct fsck_options *options)
@@ -939,49 +945,30 @@ static int fsck_commit(const struct object_id *oid,
 
 	if (buffer >= buffer_end || !skip_prefix(buffer, "tree ", &buffer))
 		return report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_TREE, "invalid format - expected 'tree' line");
-	if (parse_oid_hex(buffer, &tree_oid, &p) || *p != '\n') {
-		int err = report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_TREE_SHA1, "invalid 'tree' line format - bad sha1");
-		if (err)
-			return err;
-	}
+	if (parse_oid_hex(buffer, &tree_oid, &p) || *p != '\n')
+		MAYBE_RETURN(report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_TREE_SHA1, "invalid 'tree' line format - bad sha1"));
 	buffer = p + 1;
 	while (buffer < buffer_end && skip_prefix(buffer, "parent ", &buffer)) {
-		if (parse_oid_hex(buffer, &parent_oid, &p) || *p != '\n') {
-			int err = report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_PARENT_SHA1, "invalid 'parent' line format - bad sha1");
-			if (err)
-				return err;
-		}
+		if (parse_oid_hex(buffer, &parent_oid, &p) || *p != '\n')
+			MAYBE_RETURN(report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_PARENT_SHA1, "invalid 'parent' line format - bad sha1"));
 		buffer = p + 1;
 	}
 	author_count = 0;
 	while (buffer < buffer_end && skip_prefix(buffer, "author ", &buffer)) {
-		int err = fsck_ident(&buffer, oid, OBJ_COMMIT, options);
-		if (err)
-			return err;
+		MAYBE_RETURN(fsck_ident(&buffer, oid, OBJ_COMMIT, options));
 		author_count++;
 	}
-	if (author_count < 1) {
-		int err = report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_AUTHOR, "invalid format - expected 'author' line");
-		if (err)
-			return err;
-	} else if (author_count > 1) {
-		int err = report(options, oid, OBJ_COMMIT, FSCK_MSG_MULTIPLE_AUTHORS, "invalid format - multiple 'author' lines");
-		if (err)
-			return err;
-	}
+	if (author_count < 1)
+		MAYBE_RETURN(report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_AUTHOR, "invalid format - expected 'author' line"));
+	else if (author_count > 1)
+		MAYBE_RETURN(report(options, oid, OBJ_COMMIT, FSCK_MSG_MULTIPLE_AUTHORS, "invalid format - multiple 'author' lines"));
 	if (buffer >= buffer_end || !skip_prefix(buffer, "committer ", &buffer))
 		return report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_COMMITTER, "invalid format - expected 'committer' line");
-	else {
-		int err = fsck_ident(&buffer, oid, OBJ_COMMIT, options);
-		if (err)
-			return err;
-	}
-	if (memchr(buffer_begin, '\0', size)) {
-		int err = report(options, oid, OBJ_COMMIT, FSCK_MSG_NUL_IN_COMMIT,
-			     "NUL byte in the commit object body");
-		if (err)
-			return err;
-	}
+	else
+		MAYBE_RETURN(fsck_ident(&buffer, oid, OBJ_COMMIT, options));
+
+	if (memchr(buffer_begin, '\0', size))
+		MAYBE_RETURN(report(options, oid, OBJ_COMMIT, FSCK_MSG_NUL_IN_COMMIT, "NUL byte in the commit object body"));
 	return 0;
 }
 

I'd suspect that just the first patch above would fix the CodeQL issue.
It's certainly a larger diff, but IMHO the result is less confusing for
humans, too.

-Peff




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux