Re: [PATCH v3 1/2] trailer: append trailers in-process and drop the fork to `interpret-trailers`

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Li

On 03/08/2025 16:00, Li Chen wrote:
From: Li Chen <chenl311@xxxxxxxxxxxxxxx>

All trailer insertion now funnels through trailer_process():

* builtin/interpret-trailers.c is reduced to file I/O + a single call.
* amend_file_with_trailers() shares the same path; the old
   amend_strbuf_with_trailers() helper is dropped.
* New helpers parse_trailer_args()/free_new_trailer_list() convert
   --trailer=... strings to new_trailer_item lists.

Behaviour is unchanged; the full test-suite still passes, and the
fork/exec is gone.

Normally commit messages should be written in prose rather than a bullet list and the message should explain the reason for the change.

This patch has much less code duplication than the last iteration which is most welcome. Whenever you are moving and refactoring code you should split the move into its own commit followed by the refactoring. That makes it much easier to review as the reviewer can clearly see the refactoring rather than having to manually compare the added code in one file to the deleted code in another.

@@ -84,6 +83,7 @@ static int parse_opt_parse(const struct option *opt, const char *arg,
  			   int unset)
  {
  	struct process_trailer_options *v = opt->value;
+

Let's not clutter this patch with unrelated changes.

  	v->only_trailers = 1;
  	v->only_input = 1;
  	v->unfold = 1;
@@ -92,37 +92,6 @@ static int parse_opt_parse(const struct option *opt, const char *arg,
  	return 0;
  }
-static FILE *create_in_place_tempfile(const char *file)
-{
[...]
-}

We don't need to create a temporary file anymore so this can be deleted - good.

-static void interpret_trailers(const struct process_trailer_options *opts,
-			       struct list_head *new_trailer_head,
-			       const char *file)
-{
-	LIST_HEAD(head);
-	struct strbuf sb = STRBUF_INIT;
-	struct strbuf trailer_block_sb = STRBUF_INIT;
-	struct trailer_block *trailer_block;
-	FILE *outfile = stdout;
-
-	trailer_config_init();
-
-	read_input_file(&sb, file);
-
-	if (opts->in_place)
-		outfile = create_in_place_tempfile(file);
-
-	trailer_block = parse_trailers(opts, sb.buf, &head);
-
-	/* Print the lines before the trailer block */
-	if (!opts->only_trailers)
-		fwrite(sb.buf, 1, trailer_block_start(trailer_block), outfile);
-
-	if (!opts->only_trailers && !blank_line_before_trailer_block(trailer_block))
-		fprintf(outfile, "\n");
-
-
-	if (!opts->only_input) {
-		LIST_HEAD(config_head);
-		LIST_HEAD(arg_head);
-		parse_trailers_from_config(&config_head);
-		parse_trailers_from_command_line_args(&arg_head, new_trailer_head);
-		list_splice(&config_head, &arg_head);
-		process_trailers_lists(&head, &arg_head);
-	}
-
-	/* Print trailer block. */
-	format_trailers(opts, &head, &trailer_block_sb);
-	free_trailers(&head);
-	fwrite(trailer_block_sb.buf, 1, trailer_block_sb.len, outfile);
-	strbuf_release(&trailer_block_sb);
-
-	/* Print the lines after the trailer block as is. */
-	if (!opts->only_trailers)
-		fwrite(sb.buf + trailer_block_end(trailer_block), 1,
-		       sb.len - trailer_block_end(trailer_block), outfile);
-	trailer_block_release(trailer_block);
-
-	if (opts->in_place)
-		if (rename_tempfile(&trailers_tempfile, file))
-			die_errno(_("could not rename temporary file to %s"), file);
-
-	strbuf_release(&sb);
-}

This code is moved to trailer.c which is good but it is heavily refactored at the same time which makes it hard to review. Completely removing this function leads to some duplication in cmd_interpret_trailers() which could be avoided by making interpret_trailers() a wrapper around process_trailers()

  int cmd_interpret_trailers(int argc,
  			   const char **argv,
  			   const char *prefix,
@@ -231,14 +145,37 @@ int cmd_interpret_trailers(int argc,
  			git_interpret_trailers_usage,
  			options);
+ trailer_config_init();
+
  	if (argc) {
  		int i;
-		for (i = 0; i < argc; i++)
-			interpret_trailers(&opts, &trailers, argv[i]);
+		for (i = 0; i < argc; i++) {
+			struct strbuf in_buf = STRBUF_INIT;
+			struct strbuf out_buf = STRBUF_INIT;
+
+			read_input_file(&in_buf, argv[i]);
+			if (trailer_process(&opts, in_buf.buf, &trailers, &out_buf) < 0)
+				die(_("failed to process trailers for %s"), argv[i]);
+			if (opts.in_place)
+				write_file_buf(argv[i], out_buf.buf, out_buf.len);
+			else
+				fwrite(out_buf.buf, 1, out_buf.len, stdout);
+			strbuf_release(&in_buf);
+			strbuf_release(&out_buf);
+		}
  	} else {
+		struct strbuf in_buf = STRBUF_INIT;
+		struct strbuf out_buf = STRBUF_INIT;
+
  		if (opts.in_place)
  			die(_("no input file given for in-place editing"));
-		interpret_trailers(&opts, &trailers, NULL);
+
+		read_input_file(&in_buf, NULL);
+		if (trailer_process(&opts, in_buf.buf, &trailers, &out_buf) < 0)
+			die(_("failed to process trailers"));
+		fwrite(out_buf.buf, 1, out_buf.len, stdout);
+		strbuf_release(&in_buf);
+		strbuf_release(&out_buf);
  	}

There is quite a bit of duplication here that could be avoided if you modified interpret_trailers() to call trailer_process() rather than deleting it entirely.

  	new_trailers_clear(&trailers);
diff --git a/trailer.c b/trailer.c
index 310cf582dc..03814443c3 100644
--- a/trailer.c
+++ b/trailer.c
@@ -1224,14 +1224,121 @@ void trailer_iterator_release(struct trailer_iterator *iter)
  	strbuf_release(&iter->key);
  }
-int amend_file_with_trailers(const char *path, const struct strvec *trailer_args)
+static int amend_strbuf_with_trailers(struct strbuf *buf,
+				   const struct strvec *trailer_args)

Function argument declarations should be aligned

  {
-	struct child_process run_trailer = CHILD_PROCESS_INIT;
-
-	run_trailer.git_cmd = 1;
-	strvec_pushl(&run_trailer.args, "interpret-trailers",
-		     "--in-place", "--no-divider",
-		     path, NULL);
-	strvec_pushv(&run_trailer.args, trailer_args->v);
-	return run_command(&run_trailer);
+	struct process_trailer_options opts = PROCESS_TRAILER_OPTIONS_INIT;
+	LIST_HEAD(new_trailer_head);
+	struct strbuf out = STRBUF_INIT;
+	size_t i;
+
+	opts.no_divider = 1;
+
+	for (i = 0; i < trailer_args->nr; i++) {
+		const char *arg = trailer_args->v[i];
+		const char *text;
+		struct new_trailer_item *item;

There should be a blank line after the variable declarations at the start of each block of code.

+		if (!skip_prefix(arg, "--trailer=", &text))

Why do we need this? It would be much cleaner if we required the caller to pass a list of trailers without any optional prefix.

+			text = arg;
+		if (!*text)
+			continue;
+		item = xcalloc(1, sizeof(*item));
+		INIT_LIST_HEAD(&item->list);
+		item->text = text;
+		list_add_tail(&item->list, &new_trailer_head);
+	}
+	if (trailer_process(&opts, buf->buf, &new_trailer_head, &out) < 0)
+		die("failed to process trailers");

As this is library code lets return an error here rather than dying.

+	strbuf_swap(buf, &out);
+	strbuf_release(&out);
+	while (!list_empty(&new_trailer_head)) {
+		struct new_trailer_item *item =
+			list_first_entry(&new_trailer_head, struct new_trailer_item, list);
+		list_del(&item->list);
+		free(item);
+	}
+	return 0;
  }
+
+int trailer_process(const struct process_trailer_options *opts,
+				   const char *msg,
+				   struct list_head *new_trailer_head,
+				   struct strbuf *out)

Argument alignment again

+{
+		struct trailer_block *blk;

This is trailer_block in the original but has been re-ordered with respect to the other variable declarations making the patch harder to review.

+		LIST_HEAD(orig_head);

This is head in the original but moved relative to the other variable declarations

+		LIST_HEAD(config_head);
+		LIST_HEAD(ar1g_head);

These two have been moved from inside the if (!opts->only_input) below. They are only referenced there so do not need to be declared here. Moving them makes this patch harder to review.

+		struct strbuf trailers_sb = STRBUF_INIT;

This is from the original but moved relative to the other variable declarations.

+		int had_trailer_before;

This is new - lets see how it is used. We've just started using bool for boolean variables in the last few weeks so this could be a bool now.

From here to

+		blk = parse_trailers(opts, msg, &orig_head);
+		had_trailer_before = !list_empty(&orig_head);
+		if (!opts->only_input) {
+			parse_trailers_from_config(&config_head);
+			parse_trailers_from_command_line_args(&arg_head, new_trailer_head);
+			list_splice(&config_head, &arg_head);
+			process_trailers_lists(&orig_head, &arg_head);
+		}
+		format_trailers(opts, &orig_head, &trailers_sb);

here is copied from the original minus the code that copied the commit message to the output file. Rather than deleting the code that copied the commit message we could have replaced the calls to fwrite() and fprintf() with strbuf_add() and strbuf_addf() which would make it obvious that the behavior is not changed. The original then frees orig_head but that is done later here.

+		if (!opts->only_trailers && !opts->only_input && !opts->unfold &&
+			!opts->trim_empty && list_empty(&orig_head) &&
+			(list_empty(new_trailer_head) || opts->only_input)) {

I'm not sure what is happening here. By this point the original has copied the original commit message and is ready to append the new trailers. Instead the new version seems to have completely refactored the logic for adding the new trailers making it harder to see if the behavior has changed.

+			size_t split = trailer_block_start(blk); /* end-of-log-msg */
+			if (!blank_line_before_trailer_block(blk)) {
+				strbuf_add(out, msg, split);
+				strbuf_addch(out, '\n');
+				strbuf_addstr(out, msg + split);

This copies the original message but adds a newline before the trailer block if it is missing.

+			} else
+				strbuf_addstr(out, msg);

This just copies the whole message.

+			strbuf_rel2ease(&trailers_sb);
+			trailer_block_release(blk);


+			return 0;

We return a copy of the original message with no new trailers added. We do not free orig_head, arg_head or config_head. I'm still confused why we need to special case this.


+		}
+		if (opts->only_trailers) {
+			strbuf_addbuf(out, &trailers_sb);

This flips the logic in the original to handle opts->only_trailers separately making it harder to review.

+		} else if (had_trailer_before) {
+			strbuf_add(out, msg, trailer_block_start(blk));
+			if (!blank_line_before_trailer_block(blk))
+				strbuf_addch(out, '\n');
+			strbuf_addbuf(out, &trailers_sb);
+			strbuf_add(out, msg + trailer_block_end(blk),
+						strlen(msg) - trailer_block_end(blk));

This handles the case where we're replacing the headers in the original message

+		}
+		else {

Style - this should be "} else {"

+			size_t cpos = trailer_block_start(blk);
+			strbuf_add(out, msg, cpos);
+			if (cpos == 0)                     /* empty body → just one \n */
+				strbuf_addch(out, '\n');
+			else if (!blank_line_before_trailer_block(blk))
+				strbuf_addch(out, '\n');   /* body without trailing blank */
+
+			strbuf_addbuf(out, &trailers_sb);
+			strbuf_add(out, msg + cpos, strlen(msg) - cpos);
+	   }

I'm confused why we need a separate case for when the original did not have any trailers - was the original code broken? If it was we should separate out the bug fix from the refactoring. If not what's the point of this change?

+		strbuf_release(&trailers_sb);
+		free_trailers(&orig_head);
+		trailer_block_release(blk);
+		return 0;
+}
+
+int amend_file_with_trailers(const char *path,
+							 const struct strvec *trailer_args)

Alignment again

+{
+	struct strbuf buf = STRBUF_INIT;
+
+	if (!trailer_args || !trailer_args->nr)
+		return 0;
+
+	if (strbuf_read_file(&buf, path, 0) < 0)
+		return error_errno("could not read '%s'", path);
+
+	if (amend_strbuf_with_trailers(&buf, trailer_args))
+		die("failed to append trailers");

Why return an error() above but die() here? This is library code so lets return an error.

+
+	/* `write_file_buf()` aborts on error internally */
+	write_file_buf(path, buf.buf, buf.len);

Dying here is a change in behavior which callers might not be expecting. The original code always returned a error because it forked a sub-process to do the trailer processing. Ideally, in a separate commit, we'd update any existing callers that have the message in an strbuf so they don't have to write it to a file just to add some trailers to it.

+	strbuf_release(&buf);
+	return 0;
+ }

As I said above reusing the existing code as you have done here is a much better approach. However it would be much easier to review if the code movement was separated from the refactoring. I'm also struggling to see the benefit of a lot of the refactoring - I was expecting the conversion to use an strubf would essentially look like fwrite() being replaced with strbuf_add() and fprintf() being replaced with strbuf_addf() etc. rather than reworking the logic.

Thanks


Phillip

diff --git a/trailer.h b/trailer.h
index 4740549586..01f711fb13 100644
--- a/trailer.h
+++ b/trailer.h
@@ -196,10 +196,22 @@ int trailer_iterator_advance(struct trailer_iterator *iter);
  void trailer_iterator_release(struct trailer_iterator *iter);
/*
- * Augment a file to add trailers to it by running git-interpret-trailers.
- * This calls run_command() and its return value is the same (i.e. 0 for
- * success, various non-zero for other errors). See run-command.h.
+ * Augment a file to add trailers to it (similar to 'git interpret-trailers').
+ * Returns 0 on success or a non-zero error code on failure.
   */
  int amend_file_with_trailers(const char *path, const struct strvec *trailer_args);
+/*
+ * Process trailer lines for a commit message in-memory.
+ * @opts: trailer processing options (e.g. from parse-options)
+ * @msg: the input message string
+ * @new_trailer_head: list of new trailers to add (struct new_trailer_item)
+ * @out: strbuf to store the resulting message (must be initialized)
+ *
+ * Returns 0 on success, <0 on error.
+ */
+int trailer_process(const struct process_trailer_options *opts,
+			const char *msg,
+			struct list_head *new_trailer_head,
+			struct strbuf *out);
  #endif /* TRAILER_H */





[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux