Re: [PATCH v3 1/2] trailer: append trailers in-process and drop the fork to `interpret-trailers`

Li Chen <me@linux.beauty> · Thu, 07 Aug 2025 10:45:04 +0800

Hi Phillip,

 ---- On Tue, 05 Aug 2025 21:17:01 +0800  Phillip Wood <phillip.wood123@xxxxxxxxx> wrote --- 
 > Hi Li
 > 
 > On 03/08/2025 16:00, Li Chen wrote:
 > > From: Li Chen <chenl311@xxxxxxxxxxxxxxx>
 > > 
 > > All trailer insertion now funnels through trailer_process():
 > > 
 > > * builtin/interpret-trailers.c is reduced to file I/O + a single call.
 > > * amend_file_with_trailers() shares the same path; the old
 > >    amend_strbuf_with_trailers() helper is dropped.
 > > * New helpers parse_trailer_args()/free_new_trailer_list() convert
 > >    --trailer=... strings to new_trailer_item lists.
 > > 
 > > Behaviour is unchanged; the full test-suite still passes, and the
 > > fork/exec is gone.
 > 
 > Normally commit messages should be written in prose rather than a bullet 
 > list and the message should explain the reason for the change.
 > 
 > This patch has much less code duplication than the last iteration which 
 > is most welcome. Whenever you are moving and refactoring code you should 
 > split the move into its own commit followed by the refactoring. That 
 > makes it much easier to review as the reviewer can clearly see the 
 > refactoring rather than having to manually compare the added code in one 
 > file to the deleted code in another.

I apologize for this, and I will add new commits to resolve all issues in the next versions.

 > > @@ -84,6 +83,7 @@ static int parse_opt_parse(const struct option *opt, const char *arg,
 > >                  int unset)
 > >   {
 > >       struct process_trailer_options *v = opt->value;
 > > +
 > 
 > Let's not clutter this patch with unrelated changes.
 > 
 > >       v->only_trailers = 1;
 > >       v->only_input = 1;
 > >       v->unfold = 1;
 > > @@ -92,37 +92,6 @@ static int parse_opt_parse(const struct option *opt, const char *arg,
 > >       return 0;
 > >   }
 > >   
 > > -static FILE *create_in_place_tempfile(const char *file)
 > > -{
 > > [...]
 > > -}
 > 
 > We don't need to create a temporary file anymore so this can be deleted 
 > - good.
 > 
 > > -static void interpret_trailers(const struct process_trailer_options *opts,
 > > -                   struct list_head *new_trailer_head,
 > > -                   const char *file)
 > > -{
 > > -    LIST_HEAD(head);
 > > -    struct strbuf sb = STRBUF_INIT;
 > > -    struct strbuf trailer_block_sb = STRBUF_INIT;
 > > -    struct trailer_block *trailer_block;
 > > -    FILE *outfile = stdout;
 > > -
 > > -    trailer_config_init();
 > > -
 > > -    read_input_file(&sb, file);
 > > -
 > > -    if (opts->in_place)
 > > -        outfile = create_in_place_tempfile(file);
 > > -
 > > -    trailer_block = parse_trailers(opts, sb.buf, &head);
 > > -
 > > -    /* Print the lines before the trailer block */
 > > -    if (!opts->only_trailers)
 > > -        fwrite(sb.buf, 1, trailer_block_start(trailer_block), outfile);
 > > -
 > > -    if (!opts->only_trailers && !blank_line_before_trailer_block(trailer_block))
 > > -        fprintf(outfile, "\n");
 > > -
 > > -
 > > -    if (!opts->only_input) {
 > > -        LIST_HEAD(config_head);
 > > -        LIST_HEAD(arg_head);
 > > -        parse_trailers_from_config(&config_head);
 > > -        parse_trailers_from_command_line_args(&arg_head, new_trailer_head);
 > > -        list_splice(&config_head, &arg_head);
 > > -        process_trailers_lists(&head, &arg_head);
 > > -    }
 > > -
 > > -    /* Print trailer block. */
 > > -    format_trailers(opts, &head, &trailer_block_sb);
 > > -    free_trailers(&head);
 > > -    fwrite(trailer_block_sb.buf, 1, trailer_block_sb.len, outfile);
 > > -    strbuf_release(&trailer_block_sb);
 > > -
 > > -    /* Print the lines after the trailer block as is. */
 > > -    if (!opts->only_trailers)
 > > -        fwrite(sb.buf + trailer_block_end(trailer_block), 1,
 > > -               sb.len - trailer_block_end(trailer_block), outfile);
 > > -    trailer_block_release(trailer_block);
 > > -
 > > -    if (opts->in_place)
 > > -        if (rename_tempfile(&trailers_tempfile, file))
 > > -            die_errno(_("could not rename temporary file to %s"), file);
 > > -
 > > -    strbuf_release(&sb);
 > > -}
 > 
 > This code is moved to trailer.c which is good but it is heavily 
 > refactored at the same time which makes it hard to review. Completely 
 > removing this function leads to some duplication in 
 > cmd_interpret_trailers() which could be avoided by making 
 > interpret_trailers() a wrapper around process_trailers()
 > 
 > >   int cmd_interpret_trailers(int argc,
 > >                  const char **argv,
 > >                  const char *prefix,
 > > @@ -231,14 +145,37 @@ int cmd_interpret_trailers(int argc,
 > >               git_interpret_trailers_usage,
 > >               options);
 > >   
 > > +    trailer_config_init();
 > > +
 > >       if (argc) {
 > >           int i;
 > > -        for (i = 0; i < argc; i++)
 > > -            interpret_trailers(&opts, &trailers, argv[i]);
 > > +        for (i = 0; i < argc; i++) {
 > > +            struct strbuf in_buf = STRBUF_INIT;
 > > +            struct strbuf out_buf = STRBUF_INIT;
 > > +
 > > +            read_input_file(&in_buf, argv[i]);
 > > +            if (trailer_process(&opts, in_buf.buf, &trailers, &out_buf) < 0)
 > > +                die(_("failed to process trailers for %s"), argv[i]);
 > > +            if (opts.in_place)
 > > +                write_file_buf(argv[i], out_buf.buf, out_buf.len);
 > > +            else
 > > +                fwrite(out_buf.buf, 1, out_buf.len, stdout);
 > > +            strbuf_release(&in_buf);
 > > +            strbuf_release(&out_buf);
 > > +        }
 > >       } else {
 > > +        struct strbuf in_buf = STRBUF_INIT;
 > > +        struct strbuf out_buf = STRBUF_INIT;
 > > +
 > >           if (opts.in_place)
 > >               die(_("no input file given for in-place editing"));
 > > -        interpret_trailers(&opts, &trailers, NULL);
 > > +
 > > +        read_input_file(&in_buf, NULL);
 > > +        if (trailer_process(&opts, in_buf.buf, &trailers, &out_buf) < 0)
 > > +            die(_("failed to process trailers"));
 > > +        fwrite(out_buf.buf, 1, out_buf.len, stdout);
 > > +        strbuf_release(&in_buf);
 > > +        strbuf_release(&out_buf);
 > >       }
 > 
 > There is quite a bit of duplication here that could be avoided if you 
 > modified interpret_trailers() to call trailer_process() rather than 
 > deleting it entirely.
 > 
 > >       new_trailers_clear(&trailers);
 > > diff --git a/trailer.c b/trailer.c
 > > index 310cf582dc..03814443c3 100644
 > > --- a/trailer.c
 > > +++ b/trailer.c
 > > @@ -1224,14 +1224,121 @@ void trailer_iterator_release(struct trailer_iterator *iter)
 > >       strbuf_release(&iter->key);
 > >   }
 > >   
 > > -int amend_file_with_trailers(const char *path, const struct strvec *trailer_args)
 > > +static int amend_strbuf_with_trailers(struct strbuf *buf,
 > > +                   const struct strvec *trailer_args)
 > 
 > Function argument declarations should be aligned
 > 
 > >   {
 > > -    struct child_process run_trailer = CHILD_PROCESS_INIT;
 > > -
 > > -    run_trailer.git_cmd = 1;
 > > -    strvec_pushl(&run_trailer.args, "interpret-trailers",
 > > -             "--in-place", "--no-divider",
 > > -             path, NULL);
 > > -    strvec_pushv(&run_trailer.args, trailer_args->v);
 > > -    return run_command(&run_trailer);
 > > +    struct process_trailer_options opts = PROCESS_TRAILER_OPTIONS_INIT;
 > > +    LIST_HEAD(new_trailer_head);
 > > +    struct strbuf out = STRBUF_INIT;
 > > +    size_t i;
 > > +
 > > +    opts.no_divider = 1;
 > > +
 > > +    for (i = 0; i < trailer_args->nr; i++) {
 > > +        const char *arg = trailer_args->v[i];
 > > +        const char *text;
 > > +        struct new_trailer_item *item;
 > 
 > There should be a blank line after the variable declarations at the 
 > start of each block of code.
 > 
 > > +        if (!skip_prefix(arg, "--trailer=", &text))
 > 
 > Why do we need this? It would be much cleaner if we required the caller 
 > to pass a list of trailers without any optional prefix.
 > 
 > > +            text = arg;
 > > +        if (!*text)
 > > +            continue;
 > > +        item = xcalloc(1, sizeof(*item));
 > > +        INIT_LIST_HEAD(&item->list);
 > > +        item->text = text;
 > > +        list_add_tail(&item->list, &new_trailer_head);
 > > +    }
 > > +    if (trailer_process(&opts, buf->buf, &new_trailer_head, &out) < 0)
 > > +        die("failed to process trailers");
 > 
 > As this is library code lets return an error here rather than dying.
 > 
 > > +    strbuf_swap(buf, &out);
 > > +    strbuf_release(&out);
 > > +    while (!list_empty(&new_trailer_head)) {
 > > +        struct new_trailer_item *item =
 > > +            list_first_entry(&new_trailer_head, struct new_trailer_item, list);
 > > +        list_del(&item->list);
 > > +        free(item);
 > > +    }
 > > +    return 0;
 > >   }
 > > +
 > > +int trailer_process(const struct process_trailer_options *opts,
 > > +                   const char *msg,
 > > +                   struct list_head *new_trailer_head,
 > > +                   struct strbuf *out)
 > 
 > Argument alignment again
 > 
 > > +{
 > > +        struct trailer_block *blk;
 > 
 > This is trailer_block in the original but has been re-ordered with 
 > respect to the other variable declarations making the patch harder to 
 > review.
 > 
 > > +        LIST_HEAD(orig_head);
 > 
 > This is head in the original but moved relative to the other variable 
 > declarations
 > 
 > > +        LIST_HEAD(config_head);
 > > +        LIST_HEAD(ar1g_head);
 > 
 > These two have been moved from inside the if (!opts->only_input) below. 
 > They are only referenced there so do not need to be declared here. 
 > Moving them makes this patch harder to review.
 > 
 > > +        struct strbuf trailers_sb = STRBUF_INIT;
 > 
 > This is from the original but moved relative to the other variable 
 > declarations.
 > 
 > > +        int had_trailer_before;
 > 
 > This is new - lets see how it is used. We've just started using bool for 
 > boolean variables in the last few weeks so this could be a bool now.
 > 
 >  From here to
 > 
 > > +        blk = parse_trailers(opts, msg, &orig_head);
 > > +        had_trailer_before = !list_empty(&orig_head);
 > > +        if (!opts->only_input) {
 > > +            parse_trailers_from_config(&config_head);
 > > +            parse_trailers_from_command_line_args(&arg_head, new_trailer_head);
 > > +            list_splice(&config_head, &arg_head);
 > > +            process_trailers_lists(&orig_head, &arg_head);
 > > +        }
 > > +        format_trailers(opts, &orig_head, &trailers_sb);
 > 
 > here is copied from the original minus the code that copied the commit 
 > message to the output file. Rather than deleting the code that copied 
 > the commit message we could have replaced the calls to fwrite() and 
 > fprintf() with strbuf_add() and strbuf_addf() which would make it 
 > obvious that the behavior is not changed. The original then frees 
 > orig_head but that is done later here.
 > 
 > > +        if (!opts->only_trailers && !opts->only_input && !opts->unfold &&
 > > +            !opts->trim_empty && list_empty(&orig_head) &&
 > > +            (list_empty(new_trailer_head) || opts->only_input)) {
 > 
 > I'm not sure what is happening here. By this point the original has 
 > copied the original commit message and is ready to append the new 
 > trailers. Instead the new version seems to have completely refactored 
 > the logic for adding the new trailers making it harder to see if the 
 > behavior has changed.
 > 
 > > +            size_t split = trailer_block_start(blk); /* end-of-log-msg */
 > > +            if (!blank_line_before_trailer_block(blk)) {
 > > +                strbuf_add(out, msg, split);
 > > +                strbuf_addch(out, '\n');
 > > +                strbuf_addstr(out, msg + split);
 > 
 > This copies the original message but adds a newline before the trailer 
 > block if it is missing.
 > 
 > > +            } else
 > > +                strbuf_addstr(out, msg);
 > 
 > This just copies the whole message.
 > 
 > > +            strbuf_rel2ease(&trailers_sb);
 > > +            trailer_block_release(blk);
 > 
 > 
 > > +            return 0;
 > 
 > We return a copy of the original message with no new trailers added. We 
 > do not free orig_head, arg_head or config_head. I'm still confused why 
 > we need to special case this.
 > 
 > 
 > > +        }
 > > +        if (opts->only_trailers) {
 > > +            strbuf_addbuf(out, &trailers_sb);
 > 
 > This flips the logic in the original to handle opts->only_trailers 
 > separately making it harder to review.
 > 
 > > +        } else if (had_trailer_before) {
 > > +            strbuf_add(out, msg, trailer_block_start(blk));
 > > +            if (!blank_line_before_trailer_block(blk))
 > > +                strbuf_addch(out, '\n');
 > > +            strbuf_addbuf(out, &trailers_sb);
 > > +            strbuf_add(out, msg + trailer_block_end(blk),
 > > +                        strlen(msg) - trailer_block_end(blk));
 > 
 > This handles the case where we're replacing the headers in the original 
 > message
 > 
 > > +        }
 > > +        else {
 > 
 > Style - this should be "} else {"
 > 
 > > +            size_t cpos = trailer_block_start(blk);
 > > +            strbuf_add(out, msg, cpos);
 > > +            if (cpos == 0)                     /* empty body → just one \n */
 > > +                strbuf_addch(out, '\n');
 > > +            else if (!blank_line_before_trailer_block(blk))
 > > +                strbuf_addch(out, '\n');   /* body without trailing blank */
 > > +
 > > +            strbuf_addbuf(out, &trailers_sb);
 > > +            strbuf_add(out, msg + cpos, strlen(msg) - cpos);
 > > +       }
 > 
 > I'm confused why we need a separate case for when the original did not 
 > have any trailers - was the original code broken? If it was we should 
 > separate out the bug fix from the refactoring. If not what's the point 
 > of this change?
 > 
 > > +        strbuf_release(&trailers_sb);
 > > +        free_trailers(&orig_head);
 > > +        trailer_block_release(blk);
 > > +        return 0;
 > > +}
 > > +
 > > +int amend_file_with_trailers(const char *path,
 > > +                             const struct strvec *trailer_args)
 > 
 > Alignment again
 > 
 > > +{
 > > +    struct strbuf buf = STRBUF_INIT;
 > > +
 > > +    if (!trailer_args || !trailer_args->nr)
 > > +        return 0;
 > > +
 > > +    if (strbuf_read_file(&buf, path, 0) < 0)
 > > +        return error_errno("could not read '%s'", path);
 > > +
 > > +    if (amend_strbuf_with_trailers(&buf, trailer_args))
 > > +        die("failed to append trailers");
 > 
 > Why return an error() above but die() here? This is library code so lets 
 > return an error.
 > 
 > > +
 > > +    /* `write_file_buf()` aborts on error internally */
 > > +    write_file_buf(path, buf.buf, buf.len);
 > 
 > Dying here is a change in behavior which callers might not be expecting. 
 > The original code always returned a error because it forked a 
 > sub-process to do the trailer processing. Ideally, in a separate commit, 
 > we'd update any existing callers that have the message in an strbuf so 
 > they don't have to write it to a file just to add some trailers to it.
 > 
 > > +    strbuf_release(&buf);
 > > +    return 0;
 > > + }
 > 
 > As I said above reusing the existing code as you have done here is a 
 > much better approach. However it would be much easier to review if the 
 > code movement was separated from the refactoring. I'm also struggling to 
 > see the benefit of a lot of the refactoring - I was expecting the 
 > conversion to use an strubf would essentially look like fwrite() being 
 > replaced with strbuf_add() and fprintf() being replaced with 
 > strbuf_addf() etc. rather than reworking the logic.
 > 
 > Thanks
 > 
 > 
 > Phillip
 > 
 > > diff --git a/trailer.h b/trailer.h
 > > index 4740549586..01f711fb13 100644
 > > --- a/trailer.h
 > > +++ b/trailer.h
 > > @@ -196,10 +196,22 @@ int trailer_iterator_advance(struct trailer_iterator *iter);
 > >   void trailer_iterator_release(struct trailer_iterator *iter);
 > >   
 > >   /*
 > > - * Augment a file to add trailers to it by running git-interpret-trailers.
 > > - * This calls run_command() and its return value is the same (i.e. 0 for
 > > - * success, various non-zero for other errors). See run-command.h.
 > > + * Augment a file to add trailers to it (similar to 'git interpret-trailers').
 > > + * Returns 0 on success or a non-zero error code on failure.
 > >    */
 > >   int amend_file_with_trailers(const char *path, const struct strvec *trailer_args);
 > >   
 > > +/*
 > > + * Process trailer lines for a commit message in-memory.
 > > + * @opts: trailer processing options (e.g. from parse-options)
 > > + * @msg: the input message string
 > > + * @new_trailer_head: list of new trailers to add (struct new_trailer_item)
 > > + * @out: strbuf to store the resulting message (must be initialized)
 > > + *
 > > + * Returns 0 on success, <0 on error.
 > > + */
 > > +int trailer_process(const struct process_trailer_options *opts,
 > > +            const char *msg,
 > > +            struct list_head *new_trailer_head,
 > > +            struct strbuf *out);
 > >   #endif /* TRAILER_H */
 > 
 > 
Regards,

Li