Hi, this patch series is another step on our long road towards not having global state. In addition to that, as commit-graphs are part of the object database layer, this is also another step towards pluggable object databases. Changes in v2: - Use `unsigned` instead of `size_t` to count number of Bloom filters. - Use `uint32_t` instead of `size_t` for number of commit graphs, as this type is also used to iterate through this count already. - Refactor `parse_commit_graph()` to take a repository instead of both repo settings and a hash algo. - Link to v1: https://lore.kernel.org/r/20250804-b4-pks-commit-graph-wo-the-repository-v1-0-850d626eb2e8@xxxxxx Thanks! Patrick --- Patrick Steinhardt (10): trace2: introduce function to trace unsigned integers commit-graph: stop using signed integers to count Bloom filters commit-graph: fix type for some write options commit-graph: fix sign comparison warnings commit-graph: stop using `the_hash_algo` via macros commit-graph: store the hash algorithm instead of its length commit-graph: refactor `parse_commit_graph()` to take a repository commit-graph: stop using `the_hash_algo` commit-graph: stop using `the_repository` commit-graph: stop passing in redundant repository builtin/commit-graph.c | 13 +- builtin/commit.c | 2 +- builtin/merge.c | 2 +- commit-graph.c | 371 +++++++++++++++++++++---------------------- commit-graph.h | 25 ++- oss-fuzz/fuzz-commit-graph.c | 6 +- t/helper/test-read-graph.c | 2 +- trace2.c | 14 ++ trace2.h | 9 ++ 9 files changed, 227 insertions(+), 217 deletions(-) Range-diff versus v1: 1: cb92085a3b = 1: a25e9cdbcc trace2: introduce function to trace unsigned integers 2: 25520448c6 ! 2: e03ca21ec2 commit-graph: stop using signed integers to count bloom filters @@ Metadata Author: Patrick Steinhardt <ps@xxxxxx> ## Commit message ## - commit-graph: stop using signed integers to count bloom filters + commit-graph: stop using signed integers to count Bloom filters When writing a new commit graph we have a couple of counters that - provide statistics around what kind of bloom filters we have or have not + provide statistics around what kind of Bloom filters we have or have not written. These counters naturally count from zero and are only ever incremented, but they use a signed integer as type regardless. - Refactor those fields to be of type `size_t` instead. + Refactor those fields to be unsigned instead. Using an unsigned type + makes it explicit to the reader that they never have to worry about + negative values and thus makes the code easier to understand. Signed-off-by: Patrick Steinhardt <ps@xxxxxx> @@ commit-graph.c: struct write_commit_graph_context { - int count_bloom_filter_trunc_empty; - int count_bloom_filter_trunc_large; - int count_bloom_filter_upgraded; -+ size_t count_bloom_filter_computed; -+ size_t count_bloom_filter_not_computed; -+ size_t count_bloom_filter_trunc_empty; -+ size_t count_bloom_filter_trunc_large; -+ size_t count_bloom_filter_upgraded; ++ unsigned count_bloom_filter_computed; ++ unsigned count_bloom_filter_not_computed; ++ unsigned count_bloom_filter_trunc_empty; ++ unsigned count_bloom_filter_trunc_large; ++ unsigned count_bloom_filter_upgraded; }; static int write_graph_chunk_fanout(struct hashfile *f, 3: 12e150a326 = 3: d569434715 commit-graph: fix type for some write options 4: 0bdaff4e76 ! 4: 3f820e3347 commit-graph: fix sign comparison warnings @@ Commit message quantity, we still return a signed integer that we then later compare with unsigned values. - - The bloom settings hash version is being assigned `-1` even though + - The Bloom settings hash version is being assigned `-1` even though it's an unsigned value. This is used to indicate an unspecified value and relies on 1's complement. @@ commit-graph.c: struct write_commit_graph_context { char *base_graph_name; - int num_commit_graphs_before; - int num_commit_graphs_after; -+ size_t num_commit_graphs_before; -+ size_t num_commit_graphs_after; ++ uint32_t num_commit_graphs_before; ++ uint32_t num_commit_graphs_after; char **commit_graph_filenames_before; char **commit_graph_filenames_after; char **commit_graph_hash_after; 5: 6e5d4da7f1 = 5: c3be366e36 commit-graph: stop using `the_hash_algo` via macros 6: 8e8bf531d1 = 6: 2476994769 commit-graph: store the hash algorithm instead of its length -: ---------- > 7: b582b49437 commit-graph: refactor `parse_commit_graph()` to take a repository 7: 20bce2f981 ! 8: 0d0bd20ceb commit-graph: stop using `the_hash_algo` @@ commit-graph.c: struct commit_graph *load_commit_graph_one_fd_st(struct reposito close(fd); error(_("commit-graph file is too small")); return NULL; -@@ commit-graph.c: struct commit_graph *load_commit_graph_one_fd_st(struct repository *r, - graph_map = xmmap(NULL, graph_size, PROT_READ, MAP_PRIVATE, fd, 0); - close(fd); - prepare_repo_settings(r); -- ret = parse_commit_graph(&r->settings, graph_map, graph_size); -+ ret = parse_commit_graph(&r->settings, r->hash_algo, graph_map, graph_size); - - if (ret) - ret->odb_source = source; @@ commit-graph.c: static int graph_read_commit_data(const unsigned char *chunk_start, size_t chunk_size, void *data) { @@ commit-graph.c: static int graph_read_commit_data(const unsigned char *chunk_sta return error(_("commit-graph commit data chunk is wrong size")); g->chunk_commit_data = chunk_start; return 0; -@@ commit-graph.c: static int graph_read_bloom_data(const unsigned char *chunk_start, - } - - struct commit_graph *parse_commit_graph(struct repo_settings *s, -+ const struct git_hash_algo *hash_algo, - void *graph_map, size_t graph_size) - { - const unsigned char *data; -@@ commit-graph.c: struct commit_graph *parse_commit_graph(struct repo_settings *s, - if (!graph_map) - return NULL; - -- if (graph_size < graph_min_size(the_hash_algo)) -+ if (graph_size < graph_min_size(hash_algo)) - return NULL; - - data = (const unsigned char *)graph_map; -@@ commit-graph.c: struct commit_graph *parse_commit_graph(struct repo_settings *s, - } - - hash_version = *(unsigned char*)(data + 5); -- if (hash_version != oid_version(the_hash_algo)) { -+ if (hash_version != oid_version(hash_algo)) { - error(_("commit-graph hash version %X does not match version %X"), -- hash_version, oid_version(the_hash_algo)); -+ hash_version, oid_version(hash_algo)); - return NULL; - } - - graph = alloc_commit_graph(); - -- graph->hash_algo = the_hash_algo; -+ graph->hash_algo = hash_algo; - graph->num_chunks = *(unsigned char*)(data + 6); - graph->data = graph_map; - graph->data_len = graph_size; - - if (graph_size < GRAPH_HEADER_SIZE + - (graph->num_chunks + 1) * CHUNK_TOC_ENTRY_SIZE + -- GRAPH_FANOUT_SIZE + the_hash_algo->rawsz) { -+ GRAPH_FANOUT_SIZE + hash_algo->rawsz) { - error(_("commit-graph file is too small to hold %u chunks"), - graph->num_chunks); - free(graph); @@ commit-graph.c: static int add_graph_to_chain(struct commit_graph *g, } @@ commit-graph.h: struct string_list; /* * Given a commit struct, try to fill the commit struct info, including: -@@ commit-graph.h: struct repo_settings; - * prior to calling parse_commit_graph(). - */ - struct commit_graph *parse_commit_graph(struct repo_settings *s, -+ const struct git_hash_algo *hash_algo, - void *graph_map, size_t graph_size); - - /* - - ## oss-fuzz/fuzz-commit-graph.c ## -@@ - #include "repository.h" - - struct commit_graph *parse_commit_graph(struct repo_settings *s, -+ const struct git_hash_algo *hash_algo, - void *graph_map, size_t graph_size); - - int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size); -@@ oss-fuzz/fuzz-commit-graph.c: int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) - repo_set_hash_algo(the_repository, GIT_HASH_SHA1); - the_repository->settings.commit_graph_generation_version = 2; - the_repository->settings.commit_graph_changed_paths_version = 1; -- g = parse_commit_graph(&the_repository->settings, (void *)data, size); -+ g = parse_commit_graph(&the_repository->settings, the_repository->hash_algo, -+ (void *)data, size); - repo_clear(the_repository); - free_commit_graph(g); - 8: 424567998e ! 9: a86c1ab958 commit-graph: stop using `the_repository` @@ commit-graph.c: void git_test_write_commit_graph_or_die(void) die("failed to write commit-graph under GIT_TEST_COMMIT_GRAPH"); } -@@ commit-graph.c: struct commit_graph *parse_commit_graph(struct repo_settings *s, - } - - oidread(&graph->oid, graph->data + graph->data_len - graph->hash_algo->rawsz, -- the_repository->hash_algo); -+ hash_algo); - - free_chunkfile(cf); - return graph; @@ commit-graph.c: static int add_graph_to_chain(struct commit_graph *g, if (!cur_g || !oideq(&oids[n], &cur_g->oid) || 9: cff4bc0329 ! 10: 70a7f6fecf commit-graph: stop passing in redundant repository @@ commit-graph.c: struct commit_graph *load_commit_graph_one_fd_st(struct reposito close(fd); error(_("commit-graph file is too small")); return NULL; - } +@@ commit-graph.c: struct commit_graph *load_commit_graph_one_fd_st(struct repository *r, graph_map = xmmap(NULL, graph_size, PROT_READ, MAP_PRIVATE, fd, 0); close(fd); -- prepare_repo_settings(r); -- ret = parse_commit_graph(&r->settings, r->hash_algo, graph_map, graph_size); -+ prepare_repo_settings(source->odb->repo); -+ ret = parse_commit_graph(&source->odb->repo->settings, source->odb->repo->hash_algo, -+ graph_map, graph_size); +- ret = parse_commit_graph(r, graph_map, graph_size); ++ ret = parse_commit_graph(source->odb->repo, graph_map, graph_size); if (ret) ret->odb_source = source; -@@ commit-graph.c: struct commit_graph *parse_commit_graph(struct repo_settings *s, + else +@@ commit-graph.c: struct commit_graph *parse_commit_graph(struct repository *r, return NULL; } --- base-commit: e813a0200a7121b97fec535f0d0b460b0a33356c change-id: 20250717-b4-pks-commit-graph-wo-the-repository-1dc2cacbc8e3