Greetings, The bulk-checkin subsystem provides an interface to write objects to the object database in a bulk transaction. The state of an ongoing transaction is stored across several global variables. This series aims to remove this global transaction state in favor of storing state in in `struct object_database`. This is done in preparation for a follow-up change where the goal is to eventually move these transaction interfaces into "odb.h". Changes since V1: - `index_blob_bulk_checkin()` now assumes that the caller always provides a setup `struct odb_transaction`. Callers are adjusted to ensure this. - Functions in bulk-checkin.c now consistently access the repository through the provided `odb_transaction`. Thanks, -Justin Justin Tobler (4): bulk-checkin: introduce object database transaction structure bulk-checkin: remove global transaction state bulk-checkin: require transaction for index_blob_bulk_checkin() bulk-checkin: use repository variable from transaction builtin/add.c | 5 +- builtin/unpack-objects.c | 5 +- builtin/update-index.c | 7 +- bulk-checkin.c | 153 +++++++++++++++++++++------------------ bulk-checkin.h | 25 ++++--- cache-tree.c | 5 +- object-file.c | 30 +++++--- odb.h | 8 ++ read-cache.c | 5 +- 9 files changed, 142 insertions(+), 101 deletions(-) Range-diff against v1: 1: 5c9358e0b03 = 1: 5c9358e0b03 bulk-checkin: introduce object database transaction structure 2: 4a1b80a6baf = 2: 4a1b80a6baf bulk-checkin: remove global transaction state -: ----------- > 3: ce329932fdd bulk-checkin: require transaction for index_blob_bulk_checkin() 3: 2ca78c8d343 ! 4: 08e26647915 bulk-checkin: wire repository variable @@ Metadata Author: Justin Tobler <jltobler@xxxxxxxxx> ## Commit message ## - bulk-checkin: wire repository variable + bulk-checkin: use repository variable from transaction The bulk-checkin subsystem depends on `the_repository`. Adapt functions - and call sites to wire the repository variable where needed. The - `USE_THE_REPOSITORY_VARIBALE` is still required as the + and call sites to access the repository through `struct odb_transaction` + instead. The `USE_THE_REPOSITORY_VARIBALE` is still required as the `pack_compression_level` and `pack_size_limit_cfg` globals are still used. @@ bulk-checkin.c: struct odb_transaction { }; -static void finish_tmp_packfile(struct strbuf *basename, -+static void finish_tmp_packfile(struct repository *repo, +- const char *pack_tmp_name, +- struct pack_idx_entry **written_list, +- uint32_t nr_written, +- struct pack_idx_option *pack_idx_opts, ++static void finish_tmp_packfile(struct odb_transaction *transaction, + struct strbuf *basename, - const char *pack_tmp_name, - struct pack_idx_entry **written_list, - uint32_t nr_written, -@@ bulk-checkin.c: static void finish_tmp_packfile(struct strbuf *basename, + unsigned char hash[]) { ++ struct bulk_checkin_packfile *state = &transaction->packfile; ++ struct repository *repo = transaction->odb->repo; char *idx_tmp_name = NULL; - stage_tmp_packfiles(the_repository, basename, pack_tmp_name, -+ stage_tmp_packfiles(repo, basename, pack_tmp_name, - written_list, nr_written, NULL, pack_idx_opts, hash, - &idx_tmp_name); +- written_list, nr_written, NULL, pack_idx_opts, hash, +- &idx_tmp_name); - rename_tmp_packfile_idx(the_repository, basename, &idx_tmp_name); ++ stage_tmp_packfiles(repo, basename, state->pack_tmp_name, ++ state->written, state->nr_written, NULL, ++ &state->pack_idx_opts, hash, &idx_tmp_name); + rename_tmp_packfile_idx(repo, basename, &idx_tmp_name); free(idx_tmp_name); } -static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state) -+static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state, -+ struct repository *repo) ++static void flush_bulk_checkin_packfile(struct odb_transaction *transaction) { ++ struct bulk_checkin_packfile *state = &transaction->packfile; ++ struct repository *repo = transaction->odb->repo; unsigned char hash[GIT_MAX_RAWSZ]; struct strbuf packname = STRBUF_INIT; + @@ bulk-checkin.c: static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state) CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE); } else { @@ bulk-checkin.c: static void flush_bulk_checkin_packfile(struct bulk_checkin_pack - strbuf_addf(&packname, "%s/pack/pack-%s.", repo_get_object_directory(the_repository), - hash_to_hex(hash)); - finish_tmp_packfile(&packname, state->pack_tmp_name, -+ strbuf_addf(&packname, "%s/pack/pack-%s.", repo_get_object_directory(repo), +- state->written, state->nr_written, +- &state->pack_idx_opts, hash); ++ strbuf_addf(&packname, "%s/pack/pack-%s.", ++ transaction->odb->sources->path, + hash_to_hex_algop(hash, repo->hash_algo)); -+ finish_tmp_packfile(repo, &packname, state->pack_tmp_name, - state->written, state->nr_written, - &state->pack_idx_opts, hash); ++ ++ finish_tmp_packfile(transaction, &packname, hash); for (uint32_t i = 0; i < state->nr_written; i++) + free(state->written[i]); + @@ bulk-checkin.c: static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state) strbuf_release(&packname); @@ bulk-checkin.c: static void flush_batch_fsync(struct odb_transaction *transactio */ - strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX", repo_get_object_directory(the_repository)); + strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX", -+ repo_get_object_directory(transaction->odb->repo)); ++ transaction->odb->sources->path); temp = xmks_tempfile(temp_path.buf); fsync_or_die(get_tempfile_fd(temp), get_tempfile_path(temp)); delete_tempfile(&temp); @@ bulk-checkin.c: static void flush_batch_fsync(struct odb_transaction *transactio } -static int already_written(struct bulk_checkin_packfile *state, struct object_id *oid) -+static int already_written(struct bulk_checkin_packfile *state, -+ struct repository *repo, struct object_id *oid) ++static int already_written(struct odb_transaction *transaction, ++ struct object_id *oid) { /* The object may already exist in the repository */ - if (odb_has_object(the_repository->objects, oid, -+ if (odb_has_object(repo->objects, oid, ++ if (odb_has_object(transaction->odb, oid, HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) return 1; + /* Might want to keep the list sorted */ +- for (uint32_t i = 0; i < state->nr_written; i++) +- if (oideq(&state->written[i]->oid, oid)) ++ for (uint32_t i = 0; i < transaction->packfile.nr_written; i++) ++ if (oideq(&transaction->packfile.written[i]->oid, oid)) + return 1; + + /* This is a new object we need to keep */ @@ bulk-checkin.c: static int stream_blob_to_pack(struct bulk_checkin_packfile *state, + } /* Lazily create backing packfile for the state */ - static void prepare_to_stream(struct bulk_checkin_packfile *state, -+ struct repository *repo, +-static void prepare_to_stream(struct bulk_checkin_packfile *state, ++static void prepare_to_stream(struct odb_transaction *transaction, unsigned flags) { ++ struct bulk_checkin_packfile *state = &transaction->packfile; if (!(flags & INDEX_WRITE_OBJECT) || state->f) return; - state->f = create_tmp_packfile(the_repository, &state->pack_tmp_name); -+ state->f = create_tmp_packfile(repo, &state->pack_tmp_name); ++ state->f = create_tmp_packfile(transaction->odb->repo, ++ &state->pack_tmp_name); reset_pack_idx_option(&state->pack_idx_opts); /* Pretend we are going to write only one object */ @@ bulk-checkin.c: static void prepare_to_stream(struct bulk_checkin_packfile *state, + die_errno("unable to write pack header"); } - static int deflate_blob_to_pack(struct bulk_checkin_packfile *state, -+ struct repository *repo, - struct object_id *result_oid, - int fd, size_t size, - const char *path, unsigned flags) +-static int deflate_blob_to_pack(struct bulk_checkin_packfile *state, +- struct object_id *result_oid, +- int fd, size_t size, +- const char *path, unsigned flags) ++int index_blob_bulk_checkin(struct odb_transaction *transaction, ++ struct object_id *result_oid, ++ int fd, size_t size, ++ const char *path, unsigned flags) + { ++ struct bulk_checkin_packfile *state = &transaction->packfile; + off_t seekback, already_hashed_to; + struct git_hash_ctx ctx; + unsigned char obuf[16384]; @@ bulk-checkin.c: static int deflate_blob_to_pack(struct bulk_checkin_packfile *state, header_len = format_object_header((char *)obuf, sizeof(obuf), OBJ_BLOB, size); - the_hash_algo->init_fn(&ctx); -+ repo->hash_algo->init_fn(&ctx); ++ transaction->odb->repo->hash_algo->init_fn(&ctx); git_hash_update(&ctx, obuf, header_len); /* Note: idx is non-NULL when we are writing */ @@ bulk-checkin.c: static int deflate_blob_to_pack(struct bulk_checkin_packfile *st CALLOC_ARRAY(idx, 1); - prepare_to_stream(state, flags); -+ prepare_to_stream(state, repo, flags); ++ prepare_to_stream(transaction, flags); hashfile_checkpoint_init(state->f, &checkpoint); } @@ bulk-checkin.c: static int deflate_blob_to_pack(struct bulk_checkin_packfile *st while (1) { - prepare_to_stream(state, flags); -+ prepare_to_stream(state, repo, flags); ++ prepare_to_stream(transaction, flags); if (idx) { hashfile_checkpoint(state->f, &checkpoint); idx->offset = state->offset; @@ bulk-checkin.c: static int deflate_blob_to_pack(struct bulk_checkin_packfile *st hashfile_truncate(state->f, &checkpoint); state->offset = checkpoint.offset; - flush_bulk_checkin_packfile(state); -+ flush_bulk_checkin_packfile(state, repo); ++ flush_bulk_checkin_packfile(transaction); if (lseek(fd, seekback, SEEK_SET) == (off_t) -1) return error("cannot seek back"); } @@ bulk-checkin.c: static int deflate_blob_to_pack(struct bulk_checkin_packfile *st idx->crc32 = crc32_end(state->f); - if (already_written(state, result_oid)) { -+ if (already_written(state, repo, result_oid)) { ++ if (already_written(transaction, result_oid)) { hashfile_truncate(state->f, &checkpoint); state->offset = checkpoint.offset; free(idx); @@ bulk-checkin.c: void fsync_loose_object_bulk_checkin(struct odb_transaction *tra } -int index_blob_bulk_checkin(struct odb_transaction *transaction, -+int index_blob_bulk_checkin(struct repository *repo, -+ struct odb_transaction *transaction, - struct object_id *oid, int fd, size_t size, - const char *path, unsigned flags) +- struct object_id *oid, int fd, size_t size, +- const char *path, unsigned flags) +-{ +- return deflate_blob_to_pack(&transaction->packfile, oid, fd, size, path, +- flags); +-} +- + struct odb_transaction *begin_odb_transaction(struct object_database *odb) { - int status; - - if (transaction) { -- status = deflate_blob_to_pack(&transaction->packfile, oid, fd, -- size, path, flags); -+ status = deflate_blob_to_pack(&transaction->packfile, -+ repo, oid, fd, size, path, flags); - } else { - struct bulk_checkin_packfile state = { 0 }; - -- status = deflate_blob_to_pack(&state, oid, fd, size, path, flags); -- flush_bulk_checkin_packfile(&state); -+ status = deflate_blob_to_pack(&state, repo, oid, fd, size, path, flags); -+ flush_bulk_checkin_packfile(&state, repo); - } - - return status; + if (!odb->transaction) { @@ bulk-checkin.c: void flush_odb_transaction(struct odb_transaction *transaction) return; flush_batch_fsync(transaction); - flush_bulk_checkin_packfile(&transaction->packfile); -+ flush_bulk_checkin_packfile(&transaction->packfile, -+ transaction->odb->repo); ++ flush_bulk_checkin_packfile(transaction); } void end_odb_transaction(struct odb_transaction *transaction) - - ## bulk-checkin.h ## -@@ bulk-checkin.h: void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction, - * binary blobs, they generally do not want to get any conversion, and - * callers should avoid this code path when filters are requested. - */ --int index_blob_bulk_checkin(struct odb_transaction *transaction, -+int index_blob_bulk_checkin(struct repository *repo, -+ struct odb_transaction *transaction, - struct object_id *oid, int fd, size_t size, - const char *path, unsigned flags); - - - ## object-file.c ## -@@ object-file.c: int index_fd(struct index_state *istate, struct object_id *oid, - ret = index_core(istate, oid, fd, xsize_t(st->st_size), - type, path, flags); - else -- ret = index_blob_bulk_checkin(the_repository->objects->transaction, -+ ret = index_blob_bulk_checkin(the_repository, -+ the_repository->objects->transaction, - oid, fd, xsize_t(st->st_size), - path, flags); - close(fd); base-commit: c44beea485f0f2feaf460e2ac87fdd5608d63cf0 -- 2.51.0