The bulk-checkin subsystem provides a mechanism to write blobs directly to a packfile via `index_blob_bulk_checkin()`. If a transaction is ongoing when the function is invoked, objects written via this function are stored in the same packfile, and that packfile is not flushed until the transaction itself is flushed. If there is no transaction, the single object is written to its own packfile, which is flushed immediately. This complicates `index_blob_bulk_checkin()`, because the provided transaction may be NULL and so cannot be relied upon to get the associated repository. Update `index_blob_bulk_checkin()` to assume that a valid transaction is always provided. Callers are now expected to ensure a transaction is set up beforehand. With this simplification, `deflate_blob_to_pack()` is no longer needed as a standalone internal function and is folded into `index_blob_bulk_checkin()`. The single call site in `object-file.c:index_fd()` is updated accordingly. Because of how `{begin,end}_odb_transaction()` handle nested transactions, a new transaction is created and committed only if there is not already an ongoing transaction. 
Signed-off-by: Justin Tobler <jltobler@xxxxxxxxx> --- bulk-checkin.c | 27 ++++----------------------- bulk-checkin.h | 7 +++++-- object-file.c | 21 ++++++++++++++------- 3 files changed, 23 insertions(+), 32 deletions(-) diff --git a/bulk-checkin.c b/bulk-checkin.c index 53a20a2d92f..542d8125a86 100644 --- a/bulk-checkin.c +++ b/bulk-checkin.c @@ -254,11 +254,11 @@ static void prepare_to_stream(struct bulk_checkin_packfile *state, die_errno("unable to write pack header"); } -static int deflate_blob_to_pack(struct bulk_checkin_packfile *state, - struct object_id *result_oid, - int fd, size_t size, - const char *path, unsigned flags) +int index_blob_bulk_checkin(struct odb_transaction *transaction, + struct object_id *result_oid, int fd, size_t size, + const char *path, unsigned flags) { + struct bulk_checkin_packfile *state = &transaction->packfile; off_t seekback, already_hashed_to; struct git_hash_ctx ctx; unsigned char obuf[16384]; @@ -361,25 +361,6 @@ void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction, } } -int index_blob_bulk_checkin(struct odb_transaction *transaction, - struct object_id *oid, int fd, size_t size, - const char *path, unsigned flags) -{ - int status; - - if (transaction) { - status = deflate_blob_to_pack(&transaction->packfile, oid, fd, - size, path, flags); - } else { - struct bulk_checkin_packfile state = { 0 }; - - status = deflate_blob_to_pack(&state, oid, fd, size, path, flags); - flush_bulk_checkin_packfile(&state); - } - - return status; -} - struct odb_transaction *begin_odb_transaction(struct object_database *odb) { if (!odb->transaction) { diff --git a/bulk-checkin.h b/bulk-checkin.h index 16254ce6a70..ac8887f476b 100644 --- a/bulk-checkin.h +++ b/bulk-checkin.h @@ -14,8 +14,11 @@ void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction, int fd, const char *filename); /* - * This creates one packfile per large blob unless bulk-checkin - * machinery is "plugged". 
+ * This writes the specified object to a packfile. Objects written here + * during the same transaction are written to the same packfile. The + * packfile is not flushed until the transaction is flushed. The caller + * is expected to ensure a valid transaction is setup for objects to be + * recorded to. * * This also bypasses the usual "convert-to-git" dance, and that is on * purpose. We could write a streaming version of the converting diff --git a/object-file.c b/object-file.c index 1740aa2b2e3..bc15af42450 100644 --- a/object-file.c +++ b/object-file.c @@ -1253,19 +1253,26 @@ int index_fd(struct index_state *istate, struct object_id *oid, * Call xsize_t() only when needed to avoid potentially unnecessary * die() for large files. */ - if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(istate, path)) + if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(istate, path)) { ret = index_stream_convert_blob(istate, oid, fd, path, flags); - else if (!S_ISREG(st->st_mode)) + } else if (!S_ISREG(st->st_mode)) { ret = index_pipe(istate, oid, fd, type, path, flags); - else if ((st->st_size >= 0 && (size_t) st->st_size <= repo_settings_get_big_file_threshold(istate->repo)) || - type != OBJ_BLOB || - (path && would_convert_to_git(istate, path))) + } else if ((st->st_size >= 0 && + (size_t)st->st_size <= repo_settings_get_big_file_threshold(istate->repo)) || + type != OBJ_BLOB || + (path && would_convert_to_git(istate, path))) { ret = index_core(istate, oid, fd, xsize_t(st->st_size), type, path, flags); - else - ret = index_blob_bulk_checkin(the_repository->objects->transaction, + } else { + struct odb_transaction *transaction; + + transaction = begin_odb_transaction(the_repository->objects); + ret = index_blob_bulk_checkin(transaction, oid, fd, xsize_t(st->st_size), path, flags); + end_odb_transaction(transaction); + } + close(fd); return ret; } -- 2.51.0