[PATCH v2 0/4] bulk-checkin: remove global transaction state

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Greetings,

The bulk-checkin subsystem provides an interface to write objects to the
object database in a bulk transaction. The state of an ongoing
transaction is stored across several global variables. This series aims
to remove this global transaction state in favor of storing state in in
`struct object_database`. This is done in preparation for a follow-up
change where the goal is to eventually move these transaction interfaces
into "odb.h".

Changes since V1:

- `index_blob_bulk_checkin()` now assumes that the caller always
  provides a setup `struct odb_transaction`. Callers are adjusted to
  ensure this.
- Functions in bulk-checkin.c now consistently access the repository
  through the provided `odb_transaction`.

Thanks,
-Justin

Justin Tobler (4):
  bulk-checkin: introduce object database transaction structure
  bulk-checkin: remove global transaction state
  bulk-checkin: require transaction for index_blob_bulk_checkin()
  bulk-checkin: use repository variable from transaction

 builtin/add.c            |   5 +-
 builtin/unpack-objects.c |   5 +-
 builtin/update-index.c   |   7 +-
 bulk-checkin.c           | 153 +++++++++++++++++++++------------------
 bulk-checkin.h           |  25 ++++---
 cache-tree.c             |   5 +-
 object-file.c            |  30 +++++---
 odb.h                    |   8 ++
 read-cache.c             |   5 +-
 9 files changed, 142 insertions(+), 101 deletions(-)

Range-diff against v1:
1:  5c9358e0b03 = 1:  5c9358e0b03 bulk-checkin: introduce object database transaction structure
2:  4a1b80a6baf = 2:  4a1b80a6baf bulk-checkin: remove global transaction state
-:  ----------- > 3:  ce329932fdd bulk-checkin: require transaction for index_blob_bulk_checkin()
3:  2ca78c8d343 ! 4:  08e26647915 bulk-checkin: wire repository variable
    @@ Metadata
     Author: Justin Tobler <jltobler@xxxxxxxxx>
     
      ## Commit message ##
    -    bulk-checkin: wire repository variable
    +    bulk-checkin: use repository variable from transaction
     
         The bulk-checkin subsystem depends on `the_repository`. Adapt functions
    -    and call sites to wire the repository variable where needed. The
    -    `USE_THE_REPOSITORY_VARIBALE` is still required as the
    +    and call sites to access the repository through `struct odb_transaction`
    +    instead. The `USE_THE_REPOSITORY_VARIBALE` is still required as the
         `pack_compression_level` and `pack_size_limit_cfg` globals are still
         used.
     
    @@ bulk-checkin.c: struct odb_transaction {
      };
      
     -static void finish_tmp_packfile(struct strbuf *basename,
    -+static void finish_tmp_packfile(struct repository *repo,
    +-				const char *pack_tmp_name,
    +-				struct pack_idx_entry **written_list,
    +-				uint32_t nr_written,
    +-				struct pack_idx_option *pack_idx_opts,
    ++static void finish_tmp_packfile(struct odb_transaction *transaction,
     +				struct strbuf *basename,
    - 				const char *pack_tmp_name,
    - 				struct pack_idx_entry **written_list,
    - 				uint32_t nr_written,
    -@@ bulk-checkin.c: static void finish_tmp_packfile(struct strbuf *basename,
    + 				unsigned char hash[])
      {
    ++	struct bulk_checkin_packfile *state = &transaction->packfile;
    ++	struct repository *repo = transaction->odb->repo;
      	char *idx_tmp_name = NULL;
      
     -	stage_tmp_packfiles(the_repository, basename, pack_tmp_name,
    -+	stage_tmp_packfiles(repo, basename, pack_tmp_name,
    - 			    written_list, nr_written, NULL, pack_idx_opts, hash,
    - 			    &idx_tmp_name);
    +-			    written_list, nr_written, NULL, pack_idx_opts, hash,
    +-			    &idx_tmp_name);
     -	rename_tmp_packfile_idx(the_repository, basename, &idx_tmp_name);
    ++	stage_tmp_packfiles(repo, basename, state->pack_tmp_name,
    ++			    state->written, state->nr_written, NULL,
    ++			    &state->pack_idx_opts, hash, &idx_tmp_name);
     +	rename_tmp_packfile_idx(repo, basename, &idx_tmp_name);
      
      	free(idx_tmp_name);
      }
      
     -static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state)
    -+static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state,
    -+					struct repository *repo)
    ++static void flush_bulk_checkin_packfile(struct odb_transaction *transaction)
      {
    ++	struct bulk_checkin_packfile *state = &transaction->packfile;
    ++	struct repository *repo = transaction->odb->repo;
      	unsigned char hash[GIT_MAX_RAWSZ];
      	struct strbuf packname = STRBUF_INIT;
    + 
     @@ bulk-checkin.c: static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state)
      				  CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
      	} else {
    @@ bulk-checkin.c: static void flush_bulk_checkin_packfile(struct bulk_checkin_pack
     -	strbuf_addf(&packname, "%s/pack/pack-%s.", repo_get_object_directory(the_repository),
     -		    hash_to_hex(hash));
     -	finish_tmp_packfile(&packname, state->pack_tmp_name,
    -+	strbuf_addf(&packname, "%s/pack/pack-%s.", repo_get_object_directory(repo),
    +-			    state->written, state->nr_written,
    +-			    &state->pack_idx_opts, hash);
    ++	strbuf_addf(&packname, "%s/pack/pack-%s.",
    ++		    transaction->odb->sources->path,
     +		    hash_to_hex_algop(hash, repo->hash_algo));
    -+	finish_tmp_packfile(repo, &packname, state->pack_tmp_name,
    - 			    state->written, state->nr_written,
    - 			    &state->pack_idx_opts, hash);
    ++
    ++	finish_tmp_packfile(transaction, &packname, hash);
      	for (uint32_t i = 0; i < state->nr_written; i++)
    + 		free(state->written[i]);
    + 
     @@ bulk-checkin.c: static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state)
      
      	strbuf_release(&packname);
    @@ bulk-checkin.c: static void flush_batch_fsync(struct odb_transaction *transactio
      	 */
     -	strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX", repo_get_object_directory(the_repository));
     +	strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX",
    -+		    repo_get_object_directory(transaction->odb->repo));
    ++		    transaction->odb->sources->path);
      	temp = xmks_tempfile(temp_path.buf);
      	fsync_or_die(get_tempfile_fd(temp), get_tempfile_path(temp));
      	delete_tempfile(&temp);
    @@ bulk-checkin.c: static void flush_batch_fsync(struct odb_transaction *transactio
      }
      
     -static int already_written(struct bulk_checkin_packfile *state, struct object_id *oid)
    -+static int already_written(struct bulk_checkin_packfile *state,
    -+			   struct repository *repo, struct object_id *oid)
    ++static int already_written(struct odb_transaction *transaction,
    ++			   struct object_id *oid)
      {
      	/* The object may already exist in the repository */
     -	if (odb_has_object(the_repository->objects, oid,
    -+	if (odb_has_object(repo->objects, oid,
    ++	if (odb_has_object(transaction->odb, oid,
      			   HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR))
      		return 1;
      
    + 	/* Might want to keep the list sorted */
    +-	for (uint32_t i = 0; i < state->nr_written; i++)
    +-		if (oideq(&state->written[i]->oid, oid))
    ++	for (uint32_t i = 0; i < transaction->packfile.nr_written; i++)
    ++		if (oideq(&transaction->packfile.written[i]->oid, oid))
    + 			return 1;
    + 
    + 	/* This is a new object we need to keep */
     @@ bulk-checkin.c: static int stream_blob_to_pack(struct bulk_checkin_packfile *state,
    + }
      
      /* Lazily create backing packfile for the state */
    - static void prepare_to_stream(struct bulk_checkin_packfile *state,
    -+			      struct repository *repo,
    +-static void prepare_to_stream(struct bulk_checkin_packfile *state,
    ++static void prepare_to_stream(struct odb_transaction *transaction,
      			      unsigned flags)
      {
    ++	struct bulk_checkin_packfile *state = &transaction->packfile;
      	if (!(flags & INDEX_WRITE_OBJECT) || state->f)
      		return;
      
     -	state->f = create_tmp_packfile(the_repository, &state->pack_tmp_name);
    -+	state->f = create_tmp_packfile(repo, &state->pack_tmp_name);
    ++	state->f = create_tmp_packfile(transaction->odb->repo,
    ++				       &state->pack_tmp_name);
      	reset_pack_idx_option(&state->pack_idx_opts);
      
      	/* Pretend we are going to write only one object */
     @@ bulk-checkin.c: static void prepare_to_stream(struct bulk_checkin_packfile *state,
    + 		die_errno("unable to write pack header");
      }
      
    - static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
    -+				struct repository *repo,
    - 				struct object_id *result_oid,
    - 				int fd, size_t size,
    - 				const char *path, unsigned flags)
    +-static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
    +-				struct object_id *result_oid,
    +-				int fd, size_t size,
    +-				const char *path, unsigned flags)
    ++int index_blob_bulk_checkin(struct odb_transaction *transaction,
    ++			    struct object_id *result_oid,
    ++			    int fd, size_t size,
    ++			    const char *path, unsigned flags)
    + {
    ++	struct bulk_checkin_packfile *state = &transaction->packfile;
    + 	off_t seekback, already_hashed_to;
    + 	struct git_hash_ctx ctx;
    + 	unsigned char obuf[16384];
     @@ bulk-checkin.c: static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
      
      	header_len = format_object_header((char *)obuf, sizeof(obuf),
      					  OBJ_BLOB, size);
     -	the_hash_algo->init_fn(&ctx);
    -+	repo->hash_algo->init_fn(&ctx);
    ++	transaction->odb->repo->hash_algo->init_fn(&ctx);
      	git_hash_update(&ctx, obuf, header_len);
      
      	/* Note: idx is non-NULL when we are writing */
    @@ bulk-checkin.c: static int deflate_blob_to_pack(struct bulk_checkin_packfile *st
      		CALLOC_ARRAY(idx, 1);
      
     -		prepare_to_stream(state, flags);
    -+		prepare_to_stream(state, repo, flags);
    ++		prepare_to_stream(transaction, flags);
      		hashfile_checkpoint_init(state->f, &checkpoint);
      	}
      
    @@ bulk-checkin.c: static int deflate_blob_to_pack(struct bulk_checkin_packfile *st
      
      	while (1) {
     -		prepare_to_stream(state, flags);
    -+		prepare_to_stream(state, repo, flags);
    ++		prepare_to_stream(transaction, flags);
      		if (idx) {
      			hashfile_checkpoint(state->f, &checkpoint);
      			idx->offset = state->offset;
    @@ bulk-checkin.c: static int deflate_blob_to_pack(struct bulk_checkin_packfile *st
      		hashfile_truncate(state->f, &checkpoint);
      		state->offset = checkpoint.offset;
     -		flush_bulk_checkin_packfile(state);
    -+		flush_bulk_checkin_packfile(state, repo);
    ++		flush_bulk_checkin_packfile(transaction);
      		if (lseek(fd, seekback, SEEK_SET) == (off_t) -1)
      			return error("cannot seek back");
      	}
    @@ bulk-checkin.c: static int deflate_blob_to_pack(struct bulk_checkin_packfile *st
      
      	idx->crc32 = crc32_end(state->f);
     -	if (already_written(state, result_oid)) {
    -+	if (already_written(state, repo, result_oid)) {
    ++	if (already_written(transaction, result_oid)) {
      		hashfile_truncate(state->f, &checkpoint);
      		state->offset = checkpoint.offset;
      		free(idx);
    @@ bulk-checkin.c: void fsync_loose_object_bulk_checkin(struct odb_transaction *tra
      }
      
     -int index_blob_bulk_checkin(struct odb_transaction *transaction,
    -+int index_blob_bulk_checkin(struct repository *repo,
    -+			    struct odb_transaction *transaction,
    - 			    struct object_id *oid, int fd, size_t size,
    - 			    const char *path, unsigned flags)
    +-			    struct object_id *oid, int fd, size_t size,
    +-			    const char *path, unsigned flags)
    +-{
    +-	return deflate_blob_to_pack(&transaction->packfile, oid, fd, size, path,
    +-				    flags);
    +-}
    +-
    + struct odb_transaction *begin_odb_transaction(struct object_database *odb)
      {
    - 	int status;
    - 
    - 	if (transaction) {
    --		status = deflate_blob_to_pack(&transaction->packfile, oid, fd,
    --					      size, path, flags);
    -+		status = deflate_blob_to_pack(&transaction->packfile,
    -+					      repo, oid, fd, size, path, flags);
    - 	} else {
    - 		struct bulk_checkin_packfile state = { 0 };
    - 
    --		status = deflate_blob_to_pack(&state, oid, fd, size, path, flags);
    --		flush_bulk_checkin_packfile(&state);
    -+		status = deflate_blob_to_pack(&state, repo, oid, fd, size, path, flags);
    -+		flush_bulk_checkin_packfile(&state, repo);
    - 	}
    - 
    - 	return status;
    + 	if (!odb->transaction) {
     @@ bulk-checkin.c: void flush_odb_transaction(struct odb_transaction *transaction)
      		return;
      
      	flush_batch_fsync(transaction);
     -	flush_bulk_checkin_packfile(&transaction->packfile);
    -+	flush_bulk_checkin_packfile(&transaction->packfile,
    -+				    transaction->odb->repo);
    ++	flush_bulk_checkin_packfile(transaction);
      }
      
      void end_odb_transaction(struct odb_transaction *transaction)
    -
    - ## bulk-checkin.h ##
    -@@ bulk-checkin.h: void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction,
    -  * binary blobs, they generally do not want to get any conversion, and
    -  * callers should avoid this code path when filters are requested.
    -  */
    --int index_blob_bulk_checkin(struct odb_transaction *transaction,
    -+int index_blob_bulk_checkin(struct repository *repo,
    -+			    struct odb_transaction *transaction,
    - 			    struct object_id *oid, int fd, size_t size,
    - 			    const char *path, unsigned flags);
    - 
    -
    - ## object-file.c ##
    -@@ object-file.c: int index_fd(struct index_state *istate, struct object_id *oid,
    - 		ret = index_core(istate, oid, fd, xsize_t(st->st_size),
    - 				 type, path, flags);
    - 	else
    --		ret = index_blob_bulk_checkin(the_repository->objects->transaction,
    -+		ret = index_blob_bulk_checkin(the_repository,
    -+					      the_repository->objects->transaction,
    - 					      oid, fd, xsize_t(st->st_size),
    - 					      path, flags);
    - 	close(fd);

base-commit: c44beea485f0f2feaf460e2ac87fdd5608d63cf0
-- 
2.51.0





[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux