From: Ezekiel Newren <ezekielnewren@xxxxxxxxx>

When no whitespace flags are present, use xxhash for faster hashing;
otherwise use DJB2a (which is what xdiff has been using all along).

The benchmark below compares my series with version v2.49.0 (built in
build_release/ and build_v2.49.0/ respectively), running log commands
on the Linux kernel repository on 3 different machines.

$ BASE=/path/to/git/root

// laptop
// CPU: 6-core Intel Core i7-8750H (-MT MCP-) speed/min/max: 726/800/4100 MHz
$ hyperfine --warmup 3 -L exe $BASE/build_release/git,$BASE/build_v2.49.0/git '{exe} log --oneline --shortstat v6.8..v6.9 >/dev/null'
Benchmark 1: /home/ezekiel/development/work/git/build_release/git log --oneline --shortstat v6.8..v6.9 >/dev/null
  Time (mean ± σ):     10.419 s ±  0.166 s    [User: 10.097 s, System: 0.284 s]
  Range (min … max):   10.215 s … 10.680 s    10 runs

Benchmark 2: /home/ezekiel/development/work/git/build_v2.49.0/git log --oneline --shortstat v6.8..v6.9 >/dev/null
  Time (mean ± σ):     10.980 s ±  0.137 s    [User: 10.633 s, System: 0.308 s]
  Range (min … max):   10.791 s … 11.178 s    10 runs

Summary
/home/ezekiel/development/work/git/build_release/git log --oneline --shortstat v6.8..v6.9 >/dev/null ran
  1.05 ± 0.02 times faster than /home/ezekiel/development/work/git/build_v2.49.0/git log --oneline --shortstat v6.8..v6.9 >/dev/null

// desktop
// CPU: 8-core Intel Core i7-9700 (-MCP-) speed/min/max: 800/800/4700 MHz
$ hyperfine --warmup 3 -L exe $BASE/build_release/git,$BASE/build_v2.49.0/git '{exe} log --oneline --shortstat v6.8..v6.9 >/dev/null'
Benchmark 1: /home/steamuser/dev/git/build_release/git log --oneline --shortstat v6.8..v6.9 >/dev/null
  Time (mean ± σ):      6.823 s ±  0.020 s    [User: 6.624 s, System: 0.180 s]
  Range (min … max):    6.801 s …  6.858 s    10 runs

Benchmark 2: /home/steamuser/dev/git/build_v2.49.0/git log --oneline --shortstat v6.8..v6.9 >/dev/null
  Time (mean ± σ):      8.151 s ±  0.024 s    [User: 7.928 s, System: 0.198 s]
  Range (min … max):    8.105 s …  8.184 s    10 runs

Summary
/home/steamuser/dev/git/build_release/git log --oneline --shortstat v6.8..v6.9 >/dev/null ran
  1.19 ± 0.01 times faster than /home/steamuser/dev/git/build_v2.49.0/git log --oneline --shortstat v6.8..v6.9 >/dev/null

// router
// CPU: dual core Intel Celeron 3965U (-MCP-) speed/min/max: 1300/400/2200 MHz
$ hyperfine --warmup 3 -L exe $BASE/build_release/git,$BASE/build_v2.49.0/git '{exe} log --oneline --shortstat v6.8..v6.9 >/dev/null'
Benchmark 1: /home/metal/dev/git/build_release/git log --oneline --shortstat v6.8..v6.9 >/dev/null
  Time (mean ± σ):     21.209 s ±  0.054 s    [User: 20.341 s, System: 0.605 s]
  Range (min … max):   21.135 s … 21.309 s    10 runs

Benchmark 2: /home/metal/dev/git/build_v2.49.0/git log --oneline --shortstat v6.8..v6.9 >/dev/null
  Time (mean ± σ):     23.683 s ±  0.060 s    [User: 22.735 s, System: 0.672 s]
  Range (min … max):   23.566 s … 23.751 s    10 runs

Summary
/home/metal/dev/git/build_release/git log --oneline --shortstat v6.8..v6.9 >/dev/null ran
  1.12 ± 0.00 times faster than /home/metal/dev/git/build_v2.49.0/git log --oneline --shortstat v6.8..v6.9 >/dev/null

Signed-off-by: Ezekiel Newren <ezekielnewren@xxxxxxxxx>
---
 rust/Cargo.lock       |  7 +++++++
 rust/xdiff/Cargo.toml |  1 +
 rust/xdiff/src/lib.rs |  7 +++++++
 xdiff/xprepare.c      | 19 +++++++++++++++++--
 4 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/rust/Cargo.lock b/rust/Cargo.lock
index fb1eac690b39..5f84617b1049 100644
--- a/rust/Cargo.lock
+++ b/rust/Cargo.lock
@@ -11,4 +11,11 @@ name = "xdiff"
 version = "0.1.0"
 dependencies = [
  "interop",
+ "xxhash-rust",
 ]
+
+[[package]]
+name = "xxhash-rust"
+version = "0.8.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3"
diff --git a/rust/xdiff/Cargo.toml b/rust/xdiff/Cargo.toml
index eb7966aada64..1516e829db18 100644
--- a/rust/xdiff/Cargo.toml
+++ b/rust/xdiff/Cargo.toml
@@ -13,3 +13,4 @@ crate-type = ["staticlib", "rlib"]
 
 [dependencies]
 interop = { path = 
"../interop" } +xxhash-rust = { version = "0.8.15", features = ["xxh3"] } diff --git a/rust/xdiff/src/lib.rs b/rust/xdiff/src/lib.rs index e69de29bb2d1..96975975a1ba 100644 --- a/rust/xdiff/src/lib.rs +++ b/rust/xdiff/src/lib.rs @@ -0,0 +1,7 @@ + + +#[no_mangle] +unsafe extern "C" fn xxh3_64(ptr: *const u8, size: usize) -> u64 { + let slice = std::slice::from_raw_parts(ptr, size); + xxhash_rust::xxh3::xxh3_64(slice) +} diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index c44005e9bbb8..5a2e52f102cf 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -160,6 +160,9 @@ static void xdl_parse_lines(mmfile_t *mf, long narec, xdfile_t *xdf) { } +extern u64 xxh3_64(u8 const* ptr, usize size); + + static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_t const *xpp, xdlclassifier_t *cf, xdfile_t *xdf) { unsigned long *ha; @@ -175,14 +178,26 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_ xdl_parse_lines(mf, narec, xdf); + if ((xpp->flags & XDF_WHITESPACE_FLAGS) == 0) { + for (usize i = 0; i < (usize) xdf->nrec; i++) { + xrecord_t *rec = xdf->recs[i]; + rec->ha = xxh3_64(rec->ptr, rec->size); + } + } else { + for (usize i = 0; i < (usize) xdf->nrec; i++) { + xrecord_t *rec = xdf->recs[i]; + char const* dump = (char const*) rec->ptr; + rec->ha = xdl_hash_record(&dump, (char const*) (rec->ptr + rec->size), xpp->flags); + } + } + for (usize i = 0; i < (usize) xdf->nrec; i++) { xrecord_t *rec = xdf->recs[i]; - char const* dump = (char const*) rec->ptr; - rec->ha = xdl_hash_record(&dump, (char const*) (rec->ptr + rec->size), xpp->flags); xdl_classify_record(pass, cf, rec); } + if (!XDL_CALLOC_ARRAY(rchg, xdf->nrec + 2)) goto abort; -- gitgitgadget