This follows up commit ce125d431a ("submodule: extract path to submodule gitdir func") to resolve gitdir path conflicts, eg. names "a" and "a/b", by encoding the submodule names before the paths are built. Based on previous work by Brandon & all [1]. A custom encoding can become unnecesarily complex, while url-encoding is relatively well-known, however it needs some extending to support case insensitive filesystems and quirks like Windows reserving "COM1" names. Hence why I opted to encode A as _a, B as _b and so on, which also fixes the COM1 case, as suggested in [1]. The current implementation errors out if the encoded name is too long. This can be improved, for e.g. the encoded name could be trimmed as the conflict probability is low at the NAME_MAX length mark, or long names could be sharded into subdirectories in the future. This also fixes the existing affected tests and adds a TODO to cleanup the short-circuit in validate_submodule_git_dir(). A further commit will some more tests to exercise these codepaths. Link: https://lore.kernel.org/git/20180807230637.247200-1-bmwill@xxxxxxxxxx/ [1] Based-on-patch-by: Brandon Williams <bmwill@xxxxxxxxxx> Signed-off-by: Adrian Ratiu <adrian.ratiu@xxxxxxxxxxxxx> --- submodule.c | 65 ++++++++++++++++++++++++------------- t/t7400-submodule-basic.sh | 2 +- t/t7406-submodule-update.sh | 10 +++--- t/t7450-bad-git-dotfiles.sh | 39 ++++++++++++---------- 4 files changed, 69 insertions(+), 47 deletions(-) diff --git a/submodule.c b/submodule.c index bf78636195..722a8e4f2a 100644 --- a/submodule.c +++ b/submodule.c @@ -2271,8 +2271,13 @@ int validate_submodule_git_dir(char *git_dir, const char *submodule_name) if (len <= suffix_len || (p = git_dir + len - suffix_len)[-1] != '/' || strcmp(p, submodule_name)) - BUG("submodule name '%s' not a suffix of git dir '%s'", - submodule_name, git_dir); + /* + * TODO: revisit and cleanup this test short-circuit, because + * submodules with encoded names are expected to take this path. + * Likely just move the invariants to submodule_name_to_gitdir() + * and delete this entire function in a future commit. + */ + return 0; /* * We prevent the contents of sibling submodules' git directories to @@ -2588,30 +2593,26 @@ int submodule_to_gitdir(struct repository *repo, return ret; } +static void strbuf_addstr_case_encode(struct strbuf *dst, const char *src) +{ + for (; *src; src++) { + unsigned char c = *src; + if (c >= 'A' && c <= 'Z') { + strbuf_addch(dst, '_'); + strbuf_addch(dst, c - 'A' + 'a'); + } else { + strbuf_addch(dst, c); + } + } +} + void submodule_name_to_gitdir(struct strbuf *buf, struct repository *r, const char *submodule_name) { - /* - * NEEDSWORK: The current way of mapping a submodule's name to - * its location in .git/modules/ has problems with some naming - * schemes. For example, if a submodule is named "foo" and - * another is named "foo/bar" (whether present in the same - * superproject commit or not - the problem will arise if both - * superproject commits have been checked out at any point in - * time), or if two submodule names only have different cases in - * a case-insensitive filesystem. - * - * There are several solutions, including encoding the path in - * some way, introducing a submodule.<name>.gitdir config in - * .git/config (not .gitmodules) that allows overriding what the - * gitdir of a submodule would be (and teach Git, upon noticing - * a clash, to automatically determine a non-clashing name and - * to write such a config), or introducing a - * submodule.<name>.gitdir config in .gitmodules that repo - * administrators can explicitly set. Nothing has been decided, - * so for now, just append the name at the end of the path. - */ + struct strbuf encoded_sub_name = STRBUF_INIT, tmp = STRBUF_INIT; + size_t base_len, encoded_len; char *gitdir_path, *key; + long name_max; /* Allow config override. */ key = xstrfmt("submodule.%s.gitdirpath", submodule_name); @@ -2632,5 +2633,23 @@ void submodule_name_to_gitdir(struct strbuf *buf, struct repository *r, /* New style (encoded) paths go under submodules/<encoded>. */ strbuf_reset(buf); repo_git_path_append(r, buf, "submodules/"); - strbuf_addstr(buf, submodule_name); + base_len = buf->len; + + /* URL-encode then case case-encode A to _a, B to _b and so on */ + strbuf_addstr_urlencode(&tmp, submodule_name, is_rfc3986_unreserved); + strbuf_addstr_case_encode(&encoded_sub_name, tmp.buf); + strbuf_release(&tmp); + strbuf_addbuf(buf, &encoded_sub_name); + + /* Ensure final path length is below NAME_MAX after encoding */ + name_max = pathconf(buf->buf, _PC_NAME_MAX); + if (name_max == -1) + name_max = NAME_MAX; + + encoded_len = buf->len - base_len; + if (encoded_len >= name_max) + die(_("encoded submodule name '%s' is too long (%zu bytes, limit is %ld)"), + encoded_sub_name.buf, encoded_len, name_max); + + strbuf_release(&encoded_sub_name); } diff --git a/t/t7400-submodule-basic.sh b/t/t7400-submodule-basic.sh index f4d4fb8397..607fdd26bd 100755 --- a/t/t7400-submodule-basic.sh +++ b/t/t7400-submodule-basic.sh @@ -1047,7 +1047,7 @@ test_expect_success 'recursive relative submodules stay relative' ' cd clone2 && git submodule update --init --recursive && echo "gitdir: ../.git/submodules/sub3" >./sub3/.git_expect && - echo "gitdir: ../../../.git/submodules/sub3/submodules/dirdir/subsub" >./sub3/dirdir/subsub/.git_expect + echo "gitdir: ../../../.git/submodules/sub3/submodules/dirdir%2fsubsub" >./sub3/dirdir/subsub/.git_expect ) && test_cmp clone2/sub3/.git_expect clone2/sub3/.git && test_cmp clone2/sub3/dirdir/subsub/.git_expect clone2/sub3/dirdir/subsub/.git diff --git a/t/t7406-submodule-update.sh b/t/t7406-submodule-update.sh index f0c4da1ffa..c44a7e9513 100755 --- a/t/t7406-submodule-update.sh +++ b/t/t7406-submodule-update.sh @@ -864,8 +864,8 @@ test_expect_success 'submodule add places git-dir in superprojects git-dir' ' (cd deeper/submodule && git log > ../../expected ) && - (cd .git/submodules/deeper/submodule && - git log > ../../../../actual + (cd .git/submodules/deeper%2fsubmodule && + git log > ../../../actual ) && test_cmp expected actual ) @@ -882,8 +882,8 @@ test_expect_success 'submodule update places git-dir in superprojects git-dir' ' (cd deeper/submodule && git log > ../../expected ) && - (cd .git/submodules/deeper/submodule && - git log > ../../../../actual + (cd .git/submodules/deeper%2fsubmodule && + git log > ../../../actual ) && test_cmp expected actual ) @@ -899,7 +899,7 @@ test_expect_success 'submodule add places git-dir in superprojects git-dir recur git commit -m "added subsubmodule" && git push origin : ) && - (cd .git/submodules/deeper/submodule/submodules/subsubmodule && + (cd .git/submodules/deeper%2fsubmodule/submodules/subsubmodule && git log > ../../../../../actual ) && git add deeper/submodule && diff --git a/t/t7450-bad-git-dotfiles.sh b/t/t7450-bad-git-dotfiles.sh index 4e2ced3636..27254300f8 100755 --- a/t/t7450-bad-git-dotfiles.sh +++ b/t/t7450-bad-git-dotfiles.sh @@ -15,6 +15,7 @@ Such as: . ./test-lib.sh . "$TEST_DIRECTORY"/lib-pack.sh +. "$TEST_DIRECTORY"/lib-verify-submodule-gitdir-path.sh test_expect_success 'setup' ' git config --global protocol.file.allow always @@ -319,6 +320,8 @@ test_expect_success WINDOWS 'prevent git~1 squatting on Windows' ' fi ' +# TODO: move these nested gitdir tests to another location in a later commit because +# they are not pathological cases anymore: by encoding the gitdir paths do not conflict. test_expect_success 'setup submodules with nested git dirs' ' git init nested && test_commit -C nested nested && @@ -341,35 +344,35 @@ test_expect_success 'setup submodules with nested git dirs' ' ' test_expect_success 'git dirs of sibling submodules must not be nested' ' - test_must_fail git clone --recurse-submodules nested clone 2>err && - test_grep "is inside git dir" err + git clone --recurse-submodules nested clone_nested && + verify_submodule_gitdir_path clone_nested hippo submodules/hippo && + verify_submodule_gitdir_path clone_nested hippo/hooks submodules/hippo%2fhooks ' test_expect_success 'submodule git dir nesting detection must work with parallel cloning' ' - test_must_fail git clone --recurse-submodules --jobs=2 nested clone_parallel 2>err && - cat err && - grep -E "(already exists|is inside git dir|not a git repository)" err && - { - test_path_is_missing .git/submodules/hippo/HEAD || - test_path_is_missing .git/submodules/hippo/hooks/HEAD - } + git clone --recurse-submodules --jobs=2 nested clone_parallel && + verify_submodule_gitdir_path clone_nested hippo submodules/hippo && + verify_submodule_gitdir_path clone_nested hippo/hooks submodules/hippo%2fhooks ' -test_expect_success 'checkout -f --recurse-submodules must not use a nested gitdir' ' - git clone nested nested_checkout && +test_expect_success 'checkout -f --recurse-submodules must corectly handle nested gitdirs' ' + git clone nested clone_recursive_checkout && ( - cd nested_checkout && + cd clone_recursive_checkout && + git submodule init && - git submodule update thing1 && + git submodule update thing1 thing2 && + + # simulate a malicious nested alternate which git should not follow mkdir -p .git/submodules/hippo/hooks/refs && mkdir -p .git/submodules/hippo/hooks/objects/info && echo "../../../../objects" >.git/submodules/hippo/hooks/objects/info/alternates && - echo "ref: refs/heads/master" >.git/submodules/hippo/hooks/HEAD + echo "ref: refs/heads/master" >.git/submodules/hippo/hooks/HEAD && + + git checkout -f --recurse-submodules HEAD ) && - test_must_fail git -C nested_checkout checkout -f --recurse-submodules HEAD 2>err && - cat err && - grep "is inside git dir" err && - test_path_is_missing nested_checkout/thing2/.git + verify_submodule_gitdir_path clone_nested hippo submodules/hippo && + verify_submodule_gitdir_path clone_nested hippo/hooks submodules/hippo%2fhooks ' test_expect_success SYMLINKS,!WINDOWS,!MINGW 'submodule must not checkout into different directory' ' -- 2.50.1.679.gbf363a8fbb.dirty