Re: [GSoC PATCH] pathspec: fix sign comparison warnings

Karthik Nayak <karthik.188@xxxxxxxxx> · Thu, 13 Mar 2025 08:52:58 -0400

Arnav Bhate <bhatearnav@xxxxxxxxx> writes:

> There are multiple places, especially in loops, where a signed and an
> unsigned data type are compared. Git uses a mix of signed and unsigned
> types to store lengths of arrays. This sometimes leads to using a signed
> index for an array whose length is stored in an unsigned variable or
> vice versa.
>
> Replace signed data types with unsigned data types and vice versa
> wherever necessary. In some cases, introduce a new variable, where both
> signed and unsigned data types have been used to store lengths of arrays
> in the same function, where previously only one variable was used to
> iterate over both types. In cases where this is not possible, add
> appropriate cast. Remove #define DISABLE_SIGN_COMPARE_WARNINGS.
>
> Signed-off-by: Arnav Bhate <bhatearnav@xxxxxxxxx>
> ---
>  pathspec.c | 26 +++++++++++++-------------
>  1 file changed, 13 insertions(+), 13 deletions(-)
>
> diff --git a/pathspec.c b/pathspec.c
> index 89663645e1..fd7dfdfd84 100644
> --- a/pathspec.c
> +++ b/pathspec.c
> @@ -1,5 +1,4 @@
>  #define USE_THE_REPOSITORY_VARIABLE
> -#define DISABLE_SIGN_COMPARE_WARNINGS
>
>  #include "git-compat-util.h"
>  #include "abspath.h"
> @@ -36,6 +35,7 @@ void add_pathspec_matches_against_index(const struct pathspec *pathspec,
>  					enum ps_skip_worktree_action sw_action)
>  {
>  	int num_unmatched = 0, i;
> +	unsigned int j;
>
>  	/*
>  	 * Since we are walking the index as if we were walking the directory,
> @@ -48,8 +48,8 @@ void add_pathspec_matches_against_index(const struct pathspec *pathspec,
>  			num_unmatched++;
>  	if (!num_unmatched)
>  		return;
> -	for (i = 0; i < istate->cache_nr; i++) {
> -		const struct cache_entry *ce = istate->cache[i];
> +	for (j = 0; j < istate->cache_nr; j++) {
> +		const struct cache_entry *ce = istate->cache[j];
>  		if (sw_action == PS_IGNORE_SKIP_WORKTREE &&
>  		    (ce_skip_worktree(ce) || !path_in_sparse_checkout(ce->name, istate)))
>  			continue;

While this is correct, now we have 'i' & 'j' as iteration variables,
generally this is used in O(n^2) loops to define the outer and inner
loops. Here, however, we use it to simply define two different types. I
find this deviation from convention a little confusing.

Perhaps, we could simply utilize the option of intializing loop
variables in the loop itself?

  diff --git a/pathspec.c b/pathspec.c
  index 89663645e1..ff8854afb8 100644
  --- a/pathspec.c
  +++ b/pathspec.c
  @@ -35,7 +35,7 @@ void add_pathspec_matches_against_index(const
struct pathspec *pathspec,
   					char *seen,
   					enum ps_skip_worktree_action sw_action)
   {
  -	int num_unmatched = 0, i;
  +	int num_unmatched = 0;

   	/*
   	 * Since we are walking the index as if we were walking the directory,
  @@ -43,12 +43,12 @@ void add_pathspec_matches_against_index(const
struct pathspec *pathspec,
   	 * mistakenly think that the user gave a pathspec that did not match
   	 * anything.
   	 */
  -	for (i = 0; i < pathspec->nr; i++)
  +	for (int i = 0; i < pathspec->nr; i++)
   		if (!seen[i])
   			num_unmatched++;
   	if (!num_unmatched)
   		return;
  -	for (i = 0; i < istate->cache_nr; i++) {
  +	for (unsigned int i = 0; i < istate->cache_nr; i++) {
   		const struct cache_entry *ce = istate->cache[i];
   		if (sw_action == PS_IGNORE_SKIP_WORKTREE &&
   		    (ce_skip_worktree(ce) || !path_in_sparse_checkout(ce->name, istate)))

This would read much cleaner and also avoid two different loop
variables. WDYT?

Also a bigger question is, shouldn't the type of `pathspec.nr` and
'istate.cache_nr' be the actual change required? Shouldn't they be set
to 'size_t'?

> @@ -78,7 +78,7 @@ char *find_pathspecs_matching_skip_worktree(const struct pathspec *pathspec)
>  {
>  	struct index_state *istate = the_repository->index;
>  	char *seen = xcalloc(pathspec->nr, 1);
> -	int i;
> +	unsigned int i;
>

Nit: We could also drop this and move the initialization to the line
below.

>  	for (i = 0; i < istate->cache_nr; i++) {
>  		struct cache_entry *ce = istate->cache[i];
> @@ -130,7 +130,7 @@ static void prefix_magic(struct strbuf *sb, int prefixlen,
>  	if (element[1] != '(') {
>  		/* Process an element in shorthand form (e.g. ":!/<match>") */
>  		strbuf_addstr(sb, ":(");
> -		for (int i = 0; i < ARRAY_SIZE(pathspec_magic); i++) {
> +		for (unsigned int i = 0; i < ARRAY_SIZE(pathspec_magic); i++) {
>  			if ((magic & pathspec_magic[i].bit) &&
>  			    pathspec_magic[i].mnemonic) {
>  				if (sb->buf[sb->len - 1] != '(')

Shouldn't we use 'size_t' for this, since we're iterating over the
elements of an array?

> @@ -341,7 +341,7 @@ static const char *parse_long_magic(unsigned *magic, int *prefix_len,
>
>  	for (pos = elem + 2; *pos && *pos != ')'; pos = nextat) {
>  		size_t len = strcspn_escaped(pos, ",)");
> -		int i;
> +		unsigned int i;
>

This too should be 'size_t'.

>  		if (pos[len] == ',')
>  			nextat = pos + len + 1; /* handle ',' */
> @@ -354,7 +354,7 @@ static const char *parse_long_magic(unsigned *magic, int *prefix_len,
>  		if (starts_with(pos, "prefix:")) {
>  			char *endptr;
>  			*prefix_len = strtol(pos + 7, &endptr, 10);
> -			if (endptr - pos != len)
> +			if ((size_t)(endptr - pos) != len)
>  				die(_("invalid parameter for pathspec magic 'prefix'"));
>  			continue;
>  		}

This makes sense. But is it guaranteed that `endptr - pos` is greater
than 0?

> @@ -400,7 +400,7 @@ static const char *parse_short_magic(unsigned *magic, const char *elem)
>
>  	for (pos = elem + 1; *pos && *pos != ':'; pos++) {
>  		char ch = *pos;
> -		int i;
> +		unsigned int i;
>

This too, should be 'size_t'

>  		/* Special case alias for '!' */
>  		if (ch == '^') {
> @@ -564,7 +564,7 @@ static int pathspec_item_cmp(const void *a_, const void *b_)
>
>  void pathspec_magic_names(unsigned magic, struct strbuf *out)
>  {
> -	int i;
> +	unsigned int i;

This can be inlined and made 'size_t'.

>  	for (i = 0; i < ARRAY_SIZE(pathspec_magic); i++) {
>  		const struct pathspec_magic *m = pathspec_magic + i;
>  		if (!(magic & m->bit))
> @@ -803,8 +803,8 @@ int match_pathspec_attrs(struct index_state *istate,
>  int pathspec_needs_expanded_index(struct index_state *istate,
>  				  const struct pathspec *pathspec)
>  {
> -	unsigned int i, pos;
> -	int res = 0;
> +	unsigned int pos;
> +	int i, res = 0;
>  	char *skip_worktree_seen = NULL;
>

This can be inlined, but this change is done to match 'pathspec.nr''s
type. This goes to my earlier question, I would say we first need to
modify 'pathspec.nr' itself to be 'size_t'.

>  	/*
> @@ -845,7 +845,7 @@ int pathspec_needs_expanded_index(struct index_state *istate,
>  			 * - not-in-cone/bar*: may need expanded index
>  			 * - **.c: may need expanded index
>  			 */
> -			if (strspn(item.original + item.nowildcard_len, "*") == item.len - item.nowildcard_len &&
> +			if (strspn(item.original + item.nowildcard_len, "*") == (unsigned int)(item.len - item.nowildcard_len) &&
>  			    path_in_cone_mode_sparse_checkout(item.original, istate))
>  				continue;
>

Similar here, I see the types of 'item.len' and 'item.nowwildcard_len'
are 'int'. Do they need to be 'size_t'?

> @@ -860,7 +860,7 @@ int pathspec_needs_expanded_index(struct index_state *istate,
>  				 * directory name and the sparse directory is the first
>  				 * component of the pathspec, need to expand the index.
>  				 */
> -				if (item.nowildcard_len > ce_namelen(ce) &&
> +				if ((unsigned int)item.nowildcard_len > ce_namelen(ce) &&
>  				    !strncmp(item.original, ce->name, ce_namelen(ce))) {
>  					res = 1;
>  					break;
> --
> 2.48.1

Same question as above!
Attachment:
signature.asc

Description: PGP signature