This patch improves function detection in userdiff for Bash scripts. The old regex tried to match function bodies explicitly, which caused issues with line continuations (`\`) and simple command bodies. Instead, I have replaced it with `.*$`, making it more consistent with other userdiff drivers and ensuring we capture the full function definition line. I also refined the word regex to better handle Bash syntax, including parameter expansions, arithmetic expressions, and command-line options. I have added test cases to cover these changes, making sure everything works as expected. Moumita Dhar (1): userdiff: extend Bash pattern to cover more shell function forms t/t4018/bash-bashism-style-multiline-function | 4 +++ t/t4018/bash-posix-style-multiline-function | 4 +++ .../bash-posix-style-single-command-function | 3 ++ t/t4034-diff-words.sh | 1 + t/t4034/bash/expect | 30 +++++++++++++++++++ t/t4034/bash/post | 25 ++++++++++++++++ t/t4034/bash/pre | 25 ++++++++++++++++ userdiff.c | 24 +++++++++++---- 8 files changed, 110 insertions(+), 6 deletions(-) create mode 100644 t/t4018/bash-bashism-style-multiline-function create mode 100644 t/t4018/bash-posix-style-multiline-function create mode 100644 t/t4018/bash-posix-style-single-command-function create mode 100644 t/t4034/bash/expect create mode 100644 t/t4034/bash/post create mode 100644 t/t4034/bash/pre Range-diff against v2: 1: de2e8f9792 ! 1: 3d077fadc4 userdiff: extend Bash pattern to cover more shell function forms @@ Metadata ## Commit message ## userdiff: extend Bash pattern to cover more shell function forms - The existing Bash userdiff pattern misses some shell function forms, such as - `function foo()`, multi-line definitions, and extra whitespace. + The previous function regex required explicit matching of function + bodies using `{`, `(`, `((`, or `[[`, which caused several issues: - Extend the pattern to: - - Support `function foo()` syntax. - - Allow spaces in `foo ( )` definitions. - - Recognize multi-line definitions with backslashes. - - Broaden function body detection. + - It failed to capture valid functions where `{` was on the next line + due to line continuation (`\`). + - It did not recognize functions with single command body, such as + `x () echo hello`. + + Replacing the function body matching logic with `.*$`, ensures + that everything on the function definition line is captured, + aligning with other userdiff drivers and improving hunk headers in + `git diff`. + + Additionally, the word regex is refined to better recognize shell + syntax, including additional parameter expansion operators and + command-line options, improving syntax-aware diffs. Signed-off-by: Moumita Dhar <dhar61595@xxxxxxxxx> + ## t/t4018/bash-bashism-style-multiline-function (new) ## +@@ ++function RIGHT \ ++{ ++ echo 'ChangeMe' ++} + \ No newline at end of file + + ## t/t4018/bash-posix-style-multiline-function (new) ## +@@ ++RIGHT() \ ++{ ++ ChangeMe ++} + \ No newline at end of file + + ## t/t4018/bash-posix-style-single-command-function (new) ## +@@ ++RIGHT() echo "hello" ++ ++ ChangeMe + + ## t/t4034-diff-words.sh ## +@@ t/t4034-diff-words.sh: test_expect_success 'unset default driver' ' + + test_language_driver ada + test_language_driver bibtex ++test_language_driver bash + test_language_driver cpp + test_language_driver csharp + test_language_driver css + + ## t/t4034/bash/expect (new) ## +@@ ++<BOLD>diff --git a/pre b/post<RESET> ++<BOLD>index 09ac008..60ba6a2 100644<RESET> ++<BOLD>--- a/pre<RESET> ++<BOLD>+++ b/post<RESET> ++<CYAN>@@ -1,25 +1,25 @@<RESET> ++<RED>my_var<RESET><GREEN>new_var<RESET>=10 ++x=<RED>123<RESET><GREEN>456<RESET> ++y=<RED>3.14<RESET><GREEN>2.71<RESET> ++z=<RED>.5<RESET><GREEN>.75<RESET> ++echo <RED>$USER<RESET><GREEN>$USERNAME<RESET> ++${<RED>HOME<RESET><GREEN>HOMEDIR<RESET>} ++if [ "<RED>$a<RESET><GREEN>$x<RESET>" == "<RED>$b<RESET><GREEN>$y<RESET>" ] || [ "<RED>$c<RESET><GREEN>$x<RESET>" != "<RED>$d<RESET><GREEN>$y<RESET>" ]; then echo "OK"; fi ++((<RED>a<RESET><GREEN>x<RESET>+=<RED>b<RESET><GREEN>y<RESET>)) ++((<RED>a<RESET><GREEN>x<RESET>-=<RED>b<RESET><GREEN>y<RESET>)) ++$((<RED>a<RESET><GREEN>x<RESET><<<RED>b<RESET><GREEN>y<RESET>)) ++$((<RED>a<RESET><GREEN>x<RESET>>><RED>b<RESET><GREEN>y<RESET>)) ++${<RED>a<RESET><GREEN>x<RESET>:-<RED>b<RESET><GREEN>y<RESET>} ++${<RED>a<RESET><GREEN>x<RESET>:=<RED>b<RESET><GREEN>y<RESET>} ++${<RED>a<RESET><GREEN>x<RESET>##*/} ++${<RED>a<RESET><GREEN>x<RESET>%.*} ++${<RED>a<RESET><GREEN>x<RESET>%%.*} ++${<RED>a<RESET><GREEN>x<RESET>^^} ++${<RED>a<RESET><GREEN>x<RESET>,} ++${<RED>a<RESET><GREEN>x<RESET>,,} ++${!<RED>a<RESET><GREEN>x<RESET>} ++${<RED>a<RESET><GREEN>x<RESET>[@]} ++${<RED>a<RESET><GREEN>x<RESET>:?error message} ++${<RED>a<RESET><GREEN>x<RESET>:2:3} ++ls <RED>-a<RESET><GREEN>-x<RESET> ++ls <RED>--a<RESET><GREEN>--x<RESET> + + ## t/t4034/bash/post (new) ## +@@ ++new_var=10 ++x=456 ++y=2.71 ++z=.75 ++echo $USERNAME ++${HOMEDIR} ++if [ "$x" == "$y" ] || [ "$x" != "$y" ]; then echo "OK"; fi ++((x+=y)) ++((x-=y)) ++$((x<<y)) ++$((x>>y)) ++${x:-y} ++${x:=y} ++${x##*/} ++${x%.*} ++${x%%.*} ++${x^^} ++${x,} ++${x,,} ++${!x} ++${x[@]} ++${x:?error message} ++${x:2:3} ++ls -x ++ls --x + + ## t/t4034/bash/pre (new) ## +@@ ++my_var=10 ++x=123 ++y=3.14 ++z=.5 ++echo $USER ++${HOME} ++if [ "$a" == "$b" ] || [ "$c" != "$d" ]; then echo "OK"; fi ++((a+=b)) ++((a-=b)) ++$((a << b)) ++$((a >> b)) ++${a:-b} ++${a:=b} ++${a##*/} ++${a%.*} ++${a%%.*} ++${a^^} ++${a,} ++${a,,} ++${!a} ++${a[@]} ++${a:?error message} ++${a:2:3} ++ls -a ++ls --a + ## userdiff.c ## -@@ userdiff.c: IPATTERN("ada", - "|[-+]?[0-9][0-9#_.aAbBcCdDeEfF]*([eE][+-]?[0-9_]+)?" - "|=>|\\.\\.|\\*\\*|:=|/=|>=|<=|<<|>>|<>"), - PATTERNS("bash", -- /* Optional leading indentation */ -+ /* Optional leading indentation */ - "^[ \t]*" -- /* Start of captured text */ -+ /* Start of captured function name */ - "(" - "(" -- /* POSIX identifier with mandatory parentheses */ -- "[a-zA-Z_][a-zA-Z0-9_]*[ \t]*\\([ \t]*\\))" -+ /* POSIX identifier with mandatory parentheses (allow spaces inside) */ -+ "[a-zA-Z_][a-zA-Z0-9_]*[ \t]*\\([ \t]*\\)" - "|" -- /* Bashism identifier with optional parentheses */ -- "(function[ \t]+[a-zA-Z_][a-zA-Z0-9_]*(([ \t]*\\([ \t]*\\))|([ \t]+))" -+ /* Bash-style function definitions, allowing optional `function` keyword */ -+ "(?:function[ \t]+(?=[a-zA-Z_]))?[a-zA-Z_][a-zA-Z0-9_]*(([ \t]*\\([ \t]*\\))|([ \t]+))?" +@@ userdiff.c: PATTERNS("bash", + /* Bashism identifier with optional parentheses */ + "(function[ \t]+[a-zA-Z_][a-zA-Z0-9_]*(([ \t]*\\([ \t]*\\))|([ \t]+))" ")" - /* Optional whitespace */ - "[ \t]*" +- /* Optional whitespace */ +- "[ \t]*" - /* Compound command starting with `{`, `(`, `((` or `[[` */ - "(\\{|\\(\\(?|\\[\\[)" -- /* End of captured text */ -+ /* Allow function body to start with `{`, `(` (subshell), `[[` */ -+ "(\\{|\\(|\\[\\[)" -+ /* End of captured function name */ ++ /* Everything after the function header is captured */ ++ ".*$" + /* End of captured text */ ")", /* -- */ - /* Characters not in the default $IFS value */ - "[^ \t]+"), + /* Identifiers: variable and function names */ -+ "[a-zA-Z_][a-zA-Z0-9_]*" ++ "[a-zA-Z_][a-zA-Z0-9_]*" + /* Numeric constants: integers and decimals */ -+ "|[-+]?[0-9]+(\\.[0-9]*)?|[-+]?\\.[0-9]+" -+ /* Shell variables: `$VAR`, `${VAR}` */ -+ "|\\$[a-zA-Z_][a-zA-Z0-9_]*|\\$\\{[^}]+\\}" -+ /* Logical and comparison operators */ ++ "|[0-9]+(\\.[0-9]*)?|[-+]?\\.[0-9]+" ++ /* Shell variables: $VAR, ${VAR} */ ++ "|\\$[a-zA-Z_][a-zA-Z0-9_]*|\\$\\{" ++ /* Logical and comparison operators */ + "|\\|\\||&&|<<|>>|==|!=|<=|>=" + /* Assignment and arithmetic operators */ + "|[-+*/%&|^!=<>]=?" -+ /* Command-line options (to avoid splitting `-option`) */ ++ /* Additional parameter expansion operators */ ++ "|:?=|:-|:\\+|:\\?|:|#|##|%|%%|/[a-zA-Z0-9_-]+|\\^\\^?|,|,,?|!|@|:[0-9]+(:[0-9]+)?" ++ /* Command-line options (to avoid splitting -option) */ + "|--?[a-zA-Z0-9_-]+" + /* Brackets and grouping symbols */ + "|\\(|\\)|\\{|\\}|\\[|\\]"), -- 2.48.0