Re: [PATCH 4/7] docs: kdoc: rework type prototype parsing

Mauro Carvalho Chehab <mchehab+huawei@xxxxxxxxxx> · Thu, 3 Jul 2025 17:46:48 +0200

Em Tue,  1 Jul 2025 14:57:27 -0600
Jonathan Corbet <corbet@xxxxxxx> escreveu:

> process_proto_type() is using a complex regex and a "while True" loop to
> split a declaration into chunks and, in the end, count brackets.  Switch to
> using a simpler regex to just do the split directly, and handle each chunk
> as it comes.  The result is, IMO, easier to understand and reason about.
> 
> The old algorithm would occasionally elide the space between function
> parameters; see struct rng_alg->generate(), for example.  The only output
> difference is to not elide that space, which is more correct.
> 
> Signed-off-by: Jonathan Corbet <corbet@xxxxxxx>

LGTM.
Reviewed-by: Mauro Carvalho Chehab <mchehab+huawei@xxxxxxxxxx>

> ---
>  scripts/lib/kdoc/kdoc_parser.py | 43 +++++++++++++++++++--------------
>  1 file changed, 25 insertions(+), 18 deletions(-)
> 
> diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py
> index 935f2a3c4b47..61da297df623 100644
> --- a/scripts/lib/kdoc/kdoc_parser.py
> +++ b/scripts/lib/kdoc/kdoc_parser.py
> @@ -1594,30 +1594,37 @@ class KernelDoc:
>  
>          # Strip C99-style comments and surrounding whitespace
>          line = KernRe(r"//.*$", re.S).sub('', line).strip()
> +        if not line:
> +            return # nothing to see here
>  
>          # To distinguish preprocessor directive from regular declaration later.
>          if line.startswith('#'):
>              line += ";"
> -
> -        r = KernRe(r'([^\{\};]*)([\{\};])(.*)')
> -        while True:
> -            if r.search(line):
> -                if self.entry.prototype:
> -                    self.entry.prototype += " "
> -                self.entry.prototype += r.group(1) + r.group(2)
> -
> -                self.entry.brcount += r.group(2).count('{')
> -                self.entry.brcount -= r.group(2).count('}')
> -
> -                if r.group(2) == ';' and self.entry.brcount <= 0:
> +        #
> +        # Split the declaration on any of { } or ;, and accumulate pieces
> +        # until we hit a semicolon while not inside {brackets}
> +        #
> +        r = KernRe(r'(.*?)([{};])')
> +        for chunk in r.split(line):
> +            if chunk:  # Ignore empty matches
> +                self.entry.prototype += chunk
> +                #
> +                # This cries out for a match statement ... someday after we can
> +                # drop Python 3.9 ...
> +                #
> +                if chunk == '{':
> +                    self.entry.brcount += 1
> +                elif chunk == '}':
> +                    self.entry.brcount -= 1
> +                elif chunk == ';' and self.entry.brcount <= 0:
>                      self.dump_declaration(ln, self.entry.prototype)
>                      self.reset_state(ln)
> -                    break
> -
> -                line = r.group(3)
> -            else:
> -                self.entry.prototype += line
> -                break
> +                    return
> +        #
> +        # We hit the end of the line while still in the declaration; put
> +        # in a space to represent the newline.
> +        #
> +        self.entry.prototype += ' '
>  
>      def process_proto(self, ln, line):
>          """STATE_PROTO: reading a function/whatever prototype."""