Re: [PATCH BlueZ] shared/util: Refactor code from strisutf8 and strtoutf8

Frédéric Danis <frederic.danis@xxxxxxxxxxxxx> · Wed, 9 Jul 2025 15:30:26 +0200

Hi Luiz,

On 09/07/2025 15:11, Luiz Augusto von Dentz wrote:
Hi Frédéric,

On Wed, Jul 9, 2025 at 5:46 AM Frédéric Danis
<frederic.danis@xxxxxxxxxxxxx> wrote:
Move duplicate code to static validateutf8() and fix the off-by-one bounds
check on multi-byte characters (i + j > len allowed a read of str[len]).
---
  src/shared/util.c | 56 +++++++++++++++--------------------------------
  1 file changed, 18 insertions(+), 38 deletions(-)

diff --git a/src/shared/util.c b/src/shared/util.c
index 4780f26b6..36c06188f 100644
--- a/src/shared/util.c
+++ b/src/shared/util.c
@@ -1909,7 +1909,7 @@ char *strstrip(char *str)
         return str;
  }

-bool strisutf8(const char *str, size_t len)
+static bool validateutf8(const char *str, size_t len, size_t *invalid_index)
  {
         size_t i = 0;

@@ -1928,17 +1928,23 @@ bool strisutf8(const char *str, size_t len)
                         size = 3;
                 else if ((c & 0xF8) == 0xF0)
                         size = 4;
-               else
+               else {
                         /* Invalid UTF-8 sequence */
+                       if (invalid_index)
+                               *invalid_index = i;
                         return false;
+               }

                 /* Check the following bytes to ensure they have the correct
                  * format.
                  */
                 for (size_t j = 1; j < size; ++j) {
-                       if (i + j > len || (str[i + j] & 0xC0) != 0x80)
+                       if (i + j >= len || (str[i + j] & 0xC0) != 0x80) {
                                 /* Invalid UTF-8 sequence */
+                               if (invalid_index)
+                                       *invalid_index = i;
                                 return false;
+                       }
                 }

                 /* Move to the next character */
@@ -1948,6 +1954,11 @@ bool strisutf8(const char *str, size_t len)
         return true;
  }

+bool strisutf8(const char *str, size_t len)
+{
+       return validateutf8(str, len, NULL);
+}
+
  bool argsisutf8(int argc, char *argv[])
  {
         for (int i = 0; i < argc; i++) {
@@ -1962,42 +1973,11 @@ bool argsisutf8(int argc, char *argv[])

  char *strtoutf8(char *str, size_t len)
  {
-       size_t i = 0;
-
-       while (i < len) {
-               unsigned char c = str[i];
-               size_t size = 0;
-
-               /* Check the first byte to determine the number of bytes in the
-                * UTF-8 character.
-                */
-               if ((c & 0x80) == 0x00)
-                       size = 1;
-               else if ((c & 0xE0) == 0xC0)
-                       size = 2;
-               else if ((c & 0xF0) == 0xE0)
-                       size = 3;
-               else if ((c & 0xF8) == 0xF0)
-                       size = 4;
-               else
-                       /* Invalid UTF-8 sequence */
-                       goto done;
-
-               /* Check the following bytes to ensure they have the correct
-                * format.
-                */
-               for (size_t j = 1; j < size; ++j) {
-                       if (i + j > len || (str[i + j] & 0xC0) != 0x80)
-                               /* Invalid UTF-8 sequence */
-                               goto done;
-               }
+       size_t invalid_index = 0;

-               /* Move to the next character */
-               i += size;
-       }
+       if (!validateutf8(str, len, &invalid_index))
+               /* Truncate to the longest valid UTF-8 string */
+               memset(str + invalid_index, 0, len - invalid_index);

-done:
-       /* Truncate to the longest valid UTF-8 string */
-       memset(str + i, 0, len - i);
         return str;
  }
--
2.43.0

I did something similar yesterday:

https://patchwork.kernel.org/project/bluetooth/patch/20250708174628.2949030-1-luiz.dentz@xxxxxxxxx/

Let me know if you have any comments.

Sorry, I missed it; you can discard mine.

--
Frédéric Danis
Senior Software Engineer

Collabora Ltd.
Platinum Building, St John's Innovation Park, Cambridge CB4 0DS, United Kingdom
Registered in England & Wales, no. 5513718