Hi Frédéric, On Wed, Jul 9, 2025 at 5:46 AM Frédéric Danis <frederic.danis@xxxxxxxxxxxxx> wrote: > > Move duplicate code to static validateutf8() and fix boundary access > on multi-byte character check. > --- > src/shared/util.c | 56 +++++++++++++++-------------------------------- > 1 file changed, 18 insertions(+), 38 deletions(-) > > diff --git a/src/shared/util.c b/src/shared/util.c > index 4780f26b6..36c06188f 100644 > --- a/src/shared/util.c > +++ b/src/shared/util.c > @@ -1909,7 +1909,7 @@ char *strstrip(char *str) > return str; > } > > -bool strisutf8(const char *str, size_t len) > +static bool validateutf8(const char *str, size_t len, size_t *invalid_index) > { > size_t i = 0; > > @@ -1928,17 +1928,23 @@ bool strisutf8(const char *str, size_t len) > size = 3; > else if ((c & 0xF8) == 0xF0) > size = 4; > - else > + else { > /* Invalid UTF-8 sequence */ > + if (invalid_index) > + *invalid_index = i; > return false; > + } > > /* Check the following bytes to ensure they have the correct > * format. > */ > for (size_t j = 1; j < size; ++j) { > - if (i + j > len || (str[i + j] & 0xC0) != 0x80) > + if (i + j >= len || (str[i + j] & 0xC0) != 0x80) { > /* Invalid UTF-8 sequence */ > + if (invalid_index) > + *invalid_index = i; > return false; > + } > } > > /* Move to the next character */ > @@ -1948,6 +1954,11 @@ bool strisutf8(const char *str, size_t len) > return true; > } > > +bool strisutf8(const char *str, size_t len) > +{ > + return validateutf8(str, len, NULL); > +} > + > bool argsisutf8(int argc, char *argv[]) > { > for (int i = 0; i < argc; i++) { > @@ -1962,42 +1973,11 @@ bool argsisutf8(int argc, char *argv[]) > > char *strtoutf8(char *str, size_t len) > { > - size_t i = 0; > - > - while (i < len) { > - unsigned char c = str[i]; > - size_t size = 0; > - > - /* Check the first byte to determine the number of bytes in the > - * UTF-8 character. > - */ > - if ((c & 0x80) == 0x00) > - size = 1; > - else if ((c & 0xE0) == 0xC0) > - size = 2; > - else if ((c & 0xF0) == 0xE0) > - size = 3; > - else if ((c & 0xF8) == 0xF0) > - size = 4; > - else > - /* Invalid UTF-8 sequence */ > - goto done; > - > - /* Check the following bytes to ensure they have the correct > - * format. > - */ > - for (size_t j = 1; j < size; ++j) { > - if (i + j > len || (str[i + j] & 0xC0) != 0x80) > - /* Invalid UTF-8 sequence */ > - goto done; > - } > + size_t invalid_index = 0; > > - /* Move to the next character */ > - i += size; > - } > + if (!validateutf8(str, len, &invalid_index)) > + /* Truncate to the longest valid UTF-8 string */ > + memset(str + invalid_index, 0, len - invalid_index); > > -done: > - /* Truncate to the longest valid UTF-8 string */ > - memset(str + i, 0, len - i); > return str; > } > -- > 2.43.0 > I did something similar yesterday: https://patchwork.kernel.org/project/bluetooth/patch/20250708174628.2949030-1-luiz.dentz@xxxxxxxxx/ Let me know if you have any comments. -- Luiz Augusto von Dentz