Hi Frédéric, On Tue, Jul 8, 2025 at 3:08 AM Frédéric Danis <frederic.danis@xxxxxxxxxxxxx> wrote: > > This adds the strtoutf8 function that truncate a string before the > first non UTF-8 character. > This truncation is done in place. > --- > src/shared/util.c | 42 ++++++++++++++++++++++++++++++++++++++++++ > src/shared/util.h | 1 + > 2 files changed, 43 insertions(+) > > diff --git a/src/shared/util.c b/src/shared/util.c > index 5d3a14d96..5262458cb 100644 > --- a/src/shared/util.c > +++ b/src/shared/util.c > @@ -1959,3 +1959,45 @@ bool argsisutf8(int argc, char *argv[]) > > return true; > } > + > +char *strtoutf8(char *str, size_t len) > +{ > + size_t i = 0; > + > + while (i < len) { > + unsigned char c = str[i]; > + size_t size = 0; > + > + /* Check the first byte to determine the number of bytes in the > + * UTF-8 character. > + */ > + if ((c & 0x80) == 0x00) > + size = 1; > + else if ((c & 0xE0) == 0xC0) > + size = 2; > + else if ((c & 0xF0) == 0xE0) > + size = 3; > + else if ((c & 0xF8) == 0xF0) > + size = 4; > + else > + /* Invalid UTF-8 sequence */ > + goto done; > + > + /* Check the following bytes to ensure they have the correct > + * format. > + */ > + for (size_t j = 1; j < size; ++j) { > + if (i + j > len || (str[i + j] & 0xC0) != 0x80) > + /* Invalid UTF-8 sequence */ > + goto done; > + } > + > + /* Move to the next character */ > + i += size; > + } Perhaps we should split this part into its own function, or use strisutf8 but somehow return the last valid character found. 
> + > +done: > + /* Truncate to the longest valid UTF-8 string */ > + memset(str + i, 0, len - i); > + return str; > +} > diff --git a/src/shared/util.h b/src/shared/util.h > index dd357fb93..6fc02a9dc 100644 > --- a/src/shared/util.h > +++ b/src/shared/util.h > @@ -92,6 +92,7 @@ int strsuffix(const char *str, const char *suffix); > char *strstrip(char *str); > bool strisutf8(const char *str, size_t length); > bool argsisutf8(int argc, char *argv[]); > +char *strtoutf8(char *str, size_t len); > > void *util_malloc(size_t size); > void *util_memdup(const void *src, size_t size); > -- > 2.43.0 > > -- Luiz Augusto von Dentz