From: Luiz Augusto von Dentz <luiz.von.dentz@xxxxxxxxx> This introduces strnlenutf8, which works similarly to strnlen but returns only the number of valid bytes of a UTF-8 encoded string, and then replaces the other copies of similar code with it. --- src/shared/util.c | 82 +++++++++++++++++------------------------------ src/shared/util.h | 2 ++ 2 files changed, 32 insertions(+), 52 deletions(-) diff --git a/src/shared/util.c b/src/shared/util.c index 4780f26b6d59..fa058170e124 100644 --- a/src/shared/util.c +++ b/src/shared/util.c @@ -1909,58 +1909,8 @@ char *strstrip(char *str) return str; } -bool strisutf8(const char *str, size_t len) -{ - size_t i = 0; +size_t strnlenutf8(const char *str, size_t len) - while (i < len) { - unsigned char c = str[i]; - size_t size = 0; - - /* Check the first byte to determine the number of bytes in the - * UTF-8 character. - */ - if ((c & 0x80) == 0x00) - size = 1; - else if ((c & 0xE0) == 0xC0) - size = 2; - else if ((c & 0xF0) == 0xE0) - size = 3; - else if ((c & 0xF8) == 0xF0) - size = 4; - else - /* Invalid UTF-8 sequence */ - return false; - - /* Check the following bytes to ensure they have the correct - * format. - */ - for (size_t j = 1; j < size; ++j) { - if (i + j > len || (str[i + j] & 0xC0) != 0x80) - /* Invalid UTF-8 sequence */ - return false; - } - - /* Move to the next character */ - i += size; - } - - return true; -} - -bool argsisutf8(int argc, char *argv[]) -{ - for (int i = 0; i < argc; i++) { - if (!strisutf8(argv[i], strlen(argv[i]))) { - printf("Invalid character in string: %s\n", argv[i]); - return false; - } - } - - return true; -} - -char *strtoutf8(char *str, size_t len) { size_t i = 0; @@ -1987,7 +1937,7 @@ char *strtoutf8(char *str, size_t len) * format. 
*/ for (size_t j = 1; j < size; ++j) { - if (i + j > len || (str[i + j] & 0xC0) != 0x80) + if (i + j >= len || (str[i + j] & 0xC0) != 0x80) /* Invalid UTF-8 sequence */ goto done; } @@ -1997,6 +1947,34 @@ char *strtoutf8(char *str, size_t len) } done: + return i; +} + +bool strisutf8(const char *str, size_t len) +{ + return strnlenutf8(str, len) == len; +} + +bool argsisutf8(int argc, char *argv[]) +{ + for (int i = 0; i < argc; i++) { + if (!strisutf8(argv[i], strlen(argv[i]))) { + printf("Invalid character in string: %s\n", argv[i]); + return false; + } + } + + return true; +} + +char *strtoutf8(char *str, size_t len) +{ + size_t i = 0; + + i = strnlenutf8(str, len); + if (i == len) + return str; + /* Truncate to the longest valid UTF-8 string */ memset(str + i, 0, len - i); return str; diff --git a/src/shared/util.h b/src/shared/util.h index 6fc02a9dcb5a..c480351d6e9f 100644 --- a/src/shared/util.h +++ b/src/shared/util.h @@ -90,6 +90,8 @@ do { \ char *strdelimit(char *str, char *del, char c); int strsuffix(const char *str, const char *suffix); char *strstrip(char *str); + +size_t strnlenutf8(const char *str, size_t len); bool strisutf8(const char *str, size_t length); bool argsisutf8(int argc, char *argv[]); char *strtoutf8(char *str, size_t len); -- 2.50.0