Signed-off-by: David Howells <dhowells@xxxxxxxxxx>
cc: Steve French <sfrench@xxxxxxxxx>
cc: Paulo Alcantara <pc@xxxxxxxxxxxxx>
cc: Shyam Prasad N <sprasad@xxxxxxxxxxxxx>
cc: Tom Talpey <tom@xxxxxxxxxx>
cc: linux-cifs@xxxxxxxxxxxxxxx
cc: netfs@xxxxxxxxxxxxxxx
cc: linux-fsdevel@xxxxxxxxxxxxxxx
---
 fs/nls/nls_base.c            | 39 +++++++++++++++++++++++++++++
 fs/smb/client/cifs_unicode.c | 48 ++++++++++++++++++++++++++++++++++++
 fs/smb/client/cifs_unicode.h |  2 ++
 include/linux/nls.h          |  1 +
 4 files changed, 90 insertions(+)

diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c
index 18d597e49a19..f6927c7d9fe1 100644
--- a/fs/nls/nls_base.c
+++ b/fs/nls/nls_base.c
@@ -171,6 +171,45 @@ int utf8s_to_utf16s(const u8 *s, int inlen, enum utf16_endian endian,
 }
 EXPORT_SYMBOL(utf8s_to_utf16s);
 
+/**
+ * utf8s_to_len_utf16s - Determine the length of a conversion of UTF8 to UTF16.
+ * @s: The source utf8 string
+ * @inlen: The length of the string
+ *
+ * Scanning stops at the first NUL byte or after @inlen bytes.  Each character
+ * below PLANE_SIZE counts as one UTF16 unit and anything else as two.
+ *
+ * Return: The number of UTF16 units multiplied by sizeof(wchar_t), not
+ * including any terminator, or -EINVAL if utf8_to_utf32() rejects a character.
+ */
+ssize_t utf8s_to_len_utf16s(const u8 *s, int inlen)
+{
+	unicode_t u;
+	size_t outcount = 0;
+	int size;
+
+	while (inlen > 0 && *s) {
+		if (*s & 0x80) {
+			size = utf8_to_utf32(s, inlen, &u);
+			if (size < 0)
+				return -EINVAL;
+			s += size;
+			inlen -= size;
+
+			if (u >= PLANE_SIZE)
+				outcount += 2;
+			else
+				outcount++;
+		} else {
+			s++;
+			outcount++;
+			inlen--;
+		}
+	}
+	return outcount * sizeof(wchar_t);
+}
+EXPORT_SYMBOL(utf8s_to_len_utf16s);
+
 static inline unsigned long get_utf16(unsigned c, enum utf16_endian endian)
 {
 	switch (endian) {
diff --git a/fs/smb/client/cifs_unicode.c b/fs/smb/client/cifs_unicode.c
index 4cc6e0896fad..ba4b361613f6 100644
--- a/fs/smb/client/cifs_unicode.c
+++ b/fs/smb/client/cifs_unicode.c
@@ -290,6 +290,54 @@ cifs_strtoUTF16(__le16 *to, const char *from, int len,
 	return i;
 }
 
+/*
+ * Work out how long a string will be once converted to UTF16 in bytes.  This
+ * does not include a NUL terminator.
+ *
+ * @from - pointer to input string
+ * @len - maximum number of bytes of @from to examine
+ * @codepage - codepage of the input string
+ *
+ * Scanning stops at the first NUL byte or after @len bytes.  Never fails:
+ * each byte that char2uni cannot convert is counted as one two-byte character.
+ */
+size_t cifs_size_strtoUTF16(const char *from, int len,
+			    const struct nls_table *codepage)
+{
+	wchar_t wchar_to; /* needed to quiet sparse */
+	ssize_t out_len = 0;
+	int charlen;
+
+	/* special case for utf8 to handle no plane0 chars */
+	if (strcmp(codepage->charset, "utf8") == 0) {
+		out_len = utf8s_to_len_utf16s(from, len);
+		if (out_len >= 0)
+			goto success;
+		/*
+		 * On failure, fall back to UCS encoding as this function
+		 * cannot return an error.  Currently it can only fail if the
+		 * source contains invalidly encoded characters.  Discard the
+		 * negative error code so that it doesn't skew the count below.
+		 */
+		out_len = 0;
+	}
+
+	for (; len && *from; len -= charlen) {
+		charlen = codepage->char2uni(from, len, &wchar_to);
+		if (charlen < 1) {
+			cifs_dbg(VFS, "strtoUTF16: char2uni of 0x%x returned %d\n",
+				 *from, charlen);
+			/* Replace with a question mark */
+			charlen = 1;
+		}
+		from += charlen;
+		out_len += 2;
+	}
+
+success:
+	return out_len;
+}
+
 /*
  * cifs_utf16_bytes - how long will a string be after conversion?
  * @utf16 - pointer to input string
diff --git a/fs/smb/client/cifs_unicode.h b/fs/smb/client/cifs_unicode.h
index e137a0dfbbe9..c3519a46a2b5 100644
--- a/fs/smb/client/cifs_unicode.h
+++ b/fs/smb/client/cifs_unicode.h
@@ -60,6 +60,8 @@ int cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
 int cifs_utf16_bytes(const __le16 *from, int maxbytes,
 		     const struct nls_table *codepage);
 int cifs_strtoUTF16(__le16 *, const char *, int, const struct nls_table *);
+size_t cifs_size_strtoUTF16(const char *from, int len,
+			    const struct nls_table *codepage);
 char *cifs_strndup_from_utf16(const char *src, const int maxlen,
 			      const bool is_unicode,
 			      const struct nls_table *codepage);
diff --git a/include/linux/nls.h b/include/linux/nls.h
index e0bf8367b274..026da1d5ffaa 100644
--- a/include/linux/nls.h
+++ b/include/linux/nls.h
@@ -56,6 +56,7 @@ extern int utf8_to_utf32(const u8 *s, int len, unicode_t *pu);
 extern int utf32_to_utf8(unicode_t u, u8 *s, int maxlen);
 extern int utf8s_to_utf16s(const u8 *s, int len,
 		enum utf16_endian endian, wchar_t *pwcs, int maxlen);
+ssize_t utf8s_to_len_utf16s(const u8 *s, int inlen);
 extern int utf16s_to_utf8s(const wchar_t *pwcs, int len,
 		enum utf16_endian endian, u8 *s, int maxlen);