From a96c0528c68093d155b674269a9c8bf48315fc01 Mon Sep 17 00:00:00 2001 From: Douglas Bagnall Date: Tue, 24 Nov 2015 13:47:16 +1300 Subject: [PATCH 1/3] CVE-2015-5330: Fix handling of unicode near string endings Until now next_codepoint_ext() and next_codepoint_handle_ext() were using strnlen(str, 5) to determine how much string they should try to decode. This ended up looking past the end of the string when it was not null terminated and the final character looked like a multi-byte encoding. The fix is to let the caller say how long the string can be. Bug: https://bugzilla.samba.org/show_bug.cgi?id=11599 Signed-off-by: Douglas Bagnall Pair-programmed-with: Andrew Bartlett Reviewed-by: Ralph Boehme --- lib/util/charset/charset.h | 9 +++++---- lib/util/charset/codepoints.c | 19 +++++++++++++------ lib/util/charset/util_unistr.c | 5 ++++- source3/lib/util_str.c | 2 +- 4 files changed, 23 insertions(+), 12 deletions(-) diff --git a/lib/util/charset/charset.h b/lib/util/charset/charset.h index 474d77e..b70aa61 100644 --- a/lib/util/charset/charset.h +++ b/lib/util/charset/charset.h @@ -175,15 +175,16 @@ smb_iconv_t get_conv_handle(struct smb_iconv_convenience *ic, charset_t from, charset_t to); const char *charset_name(struct smb_iconv_convenience *ic, charset_t ch); -codepoint_t next_codepoint_ext(const char *str, charset_t src_charset, - size_t *size); +codepoint_t next_codepoint_ext(const char *str, size_t len, + charset_t src_charset, size_t *size); codepoint_t next_codepoint(const char *str, size_t *size); ssize_t push_codepoint(char *str, codepoint_t c); /* codepoints */ codepoint_t next_codepoint_convenience_ext(struct smb_iconv_convenience *ic, - const char *str, charset_t src_charset, - size_t *size); + const char *str, size_t len, + charset_t src_charset, + size_t *size); codepoint_t next_codepoint_convenience(struct smb_iconv_convenience *ic, const char *str, size_t *size); ssize_t push_codepoint_convenience(struct smb_iconv_convenience *ic, diff --git a/lib/util/charset/codepoints.c b/lib/util/charset/codepoints.c index 5ee95a8..8dd647e 100644 --- a/lib/util/charset/codepoints.c +++ b/lib/util/charset/codepoints.c @@ -346,7 +346,8 @@ smb_iconv_t get_conv_handle(struct smb_iconv_convenience *ic, */ _PUBLIC_ codepoint_t next_codepoint_convenience_ext( struct smb_iconv_convenience *ic, - const char *str, charset_t src_charset, + const char *str, size_t len, + charset_t src_charset, size_t *bytes_consumed) { /* it cannot occupy more than 4 bytes in UTF16 format */ @@ -366,7 +367,7 @@ _PUBLIC_ codepoint_t next_codepoint_convenience_ext( * we assume that no multi-byte character can take more than 5 bytes. * This is OK as we only support codepoints up to 1M (U+100000) */ - ilen_orig = strnlen(str, 5); + ilen_orig = MIN(len, 5); ilen = ilen_orig; descriptor = get_conv_handle(ic, src_charset, CH_UTF16); @@ -424,7 +425,13 @@ _PUBLIC_ codepoint_t next_codepoint_convenience_ext( _PUBLIC_ codepoint_t next_codepoint_convenience(struct smb_iconv_convenience *ic, const char *str, size_t *size) { - return next_codepoint_convenience_ext(ic, str, CH_UNIX, size); + /* + * We assume that no multi-byte character can take more than 5 bytes + * thus avoiding walking all the way down a long string. This is OK as + * Unicode codepoints only go up to (U+10ffff), which can always be + * encoded in 4 bytes or less. + */ + return next_codepoint_convenience_ext(ic, str, strnlen(str, 5), CH_UNIX, size); } /* @@ -486,10 +493,10 @@ _PUBLIC_ ssize_t push_codepoint_convenience(struct smb_iconv_convenience *ic, return 5 - olen; } -_PUBLIC_ codepoint_t next_codepoint_ext(const char *str, charset_t src_charset, - size_t *size) +_PUBLIC_ codepoint_t next_codepoint_ext(const char *str, size_t len, + charset_t src_charset, size_t *size) { - return next_codepoint_convenience_ext(get_iconv_convenience(), str, + return next_codepoint_convenience_ext(get_iconv_convenience(), str, len, src_charset, size); } diff --git a/lib/util/charset/util_unistr.c b/lib/util/charset/util_unistr.c index 760be77..d9e9b34 100644 --- a/lib/util/charset/util_unistr.c +++ b/lib/util/charset/util_unistr.c @@ -485,7 +485,10 @@ _PUBLIC_ char *strupper_talloc_n(TALLOC_CTX *ctx, const char *src, size_t n) while (n-- && *src) { size_t c_size; - codepoint_t c = next_codepoint_convenience(iconv_convenience, src, &c_size); + codepoint_t c = next_codepoint_convenience_ext(iconv_convenience, + src, + n, + &c_size); src += c_size; c = toupper_m(c); diff --git a/source3/lib/util_str.c b/source3/lib/util_str.c index 4701528..f8a5160 100644 --- a/source3/lib/util_str.c +++ b/source3/lib/util_str.c @@ -1486,7 +1486,7 @@ size_t strlen_m_ext(const char *s, const charset_t src_charset, while (*s) { size_t c_size; - codepoint_t c = next_codepoint_ext(s, src_charset, &c_size); + codepoint_t c = next_codepoint_ext(s, strnlen(s, 5), src_charset, &c_size); s += c_size; switch (dst_charset) { -- 2.5.0 From 8298252a1ba9c014f7ceb76736abb38132181f79 Mon Sep 17 00:00:00 2001 From: Douglas Bagnall Date: Tue, 24 Nov 2015 13:54:09 +1300 Subject: [PATCH 2/3] CVE-2015-5330: next_codepoint_handle_ext: don't short-circuit UTF16 low bytes UTF16 contains zero bytes when it is encoding ASCII (for example), so we can't assume the absense of the 0x80 bit means a one byte encoding. No current callers use UTF16. Bug: https://bugzilla.samba.org/show_bug.cgi?id=11599 Signed-off-by: Douglas Bagnall Pair-programmed-with: Andrew Bartlett Reviewed-by: Ralph Boehme --- lib/util/charset/codepoints.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/util/charset/codepoints.c b/lib/util/charset/codepoints.c index 8dd647e..cf5f3e6 100644 --- a/lib/util/charset/codepoints.c +++ b/lib/util/charset/codepoints.c @@ -358,7 +358,10 @@ _PUBLIC_ codepoint_t next_codepoint_convenience_ext( size_t olen; char *outbuf; - if ((str[0] & 0x80) == 0) { + + if (((str[0] & 0x80) == 0) && (src_charset == CH_DOS || + src_charset == CH_UNIX || + src_charset == CH_UTF8)) { *bytes_consumed = 1; return (codepoint_t)str[0]; } -- 2.5.0 From 0988b7cb606a7e4cd73fd8db02806abbc9d8f2e0 Mon Sep 17 00:00:00 2001 From: Douglas Bagnall Date: Tue, 24 Nov 2015 13:49:09 +1300 Subject: [PATCH 3/3] CVE-2015-5330: strupper_talloc_n_handle(): properly count characters When a codepoint eats more than one byte we really want to know, especially if the string is not NUL terminated. Bug: https://bugzilla.samba.org/show_bug.cgi?id=11599 Signed-off-by: Douglas Bagnall Pair-programmed-with: Andrew Bartlett Reviewed-by: Ralph Boehme --- lib/util/charset/util_unistr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/util/charset/util_unistr.c b/lib/util/charset/util_unistr.c index d9e9b34..6dad43f 100644 --- a/lib/util/charset/util_unistr.c +++ b/lib/util/charset/util_unistr.c @@ -483,13 +483,14 @@ _PUBLIC_ char *strupper_talloc_n(TALLOC_CTX *ctx, const char *src, size_t n) return NULL; } - while (n-- && *src) { + while (n && *src) { size_t c_size; codepoint_t c = next_codepoint_convenience_ext(iconv_convenience, src, n, &c_size); src += c_size; + n -= c_size; c = toupper_m(c); -- 2.5.0