Skip to content

Commit

Permalink
enc/utf_8.c: limit UTF-8
Browse files Browse the repository at this point in the history
* enc/utf_8.c (code_to_mbclen, code_to_mbc): reject values larger
  than the maximum UTF-8 code point (U+10FFFF).  [Feature ruby#11094]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@50392 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
  • Loading branch information
nobu committed Apr 25, 2015
1 parent 4489c13 commit 859f88f
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 18 deletions.
5 changes: 5 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
Sun Apr 26 07:36:48 2015 Nobuyoshi Nakada <nobu@ruby-lang.org>

* enc/utf_8.c (code_to_mbclen, code_to_mbc): reject values larger
than the maximum UTF-8 code point (U+10FFFF). [Feature #11094]

Sat Apr 25 14:26:19 2015 Nobuyoshi Nakada <nobu@ruby-lang.org>

* string.c (str_buf_cat): expand later so that the buffer can be
Expand Down
21 changes: 3 additions & 18 deletions enc/utf_8.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@
/* virtual codepoint values for the invalid encoding bytes 0xfe and 0xff */
#define INVALID_CODE_FE 0xfffffffe
#define INVALID_CODE_FF 0xffffffff
#define VALID_CODE_LIMIT 0x7fffffff
#endif
#define VALID_CODE_LIMIT 0x0010ffff

#define utf8_islead(c) ((UChar )((c) & 0xc0) != 0x80)

Expand Down Expand Up @@ -297,9 +297,7 @@ code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
if ((code & 0xffffff80) == 0) return 1;
else if ((code & 0xfffff800) == 0) return 2;
else if ((code & 0xffff0000) == 0) return 3;
else if ((code & 0xffe00000) == 0) return 4;
else if ((code & 0xfc000000) == 0) return 5;
else if ((code & 0x80000000) == 0) return 6;
else if (code <= VALID_CODE_LIMIT) return 4;
#ifdef USE_INVALID_CODE_SCHEME
else if (code == INVALID_CODE_FE) return 1;
else if (code == INVALID_CODE_FF) return 1;
Expand Down Expand Up @@ -328,24 +326,11 @@ code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc ARG_UNUSED)
*p++ = (UChar )(((code>>12) & 0x0f) | 0xe0);
*p++ = UTF8_TRAILS(code, 6);
}
else if ((code & 0xffe00000) == 0) {
else if (code <= VALID_CODE_LIMIT) {
*p++ = (UChar )(((code>>18) & 0x07) | 0xf0);
*p++ = UTF8_TRAILS(code, 12);
*p++ = UTF8_TRAILS(code, 6);
}
else if ((code & 0xfc000000) == 0) {
*p++ = (UChar )(((code>>24) & 0x03) | 0xf8);
*p++ = UTF8_TRAILS(code, 18);
*p++ = UTF8_TRAILS(code, 12);
*p++ = UTF8_TRAILS(code, 6);
}
else if ((code & 0x80000000) == 0) {
*p++ = (UChar )(((code>>30) & 0x01) | 0xfc);
*p++ = UTF8_TRAILS(code, 24);
*p++ = UTF8_TRAILS(code, 18);
*p++ = UTF8_TRAILS(code, 12);
*p++ = UTF8_TRAILS(code, 6);
}
#ifdef USE_INVALID_CODE_SCHEME
else if (code == INVALID_CODE_FE) {
*p = 0xfe;
Expand Down

0 comments on commit 859f88f

Please sign in to comment.