File aa0188eaedc056dca8374ac03d0177429b495515.patch of Package oniguruma.25626
From aa0188eaedc056dca8374ac03d0177429b495515 Mon Sep 17 00:00:00 2001
From: "K.Kosako" <kosako@sofnec.co.jp>
Date: Thu, 7 Nov 2019 14:13:55 +0900
Subject: [PATCH] fix #163: heap-buffer-overflow in gb18030_mbc_enc_len
---
src/gb18030.c | 16 +++++++++++++++-
src/regparse.c | 32 ++++++++++++++++++++++----------
2 files changed, 37 insertions(+), 11 deletions(-)
Index: onig-6.7.0/src/gb18030.c
===================================================================
--- onig-6.7.0.orig/src/gb18030.c
+++ onig-6.7.0/src/gb18030.c
@@ -76,6 +76,20 @@ gb18030_mbc_enc_len(const UChar* p)
}
static int
+gb18030_code_to_mbclen(OnigCodePoint code) /* maps a code value to an encoded length of 4, 2 or 1, or to ONIGERR_INVALID_CODE_POINT_VALUE */
+{
+ if ((code & 0xff000000) != 0) return 4; /* some bit in 24..31 is set: four-byte form */
+ else if ((code & 0xff0000) != 0) return ONIGERR_INVALID_CODE_POINT_VALUE; /* top byte is zero but bits 16..23 are set: rejected, no length of 3 is ever returned */
+ else if ((code & 0xff00) != 0) return 2; /* only bits 8..15 (and below) are set: two-byte form */
+ else {
+ if (GB18030_MAP[(int )(code & 0xff)] == CM) /* value fits in one byte; reject it when the table entry is CM (NOTE(review): CM presumably marks bytes that cannot stand alone as a character -- confirm against GB18030_MAP) */
+ return ONIGERR_INVALID_CODE_POINT_VALUE;
+
+ return 1; /* any other single-byte value has length 1 */
+ }
+}
+
+static int
is_valid_mbc_string(const UChar* p, const UChar* end)
{
while (p < end) {
@@ -522,7 +536,7 @@ OnigEncodingType OnigEncodingGB18030 = {
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
gb18030_mbc_to_code,
- onigenc_mb4_code_to_mbclen,
+ gb18030_code_to_mbclen,
gb18030_code_to_mbc,
gb18030_mbc_case_fold,
onigenc_ascii_apply_all_case_fold,
Index: onig-6.7.0/src/regparse.c
===================================================================
--- onig-6.7.0.orig/src/regparse.c
+++ onig-6.7.0/src/regparse.c
@@ -4826,6 +4826,7 @@ add_ctype_to_cc(CClassNode* cc, int ctyp
int c, r;
int ascii_mode;
+ int is_single;
const OnigCodePoint *ranges;
OnigCodePoint limit;
OnigCodePoint sb_out;
@@ -4847,6 +4848,7 @@ add_ctype_to_cc(CClassNode* cc, int ctyp
}
r = 0;
+ is_single = ONIGENC_IS_SINGLEBYTE(enc);
limit = ascii_mode ? ASCII_LIMIT : SINGLE_BYTE_SIZE;
switch (ctype) {
@@ -4863,19 +4865,25 @@ add_ctype_to_cc(CClassNode* cc, int ctyp
case ONIGENC_CTYPE_ALNUM:
if (not != 0) {
for (c = 0; c < (int )limit; c++) {
- if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
- BITSET_SET_BIT(cc->bs, c);
+ if (is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1) {
+ if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
+ BITSET_SET_BIT(cc->bs, c);
+ }
}
for (c = limit; c < SINGLE_BYTE_SIZE; c++) {
- BITSET_SET_BIT(cc->bs, c);
+ if (is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1)
+ BITSET_SET_BIT(cc->bs, c);
}
- ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
+ if (is_single == 0)
+ ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
}
else {
for (c = 0; c < (int )limit; c++) {
- if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
- BITSET_SET_BIT(cc->bs, c);
+ if (is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1) {
+ if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
+ BITSET_SET_BIT(cc->bs, c);
+ }
}
}
break;
@@ -4885,21 +4893,25 @@ add_ctype_to_cc(CClassNode* cc, int ctyp
case ONIGENC_CTYPE_WORD:
if (not != 0) {
for (c = 0; c < (int )limit; c++) {
- if (ONIGENC_CODE_TO_MBCLEN(enc, c) > 0 /* check invalid code point */
+ /* check invalid code point */
+ if ((is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1)
&& ! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
BITSET_SET_BIT(cc->bs, c);
}
for (c = limit; c < SINGLE_BYTE_SIZE; c++) {
- if (ONIGENC_CODE_TO_MBCLEN(enc, c) > 0)
+ if (is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1)
BITSET_SET_BIT(cc->bs, c);
}
+ if (ascii_mode != 0 && is_single == 0)
+ ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
}
else {
for (c = 0; c < (int )limit; c++) {
- if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
+ if ((is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1)
+ && ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
BITSET_SET_BIT(cc->bs, c);
}
- if (ascii_mode == 0)
+ if (ascii_mode == 0 && is_single == 0)
ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
}
break;