Kaydet (Commit) 68786ad8 authored tarafından Eike Rathke's avatar Eike Rathke

icu: various regex fixes

Backported from
http://bugs.icu-project.org/trac/changeset/36724
http://bugs.icu-project.org/trac/changeset/36727
http://bugs.icu-project.org/trac/changeset/36801

Change-Id: Idd85c3344e3ef86e390341038f53ad2a398b3fa3
üst df0ea866
......@@ -25,6 +25,9 @@ $(eval $(call gb_UnpackedTarball_add_patches,icu,\
external/icu/icu4c-scriptrun.patch \
external/icu/icu4c-icu11451.patch.1 \
external/icu/rtti.patch.0 \
external/icu/icu.changeset_36724.patch.1 \
external/icu/icu.changeset_36727.patch.1 \
external/icu/icu.changeset_36801.patch.1 \
))
# vim: set noet sw=4 ts=4:
Index: icu/source/i18n/regexcmp.cpp
===================================================================
--- icu/source/i18n/regexcmp.cpp (revision 36723)
+++ icu/source/i18n/regexcmp.cpp (revision 36724)
@@ -2136,4 +2136,8 @@
int32_t minML = minMatchLength(fMatchOpenParen, patEnd);
int32_t maxML = maxMatchLength(fMatchOpenParen, patEnd);
+ if (URX_TYPE(maxML) != 0) {
+ error(U_REGEX_LOOK_BEHIND_LIMIT);
+ break;
+ }
if (maxML == INT32_MAX) {
error(U_REGEX_LOOK_BEHIND_LIMIT);
@@ -2169,4 +2173,8 @@
int32_t minML = minMatchLength(fMatchOpenParen, patEnd);
int32_t maxML = maxMatchLength(fMatchOpenParen, patEnd);
+ if (URX_TYPE(maxML) != 0) {
+ error(U_REGEX_LOOK_BEHIND_LIMIT);
+ break;
+ }
if (maxML == INT32_MAX) {
error(U_REGEX_LOOK_BEHIND_LIMIT);
Index: icu/source/test/testdata/regextst.txt
===================================================================
--- icu/source/test/testdata/regextst.txt (revision 36723)
+++ icu/source/test/testdata/regextst.txt (revision 36724)
@@ -1201,4 +1201,12 @@
"A|B|\U00012345" "hello <0>\U00012345</0>"
"A|B|\U00010000" "hello \ud800"
+
+# Bug 11370
+# Max match length computation of look-behind expression gives result that is too big to fit in the
+# in the 24 bit operand portion of the compiled code. Expressions should fail to compile
+# (Look-behind match length must be bounded. This case is treated as unbounded, an error.)
+
+"(?<!(0123456789a){10000000})x" E "no match"
+"(?<!\\ubeaf(\\ubeaf{11000}){11000})" E "no match"
# Random debugging, Temporary
Index: icu/source/i18n/regexcmp.cpp
===================================================================
--- icu/source/i18n/regexcmp.cpp (revision 36726)
+++ icu/source/i18n/regexcmp.cpp (revision 36727)
@@ -2340,5 +2340,13 @@
if (fIntervalUpper == 0) {
// Pathological case. Attempt no matches, as if the block doesn't exist.
+ // Discard the generated code for the block.
+ // If the block included parens, discard the info pertaining to them as well.
fRXPat->fCompiledPat->setSize(topOfBlock);
+ if (fMatchOpenParen >= topOfBlock) {
+ fMatchOpenParen = -1;
+ }
+ if (fMatchCloseParen >= topOfBlock) {
+ fMatchCloseParen = -1;
+ }
return TRUE;
}
Index: icu/source/i18n/regexcmp.h
===================================================================
--- icu/source/i18n/regexcmp.h (revision 36726)
+++ icu/source/i18n/regexcmp.h (revision 36727)
@@ -188,5 +188,7 @@
// of the slot reserved for a state save
// at the start of the most recently processed
- // parenthesized block.
+ // parenthesized block. Updated when processing
+ // a close to the location for the corresponding open.
+
int32_t fMatchCloseParen; // The position in the pattern of the first
// location after the most recently processed
Index: icu/source/test/testdata/regextst.txt
===================================================================
--- icu/source/test/testdata/regextst.txt (revision 36726)
+++ icu/source/test/testdata/regextst.txt (revision 36727)
@@ -1202,4 +1202,13 @@
"A|B|\U00010000" "hello \ud800"
+# Bug 11369
+# Incorrect optimization of patterns with a zero length quantifier {0}
+
+"(.|b)(|b){0}\$(?#xxx){3}(?>\D*)" "AAAAABBBBBCCCCCDDDDEEEEE"
+"(|b)ab(c)" "<0><1></1>ab<2>c</2></0>"
+"(|b){0}a{3}(D*)" "<0>aaa<2></2></0>"
+"(|b){0,1}a{3}(D*)" "<0><1></1>aaa<2></2></0>"
+"((|b){0})a{3}(D*)" "<0><1></1>aaa<3></3></0>"
+
# Bug 11370
# Max match length computation of look-behind expression gives result that is too big to fit in the
@@ -1209,4 +1218,5 @@
"(?<!(0123456789a){10000000})x" E "no match"
"(?<!\\ubeaf(\\ubeaf{11000}){11000})" E "no match"
+
# Random debugging, Temporary
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment