From bc83221d3c667b70e51dfff6ed212aada3906ee6 Mon Sep 17 00:00:00 2001 From: Parabola Date: Fri, 8 Jun 2012 19:39:34 +0000 Subject: Fri Jun 8 19:39:34 UTC 2012 --- extra/icu/fix_broken_regex.diff | 35 ----------------------------------- 1 file changed, 35 deletions(-) delete mode 100644 extra/icu/fix_broken_regex.diff (limited to 'extra/icu') diff --git a/extra/icu/fix_broken_regex.diff b/extra/icu/fix_broken_regex.diff deleted file mode 100644 index 8161a1107..000000000 --- a/extra/icu/fix_broken_regex.diff +++ /dev/null @@ -1,35 +0,0 @@ ---- i18n/regexcmp.cpp -+++ i18n/regexcmp.cpp -@@ -3307,8 +3307,29 @@ - - case URX_STRING_I: -- // TODO: Is the case-folded string the longest? -- // If so we can optimize this the same as URX_STRING. -- loc++; -- currentLen = INT32_MAX; -+ // TODO: This code assumes that any user string that matches will be no longer -+ // than our compiled string, with case insensitive matching. -+ // Our compiled string has been case-folded already. -+ // -+ // Any matching user string will have no more code points than our -+ // compiled (folded) string. Folding may add code points, but -+ // not remove them. -+ // -+ // There is a potential problem if a supplemental code point -+ // case-folds to a BMP code point. In this case our compiled string -+ // could be shorter (in code units) than a matching user string. -+ // -+ // At this time (Unicode 6.1) there are no such characters, and this case -+ // is not being handled. A test, intltest regex/Bug9283, will fail if -+ // any problematic characters are added to Unicode. -+ // -+ // If this happens, we can make a set of the BMP chars that the -+ // troublesome supplementals fold to, scan our string, and bump the -+ // currentLen one extra for each that is found. -+ // -+ { -+ loc++; -+ int32_t stringLenOp = (int32_t)fRXPat->fCompiledPat->elementAti(loc); -+ currentLen = safeIncrement(currentLen, URX_VAL(stringLenOp)); -+ } - break; -- cgit v1.2.3-54-g00ecf