summaryrefslogtreecommitdiff
path: root/extra/libreoffice/icu53_crashfix.diff
blob: 11da5819beb26cdd74d6c9b4feb697c3d74dff82 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
From 1ce42d1001139a9168e9451dbd48a6daef95c691 Mon Sep 17 00:00:00 2001
From: Eike Rathke <erack@redhat.com>
Date: Wed, 30 Apr 2014 16:51:05 +0000
Subject: resolve crashes with ICU 53.1 in locales with collator data, fdo#77071

ICU 53.1 changed API behavior:
https://ssl.icu-project.org/apiref/icu4c/classicu_1_1RuleBasedCollator.html#a2f4c7eeaf020ad68e3bd9722dd272357
isn't correct anymore:

length: size of the image. If negative, the API will try to figure out the
        length of the image

        NO, IT WILL NOT! It bails out with an error instead.
        Introduced a function to obtain the length for each collator data.

base:  collator, usually root. The base is required to be present through the
       lifetime of the collator. Currently it cannot be NULL.

       NOT "usually root"! There's a check now that bails out if
       if(base->tailoring != CollationRoot::getRoot(errorCode))
       So using an instance created with icu::Locale::getRoot()

(cherry picked from commit a3c627fe38236e82bc6008075d862b3cbfbd9ce3)

Conflicts:
	i18npool/source/collator/collator_unicode.cxx

Backported.

make DISABLE_DYNLOADING on Android happy, fdo#77071 related

(cherry picked from commit dc7ba1af236ec28d399eff833d56608fde9fb70d)

Change-Id: Ia9c4e27d5ef4f8083bbe57e4e2f4b3ff63bb42ed
Reviewed-on: https://gerrit.libreoffice.org/9215
Reviewed-by: Caolán McNamara <caolanm@redhat.com>
Tested-by: Caolán McNamara <caolanm@redhat.com>
---
diff --git a/i18npool/source/collator/collator_unicode.cxx b/i18npool/source/collator/collator_unicode.cxx
index 90dd2f3b..42dfef3 100644
--- a/i18npool/source/collator/collator_unicode.cxx
+++ b/i18npool/source/collator/collator_unicode.cxx
@@ -84,6 +84,27 @@ const sal_uInt8* get_collator_data_zh_radical();
 const sal_uInt8* get_collator_data_zh_stroke();
 const sal_uInt8* get_collator_data_zh_zhuyin();
 
+size_t get_collator_data_ca_charset_length();
+size_t get_collator_data_dz_charset_length();
+size_t get_collator_data_hu_charset_length();
+size_t get_collator_data_ja_charset_length();
+size_t get_collator_data_ja_phonetic_alphanumeric_first_length();
+size_t get_collator_data_ja_phonetic_alphanumeric_last_length();
+size_t get_collator_data_ko_charset_length();
+size_t get_collator_data_ku_alphanumeric_length();
+size_t get_collator_data_ln_charset_length();
+size_t get_collator_data_my_dictionary_length();
+size_t get_collator_data_ne_charset_length();
+size_t get_collator_data_sid_charset_length();
+size_t get_collator_data_zh_TW_charset_length();
+size_t get_collator_data_zh_TW_radical_length();
+size_t get_collator_data_zh_TW_stroke_length();
+size_t get_collator_data_zh_charset_length();
+size_t get_collator_data_zh_pinyin_length();
+size_t get_collator_data_zh_radical_length();
+size_t get_collator_data_zh_stroke_length();
+size_t get_collator_data_zh_zhuyin_length();
+
 }
 
 #endif
@@ -120,6 +141,7 @@ Collator_Unicode::loadCollatorAlgorithm(const OUString& rAlgorithm, const lang::
         }
         if (!collator && OUString::createFromAscii(LOCAL_RULE_LANGS).indexOf(rLocale.Language) >= 0) {
             const sal_uInt8* (*func)() = NULL;
+            size_t (*funclen)() = NULL;
 
 #ifndef DISABLE_DYNLOADING
             OUStringBuffer aBuf;
@@ -132,11 +154,21 @@ Collator_Unicode::loadCollatorAlgorithm(const OUString& rAlgorithm, const lang::
                 aBuf.appendAscii("get_").append(rLocale.Language).appendAscii("_");
                 if ( rLocale.Language == "zh" ) {
                     OUString func_base = aBuf.makeStringAndClear();
+                    OUString funclen_base = func_base + "_length";
                     if (OUString("TW HK MO").indexOf(rLocale.Country) >= 0)
-                        func=(const sal_uInt8* (*)()) osl_getFunctionSymbol(hModule,
+                    {
+                        func = (const sal_uInt8* (*)()) osl_getFunctionSymbol(hModule,
                                     OUString(func_base + "TW_" + rAlgorithm).pData);
+                        funclen = (size_t (*)()) osl_getFunctionSymbol(hModule,
+                                    OUString(funclen_base + "TW_" + rAlgorithm).pData);
+                    }
                     if (!func)
-                        func=(const sal_uInt8* (*)()) osl_getFunctionSymbol(hModule, OUString(func_base + rAlgorithm).pData);
+                    {
+                        func = (const sal_uInt8* (*)()) osl_getFunctionSymbol(
+                                hModule, OUString(func_base + rAlgorithm).pData);
+                        funclen = (size_t (*)()) osl_getFunctionSymbol(
+                                hModule, OUString(funclen_base + rAlgorithm).pData);
+                    }
                 } else {
                     if ( rLocale.Language == "ja" ) {
                         // replace algorithm name to implementation name.
@@ -149,72 +181,147 @@ Collator_Unicode::loadCollatorAlgorithm(const OUString& rAlgorithm, const lang::
                     } else {
                         aBuf.append(rAlgorithm);
                     }
-                    func=(const sal_uInt8* (*)()) osl_getFunctionSymbol(hModule, aBuf.makeStringAndClear().pData);
+                    OUString func_base = aBuf.makeStringAndClear();
+                    OUString funclen_base = func_base + "_length";
+                    func = (const sal_uInt8* (*)()) osl_getFunctionSymbol(hModule, func_base.pData);
+                    funclen = (size_t (*)()) osl_getFunctionSymbol(hModule, funclen_base.pData);
                 }
             }
 #else
             if ( rLocale.Language == "ca" ) {
                 if ( rAlgorithm == "charset" )
+                {
                     func = get_collator_data_ca_charset;
+                    funclen = get_collator_data_ca_charset_length;
+                }
             } else if ( rLocale.Language == "dz" || rLocale.Language == "bo" ) {
                 // 'bo' Tibetan uses the same collation rules as 'dz' Dzongkha
                 if ( rAlgorithm == "charset" )
+                {
                     func = get_collator_data_dz_charset;
+                    funclen = get_collator_data_dz_charset_length;
+                }
             } else if ( rLocale.Language == "hu" ) {
                 if ( rAlgorithm == "charset" )
+                {
                     func = get_collator_data_hu_charset;
+                    funclen = get_collator_data_hu_charset_length;
+                }
             } else if ( rLocale.Language == "ja" ) {
                 if ( rAlgorithm == "charset" )
+                {
                     func = get_collator_data_ja_charset;
+                    funclen = get_collator_data_ja_charset_length;
+                }
                 else if ( rAlgorithm == "phonetic (alphanumeric first)" )
+                {
                     func = get_collator_data_ja_phonetic_alphanumeric_first;
+                    funclen = get_collator_data_ja_phonetic_alphanumeric_first_length;
+                }
                 else if ( rAlgorithm == "phonetic (alphanumeric last)" )
+                {
                     func = get_collator_data_ja_phonetic_alphanumeric_last;
+                    funclen = get_collator_data_ja_phonetic_alphanumeric_last_length;
+                }
 #if (U_ICU_VERSION_MAJOR_NUM < 53)
             } else if ( rLocale.Language == "ko" ) {
                 if ( rAlgorithm == "charset" )
+                {
                     func = get_collator_data_ko_charset;
+                    funclen = get_collator_data_ko_charset_length;
+                }
 #endif
             } else if ( rLocale.Language == "ku" ) {
                 if ( rAlgorithm == "alphanumeric" )
+                {
                     func = get_collator_data_ku_alphanumeric;
+                    funclen = get_collator_data_ku_alphanumeric_length;
+                }
             } else if ( rLocale.Language == "ln" ) {
                 if ( rAlgorithm == "charset" )
+                {
                     func = get_collator_data_ln_charset;
+                    funclen = get_collator_data_ln_charset_length;
+                }
             } else if ( rLocale.Language == "my" ) {
                 if ( rAlgorithm == "dictionary" )
+                {
                     func = get_collator_data_my_dictionary;
+                    funclen = get_collator_data_my_dictionary_length;
+                }
             } else if ( rLocale.Language == "ne" ) {
                 if ( rAlgorithm == "charset" )
+                {
                     func = get_collator_data_ne_charset;
+                    funclen = get_collator_data_ne_charset_length;
+                }
             } else if ( rLocale.Language == "sid" ) {
                 if ( rAlgorithm == "charset" )
+                {
                     func = get_collator_data_sid_charset;
+                    funclen = get_collator_data_sid_charset_length;
+                }
             } else if ( rLocale.Language == "zh" && (rLocale.Country == "TW" || rLocale.Country == "HK" || rLocale.Country == "MO") ) {
                 if ( rAlgorithm == "charset" )
+                {
                     func = get_collator_data_zh_TW_charset;
+                    funclen = get_collator_data_zh_TW_charset_length;
+                }
                 else if ( rAlgorithm == "radical" )
+                {
                     func = get_collator_data_zh_TW_radical;
+                    funclen = get_collator_data_zh_TW_radical_length;
+                }
                 else if ( rAlgorithm == "stroke" )
+                {
                     func = get_collator_data_zh_TW_stroke;
+                    funclen = get_collator_data_zh_TW_stroke_length;
+                }
             } else if ( rLocale.Language == "zh" ) {
                 if ( rAlgorithm == "charset" )
+                {
                     func = get_collator_data_zh_charset;
+                    funclen = get_collator_data_zh_charset_length;
+                }
                 else if ( rAlgorithm == "pinyin" )
+                {
                     func = get_collator_data_zh_pinyin;
+                    funclen = get_collator_data_zh_pinyin_length;
+                }
                 else if ( rAlgorithm == "radical" )
+                {
                     func = get_collator_data_zh_radical;
+                    funclen = get_collator_data_zh_radical_length;
+                }
                 else if ( rAlgorithm == "stroke" )
+                {
                     func = get_collator_data_zh_stroke;
+                    funclen = get_collator_data_zh_stroke_length;
+                }
                 else if ( rAlgorithm == "zhuyin" )
+                {
                     func = get_collator_data_zh_zhuyin;
+                    funclen = get_collator_data_zh_zhuyin_length;
+                }
             }
 #endif
-            if (func) {
+            if (func && funclen) {
                 const sal_uInt8* ruleImage=func();
-                uca_base = new RuleBasedCollator(static_cast<UChar*>(NULL), status);
+                size_t ruleImageSize = funclen();
+                // Not only changed ICU 53.1 the API behavior that a negative
+                // length (ruleImageSize) now leads to failure, but also that
+                // the base RuleBasedCollator passed as uca_base here needs to
+                // have a base->tailoring == CollationRoot::getRoot() otherwise
+                // the init bails out as well, as it does for the previously
+                // used "empty" RuleBasedCollator.
+                // The default collator of the en-US locale would also fulfill
+                // the requirement. The collator of the actual locale or the
+                // NULL (default) locale does not.
+                uca_base = static_cast<RuleBasedCollator*>(icu::Collator::createInstance(
+                            icu::Locale::getRoot(), status));
                 if (! U_SUCCESS(status)) throw RuntimeException();
-                collator = new RuleBasedCollator(reinterpret_cast<const uint8_t*>(ruleImage), -1, uca_base, status);
+                collator = new RuleBasedCollator(
+                        reinterpret_cast<const uint8_t*>(ruleImage), ruleImageSize, uca_base, status);
                 if (! U_SUCCESS(status)) throw RuntimeException();
             }
         }
diff --git a/i18npool/source/collator/gencoll_rule.cxx b/i18npool/source/collator/gencoll_rule.cxx
index a801545..5ba9f5d 100644
--- a/i18npool/source/collator/gencoll_rule.cxx
+++ b/i18npool/source/collator/gencoll_rule.cxx
@@ -65,8 +65,10 @@ void data_write(char* file, char* name, sal_uInt8 *data, sal_Int32 len)
 
     fprintf(fp, "#ifndef DISABLE_DYNLOADING\n");
     fprintf(fp, "SAL_DLLPUBLIC_EXPORT const sal_uInt8* get_%s() { return %s; }\n", name, name);
+    fprintf(fp, "SAL_DLLPUBLIC_EXPORT size_t get_%s_length() { return sizeof(%s); }\n", name, name);
     fprintf(fp, "#else\n");
     fprintf(fp, "SAL_DLLPUBLIC_EXPORT const sal_uInt8* get_collator_data_%s() { return %s; }\n", name, name);
+    fprintf(fp, "SAL_DLLPUBLIC_EXPORT size_t get_collator_data_%s_length() { return sizeof(%s); }\n", name, name);
     fprintf(fp, "#endif\n");
     fprintf(fp, "\n");
     fprintf (fp, "}\n");
--
cgit v0.9.0.2-2-gbebe