diff options
Diffstat (limited to 'languages/classes/LanguageYue.php')
-rw-r--r-- | languages/classes/LanguageYue.php | 28 |
1 files changed, 13 insertions, 15 deletions
diff --git a/languages/classes/LanguageYue.php b/languages/classes/LanguageYue.php
index fdc227b3..fc7f233c 100644
--- a/languages/classes/LanguageYue.php
+++ b/languages/classes/LanguageYue.php
@@ -4,20 +4,18 @@
  */
 class LanguageYue extends Language {
 	function stripForSearch( $string ) {
-		# MySQL fulltext index doesn't grok utf-8, so we
-		# need to fold cases and convert to hex
-		# we also separate characters as "words"
-		if( function_exists( 'mb_strtolower' ) ) {
-			return preg_replace(
-				"/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
-				"' U8' . bin2hex( \"$1\" )",
-				mb_strtolower( $string ) );
-		} else {
-			list( , $wikiLowerChars ) = Language::getCaseMaps();
-			return preg_replace(
-				"/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
-				"' U8' . bin2hex( strtr( \"\$1\", \$wikiLowerChars ) )",
-				$string );
-		}
+		wfProfileIn( __METHOD__ );
+
+		// eventually this should be a word segmentation
+		// for now just treat each character as a word
+		// @fixme only do this for Han characters...
+		$t = preg_replace(
+				"/([\\xc0-\\xff][\\x80-\\xbf]*)/",
+				" $1", $string);
+
+		// Do general case folding and UTF-8 armoring
+		$t = parent::stripForSearch( $t );
+		wfProfileOut( __METHOD__ );
+		return $t;
 	}
 }