diff options
Diffstat (limited to 'languages/classes/LanguageZh_hans.php')
-rw-r--r-- | languages/classes/LanguageZh_hans.php | 43 |
1 files changed, 26 insertions, 17 deletions
diff --git a/languages/classes/LanguageZh_hans.php b/languages/classes/LanguageZh_hans.php index 983dd485..5b03d731 100644 --- a/languages/classes/LanguageZh_hans.php +++ b/languages/classes/LanguageZh_hans.php @@ -4,21 +4,30 @@ * @ingroup Language */ class LanguageZh_hans extends Language { - function stripForSearch( $string ) { - # MySQL fulltext index doesn't grok utf-8, so we - # need to fold cases and convert to hex - # we also separate characters as "words" - if( function_exists( 'mb_strtolower' ) ) { - return preg_replace( - "/([\\xc0-\\xff][\\x80-\\xbf]*)/e", - "' U8' . bin2hex( \"$1\" )", - mb_strtolower( $string ) ); - } else { - list( , $wikiLowerChars ) = Language::getCaseMaps(); - return preg_replace( - "/([\\xc0-\\xff][\\x80-\\xbf]*)/e", - "' U8' . bin2hex( strtr( \"\$1\", \$wikiLowerChars ) )", - $string ); - } + function hasWordBreaks() { + return false; } -} + + /** + * Eventually this should be a word segmentation; + * for now just treat each character as a word. + * @todo Fixme: only do this for Han characters... + */ + function wordSegmentation( $string ) { + $reg = "/([\\xc0-\\xff][\\x80-\\xbf]*)/"; + $s = self::insertSpace( $string, $reg ); + return $s; + } + + function normalizeForSearch( $string ) { + wfProfileIn( __METHOD__ ); + + // Double-width roman characters + $s = self::convertDoubleWidth( $string ); + $s = trim( $s ); + $s = parent::normalizeForSearch( $s ); + + wfProfileOut( __METHOD__ ); + return $s; + } +}
\ No newline at end of file |