1 files changed, 26 insertions, 17 deletions
diff --git a/languages/classes/LanguageZh_hans.php b/languages/classes/LanguageZh_hans.php
index 983dd485..5b03d731 100644
--- a/languages/classes/LanguageZh_hans.php
+++ b/languages/classes/LanguageZh_hans.php
@@ -4,21 +4,30 @@
  * @ingroup Language
  */
 class LanguageZh_hans extends Language {
-	function stripForSearch( $string ) {
-		# MySQL fulltext index doesn't grok utf-8, so we
-		# need to fold cases and convert to hex
-		# we also separate characters as "words"
-		if( function_exists( 'mb_strtolower' ) ) {
-			return preg_replace(
-				"/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
-				"' U8' . bin2hex( \"$1\" )",
-				mb_strtolower( $string ) );
-		} else {
-			list( , $wikiLowerChars ) = Language::getCaseMaps();
-			return preg_replace(
-				"/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
-				"' U8' . bin2hex( strtr( \"\$1\", \$wikiLowerChars ) )",
-				$string );
-		}
+	function hasWordBreaks() {
+		return false;
 	}
-}
+
+	/**
+	 * Eventually this should perform real word segmentation;
+	 * for now each multibyte character is simply treated as a word.
+	 * @todo FIXME: only do this for Han characters.
+	 */
+	function wordSegmentation( $string ) {
+		$reg = "/([\\xc0-\\xff][\\x80-\\xbf]*)/";
+		$s = self::insertSpace( $string, $reg );
+		return $s;
+	}
+
+	function normalizeForSearch( $string ) {
+		wfProfileIn( __METHOD__ );
+
+		// Convert double-width roman characters before calling the parent normalization
+		$s = self::convertDoubleWidth( $string );
+		$s = trim( $s );
+		$s = parent::normalizeForSearch( $s );
+
+		wfProfileOut( __METHOD__ );
+		return $s;
+	}
+}
+\ No newline at end of file