From 396b28f3d881f5debd888ba9bb9b47c2d478a76f Mon Sep 17 00:00:00 2001 From: Pierre Schmitz Date: Mon, 15 Dec 2008 18:02:47 +0100 Subject: update to Mediawiki 1.13.3; some cleanups --- languages/LanguageZh.php | 103 ----------------------------------------------- 1 file changed, 103 deletions(-) delete mode 100644 languages/LanguageZh.php (limited to 'languages/LanguageZh.php') diff --git a/languages/LanguageZh.php b/languages/LanguageZh.php deleted file mode 100644 index b24c08e6..00000000 --- a/languages/LanguageZh.php +++ /dev/null @@ -1,103 +0,0 @@ -mTables = array(); - $this->mTables['zh-cn'] = $zh2CN; - $this->mTables['zh-tw'] = $zh2TW; - $this->mTables['zh-sg'] = array_merge($zh2CN, $zh2SG); - $this->mTables['zh-hk'] = array_merge($zh2TW, $zh2HK); - $this->mTables['zh'] = array(); - } - - function postLoadTables() { - $this->mTables['zh-sg'] = array_merge($this->mTables['zh-cn'], $this->mTables['zh-sg']); - $this->mTables['zh-hk'] = array_merge($this->mTables['zh-tw'], $this->mTables['zh-hk']); - } - - /* there shouldn't be any latin text in Chinese conversion, so no need - to mark anything - */ - function markNoConversion($text) { - return $text; - } - - function convertCategoryKey( $key ) { - return $this->autoConvert( $key, 'zh-cn' ); - } -} - - -/* class that handles both Traditional and Simplified Chinese - right now it only distinguish zh_cn, zh_tw, zh_sg and zh_hk. -*/ -class LanguageZh extends LanguageZh_cn { - - function __construct() { - global $wgHooks; - $this->mConverter = new ZhConverter($this, 'zh', - array('zh', 'zh-cn', 'zh-tw', 'zh-sg', 'zh-hk'), - array('zh'=>'zh-cn', - 'zh-cn'=>'zh-sg', - 'zh-sg'=>'zh-cn', - 'zh-tw'=>'zh-hk', - 'zh-hk'=>'zh-tw')); - $wgHooks['ArticleSaveComplete'][] = $this->mConverter; - } - - - # this should give much better diff info - function segmentForDiff( $text ) { - return preg_replace( - "/([\\xc0-\\xff][\\x80-\\xbf]*)/e", - "' ' .\"$1\"", $text); - } - - function unsegmentForDiff( $text ) { - return preg_replace( - "/ ([\\xc0-\\xff][\\x80-\\xbf]*)/e", - "\"$1\"", $text); - } - - // word segmentation - function stripForSearch( $string ) { - $fname="LanguageZh::stripForSearch"; - wfProfileIn( $fname ); - - // eventually this should be a word segmentation - // for now just treat each character as a word - $t = preg_replace( - "/([\\xc0-\\xff][\\x80-\\xbf]*)/e", - "' ' .\"$1\"", $string); - - //always convert to zh-cn before indexing. it should be - //better to use zh-cn for search, since conversion from - //Traditional to Simplified is less ambiguous than the - //other way around - - $t = $this->mConverter->autoConvert($t, 'zh-cn'); - $t = LanguageUtf8::stripForSearch( $t ); - wfProfileOut( $fname ); - return $t; - - } - - function convertForSearchResult( $termsArray ) { - $terms = implode( '|', $termsArray ); - $terms = implode( '|', $this->mConverter->autoConvertToAllVariants( $terms ) ); - $ret = array_unique( explode('|', $terms) ); - return $ret; - } - -} -?> -- cgit v1.2.3-54-g00ecf