diff options
Diffstat (limited to 'languages/LanguageConverter.php')
-rw-r--r-- | languages/LanguageConverter.php | 896 |
1 files changed, 612 insertions, 284 deletions
diff --git a/languages/LanguageConverter.php b/languages/LanguageConverter.php index 08171c0a..bbf15971 100644 --- a/languages/LanguageConverter.php +++ b/languages/LanguageConverter.php @@ -1,58 +1,95 @@ <?php + /** - * @addtogroup Language - * - * @author Zhengzhu Feng <zhengzhu@gmail.com> - * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License - */ + * Contains the LanguageConverter class and ConverterRule class + * @ingroup Language + * + * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License + * @file + */ +/** + * base class for language convert + * @ingroup Language + * + * @author Zhengzhu Feng <zhengzhu@gmail.com> + * @maintainers fdcn <fdcn64@gmail.com>, shinjiman <shinjiman@gmail.com> + */ class LanguageConverter { var $mPreferredVariant=''; var $mMainLanguageCode; - var $mVariants, $mVariantFallbacks; + var $mVariants, $mVariantFallbacks, $mVariantNames; var $mTablesLoaded = false; var $mTables; var $mTitleDisplay=''; var $mDoTitleConvert=true, $mDoContentConvert=true; + var $mManualLevel; // 'bidirectional' 'unidirectional' 'disable' for each variants var $mTitleFromFlag = false; var $mCacheKey; var $mLangObj; var $mMarkup; var $mFlags; + var $mDescCodeSep = ':',$mDescVarSep = ';'; var $mUcfirst = false; - const CACHE_VERSION_KEY = 'VERSION 5'; + const CACHE_VERSION_KEY = 'VERSION 6'; /** - * Constructor + * Constructor * - * @param string $maincode the main language code of this language - * @param array $variants the supported variants of this language - * @param array $variantfallback the fallback language of each variant - * @param array $markup array defining the markup used for manual conversion + * @param string $maincode the main language code of this language + * @param array $variants the supported variants of this language + * @param array $variantfallback the fallback language of each variant + * @param array $markup array defining the markup used for manual conversion * @param array $flags array defining the custom strings that maps to the flags - * @access public - */ + * @param array $manualLevel limit for supported variants + * @public + */ function __construct($langobj, $maincode, $variants=array(), $variantfallbacks=array(), $markup=array(), - $flags = array()) { + $flags = array(), + $manualLevel = array() ) { $this->mLangObj = $langobj; $this->mMainLanguageCode = $maincode; $this->mVariants = $variants; $this->mVariantFallbacks = $variantfallbacks; + global $wgLanguageNames; + $this->mVariantNames = $wgLanguageNames; $this->mCacheKey = wfMemcKey( 'conversiontables', $maincode ); - $m = array('begin'=>'-{', 'flagsep'=>'|', 'codesep'=>':', - 'varsep'=>';', 'end'=>'}-'); + $m = array( + 'begin'=>'-{', + 'flagsep'=>'|', + 'unidsep'=>'=>', //for unidirectional conversion + 'codesep'=>':', + 'varsep'=>';', + 'end'=>'}-' + ); $this->mMarkup = array_merge($m, $markup); - $f = array('A'=>'A', 'T'=>'T', 'R' => 'R'); + $f = array( + // 'S' show converted text + // '+' add rules for alltext + // 'E' the gave flags is error + // these flags above are reserved for program + 'A'=>'A', // add rule for convert code (all text convert) + 'T'=>'T', // title convert + 'R'=>'R', // raw content + 'D'=>'D', // convert description (subclass implement) + '-'=>'-', // remove convert (not implement) + 'H'=>'H', // add rule for convert code (but no display in placed code ) + 'N'=>'N' // current variant name + ); $this->mFlags = array_merge($f, $flags); + foreach( $this->mVariants as $v) + $this->mManualLevel[$v]=array_key_exists($v,$manualLevel) + ?$manualLevel[$v] + :'bidirectional'; } /** - * @access public - */ + * @public + */ function getVariants() { return $this->mVariants; } @@ -60,31 +97,45 @@ class LanguageConverter { /** * in case some variant is not defined in the markup, we need * to have some fallback. for example, in zh, normally people - * will define zh-cn and zh-tw, but less so for zh-sg or zh-hk. - * when zh-sg is preferred but not defined, we will pick zh-cn + * will define zh-hans and zh-hant, but less so for zh-sg or zh-hk. + * when zh-sg is preferred but not defined, we will pick zh-hans * in this case. right now this is only used by zh. * * @param string $v the language code of the variant - * @return string the code of the fallback language or false if there is no fallback - * @private - */ - function getVariantFallback($v) { - return $this->mVariantFallbacks[$v]; + * @return string array the code of the fallback language or false if there is no fallback + * @public + */ + function getVariantFallbacks($v) { + if( isset( $this->mVariantFallbacks[$v] ) ) { + return $this->mVariantFallbacks[$v]; + } + return $this->mMainLanguageCode; } - /** * get preferred language variants. * @param boolean $fromUser Get it from $wgUser's preferences - * @return string the preferred language code - * @access public - */ + * @return string the preferred language code + * @public + */ function getPreferredVariant( $fromUser = true ) { global $wgUser, $wgRequest, $wgVariantArticlePath, $wgDefaultLanguageVariant; if($this->mPreferredVariant) return $this->mPreferredVariant; + // figure out user lang without constructing wgLang to avoid infinite recursion + if( $fromUser ) + $defaultUserLang = $wgUser->getOption( 'language' ); + else + $defaultUserLang = $this->mMainLanguageCode; + $userLang = $wgRequest->getVal( 'uselang', $defaultUserLang ); + // see if interface language is same as content, if not, prevent conversion + if( ! in_array( $userLang, $this->mVariants ) ){ + $this->mPreferredVariant = $this->mMainLanguageCode; // no conversion + return $this->mPreferredVariant; + } + // see if the preference is set in the request $req = $wgRequest->getText( 'variant' ); if( in_array( $req, $this->mVariants ) ) { @@ -140,15 +191,15 @@ class LanguageConverter { return $this->mMainLanguageCode; } - + /** - * dictionary-based conversion - * - * @param string $text the text to be converted - * @param string $toVariant the target language code - * @return string the converted text - * @private - */ + * dictionary-based conversion + * + * @param string $text the text to be converted + * @param string $toVariant the target language code + * @return string the converted text + * @private + */ function autoConvert($text, $toVariant=false) { $fname="LanguageConverter::autoConvert"; @@ -168,9 +219,9 @@ class LanguageConverter { 3. place holders created by the parser */ global $wgParser; - if (isset($wgParser)) + if (isset($wgParser) && $wgParser->UniqPrefix()!=''){ $marker = '|' . $wgParser->UniqPrefix() . '[\-a-zA-Z0-9]+'; - else + } else $marker = ""; // this one is needed when the text is inside an html markup @@ -180,9 +231,11 @@ class LanguageConverter { $codefix = '<code>.+?<\/code>|'; // disable convertsion of <script type="text/javascript"> ... </script> $scriptfix = '<script.*?>.*?<\/script>|'; + // disable conversion of <pre xxxx> ... </pre> + $prefix = '<pre.*?>.*?<\/pre>|'; + + $reg = '/'.$codefix . $scriptfix . $prefix . '<[^>]+>|&[a-zA-Z#][a-z0-9]+;' . $marker . $htmlfix . '/s'; - $reg = '/'.$codefix . $scriptfix . '<[^>]+>|&[a-zA-Z#][a-z0-9]+;' . $marker . $htmlfix . '/s'; - $matches = preg_split($reg, $text, -1, PREG_SPLIT_OFFSET_CAPTURE); $m = array_shift($matches); @@ -205,6 +258,7 @@ class LanguageConverter { * @param string $text Text to convert * @param string $variant Variant language code * @return string Translated text + * @private */ function translate( $text, $variant ) { wfProfileIn( __METHOD__ ); @@ -216,12 +270,12 @@ class LanguageConverter { } /** - * convert text to all supported variants - * - * @param string $text the text to be converted - * @return array of string - * @public - */ + * convert text to all supported variants + * + * @param string $text the text to be converted + * @return array of string + * @public + */ function autoConvertToAllVariants($text) { $fname="LanguageConverter::autoConvertToAllVariants"; wfProfileIn( $fname ); @@ -238,12 +292,12 @@ class LanguageConverter { } /** - * convert link text to all supported variants - * - * @param string $text the text to be converted - * @return array of string - * @public - */ + * convert link text to all supported variants + * + * @param string $text the text to be converted + * @return array of string + * @public + */ function convertLinkToAllVariants($text) { if( !$this->mTablesLoaded ) $this->loadTables(); @@ -271,7 +325,32 @@ class LanguageConverter { /** + * apply manual conversion + * @private + */ + function applyManualConv($convRule){ + // use syntax -{T|zh:TitleZh;zh-tw:TitleTw}- for custom conversion in title + $title = $convRule->getTitle(); + if($title){ + $this->mTitleFromFlag = true; + $this->mTitleDisplay = $title; + } + + //apply manual conversion table to global table + $convTable = $convRule->getConvTable(); + $action = $convRule->getRulesAction(); + foreach($convTable as $v=>$t) { + if( !in_array($v,$this->mVariants) )continue; + if( $action=="add" ) + $this->mTables[$v]->mergeArray($t); + elseif ( $action=="remove" ) + $this->mTables[$v]->removeArray($t); + } + } + + /** * Convert text using a parser object for context + * @public */ function parserConvert( $text, &$parser ) { global $wgDisableLangConversion; @@ -291,39 +370,31 @@ class LanguageConverter { } /** - * Parse flags with syntax -{FLAG| ... }- - * + * convert title + * @private */ - function parseFlags($marked){ - $flags = array(); - - // process flag only if the flag is valid - if(strlen($marked) < 2 || !(in_array($marked[0],$this->mFlags) && $marked[1]=='|' ) ) - return array($marked,array()); - - $tt = explode($this->mMarkup['flagsep'], $marked, 2); - - if(sizeof($tt) == 2) { - $f = explode($this->mMarkup['varsep'], $tt[0]); - foreach($f as $ff) { - $ff = trim($ff); - if(array_key_exists($ff, $this->mFlags) && - !array_key_exists($this->mFlags[$ff], $flags)) - $flags[] = $this->mFlags[$ff]; - } - $rules = $tt[1]; - } - else - $rules = $marked; + function convertTitle($text){ + // check for __NOTC__ tag + if( !$this->mDoTitleConvert ) { + $this->mTitleDisplay = $text; + return $text; + } - if( !in_array('R',$flags) ){ - //FIXME: may cause trouble here... - //strip since it interferes with the parsing, plus, - //all spaces should be stripped in this tag anyway. - $rules = str_replace(' ', '', $rules); - } + // use the title from the T flag if any + if($this->mTitleFromFlag){ + $this->mTitleFromFlag = false; + return $this->mTitleDisplay; + } - return array($rules,$flags); + global $wgRequest; + $isredir = $wgRequest->getText( 'redirect', 'yes' ); + $action = $wgRequest->getText( 'action' ); + if ( $isredir == 'no' || $action == 'edit' ) { + return $text; + } else { + $this->mTitleDisplay = $this->convert($text); + return $this->mTitleDisplay; + } } /** @@ -334,136 +405,50 @@ class LanguageConverter { * * syntax of the markup: * -{code1:text1;code2:text2;...}- or + * -{flags|code1:text1;code2:text2;...}- or * -{text}- in which case no conversion should take place for text - * - * @param string $text text to be converted - * @param bool $isTitle whether this conversion is for the article title - * @return string converted text - * @access public - */ + * + * @param string $text text to be converted + * @param bool $isTitle whether this conversion is for the article title + * @return string converted text + * @public + */ function convert( $text , $isTitle=false) { - $mw =& MagicWord::get( 'notitleconvert' ); + + $mw =& MagicWord::get( 'notitleconvert' ); if( $mw->matchAndRemove( $text ) ) $this->mDoTitleConvert = false; - - $mw =& MagicWord::get( 'nocontentconvert' ); + $mw =& MagicWord::get( 'nocontentconvert' ); if( $mw->matchAndRemove( $text ) ) { $this->mDoContentConvert = false; } // no conversion if redirecting - $mw =& MagicWord::get( 'redirect' ); + $mw =& MagicWord::get( 'redirect' ); if( $mw->matchStart( $text )) return $text; - if( $isTitle ) { - - // use the title from the T flag if any - if($this->mTitleFromFlag){ - $this->mTitleFromFlag = false; - return $this->mTitleDisplay; - } - - // check for __NOTC__ tag - if( !$this->mDoTitleConvert ) { - $this->mTitleDisplay = $text; - return $text; - } - - global $wgRequest; - $isredir = $wgRequest->getText( 'redirect', 'yes' ); - $action = $wgRequest->getText( 'action' ); - if ( $isredir == 'no' || $action == 'edit' ) { - return $text; - } - else { - $this->mTitleDisplay = $this->convert($text); - return $this->mTitleDisplay; - } - } + // for title convertion + if ($isTitle) return $this->convertTitle($text); $plang = $this->getPreferredVariant(); - if( isset( $this->mVariantFallbacks[$plang] ) ) { - $fallback = $this->mVariantFallbacks[$plang]; - } else { - $fallback = $this->mMainLanguageCode; - } - - $tarray = explode($this->mMarkup['begin'], $text); - $tfirst = array_shift($tarray); - if($this->mDoContentConvert) - $text = $this->autoConvert($tfirst); - else - $text = $tfirst; - foreach($tarray as $txt) { - $marked = explode($this->mMarkup['end'], $txt, 2); - - // strip the flags from syntax like -{T| ... }- - list($rules,$flags) = $this->parseFlags($marked[0]); - - // proces R flag: output raw content of -{ ... }- - if( in_array('R',$flags) ){ - $disp = $rules; - } else if( $this->mDoContentConvert){ - // parse the contents -{ ... }- - $carray = $this->parseManualRule($rules, $flags); - - $disp = ''; - if(array_key_exists($plang, $carray)) { - $disp = $carray[$plang]; - } else if(array_key_exists($fallback, $carray)) { - $disp = $carray[$fallback]; - } - } else{ - // if we don't do content convert, still strip the -{}- tags - $disp = $rules; - $flags = array(); - } - - if($disp) { - // use syntax -{T|zh:TitleZh;zh-tw:TitleTw}- for custom conversion in title - if(in_array('T', $flags)){ - $this->mTitleFromFlag = true; - $this->mTitleDisplay = $disp; - } - else - $text .= $disp; - - // use syntax -{A|zh:WordZh;zh-tw:WordTw}- to introduce a custom mapping between - // words WordZh and WordTw in the whole text - if(in_array('A', $flags)) { - - /* fill in the missing variants, if any, - with fallbacks */ - foreach($this->mVariants as $v) { - if(!array_key_exists($v, $carray)) { - $vf = $this->getVariantFallback($v); - if(array_key_exists($vf, $carray)) - $carray[$v] = $carray[$vf]; - } - } + $tarray = explode($this->mMarkup['end'], $text); + $text = ''; + foreach($tarray as $txt) { + $marked = explode($this->mMarkup['begin'], $txt, 2); - foreach($this->mVariants as $vfrom) { - if(!array_key_exists($vfrom, $carray)) - continue; - foreach($this->mVariants as $vto) { - if($vfrom == $vto) - continue; - if(!array_key_exists($vto, $carray)) - continue; - $this->mTables[$vto]->setPair($carray[$vfrom], $carray[$vto]); - } - } - } - } - else { + if( $this->mDoContentConvert ) + $text .= $this->autoConvert($marked[0],$plang); + else $text .= $marked[0]; - } + if(array_key_exists(1, $marked)){ - if( $this->mDoContentConvert ) - $text .= $this->autoConvert($marked[1]); - else - $text .= $marked[1]; + // strip the flags from syntax like -{T| ... }- + $crule = new ConverterRule($marked[1], $this); + $crule->parse($plang); + + $text .= $crule->getDisplay(); + $this->applyManualConv($crule); } } @@ -471,32 +456,6 @@ class LanguageConverter { } /** - * parse the manually marked conversion rule - * @param string $rule the text of the rule - * @return array of the translation in each variant - * @private - */ - function parseManualRule($rules, $flags=array()) { - - $choice = explode($this->mMarkup['varsep'], $rules); - $carray = array(); - if(sizeof($choice) == 1) { - /* a single choice */ - foreach($this->mVariants as $v) - $carray[$v] = $choice[0]; - } - else { - foreach($choice as $c) { - $v = explode($this->mMarkup['codesep'], $c); - if(sizeof($v) != 2) // syntax error, skip - continue; - $carray[trim($v[0])] = trim($v[1]); - } - } - return $carray; - } - - /** * if a language supports multiple variants, it is * possible that non-existing link in one variant * actually exists in another variant. this function @@ -505,7 +464,7 @@ class LanguageConverter { * @param string $link the name of the link * @param mixed $nt the title object of the link * @return null the input parameters may be modified upon return - * @access public + * @public */ function findVariantLink( &$link, &$nt ) { global $wgDisableLangConversion; @@ -546,29 +505,29 @@ class LanguageConverter { } /** - * returns language specific hash options - * - * @access public - */ + * returns language specific hash options + * + * @public + */ function getExtraHashOptions() { $variant = $this->getPreferredVariant(); return '!' . $variant ; } /** - * get title text as defined in the body of the article text - * - * @access public - */ + * get title text as defined in the body of the article text + * + * @public + */ function getParsedTitle() { return $this->mTitleDisplay; } /** - * a write lock to the cache - * - * @private - */ + * a write lock to the cache + * + * @private + */ function lockCache() { global $wgMemc; $success = false; @@ -581,10 +540,10 @@ class LanguageConverter { } /** - * unlock cache - * - * @private - */ + * unlock cache + * + * @private + */ function unlockCache() { global $wgMemc; $wgMemc->delete($this->mCacheKey . "lock"); @@ -592,20 +551,20 @@ class LanguageConverter { /** - * Load default conversion tables - * This method must be implemented in derived class - * - * @private - */ + * Load default conversion tables + * This method must be implemented in derived class + * + * @private + */ function loadDefaultTables() { $name = get_class($this); wfDie("Must implement loadDefaultTables() method in class $name"); } /** - * load conversion tables either from the cache or the disk - * @private - */ + * load conversion tables either from the cache or the disk + * @private + */ function loadTables($fromcache=true) { global $wgMemc; if( $this->mTablesLoaded ) @@ -642,16 +601,16 @@ class LanguageConverter { } /** - * Hook for post processig after conversion tables are loaded - * - */ + * Hook for post processig after conversion tables are loaded + * + */ function postLoadTables() {} /** - * Reload the conversion tables - * - * @private - */ + * Reload the conversion tables + * + * @private + */ function reloadTables() { if($this->mTables) unset($this->mTables); @@ -661,20 +620,18 @@ class LanguageConverter { /** - * parse the conversion table stored in the cache - * - * the tables should be in blocks of the following form: - - * -{ - * word => word ; - * word => word ; - * ... - * }- - * - * to make the tables more manageable, subpages are allowed - * and will be parsed recursively if $recursive=true - * - * @private + * parse the conversion table stored in the cache + * + * the tables should be in blocks of the following form: + * -{ + * word => word ; + * word => word ; + * ... + * }- + * + * to make the tables more manageable, subpages are allowed + * and will be parsed recursively if $recursive=true + * */ function parseCachedTable($code, $subpage='', $recursive=true) { global $wgMessageCache; @@ -690,8 +647,17 @@ class LanguageConverter { if(array_key_exists($key, $parsed)) return array(); - - $txt = $wgMessageCache->get( $key, true, true, true ); + if ( strpos( $code, '/' ) === false ) { + $txt = $wgMessageCache->get( 'Conversiontable', true, $code ); + } else { + $title = Title::makeTitleSafe( NS_MEDIAWIKI, "Conversiontable/$code" ); + if ( $title && $title->exists() ) { + $article = new Article( $title ); + $txt = $article->getContents(); + } else { + $txt = ''; + } + } // get all subpage links of the form // [[MediaWiki:conversiontable/zh-xx/...|...]] @@ -757,37 +723,33 @@ class LanguageConverter { * * @param string $text text to be tagged for no conversion * @return string the tagged text - */ + * @public + */ function markNoConversion($text, $noParse=false) { # don't mark if already marked if(strpos($text, $this->mMarkup['begin']) || strpos($text, $this->mMarkup['end'])) return $text; - $ret = $this->mMarkup['begin'] . $text . $this->mMarkup['end']; + $ret = $this->mMarkup['begin'] .'R|'. $text . $this->mMarkup['end']; return $ret; } /** * convert the sorting key for category links. this should make different * keys that are variants of each other map to the same key - */ + */ function convertCategoryKey( $key ) { return $key; } /** - * hook to refresh the cache of conversion tables when - * MediaWiki:conversiontable* is updated - * @private - */ + * hook to refresh the cache of conversion tables when + * MediaWiki:conversiontable* is updated + * @private + */ function OnArticleSaveComplete($article, $user, $text, $summary, $isminor, $iswatch, $section, $flags, $revision) { $titleobj = $article->getTitle(); if($titleobj->getNamespace() == NS_MEDIAWIKI) { - /* - global $wgContLang; // should be an LanguageZh. - if(get_class($wgContLang) != 'languagezh') - return true; - */ $title = $titleobj->getDBkey(); $t = explode('/', $title, 3); $c = count($t); @@ -803,11 +765,377 @@ class LanguageConverter { /** * Armour rendered math against conversion * Wrap math into rawoutput -{R| math }- syntax + * @public */ function armourMath($text){ $ret = $this->mMarkup['begin'] . 'R|' . $text . $this->mMarkup['end']; return $ret; } +} + +/** + * parser for rules of language conversion , parse rules in -{ }- tag + * @ingroup Language + * @author fdcn <fdcn64@gmail.com> + */ +class ConverterRule { + var $mText; // original text in -{text}- + var $mConverter; // LanguageConverter object + var $mManualCodeError='<strong class="error">code error!</strong>'; + var $mRuleDisplay = '',$mRuleTitle=false; + var $mRules = '';// string : the text of the rules + var $mRulesAction = 'none'; + var $mFlags = array(); + var $mConvTable = array(); + var $mBidtable = array();// array of the translation in each variant + var $mUnidtable = array();// array of the translation in each variant + + /** + * Constructor + * + * @param string $text the text between -{ and }- + * @param object $converter a LanguageConverter object + * @access public + */ + function __construct($text,$converter){ + $this->mText = $text; + $this->mConverter=$converter; + foreach($converter->mVariants as $v){ + $this->mConvTable[$v]=array(); + } + } + + /** + * check if variants array in convert array + * + * @param string $variant Variant language code + * @return string Translated text + * @public + */ + function getTextInBidtable($variants){ + if(is_string($variants)){ $variants=array($variants); } + if(!is_array($variants)) return false; + foreach ($variants as $variant){ + if(array_key_exists($variant, $this->mBidtable)){ + return $this->mBidtable[$variant]; + } + } + return false; + } + + /** + * Parse flags with syntax -{FLAG| ... }- + * @private + */ + function parseFlags(){ + $text = $this->mText; + if(strlen($text) < 2 ) { + $this->mFlags = array( 'R' ); + $this->mRules = $text; + return; + } + + $flags = array(); + $markup = $this->mConverter->mMarkup; + $validFlags = $this->mConverter->mFlags; + + $tt = explode($markup['flagsep'], $text, 2); + if(count($tt) == 2) { + $f = explode($markup['varsep'], $tt[0]); + foreach($f as $ff) { + $ff = trim($ff); + if(array_key_exists($ff, $validFlags) && + !in_array($validFlags[$ff], $flags)) + $flags[] = $validFlags[$ff]; + } + $rules = $tt[1]; + } else { + $rules = $text; + } + + //check flags + if( in_array('R',$flags) ){ + $flags = array('R');// remove other flags + } elseif ( in_array('N',$flags) ){ + $flags = array('N');// remove other flags + } elseif ( in_array('-',$flags) ){ + $flags = array('-');// remove other flags + } elseif (count($flags)==1 && $flags[0]=='T'){ + $flags[]='H'; + } elseif ( in_array('H',$flags) ){ + // replace A flag, and remove other flags except T + $temp=array('+','H'); + if(in_array('T',$flags)) $temp[] = 'T'; + if(in_array('D',$flags)) $temp[] = 'D'; + $flags = $temp; + } else { + if ( in_array('A',$flags)) { + $flags[]='+'; + $flags[]='S'; + } + if ( in_array('D',$flags) ) + $flags=array_diff($flags,array('S')); + } + if ( count($flags)==0 ) + $flags = array('S'); + $this->mRules=$rules; + $this->mFlags=$flags; + } + + /** + * generate conversion table + * @private + */ + function parseRules() { + $rules = $this->mRules; + $flags = $this->mFlags; + $bidtable = array(); + $unidtable = array(); + $markup = $this->mConverter->mMarkup; + + $choice = explode($markup['varsep'], $rules ); + foreach($choice as $c) { + $v = explode($markup['codesep'], $c); + if(count($v) != 2) + continue;// syntax error, skip + $to=trim($v[1]); + $v=trim($v[0]); + $u = explode($markup['unidsep'], $v); + if(count($u) == 1) { + $bidtable[$v] = $to; + } else if(count($u) == 2){ + $from=trim($u[0]);$v=trim($u[1]); + if( array_key_exists($v,$unidtable) && !is_array($unidtable[$v]) ) + $unidtable[$v]=array($from=>$to); + else + $unidtable[$v][$from]=$to; + } + // syntax error, pass + if (!array_key_exists($v,$this->mConverter->mVariantNames)){ + $bidtable = array(); + $unidtable = array(); + break; + } + } + $this->mBidtable = $bidtable; + $this->mUnidtable = $unidtable; + } + + /** + * @private + */ + function getRulesDesc(){ + $codesep = $this->mConverter->mDescCodeSep; + $varsep = $this->mConverter->mDescVarSep; + $text=''; + foreach($this->mBidtable as $k => $v) + $text .= $this->mConverter->mVariantNames[$k]."$codesep$v$varsep"; + foreach($this->mUnidtable as $k => $a) + foreach($a as $from=>$to) + $text.=$from.'⇒'.$this->mConverter->mVariantNames[$k]."$codesep$to$varsep"; + return $text; + } + + /** + * Parse rules conversion + * @private + */ + function getRuleConvertedStr($variant,$doConvert){ + $bidtable = $this->mBidtable; + $unidtable = $this->mUnidtable; + + if( count($bidtable) + count($unidtable) == 0 ){ + return $this->mRules; + } elseif ($doConvert){// the text converted + // display current variant in bidirectional array + $disp = $this->getTextInBidtable($variant); + // or display current variant in fallbacks + if(!$disp) + $disp = $this->getTextInBidtable( + $this->mConverter->getVariantFallbacks($variant)); + // or display current variant in unidirectional array + if(!$disp && array_key_exists($variant,$unidtable)){ + $disp = array_values($unidtable[$variant]); + $disp = $disp[0]; + } + // or display frist text under disable manual convert + if(!$disp && $this->mConverter->mManualLevel[$variant]=='disable') { + if(count($bidtable)>0){ + $disp = array_values($bidtable); + $disp = $disp[0]; + } else { + $disp = array_values($unidtable); + $disp = array_values($disp[0]); + $disp = $disp[0]; + } + } + return $disp; + } else {// no convert + return $this->mRules; + } + } + + /** + * generate conversion table for all text + * @private + */ + function generateConvTable(){ + $flags = $this->mFlags; + $bidtable = $this->mBidtable; + $unidtable = $this->mUnidtable; + $manLevel = $this->mConverter->mManualLevel; + + $vmarked=array(); + foreach($this->mConverter->mVariants as $v) { + /* for bidirectional array + fill in the missing variants, if any, + with fallbacks */ + if(!array_key_exists($v, $bidtable)) { + $variantFallbacks = $this->mConverter->getVariantFallbacks($v); + $vf = $this->getTextInBidtable($variantFallbacks); + if($vf) $bidtable[$v] = $vf; + } + + if(array_key_exists($v,$bidtable)){ + foreach($vmarked as $vo){ + // use syntax: -{A|zh:WordZh;zh-tw:WordTw}- + // or -{H|zh:WordZh;zh-tw:WordTw}- or -{-|zh:WordZh;zh-tw:WordTw}- + // to introduce a custom mapping between + // words WordZh and WordTw in the whole text + if($manLevel[$v]=='bidirectional'){ + $this->mConvTable[$v][$bidtable[$vo]]=$bidtable[$v]; + } + if($manLevel[$vo]=='bidirectional'){ + $this->mConvTable[$vo][$bidtable[$v]]=$bidtable[$vo]; + } + } + $vmarked[]=$v; + } + /*for unidirectional array + fill to convert tables */ + $allow_unid = $manLevel[$v]=='bidirectional' + || $manLevel[$v]=='unidirectional'; + if($allow_unid && array_key_exists($v,$unidtable)){ + $ct=$this->mConvTable[$v]; + $this->mConvTable[$v] = array_merge($ct,$unidtable[$v]); + } + } + } + + /** + * Parse rules and flags + * @public + */ + function parse($variant){ + if(!$variant) $variant = $this->mConverter->getPreferredVariant(); + $this->parseFlags(); + $flags = $this->mFlags; + if( !in_array('R',$flags) || !in_array('N',$flags) ){ + //FIXME: may cause trouble here... + //strip since it interferes with the parsing, plus, + //all spaces should be stripped in this tag anyway. + $this->mRules = str_replace(' ', '', $this->mRules); + // decode => HTML entities modified by Sanitizer::removeHTMLtags + $this->mRules = str_replace('=>','=>',$this->mRules); + + $this->parseRules(); + } + $rules = $this->mRules; + + if(count($this->mBidtable)==0 && count($this->mUnidtable)==0){ + if(in_array('+',$flags) || in_array('-',$flags)) + // fill all variants if text in -{A/H/-|text} without rules + foreach($this->mConverter->mVariants as $v) + $this->mBidtable[$v] = $rules; + elseif (!in_array('N',$flags) && !in_array('T',$flags) ) + $this->mFlags = $flags = array('R'); + } + + if( in_array('R',$flags) ) { + // if we don't do content convert, still strip the -{}- tags + $this->mRuleDisplay = $rules; + } elseif ( in_array('N',$flags) ){ + // proces N flag: output current variant name + $this->mRuleDisplay = $this->mConverter->mVariantNames[trim($rules)]; + } elseif ( in_array('D',$flags) ){ + // proces D flag: output rules description + $this->mRuleDisplay = $this->getRulesDesc(); + } elseif ( in_array('H',$flags) || in_array('-',$flags) ) { + // proces H,- flag or T only: output nothing + $this->mRuleDisplay = ''; + } elseif ( in_array('S',$flags) ){ + $this->mRuleDisplay = $this->getRuleConvertedStr($variant, + $this->mConverter->mDoContentConvert); + } else { + $this->mRuleDisplay= $this->mManualCodeError; + } + // proces T flag + if ( in_array('T',$flags) ) { + $this->mRuleTitle = $this->getRuleConvertedStr($variant, + $this->mConverter->mDoTitleConvert); + } + + if (in_array('-', $flags)) + $this->mRulesAction='remove'; + if (in_array('+', $flags)) + $this->mRulesAction='add'; + + $this->generateConvTable(); + } + + /** + * @public + */ + function hasRules(){ + // TODO: + } + + /** + * get display text on markup -{...}- + * @public + */ + function getDisplay(){ + return $this->mRuleDisplay; + } + /** + * get converted title + * @public + */ + function getTitle(){ + return $this->mRuleTitle; + } + + /** + * return how deal with conversion rules + * @public + */ + function getRulesAction(){ + return $this->mRulesAction; + } + + /** + * get conversion table ( bidirectional and unidirectional conversion table ) + * @public + */ + function getConvTable(){ + return $this->mConvTable; + } + + /** + * get conversion rules string + * @public + */ + function getRules(){ + return $this->mRules; + } + + /** + * get conversion flags + * @public + */ + function getFlags(){ + return $this->mFlags; + } } |