diff options
Diffstat (limited to 'languages/LanguageConverter.php')
-rw-r--r-- | languages/LanguageConverter.php | 205 |
1 files changed, 137 insertions, 68 deletions
diff --git a/languages/LanguageConverter.php b/languages/LanguageConverter.php index 8b7d6cbe..96a71a09 100644 --- a/languages/LanguageConverter.php +++ b/languages/LanguageConverter.php @@ -47,28 +47,28 @@ class LanguageConverter { 'zh', ); - var $mMainLanguageCode; - var $mVariants, $mVariantFallbacks, $mVariantNames; - var $mTablesLoaded = false; - var $mTables; + public $mMainLanguageCode; + public $mVariants, $mVariantFallbacks, $mVariantNames; + public $mTablesLoaded = false; + public $mTables; // 'bidirectional' 'unidirectional' 'disable' for each variant - var $mManualLevel; + public $mManualLevel; /** * @var String: memcached key name */ - var $mCacheKey; - - var $mLangObj; - var $mFlags; - var $mDescCodeSep = ':', $mDescVarSep = ';'; - var $mUcfirst = false; - var $mConvRuleTitle = false; - var $mURLVariant; - var $mUserVariant; - var $mHeaderVariant; - var $mMaxDepth = 10; - var $mVarSeparatorPattern; + public $mCacheKey; + + public $mLangObj; + public $mFlags; + public $mDescCodeSep = ':', $mDescVarSep = ';'; + public $mUcfirst = false; + public $mConvRuleTitle = false; + public $mURLVariant; + public $mUserVariant; + public $mHeaderVariant; + public $mMaxDepth = 10; + public $mVarSeparatorPattern; const CACHE_VERSION_KEY = 'VERSION 6'; @@ -184,7 +184,7 @@ class LanguageConverter { /** * Get default variant. - * This function would not be affected by user's settings or headers + * This function would not be affected by user's settings * @return String: the default variant code */ public function getDefaultVariant() { @@ -192,6 +192,10 @@ class LanguageConverter { $req = $this->getURLVariant(); + if ( !$req ) { + $req = $this->getHeaderVariant(); + } + if ( $wgDefaultLanguageVariant && !$req ) { $req = $this->validateVariant( $wgDefaultLanguageVariant ); } @@ -242,7 +246,7 @@ class LanguageConverter { * @return Mixed: variant if one found, false otherwise. */ protected function getUserVariant() { - global $wgUser; + global $wgUser, $wgContLang; // memoizing this function wreaks havoc on parserTest.php /* @@ -254,8 +258,12 @@ class LanguageConverter { // Get language variant preference from logged in users // Don't call this on stub objects because that causes infinite // recursion during initialisation - if ( $wgUser->isLoggedIn() ) { - $ret = $wgUser->getOption( 'variant' ); + if ( $wgUser->isLoggedIn() ) { + if ( $this->mMainLanguageCode == $wgContLang->getCode() ) { + $ret = $wgUser->getOption( 'variant' ); + } else { + $ret = $wgUser->getOption( 'variant-' . $this->mMainLanguageCode ); + } } else { // figure out user lang without constructing wgLang to avoid // infinite recursion @@ -340,7 +348,7 @@ class LanguageConverter { } } - if( $this->guessVariant( $text, $toVariant ) ) { + if ( $this->guessVariant( $text, $toVariant ) ) { wfProfileOut( __METHOD__ ); return $text; } @@ -360,11 +368,11 @@ class LanguageConverter { // this one is needed when the text is inside an HTML markup $htmlfix = '|<[^>]+$|^[^<>]*>'; - // disable convert to variants between <code></code> tags + // disable convert to variants between <code> tags $codefix = '<code>.+?<\/code>|'; - // disable convertsion of <script type="text/javascript"> ... </script> + // disable conversion of <script> tags $scriptfix = '<script.*?>.*?<\/script>|'; - // disable conversion of <pre xxxx> ... </pre> + // disable conversion of <pre> tags $prefix = '<pre.*?>.*?<\/pre>|'; $reg = '/' . $codefix . $scriptfix . $prefix . @@ -406,7 +414,7 @@ class LanguageConverter { $attr = $attrs[$attrName]; // Don't convert URLs if ( !strpos( $attr, '://' ) ) { - $attr = $this->translate( $attr, $toVariant ); + $attr = $this->recursiveConvertTopLevel( $attr, $toVariant ); } // Remove HTML tags to avoid disrupting the layout @@ -546,24 +554,44 @@ class LanguageConverter { public function convertTitle( $title ) { $variant = $this->getPreferredVariant(); $index = $title->getNamespace(); - if ( $index === NS_MAIN ) { + if ( $index !== NS_MAIN ) { + $text = $this->convertNamespace( $index, $variant ) . ':'; + } else { $text = ''; + } + $text .= $this->translate( $title->getText(), $variant ); + return $text; + } + + /** + * Get the namespace display name in the preferred variant. + * + * @param $index int namespace id + * @param $variant string|null variant code or null for preferred variant + * @return String: namespace name for display + */ + public function convertNamespace( $index, $variant = null ) { + if ( $variant === null ) { + $variant = $this->getPreferredVariant(); + } + if ( $index === NS_MAIN ) { + return ''; } else { - // first let's check if a message has given us a converted name + // First check if a message gives a converted name in the target variant. + $nsConvMsg = wfMessage( 'conversion-ns' . $index )->inLanguage( $variant ); + if ( $nsConvMsg->exists() ) { + return $nsConvMsg->plain(); + } + // Then check if a message gives a converted name in content language + // which needs extra translation to the target variant. $nsConvMsg = wfMessage( 'conversion-ns' . $index )->inContentLanguage(); if ( $nsConvMsg->exists() ) { - $text = $nsConvMsg->plain(); - } else { - // the message does not exist, try retrieve it from the current - // variant's namespace names. - $langObj = $this->mLangObj->factory( $variant ); - $text = $langObj->getFormattedNsText( $index ); + return $this->translate( $nsConvMsg->plain(), $variant ); } - $text .= ':'; + // No message exists, retrieve it from the target variant's namespace names. + $langObj = $this->mLangObj->factory( $variant ); + return $langObj->getFormattedNsText( $index ); } - $text .= $title->getText(); - $text = $this->translate( $text, $variant ); - return $text; } /** @@ -597,6 +625,8 @@ class LanguageConverter { if ( $wgDisableLangConversion ) { return $text; } + // Reset converter state for a new converter run. + $this->mConvRuleTitle = false; return $this->recursiveConvertTopLevel( $text, $variant ); } @@ -621,14 +651,14 @@ class LanguageConverter { if ( $pos === false ) { // No more markup, append final segment $fragment = substr( $text, $startPos ); - $out .= $shouldConvert? $this->autoConvert( $fragment, $variant ): $fragment; + $out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment; return $out; } // Markup found // Append initial segment $fragment = substr( $text, $startPos, $pos - $startPos ); - $out .= $shouldConvert? $this->autoConvert( $fragment, $variant ): $fragment; + $out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment; // Advance position $startPos = $pos; @@ -664,7 +694,7 @@ class LanguageConverter { while ( $startPos < $length ) { $m = false; - preg_match( '/-\{|\}-/', $text, $m, PREG_OFFSET_CAPTURE, $startPos ); + preg_match( '/-\{|\}-/', $text, $m, PREG_OFFSET_CAPTURE, $startPos ); if ( !$m ) { // Unclosed rule break; @@ -803,14 +833,14 @@ class LanguageConverter { /** * Guess if a text is written in a variant. This should be implemented in subclasses. * - * @param string $text the text to be checked - * @param string $variant language code of the variant to be checked for - * @return bool true if $text appears to be written in $variant, false if not + * @param string $text the text to be checked + * @param string $variant language code of the variant to be checked for + * @return bool true if $text appears to be written in $variant, false if not * * @author Nikola Smolenski <smolensk@eunet.rs> * @since 1.19 */ - public function guessVariant($text, $variant) { + public function guessVariant( $text, $variant ) { return false; } @@ -918,14 +948,18 @@ class LanguageConverter { $parsed[$key] = true; if ( $subpage === '' ) { - $txt = MessageCache::singleton()->get( 'conversiontable', true, $code ); + $txt = MessageCache::singleton()->getMsgFromNamespace( $key, $code ); } else { $txt = false; $title = Title::makeTitleSafe( NS_MEDIAWIKI, $key ); if ( $title && $title->exists() ) { $revision = Revision::newFromTitle( $title ); if ( $revision ) { - $txt = $revision->getRawText(); + if ( $revision->getContentModel() == CONTENT_MODEL_WIKITEXT ) { + $txt = $revision->getContent( Revision::RAW )->getNativeData(); + } + + // @todo in the future, use a specialized content model, perhaps based on json! } } } @@ -1035,9 +1069,9 @@ class LanguageConverter { * MediaWiki:Conversiontable* is updated. * @private * - * @param $article Article object + * @param $page WikiPage object * @param $user Object: User object for the current user - * @param $text String: article text (?) + * @param $content Content: new page content * @param $summary String: edit summary of the edit * @param $isMinor Boolean: was the edit marked as minor? * @param $isWatch Boolean: did the user watch this page or not? @@ -1046,9 +1080,9 @@ class LanguageConverter { * @param $revision Object: new Revision object or null * @return Boolean: true */ - function OnArticleSaveComplete( $article, $user, $text, $summary, $isMinor, + function OnPageContentSaveComplete( $page, $user, $content, $summary, $isMinor, $isWatch, $section, $flags, $revision ) { - $titleobj = $article->getTitle(); + $titleobj = $page->getTitle(); if ( $titleobj->getNamespace() == NS_MEDIAWIKI ) { $title = $titleobj->getDBkey(); $t = explode( '/', $title, 3 ); @@ -1069,6 +1103,7 @@ class LanguageConverter { * @param $text String: text to armour against conversion * @return String: armoured text where { and } have been converted to * { and } + * @deprecated since 1.22 is no longer used */ public function armourMath( $text ) { // convert '-{' and '}-' to '-{' and '}-' to prevent @@ -1113,17 +1148,17 @@ class LanguageConverter { * @author fdcn <fdcn64@gmail.com>, PhiLiP <philip.npc@gmail.com> */ class ConverterRule { - var $mText; // original text in -{text}- - var $mConverter; // LanguageConverter object - var $mRuleDisplay = ''; - var $mRuleTitle = false; - var $mRules = '';// string : the text of the rules - var $mRulesAction = 'none'; - var $mFlags = array(); - var $mVariantFlags = array(); - var $mConvTable = array(); - var $mBidtable = array();// array of the translation in each variant - var $mUnidtable = array();// array of the translation in each variant + public $mText; // original text in -{text}- + public $mConverter; // LanguageConverter object + public $mRuleDisplay = ''; + public $mRuleTitle = false; + public $mRules = '';// string : the text of the rules + public $mRulesAction = 'none'; + public $mFlags = array(); + public $mVariantFlags = array(); + public $mConvTable = array(); + public $mBidtable = array();// array of the translation in each variant + public $mUnidtable = array();// array of the translation in each variant /** * Constructor @@ -1229,23 +1264,26 @@ class ConverterRule { $variants = $this->mConverter->mVariants; $varsep_pattern = $this->mConverter->getVarSeparatorPattern(); + // Split according to $varsep_pattern, but ignore semicolons from HTML entities + $rules = preg_replace( '/(&[#a-zA-Z0-9]+);/', "$1\x01", $rules ); $choice = preg_split( $varsep_pattern, $rules ); + $choice = str_replace( "\x01", ';', $choice ); foreach ( $choice as $c ) { - $v = explode( ':', $c, 2 ); + $v = explode( ':', $c, 2 ); if ( count( $v ) != 2 ) { // syntax error, skip continue; } $to = trim( $v[1] ); - $v = trim( $v[0] ); - $u = explode( '=>', $v, 2 ); + $v = trim( $v[0] ); + $u = explode( '=>', $v, 2 ); // if $to is empty, strtr() could return a wrong result if ( count( $u ) == 1 && $to && in_array( $v, $variants ) ) { $bidtable[$v] = $to; } elseif ( count( $u ) == 2 ) { $from = trim( $u[0] ); - $v = trim( $u[1] ); + $v = trim( $u[1] ); if ( array_key_exists( $v, $unidtable ) && !is_array( $unidtable[$v] ) && $to @@ -1331,6 +1369,37 @@ class ConverterRule { } /** + * Similar to getRuleConvertedStr(), but this prefers to use original + * page title if $variant === $this->mConverter->mMainLanguageCode + * and may return false in this case (so this title conversion rule + * will be ignored and the original title is shown). + * + * @since 1.22 + * @param $variant The variant code to display page title in + * @return String|false The converted title or false if just page name + */ + function getRuleConvertedTitle( $variant ) { + if ( $variant === $this->mConverter->mMainLanguageCode ) { + // If a string targeting exactly this variant is set, + // use it. Otherwise, just return false, so the real + // page name can be shown (and because variant === main, + // there'll be no further automatic conversion). + $disp = $this->getTextInBidtable( $variant ); + if ( $disp ) { + return $disp; + } + if ( array_key_exists( $variant, $this->mUnidtable ) ) { + $disp = array_values( $this->mUnidtable[$variant] ); + $disp = $disp[0]; + } + // Assigned above or still false. + return $disp; + } else { + return $this->getRuleConvertedStr( $variant ); + } + } + + /** * Generate conversion table for all text. * @private */ @@ -1412,7 +1481,7 @@ class ConverterRule { // then we check its fallback variants. $variantFallbacks = $this->mConverter->getVariantFallbacks( $variant ); - if( is_array( $variantFallbacks ) ) { + if ( is_array( $variantFallbacks ) ) { foreach ( $variantFallbacks as $variantFallback ) { // if current variant's fallback exist in flags if ( isset( $this->mVariantFlags[$variantFallback] ) ) { @@ -1482,7 +1551,7 @@ class ConverterRule { $this->mRuleDisplay = $this->getRuleConvertedStr( $variant ); break; case 'T': - $this->mRuleTitle = $this->getRuleConvertedStr( $variant ); + $this->mRuleTitle = $this->getRuleConvertedTitle( $variant ); $this->mRuleDisplay = ''; break; default: |