Update to MediaWiki 1.21.1

author: Pierre Schmitz <pierre@archlinux.de> 2013-08-12 09:28:15 +0200
committer: Pierre Schmitz <pierre@archlinux.de> 2013-08-12 09:28:15 +0200
commit: 08aa4418c30cfc18ccc69a0f0f9cb9e17be6c196 (patch)
tree: 577a29fb579188d16003a209ce2a2e9c5b0aa2bd /languages/Language.php
parent: cacc939b34e315b85e2d72997811eb6677996cc1 (diff)
1 files changed, 267 insertions, 100 deletions
diff --git a/languages/Language.php b/languages/Language.php
index 40d1f36c..1d2e7164 100644
--- a/languages/Language.php
+++ b/languages/Language.php
@@ -48,12 +48,13 @@ class FakeConverter {
 	/**
 	 * @var Language
 	 */
-	var $mLang;
+	public $mLang;
 	function __construct( $langobj ) { $this->mLang = $langobj; }
 	function autoConvertToAllVariants( $text ) { return array( $this->mLang->getCode() => $text ); }
 	function convert( $t ) { return $t; }
 	function convertTo( $text, $variant ) { return $text; }
 	function convertTitle( $t ) { return $t->getPrefixedText(); }
+	function convertNamespace( $ns ) { return $this->mLang->getFormattedNsText( $ns ); }
 	function getVariants() { return array( $this->mLang->getCode() ); }
 	function getPreferredVariant() { return $this->mLang->getCode(); }
 	function getDefaultVariant() { return $this->mLang->getCode(); }
@@ -77,21 +78,21 @@ class Language {
 	/**
 	 * @var LanguageConverter
 	 */
-	var $mConverter;
+	public $mConverter;
 
-	var $mVariants, $mCode, $mLoaded = false;
-	var $mMagicExtensions = array(), $mMagicHookDone = false;
+	public $mVariants, $mCode, $mLoaded = false;
+	public $mMagicExtensions = array(), $mMagicHookDone = false;
 	private $mHtmlCode = null;
 
-	var $dateFormatStrings = array();
-	var $mExtendedSpecialPageAliases;
+	public $dateFormatStrings = array();
+	public $mExtendedSpecialPageAliases;
 
 	protected $namespaceNames, $mNamespaceIds, $namespaceAliases;
 
 	/**
 	 * ReplacementArray object caches
 	 */
-	var $transformData = array();
+	public $transformData = array();
 
 	/**
 	 * @var LocalisationCache
@@ -159,10 +160,10 @@ class Language {
 	 * @var array
 	 */
 	static public $durationIntervals = array(
-		'millennia' => 31557600000,
-		'centuries' => 3155760000,
-		'decades' => 315576000,
-		'years' => 31557600, // 86400 * 365.25
+		'millennia' => 31556952000,
+		'centuries' => 3155695200,
+		'decades' => 315569520,
+		'years' => 31556952, // 86400 * ( 365 + ( 24 * 3 + 25 ) / 400 )
 		'weeks' => 604800,
 		'days' => 86400,
 		'hours' => 3600,
@@ -171,19 +172,28 @@ class Language {
 	);
 
 	/**
-	 * Get a cached language object for a given language code
+	 * Get a cached or new language object for a given language code
 	 * @param $code String
 	 * @return Language
 	 */
 	static function factory( $code ) {
-		if ( !isset( self::$mLangObjCache[$code] ) ) {
-			if ( count( self::$mLangObjCache ) > 10 ) {
-				// Don't keep a billion objects around, that's stupid.
-				self::$mLangObjCache = array();
-			}
-			self::$mLangObjCache[$code] = self::newFromCode( $code );
+		global $wgDummyLanguageCodes, $wgLangObjCacheSize;
+
+		if ( isset( $wgDummyLanguageCodes[$code] ) ) {
+			$code = $wgDummyLanguageCodes[$code];
 		}
-		return self::$mLangObjCache[$code];
+
+		// get the language object to process
+		$langObj = isset( self::$mLangObjCache[$code] )
+			? self::$mLangObjCache[$code]
+			: self::newFromCode( $code );
+
+		// merge the language object in to get it up front in the cache
+		self::$mLangObjCache = array_merge( array( $code => $langObj ), self::$mLangObjCache );
+		// get rid of the oldest ones in case we have an overflow
+		self::$mLangObjCache = array_slice( self::$mLangObjCache, 0, $wgLangObjCacheSize, true );
+
+		return $langObj;
 	}
 
 	/**
@@ -236,6 +246,78 @@ class Language {
 	}
 
 	/**
+	 * Checks whether any localisation is available for that language tag
+	 * in MediaWiki (MessagesXx.php exists).
+	 *
+	 * @param string $code Language tag (in lower case)
+	 * @return bool Whether language is supported
+	 * @since 1.21
+	 */
+	public static function isSupportedLanguage( $code ) {
+		return $code === strtolower( $code ) && is_readable( self::getMessagesFileName( $code ) ); // codes containing upper case fail before the file check
+	}
+
+	/**
+	 * Returns true if a language code string is a well-formed language tag
+	 * according to RFC 5646.
+	 * This function only checks well-formedness; it doesn't check that
+	 * language, script or variant codes actually exist in the repositories.
+	 *
+	 * Based on regexes by Mark Davis of the Unicode Consortium:
+	 * http://unicode.org/repos/cldr/trunk/tools/java/org/unicode/cldr/util/data/langtagRegex.txt
+	 *
+	 * @param $code string
+	 * @param $lenient boolean Whether to allow '_' as separator. The default is only '-'.
+	 *
+	 * @return bool
+	 * @since 1.21
+	 */
+	public static function isWellFormedLanguageTag( $code, $lenient = false ) {
+		$alpha = '[a-z]';
+		$digit = '[0-9]';
+		$alphanum = '[a-z0-9]';
+		$x = 'x'; # private use singleton
+		$singleton = '[a-wy-z]'; # other singleton
+		$s = $lenient ? '[-_]' : '-';
+
+		$language = "$alpha{2,8}|$alpha{2,3}$s$alpha{3}";
+		$script = "$alpha{4}"; # ISO 15924
+		$region = "(?:$alpha{2}|$digit{3})"; # ISO 3166-1 alpha-2 or UN M.49
+		$variant = "(?:$alphanum{5,8}|$digit$alphanum{3})";
+		$extension = "$singleton(?:$s$alphanum{2,8})+";
+		$privateUse = "$x(?:$s$alphanum{1,8})+";
+
+		# Define certain grandfathered codes, since otherwise the regex is pretty useless.
+		# Since these are limited, this is safe even with later changes to the registry --
+		# the only oddity is that it might change the type of the tag, and thus the results
+		# from the capturing groups. Literals are lower case because $code is lower-cased
+		# before matching. http://www.iana.org/assignments/language-subtag-registry
+
+		$grandfathered = "en{$s}gb{$s}oed"
+			. "|i{$s}(?:ami|bnn|default|enochian|hak|klingon|lux|mingo|navajo|pwn|tao|tay|tsu)"
+			. "|no{$s}(?:bok|nyn)"
+			. "|sgn{$s}(?:be{$s}(?:fr|nl)|ch{$s}de)"
+			. "|zh{$s}min{$s}nan";
+
+		$variantList = "$variant(?:$s$variant)*";
+		$extensionList = "$extension(?:$s$extension)*";
+
+		$langtag = "(?:($language)"
+			. "(?:$s$script)?"
+			. "(?:$s$region)?"
+			. "(?:$s$variantList)?"
+			. "(?:$s$extensionList)?"
+			. "(?:$s$privateUse)?)";
+
+		# The final breakdown, with capturing groups for each of these components
+		# The variants, extensions, grandfathered, and private-use may have interior '-'
+
+		$root = "^(?:$langtag|$privateUse|$grandfathered)$";
+
+		return (bool)preg_match( "/$root/", strtolower( $code ) );
+	}
+
+	/**
 	 * Returns true if a language code string is of a valid form, whether or
 	 * not it exists. This includes codes which are used solely for
 	 * customisation via the MediaWiki namespace.
@@ -276,7 +358,31 @@ class Language {
 			throw new MWException( __METHOD__ . " must be passed a string, $type given$addmsg" );
 		}
 
-		return preg_match( '/^[a-z0-9-]+$/i', $code );
+		return (bool)preg_match( '/^[a-z0-9-]+$/i', $code );
+	}
+
+	/**
+	 * Returns true if a language code is an IETF tag known to MediaWiki.
+	 *
+	 * @param $tag string
+	 *
+	 * @since 1.21
+	 * @return bool
+	 */
+	public static function isKnownLanguageTag( $tag ) {
+		// Still null on the first call, which triggers the include below.
+		// NOTE(review): presumably Names.php assigns $coreLanguageNames - confirm.
+		static $coreLanguageNames;
+		if ( $coreLanguageNames === null ) {
+			include( MWInit::compiledPath( 'languages/Names.php' ) );
+		}
+
+		// Array lookup first; the name lookup only runs on a miss.
+		if ( isset( $coreLanguageNames[$tag] ) ) {
+			return true;
+		}
+		// Otherwise the tag is known only if fetchLanguageName() yields a non-empty name.
+		return self::fetchLanguageName( $tag, $tag ) !== '';
+	}
 
 	/**
@@ -356,7 +462,7 @@ class Language {
 	 * @deprecated in 1.19
 	 */
 	function getFallbackLanguageCode() {
-		wfDeprecated( __METHOD__ );
+		wfDeprecated( __METHOD__, '1.19' );
 		return self::getFallbackFor( $this->mCode );
 	}
 
@@ -419,6 +525,16 @@ class Language {
 	 */
 	public function setNamespaces( array $namespaces ) {
 		$this->namespaceNames = $namespaces;
+		$this->mNamespaceIds = null;
+	}
+
+	/**
+	 * Resets all of the namespace caches. Mainly used for testing
+	 */
+	public function resetNamespaces() {
+		$this->namespaceNames = null;
+		$this->mNamespaceIds = null;
+		$this->namespaceAliases = null;
 	}
 
 	/**
@@ -457,9 +573,13 @@ class Language {
 	 * getNsText() except with '_' changed to ' ', useful for
 	 * producing output.
 	 *
-	 * @param $index string
+	 * <code>
+	 * $mw_ns = $wgContLang->getFormattedNsText( NS_MEDIAWIKI_TALK );
+	 * echo $mw_ns; // prints 'MediaWiki talk'
+	 * </code>
 	 *
-	 * @return array
+	 * @param int $index The array key of the namespace to return
+	 * @return string Namespace name without underscores (empty string if namespace does not exist)
 	 */
 	function getFormattedNsText( $index ) {
 		$ns = $this->getNsText( $index );
@@ -1965,7 +2085,7 @@ class Language {
 		$segments = array();
 
 		foreach ( $intervals as $intervalName => $intervalValue ) {
-			$message = new Message( 'duration-' . $intervalName, array( $intervalValue ) );
+			$message = wfMessage( 'duration-' . $intervalName )->numParams( $intervalValue );
 			$segments[] = $message->inLanguage( $this )->escaped();
 		}
 
@@ -2405,19 +2525,7 @@ class Language {
 		if ( is_array( $s ) ) {
 			wfDebugDieBacktrace( 'Given array to checkTitleEncoding.' );
 		}
-		# Check for non-UTF-8 URLs
-		$ishigh = preg_match( '/[\x80-\xff]/', $s );
-		if ( !$ishigh ) {
-			return $s;
-		}
-
-		if ( function_exists( 'mb_check_encoding' ) ) {
-			$isutf8 = mb_check_encoding( $s, 'UTF-8' );
-		} else {
-			$isutf8 = preg_match( '/^(?>[\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
-					'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s );
-		}
-		if ( $isutf8 ) {
+		if ( StringUtils::isUtf8( $s ) ) {
 			return $s;
 		}
 
@@ -2855,30 +2963,30 @@ class Language {
 		return "<em>$text</em>";
 	}
 
-	 /**
-	  * Normally we output all numbers in plain en_US style, that is
-	  * 293,291.235 for twohundredninetythreethousand-twohundredninetyone
-	  * point twohundredthirtyfive. However this is not suitable for all
-	  * languages, some such as Pakaran want ੨੯੩,੨੯੫.੨੩੫ and others such as
-	  * Icelandic just want to use commas instead of dots, and dots instead
-	  * of commas like "293.291,235".
-	  *
-	  * An example of this function being called:
-	  * <code>
-	  * wfMessage( 'message' )->numParams( $num )->text()
-	  * </code>
-	  *
-	  * See LanguageGu.php for the Gujarati implementation and
-	  * $separatorTransformTable on MessageIs.php for
-	  * the , => . and . => , implementation.
-	  *
-	  * @todo check if it's viable to use localeconv() for the decimal
-	  *       separator thing.
-	  * @param $number Mixed: the string to be formatted, should be an integer
-	  *        or a floating point number.
-	  * @param $nocommafy Bool: set to true for special numbers like dates
-	  * @return string
-	  */
+	/**
+	 * Normally we output all numbers in plain en_US style, that is
+	 * 293,291.235 for twohundredninetythreethousand-twohundredninetyone
+	 * point twohundredthirtyfive. However this is not suitable for all
+	 * languages, some such as Punjabi want ੨੯੩,੨੯੧.੨੩੫ and others such as
+	 * Icelandic just want to use commas instead of dots, and dots instead
+	 * of commas like "293.291,235".
+	 *
+	 * An example of this function being called:
+	 * <code>
+	 * wfMessage( 'message' )->numParams( $num )->text()
+	 * </code>
+	 *
+	 * See LanguageGu.php for the Gujarati implementation and
+	 * $separatorTransformTable on MessageIs.php for
+	 * the , => . and . => , implementation.
+	 *
+	 * @todo check if it's viable to use localeconv() for the decimal
+	 *       separator thing.
+	 * @param $number Mixed: the string to be formatted, should be an integer
+	 *        or a floating point number.
+	 * @param $nocommafy Bool: set to true for special numbers like dates
+	 * @return string
+	 */
 	public function formatNum( $number, $nocommafy = false ) {
 		global $wgTranslateNumerals;
 		if ( !$nocommafy ) {
@@ -2900,6 +3008,18 @@ class Language {
 	}
 
 	/**
+	 * Front-end for non-commafied formatNum
+	 *
+	 * @param mixed $number the string to be formatted, should be an integer
+	 *        or a floating point number.
+	 * @since 1.21
+	 * @return string
+	 */
+	public function formatNumNoSeparators( $number ) {
+		return $this->formatNum( $number, true ); // second argument is formatNum()'s $nocommafy flag
+	}
+
+	/**
 	 * @param $number string
 	 * @return string
 	 */
@@ -2921,37 +3041,37 @@ class Language {
 	/**
 	 * Adds commas to a given number
 	 * @since 1.19
-	 * @param $_ mixed
+	 * @param $number mixed
 	 * @return string
 	 */
-	function commafy( $_ ) {
+	function commafy( $number ) {
 		$digitGroupingPattern = $this->digitGroupingPattern();
-		if ( $_ === null ) {
+		if ( $number === null ) {
 			return '';
 		}
 
 		if ( !$digitGroupingPattern || $digitGroupingPattern === "###,###,###" ) {
 			// default grouping is at thousands,  use the same for ###,###,### pattern too.
-			return strrev( (string)preg_replace( '/(\d{3})(?=\d)(?!\d*\.)/', '$1,', strrev( $_ ) ) );
+			return strrev( (string)preg_replace( '/(\d{3})(?=\d)(?!\d*\.)/', '$1,', strrev( $number ) ) );
 		} else {
 			// Ref: http://cldr.unicode.org/translation/number-patterns
 			$sign = "";
-			if ( intval( $_ ) < 0 ) {
+			if ( intval( $number ) < 0 ) {
 				// For negative numbers apply the algorithm like positive number and add sign.
 				$sign =  "-";
-				$_ = substr( $_, 1 );
+				$number = substr( $number, 1 );
 			}
-			$numberpart = array();
-			$decimalpart = array();
+			$integerPart = array();
+			$decimalPart = array();
 			$numMatches = preg_match_all( "/(#+)/", $digitGroupingPattern, $matches );
-			preg_match( "/\d+/", $_, $numberpart );
-			preg_match( "/\.\d*/", $_, $decimalpart );
-			$groupedNumber = ( count( $decimalpart ) > 0 ) ? $decimalpart[0]:"";
-			if ( $groupedNumber  === $_ ) {
+			preg_match( "/\d+/", $number, $integerPart );
+			preg_match( "/\.\d*/", $number, $decimalPart );
+			$groupedNumber = ( count( $decimalPart ) > 0 ) ? $decimalPart[0]:"";
+			if ( $groupedNumber  === $number ) {
 				// the string does not have any number part. Eg: .12345
 				return $sign . $groupedNumber;
 			}
-			$start = $end = strlen( $numberpart[0] );
+			$start = $end = strlen( $integerPart[0] );
 			while ( $start > 0 ) {
 				$match = $matches[0][$numMatches -1] ;
 				$matchLen = strlen( $match );
@@ -2959,7 +3079,7 @@ class Language {
 				if ( $start < 0 ) {
 					$start = 0;
 				}
-				$groupedNumber = substr( $_ , $start, $end -$start ) . $groupedNumber ;
+				$groupedNumber = substr( $number, $start, $end -$start ) . $groupedNumber ;
 				$end = $start;
 				if ( $numMatches > 1 ) {
 					// use the last pattern for the rest of the number
@@ -2972,6 +3092,7 @@ class Language {
 			return $sign . $groupedNumber;
 		}
 	}
+
 	/**
 	 * @return String
 	 */
@@ -3003,25 +3124,26 @@ class Language {
 	 * @return string
 	 */
 	function listToText( array $l ) {
-		$s = '';
 		$m = count( $l ) - 1;
-
-		if ( $m === 0 ) {
-			return $l[0];
-		} elseif ( $m === 1 ) {
-			return $l[0] . $this->getMessageFromDB( 'and' ) . $this->getMessageFromDB( 'word-separator' ) . $l[1];
-		} else {
-			for ( $i = $m; $i >= 0; $i-- ) {
-				if ( $i == $m ) {
-					$s = $l[$i];
-				} elseif ( $i == $m - 1 ) {
-					$s = $l[$i] . $this->getMessageFromDB( 'and' ) . $this->getMessageFromDB( 'word-separator' ) . $s;
-				} else {
-					$s = $l[$i] . $this->getMessageFromDB( 'comma-separator' ) . $s;
-				}
+		if ( $m < 0 ) {
+			return '';
+		}
+		if ( $m > 0 ) {
+			$and = $this->getMessageFromDB( 'and' );
+			$space = $this->getMessageFromDB( 'word-separator' );
+			if ( $m > 1 ) {
+				$comma = $this->getMessageFromDB( 'comma-separator' );
+			}
+		}
+		$s = $l[$m];
+		for ( $i = $m - 1; $i >= 0; $i-- ) {
+			if ( $i == $m - 1 ) {
+				$s = $l[$i] . $and . $space . $s;
+			} else {
+				$s = $l[$i] . $comma . $s;
 			}
-			return $s;
 		}
+		return $s;
 	}
 
 	/**
@@ -3409,6 +3531,19 @@ class Language {
 		if ( !count( $forms ) ) {
 			return '';
 		}
+
+		// Handle explicit n=pluralform cases
+		foreach ( $forms as $index => $form ) {
+			if ( preg_match( '/\d+=/i', $form ) ) {
+				$pos = strpos( $form, '=' );
+				if ( substr( $form, 0, $pos ) === (string) $count ) {
+					return substr( $form, $pos + 1 );
+				}
+				unset( $forms[$index] );
+			}
+		}
+		$forms = array_values( $forms );
+
 		$pluralForm = $this->getPluralForm( $count );
 		$pluralForm = min( $pluralForm, count( $forms ) - 1 );
 		return $forms[$pluralForm];
@@ -3459,8 +3594,22 @@ class Language {
 				}
 			}
 		}
-		// If all else fails, return the original string.
-		return $str;
+
+		// If all else fails, return a standard duration or timestamp description.
+		$time = strtotime( $str, 0 );
+		if ( $time === false ) { // Unknown format. Return it as-is in case.
+			return $str;
+		} elseif ( $time !== strtotime( $str, 1 ) ) { // It's a relative timestamp.
+			// $time is relative to 0 so it's a duration length.
+			return $this->formatDuration( $time );
+		} else { // It's an absolute timestamp.
+			if ( $time === 0 ) {
+				// wfTimestamp() handles 0 as current time instead of epoch.
+				return $this->timeanddate( '19700101000000' );
+			} else {
+				return $this->timeanddate( $time );
+			}
+		}
 	}
 
 	/**
@@ -3525,12 +3674,22 @@ class Language {
 	}
 
 	/**
+	 * Convert a namespace index to a string in the preferred variant
+	 *
+	 * @param $ns int
+	 * @return string
+	 */
+	public function convertNamespace( $ns ) {
+		return $this->mConverter->convertNamespace( $ns ); // delegates to the converter object held in $mConverter
+	}
+
+	/**
 	 * Check if this is a language with variants
 	 *
 	 * @return bool
 	 */
 	public function hasVariants() {
-		return sizeof( $this->getVariants() ) > 1;
+		return count( $this->getVariants() ) > 1;
 	}
 
 	/**
@@ -3657,15 +3816,24 @@ class Language {
 	}
 
 	/**
-	 * Enclose a string with the "no conversion" tag. This is used by
-	 * various functions in the Parser
+	 * Prepare external link text for conversion. When the text is
+	 * a URL, it shouldn't be converted, and it'll be wrapped in
+	 * the "raw" tag (-{R| }-) to prevent conversion.
+	 *
+	 * This function is called "markNoConversion" for historical
+	 * reasons.
 	 *
-	 * @param $text String: text to be tagged for no conversion
-	 * @param $noParse bool
+	 * @param $text String: text to be used for external link
+	 * @param $noParse bool: wrap it without confirming it's a real URL first
 	 * @return string the tagged text
 	 */
 	public function markNoConversion( $text, $noParse = false ) {
-		return $this->mConverter->markNoConversion( $text, $noParse );
+		// Excluding protocol-relative URLs may avoid many false positives.
+		if ( $noParse || preg_match( '/^(?:' . wfUrlProtocolsWithoutProtRel() . ')/', $text ) ) {
+			return $this->mConverter->markNoConversion( $text );
+		} else {
+			return $text;
+		}
 	}
 
 	/**
@@ -4229,5 +4397,4 @@ class Language {
 		$form = CLDRPluralRuleEvaluator::evaluateCompiled( $number, $pluralRules );
 		return $form;
 	}
-
 }
author	Pierre Schmitz <pierre@archlinux.de>	2013-08-12 09:28:15 +0200
committer	Pierre Schmitz <pierre@archlinux.de>	2013-08-12 09:28:15 +0200
commit	08aa4418c30cfc18ccc69a0f0f9cb9e17be6c196 (patch)
tree	577a29fb579188d16003a209ce2a2e9c5b0aa2bd /languages/Language.php
parent	cacc939b34e315b85e2d72997811eb6677996cc1 (diff)