diff options
author | Pierre Schmitz <pierre@archlinux.de> | 2013-08-12 09:28:15 +0200 |
---|---|---|
committer | Pierre Schmitz <pierre@archlinux.de> | 2013-08-12 09:28:15 +0200 |
commit | 08aa4418c30cfc18ccc69a0f0f9cb9e17be6c196 (patch) | |
tree | 577a29fb579188d16003a209ce2a2e9c5b0aa2bd /languages/Language.php | |
parent | cacc939b34e315b85e2d72997811eb6677996cc1 (diff) |
Update to MediaWiki 1.21.1
Diffstat (limited to 'languages/Language.php')
-rw-r--r-- | languages/Language.php | 367 |
1 files changed, 267 insertions, 100 deletions
diff --git a/languages/Language.php b/languages/Language.php index 40d1f36c..1d2e7164 100644 --- a/languages/Language.php +++ b/languages/Language.php @@ -48,12 +48,13 @@ class FakeConverter { /** * @var Language */ - var $mLang; + public $mLang; function __construct( $langobj ) { $this->mLang = $langobj; } function autoConvertToAllVariants( $text ) { return array( $this->mLang->getCode() => $text ); } function convert( $t ) { return $t; } function convertTo( $text, $variant ) { return $text; } function convertTitle( $t ) { return $t->getPrefixedText(); } + function convertNamespace( $ns ) { return $this->mLang->getFormattedNsText( $ns ); } function getVariants() { return array( $this->mLang->getCode() ); } function getPreferredVariant() { return $this->mLang->getCode(); } function getDefaultVariant() { return $this->mLang->getCode(); } @@ -77,21 +78,21 @@ class Language { /** * @var LanguageConverter */ - var $mConverter; + public $mConverter; - var $mVariants, $mCode, $mLoaded = false; - var $mMagicExtensions = array(), $mMagicHookDone = false; + public $mVariants, $mCode, $mLoaded = false; + public $mMagicExtensions = array(), $mMagicHookDone = false; private $mHtmlCode = null; - var $dateFormatStrings = array(); - var $mExtendedSpecialPageAliases; + public $dateFormatStrings = array(); + public $mExtendedSpecialPageAliases; protected $namespaceNames, $mNamespaceIds, $namespaceAliases; /** * ReplacementArray object caches */ - var $transformData = array(); + public $transformData = array(); /** * @var LocalisationCache @@ -159,10 +160,10 @@ class Language { * @var array */ static public $durationIntervals = array( - 'millennia' => 31557600000, - 'centuries' => 3155760000, - 'decades' => 315576000, - 'years' => 31557600, // 86400 * 365.25 + 'millennia' => 31556952000, + 'centuries' => 3155695200, + 'decades' => 315569520, + 'years' => 31556952, // 86400 * ( 365 + ( 24 * 3 + 25 ) / 400 ) 'weeks' => 604800, 'days' => 86400, 'hours' => 3600, @@ -171,19 +172,28 @@ class Language { ); /** - * Get a cached language object for a given language code + * Get a cached or new language object for a given language code * @param $code String * @return Language */ static function factory( $code ) { - if ( !isset( self::$mLangObjCache[$code] ) ) { - if ( count( self::$mLangObjCache ) > 10 ) { - // Don't keep a billion objects around, that's stupid. - self::$mLangObjCache = array(); - } - self::$mLangObjCache[$code] = self::newFromCode( $code ); + global $wgDummyLanguageCodes, $wgLangObjCacheSize; + + if ( isset( $wgDummyLanguageCodes[$code] ) ) { + $code = $wgDummyLanguageCodes[$code]; } - return self::$mLangObjCache[$code]; + + // get the language object to process + $langObj = isset( self::$mLangObjCache[$code] ) + ? self::$mLangObjCache[$code] + : self::newFromCode( $code ); + + // merge the language object in to get it up front in the cache + self::$mLangObjCache = array_merge( array( $code => $langObj ), self::$mLangObjCache ); + // get rid of the oldest ones in case we have an overflow + self::$mLangObjCache = array_slice( self::$mLangObjCache, 0, $wgLangObjCacheSize, true ); + + return $langObj; } /** @@ -236,6 +246,78 @@ class Language { } /** + * Checks whether any localisation is available for that language tag + * in MediaWiki (MessagesXx.php exists). + * + * @param string $code Language tag (in lower case) + * @return bool Whether language is supported + * @since 1.21 + */ + public static function isSupportedLanguage( $code ) { + return $code === strtolower( $code ) && is_readable( self::getMessagesFileName( $code ) ); + } + + /** + * Returns true if a language code string is a well-formed language tag + * according to RFC 5646. + * This function only checks well-formedness; it doesn't check that + * language, script or variant codes actually exist in the repositories. + * + * Based on regexes by Mark Davis of the Unicode Consortium: + * http://unicode.org/repos/cldr/trunk/tools/java/org/unicode/cldr/util/data/langtagRegex.txt + * + * @param $code string + * @param $lenient boolean Whether to allow '_' as separator. The default is only '-'. + * + * @return bool + * @since 1.21 + */ + public static function isWellFormedLanguageTag( $code, $lenient = false ) { + $alpha = '[a-z]'; + $digit = '[0-9]'; + $alphanum = '[a-z0-9]'; + $x = 'x' ; # private use singleton + $singleton = '[a-wy-z]'; # other singleton + $s = $lenient ? '[-_]' : '-'; + + $language = "$alpha{2,8}|$alpha{2,3}$s$alpha{3}"; + $script = "$alpha{4}"; # ISO 15924 + $region = "(?:$alpha{2}|$digit{3})"; # ISO 3166-1 alpha-2 or UN M.49 + $variant = "(?:$alphanum{5,8}|$digit$alphanum{3})"; + $extension = "$singleton(?:$s$alphanum{2,8})+"; + $privateUse = "$x(?:$s$alphanum{1,8})+"; + + # Define certain grandfathered codes, since otherwise the regex is pretty useless. + # Since these are limited, this is safe even later changes to the registry -- + # the only oddity is that it might change the type of the tag, and thus + # the results from the capturing groups. + # http://www.iana.org/assignments/language-subtag-registry + + $grandfathered = "en{$s}GB{$s}oed" + . "|i{$s}(?:ami|bnn|default|enochian|hak|klingon|lux|mingo|navajo|pwn|tao|tay|tsu)" + . "|no{$s}(?:bok|nyn)" + . "|sgn{$s}(?:BE{$s}(?:fr|nl)|CH{$s}de)" + . "|zh{$s}min{$s}nan"; + + $variantList = "$variant(?:$s$variant)*"; + $extensionList = "$extension(?:$s$extension)*"; + + $langtag = "(?:($language)" + . "(?:$s$script)?" + . "(?:$s$region)?" + . "(?:$s$variantList)?" + . "(?:$s$extensionList)?" + . "(?:$s$privateUse)?)"; + + # The final breakdown, with capturing groups for each of these components + # The variants, extensions, grandfathered, and private-use may have interior '-' + + $root = "^(?:$langtag|$privateUse|$grandfathered)$"; + + return (bool)preg_match( "/$root/", strtolower( $code ) ); + } + + /** * Returns true if a language code string is of a valid form, whether or * not it exists. This includes codes which are used solely for * customisation via the MediaWiki namespace. @@ -276,7 +358,31 @@ class Language { throw new MWException( __METHOD__ . " must be passed a string, $type given$addmsg" ); } - return preg_match( '/^[a-z0-9-]+$/i', $code ); + return (bool)preg_match( '/^[a-z0-9-]+$/i', $code ); + } + + /** + * Returns true if a language code is an IETF tag known to MediaWiki. + * + * @param $code string + * + * @since 1.21 + * @return bool + */ + public static function isKnownLanguageTag( $tag ) { + static $coreLanguageNames; + + if ( $coreLanguageNames === null ) { + include( MWInit::compiledPath( 'languages/Names.php' ) ); + } + + if ( isset( $coreLanguageNames[$tag] ) + || self::fetchLanguageName( $tag, $tag ) !== '' + ) { + return true; + } + + return false; } /** @@ -356,7 +462,7 @@ class Language { * @deprecated in 1.19 */ function getFallbackLanguageCode() { - wfDeprecated( __METHOD__ ); + wfDeprecated( __METHOD__, '1.19' ); return self::getFallbackFor( $this->mCode ); } @@ -419,6 +525,16 @@ class Language { */ public function setNamespaces( array $namespaces ) { $this->namespaceNames = $namespaces; + $this->mNamespaceIds = null; + } + + /** + * Resets all of the namespace caches. Mainly used for testing + */ + public function resetNamespaces() { + $this->namespaceNames = null; + $this->mNamespaceIds = null; + $this->namespaceAliases = null; } /** @@ -457,9 +573,13 @@ class Language { * getNsText() except with '_' changed to ' ', useful for * producing output. * - * @param $index string + * <code> + * $mw_ns = $wgContLang->getFormattedNsText( NS_MEDIAWIKI_TALK ); + * echo $mw_ns; // prints 'MediaWiki talk' + * </code> * - * @return array + * @param int $index The array key of the namespace to return + * @return string Namespace name without underscores (empty string if namespace does not exist) */ function getFormattedNsText( $index ) { $ns = $this->getNsText( $index ); @@ -1965,7 +2085,7 @@ class Language { $segments = array(); foreach ( $intervals as $intervalName => $intervalValue ) { - $message = new Message( 'duration-' . $intervalName, array( $intervalValue ) ); + $message = wfMessage( 'duration-' . $intervalName )->numParams( $intervalValue ); $segments[] = $message->inLanguage( $this )->escaped(); } @@ -2405,19 +2525,7 @@ class Language { if ( is_array( $s ) ) { wfDebugDieBacktrace( 'Given array to checkTitleEncoding.' ); } - # Check for non-UTF-8 URLs - $ishigh = preg_match( '/[\x80-\xff]/', $s ); - if ( !$ishigh ) { - return $s; - } - - if ( function_exists( 'mb_check_encoding' ) ) { - $isutf8 = mb_check_encoding( $s, 'UTF-8' ); - } else { - $isutf8 = preg_match( '/^(?>[\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' . - '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s ); - } - if ( $isutf8 ) { + if ( StringUtils::isUtf8( $s ) ) { return $s; } @@ -2855,30 +2963,30 @@ class Language { return "<em>$text</em>"; } - /** - * Normally we output all numbers in plain en_US style, that is - * 293,291.235 for twohundredninetythreethousand-twohundredninetyone - * point twohundredthirtyfive. However this is not suitable for all - * languages, some such as Pakaran want ੨੯੩,੨੯੫.੨੩੫ and others such as - * Icelandic just want to use commas instead of dots, and dots instead - * of commas like "293.291,235". - * - * An example of this function being called: - * <code> - * wfMessage( 'message' )->numParams( $num )->text() - * </code> - * - * See LanguageGu.php for the Gujarati implementation and - * $separatorTransformTable on MessageIs.php for - * the , => . and . => , implementation. - * - * @todo check if it's viable to use localeconv() for the decimal - * separator thing. - * @param $number Mixed: the string to be formatted, should be an integer - * or a floating point number. - * @param $nocommafy Bool: set to true for special numbers like dates - * @return string - */ + /** + * Normally we output all numbers in plain en_US style, that is + * 293,291.235 for twohundredninetythreethousand-twohundredninetyone + * point twohundredthirtyfive. However this is not suitable for all + * languages, some such as Pakaran want ੨੯੩,੨੯੫.੨੩੫ and others such as + * Icelandic just want to use commas instead of dots, and dots instead + * of commas like "293.291,235". + * + * An example of this function being called: + * <code> + * wfMessage( 'message' )->numParams( $num )->text() + * </code> + * + * See LanguageGu.php for the Gujarati implementation and + * $separatorTransformTable on MessageIs.php for + * the , => . and . => , implementation. + * + * @todo check if it's viable to use localeconv() for the decimal + * separator thing. + * @param $number Mixed: the string to be formatted, should be an integer + * or a floating point number. + * @param $nocommafy Bool: set to true for special numbers like dates + * @return string + */ public function formatNum( $number, $nocommafy = false ) { global $wgTranslateNumerals; if ( !$nocommafy ) { @@ -2900,6 +3008,18 @@ class Language { } /** + * Front-end for non-commafied formatNum + * + * @param mixed $number the string to be formatted, should be an integer + * or a floating point number. + * @since 1.21 + * @return string + */ + public function formatNumNoSeparators( $number ) { + return $this->formatNum( $number, true ); + } + + /** * @param $number string * @return string */ @@ -2921,37 +3041,37 @@ class Language { /** * Adds commas to a given number * @since 1.19 - * @param $_ mixed + * @param $number mixed * @return string */ - function commafy( $_ ) { + function commafy( $number ) { $digitGroupingPattern = $this->digitGroupingPattern(); - if ( $_ === null ) { + if ( $number === null ) { return ''; } if ( !$digitGroupingPattern || $digitGroupingPattern === "###,###,###" ) { // default grouping is at thousands, use the same for ###,###,### pattern too. - return strrev( (string)preg_replace( '/(\d{3})(?=\d)(?!\d*\.)/', '$1,', strrev( $_ ) ) ); + return strrev( (string)preg_replace( '/(\d{3})(?=\d)(?!\d*\.)/', '$1,', strrev( $number ) ) ); } else { // Ref: http://cldr.unicode.org/translation/number-patterns $sign = ""; - if ( intval( $_ ) < 0 ) { + if ( intval( $number ) < 0 ) { // For negative numbers apply the algorithm like positive number and add sign. $sign = "-"; - $_ = substr( $_, 1 ); + $number = substr( $number, 1 ); } - $numberpart = array(); - $decimalpart = array(); + $integerPart = array(); + $decimalPart = array(); $numMatches = preg_match_all( "/(#+)/", $digitGroupingPattern, $matches ); - preg_match( "/\d+/", $_, $numberpart ); - preg_match( "/\.\d*/", $_, $decimalpart ); - $groupedNumber = ( count( $decimalpart ) > 0 ) ? $decimalpart[0]:""; - if ( $groupedNumber === $_ ) { + preg_match( "/\d+/", $number, $integerPart ); + preg_match( "/\.\d*/", $number, $decimalPart ); + $groupedNumber = ( count( $decimalPart ) > 0 ) ? $decimalPart[0]:""; + if ( $groupedNumber === $number ) { // the string does not have any number part. Eg: .12345 return $sign . $groupedNumber; } - $start = $end = strlen( $numberpart[0] ); + $start = $end = strlen( $integerPart[0] ); while ( $start > 0 ) { $match = $matches[0][$numMatches -1] ; $matchLen = strlen( $match ); @@ -2959,7 +3079,7 @@ class Language { if ( $start < 0 ) { $start = 0; } - $groupedNumber = substr( $_ , $start, $end -$start ) . $groupedNumber ; + $groupedNumber = substr( $number, $start, $end -$start ) . $groupedNumber ; $end = $start; if ( $numMatches > 1 ) { // use the last pattern for the rest of the number @@ -2972,6 +3092,7 @@ class Language { return $sign . $groupedNumber; } } + /** * @return String */ @@ -3003,25 +3124,26 @@ class Language { * @return string */ function listToText( array $l ) { - $s = ''; $m = count( $l ) - 1; - - if ( $m === 0 ) { - return $l[0]; - } elseif ( $m === 1 ) { - return $l[0] . $this->getMessageFromDB( 'and' ) . $this->getMessageFromDB( 'word-separator' ) . $l[1]; - } else { - for ( $i = $m; $i >= 0; $i-- ) { - if ( $i == $m ) { - $s = $l[$i]; - } elseif ( $i == $m - 1 ) { - $s = $l[$i] . $this->getMessageFromDB( 'and' ) . $this->getMessageFromDB( 'word-separator' ) . $s; - } else { - $s = $l[$i] . $this->getMessageFromDB( 'comma-separator' ) . $s; - } + if ( $m < 0 ) { + return ''; + } + if ( $m > 0 ) { + $and = $this->getMessageFromDB( 'and' ); + $space = $this->getMessageFromDB( 'word-separator' ); + if ( $m > 1 ) { + $comma = $this->getMessageFromDB( 'comma-separator' ); + } + } + $s = $l[$m]; + for ( $i = $m - 1; $i >= 0; $i-- ) { + if ( $i == $m - 1 ) { + $s = $l[$i] . $and . $space . $s; + } else { + $s = $l[$i] . $comma . $s; } - return $s; } + return $s; } /** @@ -3409,6 +3531,19 @@ class Language { if ( !count( $forms ) ) { return ''; } + + // Handle explicit n=pluralform cases + foreach ( $forms as $index => $form ) { + if ( preg_match( '/\d+=/i', $form ) ) { + $pos = strpos( $form, '=' ); + if ( substr( $form, 0, $pos ) === (string) $count ) { + return substr( $form, $pos + 1 ); + } + unset( $forms[$index] ); + } + } + $forms = array_values( $forms ); + $pluralForm = $this->getPluralForm( $count ); $pluralForm = min( $pluralForm, count( $forms ) - 1 ); return $forms[$pluralForm]; @@ -3459,8 +3594,22 @@ class Language { } } } - // If all else fails, return the original string. - return $str; + + // If all else fails, return a standard duration or timestamp description. + $time = strtotime( $str, 0 ); + if ( $time === false ) { // Unknown format. Return it as-is in case. + return $str; + } elseif ( $time !== strtotime( $str, 1 ) ) { // It's a relative timestamp. + // $time is relative to 0 so it's a duration length. + return $this->formatDuration( $time ); + } else { // It's an absolute timestamp. + if ( $time === 0 ) { + // wfTimestamp() handles 0 as current time instead of epoch. + return $this->timeanddate( '19700101000000' ); + } else { + return $this->timeanddate( $time ); + } + } } /** @@ -3525,12 +3674,22 @@ class Language { } /** + * Convert a namespace index to a string in the preferred variant + * + * @param $ns int + * @return string + */ + public function convertNamespace( $ns ) { + return $this->mConverter->convertNamespace( $ns ); + } + + /** * Check if this is a language with variants * * @return bool */ public function hasVariants() { - return sizeof( $this->getVariants() ) > 1; + return count( $this->getVariants() ) > 1; } /** @@ -3657,15 +3816,24 @@ class Language { } /** - * Enclose a string with the "no conversion" tag. This is used by - * various functions in the Parser + * Prepare external link text for conversion. When the text is + * a URL, it shouldn't be converted, and it'll be wrapped in + * the "raw" tag (-{R| }-) to prevent conversion. + * + * This function is called "markNoConversion" for historical + * reasons. * - * @param $text String: text to be tagged for no conversion - * @param $noParse bool + * @param $text String: text to be used for external link + * @param $noParse bool: wrap it without confirming it's a real URL first * @return string the tagged text */ public function markNoConversion( $text, $noParse = false ) { - return $this->mConverter->markNoConversion( $text, $noParse ); + // Excluding protocal-relative URLs may avoid many false positives. + if ( $noParse || preg_match( '/^(?:' . wfUrlProtocolsWithoutProtRel() . ')/', $text ) ) { + return $this->mConverter->markNoConversion( $text ); + } else { + return $text; + } } /** @@ -4229,5 +4397,4 @@ class Language { $form = CLDRPluralRuleEvaluator::evaluateCompiled( $number, $pluralRules ); return $form; } - } |