diff options
Diffstat (limited to 'languages/Language.php')
-rw-r--r-- | languages/Language.php | 500 |
1 files changed, 349 insertions, 151 deletions
diff --git a/languages/Language.php b/languages/Language.php index 1d2e7164..c24dc180 100644 --- a/languages/Language.php +++ b/languages/Language.php @@ -30,10 +30,6 @@ if ( !defined( 'MEDIAWIKI' ) ) { exit( 1 ); } -# Read language names -global $wgLanguageNames; -require_once( __DIR__ . '/Names.php' ); - if ( function_exists( 'mb_strtoupper' ) ) { mb_internal_encoding( 'UTF-8' ); } @@ -44,18 +40,19 @@ if ( function_exists( 'mb_strtoupper' ) ) { * @ingroup Language */ class FakeConverter { - /** * @var Language */ public $mLang; function __construct( $langobj ) { $this->mLang = $langobj; } + function autoConvert( $text, $variant = false ) { return $text; } function autoConvertToAllVariants( $text ) { return array( $this->mLang->getCode() => $text ); } function convert( $t ) { return $t; } function convertTo( $text, $variant ) { return $text; } function convertTitle( $t ) { return $t->getPrefixedText(); } function convertNamespace( $ns ) { return $this->mLang->getFormattedNsText( $ns ); } function getVariants() { return array( $this->mLang->getCode() ); } + function getVariantFallbacks( $variant ) { return $this->mLang->getCode(); } function getPreferredVariant() { return $this->mLang->getCode(); } function getDefaultVariant() { return $this->mLang->getCode(); } function getURLVariant() { return ''; } @@ -66,7 +63,10 @@ class FakeConverter { function markNoConversion( $text, $noParse = false ) { return $text; } function convertCategoryKey( $key ) { return $key; } function convertLinkToAllVariants( $text ) { return $this->autoConvertToAllVariants( $text ); } + /** @deprecated since 1.22 is no longer used */ function armourMath( $text ) { return $text; } + function validateVariant( $variant = null ) { return $variant === $this->mLang->getCode() ? $variant : null; } + function translate( $text, $variant ) { return $text; } } /** @@ -82,7 +82,7 @@ class Language { public $mVariants, $mCode, $mLoaded = false; public $mMagicExtensions = array(), $mMagicHookDone = false; - private $mHtmlCode = null; + private $mHtmlCode = null, $mParentLanguage = false; public $dateFormatStrings = array(); public $mExtendedSpecialPageAliases; @@ -172,6 +172,14 @@ class Language { ); /** + * Cache for language fallbacks. + * @see Language::getFallbacksIncludingSiteLanguage + * @since 1.21 + * @var array + */ + static private $fallbackLanguageCache = array(); + + /** * Get a cached or new language object for a given language code * @param $code String * @return Language @@ -221,7 +229,7 @@ class Language { // Check if there is a language class for the code $class = self::classFromCode( $code ); self::preloadLanguageClass( $class ); - if ( MWInit::classExists( $class ) ) { + if ( class_exists( $class ) ) { $lang = new $class; return $lang; } @@ -235,7 +243,7 @@ class Language { $class = self::classFromCode( $fallbackCode ); self::preloadLanguageClass( $class ); - if ( MWInit::classExists( $class ) ) { + if ( class_exists( $class ) ) { $lang = Language::newFromCode( $fallbackCode ); $lang->setCode( $code ); return $lang; @@ -276,7 +284,7 @@ class Language { $alpha = '[a-z]'; $digit = '[0-9]'; $alphanum = '[a-z0-9]'; - $x = 'x' ; # private use singleton + $x = 'x'; # private use singleton $singleton = '[a-wy-z]'; # other singleton $s = $lenient ? '[-_]' : '-'; @@ -327,13 +335,19 @@ class Language { * @return bool */ public static function isValidCode( $code ) { - return - // People think language codes are html safe, so enforce it. - // Ideally we should only allow a-zA-Z0-9- - // but, .+ and other chars are often used for {{int:}} hacks - // see bugs 37564, 37587, 36938 + static $cache = array(); + if ( isset( $cache[$code] ) ) { + return $cache[$code]; + } + // People think language codes are html safe, so enforce it. + // Ideally we should only allow a-zA-Z0-9- + // but, .+ and other chars are often used for {{int:}} hacks + // see bugs 37564, 37587, 36938 + $cache[$code] = strcspn( $code, ":/\\\000&<>'\"" ) === strlen( $code ) && !preg_match( Title::getTitleInvalidRegex(), $code ); + + return $cache[$code]; } /** @@ -349,16 +363,16 @@ class Language { public static function isValidBuiltInCode( $code ) { if ( !is_string( $code ) ) { - $type = gettype( $code ); - if ( $type === 'object' ) { + if ( is_object( $code ) ) { $addmsg = " of class " . get_class( $code ); } else { $addmsg = ''; } + $type = gettype( $code ); throw new MWException( __METHOD__ . " must be passed a string, $type given$addmsg" ); } - return (bool)preg_match( '/^[a-z0-9-]+$/i', $code ); + return (bool)preg_match( '/^[a-z0-9-]{2,}$/i', $code ); } /** @@ -372,8 +386,15 @@ class Language { public static function isKnownLanguageTag( $tag ) { static $coreLanguageNames; + // Quick escape for invalid input to avoid exceptions down the line + // when code tries to process tags which are not valid at all. + if ( !self::isValidBuiltInCode( $tag ) ) { + return false; + } + if ( $coreLanguageNames === null ) { - include( MWInit::compiledPath( 'languages/Names.php' ) ); + global $IP; + include "$IP/languages/Names.php"; } if ( isset( $coreLanguageNames[$tag] ) @@ -409,10 +430,8 @@ class Language { return; } - if ( !defined( 'MW_COMPILED' ) ) { - if ( file_exists( "$IP/languages/classes/$class.php" ) ) { - include_once( "$IP/languages/classes/$class.php" ); - } + if ( file_exists( "$IP/languages/classes/$class.php" ) ) { + include_once "$IP/languages/classes/$class.php"; } } @@ -483,6 +502,9 @@ class Language { } /** + * Returns an array of localised namespaces indexed by their numbers. If the namespace is not + * available in localised form, it will be included in English. + * * @return array */ public function getNamespaces() { @@ -662,7 +684,18 @@ class Language { } } - $this->namespaceAliases = $aliases; + # Also add converted namespace names as aliases, to avoid confusion. + $convertedNames = array(); + foreach ( $this->getVariants() as $variant ) { + if ( $variant === $this->mCode ) { + continue; + } + foreach ( $this->getNamespaces() as $ns => $_ ) { + $convertedNames[$this->getConverter()->convertNamespace( $ns, $variant )] = $ns; + } + } + + $this->namespaceAliases = $aliases + $convertedNames; } return $this->namespaceAliases; } @@ -746,20 +779,6 @@ class Language { /** * @return array */ - function getQuickbarSettings() { - return array( - $this->getMessage( 'qbsettings-none' ), - $this->getMessage( 'qbsettings-fixedleft' ), - $this->getMessage( 'qbsettings-fixedright' ), - $this->getMessage( 'qbsettings-floatingleft' ), - $this->getMessage( 'qbsettings-floatingright' ), - $this->getMessage( 'qbsettings-directionality' ) - ); - } - - /** - * @return array - */ function getDatePreferences() { return self::$dataCache->getItem( $this->mCode, 'datePreferences' ); } @@ -857,7 +876,8 @@ class Language { static $coreLanguageNames; if ( $coreLanguageNames === null ) { - include( MWInit::compiledPath( 'languages/Names.php' ) ); + global $IP; + include "$IP/languages/Names.php"; } $names = array(); @@ -1033,8 +1053,8 @@ class Language { * internationalisation, a reduced set of format characters, and a better * escaping format. * - * Supported format characters are dDjlNwzWFmMntLoYyaAgGhHiscrU. See the - * PHP manual for definitions. There are a number of extensions, which + * Supported format characters are dDjlNwzWFmMntLoYyaAgGhHiscrUeIOPTZ. See + * the PHP manual for definitions. There are a number of extensions, which * start with "x": * * xn Do not translate digits of the next numeric format character @@ -1080,22 +1100,25 @@ class Language { * Backslash escaping is also supported. * * Input timestamp is assumed to be pre-normalized to the desired local - * time zone, if any. + * time zone, if any. Note that the format characters crUeIOPTZ will assume + * $ts is UTC if $zone is not given. * * @param $format String * @param $ts String: 14-character timestamp * YYYYMMDDHHMMSS * 01234567890123 + * @param $zone DateTimeZone: Timezone of $ts * @todo handling of "o" format character for Iranian, Hebrew, Hijri & Thai? * + * @throws MWException * @return string */ - function sprintfDate( $format, $ts ) { + function sprintfDate( $format, $ts, DateTimeZone $zone = null ) { $s = ''; $raw = false; $roman = false; $hebrewNum = false; - $unix = false; + $dateTimeObj = false; $rawToggle = false; $iranian = false; $hebrew = false; @@ -1103,6 +1126,15 @@ class Language { $thai = false; $minguo = false; $tenno = false; + + if ( strlen( $ts ) !== 14 ) { + throw new MWException( __METHOD__ . ": The timestamp $ts should have 14 characters" ); + } + + if ( !ctype_digit( $ts ) ) { + throw new MWException( __METHOD__ . ": The timestamp $ts should be a number" ); + } + for ( $p = 0; $p < strlen( $format ); $p++ ) { $num = false; $code = $format[$p]; @@ -1134,15 +1166,21 @@ class Language { $s .= $this->getMonthNameGen( substr( $ts, 4, 2 ) ); break; case 'xjx': - if ( !$hebrew ) $hebrew = self::tsToHebrew( $ts ); + if ( !$hebrew ) { + $hebrew = self::tsToHebrew( $ts ); + } $s .= $this->getHebrewCalendarMonthNameGen( $hebrew[1] ); break; case 'd': $num = substr( $ts, 6, 2 ); break; case 'D': - if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts ); - $s .= $this->getWeekdayAbbreviation( gmdate( 'w', $unix ) + 1 ); + if ( !$dateTimeObj ) { + $dateTimeObj = DateTime::createFromFormat( + 'YmdHis', $ts, $zone ?: new DateTimeZone( 'UTC' ) + ); + } + $s .= $this->getWeekdayAbbreviation( $dateTimeObj->format( 'w' ) + 1 ); break; case 'j': $num = intval( substr( $ts, 6, 2 ) ); @@ -1166,35 +1204,12 @@ class Language { $num = $hebrew[2]; break; case 'l': - if ( !$unix ) { - $unix = wfTimestamp( TS_UNIX, $ts ); - } - $s .= $this->getWeekdayName( gmdate( 'w', $unix ) + 1 ); - break; - case 'N': - if ( !$unix ) { - $unix = wfTimestamp( TS_UNIX, $ts ); + if ( !$dateTimeObj ) { + $dateTimeObj = DateTime::createFromFormat( + 'YmdHis', $ts, $zone ?: new DateTimeZone( 'UTC' ) + ); } - $w = gmdate( 'w', $unix ); - $num = $w ? $w : 7; - break; - case 'w': - if ( !$unix ) { - $unix = wfTimestamp( TS_UNIX, $ts ); - } - $num = gmdate( 'w', $unix ); - break; - case 'z': - if ( !$unix ) { - $unix = wfTimestamp( TS_UNIX, $ts ); - } - $num = gmdate( 'z', $unix ); - break; - case 'W': - if ( !$unix ) { - $unix = wfTimestamp( TS_UNIX, $ts ); - } - $num = gmdate( 'W', $unix ); + $s .= $this->getWeekdayName( $dateTimeObj->format( 'w' ) + 1 ); break; case 'F': $s .= $this->getMonthName( substr( $ts, 4, 2 ) ); @@ -1244,30 +1259,12 @@ class Language { } $num = $hebrew[1]; break; - case 't': - if ( !$unix ) { - $unix = wfTimestamp( TS_UNIX, $ts ); - } - $num = gmdate( 't', $unix ); - break; case 'xjt': if ( !$hebrew ) { $hebrew = self::tsToHebrew( $ts ); } $num = $hebrew[3]; break; - case 'L': - if ( !$unix ) { - $unix = wfTimestamp( TS_UNIX, $ts ); - } - $num = gmdate( 'L', $unix ); - break; - case 'o': - if ( !$unix ) { - $unix = wfTimestamp( TS_UNIX, $ts ); - } - $num = gmdate( 'o', $unix ); - break; case 'Y': $num = substr( $ts, 0, 4 ); break; @@ -1343,22 +1340,36 @@ class Language { $num = substr( $ts, 12, 2 ); break; case 'c': - if ( !$unix ) { - $unix = wfTimestamp( TS_UNIX, $ts ); - } - $s .= gmdate( 'c', $unix ); - break; case 'r': - if ( !$unix ) { - $unix = wfTimestamp( TS_UNIX, $ts ); + case 'e': + case 'O': + case 'P': + case 'T': + // Pass through string from $dateTimeObj->format() + if ( !$dateTimeObj ) { + $dateTimeObj = DateTime::createFromFormat( + 'YmdHis', $ts, $zone ?: new DateTimeZone( 'UTC' ) + ); } - $s .= gmdate( 'r', $unix ); + $s .= $dateTimeObj->format( $code ); break; + case 'w': + case 'N': + case 'z': + case 'W': + case 't': + case 'L': + case 'o': case 'U': - if ( !$unix ) { - $unix = wfTimestamp( TS_UNIX, $ts ); + case 'I': + case 'Z': + // Pass through number from $dateTimeObj->format() + if ( !$dateTimeObj ) { + $dateTimeObj = DateTime::createFromFormat( + 'YmdHis', $ts, $zone ?: new DateTimeZone( 'UTC' ) + ); } - $num = $unix; + $num = $dateTimeObj->format( $code ); break; case '\\': # Backslash escaping @@ -1673,7 +1684,7 @@ class Language { private static function hebrewYearStart( $year ) { $a = intval( ( 12 * ( $year - 1 ) + 17 ) % 19 ); $b = intval( ( $year - 1 ) % 4 ); - $m = 32.044093161144 + 1.5542417966212 * $a + $b / 4.0 - 0.0031777940220923 * ( $year - 1 ); + $m = 32.044093161144 + 1.5542417966212 * $a + $b / 4.0 - 0.0031777940220923 * ( $year - 1 ); if ( $m < 0 ) { $m--; } @@ -1847,13 +1858,13 @@ class Language { } if ( strlen( $s ) == 2 ) { $str = $s . "'"; - } else { + } else { $str = substr( $s, 0, strlen( $s ) - 2 ) . '"'; $str .= substr( $s, strlen( $s ) - 2, 2 ); } $start = substr( $str, 0, strlen( $str ) - 2 ); $end = substr( $str, strlen( $str ) - 2 ); - switch( $end ) { + switch ( $end ) { case 'כ': $str = $start . 'ך'; break; @@ -1936,12 +1947,12 @@ class Language { # will normalize out-of-range values so we don't have to split $minDiff # into hours and minutes. $t = mktime( ( - (int)substr( $ts, 8, 2 ) ), # Hours - (int)substr( $ts, 10, 2 ) + $minDiff, # Minutes - (int)substr( $ts, 12, 2 ), # Seconds - (int)substr( $ts, 4, 2 ), # Month - (int)substr( $ts, 6, 2 ), # Day - (int)substr( $ts, 0, 4 ) ); # Year + (int)substr( $ts, 8, 2 ) ), # Hours + (int)substr( $ts, 10, 2 ) + $minDiff, # Minutes + (int)substr( $ts, 12, 2 ), # Seconds + (int)substr( $ts, 4, 2 ), # Month + (int)substr( $ts, 6, 2 ), # Day + (int)substr( $ts, 0, 4 ) ); # Year $date = date( 'YmdHis', $t ); wfRestoreWarnings(); @@ -1992,6 +2003,8 @@ class Language { * @param $type string May be date, time or both * @param $pref string The format name as it appears in Messages*.php * + * @since 1.22 New type 'pretty' that provides a more readable timestamp format + * * @return string */ function getDateFormatString( $type, $pref ) { @@ -2001,7 +2014,12 @@ class Language { $df = self::$dataCache->getSubitem( $this->mCode, 'dateFormats', "$pref $type" ); } else { $df = self::$dataCache->getSubitem( $this->mCode, 'dateFormats', "$pref $type" ); - if ( is_null( $df ) ) { + + if ( $type === 'pretty' && $df === null ) { + $df = $this->getDateFormatString( 'date', $pref ); + } + + if ( $df === null ) { $pref = $this->getDefaultDateFormat(); $df = self::$dataCache->getSubitem( $this->mCode, 'dateFormats', "$pref $type" ); } @@ -2085,6 +2103,8 @@ class Language { $segments = array(); foreach ( $intervals as $intervalName => $intervalValue ) { + // Messages: duration-seconds, duration-minutes, duration-hours, duration-days, duration-weeks, + // duration-years, duration-decades, duration-centuries, duration-millennia $message = wfMessage( 'duration-' . $intervalName )->numParams( $intervalValue ); $segments[] = $message->inLanguage( $this )->escaped(); } @@ -2235,6 +2255,81 @@ class Language { } /** + * Convert an MWTimestamp into a pretty human-readable timestamp using + * the given user preferences and relative base time. + * + * DO NOT USE THIS FUNCTION DIRECTLY. Instead, call MWTimestamp::getHumanTimestamp + * on your timestamp object, which will then call this function. Calling + * this function directly will cause hooks to be skipped over. + * + * @see MWTimestamp::getHumanTimestamp + * @param MWTimestamp $ts Timestamp to prettify + * @param MWTimestamp $relativeTo Base timestamp + * @param User $user User preferences to use + * @return string Human timestamp + * @since 1.22 + */ + public function getHumanTimestamp( MWTimestamp $ts, MWTimestamp $relativeTo, User $user ) { + $diff = $ts->diff( $relativeTo ); + $diffDay = (bool)( (int)$ts->timestamp->format( 'w' ) - (int)$relativeTo->timestamp->format( 'w' ) ); + $days = $diff->days ?: (int)$diffDay; + if ( $diff->invert || $days > 5 && $ts->timestamp->format( 'Y' ) !== $relativeTo->timestamp->format( 'Y' ) ) { + // Timestamps are in different years: use full timestamp + // Also do full timestamp for future dates + /** + * @FIXME Add better handling of future timestamps. + */ + $format = $this->getDateFormatString( 'both', $user->getDatePreference() ?: 'default' ); + $ts = $this->sprintfDate( $format, $ts->getTimestamp( TS_MW ) ); + } elseif ( $days > 5 ) { + // Timestamps are in same year, but more than 5 days ago: show day and month only. + $format = $this->getDateFormatString( 'pretty', $user->getDatePreference() ?: 'default' ); + $ts = $this->sprintfDate( $format, $ts->getTimestamp( TS_MW ) ); + } elseif ( $days > 1 ) { + // Timestamp within the past week: show the day of the week and time + $format = $this->getDateFormatString( 'time', $user->getDatePreference() ?: 'default' ); + $weekday = self::$mWeekdayMsgs[$ts->timestamp->format( 'w' )]; + // Messages: + // sunday-at, monday-at, tuesday-at, wednesday-at, thursday-at, friday-at, saturday-at + $ts = wfMessage( "$weekday-at" ) + ->inLanguage( $this ) + ->params( $this->sprintfDate( $format, $ts->getTimestamp( TS_MW ) ) ) + ->text(); + } elseif ( $days == 1 ) { + // Timestamp was yesterday: say 'yesterday' and the time. + $format = $this->getDateFormatString( 'time', $user->getDatePreference() ?: 'default' ); + $ts = wfMessage( 'yesterday-at' ) + ->inLanguage( $this ) + ->params( $this->sprintfDate( $format, $ts->getTimestamp( TS_MW ) ) ) + ->text(); + } elseif ( $diff->h > 1 || $diff->h == 1 && $diff->i > 30 ) { + // Timestamp was today, but more than 90 minutes ago: say 'today' and the time. + $format = $this->getDateFormatString( 'time', $user->getDatePreference() ?: 'default' ); + $ts = wfMessage( 'today-at' ) + ->inLanguage( $this ) + ->params( $this->sprintfDate( $format, $ts->getTimestamp( TS_MW ) ) ) + ->text(); + + // From here on in, the timestamp was soon enough ago so that we can simply say + // XX units ago, e.g., "2 hours ago" or "5 minutes ago" + } elseif ( $diff->h == 1 ) { + // Less than 90 minutes, but more than an hour ago. + $ts = wfMessage( 'hours-ago' )->inLanguage( $this )->numParams( 1 )->text(); + } elseif ( $diff->i >= 1 ) { + // A few minutes ago. + $ts = wfMessage( 'minutes-ago' )->inLanguage( $this )->numParams( $diff->i )->text(); + } elseif ( $diff->s >= 30 ) { + // Less than a minute, but more than 30 sec ago. + $ts = wfMessage( 'seconds-ago' )->inLanguage( $this )->numParams( $diff->s )->text(); + } else { + // Less than 30 seconds ago. + $ts = wfMessage( 'just-now' )->text(); + } + + return $ts; + } + + /** * @param $key string * @return array|null */ @@ -2523,7 +2618,7 @@ class Language { */ function checkTitleEncoding( $s ) { if ( is_array( $s ) ) { - wfDebugDieBacktrace( 'Given array to checkTitleEncoding.' ); + throw new MWException( 'Given array to checkTitleEncoding.' ); } if ( StringUtils::isUtf8( $s ) ) { return $s; @@ -2817,7 +2912,9 @@ class Language { * @since 1.20 */ function getDirMarkEntity( $opposite = false ) { - if ( $opposite ) { return $this->isRTL() ? '‎' : '‏'; } + if ( $opposite ) { + return $this->isRTL() ? '‎' : '‏'; + } return $this->isRTL() ? '‏' : '‎'; } @@ -2834,7 +2931,9 @@ class Language { function getDirMark( $opposite = false ) { $lrm = "\xE2\x80\x8E"; # LEFT-TO-RIGHT MARK, commonly abbreviated LRM $rlm = "\xE2\x80\x8F"; # RIGHT-TO-LEFT MARK, commonly abbreviated RLM - if ( $opposite ) { return $this->isRTL() ? $lrm : $rlm; } + if ( $opposite ) { + return $this->isRTL() ? $lrm : $rlm; + } return $this->isRTL() ? $rlm : $lrm; } @@ -2878,12 +2977,16 @@ class Language { } /** + * Get all magic words from cache. * @return array */ function getMagicWords() { return self::$dataCache->getItem( $this->mCode, 'magicWords' ); } + /** + * Run the LanguageGetMagic hook once. + */ protected function doMagicHook() { if ( $this->mMagicHookDone ) { return; @@ -2900,17 +3003,16 @@ class Language { * @param $mw */ function getMagic( $mw ) { - $this->doMagicHook(); + // Saves a function call + if ( ! $this->mMagicHookDone ) { + $this->doMagicHook(); + } if ( isset( $this->mMagicExtensions[$mw->mId] ) ) { $rawEntry = $this->mMagicExtensions[$mw->mId]; } else { - $magicWords = $this->getMagicWords(); - if ( isset( $magicWords[$mw->mId] ) ) { - $rawEntry = $magicWords[$mw->mId]; - } else { - $rawEntry = false; - } + $rawEntry = self::$dataCache->getSubitem( + $this->mCode, 'magicWords', $mw->mId ); } if ( !is_array( $rawEntry ) ) { @@ -2967,7 +3069,7 @@ class Language { * Normally we output all numbers in plain en_US style, that is * 293,291.235 for twohundredninetythreethousand-twohundredninetyone * point twohundredthirtyfive. However this is not suitable for all - * languages, some such as Pakaran want ੨੯੩,੨੯੫.੨੩੫ and others such as + * languages, some such as Punjabi want ੨੯੩,੨੯੫.੨੩੫ and others such as * Icelandic just want to use commas instead of dots, and dots instead * of commas like "293.291,235". * @@ -3058,7 +3160,7 @@ class Language { $sign = ""; if ( intval( $number ) < 0 ) { // For negative numbers apply the algorithm like positive number and add sign. - $sign = "-"; + $sign = "-"; $number = substr( $number, 1 ); } $integerPart = array(); @@ -3066,20 +3168,20 @@ class Language { $numMatches = preg_match_all( "/(#+)/", $digitGroupingPattern, $matches ); preg_match( "/\d+/", $number, $integerPart ); preg_match( "/\.\d*/", $number, $decimalPart ); - $groupedNumber = ( count( $decimalPart ) > 0 ) ? $decimalPart[0]:""; - if ( $groupedNumber === $number ) { + $groupedNumber = ( count( $decimalPart ) > 0 ) ? $decimalPart[0] : ""; + if ( $groupedNumber === $number ) { // the string does not have any number part. Eg: .12345 return $sign . $groupedNumber; } $start = $end = strlen( $integerPart[0] ); while ( $start > 0 ) { - $match = $matches[0][$numMatches -1] ; + $match = $matches[0][$numMatches - 1]; $matchLen = strlen( $match ); $start = $end - $matchLen; if ( $start < 0 ) { $start = 0; } - $groupedNumber = substr( $number, $start, $end -$start ) . $groupedNumber ; + $groupedNumber = substr( $number, $start, $end -$start ) . $groupedNumber; $end = $start; if ( $numMatches > 1 ) { // use the last pattern for the rest of the number @@ -3256,9 +3358,9 @@ class Language { # We got the first byte only of a multibyte char; remove it. $string = substr( $string, 0, -1 ); } elseif ( $char >= 0x80 && - preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' . - '[\xf0-\xf7][\x80-\xbf]{1,2})$/', $string, $m ) ) - { + preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' . + '[\xf0-\xf7][\x80-\xbf]{1,2})$/', $string, $m ) + ) { # We chopped in the middle of a character; remove it $string = $m[1]; } @@ -3342,7 +3444,9 @@ class Language { break; } } - if ( $pos >= $textLen ) break; // extra iteration just for above checks + if ( $pos >= $textLen ) { + break; // extra iteration just for above checks + } # Read the next char... $ch = $text[$pos]; @@ -3475,7 +3579,7 @@ class Language { function getGrammarForms() { global $wgGrammarForms; if ( isset( $wgGrammarForms[$this->getCode()] ) && is_array( $wgGrammarForms[$this->getCode()] ) ) { - return $wgGrammarForms[$this->getCode()]; + return $wgGrammarForms[$this->getCode()]; } return array(); } @@ -3528,23 +3632,23 @@ class Language { * @return string Correct form of plural for $count in this language */ function convertPlural( $count, $forms ) { - if ( !count( $forms ) ) { - return ''; - } - // Handle explicit n=pluralform cases foreach ( $forms as $index => $form ) { - if ( preg_match( '/\d+=/i', $form ) ) { + if ( preg_match( '/^\d+=/i', $form ) ) { $pos = strpos( $form, '=' ); - if ( substr( $form, 0, $pos ) === (string) $count ) { + if ( substr( $form, 0, $pos ) === (string)$count ) { return substr( $form, $pos + 1 ); } unset( $forms[$index] ); } } + $forms = array_values( $forms ); + if ( !count( $forms ) ) { + return ''; + } - $pluralForm = $this->getPluralForm( $count ); + $pluralForm = $this->getPluralRuleIndexNumber( $count ); $pluralForm = min( $pluralForm, count( $forms ) - 1 ); return $forms[$pluralForm]; } @@ -3708,6 +3812,7 @@ class Language { * * @param $text string * @return string + * @deprecated since 1.22 is no longer used */ public function armourMath( $text ) { return $this->mConverter->armourMath( $text ); @@ -3854,6 +3959,34 @@ class Language { } /** + * Get the "parent" language which has a converter to convert a "compatible" language + * (in another variant) to this language (eg. zh for zh-cn, but not en for en-gb). + * + * @return Language|null + * @since 1.22 + */ + public function getParentLanguage() { + if ( $this->mParentLanguage !== false ) { + return $this->mParentLanguage; + } + + $pieces = explode( '-', $this->getCode() ); + $code = $pieces[0]; + if ( !in_array( $code, LanguageConverter::$languagesWithVariants ) ) { + $this->mParentLanguage = null; + return null; + } + $lang = Language::factory( $code ); + if ( !$lang->hasVariant( $this->getCode() ) ) { + $this->mParentLanguage = null; + return null; + } + + $this->mParentLanguage = $lang; + return $lang; + } + + /** * Get the RFC 3066 code for this language object * * NOTE: The return value of this function is NOT HTML-safe and must be escaped with @@ -3887,8 +4020,9 @@ class Language { */ public function setCode( $code ) { $this->mCode = $code; - // Ensure we don't leave an incorrect html code lying around + // Ensure we don't leave incorrect cached data lying around $this->mHtmlCode = null; + $this->mParentLanguage = false; } /** @@ -3985,6 +4119,36 @@ class Language { } /** + * Get the ordered list of fallback languages, ending with the fallback + * language chain for the site language. + * + * @since 1.22 + * @param string $code Language code + * @return array array( fallbacks, site fallbacks ) + */ + public static function getFallbacksIncludingSiteLanguage( $code ) { + global $wgLanguageCode; + + // Usually, we will only store a tiny number of fallback chains, so we + // keep them in static memory. + $cacheKey = "{$code}-{$wgLanguageCode}"; + + if ( !array_key_exists( $cacheKey, self::$fallbackLanguageCache ) ) { + $fallbacks = self::getFallbacksFor( $code ); + + // Append the site's fallback chain, including the site language itself + $siteFallbacks = self::getFallbacksFor( $wgLanguageCode ); + array_unshift( $siteFallbacks, $wgLanguageCode ); + + // Eliminate any languages already included in the chain + $siteFallbacks = array_diff( $siteFallbacks, $fallbacks ); + + self::$fallbackLanguageCache[$cacheKey] = array( $fallbacks, $siteFallbacks ); + } + return self::$fallbackLanguageCache[$cacheKey]; + } + + /** * Get all messages for a given language * WARNING: this may take a long time. If you just need all message *keys* * but need the *contents* of only a few messages, consider using getMessageKeysFor(). @@ -4084,15 +4248,14 @@ class Language { * @since 1.18 */ public function formatExpiry( $expiry, $format = true ) { - static $infinity, $infinityMsg; + static $infinity; if ( $infinity === null ) { - $infinityMsg = wfMessage( 'infiniteblock' ); $infinity = wfGetDB( DB_SLAVE )->getInfinity(); } if ( $expiry == '' || $expiry == $infinity ) { return $format === true - ? $infinityMsg + ? $this->getMessageFromDB( 'infiniteblock' ) : $infinity; } else { return $format === true @@ -4307,7 +4470,7 @@ class Language { $nlink = htmlspecialchars( $next ); } else { $nlink = $this->numLink( $title, $offset + $limit, $limit, - $query, $next, 'prevn-title', 'mw-nextlink' ); + $query, $next, 'nextn-title', 'mw-nextlink' ); } # Make links to set number of items per page @@ -4371,7 +4534,7 @@ class Language { /** * Get the plural rules for the language * @since 1.20 - * @return array Associative array with plural form, and plural rule as key-value pairs + * @return array Associative array with plural form number and plural rule as key-value pairs */ public function getPluralRules() { $pluralRules = self::$dataCache->getItem( strtolower( $this->mCode ), 'pluralRules' ); @@ -4388,13 +4551,48 @@ class Language { } /** - * Find the plural form matching to the given number - * It return the form index. - * @return int The index of the plural form + * Get the plural rule types for the language + * @since 1.22 + * @return array Associative array with plural form number and plural rule type as key-value pairs */ - private function getPluralForm( $number ) { + public function getPluralRuleTypes() { + $pluralRuleTypes = self::$dataCache->getItem( strtolower( $this->mCode ), 'pluralRuleTypes' ); + $fallbacks = Language::getFallbacksFor( $this->mCode ); + if ( !$pluralRuleTypes ) { + foreach ( $fallbacks as $fallbackCode ) { + $pluralRuleTypes = self::$dataCache->getItem( strtolower( $fallbackCode ), 'pluralRuleTypes' ); + if ( $pluralRuleTypes ) { + break; + } + } + } + return $pluralRuleTypes; + } + + /** + * Find the index number of the plural rule appropriate for the given number + * @return int The index number of the plural rule + */ + public function getPluralRuleIndexNumber( $number ) { $pluralRules = $this->getCompiledPluralRules(); $form = CLDRPluralRuleEvaluator::evaluateCompiled( $number, $pluralRules ); return $form; } + + /** + * Find the plural rule type appropriate for the given number + * For example, if the language is set to Arabic, getPluralType(5) should + * return 'few'. + * @since 1.22 + * @return string The name of the plural rule type, e.g. one, two, few, many + */ + public function getPluralRuleType( $number ) { + $index = $this->getPluralRuleIndexNumber( $number ); + $pluralRuleTypes = $this->getPluralRuleTypes(); + if ( isset( $pluralRuleTypes[$index] ) ) { + return $pluralRuleTypes[$index]; + } else { + return 'other'; + } + } } |