From f6d65e533c62f6deb21342d4901ece24497b433e Mon Sep 17 00:00:00 2001 From: Pierre Schmitz Date: Thu, 4 Jun 2015 07:31:04 +0200 Subject: Update to MediaWiki 1.25.1 --- includes/parser/CacheTime.php | 26 +- includes/parser/CoreParserFunctions.php | 92 ++--- includes/parser/CoreTagHooks.php | 27 ++ includes/parser/DateFormatter.php | 15 +- includes/parser/LinkHolderArray.php | 25 +- includes/parser/MWTidy.php | 76 +++- includes/parser/Parser.php | 686 +++++++++++++++++--------------- includes/parser/ParserCache.php | 7 - includes/parser/ParserOptions.php | 97 ++++- includes/parser/ParserOutput.php | 141 +++++-- includes/parser/Preprocessor_DOM.php | 30 +- includes/parser/Preprocessor_Hash.php | 20 +- includes/parser/StripState.php | 2 - 13 files changed, 710 insertions(+), 534 deletions(-) (limited to 'includes/parser') diff --git a/includes/parser/CacheTime.php b/includes/parser/CacheTime.php index 94abc266..950c0d46 100644 --- a/includes/parser/CacheTime.php +++ b/includes/parser/CacheTime.php @@ -34,8 +34,7 @@ class CacheTime { public $mVersion = Parser::VERSION, # Compatibility check $mCacheTime = '', # Time when this object was generated, or -1 for uncacheable. Used in ParserCache. - $mCacheExpiry = null, # Seconds after which the object should expire, use 0 for uncachable. Used in ParserCache. - $mContainsOldMagic, # Boolean variable indicating if the input contained variables like {{CURRENTDAY}} + $mCacheExpiry = null, # Seconds after which the object should expire, use 0 for uncacheable. Used in ParserCache. $mCacheRevisionId = null; # Revision ID that was parsed /** @@ -45,21 +44,6 @@ class CacheTime { return wfTimestamp( TS_MW, $this->mCacheTime ); } - /** - * @return bool - */ - public function containsOldMagic() { - return $this->mContainsOldMagic; - } - - /** - * @param bool $com - * @return bool - */ - public function setContainsOldMagic( $com ) { - return wfSetVar( $this->mContainsOldMagic, $com ); - } - /** * setCacheTime() sets the timestamp expressing when the page has been rendered. * This does not control expiry, see updateCacheExpiry() for that! @@ -123,7 +107,7 @@ class CacheTime { if ( $this->mCacheTime < 0 ) { return 0; - } // old-style marker for "not cachable" + } // old-style marker for "not cacheable" $expire = $this->mCacheExpiry; @@ -133,12 +117,8 @@ class CacheTime { $expire = min( $expire, $wgParserCacheExpireTime ); } - if ( $this->containsOldMagic() ) { //compatibility hack - $expire = min( $expire, 3600 ); # 1 hour - } - if ( $expire <= 0 ) { - return 0; // not cachable + return 0; // not cacheable } else { return $expire; } diff --git a/includes/parser/CoreParserFunctions.php b/includes/parser/CoreParserFunctions.php index eacbecd4..830a68fc 100644 --- a/includes/parser/CoreParserFunctions.php +++ b/includes/parser/CoreParserFunctions.php @@ -36,7 +36,7 @@ class CoreParserFunctions { # Syntax for arguments (see Parser::setFunctionHook): # "name for lookup in localized magic words array", # function callback, - # optional SFH_NO_HASH to omit the hash from calls (e.g. {{int:...}} + # optional Parser::SFH_NO_HASH to omit the hash from calls (e.g. {{int:...}} # instead of {{#int:...}}) $noHashFunctions = array( 'ns', 'nse', 'urlencode', 'lcfirst', 'ucfirst', 'lc', 'uc', @@ -44,7 +44,7 @@ class CoreParserFunctions { 'canonicalurle', 'formatnum', 'grammar', 'gender', 'plural', 'numberofpages', 'numberofusers', 'numberofactiveusers', 'numberofarticles', 'numberoffiles', 'numberofadmins', - 'numberingroup', 'numberofedits', 'numberofviews', 'language', + 'numberingroup', 'numberofedits', 'language', 'padleft', 'padright', 'anchorencode', 'defaultsort', 'filepath', 'pagesincategory', 'pagesize', 'protectionlevel', 'namespacee', 'namespacenumber', 'talkspace', 'talkspacee', @@ -57,24 +57,24 @@ class CoreParserFunctions { 'revisiontimestamp', 'revisionuser', 'cascadingsources', ); foreach ( $noHashFunctions as $func ) { - $parser->setFunctionHook( $func, array( __CLASS__, $func ), SFH_NO_HASH ); + $parser->setFunctionHook( $func, array( __CLASS__, $func ), Parser::SFH_NO_HASH ); } - $parser->setFunctionHook( 'namespace', array( __CLASS__, 'mwnamespace' ), SFH_NO_HASH ); - $parser->setFunctionHook( 'int', array( __CLASS__, 'intFunction' ), SFH_NO_HASH ); + $parser->setFunctionHook( 'namespace', array( __CLASS__, 'mwnamespace' ), Parser::SFH_NO_HASH ); + $parser->setFunctionHook( 'int', array( __CLASS__, 'intFunction' ), Parser::SFH_NO_HASH ); $parser->setFunctionHook( 'special', array( __CLASS__, 'special' ) ); $parser->setFunctionHook( 'speciale', array( __CLASS__, 'speciale' ) ); - $parser->setFunctionHook( 'tag', array( __CLASS__, 'tagObj' ), SFH_OBJECT_ARGS ); + $parser->setFunctionHook( 'tag', array( __CLASS__, 'tagObj' ), Parser::SFH_OBJECT_ARGS ); $parser->setFunctionHook( 'formatdate', array( __CLASS__, 'formatDate' ) ); if ( $wgAllowDisplayTitle ) { - $parser->setFunctionHook( 'displaytitle', array( __CLASS__, 'displaytitle' ), SFH_NO_HASH ); + $parser->setFunctionHook( 'displaytitle', array( __CLASS__, 'displaytitle' ), Parser::SFH_NO_HASH ); } if ( $wgAllowSlowParserFunctions ) { $parser->setFunctionHook( 'pagesinnamespace', array( __CLASS__, 'pagesinnamespace' ), - SFH_NO_HASH + Parser::SFH_NO_HASH ); } } @@ -111,7 +111,7 @@ class CoreParserFunctions { $pref = $parser->getOptions()->getDateFormat(); - // Specify a different default date format other than the the normal default + // Specify a different default date format other than the normal default // if the user has 'default' for their setting if ( $pref == 'default' && $defaultPref ) { $pref = $defaultPref; @@ -309,15 +309,12 @@ class CoreParserFunctions { * @return string */ public static function gender( $parser, $username ) { - wfProfileIn( __METHOD__ ); $forms = array_slice( func_get_args(), 2 ); // Some shortcuts to avoid loading user data unnecessarily if ( count( $forms ) === 0 ) { - wfProfileOut( __METHOD__ ); return ''; } elseif ( count( $forms ) === 1 ) { - wfProfileOut( __METHOD__ ); return $forms[0]; } @@ -341,7 +338,6 @@ class CoreParserFunctions { $gender = GenderCache::singleton()->getGenderOf( $parser->getOptions()->getUser(), __METHOD__ ); } $ret = $parser->getFunctionLang()->gender( $gender, $forms ); - wfProfileOut( __METHOD__ ); return $ret; } @@ -379,8 +375,7 @@ class CoreParserFunctions { $text = $parser->doQuotes( $text ); // remove stripped text (e.g. the UNIQ-QINU stuff) that was generated by tag extensions/whatever - $text = preg_replace( '/' . preg_quote( $parser->uniqPrefix(), '/' ) . '.*?' - . preg_quote( Parser::MARKER_SUFFIX, '/' ) . '/', '', $text ); + $text = $parser->killMarkers( $text ); // list of disallowed tags for DISPLAYTITLE // these will be escaped even though they are allowed in normal wiki text @@ -489,10 +484,6 @@ class CoreParserFunctions { public static function numberofedits( $parser, $raw = null ) { return self::formatRaw( SiteStats::edits(), $raw ); } - public static function numberofviews( $parser, $raw = null ) { - global $wgDisableCounters; - return !$wgDisableCounters ? self::formatRaw( SiteStats::views(), $raw ) : ''; - } public static function pagesinnamespace( $parser, $namespace = 0, $raw = null ) { return self::formatRaw( SiteStats::pagesInNs( intval( $namespace ) ), $raw ); } @@ -902,9 +893,17 @@ class CoreParserFunctions { } } - // Usage {{filepath|300}}, {{filepath|nowiki}}, {{filepath|nowiki|300}} - // or {{filepath|300|nowiki}} or {{filepath|300px}}, {{filepath|200x300px}}, - // {{filepath|nowiki|200x300px}}, {{filepath|200x300px|nowiki}}. + /** + * Usage {{filepath|300}}, {{filepath|nowiki}}, {{filepath|nowiki|300}} + * or {{filepath|300|nowiki}} or {{filepath|300px}}, {{filepath|200x300px}}, + * {{filepath|nowiki|200x300px}}, {{filepath|200x300px|nowiki}}. + * + * @param Parser $parser + * @param string $name + * @param string $argA + * @param string $argB + * @return array|string + */ public static function filepath( $parser, $name = '', $argA = '', $argB = '' ) { $file = wfFindFile( $name ); @@ -943,7 +942,7 @@ class CoreParserFunctions { * Parser function to extension tag adaptor * @param Parser $parser * @param PPFrame $frame - * @param array $args + * @param PPNode[] $args * @return string */ public static function tagObj( $parser, $frame, $args ) { @@ -958,13 +957,6 @@ class CoreParserFunctions { $inner = null; } - $stripList = $parser->getStripList(); - if ( !in_array( $tagName, $stripList ) ) { - return '' . - wfMessage( 'unknown_extension_tag', $tagName )->inContentLanguage()->text() . - ''; - } - $attributes = array(); foreach ( $args as $arg ) { $bits = $arg->splitArg(); @@ -978,6 +970,19 @@ class CoreParserFunctions { } } + $stripList = $parser->getStripList(); + if ( !in_array( $tagName, $stripList ) ) { + // we can't handle this tag (at least not now), so just re-emit it as an ordinary tag + $attrText = ''; + foreach ( $attributes as $name => $value ) { + $attrText .= ' ' . htmlspecialchars( $name ) . '="' . htmlspecialchars( $value ) . '"'; + } + if ( $inner === null ) { + return "<$tagName$attrText/>"; + } + return "<$tagName$attrText>$inner"; + } + $params = array( 'name' => $tagName, 'inner' => $inner, @@ -1000,11 +1005,6 @@ class CoreParserFunctions { * @since 1.23 */ private static function getCachedRevisionObject( $parser, $title = null ) { - static $cache = null; - if ( $cache == null ) { - $cache = new MapCacheLRU( 50 ); - } - if ( is_null( $title ) ) { return null; } @@ -1024,22 +1024,18 @@ class CoreParserFunctions { // Normalize name for cache $page = $title->getPrefixedDBkey(); - if ( $cache->has( $page ) ) { // cache contains null values - return $cache->get( $page ); + if ( !( $parser->currentRevisionCache && $parser->currentRevisionCache->has( $page ) ) + && !$parser->incrementExpensiveFunctionCount() ) { + return null; } - if ( $parser->incrementExpensiveFunctionCount() ) { - $rev = Revision::newFromTitle( $title, false, Revision::READ_NORMAL ); - $pageID = $rev ? $rev->getPage() : 0; - $revID = $rev ? $rev->getId() : 0; - $cache->set( $page, $rev ); // maybe null + $rev = $parser->fetchCurrentRevisionOfTitle( $title ); + $pageID = $rev ? $rev->getPage() : 0; + $revID = $rev ? $rev->getId() : 0; - // Register dependency in templatelinks - $parser->getOutput()->addTemplate( $title, $pageID, $revID ); + // Register dependency in templatelinks + $parser->getOutput()->addTemplate( $title, $pageID, $revID ); - return $rev; - } - $cache->set( $page, null ); - return null; + return $rev; } /** diff --git a/includes/parser/CoreTagHooks.php b/includes/parser/CoreTagHooks.php index 85920cc1..9755ea93 100644 --- a/includes/parser/CoreTagHooks.php +++ b/includes/parser/CoreTagHooks.php @@ -35,6 +35,7 @@ class CoreTagHooks { $parser->setHook( 'pre', array( __CLASS__, 'pre' ) ); $parser->setHook( 'nowiki', array( __CLASS__, 'nowiki' ) ); $parser->setHook( 'gallery', array( __CLASS__, 'gallery' ) ); + $parser->setHook( 'indicator', array( __CLASS__, 'indicator' ) ); if ( $wgRawHtml ) { $parser->setHook( 'html', array( __CLASS__, 'html' ) ); } @@ -119,4 +120,30 @@ class CoreTagHooks { public static function gallery( $content, $attributes, $parser ) { return $parser->renderImageGallery( $content, $attributes ); } + + /** + * XML-style tag for page status indicators: icons (or short text snippets) usually displayed in + * the top-right corner of the page, outside of the main content. + * + * @param string $content + * @param array $attributes + * @param Parser $parser + * @param PPFrame $frame + * @return string + * @since 1.25 + */ + public static function indicator( $content, array $attributes, Parser $parser, PPFrame $frame ) { + if ( !isset( $attributes['name'] ) || trim( $attributes['name'] ) === '' ) { + return '' . + wfMessage( 'invalid-indicator-name' )->inContentLanguage()->parse() . + ''; + } + + $parser->getOutput()->setIndicator( + trim( $attributes['name'] ), + Parser::stripOuterParagraph( $parser->recursiveTagParseFully( $content, $frame ) ) + ); + + return ''; + } } diff --git a/includes/parser/DateFormatter.php b/includes/parser/DateFormatter.php index 82f0e9d4..ef295ab2 100644 --- a/includes/parser/DateFormatter.php +++ b/includes/parser/DateFormatter.php @@ -33,7 +33,7 @@ class DateFormatter { public $regexes, $pDays, $pMonths, $pYears; public $rules, $xMonths, $preferences; - protected $lang; + protected $lang, $mLinked; const ALL = -1; const NONE = 0; @@ -315,8 +315,8 @@ class DateFormatter { } /** - * @todo document - * @return string + * Return a regex that can be used to find month names in string + * @return string regex to find the months with */ public function getMonthRegex() { $names = array(); @@ -338,7 +338,7 @@ class DateFormatter { } /** - * @todo document + * Make an ISO year from a year name, for instance: '-1199' from '1200 BC' * @param string $year Year name * @return string ISO year name */ @@ -356,9 +356,10 @@ class DateFormatter { } /** - * @todo document - * @param string $iso - * @return int|string + * Make a year one from an ISO year, for instance: '400 BC' from '-0399'. + * @param string $iso ISO year + * @return int|string int representing year number in case of AD dates, or string containing + * year number and 'BC' at the end otherwise. */ public function makeNormalYear( $iso ) { if ( $iso[0] == '-' ) { diff --git a/includes/parser/LinkHolderArray.php b/includes/parser/LinkHolderArray.php index 7794fae4..7026c5ce 100644 --- a/includes/parser/LinkHolderArray.php +++ b/includes/parser/LinkHolderArray.php @@ -229,7 +229,6 @@ class LinkHolderArray { * @return string */ public function makeHolder( $nt, $text = '', $query = array(), $trail = '', $prefix = '' ) { - wfProfileIn( __METHOD__ ); if ( !is_object( $nt ) ) { # Fail gracefully $retVal = "{$prefix}{$text}{$trail}"; @@ -259,7 +258,6 @@ class LinkHolderArray { } $this->size++; } - wfProfileOut( __METHOD__ ); return $retVal; } @@ -267,17 +265,12 @@ class LinkHolderArray { * Replace link placeholders with actual links, in the buffer * * @param string $text - * @return array Array of link CSS classes, indexed by PDBK. */ public function replace( &$text ) { - wfProfileIn( __METHOD__ ); - /** @todo FIXME: replaceInternal doesn't return a value */ - $colours = $this->replaceInternal( $text ); + $this->replaceInternal( $text ); $this->replaceInterwiki( $text ); - wfProfileOut( __METHOD__ ); - return $colours; } /** @@ -289,14 +282,12 @@ class LinkHolderArray { return; } - wfProfileIn( __METHOD__ ); global $wgContLang, $wgContentHandlerUseDB; $colours = array(); $linkCache = LinkCache::singleton(); $output = $this->parent->getOutput(); - wfProfileIn( __METHOD__ . '-check' ); $dbr = wfGetDB( DB_SLAVE ); $threshold = $this->parent->getOptions()->getStubThreshold(); @@ -380,9 +371,8 @@ class LinkHolderArray { } if ( count( $linkcolour_ids ) ) { //pass an array of page_ids to an extension - wfRunHooks( 'GetLinkColours', array( $linkcolour_ids, &$colours ) ); + Hooks::run( 'GetLinkColours', array( $linkcolour_ids, &$colours ) ); } - wfProfileOut( __METHOD__ . '-check' ); # Do a second query for different language variants of links and categories if ( $wgContLang->hasVariants() ) { @@ -390,7 +380,6 @@ class LinkHolderArray { } # Construct search and replace arrays - wfProfileIn( __METHOD__ . '-construct' ); $replacePairs = array(); foreach ( $this->internals as $ns => $entries ) { foreach ( $entries as $index => $entry ) { @@ -426,18 +415,14 @@ class LinkHolderArray { } } $replacer = new HashtableReplacer( $replacePairs, 1 ); - wfProfileOut( __METHOD__ . '-construct' ); # Do the thing - wfProfileIn( __METHOD__ . '-replace' ); $text = preg_replace_callback( '/()/', $replacer->cb(), $text ); - wfProfileOut( __METHOD__ . '-replace' ); - wfProfileOut( __METHOD__ ); } /** @@ -449,7 +434,6 @@ class LinkHolderArray { return; } - wfProfileIn( __METHOD__ ); # Make interwiki link HTML $output = $this->parent->getOutput(); $replacePairs = array(); @@ -463,7 +447,6 @@ class LinkHolderArray { '//', $replacer->cb(), $text ); - wfProfileOut( __METHOD__ ); } /** @@ -617,7 +600,7 @@ class LinkHolderArray { } } } - wfRunHooks( 'GetLinkColours', array( $linkcolour_ids, &$colours ) ); + Hooks::run( 'GetLinkColours', array( $linkcolour_ids, &$colours ) ); // rebuild the categories in original order (if there are replacements) if ( count( $varCategories ) > 0 ) { @@ -644,14 +627,12 @@ class LinkHolderArray { * @return string */ public function replaceText( $text ) { - wfProfileIn( __METHOD__ ); $text = preg_replace_callback( '//', array( &$this, 'replaceTextCallback' ), $text ); - wfProfileOut( __METHOD__ ); return $text; } diff --git a/includes/parser/MWTidy.php b/includes/parser/MWTidy.php index b310862f..d446ccf6 100644 --- a/includes/parser/MWTidy.php +++ b/includes/parser/MWTidy.php @@ -127,17 +127,11 @@ class MWTidy { * @return string Corrected HTML output */ public static function tidy( $text ) { - global $wgTidyInternal; - $wrapper = new MWTidyWrapper; $wrappedtext = $wrapper->getWrapped( $text ); $retVal = null; - if ( $wgTidyInternal ) { - $correctedtext = self::execInternalTidy( $wrappedtext, false, $retVal ); - } else { - $correctedtext = self::execExternalTidy( $wrappedtext, false, $retVal ); - } + $correctedtext = self::clean( $wrappedtext, false, $retVal ); if ( $retVal < 0 ) { wfDebug( "Possible tidy configuration error!\n" ); @@ -160,16 +154,34 @@ class MWTidy { * @return bool Whether the HTML is valid */ public static function checkErrors( $text, &$errorStr = null ) { + $retval = 0; + $errorStr = self::clean( $text, true, $retval ); + return ( $retval < 0 && $errorStr == '' ) || $retval == 0; + } + + /** + * Perform a clean/repair operation + * @param string $text HTML to check + * @param bool $stderr Whether to read result from STDERR rather than STDOUT + * @param int &$retval Exit code (-1 on internal error) + * @return null|string + * @throws MWException + */ + private static function clean( $text, $stderr = false, &$retval = null ) { global $wgTidyInternal; - $retval = 0; if ( $wgTidyInternal ) { - $errorStr = self::execInternalTidy( $text, true, $retval ); + if ( wfIsHHVM() ) { + if ( $stderr ) { + throw new MWException( __METHOD__ . ": error text return from HHVM tidy is not supported" ); + } + return self::hhvmClean( $text, $retval ); + } else { + return self::phpClean( $text, $stderr, $retval ); + } } else { - $errorStr = self::execExternalTidy( $text, true, $retval ); + return self::externalClean( $text, $stderr, $retval ); } - - return ( $retval < 0 && $errorStr == '' ) || $retval == 0; } /** @@ -181,9 +193,8 @@ class MWTidy { * @param int &$retval Exit code (-1 on internal error) * @return string|null */ - private static function execExternalTidy( $text, $stderr = false, &$retval = null ) { + private static function externalClean( $text, $stderr = false, &$retval = null ) { global $wgTidyConf, $wgTidyBin, $wgTidyOpts; - wfProfileIn( __METHOD__ ); $cleansource = ''; $opts = ' -utf8'; @@ -235,7 +246,6 @@ class MWTidy { $cleansource = null; } - wfProfileOut( __METHOD__ ); return $cleansource; } @@ -248,15 +258,15 @@ class MWTidy { * @param int &$retval Exit code (-1 on internal error) * @return string|null */ - private static function execInternalTidy( $text, $stderr = false, &$retval = null ) { + private static function phpClean( $text, $stderr = false, &$retval = null ) { global $wgTidyConf, $wgDebugTidy; - wfProfileIn( __METHOD__ ); - if ( !class_exists( 'tidy' ) ) { + if ( ( !wfIsHHVM() && !class_exists( 'tidy' ) ) || + ( wfIsHHVM() && !function_exists( 'tidy_repair_string' ) ) + ) { wfWarn( "Unable to load internal tidy class." ); $retval = -1; - wfProfileOut( __METHOD__ ); return null; } @@ -265,8 +275,6 @@ class MWTidy { if ( $stderr ) { $retval = $tidy->getStatus(); - - wfProfileOut( __METHOD__ ); return $tidy->errorBuffer; } @@ -285,7 +293,31 @@ class MWTidy { } } - wfProfileOut( __METHOD__ ); + return $cleansource; + } + + /** + * Use the tidy extension for HHVM from + * https://github.com/wikimedia/mediawiki-php-tidy + * + * This currently does not support the object-oriented interface, but + * tidy_repair_string() can be used for the most common tasks. + * + * @param string $text HTML to check + * @param int &$retval Exit code (-1 on internal error) + * @return string|null + */ + private static function hhvmClean( $text, &$retval ) { + global $wgTidyConf; + + $cleansource = tidy_repair_string( $text, $wgTidyConf, 'utf8' ); + if ( $cleansource === false ) { + $cleansource = null; + $retval = -1; + } else { + $retval = 0; + } + return $cleansource; } } diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index 84bb2243..ace63a09 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -35,18 +35,18 @@ * * - Parser::parse() * produces HTML output - * - Parser::preSaveTransform(). - * produces altered wiki markup. + * - Parser::preSaveTransform() + * produces altered wiki markup * - Parser::preprocess() * removes HTML comments and expands templates * - Parser::cleanSig() and Parser::cleanSigInSig() - * Cleans a signature before saving it to preferences + * cleans a signature before saving it to preferences * - Parser::getSection() - * Return the content of a section from an article for section editing + * return the content of a section from an article for section editing * - Parser::replaceSection() - * Replaces a section by number inside an article + * replaces a section by number inside an article * - Parser::getPreloadText() - * Removes sections, and tags. + * removes sections and tags * * Globals used: * object: $wgContLang @@ -79,7 +79,6 @@ class Parser { const HALF_PARSED_VERSION = 2; # Flags for Parser::setFunctionHook - # Also available as global constants from Defines.php const SFH_NO_HASH = 1; const SFH_OBJECT_ARGS = 2; @@ -91,6 +90,9 @@ class Parser { const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)([^][<>"\\x00-\\x20\\x7F\p{Zs}]+) \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu'; + # Regular expression for a non-newline space + const SPACE_NOT_NL = '(?:\t| |&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})'; + # State constants for the definition list colon extraction const COLON_STATE_TEXT = 0; const COLON_STATE_TAG = 1; @@ -144,7 +146,12 @@ class Parser { * @var MagicWordArray */ public $mSubstWords; - public $mConf, $mPreprocessor, $mExtLinkBracketedRegex, $mUrlProtocols; # Initialised in constructor + # Initialised in constructor + public $mConf, $mExtLinkBracketedRegex, $mUrlProtocols; + + # Initialized in getPreprocessor() + /** @var Preprocessor */ + public $mPreprocessor; # Cleared with clearState(): /** @@ -210,12 +217,23 @@ class Parser { */ public $mLangLinkLanguages; + /** + * @var MapCacheLRU|null + * @since 1.24 + * + * A cache of the current revisions of titles. Keys are $title->getPrefixedDbKey() + */ + public $currentRevisionCache; + /** * @var bool Recursive call protection. * This variable should be treated as if it were private. */ public $mInParse = false; + /** @var SectionProfiler */ + protected $mProfiler; + /** * @param array $conf */ @@ -258,7 +276,22 @@ class Parser { */ public function __clone() { $this->mInParse = false; - wfRunHooks( 'ParserCloned', array( $this ) ); + + // Bug 56226: When you create a reference "to" an object field, that + // makes the object field itself be a reference too (until the other + // reference goes out of scope). When cloning, any field that's a + // reference is copied as a reference in the new object. Both of these + // are defined PHP5 behaviors, as inconvenient as it is for us when old + // hooks from PHP4 days are passing fields by reference. + foreach ( array( 'mStripState', 'mVarCache' ) as $k ) { + // Make a non-reference copy of the field, then rebind the field to + // reference the new copy. + $tmp = $this->$k; + $this->$k =& $tmp; + unset( $tmp ); + } + + Hooks::run( 'ParserCloned', array( $this ) ); } /** @@ -270,14 +303,11 @@ class Parser { } $this->mFirstCall = false; - wfProfileIn( __METHOD__ ); - CoreParserFunctions::register( $this ); CoreTagHooks::register( $this ); $this->initialiseVariables(); - wfRunHooks( 'ParserFirstCallInit', array( &$this ) ); - wfProfileOut( __METHOD__ ); + Hooks::run( 'ParserFirstCallInit', array( &$this ) ); } /** @@ -286,7 +316,6 @@ class Parser { * @private */ public function clearState() { - wfProfileIn( __METHOD__ ); if ( $this->mFirstCall ) { $this->firstCallInit(); } @@ -305,6 +334,7 @@ class Parser { $this->mVarCache = array(); $this->mUser = null; $this->mLangLinkLanguages = array(); + $this->currentRevisionCache = null; /** * Prefix for temporary replacement strings for the multipass parser. @@ -341,8 +371,9 @@ class Parser { $this->mPreprocessor = null; } - wfRunHooks( 'ParserClearState', array( &$this ) ); - wfProfileOut( __METHOD__ ); + $this->mProfiler = new SectionProfiler(); + + Hooks::run( 'ParserClearState', array( &$this ) ); } /** @@ -365,10 +396,7 @@ class Parser { * to internalParse() which does all the real work. */ - global $wgUseTidy, $wgAlwaysUseTidy, $wgShowHostnames; - $fname = __METHOD__ . '-' . wfGetCaller(); - wfProfileIn( __METHOD__ ); - wfProfileIn( $fname ); + global $wgShowHostnames; if ( $clearState ) { $magicScopeVariable = $this->lock(); @@ -376,6 +404,7 @@ class Parser { $this->startParse( $title, $options, self::OT_HTML, $clearState ); + $this->currentRevisionCache = null; $this->mInputSize = strlen( $text ); if ( $this->mOptions->getEnableLimitReport() ) { $this->mOutput->resetParseStartTime(); @@ -399,46 +428,13 @@ class Parser { $this->mRevisionSize = null; } - wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) ); + Hooks::run( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) ); # No more strip! - wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) ); + Hooks::run( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) ); $text = $this->internalParse( $text ); - wfRunHooks( 'ParserAfterParse', array( &$this, &$text, &$this->mStripState ) ); - - $text = $this->mStripState->unstripGeneral( $text ); + Hooks::run( 'ParserAfterParse', array( &$this, &$text, &$this->mStripState ) ); - # Clean up special characters, only run once, next-to-last before doBlockLevels - $fixtags = array( - # french spaces, last one Guillemet-left - # only if there is something before the space - '/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1 ', - # french spaces, Guillemet-right - '/(\\302\\253) /' => '\\1 ', - '/ (!\s*important)/' => ' \\1', # Beware of CSS magic word !important, bug #11874. - ); - $text = preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text ); - - $text = $this->doBlockLevels( $text, $linestart ); - - $this->replaceLinkHolders( $text ); - - /** - * The input doesn't get language converted if - * a) It's disabled - * b) Content isn't converted - * c) It's a conversion table - * d) it is an interface message (which is in the user language) - */ - if ( !( $options->getDisableContentConversion() - || isset( $this->mDoubleUnderscores['nocontentconvert'] ) ) - ) { - if ( !$this->mOptions->getInterfaceMessage() ) { - # The position of the convert() call should not be changed. it - # assumes that the links are all replaced and the only thing left - # is the mark. - $text = $this->getConverterLanguage()->convert( $text ); - } - } + $text = $this->internalParseHalfParsed( $text, true, $linestart ); /** * A converted title will be provided in the output object if title and @@ -461,45 +457,6 @@ class Parser { } } - $text = $this->mStripState->unstripNoWiki( $text ); - - wfRunHooks( 'ParserBeforeTidy', array( &$this, &$text ) ); - - $text = $this->replaceTransparentTags( $text ); - $text = $this->mStripState->unstripGeneral( $text ); - - $text = Sanitizer::normalizeCharReferences( $text ); - - if ( ( $wgUseTidy && $this->mOptions->getTidy() ) || $wgAlwaysUseTidy ) { - $text = MWTidy::tidy( $text ); - } else { - # attempt to sanitize at least some nesting problems - # (bug #2702 and quite a few others) - $tidyregs = array( - # ''Something [http://www.cool.com cool''] --> - # Somethingcool> - '/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' => - '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9', - # fix up an anchor inside another anchor, only - # at least for a single single nested link (bug 3695) - '/(]+>)([^<]*)(]+>[^<]*)<\/a>(.*)<\/a>/' => - '\\1\\2\\3\\1\\4', - # fix div inside inline elements- doBlockLevels won't wrap a line which - # contains a div, so fix it up here; replace - # div with escaped text - '/(<([aib]) [^>]+>)([^<]*)(]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' => - '\\1\\3<div\\5>\\6</div>\\8\\9', - # remove empty italic or bold tag pairs, some - # introduced by rules above - '/<([bi])><\/\\1>/' => '', - ); - - $text = preg_replace( - array_keys( $tidyregs ), - array_values( $tidyregs ), - $text ); - } - if ( $this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit() ) { $this->limitationWarn( 'expensive-parserfunction', $this->mExpensiveFunctionCount, @@ -507,8 +464,6 @@ class Parser { ); } - wfRunHooks( 'ParserAfterTidy', array( &$this, &$text ) ); - # Information on include size limits, for the benefit of users who try to skirt them if ( $this->mOptions->getEnableLimitReport() ) { $max = $this->mOptions->getMaxIncludeSize(); @@ -543,14 +498,14 @@ class Parser { $this->mOutput->setLimitReportData( 'limitreport-expensivefunctioncount', array( $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit() ) ); - wfRunHooks( 'ParserLimitReportPrepare', array( $this, $this->mOutput ) ); + Hooks::run( 'ParserLimitReportPrepare', array( $this, $this->mOutput ) ); $limitReport = "NewPP limit report\n"; if ( $wgShowHostnames ) { $limitReport .= 'Parsed by ' . wfHostname() . "\n"; } foreach ( $this->mOutput->getLimitReportData() as $key => $value ) { - if ( wfRunHooks( 'ParserLimitReportFormat', + if ( Hooks::run( 'ParserLimitReportFormat', array( $key, &$value, &$limitReport, false, false ) ) ) { $keyMsg = wfMessage( $key )->inLanguage( 'en' )->useDatabase( false ); @@ -568,13 +523,26 @@ class Parser { // Since we're not really outputting HTML, decode the entities and // then re-encode the things that need hiding inside HTML comments. $limitReport = htmlspecialchars_decode( $limitReport ); - wfRunHooks( 'ParserLimitReport', array( $this, &$limitReport ) ); + Hooks::run( 'ParserLimitReport', array( $this, &$limitReport ) ); // Sanitize for comment. Note '‐' in the replacement is U+2010, // which looks much like the problematic '-'. $limitReport = str_replace( array( '-', '&' ), array( '‐', '&' ), $limitReport ); $text .= "\n\n"; + // Add on template profiling data + $dataByFunc = $this->mProfiler->getFunctionStats(); + uasort( $dataByFunc, function ( $a, $b ) { + return $a['real'] < $b['real']; // descending order + } ); + $profileReport = "Transclusion expansion time report (%,ms,calls,template)\n"; + foreach ( array_slice( $dataByFunc, 0, 10 ) as $item ) { + $profileReport .= sprintf( "%6.2f%% %8.3f %6d - %s\n", + $item['%real'], $item['real'], $item['calls'], + htmlspecialchars( $item['name'] ) ); + } + $text .= "\n\n"; + if ( $this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10 ) { wfDebugLog( 'generated-pp-node-count', $this->mGeneratedPPNodeCount . ' ' . $this->mTitle->getPrefixedDBkey() ); @@ -588,29 +556,60 @@ class Parser { $this->mRevisionUser = $oldRevisionUser; $this->mRevisionSize = $oldRevisionSize; $this->mInputSize = false; - wfProfileOut( $fname ); - wfProfileOut( __METHOD__ ); + $this->currentRevisionCache = null; return $this->mOutput; } /** - * Recursive parser entry point that can be called from an extension tag - * hook. + * Half-parse wikitext to half-parsed HTML. This recursive parser entry point + * can be called from an extension tag hook. + * + * The output of this function IS NOT SAFE PARSED HTML; it is "half-parsed" + * instead, which means that lists and links have not been fully parsed yet, + * and strip markers are still present. * - * If $frame is not provided, then template variables (e.g., {{{1}}}) within $text are not expanded + * Use recursiveTagParseFully() to fully parse wikitext to output-safe HTML. + * + * Use this function if you're a parser tag hook and you want to parse + * wikitext before or after applying additional transformations, and you + * intend to *return the result as hook output*, which will cause it to go + * through the rest of parsing process automatically. + * + * If $frame is not provided, then template variables (e.g., {{{1}}}) within + * $text are not expanded * * @param string $text Text extension wants to have parsed * @param bool|PPFrame $frame The frame to use for expanding any template variables - * - * @return string + * @return string UNSAFE half-parsed HTML */ public function recursiveTagParse( $text, $frame = false ) { - wfProfileIn( __METHOD__ ); - wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) ); - wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) ); + Hooks::run( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) ); + Hooks::run( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) ); $text = $this->internalParse( $text, false, $frame ); - wfProfileOut( __METHOD__ ); + return $text; + } + + /** + * Fully parse wikitext to fully parsed HTML. This recursive parser entry + * point can be called from an extension tag hook. + * + * The output of this function is fully-parsed HTML that is safe for output. + * If you're a parser tag hook, you might want to use recursiveTagParse() + * instead. + * + * If $frame is not provided, then template variables (e.g., {{{1}}}) within + * $text are not expanded + * + * @since 1.25 + * + * @param string $text Text extension wants to have parsed + * @param bool|PPFrame $frame The frame to use for expanding any template variables + * @return string Fully parsed HTML + */ + public function recursiveTagParseFully( $text, $frame = false ) { + $text = $this->recursiveTagParse( $text, $frame ); + $text = $this->internalParseHalfParsed( $text, false ); return $text; } @@ -625,18 +624,18 @@ class Parser { * @param bool|PPFrame $frame * @return mixed|string */ - public function preprocess( $text, Title $title = null, ParserOptions $options, $revid = null, $frame = false ) { - wfProfileIn( __METHOD__ ); + public function preprocess( $text, Title $title = null, + ParserOptions $options, $revid = null, $frame = false + ) { $magicScopeVariable = $this->lock(); $this->startParse( $title, $options, self::OT_PREPROCESS, true ); if ( $revid !== null ) { $this->mRevisionId = $revid; } - wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) ); - wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) ); + Hooks::run( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) ); + Hooks::run( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) ); $text = $this->replaceVariables( $text, $frame ); $text = $this->mStripState->unstripBoth( $text ); - wfProfileOut( __METHOD__ ); return $text; } @@ -650,10 +649,8 @@ class Parser { * @since 1.19 */ public function recursivePreprocess( $text, $frame = false ) { - wfProfileIn( __METHOD__ ); $text = $this->replaceVariables( $text, $frame ); $text = $this->mStripState->unstripBoth( $text ); - wfProfileOut( __METHOD__ ); return $text; } @@ -1008,7 +1005,6 @@ class Parser { * @return string */ public function doTableStuff( $text ) { - wfProfileIn( __METHOD__ ); $lines = StringUtils::explode( "\n", $text ); $out = ''; @@ -1195,31 +1191,27 @@ class Parser { $out = ''; } - wfProfileOut( __METHOD__ ); - return $out; } /** - * Helper function for parse() that transforms wiki markup into + * Helper function for parse() that transforms wiki markup into half-parsed * HTML. Only called for $mOutputType == self::OT_HTML. * * @private * * @param string $text * @param bool $isMain - * @param bool $frame + * @param PPFrame|bool $frame * * @return string */ public function internalParse( $text, $isMain = true, $frame = false ) { - wfProfileIn( __METHOD__ ); $origText = $text; # Hook to suspend the parser in this state - if ( !wfRunHooks( 'ParserBeforeInternalParse', array( &$this, &$text, &$this->mStripState ) ) ) { - wfProfileOut( __METHOD__ ); + if ( !Hooks::run( 'ParserBeforeInternalParse', array( &$this, &$text, &$this->mStripState ) ) ) { return $text; } @@ -1239,14 +1231,14 @@ class Parser { $text = $this->replaceVariables( $text ); } - wfRunHooks( 'InternalParseBeforeSanitize', array( &$this, &$text, &$this->mStripState ) ); + Hooks::run( 'InternalParseBeforeSanitize', array( &$this, &$text, &$this->mStripState ) ); $text = Sanitizer::removeHTMLtags( $text, array( &$this, 'attributeStripCallback' ), false, array_keys( $this->mTransparentTagHooks ) ); - wfRunHooks( 'InternalParseBeforeLinks', array( &$this, &$text, &$this->mStripState ) ); + Hooks::run( 'InternalParseBeforeLinks', array( &$this, &$text, &$this->mStripState ) ); # Tables need to come after variable replacement for things to work # properly; putting them before other transformations should keep @@ -1270,7 +1262,101 @@ class Parser { $text = $this->doMagicLinks( $text ); $text = $this->formatHeadings( $text, $origText, $isMain ); - wfProfileOut( __METHOD__ ); + return $text; + } + + /** + * Helper function for parse() that transforms half-parsed HTML into fully + * parsed HTML. + * + * @param string $text + * @param bool $isMain + * @param bool $linestart + * @return string + */ + private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) { + global $wgUseTidy, $wgAlwaysUseTidy; + + $text = $this->mStripState->unstripGeneral( $text ); + + # Clean up special characters, only run once, next-to-last before doBlockLevels + $fixtags = array( + # french spaces, last one Guillemet-left + # only if there is something before the space + '/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1 ', + # french spaces, Guillemet-right + '/(\\302\\253) /' => '\\1 ', + '/ (!\s*important)/' => ' \\1', # Beware of CSS magic word !important, bug #11874. + ); + $text = preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text ); + + $text = $this->doBlockLevels( $text, $linestart ); + + $this->replaceLinkHolders( $text ); + + /** + * The input doesn't get language converted if + * a) It's disabled + * b) Content isn't converted + * c) It's a conversion table + * d) it is an interface message (which is in the user language) + */ + if ( !( $this->mOptions->getDisableContentConversion() + || isset( $this->mDoubleUnderscores['nocontentconvert'] ) ) + ) { + if ( !$this->mOptions->getInterfaceMessage() ) { + # The position of the convert() call should not be changed. it + # assumes that the links are all replaced and the only thing left + # is the mark. + $text = $this->getConverterLanguage()->convert( $text ); + } + } + + $text = $this->mStripState->unstripNoWiki( $text ); + + if ( $isMain ) { + Hooks::run( 'ParserBeforeTidy', array( &$this, &$text ) ); + } + + $text = $this->replaceTransparentTags( $text ); + $text = $this->mStripState->unstripGeneral( $text ); + + $text = Sanitizer::normalizeCharReferences( $text ); + + if ( ( $wgUseTidy && $this->mOptions->getTidy() ) || $wgAlwaysUseTidy ) { + $text = MWTidy::tidy( $text ); + } else { + # attempt to sanitize at least some nesting problems + # (bug #2702 and quite a few others) + $tidyregs = array( + # ''Something [http://www.cool.com cool''] --> + # Somethingcool> + '/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' => + '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9', + # fix up an anchor inside another anchor, only + # at least for a single single nested link (bug 3695) + '/(]+>)([^<]*)(]+>[^<]*)<\/a>(.*)<\/a>/' => + '\\1\\2\\3\\1\\4', + # fix div inside inline elements- doBlockLevels won't wrap a line which + # contains a div, so fix it up here; replace + # div with escaped text + '/(<([aib]) [^>]+>)([^<]*)(]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' => + '\\1\\3<div\\5>\\6</div>\\8\\9', + # remove empty italic or bold tag pairs, some + # introduced by rules above + '/<([bi])><\/\\1>/' => '', + ); + + $text = preg_replace( + array_keys( $tidyregs ), + array_values( $tidyregs ), + $text ); + } + + if ( $isMain ) { + Hooks::run( 'ParserAfterTidy', array( &$this, &$text ) ); + } + return $text; } @@ -1286,22 +1372,24 @@ class Parser { * @return string */ public function doMagicLinks( $text ) { - wfProfileIn( __METHOD__ ); $prots = wfUrlProtocolsWithoutProtRel(); $urlChar = self::EXT_LINK_URL_CLASS; + $space = self::SPACE_NOT_NL; # non-newline space + $spdash = "(?:-|$space)"; # a dash or a non-newline space + $spaces = "$space++"; # possessive match of 1 or more spaces $text = preg_replace_callback( '!(?: # Start cases (].*?) | # m[1]: Skip link text (<.*?>) | # m[2]: Skip stuff inside HTML elements' . " - (\\b(?i:$prots)$urlChar+) | # m[3]: Free external links" . ' - (?:RFC|PMID)\s+([0-9]+) | # m[4]: RFC or PMID, capture number - ISBN\s+(\b # m[5]: ISBN, capture number - (?: 97[89] [\ \-]? )? # optional 13-digit ISBN prefix - (?: [0-9] [\ \-]? ){9} # 9 digits with opt. delimiters + (\b(?i:$prots)$urlChar+) | # m[3]: Free external links + \b(?:RFC|PMID) $spaces # m[4]: RFC or PMID, capture number + ([0-9]+)\b | + \bISBN $spaces ( # m[5]: ISBN, capture number + (?: 97[89] $spdash? )? # optional 13-digit ISBN prefix + (?: [0-9] $spdash? ){9} # 9 digits with opt. delimiters [0-9Xx] # check digit - \b) - )!xu', array( &$this, 'magicLinkCallback' ), $text ); - wfProfileOut( __METHOD__ ); + )\b + )!xu", array( &$this, 'magicLinkCallback' ), $text ); return $text; } @@ -1341,6 +1429,8 @@ class Parser { } elseif ( isset( $m[5] ) && $m[5] !== '' ) { # ISBN $isbn = $m[5]; + $space = self::SPACE_NOT_NL; # non-newline space + $isbn = preg_replace( "/$space/", ' ', $isbn ); $num = strtr( $isbn, array( '-' => '', ' ' => '', @@ -1364,7 +1454,6 @@ class Parser { * @private */ public function makeFreeExternalLink( $url ) { - wfProfileIn( __METHOD__ ); $trail = ''; @@ -1384,7 +1473,20 @@ class Parser { $sep .= ')'; } - $numSepChars = strspn( strrev( $url ), $sep ); + $urlRev = strrev( $url ); + $numSepChars = strspn( $urlRev, $sep ); + # Don't break a trailing HTML entity by moving the ; into $trail + # This is in hot code, so use substr_compare to avoid having to + # create a new string object for the comparison + if ( $numSepChars && substr_compare( $url, ";", -$numSepChars, 1 ) === 0) { + # more optimization: instead of running preg_match with a $ + # anchor, which can be slow, do the match on the reversed + # string starting at the desired offset. + # un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i + if ( preg_match( '/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $urlRev, $m2, 0, $numSepChars ) ) { + $numSepChars--; + } + } if ( $numSepChars ) { $trail = substr( $url, -$numSepChars ) . $trail; $url = substr( $url, 0, -$numSepChars ); @@ -1405,7 +1507,6 @@ class Parser { $pasteurized = self::normalizeLinkUrl( $url ); $this->mOutput->addExternalLink( $pasteurized ); } - wfProfileOut( __METHOD__ ); return $text . $trail; } @@ -1419,12 +1520,10 @@ class Parser { * @return string */ public function doHeadings( $text ) { - wfProfileIn( __METHOD__ ); for ( $i = 6; $i >= 1; --$i ) { $h = str_repeat( '=', $i ); $text = preg_replace( "/^$h(.+)$h\\s*$/m", "\\1", $text ); } - wfProfileOut( __METHOD__ ); return $text; } @@ -1437,14 +1536,12 @@ class Parser { * @return string The altered text */ public function doAllQuotes( $text ) { - wfProfileIn( __METHOD__ ); $outtext = ''; $lines = StringUtils::explode( "\n", $text ); foreach ( $lines as $line ) { $outtext .= $this->doQuotes( $line ) . "\n"; } $outtext = substr( $outtext, 0, -1 ); - wfProfileOut( __METHOD__ ); return $outtext; } @@ -1646,11 +1743,9 @@ class Parser { * @return string */ public function replaceExternalLinks( $text ) { - wfProfileIn( __METHOD__ ); $bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE ); if ( $bits === false ) { - wfProfileOut( __METHOD__ ); throw new MWException( "PCRE needs to be compiled with " . "--enable-unicode-properties in order for MediaWiki to function" ); } @@ -1714,7 +1809,6 @@ class Parser { $this->mOutput->addExternalLink( $pasteurized ); } - wfProfileOut( __METHOD__ ); return $s; } @@ -1912,9 +2006,7 @@ class Parser { */ public function replaceInternalLinks2( &$s ) { global $wgExtraInterlanguageLinkPrefixes; - wfProfileIn( __METHOD__ ); - wfProfileIn( __METHOD__ . '-setup' ); static $tc = false, $e1, $e1_img; # the % is needed to support urlencoded titles as well if ( !$tc ) { @@ -1946,8 +2038,6 @@ class Parser { } if ( is_null( $this->mTitle ) ) { - wfProfileOut( __METHOD__ . '-setup' ); - wfProfileOut( __METHOD__ ); throw new MWException( __METHOD__ . ": \$this->mTitle is null\n" ); } $nottalk = !$this->mTitle->isTalkPage(); @@ -1964,7 +2054,6 @@ class Parser { } $useSubpages = $this->areSubpagesAllowed(); - wfProfileOut( __METHOD__ . '-setup' ); // @codingStandardsIgnoreStart Squiz.WhiteSpace.SemicolonSpacing.Incorrect # Loop for each link @@ -1980,7 +2069,6 @@ class Parser { } if ( $useLinkPrefixExtension ) { - wfProfileIn( __METHOD__ . '-prefixhandling' ); if ( preg_match( $e2, $s, $m ) ) { $prefix = $m[2]; $s = $m[1]; @@ -1992,12 +2080,10 @@ class Parser { $prefix = $first_prefix; $first_prefix = false; } - wfProfileOut( __METHOD__ . '-prefixhandling' ); } $might_be_img = false; - wfProfileIn( __METHOD__ . "-e1" ); if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt $text = $m[2]; # If we get a ] at the beginning of $m[3] that means we have a link that's something like: @@ -2031,11 +2117,8 @@ class Parser { $trail = ""; } else { # Invalid form; output directly $s .= $prefix . '[[' . $line; - wfProfileOut( __METHOD__ . "-e1" ); continue; } - wfProfileOut( __METHOD__ . "-e1" ); - wfProfileIn( __METHOD__ . "-misc" ); $origLink = $m[1]; @@ -2044,7 +2127,6 @@ class Parser { # should be external links. if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $origLink ) ) { $s .= $prefix . '[[' . $line; - wfProfileOut( __METHOD__ . "-misc" ); continue; } @@ -2061,21 +2143,16 @@ class Parser { $link = substr( $link, 1 ); } - wfProfileOut( __METHOD__ . "-misc" ); - wfProfileIn( __METHOD__ . "-title" ); $nt = Title::newFromText( $this->mStripState->unstripNoWiki( $link ) ); if ( $nt === null ) { $s .= $prefix . '[[' . $line; - wfProfileOut( __METHOD__ . "-title" ); continue; } $ns = $nt->getNamespace(); $iw = $nt->getInterwiki(); - wfProfileOut( __METHOD__ . "-title" ); if ( $might_be_img ) { # if this is actually an invalid link - wfProfileIn( __METHOD__ . "-might_be_img" ); if ( $ns == NS_FILE && $noforce ) { # but might be an image $found = false; while ( true ) { @@ -2107,16 +2184,13 @@ class Parser { $holders->merge( $this->replaceInternalLinks2( $text ) ); $s .= "{$prefix}[[$link|$text"; # note: no $trail, because without an end, there *is* no trail - wfProfileOut( __METHOD__ . "-might_be_img" ); continue; } } else { # it's not an image, so output it raw $s .= "{$prefix}[[$link|$text"; # note: no $trail, because without an end, there *is* no trail - wfProfileOut( __METHOD__ . "-might_be_img" ); continue; } - wfProfileOut( __METHOD__ . "-might_be_img" ); } $wasblank = ( $text == '' ); @@ -2133,7 +2207,6 @@ class Parser { # Link not escaped by : , create the various objects if ( $noforce && !$nt->wasLocalInterwiki() ) { # Interwikis - wfProfileIn( __METHOD__ . "-interwiki" ); if ( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && ( Language::fetchLanguageName( $iw, null, 'mw' ) || @@ -2148,13 +2221,10 @@ class Parser { $s = rtrim( $s . $prefix ); $s .= trim( $trail, "\n" ) == '' ? '': $prefix . $trail; - wfProfileOut( __METHOD__ . "-interwiki" ); continue; } - wfProfileOut( __METHOD__ . "-interwiki" ); if ( $ns == NS_FILE ) { - wfProfileIn( __METHOD__ . "-image" ); if ( !wfIsBadImage( $nt->getDBkey(), $this->mTitle ) ) { if ( $wasblank ) { # if no parameters were passed, $text @@ -2175,12 +2245,10 @@ class Parser { } else { $s .= $prefix . $trail; } - wfProfileOut( __METHOD__ . "-image" ); continue; } if ( $ns == NS_CATEGORY ) { - wfProfileIn( __METHOD__ . "-category" ); $s = rtrim( $s . "\n" ); # bug 87 if ( $wasblank ) { @@ -2198,7 +2266,6 @@ class Parser { */ $s .= trim( $prefix . $trail, "\n" ) == '' ? '' : $prefix . $trail; - wfProfileOut( __METHOD__ . "-category" ); continue; } } @@ -2214,22 +2281,19 @@ class Parser { # NS_MEDIA is a pseudo-namespace for linking directly to a file # @todo FIXME: Should do batch file existence checks, see comment below if ( $ns == NS_MEDIA ) { - wfProfileIn( __METHOD__ . "-media" ); # Give extensions a chance to select the file revision for us $options = array(); $descQuery = false; - wfRunHooks( 'BeforeParserFetchFileAndTitle', + Hooks::run( 'BeforeParserFetchFileAndTitle', array( $this, $nt, &$options, &$descQuery ) ); # Fetch and register the file (file title may be different via hooks) list( $file, $nt ) = $this->fetchFileAndTitle( $nt, $options ); # Cloak with NOPARSE to avoid replacement in replaceExternalLinks $s .= $prefix . $this->armorLinks( Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail; - wfProfileOut( __METHOD__ . "-media" ); continue; } - wfProfileIn( __METHOD__ . "-always_known" ); # Some titles, such as valid special pages or files in foreign repos, should # be shown as bluelinks even though they're not included in the page table # @@ -2242,9 +2306,7 @@ class Parser { # Links will be added to the output link list after checking $s .= $holders->makeHolder( $nt, $text, array(), $trail, $prefix ); } - wfProfileOut( __METHOD__ . "-always_known" ); } - wfProfileOut( __METHOD__ ); return $holders; } @@ -2443,7 +2505,6 @@ class Parser { * @return string The lists rendered as HTML */ public function doBlockLevels( $text, $linestart ) { - wfProfileIn( __METHOD__ ); # Parsing through the text line by line. The main thing # happening here is handling of block-level elements p, pre, @@ -2552,7 +2613,6 @@ class Parser { # If we have no prefixes, go to paragraph mode. if ( 0 == $prefixLength ) { - wfProfileIn( __METHOD__ . "-paragraph" ); # No prefix (not in list)--go to paragraph mode # XXX: use a stack for nestable elements like span, table and div $openmatch = preg_match( @@ -2568,11 +2628,11 @@ class Parser { $t ); - if ( $openmatch or $closematch ) { + if ( $openmatch || $closematch ) { $paragraphStack = false; # @todo bug 5718: paragraph closed $output .= $this->closeParagraph(); - if ( $preOpenMatch and !$preCloseMatch ) { + if ( $preOpenMatch && !$preCloseMatch ) { $this->mInPre = true; } $bqOffset = 0; @@ -2621,7 +2681,6 @@ class Parser { } } } - wfProfileOut( __METHOD__ . "-paragraph" ); } # somewhere above we forget to get out of pre block (bug 785) if ( $preCloseMatch && $this->mInPre ) { @@ -2646,7 +2705,6 @@ class Parser { $this->mLastSection = ''; } - wfProfileOut( __METHOD__ ); return $output; } @@ -2661,12 +2719,10 @@ class Parser { * @return string The position of the ':', or false if none found */ public function findColonNoLinks( $str, &$before, &$after ) { - wfProfileIn( __METHOD__ ); $pos = strpos( $str, ':' ); if ( $pos === false ) { # Nothing to find! - wfProfileOut( __METHOD__ ); return false; } @@ -2675,7 +2731,6 @@ class Parser { # Easy; no tag nesting to worry about $before = substr( $str, 0, $pos ); $after = substr( $str, $pos + 1 ); - wfProfileOut( __METHOD__ ); return $pos; } @@ -2699,7 +2754,6 @@ class Parser { # We found it! $before = substr( $str, 0, $i ); $after = substr( $str, $i + 1 ); - wfProfileOut( __METHOD__ ); return $i; } # Embedded in a tag; don't break it. @@ -2709,7 +2763,6 @@ class Parser { $colon = strpos( $str, ':', $i ); if ( $colon === false ) { # Nothing else interesting - wfProfileOut( __METHOD__ ); return false; } $lt = strpos( $str, '<', $i ); @@ -2718,7 +2771,6 @@ class Parser { # We found it! $before = substr( $str, 0, $colon ); $after = substr( $str, $colon + 1 ); - wfProfileOut( __METHOD__ ); return $i; } } @@ -2769,7 +2821,6 @@ class Parser { $stack--; if ( $stack < 0 ) { wfDebug( __METHOD__ . ": Invalid input; too many close tags\n" ); - wfProfileOut( __METHOD__ ); return false; } $state = self::COLON_STATE_TEXT; @@ -2804,16 +2855,13 @@ class Parser { } break; default: - wfProfileOut( __METHOD__ ); throw new MWException( "State machine error in " . __METHOD__ ); } } if ( $stack > 0 ) { wfDebug( __METHOD__ . ": Invalid input; not enough close tags (stack $stack, state $state)\n" ); - wfProfileOut( __METHOD__ ); return false; } - wfProfileOut( __METHOD__ ); return false; } @@ -2845,14 +2893,14 @@ class Parser { * Some of these require message or data lookups and can be * expensive to check many times. */ - if ( wfRunHooks( 'ParserGetVariableValueVarCache', array( &$this, &$this->mVarCache ) ) ) { + if ( Hooks::run( 'ParserGetVariableValueVarCache', array( &$this, &$this->mVarCache ) ) ) { if ( isset( $this->mVarCache[$index] ) ) { return $this->mVarCache[$index]; } } $ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() ); - wfRunHooks( 'ParserGetVariableValueTs', array( &$this, &$ts ) ); + Hooks::run( 'ParserGetVariableValueTs', array( &$this, &$ts ) ); $pageLang = $this->getFunctionLang(); @@ -3129,10 +3177,6 @@ class Parser { case 'numberofedits': $value = $pageLang->formatNum( SiteStats::edits() ); break; - case 'numberofviews': - global $wgDisableCounters; - $value = !$wgDisableCounters ? $pageLang->formatNum( SiteStats::views() ) : ''; - break; case 'currenttimestamp': $value = wfTimestamp( TS_MW, $ts ); break; @@ -3164,7 +3208,7 @@ class Parser { break; default: $ret = null; - wfRunHooks( + Hooks::run( 'ParserGetVariableValueSwitch', array( &$this, &$this->mVarCache, &$index, &$ret, &$frame ) ); @@ -3185,13 +3229,11 @@ class Parser { * @private */ public function initialiseVariables() { - wfProfileIn( __METHOD__ ); $variableIDs = MagicWord::getVariableIDs(); $substIDs = MagicWord::getSubstIDs(); $this->mVariables = new MagicWordArray( $variableIDs ); $this->mSubstWords = new MagicWordArray( $substIDs ); - wfProfileOut( __METHOD__ ); } /** @@ -3266,7 +3308,6 @@ class Parser { if ( strlen( $text ) < 1 || strlen( $text ) > $this->mOptions->getMaxIncludeSize() ) { return $text; } - wfProfileIn( __METHOD__ ); if ( $frame === false ) { $frame = $this->getPreprocessor()->newFrame(); @@ -3280,7 +3321,6 @@ class Parser { $flags = $argsOnly ? PPFrame::NO_TEMPLATES : 0; $text = $frame->expand( $dom, $flags ); - wfProfileOut( __METHOD__ ); return $text; } @@ -3358,8 +3398,6 @@ class Parser { * @return string The text of the template */ public function braceSubstitution( $piece, $frame ) { - wfProfileIn( __METHOD__ ); - wfProfileIn( __METHOD__ . '-setup' ); // Flags @@ -3392,12 +3430,10 @@ class Parser { # @todo FIXME: If piece['parts'] is null then the call to getLength() # below won't work b/c this $args isn't an object $args = ( null == $piece['parts'] ) ? array() : $piece['parts']; - wfProfileOut( __METHOD__ . '-setup' ); - $titleProfileIn = null; // profile templates + $profileSection = null; // profile templates # SUBST - wfProfileIn( __METHOD__ . '-modifiers' ); if ( !$found ) { $substMatch = $this->mSubstWords->matchStartAndRemove( $part1 ); @@ -3454,11 +3490,9 @@ class Parser { $forceRawInterwiki = true; } } - wfProfileOut( __METHOD__ . '-modifiers' ); # Parser functions if ( !$found ) { - wfProfileIn( __METHOD__ . '-pfunc' ); $colonPos = strpos( $part1, ':' ); if ( $colonPos !== false ) { @@ -3470,8 +3504,6 @@ class Parser { try { $result = $this->callParserFunction( $frame, $func, $funcArgs ); } catch ( Exception $ex ) { - wfProfileOut( __METHOD__ . '-pfunc' ); - wfProfileOut( __METHOD__ ); throw $ex; } @@ -3480,7 +3512,6 @@ class Parser { # here. extract( $result ); } - wfProfileOut( __METHOD__ . '-pfunc' ); } # Finish mangling title and then check for loops. @@ -3515,12 +3546,7 @@ class Parser { # Load from database if ( !$found && $title ) { - if ( !Profiler::instance()->isPersistent() ) { - # Too many unique items can kill profiling DBs/collectors - $titleProfileIn = __METHOD__ . "-title-" . $title->getPrefixedDBkey(); - wfProfileIn( $titleProfileIn ); // template in - } - wfProfileIn( __METHOD__ . '-loadtpl' ); + $profileSection = $this->mProfiler->scopedProfileIn( $title->getPrefixedDBkey() ); if ( !$title->isExternal() ) { if ( $title->isSpecialPage() && $this->mOptions->getAllowSpecialInclusion() @@ -3594,17 +3620,15 @@ class Parser { . ''; wfDebug( __METHOD__ . ": template loop broken at '$titleText'\n" ); } - wfProfileOut( __METHOD__ . '-loadtpl' ); } # If we haven't found text to substitute by now, we're done # Recover the source wikitext and return it if ( !$found ) { $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args ); - if ( $titleProfileIn ) { - wfProfileOut( $titleProfileIn ); // template out + if ( $profileSection ) { + $this->mProfiler->scopedProfileOut( $profileSection ); } - wfProfileOut( __METHOD__ ); return array( 'object' => $text ); } @@ -3628,8 +3652,8 @@ class Parser { $isLocalObj = false; } - if ( $titleProfileIn ) { - wfProfileOut( $titleProfileIn ); // template out + if ( $profileSection ) { + $this->mProfiler->scopedProfileOut( $profileSection ); } # Replace raw HTML by a placeholder @@ -3670,7 +3694,6 @@ class Parser { $ret = array( 'text' => $text ); } - wfProfileOut( __METHOD__ ); return $ret; } @@ -3696,7 +3719,6 @@ class Parser { public function callParserFunction( $frame, $function, array $args = array() ) { global $wgContLang; - wfProfileIn( __METHOD__ ); # Case sensitive functions if ( isset( $this->mFunctionSynonyms[1][$function] ) ) { @@ -3707,23 +3729,19 @@ class Parser { if ( isset( $this->mFunctionSynonyms[0][$function] ) ) { $function = $this->mFunctionSynonyms[0][$function]; } else { - wfProfileOut( __METHOD__ ); return array( 'found' => false ); } } - wfProfileIn( __METHOD__ . '-pfunc-' . $function ); list( $callback, $flags ) = $this->mFunctionHooks[$function]; # Workaround for PHP bug 35229 and similar if ( !is_callable( $callback ) ) { - wfProfileOut( __METHOD__ . '-pfunc-' . $function ); - wfProfileOut( __METHOD__ ); throw new MWException( "Tag hook for $function is not callable\n" ); } $allArgs = array( &$this ); - if ( $flags & SFH_OBJECT_ARGS ) { + if ( $flags & self::SFH_OBJECT_ARGS ) { # Convert arguments to PPNodes and collect for appending to $allArgs $funcArgs = array(); foreach ( $args as $k => $v ) { @@ -3783,8 +3801,6 @@ class Parser { $result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags ); $result['isChildObj'] = true; } - wfProfileOut( __METHOD__ . '-pfunc-' . $function ); - wfProfileOut( __METHOD__ ); return $result; } @@ -3829,6 +3845,44 @@ class Parser { return array( $dom, $title ); } + /** + * Fetch the current revision of a given title. Note that the revision + * (and even the title) may not exist in the database, so everything + * contributing to the output of the parser should use this method + * where possible, rather than getting the revisions themselves. This + * method also caches its results, so using it benefits performance. + * + * @since 1.24 + * @param Title $title + * @return Revision + */ + public function fetchCurrentRevisionOfTitle( $title ) { + $cacheKey = $title->getPrefixedDBkey(); + if ( !$this->currentRevisionCache ) { + $this->currentRevisionCache = new MapCacheLRU( 100 ); + } + if ( !$this->currentRevisionCache->has( $cacheKey ) ) { + $this->currentRevisionCache->set( $cacheKey, + // Defaults to Parser::statelessFetchRevision() + call_user_func( $this->mOptions->getCurrentRevisionCallback(), $title, $this ) + ); + } + return $this->currentRevisionCache->get( $cacheKey ); + } + + /** + * Wrapper around Revision::newFromTitle to allow passing additional parameters + * without passing them on to it. + * + * @since 1.24 + * @param Title $title + * @param Parser|bool $parser + * @return Revision + */ + public static function statelessFetchRevision( $title, $parser = false ) { + return Revision::newFromTitle( $title ); + } + /** * Fetch the unparsed text of a template and register a reference to it. * @param Title $title @@ -3881,7 +3935,7 @@ class Parser { for ( $i = 0; $i < 2 && is_object( $title ); $i++ ) { # Give extensions a chance to select the revision instead $id = false; # Assume current - wfRunHooks( 'BeforeParserFetchTemplateAndtitle', + Hooks::run( 'BeforeParserFetchTemplateAndtitle', array( $parser, $title, &$skip, &$id ) ); if ( $skip ) { @@ -3894,9 +3948,13 @@ class Parser { break; } # Get the revision - $rev = $id - ? Revision::newFromId( $id ) - : Revision::newFromTitle( $title, false, Revision::READ_NORMAL ); + if ( $id ) { + $rev = Revision::newFromId( $id ); + } elseif ( $parser ) { + $rev = $parser->fetchCurrentRevisionOfTitle( $title ); + } else { + $rev = Revision::newFromTitle( $title ); + } $rev_id = $rev ? $rev->getId() : 0; # If there is no current revision, there is no page if ( $id === false && !$rev ) { @@ -4041,7 +4099,7 @@ class Parser { return $obj->tc_contents; } - $req = MWHttpRequest::factory( $url ); + $req = MWHttpRequest::factory( $url, array(), __METHOD__ ); $status = $req->execute(); // Status object if ( $status->isOK() ) { $text = $req->getContent(); @@ -4072,7 +4130,6 @@ class Parser { * @return array */ public function argSubstitution( $piece, $frame ) { - wfProfileIn( __METHOD__ ); $error = false; $parts = $piece['parts']; @@ -4107,7 +4164,6 @@ class Parser { $ret = array( 'text' => $text ); } - wfProfileOut( __METHOD__ ); return $ret; } @@ -4238,7 +4294,6 @@ class Parser { * @return string */ public function doDoubleUnderscore( $text ) { - wfProfileIn( __METHOD__ ); # The position of __TOC__ needs to be recorded $mw = MagicWord::get( 'toc' ); @@ -4286,45 +4341,16 @@ class Parser { $this->mOutput->setProperty( $key, '' ); } - wfProfileOut( __METHOD__ ); return $text; } /** - * Add a tracking category, getting the title from a system message, - * or print a debug message if the title is invalid. - * - * Please add any message that you use with this function to - * $wgTrackingCategories. That way they will be listed on - * Special:TrackingCategories. - * + * @see ParserOutput::addTrackingCategory() * @param string $msg Message key * @return bool Whether the addition was successful */ public function addTrackingCategory( $msg ) { - if ( $this->mTitle->getNamespace() === NS_SPECIAL ) { - wfDebug( __METHOD__ . ": Not adding tracking category $msg to special page!\n" ); - return false; - } - // Important to parse with correct title (bug 31469) - $cat = wfMessage( $msg ) - ->title( $this->getTitle() ) - ->inContentLanguage() - ->text(); - - # Allow tracking categories to be disabled by setting them to "-" - if ( $cat === '-' ) { - return false; - } - - $containerCategory = Title::makeTitleSafe( NS_CATEGORY, $cat ); - if ( $containerCategory ) { - $this->mOutput->addCategory( $containerCategory->getDBkey(), $this->getDefaultSort() ); - return true; - } else { - wfDebug( __METHOD__ . ": [[MediaWiki:$msg]] is not a valid title!\n" ); - return false; - } + return $this->mOutput->addTrackingCategory( $msg, $this->mTitle ); } /** @@ -4361,7 +4387,7 @@ class Parser { # links - this is for later, but we need the number of headlines right now $matches = array(); $numMatches = preg_match_all( - '/[1-6])(?P.*?' . '>)\s*(?P
[\s\S]*?)\s*<\/H[1-6] *>/i', + '/[1-6])(?P.*?>)\s*(?P
[\s\S]*?)\s*<\/H[1-6] *>/i', $text, $matches ); @@ -4508,14 +4534,15 @@ class Parser { # * and (bug 8393) # * (bug 26375) # * (r105284) + # * (bug 72884) # * and (bug 35167) # # We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from , # to allow setting directionality in toc items. $tocline = preg_replace( array( - '#<(?!/?(span|sup|sub|i|b)(?: [^>]*)?>).*?' . '>#', - '#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|i|b))(?: .*?)?' . '>#' + '#<(?!/?(span|sup|sub|bdi|i|b)(?: [^>]*)?>).*?>#', + '#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|bdi|i|b))(?: .*?)?>#' ), array( '', '<$1>' ), $safeHeadline @@ -4523,7 +4550,7 @@ class Parser { $tocline = trim( $tocline ); # For the anchor, strip out HTML-y stuff period - $safeHeadline = preg_replace( '/<.*?' . '>/', '', $safeHeadline ); + $safeHeadline = preg_replace( '/<.*?>/', '', $safeHeadline ); $safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline ); # Save headline for section edit hint before it's escaped @@ -4556,7 +4583,7 @@ class Parser { # HTML names must be case-insensitively unique (bug 10721). # This does not apply to Unicode characters per - # http://dev.w3.org/html5/spec/infrastructure.html#case-sensitivity-and-string-comparison + # http://www.w3.org/TR/html5/infrastructure.html#case-sensitivity-and-string-comparison # @todo FIXME: We may be changing them depending on the current locale. $arrayKey = strtolower( $safeHeadline ); if ( $legacyHeadline === false ) { @@ -4565,16 +4592,22 @@ class Parser { $legacyArrayKey = strtolower( $legacyHeadline ); } - # count how many in assoc. array so we can track dupes in anchors + # Create the anchor for linking from the TOC to the section + $anchor = $safeHeadline; + $legacyAnchor = $legacyHeadline; if ( isset( $refers[$arrayKey] ) ) { - $refers[$arrayKey]++; + for ( $i = 2; isset( $refers["${arrayKey}_$i"] ); ++$i ); + $anchor .= "_$i"; + $refers["${arrayKey}_$i"] = true; } else { - $refers[$arrayKey] = 1; + $refers[$arrayKey] = true; } - if ( isset( $refers[$legacyArrayKey] ) ) { - $refers[$legacyArrayKey]++; + if ( $legacyHeadline !== false && isset( $refers[$legacyArrayKey] ) ) { + for ( $i = 2; isset( $refers["${legacyArrayKey}_$i"] ); ++$i ); + $legacyAnchor .= "_$i"; + $refers["${legacyArrayKey}_$i"] = true; } else { - $refers[$legacyArrayKey] = 1; + $refers[$legacyArrayKey] = true; } # Don't number the heading if it is the only one (looks silly) @@ -4587,15 +4620,6 @@ class Parser { ) . ' ' . $headline; } - # Create the anchor for linking from the TOC to the section - $anchor = $safeHeadline; - $legacyAnchor = $legacyHeadline; - if ( $refers[$arrayKey] > 1 ) { - $anchor .= '_' . $refers[$arrayKey]; - } - if ( $legacyHeadline !== false && $refers[$legacyArrayKey] > 1 ) { - $legacyAnchor .= '_' . $refers[$legacyArrayKey]; - } if ( $enoughToc && ( !isset( $wgMaxTocLevel ) || $toclevel < $wgMaxTocLevel ) ) { $toc .= Linker::tocLine( $anchor, $tocline, $numbering, $toclevel, ( $isTemplate ? false : $sectionIndex ) ); @@ -4691,7 +4715,7 @@ class Parser { } # split up and insert constructed headlines - $blocks = preg_split( '/[\s\S]*?<\/H[1-6]>/i', $text ); + $blocks = preg_split( '/[\s\S]*?<\/H[1-6]>/i', $text ); $i = 0; // build an array of document sections @@ -4714,7 +4738,7 @@ class Parser { * &$sectionContent : ref to the content of the section * $showEditLinks : boolean describing whether this section has an edit link */ - wfRunHooks( 'ParserSectionCreate', array( $this, $i, &$sections[$i], $showEditLink ) ); + Hooks::run( 'ParserSectionCreate', array( $this, $i, &$sections[$i], $showEditLink ) ); $i++; } @@ -4736,7 +4760,7 @@ class Parser { /** * Transform wiki markup when saving a page by doing "\r\n" -> "\n" - * conversion, substitting signatures, {{subst:}} templates, etc. + * conversion, substituting signatures, {{subst:}} templates, etc. * * @param string $text The text to transform * @param Title $title The Title object for the current article @@ -4756,6 +4780,7 @@ class Parser { $pairs = array( "\r\n" => "\n", + "\r" => "\n", ); $text = str_replace( array_keys( $pairs ), array_values( $pairs ), $text ); if ( $options->getPreSaveTransform() ) { @@ -4920,7 +4945,7 @@ class Parser { /** * Clean up signature text * - * 1) Strip ~~~, ~~~~ and ~~~~~ out of signatures @see cleanSigInSig + * 1) Strip 3, 4 or 5 tildes out of signatures @see cleanSigInSig * 2) Substitute all transclusions * * @param string $text @@ -4959,7 +4984,7 @@ class Parser { } /** - * Strip ~~~, ~~~~ and ~~~~~ out of signatures + * Strip 3, 4 or 5 tildes out of signatures. * * @param string $text * @return string Signature text with /~{3,5}/ removed @@ -5018,7 +5043,6 @@ class Parser { } $executing = true; - wfProfileIn( __METHOD__ ); if ( !$title ) { global $wgTitle; $title = $wgTitle; @@ -5027,7 +5051,6 @@ class Parser { $text = $this->preprocess( $text, $title, $options ); $executing = false; - wfProfileOut( __METHOD__ ); return $text; } @@ -5111,7 +5134,7 @@ class Parser { * The callback function should have the form: * function myParserFunction( &$parser, $arg1, $arg2, $arg3 ) { ... } * - * Or with SFH_OBJECT_ARGS: + * Or with Parser::SFH_OBJECT_ARGS: * function myParserFunction( $parser, $frame, $args ) { ... } * * The callback may either return the text result of the function, or an array with the text @@ -5125,10 +5148,10 @@ class Parser { * @param string $id The magic word ID * @param callable $callback The callback function (and object) to use * @param int $flags A combination of the following flags: - * SFH_NO_HASH No leading hash, i.e. {{plural:...}} instead of {{#if:...}} + * Parser::SFH_NO_HASH No leading hash, i.e. {{plural:...}} instead of {{#if:...}} * - * SFH_OBJECT_ARGS Pass the template arguments as PPNode objects instead of text. This - * allows for conditional expansion of the parse tree, allowing you to eliminate dead + * Parser::SFH_OBJECT_ARGS Pass the template arguments as PPNode objects instead of text. + * This allows for conditional expansion of the parse tree, allowing you to eliminate dead * branches and thus speed up parsing. It is also possible to analyse the parse tree of * the arguments, and to control the way they are expanded. * @@ -5170,7 +5193,7 @@ class Parser { $syn = $wgContLang->lc( $syn ); } # Add leading hash - if ( !( $flags & SFH_NO_HASH ) ) { + if ( !( $flags & self::SFH_NO_HASH ) ) { $syn = '#' . $syn; } # Remove trailing colon @@ -5224,11 +5247,9 @@ class Parser { * * @param string $text * @param int $options - * - * @return array Array of link CSS classes, indexed by PDBK. */ public function replaceLinkHolders( &$text, $options = 0 ) { - return $this->mLinkHolders->replace( $text ); + $this->mLinkHolders->replace( $text ); } /** @@ -5256,7 +5277,6 @@ class Parser { * @return string HTML */ public function renderImageGallery( $text, $params ) { - wfProfileIn( __METHOD__ ); $mode = false; if ( isset( $params['mode'] ) ) { @@ -5265,7 +5285,7 @@ class Parser { try { $ig = ImageGalleryBase::factory( $mode ); - } catch ( MWException $e ) { + } catch ( Exception $e ) { // If invalid type set, fallback to default. $ig = ImageGalleryBase::factory( false ); } @@ -5299,7 +5319,7 @@ class Parser { } $ig->setAdditionalOptions( $params ); - wfRunHooks( 'BeforeParserrenderImageGallery', array( &$this, &$ig ) ); + Hooks::run( 'BeforeParserrenderImageGallery', array( &$this, &$ig ) ); $lines = StringUtils::explode( "\n", $text ); foreach ( $lines as $line ) { @@ -5326,13 +5346,12 @@ class Parser { # file (which potentially could be of a different type and have different handler). $options = array(); $descQuery = false; - wfRunHooks( 'BeforeParserFetchFileAndTitle', + Hooks::run( 'BeforeParserFetchFileAndTitle', array( $this, $title, &$options, &$descQuery ) ); # Don't register it now, as ImageGallery does that later. $file = $this->fetchFileNoRegister( $title, $options ); $handler = $file ? $file->getHandler() : false; - wfProfileIn( __METHOD__ . '-getMagicWord' ); $paramMap = array( 'img_alt' => 'gallery-internal-alt', 'img_link' => 'gallery-internal-link', @@ -5345,7 +5364,6 @@ class Parser { } $mwArray = new MagicWordArray( array_keys( $paramMap ) ); - wfProfileOut( __METHOD__ . '-getMagicWord' ); $label = ''; $alt = ''; @@ -5407,13 +5425,12 @@ class Parser { $ig->add( $title, $label, $alt, $link, $handlerOptions ); } $html = $ig->toHTML(); - wfRunHooks( 'AfterParserFetchFileAndTitle', array( $this, $ig, &$html ) ); - wfProfileOut( __METHOD__ ); + Hooks::run( 'AfterParserFetchFileAndTitle', array( $this, $ig, &$html ) ); return $html; } /** - * @param string $handler + * @param MediaHandler $handler * @return array */ public function getImageParams( $handler ) { @@ -5496,7 +5513,7 @@ class Parser { # Give extensions a chance to select the file revision for us $options = array(); $descQuery = false; - wfRunHooks( 'BeforeParserFetchFileAndTitle', + Hooks::run( 'BeforeParserFetchFileAndTitle', array( $this, $title, &$options, &$descQuery ) ); # Fetch and register the file (file title may be different via hooks) list( $file, $title ) = $this->fetchFileAndTitle( $title, $options ); @@ -5660,7 +5677,7 @@ class Parser { $params['frame']['title'] = $this->stripAltText( $caption, $holders ); } - wfRunHooks( 'ParserMakeImageParams', array( $title, $file, &$params, $this ) ); + Hooks::run( 'ParserMakeImageParams', array( $title, $file, &$params, $this ) ); # Linker does the rest $time = isset( $options['time'] ) ? $options['time'] : false; @@ -5969,7 +5986,19 @@ class Parser { return null; } - $this->mRevisionObject = Revision::newFromId( $this->mRevisionId ); + $rev = call_user_func( + $this->mOptions->getCurrentRevisionCallback(), $this->getTitle(), $this + ); + + # If the parse is for a new revision, then the callback should have + # already been set to force the object and should match mRevisionId. + # If not, try to fetch by mRevisionId for sanity. + if ( $rev && $rev->getId() != $this->mRevisionId ) { + $rev = Revision::newFromId( $this->mRevisionId ); + } + + $this->mRevisionObject = $rev; + return $this->mRevisionObject; } @@ -5980,8 +6009,6 @@ class Parser { */ public function getRevisionTimestamp() { if ( is_null( $this->mRevisionTimestamp ) ) { - wfProfileIn( __METHOD__ ); - global $wgContLang; $revObject = $this->getRevisionObject(); @@ -5995,7 +6022,6 @@ class Parser { # it needs to be consistent for all visitors. $this->mRevisionTimestamp = $wgContLang->userAdjust( $timestamp, '' ); - wfProfileOut( __METHOD__ ); } return $this->mRevisionTimestamp; } @@ -6250,14 +6276,12 @@ class Parser { * @return array */ public function serializeHalfParsedText( $text ) { - wfProfileIn( __METHOD__ ); $data = array( 'text' => $text, 'version' => self::HALF_PARSED_VERSION, 'stripState' => $this->mStripState->getSubState( $text ), 'linkHolders' => $this->mLinkHolders->getSubArray( $text ) ); - wfProfileOut( __METHOD__ ); return $data; } diff --git a/includes/parser/ParserCache.php b/includes/parser/ParserCache.php index 79523003..bc8e4a69 100644 --- a/includes/parser/ParserCache.php +++ b/includes/parser/ParserCache.php @@ -184,12 +184,10 @@ class ParserCache { */ public function get( $article, $popts, $useOutdated = false ) { global $wgCacheEpoch; - wfProfileIn( __METHOD__ ); $canCache = $article->checkTouched(); if ( !$canCache ) { // It's a redirect now - wfProfileOut( __METHOD__ ); return false; } @@ -198,7 +196,6 @@ class ParserCache { $parserOutputKey = $this->getKey( $article, $popts, $useOutdated ); if ( $parserOutputKey === false ) { wfIncrStats( 'pcache_miss_absent' ); - wfProfileOut( __METHOD__ ); return false; } @@ -206,7 +203,6 @@ class ParserCache { if ( !$value ) { wfDebug( "ParserOutput cache miss.\n" ); wfIncrStats( "pcache_miss_absent" ); - wfProfileOut( __METHOD__ ); return false; } @@ -233,7 +229,6 @@ class ParserCache { wfIncrStats( "pcache_hit" ); } - wfProfileOut( __METHOD__ ); return $value; } @@ -262,8 +257,6 @@ class ParserCache { $optionsKey->setCacheRevisionId( $revId ); $parserOutput->setCacheRevisionId( $revId ); - $optionsKey->setContainsOldMagic( $parserOutput->containsOldMagic() ); - $parserOutputKey = $this->getParserOutputKey( $page, $popts->optionsHash( $optionsKey->mUsedOptions, $page->getTitle() ) ); diff --git a/includes/parser/ParserOptions.php b/includes/parser/ParserOptions.php index 7e4059b8..100656d1 100644 --- a/includes/parser/ParserOptions.php +++ b/includes/parser/ParserOptions.php @@ -25,7 +25,7 @@ * @brief Set options of the Parser * * All member variables are supposed to be private in theory, although in - * practise this is not the case. + * practice this is not the case. * * @ingroup Parser */ @@ -116,6 +116,12 @@ class ParserOptions { */ public $mRemoveComments = true; + /** + * Callback for current revision fetching. Used as first argument to call_user_func(). + */ + public $mCurrentRevisionCallback = + array( 'Parser', 'statelessFetchRevision' ); + /** * Callback for template fetching. Used as first argument to call_user_func(). */ @@ -139,9 +145,7 @@ class ParserOptions { /** * Clean up signature texts? - * - * 1) Strip ~~~, ~~~~ and ~~~~~ out of signatures - * 2) Substitute all transclusions + * @see Parser::cleanSig */ public $mCleanSignatures; @@ -289,6 +293,11 @@ class ParserOptions { return $this->mRemoveComments; } + /* @since 1.24 */ + public function getCurrentRevisionCallback() { + return $this->mCurrentRevisionCallback; + } + public function getTemplateCallback() { return $this->mTemplateCallback; } @@ -462,6 +471,11 @@ class ParserOptions { return wfSetVar( $this->mRemoveComments, $x ); } + /* @since 1.24 */ + public function setCurrentRevisionCallback( $x ) { + return wfSetVar( $this->mCurrentRevisionCallback, $x ); + } + public function setTemplateCallback( $x ) { return wfSetVar( $this->mTemplateCallback, $x ); } @@ -623,8 +637,7 @@ class ParserOptions { $wgCleanSignatures, $wgExternalLinkTarget, $wgExpensiveParserFunctionLimit, $wgMaxGeneratedPPNodeCount, $wgDisableLangConversion, $wgDisableTitleConversion; - wfProfileIn( __METHOD__ ); - + // *UPDATE* ParserOptions::matches() if any of this changes as needed $this->mInterwikiMagic = $wgInterwikiMagic; $this->mAllowExternalImages = $wgAllowExternalImages; $this->mAllowExternalImagesFrom = $wgAllowExternalImagesFrom; @@ -647,7 +660,33 @@ class ParserOptions { $this->mStubThreshold = $user->getStubThreshold(); $this->mUserLang = $lang; - wfProfileOut( __METHOD__ ); + } + + /** + * Check if these options match that of another options set + * + * This ignores report limit settings that only affect HTML comments + * + * @param ParserOptions $other + * @return bool + * @since 1.25 + */ + public function matches( ParserOptions $other ) { + $fields = array_keys( get_class_vars( __CLASS__ ) ); + $fields = array_diff( $fields, array( + 'mEnableLimitReport', // only effects HTML comments + 'onAccessCallback', // only used for ParserOutput option tracking + ) ); + foreach ( $fields as $field ) { + if ( !is_object( $this->$field ) && $this->$field !== $other->$field ) { + return false; + } + } + // Check the object and lazy-loaded options + return ( + $this->mUserLang->getCode() === $other->mUserLang->getCode() && + $this->getDateFormat() === $other->getDateFormat() + ); } /** @@ -768,11 +807,53 @@ class ParserOptions { // Give a chance for extensions to modify the hash, if they have // extra options or other effects on the parser cache. - wfRunHooks( 'PageRenderingHash', array( &$confstr, $this->getUser(), &$forOptions ) ); + Hooks::run( 'PageRenderingHash', array( &$confstr, $this->getUser(), &$forOptions ) ); // Make it a valid memcached key fragment $confstr = str_replace( ' ', '_', $confstr ); return $confstr; } + + /** + * Sets a hook to force that a page exists, and sets a current revision callback to return a + * revision with custom content when the current revision of the page is requested. + * + * @since 1.25 + * @param Title $title + * @param Content $content + * @param User $user The user that the fake revision is attributed to + * @return ScopedCallback to unset the hook + */ + public function setupFakeRevision( $title, $content, $user ) { + $oldCallback = $this->setCurrentRevisionCallback( function ( $titleToCheck, $parser = false ) use ( $title, $content, $user, &$oldCallback ) { + if ( $titleToCheck->equals( $title ) ) { + return new Revision( array( + 'page' => $title->getArticleID(), + 'user_text' => $user->getName(), + 'user' => $user->getId(), + 'parent_id' => $title->getLatestRevId(), + 'title' => $title, + 'content' => $content + ) ); + } else { + return call_user_func( $oldCallback, $titleToCheck, $parser ); + } + } ); + global $wgHooks; + $wgHooks['TitleExists'][] = + function ( $titleToCheck, &$exists ) use ( $title ) { + if ( $titleToCheck->equals( $title ) ) { + $exists = true; + } + }; + end( $wgHooks['TitleExists'] ); + $key = key( $wgHooks['TitleExists'] ); + LinkCache::singleton()->clearBadLink( $title->getPrefixedDBkey() ); + return new ScopedCallback( function () use ( $title, $key ) { + global $wgHooks; + unset( $wgHooks['TitleExists'][$key] ); + LinkCache::singleton()->clearLink( $title ); + } ); + } } diff --git a/includes/parser/ParserOutput.php b/includes/parser/ParserOutput.php index 5037ce18..65b527c8 100644 --- a/includes/parser/ParserOutput.php +++ b/includes/parser/ParserOutput.php @@ -25,6 +25,7 @@ class ParserOutput extends CacheTime { public $mText, # The output text $mLanguageLinks, # List of the full text of language links, in the order they appear $mCategories, # Map of category names to sort keys + $mIndicators = array(), # Page status indicators, usually displayed in top-right corner $mTitleText, # title text of the chosen language variant $mLinks = array(), # 2-D map of NS/DBK to ID for the links in the document. ID=zero for broken. $mTemplates = array(), # 2-D map of NS/DBK to ID for the template references. ID=zero for broken. @@ -52,27 +53,25 @@ class ParserOutput extends CacheTime { $mTOCEnabled = true; # Whether TOC should be shown, can't override __NOTOC__ private $mIndexPolicy = ''; # 'index' or 'noindex'? Any other value will result in no change. private $mAccessedOptions = array(); # List of ParserOptions (stored in the keys) - private $mSecondaryDataUpdates = array(); # List of DataUpdate, used to save info from the page somewhere else. private $mExtensionData = array(); # extra data used by extensions private $mLimitReportData = array(); # Parser limit report data private $mParseStartTime = array(); # Timestamps for getTimeSinceStart() private $mPreventClickjacking = false; # Whether to emit X-Frame-Options: DENY + private $mFlags = array(); # Generic flags const EDITSECTION_REGEX = '#<(?:mw:)?editsection page="(.*?)" section="(.*?)"(?:/>|>(.*?)())#'; public function __construct( $text = '', $languageLinks = array(), $categoryLinks = array(), - $containsOldMagic = false, $titletext = '' + $unused = false, $titletext = '' ) { $this->mText = $text; $this->mLanguageLinks = $languageLinks; $this->mCategories = $categoryLinks; - $this->mContainsOldMagic = $containsOldMagic; $this->mTitleText = $titletext; } public function getText() { - wfProfileIn( __METHOD__ ); $text = $this->mText; if ( $this->mEditSectionTokens ) { $text = preg_replace_callback( @@ -110,7 +109,6 @@ class ParserOutput extends CacheTime { $text ); } - wfProfileOut( __METHOD__ ); return $text; } @@ -130,6 +128,13 @@ class ParserOutput extends CacheTime { return $this->mCategories; } + /** + * @since 1.25 + */ + public function getIndicators() { + return $this->mIndicators; + } + public function getTitleText() { return $this->mTitleText; } @@ -267,6 +272,13 @@ class ParserOutput extends CacheTime { $this->mCategories[$c] = $sort; } + /** + * @since 1.25 + */ + public function setIndicator( $id, $content ) { + $this->mIndicators[$id] = $content; + } + public function addLanguageLink( $t ) { $this->mLanguageLinks[] = $t; } @@ -471,6 +483,47 @@ class ParserOutput extends CacheTime { $this->mPreventClickjacking = $this->mPreventClickjacking || $out->getPreventClickjacking(); } + /** + * Add a tracking category, getting the title from a system message, + * or print a debug message if the title is invalid. + * + * Any message used with this function should be registered so it will + * show up on Special:TrackingCategories. Core messages should be added + * to SpecialTrackingCategories::$coreTrackingCategories, and extensions + * should add to "TrackingCategories" in their extension.json. + * + * @param string $msg Message key + * @param Title $title title of the page which is being tracked + * @return bool Whether the addition was successful + * @since 1.25 + */ + public function addTrackingCategory( $msg, $title ) { + if ( $title->getNamespace() === NS_SPECIAL ) { + wfDebug( __METHOD__ . ": Not adding tracking category $msg to special page!\n" ); + return false; + } + + // Important to parse with correct title (bug 31469) + $cat = wfMessage( $msg ) + ->title( $title ) + ->inContentLanguage() + ->text(); + + # Allow tracking categories to be disabled by setting them to "-" + if ( $cat === '-' ) { + return false; + } + + $containerCategory = Title::makeTitleSafe( NS_CATEGORY, $cat ); + if ( $containerCategory ) { + $this->addCategory( $containerCategory->getDBkey(), $this->getProperty( 'defaultsort' ) ?: '' ); + return true; + } else { + wfDebug( __METHOD__ . ": [[MediaWiki:$msg]] is not a valid title!\n" ); + return false; + } + } + /** * Override the title to be used for display * -- this is assumed to have been validated @@ -622,43 +675,57 @@ class ParserOutput extends CacheTime { } /** - * Adds an update job to the output. Any update jobs added to the output will - * eventually be executed in order to store any secondary information extracted - * from the page's content. This is triggered by calling getSecondaryDataUpdates() - * and is used for forward links updates on edit and backlink updates by jobs. + * @deprecated since 1.25. Instead, store any relevant data using setExtensionData, + * and implement Content::getSecondaryDataUpdates() if possible, or use the + * 'SecondaryDataUpdates' hook to construct the necessary update objects. * - * @since 1.20 + * @note Hard deprecation and removal without long deprecation period, since there are no + * known users, but known conceptual issues. + * + * @todo remove in 1.26 * * @param DataUpdate $update + * + * @throws MWException */ public function addSecondaryDataUpdate( DataUpdate $update ) { - $this->mSecondaryDataUpdates[] = $update; + wfDeprecated( __METHOD__, '1.25' ); + throw new MWException( 'ParserOutput::addSecondaryDataUpdate() is no longer supported. Override Content::getSecondaryDataUpdates() or use the SecondaryDataUpdates hook instead.' ); } /** - * Returns any DataUpdate jobs to be executed in order to store secondary information - * extracted from the page's content, including a LinksUpdate object for all links stored in - * this ParserOutput object. + * @deprecated since 1.25. * - * @note Avoid using this method directly, use ContentHandler::getSecondaryDataUpdates() - * instead! The content handler may provide additional update objects. + * @note Hard deprecation and removal without long deprecation period, since there are no + * known users, but known conceptual issues. * - * @since 1.20 + * @todo remove in 1.26 * - * @param Title $title The title of the page we're updating. If not given, a title object will - * be created based on $this->getTitleText() - * @param bool $recursive Queue jobs for recursive updates? + * @return bool false (since 1.25) + */ + public function hasCustomDataUpdates() { + wfDeprecated( __METHOD__, '1.25' ); + return false; + } + + /** + * @deprecated since 1.25. Instead, store any relevant data using setExtensionData, + * and implement Content::getSecondaryDataUpdates() if possible, or use the + * 'SecondaryDataUpdates' hook to construct the necessary update objects. + * + * @note Hard deprecation and removal without long deprecation period, since there are no + * known users, but known conceptual issues. + * + * @todo remove in 1.26 + * + * @param Title $title + * @param bool $recursive * * @return array An array of instances of DataUpdate */ public function getSecondaryDataUpdates( Title $title = null, $recursive = true ) { - if ( is_null( $title ) ) { - $title = Title::newFromText( $this->getTitleText() ); - } - - $linksUpdate = new LinksUpdate( $title, $this, $recursive ); - - return array_merge( $this->mSecondaryDataUpdates, array( $linksUpdate ) ); + wfDeprecated( __METHOD__, '1.25' ); + return array(); } /** @@ -794,6 +861,22 @@ class ParserOutput extends CacheTime { $this->mLimitReportData[$key] = $value; } + /** + * Check whether the cache TTL was lowered due to dynamic content + * + * When content is determined by more than hard state (e.g. page edits), + * such as template/file transclusions based on the current timestamp or + * extension tags that generate lists based on queries, this return true. + * + * @return bool + * @since 1.25 + */ + public function hasDynamicContent() { + global $wgParserCacheExpireTime; + + return $this->getCacheExpiry() < $wgParserCacheExpireTime; + } + /** * Get or set the prevent-clickjacking flag * @@ -806,13 +889,13 @@ class ParserOutput extends CacheTime { } /** - * Save space for for serialization by removing useless values + * Save space for serialization by removing useless values * @return array */ public function __sleep() { return array_diff( array_keys( get_object_vars( $this ) ), - array( 'mSecondaryDataUpdates', 'mParseStartTime' ) + array( 'mParseStartTime' ) ); } } diff --git a/includes/parser/Preprocessor_DOM.php b/includes/parser/Preprocessor_DOM.php index 2edb79a2..0351f2a8 100644 --- a/includes/parser/Preprocessor_DOM.php +++ b/includes/parser/Preprocessor_DOM.php @@ -68,6 +68,7 @@ class Preprocessor_DOM implements Preprocessor { /** * @param array $values * @return PPNode_DOM + * @throws MWException */ public function newPartNodeArray( $values ) { //NOTE: DOM manipulation is slower than building & parsing XML! (or so Tim sais) @@ -85,19 +86,17 @@ class Preprocessor_DOM implements Preprocessor { $xml .= ""; - wfProfileIn( __METHOD__ . '-loadXML' ); $dom = new DOMDocument(); wfSuppressWarnings(); $result = $dom->loadXML( $xml ); wfRestoreWarnings(); if ( !$result ) { // Try running the XML through UtfNormal to get rid of invalid characters - $xml = UtfNormal::cleanUp( $xml ); + $xml = UtfNormal\Validator::cleanUp( $xml ); // 1 << 19 == XML_PARSE_HUGE, needed so newer versions of libxml2 // don't barf when the XML is >256 levels deep $result = $dom->loadXML( $xml, 1 << 19 ); } - wfProfileOut( __METHOD__ . '-loadXML' ); if ( !$result ) { throw new MWException( 'Parameters passed to ' . __METHOD__ . ' result in invalid XML' ); @@ -149,14 +148,12 @@ class Preprocessor_DOM implements Preprocessor { * @return PPNode_DOM */ public function preprocessToObj( $text, $flags = 0 ) { - wfProfileIn( __METHOD__ ); global $wgMemc, $wgPreprocessorCacheThreshold; $xml = false; $cacheable = ( $wgPreprocessorCacheThreshold !== false && strlen( $text ) > $wgPreprocessorCacheThreshold ); if ( $cacheable ) { - wfProfileIn( __METHOD__ . '-cacheable' ); $cacheKey = wfMemcKey( 'preprocess-xml', md5( $text ), $flags ); $cacheValue = $wgMemc->get( $cacheKey ); @@ -169,11 +166,9 @@ class Preprocessor_DOM implements Preprocessor { } } if ( $xml === false ) { - wfProfileIn( __METHOD__ . '-cache-miss' ); $xml = $this->preprocessToXml( $text, $flags ); $cacheValue = sprintf( "%08d", self::CACHE_VERSION ) . $xml; $wgMemc->set( $cacheKey, $cacheValue, 86400 ); - wfProfileOut( __METHOD__ . '-cache-miss' ); wfDebugLog( "Preprocessor", "Saved preprocessor XML to memcached (key $cacheKey)" ); } } else { @@ -186,20 +181,17 @@ class Preprocessor_DOM implements Preprocessor { $max = $this->parser->mOptions->getMaxGeneratedPPNodeCount(); if ( $this->parser->mGeneratedPPNodeCount > $max ) { if ( $cacheable ) { - wfProfileOut( __METHOD__ . '-cacheable' ); } - wfProfileOut( __METHOD__ ); throw new MWException( __METHOD__ . ': generated node count limit exceeded' ); } - wfProfileIn( __METHOD__ . '-loadXML' ); $dom = new DOMDocument; wfSuppressWarnings(); $result = $dom->loadXML( $xml ); wfRestoreWarnings(); if ( !$result ) { // Try running the XML through UtfNormal to get rid of invalid characters - $xml = UtfNormal::cleanUp( $xml ); + $xml = UtfNormal\Validator::cleanUp( $xml ); // 1 << 19 == XML_PARSE_HUGE, needed so newer versions of libxml2 // don't barf when the XML is >256 levels deep. $result = $dom->loadXML( $xml, 1 << 19 ); @@ -207,14 +199,10 @@ class Preprocessor_DOM implements Preprocessor { if ( $result ) { $obj = new PPNode_DOM( $dom->documentElement ); } - wfProfileOut( __METHOD__ . '-loadXML' ); if ( $cacheable ) { - wfProfileOut( __METHOD__ . '-cacheable' ); } - wfProfileOut( __METHOD__ ); - if ( !$result ) { throw new MWException( __METHOD__ . ' generated invalid XML' ); } @@ -227,7 +215,6 @@ class Preprocessor_DOM implements Preprocessor { * @return string */ public function preprocessToXml( $text, $flags = 0 ) { - wfProfileIn( __METHOD__ ); $rules = array( '{' => array( 'end' => '}', @@ -764,8 +751,6 @@ class Preprocessor_DOM implements Preprocessor { $stack->rootAccum .= ''; $xml = $stack->rootAccum; - wfProfileOut( __METHOD__ ); - return $xml; } } @@ -1043,11 +1028,17 @@ class PPFrame_DOM implements PPFrame { // Numbered parameter $index = $nameNodes->item( 0 )->attributes->getNamedItem( 'index' )->textContent; $index = $index - $indexOffset; + if ( isset( $namedArgs[$index] ) || isset( $numberedArgs[$index] ) ) { + $this->parser->addTrackingCategory( 'duplicate-args-category' ); + } $numberedArgs[$index] = $value->item( 0 ); unset( $namedArgs[$index] ); } else { // Named parameter $name = trim( $this->expand( $nameNodes->item( 0 ), PPFrame::STRIP_COMMENTS ) ); + if ( isset( $namedArgs[$name] ) || isset( $numberedArgs[$name] ) ) { + $this->parser->addTrackingCategory( 'duplicate-args-category' ); + } $namedArgs[$name] = $value->item( 0 ); unset( $numberedArgs[$name] ); } @@ -1095,7 +1086,6 @@ class PPFrame_DOM implements PPFrame { ); return 'Expansion depth limit exceeded'; } - wfProfileIn( __METHOD__ ); ++$expansionDepth; if ( $expansionDepth > $this->parser->mHighestExpansionDepth ) { $this->parser->mHighestExpansionDepth = $expansionDepth; @@ -1284,7 +1274,6 @@ class PPFrame_DOM implements PPFrame { $newIterator = $contextNode->childNodes; } } else { - wfProfileOut( __METHOD__ ); throw new MWException( __METHOD__ . ': Invalid parameter type' ); } @@ -1308,7 +1297,6 @@ class PPFrame_DOM implements PPFrame { } } --$expansionDepth; - wfProfileOut( __METHOD__ ); return $outStack[0]; } diff --git a/includes/parser/Preprocessor_Hash.php b/includes/parser/Preprocessor_Hash.php index 63763967..af91ad47 100644 --- a/includes/parser/Preprocessor_Hash.php +++ b/includes/parser/Preprocessor_Hash.php @@ -112,7 +112,6 @@ class Preprocessor_Hash implements Preprocessor { * @return PPNode_Hash_Tree */ public function preprocessToObj( $text, $flags = 0 ) { - wfProfileIn( __METHOD__ ); // Check cache. global $wgMemc, $wgPreprocessorCacheThreshold; @@ -121,7 +120,6 @@ class Preprocessor_Hash implements Preprocessor { && strlen( $text ) > $wgPreprocessorCacheThreshold; if ( $cacheable ) { - wfProfileIn( __METHOD__ . '-cacheable' ); $cacheKey = wfMemcKey( 'preprocess-hash', md5( $text ), $flags ); $cacheValue = $wgMemc->get( $cacheKey ); @@ -132,12 +130,9 @@ class Preprocessor_Hash implements Preprocessor { // From the cache wfDebugLog( "Preprocessor", "Loaded preprocessor hash from memcached (key $cacheKey)" ); - wfProfileOut( __METHOD__ . '-cacheable' ); - wfProfileOut( __METHOD__ ); return $hash; } } - wfProfileIn( __METHOD__ . '-cache-miss' ); } $rules = array( @@ -637,18 +632,12 @@ class Preprocessor_Hash implements Preprocessor { } if ( !$node ) { if ( $cacheable ) { - wfProfileOut( __METHOD__ . '-cache-miss' ); - wfProfileOut( __METHOD__ . '-cacheable' ); } - wfProfileOut( __METHOD__ ); throw new MWException( __METHOD__ . ': eqpos not found' ); } if ( $node->name !== 'equals' ) { if ( $cacheable ) { - wfProfileOut( __METHOD__ . '-cache-miss' ); - wfProfileOut( __METHOD__ . '-cacheable' ); } - wfProfileOut( __METHOD__ ); throw new MWException( __METHOD__ . ': eqpos is not equals' ); } $equalsNode = $node; @@ -748,12 +737,9 @@ class Preprocessor_Hash implements Preprocessor { if ( $cacheable ) { $cacheValue = sprintf( "%08d", self::CACHE_VERSION ) . serialize( $rootNode ); $wgMemc->set( $cacheKey, $cacheValue, 86400 ); - wfProfileOut( __METHOD__ . '-cache-miss' ); - wfProfileOut( __METHOD__ . '-cacheable' ); wfDebugLog( "Preprocessor", "Saved preprocessor Hash to memcached (key $cacheKey)" ); } - wfProfileOut( __METHOD__ ); return $rootNode; } } @@ -985,11 +971,17 @@ class PPFrame_Hash implements PPFrame { if ( $bits['index'] !== '' ) { // Numbered parameter $index = $bits['index'] - $indexOffset; + if ( isset( $namedArgs[$index] ) || isset( $numberedArgs[$index] ) ) { + $this->parser->addTrackingCategory( 'duplicate-args-category' ); + } $numberedArgs[$index] = $bits['value']; unset( $namedArgs[$index] ); } else { // Named parameter $name = trim( $this->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) ); + if ( isset( $namedArgs[$name] ) || isset( $numberedArgs[$name] ) ) { + $this->parser->addTrackingCategory( 'duplicate-args-category' ); + } $namedArgs[$name] = $bits['value']; unset( $numberedArgs[$name] ); } diff --git a/includes/parser/StripState.php b/includes/parser/StripState.php index 5d1743e6..51ae42dc 100644 --- a/includes/parser/StripState.php +++ b/includes/parser/StripState.php @@ -117,12 +117,10 @@ class StripState { return $text; } - wfProfileIn( __METHOD__ ); $oldType = $this->tempType; $this->tempType = $type; $text = preg_replace_callback( $this->regex, array( $this, 'unstripCallback' ), $text ); $this->tempType = $oldType; - wfProfileOut( __METHOD__ ); return $text; } -- cgit v1.2.3-54-g00ecf