diff options
author | Pierre Schmitz <pierre@archlinux.de> | 2015-12-20 09:00:55 +0100 |
---|---|---|
committer | Pierre Schmitz <pierre@archlinux.de> | 2015-12-20 09:00:55 +0100 |
commit | a2190ac74dd4d7080b12bab90e552d7aa81209ef (patch) | |
tree | 8b31f38de9882d18df54cf8d9e0de74167a094eb /includes/parser | |
parent | 15e69f7b20b6596b9148030acce5b59993b95a45 (diff) | |
parent | 257401d8b2cf661adf36c84b0e3fd1cf85e33c22 (diff) |
Merge branch 'mw-1.26'
Diffstat (limited to 'includes/parser')
-rw-r--r-- | includes/parser/CacheTime.php | 2 | ||||
-rw-r--r-- | includes/parser/CoreParserFunctions.php | 25 | ||||
-rw-r--r-- | includes/parser/LinkHolderArray.php | 1 | ||||
-rw-r--r-- | includes/parser/MWTidy.php | 323 | ||||
-rw-r--r-- | includes/parser/Parser.php | 221 | ||||
-rw-r--r-- | includes/parser/ParserCache.php | 51 | ||||
-rw-r--r-- | includes/parser/ParserDiffTest.php | 21 | ||||
-rw-r--r-- | includes/parser/ParserOptions.php | 99 | ||||
-rw-r--r-- | includes/parser/ParserOutput.php | 37 | ||||
-rw-r--r-- | includes/parser/Preprocessor_DOM.php | 26 | ||||
-rw-r--r-- | includes/parser/Preprocessor_Hash.php | 24 | ||||
-rw-r--r-- | includes/parser/StripState.php | 27 |
12 files changed, 394 insertions, 463 deletions
diff --git a/includes/parser/CacheTime.php b/includes/parser/CacheTime.php index 950c0d46..c4506897 100644 --- a/includes/parser/CacheTime.php +++ b/includes/parser/CacheTime.php @@ -47,7 +47,7 @@ class CacheTime { /** * setCacheTime() sets the timestamp expressing when the page has been rendered. * This does not control expiry, see updateCacheExpiry() for that! - * @param string $t + * @param string $t TS_MW timestamp * @return string */ public function setCacheTime( $t ) { diff --git a/includes/parser/CoreParserFunctions.php b/includes/parser/CoreParserFunctions.php index 830a68fc..7639e2f8 100644 --- a/includes/parser/CoreParserFunctions.php +++ b/includes/parser/CoreParserFunctions.php @@ -41,7 +41,7 @@ class CoreParserFunctions { $noHashFunctions = array( 'ns', 'nse', 'urlencode', 'lcfirst', 'ucfirst', 'lc', 'uc', 'localurl', 'localurle', 'fullurl', 'fullurle', 'canonicalurl', - 'canonicalurle', 'formatnum', 'grammar', 'gender', 'plural', + 'canonicalurle', 'formatnum', 'grammar', 'gender', 'plural', 'bidi', 'numberofpages', 'numberofusers', 'numberofactiveusers', 'numberofarticles', 'numberoffiles', 'numberofadmins', 'numberingroup', 'numberofedits', 'language', @@ -88,9 +88,13 @@ class CoreParserFunctions { if ( strval( $part1 ) !== '' ) { $args = array_slice( func_get_args(), 2 ); $message = wfMessage( $part1, $args ) - ->inLanguage( $parser->getOptions()->getUserLangObj() )->plain(); - - return array( $message, 'noparse' => false ); + ->inLanguage( $parser->getOptions()->getUserLangObj() ); + if ( !$message->exists() ) { + // When message does not exists, the message name is surrounded by angle + // and can result in a tag, therefore escape the angles + return $message->escaped(); + } + return array( $message->plain(), 'noparse' => false ); } else { return array( 'found' => false ); } @@ -178,7 +182,9 @@ class CoreParserFunctions { default: $func = 'urlencode'; } - return $parser->markerSkipCallback( $s, $func ); + // See T105242, where the choice to kill markers and various + // other options were discussed. + return $func( $parser->killMarkers( $s ) ); } public static function lcfirst( $parser, $s = '' ) { @@ -354,6 +360,15 @@ class CoreParserFunctions { } /** + * @param Parser $parser + * @param string $text + * @return string + */ + public static function bidi( $parser, $text = '' ) { + return $parser->getFunctionLang()->embedBidi( $text ); + } + + /** * Override the title of the page when viewed, provided we've been given a * title which will normalise to the canonical title * diff --git a/includes/parser/LinkHolderArray.php b/includes/parser/LinkHolderArray.php index 7026c5ce..b4ca7c8e 100644 --- a/includes/parser/LinkHolderArray.php +++ b/includes/parser/LinkHolderArray.php @@ -560,7 +560,6 @@ class LinkHolderArray { // for each found variants, figure out link holders and replace foreach ( $varRes as $s ) { - $variantTitle = Title::makeTitle( $s->page_namespace, $s->page_title ); $varPdbk = $variantTitle->getPrefixedDBkey(); $vardbk = $variantTitle->getDBkey(); diff --git a/includes/parser/MWTidy.php b/includes/parser/MWTidy.php index d446ccf6..807842b6 100644 --- a/includes/parser/MWTidy.php +++ b/includes/parser/MWTidy.php @@ -22,93 +22,6 @@ */ /** - * Class used to hide mw:editsection tokens from Tidy so that it doesn't break them - * or break on them. This is a bit of a hack for now, but hopefully in the future - * we may create a real postprocessor or something that will replace this. - * It's called wrapper because for now it basically takes over MWTidy::tidy's task - * of wrapping the text in a xhtml block - * - * This re-uses some of the parser's UNIQ tricks, though some of it is private so it's - * duplicated. Perhaps we should create an abstract marker hiding class. - * - * @ingroup Parser - */ -class MWTidyWrapper { - - /** - * @var ReplacementArray - */ - protected $mTokens; - - protected $mUniqPrefix; - - protected $mMarkerIndex; - - public function __construct() { - $this->mTokens = null; - $this->mUniqPrefix = null; - } - - /** - * @param string $text - * @return string - */ - public function getWrapped( $text ) { - $this->mTokens = new ReplacementArray; - $this->mUniqPrefix = "\x7fUNIQ" . - dechex( mt_rand( 0, 0x7fffffff ) ) . dechex( mt_rand( 0, 0x7fffffff ) ); - $this->mMarkerIndex = 0; - - // Replace <mw:editsection> elements with placeholders - $wrappedtext = preg_replace_callback( ParserOutput::EDITSECTION_REGEX, - array( &$this, 'replaceCallback' ), $text ); - // ...and <mw:toc> markers - $wrappedtext = preg_replace_callback( '/\<\\/?mw:toc\>/', - array( &$this, 'replaceCallback' ), $wrappedtext ); - // ... and <math> tags - $wrappedtext = preg_replace_callback( '/\<math(.*?)\<\\/math\>/s', - array( &$this, 'replaceCallback' ), $wrappedtext ); - // Modify inline Microdata <link> and <meta> elements so they say <html-link> and <html-meta> so - // we can trick Tidy into not stripping them out by including them in tidy's new-empty-tags config - $wrappedtext = preg_replace( '!<(link|meta)([^>]*?)(/{0,1}>)!', '<html-$1$2$3', $wrappedtext ); - - // Wrap the whole thing in a doctype and body for Tidy. - $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"' . - ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>' . - '<head><title>test</title></head><body>' . $wrappedtext . '</body></html>'; - - return $wrappedtext; - } - - /** - * @param array $m - * - * @return string - */ - public function replaceCallback( $m ) { - $marker = "{$this->mUniqPrefix}-item-{$this->mMarkerIndex}" . Parser::MARKER_SUFFIX; - $this->mMarkerIndex++; - $this->mTokens->setPair( $marker, $m[0] ); - return $marker; - } - - /** - * @param string $text - * @return string - */ - public function postprocess( $text ) { - // Revert <html-{link,meta}> back to <{link,meta}> - $text = preg_replace( '!<html-(link|meta)([^>]*?)(/{0,1}>)!', '<$1$2$3', $text ); - - // Restore the contents of placeholder tokens - $text = $this->mTokens->replace( $text ); - - return $text; - } - -} - -/** * Class to interact with HTML tidy * * Either the external tidy program or the in-process tidy extension @@ -118,32 +31,24 @@ class MWTidyWrapper { * @ingroup Parser */ class MWTidy { + private static $instance; + /** - * Interface with html tidy, used if $wgUseTidy = true. + * Interface with html tidy. * If tidy isn't able to correct the markup, the original will be * returned in all its glory with a warning comment appended. * - * @param string $text Hideous HTML input + * @param string $text HTML input fragment. This should not contain a + * <body> or <html> tag. * @return string Corrected HTML output */ public static function tidy( $text ) { - $wrapper = new MWTidyWrapper; - $wrappedtext = $wrapper->getWrapped( $text ); - - $retVal = null; - $correctedtext = self::clean( $wrappedtext, false, $retVal ); - - if ( $retVal < 0 ) { - wfDebug( "Possible tidy configuration error!\n" ); - return $text . "\n<!-- Tidy was unable to run -->\n"; - } elseif ( is_null( $correctedtext ) ) { - wfDebug( "Tidy error detected!\n" ); - return $text . "\n<!-- Tidy found serious XHTML errors -->\n"; + $driver = self::singleton(); + if ( !$driver ) { + throw new MWException( __METHOD__. + ': tidy is disabled, caller should have checked MWTidy::isEnabled()' ); } - - $correctedtext = $wrapper->postprocess( $correctedtext ); // restore any hidden tokens - - return $correctedtext; + return $driver->tidy( $text ); } /** @@ -154,170 +59,80 @@ class MWTidy { * @return bool Whether the HTML is valid */ public static function checkErrors( $text, &$errorStr = null ) { - $retval = 0; - $errorStr = self::clean( $text, true, $retval ); - return ( $retval < 0 && $errorStr == '' ) || $retval == 0; + $driver = self::singleton(); + if ( !$driver ) { + throw new MWException( __METHOD__. + ': tidy is disabled, caller should have checked MWTidy::isEnabled()' ); + } + if ( $driver->supportsValidate() ) { + return $driver->validate( $text, $errorStr ); + } else { + throw new MWException( __METHOD__ . ": error text return from HHVM tidy is not supported" ); + } } - /** - * Perform a clean/repair operation - * @param string $text HTML to check - * @param bool $stderr Whether to read result from STDERR rather than STDOUT - * @param int &$retval Exit code (-1 on internal error) - * @return null|string - * @throws MWException - */ - private static function clean( $text, $stderr = false, &$retval = null ) { - global $wgTidyInternal; + public static function isEnabled() { + return self::singleton() !== false; + } - if ( $wgTidyInternal ) { - if ( wfIsHHVM() ) { - if ( $stderr ) { - throw new MWException( __METHOD__ . ": error text return from HHVM tidy is not supported" ); + protected static function singleton() { + global $wgUseTidy, $wgTidyInternal, $wgTidyConf, $wgDebugTidy, $wgTidyConfig, + $wgTidyBin, $wgTidyOpts; + + if ( self::$instance === null ) { + if ( $wgTidyConfig !== null ) { + $config = $wgTidyConfig; + } elseif ( $wgUseTidy ) { + // b/c configuration + $config = array( + 'tidyConfigFile' => $wgTidyConf, + 'debugComment' => $wgDebugTidy, + 'tidyBin' => $wgTidyBin, + 'tidyCommandLine' => $wgTidyOpts ); + if ( $wgTidyInternal ) { + if ( wfIsHHVM() ) { + $config['driver'] = 'RaggettInternalHHVM'; + } else { + $config['driver'] = 'RaggettInternalPHP'; + } + } else { + $config['driver'] = 'RaggettExternal'; } - return self::hhvmClean( $text, $retval ); } else { - return self::phpClean( $text, $stderr, $retval ); + return false; } - } else { - return self::externalClean( $text, $stderr, $retval ); - } - } - - /** - * Spawn an external HTML tidy process and get corrected markup back from it. - * Also called in OutputHandler.php for full page validation - * - * @param string $text HTML to check - * @param bool $stderr Whether to read result from STDERR rather than STDOUT - * @param int &$retval Exit code (-1 on internal error) - * @return string|null - */ - private static function externalClean( $text, $stderr = false, &$retval = null ) { - global $wgTidyConf, $wgTidyBin, $wgTidyOpts; - - $cleansource = ''; - $opts = ' -utf8'; - - if ( $stderr ) { - $descriptorspec = array( - 0 => array( 'pipe', 'r' ), - 1 => array( 'file', wfGetNull(), 'a' ), - 2 => array( 'pipe', 'w' ) - ); - } else { - $descriptorspec = array( - 0 => array( 'pipe', 'r' ), - 1 => array( 'pipe', 'w' ), - 2 => array( 'file', wfGetNull(), 'a' ) - ); - } - - $readpipe = $stderr ? 2 : 1; - $pipes = array(); - - $process = proc_open( - "$wgTidyBin -config $wgTidyConf $wgTidyOpts$opts", $descriptorspec, $pipes ); - - //NOTE: At least on linux, the process will be created even if tidy is not installed. - // This means that missing tidy will be treated as a validation failure. - - if ( is_resource( $process ) ) { - // Theoretically, this style of communication could cause a deadlock - // here. If the stdout buffer fills up, then writes to stdin could - // block. This doesn't appear to happen with tidy, because tidy only - // writes to stdout after it's finished reading from stdin. Search - // for tidyParseStdin and tidySaveStdout in console/tidy.c - fwrite( $pipes[0], $text ); - fclose( $pipes[0] ); - while ( !feof( $pipes[$readpipe] ) ) { - $cleansource .= fgets( $pipes[$readpipe], 1024 ); + switch ( $config['driver'] ) { + case 'RaggettInternalHHVM': + self::$instance = new MediaWiki\Tidy\RaggettInternalHHVM( $config ); + break; + case 'RaggettInternalPHP': + self::$instance = new MediaWiki\Tidy\RaggettInternalPHP( $config ); + break; + case 'RaggettExternal': + self::$instance = new MediaWiki\Tidy\RaggettExternal( $config ); + break; + case 'Html5Depurate': + self::$instance = new MediaWiki\Tidy\Html5Depurate( $config ); + break; + default: + throw new MWException( "Invalid tidy driver: \"{$config['driver']}\"" ); } - fclose( $pipes[$readpipe] ); - $retval = proc_close( $process ); - } else { - wfWarn( "Unable to start external tidy process" ); - $retval = -1; } - - if ( !$stderr && $cleansource == '' && $text != '' ) { - // Some kind of error happened, so we couldn't get the corrected text. - // Just give up; we'll use the source text and append a warning. - $cleansource = null; - } - - return $cleansource; + return self::$instance; } /** - * Use the HTML tidy extension to use the tidy library in-process, - * saving the overhead of spawning a new process. - * - * @param string $text HTML to check - * @param bool $stderr Whether to read result from error status instead of output - * @param int &$retval Exit code (-1 on internal error) - * @return string|null + * Set the driver to be used. This is for testing. + * @param TidyDriverBase|false|null $instance */ - private static function phpClean( $text, $stderr = false, &$retval = null ) { - global $wgTidyConf, $wgDebugTidy; - - if ( ( !wfIsHHVM() && !class_exists( 'tidy' ) ) || - ( wfIsHHVM() && !function_exists( 'tidy_repair_string' ) ) - ) { - wfWarn( "Unable to load internal tidy class." ); - $retval = -1; - - return null; - } - - $tidy = new tidy; - $tidy->parseString( $text, $wgTidyConf, 'utf8' ); - - if ( $stderr ) { - $retval = $tidy->getStatus(); - return $tidy->errorBuffer; - } - - $tidy->cleanRepair(); - $retval = $tidy->getStatus(); - if ( $retval == 2 ) { - // 2 is magic number for fatal error - // http://www.php.net/manual/en/function.tidy-get-status.php - $cleansource = null; - } else { - $cleansource = tidy_get_output( $tidy ); - if ( $wgDebugTidy && $retval > 0 ) { - $cleansource .= "<!--\nTidy reports:\n" . - str_replace( '-->', '-->', $tidy->errorBuffer ) . - "\n-->"; - } - } - - return $cleansource; + public static function setInstance( $instance ) { + self::$instance = $instance; } /** - * Use the tidy extension for HHVM from - * https://github.com/wikimedia/mediawiki-php-tidy - * - * This currently does not support the object-oriented interface, but - * tidy_repair_string() can be used for the most common tasks. - * - * @param string $text HTML to check - * @param int &$retval Exit code (-1 on internal error) - * @return string|null + * Destroy the current singleton instance */ - private static function hhvmClean( $text, &$retval ) { - global $wgTidyConf; - - $cleansource = tidy_repair_string( $text, $wgTidyConf, 'utf8' ); - if ( $cleansource === false ) { - $cleansource = null; - $retval = -1; - } else { - $retval = 0; - } - - return $cleansource; + public static function destroySingleton() { + self::$instance = null; } } diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index ace63a09..c07a08ac 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -87,7 +87,11 @@ class Parser { # \p{Zs} is unicode 'separator, space' category. It covers the space 0x20 # as well as U+3000 is IDEOGRAPHIC SPACE for bug 19052 const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}]'; - const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)([^][<>"\\x00-\\x20\\x7F\p{Zs}]+) + # Simplified expression to match an IPv4 or IPv6 address, or + # at least one character of a host name (embeds EXT_LINK_URL_CLASS) + const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}])'; + # RegExp to make image URLs (embeds IPv6 part of EXT_LINK_ADDR) + const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}]+) \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu'; # Regular expression for a non-newline space @@ -114,8 +118,20 @@ class Parser { const OT_MSG = 3; const OT_PLAIN = 4; # like extractSections() - portions of the original are returned unchanged. - # Marker Suffix needs to be accessible staticly. + /** + * @var string Prefix and suffix for temporary replacement strings + * for the multipass parser. + * + * \x7f should never appear in input as it's disallowed in XML. + * Using it at the front also gives us a little extra robustness + * since it shouldn't match when butted up against identifier-like + * string constructs. + * + * Must not consist of all title characters, or else it will change + * the behavior of <nowiki> in a link. + */ const MARKER_SUFFIX = "-QINU\x7f"; + const MARKER_PREFIX = "\x7fUNIQ-"; # Markers used for wrapping the table of contents const TOC_START = '<mw:toc>'; @@ -206,9 +222,10 @@ class Parser { public $mInputSize = false; # For {{PAGESIZE}} on current page. /** - * @var string - */ - public $mUniqPrefix; + * @var string Deprecated accessor for the strip marker prefix. + * @deprecated since 1.26; use Parser::MARKER_PREFIX instead. + **/ + public $mUniqPrefix = Parser::MARKER_PREFIX; /** * @var array Array with the language name of each language link (i.e. the @@ -241,7 +258,8 @@ class Parser { $this->mConf = $conf; $this->mUrlProtocols = wfUrlProtocols(); $this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')' . - self::EXT_LINK_URL_CLASS . '+)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su'; + self::EXT_LINK_ADDR . + self::EXT_LINK_URL_CLASS . '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su'; if ( isset( $conf['preprocessorClass'] ) ) { $this->mPreprocessorClass = $conf['preprocessorClass']; } elseif ( defined( 'HPHP_VERSION' ) ) { @@ -336,18 +354,7 @@ class Parser { $this->mLangLinkLanguages = array(); $this->currentRevisionCache = null; - /** - * Prefix for temporary replacement strings for the multipass parser. - * \x07 should never appear in input as it's disallowed in XML. - * Using it at the front also gives us a little extra robustness - * since it shouldn't match when butted up against identifier-like - * string constructs. - * - * Must not consist of all title characters, or else it will change - * the behavior of <nowiki> in a link. - */ - $this->mUniqPrefix = "\x7fUNIQ" . self::getRandomString(); - $this->mStripState = new StripState( $this->mUniqPrefix ); + $this->mStripState = new StripState; # Clear these on every parse, bug 4549 $this->mTplRedirCache = $this->mTplDomCache = array(); @@ -399,6 +406,9 @@ class Parser { global $wgShowHostnames; if ( $clearState ) { + // We use U+007F DELETE to construct strip markers, so we have to make + // sure that this character does not occur in the input text. + $text = strtr( $text, "\x7f", "?" ); $magicScopeVariable = $this->lock(); } @@ -410,11 +420,6 @@ class Parser { $this->mOutput->resetParseStartTime(); } - # Remove the strip marker tag prefix from the input, if present. - if ( $clearState ) { - $text = str_replace( $this->mUniqPrefix, '', $text ); - } - $oldRevisionId = $this->mRevisionId; $oldRevisionObject = $this->mRevisionObject; $oldRevisionTimestamp = $this->mRevisionTimestamp; @@ -504,6 +509,9 @@ class Parser { if ( $wgShowHostnames ) { $limitReport .= 'Parsed by ' . wfHostname() . "\n"; } + $limitReport .= 'Cached time: ' . $this->mOutput->getCacheTime() . "\n"; + $limitReport .= 'Cache expiry: ' . $this->mOutput->getCacheExpiry() . "\n"; + $limitReport .= 'Dynamic content: ' . ( $this->mOutput->hasDynamicContent() ? 'true' : 'false' ) . "\n"; foreach ( $this->mOutput->getLimitReportData() as $key => $value ) { if ( Hooks::run( 'ParserLimitReportFormat', array( $key, &$value, &$limitReport, false, false ) @@ -686,8 +694,10 @@ class Parser { * Get a random string * * @return string + * @deprecated since 1.26; use wfRandomString() instead. */ public static function getRandomString() { + wfDeprecated( __METHOD__, '1.26' ); return wfRandomString( 16 ); } @@ -705,18 +715,11 @@ class Parser { * Accessor for mUniqPrefix. * * @return string + * @deprecated since 1.26; use Parser::MARKER_PREFIX instead. */ public function uniqPrefix() { - if ( !isset( $this->mUniqPrefix ) ) { - # @todo FIXME: This is probably *horribly wrong* - # LanguageConverter seems to want $wgParser's uniqPrefix, however - # if this is called for a parser cache hit, the parser may not - # have ever been initialized in the first place. - # Not really sure what the heck is supposed to be going on here. - return ''; - # throw new MWException( "Accessing uninitialized mUniqPrefix" ); - } - return $this->mUniqPrefix; + wfDeprecated( __METHOD__, '1.26' ); + return self::MARKER_PREFIX; } /** @@ -907,10 +910,14 @@ class Parser { * @param array $elements List of element names. Comments are always extracted. * @param string $text Source text string. * @param array $matches Out parameter, Array: extracted tags - * @param string $uniq_prefix + * @param string|null $uniq_prefix * @return string Stripped text + * @since 1.26 The uniq_prefix argument is deprecated. */ - public static function extractTagsAndParams( $elements, $text, &$matches, $uniq_prefix = '' ) { + public static function extractTagsAndParams( $elements, $text, &$matches, $uniq_prefix = null ) { + if ( $uniq_prefix !== null ) { + wfDeprecated( __METHOD__ . ' called with $prefix argument', '1.26' ); + } static $n = 1; $stripped = ''; $matches = array(); @@ -938,7 +945,7 @@ class Parser { $inside = $p[4]; } - $marker = "$uniq_prefix-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX; + $marker = self::MARKER_PREFIX . "-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX; $stripped .= $marker; if ( $close === '/>' ) { @@ -991,10 +998,10 @@ class Parser { * @return string */ public function insertStripItem( $text ) { - $rnd = "{$this->mUniqPrefix}-item-{$this->mMarkerIndex}-" . self::MARKER_SUFFIX; + $marker = self::MARKER_PREFIX . "-item-{$this->mMarkerIndex}-" . self::MARKER_SUFFIX; $this->mMarkerIndex++; - $this->mStripState->addGeneral( $rnd, $text ); - return $rnd; + $this->mStripState->addGeneral( $marker, $text ); + return $marker; } /** @@ -1024,9 +1031,10 @@ class Parser { } $first_character = $line[0]; + $first_two = substr( $line, 0, 2 ); $matches = array(); - if ( preg_match( '/^(:*)\{\|(.*)$/', $line, $matches ) ) { + if ( preg_match( '/^(:*)\s*\{\|(.*)$/', $line, $matches ) ) { # First check if we are starting a new table $indent_level = strlen( $matches[1] ); @@ -1043,7 +1051,7 @@ class Parser { # Don't do any of the following $out .= $outLine . "\n"; continue; - } elseif ( substr( $line, 0, 2 ) === '|}' ) { + } elseif ( $first_two === '|}' ) { # We are ending a table $line = '</table>' . substr( $line, 2 ); $last_tag = array_pop( $last_tag_history ); @@ -1061,7 +1069,7 @@ class Parser { } array_pop( $tr_attributes ); $outLine = $line . str_repeat( '</dd></dl>', $indent_level ); - } elseif ( substr( $line, 0, 2 ) === '|-' ) { + } elseif ( $first_two === '|-' ) { # Now we have a table row $line = preg_replace( '#^\|-+#', '', $line ); @@ -1090,16 +1098,16 @@ class Parser { array_push( $last_tag_history, '' ); } elseif ( $first_character === '|' || $first_character === '!' - || substr( $line, 0, 2 ) === '|+' + || $first_two === '|+' ) { # This might be cell elements, td, th or captions - if ( substr( $line, 0, 2 ) === '|+' ) { + if ( $first_two === '|+' ) { $first_character = '+'; + $line = substr( $line, 2 ); + } else { $line = substr( $line, 1 ); } - $line = substr( $line, 1 ); - if ( $first_character === '!' ) { $line = str_replace( '!!', '||', $line ); } @@ -1257,7 +1265,7 @@ class Parser { # replaceInternalLinks may sometimes leave behind # absolute URLs, which have to be masked to hide them from replaceExternalLinks - $text = str_replace( $this->mUniqPrefix . 'NOPARSE', '', $text ); + $text = str_replace( self::MARKER_PREFIX . 'NOPARSE', '', $text ); $text = $this->doMagicLinks( $text ); $text = $this->formatHeadings( $text, $origText, $isMain ); @@ -1275,10 +1283,12 @@ class Parser { * @return string */ private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) { - global $wgUseTidy, $wgAlwaysUseTidy; - $text = $this->mStripState->unstripGeneral( $text ); + if ( $isMain ) { + Hooks::run( 'ParserAfterUnstrip', array( &$this, &$text ) ); + } + # Clean up special characters, only run once, next-to-last before doBlockLevels $fixtags = array( # french spaces, last one Guillemet-left @@ -1323,7 +1333,7 @@ class Parser { $text = Sanitizer::normalizeCharReferences( $text ); - if ( ( $wgUseTidy && $this->mOptions->getTidy() ) || $wgAlwaysUseTidy ) { + if ( MWTidy::isEnabled() && $this->mOptions->getTidy() ) { $text = MWTidy::tidy( $text ); } else { # attempt to sanitize at least some nesting problems @@ -1374,20 +1384,23 @@ class Parser { public function doMagicLinks( $text ) { $prots = wfUrlProtocolsWithoutProtRel(); $urlChar = self::EXT_LINK_URL_CLASS; + $addr = self::EXT_LINK_ADDR; $space = self::SPACE_NOT_NL; # non-newline space $spdash = "(?:-|$space)"; # a dash or a non-newline space $spaces = "$space++"; # possessive match of 1 or more spaces $text = preg_replace_callback( - '!(?: # Start cases - (<a[ \t\r\n>].*?</a>) | # m[1]: Skip link text - (<.*?>) | # m[2]: Skip stuff inside HTML elements' . " - (\b(?i:$prots)$urlChar+) | # m[3]: Free external links - \b(?:RFC|PMID) $spaces # m[4]: RFC or PMID, capture number + '!(?: # Start cases + (<a[ \t\r\n>].*?</a>) | # m[1]: Skip link text + (<.*?>) | # m[2]: Skip stuff inside + # HTML elements' . " + (\b(?i:$prots)($addr$urlChar*)) | # m[3]: Free external links + # m[4]: Post-protocol path + \b(?:RFC|PMID) $spaces # m[5]: RFC or PMID, capture number ([0-9]+)\b | - \bISBN $spaces ( # m[5]: ISBN, capture number - (?: 97[89] $spdash? )? # optional 13-digit ISBN prefix - (?: [0-9] $spdash? ){9} # 9 digits with opt. delimiters - [0-9Xx] # check digit + \bISBN $spaces ( # m[6]: ISBN, capture number + (?: 97[89] $spdash? )? # optional 13-digit ISBN prefix + (?: [0-9] $spdash? ){9} # 9 digits with opt. delimiters + [0-9Xx] # check digit )\b )!xu", array( &$this, 'magicLinkCallback' ), $text ); return $text; @@ -1407,35 +1420,35 @@ class Parser { return $m[0]; } elseif ( isset( $m[3] ) && $m[3] !== '' ) { # Free external link - return $this->makeFreeExternalLink( $m[0] ); - } elseif ( isset( $m[4] ) && $m[4] !== '' ) { + return $this->makeFreeExternalLink( $m[0], strlen( $m[4] ) ); + } elseif ( isset( $m[5] ) && $m[5] !== '' ) { # RFC or PMID if ( substr( $m[0], 0, 3 ) === 'RFC' ) { $keyword = 'RFC'; $urlmsg = 'rfcurl'; $cssClass = 'mw-magiclink-rfc'; - $id = $m[4]; + $id = $m[5]; } elseif ( substr( $m[0], 0, 4 ) === 'PMID' ) { $keyword = 'PMID'; $urlmsg = 'pubmedurl'; $cssClass = 'mw-magiclink-pmid'; - $id = $m[4]; + $id = $m[5]; } else { throw new MWException( __METHOD__ . ': unrecognised match type "' . substr( $m[0], 0, 20 ) . '"' ); } $url = wfMessage( $urlmsg, $id )->inContentLanguage()->text(); return Linker::makeExternalLink( $url, "{$keyword} {$id}", true, $cssClass ); - } elseif ( isset( $m[5] ) && $m[5] !== '' ) { + } elseif ( isset( $m[6] ) && $m[6] !== '' ) { # ISBN - $isbn = $m[5]; + $isbn = $m[6]; $space = self::SPACE_NOT_NL; # non-newline space $isbn = preg_replace( "/$space/", ' ', $isbn ); $num = strtr( $isbn, array( '-' => '', ' ' => '', 'x' => 'X', - )); + ) ); $titleObj = SpecialPage::getTitleFor( 'Booksources', $num ); return '<a href="' . htmlspecialchars( $titleObj->getLocalURL() ) . @@ -1449,11 +1462,12 @@ class Parser { * Make a free external link, given a user-supplied URL * * @param string $url - * + * @param int $numPostProto + * The number of characters after the protocol. * @return string HTML * @private */ - public function makeFreeExternalLink( $url ) { + public function makeFreeExternalLink( $url, $numPostProto ) { $trail = ''; @@ -1478,7 +1492,7 @@ class Parser { # Don't break a trailing HTML entity by moving the ; into $trail # This is in hot code, so use substr_compare to avoid having to # create a new string object for the comparison - if ( $numSepChars && substr_compare( $url, ";", -$numSepChars, 1 ) === 0) { + if ( $numSepChars && substr_compare( $url, ";", -$numSepChars, 1 ) === 0 ) { # more optimization: instead of running preg_match with a $ # anchor, which can be slow, do the match on the reversed # string starting at the desired offset. @@ -1492,6 +1506,12 @@ class Parser { $url = substr( $url, 0, -$numSepChars ); } + # Verify that we still have a real URL after trail removal, and + # not just lone protocol + if ( strlen( $trail ) >= $numPostProto ) { + return $url . $trail; + } + $url = Sanitizer::cleanUrl( $url ); # Is this an external image? @@ -1609,12 +1629,10 @@ class Parser { $firstspace = $i; } } elseif ( $x2 === ' ' ) { - if ( $firstsingleletterword == -1 ) { - $firstsingleletterword = $i; - // if $firstsingleletterword is set, we don't - // look at the other options, so we can bail early. - break; - } + $firstsingleletterword = $i; + // if $firstsingleletterword is set, we don't + // look at the other options, so we can bail early. + break; } else { if ( $firstmultiletterword == -1 ) { $firstmultiletterword = $i; @@ -2143,7 +2161,8 @@ class Parser { $link = substr( $link, 1 ); } - $nt = Title::newFromText( $this->mStripState->unstripNoWiki( $link ) ); + $unstrip = $this->mStripState->unstripNoWiki( $link ); + $nt = is_string( $unstrip ) ? Title::newFromText( $unstrip ) : null; if ( $nt === null ) { $s .= $prefix . '[[' . $line; continue; @@ -2351,7 +2370,7 @@ class Parser { */ public function armorLinks( $text ) { return preg_replace( '/\b((?i)' . $this->mUrlProtocols . ')/', - "{$this->mUniqPrefix}NOPARSE$1", $text ); + self::MARKER_PREFIX . "NOPARSE$1", $text ); } /** @@ -2623,7 +2642,7 @@ class Parser { $closematch = preg_match( '/(?:<\\/table|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|' . '<td|<th|<\\/?blockquote|<\\/?div|<hr|<\\/pre|<\\/p|<\\/mw:|' - . $this->mUniqPrefix + . self::MARKER_PREFIX . '-pre|<\\/li|<\\/ul|<\\/ol|<\\/dl|<\\/?center)/iS', $t ); @@ -3305,7 +3324,8 @@ class Parser { */ public function replaceVariables( $text, $frame = false, $argsOnly = false ) { # Is there any text? Also, Prevent too big inclusions! - if ( strlen( $text ) < 1 || strlen( $text ) > $this->mOptions->getMaxIncludeSize() ) { + $textSize = strlen( $text ); + if ( $textSize < 1 || $textSize > $this->mOptions->getMaxIncludeSize() ) { return $text; } @@ -3435,7 +3455,6 @@ class Parser { # SUBST if ( !$found ) { - $substMatch = $this->mSubstWords->matchStartAndRemove( $part1 ); # Possibilities for substMatch: "subst", "safesubst" or FALSE @@ -3493,7 +3512,6 @@ class Parser { # Parser functions if ( !$found ) { - $colonPos = strpos( $part1, ':' ); if ( $colonPos !== false ) { $func = substr( $part1, 0, $colonPos ); @@ -3892,7 +3910,11 @@ class Parser { // Defaults to Parser::statelessFetchTemplate() $templateCb = $this->mOptions->getTemplateCallback(); $stuff = call_user_func( $templateCb, $title, $this ); + // We use U+007F DELETE to distinguish strip markers from regular text. $text = $stuff['text']; + if ( is_string( $stuff['text'] ) ) { + $text = strtr( $text, "\x7f", "?" ); + } $finalTitle = isset( $stuff['finalTitle'] ) ? $stuff['finalTitle'] : $title; if ( isset( $stuff['deps'] ) ) { foreach ( $stuff['deps'] as $dep ) { @@ -4186,7 +4208,7 @@ class Parser { $name = $frame->expand( $params['name'] ); $attrText = !isset( $params['attr'] ) ? null : $frame->expand( $params['attr'] ); $content = !isset( $params['inner'] ) ? null : $frame->expand( $params['inner'] ); - $marker = "{$this->mUniqPrefix}-$name-" + $marker = self::MARKER_PREFIX . "-$name-" . sprintf( '%08X', $this->mMarkerIndex++ ) . self::MARKER_SUFFIX; $isFunctionTag = isset( $this->mFunctionTagHooks[strtolower( $name )] ) && @@ -4431,7 +4453,7 @@ class Parser { $prevlevel = 0; $toclevel = 0; $prevtoclevel = 0; - $markerRegex = "{$this->mUniqPrefix}-h-(\d+)-" . self::MARKER_SUFFIX; + $markerRegex = self::MARKER_PREFIX . "-h-(\d+)-" . self::MARKER_SUFFIX; $baseTitleText = $this->mTitle->getPrefixedDBkey(); $oldType = $this->mOutputType; $this->setOutputType( self::OT_WIKI ); @@ -4442,7 +4464,9 @@ class Parser { $tocraw = array(); $refers = array(); - foreach ( $matches[3] as $headline ) { + $headlines = $numMatches !== false ? $matches[3] : array(); + + foreach ( $headlines as $headline ) { $isTemplate = false; $titleText = false; $sectionIndex = false; @@ -4547,6 +4571,12 @@ class Parser { array( '', '<$1>' ), $safeHeadline ); + + # Strip '<span></span>', which is the result from the above if + # <span id="foo"></span> is used to produce an additional anchor + # for a section. + $tocline = str_replace( '<span></span>', '', $tocline ); + $tocline = trim( $tocline ); # For the anchor, strip out HTML-y stuff period @@ -5068,7 +5098,7 @@ class Parser { * in the Parser class. * * This interface (introduced r61913) appears to be undocumented, but - * 'markerName' is used by some core tag hooks to override which strip + * 'markerType' is used by some core tag hooks to override which strip * array their results are placed in. **Use great caution if attempting * this interface, as it is not documented and injudicious use could smash * private variables.** @@ -5391,9 +5421,10 @@ class Parser { case 'gallery-internal-link': $linkValue = strip_tags( $this->replaceLinkHoldersText( $match ) ); $chars = self::EXT_LINK_URL_CLASS; + $addr = self::EXT_LINK_ADDR; $prots = $this->mUrlProtocols; //check to see if link matches an absolute url, if not then it must be a wiki link. - if ( preg_match( "/^($prots)$chars+$/u", $linkValue ) ) { + if ( preg_match( "/^($prots)$addr$chars*$/u", $linkValue ) ) { $link = $linkValue; } else { $localLinkTitle = Title::newFromText( $linkValue ); @@ -5575,13 +5606,14 @@ class Parser { break; case 'link': $chars = self::EXT_LINK_URL_CLASS; + $addr = self::EXT_LINK_ADDR; $prots = $this->mUrlProtocols; if ( $value === '' ) { $paramName = 'no-link'; $value = true; $validated = true; } elseif ( preg_match( "/^((?i)$prots)/", $value ) ) { - if ( preg_match( "/^((?i)$prots)$chars+$/u", $value, $m ) ) { + if ( preg_match( "/^((?i)$prots)$addr$chars*$/u", $value, $m ) ) { $paramName = 'link-url'; $this->mOutput->addExternalLink( $value ); if ( $this->mOptions->getExternalLinkTarget() ) { @@ -5770,7 +5802,7 @@ class Parser { public function replaceTransparentTags( $text ) { $matches = array(); $elements = array_keys( $this->mTransparentTagHooks ); - $text = self::extractTagsAndParams( $elements, $text, $matches, $this->mUniqPrefix ); + $text = self::extractTagsAndParams( $elements, $text, $matches ); $replacements = array(); foreach ( $matches as $marker => $data ) { @@ -6229,7 +6261,7 @@ class Parser { $i = 0; $out = ''; while ( $i < strlen( $s ) ) { - $markerStart = strpos( $s, $this->mUniqPrefix, $i ); + $markerStart = strpos( $s, self::MARKER_PREFIX, $i ); if ( $markerStart === false ) { $out .= call_user_func( $callback, substr( $s, $i ) ); break; @@ -6420,4 +6452,15 @@ class Parser { return $this; } } + + /** + * Set's up the PHP implementation of OOUI for use in this request + * and instructs OutputPage to enable OOUI for itself. + * + * @since 1.26 + */ + public function enableOOUI() { + OutputPage::setupOOUI(); + $this->mOutput->setEnableOOUI( true ); + } } diff --git a/includes/parser/ParserCache.php b/includes/parser/ParserCache.php index bc8e4a69..abff5435 100644 --- a/includes/parser/ParserCache.php +++ b/includes/parser/ParserCache.php @@ -26,7 +26,7 @@ * @todo document */ class ParserCache { - /** @var MWMemcached */ + /** @var BagOStuff */ private $mMemc; /** * Get an instance of this object @@ -44,20 +44,19 @@ class ParserCache { /** * Setup a cache pathway with a given back-end storage mechanism. - * May be a memcached client or a BagOStuff derivative. * - * @param MWMemcached $memCached + * This class use an invalidation strategy that is compatible with + * MultiWriteBagOStuff in async replication mode. + * + * @param BagOStuff $memCached * @throws MWException */ - protected function __construct( $memCached ) { - if ( !$memCached ) { - throw new MWException( "Tried to create a ParserCache with an invalid memcached" ); - } + protected function __construct( BagOStuff $memCached ) { $this->mMemc = $memCached; } /** - * @param Article $article + * @param WikiPage $article * @param string $hash * @return mixed|string */ @@ -73,7 +72,7 @@ class ParserCache { } /** - * @param Article $article + * @param WikiPage $article * @return mixed|string */ protected function getOptionsKey( $article ) { @@ -91,7 +90,7 @@ class ParserCache { * English preferences. That's why we take into account *all* user * options. (r70809 CR) * - * @param Article $article + * @param WikiPage $article * @param ParserOptions $popts * @return string */ @@ -103,7 +102,7 @@ class ParserCache { /** * Retrieve the ParserOutput from ParserCache, even if it's outdated. - * @param Article $article + * @param WikiPage $article * @param ParserOptions $popts * @return ParserOutput|bool False on failure */ @@ -126,7 +125,7 @@ class ParserCache { * * @todo Document parameter $useOutdated * - * @param Article $article + * @param WikiPage $article * @param ParserOptions $popts * @param bool $useOutdated (default true) * @return bool|mixed|string @@ -141,15 +140,15 @@ class ParserCache { // Determine the options which affect this article $optionsKey = $this->mMemc->get( $this->getOptionsKey( $article ) ); - if ( $optionsKey != false ) { + if ( $optionsKey instanceof CacheTime ) { if ( !$useOutdated && $optionsKey->expired( $article->getTouched() ) ) { - wfIncrStats( "pcache_miss_expired" ); + wfIncrStats( "pcache.miss.expired" ); $cacheTime = $optionsKey->getCacheTime(); wfDebug( "Parser options key expired, touched " . $article->getTouched() . ", epoch $wgCacheEpoch, cached $cacheTime\n" ); return false; } elseif ( $optionsKey->isDifferentRevision( $article->getLatest() ) ) { - wfIncrStats( "pcache_miss_revid" ); + wfIncrStats( "pcache.miss.revid" ); $revId = $article->getLatest(); $cachedRevId = $optionsKey->getCacheRevisionId(); wfDebug( "ParserOutput key is for an old revision, latest $revId, cached $cachedRevId\n" ); @@ -176,7 +175,7 @@ class ParserCache { * Retrieve the ParserOutput from ParserCache. * false if not found or outdated. * - * @param Article $article + * @param WikiPage|Article $article * @param ParserOptions $popts * @param bool $useOutdated (default false) * @@ -195,14 +194,14 @@ class ParserCache { $parserOutputKey = $this->getKey( $article, $popts, $useOutdated ); if ( $parserOutputKey === false ) { - wfIncrStats( 'pcache_miss_absent' ); + wfIncrStats( 'pcache.miss.absent' ); return false; } $value = $this->mMemc->get( $parserOutputKey ); if ( !$value ) { wfDebug( "ParserOutput cache miss.\n" ); - wfIncrStats( "pcache_miss_absent" ); + wfIncrStats( "pcache.miss.absent" ); return false; } @@ -213,20 +212,28 @@ class ParserCache { // key. Force it here. See bug 31445. $value->setEditSectionTokens( $popts->getEditSection() ); + $wikiPage = method_exists( $article, 'getPage' ) + ? $article->getPage() + : $article; + if ( !$useOutdated && $value->expired( $touched ) ) { - wfIncrStats( "pcache_miss_expired" ); + wfIncrStats( "pcache.miss.expired" ); $cacheTime = $value->getCacheTime(); wfDebug( "ParserOutput key expired, touched $touched, " . "epoch $wgCacheEpoch, cached $cacheTime\n" ); $value = false; } elseif ( $value->isDifferentRevision( $article->getLatest() ) ) { - wfIncrStats( "pcache_miss_revid" ); + wfIncrStats( "pcache.miss.revid" ); $revId = $article->getLatest(); $cachedRevId = $value->getCacheRevisionId(); wfDebug( "ParserOutput key is for an old revision, latest $revId, cached $cachedRevId\n" ); $value = false; + } elseif ( Hooks::run( 'RejectParserCacheValue', array( $value, $wikiPage, $popts ) ) === false ) { + wfIncrStats( 'pcache.miss.rejected' ); + wfDebug( "ParserOutput key valid, but rejected by RejectParserCacheValue hook handler.\n" ); + $value = false; } else { - wfIncrStats( "pcache_hit" ); + wfIncrStats( "pcache.hit" ); } return $value; @@ -276,6 +283,8 @@ class ParserCache { // ...and its pointer $this->mMemc->set( $this->getOptionsKey( $page ), $optionsKey, $expire ); + + Hooks::run( 'ParserCacheSaveComplete', array( $this, $parserOutput, $page->getTitle(), $popts, $revId ) ); } else { wfDebug( "Parser output was marked as uncacheable and has not been saved.\n" ); } diff --git a/includes/parser/ParserDiffTest.php b/includes/parser/ParserDiffTest.php index 174c1d61..32f5d068 100644 --- a/includes/parser/ParserDiffTest.php +++ b/includes/parser/ParserDiffTest.php @@ -29,7 +29,6 @@ class ParserDiffTest public $parsers; public $conf; public $shortOutput = false; - public $dtUniqPrefix; public function __construct( $conf ) { if ( !isset( $conf['parsers'] ) ) { @@ -43,12 +42,6 @@ class ParserDiffTest return; } - global $wgHooks; - static $doneHook = false; - if ( !$doneHook ) { - $doneHook = true; - $wgHooks['ParserClearState'][] = array( $this, 'onClearState' ); - } if ( isset( $this->conf['shortOutput'] ) ) { $this->shortOutput = $this->conf['shortOutput']; } @@ -126,18 +119,4 @@ class ParserDiffTest $parser->setFunctionHook( $id, $callback, $flags ); } } - - /** - * @param Parser $parser - * @return bool - */ - public function onClearState( &$parser ) { - // hack marker prefixes to get identical output - if ( !isset( $this->dtUniqPrefix ) ) { - $this->dtUniqPrefix = $parser->uniqPrefix(); - } else { - $parser->mUniqPrefix = $this->dtUniqPrefix; - } - return true; - } } diff --git a/includes/parser/ParserOptions.php b/includes/parser/ParserOptions.php index 100656d1..1073aed8 100644 --- a/includes/parser/ParserOptions.php +++ b/includes/parser/ParserOptions.php @@ -34,145 +34,145 @@ class ParserOptions { /** * Interlanguage links are removed and returned in an array */ - public $mInterwikiMagic; + private $mInterwikiMagic; /** * Allow external images inline? */ - public $mAllowExternalImages; + private $mAllowExternalImages; /** * If not, any exception? */ - public $mAllowExternalImagesFrom; + private $mAllowExternalImagesFrom; /** * If not or it doesn't match, should we check an on-wiki whitelist? */ - public $mEnableImageWhitelist; + private $mEnableImageWhitelist; /** * Date format index */ - public $mDateFormat = null; + private $mDateFormat = null; /** * Create "edit section" links? */ - public $mEditSection = true; + private $mEditSection = true; /** * Allow inclusion of special pages? */ - public $mAllowSpecialInclusion; + private $mAllowSpecialInclusion; /** * Use tidy to cleanup output HTML? */ - public $mTidy = false; + private $mTidy = false; /** * Which lang to call for PLURAL and GRAMMAR */ - public $mInterfaceMessage = false; + private $mInterfaceMessage = false; /** * Overrides $mInterfaceMessage with arbitrary language */ - public $mTargetLanguage = null; + private $mTargetLanguage = null; /** * Maximum size of template expansions, in bytes */ - public $mMaxIncludeSize; + private $mMaxIncludeSize; /** * Maximum number of nodes touched by PPFrame::expand() */ - public $mMaxPPNodeCount; + private $mMaxPPNodeCount; /** * Maximum number of nodes generated by Preprocessor::preprocessToObj() */ - public $mMaxGeneratedPPNodeCount; + private $mMaxGeneratedPPNodeCount; /** * Maximum recursion depth in PPFrame::expand() */ - public $mMaxPPExpandDepth; + private $mMaxPPExpandDepth; /** * Maximum recursion depth for templates within templates */ - public $mMaxTemplateDepth; + private $mMaxTemplateDepth; /** * Maximum number of calls per parse to expensive parser functions */ - public $mExpensiveParserFunctionLimit; + private $mExpensiveParserFunctionLimit; /** * Remove HTML comments. ONLY APPLIES TO PREPROCESS OPERATIONS */ - public $mRemoveComments = true; + private $mRemoveComments = true; /** * Callback for current revision fetching. Used as first argument to call_user_func(). */ - public $mCurrentRevisionCallback = + private $mCurrentRevisionCallback = array( 'Parser', 'statelessFetchRevision' ); /** * Callback for template fetching. Used as first argument to call_user_func(). */ - public $mTemplateCallback = + private $mTemplateCallback = array( 'Parser', 'statelessFetchTemplate' ); /** * Enable limit report in an HTML comment on output */ - public $mEnableLimitReport = false; + private $mEnableLimitReport = false; /** * Timestamp used for {{CURRENTDAY}} etc. */ - public $mTimestamp; + private $mTimestamp; /** * Target attribute for external links */ - public $mExternalLinkTarget; + private $mExternalLinkTarget; /** * Clean up signature texts? * @see Parser::cleanSig */ - public $mCleanSignatures; + private $mCleanSignatures; /** * Transform wiki markup when saving the page? */ - public $mPreSaveTransform = true; + private $mPreSaveTransform = true; /** * Whether content conversion should be disabled */ - public $mDisableContentConversion; + private $mDisableContentConversion; /** * Whether title conversion should be disabled */ - public $mDisableTitleConversion; + private $mDisableTitleConversion; /** * Automatically number headings? */ - public $mNumberHeadings; + private $mNumberHeadings; /** * Thumb size preferred by the user. */ - public $mThumbSize; + private $mThumbSize; /** * Maximum article size of an article to be marked as "stub" @@ -182,38 +182,38 @@ class ParserOptions { /** * Language object of the User language. */ - public $mUserLang; + private $mUserLang; /** * @var User * Stored user object */ - public $mUser; + private $mUser; /** * Parsing the page for a "preview" operation? */ - public $mIsPreview = false; + private $mIsPreview = false; /** * Parsing the page for a "preview" operation on a single section? */ - public $mIsSectionPreview = false; + private $mIsSectionPreview = false; /** * Parsing the printable version of the page? */ - public $mIsPrintable = false; + private $mIsPrintable = false; /** * Extra key that should be present in the caching key. */ - public $mExtraKey = ''; + private $mExtraKey = ''; /** * Function to be called when an option is accessed. */ - protected $onAccessCallback = null; + private $onAccessCallback = null; /** * If the page being parsed is a redirect, this should hold the redirect @@ -372,16 +372,17 @@ class ParserOptions { } /** - * Get the user language used by the parser for this page. + * Get the user language used by the parser for this page and split the parser cache. * - * You shouldn't use this. Really. $parser->getFunctionLang() is all you need. + * @warning: Calling this causes the parser cache to be fragmented by user language! + * To avoid cache fragmentation, output should not depend on the user language. + * Use Parser::getFunctionLang() or Parser::getTargetLanguage() instead! * - * To avoid side-effects where the page will be rendered based on the language - * of the user who last saved, this function will triger a cache fragmentation. - * Usage of this method is discouraged for that reason. - * - * When saving, this will return the default language instead of the user's. + * @note This function will trigger a cache fragmentation by recording the + * 'userlang' option, see optionUsed(). This is done to avoid cache pollution + * when the page is rendered based on the language of the user. * + * @note When saving, this will return the default language instead of the user's. * {{int: }} uses this which used to produce inconsistent link tables (bug 14404). * * @return Language @@ -395,6 +396,12 @@ class ParserOptions { /** * Same as getUserLangObj() but returns a string instead. * + * @warning: Calling this causes the parser cache to be fragmented by user language! + * To avoid cache fragmentation, output should not depend on the user language. + * Use Parser::getFunctionLang() or Parser::getTargetLanguage() instead! + * + * @see getUserLangObj() + * * @return string Language code * @since 1.17 */ @@ -700,6 +707,10 @@ class ParserOptions { /** * Called when an option is accessed. + * Calls the watcher that was set using registerWatcher(). + * Typically, the watcher callback is ParserOutput::registerOption(). + * The information registered that way will be used by ParserCache::save(). + * * @param string $optionName Name of the option */ public function optionUsed( $optionName ) { @@ -791,6 +802,10 @@ class ParserOptions { $confstr .= $wgRenderHashAppend; + // @note: as of Feb 2015, core never sets the editsection flag, since it uses + // <mw:editsection> tags to inject editsections on the fly. However, extensions + // may be using it by calling ParserOption::optionUsed resp. ParserOutput::registerOption + // directly. At least Wikibase does at this point in time. if ( !in_array( 'editsection', $forOptions ) ) { $confstr .= '!*'; } elseif ( !$this->mEditSection ) { diff --git a/includes/parser/ParserOutput.php b/includes/parser/ParserOutput.php index 65b527c8..2eb1dc9f 100644 --- a/includes/parser/ParserOutput.php +++ b/includes/parser/ParserOutput.php @@ -41,7 +41,6 @@ class ParserOutput extends CacheTime { $mModules = array(), # Modules to be loaded by the resource loader $mModuleScripts = array(), # Modules of which only the JS will be loaded by the resource loader $mModuleStyles = array(), # Modules of which only the CSSS will be loaded by the resource loader - $mModuleMessages = array(), # Modules of which only the messages will be loaded by the resource loader $mJsConfigVars = array(), # JavaScript config variable for mw.config combined with this page $mOutputHooks = array(), # Hook tags as per $wgParserOutputHooks $mWarnings = array(), # Warning text to be returned to the user. Wikitext formatted, in the key only @@ -50,7 +49,8 @@ class ParserOutput extends CacheTime { $mProperties = array(), # Name/value pairs to be cached in the DB $mTOCHTML = '', # HTML of the TOC $mTimestamp, # Timestamp of the revision - $mTOCEnabled = true; # Whether TOC should be shown, can't override __NOTOC__ + $mTOCEnabled = true, # Whether TOC should be shown, can't override __NOTOC__ + $mEnableOOUI = false; # Whether OOUI should be enabled private $mIndexPolicy = ''; # 'index' or 'noindex'? Any other value will result in no change. private $mAccessedOptions = array(); # List of ParserOptions (stored in the keys) private $mExtensionData = array(); # extra data used by extensions @@ -104,7 +104,7 @@ class ParserOutput extends CacheTime { $text = str_replace( array( Parser::TOC_START, Parser::TOC_END ), '', $text ); } else { $text = preg_replace( - '#' . preg_quote( Parser::TOC_START ) . '.*?' . preg_quote( Parser::TOC_END ) . '#s', + '#' . preg_quote( Parser::TOC_START, '#' ) . '.*?' . preg_quote( Parser::TOC_END, '#' ) . '#s', '', $text ); @@ -191,8 +191,13 @@ class ParserOutput extends CacheTime { return $this->mModuleStyles; } + /** + * @deprecated since 1.26 Obsolete + * @return array + */ public function getModuleMessages() { - return $this->mModuleMessages; + wfDeprecated( __METHOD__, '1.26' ); + return array(); } /** @since 1.23 */ @@ -228,6 +233,10 @@ class ParserOutput extends CacheTime { return $this->mTOCEnabled; } + public function getEnableOOUI() { + return $this->mEnableOOUI; + } + public function setText( $text ) { return wfSetVar( $this->mText, $text ); } @@ -279,6 +288,17 @@ class ParserOutput extends CacheTime { $this->mIndicators[$id] = $content; } + /** + * Enables OOUI, if true, in any OutputPage instance this ParserOutput + * object is added to. + * + * @since 1.26 + * @param bool $enable If OOUI should be enabled or not + */ + public function setEnableOOUI( $enable = false ) { + $this->mEnableOOUI = $enable; + } + public function addLanguageLink( $t ) { $this->mLanguageLinks[] = $t; } @@ -445,8 +465,12 @@ class ParserOutput extends CacheTime { $this->mModuleStyles = array_merge( $this->mModuleStyles, (array)$modules ); } + /** + * @deprecated since 1.26 Use addModules() instead + * @param string|array $modules + */ public function addModuleMessages( $modules ) { - $this->mModuleMessages = array_merge( $this->mModuleMessages, (array)$modules ); + wfDeprecated( __METHOD__, '1.26' ); } /** @@ -476,7 +500,6 @@ class ParserOutput extends CacheTime { $this->addModules( $out->getModules() ); $this->addModuleScripts( $out->getModuleScripts() ); $this->addModuleStyles( $out->getModuleStyles() ); - $this->addModuleMessages( $out->getModuleMessages() ); $this->addJsConfigVars( $out->getJsConfigVars() ); $this->mHeadItems = array_merge( $this->mHeadItems, $out->getHeadItemsArray() ); @@ -663,6 +686,8 @@ class ParserOutput extends CacheTime { /** * Tags a parser option for use in the cache key for this parser output. * Registered as a watcher at ParserOptions::registerWatcher() by Parser::clearState(). + * The information gathered here is available via getUsedOptions(), + * and is used by ParserCache::save(). * * @see ParserCache::getKey * @see ParserCache::save diff --git a/includes/parser/Preprocessor_DOM.php b/includes/parser/Preprocessor_DOM.php index 0351f2a8..8a09be83 100644 --- a/includes/parser/Preprocessor_DOM.php +++ b/includes/parser/Preprocessor_DOM.php @@ -87,9 +87,9 @@ class Preprocessor_DOM implements Preprocessor { $xml .= "</list>"; $dom = new DOMDocument(); - wfSuppressWarnings(); + MediaWiki\suppressWarnings(); $result = $dom->loadXML( $xml ); - wfRestoreWarnings(); + MediaWiki\restoreWarnings(); if ( !$result ) { // Try running the XML through UtfNormal to get rid of invalid characters $xml = UtfNormal\Validator::cleanUp( $xml ); @@ -154,7 +154,6 @@ class Preprocessor_DOM implements Preprocessor { $cacheable = ( $wgPreprocessorCacheThreshold !== false && strlen( $text ) > $wgPreprocessorCacheThreshold ); if ( $cacheable ) { - $cacheKey = wfMemcKey( 'preprocess-xml', md5( $text ), $flags ); $cacheValue = $wgMemc->get( $cacheKey ); if ( $cacheValue ) { @@ -186,9 +185,9 @@ class Preprocessor_DOM implements Preprocessor { } $dom = new DOMDocument; - wfSuppressWarnings(); + MediaWiki\suppressWarnings(); $result = $dom->loadXML( $xml ); - wfRestoreWarnings(); + MediaWiki\restoreWarnings(); if ( !$result ) { // Try running the XML through UtfNormal to get rid of invalid characters $xml = UtfNormal\Validator::cleanUp( $xml ); @@ -853,7 +852,8 @@ class PPDStackElement { $close, // Matching closing character $count, // Number of opening characters found (number of "=" for heading) $parts, // Array of PPDPart objects describing pipe-separated parts. - $lineStart; // True if the open char appeared at the start of the input line. Not set for headings. + $lineStart; // True if the open char appeared at the start of the input line. + // Not set for headings. public $partClass = 'PPDPart'; @@ -1029,6 +1029,10 @@ class PPFrame_DOM implements PPFrame { $index = $nameNodes->item( 0 )->attributes->getNamedItem( 'index' )->textContent; $index = $index - $indexOffset; if ( isset( $namedArgs[$index] ) || isset( $numberedArgs[$index] ) ) { + $this->parser->getOutput()->addWarning( wfMessage( 'duplicate-args-warning', + wfEscapeWikiText( $this->title ), + wfEscapeWikiText( $title ), + wfEscapeWikiText( $index ) )->text() ); $this->parser->addTrackingCategory( 'duplicate-args-category' ); } $numberedArgs[$index] = $value->item( 0 ); @@ -1037,6 +1041,10 @@ class PPFrame_DOM implements PPFrame { // Named parameter $name = trim( $this->expand( $nameNodes->item( 0 ), PPFrame::STRIP_COMMENTS ) ); if ( isset( $namedArgs[$name] ) || isset( $numberedArgs[$name] ) ) { + $this->parser->getOutput()->addWarning( wfMessage( 'duplicate-args-warning', + wfEscapeWikiText( $this->title ), + wfEscapeWikiText( $title ), + wfEscapeWikiText( $name ) )->text() ); $this->parser->addTrackingCategory( 'duplicate-args-category' ); } $namedArgs[$name] = $value->item( 0 ); @@ -1195,9 +1203,11 @@ class PPFrame_DOM implements PPFrame { } elseif ( $contextNode->nodeName == 'comment' ) { # HTML-style comment # Remove it in HTML, pre+remove and STRIP_COMMENTS modes - if ( $this->parser->ot['html'] + # Not in RECOVER_COMMENTS mode (msgnw) though. + if ( ( $this->parser->ot['html'] || ( $this->parser->ot['pre'] && $this->parser->mOptions->getRemoveComments() ) || ( $flags & PPFrame::STRIP_COMMENTS ) + ) && !( $flags & PPFrame::RECOVER_COMMENTS ) ) { $out .= ''; } elseif ( $this->parser->ot['wiki'] && !( $flags & PPFrame::RECOVER_COMMENTS ) ) { @@ -1263,7 +1273,7 @@ class PPFrame_DOM implements PPFrame { $titleText = $this->title->getPrefixedDBkey(); $this->parser->mHeadings[] = array( $titleText, $headingIndex ); $serial = count( $this->parser->mHeadings ) - 1; - $marker = "{$this->parser->mUniqPrefix}-h-$serial-" . Parser::MARKER_SUFFIX; + $marker = Parser::MARKER_PREFIX . "-h-$serial-" . Parser::MARKER_SUFFIX; $count = $contextNode->getAttribute( 'level' ); $s = substr( $s, 0, $count ) . $marker . substr( $s, $count ); $this->parser->mStripState->addGeneral( $marker, '' ); diff --git a/includes/parser/Preprocessor_Hash.php b/includes/parser/Preprocessor_Hash.php index af91ad47..9429e442 100644 --- a/includes/parser/Preprocessor_Hash.php +++ b/includes/parser/Preprocessor_Hash.php @@ -112,7 +112,6 @@ class Preprocessor_Hash implements Preprocessor { * @return PPNode_Hash_Tree */ public function preprocessToObj( $text, $flags = 0 ) { - // Check cache. global $wgMemc, $wgPreprocessorCacheThreshold; @@ -120,7 +119,6 @@ class Preprocessor_Hash implements Preprocessor { && strlen( $text ) > $wgPreprocessorCacheThreshold; if ( $cacheable ) { - $cacheKey = wfMemcKey( 'preprocess-hash', md5( $text ), $flags ); $cacheValue = $wgMemc->get( $cacheKey ); if ( $cacheValue ) { @@ -736,8 +734,12 @@ class Preprocessor_Hash implements Preprocessor { // Cache if ( $cacheable ) { $cacheValue = sprintf( "%08d", self::CACHE_VERSION ) . serialize( $rootNode ); - $wgMemc->set( $cacheKey, $cacheValue, 86400 ); - wfDebugLog( "Preprocessor", "Saved preprocessor Hash to memcached (key $cacheKey)" ); + + // T111289: Cache values should not exceed 1 Mb, but they do. + if ( strlen( $cacheValue ) <= 1e6 ) { + $wgMemc->set( $cacheKey, $cacheValue, 86400 ); + wfDebugLog( "Preprocessor", "Saved preprocessor Hash to memcached (key $cacheKey)" ); + } } return $rootNode; @@ -972,6 +974,10 @@ class PPFrame_Hash implements PPFrame { // Numbered parameter $index = $bits['index'] - $indexOffset; if ( isset( $namedArgs[$index] ) || isset( $numberedArgs[$index] ) ) { + $this->parser->getOutput()->addWarning( wfMessage( 'duplicate-args-warning', + wfEscapeWikiText( $this->title ), + wfEscapeWikiText( $title ), + wfEscapeWikiText( $index ) )->text() ); $this->parser->addTrackingCategory( 'duplicate-args-category' ); } $numberedArgs[$index] = $bits['value']; @@ -980,6 +986,10 @@ class PPFrame_Hash implements PPFrame { // Named parameter $name = trim( $this->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) ); if ( isset( $namedArgs[$name] ) || isset( $numberedArgs[$name] ) ) { + $this->parser->getOutput()->addWarning( wfMessage( 'duplicate-args-warning', + wfEscapeWikiText( $this->title ), + wfEscapeWikiText( $title ), + wfEscapeWikiText( $name ) )->text() ); $this->parser->addTrackingCategory( 'duplicate-args-category' ); } $namedArgs[$name] = $bits['value']; @@ -1118,9 +1128,11 @@ class PPFrame_Hash implements PPFrame { } elseif ( $contextNode->name == 'comment' ) { # HTML-style comment # Remove it in HTML, pre+remove and STRIP_COMMENTS modes - if ( $this->parser->ot['html'] + # Not in RECOVER_COMMENTS mode (msgnw) though. + if ( ( $this->parser->ot['html'] || ( $this->parser->ot['pre'] && $this->parser->mOptions->getRemoveComments() ) || ( $flags & PPFrame::STRIP_COMMENTS ) + ) && !( $flags & PPFrame::RECOVER_COMMENTS ) ) { $out .= ''; } elseif ( $this->parser->ot['wiki'] && !( $flags & PPFrame::RECOVER_COMMENTS ) ) { @@ -1177,7 +1189,7 @@ class PPFrame_Hash implements PPFrame { $titleText = $this->title->getPrefixedDBkey(); $this->parser->mHeadings[] = array( $titleText, $bits['i'] ); $serial = count( $this->parser->mHeadings ) - 1; - $marker = "{$this->parser->mUniqPrefix}-h-$serial-" . Parser::MARKER_SUFFIX; + $marker = Parser::MARKER_PREFIX . "-h-$serial-" . Parser::MARKER_SUFFIX; $s = substr( $s, 0, $bits['level'] ) . $marker . substr( $s, $bits['level'] ); $this->parser->mStripState->addGeneral( $marker, '' ); $out .= $s; diff --git a/includes/parser/StripState.php b/includes/parser/StripState.php index 51ae42dc..b11dc8c3 100644 --- a/includes/parser/StripState.php +++ b/includes/parser/StripState.php @@ -37,15 +37,20 @@ class StripState { const UNSTRIP_RECURSION_LIMIT = 20; /** - * @param string $prefix + * @param string|null $prefix + * @since 1.26 The prefix argument should be omitted, as the strip marker + * prefix string is now a constant. */ - public function __construct( $prefix ) { - $this->prefix = $prefix; + public function __construct( $prefix = null ) { + if ( $prefix !== null ) { + wfDeprecated( __METHOD__ . ' with called with $prefix argument' . + ' (call with no arguments instead)', '1.26' ); + } $this->data = array( 'nowiki' => array(), 'general' => array() ); - $this->regex = "/{$this->prefix}([^\x7f]+)" . Parser::MARKER_SUFFIX . '/'; + $this->regex = '/' . Parser::MARKER_PREFIX . "([^\x7f]+)" . Parser::MARKER_SUFFIX . '/'; $this->circularRefGuard = array(); } @@ -144,7 +149,11 @@ class StripState { } $this->circularRefGuard[$marker] = true; $this->recursionLevel++; - $ret = $this->unstripType( $this->tempType, $this->data[$this->tempType][$marker] ); + $value = $this->data[$this->tempType][$marker]; + if ( $value instanceof Closure ) { + $value = $value(); + } + $ret = $this->unstripType( $this->tempType, $value ); $this->recursionLevel--; unset( $this->circularRefGuard[$marker] ); return $ret; @@ -162,10 +171,10 @@ class StripState { * @return StripState */ public function getSubState( $text ) { - $subState = new StripState( $this->prefix ); + $subState = new StripState(); $pos = 0; while ( true ) { - $startPos = strpos( $text, $this->prefix, $pos ); + $startPos = strpos( $text, Parser::MARKER_PREFIX, $pos ); $endPos = strpos( $text, Parser::MARKER_SUFFIX, $pos ); if ( $startPos === false || $endPos === false ) { break; @@ -198,7 +207,7 @@ class StripState { * @return array */ public function merge( $otherState, $texts ) { - $mergePrefix = Parser::getRandomString(); + $mergePrefix = wfRandomString( 16 ); foreach ( $otherState->data as $type => $items ) { foreach ( $items as $key => $value ) { @@ -218,7 +227,7 @@ class StripState { */ protected function mergeCallback( $m ) { $key = $m[1]; - return "{$this->prefix}{$this->tempMergePrefix}-$key" . Parser::MARKER_SUFFIX; + return Parser::MARKER_PREFIX . $this->tempMergePrefix . '-' . $key . Parser::MARKER_SUFFIX; } /** |