diff options
Diffstat (limited to 'includes/parser/Parser.php')
-rw-r--r-- | includes/parser/Parser.php | 1544 |
1 files changed, 924 insertions, 620 deletions
diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index 10765de2..1f14223d 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -28,7 +28,7 @@ /** * PHP Parser - Processes wiki markup (which uses a more user-friendly * syntax, such as "[[link]]" for making links), and provides a one-way - * transformation of that wiki markup it into XHTML output / markup + * transformation of that wiki markup it into (X)HTML output / markup * (which in turn the browser understands, and can display). * * There are seven main entry points into the Parser class: @@ -54,7 +54,6 @@ * @warning $wgUser or $wgTitle or $wgRequest or $wgLang. Keep them away! * * @par Settings: - * $wgLocaltimezone * $wgNamespacesWithSubpages * * @par Settings only within ParserOptions: @@ -62,7 +61,6 @@ * $wgAllowSpecialInclusion * $wgInterwikiMagic * $wgMaxArticleSize - * $wgUseDynamicDates * * @ingroup Parser */ @@ -117,14 +115,18 @@ class Parser { # Marker Suffix needs to be accessible staticly. const MARKER_SUFFIX = "-QINU\x7f"; + # Markers used for wrapping the table of contents + const TOC_START = '<mw:toc>'; + const TOC_END = '</mw:toc>'; + # Persistent: var $mTagHooks = array(); var $mTransparentTagHooks = array(); var $mFunctionHooks = array(); var $mFunctionSynonyms = array( 0 => array(), 1 => array() ); var $mFunctionTagHooks = array(); - var $mStripList = array(); - var $mDefaultStripList = array(); + var $mStripList = array(); + var $mDefaultStripList = array(); var $mVarCache = array(); var $mImageParams = array(); var $mImageParamsMagicArray = array(); @@ -193,7 +195,9 @@ class Parser { var $mRevisionId; # ID to display in {{REVISIONID}} tags var $mRevisionTimestamp; # The timestamp of the specified revision ID var $mRevisionUser; # User to display in {{REVISIONUSER}} tag + var $mRevisionSize; # Size to display in {{REVISIONSIZE}} variable var $mRevIdForTs; # The revision ID which was used to fetch the timestamp + var $mInputSize = false; # For {{PAGESIZE}} on current page. /** * @var string @@ -201,6 +205,13 @@ class Parser { var $mUniqPrefix; /** + * @var Array with the language name of each language link (i.e. the + * interwiki prefix) in the key, value arbitrary. Used to avoid sending + * duplicate language links to the ParserOutput. + */ + var $mLangLinkLanguages; + + /** * Constructor * * @param $conf array @@ -208,12 +219,12 @@ class Parser { public function __construct( $conf = array() ) { $this->mConf = $conf; $this->mUrlProtocols = wfUrlProtocols(); - $this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')'. - self::EXT_LINK_URL_CLASS.'+)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su'; + $this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')' . + self::EXT_LINK_URL_CLASS . '+)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su'; if ( isset( $conf['preprocessorClass'] ) ) { $this->mPreprocessorClass = $conf['preprocessorClass']; - } elseif ( defined( 'MW_COMPILED' ) ) { - # Preprocessor_Hash is much faster than Preprocessor_DOM in compiled mode + } elseif ( defined( 'HPHP_VERSION' ) ) { + # Preprocessor_Hash is much faster than Preprocessor_DOM under HipHop $this->mPreprocessorClass = 'Preprocessor_Hash'; } elseif ( extension_loaded( 'domxml' ) ) { # PECL extension that conflicts with the core DOM extension (bug 13770) @@ -240,6 +251,13 @@ class Parser { } /** + * Allow extensions to clean up when the parser is cloned + */ + function __clone() { + wfRunHooks( 'ParserCloned', array( $this ) ); + } + + /** * Do various kinds of initialisation on the first call of the parser */ function firstCallInit() { @@ -279,9 +297,10 @@ class Parser { $this->mLinkHolders = new LinkHolderArray( $this ); $this->mLinkID = 0; $this->mRevisionObject = $this->mRevisionTimestamp = - $this->mRevisionId = $this->mRevisionUser = null; + $this->mRevisionId = $this->mRevisionUser = $this->mRevisionSize = null; $this->mVarCache = array(); $this->mUser = null; + $this->mLangLinkLanguages = array(); /** * Prefix for temporary replacement strings for the multipass parser. @@ -291,12 +310,11 @@ class Parser { * string constructs. * * Must not consist of all title characters, or else it will change - * the behaviour of <nowiki> in a link. + * the behavior of <nowiki> in a link. */ $this->mUniqPrefix = "\x7fUNIQ" . self::getRandomString(); $this->mStripState = new StripState( $this->mUniqPrefix ); - # Clear these on every parse, bug 4549 $this->mTplExpandCache = $this->mTplRedirCache = $this->mTplDomCache = array(); @@ -327,12 +345,12 @@ class Parser { * Convert wikitext to HTML * Do not call this function recursively. * - * @param $text String: text we want to parse + * @param string $text text we want to parse * @param $title Title object * @param $options ParserOptions * @param $linestart boolean * @param $clearState boolean - * @param $revid Int: number to pass in {{REVISIONID}} + * @param int $revid number to pass in {{REVISIONID}} * @return ParserOutput a ParserOutput */ public function parse( $text, Title $title, ParserOptions $options, $linestart = true, $clearState = true, $revid = null ) { @@ -341,13 +359,18 @@ class Parser { * to internalParse() which does all the real work. */ - global $wgUseTidy, $wgAlwaysUseTidy; - $fname = __METHOD__.'-' . wfGetCaller(); + global $wgUseTidy, $wgAlwaysUseTidy, $wgShowHostnames; + $fname = __METHOD__ . '-' . wfGetCaller(); wfProfileIn( __METHOD__ ); wfProfileIn( $fname ); $this->startParse( $title, $options, self::OT_HTML, $clearState ); + $this->mInputSize = strlen( $text ); + if ( $this->mOptions->getEnableLimitReport() ) { + $this->mOutput->resetParseStartTime(); + } + # Remove the strip marker tag prefix from the input, if present. if ( $clearState ) { $text = str_replace( $this->mUniqPrefix, '', $text ); @@ -357,11 +380,13 @@ class Parser { $oldRevisionObject = $this->mRevisionObject; $oldRevisionTimestamp = $this->mRevisionTimestamp; $oldRevisionUser = $this->mRevisionUser; + $oldRevisionSize = $this->mRevisionSize; if ( $revid !== null ) { $this->mRevisionId = $revid; $this->mRevisionObject = null; $this->mRevisionTimestamp = null; $this->mRevisionUser = null; + $this->mRevisionSize = null; } wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) ); @@ -397,9 +422,7 @@ class Parser { if ( !( $options->getDisableContentConversion() || isset( $this->mDoubleUnderscores['nocontentconvert'] ) ) ) { - # Run convert unconditionally in 1.18-compatible mode - global $wgBug34832TransitionalRollback; - if ( $wgBug34832TransitionalRollback || !$this->mOptions->getInterfaceMessage() ) { + if ( !$this->mOptions->getInterfaceMessage() ) { # The position of the convert() call should not be changed. it # assumes that the links are all replaced and the only thing left # is the <nowiki> mark. @@ -479,23 +502,73 @@ class Parser { # Information on include size limits, for the benefit of users who try to skirt them if ( $this->mOptions->getEnableLimitReport() ) { $max = $this->mOptions->getMaxIncludeSize(); - $PFreport = "Expensive parser function count: {$this->mExpensiveFunctionCount}/{$this->mOptions->getExpensiveParserFunctionLimit()}\n"; - $limitReport = - "NewPP limit report\n" . - "Preprocessor visited node count: {$this->mPPNodeCount}/{$this->mOptions->getMaxPPNodeCount()}\n" . - "Preprocessor generated node count: " . - "{$this->mGeneratedPPNodeCount}/{$this->mOptions->getMaxGeneratedPPNodeCount()}\n" . - "Post-expand include size: {$this->mIncludeSizes['post-expand']}/$max bytes\n" . - "Template argument size: {$this->mIncludeSizes['arg']}/$max bytes\n". - "Highest expansion depth: {$this->mHighestExpansionDepth}/{$this->mOptions->getMaxPPExpandDepth()}\n". - $PFreport; + + $cpuTime = $this->mOutput->getTimeSinceStart( 'cpu' ); + if ( $cpuTime !== null ) { + $this->mOutput->setLimitReportData( 'limitreport-cputime', + sprintf( "%.3f", $cpuTime ) + ); + } + + $wallTime = $this->mOutput->getTimeSinceStart( 'wall' ); + $this->mOutput->setLimitReportData( 'limitreport-walltime', + sprintf( "%.3f", $wallTime ) + ); + + $this->mOutput->setLimitReportData( 'limitreport-ppvisitednodes', + array( $this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount() ) + ); + $this->mOutput->setLimitReportData( 'limitreport-ppgeneratednodes', + array( $this->mGeneratedPPNodeCount, $this->mOptions->getMaxGeneratedPPNodeCount() ) + ); + $this->mOutput->setLimitReportData( 'limitreport-postexpandincludesize', + array( $this->mIncludeSizes['post-expand'], $max ) + ); + $this->mOutput->setLimitReportData( 'limitreport-templateargumentsize', + array( $this->mIncludeSizes['arg'], $max ) + ); + $this->mOutput->setLimitReportData( 'limitreport-expansiondepth', + array( $this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth() ) + ); + $this->mOutput->setLimitReportData( 'limitreport-expensivefunctioncount', + array( $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit() ) + ); + wfRunHooks( 'ParserLimitReportPrepare', array( $this, $this->mOutput ) ); + + $limitReport = "NewPP limit report\n"; + if ( $wgShowHostnames ) { + $limitReport .= 'Parsed by ' . wfHostname() . "\n"; + } + foreach ( $this->mOutput->getLimitReportData() as $key => $value ) { + if ( wfRunHooks( 'ParserLimitReportFormat', + array( $key, $value, &$limitReport, false, false ) + ) ) { + $keyMsg = wfMessage( $key )->inLanguage( 'en' )->useDatabase( false ); + $valueMsg = wfMessage( array( "$key-value-text", "$key-value" ) ) + ->inLanguage( 'en' )->useDatabase( false ); + if ( !$valueMsg->exists() ) { + $valueMsg = new RawMessage( '$1' ); + } + if ( !$keyMsg->isDisabled() && !$valueMsg->isDisabled() ) { + $valueMsg->params( $value ); + $limitReport .= "{$keyMsg->text()}: {$valueMsg->text()}\n"; + } + } + } + // Since we're not really outputting HTML, decode the entities and + // then re-encode the things that need hiding inside HTML comments. + $limitReport = htmlspecialchars_decode( $limitReport ); wfRunHooks( 'ParserLimitReport', array( $this, &$limitReport ) ); // Sanitize for comment. Note '‐' in the replacement is U+2010, // which looks much like the problematic '-'. $limitReport = str_replace( array( '-', '&' ), array( '‐', '&' ), $limitReport ); - $text .= "\n<!-- \n$limitReport-->\n"; + + if ( $this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10 ) { + wfDebugLog( 'generated-pp-node-count', $this->mGeneratedPPNodeCount . ' ' . + $this->mTitle->getPrefixedDBkey() ); + } } $this->mOutput->setText( $text ); @@ -503,6 +576,8 @@ class Parser { $this->mRevisionObject = $oldRevisionObject; $this->mRevisionTimestamp = $oldRevisionTimestamp; $this->mRevisionUser = $oldRevisionUser; + $this->mRevisionSize = $oldRevisionSize; + $this->mInputSize = false; wfProfileOut( $fname ); wfProfileOut( __METHOD__ ); @@ -515,12 +590,12 @@ class Parser { * * If $frame is not provided, then template variables (e.g., {{{1}}}) within $text are not expanded * - * @param $text String: text extension wants to have parsed + * @param string $text text extension wants to have parsed * @param $frame PPFrame: The frame to use for expanding any template variables * * @return string */ - function recursiveTagParse( $text, $frame=false ) { + function recursiveTagParse( $text, $frame = false ) { wfProfileIn( __METHOD__ ); wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) ); wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) ); @@ -534,7 +609,7 @@ class Parser { * Also removes comments. * @return mixed|string */ - function preprocess( $text, Title $title, ParserOptions $options, $revid = null ) { + function preprocess( $text, Title $title = null, ParserOptions $options, $revid = null ) { wfProfileIn( __METHOD__ ); $this->startParse( $title, $options, self::OT_PREPROCESS, true ); if ( $revid !== null ) { @@ -552,7 +627,7 @@ class Parser { * Recursive parser entry point that can be called from an extension tag * hook. * - * @param $text String: text to be expanded + * @param string $text text to be expanded * @param $frame PPFrame: The frame to use for expanding any template variables * @return String * @since 1.19 @@ -593,7 +668,7 @@ class Parser { * * @return string */ - static public function getRandomString() { + public static function getRandomString() { return wfRandomString( 16 ); } @@ -682,7 +757,7 @@ class Parser { /** * Accessor/mutator for the output type * - * @param $x int|null New value or null to just get the current one + * @param int|null $x New value or null to just get the current one * @return Integer */ function OutputType( $x = null ) { @@ -745,6 +820,7 @@ class Parser { * * @since 1.19 * + * @throws MWException * @return Language|null */ public function getTargetLanguage() { @@ -752,9 +828,9 @@ class Parser { if ( $target !== null ) { return $target; - } elseif( $this->mOptions->getInterfaceMessage() ) { + } elseif ( $this->mOptions->getInterfaceMessage() ) { return $this->mOptions->getUserLangObj(); - } elseif( is_null( $this->mTitle ) ) { + } elseif ( is_null( $this->mTitle ) ) { throw new MWException( __METHOD__ . ': $this->mTitle is null' ); } @@ -765,12 +841,7 @@ class Parser { * Get the language object for language conversion */ function getConverterLanguage() { - global $wgBug34832TransitionalRollback, $wgContLang; - if ( $wgBug34832TransitionalRollback ) { - return $wgContLang; - } else { - return $this->getTargetLanguage(); - } + return $this->getTargetLanguage(); } /** @@ -813,9 +884,9 @@ class Parser { * '<element param="x">tag content</element>' ) ) * @endcode * - * @param $elements array list of element names. Comments are always extracted. - * @param $text string Source text string. - * @param $matches array Out parameter, Array: extracted tags + * @param array $elements list of element names. Comments are always extracted. + * @param string $text Source text string. + * @param array $matches Out parameter, Array: extracted tags * @param $uniq_prefix string * @return String: stripped text */ @@ -835,16 +906,16 @@ class Parser { } if ( count( $p ) > 5 ) { # comment - $element = $p[4]; + $element = $p[4]; $attributes = ''; - $close = ''; - $inside = $p[5]; + $close = ''; + $inside = $p[5]; } else { # tag - $element = $p[1]; + $element = $p[1]; $attributes = $p[2]; - $close = $p[3]; - $inside = $p[4]; + $close = $p[3]; + $inside = $p[4]; } $marker = "$uniq_prefix-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX; @@ -928,33 +999,33 @@ class Parser { $line = trim( $outLine ); if ( $line === '' ) { # empty line, go to next line - $out .= $outLine."\n"; + $out .= $outLine . "\n"; continue; } $first_character = $line[0]; $matches = array(); - if ( preg_match( '/^(:*)\{\|(.*)$/', $line , $matches ) ) { + if ( preg_match( '/^(:*)\{\|(.*)$/', $line, $matches ) ) { # First check if we are starting a new table $indent_level = strlen( $matches[1] ); $attributes = $this->mStripState->unstripBoth( $matches[2] ); - $attributes = Sanitizer::fixTagAttributes( $attributes , 'table' ); - - $outLine = str_repeat( '<dl><dd>' , $indent_level ) . "<table{$attributes}>"; - array_push( $td_history , false ); - array_push( $last_tag_history , '' ); - array_push( $tr_history , false ); - array_push( $tr_attributes , '' ); - array_push( $has_opened_tr , false ); + $attributes = Sanitizer::fixTagAttributes( $attributes, 'table' ); + + $outLine = str_repeat( '<dl><dd>', $indent_level ) . "<table{$attributes}>"; + array_push( $td_history, false ); + array_push( $last_tag_history, '' ); + array_push( $tr_history, false ); + array_push( $tr_attributes, '' ); + array_push( $has_opened_tr, false ); } elseif ( count( $td_history ) == 0 ) { # Don't do any of the following - $out .= $outLine."\n"; + $out .= $outLine . "\n"; continue; - } elseif ( substr( $line , 0 , 2 ) === '|}' ) { + } elseif ( substr( $line, 0, 2 ) === '|}' ) { # We are ending a table - $line = '</table>' . substr( $line , 2 ); + $line = '</table>' . substr( $line, 2 ); $last_tag = array_pop( $last_tag_history ); if ( !array_pop( $has_opened_tr ) ) { @@ -969,8 +1040,8 @@ class Parser { $line = "</{$last_tag}>{$line}"; } array_pop( $tr_attributes ); - $outLine = $line . str_repeat( '</dd></dl>' , $indent_level ); - } elseif ( substr( $line , 0 , 2 ) === '|-' ) { + $outLine = $line . str_repeat( '</dd></dl>', $indent_level ); + } elseif ( substr( $line, 0, 2 ) === '|-' ) { # Now we have a table row $line = preg_replace( '#^\|-+#', '', $line ); @@ -983,7 +1054,7 @@ class Parser { $line = ''; $last_tag = array_pop( $last_tag_history ); array_pop( $has_opened_tr ); - array_push( $has_opened_tr , true ); + array_push( $has_opened_tr, true ); if ( array_pop( $tr_history ) ) { $line = '</tr>'; @@ -994,27 +1065,27 @@ class Parser { } $outLine = $line; - array_push( $tr_history , false ); - array_push( $td_history , false ); - array_push( $last_tag_history , '' ); - } elseif ( $first_character === '|' || $first_character === '!' || substr( $line , 0 , 2 ) === '|+' ) { + array_push( $tr_history, false ); + array_push( $td_history, false ); + array_push( $last_tag_history, '' ); + } elseif ( $first_character === '|' || $first_character === '!' || substr( $line, 0, 2 ) === '|+' ) { # This might be cell elements, td, th or captions - if ( substr( $line , 0 , 2 ) === '|+' ) { + if ( substr( $line, 0, 2 ) === '|+' ) { $first_character = '+'; - $line = substr( $line , 1 ); + $line = substr( $line, 1 ); } - $line = substr( $line , 1 ); + $line = substr( $line, 1 ); if ( $first_character === '!' ) { - $line = str_replace( '!!' , '||' , $line ); + $line = str_replace( '!!', '||', $line ); } # Split up multiple cells on the same line. # FIXME : This can result in improper nesting of tags processed # by earlier parser steps, but should avoid splitting up eg # attribute values containing literal "||". - $cells = StringUtils::explodeMarkup( '||' , $line ); + $cells = StringUtils::explodeMarkup( '||', $line ); $outLine = ''; @@ -1026,10 +1097,10 @@ class Parser { if ( !array_pop( $tr_history ) ) { $previous = "<tr{$tr_after}>\n"; } - array_push( $tr_history , true ); - array_push( $tr_attributes , '' ); + array_push( $tr_history, true ); + array_push( $tr_attributes, '' ); array_pop( $has_opened_tr ); - array_push( $has_opened_tr , true ); + array_push( $has_opened_tr, true ); } $last_tag = array_pop( $last_tag_history ); @@ -1048,10 +1119,10 @@ class Parser { $last_tag = ''; } - array_push( $last_tag_history , $last_tag ); + array_push( $last_tag_history, $last_tag ); # A cell could contain both parameters and data - $cell_data = explode( '|' , $cell , 2 ); + $cell_data = explode( '|', $cell, 2 ); # Bug 553: Note that a '|' inside an invalid link should not # be mistaken as delimiting cell parameters @@ -1061,12 +1132,12 @@ class Parser { $cell = "{$previous}<{$last_tag}>{$cell_data[0]}"; } else { $attributes = $this->mStripState->unstripBoth( $cell_data[0] ); - $attributes = Sanitizer::fixTagAttributes( $attributes , $last_tag ); + $attributes = Sanitizer::fixTagAttributes( $attributes, $last_tag ); $cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}"; } $outLine .= $cell; - array_push( $td_history , true ); + array_push( $td_history, true ); } } $out .= $outLine . "\n"; @@ -1081,7 +1152,7 @@ class Parser { $out .= "</tr>\n"; } if ( !array_pop( $has_opened_tr ) ) { - $out .= "<tr><td></td></tr>\n" ; + $out .= "<tr><td></td></tr>\n"; } $out .= "</table>\n"; @@ -1122,7 +1193,7 @@ class Parser { # Hook to suspend the parser in this state if ( !wfRunHooks( 'ParserBeforeInternalParse', array( &$this, &$text, &$this->mStripState ) ) ) { wfProfileOut( __METHOD__ ); - return $text ; + return $text; } # if $frame is provided, then use $frame for replacing any variables @@ -1156,17 +1227,13 @@ class Parser { $text = $this->doDoubleUnderscore( $text ); $text = $this->doHeadings( $text ); - if ( $this->mOptions->getUseDynamicDates() ) { - $df = DateFormatter::getInstance(); - $text = $df->reformat( $this->mOptions->getDateFormat(), $text ); - } $text = $this->replaceInternalLinks( $text ); $text = $this->doAllQuotes( $text ); $text = $this->replaceExternalLinks( $text ); # replaceInternalLinks may sometimes leave behind # absolute URLs, which have to be masked to hide them from replaceExternalLinks - $text = str_replace( $this->mUniqPrefix.'NOPARSE', '', $text ); + $text = str_replace( $this->mUniqPrefix . 'NOPARSE', '', $text ); $text = $this->doMagicLinks( $text ); $text = $this->formatHeadings( $text, $origText, $isMain ); @@ -1234,7 +1301,7 @@ class Parser { $CssClass = 'mw-magiclink-pmid'; $id = $m[4]; } else { - throw new MWException( __METHOD__.': unrecognised match type "' . + throw new MWException( __METHOD__ . ': unrecognised match type "' . substr( $m[0], 0, 20 ) . '"' ); } $url = wfMessage( $urlmsg, $id )->inContentLanguage()->text(); @@ -1248,8 +1315,8 @@ class Parser { 'x' => 'X', )); $titleObj = SpecialPage::getTitleFor( 'Booksources', $num ); - return'<a href="' . - htmlspecialchars( $titleObj->getLocalUrl() ) . + return '<a href="' . + htmlspecialchars( $titleObj->getLocalURL() ) . "\" class=\"internal mw-magiclink-isbn\">ISBN $isbn</a>"; } else { return $m[0]; @@ -1298,7 +1365,8 @@ class Parser { if ( $text === false ) { # Not an image, make a link $text = Linker::makeExternalLink( $url, - $this->getConverterLanguage()->markNoConversion($url), true, 'free', + $this->getConverterLanguage()->markNoConversion( $url, true ), + true, 'free', $this->getExternalLinkAttribs( $url ) ); # Register it in the output object... # Replace unnecessary URL escape codes with their equivalent characters @@ -1309,7 +1377,6 @@ class Parser { return $text . $trail; } - /** * Parse headers and return html * @@ -1323,8 +1390,7 @@ class Parser { wfProfileIn( __METHOD__ ); for ( $i = 6; $i >= 1; --$i ) { $h = str_repeat( '=', $i ); - $text = preg_replace( "/^$h(.+)$h\\s*$/m", - "<h$i>\\1</h$i>", $text ); + $text = preg_replace( "/^$h(.+)$h\\s*$/m", "<h$i>\\1</h$i>", $text ); } wfProfileOut( __METHOD__ ); return $text; @@ -1345,7 +1411,7 @@ class Parser { foreach ( $lines as $line ) { $outtext .= $this->doQuotes( $line ) . "\n"; } - $outtext = substr( $outtext, 0,-1 ); + $outtext = substr( $outtext, 0, -1 ); wfProfileOut( __METHOD__ ); return $outtext; } @@ -1359,170 +1425,192 @@ class Parser { */ public function doQuotes( $text ) { $arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE ); - if ( count( $arr ) == 1 ) { + $countarr = count( $arr ); + if ( $countarr == 1 ) { return $text; - } else { - # First, do some preliminary work. This may shift some apostrophes from - # being mark-up to being text. It also counts the number of occurrences - # of bold and italics mark-ups. - $numbold = 0; - $numitalics = 0; - for ( $i = 0; $i < count( $arr ); $i++ ) { - if ( ( $i % 2 ) == 1 ) { - # If there are ever four apostrophes, assume the first is supposed to - # be text, and the remaining three constitute mark-up for bold text. - if ( strlen( $arr[$i] ) == 4 ) { - $arr[$i-1] .= "'"; - $arr[$i] = "'''"; - } elseif ( strlen( $arr[$i] ) > 5 ) { - # If there are more than 5 apostrophes in a row, assume they're all - # text except for the last 5. - $arr[$i-1] .= str_repeat( "'", strlen( $arr[$i] ) - 5 ); - $arr[$i] = "'''''"; - } - # Count the number of occurrences of bold and italics mark-ups. - # We are not counting sequences of five apostrophes. - if ( strlen( $arr[$i] ) == 2 ) { - $numitalics++; - } elseif ( strlen( $arr[$i] ) == 3 ) { - $numbold++; - } elseif ( strlen( $arr[$i] ) == 5 ) { - $numitalics++; - $numbold++; - } - } + } + + // First, do some preliminary work. This may shift some apostrophes from + // being mark-up to being text. It also counts the number of occurrences + // of bold and italics mark-ups. + $numbold = 0; + $numitalics = 0; + for ( $i = 1; $i < $countarr; $i += 2 ) { + $thislen = strlen( $arr[$i] ); + // If there are ever four apostrophes, assume the first is supposed to + // be text, and the remaining three constitute mark-up for bold text. + // (bug 13227: ''''foo'''' turns into ' ''' foo ' ''') + if ( $thislen == 4 ) { + $arr[$i - 1] .= "'"; + $arr[$i] = "'''"; + $thislen = 3; + } elseif ( $thislen > 5 ) { + // If there are more than 5 apostrophes in a row, assume they're all + // text except for the last 5. + // (bug 13227: ''''''foo'''''' turns into ' ''''' foo ' ''''') + $arr[$i - 1] .= str_repeat( "'", $thislen - 5 ); + $arr[$i] = "'''''"; + $thislen = 5; + } + // Count the number of occurrences of bold and italics mark-ups. + if ( $thislen == 2 ) { + $numitalics++; + } elseif ( $thislen == 3 ) { + $numbold++; + } elseif ( $thislen == 5 ) { + $numitalics++; + $numbold++; } + } - # If there is an odd number of both bold and italics, it is likely - # that one of the bold ones was meant to be an apostrophe followed - # by italics. Which one we cannot know for certain, but it is more - # likely to be one that has a single-letter word before it. - if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) { - $i = 0; - $firstsingleletterword = -1; - $firstmultiletterword = -1; - $firstspace = -1; - foreach ( $arr as $r ) { - if ( ( $i % 2 == 1 ) and ( strlen( $r ) == 3 ) ) { - $x1 = substr( $arr[$i-1], -1 ); - $x2 = substr( $arr[$i-1], -2, 1 ); - if ( $x1 === ' ' ) { - if ( $firstspace == -1 ) { - $firstspace = $i; - } - } elseif ( $x2 === ' ') { - if ( $firstsingleletterword == -1 ) { - $firstsingleletterword = $i; - } - } else { - if ( $firstmultiletterword == -1 ) { - $firstmultiletterword = $i; - } + // If there is an odd number of both bold and italics, it is likely + // that one of the bold ones was meant to be an apostrophe followed + // by italics. Which one we cannot know for certain, but it is more + // likely to be one that has a single-letter word before it. + if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) { + $firstsingleletterword = -1; + $firstmultiletterword = -1; + $firstspace = -1; + for ( $i = 1; $i < $countarr; $i += 2 ) { + if ( strlen( $arr[$i] ) == 3 ) { + $x1 = substr( $arr[$i - 1], -1 ); + $x2 = substr( $arr[$i - 1], -2, 1 ); + if ( $x1 === ' ' ) { + if ( $firstspace == -1 ) { + $firstspace = $i; + } + } elseif ( $x2 === ' ' ) { + if ( $firstsingleletterword == -1 ) { + $firstsingleletterword = $i; + // if $firstsingleletterword is set, we don't + // look at the other options, so we can bail early. + break; + } + } else { + if ( $firstmultiletterword == -1 ) { + $firstmultiletterword = $i; } } - $i++; } + } - # If there is a single-letter word, use it! - if ( $firstsingleletterword > -1 ) { - $arr[$firstsingleletterword] = "''"; - $arr[$firstsingleletterword-1] .= "'"; - } elseif ( $firstmultiletterword > -1 ) { - # If not, but there's a multi-letter word, use that one. - $arr[$firstmultiletterword] = "''"; - $arr[$firstmultiletterword-1] .= "'"; - } elseif ( $firstspace > -1 ) { - # ... otherwise use the first one that has neither. - # (notice that it is possible for all three to be -1 if, for example, - # there is only one pentuple-apostrophe in the line) - $arr[$firstspace] = "''"; - $arr[$firstspace-1] .= "'"; - } + // If there is a single-letter word, use it! + if ( $firstsingleletterword > -1 ) { + $arr[$firstsingleletterword] = "''"; + $arr[$firstsingleletterword - 1] .= "'"; + } elseif ( $firstmultiletterword > -1 ) { + // If not, but there's a multi-letter word, use that one. + $arr[$firstmultiletterword] = "''"; + $arr[$firstmultiletterword - 1] .= "'"; + } elseif ( $firstspace > -1 ) { + // ... otherwise use the first one that has neither. + // (notice that it is possible for all three to be -1 if, for example, + // there is only one pentuple-apostrophe in the line) + $arr[$firstspace] = "''"; + $arr[$firstspace - 1] .= "'"; } + } - # Now let's actually convert our apostrophic mush to HTML! - $output = ''; - $buffer = ''; - $state = ''; - $i = 0; - foreach ( $arr as $r ) { - if ( ( $i % 2 ) == 0 ) { - if ( $state === 'both' ) { - $buffer .= $r; - } else { - $output .= $r; - } + // Now let's actually convert our apostrophic mush to HTML! + $output = ''; + $buffer = ''; + $state = ''; + $i = 0; + foreach ( $arr as $r ) { + if ( ( $i % 2 ) == 0 ) { + if ( $state === 'both' ) { + $buffer .= $r; } else { - if ( strlen( $r ) == 2 ) { - if ( $state === 'i' ) { - $output .= '</i>'; $state = ''; - } elseif ( $state === 'bi' ) { - $output .= '</i>'; $state = 'b'; - } elseif ( $state === 'ib' ) { - $output .= '</b></i><b>'; $state = 'b'; - } elseif ( $state === 'both' ) { - $output .= '<b><i>'.$buffer.'</i>'; $state = 'b'; - } else { # $state can be 'b' or '' - $output .= '<i>'; $state .= 'i'; - } - } elseif ( strlen( $r ) == 3 ) { - if ( $state === 'b' ) { - $output .= '</b>'; $state = ''; - } elseif ( $state === 'bi' ) { - $output .= '</i></b><i>'; $state = 'i'; - } elseif ( $state === 'ib' ) { - $output .= '</b>'; $state = 'i'; - } elseif ( $state === 'both' ) { - $output .= '<i><b>'.$buffer.'</b>'; $state = 'i'; - } else { # $state can be 'i' or '' - $output .= '<b>'; $state .= 'b'; - } - } elseif ( strlen( $r ) == 5 ) { - if ( $state === 'b' ) { - $output .= '</b><i>'; $state = 'i'; - } elseif ( $state === 'i' ) { - $output .= '</i><b>'; $state = 'b'; - } elseif ( $state === 'bi' ) { - $output .= '</i></b>'; $state = ''; - } elseif ( $state === 'ib' ) { - $output .= '</b></i>'; $state = ''; - } elseif ( $state === 'both' ) { - $output .= '<i><b>'.$buffer.'</b></i>'; $state = ''; - } else { # ($state == '') - $buffer = ''; $state = 'both'; - } + $output .= $r; + } + } else { + $thislen = strlen( $r ); + if ( $thislen == 2 ) { + if ( $state === 'i' ) { + $output .= '</i>'; + $state = ''; + } elseif ( $state === 'bi' ) { + $output .= '</i>'; + $state = 'b'; + } elseif ( $state === 'ib' ) { + $output .= '</b></i><b>'; + $state = 'b'; + } elseif ( $state === 'both' ) { + $output .= '<b><i>' . $buffer . '</i>'; + $state = 'b'; + } else { // $state can be 'b' or '' + $output .= '<i>'; + $state .= 'i'; + } + } elseif ( $thislen == 3 ) { + if ( $state === 'b' ) { + $output .= '</b>'; + $state = ''; + } elseif ( $state === 'bi' ) { + $output .= '</i></b><i>'; + $state = 'i'; + } elseif ( $state === 'ib' ) { + $output .= '</b>'; + $state = 'i'; + } elseif ( $state === 'both' ) { + $output .= '<i><b>' . $buffer . '</b>'; + $state = 'i'; + } else { // $state can be 'i' or '' + $output .= '<b>'; + $state .= 'b'; + } + } elseif ( $thislen == 5 ) { + if ( $state === 'b' ) { + $output .= '</b><i>'; + $state = 'i'; + } elseif ( $state === 'i' ) { + $output .= '</i><b>'; + $state = 'b'; + } elseif ( $state === 'bi' ) { + $output .= '</i></b>'; + $state = ''; + } elseif ( $state === 'ib' ) { + $output .= '</b></i>'; + $state = ''; + } elseif ( $state === 'both' ) { + $output .= '<i><b>' . $buffer . '</b></i>'; + $state = ''; + } else { // ($state == '') + $buffer = ''; + $state = 'both'; } } - $i++; - } - # Now close all remaining tags. Notice that the order is important. - if ( $state === 'b' || $state === 'ib' ) { - $output .= '</b>'; } - if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) { - $output .= '</i>'; - } - if ( $state === 'bi' ) { - $output .= '</b>'; - } - # There might be lonely ''''', so make sure we have a buffer - if ( $state === 'both' && $buffer ) { - $output .= '<b><i>'.$buffer.'</i></b>'; - } - return $output; + $i++; + } + // Now close all remaining tags. Notice that the order is important. + if ( $state === 'b' || $state === 'ib' ) { + $output .= '</b>'; + } + if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) { + $output .= '</i>'; + } + if ( $state === 'bi' ) { + $output .= '</b>'; + } + // There might be lonely ''''', so make sure we have a buffer + if ( $state === 'both' && $buffer ) { + $output .= '<b><i>' . $buffer . '</i></b>'; } + return $output; } /** * Replace external links (REL) * * Note: this is all very hackish and the order of execution matters a lot. - * Make sure to run maintenance/parserTests.php if you change this code. + * Make sure to run tests/parserTests.php if you change this code. * * @private * * @param $text string * + * @throws MWException * @return string */ function replaceExternalLinks( $text ) { @@ -1530,14 +1618,15 @@ class Parser { $bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE ); if ( $bits === false ) { + wfProfileOut( __METHOD__ ); throw new MWException( "PCRE needs to be compiled with --enable-unicode-properties in order for MediaWiki to function" ); } $s = array_shift( $bits ); $i = 0; - while ( $i<count( $bits ) ) { + while ( $i < count( $bits ) ) { $url = $bits[$i++]; - $protocol = $bits[$i++]; + $i++; // protocol $text = $bits[$i++]; $trail = $bits[$i++]; @@ -1595,26 +1684,39 @@ class Parser { wfProfileOut( __METHOD__ ); return $s; } - + /** + * Get the rel attribute for a particular external link. + * + * @since 1.21 + * @param string|bool $url optional URL, to extract the domain from for rel => + * nofollow if appropriate + * @param $title Title optional Title, for wgNoFollowNsExceptions lookups + * @return string|null rel attribute for $url + */ + public static function getExternalLinkRel( $url = false, $title = null ) { + global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions; + $ns = $title ? $title->getNamespace() : false; + if ( $wgNoFollowLinks && !in_array( $ns, $wgNoFollowNsExceptions ) && + !wfMatchesDomainList( $url, $wgNoFollowDomainExceptions ) ) + { + return 'nofollow'; + } + return null; + } /** * Get an associative array of additional HTML attributes appropriate for a * particular external link. This currently may include rel => nofollow * (depending on configuration, namespace, and the URL's domain) and/or a * target attribute (depending on configuration). * - * @param $url String|bool optional URL, to extract the domain from for rel => + * @param string|bool $url optional URL, to extract the domain from for rel => * nofollow if appropriate * @return Array associative array of HTML attributes */ function getExternalLinkAttribs( $url = false ) { $attribs = array(); - global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions; - $ns = $this->mTitle->getNamespace(); - if ( $wgNoFollowLinks && !in_array( $ns, $wgNoFollowNsExceptions ) && - !wfMatchesDomainList( $url, $wgNoFollowDomainExceptions ) ) - { - $attribs['rel'] = 'nofollow'; - } + $attribs['rel'] = self::getExternalLinkRel( $url, $this->mTitle ); + if ( $this->mOptions->getExternalLinkTarget() ) { $attribs['target'] = $this->mOptions->getExternalLinkTarget(); } @@ -1726,6 +1828,8 @@ class Parser { /** * Process [[ ]] wikilinks (RIL) + * @param $s + * @throws MWException * @return LinkHolderArray * * @private @@ -1733,8 +1837,8 @@ class Parser { function replaceInternalLinks2( &$s ) { wfProfileIn( __METHOD__ ); - wfProfileIn( __METHOD__.'-setup' ); - static $tc = FALSE, $e1, $e1_img; + wfProfileIn( __METHOD__ . '-setup' ); + static $tc = false, $e1, $e1_img; # the % is needed to support urlencoded titles as well if ( !$tc ) { $tc = Title::legalChars() . '#%'; @@ -1763,9 +1867,9 @@ class Parser { } if ( is_null( $this->mTitle ) ) { - wfProfileOut( __METHOD__.'-setup' ); + wfProfileOut( __METHOD__ . '-setup' ); wfProfileOut( __METHOD__ ); - throw new MWException( __METHOD__.": \$this->mTitle is null\n" ); + throw new MWException( __METHOD__ . ": \$this->mTitle is null\n" ); } $nottalk = !$this->mTitle->isTalkPage(); @@ -1780,17 +1884,11 @@ class Parser { $prefix = ''; } - if ( $this->getConverterLanguage()->hasVariants() ) { - $selflink = $this->getConverterLanguage()->autoConvertToAllVariants( - $this->mTitle->getPrefixedText() ); - } else { - $selflink = array( $this->mTitle->getPrefixedText() ); - } $useSubpages = $this->areSubpagesAllowed(); - wfProfileOut( __METHOD__.'-setup' ); + wfProfileOut( __METHOD__ . '-setup' ); # Loop for each link - for ( ; $line !== false && $line !== null ; $a->next(), $line = $a->current() ) { + for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) { # Check for excessive memory usage if ( $holders->isBig() ) { # Too big @@ -1800,24 +1898,24 @@ class Parser { } if ( $useLinkPrefixExtension ) { - wfProfileIn( __METHOD__.'-prefixhandling' ); + wfProfileIn( __METHOD__ . '-prefixhandling' ); if ( preg_match( $e2, $s, $m ) ) { $prefix = $m[2]; $s = $m[1]; } else { - $prefix=''; + $prefix = ''; } # first link if ( $first_prefix ) { $prefix = $first_prefix; $first_prefix = false; } - wfProfileOut( __METHOD__.'-prefixhandling' ); + wfProfileOut( __METHOD__ . '-prefixhandling' ); } $might_be_img = false; - wfProfileIn( __METHOD__."-e1" ); + wfProfileIn( __METHOD__ . "-e1" ); if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt $text = $m[2]; # If we get a ] at the beginning of $m[3] that means we have a link that's something like: @@ -1839,7 +1937,7 @@ class Parser { # fix up urlencoded title texts if ( strpos( $m[1], '%' ) !== false ) { # Should anchors '#' also be rejected? - $m[1] = str_replace( array('<', '>'), array('<', '>'), rawurldecode( $m[1] ) ); + $m[1] = str_replace( array( '<', '>' ), array( '<', '>' ), rawurldecode( $m[1] ) ); } $trail = $m[3]; } elseif ( preg_match( $e1_img, $line, $m ) ) { # Invalid, but might be an image with a link in its caption @@ -1850,19 +1948,19 @@ class Parser { } $trail = ""; } else { # Invalid form; output directly - $s .= $prefix . '[[' . $line ; - wfProfileOut( __METHOD__."-e1" ); + $s .= $prefix . '[[' . $line; + wfProfileOut( __METHOD__ . "-e1" ); continue; } - wfProfileOut( __METHOD__."-e1" ); - wfProfileIn( __METHOD__."-misc" ); + wfProfileOut( __METHOD__ . "-e1" ); + wfProfileIn( __METHOD__ . "-misc" ); # Don't allow internal links to pages containing # PROTO: where PROTO is a valid URL protocol; these # should be external links. if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $m[1] ) ) { - $s .= $prefix . '[[' . $line ; - wfProfileOut( __METHOD__."-misc" ); + $s .= $prefix . '[[' . $line; + wfProfileOut( __METHOD__ . "-misc" ); continue; } @@ -1879,21 +1977,21 @@ class Parser { $link = substr( $link, 1 ); } - wfProfileOut( __METHOD__."-misc" ); - wfProfileIn( __METHOD__."-title" ); + wfProfileOut( __METHOD__ . "-misc" ); + wfProfileIn( __METHOD__ . "-title" ); $nt = Title::newFromText( $this->mStripState->unstripNoWiki( $link ) ); if ( $nt === null ) { $s .= $prefix . '[[' . $line; - wfProfileOut( __METHOD__."-title" ); + wfProfileOut( __METHOD__ . "-title" ); continue; } $ns = $nt->getNamespace(); $iw = $nt->getInterWiki(); - wfProfileOut( __METHOD__."-title" ); + wfProfileOut( __METHOD__ . "-title" ); if ( $might_be_img ) { # if this is actually an invalid link - wfProfileIn( __METHOD__."-might_be_img" ); + wfProfileIn( __METHOD__ . "-might_be_img" ); if ( $ns == NS_FILE && $noforce ) { # but might be an image $found = false; while ( true ) { @@ -1925,19 +2023,19 @@ class Parser { $holders->merge( $this->replaceInternalLinks2( $text ) ); $s .= "{$prefix}[[$link|$text"; # note: no $trail, because without an end, there *is* no trail - wfProfileOut( __METHOD__."-might_be_img" ); + wfProfileOut( __METHOD__ . "-might_be_img" ); continue; } } else { # it's not an image, so output it raw $s .= "{$prefix}[[$link|$text"; # note: no $trail, because without an end, there *is* no trail - wfProfileOut( __METHOD__."-might_be_img" ); + wfProfileOut( __METHOD__ . "-might_be_img" ); continue; } - wfProfileOut( __METHOD__."-might_be_img" ); + wfProfileOut( __METHOD__ . "-might_be_img" ); } - $wasblank = ( $text == '' ); + $wasblank = ( $text == '' ); if ( $wasblank ) { $text = $link; } else { @@ -1951,18 +2049,25 @@ class Parser { # Link not escaped by : , create the various objects if ( $noforce ) { # Interwikis - wfProfileIn( __METHOD__."-interwiki" ); + wfProfileIn( __METHOD__ . "-interwiki" ); if ( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && Language::fetchLanguageName( $iw, null, 'mw' ) ) { - $this->mOutput->addLanguageLink( $nt->getFullText() ); + // XXX: the above check prevents links to sites with identifiers that are not language codes + + # Bug 24502: filter duplicates + if ( !isset( $this->mLangLinkLanguages[$iw] ) ) { + $this->mLangLinkLanguages[$iw] = true; + $this->mOutput->addLanguageLink( $nt->getFullText() ); + } + $s = rtrim( $s . $prefix ); $s .= trim( $trail, "\n" ) == '' ? '': $prefix . $trail; - wfProfileOut( __METHOD__."-interwiki" ); + wfProfileOut( __METHOD__ . "-interwiki" ); continue; } - wfProfileOut( __METHOD__."-interwiki" ); + wfProfileOut( __METHOD__ . "-interwiki" ); if ( $ns == NS_FILE ) { - wfProfileIn( __METHOD__."-image" ); + wfProfileIn( __METHOD__ . "-image" ); if ( !wfIsBadImage( $nt->getDBkey(), $this->mTitle ) ) { if ( $wasblank ) { # if no parameters were passed, $text @@ -1983,12 +2088,12 @@ class Parser { } else { $s .= $prefix . $trail; } - wfProfileOut( __METHOD__."-image" ); + wfProfileOut( __METHOD__ . "-image" ); continue; } if ( $ns == NS_CATEGORY ) { - wfProfileIn( __METHOD__."-category" ); + wfProfileIn( __METHOD__ . "-category" ); $s = rtrim( $s . "\n" ); # bug 87 if ( $wasblank ) { @@ -2007,23 +2112,23 @@ class Parser { */ $s .= trim( $prefix . $trail, "\n" ) == '' ? '' : $prefix . $trail; - wfProfileOut( __METHOD__."-category" ); + wfProfileOut( __METHOD__ . "-category" ); continue; } } - # Self-link checking - if ( $nt->getFragment() === '' && $ns != NS_SPECIAL ) { - if ( in_array( $nt->getPrefixedText(), $selflink, true ) ) { - $s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail ); - continue; - } + # Self-link checking. For some languages, variants of the title are checked in + # LinkHolderArray::doVariants() to allow batching the existence checks necessary + # for linking to a different variant. + if ( $ns != NS_SPECIAL && $nt->equals( $this->mTitle ) && $nt->getFragment() === '' ) { + $s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail ); + continue; } # NS_MEDIA is a pseudo-namespace for linking directly to a file # @todo FIXME: Should do batch file existence checks, see comment below if ( $ns == NS_MEDIA ) { - wfProfileIn( __METHOD__."-media" ); + wfProfileIn( __METHOD__ . "-media" ); # Give extensions a chance to select the file revision for us $options = array(); $descQuery = false; @@ -2034,11 +2139,11 @@ class Parser { # Cloak with NOPARSE to avoid replacement in replaceExternalLinks $s .= $prefix . $this->armorLinks( Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail; - wfProfileOut( __METHOD__."-media" ); + wfProfileOut( __METHOD__ . "-media" ); continue; } - wfProfileIn( __METHOD__."-always_known" ); + wfProfileIn( __METHOD__ . "-always_known" ); # Some titles, such as valid special pages or files in foreign repos, should # be shown as bluelinks even though they're not included in the page table # @@ -2051,7 +2156,7 @@ class Parser { # Links will be added to the output link list after checking $s .= $holders->makeHolder( $nt, $text, array(), $trail, $prefix ); } - wfProfileOut( __METHOD__."-always_known" ); + wfProfileOut( __METHOD__ . "-always_known" ); } wfProfileOut( __METHOD__ ); return $holders; @@ -2066,7 +2171,7 @@ class Parser { * * @param $nt Title * @param $text String - * @param $query Array or String + * @param array $query or String * @param $trail String * @param $prefix String * @return String: HTML-wikitext mix oh yuck @@ -2093,7 +2198,7 @@ class Parser { * Not needed quite as much as it used to be since free links are a bit * more sensible these days. But bracketed links are still an issue. * - * @param $text String: more-or-less HTML + * @param string $text more-or-less HTML * @return String: less-or-more HTML with NOPARSE bits */ function armorLinks( $text ) { @@ -2113,7 +2218,7 @@ class Parser { /** * Handle link to subpage if necessary * - * @param $target String: the source of the link + * @param string $target the source of the link * @param &$text String: the link text, modified as necessary * @return string the full name of the link * @private @@ -2131,7 +2236,7 @@ class Parser { function closeParagraph() { $result = ''; if ( $this->mLastSection != '' ) { - $result = '</' . $this->mLastSection . ">\n"; + $result = '</' . $this->mLastSection . ">\n"; } $this->mInPre = false; $this->mLastSection = ''; @@ -2176,13 +2281,13 @@ class Parser { $result = $this->closeParagraph(); if ( '*' === $char ) { - $result .= '<ul><li>'; + $result .= "<ul>\n<li>"; } elseif ( '#' === $char ) { - $result .= '<ol><li>'; + $result .= "<ol>\n<li>"; } elseif ( ':' === $char ) { - $result .= '<dl><dd>'; + $result .= "<dl>\n<dd>"; } elseif ( ';' === $char ) { - $result .= '<dl><dt>'; + $result .= "<dl>\n<dt>"; $this->mDTopen = true; } else { $result = '<!-- ERR 1 -->'; @@ -2200,11 +2305,11 @@ class Parser { */ function nextItem( $char ) { if ( '*' === $char || '#' === $char ) { - return '</li><li>'; + return "</li>\n<li>"; } elseif ( ':' === $char || ';' === $char ) { - $close = '</dd>'; + $close = "</dd>\n"; if ( $this->mDTopen ) { - $close = '</dt>'; + $close = "</dt>\n"; } if ( ';' === $char ) { $this->mDTopen = true; @@ -2226,20 +2331,20 @@ class Parser { */ function closeList( $char ) { if ( '*' === $char ) { - $text = '</li></ul>'; + $text = "</li>\n</ul>"; } elseif ( '#' === $char ) { - $text = '</li></ol>'; + $text = "</li>\n</ol>"; } elseif ( ':' === $char ) { if ( $this->mDTopen ) { $this->mDTopen = false; - $text = '</dt></dl>'; + $text = "</dt>\n</dl>"; } else { - $text = '</dd></dl>'; + $text = "</dd>\n</dl>"; } } else { return '<!-- ERR 3 -->'; } - return $text."\n"; + return $text . "\n"; } /**#@-*/ @@ -2264,6 +2369,7 @@ class Parser { $this->mDTopen = $inBlockElem = false; $prefixLength = 0; $paragraphStack = false; + $inBlockquote = false; foreach ( $textLines as $oLine ) { # Fix up $linestart @@ -2306,7 +2412,7 @@ class Parser { $output .= $this->nextItem( substr( $prefix, -1 ) ); $paragraphStack = false; - if ( substr( $prefix, -1 ) === ';') { + if ( substr( $prefix, -1 ) === ';' ) { # The one nasty exception: definition lists work like this: # ; title : definition text # So we check for : in the remainder text to split up the @@ -2326,13 +2432,13 @@ class Parser { # Close all the prefixes which aren't shared. while ( $commonPrefixLength < $lastPrefixLength ) { - $output .= $this->closeList( $lastPrefix[$lastPrefixLength-1] ); + $output .= $this->closeList( $lastPrefix[$lastPrefixLength - 1] ); --$lastPrefixLength; } # Continue the current prefix if appropriate. if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) { - $output .= $this->nextItem( $prefix[$commonPrefixLength-1] ); + $output .= $this->nextItem( $prefix[$commonPrefixLength - 1] ); } # Open prefixes where appropriate. @@ -2354,13 +2460,13 @@ class Parser { # If we have no prefixes, go to paragraph mode. if ( 0 == $prefixLength ) { - wfProfileIn( __METHOD__."-paragraph" ); + wfProfileIn( __METHOD__ . "-paragraph" ); # No prefix (not in list)--go to paragraph mode # XXX: use a stack for nestable elements like span, table and div - $openmatch = preg_match('/(?:<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<ol|<li|<\\/tr|<\\/td|<\\/th)/iS', $t ); + $openmatch = preg_match( '/(?:<table|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<ol|<dl|<li|<\\/tr|<\\/td|<\\/th)/iS', $t ); $closematch = preg_match( - '/(?:<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'. - '<td|<th|<\\/?div|<hr|<\\/pre|<\\/p|'.$this->mUniqPrefix.'-pre|<\\/li|<\\/ul|<\\/ol|<\\/?center)/iS', $t ); + '/(?:<\\/table|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|' . + '<td|<th|<\\/?blockquote|<\\/?div|<hr|<\\/pre|<\\/p|<\\/mw:|' . $this->mUniqPrefix . '-pre|<\\/li|<\\/ul|<\\/ol|<\\/dl|<\\/?center)/iS', $t ); if ( $openmatch or $closematch ) { $paragraphStack = false; # TODO bug 5718: paragraph closed @@ -2368,13 +2474,18 @@ class Parser { if ( $preOpenMatch and !$preCloseMatch ) { $this->mInPre = true; } + $bqOffset = 0; + while ( preg_match( '/<(\\/?)blockquote[\s>]/i', $t, $bqMatch, PREG_OFFSET_CAPTURE, $bqOffset ) ) { + $inBlockquote = !$bqMatch[1][0]; // is this a close tag? + $bqOffset = $bqMatch[0][1] + strlen( $bqMatch[0][0] ); + } $inBlockElem = !$closematch; } elseif ( !$inBlockElem && !$this->mInPre ) { - if ( ' ' == substr( $t, 0, 1 ) and ( $this->mLastSection === 'pre' || trim( $t ) != '' ) ) { + if ( ' ' == substr( $t, 0, 1 ) and ( $this->mLastSection === 'pre' || trim( $t ) != '' ) and !$inBlockquote ) { # pre if ( $this->mLastSection !== 'pre' ) { $paragraphStack = false; - $output .= $this->closeParagraph().'<pre>'; + $output .= $this->closeParagraph() . '<pre>'; $this->mLastSection = 'pre'; } $t = substr( $t, 1 ); @@ -2382,7 +2493,7 @@ class Parser { # paragraph if ( trim( $t ) === '' ) { if ( $paragraphStack ) { - $output .= $paragraphStack.'<br />'; + $output .= $paragraphStack . '<br />'; $paragraphStack = false; $this->mLastSection = 'p'; } else { @@ -2400,24 +2511,24 @@ class Parser { $paragraphStack = false; $this->mLastSection = 'p'; } elseif ( $this->mLastSection !== 'p' ) { - $output .= $this->closeParagraph().'<p>'; + $output .= $this->closeParagraph() . '<p>'; $this->mLastSection = 'p'; } } } } - wfProfileOut( __METHOD__."-paragraph" ); + wfProfileOut( __METHOD__ . "-paragraph" ); } # somewhere above we forget to get out of pre block (bug 785) if ( $preCloseMatch && $this->mInPre ) { $this->mInPre = false; } if ( $paragraphStack === false ) { - $output .= $t."\n"; + $output .= $t . "\n"; } } while ( $prefixLength ) { - $output .= $this->closeList( $prefix2[$prefixLength-1] ); + $output .= $this->closeList( $prefix2[$prefixLength - 1] ); --$prefixLength; } if ( $this->mLastSection != '' ) { @@ -2433,9 +2544,10 @@ class Parser { * Split up a string on ':', ignoring any occurrences inside tags * to prevent illegal overlapping. * - * @param $str String the string to split + * @param string $str the string to split * @param &$before String set to everything before the ':' * @param &$after String set to everything after the ':' + * @throws MWException * @return String the position of the ':', or false if none found */ function findColonNoLinks( $str, &$before, &$after ) { @@ -2452,7 +2564,7 @@ class Parser { if ( $lt === false || $lt > $pos ) { # Easy; no tag nesting to worry about $before = substr( $str, 0, $pos ); - $after = substr( $str, $pos+1 ); + $after = substr( $str, $pos + 1 ); wfProfileOut( __METHOD__ ); return $pos; } @@ -2461,13 +2573,13 @@ class Parser { $state = self::COLON_STATE_TEXT; $stack = 0; $len = strlen( $str ); - for( $i = 0; $i < $len; $i++ ) { + for ( $i = 0; $i < $len; $i++ ) { $c = $str[$i]; - switch( $state ) { + switch ( $state ) { # (Using the number is a performance hack for common cases) case 0: # self::COLON_STATE_TEXT: - switch( $c ) { + switch ( $c ) { case "<": # Could be either a <start> tag or an </end> tag $state = self::COLON_STATE_TAGSTART; @@ -2512,7 +2624,7 @@ class Parser { break; case 1: # self::COLON_STATE_TAG: # In a <tag> - switch( $c ) { + switch ( $c ) { case ">": $stack++; $state = self::COLON_STATE_TEXT; @@ -2526,7 +2638,7 @@ class Parser { } break; case 2: # self::COLON_STATE_TAGSTART: - switch( $c ) { + switch ( $c ) { case "/": $state = self::COLON_STATE_CLOSETAG; break; @@ -2546,7 +2658,7 @@ class Parser { if ( $c === ">" ) { $stack--; if ( $stack < 0 ) { - wfDebug( __METHOD__.": Invalid input; too many close tags\n" ); + wfDebug( __METHOD__ . ": Invalid input; too many close tags\n" ); wfProfileOut( __METHOD__ ); return false; } @@ -2582,11 +2694,12 @@ class Parser { } break; default: + wfProfileOut( __METHOD__ ); throw new MWException( "State machine error in " . __METHOD__ ); } } if ( $stack > 0 ) { - wfDebug( __METHOD__.": Invalid input; not enough close tags (stack $stack, state $state)\n" ); + wfDebug( __METHOD__ . ": Invalid input; not enough close tags (stack $stack, state $state)\n" ); wfProfileOut( __METHOD__ ); return false; } @@ -2600,8 +2713,9 @@ class Parser { * @private * * @param $index integer - * @param $frame PPFrame + * @param bool|\PPFrame $frame * + * @throws MWException * @return string */ function getVariableValue( $index, $frame = false ) { @@ -2630,71 +2744,50 @@ class Parser { $ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() ); wfRunHooks( 'ParserGetVariableValueTs', array( &$this, &$ts ) ); - # Use the time zone - global $wgLocaltimezone; - if ( isset( $wgLocaltimezone ) ) { - $oldtz = date_default_timezone_get(); - date_default_timezone_set( $wgLocaltimezone ); - } - - $localTimestamp = date( 'YmdHis', $ts ); - $localMonth = date( 'm', $ts ); - $localMonth1 = date( 'n', $ts ); - $localMonthName = date( 'n', $ts ); - $localDay = date( 'j', $ts ); - $localDay2 = date( 'd', $ts ); - $localDayOfWeek = date( 'w', $ts ); - $localWeek = date( 'W', $ts ); - $localYear = date( 'Y', $ts ); - $localHour = date( 'H', $ts ); - if ( isset( $wgLocaltimezone ) ) { - date_default_timezone_set( $oldtz ); - } - $pageLang = $this->getFunctionLang(); switch ( $index ) { case 'currentmonth': - $value = $pageLang->formatNum( gmdate( 'm', $ts ) ); + $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'm' ) ); break; case 'currentmonth1': - $value = $pageLang->formatNum( gmdate( 'n', $ts ) ); + $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'n' ) ); break; case 'currentmonthname': - $value = $pageLang->getMonthName( gmdate( 'n', $ts ) ); + $value = $pageLang->getMonthName( MWTimestamp::getInstance( $ts )->format( 'n' ) ); break; case 'currentmonthnamegen': - $value = $pageLang->getMonthNameGen( gmdate( 'n', $ts ) ); + $value = $pageLang->getMonthNameGen( MWTimestamp::getInstance( $ts )->format( 'n' ) ); break; case 'currentmonthabbrev': - $value = $pageLang->getMonthAbbreviation( gmdate( 'n', $ts ) ); + $value = $pageLang->getMonthAbbreviation( MWTimestamp::getInstance( $ts )->format( 'n' ) ); break; case 'currentday': - $value = $pageLang->formatNum( gmdate( 'j', $ts ) ); + $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'j' ) ); break; case 'currentday2': - $value = $pageLang->formatNum( gmdate( 'd', $ts ) ); + $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'd' ) ); break; case 'localmonth': - $value = $pageLang->formatNum( $localMonth ); + $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'm' ) ); break; case 'localmonth1': - $value = $pageLang->formatNum( $localMonth1 ); + $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) ); break; case 'localmonthname': - $value = $pageLang->getMonthName( $localMonthName ); + $value = $pageLang->getMonthName( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) ); break; case 'localmonthnamegen': - $value = $pageLang->getMonthNameGen( $localMonthName ); + $value = $pageLang->getMonthNameGen( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) ); break; case 'localmonthabbrev': - $value = $pageLang->getMonthAbbreviation( $localMonthName ); + $value = $pageLang->getMonthAbbreviation( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) ); break; case 'localday': - $value = $pageLang->formatNum( $localDay ); + $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'j' ) ); break; case 'localday2': - $value = $pageLang->formatNum( $localDay2 ); + $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'd' ) ); break; case 'pagename': $value = wfEscapeWikiText( $this->mTitle->getText() ); @@ -2714,6 +2807,12 @@ class Parser { case 'subpagenamee': $value = wfEscapeWikiText( $this->mTitle->getSubpageUrlForm() ); break; + case 'rootpagename': + $value = wfEscapeWikiText( $this->mTitle->getRootText() ); + break; + case 'rootpagenamee': + $value = wfEscapeWikiText( wfUrlEncode( str_replace( ' ', '_', $this->mTitle->getRootText() ) ) ); + break; case 'basepagename': $value = wfEscapeWikiText( $this->mTitle->getBaseText() ); break; @@ -2731,7 +2830,7 @@ class Parser { case 'talkpagenamee': if ( $this->mTitle->canTalk() ) { $talkPage = $this->mTitle->getTalkPage(); - $value = wfEscapeWikiText( $talkPage->getPrefixedUrl() ); + $value = wfEscapeWikiText( $talkPage->getPrefixedURL() ); } else { $value = ''; } @@ -2742,11 +2841,11 @@ class Parser { break; case 'subjectpagenamee': $subjPage = $this->mTitle->getSubjectPage(); - $value = wfEscapeWikiText( $subjPage->getPrefixedUrl() ); + $value = wfEscapeWikiText( $subjPage->getPrefixedURL() ); break; case 'pageid': // requested in bug 23427 - $pageid = $this->getTitle()->getArticleId(); - if( $pageid == 0 ) { + $pageid = $this->getTitle()->getArticleID(); + if ( $pageid == 0 ) { # 0 means the page doesn't exist in the database, # which means the user is previewing a new page. # The vary-revision flag must be set, because the magic word @@ -2812,8 +2911,15 @@ class Parser { wfDebug( __METHOD__ . ": {{REVISIONUSER}} used, setting vary-revision...\n" ); $value = $this->getRevisionUser(); break; + case 'revisionsize': + # Let the edit saving system know we should parse the page + # *after* a revision ID has been assigned. This is for null edits. + $this->mOutput->setFlag( 'vary-revision' ); + wfDebug( __METHOD__ . ": {{REVISIONSIZE}} used, setting vary-revision...\n" ); + $value = $this->getRevisionSize(); + break; case 'namespace': - $value = str_replace( '_',' ',$wgContLang->getNsText( $this->mTitle->getNamespace() ) ); + $value = str_replace( '_', ' ', $wgContLang->getNsText( $this->mTitle->getNamespace() ) ); break; case 'namespacee': $value = wfUrlencode( $wgContLang->getNsText( $this->mTitle->getNamespace() ) ); @@ -2822,56 +2928,56 @@ class Parser { $value = $this->mTitle->getNamespace(); break; case 'talkspace': - $value = $this->mTitle->canTalk() ? str_replace( '_',' ',$this->mTitle->getTalkNsText() ) : ''; + $value = $this->mTitle->canTalk() ? str_replace( '_', ' ', $this->mTitle->getTalkNsText() ) : ''; break; case 'talkspacee': $value = $this->mTitle->canTalk() ? wfUrlencode( $this->mTitle->getTalkNsText() ) : ''; break; case 'subjectspace': - $value = $this->mTitle->getSubjectNsText(); + $value = str_replace( '_', ' ', $this->mTitle->getSubjectNsText() ); break; case 'subjectspacee': $value = ( wfUrlencode( $this->mTitle->getSubjectNsText() ) ); break; case 'currentdayname': - $value = $pageLang->getWeekdayName( gmdate( 'w', $ts ) + 1 ); + $value = $pageLang->getWeekdayName( MWTimestamp::getInstance( $ts )->format( 'w' ) + 1 ); break; case 'currentyear': - $value = $pageLang->formatNum( gmdate( 'Y', $ts ), true ); + $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'Y' ), true ); break; case 'currenttime': $value = $pageLang->time( wfTimestamp( TS_MW, $ts ), false, false ); break; case 'currenthour': - $value = $pageLang->formatNum( gmdate( 'H', $ts ), true ); + $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'H' ), true ); break; case 'currentweek': # @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to # int to remove the padding - $value = $pageLang->formatNum( (int)gmdate( 'W', $ts ) ); + $value = $pageLang->formatNum( (int)MWTimestamp::getInstance( $ts )->format( 'W' ) ); break; case 'currentdow': - $value = $pageLang->formatNum( gmdate( 'w', $ts ) ); + $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'w' ) ); break; case 'localdayname': - $value = $pageLang->getWeekdayName( $localDayOfWeek + 1 ); + $value = $pageLang->getWeekdayName( MWTimestamp::getLocalInstance( $ts )->format( 'w' ) + 1 ); break; case 'localyear': - $value = $pageLang->formatNum( $localYear, true ); + $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'Y' ), true ); break; case 'localtime': - $value = $pageLang->time( $localTimestamp, false, false ); + $value = $pageLang->time( MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' ), false, false ); break; case 'localhour': - $value = $pageLang->formatNum( $localHour, true ); + $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'H' ), true ); break; case 'localweek': # @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to # int to remove the padding - $value = $pageLang->formatNum( (int)$localWeek ); + $value = $pageLang->formatNum( (int)MWTimestamp::getLocalInstance( $ts )->format( 'W' ) ); break; case 'localdow': - $value = $pageLang->formatNum( $localDayOfWeek ); + $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'w' ) ); break; case 'numberofarticles': $value = $pageLang->formatNum( SiteStats::articles() ); @@ -2902,7 +3008,7 @@ class Parser { $value = wfTimestamp( TS_MW, $ts ); break; case 'localtimestamp': - $value = $localTimestamp; + $value = MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' ); break; case 'currentversion': $value = SpecialVersion::getVersion(); @@ -2960,7 +3066,7 @@ class Parser { * Preprocess some wikitext and return the document tree. * This is the ghost of replace_variables(). * - * @param $text String: The text to parse + * @param string $text The text to parse * @param $flags Integer: bitwise combination of: * self::PTD_FOR_INCLUSION Handle "<noinclude>" and "<includeonly>" as if the text is being * included. Default is to assume a direct page view. @@ -3015,7 +3121,7 @@ class Parser { * self::OT_PREPROCESS: templates but not extension tags * self::OT_HTML: all templates and extension tags * - * @param $text String the text to transform + * @param string $text the text to transform * @param $frame PPFrame Object describing the arguments passed to the template. * Arguments may also be provided as an associative array, as was the usual case before MW1.12. * Providing arguments this way may be useful for extensions wishing to perform variable replacement explicitly. @@ -3034,7 +3140,7 @@ class Parser { if ( $frame === false ) { $frame = $this->getPreprocessor()->newFrame(); } elseif ( !( $frame instanceof PPFrame ) ) { - wfDebug( __METHOD__." called using plain parameters instead of a PPFrame instance. Creating custom frame.\n" ); + wfDebug( __METHOD__ . " called using plain parameters instead of a PPFrame instance. Creating custom frame.\n" ); $frame = $this->getPreprocessor()->newCustomFrame( $frame ); } @@ -3062,7 +3168,7 @@ class Parser { $assocArgs[$index++] = $arg; } else { $name = trim( substr( $arg, 0, $eqpos ) ); - $value = trim( substr( $arg, $eqpos+1 ) ); + $value = trim( substr( $arg, $eqpos + 1 ) ); if ( $value === false ) { $value = ''; } @@ -3079,7 +3185,7 @@ class Parser { * Warn the user when a parser limitation is reached * Will warn at most once the user per limitation type * - * @param $limitationType String: should be one of: + * @param string $limitationType should be one of: * 'expensive-parserfunction' (corresponding messages: * 'expensive-parserfunction-warning', * 'expensive-parserfunction-category') @@ -3089,14 +3195,20 @@ class Parser { * 'post-expand-template-inclusion' (corresponding messages: * 'post-expand-template-inclusion-warning', * 'post-expand-template-inclusion-category') - * @param $current int|null Current value - * @param $max int|null Maximum allowed, when an explicit limit has been + * 'node-count-exceeded' (corresponding messages: + * 'node-count-exceeded-warning', + * 'node-count-exceeded-category') + * 'expansion-depth-exceeded' (corresponding messages: + * 'expansion-depth-exceeded-warning', + * 'expansion-depth-exceeded-category') + * @param int|null $current Current value + * @param int|null $max Maximum allowed, when an explicit limit has been * exceeded, provide the values (optional) */ function limitationWarn( $limitationType, $current = '', $max = '' ) { # does no harm if $current and $max are present but are unnecessary for the message $warning = wfMessage( "$limitationType-warning" )->numParams( $current, $max ) - ->inContentLanguage()->escaped(); + ->inLanguage( $this->mOptions->getUserLangObj() )->text(); $this->mOutput->addWarning( $warning ); $this->addTrackingCategory( "$limitationType-category" ); } @@ -3105,18 +3217,18 @@ class Parser { * Return the text of a template, after recursively * replacing any variables or templates within the template. * - * @param $piece Array: the parts of the template + * @param array $piece the parts of the template * $piece['title']: the title, i.e. the part before the | * $piece['parts']: the parameter array * $piece['lineStart']: whether the brace was at the start of a line * @param $frame PPFrame The current frame, contains template arguments + * @throws MWException * @return String: the text of the template * @private */ function braceSubstitution( $piece, $frame ) { - global $wgContLang; wfProfileIn( __METHOD__ ); - wfProfileIn( __METHOD__.'-setup' ); + wfProfileIn( __METHOD__ . '-setup' ); # Flags $found = false; # $text has been filled @@ -3141,12 +3253,12 @@ class Parser { # $args is a list of argument nodes, starting from index 0, not including $part1 # @todo FIXME: If piece['parts'] is null then the call to getLength() below won't work b/c this $args isn't an object $args = ( null == $piece['parts'] ) ? array() : $piece['parts']; - wfProfileOut( __METHOD__.'-setup' ); + wfProfileOut( __METHOD__ . '-setup' ); $titleProfileIn = null; // profile templates # SUBST - wfProfileIn( __METHOD__.'-modifiers' ); + wfProfileIn( __METHOD__ . '-modifiers' ); if ( !$found ) { $substMatch = $this->mSubstWords->matchStartAndRemove( $part1 ); @@ -3203,7 +3315,7 @@ class Parser { $forceRawInterwiki = true; } } - wfProfileOut( __METHOD__.'-modifiers' ); + wfProfileOut( __METHOD__ . '-modifiers' ); # Parser functions if ( !$found ) { @@ -3211,70 +3323,23 @@ class Parser { $colonPos = strpos( $part1, ':' ); if ( $colonPos !== false ) { - # Case sensitive functions - $function = substr( $part1, 0, $colonPos ); - if ( isset( $this->mFunctionSynonyms[1][$function] ) ) { - $function = $this->mFunctionSynonyms[1][$function]; - } else { - # Case insensitive functions - $function = $wgContLang->lc( $function ); - if ( isset( $this->mFunctionSynonyms[0][$function] ) ) { - $function = $this->mFunctionSynonyms[0][$function]; - } else { - $function = false; - } + $func = substr( $part1, 0, $colonPos ); + $funcArgs = array( trim( substr( $part1, $colonPos + 1 ) ) ); + for ( $i = 0; $i < $args->getLength(); $i++ ) { + $funcArgs[] = $args->item( $i ); } - if ( $function ) { - wfProfileIn( __METHOD__ . '-pfunc-' . $function ); - list( $callback, $flags ) = $this->mFunctionHooks[$function]; - $initialArgs = array( &$this ); - $funcArgs = array( trim( substr( $part1, $colonPos + 1 ) ) ); - if ( $flags & SFH_OBJECT_ARGS ) { - # Add a frame parameter, and pass the arguments as an array - $allArgs = $initialArgs; - $allArgs[] = $frame; - for ( $i = 0; $i < $args->getLength(); $i++ ) { - $funcArgs[] = $args->item( $i ); - } - $allArgs[] = $funcArgs; - } else { - # Convert arguments to plain text - for ( $i = 0; $i < $args->getLength(); $i++ ) { - $funcArgs[] = trim( $frame->expand( $args->item( $i ) ) ); - } - $allArgs = array_merge( $initialArgs, $funcArgs ); - } - - # Workaround for PHP bug 35229 and similar - if ( !is_callable( $callback ) ) { - wfProfileOut( __METHOD__ . '-pfunc-' . $function ); - wfProfileOut( __METHOD__ . '-pfunc' ); - wfProfileOut( __METHOD__ ); - throw new MWException( "Tag hook for $function is not callable\n" ); - } - $result = call_user_func_array( $callback, $allArgs ); - $found = true; - $noparse = true; - $preprocessFlags = 0; - - if ( is_array( $result ) ) { - if ( isset( $result[0] ) ) { - $text = $result[0]; - unset( $result[0] ); - } - - # Extract flags into the local scope - # This allows callers to set flags such as nowiki, found, etc. - extract( $result ); - } else { - $text = $result; - } - if ( !$noparse ) { - $text = $this->preprocessToDom( $text, $preprocessFlags ); - $isChildObj = true; - } - wfProfileOut( __METHOD__ . '-pfunc-' . $function ); + try { + $result = $this->callParserFunction( $frame, $func, $funcArgs ); + } catch ( Exception $ex ) { + wfProfileOut( __METHOD__ . '-pfunc' ); + wfProfileOut( __METHOD__ ); + throw $ex; } + + # The interface for parser functions allows for extracting + # flags into the local scope. Extract any forwarded flags + # here. + extract( $result ); } wfProfileOut( __METHOD__ . '-pfunc' ); } @@ -3285,8 +3350,9 @@ class Parser { $ns = NS_TEMPLATE; # Split the title into page and subpage $subpage = ''; - $part1 = $this->maybeDoSubpageLink( $part1, $subpage ); - if ( $subpage !== '' ) { + $relative = $this->maybeDoSubpageLink( $part1, $subpage ); + if ( $part1 !== $relative ) { + $part1 = $relative; $ns = $this->mTitle->getNamespace(); } $title = Title::newFromText( $part1, $ns ); @@ -3312,7 +3378,7 @@ class Parser { if ( !$found && $title ) { if ( !Profiler::instance()->isPersistent() ) { # Too many unique items can kill profiling DBs/collectors - $titleProfileIn = __METHOD__ . "-title-" . $title->getDBKey(); + $titleProfileIn = __METHOD__ . "-title-" . $title->getPrefixedDBkey(); wfProfileIn( $titleProfileIn ); // template in } wfProfileIn( __METHOD__ . '-loadtpl' ); @@ -3350,7 +3416,7 @@ class Parser { } } elseif ( MWNamespace::isNonincludable( $title->getNamespace() ) ) { $found = false; # access denied - wfDebug( __METHOD__.": template inclusion denied for " . $title->getPrefixedDBkey() ); + wfDebug( __METHOD__ . ": template inclusion denied for " . $title->getPrefixedDBkey() ); } else { list( $text, $title ) = $this->getTemplateDom( $title ); if ( $text !== false ) { @@ -3385,7 +3451,7 @@ class Parser { $text = '<span class="error">' . wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text() . '</span>'; - wfDebug( __METHOD__.": template loop broken at '$titleText'\n" ); + wfDebug( __METHOD__ . ": template loop broken at '$titleText'\n" ); } wfProfileOut( __METHOD__ . '-loadtpl' ); } @@ -3442,7 +3508,7 @@ class Parser { { # Bug 529: if the template begins with a table or block-level # element, it should be treated as beginning a new line. - # This behaviour is somewhat controversial. + # This behavior is somewhat controversial. $text = "\n" . $text; } @@ -3472,6 +3538,120 @@ class Parser { } /** + * Call a parser function and return an array with text and flags. + * + * The returned array will always contain a boolean 'found', indicating + * whether the parser function was found or not. It may also contain the + * following: + * text: string|object, resulting wikitext or PP DOM object + * isHTML: bool, $text is HTML, armour it against wikitext transformation + * isChildObj: bool, $text is a DOM node needing expansion in a child frame + * isLocalObj: bool, $text is a DOM node needing expansion in the current frame + * nowiki: bool, wiki markup in $text should be escaped + * + * @since 1.21 + * @param $frame PPFrame The current frame, contains template arguments + * @param $function string Function name + * @param $args array Arguments to the function + * @return array + */ + public function callParserFunction( $frame, $function, array $args = array() ) { + global $wgContLang; + + wfProfileIn( __METHOD__ ); + + # Case sensitive functions + if ( isset( $this->mFunctionSynonyms[1][$function] ) ) { + $function = $this->mFunctionSynonyms[1][$function]; + } else { + # Case insensitive functions + $function = $wgContLang->lc( $function ); + if ( isset( $this->mFunctionSynonyms[0][$function] ) ) { + $function = $this->mFunctionSynonyms[0][$function]; + } else { + wfProfileOut( __METHOD__ ); + return array( 'found' => false ); + } + } + + wfProfileIn( __METHOD__ . '-pfunc-' . $function ); + list( $callback, $flags ) = $this->mFunctionHooks[$function]; + + # Workaround for PHP bug 35229 and similar + if ( !is_callable( $callback ) ) { + wfProfileOut( __METHOD__ . '-pfunc-' . $function ); + wfProfileOut( __METHOD__ ); + throw new MWException( "Tag hook for $function is not callable\n" ); + } + + $allArgs = array( &$this ); + if ( $flags & SFH_OBJECT_ARGS ) { + # Convert arguments to PPNodes and collect for appending to $allArgs + $funcArgs = array(); + foreach ( $args as $k => $v ) { + if ( $v instanceof PPNode || $k === 0 ) { + $funcArgs[] = $v; + } else { + $funcArgs[] = $this->mPreprocessor->newPartNodeArray( array( $k => $v ) )->item( 0 ); + } + } + + # Add a frame parameter, and pass the arguments as an array + $allArgs[] = $frame; + $allArgs[] = $funcArgs; + } else { + # Convert arguments to plain text and append to $allArgs + foreach ( $args as $k => $v ) { + if ( $v instanceof PPNode ) { + $allArgs[] = trim( $frame->expand( $v ) ); + } elseif ( is_int( $k ) && $k >= 0 ) { + $allArgs[] = trim( $v ); + } else { + $allArgs[] = trim( "$k=$v" ); + } + } + } + + $result = call_user_func_array( $callback, $allArgs ); + + # The interface for function hooks allows them to return a wikitext + # string or an array containing the string and any flags. This mungs + # things around to match what this method should return. + if ( !is_array( $result ) ) { + $result = array( + 'found' => true, + 'text' => $result, + ); + } else { + if ( isset( $result[0] ) && !isset( $result['text'] ) ) { + $result['text'] = $result[0]; + } + unset( $result[0] ); + $result += array( + 'found' => true, + ); + } + + $noparse = true; + $preprocessFlags = 0; + if ( isset( $result['noparse'] ) ) { + $noparse = $result['noparse']; + } + if ( isset( $result['preprocessFlags'] ) ) { + $preprocessFlags = $result['preprocessFlags']; + } + + if ( !$noparse ) { + $result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags ); + $result['isChildObj'] = true; + } + wfProfileOut( __METHOD__ . '-pfunc-' . $function ); + wfProfileOut( __METHOD__ ); + + return $result; + } + + /** * Get the semi-parsed DOM representation of a template with a given title, * and its redirect destination title. Cached. * @@ -3501,7 +3681,7 @@ class Parser { } $dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION ); - $this->mTplDomCache[ $titleText ] = $dom; + $this->mTplDomCache[$titleText] = $dom; if ( !$title->equals( $cacheTitle ) ) { $this->mTplRedirCache[$cacheTitle->getPrefixedDBkey()] = @@ -3524,6 +3704,11 @@ class Parser { if ( isset( $stuff['deps'] ) ) { foreach ( $stuff['deps'] as $dep ) { $this->mOutput->addTemplate( $dep['title'], $dep['page_id'], $dep['rev_id'] ); + if ( $dep['title']->equals( $this->getTitle() ) ) { + // If we transclude ourselves, the final result + // will change based on the new version of the page + $this->mOutput->setFlag( 'vary-revision' ); + } } } return array( $text, $finalTitle ); @@ -3563,9 +3748,9 @@ class Parser { if ( $skip ) { $text = false; $deps[] = array( - 'title' => $title, - 'page_id' => $title->getArticleID(), - 'rev_id' => null + 'title' => $title, + 'page_id' => $title->getArticleID(), + 'rev_id' => null ); break; } @@ -3581,19 +3766,25 @@ class Parser { } $deps[] = array( - 'title' => $title, - 'page_id' => $title->getArticleID(), - 'rev_id' => $rev_id ); + 'title' => $title, + 'page_id' => $title->getArticleID(), + 'rev_id' => $rev_id ); if ( $rev && !$title->equals( $rev->getTitle() ) ) { # We fetched a rev from a different title; register it too... $deps[] = array( - 'title' => $rev->getTitle(), - 'page_id' => $rev->getPage(), - 'rev_id' => $rev_id ); + 'title' => $rev->getTitle(), + 'page_id' => $rev->getPage(), + 'rev_id' => $rev_id ); } if ( $rev ) { - $text = $rev->getText(); + $content = $rev->getContent(); + $text = $content ? $content->getWikitextForTransclusion() : null; + + if ( $text === false || $text === null ) { + $text = false; + break; + } } elseif ( $title->getNamespace() == NS_MEDIAWIKI ) { global $wgContLang; $message = wfMessage( $wgContLang->lcfirst( $title->getText() ) )->inContentLanguage(); @@ -3601,16 +3792,17 @@ class Parser { $text = false; break; } + $content = $message->content(); $text = $message->plain(); } else { break; } - if ( $text === false ) { + if ( !$content ) { break; } # Redirect? $finalTitle = $title; - $title = Title::newFromRedirect( $text ); + $title = $content->getRedirectTarget(); } return array( 'text' => $text, @@ -3622,7 +3814,7 @@ class Parser { * Fetch a file and its title and register a reference to it. * If 'broken' is a key in $options then the file will appear as a broken thumbnail. * @param Title $title - * @param Array $options Array of options to RepoGroup::findFile + * @param array $options Array of options to RepoGroup::findFile * @return File|bool */ function fetchFile( $title, $options = array() ) { @@ -3634,17 +3826,12 @@ class Parser { * Fetch a file and its title and register a reference to it. * If 'broken' is a key in $options then the file will appear as a broken thumbnail. * @param Title $title - * @param Array $options Array of options to RepoGroup::findFile + * @param array $options Array of options to RepoGroup::findFile * @return Array ( File or false, Title of file ) */ function fetchFileAndTitle( $title, $options = array() ) { - if ( isset( $options['broken'] ) ) { - $file = false; // broken thumbnail forced by hook - } elseif ( isset( $options['sha1'] ) ) { // get by (sha1,timestamp) - $file = RepoGroup::singleton()->findFileFromKey( $options['sha1'], $options ); - } else { // get by (name,timestamp) - $file = wfFindFile( $title, $options ); - } + $file = $this->fetchFileNoRegister( $title, $options ); + $time = $file ? $file->getTimestamp() : false; $sha1 = $file ? $file->getSha1() : false; # Register the file as a dependency... @@ -3663,6 +3850,27 @@ class Parser { } /** + * Helper function for fetchFileAndTitle. + * + * Also useful if you need to fetch a file but not use it yet, + * for example to get the file's handler. + * + * @param Title $title + * @param array $options Array of options to RepoGroup::findFile + * @return File or false + */ + protected function fetchFileNoRegister( $title, $options = array() ) { + if ( isset( $options['broken'] ) ) { + $file = false; // broken thumbnail forced by hook + } elseif ( isset( $options['sha1'] ) ) { // get by (sha1,timestamp) + $file = RepoGroup::singleton()->findFileFromKey( $options['sha1'], $options ); + } else { // get by (name,timestamp) + $file = wfFindFile( $title, $options ); + } + return $file; + } + + /** * Transclude an interwiki link. * * @param $title Title @@ -3674,10 +3882,10 @@ class Parser { global $wgEnableScaryTranscluding; if ( !$wgEnableScaryTranscluding ) { - return wfMessage('scarytranscludedisabled')->inContentLanguage()->text(); + return wfMessage( 'scarytranscludedisabled' )->inContentLanguage()->text(); } - $url = $title->getFullUrl( "action=$action" ); + $url = $title->getFullURL( array( 'action' => $action ) ); if ( strlen( $url ) > 255 ) { return wfMessage( 'scarytranscludetoolong' )->inContentLanguage()->text(); @@ -3693,23 +3901,28 @@ class Parser { global $wgTranscludeCacheExpiry; $dbr = wfGetDB( DB_SLAVE ); $tsCond = $dbr->timestamp( time() - $wgTranscludeCacheExpiry ); - $obj = $dbr->selectRow( 'transcache', array('tc_time', 'tc_contents' ), + $obj = $dbr->selectRow( 'transcache', array( 'tc_time', 'tc_contents' ), array( 'tc_url' => $url, "tc_time >= " . $dbr->addQuotes( $tsCond ) ) ); if ( $obj ) { return $obj->tc_contents; } - $text = Http::get( $url ); - if ( !$text ) { + $req = MWHttpRequest::factory( $url ); + $status = $req->execute(); // Status object + if ( $status->isOK() ) { + $text = $req->getContent(); + } elseif ( $req->getStatus() != 200 ) { // Though we failed to fetch the content, this status is useless. + return wfMessage( 'scarytranscludefailed-httpstatus', $url, $req->getStatus() /* HTTP status */ )->inContentLanguage()->text(); + } else { return wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text(); } $dbw = wfGetDB( DB_MASTER ); - $dbw->replace( 'transcache', array('tc_url'), array( + $dbw->replace( 'transcache', array( 'tc_url' ), array( 'tc_url' => $url, 'tc_time' => $dbw->timestamp( time() ), - 'tc_contents' => $text) - ); + 'tc_contents' => $text + ) ); return $text; } @@ -3731,7 +3944,7 @@ class Parser { $argName = trim( $nameWithSpaces ); $object = false; $text = $frame->getArgument( $argName ); - if ( $text === false && $parts->getLength() > 0 + if ( $text === false && $parts->getLength() > 0 && ( $this->ot['html'] || $this->ot['pre'] @@ -3767,7 +3980,7 @@ class Parser { * Return the text to be used for a given extension tag. * This is the ghost of strip(). * - * @param $params array Associative array of parameters: + * @param array $params Associative array of parameters: * name PPNode for the tag name * attr PPNode for unparsed text where tag attributes are thought to be * attributes Optional associative array of parsed attributes @@ -3775,6 +3988,7 @@ class Parser { * noClose Original text did not have a close tag * @param $frame PPFrame * + * @throws MWException * @return string */ function extensionSubstitution( $params, $frame ) { @@ -3783,7 +3997,7 @@ class Parser { $content = !isset( $params['inner'] ) ? null : $frame->expand( $params['inner'] ); $marker = "{$this->mUniqPrefix}-$name-" . sprintf( '%08X', $this->mMarkerIndex++ ) . self::MARKER_SUFFIX; - $isFunctionTag = isset( $this->mFunctionTagHooks[strtolower($name)] ) && + $isFunctionTag = isset( $this->mFunctionTagHooks[strtolower( $name )] ) && ( $this->ot['html'] || $this->ot['pre'] ); if ( $isFunctionTag ) { $markerType = 'none'; @@ -3805,7 +4019,7 @@ class Parser { $output = call_user_func_array( $this->mTagHooks[$name], array( $content, $attributes, $this, $frame ) ); } elseif ( isset( $this->mFunctionTagHooks[$name] ) ) { - list( $callback, $flags ) = $this->mFunctionTagHooks[$name]; + list( $callback, ) = $this->mFunctionTagHooks[$name]; if ( !is_callable( $callback ) ) { throw new MWException( "Tag hook for $name is not callable\n" ); } @@ -3848,7 +4062,7 @@ class Parser { } elseif ( $markerType === 'general' ) { $this->mStripState->addGeneral( $marker, $output ); } else { - throw new MWException( __METHOD__.': invalid marker type' ); + throw new MWException( __METHOD__ . ': invalid marker type' ); } return $marker; } @@ -3856,7 +4070,7 @@ class Parser { /** * Increment an include size counter * - * @param $type String: the type of expansion + * @param string $type the type of expansion * @param $size Integer: the size of the text * @return Boolean: false if this inclusion would take it over the maximum, true otherwise */ @@ -3942,12 +4156,12 @@ class Parser { * Add a tracking category, getting the title from a system message, * or print a debug message if the title is invalid. * - * @param $msg String: message key + * @param string $msg message key * @return Boolean: whether the addition was successful */ public function addTrackingCategory( $msg ) { if ( $this->mTitle->getNamespace() === NS_SPECIAL ) { - wfDebug( __METHOD__.": Not adding tracking category $msg to special page!\n" ); + wfDebug( __METHOD__ . ": Not adding tracking category $msg to special page!\n" ); return false; } // Important to parse with correct title (bug 31469) @@ -3966,7 +4180,7 @@ class Parser { $this->mOutput->addCategory( $containerCategory->getDBkey(), $this->getDefaultSort() ); return true; } else { - wfDebug( __METHOD__.": [[MediaWiki:$msg]] is not a valid title!\n" ); + wfDebug( __METHOD__ . ": [[MediaWiki:$msg]] is not a valid title!\n" ); return false; } } @@ -3982,13 +4196,13 @@ class Parser { * string and re-inserts the newly formatted headlines. * * @param $text String - * @param $origText String: original, untouched wikitext + * @param string $origText original, untouched wikitext * @param $isMain Boolean * @return mixed|string * @private */ - function formatHeadings( $text, $origText, $isMain=true ) { - global $wgMaxTocLevel, $wgHtml5, $wgExperimentalHtmlIds; + function formatHeadings( $text, $origText, $isMain = true ) { + global $wgMaxTocLevel, $wgExperimentalHtmlIds; # Inhibit editsection links if requested in the page if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) { @@ -4004,7 +4218,7 @@ class Parser { # Get all headlines for numbering them and adding funky stuff like [edit] # links - this is for later, but we need the number of headlines right now $matches = array(); - $numMatches = preg_match_all( '/<H(?P<level>[1-6])(?P<attrib>.*?'.'>)(?P<header>.*?)<\/H[1-6] *>/i', $text, $matches ); + $numMatches = preg_match_all( '/<H(?P<level>[1-6])(?P<attrib>.*?' . '>)\s*(?P<header>[\s\S]*?)\s*<\/H[1-6] *>/i', $text, $matches ); # if there are fewer than 4 headlines in the article, do not show TOC # unless it's been explicitly enabled. @@ -4062,11 +4276,11 @@ class Parser { $sectionIndex = false; $numbering = ''; $markerMatches = array(); - if ( preg_match("/^$markerRegex/", $headline, $markerMatches ) ) { + if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) { $serial = $markerMatches[1]; list( $titleText, $sectionIndex ) = $this->mHeadings[$serial]; $isTemplate = ( $titleText != $baseTitleText ); - $headline = preg_replace( "/^$markerRegex/", "", $headline ); + $headline = preg_replace( "/^$markerRegex\\s*/", "", $headline ); } if ( $toclevel ) { @@ -4078,7 +4292,7 @@ class Parser { # Increase TOC level $toclevel++; $sublevelCount[$toclevel] = 0; - if ( $toclevel<$wgMaxTocLevel ) { + if ( $toclevel < $wgMaxTocLevel ) { $prevtoclevel = $toclevel; $toc .= Linker::tocIndent(); $numVisible++; @@ -4100,7 +4314,7 @@ class Parser { if ( $i == 0 ) { $toclevel = 1; } - if ( $toclevel<$wgMaxTocLevel ) { + if ( $toclevel < $wgMaxTocLevel ) { if ( $prevtoclevel < $wgMaxTocLevel ) { # Unindent only if the previous toc level was shown :p $toc .= Linker::tocUnindent( $prevtoclevel - $toclevel ); @@ -4111,7 +4325,7 @@ class Parser { } } else { # No change in level, end TOC line - if ( $toclevel<$wgMaxTocLevel ) { + if ( $toclevel < $wgMaxTocLevel ) { $toc .= Linker::tocLineEnd(); } } @@ -4119,9 +4333,9 @@ class Parser { $levelCount[$toclevel] = $level; # count number of headlines for each level - @$sublevelCount[$toclevel]++; + $sublevelCount[$toclevel]++; $dot = 0; - for( $i = 1; $i <= $toclevel; $i++ ) { + for ( $i = 1; $i <= $toclevel; $i++ ) { if ( !empty( $sublevelCount[$i] ) ) { if ( $dot ) { $numbering .= '.'; @@ -4144,23 +4358,29 @@ class Parser { $safeHeadline = $this->mStripState->unstripBoth( $safeHeadline ); # Strip out HTML (first regex removes any tag not allowed) - # Allowed tags are <sup> and <sub> (bug 8393), <i> (bug 26375) and <b> (r105284) - # We strip any parameter from accepted tags (second regex) + # Allowed tags are: + # * <sup> and <sub> (bug 8393) + # * <i> (bug 26375) + # * <b> (r105284) + # * <span dir="rtl"> and <span dir="ltr"> (bug 35167) + # + # We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from <span>, + # to allow setting directionality in toc items. $tocline = preg_replace( - array( '#<(?!/?(sup|sub|i|b)(?: [^>]*)?>).*?'.'>#', '#<(/?(sup|sub|i|b))(?: .*?)?'.'>#' ), - array( '', '<$1>' ), + array( '#<(?!/?(span|sup|sub|i|b)(?: [^>]*)?>).*?' . '>#', '#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|i|b))(?: .*?)?' . '>#' ), + array( '', '<$1>' ), $safeHeadline ); $tocline = trim( $tocline ); # For the anchor, strip out HTML-y stuff period - $safeHeadline = preg_replace( '/<.*?'.'>/', '', $safeHeadline ); + $safeHeadline = preg_replace( '/<.*?' . '>/', '', $safeHeadline ); $safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline ); # Save headline for section edit hint before it's escaped $headlineHint = $safeHeadline; - if ( $wgHtml5 && $wgExperimentalHtmlIds ) { + if ( $wgExperimentalHtmlIds ) { # For reverse compatibility, provide an id that's # HTML4-compatible, like we used to. # @@ -4230,7 +4450,8 @@ class Parser { # Add the section to the section tree # Find the DOM node for this header - while ( $node && !$isTemplate ) { + $noOffset = ( $isTemplate || $sectionIndex === false ); + while ( $node && !$noOffset ) { if ( $node->getName() === 'h' ) { $bits = $node->splitHeading(); if ( $bits['i'] == $sectionIndex ) { @@ -4248,7 +4469,7 @@ class Parser { 'number' => $numbering, 'index' => ( $isTemplate ? 'T-' : '' ) . $sectionIndex, 'fromtitle' => $titleText, - 'byteoffset' => ( $isTemplate ? null : $byteOffset ), + 'byteoffset' => ( $noOffset ? null : $byteOffset ), 'anchor' => $anchor, ); @@ -4269,9 +4490,9 @@ class Parser { // We use a page and section attribute to stop the language converter from converting these important bits // of data, but put the headline hint inside a content block because the language converter is supposed to // be able to convert that piece of data. - $editlink = '<mw:editsection page="' . htmlspecialchars($editlinkArgs[0]); - $editlink .= '" section="' . htmlspecialchars($editlinkArgs[1]) .'"'; - if ( isset($editlinkArgs[2]) ) { + $editlink = '<mw:editsection page="' . htmlspecialchars( $editlinkArgs[0] ); + $editlink .= '" section="' . htmlspecialchars( $editlinkArgs[1] ) . '"'; + if ( isset( $editlinkArgs[2] ) ) { $editlink .= '>' . $editlinkArgs[2] . '</mw:editsection>'; } else { $editlink .= '/>'; @@ -4299,6 +4520,7 @@ class Parser { } $toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() ); $this->mOutput->setTOCHTML( $toc ); + $toc = self::TOC_START . $toc . self::TOC_END; } if ( $isMain ) { @@ -4306,7 +4528,7 @@ class Parser { } # split up and insert constructed headlines - $blocks = preg_split( '/<H[1-6].*?' . '>.*?<\/H[1-6]>/i', $text ); + $blocks = preg_split( '/<H[1-6].*?' . '>[\s\S]*?<\/H[1-6]>/i', $text ); $i = 0; // build an array of document sections @@ -4353,7 +4575,7 @@ class Parser { * Transform wiki markup when saving a page by doing "\r\n" -> "\n" * conversion, substitting signatures, {{subst:}} templates, etc. * - * @param $text String: the text to transform + * @param string $text the text to transform * @param $title Title: the Title object for the current article * @param $user User: the User object describing the current user * @param $options ParserOptions: parsing options @@ -4368,7 +4590,7 @@ class Parser { "\r\n" => "\n", ); $text = str_replace( array_keys( $pairs ), array_values( $pairs ), $text ); - if( $options->getPreSaveTransform() ) { + if ( $options->getPreSaveTransform() ) { $text = $this->pstPass2( $text, $user ); } $text = $this->mStripState->unstripBoth( $text ); @@ -4388,7 +4610,7 @@ class Parser { * @return string */ function pstPass2( $text, $user ) { - global $wgContLang, $wgLocaltimezone; + global $wgContLang; # Note: This is the timestamp saved as hardcoded wikitext to # the database, we use $wgContLang here in order to give @@ -4396,19 +4618,11 @@ class Parser { # than the one selected in each user's preferences. # (see also bug 12815) $ts = $this->mOptions->getTimestamp(); - if ( isset( $wgLocaltimezone ) ) { - $tz = $wgLocaltimezone; - } else { - $tz = date_default_timezone_get(); - } + $timestamp = MWTimestamp::getLocalInstance( $ts ); + $ts = $timestamp->format( 'YmdHis' ); + $tzMsg = $timestamp->format( 'T' ); # might vary on DST changeover! - $unixts = wfTimestamp( TS_UNIX, $ts ); - $oldtz = date_default_timezone_get(); - date_default_timezone_set( $tz ); - $ts = date( 'YmdHis', $unixts ); - $tzMsg = date( 'T', $unixts ); # might vary on DST changeover! - - # Allow translation of timezones through wiki. date() can return + # Allow translation of timezones through wiki. format() can return # whatever crap the system uses, localised or not, so we cannot # ship premade translations. $key = 'timezone-' . strtolower( trim( $tzMsg ) ); @@ -4417,8 +4631,6 @@ class Parser { $tzMsg = $msg->text(); } - date_default_timezone_set( $oldtz ); - $d = $wgContLang->timeanddate( $ts, false, false ) . " ($tzMsg)"; # Variable replacement @@ -4436,14 +4648,14 @@ class Parser { '~~~' => $sigText ) ); - # Context links: [[|name]] and [[name (context)|]] + # Context links ("pipe tricks"): [[|name]] and [[name (context)|]] $tc = '[' . Title::legalChars() . ']'; $nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii! - $p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/"; # [[ns:page (context)|]] - $p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?($tc+))\\|]]/"; # [[ns:page(context)|]] - $p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |,)$tc+|)\\|]]/"; # [[ns:page (context), context|]] - $p2 = "/\[\[\\|($tc+)]]/"; # [[|page]] + $p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/"; # [[ns:page (context)|]] + $p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?($tc+))\\|]]/"; # [[ns:page(context)|]] (double-width brackets, added in r40257) + $p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |,)$tc+|)\\|]]/"; # [[ns:page (context), context|]] (using either single or double-width comma) + $p2 = "/\[\[\\|($tc+)]]/"; # [[|page]] (reverse pipe trick: add context from page title) # try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]" $text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text ); @@ -4476,7 +4688,7 @@ class Parser { * as it may have changed if it's the $wgParser. * * @param $user User - * @param $nickname String|bool nickname to use or false to use user's default nickname + * @param string|bool $nickname nickname to use or false to use user's default nickname * @param $fancySig Boolean|null whether the nicknname is the complete signature * or null to use default value * @return string @@ -4487,8 +4699,9 @@ class Parser { $username = $user->getName(); # If not given, retrieve from the user object. - if ( $nickname === false ) + if ( $nickname === false ) { $nickname = $user->getOption( 'nickname' ); + } if ( is_null( $fancySig ) ) { $fancySig = $user->getBoolOption( 'fancysig' ); @@ -4507,7 +4720,7 @@ class Parser { } else { # Failed to validate; fall back to the default $nickname = $username; - wfDebug( __METHOD__.": $username has bad XML tags in signature.\n" ); + wfDebug( __METHOD__ . ": $username has bad XML tags in signature.\n" ); } } @@ -4529,7 +4742,7 @@ class Parser { * @return mixed An expanded string, or false if invalid. */ function validateSig( $text ) { - return( Xml::isWellFormedXmlFragment( $text ) ? $text : false ); + return Xml::isWellFormedXmlFragment( $text ) ? $text : false; } /** @@ -4539,7 +4752,7 @@ class Parser { * 2) Substitute all transclusions * * @param $text String - * @param $parsing bool Whether we're cleaning (preferences save) or parsing + * @param bool $parsing Whether we're cleaning (preferences save) or parsing * @return String: signature text */ public function cleanSig( $text, $parsing = false ) { @@ -4614,7 +4827,7 @@ class Parser { /** * Wrapper for preprocess() * - * @param $text String: the text to preprocess + * @param string $text the text to preprocess * @param $options ParserOptions: options * @param $title Title object or null to use $wgTitle * @return String @@ -4633,11 +4846,7 @@ class Parser { global $wgTitle; $title = $wgTitle; } - if ( !$title ) { - # It's not uncommon having a null $wgTitle in scripts. See r80898 - # Create a ghost title in such case - $title = Title::newFromText( 'Dwimmerlaik' ); - } + $text = $this->preprocess( $text, $title, $options ); $executing = false; @@ -4666,6 +4875,7 @@ class Parser { * * @param $tag Mixed: the tag to use, e.g. 'hook' for "<hook>" * @param $callback Mixed: the callback function (and object) to use for the tag + * @throws MWException * @return Mixed|null The old value of the mTagHooks array associated with the hook */ public function setHook( $tag, $callback ) { @@ -4696,6 +4906,7 @@ class Parser { * * @param $tag Mixed: the tag to use, e.g. 'hook' for "<hook>" * @param $callback Mixed: the callback function (and object) to use for the tag + * @throws MWException * @return Mixed|null The old value of the mTagHooks array associated with the hook */ function setTransparentTagHook( $tag, $callback ) { @@ -4734,7 +4945,7 @@ class Parser { * nowiki Wiki markup in the return value should be escaped * isHTML The returned text is HTML, armour it against wikitext transformation * - * @param $id String: The magic word ID + * @param string $id The magic word ID * @param $callback Mixed: the callback function (and object) to use * @param $flags Integer: a combination of the following flags: * SFH_NO_HASH No leading hash, i.e. {{plural:...}} instead of {{#if:...}} @@ -4758,6 +4969,7 @@ class Parser { * Please read the documentation in includes/parser/Preprocessor.php for more information * about the methods available in PPFrame and PPNode. * + * @throws MWException * @return string|callback The old callback function for this name, if any */ public function setFunctionHook( $id, $callback, $flags = 0 ) { @@ -4768,8 +4980,9 @@ class Parser { # Add to function cache $mw = MagicWord::get( $id ); - if ( !$mw ) - throw new MWException( __METHOD__.'() expecting a magic word identifier.' ); + if ( !$mw ) { + throw new MWException( __METHOD__ . '() expecting a magic word identifier.' ); + } $synonyms = $mw->getSynonyms(); $sensitive = intval( $mw->isCaseSensitive() ); @@ -4805,11 +5018,17 @@ class Parser { * Create a tag function, e.g. "<test>some stuff</test>". * Unlike tag hooks, tag functions are parsed at preprocessor level. * Unlike parser functions, their content is not preprocessed. + * @param $tag + * @param $callback + * @param $flags + * @throws MWException * @return null */ function setFunctionTagHook( $tag, $callback, $flags ) { $tag = strtolower( $tag ); - if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) throw new MWException( "Invalid character {$m[0]} in setFunctionTagHook('$tag', ...) call" ); + if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) { + throw new MWException( "Invalid character {$m[0]} in setFunctionTagHook('$tag', ...) call" ); + } $old = isset( $this->mFunctionTagHooks[$tag] ) ? $this->mFunctionTagHooks[$tag] : null; $this->mFunctionTagHooks[$tag] = array( $callback, $flags ); @@ -4860,7 +5079,20 @@ class Parser { * @return string HTML */ function renderImageGallery( $text, $params ) { - $ig = new ImageGallery(); + wfProfileIn( __METHOD__ ); + + $mode = false; + if ( isset( $params['mode'] ) ) { + $mode = $params['mode']; + } + + try { + $ig = ImageGalleryBase::factory( $mode ); + } catch ( MWException $e ) { + // If invalid type set, fallback to default. + $ig = ImageGalleryBase::factory( false ); + } + $ig->setContextTitle( $this->mTitle ); $ig->setShowBytes( false ); $ig->setShowFilename( false ); @@ -4888,6 +5120,7 @@ class Parser { if ( isset( $params['heights'] ) ) { $ig->setHeights( $params['heights'] ); } + $ig->setAdditionalOptions( $params ); wfRunHooks( 'BeforeParserrenderImageGallery', array( &$this, &$ig ) ); @@ -4911,34 +5144,81 @@ class Parser { continue; } + # We need to get what handler the file uses, to figure out parameters. + # Note, a hook can overide the file name, and chose an entirely different + # file (which potentially could be of a different type and have different handler). + $options = array(); + $descQuery = false; + wfRunHooks( 'BeforeParserFetchFileAndTitle', + array( $this, $title, &$options, &$descQuery ) ); + # Don't register it now, as ImageGallery does that later. + $file = $this->fetchFileNoRegister( $title, $options ); + $handler = $file ? $file->getHandler() : false; + + wfProfileIn( __METHOD__ . '-getMagicWord' ); + $paramMap = array( + 'img_alt' => 'gallery-internal-alt', + 'img_link' => 'gallery-internal-link', + ); + if ( $handler ) { + $paramMap = $paramMap + $handler->getParamMap(); + // We don't want people to specify per-image widths. + // Additionally the width parameter would need special casing anyhow. + unset( $paramMap['img_width'] ); + } + + $mwArray = new MagicWordArray( array_keys( $paramMap ) ); + wfProfileOut( __METHOD__ . '-getMagicWord' ); + $label = ''; $alt = ''; $link = ''; + $handlerOptions = array(); if ( isset( $matches[3] ) ) { // look for an |alt= definition while trying not to break existing // captions with multiple pipes (|) in it, until a more sensible grammar // is defined for images in galleries + // FIXME: Doing recursiveTagParse at this stage, and the trim before + // splitting on '|' is a bit odd, and different from makeImage. $matches[3] = $this->recursiveTagParse( trim( $matches[3] ) ); - $parameterMatches = StringUtils::explode('|', $matches[3]); - $magicWordAlt = MagicWord::get( 'img_alt' ); - $magicWordLink = MagicWord::get( 'img_link' ); + $parameterMatches = StringUtils::explode( '|', $matches[3] ); foreach ( $parameterMatches as $parameterMatch ) { - if ( $match = $magicWordAlt->matchVariableStartToEnd( $parameterMatch ) ) { - $alt = $this->stripAltText( $match, false ); - } - elseif( $match = $magicWordLink->matchVariableStartToEnd( $parameterMatch ) ){ - $link = strip_tags($this->replaceLinkHoldersText($match)); - $chars = self::EXT_LINK_URL_CLASS; - $prots = $this->mUrlProtocols; - //check to see if link matches an absolute url, if not then it must be a wiki link. - if(!preg_match( "/^($prots)$chars+$/u", $link)){ - $localLinkTitle = Title::newFromText($link); - $link = $localLinkTitle->getLocalURL(); + list( $magicName, $match ) = $mwArray->matchVariableStartToEnd( $parameterMatch ); + if ( $magicName ) { + $paramName = $paramMap[$magicName]; + + switch ( $paramName ) { + case 'gallery-internal-alt': + $alt = $this->stripAltText( $match, false ); + break; + case 'gallery-internal-link': + $linkValue = strip_tags( $this->replaceLinkHoldersText( $match ) ); + $chars = self::EXT_LINK_URL_CLASS; + $prots = $this->mUrlProtocols; + //check to see if link matches an absolute url, if not then it must be a wiki link. + if ( preg_match( "/^($prots)$chars+$/u", $linkValue ) ) { + $link = $linkValue; + } else { + $localLinkTitle = Title::newFromText( $linkValue ); + if ( $localLinkTitle !== null ) { + $link = $localLinkTitle->getLocalURL(); + } + } + break; + default: + // Must be a handler specific parameter. + if ( $handler->validateParam( $paramName, $match ) ) { + $handlerOptions[$paramName] = $match; + } else { + // Guess not. Append it to the caption. + wfDebug( "$parameterMatch failed parameter validation" ); + $label .= '|' . $parameterMatch; + } } - } - else { + + } else { // concatenate all other pipes $label .= '|' . $parameterMatch; } @@ -4947,9 +5227,11 @@ class Parser { $label = substr( $label, 1 ); } - $ig->add( $title, $label, $alt ,$link); + $ig->add( $title, $label, $alt, $link, $handlerOptions ); } - return $ig->toHTML(); + $html = $ig->toHTML(); + wfProfileOut( __METHOD__ ); + return $html; } /** @@ -4962,7 +5244,7 @@ class Parser { } else { $handlerClass = ''; } - if ( !isset( $this->mImageParams[$handlerClass] ) ) { + if ( !isset( $this->mImageParams[$handlerClass] ) ) { # Initialise static lists static $internalParamNames = array( 'horizAlign' => array( 'left', 'right', 'center', 'none' ), @@ -5064,14 +5346,14 @@ class Parser { # Special case; width and height come in one variable together if ( $type === 'handler' && $paramName === 'width' ) { $parsedWidthParam = $this->parseWidthParam( $value ); - if( isset( $parsedWidthParam['width'] ) ) { + if ( isset( $parsedWidthParam['width'] ) ) { $width = $parsedWidthParam['width']; if ( $handler->validateParam( 'width', $width ) ) { $params[$type]['width'] = $width; $validated = true; } } - if( isset( $parsedWidthParam['height'] ) ) { + if ( isset( $parsedWidthParam['height'] ) ) { $height = $parsedWidthParam['height']; if ( $handler->validateParam( 'height', $height ) ) { $params[$type]['height'] = $height; @@ -5085,7 +5367,7 @@ class Parser { $validated = $handler->validateParam( $paramName, $value ); } else { # Validate internal parameters - switch( $paramName ) { + switch ( $paramName ) { case 'manualthumb': case 'alt': case 'class': @@ -5180,7 +5462,7 @@ class Parser { } else { # Inline image if ( !isset( $params['frame']['alt'] ) ) { # No alt text, use the "caption" for the alt text - if ( $caption !== '') { + if ( $caption !== '' ) { $params['frame']['alt'] = $this->stripAltText( $caption, $holders ); } else { # No caption, fall back to using the filename for the @@ -5303,8 +5585,8 @@ class Parser { * * External callers should use the getSection and replaceSection methods. * - * @param $text String: Page wikitext - * @param $section String: a section identifier string of the form: + * @param string $text Page wikitext + * @param string $section a section identifier string of the form: * "<flag1> - <flag2> - ... - <section number>" * * Currently the only recognised flag is "T", which means the target section number @@ -5321,12 +5603,12 @@ class Parser { * string. If $text is the empty string and section 0 is replaced, $newText is * returned. * - * @param $mode String: one of "get" or "replace" - * @param $newText String: replacement text for section data. + * @param string $mode one of "get" or "replace" + * @param string $newText replacement text for section data. * @return String: for "get", the extracted section text. * for "replace", the whole page with the section replaced. */ - private function extractSections( $text, $section, $mode, $newText='' ) { + private function extractSections( $text, $section, $mode, $newText = '' ) { global $wgTitle; # not generally used but removes an ugly failure mode $this->startParse( $wgTitle, new ParserOptions, self::OT_PLAIN, true ); $outText = ''; @@ -5442,12 +5724,12 @@ class Parser { * * If a section contains subsections, these are also returned. * - * @param $text String: text to look in - * @param $section String: section identifier - * @param $deftext String: default to return if section is not found + * @param string $text text to look in + * @param string $section section identifier + * @param string $deftext default to return if section is not found * @return string text of the requested section */ - public function getSection( $text, $section, $deftext='' ) { + public function getSection( $text, $section, $deftext = '' ) { return $this->extractSections( $text, $section, "get", $deftext ); } @@ -5456,9 +5738,9 @@ class Parser { * specified by $section has been replaced with $text. If the target * section does not exist, $oldtext is returned unchanged. * - * @param $oldtext String: former text of the article - * @param $section int section identifier - * @param $text String: replacing text + * @param string $oldtext former text of the article + * @param int $section section identifier + * @param string $text replacing text * @return String: modified text */ public function replaceSection( $oldtext, $section, $text ) { @@ -5523,14 +5805,14 @@ class Parser { * @return String: user name */ function getRevisionUser() { - if( is_null( $this->mRevisionUser ) ) { + if ( is_null( $this->mRevisionUser ) ) { $revObject = $this->getRevisionObject(); # if this template is subst: the revision id will be blank, # so just use the current user's name - if( $revObject ) { + if ( $revObject ) { $this->mRevisionUser = $revObject->getUserText(); - } elseif( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) { + } elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) { $this->mRevisionUser = $this->getUser()->getName(); } } @@ -5538,9 +5820,30 @@ class Parser { } /** + * Get the size of the revision + * + * @return int|null revision size + */ + function getRevisionSize() { + if ( is_null( $this->mRevisionSize ) ) { + $revObject = $this->getRevisionObject(); + + # if this variable is subst: the revision id will be blank, + # so just use the parser input size, because the own substituation + # will change the size. + if ( $revObject ) { + $this->mRevisionSize = $revObject->getSize(); + } elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) { + $this->mRevisionSize = $this->mInputSize; + } + } + return $this->mRevisionSize; + } + + /** * Mutator for $mDefaultSort * - * @param $sort string New value + * @param string $sort New value */ public function setDefaultSort( $sort ) { $this->mDefaultSort = $sort; @@ -5596,7 +5899,7 @@ class Parser { * instead. For use in redirects, since IE6 interprets Redirect: headers * as something other than UTF-8 (apparently?), resulting in breakage. * - * @param $text String: The section name + * @param string $text The section name * @return string An anchor */ public function guessLegacySectionNameFromWikiText( $text ) { @@ -5616,7 +5919,7 @@ class Parser { * to create valid section anchors by mimicing the output of the * parser when headings are parsed. * - * @param $text String: text string to be stripped of wikitext + * @param string $text text string to be stripped of wikitext * for use in a Section anchor * @return string Filtered text string */ @@ -5767,12 +6070,13 @@ class Parser { * If the $data array has been stored persistently, the caller should first * check whether it is still valid, by calling isValidHalfParsedText(). * - * @param $data array Serialized data + * @param array $data Serialized data + * @throws MWException * @return String */ function unserializeHalfParsedText( $data ) { if ( !isset( $data['version'] ) || $data['version'] != self::HALF_PARSED_VERSION ) { - throw new MWException( __METHOD__.': invalid version' ); + throw new MWException( __METHOD__ . ': invalid version' ); } # First, extract the strip state. @@ -5809,7 +6113,7 @@ class Parser { */ public function parseWidthParam( $value ) { $parsedWidthParam = array(); - if( $value === '' ) { + if ( $value === '' ) { return $parsedWidthParam; } $m = array(); |