diff options
Diffstat (limited to 'includes/parser/Parser.php')
-rw-r--r-- | includes/parser/Parser.php | 744 |
1 files changed, 434 insertions, 310 deletions
diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index 10765de2..5ef0bc71 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -62,7 +62,6 @@ * $wgAllowSpecialInclusion * $wgInterwikiMagic * $wgMaxArticleSize - * $wgUseDynamicDates * * @ingroup Parser */ @@ -123,8 +122,8 @@ class Parser { var $mFunctionHooks = array(); var $mFunctionSynonyms = array( 0 => array(), 1 => array() ); var $mFunctionTagHooks = array(); - var $mStripList = array(); - var $mDefaultStripList = array(); + var $mStripList = array(); + var $mDefaultStripList = array(); var $mVarCache = array(); var $mImageParams = array(); var $mImageParamsMagicArray = array(); @@ -201,6 +200,13 @@ class Parser { var $mUniqPrefix; /** + * @var Array with the language name of each language link (i.e. the + * interwiki prefix) in the key, value arbitrary. Used to avoid sending + * duplicate language links to the ParserOutput. + */ + var $mLangLinkLanguages; + + /** * Constructor * * @param $conf array @@ -208,8 +214,8 @@ class Parser { public function __construct( $conf = array() ) { $this->mConf = $conf; $this->mUrlProtocols = wfUrlProtocols(); - $this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')'. - self::EXT_LINK_URL_CLASS.'+)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su'; + $this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')' . + self::EXT_LINK_URL_CLASS . '+)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su'; if ( isset( $conf['preprocessorClass'] ) ) { $this->mPreprocessorClass = $conf['preprocessorClass']; } elseif ( defined( 'MW_COMPILED' ) ) { @@ -240,6 +246,13 @@ class Parser { } /** + * Allow extensions to clean up when the parser is cloned + */ + function __clone() { + wfRunHooks( 'ParserCloned', array( $this ) ); + } + + /** * Do various kinds of initialisation on the first call of the parser */ function firstCallInit() { @@ -282,6 +295,7 @@ class Parser { $this->mRevisionId = $this->mRevisionUser = null; $this->mVarCache = array(); $this->mUser = null; + $this->mLangLinkLanguages = array(); /** * Prefix for temporary replacement strings for the multipass parser. @@ -291,12 +305,11 @@ class Parser { * string constructs. * * Must not consist of all title characters, or else it will change - * the behaviour of <nowiki> in a link. + * the behavior of <nowiki> in a link. */ $this->mUniqPrefix = "\x7fUNIQ" . self::getRandomString(); $this->mStripState = new StripState( $this->mUniqPrefix ); - # Clear these on every parse, bug 4549 $this->mTplExpandCache = $this->mTplRedirCache = $this->mTplDomCache = array(); @@ -327,12 +340,12 @@ class Parser { * Convert wikitext to HTML * Do not call this function recursively. * - * @param $text String: text we want to parse + * @param string $text text we want to parse * @param $title Title object * @param $options ParserOptions * @param $linestart boolean * @param $clearState boolean - * @param $revid Int: number to pass in {{REVISIONID}} + * @param int $revid number to pass in {{REVISIONID}} * @return ParserOutput a ParserOutput */ public function parse( $text, Title $title, ParserOptions $options, $linestart = true, $clearState = true, $revid = null ) { @@ -342,7 +355,7 @@ class Parser { */ global $wgUseTidy, $wgAlwaysUseTidy; - $fname = __METHOD__.'-' . wfGetCaller(); + $fname = __METHOD__ . '-' . wfGetCaller(); wfProfileIn( __METHOD__ ); wfProfileIn( $fname ); @@ -397,9 +410,7 @@ class Parser { if ( !( $options->getDisableContentConversion() || isset( $this->mDoubleUnderscores['nocontentconvert'] ) ) ) { - # Run convert unconditionally in 1.18-compatible mode - global $wgBug34832TransitionalRollback; - if ( $wgBug34832TransitionalRollback || !$this->mOptions->getInterfaceMessage() ) { + if ( !$this->mOptions->getInterfaceMessage() ) { # The position of the convert() call should not be changed. it # assumes that the links are all replaced and the only thing left # is the <nowiki> mark. @@ -486,8 +497,8 @@ class Parser { "Preprocessor generated node count: " . "{$this->mGeneratedPPNodeCount}/{$this->mOptions->getMaxGeneratedPPNodeCount()}\n" . "Post-expand include size: {$this->mIncludeSizes['post-expand']}/$max bytes\n" . - "Template argument size: {$this->mIncludeSizes['arg']}/$max bytes\n". - "Highest expansion depth: {$this->mHighestExpansionDepth}/{$this->mOptions->getMaxPPExpandDepth()}\n". + "Template argument size: {$this->mIncludeSizes['arg']}/$max bytes\n" . + "Highest expansion depth: {$this->mHighestExpansionDepth}/{$this->mOptions->getMaxPPExpandDepth()}\n" . $PFreport; wfRunHooks( 'ParserLimitReport', array( $this, &$limitReport ) ); @@ -496,6 +507,11 @@ class Parser { $limitReport = str_replace( array( '-', '&' ), array( '‐', '&' ), $limitReport ); $text .= "\n<!-- \n$limitReport-->\n"; + + if ( $this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10 ) { + wfDebugLog( 'generated-pp-node-count', $this->mGeneratedPPNodeCount . ' ' . + $this->mTitle->getPrefixedDBkey() ); + } } $this->mOutput->setText( $text ); @@ -515,7 +531,7 @@ class Parser { * * If $frame is not provided, then template variables (e.g., {{{1}}}) within $text are not expanded * - * @param $text String: text extension wants to have parsed + * @param string $text text extension wants to have parsed * @param $frame PPFrame: The frame to use for expanding any template variables * * @return string @@ -534,7 +550,7 @@ class Parser { * Also removes comments. * @return mixed|string */ - function preprocess( $text, Title $title, ParserOptions $options, $revid = null ) { + function preprocess( $text, Title $title = null, ParserOptions $options, $revid = null ) { wfProfileIn( __METHOD__ ); $this->startParse( $title, $options, self::OT_PREPROCESS, true ); if ( $revid !== null ) { @@ -552,7 +568,7 @@ class Parser { * Recursive parser entry point that can be called from an extension tag * hook. * - * @param $text String: text to be expanded + * @param string $text text to be expanded * @param $frame PPFrame: The frame to use for expanding any template variables * @return String * @since 1.19 @@ -593,7 +609,7 @@ class Parser { * * @return string */ - static public function getRandomString() { + public static function getRandomString() { return wfRandomString( 16 ); } @@ -682,7 +698,7 @@ class Parser { /** * Accessor/mutator for the output type * - * @param $x int|null New value or null to just get the current one + * @param int|null $x New value or null to just get the current one * @return Integer */ function OutputType( $x = null ) { @@ -745,6 +761,7 @@ class Parser { * * @since 1.19 * + * @throws MWException * @return Language|null */ public function getTargetLanguage() { @@ -765,12 +782,7 @@ class Parser { * Get the language object for language conversion */ function getConverterLanguage() { - global $wgBug34832TransitionalRollback, $wgContLang; - if ( $wgBug34832TransitionalRollback ) { - return $wgContLang; - } else { - return $this->getTargetLanguage(); - } + return $this->getTargetLanguage(); } /** @@ -813,9 +825,9 @@ class Parser { * '<element param="x">tag content</element>' ) ) * @endcode * - * @param $elements array list of element names. Comments are always extracted. - * @param $text string Source text string. - * @param $matches array Out parameter, Array: extracted tags + * @param array $elements list of element names. Comments are always extracted. + * @param string $text Source text string. + * @param array $matches Out parameter, Array: extracted tags * @param $uniq_prefix string * @return String: stripped text */ @@ -835,16 +847,16 @@ class Parser { } if ( count( $p ) > 5 ) { # comment - $element = $p[4]; + $element = $p[4]; $attributes = ''; - $close = ''; - $inside = $p[5]; + $close = ''; + $inside = $p[5]; } else { # tag - $element = $p[1]; + $element = $p[1]; $attributes = $p[2]; - $close = $p[3]; - $inside = $p[4]; + $close = $p[3]; + $inside = $p[4]; } $marker = "$uniq_prefix-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX; @@ -928,33 +940,33 @@ class Parser { $line = trim( $outLine ); if ( $line === '' ) { # empty line, go to next line - $out .= $outLine."\n"; + $out .= $outLine . "\n"; continue; } $first_character = $line[0]; $matches = array(); - if ( preg_match( '/^(:*)\{\|(.*)$/', $line , $matches ) ) { + if ( preg_match( '/^(:*)\{\|(.*)$/', $line, $matches ) ) { # First check if we are starting a new table $indent_level = strlen( $matches[1] ); $attributes = $this->mStripState->unstripBoth( $matches[2] ); - $attributes = Sanitizer::fixTagAttributes( $attributes , 'table' ); - - $outLine = str_repeat( '<dl><dd>' , $indent_level ) . "<table{$attributes}>"; - array_push( $td_history , false ); - array_push( $last_tag_history , '' ); - array_push( $tr_history , false ); - array_push( $tr_attributes , '' ); - array_push( $has_opened_tr , false ); + $attributes = Sanitizer::fixTagAttributes( $attributes, 'table' ); + + $outLine = str_repeat( '<dl><dd>', $indent_level ) . "<table{$attributes}>"; + array_push( $td_history, false ); + array_push( $last_tag_history, '' ); + array_push( $tr_history, false ); + array_push( $tr_attributes, '' ); + array_push( $has_opened_tr, false ); } elseif ( count( $td_history ) == 0 ) { # Don't do any of the following - $out .= $outLine."\n"; + $out .= $outLine . "\n"; continue; - } elseif ( substr( $line , 0 , 2 ) === '|}' ) { + } elseif ( substr( $line, 0, 2 ) === '|}' ) { # We are ending a table - $line = '</table>' . substr( $line , 2 ); + $line = '</table>' . substr( $line, 2 ); $last_tag = array_pop( $last_tag_history ); if ( !array_pop( $has_opened_tr ) ) { @@ -969,8 +981,8 @@ class Parser { $line = "</{$last_tag}>{$line}"; } array_pop( $tr_attributes ); - $outLine = $line . str_repeat( '</dd></dl>' , $indent_level ); - } elseif ( substr( $line , 0 , 2 ) === '|-' ) { + $outLine = $line . str_repeat( '</dd></dl>', $indent_level ); + } elseif ( substr( $line, 0, 2 ) === '|-' ) { # Now we have a table row $line = preg_replace( '#^\|-+#', '', $line ); @@ -983,7 +995,7 @@ class Parser { $line = ''; $last_tag = array_pop( $last_tag_history ); array_pop( $has_opened_tr ); - array_push( $has_opened_tr , true ); + array_push( $has_opened_tr, true ); if ( array_pop( $tr_history ) ) { $line = '</tr>'; @@ -994,27 +1006,27 @@ class Parser { } $outLine = $line; - array_push( $tr_history , false ); - array_push( $td_history , false ); - array_push( $last_tag_history , '' ); - } elseif ( $first_character === '|' || $first_character === '!' || substr( $line , 0 , 2 ) === '|+' ) { + array_push( $tr_history, false ); + array_push( $td_history, false ); + array_push( $last_tag_history, '' ); + } elseif ( $first_character === '|' || $first_character === '!' || substr( $line, 0, 2 ) === '|+' ) { # This might be cell elements, td, th or captions - if ( substr( $line , 0 , 2 ) === '|+' ) { + if ( substr( $line, 0, 2 ) === '|+' ) { $first_character = '+'; - $line = substr( $line , 1 ); + $line = substr( $line, 1 ); } - $line = substr( $line , 1 ); + $line = substr( $line, 1 ); if ( $first_character === '!' ) { - $line = str_replace( '!!' , '||' , $line ); + $line = str_replace( '!!', '||', $line ); } # Split up multiple cells on the same line. # FIXME : This can result in improper nesting of tags processed # by earlier parser steps, but should avoid splitting up eg # attribute values containing literal "||". - $cells = StringUtils::explodeMarkup( '||' , $line ); + $cells = StringUtils::explodeMarkup( '||', $line ); $outLine = ''; @@ -1026,10 +1038,10 @@ class Parser { if ( !array_pop( $tr_history ) ) { $previous = "<tr{$tr_after}>\n"; } - array_push( $tr_history , true ); - array_push( $tr_attributes , '' ); + array_push( $tr_history, true ); + array_push( $tr_attributes, '' ); array_pop( $has_opened_tr ); - array_push( $has_opened_tr , true ); + array_push( $has_opened_tr, true ); } $last_tag = array_pop( $last_tag_history ); @@ -1048,10 +1060,10 @@ class Parser { $last_tag = ''; } - array_push( $last_tag_history , $last_tag ); + array_push( $last_tag_history, $last_tag ); # A cell could contain both parameters and data - $cell_data = explode( '|' , $cell , 2 ); + $cell_data = explode( '|', $cell, 2 ); # Bug 553: Note that a '|' inside an invalid link should not # be mistaken as delimiting cell parameters @@ -1061,12 +1073,12 @@ class Parser { $cell = "{$previous}<{$last_tag}>{$cell_data[0]}"; } else { $attributes = $this->mStripState->unstripBoth( $cell_data[0] ); - $attributes = Sanitizer::fixTagAttributes( $attributes , $last_tag ); + $attributes = Sanitizer::fixTagAttributes( $attributes, $last_tag ); $cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}"; } $outLine .= $cell; - array_push( $td_history , true ); + array_push( $td_history, true ); } } $out .= $outLine . "\n"; @@ -1081,7 +1093,7 @@ class Parser { $out .= "</tr>\n"; } if ( !array_pop( $has_opened_tr ) ) { - $out .= "<tr><td></td></tr>\n" ; + $out .= "<tr><td></td></tr>\n"; } $out .= "</table>\n"; @@ -1122,7 +1134,7 @@ class Parser { # Hook to suspend the parser in this state if ( !wfRunHooks( 'ParserBeforeInternalParse', array( &$this, &$text, &$this->mStripState ) ) ) { wfProfileOut( __METHOD__ ); - return $text ; + return $text; } # if $frame is provided, then use $frame for replacing any variables @@ -1156,17 +1168,13 @@ class Parser { $text = $this->doDoubleUnderscore( $text ); $text = $this->doHeadings( $text ); - if ( $this->mOptions->getUseDynamicDates() ) { - $df = DateFormatter::getInstance(); - $text = $df->reformat( $this->mOptions->getDateFormat(), $text ); - } $text = $this->replaceInternalLinks( $text ); $text = $this->doAllQuotes( $text ); $text = $this->replaceExternalLinks( $text ); # replaceInternalLinks may sometimes leave behind # absolute URLs, which have to be masked to hide them from replaceExternalLinks - $text = str_replace( $this->mUniqPrefix.'NOPARSE', '', $text ); + $text = str_replace( $this->mUniqPrefix . 'NOPARSE', '', $text ); $text = $this->doMagicLinks( $text ); $text = $this->formatHeadings( $text, $origText, $isMain ); @@ -1234,7 +1242,7 @@ class Parser { $CssClass = 'mw-magiclink-pmid'; $id = $m[4]; } else { - throw new MWException( __METHOD__.': unrecognised match type "' . + throw new MWException( __METHOD__ . ': unrecognised match type "' . substr( $m[0], 0, 20 ) . '"' ); } $url = wfMessage( $urlmsg, $id )->inContentLanguage()->text(); @@ -1298,7 +1306,8 @@ class Parser { if ( $text === false ) { # Not an image, make a link $text = Linker::makeExternalLink( $url, - $this->getConverterLanguage()->markNoConversion($url), true, 'free', + $this->getConverterLanguage()->markNoConversion( $url, true ), + true, 'free', $this->getExternalLinkAttribs( $url ) ); # Register it in the output object... # Replace unnecessary URL escape codes with their equivalent characters @@ -1309,7 +1318,6 @@ class Parser { return $text . $trail; } - /** * Parse headers and return html * @@ -1323,8 +1331,7 @@ class Parser { wfProfileIn( __METHOD__ ); for ( $i = 6; $i >= 1; --$i ) { $h = str_repeat( '=', $i ); - $text = preg_replace( "/^$h(.+)$h\\s*$/m", - "<h$i>\\1</h$i>", $text ); + $text = preg_replace( "/^$h(.+)$h\\s*$/m", "<h$i>\\1</h$i>", $text ); } wfProfileOut( __METHOD__ ); return $text; @@ -1345,7 +1352,7 @@ class Parser { foreach ( $lines as $line ) { $outtext .= $this->doQuotes( $line ) . "\n"; } - $outtext = substr( $outtext, 0,-1 ); + $outtext = substr( $outtext, 0, -1 ); wfProfileOut( __METHOD__ ); return $outtext; } @@ -1410,7 +1417,7 @@ class Parser { if ( $firstspace == -1 ) { $firstspace = $i; } - } elseif ( $x2 === ' ') { + } elseif ( $x2 === ' ' ) { if ( $firstsingleletterword == -1 ) { $firstsingleletterword = $i; } @@ -1461,7 +1468,7 @@ class Parser { } elseif ( $state === 'ib' ) { $output .= '</b></i><b>'; $state = 'b'; } elseif ( $state === 'both' ) { - $output .= '<b><i>'.$buffer.'</i>'; $state = 'b'; + $output .= '<b><i>' . $buffer . '</i>'; $state = 'b'; } else { # $state can be 'b' or '' $output .= '<i>'; $state .= 'i'; } @@ -1473,7 +1480,7 @@ class Parser { } elseif ( $state === 'ib' ) { $output .= '</b>'; $state = 'i'; } elseif ( $state === 'both' ) { - $output .= '<i><b>'.$buffer.'</b>'; $state = 'i'; + $output .= '<i><b>' . $buffer . '</b>'; $state = 'i'; } else { # $state can be 'i' or '' $output .= '<b>'; $state .= 'b'; } @@ -1487,7 +1494,7 @@ class Parser { } elseif ( $state === 'ib' ) { $output .= '</b></i>'; $state = ''; } elseif ( $state === 'both' ) { - $output .= '<i><b>'.$buffer.'</b></i>'; $state = ''; + $output .= '<i><b>' . $buffer . '</b></i>'; $state = ''; } else { # ($state == '') $buffer = ''; $state = 'both'; } @@ -1507,7 +1514,7 @@ class Parser { } # There might be lonely ''''', so make sure we have a buffer if ( $state === 'both' && $buffer ) { - $output .= '<b><i>'.$buffer.'</i></b>'; + $output .= '<b><i>' . $buffer . '</i></b>'; } return $output; } @@ -1523,6 +1530,7 @@ class Parser { * * @param $text string * + * @throws MWException * @return string */ function replaceExternalLinks( $text ) { @@ -1537,7 +1545,7 @@ class Parser { $i = 0; while ( $i<count( $bits ) ) { $url = $bits[$i++]; - $protocol = $bits[$i++]; + $i++; // protocol $text = $bits[$i++]; $trail = $bits[$i++]; @@ -1595,26 +1603,39 @@ class Parser { wfProfileOut( __METHOD__ ); return $s; } - + /** + * Get the rel attribute for a particular external link. + * + * @since 1.21 + * @param string|bool $url optional URL, to extract the domain from for rel => + * nofollow if appropriate + * @param $title Title optional Title, for wgNoFollowNsExceptions lookups + * @return string|null rel attribute for $url + */ + public static function getExternalLinkRel( $url = false, $title = null ) { + global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions; + $ns = $title ? $title->getNamespace() : false; + if ( $wgNoFollowLinks && !in_array( $ns, $wgNoFollowNsExceptions ) && + !wfMatchesDomainList( $url, $wgNoFollowDomainExceptions ) ) + { + return 'nofollow'; + } + return null; + } /** * Get an associative array of additional HTML attributes appropriate for a * particular external link. This currently may include rel => nofollow * (depending on configuration, namespace, and the URL's domain) and/or a * target attribute (depending on configuration). * - * @param $url String|bool optional URL, to extract the domain from for rel => + * @param string|bool $url optional URL, to extract the domain from for rel => * nofollow if appropriate * @return Array associative array of HTML attributes */ function getExternalLinkAttribs( $url = false ) { $attribs = array(); - global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions; - $ns = $this->mTitle->getNamespace(); - if ( $wgNoFollowLinks && !in_array( $ns, $wgNoFollowNsExceptions ) && - !wfMatchesDomainList( $url, $wgNoFollowDomainExceptions ) ) - { - $attribs['rel'] = 'nofollow'; - } + $attribs['rel'] = self::getExternalLinkRel( $url, $this->mTitle ); + if ( $this->mOptions->getExternalLinkTarget() ) { $attribs['target'] = $this->mOptions->getExternalLinkTarget(); } @@ -1726,6 +1747,8 @@ class Parser { /** * Process [[ ]] wikilinks (RIL) + * @param $s + * @throws MWException * @return LinkHolderArray * * @private @@ -1733,8 +1756,8 @@ class Parser { function replaceInternalLinks2( &$s ) { wfProfileIn( __METHOD__ ); - wfProfileIn( __METHOD__.'-setup' ); - static $tc = FALSE, $e1, $e1_img; + wfProfileIn( __METHOD__ . '-setup' ); + static $tc = false, $e1, $e1_img; # the % is needed to support urlencoded titles as well if ( !$tc ) { $tc = Title::legalChars() . '#%'; @@ -1763,9 +1786,9 @@ class Parser { } if ( is_null( $this->mTitle ) ) { - wfProfileOut( __METHOD__.'-setup' ); + wfProfileOut( __METHOD__ . '-setup' ); wfProfileOut( __METHOD__ ); - throw new MWException( __METHOD__.": \$this->mTitle is null\n" ); + throw new MWException( __METHOD__ . ": \$this->mTitle is null\n" ); } $nottalk = !$this->mTitle->isTalkPage(); @@ -1780,17 +1803,11 @@ class Parser { $prefix = ''; } - if ( $this->getConverterLanguage()->hasVariants() ) { - $selflink = $this->getConverterLanguage()->autoConvertToAllVariants( - $this->mTitle->getPrefixedText() ); - } else { - $selflink = array( $this->mTitle->getPrefixedText() ); - } $useSubpages = $this->areSubpagesAllowed(); - wfProfileOut( __METHOD__.'-setup' ); + wfProfileOut( __METHOD__ . '-setup' ); # Loop for each link - for ( ; $line !== false && $line !== null ; $a->next(), $line = $a->current() ) { + for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) { # Check for excessive memory usage if ( $holders->isBig() ) { # Too big @@ -1800,24 +1817,24 @@ class Parser { } if ( $useLinkPrefixExtension ) { - wfProfileIn( __METHOD__.'-prefixhandling' ); + wfProfileIn( __METHOD__ . '-prefixhandling' ); if ( preg_match( $e2, $s, $m ) ) { $prefix = $m[2]; $s = $m[1]; } else { - $prefix=''; + $prefix = ''; } # first link if ( $first_prefix ) { $prefix = $first_prefix; $first_prefix = false; } - wfProfileOut( __METHOD__.'-prefixhandling' ); + wfProfileOut( __METHOD__ . '-prefixhandling' ); } $might_be_img = false; - wfProfileIn( __METHOD__."-e1" ); + wfProfileIn( __METHOD__ . "-e1" ); if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt $text = $m[2]; # If we get a ] at the beginning of $m[3] that means we have a link that's something like: @@ -1839,7 +1856,7 @@ class Parser { # fix up urlencoded title texts if ( strpos( $m[1], '%' ) !== false ) { # Should anchors '#' also be rejected? - $m[1] = str_replace( array('<', '>'), array('<', '>'), rawurldecode( $m[1] ) ); + $m[1] = str_replace( array( '<', '>' ), array( '<', '>' ), rawurldecode( $m[1] ) ); } $trail = $m[3]; } elseif ( preg_match( $e1_img, $line, $m ) ) { # Invalid, but might be an image with a link in its caption @@ -1850,19 +1867,19 @@ class Parser { } $trail = ""; } else { # Invalid form; output directly - $s .= $prefix . '[[' . $line ; - wfProfileOut( __METHOD__."-e1" ); + $s .= $prefix . '[[' . $line; + wfProfileOut( __METHOD__ . "-e1" ); continue; } - wfProfileOut( __METHOD__."-e1" ); - wfProfileIn( __METHOD__."-misc" ); + wfProfileOut( __METHOD__ . "-e1" ); + wfProfileIn( __METHOD__ . "-misc" ); # Don't allow internal links to pages containing # PROTO: where PROTO is a valid URL protocol; these # should be external links. if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $m[1] ) ) { - $s .= $prefix . '[[' . $line ; - wfProfileOut( __METHOD__."-misc" ); + $s .= $prefix . '[[' . $line; + wfProfileOut( __METHOD__ . "-misc" ); continue; } @@ -1879,21 +1896,21 @@ class Parser { $link = substr( $link, 1 ); } - wfProfileOut( __METHOD__."-misc" ); - wfProfileIn( __METHOD__."-title" ); + wfProfileOut( __METHOD__ . "-misc" ); + wfProfileIn( __METHOD__ . "-title" ); $nt = Title::newFromText( $this->mStripState->unstripNoWiki( $link ) ); if ( $nt === null ) { $s .= $prefix . '[[' . $line; - wfProfileOut( __METHOD__."-title" ); + wfProfileOut( __METHOD__ . "-title" ); continue; } $ns = $nt->getNamespace(); $iw = $nt->getInterWiki(); - wfProfileOut( __METHOD__."-title" ); + wfProfileOut( __METHOD__ . "-title" ); if ( $might_be_img ) { # if this is actually an invalid link - wfProfileIn( __METHOD__."-might_be_img" ); + wfProfileIn( __METHOD__ . "-might_be_img" ); if ( $ns == NS_FILE && $noforce ) { # but might be an image $found = false; while ( true ) { @@ -1925,19 +1942,19 @@ class Parser { $holders->merge( $this->replaceInternalLinks2( $text ) ); $s .= "{$prefix}[[$link|$text"; # note: no $trail, because without an end, there *is* no trail - wfProfileOut( __METHOD__."-might_be_img" ); + wfProfileOut( __METHOD__ . "-might_be_img" ); continue; } } else { # it's not an image, so output it raw $s .= "{$prefix}[[$link|$text"; # note: no $trail, because without an end, there *is* no trail - wfProfileOut( __METHOD__."-might_be_img" ); + wfProfileOut( __METHOD__ . "-might_be_img" ); continue; } - wfProfileOut( __METHOD__."-might_be_img" ); + wfProfileOut( __METHOD__ . "-might_be_img" ); } - $wasblank = ( $text == '' ); + $wasblank = ( $text == '' ); if ( $wasblank ) { $text = $link; } else { @@ -1951,18 +1968,25 @@ class Parser { # Link not escaped by : , create the various objects if ( $noforce ) { # Interwikis - wfProfileIn( __METHOD__."-interwiki" ); + wfProfileIn( __METHOD__ . "-interwiki" ); if ( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && Language::fetchLanguageName( $iw, null, 'mw' ) ) { - $this->mOutput->addLanguageLink( $nt->getFullText() ); + // XXX: the above check prevents links to sites with identifiers that are not language codes + + # Bug 24502: filter duplicates + if ( !isset( $this->mLangLinkLanguages[$iw] ) ) { + $this->mLangLinkLanguages[$iw] = true; + $this->mOutput->addLanguageLink( $nt->getFullText() ); + } + $s = rtrim( $s . $prefix ); $s .= trim( $trail, "\n" ) == '' ? '': $prefix . $trail; - wfProfileOut( __METHOD__."-interwiki" ); + wfProfileOut( __METHOD__ . "-interwiki" ); continue; } - wfProfileOut( __METHOD__."-interwiki" ); + wfProfileOut( __METHOD__ . "-interwiki" ); if ( $ns == NS_FILE ) { - wfProfileIn( __METHOD__."-image" ); + wfProfileIn( __METHOD__ . "-image" ); if ( !wfIsBadImage( $nt->getDBkey(), $this->mTitle ) ) { if ( $wasblank ) { # if no parameters were passed, $text @@ -1983,12 +2007,12 @@ class Parser { } else { $s .= $prefix . $trail; } - wfProfileOut( __METHOD__."-image" ); + wfProfileOut( __METHOD__ . "-image" ); continue; } if ( $ns == NS_CATEGORY ) { - wfProfileIn( __METHOD__."-category" ); + wfProfileIn( __METHOD__ . "-category" ); $s = rtrim( $s . "\n" ); # bug 87 if ( $wasblank ) { @@ -2007,14 +2031,18 @@ class Parser { */ $s .= trim( $prefix . $trail, "\n" ) == '' ? '' : $prefix . $trail; - wfProfileOut( __METHOD__."-category" ); + wfProfileOut( __METHOD__ . "-category" ); continue; } } # Self-link checking if ( $nt->getFragment() === '' && $ns != NS_SPECIAL ) { - if ( in_array( $nt->getPrefixedText(), $selflink, true ) ) { + if ( $nt->equals( $this->mTitle ) || ( !$nt->isKnown() && in_array( + $this->mTitle->getPrefixedText(), + $this->getConverterLanguage()->autoConvertToAllVariants( $nt->getPrefixedText() ), + true + ) ) ) { $s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail ); continue; } @@ -2023,7 +2051,7 @@ class Parser { # NS_MEDIA is a pseudo-namespace for linking directly to a file # @todo FIXME: Should do batch file existence checks, see comment below if ( $ns == NS_MEDIA ) { - wfProfileIn( __METHOD__."-media" ); + wfProfileIn( __METHOD__ . "-media" ); # Give extensions a chance to select the file revision for us $options = array(); $descQuery = false; @@ -2034,11 +2062,11 @@ class Parser { # Cloak with NOPARSE to avoid replacement in replaceExternalLinks $s .= $prefix . $this->armorLinks( Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail; - wfProfileOut( __METHOD__."-media" ); + wfProfileOut( __METHOD__ . "-media" ); continue; } - wfProfileIn( __METHOD__."-always_known" ); + wfProfileIn( __METHOD__ . "-always_known" ); # Some titles, such as valid special pages or files in foreign repos, should # be shown as bluelinks even though they're not included in the page table # @@ -2051,7 +2079,7 @@ class Parser { # Links will be added to the output link list after checking $s .= $holders->makeHolder( $nt, $text, array(), $trail, $prefix ); } - wfProfileOut( __METHOD__."-always_known" ); + wfProfileOut( __METHOD__ . "-always_known" ); } wfProfileOut( __METHOD__ ); return $holders; @@ -2066,7 +2094,7 @@ class Parser { * * @param $nt Title * @param $text String - * @param $query Array or String + * @param array $query or String * @param $trail String * @param $prefix String * @return String: HTML-wikitext mix oh yuck @@ -2093,7 +2121,7 @@ class Parser { * Not needed quite as much as it used to be since free links are a bit * more sensible these days. But bracketed links are still an issue. * - * @param $text String: more-or-less HTML + * @param string $text more-or-less HTML * @return String: less-or-more HTML with NOPARSE bits */ function armorLinks( $text ) { @@ -2113,7 +2141,7 @@ class Parser { /** * Handle link to subpage if necessary * - * @param $target String: the source of the link + * @param string $target the source of the link * @param &$text String: the link text, modified as necessary * @return string the full name of the link * @private @@ -2239,7 +2267,7 @@ class Parser { } else { return '<!-- ERR 3 -->'; } - return $text."\n"; + return $text . "\n"; } /**#@-*/ @@ -2306,7 +2334,7 @@ class Parser { $output .= $this->nextItem( substr( $prefix, -1 ) ); $paragraphStack = false; - if ( substr( $prefix, -1 ) === ';') { + if ( substr( $prefix, -1 ) === ';' ) { # The one nasty exception: definition lists work like this: # ; title : definition text # So we check for : in the remainder text to split up the @@ -2354,13 +2382,13 @@ class Parser { # If we have no prefixes, go to paragraph mode. if ( 0 == $prefixLength ) { - wfProfileIn( __METHOD__."-paragraph" ); + wfProfileIn( __METHOD__ . "-paragraph" ); # No prefix (not in list)--go to paragraph mode # XXX: use a stack for nestable elements like span, table and div - $openmatch = preg_match('/(?:<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<ol|<li|<\\/tr|<\\/td|<\\/th)/iS', $t ); + $openmatch = preg_match( '/(?:<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<ol|<dl|<li|<\\/tr|<\\/td|<\\/th)/iS', $t ); $closematch = preg_match( '/(?:<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'. - '<td|<th|<\\/?div|<hr|<\\/pre|<\\/p|'.$this->mUniqPrefix.'-pre|<\\/li|<\\/ul|<\\/ol|<\\/?center)/iS', $t ); + '<td|<th|<\\/?div|<hr|<\\/pre|<\\/p|'.$this->mUniqPrefix . '-pre|<\\/li|<\\/ul|<\\/ol|<\\/dl|<\\/?center)/iS', $t ); if ( $openmatch or $closematch ) { $paragraphStack = false; # TODO bug 5718: paragraph closed @@ -2374,7 +2402,7 @@ class Parser { # pre if ( $this->mLastSection !== 'pre' ) { $paragraphStack = false; - $output .= $this->closeParagraph().'<pre>'; + $output .= $this->closeParagraph() . '<pre>'; $this->mLastSection = 'pre'; } $t = substr( $t, 1 ); @@ -2382,7 +2410,7 @@ class Parser { # paragraph if ( trim( $t ) === '' ) { if ( $paragraphStack ) { - $output .= $paragraphStack.'<br />'; + $output .= $paragraphStack . '<br />'; $paragraphStack = false; $this->mLastSection = 'p'; } else { @@ -2400,20 +2428,20 @@ class Parser { $paragraphStack = false; $this->mLastSection = 'p'; } elseif ( $this->mLastSection !== 'p' ) { - $output .= $this->closeParagraph().'<p>'; + $output .= $this->closeParagraph() . '<p>'; $this->mLastSection = 'p'; } } } } - wfProfileOut( __METHOD__."-paragraph" ); + wfProfileOut( __METHOD__ . "-paragraph" ); } # somewhere above we forget to get out of pre block (bug 785) if ( $preCloseMatch && $this->mInPre ) { $this->mInPre = false; } if ( $paragraphStack === false ) { - $output .= $t."\n"; + $output .= $t . "\n"; } } while ( $prefixLength ) { @@ -2433,9 +2461,10 @@ class Parser { * Split up a string on ':', ignoring any occurrences inside tags * to prevent illegal overlapping. * - * @param $str String the string to split + * @param string $str the string to split * @param &$before String set to everything before the ':' * @param &$after String set to everything after the ':' + * @throws MWException * @return String the position of the ':', or false if none found */ function findColonNoLinks( $str, &$before, &$after ) { @@ -2546,7 +2575,7 @@ class Parser { if ( $c === ">" ) { $stack--; if ( $stack < 0 ) { - wfDebug( __METHOD__.": Invalid input; too many close tags\n" ); + wfDebug( __METHOD__ . ": Invalid input; too many close tags\n" ); wfProfileOut( __METHOD__ ); return false; } @@ -2586,7 +2615,7 @@ class Parser { } } if ( $stack > 0 ) { - wfDebug( __METHOD__.": Invalid input; not enough close tags (stack $stack, state $state)\n" ); + wfDebug( __METHOD__ . ": Invalid input; not enough close tags (stack $stack, state $state)\n" ); wfProfileOut( __METHOD__ ); return false; } @@ -2600,8 +2629,9 @@ class Parser { * @private * * @param $index integer - * @param $frame PPFrame + * @param bool|\PPFrame $frame * + * @throws MWException * @return string */ function getVariableValue( $index, $frame = false ) { @@ -2813,7 +2843,7 @@ class Parser { $value = $this->getRevisionUser(); break; case 'namespace': - $value = str_replace( '_',' ',$wgContLang->getNsText( $this->mTitle->getNamespace() ) ); + $value = str_replace( '_', ' ', $wgContLang->getNsText( $this->mTitle->getNamespace() ) ); break; case 'namespacee': $value = wfUrlencode( $wgContLang->getNsText( $this->mTitle->getNamespace() ) ); @@ -2822,7 +2852,7 @@ class Parser { $value = $this->mTitle->getNamespace(); break; case 'talkspace': - $value = $this->mTitle->canTalk() ? str_replace( '_',' ',$this->mTitle->getTalkNsText() ) : ''; + $value = $this->mTitle->canTalk() ? str_replace( '_', ' ', $this->mTitle->getTalkNsText() ) : ''; break; case 'talkspacee': $value = $this->mTitle->canTalk() ? wfUrlencode( $this->mTitle->getTalkNsText() ) : ''; @@ -2960,7 +2990,7 @@ class Parser { * Preprocess some wikitext and return the document tree. * This is the ghost of replace_variables(). * - * @param $text String: The text to parse + * @param string $text The text to parse * @param $flags Integer: bitwise combination of: * self::PTD_FOR_INCLUSION Handle "<noinclude>" and "<includeonly>" as if the text is being * included. Default is to assume a direct page view. @@ -3015,7 +3045,7 @@ class Parser { * self::OT_PREPROCESS: templates but not extension tags * self::OT_HTML: all templates and extension tags * - * @param $text String the text to transform + * @param string $text the text to transform * @param $frame PPFrame Object describing the arguments passed to the template. * Arguments may also be provided as an associative array, as was the usual case before MW1.12. * Providing arguments this way may be useful for extensions wishing to perform variable replacement explicitly. @@ -3034,7 +3064,7 @@ class Parser { if ( $frame === false ) { $frame = $this->getPreprocessor()->newFrame(); } elseif ( !( $frame instanceof PPFrame ) ) { - wfDebug( __METHOD__." called using plain parameters instead of a PPFrame instance. Creating custom frame.\n" ); + wfDebug( __METHOD__ . " called using plain parameters instead of a PPFrame instance. Creating custom frame.\n" ); $frame = $this->getPreprocessor()->newCustomFrame( $frame ); } @@ -3079,7 +3109,7 @@ class Parser { * Warn the user when a parser limitation is reached * Will warn at most once the user per limitation type * - * @param $limitationType String: should be one of: + * @param string $limitationType should be one of: * 'expensive-parserfunction' (corresponding messages: * 'expensive-parserfunction-warning', * 'expensive-parserfunction-category') @@ -3089,8 +3119,8 @@ class Parser { * 'post-expand-template-inclusion' (corresponding messages: * 'post-expand-template-inclusion-warning', * 'post-expand-template-inclusion-category') - * @param $current int|null Current value - * @param $max int|null Maximum allowed, when an explicit limit has been + * @param int|null $current Current value + * @param int|null $max Maximum allowed, when an explicit limit has been * exceeded, provide the values (optional) */ function limitationWarn( $limitationType, $current = '', $max = '' ) { @@ -3105,18 +3135,19 @@ class Parser { * Return the text of a template, after recursively * replacing any variables or templates within the template. * - * @param $piece Array: the parts of the template + * @param array $piece the parts of the template * $piece['title']: the title, i.e. the part before the | * $piece['parts']: the parameter array * $piece['lineStart']: whether the brace was at the start of a line * @param $frame PPFrame The current frame, contains template arguments + * @throws MWException * @return String: the text of the template * @private */ function braceSubstitution( $piece, $frame ) { global $wgContLang; wfProfileIn( __METHOD__ ); - wfProfileIn( __METHOD__.'-setup' ); + wfProfileIn( __METHOD__ . '-setup' ); # Flags $found = false; # $text has been filled @@ -3141,12 +3172,12 @@ class Parser { # $args is a list of argument nodes, starting from index 0, not including $part1 # @todo FIXME: If piece['parts'] is null then the call to getLength() below won't work b/c this $args isn't an object $args = ( null == $piece['parts'] ) ? array() : $piece['parts']; - wfProfileOut( __METHOD__.'-setup' ); + wfProfileOut( __METHOD__ . '-setup' ); $titleProfileIn = null; // profile templates # SUBST - wfProfileIn( __METHOD__.'-modifiers' ); + wfProfileIn( __METHOD__ . '-modifiers' ); if ( !$found ) { $substMatch = $this->mSubstWords->matchStartAndRemove( $part1 ); @@ -3203,7 +3234,7 @@ class Parser { $forceRawInterwiki = true; } } - wfProfileOut( __METHOD__.'-modifiers' ); + wfProfileOut( __METHOD__ . '-modifiers' ); # Parser functions if ( !$found ) { @@ -3211,70 +3242,22 @@ class Parser { $colonPos = strpos( $part1, ':' ); if ( $colonPos !== false ) { - # Case sensitive functions - $function = substr( $part1, 0, $colonPos ); - if ( isset( $this->mFunctionSynonyms[1][$function] ) ) { - $function = $this->mFunctionSynonyms[1][$function]; - } else { - # Case insensitive functions - $function = $wgContLang->lc( $function ); - if ( isset( $this->mFunctionSynonyms[0][$function] ) ) { - $function = $this->mFunctionSynonyms[0][$function]; - } else { - $function = false; - } + $func = substr( $part1, 0, $colonPos ); + $funcArgs = array( trim( substr( $part1, $colonPos + 1 ) ) ); + for ( $i = 0; $i < $args->getLength(); $i++ ) { + $funcArgs[] = $args->item( $i ); } - if ( $function ) { - wfProfileIn( __METHOD__ . '-pfunc-' . $function ); - list( $callback, $flags ) = $this->mFunctionHooks[$function]; - $initialArgs = array( &$this ); - $funcArgs = array( trim( substr( $part1, $colonPos + 1 ) ) ); - if ( $flags & SFH_OBJECT_ARGS ) { - # Add a frame parameter, and pass the arguments as an array - $allArgs = $initialArgs; - $allArgs[] = $frame; - for ( $i = 0; $i < $args->getLength(); $i++ ) { - $funcArgs[] = $args->item( $i ); - } - $allArgs[] = $funcArgs; - } else { - # Convert arguments to plain text - for ( $i = 0; $i < $args->getLength(); $i++ ) { - $funcArgs[] = trim( $frame->expand( $args->item( $i ) ) ); - } - $allArgs = array_merge( $initialArgs, $funcArgs ); - } - - # Workaround for PHP bug 35229 and similar - if ( !is_callable( $callback ) ) { - wfProfileOut( __METHOD__ . '-pfunc-' . $function ); - wfProfileOut( __METHOD__ . '-pfunc' ); - wfProfileOut( __METHOD__ ); - throw new MWException( "Tag hook for $function is not callable\n" ); - } - $result = call_user_func_array( $callback, $allArgs ); - $found = true; - $noparse = true; - $preprocessFlags = 0; - - if ( is_array( $result ) ) { - if ( isset( $result[0] ) ) { - $text = $result[0]; - unset( $result[0] ); - } - - # Extract flags into the local scope - # This allows callers to set flags such as nowiki, found, etc. - extract( $result ); - } else { - $text = $result; - } - if ( !$noparse ) { - $text = $this->preprocessToDom( $text, $preprocessFlags ); - $isChildObj = true; - } - wfProfileOut( __METHOD__ . '-pfunc-' . $function ); + try { + $result = $this->callParserFunction( $frame, $func, $funcArgs ); + } catch ( Exception $ex ) { + wfProfileOut( __METHOD__ . '-pfunc' ); + throw $ex; } + + # The interface for parser functions allows for extracting + # flags into the local scope. Extract any forwarded flags + # here. + extract( $result ); } wfProfileOut( __METHOD__ . '-pfunc' ); } @@ -3350,7 +3333,7 @@ class Parser { } } elseif ( MWNamespace::isNonincludable( $title->getNamespace() ) ) { $found = false; # access denied - wfDebug( __METHOD__.": template inclusion denied for " . $title->getPrefixedDBkey() ); + wfDebug( __METHOD__ . ": template inclusion denied for " . $title->getPrefixedDBkey() ); } else { list( $text, $title ) = $this->getTemplateDom( $title ); if ( $text !== false ) { @@ -3385,7 +3368,7 @@ class Parser { $text = '<span class="error">' . wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text() . '</span>'; - wfDebug( __METHOD__.": template loop broken at '$titleText'\n" ); + wfDebug( __METHOD__ . ": template loop broken at '$titleText'\n" ); } wfProfileOut( __METHOD__ . '-loadtpl' ); } @@ -3442,7 +3425,7 @@ class Parser { { # Bug 529: if the template begins with a table or block-level # element, it should be treated as beginning a new line. - # This behaviour is somewhat controversial. + # This behavior is somewhat controversial. $text = "\n" . $text; } @@ -3472,6 +3455,120 @@ class Parser { } /** + * Call a parser function and return an array with text and flags. + * + * The returned array will always contain a boolean 'found', indicating + * whether the parser function was found or not. It may also contain the + * following: + * text: string|object, resulting wikitext or PP DOM object + * isHTML: bool, $text is HTML, armour it against wikitext transformation + * isChildObj: bool, $text is a DOM node needing expansion in a child frame + * isLocalObj: bool, $text is a DOM node needing expansion in the current frame + * nowiki: bool, wiki markup in $text should be escaped + * + * @since 1.21 + * @param $frame PPFrame The current frame, contains template arguments + * @param $function string Function name + * @param $args array Arguments to the function + * @return array + */ + public function callParserFunction( $frame, $function, array $args = array() ) { + global $wgContLang; + + wfProfileIn( __METHOD__ ); + + # Case sensitive functions + if ( isset( $this->mFunctionSynonyms[1][$function] ) ) { + $function = $this->mFunctionSynonyms[1][$function]; + } else { + # Case insensitive functions + $function = $wgContLang->lc( $function ); + if ( isset( $this->mFunctionSynonyms[0][$function] ) ) { + $function = $this->mFunctionSynonyms[0][$function]; + } else { + wfProfileOut( __METHOD__ ); + return array( 'found' => false ); + } + } + + wfProfileIn( __METHOD__ . '-pfunc-' . $function ); + list( $callback, $flags ) = $this->mFunctionHooks[$function]; + + # Workaround for PHP bug 35229 and similar + if ( !is_callable( $callback ) ) { + wfProfileOut( __METHOD__ . '-pfunc-' . $function ); + wfProfileOut( __METHOD__ ); + throw new MWException( "Tag hook for $function is not callable\n" ); + } + + $allArgs = array( &$this ); + if ( $flags & SFH_OBJECT_ARGS ) { + # Convert arguments to PPNodes and collect for appending to $allArgs + $funcArgs = array(); + foreach ( $args as $k => $v ) { + if ( $v instanceof PPNode || $k === 0 ) { + $funcArgs[] = $v; + } else { + $funcArgs[] = $this->mPreprocessor->newPartNodeArray( array( $k => $v ) )->item( 0 ); + } + } + + # Add a frame parameter, and pass the arguments as an array + $allArgs[] = $frame; + $allArgs[] = $funcArgs; + } else { + # Convert arguments to plain text and append to $allArgs + foreach ( $args as $k => $v ) { + if ( $v instanceof PPNode ) { + $allArgs[] = trim( $frame->expand( $v ) ); + } elseif ( is_int( $k ) && $k >= 0 ) { + $allArgs[] = trim( $v ); + } else { + $allArgs[] = trim( "$k=$v" ); + } + } + } + + $result = call_user_func_array( $callback, $allArgs ); + + # The interface for function hooks allows them to return a wikitext + # string or an array containing the string and any flags. This mungs + # things around to match what this method should return. + if ( !is_array( $result ) ) { + $result = array( + 'found' => true, + 'text' => $result, + ); + } else { + if ( isset( $result[0] ) && !isset( $result['text'] ) ) { + $result['text'] = $result[0]; + } + unset( $result[0] ); + $result += array( + 'found' => true, + ); + } + + $noparse = true; + $preprocessFlags = 0; + if ( isset( $result['noparse'] ) ) { + $noparse = $result['noparse']; + } + if ( isset( $result['preprocessFlags'] ) ) { + $preprocessFlags = $result['preprocessFlags']; + } + + if ( !$noparse ) { + $result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags ); + $result['isChildObj'] = true; + } + wfProfileOut( __METHOD__ . '-pfunc-' . $function ); + wfProfileOut( __METHOD__ ); + + return $result; + } + + /** * Get the semi-parsed DOM representation of a template with a given title, * and its redirect destination title. Cached. * @@ -3593,7 +3690,13 @@ class Parser { } if ( $rev ) { - $text = $rev->getText(); + $content = $rev->getContent(); + $text = $content ? $content->getWikitextForTransclusion() : null; + + if ( $text === false || $text === null ) { + $text = false; + break; + } } elseif ( $title->getNamespace() == NS_MEDIAWIKI ) { global $wgContLang; $message = wfMessage( $wgContLang->lcfirst( $title->getText() ) )->inContentLanguage(); @@ -3601,16 +3704,17 @@ class Parser { $text = false; break; } + $content = $message->content(); $text = $message->plain(); } else { break; } - if ( $text === false ) { + if ( !$content ) { break; } # Redirect? $finalTitle = $title; - $title = Title::newFromRedirect( $text ); + $title = $content->getRedirectTarget(); } return array( 'text' => $text, @@ -3622,7 +3726,7 @@ class Parser { * Fetch a file and its title and register a reference to it. * If 'broken' is a key in $options then the file will appear as a broken thumbnail. * @param Title $title - * @param Array $options Array of options to RepoGroup::findFile + * @param array $options Array of options to RepoGroup::findFile * @return File|bool */ function fetchFile( $title, $options = array() ) { @@ -3634,7 +3738,7 @@ class Parser { * Fetch a file and its title and register a reference to it. * If 'broken' is a key in $options then the file will appear as a broken thumbnail. * @param Title $title - * @param Array $options Array of options to RepoGroup::findFile + * @param array $options Array of options to RepoGroup::findFile * @return Array ( File or false, Title of file ) */ function fetchFileAndTitle( $title, $options = array() ) { @@ -3674,7 +3778,7 @@ class Parser { global $wgEnableScaryTranscluding; if ( !$wgEnableScaryTranscluding ) { - return wfMessage('scarytranscludedisabled')->inContentLanguage()->text(); + return wfMessage( 'scarytranscludedisabled' )->inContentLanguage()->text(); } $url = $title->getFullUrl( "action=$action" ); @@ -3693,19 +3797,24 @@ class Parser { global $wgTranscludeCacheExpiry; $dbr = wfGetDB( DB_SLAVE ); $tsCond = $dbr->timestamp( time() - $wgTranscludeCacheExpiry ); - $obj = $dbr->selectRow( 'transcache', array('tc_time', 'tc_contents' ), + $obj = $dbr->selectRow( 'transcache', array( 'tc_time', 'tc_contents' ), array( 'tc_url' => $url, "tc_time >= " . $dbr->addQuotes( $tsCond ) ) ); if ( $obj ) { return $obj->tc_contents; } - $text = Http::get( $url ); - if ( !$text ) { + $req = MWHttpRequest::factory( $url ); + $status = $req->execute(); // Status object + if ( $status->isOK() ) { + $text = $req->getContent(); + } elseif ( $req->getStatus() != 200 ) { // Though we failed to fetch the content, this status is useless. + return wfMessage( 'scarytranscludefailed-httpstatus', $url, $req->getStatus() /* HTTP status */ )->inContentLanguage()->text(); + } else { return wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text(); } $dbw = wfGetDB( DB_MASTER ); - $dbw->replace( 'transcache', array('tc_url'), array( + $dbw->replace( 'transcache', array( 'tc_url' ), array( 'tc_url' => $url, 'tc_time' => $dbw->timestamp( time() ), 'tc_contents' => $text) @@ -3731,7 +3840,7 @@ class Parser { $argName = trim( $nameWithSpaces ); $object = false; $text = $frame->getArgument( $argName ); - if ( $text === false && $parts->getLength() > 0 + if ( $text === false && $parts->getLength() > 0 && ( $this->ot['html'] || $this->ot['pre'] @@ -3767,7 +3876,7 @@ class Parser { * Return the text to be used for a given extension tag. * This is the ghost of strip(). * - * @param $params array Associative array of parameters: + * @param array $params Associative array of parameters: * name PPNode for the tag name * attr PPNode for unparsed text where tag attributes are thought to be * attributes Optional associative array of parsed attributes @@ -3775,6 +3884,7 @@ class Parser { * noClose Original text did not have a close tag * @param $frame PPFrame * + * @throws MWException * @return string */ function extensionSubstitution( $params, $frame ) { @@ -3783,7 +3893,7 @@ class Parser { $content = !isset( $params['inner'] ) ? null : $frame->expand( $params['inner'] ); $marker = "{$this->mUniqPrefix}-$name-" . sprintf( '%08X', $this->mMarkerIndex++ ) . self::MARKER_SUFFIX; - $isFunctionTag = isset( $this->mFunctionTagHooks[strtolower($name)] ) && + $isFunctionTag = isset( $this->mFunctionTagHooks[strtolower( $name )] ) && ( $this->ot['html'] || $this->ot['pre'] ); if ( $isFunctionTag ) { $markerType = 'none'; @@ -3805,7 +3915,7 @@ class Parser { $output = call_user_func_array( $this->mTagHooks[$name], array( $content, $attributes, $this, $frame ) ); } elseif ( isset( $this->mFunctionTagHooks[$name] ) ) { - list( $callback, $flags ) = $this->mFunctionTagHooks[$name]; + list( $callback, ) = $this->mFunctionTagHooks[$name]; if ( !is_callable( $callback ) ) { throw new MWException( "Tag hook for $name is not callable\n" ); } @@ -3848,7 +3958,7 @@ class Parser { } elseif ( $markerType === 'general' ) { $this->mStripState->addGeneral( $marker, $output ); } else { - throw new MWException( __METHOD__.': invalid marker type' ); + throw new MWException( __METHOD__ . ': invalid marker type' ); } return $marker; } @@ -3856,7 +3966,7 @@ class Parser { /** * Increment an include size counter * - * @param $type String: the type of expansion + * @param string $type the type of expansion * @param $size Integer: the size of the text * @return Boolean: false if this inclusion would take it over the maximum, true otherwise */ @@ -3942,12 +4052,12 @@ class Parser { * Add a tracking category, getting the title from a system message, * or print a debug message if the title is invalid. * - * @param $msg String: message key + * @param string $msg message key * @return Boolean: whether the addition was successful */ public function addTrackingCategory( $msg ) { if ( $this->mTitle->getNamespace() === NS_SPECIAL ) { - wfDebug( __METHOD__.": Not adding tracking category $msg to special page!\n" ); + wfDebug( __METHOD__ . ": Not adding tracking category $msg to special page!\n" ); return false; } // Important to parse with correct title (bug 31469) @@ -3966,7 +4076,7 @@ class Parser { $this->mOutput->addCategory( $containerCategory->getDBkey(), $this->getDefaultSort() ); return true; } else { - wfDebug( __METHOD__.": [[MediaWiki:$msg]] is not a valid title!\n" ); + wfDebug( __METHOD__ . ": [[MediaWiki:$msg]] is not a valid title!\n" ); return false; } } @@ -3982,7 +4092,7 @@ class Parser { * string and re-inserts the newly formatted headlines. * * @param $text String - * @param $origText String: original, untouched wikitext + * @param string $origText original, untouched wikitext * @param $isMain Boolean * @return mixed|string * @private @@ -4062,7 +4172,7 @@ class Parser { $sectionIndex = false; $numbering = ''; $markerMatches = array(); - if ( preg_match("/^$markerRegex/", $headline, $markerMatches ) ) { + if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) { $serial = $markerMatches[1]; list( $titleText, $sectionIndex ) = $this->mHeadings[$serial]; $isTemplate = ( $titleText != $baseTitleText ); @@ -4078,7 +4188,7 @@ class Parser { # Increase TOC level $toclevel++; $sublevelCount[$toclevel] = 0; - if ( $toclevel<$wgMaxTocLevel ) { + if ( $toclevel < $wgMaxTocLevel ) { $prevtoclevel = $toclevel; $toc .= Linker::tocIndent(); $numVisible++; @@ -4100,7 +4210,7 @@ class Parser { if ( $i == 0 ) { $toclevel = 1; } - if ( $toclevel<$wgMaxTocLevel ) { + if ( $toclevel < $wgMaxTocLevel ) { if ( $prevtoclevel < $wgMaxTocLevel ) { # Unindent only if the previous toc level was shown :p $toc .= Linker::tocUnindent( $prevtoclevel - $toclevel ); @@ -4111,7 +4221,7 @@ class Parser { } } else { # No change in level, end TOC line - if ( $toclevel<$wgMaxTocLevel ) { + if ( $toclevel < $wgMaxTocLevel ) { $toc .= Linker::tocLineEnd(); } } @@ -4119,7 +4229,7 @@ class Parser { $levelCount[$toclevel] = $level; # count number of headlines for each level - @$sublevelCount[$toclevel]++; + $sublevelCount[$toclevel]++; $dot = 0; for( $i = 1; $i <= $toclevel; $i++ ) { if ( !empty( $sublevelCount[$i] ) ) { @@ -4144,11 +4254,17 @@ class Parser { $safeHeadline = $this->mStripState->unstripBoth( $safeHeadline ); # Strip out HTML (first regex removes any tag not allowed) - # Allowed tags are <sup> and <sub> (bug 8393), <i> (bug 26375) and <b> (r105284) - # We strip any parameter from accepted tags (second regex) + # Allowed tags are: + # * <sup> and <sub> (bug 8393) + # * <i> (bug 26375) + # * <b> (r105284) + # * <span dir="rtl"> and <span dir="ltr"> (bug 35167) + # + # We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from <span>, + # to allow setting directionality in toc items. $tocline = preg_replace( - array( '#<(?!/?(sup|sub|i|b)(?: [^>]*)?>).*?'.'>#', '#<(/?(sup|sub|i|b))(?: .*?)?'.'>#' ), - array( '', '<$1>' ), + array( '#<(?!/?(span|sup|sub|i|b)(?: [^>]*)?>).*?'.'>#', '#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|i|b))(?: .*?)?'.'>#' ), + array( '', '<$1>' ), $safeHeadline ); $tocline = trim( $tocline ); @@ -4269,9 +4385,9 @@ class Parser { // We use a page and section attribute to stop the language converter from converting these important bits // of data, but put the headline hint inside a content block because the language converter is supposed to // be able to convert that piece of data. - $editlink = '<mw:editsection page="' . htmlspecialchars($editlinkArgs[0]); - $editlink .= '" section="' . htmlspecialchars($editlinkArgs[1]) .'"'; - if ( isset($editlinkArgs[2]) ) { + $editlink = '<mw:editsection page="' . htmlspecialchars( $editlinkArgs[0] ); + $editlink .= '" section="' . htmlspecialchars( $editlinkArgs[1] ) . '"'; + if ( isset( $editlinkArgs[2] ) ) { $editlink .= '>' . $editlinkArgs[2] . '</mw:editsection>'; } else { $editlink .= '/>'; @@ -4353,7 +4469,7 @@ class Parser { * Transform wiki markup when saving a page by doing "\r\n" -> "\n" * conversion, substitting signatures, {{subst:}} templates, etc. * - * @param $text String: the text to transform + * @param string $text the text to transform * @param $title Title: the Title object for the current article * @param $user User: the User object describing the current user * @param $options ParserOptions: parsing options @@ -4436,14 +4552,14 @@ class Parser { '~~~' => $sigText ) ); - # Context links: [[|name]] and [[name (context)|]] + # Context links ("pipe tricks"): [[|name]] and [[name (context)|]] $tc = '[' . Title::legalChars() . ']'; $nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii! - $p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/"; # [[ns:page (context)|]] - $p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?($tc+))\\|]]/"; # [[ns:page(context)|]] - $p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |,)$tc+|)\\|]]/"; # [[ns:page (context), context|]] - $p2 = "/\[\[\\|($tc+)]]/"; # [[|page]] + $p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/"; # [[ns:page (context)|]] + $p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?($tc+))\\|]]/"; # [[ns:page(context)|]] (double-width brackets, added in r40257) + $p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |,)$tc+|)\\|]]/"; # [[ns:page (context), context|]] (using either single or double-width comma) + $p2 = "/\[\[\\|($tc+)]]/"; # [[|page]] (reverse pipe trick: add context from page title) # try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]" $text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text ); @@ -4476,7 +4592,7 @@ class Parser { * as it may have changed if it's the $wgParser. * * @param $user User - * @param $nickname String|bool nickname to use or false to use user's default nickname + * @param string|bool $nickname nickname to use or false to use user's default nickname * @param $fancySig Boolean|null whether the nicknname is the complete signature * or null to use default value * @return string @@ -4507,7 +4623,7 @@ class Parser { } else { # Failed to validate; fall back to the default $nickname = $username; - wfDebug( __METHOD__.": $username has bad XML tags in signature.\n" ); + wfDebug( __METHOD__ . ": $username has bad XML tags in signature.\n" ); } } @@ -4539,7 +4655,7 @@ class Parser { * 2) Substitute all transclusions * * @param $text String - * @param $parsing bool Whether we're cleaning (preferences save) or parsing + * @param bool $parsing Whether we're cleaning (preferences save) or parsing * @return String: signature text */ public function cleanSig( $text, $parsing = false ) { @@ -4614,7 +4730,7 @@ class Parser { /** * Wrapper for preprocess() * - * @param $text String: the text to preprocess + * @param string $text the text to preprocess * @param $options ParserOptions: options * @param $title Title object or null to use $wgTitle * @return String @@ -4633,11 +4749,7 @@ class Parser { global $wgTitle; $title = $wgTitle; } - if ( !$title ) { - # It's not uncommon having a null $wgTitle in scripts. See r80898 - # Create a ghost title in such case - $title = Title::newFromText( 'Dwimmerlaik' ); - } + $text = $this->preprocess( $text, $title, $options ); $executing = false; @@ -4666,6 +4778,7 @@ class Parser { * * @param $tag Mixed: the tag to use, e.g. 'hook' for "<hook>" * @param $callback Mixed: the callback function (and object) to use for the tag + * @throws MWException * @return Mixed|null The old value of the mTagHooks array associated with the hook */ public function setHook( $tag, $callback ) { @@ -4696,6 +4809,7 @@ class Parser { * * @param $tag Mixed: the tag to use, e.g. 'hook' for "<hook>" * @param $callback Mixed: the callback function (and object) to use for the tag + * @throws MWException * @return Mixed|null The old value of the mTagHooks array associated with the hook */ function setTransparentTagHook( $tag, $callback ) { @@ -4734,7 +4848,7 @@ class Parser { * nowiki Wiki markup in the return value should be escaped * isHTML The returned text is HTML, armour it against wikitext transformation * - * @param $id String: The magic word ID + * @param string $id The magic word ID * @param $callback Mixed: the callback function (and object) to use * @param $flags Integer: a combination of the following flags: * SFH_NO_HASH No leading hash, i.e. {{plural:...}} instead of {{#if:...}} @@ -4758,6 +4872,7 @@ class Parser { * Please read the documentation in includes/parser/Preprocessor.php for more information * about the methods available in PPFrame and PPNode. * + * @throws MWException * @return string|callback The old callback function for this name, if any */ public function setFunctionHook( $id, $callback, $flags = 0 ) { @@ -4769,7 +4884,7 @@ class Parser { # Add to function cache $mw = MagicWord::get( $id ); if ( !$mw ) - throw new MWException( __METHOD__.'() expecting a magic word identifier.' ); + throw new MWException( __METHOD__ . '() expecting a magic word identifier.' ); $synonyms = $mw->getSynonyms(); $sensitive = intval( $mw->isCaseSensitive() ); @@ -4805,6 +4920,10 @@ class Parser { * Create a tag function, e.g. "<test>some stuff</test>". * Unlike tag hooks, tag functions are parsed at preprocessor level. * Unlike parser functions, their content is not preprocessed. + * @param $tag + * @param $callback + * @param $flags + * @throws MWException * @return null */ function setFunctionTagHook( $tag, $callback, $flags ) { @@ -4920,7 +5039,7 @@ class Parser { // is defined for images in galleries $matches[3] = $this->recursiveTagParse( trim( $matches[3] ) ); - $parameterMatches = StringUtils::explode('|', $matches[3]); + $parameterMatches = StringUtils::explode( '|', $matches[3] ); $magicWordAlt = MagicWord::get( 'img_alt' ); $magicWordLink = MagicWord::get( 'img_link' ); @@ -4928,14 +5047,18 @@ class Parser { if ( $match = $magicWordAlt->matchVariableStartToEnd( $parameterMatch ) ) { $alt = $this->stripAltText( $match, false ); } - elseif( $match = $magicWordLink->matchVariableStartToEnd( $parameterMatch ) ){ - $link = strip_tags($this->replaceLinkHoldersText($match)); + elseif( $match = $magicWordLink->matchVariableStartToEnd( $parameterMatch ) ) { + $linkValue = strip_tags( $this->replaceLinkHoldersText( $match ) ); $chars = self::EXT_LINK_URL_CLASS; $prots = $this->mUrlProtocols; //check to see if link matches an absolute url, if not then it must be a wiki link. - if(!preg_match( "/^($prots)$chars+$/u", $link)){ - $localLinkTitle = Title::newFromText($link); - $link = $localLinkTitle->getLocalURL(); + if ( preg_match( "/^($prots)$chars+$/u", $linkValue ) ) { + $link = $linkValue; + } else { + $localLinkTitle = Title::newFromText( $linkValue ); + if ( $localLinkTitle !== null ) { + $link = $localLinkTitle->getLocalURL(); + } } } else { @@ -4947,7 +5070,7 @@ class Parser { $label = substr( $label, 1 ); } - $ig->add( $title, $label, $alt ,$link); + $ig->add( $title, $label, $alt, $link ); } return $ig->toHTML(); } @@ -4962,7 +5085,7 @@ class Parser { } else { $handlerClass = ''; } - if ( !isset( $this->mImageParams[$handlerClass] ) ) { + if ( !isset( $this->mImageParams[$handlerClass] ) ) { # Initialise static lists static $internalParamNames = array( 'horizAlign' => array( 'left', 'right', 'center', 'none' ), @@ -5180,7 +5303,7 @@ class Parser { } else { # Inline image if ( !isset( $params['frame']['alt'] ) ) { # No alt text, use the "caption" for the alt text - if ( $caption !== '') { + if ( $caption !== '' ) { $params['frame']['alt'] = $this->stripAltText( $caption, $holders ); } else { # No caption, fall back to using the filename for the @@ -5303,8 +5426,8 @@ class Parser { * * External callers should use the getSection and replaceSection methods. * - * @param $text String: Page wikitext - * @param $section String: a section identifier string of the form: + * @param string $text Page wikitext + * @param string $section a section identifier string of the form: * "<flag1> - <flag2> - ... - <section number>" * * Currently the only recognised flag is "T", which means the target section number @@ -5321,12 +5444,12 @@ class Parser { * string. If $text is the empty string and section 0 is replaced, $newText is * returned. * - * @param $mode String: one of "get" or "replace" - * @param $newText String: replacement text for section data. + * @param string $mode one of "get" or "replace" + * @param string $newText replacement text for section data. * @return String: for "get", the extracted section text. * for "replace", the whole page with the section replaced. */ - private function extractSections( $text, $section, $mode, $newText='' ) { + private function extractSections( $text, $section, $mode, $newText = '' ) { global $wgTitle; # not generally used but removes an ugly failure mode $this->startParse( $wgTitle, new ParserOptions, self::OT_PLAIN, true ); $outText = ''; @@ -5442,12 +5565,12 @@ class Parser { * * If a section contains subsections, these are also returned. * - * @param $text String: text to look in - * @param $section String: section identifier - * @param $deftext String: default to return if section is not found + * @param string $text text to look in + * @param string $section section identifier + * @param string $deftext default to return if section is not found * @return string text of the requested section */ - public function getSection( $text, $section, $deftext='' ) { + public function getSection( $text, $section, $deftext = '' ) { return $this->extractSections( $text, $section, "get", $deftext ); } @@ -5456,9 +5579,9 @@ class Parser { * specified by $section has been replaced with $text. If the target * section does not exist, $oldtext is returned unchanged. * - * @param $oldtext String: former text of the article - * @param $section int section identifier - * @param $text String: replacing text + * @param string $oldtext former text of the article + * @param int $section section identifier + * @param string $text replacing text * @return String: modified text */ public function replaceSection( $oldtext, $section, $text ) { @@ -5540,7 +5663,7 @@ class Parser { /** * Mutator for $mDefaultSort * - * @param $sort string New value + * @param string $sort New value */ public function setDefaultSort( $sort ) { $this->mDefaultSort = $sort; @@ -5596,7 +5719,7 @@ class Parser { * instead. For use in redirects, since IE6 interprets Redirect: headers * as something other than UTF-8 (apparently?), resulting in breakage. * - * @param $text String: The section name + * @param string $text The section name * @return string An anchor */ public function guessLegacySectionNameFromWikiText( $text ) { @@ -5616,7 +5739,7 @@ class Parser { * to create valid section anchors by mimicing the output of the * parser when headings are parsed. * - * @param $text String: text string to be stripped of wikitext + * @param string $text text string to be stripped of wikitext * for use in a Section anchor * @return string Filtered text string */ @@ -5767,12 +5890,13 @@ class Parser { * If the $data array has been stored persistently, the caller should first * check whether it is still valid, by calling isValidHalfParsedText(). * - * @param $data array Serialized data + * @param array $data Serialized data + * @throws MWException * @return String */ function unserializeHalfParsedText( $data ) { if ( !isset( $data['version'] ) || $data['version'] != self::HALF_PARSED_VERSION ) { - throw new MWException( __METHOD__.': invalid version' ); + throw new MWException( __METHOD__ . ': invalid version' ); } # First, extract the strip state. |