diff options
Diffstat (limited to 'includes/parser/Parser.php')
-rw-r--r-- | includes/parser/Parser.php | 1471 |
1 files changed, 686 insertions, 785 deletions
diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index 3ff56a2b..7fcfb90a 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -92,17 +92,18 @@ class Parser # Persistent: var $mTagHooks, $mTransparentTagHooks, $mFunctionHooks, $mFunctionSynonyms, $mVariables, $mImageParams, $mImageParamsMagicArray, $mStripList, $mMarkerIndex, $mPreprocessor, - $mExtLinkBracketedRegex, $mDefaultStripList, $mVarCache, $mConf; + $mExtLinkBracketedRegex, $mUrlProtocols, $mDefaultStripList, $mVarCache, $mConf; # Cleared with clearState(): var $mOutput, $mAutonumber, $mDTopen, $mStripState; var $mIncludeCount, $mArgStack, $mLastSection, $mInPre; - var $mInterwikiLinkHolders, $mLinkHolders; + var $mLinkHolders, $mLinkID; var $mIncludeSizes, $mPPNodeCount, $mDefaultSort; var $mTplExpandCache; // empty-frame expansion cache var $mTplRedirCache, $mTplDomCache, $mHeadings, $mDoubleUnderscores; var $mExpensiveFunctionCount; // number of expensive parser function calls + var $mFileCache; # Temporary # These are variables reset at least once per parse regardless of $clearState @@ -128,6 +129,7 @@ class Parser $this->mFunctionHooks = array(); $this->mFunctionSynonyms = array( 0 => array(), 1 => array() ); $this->mDefaultStripList = $this->mStripList = array( 'nowiki', 'gallery' ); + $this->mUrlProtocols = wfUrlProtocols(); $this->mExtLinkBracketedRegex = '/\[(\b(' . wfUrlProtocols() . ')'. '[^][<>"\\x00-\\x20\\x7F]+) *([^\]\\x0a\\x0d]*?)\]/S'; $this->mVarCache = array(); @@ -147,6 +149,18 @@ class Parser } /** + * Reduce memory usage to reduce the impact of circular references + */ + function __destruct() { + if ( isset( $this->mLinkHolders ) ) { + $this->mLinkHolders->__destruct(); + } + foreach ( $this as $name => $value ) { + unset( $this->$name ); + } + } + + /** * Do various kinds of initialisation on the first call of the parser */ function firstCallInit() { @@ -183,17 +197,8 @@ class Parser $this->mStripState = new StripState; $this->mArgStack = false; $this->mInPre = false; - $this->mInterwikiLinkHolders = array( - 'texts' => array(), - 'titles' => array() - ); - $this->mLinkHolders = array( - 'namespaces' => array(), - 'dbkeys' => array(), - 'queries' => array(), - 'texts' => array(), - 'titles' => array() - ); + $this->mLinkHolders = new LinkHolderArray( $this ); + $this->mLinkID = 0; $this->mRevisionTimestamp = $this->mRevisionId = null; /** @@ -208,7 +213,7 @@ class Parser */ #$this->mUniqPrefix = "\x07UNIQ" . Parser::getRandomString(); # Changed to \x7f to allow XML double-parsing -- TS - $this->mUniqPrefix = "\x7fUNIQ" . Parser::getRandomString(); + $this->mUniqPrefix = "\x7fUNIQ" . self::getRandomString(); # Clear these on every parse, bug 4549 @@ -225,6 +230,7 @@ class Parser $this->mHeadings = array(); $this->mDoubleUnderscores = array(); $this->mExpensiveFunctionCount = 0; + $this->mFileCache = array(); # Fix cloning if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) { @@ -283,22 +289,22 @@ class Parser * Convert wikitext to HTML * Do not call this function recursively. * - * @param string $text Text we want to parse - * @param Title &$title A title object - * @param array $options - * @param boolean $linestart - * @param boolean $clearState - * @param int $revid number to pass in {{REVISIONID}} + * @param $text String: text we want to parse + * @param $title A title object + * @param $options ParserOptions + * @param $linestart boolean + * @param $clearState boolean + * @param $revid Int: number to pass in {{REVISIONID}} * @return ParserOutput a ParserOutput */ - public function parse( $text, &$title, $options, $linestart = true, $clearState = true, $revid = null ) { + public function parse( $text, Title $title, ParserOptions $options, $linestart = true, $clearState = true, $revid = null ) { /** * First pass--just handle <nowiki> sections, pass the rest off * to internalParse() which does all the real work. */ global $wgUseTidy, $wgAlwaysUseTidy, $wgContLang; - $fname = 'Parser::parse-' . wfGetCaller(); + $fname = __METHOD__.'-' . wfGetCaller(); wfProfileIn( __METHOD__ ); wfProfileIn( $fname ); @@ -332,7 +338,6 @@ class Parser ); $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text ); - # only once and last $text = $this->doBlockLevels( $text, $linestart ); $this->replaceLinkHolders( $text ); @@ -352,7 +357,7 @@ class Parser $uniq_prefix = $this->mUniqPrefix; $matches = array(); $elements = array_keys( $this->mTransparentTagHooks ); - $text = Parser::extractTagsAndParams( $elements, $text, $matches, $uniq_prefix ); + $text = self::extractTagsAndParams( $elements, $text, $matches, $uniq_prefix ); foreach( $matches as $marker => $data ) { list( $element, $content, $params, $tag ) = $data; @@ -370,7 +375,7 @@ class Parser $text = Sanitizer::normalizeCharReferences( $text ); if (($wgUseTidy and $this->mOptions->mTidy) or $wgAlwaysUseTidy) { - $text = Parser::tidy($text); + $text = self::tidy($text); } else { # attempt to sanitize at least some nesting problems # (bug #2702 and quite a few others) @@ -475,6 +480,8 @@ class Parser function &getTitle() { return $this->mTitle; } function getOptions() { return $this->mOptions; } function getRevisionId() { return $this->mRevisionId; } + function getOutput() { return $this->mOutput; } + function nextLinkID() { return $this->mLinkID++; } function getFunctionLang() { global $wgLang, $wgContLang; @@ -553,7 +560,7 @@ class Parser $text = $inside; $tail = null; } else { - if( $element == '!--' ) { + if( $element === '!--' ) { $end = '/(-->)/'; } else { $end = "/(<\\/$element\\s*>)/i"; @@ -658,18 +665,27 @@ class Parser */ function tidy( $text ) { global $wgTidyInternal; + $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'. ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'. '<head><title>test</title></head><body>'.$text.'</body></html>'; + + # Tidy is known to clobber tabs; convert 'em to entities + $wrappedtext = str_replace("\t", '	', $wrappedtext); + if( $wgTidyInternal ) { - $correctedtext = Parser::internalTidy( $wrappedtext ); + $correctedtext = self::internalTidy( $wrappedtext ); } else { - $correctedtext = Parser::externalTidy( $wrappedtext ); + $correctedtext = self::externalTidy( $wrappedtext ); } if( is_null( $correctedtext ) ) { wfDebug( "Tidy error detected!\n" ); return $text . "\n<!-- Tidy found serious XHTML errors -->\n"; } + + # Convert the tabs back from entities + $correctedtext = str_replace('	', "\t", $correctedtext); + return $correctedtext; } @@ -681,8 +697,7 @@ class Parser */ function externalTidy( $text ) { global $wgTidyConf, $wgTidyBin, $wgTidyOpts; - $fname = 'Parser::externalTidy'; - wfProfileIn( $fname ); + wfProfileIn( __METHOD__ ); $cleansource = ''; $opts = ' -utf8'; @@ -693,23 +708,25 @@ class Parser 2 => array('file', wfGetNull(), 'a') ); $pipes = array(); - $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts$opts", $descriptorspec, $pipes); - if (is_resource($process)) { - // Theoretically, this style of communication could cause a deadlock - // here. If the stdout buffer fills up, then writes to stdin could - // block. This doesn't appear to happen with tidy, because tidy only - // writes to stdout after it's finished reading from stdin. Search - // for tidyParseStdin and tidySaveStdout in console/tidy.c - fwrite($pipes[0], $text); - fclose($pipes[0]); - while (!feof($pipes[1])) { - $cleansource .= fgets($pipes[1], 1024); + if( function_exists('proc_open') ) { + $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts$opts", $descriptorspec, $pipes); + if (is_resource($process)) { + // Theoretically, this style of communication could cause a deadlock + // here. If the stdout buffer fills up, then writes to stdin could + // block. This doesn't appear to happen with tidy, because tidy only + // writes to stdout after it's finished reading from stdin. Search + // for tidyParseStdin and tidySaveStdout in console/tidy.c + fwrite($pipes[0], $text); + fclose($pipes[0]); + while (!feof($pipes[1])) { + $cleansource .= fgets($pipes[1], 1024); + } + fclose($pipes[1]); + proc_close($process); } - fclose($pipes[1]); - proc_close($process); } - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); if( $cleansource == '' && $text != '') { // Some kind of error happened, so we couldn't get the corrected text. @@ -731,8 +748,7 @@ class Parser */ function internalTidy( $text ) { global $wgTidyConf, $IP, $wgDebugTidy; - $fname = 'Parser::internalTidy'; - wfProfileIn( $fname ); + wfProfileIn( __METHOD__ ); $tidy = new tidy; $tidy->parseString( $text, $wgTidyConf, 'utf8' ); @@ -750,7 +766,7 @@ class Parser "\n-->"; } - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); return $cleansource; } @@ -760,34 +776,35 @@ class Parser * @private */ function doTableStuff ( $text ) { - $fname = 'Parser::doTableStuff'; - wfProfileIn( $fname ); + wfProfileIn( __METHOD__ ); - $lines = explode ( "\n" , $text ); + $lines = StringUtils::explode( "\n", $text ); + $out = ''; $td_history = array (); // Is currently a td tag open? $last_tag_history = array (); // Save history of last lag activated (td, th or caption) $tr_history = array (); // Is currently a tr tag open? $tr_attributes = array (); // history of tr attributes $has_opened_tr = array(); // Did this table open a <tr> element? $indent_level = 0; // indent level of the table - foreach ( $lines as $key => $line ) - { - $line = trim ( $line ); + + foreach ( $lines as $outLine ) { + $line = trim( $outLine ); if( $line == '' ) { // empty line, go to next line + $out .= $outLine."\n"; continue; } - $first_character = $line{0}; + $first_character = $line[0]; $matches = array(); - if ( preg_match( '/^(:*)\{\|(.*)$/' , $line , $matches ) ) { + if ( preg_match( '/^(:*)\{\|(.*)$/', $line , $matches ) ) { // First check if we are starting a new table $indent_level = strlen( $matches[1] ); $attributes = $this->mStripState->unstripBoth( $matches[2] ); $attributes = Sanitizer::fixTagAttributes ( $attributes , 'table' ); - $lines[$key] = str_repeat( '<dl><dd>' , $indent_level ) . "<table{$attributes}>"; + $outLine = str_repeat( '<dl><dd>' , $indent_level ) . "<table{$attributes}>"; array_push ( $td_history , false ); array_push ( $last_tag_history , '' ); array_push ( $tr_history , false ); @@ -795,8 +812,9 @@ class Parser array_push ( $has_opened_tr , false ); } else if ( count ( $td_history ) == 0 ) { // Don't do any of the following + $out .= $outLine."\n"; continue; - } else if ( substr ( $line , 0 , 2 ) == '|}' ) { + } else if ( substr ( $line , 0 , 2 ) === '|}' ) { // We are ending a table $line = '</table>' . substr ( $line , 2 ); $last_tag = array_pop ( $last_tag_history ); @@ -813,8 +831,8 @@ class Parser $line = "</{$last_tag}>{$line}"; } array_pop ( $tr_attributes ); - $lines[$key] = $line . str_repeat( '</dd></dl>' , $indent_level ); - } else if ( substr ( $line , 0 , 2 ) == '|-' ) { + $outLine = $line . str_repeat( '</dd></dl>' , $indent_level ); + } else if ( substr ( $line , 0 , 2 ) === '|-' ) { // Now we have a table row $line = preg_replace( '#^\|-+#', '', $line ); @@ -837,21 +855,21 @@ class Parser $line = "</{$last_tag}>{$line}"; } - $lines[$key] = $line; + $outLine = $line; array_push ( $tr_history , false ); array_push ( $td_history , false ); array_push ( $last_tag_history , '' ); } - else if ( $first_character == '|' || $first_character == '!' || substr ( $line , 0 , 2 ) == '|+' ) { + else if ( $first_character === '|' || $first_character === '!' || substr ( $line , 0 , 2 ) === '|+' ) { // This might be cell elements, td, th or captions - if ( substr ( $line , 0 , 2 ) == '|+' ) { + if ( substr ( $line , 0 , 2 ) === '|+' ) { $first_character = '+'; $line = substr ( $line , 1 ); } $line = substr ( $line , 1 ); - if ( $first_character == '!' ) { + if ( $first_character === '!' ) { $line = str_replace ( '!!' , '||' , $line ); } @@ -861,13 +879,13 @@ class Parser // attribute values containing literal "||". $cells = StringUtils::explodeMarkup( '||' , $line ); - $lines[$key] = ''; + $outLine = ''; // Loop through each table cell foreach ( $cells as $cell ) { $previous = ''; - if ( $first_character != '+' ) + if ( $first_character !== '+' ) { $tr_after = array_pop ( $tr_attributes ); if ( !array_pop ( $tr_history ) ) { @@ -885,11 +903,11 @@ class Parser $previous = "</{$last_tag}>{$previous}"; } - if ( $first_character == '|' ) { + if ( $first_character === '|' ) { $last_tag = 'td'; - } else if ( $first_character == '!' ) { + } else if ( $first_character === '!' ) { $last_tag = 'th'; - } else if ( $first_character == '+' ) { + } else if ( $first_character === '+' ) { $last_tag = 'caption'; } else { $last_tag = ''; @@ -912,38 +930,42 @@ class Parser $cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}"; } - $lines[$key] .= $cell; + $outLine .= $cell; array_push ( $td_history , true ); } } + $out .= $outLine . "\n"; } // Closing open td, tr && table while ( count ( $td_history ) > 0 ) { if ( array_pop ( $td_history ) ) { - $lines[] = '</td>' ; + $out .= "</td>\n"; } if ( array_pop ( $tr_history ) ) { - $lines[] = '</tr>' ; + $out .= "</tr>\n"; } if ( !array_pop ( $has_opened_tr ) ) { - $lines[] = "<tr><td></td></tr>" ; + $out .= "<tr><td></td></tr>\n" ; } - $lines[] = '</table>' ; + $out .= "</table>\n"; } - $output = implode ( "\n" , $lines ) ; + // Remove trailing line-ending (b/c) + if ( substr( $out, -1 ) === "\n" ) { + $out = substr( $out, 0, -1 ); + } // special case: don't return empty table - if( $output == "<table>\n<tr><td></td></tr>\n</table>" ) { - $output = ''; + if( $out === "<table>\n<tr><td></td></tr>\n</table>" ) { + $out = ''; } - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); - return $output; + return $out; } /** @@ -954,12 +976,11 @@ class Parser */ function internalParse( $text ) { $isMain = true; - $fname = 'Parser::internalParse'; - wfProfileIn( $fname ); + wfProfileIn( __METHOD__ ); # Hook to suspend the parser in this state if ( !wfRunHooks( 'ParserBeforeInternalParse', array( &$this, &$text, &$this->mStripState ) ) ) { - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); return $text ; } @@ -992,84 +1013,147 @@ class Parser $text = $this->doMagicLinks( $text ); $text = $this->formatHeadings( $text, $isMain ); - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); return $text; } /** * Replace special strings like "ISBN xxx" and "RFC xxx" with * magic external links. - * + * + * DML * @private */ function doMagicLinks( $text ) { wfProfileIn( __METHOD__ ); + $prots = $this->mUrlProtocols; + $urlChar = self::EXT_LINK_URL_CLASS; $text = preg_replace_callback( '!(?: # Start cases - <a.*?</a> | # Skip link text - <.*?> | # Skip stuff inside HTML elements - (?:RFC|PMID)\s+([0-9]+) | # RFC or PMID, capture number as m[1] - ISBN\s+(\b # ISBN, capture number as m[2] - (?: 97[89] [\ \-]? )? # optional 13-digit ISBN prefix - (?: [0-9] [\ \-]? ){9} # 9 digits with opt. delimiters - [0-9Xx] # check digit - \b) + (<a.*?</a>) | # m[1]: Skip link text + (<.*?>) | # m[2]: Skip stuff inside HTML elements' . " + (\\b(?:$prots)$urlChar+) | # m[3]: Free external links" . ' + (?:RFC|PMID)\s+([0-9]+) | # m[4]: RFC or PMID, capture number + ISBN\s+(\b # m[5]: ISBN, capture number + (?: 97[89] [\ \-]? )? # optional 13-digit ISBN prefix + (?: [0-9] [\ \-]? ){9} # 9 digits with opt. delimiters + [0-9Xx] # check digit + \b) )!x', array( &$this, 'magicLinkCallback' ), $text ); wfProfileOut( __METHOD__ ); return $text; } function magicLinkCallback( $m ) { - if ( substr( $m[0], 0, 1 ) == '<' ) { + if ( isset( $m[1] ) && strval( $m[1] ) !== '' ) { + # Skip anchor + return $m[0]; + } elseif ( isset( $m[2] ) && strval( $m[2] ) !== '' ) { # Skip HTML element return $m[0]; - } elseif ( substr( $m[0], 0, 4 ) == 'ISBN' ) { - $isbn = $m[2]; - $num = strtr( $isbn, array( - '-' => '', - ' ' => '', - 'x' => 'X', - )); - $titleObj = SpecialPage::getTitleFor( 'Booksources', $num ); - $text = '<a href="' . - $titleObj->escapeLocalUrl() . - "\" class=\"internal\">ISBN $isbn</a>"; - } else { - if ( substr( $m[0], 0, 3 ) == 'RFC' ) { + } elseif ( isset( $m[3] ) && strval( $m[3] ) !== '' ) { + # Free external link + return $this->makeFreeExternalLink( $m[0] ); + } elseif ( isset( $m[4] ) && strval( $m[4] ) !== '' ) { + # RFC or PMID + if ( substr( $m[0], 0, 3 ) === 'RFC' ) { $keyword = 'RFC'; $urlmsg = 'rfcurl'; - $id = $m[1]; - } elseif ( substr( $m[0], 0, 4 ) == 'PMID' ) { + $id = $m[4]; + } elseif ( substr( $m[0], 0, 4 ) === 'PMID' ) { $keyword = 'PMID'; $urlmsg = 'pubmedurl'; - $id = $m[1]; + $id = $m[4]; } else { throw new MWException( __METHOD__.': unrecognised match type "' . substr($m[0], 0, 20 ) . '"' ); } - $url = wfMsg( $urlmsg, $id); $sk = $this->mOptions->getSkin(); $la = $sk->getExternalLinkAttributes( $url, $keyword.$id ); - $text = "<a href=\"{$url}\"{$la}>{$keyword} {$id}</a>"; + return "<a href=\"{$url}\"{$la}>{$keyword} {$id}</a>"; + } elseif ( isset( $m[5] ) && strval( $m[5] ) !== '' ) { + # ISBN + $isbn = $m[5]; + $num = strtr( $isbn, array( + '-' => '', + ' ' => '', + 'x' => 'X', + )); + $titleObj = SpecialPage::getTitleFor( 'Booksources', $num ); + return'<a href="' . + $titleObj->escapeLocalUrl() . + "\" class=\"internal\">ISBN $isbn</a>"; + } else { + return $m[0]; } - return $text; } /** + * Make a free external link, given a user-supplied URL + * @return HTML + * @private + */ + function makeFreeExternalLink( $url ) { + global $wgContLang; + wfProfileIn( __METHOD__ ); + + $sk = $this->mOptions->getSkin(); + $trail = ''; + + # The characters '<' and '>' (which were escaped by + # removeHTMLtags()) should not be included in + # URLs, per RFC 2396. + $m2 = array(); + if (preg_match('/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE)) { + $trail = substr($url, $m2[0][1]) . $trail; + $url = substr($url, 0, $m2[0][1]); + } + + # Move trailing punctuation to $trail + $sep = ',;\.:!?'; + # If there is no left bracket, then consider right brackets fair game too + if ( strpos( $url, '(' ) === false ) { + $sep .= ')'; + } + + $numSepChars = strspn( strrev( $url ), $sep ); + if ( $numSepChars ) { + $trail = substr( $url, -$numSepChars ) . $trail; + $url = substr( $url, 0, -$numSepChars ); + } + + $url = Sanitizer::cleanUrl( $url ); + + # Is this an external image? + $text = $this->maybeMakeExternalImage( $url ); + if ( $text === false ) { + # Not an image, make a link + $text = $sk->makeExternalLink( $url, $wgContLang->markNoConversion($url), true, 'free', + $this->getExternalLinkAttribs() ); + # Register it in the output object... + # Replace unnecessary URL escape codes with their equivalent characters + $pasteurized = self::replaceUnusualEscapes( $url ); + $this->mOutput->addExternalLink( $pasteurized ); + } + wfProfileOut( __METHOD__ ); + return $text . $trail; + } + + + /** * Parse headers and return html * * @private */ function doHeadings( $text ) { - $fname = 'Parser::doHeadings'; - wfProfileIn( $fname ); + wfProfileIn( __METHOD__ ); for ( $i = 6; $i >= 1; --$i ) { $h = str_repeat( '=', $i ); $text = preg_replace( "/^$h(.+)$h\\s*$/m", "<h$i>\\1</h$i>", $text ); } - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); return $text; } @@ -1079,15 +1163,14 @@ class Parser * @return string the altered text */ function doAllQuotes( $text ) { - $fname = 'Parser::doAllQuotes'; - wfProfileIn( $fname ); + wfProfileIn( __METHOD__ ); $outtext = ''; - $lines = explode( "\n", $text ); + $lines = StringUtils::explode( "\n", $text ); foreach ( $lines as $line ) { - $outtext .= $this->doQuotes ( $line ) . "\n"; + $outtext .= $this->doQuotes( $line ) . "\n"; } $outtext = substr($outtext, 0,-1); - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); return $outtext; } @@ -1149,9 +1232,9 @@ class Parser { $x1 = substr ($arr[$i-1], -1); $x2 = substr ($arr[$i-1], -2, 1); - if ($x1 == ' ') { + if ($x1 === ' ') { if ($firstspace == -1) $firstspace = $i; - } else if ($x2 == ' ') { + } else if ($x2 === ' ') { if ($firstsingleletterword == -1) $firstsingleletterword = $i; } else { if ($firstmultiletterword == -1) $firstmultiletterword = $i; @@ -1191,7 +1274,7 @@ class Parser { if (($i % 2) == 0) { - if ($state == 'both') + if ($state === 'both') $buffer .= $r; else $output .= $r; @@ -1200,41 +1283,41 @@ class Parser { if (strlen ($r) == 2) { - if ($state == 'i') + if ($state === 'i') { $output .= '</i>'; $state = ''; } - else if ($state == 'bi') + else if ($state === 'bi') { $output .= '</i>'; $state = 'b'; } - else if ($state == 'ib') + else if ($state === 'ib') { $output .= '</b></i><b>'; $state = 'b'; } - else if ($state == 'both') + else if ($state === 'both') { $output .= '<b><i>'.$buffer.'</i>'; $state = 'b'; } else # $state can be 'b' or '' { $output .= '<i>'; $state .= 'i'; } } else if (strlen ($r) == 3) { - if ($state == 'b') + if ($state === 'b') { $output .= '</b>'; $state = ''; } - else if ($state == 'bi') + else if ($state === 'bi') { $output .= '</i></b><i>'; $state = 'i'; } - else if ($state == 'ib') + else if ($state === 'ib') { $output .= '</b>'; $state = 'i'; } - else if ($state == 'both') + else if ($state === 'both') { $output .= '<i><b>'.$buffer.'</b>'; $state = 'i'; } else # $state can be 'i' or '' { $output .= '<b>'; $state .= 'b'; } } else if (strlen ($r) == 5) { - if ($state == 'b') + if ($state === 'b') { $output .= '</b><i>'; $state = 'i'; } - else if ($state == 'i') + else if ($state === 'i') { $output .= '</i><b>'; $state = 'b'; } - else if ($state == 'bi') + else if ($state === 'bi') { $output .= '</i></b>'; $state = ''; } - else if ($state == 'ib') + else if ($state === 'ib') { $output .= '</b></i>'; $state = ''; } - else if ($state == 'both') + else if ($state === 'both') { $output .= '<i><b>'.$buffer.'</b></i>'; $state = ''; } else # ($state == '') { $buffer = ''; $state = 'both'; } @@ -1243,21 +1326,21 @@ class Parser $i++; } # Now close all remaining tags. Notice that the order is important. - if ($state == 'b' || $state == 'ib') + if ($state === 'b' || $state === 'ib') $output .= '</b>'; - if ($state == 'i' || $state == 'bi' || $state == 'ib') + if ($state === 'i' || $state === 'bi' || $state === 'ib') $output .= '</i>'; - if ($state == 'bi') + if ($state === 'bi') $output .= '</b>'; # There might be lonely ''''', so make sure we have a buffer - if ($state == 'both' && $buffer) + if ($state === 'both' && $buffer) $output .= '<b><i>'.$buffer.'</i></b>'; return $output; } } /** - * Replace external links + * Replace external links (REL) * * Note: this is all very hackish and the order of execution matters a lot. * Make sure to run maintenance/parserTests.php if you change this code. @@ -1266,14 +1349,12 @@ class Parser */ function replaceExternalLinks( $text ) { global $wgContLang; - $fname = 'Parser::replaceExternalLinks'; - wfProfileIn( $fname ); + wfProfileIn( __METHOD__ ); $sk = $this->mOptions->getSkin(); $bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE ); - - $s = $this->replaceFreeExternalLinks( array_shift( $bits ) ); + $s = array_shift( $bits ); $i = 0; while ( $i<count( $bits ) ) { @@ -1301,13 +1382,14 @@ class Parser $dtrail = ''; # Set linktype for CSS - if URL==text, link is essentially free - $linktype = ($text == $url) ? 'free' : 'text'; + $linktype = ($text === $url) ? 'free' : 'text'; # No link text, e.g. [http://domain.tld/some.link] if ( $text == '' ) { # Autonumber if allowed. See bug #5918 if ( strpos( wfUrlProtocols(), substr($protocol, 0, strpos($protocol, ':')) ) !== false ) { - $text = '[' . ++$this->mAutonumber . ']'; + $langObj = $this->getFunctionLang(); + $text = '[' . $langObj->formatNum( ++$this->mAutonumber ) . ']'; $linktype = 'autonumber'; } else { # Otherwise just use the URL @@ -1324,108 +1406,44 @@ class Parser $url = Sanitizer::cleanUrl( $url ); - # Process the trail (i.e. everything after this link up until start of the next link), - # replacing any non-bracketed links - $trail = $this->replaceFreeExternalLinks( $trail ); + if ( $this->mOptions->mExternalLinkTarget ) { + $attribs = array( 'target' => $this->mOptions->mExternalLinkTarget ); + } else { + $attribs = array(); + } # Use the encoded URL # This means that users can paste URLs directly into the text # Funny characters like ö aren't valid in URLs anyway # This was changed in August 2004 - $s .= $sk->makeExternalLink( $url, $text, false, $linktype, $this->mTitle->getNamespace() ) . $dtrail . $trail; + $s .= $sk->makeExternalLink( $url, $text, false, $linktype, $this->getExternalLinkAttribs() ) + . $dtrail . $trail; # Register link in the output object. # Replace unnecessary URL escape codes with the referenced character # This prevents spammers from hiding links from the filters - $pasteurized = Parser::replaceUnusualEscapes( $url ); + $pasteurized = self::replaceUnusualEscapes( $url ); $this->mOutput->addExternalLink( $pasteurized ); } - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); return $s; } - /** - * Replace anything that looks like a URL with a link - * @private - */ - function replaceFreeExternalLinks( $text ) { - global $wgContLang; - $fname = 'Parser::replaceFreeExternalLinks'; - wfProfileIn( $fname ); - - $bits = preg_split( '/(\b(?:' . wfUrlProtocols() . '))/S', $text, -1, PREG_SPLIT_DELIM_CAPTURE ); - $s = array_shift( $bits ); - $i = 0; - - $sk = $this->mOptions->getSkin(); - - while ( $i < count( $bits ) ){ - $protocol = $bits[$i++]; - $remainder = $bits[$i++]; - - $m = array(); - if ( preg_match( '/^('.self::EXT_LINK_URL_CLASS.'+)(.*)$/s', $remainder, $m ) ) { - # Found some characters after the protocol that look promising - $url = $protocol . $m[1]; - $trail = $m[2]; - - # special case: handle urls as url args: - # http://www.example.com/foo?=http://www.example.com/bar - if(strlen($trail) == 0 && - isset($bits[$i]) && - preg_match('/^'. wfUrlProtocols() . '$/S', $bits[$i]) && - preg_match( '/^('.self::EXT_LINK_URL_CLASS.'+)(.*)$/s', $bits[$i + 1], $m )) - { - # add protocol, arg - $url .= $bits[$i] . $m[1]; # protocol, url as arg to previous link - $i += 2; - $trail = $m[2]; - } - - # The characters '<' and '>' (which were escaped by - # removeHTMLtags()) should not be included in - # URLs, per RFC 2396. - $m2 = array(); - if (preg_match('/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE)) { - $trail = substr($url, $m2[0][1]) . $trail; - $url = substr($url, 0, $m2[0][1]); - } - - # Move trailing punctuation to $trail - $sep = ',;\.:!?'; - # If there is no left bracket, then consider right brackets fair game too - if ( strpos( $url, '(' ) === false ) { - $sep .= ')'; - } - - $numSepChars = strspn( strrev( $url ), $sep ); - if ( $numSepChars ) { - $trail = substr( $url, -$numSepChars ) . $trail; - $url = substr( $url, 0, -$numSepChars ); - } - - $url = Sanitizer::cleanUrl( $url ); - - # Is this an external image? - $text = $this->maybeMakeExternalImage( $url ); - if ( $text === false ) { - # Not an image, make a link - $text = $sk->makeExternalLink( $url, $wgContLang->markNoConversion($url), true, 'free', $this->mTitle->getNamespace() ); - # Register it in the output object... - # Replace unnecessary URL escape codes with their equivalent characters - $pasteurized = Parser::replaceUnusualEscapes( $url ); - $this->mOutput->addExternalLink( $pasteurized ); - } - $s .= $text . $trail; - } else { - $s .= $protocol . $remainder; - } + function getExternalLinkAttribs() { + $attribs = array(); + global $wgNoFollowLinks, $wgNoFollowNsExceptions; + $ns = $this->mTitle->getNamespace(); + if( $wgNoFollowLinks && !in_array($ns, $wgNoFollowNsExceptions) ) { + $attribs['rel'] = 'nofollow'; } - wfProfileOut( $fname ); - return $s; + if ( $this->mOptions->getExternalLinkTarget() ) { + $attribs['target'] = $this->mOptions->getExternalLinkTarget(); + } + return $attribs; } + /** * Replace unusual URL escape codes with their equivalent characters * @param string @@ -1438,7 +1456,7 @@ class Parser */ static function replaceUnusualEscapes( $url ) { return preg_replace_callback( '/%[0-9A-Fa-f]{2}/', - array( 'Parser', 'replaceUnusualEscapesCallback' ), $url ); + array( __CLASS__, 'replaceUnusualEscapesCallback' ), $url ); } /** @@ -1462,7 +1480,7 @@ class Parser /** * make an image if it's allowed, either through the global - * option or through the exception + * option, through the exception, or through the on-wiki whitelist * @private */ function maybeMakeExternalImage( $url ) { @@ -1470,47 +1488,88 @@ class Parser $imagesfrom = $this->mOptions->getAllowExternalImagesFrom(); $imagesexception = !empty($imagesfrom); $text = false; + # $imagesfrom could be either a single string or an array of strings, parse out the latter + if( $imagesexception && is_array( $imagesfrom ) ) { + $imagematch = false; + foreach( $imagesfrom as $match ) { + if( strpos( $url, $match ) === 0 ) { + $imagematch = true; + break; + } + } + } elseif( $imagesexception ) { + $imagematch = (strpos( $url, $imagesfrom ) === 0); + } else { + $imagematch = false; + } if ( $this->mOptions->getAllowExternalImages() - || ( $imagesexception && strpos( $url, $imagesfrom ) === 0 ) ) { + || ( $imagesexception && $imagematch ) ) { if ( preg_match( self::EXT_IMAGE_REGEX, $url ) ) { # Image found $text = $sk->makeExternalImage( $url ); } } + if( !$text && $this->mOptions->getEnableImageWhitelist() + && preg_match( self::EXT_IMAGE_REGEX, $url ) ) { + $whitelist = explode( "\n", wfMsgForContent( 'external_image_whitelist' ) ); + foreach( $whitelist as $entry ) { + # Sanitize the regex fragment, make it case-insensitive, ignore blank entries/comments + if( strpos( $entry, '#' ) === 0 || $entry === '' ) + continue; + if( preg_match( '/' . str_replace( '/', '\\/', $entry ) . '/i', $url ) ) { + # Image matches a whitelist entry + $text = $sk->makeExternalImage( $url ); + break; + } + } + } return $text; } /** * Process [[ ]] wikilinks + * @return processed text * * @private */ function replaceInternalLinks( $s ) { + $this->mLinkHolders->merge( $this->replaceInternalLinks2( $s ) ); + return $s; + } + + /** + * Process [[ ]] wikilinks (RIL) + * @return LinkHolderArray + * + * @private + */ + function replaceInternalLinks2( &$s ) { global $wgContLang; - static $fname = 'Parser::replaceInternalLinks' ; - wfProfileIn( $fname ); + wfProfileIn( __METHOD__ ); - wfProfileIn( $fname.'-setup' ); - static $tc = FALSE; + wfProfileIn( __METHOD__.'-setup' ); + static $tc = FALSE, $e1, $e1_img; # the % is needed to support urlencoded titles as well - if ( !$tc ) { $tc = Title::legalChars() . '#%'; } + if ( !$tc ) { + $tc = Title::legalChars() . '#%'; + # Match a link having the form [[namespace:link|alternate]]trail + $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD"; + # Match cases where there is no "]]", which might still be images + $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD"; + } $sk = $this->mOptions->getSkin(); + $holders = new LinkHolderArray( $this ); #split the entire text string on occurences of [[ - $a = explode( '[[', ' ' . $s ); + $a = StringUtils::explode( '[[', ' ' . $s ); #get the first element (all text up to first [[), and remove the space we added - $s = array_shift( $a ); + $s = $a->current(); + $a->next(); + $line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void" $s = substr( $s, 1 ); - # Match a link having the form [[namespace:link|alternate]]trail - static $e1 = FALSE; - if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD"; } - # Match cases where there is no "]]", which might still be images - static $e1_img = FALSE; - if ( !$e1_img ) { $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD"; } - $useLinkPrefixExtension = $wgContLang->linkPrefixExtension(); $e2 = null; if ( $useLinkPrefixExtension ) { @@ -1520,8 +1579,8 @@ class Parser } if( is_null( $this->mTitle ) ) { - wfProfileOut( $fname ); - wfProfileOut( $fname.'-setup' ); + wfProfileOut( __METHOD__.'-setup' ); + wfProfileOut( __METHOD__ ); throw new MWException( __METHOD__.": \$this->mTitle is null\n" ); } $nottalk = !$this->mTitle->isTalkPage(); @@ -1543,13 +1602,20 @@ class Parser $selflink = array($this->mTitle->getPrefixedText()); } $useSubpages = $this->areSubpagesAllowed(); - wfProfileOut( $fname.'-setup' ); + wfProfileOut( __METHOD__.'-setup' ); # Loop for each link - for ($k = 0; isset( $a[$k] ); $k++) { - $line = $a[$k]; + for ( ; $line !== false && $line !== null ; $a->next(), $line = $a->current() ) { + # Check for excessive memory usage + if ( $holders->isBig() ) { + # Too big + # Do the existence check, replace the link holders and clear the array + $holders->replace( $s ); + $holders->clear(); + } + if ( $useLinkPrefixExtension ) { - wfProfileIn( $fname.'-prefixhandling' ); + wfProfileIn( __METHOD__.'-prefixhandling' ); if ( preg_match( $e2, $s, $m ) ) { $prefix = $m[2]; $s = $m[1]; @@ -1561,12 +1627,12 @@ class Parser $prefix = $first_prefix; $first_prefix = false; } - wfProfileOut( $fname.'-prefixhandling' ); + wfProfileOut( __METHOD__.'-prefixhandling' ); } $might_be_img = false; - wfProfileIn( "$fname-e1" ); + wfProfileIn( __METHOD__."-e1" ); if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt $text = $m[2]; # If we get a ] at the beginning of $m[3] that means we have a link that's something like: @@ -1600,18 +1666,18 @@ class Parser $trail = ""; } else { # Invalid form; output directly $s .= $prefix . '[[' . $line ; - wfProfileOut( "$fname-e1" ); + wfProfileOut( __METHOD__."-e1" ); continue; } - wfProfileOut( "$fname-e1" ); - wfProfileIn( "$fname-misc" ); + wfProfileOut( __METHOD__."-e1" ); + wfProfileIn( __METHOD__."-misc" ); # Don't allow internal links to pages containing # PROTO: where PROTO is a valid URL protocol; these # should be external links. if (preg_match('/^\b(?:' . wfUrlProtocols() . ')/', $m[1])) { $s .= $prefix . '[[' . $line ; - wfProfileOut( "$fname-misc" ); + wfProfileOut( __METHOD__."-misc" ); continue; } @@ -1622,33 +1688,36 @@ class Parser $link = $m[1]; } - $noforce = (substr($m[1], 0, 1) != ':'); + $noforce = (substr($m[1], 0, 1) !== ':'); if (!$noforce) { # Strip off leading ':' $link = substr($link, 1); } - wfProfileOut( "$fname-misc" ); - wfProfileIn( "$fname-title" ); + wfProfileOut( __METHOD__."-misc" ); + wfProfileIn( __METHOD__."-title" ); $nt = Title::newFromText( $this->mStripState->unstripNoWiki($link) ); if( !$nt ) { $s .= $prefix . '[[' . $line; - wfProfileOut( "$fname-title" ); + wfProfileOut( __METHOD__."-title" ); continue; } $ns = $nt->getNamespace(); $iw = $nt->getInterWiki(); - wfProfileOut( "$fname-title" ); + wfProfileOut( __METHOD__."-title" ); if ($might_be_img) { # if this is actually an invalid link - wfProfileIn( "$fname-might_be_img" ); - if ($ns == NS_IMAGE && $noforce) { #but might be an image + wfProfileIn( __METHOD__."-might_be_img" ); + if ($ns == NS_FILE && $noforce) { #but might be an image $found = false; - while (isset ($a[$k+1]) ) { + while ( true ) { #look at the next 'line' to see if we can close it there - $spliced = array_splice( $a, $k + 1, 1 ); - $next_line = array_shift( $spliced ); + $a->next(); + $next_line = $a->current(); + if ( $next_line === false || $next_line === null ) { + break; + } $m = explode( ']]', $next_line, 3 ); if ( count( $m ) == 3 ) { # the first ]] closes the inner link, the second the image @@ -1668,19 +1737,19 @@ class Parser if ( !$found ) { # we couldn't find the end of this imageLink, so output it raw #but don't ignore what might be perfectly normal links in the text we've examined - $text = $this->replaceInternalLinks($text); + $holders->merge( $this->replaceInternalLinks2( $text ) ); $s .= "{$prefix}[[$link|$text"; # note: no $trail, because without an end, there *is* no trail - wfProfileOut( "$fname-might_be_img" ); + wfProfileOut( __METHOD__."-might_be_img" ); continue; } } else { #it's not an image, so output it raw $s .= "{$prefix}[[$link|$text"; # note: no $trail, because without an end, there *is* no trail - wfProfileOut( "$fname-might_be_img" ); + wfProfileOut( __METHOD__."-might_be_img" ); continue; } - wfProfileOut( "$fname-might_be_img" ); + wfProfileOut( __METHOD__."-might_be_img" ); } $wasblank = ( '' == $text ); @@ -1690,41 +1759,36 @@ class Parser if( $noforce ) { # Interwikis - wfProfileIn( "$fname-interwiki" ); + wfProfileIn( __METHOD__."-interwiki" ); if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgContLang->getLanguageName( $iw ) ) { $this->mOutput->addLanguageLink( $nt->getFullText() ); $s = rtrim($s . $prefix); $s .= trim($trail, "\n") == '' ? '': $prefix . $trail; - wfProfileOut( "$fname-interwiki" ); + wfProfileOut( __METHOD__."-interwiki" ); continue; } - wfProfileOut( "$fname-interwiki" ); + wfProfileOut( __METHOD__."-interwiki" ); - if ( $ns == NS_IMAGE ) { - wfProfileIn( "$fname-image" ); + if ( $ns == NS_FILE ) { + wfProfileIn( __METHOD__."-image" ); if ( !wfIsBadImage( $nt->getDBkey(), $this->mTitle ) ) { # recursively parse links inside the image caption # actually, this will parse them in any other parameters, too, # but it might be hard to fix that, and it doesn't matter ATM $text = $this->replaceExternalLinks($text); - $text = $this->replaceInternalLinks($text); + $holders->merge( $this->replaceInternalLinks2( $text ) ); # cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them - $s .= $prefix . $this->armorLinks( $this->makeImage( $nt, $text ) ) . $trail; - $this->mOutput->addImage( $nt->getDBkey() ); - - wfProfileOut( "$fname-image" ); - continue; - } else { - # We still need to record the image's presence on the page - $this->mOutput->addImage( $nt->getDBkey() ); + $s .= $prefix . $this->armorLinks( $this->makeImage( $nt, $text, $holders ) ) . $trail; } - wfProfileOut( "$fname-image" ); + $this->mOutput->addImage( $nt->getDBkey() ); + wfProfileOut( __METHOD__."-image" ); + continue; } if ( $ns == NS_CATEGORY ) { - wfProfileIn( "$fname-category" ); + wfProfileIn( __METHOD__."-category" ); $s = rtrim($s . "\n"); # bug 87 if ( $wasblank ) { @@ -1743,26 +1807,27 @@ class Parser */ $s .= trim($prefix . $trail, "\n") == '' ? '': $prefix . $trail; - wfProfileOut( "$fname-category" ); + wfProfileOut( __METHOD__."-category" ); continue; } } # Self-link checking - if( $nt->getFragment() === '' ) { + if( $nt->getFragment() === '' && $ns != NS_SPECIAL ) { if( in_array( $nt->getPrefixedText(), $selflink, true ) ) { $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail ); continue; } } - # Special and Media are pseudo-namespaces; no pages actually exist in them + # NS_MEDIA is a pseudo-namespace for linking directly to a file + # FIXME: Should do batch file existence checks, see comment below if( $ns == NS_MEDIA ) { # Give extensions a chance to select the file revision for us $skip = $time = false; wfRunHooks( 'BeforeParserMakeImageLinkObj', array( &$this, &$nt, &$skip, &$time ) ); if ( $skip ) { - $link = $sk->makeLinkObj( $nt ); + $link = $sk->link( $nt ); } else { $link = $sk->makeMediaLinkObj( $nt, $text, $time ); } @@ -1770,28 +1835,23 @@ class Parser $s .= $prefix . $this->armorLinks( $link ) . $trail; $this->mOutput->addImage( $nt->getDBkey() ); continue; - } elseif( $ns == NS_SPECIAL ) { - if( SpecialPage::exists( $nt->getDBkey() ) ) { - $s .= $this->makeKnownLinkHolder( $nt, $text, '', $trail, $prefix ); - } else { - $s .= $this->makeLinkHolder( $nt, $text, '', $trail, $prefix ); - } - continue; - } elseif( $ns == NS_IMAGE ) { - $img = wfFindFile( $nt ); - if( $img ) { - // Force a blue link if the file exists; may be a remote - // upload on the shared repository, and we want to see its - // auto-generated page. - $s .= $this->makeKnownLinkHolder( $nt, $text, '', $trail, $prefix ); - $this->mOutput->addLink( $nt ); - continue; - } } - $s .= $this->makeLinkHolder( $nt, $text, '', $trail, $prefix ); + + # Some titles, such as valid special pages or files in foreign repos, should + # be shown as bluelinks even though they're not included in the page table + # + # FIXME: isAlwaysKnown() can be expensive for file links; we should really do + # batch file existence checks for NS_FILE and NS_MEDIA + if( $iw == '' && $nt->isAlwaysKnown() ) { + $this->mOutput->addLink( $nt ); + $s .= $this->makeKnownLinkHolder( $nt, $text, '', $trail, $prefix ); + } else { + # Links will be added to the output link list after checking + $s .= $holders->makeHolder( $nt, $text, '', $trail, $prefix ); + } } - wfProfileOut( $fname ); - return $s; + wfProfileOut( __METHOD__ ); + return $holders; } /** @@ -1800,32 +1860,10 @@ class Parser * parsing of interwiki links, and secondly to allow all existence checks and * article length checks (for stub links) to be bundled into a single query. * + * @deprecated */ function makeLinkHolder( &$nt, $text = '', $query = '', $trail = '', $prefix = '' ) { - wfProfileIn( __METHOD__ ); - if ( ! is_object($nt) ) { - # Fail gracefully - $retVal = "<!-- ERROR -->{$prefix}{$text}{$trail}"; - } else { - # Separate the link trail from the rest of the link - list( $inside, $trail ) = Linker::splitTrail( $trail ); - - if ( $nt->isExternal() ) { - $nr = array_push( $this->mInterwikiLinkHolders['texts'], $prefix.$text.$inside ); - $this->mInterwikiLinkHolders['titles'][] = $nt; - $retVal = '<!--IWLINK '. ($nr-1) ."-->{$trail}"; - } else { - $nr = array_push( $this->mLinkHolders['namespaces'], $nt->getNamespace() ); - $this->mLinkHolders['dbkeys'][] = $nt->getDBkey(); - $this->mLinkHolders['queries'][] = $query; - $this->mLinkHolders['texts'][] = $prefix.$text.$inside; - $this->mLinkHolders['titles'][] = $nt; - - $retVal = '<!--LINK '. ($nr-1) ."-->{$trail}"; - } - } - wfProfileOut( __METHOD__ ); - return $retVal; + return $this->mLinkHolders->makeHolder( $nt, $text, $query, $trail, $prefix ); } /** @@ -1853,10 +1891,8 @@ class Parser * Insert a NOPARSE hacky thing into any inline links in a chunk that's * going to go through further parsing steps before inline URL expansion. * - * In particular this is important when using action=render, which causes - * full URLs to be included. - * - * Oh man I hate our multi-layer parser! + * Not needed quite as much as it used to be since free links are a bit + * more sensible these days. But bracketed links are still an issue. * * @param string more-or-less HTML * @return string less-or-more HTML with NOPARSE bits @@ -1891,8 +1927,7 @@ class Parser # ../ -- convert to CurrentPage, from CurrentPage/CurrentSubPage # ../Foobar -- convert to CurrentPage/Foobar, from CurrentPage/CurrentSubPage - $fname = 'Parser::maybeDoSubpageLink'; - wfProfileIn( $fname ); + wfProfileIn( __METHOD__ ); $ret = $target; # default return value is no change # Some namespaces don't allow subpages, @@ -1908,7 +1943,7 @@ class Parser # bug 7425 $target = trim( $target ); # Look at the first character - if( $target != '' && $target{0} == '/' ) { + if( $target != '' && $target{0} === '/' ) { # / at end means we don't want the slash to be shown $m = array(); $trailingSlashes = preg_match_all( '%(/+)$%', $target, $m ); @@ -1935,7 +1970,7 @@ class Parser if( count( $exploded ) > $dotdotcount ) { # not allowed to go below top level page $ret = implode( '/', array_slice( $exploded, 0, -$dotdotcount ) ); # / at the end means don't show full path - if( substr( $nodotdot, -1, 1 ) == '/' ) { + if( substr( $nodotdot, -1, 1 ) === '/' ) { $nodotdot = substr( $nodotdot, 0, -1 ); if( '' === $text ) { $text = $nodotdot . $suffix; @@ -1951,7 +1986,7 @@ class Parser } } - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); return $ret; } @@ -1987,10 +2022,10 @@ class Parser /* private */ function openList( $char ) { $result = $this->closeParagraph(); - if ( '*' == $char ) { $result .= '<ul><li>'; } - else if ( '#' == $char ) { $result .= '<ol><li>'; } - else if ( ':' == $char ) { $result .= '<dl><dd>'; } - else if ( ';' == $char ) { + if ( '*' === $char ) { $result .= '<ul><li>'; } + else if ( '#' === $char ) { $result .= '<ol><li>'; } + else if ( ':' === $char ) { $result .= '<dl><dd>'; } + else if ( ';' === $char ) { $result .= '<dl><dt>'; $this->mDTopen = true; } @@ -2000,11 +2035,11 @@ class Parser } /* private */ function nextItem( $char ) { - if ( '*' == $char || '#' == $char ) { return '</li><li>'; } - else if ( ':' == $char || ';' == $char ) { + if ( '*' === $char || '#' === $char ) { return '</li><li>'; } + else if ( ':' === $char || ';' === $char ) { $close = '</dd>'; if ( $this->mDTopen ) { $close = '</dt>'; } - if ( ';' == $char ) { + if ( ';' === $char ) { $this->mDTopen = true; return $close . '<dt>'; } else { @@ -2016,9 +2051,9 @@ class Parser } /* private */ function closeList( $char ) { - if ( '*' == $char ) { $text = '</li></ul>'; } - else if ( '#' == $char ) { $text = '</li></ol>'; } - else if ( ':' == $char ) { + if ( '*' === $char ) { $text = '</li></ul>'; } + else if ( '#' === $char ) { $text = '</li></ol>'; } + else if ( ':' === $char ) { if ( $this->mDTopen ) { $this->mDTopen = false; $text = '</dt></dl>'; @@ -2032,56 +2067,59 @@ class Parser /**#@-*/ /** - * Make lists from lines starting with ':', '*', '#', etc. + * Make lists from lines starting with ':', '*', '#', etc. (DBL) * * @private * @return string the lists rendered as HTML */ function doBlockLevels( $text, $linestart ) { - $fname = 'Parser::doBlockLevels'; - wfProfileIn( $fname ); + wfProfileIn( __METHOD__ ); # Parsing through the text line by line. The main thing # happening here is handling of block-level elements p, pre, # and making lists from lines starting with * # : etc. # - $textLines = explode( "\n", $text ); + $textLines = StringUtils::explode( "\n", $text ); $lastPrefix = $output = ''; $this->mDTopen = $inBlockElem = false; $prefixLength = 0; $paragraphStack = false; - if ( !$linestart ) { - $output .= array_shift( $textLines ); - } foreach ( $textLines as $oLine ) { + # Fix up $linestart + if ( !$linestart ) { + $output .= $oLine; + $linestart = true; + continue; + } + $lastPrefixLength = strlen( $lastPrefix ); $preCloseMatch = preg_match('/<\\/pre/i', $oLine ); $preOpenMatch = preg_match('/<pre/i', $oLine ); if ( !$this->mInPre ) { # Multiple prefixes may abut each other for nested lists. $prefixLength = strspn( $oLine, '*#:;' ); - $pref = substr( $oLine, 0, $prefixLength ); + $prefix = substr( $oLine, 0, $prefixLength ); # eh? - $pref2 = str_replace( ';', ':', $pref ); + $prefix2 = str_replace( ';', ':', $prefix ); $t = substr( $oLine, $prefixLength ); - $this->mInPre = !empty($preOpenMatch); + $this->mInPre = (bool)$preOpenMatch; } else { # Don't interpret any other prefixes in preformatted text $prefixLength = 0; - $pref = $pref2 = ''; + $prefix = $prefix2 = ''; $t = $oLine; } # List generation - if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) { + if( $prefixLength && $lastPrefix === $prefix2 ) { # Same as the last item, so no need to deal with nesting or opening stuff - $output .= $this->nextItem( substr( $pref, -1 ) ); + $output .= $this->nextItem( substr( $prefix, -1 ) ); $paragraphStack = false; - if ( substr( $pref, -1 ) == ';') { + if ( substr( $prefix, -1 ) === ';') { # The one nasty exception: definition lists work like this: # ; title : definition text # So we check for : in the remainder text to split up the @@ -2094,21 +2132,21 @@ class Parser } } elseif( $prefixLength || $lastPrefixLength ) { # Either open or close a level... - $commonPrefixLength = $this->getCommon( $pref, $lastPrefix ); + $commonPrefixLength = $this->getCommon( $prefix, $lastPrefix ); $paragraphStack = false; while( $commonPrefixLength < $lastPrefixLength ) { - $output .= $this->closeList( $lastPrefix{$lastPrefixLength-1} ); + $output .= $this->closeList( $lastPrefix[$lastPrefixLength-1] ); --$lastPrefixLength; } if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) { - $output .= $this->nextItem( $pref{$commonPrefixLength-1} ); + $output .= $this->nextItem( $prefix[$commonPrefixLength-1] ); } while ( $prefixLength > $commonPrefixLength ) { - $char = substr( $pref, $commonPrefixLength, 1 ); + $char = substr( $prefix, $commonPrefixLength, 1 ); $output .= $this->openList( $char ); - if ( ';' == $char ) { + if ( ';' === $char ) { # FIXME: This is dupe of code above if ($this->findColonNoLinks($t, $term, $t2) !== false) { $t = $t2; @@ -2117,10 +2155,10 @@ class Parser } ++$commonPrefixLength; } - $lastPrefix = $pref2; + $lastPrefix = $prefix2; } if( 0 == $prefixLength ) { - wfProfileIn( "$fname-paragraph" ); + wfProfileIn( __METHOD__."-paragraph" ); # No prefix (not in list)--go to paragraph mode // XXX: use a stack for nestable elements like span, table and div $openmatch = preg_match('/(?:<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<ol|<li|<\\/tr|<\\/td|<\\/th)/iS', $t ); @@ -2140,9 +2178,9 @@ class Parser $inBlockElem = true; } } else if ( !$inBlockElem && !$this->mInPre ) { - if ( ' ' == $t{0} and ( $this->mLastSection == 'pre' or trim($t) != '' ) ) { + if ( ' ' == $t{0} and ( $this->mLastSection === 'pre' or trim($t) != '' ) ) { // pre - if ($this->mLastSection != 'pre') { + if ($this->mLastSection !== 'pre') { $paragraphStack = false; $output .= $this->closeParagraph().'<pre>'; $this->mLastSection = 'pre'; @@ -2156,7 +2194,7 @@ class Parser $paragraphStack = false; $this->mLastSection = 'p'; } else { - if ($this->mLastSection != 'p' ) { + if ($this->mLastSection !== 'p' ) { $output .= $this->closeParagraph(); $this->mLastSection = ''; $paragraphStack = '<p>'; @@ -2169,14 +2207,14 @@ class Parser $output .= $paragraphStack; $paragraphStack = false; $this->mLastSection = 'p'; - } else if ($this->mLastSection != 'p') { + } else if ($this->mLastSection !== 'p') { $output .= $this->closeParagraph().'<p>'; $this->mLastSection = 'p'; } } } } - wfProfileOut( "$fname-paragraph" ); + wfProfileOut( __METHOD__."-paragraph" ); } // somewhere above we forget to get out of pre block (bug 785) if($preCloseMatch && $this->mInPre) { @@ -2187,7 +2225,7 @@ class Parser } } while ( $prefixLength ) { - $output .= $this->closeList( $pref2{$prefixLength-1} ); + $output .= $this->closeList( $prefix2[$prefixLength-1] ); --$prefixLength; } if ( '' != $this->mLastSection ) { @@ -2195,7 +2233,7 @@ class Parser $this->mLastSection = ''; } - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); return $output; } @@ -2208,13 +2246,12 @@ class Parser * return string the position of the ':', or false if none found */ function findColonNoLinks($str, &$before, &$after) { - $fname = 'Parser::findColonNoLinks'; - wfProfileIn( $fname ); + wfProfileIn( __METHOD__ ); $pos = strpos( $str, ':' ); if( $pos === false ) { // Nothing to find! - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); return false; } @@ -2223,7 +2260,7 @@ class Parser // Easy; no tag nesting to worry about $before = substr( $str, 0, $pos ); $after = substr( $str, $pos+1 ); - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); return $pos; } @@ -2247,7 +2284,7 @@ class Parser // We found it! $before = substr( $str, 0, $i ); $after = substr( $str, $i + 1 ); - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); return $i; } // Embedded in a tag; don't break it. @@ -2257,7 +2294,7 @@ class Parser $colon = strpos( $str, ':', $i ); if( $colon === false ) { // Nothing else interesting - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); return false; } $lt = strpos( $str, '<', $i ); @@ -2266,7 +2303,7 @@ class Parser // We found it! $before = substr( $str, 0, $colon ); $after = substr( $str, $colon + 1 ); - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); return $i; } } @@ -2313,18 +2350,18 @@ class Parser break; case 3: // self::COLON_STATE_CLOSETAG: // In a </tag> - if( $c == ">" ) { + if( $c === ">" ) { $stack--; if( $stack < 0 ) { - wfDebug( "Invalid input in $fname; too many close tags\n" ); - wfProfileOut( $fname ); + wfDebug( __METHOD__.": Invalid input; too many close tags\n" ); + wfProfileOut( __METHOD__ ); return false; } $state = self::COLON_STATE_TEXT; } break; case self::COLON_STATE_TAGSLASH: - if( $c == ">" ) { + if( $c === ">" ) { // Yes, a self-closed tag <blah/> $state = self::COLON_STATE_TEXT; } else { @@ -2333,33 +2370,33 @@ class Parser } break; case 5: // self::COLON_STATE_COMMENT: - if( $c == "-" ) { + if( $c === "-" ) { $state = self::COLON_STATE_COMMENTDASH; } break; case self::COLON_STATE_COMMENTDASH: - if( $c == "-" ) { + if( $c === "-" ) { $state = self::COLON_STATE_COMMENTDASHDASH; } else { $state = self::COLON_STATE_COMMENT; } break; case self::COLON_STATE_COMMENTDASHDASH: - if( $c == ">" ) { + if( $c === ">" ) { $state = self::COLON_STATE_TEXT; } else { $state = self::COLON_STATE_COMMENT; } break; default: - throw new MWException( "State machine error in $fname" ); + throw new MWException( "State machine error in " . __METHOD__ ); } } if( $stack > 0 ) { - wfDebug( "Invalid input in $fname; not enough close tags (stack $stack, state $state)\n" ); + wfDebug( __METHOD__.": Invalid input; not enough close tags (stack $stack, state $state)\n" ); return false; } - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); return false; } @@ -2552,9 +2589,11 @@ class Parser case 'numberofpages': return $this->mVarCache[$index] = $wgContLang->formatNum( SiteStats::pages() ); case 'numberofadmins': - return $this->mVarCache[$index] = $wgContLang->formatNum( SiteStats::admins() ); + return $this->mVarCache[$index] = $wgContLang->formatNum( SiteStats::numberingroup('sysop') ); case 'numberofedits': return $this->mVarCache[$index] = $wgContLang->formatNum( SiteStats::edits() ); + case 'numberofviews': + return $this->mVarCache[$index] = $wgContLang->formatNum( SiteStats::views() ); case 'currenttimestamp': return $this->mVarCache[$index] = wfTimestamp( TS_MW, $ts ); case 'localtimestamp': @@ -2589,12 +2628,11 @@ class Parser * @private */ function initialiseVariables() { - $fname = 'Parser::initialiseVariables'; - wfProfileIn( $fname ); + wfProfileIn( __METHOD__ ); $variableIDs = MagicWord::getVariableIDs(); $this->mVariables = new MagicWordArray( $variableIDs ); - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); } /** @@ -2663,8 +2701,7 @@ class Parser return $text; } - $fname = __METHOD__; - wfProfileIn( $fname ); + wfProfileIn( __METHOD__ ); if ( $frame === false ) { $frame = $this->getPreprocessor()->newFrame(); @@ -2677,7 +2714,7 @@ class Parser $flags = $argsOnly ? PPFrame::NO_TEMPLATES : 0; $text = $frame->expand( $dom, $flags ); - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); return $text; } @@ -2718,7 +2755,7 @@ class Parser function limitationWarn( $limitationType, $current=null, $max=null) { $msgName = $limitationType . '-warning'; //does no harm if $current and $max are present but are unnecessary for the message - $warning = wfMsg( $msgName, $current, $max); + $warning = wfMsgExt( $msgName, array( 'parsemag', 'escape' ), $current, $max ); $this->mOutput->addWarning( $warning ); $cat = Title::makeTitleSafe( NS_CATEGORY, wfMsgForContent( $limitationType . '-category' ) ); if ( $cat ) { @@ -2739,9 +2776,8 @@ class Parser * @private */ function braceSubstitution( $piece, $frame ) { - global $wgContLang, $wgLang, $wgAllowDisplayTitle, $wgNonincludableNamespaces; - $fname = __METHOD__; - wfProfileIn( $fname ); + global $wgContLang, $wgAllowDisplayTitle, $wgNonincludableNamespaces; + wfProfileIn( __METHOD__ ); wfProfileIn( __METHOD__.'-setup' ); # Flags @@ -2855,7 +2891,7 @@ class Parser # Workaround for PHP bug 35229 and similar if ( !is_callable( $callback ) ) { - throw new MWException( "Tag hook for $name is not callable\n" ); + throw new MWException( "Tag hook for $function is not callable\n" ); } $result = call_user_func_array( $callback, $allArgs ); $found = true; @@ -2898,19 +2934,19 @@ class Parser $titleText = $title->getPrefixedText(); # Check for language variants if the template is not found if($wgContLang->hasVariants() && $title->getArticleID() == 0){ - $wgContLang->findVariantLink($part1, $title); + $wgContLang->findVariantLink( $part1, $title, true ); } # Do infinite loop check if ( !$frame->loopCheck( $title ) ) { $found = true; - $text = "<span class=\"error\">Template loop detected: [[$titleText]]</span>"; + $text = '<span class="error">' . wfMsgForContent( 'parser-template-loop-warning', $titleText ) . '</span>'; wfDebug( __METHOD__.": template loop broken at '$titleText'\n" ); } # Do recursion depth check $limit = $this->mOptions->getMaxTemplateDepth(); if ( $frame->depth >= $limit ) { $found = true; - $text = "<span class=\"error\">Template recursion depth limit exceeded ($limit)</span>"; + $text = '<span class="error">' . wfMsgForContent( 'parser-template-recursion-depth-warning', $limit ) . '</span>'; } } } @@ -2928,7 +2964,7 @@ class Parser } } else if ( $wgNonincludableNamespaces && in_array( $title->getNamespace(), $wgNonincludableNamespaces ) ) { $found = false; //access denied - wfDebug( "$fname: template inclusion denied for " . $title->getPrefixedDBkey() ); + wfDebug( __METHOD__.": template inclusion denied for " . $title->getPrefixedDBkey() ); } else { list( $text, $title ) = $this->getTemplateDom( $title ); if ( $text !== false ) { @@ -2962,7 +2998,7 @@ class Parser # Recover the source wikitext and return it if ( !$found ) { $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args ); - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); return array( 'object' => $text ); } @@ -3021,7 +3057,7 @@ class Parser $ret = array( 'text' => $text ); } - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); return $ret; } @@ -3121,8 +3157,8 @@ class Parser if( $rev ) { $text = $rev->getText(); } elseif( $title->getNamespace() == NS_MEDIAWIKI ) { - global $wgLang; - $message = $wgLang->lcfirst( $title->getText() ); + global $wgContLang; + $message = $wgContLang->lcfirst( $title->getText() ); $text = wfMsgForContentNoTrans( $message ); if( wfEmptyMsg( $message, $text ) ) { $text = false; @@ -3308,7 +3344,7 @@ class Parser } } - if ( $name == 'html' || $name == 'nowiki' ) { + if ( $name === 'html' || $name === 'nowiki' ) { $this->mStripState->nowiki->setPair( $marker, $output ); } else { $this->mStripState->general->setPair( $marker, $output ); @@ -3384,6 +3420,16 @@ class Parser wfDebug( __METHOD__.": [[MediaWiki:hidden-category-category]] is not a valid title!\n" ); } } + # (bug 8068) Allow control over whether robots index a page. + # + # FIXME (bug 14899): __INDEX__ always overrides __NOINDEX__ here! This + # is not desirable, the last one on the page should win. + if( isset( $this->mDoubleUnderscores['noindex'] ) ) { + $this->mOutput->setIndexPolicy( 'noindex' ); + } elseif( isset( $this->mDoubleUnderscores['index'] ) ) { + $this->mOutput->setIndexPolicy( 'index' ); + } + return $text; } @@ -3402,13 +3448,14 @@ class Parser * @private */ function formatHeadings( $text, $isMain=true ) { - global $wgMaxTocLevel, $wgContLang; + global $wgMaxTocLevel, $wgContLang, $wgEnforceHtmlIds; $doNumberHeadings = $this->mOptions->getNumberHeadings(); - if( !$this->mTitle->quickUserCan( 'edit' ) ) { + $showEditLink = $this->mOptions->getEditSection(); + + // Do not call quickUserCan unless necessary + if( $showEditLink && !$this->mTitle->quickUserCan( 'edit' ) ) { $showEditLink = 0; - } else { - $showEditLink = $this->mOptions->getEditSection(); } # Inhibit editsection links if requested in the page @@ -3554,12 +3601,7 @@ class Parser # <!--LINK number--> # turns into # link text with suffix - $safeHeadline = preg_replace( '/<!--LINK ([0-9]*)-->/e', - "\$this->mLinkHolders['texts'][\$1]", - $safeHeadline ); - $safeHeadline = preg_replace( '/<!--IWLINK ([0-9]*)-->/e', - "\$this->mInterwikiLinkHolders['texts'][\$1]", - $safeHeadline ); + $safeHeadline = $this->replaceLinkHoldersText( $safeHeadline ); # Strip out HTML (other than plain <sup> and <sub>: bug 8393) $tocline = preg_replace( @@ -3575,13 +3617,60 @@ class Parser # Save headline for section edit hint before it's escaped $headlineHint = $safeHeadline; - $safeHeadline = Sanitizer::escapeId( $safeHeadline ); - # HTML names must be case-insensitively unique (bug 10721) + + if ( $wgEnforceHtmlIds ) { + $legacyHeadline = false; + $safeHeadline = Sanitizer::escapeId( $safeHeadline, + 'noninitial' ); + } else { + # For reverse compatibility, provide an id that's + # HTML4-compatible, like we used to. + # + # It may be worth noting, academically, that it's possible for + # the legacy anchor to conflict with a non-legacy headline + # anchor on the page. In this case likely the "correct" thing + # would be to either drop the legacy anchors or make sure + # they're numbered first. However, this would require people + # to type in section names like "abc_.D7.93.D7.90.D7.A4" + # manually, so let's not bother worrying about it. + $legacyHeadline = Sanitizer::escapeId( $safeHeadline, + 'noninitial' ); + $safeHeadline = Sanitizer::escapeId( $safeHeadline, 'xml' ); + + if ( $legacyHeadline == $safeHeadline ) { + # No reason to have both (in fact, we can't) + $legacyHeadline = false; + } elseif ( $legacyHeadline != Sanitizer::escapeId( + $legacyHeadline, 'xml' ) ) { + # The legacy id is invalid XML. We used to allow this, but + # there's no reason to do so anymore. Backward + # compatibility will fail slightly in this case, but it's + # no big deal. + $legacyHeadline = false; + } + } + + # HTML names must be case-insensitively unique (bug 10721). FIXME: + # Does this apply to Unicode characters? Because we aren't + # handling those here. $arrayKey = strtolower( $safeHeadline ); + if ( $legacyHeadline === false ) { + $legacyArrayKey = false; + } else { + $legacyArrayKey = strtolower( $legacyHeadline ); + } # count how many in assoc. array so we can track dupes in anchors - isset( $refers[$arrayKey] ) ? $refers[$arrayKey]++ : $refers[$arrayKey] = 1; - $refcount[$headlineCount] = $refers[$arrayKey]; + if ( isset( $refers[$arrayKey] ) ) { + $refers[$arrayKey]++; + } else { + $refers[$arrayKey] = 1; + } + if ( isset( $refers[$legacyArrayKey] ) ) { + $refers[$legacyArrayKey]++; + } else { + $refers[$legacyArrayKey] = 1; + } # Don't number the heading if it is the only one (looks silly) if( $doNumberHeadings && count( $matches[3] ) > 1) { @@ -3591,8 +3680,12 @@ class Parser # Create the anchor for linking from the TOC to the section $anchor = $safeHeadline; - if($refcount[$headlineCount] > 1 ) { - $anchor .= '_' . $refcount[$headlineCount]; + $legacyAnchor = $legacyHeadline; + if ( $refers[$arrayKey] > 1 ) { + $anchor .= '_' . $refers[$arrayKey]; + } + if ( $legacyHeadline !== false && $refers[$legacyArrayKey] > 1 ) { + $legacyAnchor .= '_' . $refers[$legacyArrayKey]; } if( $enoughToc && ( !isset($wgMaxTocLevel) || $toclevel<$wgMaxTocLevel ) ) { $toc .= $sk->tocLine($anchor, $tocline, $numbering, $toclevel); @@ -3603,14 +3696,16 @@ class Parser if( $isTemplate ) { # Put a T flag in the section identifier, to indicate to extractSections() # that sections inside <includeonly> should be counted. - $editlink = $sk->editSectionLinkForOther($titleText, "T-$sectionIndex"); + $editlink = $sk->doEditSectionLink(Title::newFromText( $titleText ), "T-$sectionIndex"); } else { - $editlink = $sk->editSectionLink($this->mTitle, $sectionIndex, $headlineHint); + $editlink = $sk->doEditSectionLink($this->mTitle, $sectionIndex, $headlineHint); } } else { $editlink = ''; } - $head[$headlineCount] = $sk->makeHeadline( $level, $matches['attrib'][$headlineCount], $anchor, $headline, $editlink ); + $head[$headlineCount] = $sk->makeHeadline( $level, + $matches['attrib'][$headlineCount], $anchor, $headline, + $editlink, $legacyAnchor ); $headlineCount++; } @@ -3635,7 +3730,7 @@ class Parser $i = 0; foreach( $blocks as $block ) { - if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) { + if( $showEditLink && $headlineCount > 0 && $i == 0 && $block !== "\n" ) { # This is the [edit] link that appears for the top block of text when # section editing is enabled @@ -3737,11 +3832,13 @@ class Parser $nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii! $p1 = "/\[\[(:?$nc+:|:|)($tc+?)( \\($tc+\\))\\|]]/"; # [[ns:page (context)|]] + $p4 = "/\[\[(:?$nc+:|:|)($tc+?)(($tc+))\\|]]/"; # [[ns:page(context)|]] $p3 = "/\[\[(:?$nc+:|:|)($tc+?)( \\($tc+\\)|)(, $tc+|)\\|]]/"; # [[ns:page (context), context|]] $p2 = "/\[\[\\|($tc+)]]/"; # [[|page]] # try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]" $text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text ); + $text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text ); $text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text ); $t = $this->mTitle->getText(); @@ -3787,7 +3884,7 @@ class Parser } else { # Failed to validate; fall back to the default $nickname = $username; - wfDebug( "Parser::getUserSig: $username has bad XML tags in signature.\n" ); + wfDebug( __METHOD__.": $username has bad XML tags in signature.\n" ); } } @@ -3811,7 +3908,7 @@ class Parser * @return mixed An expanded string, or false if invalid. */ function validateSig( $text ) { - return( wfIsWellFormedXmlFragment( $text ) ? $text : false ); + return( Xml::isWellFormedXmlFragment( $text ) ? $text : false ); } /** @@ -3833,6 +3930,11 @@ class Parser $this->setOutputType = self::OT_PREPROCESS; } + # Option to disable this feature + if ( !$this->mOptions->getCleanSignatures() ) { + return $text; + } + # FIXME: regex doesn't respect extension tags or nowiki # => Move this logic to braceSubstitution() $substWord = MagicWord::get( 'subst' ); @@ -3888,19 +3990,17 @@ class Parser global $wgTitle; static $executing = false; - $fname = "Parser::transformMsg"; - # Guard against infinite recursion if ( $executing ) { return $text; } $executing = true; - wfProfileIn($fname); + wfProfileIn(__METHOD__); $text = $this->preprocess( $text, $wgTitle, $options ); $executing = false; - wfProfileOut($fname); + wfProfileOut(__METHOD__); return $text; } @@ -3997,7 +4097,7 @@ class Parser # Add to function cache $mw = MagicWord::get( $id ); if( !$mw ) - throw new MWException( 'Parser::setFunctionHook() expecting a magic word identifier.' ); + throw new MWException( __METHOD__.'() expecting a magic word identifier.' ); $synonyms = $mw->getSynonyms(); $sensitive = intval( $mw->isCaseSensitive() ); @@ -4012,7 +4112,7 @@ class Parser $syn = '#' . $syn; } # Remove trailing colon - if ( substr( $syn, -1, 1 ) == ':' ) { + if ( substr( $syn, -1, 1 ) === ':' ) { $syn = substr( $syn, 0, -1 ); } $this->mFunctionSynonyms[$sensitive][$syn] = $id; @@ -4033,266 +4133,9 @@ class Parser * Replace <!--LINK--> link placeholders with actual links, in the buffer * Placeholders created in Skin::makeLinkObj() * Returns an array of link CSS classes, indexed by PDBK. - * $options is a bit field, RLH_FOR_UPDATE to select for update */ function replaceLinkHolders( &$text, $options = 0 ) { - global $wgUser; - global $wgContLang; - - $fname = 'Parser::replaceLinkHolders'; - wfProfileIn( $fname ); - - $pdbks = array(); - $colours = array(); - $linkcolour_ids = array(); - $sk = $this->mOptions->getSkin(); - $linkCache = LinkCache::singleton(); - - if ( !empty( $this->mLinkHolders['namespaces'] ) ) { - wfProfileIn( $fname.'-check' ); - $dbr = wfGetDB( DB_SLAVE ); - $page = $dbr->tableName( 'page' ); - $threshold = $wgUser->getOption('stubthreshold'); - - # Sort by namespace - asort( $this->mLinkHolders['namespaces'] ); - - # Generate query - $query = false; - $current = null; - foreach ( $this->mLinkHolders['namespaces'] as $key => $ns ) { - # Make title object - $title = $this->mLinkHolders['titles'][$key]; - - # Skip invalid entries. - # Result will be ugly, but prevents crash. - if ( is_null( $title ) ) { - continue; - } - $pdbk = $pdbks[$key] = $title->getPrefixedDBkey(); - - # Check if it's a static known link, e.g. interwiki - if ( $title->isAlwaysKnown() ) { - $colours[$pdbk] = ''; - } elseif ( ( $id = $linkCache->getGoodLinkID( $pdbk ) ) != 0 ) { - $colours[$pdbk] = ''; - $this->mOutput->addLink( $title, $id ); - } elseif ( $linkCache->isBadLink( $pdbk ) ) { - $colours[$pdbk] = 'new'; - } elseif ( $title->getNamespace() == NS_SPECIAL && !SpecialPage::exists( $pdbk ) ) { - $colours[$pdbk] = 'new'; - } else { - # Not in the link cache, add it to the query - if ( !isset( $current ) ) { - $current = $ns; - $query = "SELECT page_id, page_namespace, page_title, page_is_redirect, page_len"; - $query .= " FROM $page WHERE (page_namespace=$ns AND page_title IN("; - } elseif ( $current != $ns ) { - $current = $ns; - $query .= ")) OR (page_namespace=$ns AND page_title IN("; - } else { - $query .= ', '; - } - - $query .= $dbr->addQuotes( $this->mLinkHolders['dbkeys'][$key] ); - } - } - if ( $query ) { - $query .= '))'; - if ( $options & RLH_FOR_UPDATE ) { - $query .= ' FOR UPDATE'; - } - - $res = $dbr->query( $query, $fname ); - - # Fetch data and form into an associative array - # non-existent = broken - while ( $s = $dbr->fetchObject($res) ) { - $title = Title::makeTitle( $s->page_namespace, $s->page_title ); - $pdbk = $title->getPrefixedDBkey(); - $linkCache->addGoodLinkObj( $s->page_id, $title, $s->page_len, $s->page_is_redirect ); - $this->mOutput->addLink( $title, $s->page_id ); - $colours[$pdbk] = $sk->getLinkColour( $title, $threshold ); - //add id to the extension todolist - $linkcolour_ids[$s->page_id] = $pdbk; - } - //pass an array of page_ids to an extension - wfRunHooks( 'GetLinkColours', array( $linkcolour_ids, &$colours ) ); - } - wfProfileOut( $fname.'-check' ); - - # Do a second query for different language variants of links and categories - if($wgContLang->hasVariants()){ - $linkBatch = new LinkBatch(); - $variantMap = array(); // maps $pdbkey_Variant => $keys (of link holders) - $categoryMap = array(); // maps $category_variant => $category (dbkeys) - $varCategories = array(); // category replacements oldDBkey => newDBkey - - $categories = $this->mOutput->getCategoryLinks(); - - // Add variants of links to link batch - foreach ( $this->mLinkHolders['namespaces'] as $key => $ns ) { - $title = $this->mLinkHolders['titles'][$key]; - if ( is_null( $title ) ) - continue; - - $pdbk = $title->getPrefixedDBkey(); - $titleText = $title->getText(); - - // generate all variants of the link title text - $allTextVariants = $wgContLang->convertLinkToAllVariants($titleText); - - // if link was not found (in first query), add all variants to query - if ( !isset($colours[$pdbk]) ){ - foreach($allTextVariants as $textVariant){ - if($textVariant != $titleText){ - $variantTitle = Title::makeTitle( $ns, $textVariant ); - if(is_null($variantTitle)) continue; - $linkBatch->addObj( $variantTitle ); - $variantMap[$variantTitle->getPrefixedDBkey()][] = $key; - } - } - } - } - - // process categories, check if a category exists in some variant - foreach( $categories as $category ){ - $variants = $wgContLang->convertLinkToAllVariants($category); - foreach($variants as $variant){ - if($variant != $category){ - $variantTitle = Title::newFromDBkey( Title::makeName(NS_CATEGORY,$variant) ); - if(is_null($variantTitle)) continue; - $linkBatch->addObj( $variantTitle ); - $categoryMap[$variant] = $category; - } - } - } - - - if(!$linkBatch->isEmpty()){ - // construct query - $titleClause = $linkBatch->constructSet('page', $dbr); - - $variantQuery = "SELECT page_id, page_namespace, page_title, page_is_redirect, page_len"; - - $variantQuery .= " FROM $page WHERE $titleClause"; - if ( $options & RLH_FOR_UPDATE ) { - $variantQuery .= ' FOR UPDATE'; - } - - $varRes = $dbr->query( $variantQuery, $fname ); - - // for each found variants, figure out link holders and replace - while ( $s = $dbr->fetchObject($varRes) ) { - - $variantTitle = Title::makeTitle( $s->page_namespace, $s->page_title ); - $varPdbk = $variantTitle->getPrefixedDBkey(); - $vardbk = $variantTitle->getDBkey(); - - $holderKeys = array(); - if(isset($variantMap[$varPdbk])){ - $holderKeys = $variantMap[$varPdbk]; - $linkCache->addGoodLinkObj( $s->page_id, $variantTitle, $s->page_len, $s->page_is_redirect ); - $this->mOutput->addLink( $variantTitle, $s->page_id ); - } - - // loop over link holders - foreach($holderKeys as $key){ - $title = $this->mLinkHolders['titles'][$key]; - if ( is_null( $title ) ) continue; - - $pdbk = $title->getPrefixedDBkey(); - - if(!isset($colours[$pdbk])){ - // found link in some of the variants, replace the link holder data - $this->mLinkHolders['titles'][$key] = $variantTitle; - $this->mLinkHolders['dbkeys'][$key] = $variantTitle->getDBkey(); - - // set pdbk and colour - $pdbks[$key] = $varPdbk; - $colours[$varPdbk] = $sk->getLinkColour( $variantTitle, $threshold ); - $linkcolour_ids[$s->page_id] = $pdbk; - } - wfRunHooks( 'GetLinkColours', array( $linkcolour_ids, &$colours ) ); - } - - // check if the object is a variant of a category - if(isset($categoryMap[$vardbk])){ - $oldkey = $categoryMap[$vardbk]; - if($oldkey != $vardbk) - $varCategories[$oldkey]=$vardbk; - } - } - - // rebuild the categories in original order (if there are replacements) - if(count($varCategories)>0){ - $newCats = array(); - $originalCats = $this->mOutput->getCategories(); - foreach($originalCats as $cat => $sortkey){ - // make the replacement - if( array_key_exists($cat,$varCategories) ) - $newCats[$varCategories[$cat]] = $sortkey; - else $newCats[$cat] = $sortkey; - } - $this->mOutput->setCategoryLinks($newCats); - } - } - } - - # Construct search and replace arrays - wfProfileIn( $fname.'-construct' ); - $replacePairs = array(); - foreach ( $this->mLinkHolders['namespaces'] as $key => $ns ) { - $pdbk = $pdbks[$key]; - $searchkey = "<!--LINK $key-->"; - $title = $this->mLinkHolders['titles'][$key]; - if ( !isset( $colours[$pdbk] ) || $colours[$pdbk] == 'new' ) { - $linkCache->addBadLinkObj( $title ); - $colours[$pdbk] = 'new'; - $this->mOutput->addLink( $title, 0 ); - $replacePairs[$searchkey] = $sk->makeBrokenLinkObj( $title, - $this->mLinkHolders['texts'][$key], - $this->mLinkHolders['queries'][$key] ); - } else { - $replacePairs[$searchkey] = $sk->makeColouredLinkObj( $title, $colours[$pdbk], - $this->mLinkHolders['texts'][$key], - $this->mLinkHolders['queries'][$key] ); - } - } - $replacer = new HashtableReplacer( $replacePairs, 1 ); - wfProfileOut( $fname.'-construct' ); - - # Do the thing - wfProfileIn( $fname.'-replace' ); - $text = preg_replace_callback( - '/(<!--LINK .*?-->)/', - $replacer->cb(), - $text); - - wfProfileOut( $fname.'-replace' ); - } - - # Now process interwiki link holders - # This is quite a bit simpler than internal links - if ( !empty( $this->mInterwikiLinkHolders['texts'] ) ) { - wfProfileIn( $fname.'-interwiki' ); - # Make interwiki link HTML - $replacePairs = array(); - foreach( $this->mInterwikiLinkHolders['texts'] as $key => $link ) { - $title = $this->mInterwikiLinkHolders['titles'][$key]; - $replacePairs[$key] = $sk->makeLinkObj( $title, $link ); - } - $replacer = new HashtableReplacer( $replacePairs, 1 ); - - $text = preg_replace_callback( - '/<!--IWLINK (.*?)-->/', - $replacer->cb(), - $text ); - wfProfileOut( $fname.'-interwiki' ); - } - - wfProfileOut( $fname ); - return $colours; + return $this->mLinkHolders->replace( $text ); } /** @@ -4302,36 +4145,7 @@ class Parser * @return string */ function replaceLinkHoldersText( $text ) { - $fname = 'Parser::replaceLinkHoldersText'; - wfProfileIn( $fname ); - - $text = preg_replace_callback( - '/<!--(LINK|IWLINK) (.*?)-->/', - array( &$this, 'replaceLinkHoldersTextCallback' ), - $text ); - - wfProfileOut( $fname ); - return $text; - } - - /** - * @param array $matches - * @return string - * @private - */ - function replaceLinkHoldersTextCallback( $matches ) { - $type = $matches[1]; - $key = $matches[2]; - if( $type == 'LINK' ) { - if( isset( $this->mLinkHolders['texts'][$key] ) ) { - return $this->mLinkHolders['texts'][$key]; - } - } elseif( $type == 'IWLINK' ) { - if( isset( $this->mInterwikiLinkHolders['texts'][$key] ) ) { - return $this->mInterwikiLinkHolders['texts'][$key]; - } - } - return $matches[0]; + return $this->mLinkHolders->replaceText( $text ); } /** @@ -4342,7 +4156,7 @@ class Parser $content = StringUtils::delimiterReplace( '<nowiki>', '</nowiki>', '$1', $text, 'i' ); $attribs = Sanitizer::validateTagAttributes( $attribs, 'pre' ); - return wfOpenElement( 'pre', $attribs ) . + return Xml::openElement( 'pre', $attribs ) . Xml::escapeTagsOnly( $content ) . '</pre>'; } @@ -4385,7 +4199,7 @@ class Parser wfRunHooks( 'BeforeParserrenderImageGallery', array( &$this, &$ig ) ); - $lines = explode( "\n", $text ); + $lines = StringUtils::explode( "\n", $text ); foreach ( $lines as $line ) { # match lines like these: # Image:someimage.jpg|This is some image @@ -4398,7 +4212,7 @@ class Parser if ( strpos( $matches[0], '%' ) !== false ) $matches[1] = urldecode( $matches[1] ); - $tp = Title::newFromText( $matches[1] ); + $tp = Title::newFromText( $matches[1]/*, NS_FILE*/ ); $nt =& $tp; if( is_null( $nt ) ) { # Bogus title. Ignore these so we don't bomb out later. @@ -4415,7 +4229,7 @@ class Parser $ig->add( $nt, $html ); # Only add real images (bug #5586) - if ( $nt->getNamespace() == NS_IMAGE ) { + if ( $nt->getNamespace() == NS_FILE ) { $this->mOutput->addImage( $nt->getDBkey() ); } } @@ -4435,7 +4249,7 @@ class Parser 'vertAlign' => array( 'baseline', 'sub', 'super', 'top', 'text-top', 'middle', 'bottom', 'text-bottom' ), 'frame' => array( 'thumbnail', 'manualthumb', 'framed', 'frameless', - 'upright', 'border' ), + 'upright', 'border', 'link', 'alt' ), ); static $internalParamMap; if ( !$internalParamMap ) { @@ -4464,20 +4278,24 @@ class Parser /** * Parse image options text and use it to make an image + * @param Title $title + * @param string $options + * @param LinkHolderArray $holders */ - function makeImage( $title, $options ) { + function makeImage( $title, $options, $holders = false ) { # Check if the options text is of the form "options|alt text" # Options are: - # * thumbnail make a thumbnail with enlarge-icon and caption, alignment depends on lang - # * left no resizing, just left align. label is used for alt= only - # * right same, but right aligned - # * none same, but not aligned - # * ___px scale to ___ pixels width, no aligning. e.g. use in taxobox - # * center center the image - # * framed Keep original image size, no magnify-button. - # * frameless like 'thumb' but without a frame. Keeps user preferences for width - # * upright reduce width for upright images, rounded to full __0 px - # * border draw a 1px border around the image + # * thumbnail make a thumbnail with enlarge-icon and caption, alignment depends on lang + # * left no resizing, just left align. label is used for alt= only + # * right same, but right aligned + # * none same, but not aligned + # * ___px scale to ___ pixels width, no aligning. e.g. use in taxobox + # * center center the image + # * framed Keep original image size, no magnify-button. + # * frameless like 'thumb' but without a frame. Keeps user preferences for width + # * upright reduce width for upright images, rounded to full __0 px + # * border draw a 1px border around the image + # * alt Text for HTML alt attribute (defaults to empty) # vertical-align values (no % or length right now): # * baseline # * sub @@ -4488,7 +4306,7 @@ class Parser # * bottom # * text-bottom - $parts = array_map( 'trim', explode( '|', $options) ); + $parts = StringUtils::explode( "|", $options ); $sk = $this->mOptions->getSkin(); # Give extensions a chance to select the file revision for us @@ -4496,11 +4314,21 @@ class Parser wfRunHooks( 'BeforeParserMakeImageLinkObj', array( &$this, &$title, &$skip, &$time, &$descQuery ) ); if ( $skip ) { - return $sk->makeLinkObj( $title ); + return $sk->link( $title ); } + # Get the file + $imagename = $title->getDBkey(); + if ( isset( $this->mFileCache[$imagename][$time] ) ) { + $file = $this->mFileCache[$imagename][$time]; + } else { + $file = wfFindFile( $title, $time ); + if ( count( $this->mFileCache ) > 1000 ) { + $this->mFileCache = array(); + } + $this->mFileCache[$imagename][$time] = $file; + } # Get parameter map - $file = wfFindFile( $title, $time ); $handler = $file ? $file->getHandler() : false; list( $paramMap, $mwArray ) = $this->getImageParams( $handler ); @@ -4510,13 +4338,14 @@ class Parser $params = array( 'frame' => array(), 'handler' => array(), 'horizAlign' => array(), 'vertAlign' => array() ); foreach( $parts as $part ) { + $part = trim( $part ); list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $part ); $validated = false; if( isset( $paramMap[$magicName] ) ) { list( $type, $paramName ) = $paramMap[$magicName]; // Special case; width and height come in one variable together - if( $type == 'handler' && $paramName == 'width' ) { + if( $type === 'handler' && $paramName === 'width' ) { $m = array(); # (bug 13500) In both cases (width/height and width only), # permit trailing "px" for backward compatibility. @@ -4539,16 +4368,42 @@ class Parser } } // else no validation -- bug 13436 } else { - if ( $type == 'handler' ) { + if ( $type === 'handler' ) { # Validate handler parameter $validated = $handler->validateParam( $paramName, $value ); } else { # Validate internal parameters switch( $paramName ) { - case "manualthumb": - /// @fixme - possibly check validity here? - /// downstream behavior seems odd with missing manual thumbs. + case 'manualthumb': + case 'alt': + // @fixme - possibly check validity here for + // manualthumb? downstream behavior seems odd with + // missing manual thumbs. $validated = true; + $value = $this->stripAltText( $value, $holders ); + break; + case 'link': + $chars = self::EXT_LINK_URL_CLASS; + $prots = $this->mUrlProtocols; + if ( $value === '' ) { + $paramName = 'no-link'; + $value = true; + $validated = true; + } elseif ( preg_match( "/^$prots/", $value ) ) { + if ( preg_match( "/^($prots)$chars+$/", $value, $m ) ) { + $paramName = 'link-url'; + $this->mOutput->addExternalLink( $value ); + $validated = true; + } + } else { + $linkTitle = Title::newFromText( $value ); + if ( $linkTitle ) { + $paramName = 'link-title'; + $value = $linkTitle; + $this->mOutput->addLink( $linkTitle ); + $validated = true; + } + } break; default: // Most other things appear to be empty or numeric... @@ -4574,17 +4429,32 @@ class Parser $params['frame']['valign'] = key( $params['vertAlign'] ); } - # Strip bad stuff out of the alt text - $alt = $this->replaceLinkHoldersText( $caption ); + $params['frame']['caption'] = $caption; - # make sure there are no placeholders in thumbnail attributes - # that are later expanded to html- so expand them now and - # remove the tags - $alt = $this->mStripState->unstripBoth( $alt ); - $alt = Sanitizer::stripAllTags( $alt ); + $params['frame']['title'] = $this->stripAltText( $caption, $holders ); - $params['frame']['alt'] = $alt; - $params['frame']['caption'] = $caption; + # In the old days, [[Image:Foo|text...]] would set alt text. Later it + # came to also set the caption, ordinary text after the image -- which + # makes no sense, because that just repeats the text multiple times in + # screen readers. It *also* came to set the title attribute. + # + # Now that we have an alt attribute, we should not set the alt text to + # equal the caption: that's worse than useless, it just repeats the + # text. This is the framed/thumbnail case. If there's no caption, we + # use the unnamed parameter for alt text as well, just for the time be- + # ing, if the unnamed param is set and the alt param is not. + # + # For the future, we need to figure out if we want to tweak this more, + # e.g., introducing a title= parameter for the title; ignoring the un- + # named parameter entirely for images without a caption; adding an ex- + # plicit caption= parameter and preserving the old magic unnamed para- + # meter for BC; ... + if( $caption !== '' && !isset( $params['frame']['alt'] ) + && !isset( $params['frame']['framed'] ) + && !isset( $params['frame']['thumbnail'] ) + && !isset( $params['frame']['manualthumb'] ) ) { + $params['frame']['alt'] = $params['frame']['title']; + } wfRunHooks( 'ParserMakeImageParams', array( $title, $file, &$params ) ); @@ -4598,6 +4468,25 @@ class Parser return $ret; } + + protected function stripAltText( $caption, $holders ) { + # Strip bad stuff out of the title (tooltip). We can't just use + # replaceLinkHoldersText() here, because if this function is called + # from replaceInternalLinks2(), mLinkHolders won't be up-to-date. + if ( $holders ) { + $tooltip = $holders->replaceText( $caption ); + } else { + $tooltip = $this->replaceLinkHoldersText( $caption ); + } + + # make sure there are no placeholders in thumbnail attributes + # that are later expanded to html- so expand them now and + # remove the tags + $tooltip = $this->mStripState->unstripBoth( $tooltip ); + $tooltip = Sanitizer::stripAllTags( $tooltip ); + + return $tooltip; + } /** * Set a flag in the output object indicating that the content is dynamic and @@ -4678,7 +4567,7 @@ class Parser $sectionParts = explode( '-', $section ); $sectionIndex = array_pop( $sectionParts ); foreach ( $sectionParts as $part ) { - if ( $part == 'T' ) { + if ( $part === 'T' ) { $flags |= self::PTD_FOR_INCLUSION; } } @@ -4695,14 +4584,14 @@ class Parser $targetLevel = 1000; } else { while ( $node ) { - if ( $node->getName() == 'h' ) { + if ( $node->getName() === 'h' ) { $bits = $node->splitHeading(); if ( $bits['i'] == $sectionIndex ) { $targetLevel = $bits['level']; break; } } - if ( $mode == 'replace' ) { + if ( $mode === 'replace' ) { $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG ); } $node = $node->getNextSibling(); @@ -4711,7 +4600,7 @@ class Parser if ( !$node ) { // Not found - if ( $mode == 'get' ) { + if ( $mode === 'get' ) { return $newText; } else { return $text; @@ -4720,21 +4609,21 @@ class Parser // Find the end of the section, including nested sections do { - if ( $node->getName() == 'h' ) { + if ( $node->getName() === 'h' ) { $bits = $node->splitHeading(); $curLevel = $bits['level']; if ( $bits['i'] != $sectionIndex && $curLevel <= $targetLevel ) { break; } } - if ( $mode == 'get' ) { + if ( $mode === 'get' ) { $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG ); } $node = $node->getNextSibling(); } while ( $node ); // Write out the remainder (in replace mode only) - if ( $mode == 'replace' ) { + if ( $mode === 'replace' ) { // Output the replacement text // Add two newlines on -- trailing whitespace in $newText is conventionally // stripped by the editor, so we need both newlines to restore the paragraph gap @@ -4820,16 +4709,28 @@ class Parser * @return string */ public function getDefaultSort() { + global $wgCategoryPrefixedDefaultSortkey; if( $this->mDefaultSort !== false ) { return $this->mDefaultSort; + } elseif ($this->mTitle->getNamespace() == NS_CATEGORY || + !$wgCategoryPrefixedDefaultSortkey) { + return $this->mTitle->getText(); } else { - return $this->mTitle->getNamespace() == NS_CATEGORY - ? $this->mTitle->getText() - : $this->mTitle->getPrefixedText(); + return $this->mTitle->getPrefixedText(); } } /** + * Accessor for $mDefaultSort + * Unlike getDefaultSort(), will return false if none is set + * + * @return string or false + */ + public function getCustomDefaultSort() { + return $this->mDefaultSort; + } + + /** * Try to guess the section anchor name based on a wikitext fragment * presumably extracted from a heading, for example "Header" from * "== Header ==". @@ -4962,7 +4863,7 @@ class StripState { do { $oldText = $text; $text = $this->general->replace( $text ); - } while ( $text != $oldText ); + } while ( $text !== $oldText ); wfProfileOut( __METHOD__ ); return $text; } @@ -4972,7 +4873,7 @@ class StripState { do { $oldText = $text; $text = $this->nowiki->replace( $text ); - } while ( $text != $oldText ); + } while ( $text !== $oldText ); wfProfileOut( __METHOD__ ); return $text; } @@ -4983,7 +4884,7 @@ class StripState { $oldText = $text; $text = $this->general->replace( $text ); $text = $this->nowiki->replace( $text ); - } while ( $text != $oldText ); + } while ( $text !== $oldText ); wfProfileOut( __METHOD__ ); return $text; } @@ -4997,7 +4898,7 @@ class OnlyIncludeReplacer { var $output = ''; function replace( $matches ) { - if ( substr( $matches[1], -1 ) == "\n" ) { + if ( substr( $matches[1], -1 ) === "\n" ) { $this->output .= substr( $matches[1], 0, -1 ); } else { $this->output .= $matches[1]; |