diff options
Diffstat (limited to 'includes/Parser.php')
-rw-r--r-- | includes/Parser.php | 1410 |
1 files changed, 809 insertions, 601 deletions
diff --git a/includes/Parser.php b/includes/Parser.php index 31976baf..76783448 100644 --- a/includes/Parser.php +++ b/includes/Parser.php @@ -13,25 +13,13 @@ */ define( 'MW_PARSER_VERSION', '1.6.1' ); -/** - * Variable substitution O(N^2) attack - * - * Without countermeasures, it would be possible to attack the parser by saving - * a page filled with a large number of inclusions of large pages. The size of - * the generated page would be proportional to the square of the input size. - * Hence, we limit the number of inclusions of any given page, thus bringing any - * attack back to O(N). - */ - -define( 'MAX_INCLUDE_REPEAT', 100 ); -define( 'MAX_INCLUDE_SIZE', 1000000 ); // 1 Million - define( 'RLH_FOR_UPDATE', 1 ); # Allowed values for $mOutputType define( 'OT_HTML', 1 ); define( 'OT_WIKI', 2 ); define( 'OT_MSG' , 3 ); +define( 'OT_PREPROCESS', 4 ); # Flags for setFunctionHook define( 'SFH_NO_HASH', 1 ); @@ -90,7 +78,8 @@ define( 'MW_COLON_STATE_COMMENTDASHDASH', 7 ); * settings: * $wgUseTex*, $wgUseDynamicDates*, $wgInterwikiMagic*, * $wgNamespacesWithSubpages, $wgAllowExternalImages*, - * $wgLocaltimezone, $wgAllowSpecialInclusion* + * $wgLocaltimezone, $wgAllowSpecialInclusion*, + * $wgMaxArticleSize* * * * only within ParserOptions * </pre> @@ -109,6 +98,7 @@ class Parser var $mOutput, $mAutonumber, $mDTopen, $mStripState = array(); var $mIncludeCount, $mArgStack, $mLastSection, $mInPre; var $mInterwikiLinkHolders, $mLinkHolders, $mUniqPrefix; + var $mIncludeSizes; var $mTemplates, // cache of already loaded templates, avoids // multiple SQL queries for the same string $mTemplatePath; // stores an unsorted hash of all the templates already loaded @@ -119,6 +109,7 @@ class Parser var $mOptions, // ParserOptions object $mTitle, // Title context, used for self-link rendering and similar things $mOutputType, // Output type, one of the OT_xxx constants + $ot, // Shortcut alias, see setOutputType() $mRevisionId; // ID to display in {{REVISIONID}} tags /**#@-*/ @@ -148,31 +139,35 @@ class Parser $this->setHook( 'pre', array( $this, 'renderPreTag' ) ); - $this->setFunctionHook( MAG_NS, array( 'CoreParserFunctions', 'ns' ), SFH_NO_HASH ); - $this->setFunctionHook( MAG_URLENCODE, array( 'CoreParserFunctions', 'urlencode' ), SFH_NO_HASH ); - $this->setFunctionHook( MAG_LCFIRST, array( 'CoreParserFunctions', 'lcfirst' ), SFH_NO_HASH ); - $this->setFunctionHook( MAG_UCFIRST, array( 'CoreParserFunctions', 'ucfirst' ), SFH_NO_HASH ); - $this->setFunctionHook( MAG_LC, array( 'CoreParserFunctions', 'lc' ), SFH_NO_HASH ); - $this->setFunctionHook( MAG_UC, array( 'CoreParserFunctions', 'uc' ), SFH_NO_HASH ); - $this->setFunctionHook( MAG_LOCALURL, array( 'CoreParserFunctions', 'localurl' ), SFH_NO_HASH ); - $this->setFunctionHook( MAG_LOCALURLE, array( 'CoreParserFunctions', 'localurle' ), SFH_NO_HASH ); - $this->setFunctionHook( MAG_FULLURL, array( 'CoreParserFunctions', 'fullurl' ), SFH_NO_HASH ); - $this->setFunctionHook( MAG_FULLURLE, array( 'CoreParserFunctions', 'fullurle' ), SFH_NO_HASH ); - $this->setFunctionHook( MAG_FORMATNUM, array( 'CoreParserFunctions', 'formatnum' ), SFH_NO_HASH ); - $this->setFunctionHook( MAG_GRAMMAR, array( 'CoreParserFunctions', 'grammar' ), SFH_NO_HASH ); - $this->setFunctionHook( MAG_PLURAL, array( 'CoreParserFunctions', 'plural' ), SFH_NO_HASH ); - $this->setFunctionHook( MAG_NUMBEROFPAGES, array( 'CoreParserFunctions', 'numberofpages' ), SFH_NO_HASH ); - $this->setFunctionHook( MAG_NUMBEROFUSERS, array( 'CoreParserFunctions', 'numberofusers' ), SFH_NO_HASH ); - $this->setFunctionHook( MAG_NUMBEROFARTICLES, array( 'CoreParserFunctions', 'numberofarticles' ), SFH_NO_HASH ); - $this->setFunctionHook( MAG_NUMBEROFFILES, array( 'CoreParserFunctions', 'numberoffiles' ), SFH_NO_HASH ); - $this->setFunctionHook( MAG_NUMBEROFADMINS, array( 'CoreParserFunctions', 'numberofadmins' ), SFH_NO_HASH ); - $this->setFunctionHook( MAG_LANGUAGE, array( 'CoreParserFunctions', 'language' ), SFH_NO_HASH ); + $this->setFunctionHook( 'int', array( 'CoreParserFunctions', 'intFunction' ), SFH_NO_HASH ); + $this->setFunctionHook( 'ns', array( 'CoreParserFunctions', 'ns' ), SFH_NO_HASH ); + $this->setFunctionHook( 'urlencode', array( 'CoreParserFunctions', 'urlencode' ), SFH_NO_HASH ); + $this->setFunctionHook( 'lcfirst', array( 'CoreParserFunctions', 'lcfirst' ), SFH_NO_HASH ); + $this->setFunctionHook( 'ucfirst', array( 'CoreParserFunctions', 'ucfirst' ), SFH_NO_HASH ); + $this->setFunctionHook( 'lc', array( 'CoreParserFunctions', 'lc' ), SFH_NO_HASH ); + $this->setFunctionHook( 'uc', array( 'CoreParserFunctions', 'uc' ), SFH_NO_HASH ); + $this->setFunctionHook( 'localurl', array( 'CoreParserFunctions', 'localurl' ), SFH_NO_HASH ); + $this->setFunctionHook( 'localurle', array( 'CoreParserFunctions', 'localurle' ), SFH_NO_HASH ); + $this->setFunctionHook( 'fullurl', array( 'CoreParserFunctions', 'fullurl' ), SFH_NO_HASH ); + $this->setFunctionHook( 'fullurle', array( 'CoreParserFunctions', 'fullurle' ), SFH_NO_HASH ); + $this->setFunctionHook( 'formatnum', array( 'CoreParserFunctions', 'formatnum' ), SFH_NO_HASH ); + $this->setFunctionHook( 'grammar', array( 'CoreParserFunctions', 'grammar' ), SFH_NO_HASH ); + $this->setFunctionHook( 'plural', array( 'CoreParserFunctions', 'plural' ), SFH_NO_HASH ); + $this->setFunctionHook( 'numberofpages', array( 'CoreParserFunctions', 'numberofpages' ), SFH_NO_HASH ); + $this->setFunctionHook( 'numberofusers', array( 'CoreParserFunctions', 'numberofusers' ), SFH_NO_HASH ); + $this->setFunctionHook( 'numberofarticles', array( 'CoreParserFunctions', 'numberofarticles' ), SFH_NO_HASH ); + $this->setFunctionHook( 'numberoffiles', array( 'CoreParserFunctions', 'numberoffiles' ), SFH_NO_HASH ); + $this->setFunctionHook( 'numberofadmins', array( 'CoreParserFunctions', 'numberofadmins' ), SFH_NO_HASH ); + $this->setFunctionHook( 'language', array( 'CoreParserFunctions', 'language' ), SFH_NO_HASH ); + $this->setFunctionHook( 'padleft', array( 'CoreParserFunctions', 'padleft' ), SFH_NO_HASH ); + $this->setFunctionHook( 'padright', array( 'CoreParserFunctions', 'padright' ), SFH_NO_HASH ); + $this->setFunctionHook( 'anchorencode', array( 'CoreParserFunctions', 'anchorencode' ), SFH_NO_HASH ); if ( $wgAllowDisplayTitle ) { - $this->setFunctionHook( MAG_DISPLAYTITLE, array( 'CoreParserFunctions', 'displaytitle' ), SFH_NO_HASH ); + $this->setFunctionHook( 'displaytitle', array( 'CoreParserFunctions', 'displaytitle' ), SFH_NO_HASH ); } if ( $wgAllowSlowParserFunctions ) { - $this->setFunctionHook( MAG_PAGESINNAMESPACE, array( 'CoreParserFunctions', 'pagesinnamespace' ), SFH_NO_HASH ); + $this->setFunctionHook( 'pagesinnamespace', array( 'CoreParserFunctions', 'pagesinnamespace' ), SFH_NO_HASH ); } $this->initialiseVariables(); @@ -187,6 +182,7 @@ class Parser * @private */ function clearState() { + wfProfileIn( __METHOD__ ); if ( $this->mFirstCall ) { $this->firstCallInit(); } @@ -226,8 +222,25 @@ class Parser $this->mShowToc = true; $this->mForceTocPosition = false; + $this->mIncludeSizes = array( + 'pre-expand' => 0, + 'post-expand' => 0, + 'arg' => 0 + ); wfRunHooks( 'ParserClearState', array( &$this ) ); + wfProfileOut( __METHOD__ ); + } + + function setOutputType( $ot ) { + $this->mOutputType = $ot; + // Shortcut alias + $this->ot = array( + 'html' => $ot == OT_HTML, + 'wiki' => $ot == OT_WIKI, + 'msg' => $ot == OT_MSG, + 'pre' => $ot == OT_PREPROCESS, + ); } /** @@ -235,7 +248,7 @@ class Parser * * @public */ - function UniqPrefix() { + function uniqPrefix() { return $this->mUniqPrefix; } @@ -259,7 +272,7 @@ class Parser */ global $wgUseTidy, $wgAlwaysUseTidy, $wgContLang; - $fname = 'Parser::parse'; + $fname = 'Parser::parse-' . wfGetCaller(); wfProfileIn( $fname ); if ( $clearState ) { @@ -268,8 +281,11 @@ class Parser $this->mOptions = $options; $this->mTitle =& $title; - $this->mRevisionId = $revid; - $this->mOutputType = OT_HTML; + $oldRevisionId = $this->mRevisionId; + if( $revid !== null ) { + $this->mRevisionId = $revid; + } + $this->setOutputType( OT_HTML ); //$text = $this->strip( $text, $this->mStripState ); // VOODOO MAGIC FIX! Sometimes the above segfaults in PHP5. @@ -279,12 +295,6 @@ class Parser $text = $this->strip( $text, $x ); wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$x ) ); - # Hook to suspend the parser in this state - if ( !wfRunHooks( 'ParserBeforeInternalParse', array( &$this, &$text, &$x ) ) ) { - wfProfileOut( $fname ); - return $text ; - } - $text = $this->internalParse( $text ); $text = $this->unstrip( $text, $this->mStripState ); @@ -321,8 +331,8 @@ class Parser } else { # attempt to sanitize at least some nesting problems # (bug #2702 and quite a few others) - $tidyregs = array( - # ''Something [http://www.cool.com cool''] --> + $tidyregs = array( + # ''Something [http://www.cool.com cool''] --> # <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a> '/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' => '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9', @@ -337,10 +347,10 @@ class Parser '\\1\\3<div\\5>\\6</div>\\8\\9', # remove empty italic or bold tag pairs, some # introduced by rules above - '/<([bi])><\/\\1>/' => '' + '/<([bi])><\/\\1>/' => '', ); - $text = preg_replace( + $text = preg_replace( array_keys( $tidyregs ), array_values( $tidyregs ), $text ); @@ -348,13 +358,63 @@ class Parser wfRunHooks( 'ParserAfterTidy', array( &$this, &$text ) ); + # Information on include size limits, for the benefit of users who try to skirt them + if ( max( $this->mIncludeSizes ) > 1000 ) { + $max = $this->mOptions->getMaxIncludeSize(); + $text .= "<!-- \n" . + "Pre-expand include size: {$this->mIncludeSizes['pre-expand']} bytes\n" . + "Post-expand include size: {$this->mIncludeSizes['post-expand']} bytes\n" . + "Template argument size: {$this->mIncludeSizes['arg']} bytes\n" . + "Maximum: $max bytes\n" . + "-->\n"; + } $this->mOutput->setText( $text ); + $this->mRevisionId = $oldRevisionId; wfProfileOut( $fname ); return $this->mOutput; } /** + * Recursive parser entry point that can be called from an extension tag + * hook. + */ + function recursiveTagParse( $text ) { + wfProfileIn( __METHOD__ ); + $x =& $this->mStripState; + wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$x ) ); + $text = $this->strip( $text, $x ); + wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$x ) ); + $text = $this->internalParse( $text ); + wfProfileOut( __METHOD__ ); + return $text; + } + + /** + * Expand templates and variables in the text, producing valid, static wikitext. + * Also removes comments. + */ + function preprocess( $text, $title, $options ) { + wfProfileIn( __METHOD__ ); + $this->clearState(); + $this->setOutputType( OT_PREPROCESS ); + $this->mOptions = $options; + $this->mTitle = $title; + $x =& $this->mStripState; + wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$x ) ); + $text = $this->strip( $text, $x ); + wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$x ) ); + if ( $this->mOptions->getRemoveComments() ) { + $text = Sanitizer::removeHTMLcomments( $text ); + } + $text = $this->replaceVariables( $text ); + $text = $this->unstrip( $text, $x ); + $text = $this->unstripNowiki( $text, $x ); + wfProfileOut( __METHOD__ ); + return $text; + } + + /** * Get a random string * * @private @@ -391,8 +451,7 @@ class Parser * @static */ function extractTagsAndParams($elements, $text, &$matches, $uniq_prefix = ''){ - $rand = Parser::getRandomString(); - $n = 1; + static $n = 1; $stripped = ''; $matches = array(); @@ -419,7 +478,7 @@ class Parser $inside = $p[4]; } - $marker = "$uniq_prefix-$element-$rand" . sprintf('%08X', $n++) . '-QINU'; + $marker = "$uniq_prefix-$element-" . sprintf('%08X', $n++) . '-QINU'; $stripped .= $marker; if ( $close === '/>' ) { @@ -470,11 +529,10 @@ class Parser * @private */ function strip( $text, &$state, $stripcomments = false , $dontstrip = array () ) { + wfProfileIn( __METHOD__ ); $render = ($this->mOutputType == OT_HTML); - # Replace any instances of the placeholders $uniq_prefix = $this->mUniqPrefix; - #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text ); $commentState = array(); $elements = array_merge( @@ -501,6 +559,7 @@ class Parser list( $element, $content, $params, $tag ) = $data; if( $render ) { $tagName = strtolower( $element ); + wfProfileIn( __METHOD__."-render-$tagName" ); switch( $tagName ) { case '!--': // Comment @@ -535,17 +594,25 @@ class Parser throw new MWException( "Invalid call hook $element" ); } } + wfProfileOut( __METHOD__."-render-$tagName" ); } else { // Just stripping tags; keep the source $output = $tag; } + + // Unstrip the output, because unstrip() is no longer recursive so + // it won't do it itself + $output = $this->unstrip( $output, $state ); + if( !$stripcomments && $element == '!--' ) { $commentState[$marker] = $output; + } elseif ( $element == 'html' || $element == 'nowiki' ) { + $state['nowiki'][$marker] = $output; } else { - $state[$element][$marker] = $output; + $state['general'][$marker] = $output; } } - + # Unstrip comments unless explicitly told otherwise. # (The comments are always stripped prior to this point, so as to # not invoke any extension tags / parser hooks contained within @@ -555,6 +622,7 @@ class Parser $text = strtr( $text, $commentState ); } + wfProfileOut( __METHOD__ ); return $text; } @@ -565,20 +633,14 @@ class Parser * @private */ function unstrip( $text, &$state ) { - if ( !is_array( $state ) ) { + if ( !isset( $state['general'] ) ) { return $text; } - $replacements = array(); - foreach( $state as $tag => $contentDict ) { - if( $tag != 'nowiki' && $tag != 'html' ) { - foreach( $contentDict as $uniq => $content ) { - $replacements[$uniq] = $content; - } - } - } - $text = strtr( $text, $replacements ); - + wfProfileIn( __METHOD__ ); + # TODO: good candidate for FSS + $text = strtr( $text, $state['general'] ); + wfProfileOut( __METHOD__ ); return $text; } @@ -588,20 +650,15 @@ class Parser * @private */ function unstripNoWiki( $text, &$state ) { - if ( !is_array( $state ) ) { + if ( !isset( $state['nowiki'] ) ) { return $text; } - $replacements = array(); - foreach( $state as $tag => $contentDict ) { - if( $tag == 'nowiki' || $tag == 'html' ) { - foreach( $contentDict as $uniq => $content ) { - $replacements[$uniq] = $content; - } - } - } - $text = strtr( $text, $replacements ); - + wfProfileIn( __METHOD__ ); + # TODO: good candidate for FSS + $text = strtr( $text, $state['nowiki'] ); + wfProfileOut( __METHOD__ ); + return $text; } @@ -617,7 +674,7 @@ class Parser if ( !$state ) { $state = array(); } - $state['item'][$rnd] = $text; + $state['general'][$rnd] = $text; return $rnd; } @@ -797,13 +854,13 @@ class Parser } $after = substr ( $x , 1 ) ; if ( $fc == '!' ) $after = str_replace ( '!!' , '||' , $after ) ; - + // Split up multiple cells on the same line. // FIXME: This can result in improper nesting of tags processed // by earlier parser steps, but should avoid splitting up eg // attribute values containing literal "||". $after = wfExplodeMarkup( '||', $after ); - + $t[$k] = '' ; # Loop through each table cell @@ -877,11 +934,17 @@ class Parser $fname = 'Parser::internalParse'; wfProfileIn( $fname ); + # Hook to suspend the parser in this state + if ( !wfRunHooks( 'ParserBeforeInternalParse', array( &$this, &$text, &$x ) ) ) { + wfProfileOut( $fname ); + return $text ; + } + # Remove <noinclude> tags and <includeonly> sections $text = strtr( $text, array( '<onlyinclude>' => '' , '</onlyinclude>' => '' ) ); $text = strtr( $text, array( '<noinclude>' => '', '</noinclude>' => '') ); $text = preg_replace( '/<includeonly>.*?<\/includeonly>/s', '', $text ); - + $text = Sanitizer::removeHTMLtags( $text, array( &$this, 'attributeStripCallback' ) ); $text = $this->replaceVariables( $text, $args ); @@ -923,9 +986,52 @@ class Parser * @private */ function &doMagicLinks( &$text ) { - $text = $this->magicISBN( $text ); - $text = $this->magicRFC( $text, 'RFC ', 'rfcurl' ); - $text = $this->magicRFC( $text, 'PMID ', 'pubmedurl' ); + wfProfileIn( __METHOD__ ); + $text = preg_replace_callback( + '!(?: # Start cases + <a.*?</a> | # Skip link text + <.*?> | # Skip stuff inside HTML elements + (?:RFC|PMID)\s+([0-9]+) | # RFC or PMID, capture number as m[1] + ISBN\s+([0-9Xx-]+) # ISBN, capture number as m[2] + )!x', array( &$this, 'magicLinkCallback' ), $text ); + wfProfileOut( __METHOD__ ); + return $text; + } + + function magicLinkCallback( $m ) { + if ( substr( $m[0], 0, 1 ) == '<' ) { + # Skip HTML element + return $m[0]; + } elseif ( substr( $m[0], 0, 4 ) == 'ISBN' ) { + $isbn = $m[2]; + $num = strtr( $isbn, array( + '-' => '', + ' ' => '', + 'x' => 'X', + )); + $titleObj = Title::makeTitle( NS_SPECIAL, 'Booksources' ); + $text = '<a href="' . + $titleObj->escapeLocalUrl( "isbn=$num" ) . + "\" class=\"internal\">ISBN $isbn</a>"; + } else { + if ( substr( $m[0], 0, 3 ) == 'RFC' ) { + $keyword = 'RFC'; + $urlmsg = 'rfcurl'; + $id = $m[1]; + } elseif ( substr( $m[0], 0, 4 ) == 'PMID' ) { + $keyword = 'PMID'; + $urlmsg = 'pubmedurl'; + $id = $m[1]; + } else { + throw new MWException( __METHOD__.': unrecognised match type "' . + substr($m[0], 0, 20 ) . '"' ); + } + + $url = wfMsg( $urlmsg, $id); + $sk =& $this->mOptions->getSkin(); + $la = $sk->getExternalLinkAttributes( $url, $keyword.$id ); + $text = "<a href=\"{$url}\"{$la}>{$keyword} {$id}</a>"; + } return $text; } @@ -1193,13 +1299,8 @@ class Parser } $text = $wgContLang->markNoConversion($text); - - # Normalize any HTML entities in input. They will be - # re-escaped by makeExternalLink(). - $url = Sanitizer::decodeCharReferences( $url ); - # Escape any control characters introduced by the above step - $url = preg_replace( '/[\][<>"\\x00-\\x20\\x7F]/e', "urlencode('\\0')", $url ); + $url = Sanitizer::cleanUrl( $url ); # Process the trail (i.e. everything after this link up until start of the next link), # replacing any non-bracketed links @@ -1280,12 +1381,7 @@ class Parser $url = substr( $url, 0, -$numSepChars ); } - # Normalize any HTML entities in input. They will be - # re-escaped by makeExternalLink() or maybeMakeExternalImage() - $url = Sanitizer::decodeCharReferences( $url ); - - # Escape any control characters introduced by the above step - $url = preg_replace( '/[\][<>"\\x00-\\x20\\x7F]/e', "urlencode('\\0')", $url ); + $url = Sanitizer::cleanUrl( $url ); # Is this an external image? $text = $this->maybeMakeExternalImage( $url ); @@ -1316,7 +1412,7 @@ class Parser * the URL differently; as a workaround, just use the output for * statistical records, not for actual linking/output. */ - function replaceUnusualEscapes( $url ) { + static function replaceUnusualEscapes( $url ) { return preg_replace_callback( '/%[0-9A-Fa-f]{2}/', array( 'Parser', 'replaceUnusualEscapesCallback' ), $url ); } @@ -1327,7 +1423,7 @@ class Parser * @static * @private */ - function replaceUnusualEscapesCallback( $matches ) { + private static function replaceUnusualEscapesCallback( $matches ) { $char = urldecode( $matches[0] ); $ord = ord( $char ); // Is it an unsafe or HTTP reserved character according to RFC 1738? @@ -1397,7 +1493,7 @@ class Parser $useLinkPrefixExtension = $wgContLang->linkPrefixExtension(); if( is_null( $this->mTitle ) ) { - throw new MWException( 'nooo' ); + throw new MWException( __METHOD__.": \$this->mTitle is null\n" ); } $nottalk = !$this->mTitle->isTalkPage(); @@ -1412,10 +1508,8 @@ class Parser } $selflink = $this->mTitle->getPrefixedText(); - wfProfileOut( $fname.'-setup' ); - - $checkVariantLink = sizeof($wgContLang->getVariants())>1; $useSubpages = $this->areSubpagesAllowed(); + wfProfileOut( $fname.'-setup' ); # Loop for each link for ($k = 0; isset( $a[$k] ); $k++) { @@ -1438,6 +1532,7 @@ class Parser $might_be_img = false; + wfProfileIn( "$fname-e1" ); if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt $text = $m[2]; # If we get a ] at the beginning of $m[3] that means we have a link that's something like: @@ -1449,27 +1544,33 @@ class Parser # and no image is in sight. See bug 2095. # if( $text !== '' && - preg_match( "/^\](.*)/s", $m[3], $n ) && + substr( $m[3], 0, 1 ) === ']' && strpos($text, '[') !== false ) { $text .= ']'; # so that replaceExternalLinks($text) works later - $m[3] = $n[1]; + $m[3] = substr( $m[3], 1 ); } # fix up urlencoded title texts - if(preg_match('/%/', $m[1] )) + if( strpos( $m[1], '%' ) !== false ) { # Should anchors '#' also be rejected? $m[1] = str_replace( array('<', '>'), array('<', '>'), urldecode($m[1]) ); + } $trail = $m[3]; } elseif( preg_match($e1_img, $line, $m) ) { # Invalid, but might be an image with a link in its caption $might_be_img = true; $text = $m[2]; - if(preg_match('/%/', $m[1] )) $m[1] = urldecode($m[1]); + if ( strpos( $m[1], '%' ) !== false ) { + $m[1] = urldecode($m[1]); + } $trail = ""; } else { # Invalid form; output directly $s .= $prefix . '[[' . $line ; + wfProfileOut( "$fname-e1" ); continue; } + wfProfileOut( "$fname-e1" ); + wfProfileIn( "$fname-misc" ); # Don't allow internal links to pages containing # PROTO: where PROTO is a valid URL protocol; these @@ -1492,38 +1593,37 @@ class Parser $link = substr($link, 1); } + wfProfileOut( "$fname-misc" ); + wfProfileIn( "$fname-title" ); $nt = Title::newFromText( $this->unstripNoWiki($link, $this->mStripState) ); if( !$nt ) { $s .= $prefix . '[[' . $line; + wfProfileOut( "$fname-title" ); continue; } - #check other language variants of the link - #if the article does not exist - if( $checkVariantLink - && $nt->getArticleID() == 0 ) { - $wgContLang->findVariantLink($link, $nt); - } - $ns = $nt->getNamespace(); $iw = $nt->getInterWiki(); - + wfProfileOut( "$fname-title" ); + if ($might_be_img) { # if this is actually an invalid link + wfProfileIn( "$fname-might_be_img" ); if ($ns == NS_IMAGE && $noforce) { #but might be an image $found = false; while (isset ($a[$k+1]) ) { #look at the next 'line' to see if we can close it there $spliced = array_splice( $a, $k + 1, 1 ); $next_line = array_shift( $spliced ); - if( preg_match("/^(.*?]].*?)]](.*)$/sD", $next_line, $m) ) { - # the first ]] closes the inner link, the second the image + $m = explode( ']]', $next_line, 3 ); + if ( count( $m ) == 3 ) { + # the first ]] closes the inner link, the second the image $found = true; - $text .= '[[' . $m[1]; + $text .= "[[{$m[0]}]]{$m[1]}"; $trail = $m[2]; break; - } elseif( preg_match("/^.*?]].*$/sD", $next_line, $m) ) { + } elseif ( count( $m ) == 2 ) { #if there's exactly one ]] that's fine, we'll keep looking - $text .= '[[' . $m[0]; + $text .= "[[{$m[0]}]]{$m[1]}"; } else { #if $next_line is invalid too, we need look no further $text .= '[[' . $next_line; @@ -1534,35 +1634,40 @@ class Parser # we couldn't find the end of this imageLink, so output it raw #but don't ignore what might be perfectly normal links in the text we've examined $text = $this->replaceInternalLinks($text); - $s .= $prefix . '[[' . $link . '|' . $text; + $s .= "{$prefix}[[$link|$text"; # note: no $trail, because without an end, there *is* no trail + wfProfileOut( "$fname-might_be_img" ); continue; } } else { #it's not an image, so output it raw - $s .= $prefix . '[[' . $link . '|' . $text; + $s .= "{$prefix}[[$link|$text"; # note: no $trail, because without an end, there *is* no trail + wfProfileOut( "$fname-might_be_img" ); continue; } + wfProfileOut( "$fname-might_be_img" ); } $wasblank = ( '' == $text ); if( $wasblank ) $text = $link; - # Link not escaped by : , create the various objects if( $noforce ) { # Interwikis + wfProfileIn( "$fname-interwiki" ); if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgContLang->getLanguageName( $iw ) ) { $this->mOutput->addLanguageLink( $nt->getFullText() ); $s = rtrim($s . "\n"); $s .= trim($prefix . $trail, "\n") == '' ? '': $prefix . $trail; + wfProfileOut( "$fname-interwiki" ); continue; } + wfProfileOut( "$fname-interwiki" ); if ( $ns == NS_IMAGE ) { wfProfileIn( "$fname-image" ); - if ( !wfIsBadImage( $nt->getDBkey() ) ) { + if ( !wfIsBadImage( $nt->getDBkey(), $this->mTitle ) ) { # recursively parse links inside the image caption # actually, this will parse them in any other parameters, too, # but it might be hard to fix that, and it doesn't matter ATM @@ -1630,12 +1735,13 @@ class Parser $s .= $this->makeKnownLinkHolder( $nt, $text, '', $trail, $prefix ); continue; } elseif( $ns == NS_IMAGE ) { - $img = Image::newFromTitle( $nt ); + $img = new Image( $nt ); if( $img->exists() ) { // Force a blue link if the file exists; may be a remote // upload on the shared repository, and we want to see its // auto-generated page. $s .= $this->makeKnownLinkHolder( $nt, $text, '', $trail, $prefix ); + $this->mOutput->addLink( $nt ); continue; } } @@ -1648,11 +1754,12 @@ class Parser /** * Make a link placeholder. The text returned can be later resolved to a real link with * replaceLinkHolders(). This is done for two reasons: firstly to avoid further - * parsing of interwiki links, and secondly to allow all extistence checks and + * parsing of interwiki links, and secondly to allow all existence checks and * article length checks (for stub links) to be bundled into a single query. * */ function makeLinkHolder( &$nt, $text = '', $query = '', $trail = '', $prefix = '' ) { + wfProfileIn( __METHOD__ ); if ( ! is_object($nt) ) { # Fail gracefully $retVal = "<!-- ERROR -->{$prefix}{$text}{$trail}"; @@ -1674,6 +1781,7 @@ class Parser $retVal = '<!--LINK '. ($nr-1) ."-->{$trail}"; } } + wfProfileOut( __METHOD__ ); return $retVal; } @@ -1745,6 +1853,9 @@ class Parser wfProfileIn( $fname ); $ret = $target; # default return value is no change + # bug 7425 + $target = trim( $target ); + # Some namespaces don't allow subpages, # so only perform processing if subpages are allowed if( $this->areSubpagesAllowed() ) { @@ -2049,14 +2160,14 @@ class Parser function findColonNoLinks($str, &$before, &$after) { $fname = 'Parser::findColonNoLinks'; wfProfileIn( $fname ); - + $pos = strpos( $str, ':' ); if( $pos === false ) { // Nothing to find! wfProfileOut( $fname ); return false; } - + $lt = strpos( $str, '<' ); if( $lt === false || $lt > $pos ) { // Easy; no tag nesting to worry about @@ -2065,14 +2176,14 @@ class Parser wfProfileOut( $fname ); return $pos; } - + // Ugly state machine to walk through avoiding tags. $state = MW_COLON_STATE_TEXT; $stack = 0; $len = strlen( $str ); for( $i = 0; $i < $len; $i++ ) { $c = $str{$i}; - + switch( $state ) { // (Using the number is a performance hack for common cases) case 0: // MW_COLON_STATE_TEXT: @@ -2222,107 +2333,165 @@ class Parser $ts = time(); wfRunHooks( 'ParserGetVariableValueTs', array( &$this, &$ts ) ); + # Use the time zone + global $wgLocaltimezone; + if ( isset( $wgLocaltimezone ) ) { + $oldtz = getenv( 'TZ' ); + putenv( 'TZ='.$wgLocaltimezone ); + } + $localTimestamp = date( 'YmdHis', $ts ); + $localMonth = date( 'm', $ts ); + $localMonthName = date( 'n', $ts ); + $localDay = date( 'j', $ts ); + $localDay2 = date( 'd', $ts ); + $localDayOfWeek = date( 'w', $ts ); + $localWeek = date( 'W', $ts ); + $localYear = date( 'Y', $ts ); + $localHour = date( 'H', $ts ); + if ( isset( $wgLocaltimezone ) ) { + putenv( 'TZ='.$oldtz ); + } + switch ( $index ) { - case MAG_CURRENTMONTH: + case 'currentmonth': return $varCache[$index] = $wgContLang->formatNum( date( 'm', $ts ) ); - case MAG_CURRENTMONTHNAME: + case 'currentmonthname': return $varCache[$index] = $wgContLang->getMonthName( date( 'n', $ts ) ); - case MAG_CURRENTMONTHNAMEGEN: + case 'currentmonthnamegen': return $varCache[$index] = $wgContLang->getMonthNameGen( date( 'n', $ts ) ); - case MAG_CURRENTMONTHABBREV: + case 'currentmonthabbrev': return $varCache[$index] = $wgContLang->getMonthAbbreviation( date( 'n', $ts ) ); - case MAG_CURRENTDAY: + case 'currentday': return $varCache[$index] = $wgContLang->formatNum( date( 'j', $ts ) ); - case MAG_CURRENTDAY2: + case 'currentday2': return $varCache[$index] = $wgContLang->formatNum( date( 'd', $ts ) ); - case MAG_PAGENAME: + case 'localmonth': + return $varCache[$index] = $wgContLang->formatNum( $localMonth ); + case 'localmonthname': + return $varCache[$index] = $wgContLang->getMonthName( $localMonthName ); + case 'localmonthnamegen': + return $varCache[$index] = $wgContLang->getMonthNameGen( $localMonthName ); + case 'localmonthabbrev': + return $varCache[$index] = $wgContLang->getMonthAbbreviation( $localMonthName ); + case 'localday': + return $varCache[$index] = $wgContLang->formatNum( $localDay ); + case 'localday2': + return $varCache[$index] = $wgContLang->formatNum( $localDay2 ); + case 'pagename': return $this->mTitle->getText(); - case MAG_PAGENAMEE: + case 'pagenamee': return $this->mTitle->getPartialURL(); - case MAG_FULLPAGENAME: + case 'fullpagename': return $this->mTitle->getPrefixedText(); - case MAG_FULLPAGENAMEE: + case 'fullpagenamee': return $this->mTitle->getPrefixedURL(); - case MAG_SUBPAGENAME: + case 'subpagename': return $this->mTitle->getSubpageText(); - case MAG_SUBPAGENAMEE: + case 'subpagenamee': return $this->mTitle->getSubpageUrlForm(); - case MAG_BASEPAGENAME: + case 'basepagename': return $this->mTitle->getBaseText(); - case MAG_BASEPAGENAMEE: + case 'basepagenamee': return wfUrlEncode( str_replace( ' ', '_', $this->mTitle->getBaseText() ) ); - case MAG_TALKPAGENAME: + case 'talkpagename': if( $this->mTitle->canTalk() ) { $talkPage = $this->mTitle->getTalkPage(); return $talkPage->getPrefixedText(); } else { return ''; } - case MAG_TALKPAGENAMEE: + case 'talkpagenamee': if( $this->mTitle->canTalk() ) { $talkPage = $this->mTitle->getTalkPage(); return $talkPage->getPrefixedUrl(); } else { return ''; } - case MAG_SUBJECTPAGENAME: + case 'subjectpagename': $subjPage = $this->mTitle->getSubjectPage(); return $subjPage->getPrefixedText(); - case MAG_SUBJECTPAGENAMEE: + case 'subjectpagenamee': $subjPage = $this->mTitle->getSubjectPage(); return $subjPage->getPrefixedUrl(); - case MAG_REVISIONID: + case 'revisionid': return $this->mRevisionId; - case MAG_NAMESPACE: + case 'revisionday': + return intval( substr( wfRevisionTimestamp( $this->mRevisionId ), 6, 2 ) ); + case 'revisionday2': + return substr( wfRevisionTimestamp( $this->mRevisionId ), 6, 2 ); + case 'revisionmonth': + return intval( substr( wfRevisionTimestamp( $this->mRevisionId ), 4, 2 ) ); + case 'revisionyear': + return substr( wfRevisionTimestamp( $this->mRevisionId ), 0, 4 ); + case 'revisiontimestamp': + return wfRevisionTimestamp( $this->mRevisionId ); + case 'namespace': return str_replace('_',' ',$wgContLang->getNsText( $this->mTitle->getNamespace() ) ); - case MAG_NAMESPACEE: + case 'namespacee': return wfUrlencode( $wgContLang->getNsText( $this->mTitle->getNamespace() ) ); - case MAG_TALKSPACE: + case 'talkspace': return $this->mTitle->canTalk() ? str_replace('_',' ',$this->mTitle->getTalkNsText()) : ''; - case MAG_TALKSPACEE: + case 'talkspacee': return $this->mTitle->canTalk() ? wfUrlencode( $this->mTitle->getTalkNsText() ) : ''; - case MAG_SUBJECTSPACE: + case 'subjectspace': return $this->mTitle->getSubjectNsText(); - case MAG_SUBJECTSPACEE: + case 'subjectspacee': return( wfUrlencode( $this->mTitle->getSubjectNsText() ) ); - case MAG_CURRENTDAYNAME: + case 'currentdayname': return $varCache[$index] = $wgContLang->getWeekdayName( date( 'w', $ts ) + 1 ); - case MAG_CURRENTYEAR: + case 'currentyear': return $varCache[$index] = $wgContLang->formatNum( date( 'Y', $ts ), true ); - case MAG_CURRENTTIME: + case 'currenttime': return $varCache[$index] = $wgContLang->time( wfTimestamp( TS_MW, $ts ), false, false ); - case MAG_CURRENTWEEK: + case 'currenthour': + return $varCache[$index] = $wgContLang->formatNum( date( 'H', $ts ), true ); + case 'currentweek': // @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to // int to remove the padding return $varCache[$index] = $wgContLang->formatNum( (int)date( 'W', $ts ) ); - case MAG_CURRENTDOW: + case 'currentdow': return $varCache[$index] = $wgContLang->formatNum( date( 'w', $ts ) ); - case MAG_NUMBEROFARTICLES: + case 'localdayname': + return $varCache[$index] = $wgContLang->getWeekdayName( $localDayOfWeek + 1 ); + case 'localyear': + return $varCache[$index] = $wgContLang->formatNum( $localYear, true ); + case 'localtime': + return $varCache[$index] = $wgContLang->time( $localTimestamp, false, false ); + case 'localhour': + return $varCache[$index] = $wgContLang->formatNum( $localHour, true ); + case 'localweek': + // @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to + // int to remove the padding + return $varCache[$index] = $wgContLang->formatNum( (int)$localWeek ); + case 'localdow': + return $varCache[$index] = $wgContLang->formatNum( $localDayOfWeek ); + case 'numberofarticles': return $varCache[$index] = $wgContLang->formatNum( wfNumberOfArticles() ); - case MAG_NUMBEROFFILES: + case 'numberoffiles': return $varCache[$index] = $wgContLang->formatNum( wfNumberOfFiles() ); - case MAG_NUMBEROFUSERS: + case 'numberofusers': return $varCache[$index] = $wgContLang->formatNum( wfNumberOfUsers() ); - case MAG_NUMBEROFPAGES: + case 'numberofpages': return $varCache[$index] = $wgContLang->formatNum( wfNumberOfPages() ); - case MAG_NUMBEROFADMINS: + case 'numberofadmins': return $varCache[$index] = $wgContLang->formatNum( wfNumberOfAdmins() ); - case MAG_CURRENTTIMESTAMP: + case 'currenttimestamp': return $varCache[$index] = wfTimestampNow(); - case MAG_CURRENTVERSION: - global $wgVersion; - return $wgVersion; - case MAG_SITENAME: + case 'localtimestamp': + return $varCache[$index] = $localTimestamp; + case 'currentversion': + return $varCache[$index] = SpecialVersion::getVersion(); + case 'sitename': return $wgSitename; - case MAG_SERVER: + case 'server': return $wgServer; - case MAG_SERVERNAME: + case 'servername': return $wgServerName; - case MAG_SCRIPTPATH: + case 'scriptpath': return $wgScriptPath; - case MAG_DIRECTIONMARK: + case 'directionmark': return $wgContLang->getDirMark(); - case MAG_CONTENTLANGUAGE: + case 'contentlanguage': global $wgContLanguageCode; return $wgContLanguageCode; default: @@ -2342,10 +2511,10 @@ class Parser function initialiseVariables() { $fname = 'Parser::initialiseVariables'; wfProfileIn( $fname ); - global $wgVariableIDs; + $variableIDs = MagicWord::getVariableIDs(); $this->mVariables = array(); - foreach ( $wgVariableIDs as $id ) { + foreach ( $variableIDs as $id ) { $mw =& MagicWord::get( $id ); $mw->addToArray( $this->mVariables, $id ); } @@ -2361,172 +2530,169 @@ class Parser * '{' => array( # opening parentheses * 'end' => '}', # closing parentheses * 'cb' => array(2 => callback, # replacement callback to call if {{..}} is found - * 4 => callback # replacement callback to call if {{{{..}}}} is found + * 3 => callback # replacement callback to call if {{{..}}} is found * ) * ) + * 'min' => 2, # Minimum parenthesis count in cb + * 'max' => 3, # Maximum parenthesis count in cb * @private */ function replace_callback ($text, $callbacks) { - wfProfileIn( __METHOD__ . '-self' ); + wfProfileIn( __METHOD__ ); $openingBraceStack = array(); # this array will hold a stack of parentheses which are not closed yet $lastOpeningBrace = -1; # last not closed parentheses - for ($i = 0; $i < strlen($text); $i++) { - # check for any opening brace - $rule = null; - $nextPos = -1; - foreach ($callbacks as $key => $value) { - $pos = strpos ($text, $key, $i); - if (false !== $pos && (-1 == $nextPos || $pos < $nextPos)) { - $rule = $value; - $nextPos = $pos; - } + $validOpeningBraces = implode( '', array_keys( $callbacks ) ); + + $i = 0; + while ( $i < strlen( $text ) ) { + # Find next opening brace, closing brace or pipe + if ( $lastOpeningBrace == -1 ) { + $currentClosing = ''; + $search = $validOpeningBraces; + } else { + $currentClosing = $openingBraceStack[$lastOpeningBrace]['braceEnd']; + $search = $validOpeningBraces . '|' . $currentClosing; } - - if ($lastOpeningBrace >= 0) { - $pos = strpos ($text, $openingBraceStack[$lastOpeningBrace]['braceEnd'], $i); - - if (false !== $pos && (-1 == $nextPos || $pos < $nextPos)){ - $rule = null; - $nextPos = $pos; - } - - $pos = strpos ($text, '|', $i); - - if (false !== $pos && (-1 == $nextPos || $pos < $nextPos)){ - $rule = null; - $nextPos = $pos; + $rule = null; + $i += strcspn( $text, $search, $i ); + if ( $i < strlen( $text ) ) { + if ( $text[$i] == '|' ) { + $found = 'pipe'; + } elseif ( $text[$i] == $currentClosing ) { + $found = 'close'; + } elseif ( isset( $callbacks[$text[$i]] ) ) { + $found = 'open'; + $rule = $callbacks[$text[$i]]; + } else { + # Some versions of PHP have a strcspn which stops on null characters + # Ignore and continue + ++$i; + continue; } - } - - if ($nextPos == -1) + } else { + # All done break; + } - $i = $nextPos; - - # found openning brace, lets add it to parentheses stack - if (null != $rule) { + if ( $found == 'open' ) { + # found opening brace, let's add it to parentheses stack $piece = array('brace' => $text[$i], 'braceEnd' => $rule['end'], - 'count' => 1, 'title' => '', 'parts' => null); - # count openning brace characters - while ($i+1 < strlen($text) && $text[$i+1] == $piece['brace']) { - $piece['count']++; - $i++; - } - - $piece['startAt'] = $i+1; - $piece['partStart'] = $i+1; + # count opening brace characters + $piece['count'] = strspn( $text, $piece['brace'], $i ); + $piece['startAt'] = $piece['partStart'] = $i + $piece['count']; + $i += $piece['count']; - # we need to add to stack only if openning brace count is enough for any given rule - foreach ($rule['cb'] as $cnt => $fn) { - if ($piece['count'] >= $cnt) { - $lastOpeningBrace ++; - $openingBraceStack[$lastOpeningBrace] = $piece; - break; + # we need to add to stack only if opening brace count is enough for one of the rules + if ( $piece['count'] >= $rule['min'] ) { + $lastOpeningBrace ++; + $openingBraceStack[$lastOpeningBrace] = $piece; + } + } elseif ( $found == 'close' ) { + # lets check if it is enough characters for closing brace + $maxCount = $openingBraceStack[$lastOpeningBrace]['count']; + $count = strspn( $text, $text[$i], $i, $maxCount ); + + # check for maximum matching characters (if there are 5 closing + # characters, we will probably need only 3 - depending on the rules) + $matchingCount = 0; + $matchingCallback = null; + $cbType = $callbacks[$openingBraceStack[$lastOpeningBrace]['brace']]; + if ( $count > $cbType['max'] ) { + # The specified maximum exists in the callback array, unless the caller + # has made an error + $matchingCount = $cbType['max']; + } else { + # Count is less than the maximum + # Skip any gaps in the callback array to find the true largest match + # Need to use array_key_exists not isset because the callback can be null + $matchingCount = $count; + while ( $matchingCount > 0 && !array_key_exists( $matchingCount, $cbType['cb'] ) ) { + --$matchingCount; } } - continue; - } - else if ($lastOpeningBrace >= 0) { - # first check if it is a closing brace - if ($openingBraceStack[$lastOpeningBrace]['braceEnd'] == $text[$i]) { - # lets check if it is enough characters for closing brace - $count = 1; - while ($i+$count < strlen($text) && $text[$i+$count] == $text[$i]) - $count++; - - # if there are more closing parentheses than opening ones, we parse less - if ($openingBraceStack[$lastOpeningBrace]['count'] < $count) - $count = $openingBraceStack[$lastOpeningBrace]['count']; - - # check for maximum matching characters (if there are 5 closing characters, we will probably need only 3 - depending on the rules) - $matchingCount = 0; - $matchingCallback = null; - foreach ($callbacks[$openingBraceStack[$lastOpeningBrace]['brace']]['cb'] as $cnt => $fn) { - if ($count >= $cnt && $matchingCount < $cnt) { - $matchingCount = $cnt; - $matchingCallback = $fn; - } - } + if ($matchingCount <= 0) { + $i += $count; + continue; + } + $matchingCallback = $cbType['cb'][$matchingCount]; - if ($matchingCount == 0) { - $i += $count - 1; - continue; - } + # let's set a title or last part (if '|' was found) + if (null === $openingBraceStack[$lastOpeningBrace]['parts']) { + $openingBraceStack[$lastOpeningBrace]['title'] = + substr($text, $openingBraceStack[$lastOpeningBrace]['partStart'], + $i - $openingBraceStack[$lastOpeningBrace]['partStart']); + } else { + $openingBraceStack[$lastOpeningBrace]['parts'][] = + substr($text, $openingBraceStack[$lastOpeningBrace]['partStart'], + $i - $openingBraceStack[$lastOpeningBrace]['partStart']); + } - # lets set a title or last part (if '|' was found) - if (null === $openingBraceStack[$lastOpeningBrace]['parts']) - $openingBraceStack[$lastOpeningBrace]['title'] = substr($text, $openingBraceStack[$lastOpeningBrace]['partStart'], $i - $openingBraceStack[$lastOpeningBrace]['partStart']); - else - $openingBraceStack[$lastOpeningBrace]['parts'][] = substr($text, $openingBraceStack[$lastOpeningBrace]['partStart'], $i - $openingBraceStack[$lastOpeningBrace]['partStart']); - - $pieceStart = $openingBraceStack[$lastOpeningBrace]['startAt'] - $matchingCount; - $pieceEnd = $i + $matchingCount; - - if( is_callable( $matchingCallback ) ) { - $cbArgs = array ( - 'text' => substr($text, $pieceStart, $pieceEnd - $pieceStart), - 'title' => trim($openingBraceStack[$lastOpeningBrace]['title']), - 'parts' => $openingBraceStack[$lastOpeningBrace]['parts'], - 'lineStart' => (($pieceStart > 0) && ($text[$pieceStart-1] == "\n")), - ); - # finally we can call a user callback and replace piece of text - wfProfileOut( __METHOD__ . '-self' ); - $replaceWith = call_user_func( $matchingCallback, $cbArgs ); - wfProfileIn( __METHOD__ . '-self' ); - $text = substr($text, 0, $pieceStart) . $replaceWith . substr($text, $pieceEnd); - $i = $pieceStart + strlen($replaceWith) - 1; - } - else { - # null value for callback means that parentheses should be parsed, but not replaced - $i += $matchingCount - 1; - } + $pieceStart = $openingBraceStack[$lastOpeningBrace]['startAt'] - $matchingCount; + $pieceEnd = $i + $matchingCount; + + if( is_callable( $matchingCallback ) ) { + $cbArgs = array ( + 'text' => substr($text, $pieceStart, $pieceEnd - $pieceStart), + 'title' => trim($openingBraceStack[$lastOpeningBrace]['title']), + 'parts' => $openingBraceStack[$lastOpeningBrace]['parts'], + 'lineStart' => (($pieceStart > 0) && ($text[$pieceStart-1] == "\n")), + ); + # finally we can call a user callback and replace piece of text + $replaceWith = call_user_func( $matchingCallback, $cbArgs ); + $text = substr($text, 0, $pieceStart) . $replaceWith . substr($text, $pieceEnd); + $i = $pieceStart + strlen($replaceWith); + } else { + # null value for callback means that parentheses should be parsed, but not replaced + $i += $matchingCount; + } - # reset last openning parentheses, but keep it in case there are unused characters - $piece = array('brace' => $openingBraceStack[$lastOpeningBrace]['brace'], - 'braceEnd' => $openingBraceStack[$lastOpeningBrace]['braceEnd'], - 'count' => $openingBraceStack[$lastOpeningBrace]['count'], - 'title' => '', - 'parts' => null, - 'startAt' => $openingBraceStack[$lastOpeningBrace]['startAt']); - $openingBraceStack[$lastOpeningBrace--] = null; - - if ($matchingCount < $piece['count']) { - $piece['count'] -= $matchingCount; - $piece['startAt'] -= $matchingCount; - $piece['partStart'] = $piece['startAt']; - # do we still qualify for any callback with remaining count? - foreach ($callbacks[$piece['brace']]['cb'] as $cnt => $fn) { - if ($piece['count'] >= $cnt) { - $lastOpeningBrace ++; - $openingBraceStack[$lastOpeningBrace] = $piece; - break; - } + # reset last opening parentheses, but keep it in case there are unused characters + $piece = array('brace' => $openingBraceStack[$lastOpeningBrace]['brace'], + 'braceEnd' => $openingBraceStack[$lastOpeningBrace]['braceEnd'], + 'count' => $openingBraceStack[$lastOpeningBrace]['count'], + 'title' => '', + 'parts' => null, + 'startAt' => $openingBraceStack[$lastOpeningBrace]['startAt']); + $openingBraceStack[$lastOpeningBrace--] = null; + + if ($matchingCount < $piece['count']) { + $piece['count'] -= $matchingCount; + $piece['startAt'] -= $matchingCount; + $piece['partStart'] = $piece['startAt']; + # do we still qualify for any callback with remaining count? + $currentCbList = $callbacks[$piece['brace']]['cb']; + while ( $piece['count'] ) { + if ( array_key_exists( $piece['count'], $currentCbList ) ) { + $lastOpeningBrace++; + $openingBraceStack[$lastOpeningBrace] = $piece; + break; } + --$piece['count']; } - continue; } - + } elseif ( $found == 'pipe' ) { # lets set a title if it is a first separator, or next part otherwise - if ($text[$i] == '|') { - if (null === $openingBraceStack[$lastOpeningBrace]['parts']) { - $openingBraceStack[$lastOpeningBrace]['title'] = substr($text, $openingBraceStack[$lastOpeningBrace]['partStart'], $i - $openingBraceStack[$lastOpeningBrace]['partStart']); - $openingBraceStack[$lastOpeningBrace]['parts'] = array(); - } - else - $openingBraceStack[$lastOpeningBrace]['parts'][] = substr($text, $openingBraceStack[$lastOpeningBrace]['partStart'], $i - $openingBraceStack[$lastOpeningBrace]['partStart']); - - $openingBraceStack[$lastOpeningBrace]['partStart'] = $i + 1; + if (null === $openingBraceStack[$lastOpeningBrace]['parts']) { + $openingBraceStack[$lastOpeningBrace]['title'] = + substr($text, $openingBraceStack[$lastOpeningBrace]['partStart'], + $i - $openingBraceStack[$lastOpeningBrace]['partStart']); + $openingBraceStack[$lastOpeningBrace]['parts'] = array(); + } else { + $openingBraceStack[$lastOpeningBrace]['parts'][] = + substr($text, $openingBraceStack[$lastOpeningBrace]['partStart'], + $i - $openingBraceStack[$lastOpeningBrace]['partStart']); } + $openingBraceStack[$lastOpeningBrace]['partStart'] = ++$i; } } - wfProfileOut( __METHOD__ . '-self' ); + wfProfileOut( __METHOD__ ); return $text; } @@ -2547,11 +2713,11 @@ class Parser */ function replaceVariables( $text, $args = array(), $argsOnly = false ) { # Prevent too big inclusions - if( strlen( $text ) > MAX_INCLUDE_SIZE ) { + if( strlen( $text ) > $this->mOptions->getMaxIncludeSize() ) { return $text; } - $fname = 'Parser::replaceVariables'; + $fname = __METHOD__ /*. '-L' . count( $this->mArgStack )*/; wfProfileIn( $fname ); # This function is called recursively. To keep track of arguments we need a stack: @@ -2561,32 +2727,45 @@ class Parser if ( !$argsOnly ) { $braceCallbacks[2] = array( &$this, 'braceSubstitution' ); } - if ( $this->mOutputType == OT_HTML || $this->mOutputType == OT_WIKI ) { + if ( $this->mOutputType != OT_MSG ) { $braceCallbacks[3] = array( &$this, 'argSubstitution' ); } - $callbacks = array(); - $callbacks['{'] = array('end' => '}', 'cb' => $braceCallbacks); - $callbacks['['] = array('end' => ']', 'cb' => array(2=>null)); - $text = $this->replace_callback ($text, $callbacks); - - array_pop( $this->mArgStack ); + if ( $braceCallbacks ) { + $callbacks = array( + '{' => array( + 'end' => '}', + 'cb' => $braceCallbacks, + 'min' => $argsOnly ? 3 : 2, + 'max' => isset( $braceCallbacks[3] ) ? 3 : 2, + ), + '[' => array( + 'end' => ']', + 'cb' => array(2=>null), + 'min' => 2, + 'max' => 2, + ) + ); + $text = $this->replace_callback ($text, $callbacks); + array_pop( $this->mArgStack ); + } wfProfileOut( $fname ); return $text; } - + /** * Replace magic variables * @private */ function variableSubstitution( $matches ) { + global $wgContLang; $fname = 'Parser::variableSubstitution'; - $varname = $matches[1]; + $varname = $wgContLang->lc($matches[1]); wfProfileIn( $fname ); $skip = false; if ( $this->mOutputType == OT_WIKI ) { # Do only magic variables prefixed by SUBST - $mwSubst =& MagicWord::get( MAG_SUBST ); + $mwSubst =& MagicWord::get( 'subst' ); if (!$mwSubst->matchStartAndRemove( $varname )) $skip = true; # Note that if we don't substitute the variable below, @@ -2595,8 +2774,14 @@ class Parser } if ( !$skip && array_key_exists( $varname, $this->mVariables ) ) { $id = $this->mVariables[$varname]; - $text = $this->getVariableValue( $id ); - $this->mOutput->mContainsOldMagic = true; + # Now check if we did really match, case sensitive or not + $mw =& MagicWord::get( $id ); + if ($mw->match($matches[1])) { + $text = $this->getVariableValue( $id ); + $this->mOutput->mContainsOldMagic = true; + } else { + $text = $matches[0]; + } } else { $text = $matches[0]; } @@ -2642,8 +2827,9 @@ class Parser */ function braceSubstitution( $piece ) { global $wgContLang, $wgLang, $wgAllowDisplayTitle, $action; - $fname = 'Parser::braceSubstitution'; + $fname = __METHOD__ /*. '-L' . count( $this->mArgStack )*/; wfProfileIn( $fname ); + wfProfileIn( __METHOD__.'-setup' ); # Flags $found = false; # $text has been filled @@ -2659,10 +2845,11 @@ class Parser $linestart = ''; + # $part1 is the bit before the first |, and must contain only title characters # $args is a list of arguments, starting from index 0, not including $part1 - $part1 = $piece['title']; + $titleText = $part1 = $piece['title']; # If the third subpattern matched anything, it will start with | if (null == $piece['parts']) { @@ -2677,11 +2864,13 @@ class Parser $args = (null == $piece['parts']) ? array() : $piece['parts']; $argc = count( $args ); + wfProfileOut( __METHOD__.'-setup' ); # SUBST + wfProfileIn( __METHOD__.'-modifiers' ); if ( !$found ) { - $mwSubst =& MagicWord::get( MAG_SUBST ); - if ( $mwSubst->matchStartAndRemove( $part1 ) xor ($this->mOutputType == OT_WIKI) ) { + $mwSubst =& MagicWord::get( 'subst' ); + if ( $mwSubst->matchStartAndRemove( $part1 ) xor $this->ot['wiki'] ) { # One of two possibilities is true: # 1) Found SUBST but not in the PST phase # 2) Didn't find SUBST and in the PST phase @@ -2693,33 +2882,25 @@ class Parser } } - # MSG, MSGNW, INT and RAW + # MSG, MSGNW and RAW if ( !$found ) { # Check for MSGNW: - $mwMsgnw =& MagicWord::get( MAG_MSGNW ); + $mwMsgnw =& MagicWord::get( 'msgnw' ); if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) { $nowiki = true; } else { # Remove obsolete MSG: - $mwMsg =& MagicWord::get( MAG_MSG ); + $mwMsg =& MagicWord::get( 'msg' ); $mwMsg->matchStartAndRemove( $part1 ); } # Check for RAW: - $mwRaw =& MagicWord::get( MAG_RAW ); + $mwRaw =& MagicWord::get( 'raw' ); if ( $mwRaw->matchStartAndRemove( $part1 ) ) { $forceRawInterwiki = true; } - - # Check if it is an internal message - $mwInt =& MagicWord::get( MAG_INT ); - if ( $mwInt->matchStartAndRemove( $part1 ) ) { - if ( $this->incrementIncludeCount( 'int:'.$part1 ) ) { - $text = $linestart . wfMsgReal( $part1, $args, true ); - $found = true; - } - } } + wfProfileOut( __METHOD__.'-modifiers' ); # Parser functions if ( !$found ) { @@ -2764,7 +2945,7 @@ class Parser } } } - wfProfileOut( __METHOD__ . '-pfunc' ); + wfProfileOut( __METHOD__ . '-pfunc' ); } # Template table test @@ -2780,9 +2961,8 @@ class Parser $noargs = true; $found = true; $text = $linestart . - '{{' . $part1 . '}}' . - '<!-- WARNING: template loop detected -->'; - wfDebug( "$fname: template loop broken at '$part1'\n" ); + "[[$part1]]<!-- WARNING: template loop detected -->"; + wfDebug( __METHOD__.": template loop broken at '$part1'\n" ); } else { # set $text to cached message. $text = $linestart . $this->mTemplates[$piece['title']]; @@ -2806,6 +2986,7 @@ class Parser if ( !is_null( $title ) ) { + $titleText = $title->getPrefixedText(); $checkVariantLink = sizeof($wgContLang->getVariants())>1; # Check for language variants if the template is not found if($checkVariantLink && $title->getArticleID() == 0){ @@ -2813,36 +2994,32 @@ class Parser } if ( !$title->isExternal() ) { - # Check for excessive inclusion - $dbk = $title->getPrefixedDBkey(); - if ( $this->incrementIncludeCount( $dbk ) ) { - if ( $title->getNamespace() == NS_SPECIAL && $this->mOptions->getAllowSpecialInclusion() && $this->mOutputType != OT_WIKI ) { - $text = SpecialPage::capturePath( $title ); - if ( is_string( $text ) ) { - $found = true; - $noparse = true; - $noargs = true; - $isHTML = true; - $this->disableCache(); - } - } else { - $articleContent = $this->fetchTemplate( $title ); - if ( $articleContent !== false ) { - $found = true; - $text = $articleContent; - $replaceHeadings = true; - } + if ( $title->getNamespace() == NS_SPECIAL && $this->mOptions->getAllowSpecialInclusion() && $this->ot['html'] ) { + $text = SpecialPage::capturePath( $title ); + if ( is_string( $text ) ) { + $found = true; + $noparse = true; + $noargs = true; + $isHTML = true; + $this->disableCache(); + } + } else { + $articleContent = $this->fetchTemplate( $title ); + if ( $articleContent !== false ) { + $found = true; + $text = $articleContent; + $replaceHeadings = true; } } # If the title is valid but undisplayable, make a link to it - if ( $this->mOutputType == OT_HTML && !$found ) { - $text = '[['.$title->getPrefixedText().']]'; + if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) { + $text = "[[:$titleText]]"; $found = true; } } elseif ( $title->isTrans() ) { // Interwiki transclusion - if ( $this->mOutputType == OT_HTML && !$forceRawInterwiki ) { + if ( $this->ot['html'] && !$forceRawInterwiki ) { $text = $this->interwikiTransclude( $title, 'render' ); $isHTML = true; $noparse = true; @@ -2852,7 +3029,7 @@ class Parser } $found = true; } - + # Template cache array insertion # Use the original $piece['title'] not the mangled $part1, so that # modifiers such as RAW: produce separate cache entries @@ -2865,14 +3042,22 @@ class Parser $text = $linestart . $text; } } - wfProfileOut( __METHOD__ . '-loadtpl' ); + wfProfileOut( __METHOD__ . '-loadtpl' ); + } + + if ( $found && !$this->incrementIncludeSize( 'pre-expand', strlen( $text ) ) ) { + # Error, oversize inclusion + $text = $linestart . + "[[$titleText]]<!-- WARNING: template omitted, pre-expand include size too large -->"; + $noparse = true; + $noargs = true; } # Recursive parsing, escaping and link table handling # Only for HTML output - if ( $nowiki && $found && $this->mOutputType == OT_HTML ) { + if ( $nowiki && $found && ( $this->ot['html'] || $this->ot['pre'] ) ) { $text = wfEscapeWikiText( $text ); - } elseif ( ($this->mOutputType == OT_HTML || $this->mOutputType == OT_WIKI) && $found ) { + } elseif ( !$this->ot['msg'] && $found ) { if ( $noargs ) { $assocArgs = array(); } else { @@ -2911,16 +3096,20 @@ class Parser $text = preg_replace( '/<noinclude>.*?<\/noinclude>/s', '', $text ); $text = strtr( $text, array( '<includeonly>' => '' , '</includeonly>' => '' ) ); - if( $this->mOutputType == OT_HTML ) { + if( $this->ot['html'] || $this->ot['pre'] ) { # Strip <nowiki>, <pre>, etc. $text = $this->strip( $text, $this->mStripState ); - $text = Sanitizer::removeHTMLtags( $text, array( &$this, 'replaceVariables' ), $assocArgs ); + if ( $this->ot['html'] ) { + $text = Sanitizer::removeHTMLtags( $text, array( &$this, 'replaceVariables' ), $assocArgs ); + } elseif ( $this->ot['pre'] && $this->mOptions->getRemoveComments() ) { + $text = Sanitizer::removeHTMLcomments( $text ); + } } $text = $this->replaceVariables( $text, $assocArgs ); # If the template begins with a table or block-level # element, it should be treated as beginning a new line. - if (!$piece['lineStart'] && preg_match('/^({\\||:|;|#|\*)/', $text)) { + if (!$piece['lineStart'] && preg_match('/^({\\||:|;|#|\*)/', $text)) /*}*/{ $text = "\n" . $text; } } elseif ( !$noargs ) { @@ -2933,6 +3122,14 @@ class Parser # Prune lower levels off the recursion check path $this->mTemplatePath = $lastPathLevel; + if ( $found && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) { + # Error, oversize inclusion + $text = $linestart . + "[[$titleText]]<!-- WARNING: template omitted, post-expand include size too large -->"; + $noparse = true; + $noargs = true; + } + if ( !$found ) { wfProfileOut( $fname ); return $piece['text']; @@ -2946,7 +3143,7 @@ class Parser } else { # replace ==section headers== # XXX this needs to go away once we have a better parser. - if ( $this->mOutputType != OT_WIKI && $replaceHeadings ) { + if ( !$this->ot['wiki'] && !$this->ot['pre'] && $replaceHeadings ) { if( !is_null( $title ) ) $encodedname = base64_encode($title->getPrefixedDBkey()); else @@ -3070,25 +3267,31 @@ class Parser if ( array_key_exists( $arg, $inputArgs ) ) { $text = $inputArgs[$arg]; - } else if ($this->mOutputType == OT_HTML && null != $matches['parts'] && count($matches['parts']) > 0) { + } else if (($this->mOutputType == OT_HTML || $this->mOutputType == OT_PREPROCESS ) && + null != $matches['parts'] && count($matches['parts']) > 0) { $text = $matches['parts'][0]; } + if ( !$this->incrementIncludeSize( 'arg', strlen( $text ) ) ) { + $text = $matches['text'] . + '<!-- WARNING: argument omitted, expansion size too large -->'; + } return $text; } /** - * Returns true if the function is allowed to include this entity - * @private + * Increment an include size counter + * + * @param string $type The type of expansion + * @param integer $size The size of the text + * @return boolean False if this inclusion would take it over the maximum, true otherwise */ - function incrementIncludeCount( $dbk ) { - if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) { - $this->mIncludeCount[$dbk] = 0; - } - if ( ++$this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT ) { - return true; - } else { + function incrementIncludeSize( $type, $size ) { + if ( $this->mIncludeSizes[$type] + $size > $this->mOptions->getMaxIncludeSize() ) { return false; + } else { + $this->mIncludeSizes[$type] += $size; + return true; } } @@ -3098,7 +3301,7 @@ class Parser function stripNoGallery( &$text ) { # if the string __NOGALLERY__ (not case-sensitive) occurs in the HTML, # do not add TOC - $mw = MagicWord::get( MAG_NOGALLERY ); + $mw = MagicWord::get( 'nogallery' ); $this->mOutput->mNoGallery = $mw->matchAndRemove( $text ) ; } @@ -3108,19 +3311,19 @@ class Parser function stripToc( $text ) { # if the string __NOTOC__ (not case-sensitive) occurs in the HTML, # do not add TOC - $mw = MagicWord::get( MAG_NOTOC ); + $mw = MagicWord::get( 'notoc' ); if( $mw->matchAndRemove( $text ) ) { $this->mShowToc = false; } - - $mw = MagicWord::get( MAG_TOC ); + + $mw = MagicWord::get( 'toc' ); if( $mw->match( $text ) ) { $this->mShowToc = true; $this->mForceTocPosition = true; - + // Set a placeholder. At the end we'll fill it in with the TOC. $text = $mw->replace( '<!--MWTOC-->', $text, 1 ); - + // Only keep the first one. $text = $mw->replace( '', $text ); } @@ -3152,7 +3355,7 @@ class Parser } # Inhibit editsection links if requested in the page - $esw =& MagicWord::get( MAG_NOEDITSECTION ); + $esw =& MagicWord::get( 'noeditsection' ); if( $esw->matchAndRemove( $text ) ) { $showEditLink = 0; } @@ -3168,13 +3371,13 @@ class Parser # Allow user to stipulate that a page should have a "new section" # link added via __NEWSECTIONLINK__ - $mw =& MagicWord::get( MAG_NEWSECTIONLINK ); + $mw =& MagicWord::get( 'newsectionlink' ); if( $mw->matchAndRemove( $text ) ) $this->mOutput->setNewSection( true ); # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML, # override above conditions and always show TOC above first header - $mw =& MagicWord::get( MAG_FORCETOC ); + $mw =& MagicWord::get( 'forcetoc' ); if ($mw->matchAndRemove( $text ) ) { $this->mShowToc = true; $enoughToc = true; @@ -3381,137 +3584,6 @@ class Parser } /** - * Return an HTML link for the "ISBN 123456" text - * @private - */ - function magicISBN( $text ) { - $fname = 'Parser::magicISBN'; - wfProfileIn( $fname ); - - $a = split( 'ISBN ', ' '.$text ); - if ( count ( $a ) < 2 ) { - wfProfileOut( $fname ); - return $text; - } - $text = substr( array_shift( $a ), 1); - $valid = '0123456789-Xx'; - - foreach ( $a as $x ) { - # hack: don't replace inside thumbnail title/alt - # attributes - if(preg_match('/<[^>]+(alt|title)="[^">]*$/', $text)) { - $text .= "ISBN $x"; - continue; - } - - $isbn = $blank = '' ; - while ( $x !== '' && ' ' == $x{0} ) { - $blank .= ' '; - $x = substr( $x, 1 ); - } - if ( $x == '' ) { # blank isbn - $text .= "ISBN $blank"; - continue; - } - while ( strstr( $valid, $x{0} ) != false ) { - $isbn .= $x{0}; - $x = substr( $x, 1 ); - } - $num = str_replace( '-', '', $isbn ); - $num = str_replace( ' ', '', $num ); - $num = str_replace( 'x', 'X', $num ); - - if ( '' == $num ) { - $text .= "ISBN $blank$x"; - } else { - $titleObj = Title::makeTitle( NS_SPECIAL, 'Booksources' ); - $text .= '<a href="' . - $titleObj->escapeLocalUrl( 'isbn='.$num ) . - "\" class=\"internal\">ISBN $isbn</a>"; - $text .= $x; - } - } - wfProfileOut( $fname ); - return $text; - } - - /** - * Return an HTML link for the "RFC 1234" text - * - * @private - * @param string $text Text to be processed - * @param string $keyword Magic keyword to use (default RFC) - * @param string $urlmsg Interface message to use (default rfcurl) - * @return string - */ - function magicRFC( $text, $keyword='RFC ', $urlmsg='rfcurl' ) { - - $valid = '0123456789'; - $internal = false; - - $a = split( $keyword, ' '.$text ); - if ( count ( $a ) < 2 ) { - return $text; - } - $text = substr( array_shift( $a ), 1); - - /* Check if keyword is preceed by [[. - * This test is made here cause of the array_shift above - * that prevent the test to be done in the foreach. - */ - if ( substr( $text, -2 ) == '[[' ) { - $internal = true; - } - - foreach ( $a as $x ) { - /* token might be empty if we have RFC RFC 1234 */ - if ( $x=='' ) { - $text.=$keyword; - continue; - } - - # hack: don't replace inside thumbnail title/alt - # attributes - if(preg_match('/<[^>]+(alt|title)="[^">]*$/', $text)) { - $text .= $keyword . $x; - continue; - } - - $id = $blank = '' ; - - /** remove and save whitespaces in $blank */ - while ( $x{0} == ' ' ) { - $blank .= ' '; - $x = substr( $x, 1 ); - } - - /** remove and save the rfc number in $id */ - while ( strstr( $valid, $x{0} ) != false ) { - $id .= $x{0}; - $x = substr( $x, 1 ); - } - - if ( $id == '' ) { - /* call back stripped spaces*/ - $text .= $keyword.$blank.$x; - } elseif( $internal ) { - /* normal link */ - $text .= $keyword.$id.$x; - } else { - /* build the external link*/ - $url = wfMsg( $urlmsg, $id); - $sk =& $this->mOptions->getSkin(); - $la = $sk->getExternalLinkAttributes( $url, $keyword.$id ); - $text .= "<a href=\"{$url}\"{$la}>{$keyword}{$id}</a>{$x}"; - } - - /* Check if the next RFC keyword is preceed by [[ */ - $internal = ( substr($x,-2) == '[[' ); - } - return $text; - } - - /** * Transform wiki markup when saving a page by doing \r\n -> \n * conversion, substitting signatures, {{subst:}} templates, etc. * @@ -3526,7 +3598,7 @@ class Parser function preSaveTransform( $text, &$title, &$user, $options, $clearState = true ) { $this->mOptions = $options; $this->mTitle =& $title; - $this->mOutputType = OT_WIKI; + $this->setOutputType( OT_WIKI ); if ( $clearState ) { $this->clearState(); @@ -3569,10 +3641,10 @@ class Parser # Variable replacement # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags $text = $this->replaceVariables( $text ); - + # Strip out <nowiki> etc. added via replaceVariables $text = $this->strip( $text, $stripState, false, array( 'gallery' ) ); - + # Signatures $sigText = $this->getUserSig( $user ); $text = strtr( $text, array( @@ -3585,35 +3657,31 @@ class Parser # global $wgLegalTitleChars; $tc = "[$wgLegalTitleChars]"; - $np = str_replace( array( '(', ')' ), array( '', '' ), $tc ); # No parens + $nc = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii! - $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii! - $conpat = "/^({$np}+) \\(({$tc}+)\\)$/"; + $p1 = "/\[\[(:?$nc+:|:|)($tc+?)( \\($tc+\\))\\|]]/"; # [[ns:page (context)|]] + $p3 = "/\[\[(:?$nc+:|:|)($tc+?)( \\($tc+\\)|)(, $tc+|)\\|]]/"; # [[ns:page (context), context|]] + $p2 = "/\[\[\\|($tc+)]]/"; # [[|page]] - $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/"; # [[page (context)|]] - $p2 = "/\[\[\\|({$tc}+)]]/"; # [[|page]] - $p3 = "/\[\[(:*$namespacechar+):({$np}+)\\|]]/"; # [[namespace:page|]] and [[:namespace:page|]] - $p4 = "/\[\[(:*$namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/"; # [[ns:page (cont)|]] and [[:ns:page (cont)|]] - $context = ''; - $t = $this->mTitle->getText(); - if ( preg_match( $conpat, $t, $m ) ) { - $context = $m[2]; - } - $text = preg_replace( $p4, '[[\\1:\\2 (\\3)|\\2]]', $text ); - $text = preg_replace( $p1, '[[\\1 (\\2)|\\1]]', $text ); - $text = preg_replace( $p3, '[[\\1:\\2|\\2]]', $text ); + # try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]" + $text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text ); + $text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text ); - if ( '' == $context ) { - $text = preg_replace( $p2, '[[\\1]]', $text ); + $t = $this->mTitle->getText(); + if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) { + $text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text ); + } elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && '' != "$m[1]$m[2]" ) { + $text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text ); } else { - $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text ); + # if there's no context, don't bother duplicating the title + $text = preg_replace( $p2, '[[\\1]]', $text ); } # Trim trailing whitespace - # MAG_END (__END__) tag allows for trailing + # __END__ tag allows for trailing # whitespace to be deliberately included $text = rtrim( $text ); - $mw =& MagicWord::get( MAG_END ); + $mw =& MagicWord::get( 'end' ); $mw->matchAndRemove( $text ); return $text; @@ -3631,7 +3699,7 @@ class Parser $username = $user->getName(); $nickname = $user->getOption( 'nickname' ); $nickname = $nickname === '' ? $username : $nickname; - + if( $user->getBoolOption( 'fancysig' ) !== false ) { # Sig. might contain markup; validate this if( $this->validateSig( $nickname ) !== false ) { @@ -3661,7 +3729,7 @@ class Parser function validateSig( $text ) { return( wfIsWellFormedXmlFragment( $text ) ? $text : false ); } - + /** * Clean up signature text * @@ -3675,16 +3743,16 @@ class Parser function cleanSig( $text, $parsing = false ) { global $wgTitle; $this->startExternalParse( $wgTitle, new ParserOptions(), $parsing ? OT_WIKI : OT_MSG ); - - $substWord = MagicWord::get( MAG_SUBST ); + + $substWord = MagicWord::get( 'subst' ); $substRegex = '/\{\{(?!(?:' . $substWord->getBaseRegex() . '))/x' . $substWord->getRegexCase(); $substText = '{{' . $substWord->getSynonym( 0 ); $text = preg_replace( $substRegex, $substText, $text ); $text = $this->cleanSigInSig( $text ); $text = $this->replaceVariables( $text ); - - $this->clearState(); + + $this->clearState(); return $text; } @@ -3697,7 +3765,7 @@ class Parser $text = preg_replace( '/~{3,5}/', '', $text ); return $text; } - + /** * Set up some variables which are usually set up in parse() * so that an external function can call some class members with confidence @@ -3706,7 +3774,7 @@ class Parser function startExternalParse( &$title, $options, $outputType, $clearState = true ) { $this->mTitle =& $title; $this->mOptions = $options; - $this->mOutputType = $outputType; + $this->setOutputType( $outputType ); if ( $clearState ) { $this->clearState(); } @@ -3734,9 +3802,13 @@ class Parser wfProfileIn($fname); - $this->mTitle = $wgTitle; + if ( $wgTitle ) { + $this->mTitle = $wgTitle; + } else { + $this->mTitle = Title::newFromText('msg'); + } $this->mOptions = $options; - $this->mOutputType = OT_MSG; + $this->setOutputType( OT_MSG ); $this->clearState(); $text = $this->replaceVariables( $text ); @@ -3785,7 +3857,7 @@ class Parser * * @public * - * @param mixed $id The magic word ID, or (deprecated) the function name. Function names are case-insensitive. + * @param string $id The magic word ID * @param mixed $callback The callback function (and object) to use * @param integer $flags a combination of the following flags: * SFH_NO_HASH No leading hash, i.e. {{plural:...}} instead of {{#if:...}} @@ -3793,21 +3865,16 @@ class Parser * @return The old callback function for this name, if any */ function setFunctionHook( $id, $callback, $flags = 0 ) { - if( is_string( $id ) ) { - $id = strtolower( $id ); - } $oldVal = @$this->mFunctionHooks[$id]; $this->mFunctionHooks[$id] = $callback; # Add to function cache - if ( is_int( $id ) ) { - $mw = MagicWord::get( $id ); - $synonyms = $mw->getSynonyms(); - $sensitive = intval( $mw->isCaseSensitive() ); - } else { - $synonyms = array( $id ); - $sensitive = 0; - } + $mw = MagicWord::get( $id ); + if( !$mw ) + throw new MWException( 'Parser::setFunctionHook() expecting a magic word identifier.' ); + + $synonyms = $mw->getSynonyms(); + $sensitive = intval( $mw->isCaseSensitive() ); foreach ( $synonyms as $syn ) { # Case @@ -3828,6 +3895,15 @@ class Parser } /** + * Get all registered function hook identifiers + * + * @return array + */ + function getFunctionHooks() { + return array_keys( $this->mFunctionHooks ); + } + + /** * Replace <!--LINK--> link placeholders with actual links, in the buffer * Placeholders created in Skin::makeLinkObj() * Returns an array of links found, indexed by PDBK: @@ -3839,6 +3915,7 @@ class Parser function replaceLinkHolders( &$text, $options = 0 ) { global $wgUser; global $wgOutputReplace; + global $wgContLang, $wgLanguageCode; $fname = 'Parser::replaceLinkHolders'; wfProfileIn( $fname ); @@ -3929,6 +4006,91 @@ class Parser } wfProfileOut( $fname.'-check' ); + # Do a second query for different language variants of links (if needed) + if($wgContLang->hasVariants()){ + $linkBatch = new LinkBatch(); + $variantMap = array(); // maps $pdbkey_Variant => $pdbkey_original + + // Add variants of links to link batch + foreach ( $this->mLinkHolders['namespaces'] as $key => $ns ) { + $title = $this->mLinkHolders['titles'][$key]; + if ( is_null( $title ) ) + continue; + + $pdbk = $title->getPrefixedDBkey(); + + // generate all variants of the link title text + $allTextVariants = $wgContLang->convertLinkToAllVariants($title->getText()); + + // if link was not found (in first query), add all variants to query + if ( !isset($colours[$pdbk]) ){ + foreach($allTextVariants as $textVariant){ + $variantTitle = Title::makeTitle( $ns, $textVariant ); + if(is_null($variantTitle)) continue; + $linkBatch->addObj( $variantTitle ); + $variantMap[$variantTitle->getPrefixedDBkey()][] = $key; + } + } + } + + + if(!$linkBatch->isEmpty()){ + // construct query + $titleClause = $linkBatch->constructSet('page', $dbr); + + $variantQuery = "SELECT page_id, page_namespace, page_title"; + if ( $threshold > 0 ) { + $variantQuery .= ', page_len, page_is_redirect'; + } + + $variantQuery .= " FROM $page WHERE $titleClause"; + if ( $options & RLH_FOR_UPDATE ) { + $variantQuery .= ' FOR UPDATE'; + } + + $varRes = $dbr->query( $variantQuery, $fname ); + + // for each found variants, figure out link holders and replace + while ( $s = $dbr->fetchObject($varRes) ) { + + $variantTitle = Title::makeTitle( $s->page_namespace, $s->page_title ); + $varPdbk = $variantTitle->getPrefixedDBkey(); + $linkCache->addGoodLinkObj( $s->page_id, $variantTitle ); + $this->mOutput->addLink( $variantTitle, $s->page_id ); + + $holderKeys = $variantMap[$varPdbk]; + + // loop over link holders + foreach($holderKeys as $key){ + $title = $this->mLinkHolders['titles'][$key]; + if ( is_null( $title ) ) continue; + + $pdbk = $title->getPrefixedDBkey(); + + if(!isset($colours[$pdbk])){ + // found link in some of the variants, replace the link holder data + $this->mLinkHolders['titles'][$key] = $variantTitle; + $this->mLinkHolders['dbkeys'][$key] = $variantTitle->getDBkey(); + + // set pdbk and colour + $pdbks[$key] = $varPdbk; + if ( $threshold > 0 ) { + $size = $s->page_len; + if ( $s->page_is_redirect || $s->page_namespace != 0 || $size >= $threshold ) { + $colours[$varPdbk] = 1; + } else { + $colours[$varPdbk] = 2; + } + } + else { + $colours[$varPdbk] = 1; + } + } + } + } + } + } + # Construct search and replace arrays wfProfileIn( $fname.'-construct' ); $wgOutputReplace = array(); @@ -4033,13 +4195,13 @@ class Parser function renderPreTag( $text, $attribs, $parser ) { // Backwards-compatibility hack $content = preg_replace( '!<nowiki>(.*?)</nowiki>!is', '\\1', $text ); - + $attribs = Sanitizer::validateTagAttributes( $attribs, 'pre' ); return wfOpenElement( 'pre', $attribs ) . wfEscapeHTMLTagsOnly( $content ) . '</pre>'; } - + /** * Renders an image gallery from a text with one line per image. * text labels may be given by using |-style alternative text. E.g. @@ -4058,7 +4220,7 @@ class Parser if( isset( $params['caption'] ) ) $ig->setCaption( $params['caption'] ); - + $lines = explode( "\n", $text ); foreach ( $lines as $line ) { # match lines like these: @@ -4068,7 +4230,8 @@ class Parser if ( count( $matches ) == 0 ) { continue; } - $nt =& Title::newFromText( $matches[1] ); + $tp = Title::newFromText( $matches[1] ); + $nt =& $tp; if( is_null( $nt ) ) { # Bogus title. Ignore these so we don't bomb out later. continue; @@ -4101,7 +4264,7 @@ class Parser * Parse image options text and use it to make an image */ function makeImage( &$nt, $options ) { - global $wgUseImageResize; + global $wgUseImageResize, $wgDjvuRenderer; $align = ''; @@ -4117,17 +4280,19 @@ class Parser $part = explode( '|', $options); - $mwThumb =& MagicWord::get( MAG_IMG_THUMBNAIL ); - $mwManualThumb =& MagicWord::get( MAG_IMG_MANUALTHUMB ); - $mwLeft =& MagicWord::get( MAG_IMG_LEFT ); - $mwRight =& MagicWord::get( MAG_IMG_RIGHT ); - $mwNone =& MagicWord::get( MAG_IMG_NONE ); - $mwWidth =& MagicWord::get( MAG_IMG_WIDTH ); - $mwCenter =& MagicWord::get( MAG_IMG_CENTER ); - $mwFramed =& MagicWord::get( MAG_IMG_FRAMED ); + $mwThumb =& MagicWord::get( 'img_thumbnail' ); + $mwManualThumb =& MagicWord::get( 'img_manualthumb' ); + $mwLeft =& MagicWord::get( 'img_left' ); + $mwRight =& MagicWord::get( 'img_right' ); + $mwNone =& MagicWord::get( 'img_none' ); + $mwWidth =& MagicWord::get( 'img_width' ); + $mwCenter =& MagicWord::get( 'img_center' ); + $mwFramed =& MagicWord::get( 'img_framed' ); + $mwPage =& MagicWord::get( 'img_page' ); $caption = ''; $width = $height = $framed = $thumb = false; + $page = null; $manual_thumb = '' ; foreach( $part as $key => $val ) { @@ -4149,8 +4314,12 @@ class Parser } elseif ( ! is_null( $mwNone->matchVariableStartToEnd($val) ) ) { # remember to set an alignment, don't render immediately $align = 'none'; + } elseif ( isset( $wgDjvuRenderer ) && $wgDjvuRenderer + && ! is_null( $match = $mwPage->matchVariableStartToEnd($val) ) ) { + # Select a page in a multipage document + $page = $match; } elseif ( $wgUseImageResize && ! is_null( $match = $mwWidth->matchVariableStartToEnd($val) ) ) { - wfDebug( "MAG_IMG_WIDTH match: $match\n" ); + wfDebug( "img_width match: $match\n" ); # $match is the image width in pixels if ( preg_match( '/^([0-9]*)x([0-9]*)$/', $match, $m ) ) { $width = intval( $m[1] ); @@ -4175,7 +4344,7 @@ class Parser # Linker does the rest $sk =& $this->mOptions->getSkin(); - return $sk->makeImageLinkObj( $nt, $caption, $alt, $align, $width, $height, $framed, $thumb, $manual_thumb ); + return $sk->makeImageLinkObj( $nt, $caption, $alt, $align, $width, $height, $framed, $thumb, $manual_thumb, $page ); } /** @@ -4242,15 +4411,15 @@ class Parser # strip NOWIKI etc. to avoid confusion (true-parameter causes HTML # comments to be stripped as well) $striparray = array(); - + $oldOutputType = $this->mOutputType; $oldOptions = $this->mOptions; $this->mOptions = new ParserOptions(); - $this->mOutputType = OT_WIKI; - + $this->setOutputType( OT_WIKI ); + $striptext = $this->strip( $text, $striparray, true ); - - $this->mOutputType = $oldOutputType; + + $this->setOutputType( $oldOutputType ); $this->mOptions = $oldOptions; # now that we can be sure that no pseudo-sections are in the source, @@ -4293,7 +4462,7 @@ class Parser /mix", $striptext, -1, PREG_SPLIT_DELIM_CAPTURE); - + if( $mode == "get" ) { if( $section == 0 ) { // "Section 0" returns the content before any other section. @@ -4360,7 +4529,7 @@ class Parser $rv = trim( $rv ); return $rv; } - + /** * This function returns the text of a section, specified by a number ($section). * A section is text under a heading like == Heading == or \<h1\>Heading\</h1\>, or @@ -4375,7 +4544,7 @@ class Parser function getSection( $text, $section ) { return $this->extractSections( $text, $section, "get" ); } - + function replaceSection( $oldtext, $section, $text ) { return $this->extractSections( $oldtext, $section, "replace", $text ); } @@ -4435,6 +4604,7 @@ class ParserOutput function &getImages() { return $this->mImages; } function &getExternalLinks() { return $this->mExternalLinks; } function getNoGallery() { return $this->mNoGallery; } + function getSubtitle() { return $this->mSubtitle; } function containsOldMagic() { return $this->mContainsOldMagic; } function setText( $text ) { return wfSetVar( $this->mText, $text ); } @@ -4442,7 +4612,8 @@ class ParserOutput function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategories, $cl ); } function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); } function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); } - function setTitleText( $t ) { return wfSetVar ($this->mTitleText, $t); } + function setTitleText( $t ) { return wfSetVar($this->mTitleText, $t); } + function setSubtitle( $st ) { return wfSetVar( $this->mSubtitle, $st ); } function addCategory( $c, $sort ) { $this->mCategories[$c] = $sort; } function addImage( $name ) { $this->mImages[$name] = 1; } @@ -4456,12 +4627,15 @@ class ParserOutput return (bool)$this->mNewSection; } - function addLink( $title, $id ) { + function addLink( $title, $id = null ) { $ns = $title->getNamespace(); $dbk = $title->getDBkey(); if ( !isset( $this->mLinks[$ns] ) ) { $this->mLinks[$ns] = array(); } + if ( is_null( $id ) ) { + $id = $title->getArticleID(); + } $this->mLinks[$ns][$dbk] = $id; } @@ -4513,6 +4687,8 @@ class ParserOptions var $mAllowSpecialInclusion; # Allow inclusion of special pages var $mTidy; # Ask for tidy cleanup var $mInterfaceMessage; # Which lang to call for PLURAL and GRAMMAR + var $mMaxIncludeSize; # Maximum size of template expansions, in bytes + var $mRemoveComments; # Remove HTML comments. ONLY APPLIES TO PREPROCESS OPERATIONS var $mUser; # Stored user object, just used to initialise the skin @@ -4521,12 +4697,13 @@ class ParserOptions function getInterwikiMagic() { return $this->mInterwikiMagic; } function getAllowExternalImages() { return $this->mAllowExternalImages; } function getAllowExternalImagesFrom() { return $this->mAllowExternalImagesFrom; } - function getDateFormat() { return $this->mDateFormat; } function getEditSection() { return $this->mEditSection; } function getNumberHeadings() { return $this->mNumberHeadings; } function getAllowSpecialInclusion() { return $this->mAllowSpecialInclusion; } function getTidy() { return $this->mTidy; } function getInterfaceMessage() { return $this->mInterfaceMessage; } + function getMaxIncludeSize() { return $this->mMaxIncludeSize; } + function getRemoveComments() { return $this->mRemoveComments; } function &getSkin() { if ( !isset( $this->mSkin ) ) { @@ -4535,6 +4712,13 @@ class ParserOptions return $this->mSkin; } + function getDateFormat() { + if ( !isset( $this->mDateFormat ) ) { + $this->mDateFormat = $this->mUser->getDatePreference(); + } + return $this->mDateFormat; + } + function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); } function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); } function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); } @@ -4547,6 +4731,8 @@ class ParserOptions function setTidy( $x ) { return wfSetVar( $this->mTidy, $x); } function setSkin( &$x ) { $this->mSkin =& $x; } function setInterfaceMessage( $x ) { return wfSetVar( $this->mInterfaceMessage, $x); } + function setMaxIncludeSize( $x ) { return wfSetVar( $this->mMaxIncludeSize, $x ); } + function setRemoveComments( $x ) { return wfSetVar( $this->mRemoveComments, $x ); } function ParserOptions( $user = null ) { $this->initialiseFromUser( $user ); @@ -4556,14 +4742,14 @@ class ParserOptions * Get parser options * @static */ - function newFromUser( &$user ) { + static function newFromUser( $user ) { return new ParserOptions( $user ); } /** Get user options */ - function initialiseFromUser( &$userInput ) { + function initialiseFromUser( $userInput ) { global $wgUseTeX, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages; - global $wgAllowExternalImagesFrom, $wgAllowSpecialInclusion; + global $wgAllowExternalImagesFrom, $wgAllowSpecialInclusion, $wgMaxArticleSize; $fname = 'ParserOptions::initialiseFromUser'; wfProfileIn( $fname ); if ( !$userInput ) { @@ -4586,12 +4772,14 @@ class ParserOptions $this->mAllowExternalImages = $wgAllowExternalImages; $this->mAllowExternalImagesFrom = $wgAllowExternalImagesFrom; $this->mSkin = null; # Deferred - $this->mDateFormat = $user->getOption( 'date' ); + $this->mDateFormat = null; # Deferred $this->mEditSection = true; $this->mNumberHeadings = $user->getOption( 'numberheadings' ); $this->mAllowSpecialInclusion = $wgAllowSpecialInclusion; $this->mTidy = false; $this->mInterfaceMessage = false; + $this->mMaxIncludeSize = $wgMaxArticleSize * 1024; + $this->mRemoveComments = true; wfProfileOut( $fname ); } } @@ -4711,6 +4899,26 @@ function wfLoadSiteStats() { } /** + * Get revision timestamp from the database considering timecorrection + * + * @param $id Int: page revision id + * @return integer + */ +function wfRevisionTimestamp( $id ) { + global $wgContLang; + $fname = 'wfRevisionTimestamp'; + + wfProfileIn( $fname ); + $dbr =& wfGetDB( DB_SLAVE ); + $timestamp = $dbr->selectField( 'revision', 'rev_timestamp', + array( 'rev_id' => $id ), __METHOD__ ); + $timestamp = $wgContLang->userAdjust( $timestamp ); + wfProfileOut( $fname ); + + return $timestamp; +} + +/** * Escape html tags * Basically replacing " > and < with HTML entities ( ", >, <) * |