From 4ac9fa081a7c045f6a9f1cfc529d82423f485b2e Mon Sep 17 00:00:00 2001 From: Pierre Schmitz Date: Sun, 8 Dec 2013 09:55:49 +0100 Subject: Update to MediaWiki 1.22.0 --- includes/parser/CacheTime.php | 2 +- includes/parser/CoreLinkFunctions.php | 92 ---- includes/parser/CoreParserFunctions.php | 197 +++++--- includes/parser/CoreTagHooks.php | 2 +- includes/parser/DateFormatter.php | 24 +- includes/parser/LinkHolderArray.php | 216 ++++---- includes/parser/Parser.php | 848 +++++++++++++++++++------------- includes/parser/ParserCache.php | 20 +- includes/parser/ParserOptions.php | 21 +- includes/parser/ParserOutput.php | 111 ++++- includes/parser/Parser_DiffTest.php | 2 +- includes/parser/Parser_LinkHooks.php | 326 ------------ includes/parser/Preprocessor_DOM.php | 99 ++-- includes/parser/Preprocessor_Hash.php | 61 ++- includes/parser/Tidy.php | 11 +- 15 files changed, 1029 insertions(+), 1003 deletions(-) delete mode 100644 includes/parser/CoreLinkFunctions.php delete mode 100644 includes/parser/Parser_LinkHooks.php (limited to 'includes/parser') diff --git a/includes/parser/CacheTime.php b/includes/parser/CacheTime.php index 6b70e1da..8190a8a0 100644 --- a/includes/parser/CacheTime.php +++ b/includes/parser/CacheTime.php @@ -93,7 +93,7 @@ class CacheTime { $expire = min( $expire, $wgParserCacheExpireTime ); } - if( $this->containsOldMagic() ) { //compatibility hack + if ( $this->containsOldMagic() ) { //compatibility hack $expire = min( $expire, 3600 ); # 1 hour } diff --git a/includes/parser/CoreLinkFunctions.php b/includes/parser/CoreLinkFunctions.php deleted file mode 100644 index 1cabf766..00000000 --- a/includes/parser/CoreLinkFunctions.php +++ /dev/null @@ -1,92 +0,0 @@ -setLinkHook( NS_CATEGORY, array( __CLASS__, 'categoryLinkHook' ) ); - return true; - } - - /** - * @param $parser Parser - * @param $holders LinkHolderArray - * @param $markers LinkMarkerReplacer - * @param Title $title - * @param $titleText - * @param null $displayText - * @param bool $leadingColon - * @return bool - */ - static function defaultLinkHook( $parser, $holders, $markers, - Title $title, $titleText, &$displayText = null, &$leadingColon = false ) { - if( isset( $displayText ) && $markers->findMarker( $displayText ) ) { - # There are links inside of the displayText - # For backwards compatibility the deepest links are dominant so this - # link should not be handled - $displayText = $markers->expand( $displayText ); - # Return false so that this link is reverted back to WikiText - return false; - } - return $holders->makeHolder( $title, isset( $displayText ) ? $displayText : $titleText, array(), '', '' ); - } - - /** - * @param $parser Parser - * @param $holders LinkHolderArray - * @param $markers LinkMarkerReplacer - * @param Title $title - * @param $titleText - * @param null $sortText - * @param bool $leadingColon - * @return bool|string - */ - static function categoryLinkHook( $parser, $holders, $markers, - Title $title, $titleText, &$sortText = null, &$leadingColon = false ) { - global $wgContLang; - # When a category link starts with a : treat it as a normal link - if( $leadingColon ) return true; - if( isset( $sortText ) && $markers->findMarker( $sortText ) ) { - # There are links inside of the sortText - # For backwards compatibility the deepest links are dominant so this - # link should not be handled - $sortText = $markers->expand( $sortText ); - # Return false so that this link is reverted back to WikiText - return false; - } - if( !isset( $sortText ) ) $sortText = $parser->getDefaultSort(); - $sortText = Sanitizer::decodeCharReferences( $sortText ); - $sortText = str_replace( "\n", '', $sortText ); - $sortText = $wgContLang->convertCategoryKey( $sortText ); - $parser->mOutput->addCategory( $title->getDBkey(), $sortText ); - return ''; - } - -} diff --git a/includes/parser/CoreParserFunctions.php b/includes/parser/CoreParserFunctions.php index cdd03aa4..4b6eeca2 100644 --- a/includes/parser/CoreParserFunctions.php +++ b/includes/parser/CoreParserFunctions.php @@ -88,6 +88,8 @@ class CoreParserFunctions { $parser->setFunctionHook( 'pagenamee', array( __CLASS__, 'pagenamee' ), SFH_NO_HASH ); $parser->setFunctionHook( 'fullpagename', array( __CLASS__, 'fullpagename' ), SFH_NO_HASH ); $parser->setFunctionHook( 'fullpagenamee', array( __CLASS__, 'fullpagenamee' ), SFH_NO_HASH ); + $parser->setFunctionHook( 'rootpagename', array( __CLASS__, 'rootpagename' ), SFH_NO_HASH ); + $parser->setFunctionHook( 'rootpagenamee', array( __CLASS__, 'rootpagenamee' ), SFH_NO_HASH ); $parser->setFunctionHook( 'basepagename', array( __CLASS__, 'basepagename' ), SFH_NO_HASH ); $parser->setFunctionHook( 'basepagenamee', array( __CLASS__, 'basepagenamee' ), SFH_NO_HASH ); $parser->setFunctionHook( 'subpagename', array( __CLASS__, 'subpagename' ), SFH_NO_HASH ); @@ -137,9 +139,10 @@ class CoreParserFunctions { $pref = $parser->getOptions()->getDateFormat(); // Specify a different default date format other than the the normal default - // iff the user has 'default' for their setting - if ( $pref == 'default' && $defaultPref ) + // if the user has 'default' for their setting + if ( $pref == 'default' && $defaultPref ) { $pref = $defaultPref; + } $date = $df->reformat( $pref, $date, array( 'match-whole' ) ); return $date; @@ -184,7 +187,7 @@ class CoreParserFunctions { if ( is_null( $magicWords ) ) { $magicWords = new MagicWordArray( array( 'url_path', 'url_query', 'url_wiki' ) ); } - switch( $magicWords->matchStartToEnd( $arg ) ) { + switch ( $magicWords->matchStartToEnd( $arg ) ) { // Encode as though it's a wiki page, '_' for ' '. case 'url_wiki': @@ -248,14 +251,15 @@ class CoreParserFunctions { # before arriving here; if that's true, then the title can't be created # and the variable will fail. If we can't get a decent title from the first # attempt, url-decode and try for a second. - if( is_null( $title ) ) + if ( is_null( $title ) ) { $title = Title::newFromURL( urldecode( $s ) ); - if( !is_null( $title ) ) { + } + if ( !is_null( $title ) ) { # Convert NS_MEDIA -> NS_FILE - if( $title->getNamespace() == NS_MEDIA ) { + if ( $title->getNamespace() == NS_MEDIA ) { $title = Title::makeTitle( NS_FILE, $title->getDBkey() ); } - if( !is_null( $arg ) ) { + if ( !is_null( $arg ) ) { $text = $title->$func( $arg ); } else { $text = $title->$func(); @@ -359,30 +363,49 @@ class CoreParserFunctions { static function displaytitle( $parser, $text = '' ) { global $wgRestrictDisplayTitle; - #parse a limited subset of wiki markup (just the single quote items) + // parse a limited subset of wiki markup (just the single quote items) $text = $parser->doQuotes( $text ); - #remove stripped text (e.g. the UNIQ-QINU stuff) that was generated by tag extensions/whatever + // remove stripped text (e.g. the UNIQ-QINU stuff) that was generated by tag extensions/whatever $text = preg_replace( '/' . preg_quote( $parser->uniqPrefix(), '/' ) . '.*?' . preg_quote( Parser::MARKER_SUFFIX, '/' ) . '/', '', $text ); - #list of disallowed tags for DISPLAYTITLE - #these will be escaped even though they are allowed in normal wiki text + // list of disallowed tags for DISPLAYTITLE + // these will be escaped even though they are allowed in normal wiki text $bad = array( 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'blockquote', 'ol', 'ul', 'li', 'hr', 'table', 'tr', 'th', 'td', 'dl', 'dd', 'caption', 'p', 'ruby', 'rb', 'rt', 'rp', 'br' ); - #only requested titles that normalize to the actual title are allowed through - #if $wgRestrictDisplayTitle is true (it is by default) - #mimic the escaping process that occurs in OutputPage::setPageTitle - $text = Sanitizer::normalizeCharReferences( Sanitizer::removeHTMLtags( $text, null, array(), array(), $bad ) ); + // disallow some styles that could be used to bypass $wgRestrictDisplayTitle + if ( $wgRestrictDisplayTitle ) { + $htmlTagsCallback = function ( &$params ) { + $decoded = Sanitizer::decodeTagAttributes( $params ); + + if ( isset( $decoded['style'] ) ) { + // this is called later anyway, but we need it right now for the regexes below to be safe + // calling it twice doesn't hurt + $decoded['style'] = Sanitizer::checkCss( $decoded['style'] ); + + if ( preg_match( '/(display|user-select|visibility)\s*:/i', $decoded['style'] ) ) { + $decoded['style'] = '/* attempt to bypass $wgRestrictDisplayTitle */'; + } + } + + $params = Sanitizer::safeEncodeTagAttributes( $decoded ); + }; + } else { + $htmlTagsCallback = null; + } + + // only requested titles that normalize to the actual title are allowed through + // if $wgRestrictDisplayTitle is true (it is by default) + // mimic the escaping process that occurs in OutputPage::setPageTitle + $text = Sanitizer::normalizeCharReferences( Sanitizer::removeHTMLtags( $text, $htmlTagsCallback, array(), array(), $bad ) ); $title = Title::newFromText( Sanitizer::stripAllTags( $text ) ); - if( !$wgRestrictDisplayTitle ) { + if ( !$wgRestrictDisplayTitle ) { + $parser->mOutput->setDisplayTitle( $text ); + } elseif ( $title instanceof Title && $title->getFragment() == '' && $title->equals( $parser->mTitle ) ) { $parser->mOutput->setDisplayTitle( $text ); - } else { - if ( $title instanceof Title && $title->getFragment() == '' && $title->equals( $parser->mTitle ) ) { - $parser->mOutput->setDisplayTitle( $text ); - } } return ''; @@ -404,7 +427,7 @@ class CoreParserFunctions { } static function formatRaw( $num, $raw ) { - if( self::matchAgainstMagicword( 'rawsuffix', $raw ) ) { + if ( self::matchAgainstMagicword( 'rawsuffix', $raw ) ) { return $num; } else { global $wgContLang; @@ -433,12 +456,13 @@ class CoreParserFunctions { return self::formatRaw( SiteStats::edits(), $raw ); } static function numberofviews( $parser, $raw = null ) { - return self::formatRaw( SiteStats::views(), $raw ); + global $wgDisableCounters; + return !$wgDisableCounters ? self::formatRaw( SiteStats::views(), $raw ) : ''; } static function pagesinnamespace( $parser, $namespace = 0, $raw = null ) { return self::formatRaw( SiteStats::pagesInNs( intval( $namespace ) ), $raw ); } - static function numberingroup( $parser, $name = '', $raw = null) { + static function numberingroup( $parser, $name = '', $raw = null ) { return self::formatRaw( SiteStats::numberingroup( strtolower( $name ) ), $raw ); } @@ -451,44 +475,51 @@ class CoreParserFunctions { */ static function mwnamespace( $parser, $title = null ) { $t = Title::newFromText( $title ); - if ( is_null( $t ) ) + if ( is_null( $t ) ) { return ''; + } return str_replace( '_', ' ', $t->getNsText() ); } static function namespacee( $parser, $title = null ) { $t = Title::newFromText( $title ); - if ( is_null( $t ) ) + if ( is_null( $t ) ) { return ''; + } return wfUrlencode( $t->getNsText() ); } static function namespacenumber( $parser, $title = null ) { $t = Title::newFromText( $title ); - if ( is_null( $t ) ) + if ( is_null( $t ) ) { return ''; + } return $t->getNamespace(); } static function talkspace( $parser, $title = null ) { $t = Title::newFromText( $title ); - if ( is_null( $t ) || !$t->canTalk() ) + if ( is_null( $t ) || !$t->canTalk() ) { return ''; + } return str_replace( '_', ' ', $t->getTalkNsText() ); } static function talkspacee( $parser, $title = null ) { $t = Title::newFromText( $title ); - if ( is_null( $t ) || !$t->canTalk() ) + if ( is_null( $t ) || !$t->canTalk() ) { return ''; + } return wfUrlencode( $t->getTalkNsText() ); } static function subjectspace( $parser, $title = null ) { $t = Title::newFromText( $title ); - if ( is_null( $t ) ) + if ( is_null( $t ) ) { return ''; + } return str_replace( '_', ' ', $t->getSubjectNsText() ); } static function subjectspacee( $parser, $title = null ) { $t = Title::newFromText( $title ); - if ( is_null( $t ) ) + if ( is_null( $t ) ) { return ''; + } return wfUrlencode( $t->getSubjectNsText() ); } @@ -499,75 +530,101 @@ class CoreParserFunctions { */ static function pagename( $parser, $title = null ) { $t = Title::newFromText( $title ); - if ( is_null( $t ) ) + if ( is_null( $t ) ) { return ''; + } return wfEscapeWikiText( $t->getText() ); } static function pagenamee( $parser, $title = null ) { $t = Title::newFromText( $title ); - if ( is_null( $t ) ) + if ( is_null( $t ) ) { return ''; + } return wfEscapeWikiText( $t->getPartialURL() ); } static function fullpagename( $parser, $title = null ) { $t = Title::newFromText( $title ); - if ( is_null( $t ) || !$t->canTalk() ) + if ( is_null( $t ) || !$t->canTalk() ) { return ''; + } return wfEscapeWikiText( $t->getPrefixedText() ); } static function fullpagenamee( $parser, $title = null ) { $t = Title::newFromText( $title ); - if ( is_null( $t ) || !$t->canTalk() ) + if ( is_null( $t ) || !$t->canTalk() ) { return ''; + } return wfEscapeWikiText( $t->getPrefixedURL() ); } static function subpagename( $parser, $title = null ) { $t = Title::newFromText( $title ); - if ( is_null( $t ) ) + if ( is_null( $t ) ) { return ''; + } return wfEscapeWikiText( $t->getSubpageText() ); } static function subpagenamee( $parser, $title = null ) { $t = Title::newFromText( $title ); - if ( is_null( $t ) ) + if ( is_null( $t ) ) { return ''; + } return wfEscapeWikiText( $t->getSubpageUrlForm() ); } + static function rootpagename( $parser, $title = null ) { + $t = Title::newFromText( $title ); + if ( is_null( $t ) ) { + return ''; + } + return wfEscapeWikiText( $t->getRootText() ); + } + static function rootpagenamee( $parser, $title = null ) { + $t = Title::newFromText( $title ); + if ( is_null( $t ) ) { + return ''; + } + return wfEscapeWikiText( wfUrlEncode( str_replace( ' ', '_', $t->getRootText() ) ) ); + } static function basepagename( $parser, $title = null ) { $t = Title::newFromText( $title ); - if ( is_null( $t ) ) + if ( is_null( $t ) ) { return ''; + } return wfEscapeWikiText( $t->getBaseText() ); } static function basepagenamee( $parser, $title = null ) { $t = Title::newFromText( $title ); - if ( is_null( $t ) ) + if ( is_null( $t ) ) { return ''; + } return wfEscapeWikiText( wfUrlEncode( str_replace( ' ', '_', $t->getBaseText() ) ) ); } static function talkpagename( $parser, $title = null ) { $t = Title::newFromText( $title ); - if ( is_null( $t ) || !$t->canTalk() ) + if ( is_null( $t ) || !$t->canTalk() ) { return ''; + } return wfEscapeWikiText( $t->getTalkPage()->getPrefixedText() ); } static function talkpagenamee( $parser, $title = null ) { $t = Title::newFromText( $title ); - if ( is_null( $t ) || !$t->canTalk() ) + if ( is_null( $t ) || !$t->canTalk() ) { return ''; - return wfEscapeWikiText( $t->getTalkPage()->getPrefixedUrl() ); + } + return wfEscapeWikiText( $t->getTalkPage()->getPrefixedURL() ); } static function subjectpagename( $parser, $title = null ) { $t = Title::newFromText( $title ); - if ( is_null( $t ) ) + if ( is_null( $t ) ) { return ''; + } return wfEscapeWikiText( $t->getSubjectPage()->getPrefixedText() ); } static function subjectpagenamee( $parser, $title = null ) { $t = Title::newFromText( $title ); - if ( is_null( $t ) ) + if ( is_null( $t ) ) { return ''; - return wfEscapeWikiText( $t->getSubjectPage()->getPrefixedUrl() ); + } + return wfEscapeWikiText( $t->getSubjectPage()->getPrefixedURL() ); } /** @@ -577,6 +634,7 @@ class CoreParserFunctions { * @return string */ static function pagesincategory( $parser, $name = '', $arg1 = null, $arg2 = null ) { + global $wgContLang; static $magicWords = null; if ( is_null( $magicWords ) ) { $magicWords = new MagicWordArray( array( @@ -589,7 +647,7 @@ class CoreParserFunctions { static $cache = array(); // split the given option to its variable - if( self::matchAgainstMagicword( 'rawsuffix', $arg1 ) ) { + if ( self::matchAgainstMagicword( 'rawsuffix', $arg1 ) ) { //{{pagesincategory:|raw[|type]}} $raw = $arg1; $type = $magicWords->matchStartToEnd( $arg2 ); @@ -598,23 +656,24 @@ class CoreParserFunctions { $type = $magicWords->matchStartToEnd( $arg1 ); $raw = $arg2; } - if( !$type ) { //backward compatibility + if ( !$type ) { //backward compatibility $type = 'pagesincategory_all'; } $title = Title::makeTitleSafe( NS_CATEGORY, $name ); - if( !$title ) { # invalid title + if ( !$title ) { # invalid title return self::formatRaw( 0, $raw ); } + $wgContLang->findVariantLink( $name, $title, true ); // Normalize name for cache $name = $title->getDBkey(); - if( !isset( $cache[$name] ) ) { + if ( !isset( $cache[$name] ) ) { $category = Category::newFromTitle( $title ); $allCount = $subcatCount = $fileCount = $pagesCount = 0; - if( $parser->incrementExpensiveFunctionCount() ) { + if ( $parser->incrementExpensiveFunctionCount() ) { // $allCount is the total number of cat members, // not the count of how many members are normal pages. $allCount = (int)$category->getPageCount(); @@ -636,8 +695,6 @@ class CoreParserFunctions { * Return the size of the given page, or 0 if it's nonexistent. This is an * expensive parser function and can't be called too many times per page. * - * @todo FIXME: This doesn't work correctly on preview for getting the size - * of the current page. * @todo FIXME: Title::getLength() documentation claims that it adds things * to the link cache, so the local cache here should be unnecessary, but * in fact calling getLength() repeatedly for the same $page does seem to @@ -645,15 +702,15 @@ class CoreParserFunctions { * @todo Document parameters * * @param $parser Parser - * @param string $page TODO DOCUMENT (Default: empty string) - * @param $raw TODO DOCUMENT (Default: null) + * @param $page String Name of page to check (Default: empty string) + * @param $raw String Should number be human readable with commas or just number * @return string */ static function pagesize( $parser, $page = '', $raw = null ) { static $cache = array(); $title = Title::newFromText( $page ); - if( !is_object( $title ) ) { + if ( !is_object( $title ) ) { $cache[$page] = 0; return self::formatRaw( 0, $raw ); } @@ -662,9 +719,9 @@ class CoreParserFunctions { $page = $title->getPrefixedText(); $length = 0; - if( isset( $cache[$page] ) ) { + if ( isset( $cache[$page] ) ) { $length = $cache[$page]; - } elseif( $parser->incrementExpensiveFunctionCount() ) { + } elseif ( $parser->incrementExpensiveFunctionCount() ) { $rev = Revision::newFromTitle( $title, false, Revision::READ_NORMAL ); $pageID = $rev ? $rev->getPage() : 0; $revID = $rev ? $rev->getId() : 0; @@ -717,7 +774,9 @@ class CoreParserFunctions { static function pad( $parser, $string, $length, $padding = '0', $direction = STR_PAD_RIGHT ) { $padding = $parser->killMarkers( $padding ); $lengthOfPadding = mb_strlen( $padding ); - if ( $lengthOfPadding == 0 ) return $string; + if ( $lengthOfPadding == 0 ) { + return $string; + } # The remaining length to add counts down to 0 as padding is added $length = min( $length, 500 ) - mb_strlen( $string ); @@ -762,7 +821,9 @@ class CoreParserFunctions { $title = SpecialPage::getTitleFor( $page, $subpage ); return $title->getPrefixedText(); } else { - return wfMessage( 'nosuchspecialpage' )->inContentLanguage()->text(); + // unknown special page, just use the given text as its title, if at all possible + $title = Title::makeTitleSafe( NS_SPECIAL, $text ); + return $title ? $title->getPrefixedText() : self::special( $parser, 'Badtitle' ); } } @@ -786,19 +847,25 @@ class CoreParserFunctions { $arg = $magicWords->matchStartToEnd( $uarg ); $text = trim( $text ); - if( strlen( $text ) == 0 ) + if ( strlen( $text ) == 0 ) { return ''; + } $old = $parser->getCustomDefaultSort(); if ( $old === false || $arg !== 'defaultsort_noreplace' ) { $parser->setDefaultSort( $text ); } - if( $old === false || $old == $text || $arg ) { + if ( $old === false || $old == $text || $arg ) { return ''; } else { - return( '' . - wfMessage( 'duplicate-defaultsort', $old, $text )->inContentLanguage()->escaped() . - '' ); + $converter = $parser->getConverterLanguage()->getConverter(); + return '' . + wfMessage( 'duplicate-defaultsort', + // Message should be parsed, but these params should only be escaped. + $converter->markNoConversion( wfEscapeWikiText( $old ) ), + $converter->markNoConversion( wfEscapeWikiText( $text ) ) + )->inContentLanguage()->text() . + ''; } } @@ -807,14 +874,14 @@ class CoreParserFunctions { public static function filepath( $parser, $name = '', $argA = '', $argB = '' ) { $file = wfFindFile( $name ); - if( $argA == 'nowiki' ) { + if ( $argA == 'nowiki' ) { // {{filepath: | option [| size] }} $isNowiki = true; $parsedWidthParam = $parser->parseWidthParam( $argB ); } else { // {{filepath: [| size [|option]] }} $parsedWidthParam = $parser->parseWidthParam( $argA ); - $isNowiki = ($argB == 'nowiki'); + $isNowiki = ( $argB == 'nowiki' ); } if ( $file ) { diff --git a/includes/parser/CoreTagHooks.php b/includes/parser/CoreTagHooks.php index 65051839..a2eb6987 100644 --- a/includes/parser/CoreTagHooks.php +++ b/includes/parser/CoreTagHooks.php @@ -77,7 +77,7 @@ class CoreTagHooks { */ static function html( $content, $attributes, $parser ) { global $wgRawHtml; - if( $wgRawHtml ) { + if ( $wgRawHtml ) { return array( $content, 'markerType' => 'nowiki' ); } else { throw new MWException( ' extension tag encountered unexpectedly' ); diff --git a/includes/parser/DateFormatter.php b/includes/parser/DateFormatter.php index a2da3074..0a69b045 100644 --- a/includes/parser/DateFormatter.php +++ b/includes/parser/DateFormatter.php @@ -156,10 +156,10 @@ class DateFormatter { } for ( $i = 1; $i <= self::LAST; $i++ ) { $this->mSource = $i; - if ( isset ( $this->rules[$preference][$i] ) ) { + if ( isset( $this->rules[$preference][$i] ) ) { # Specific rules $this->mTarget = $this->rules[$preference][$i]; - } elseif ( isset ( $this->rules[self::ALL][$i] ) ) { + } elseif ( isset( $this->rules[self::ALL][$i] ) ) { # General rules $this->mTarget = $this->rules[self::ALL][$i]; } elseif ( $preference ) { @@ -198,14 +198,15 @@ class DateFormatter { function replace( $matches ) { # Extract information from $matches $linked = true; - if ( isset( $this->mLinked ) ) + if ( isset( $this->mLinked ) ) { $linked = $this->mLinked; + } $bits = array(); $key = $this->keys[$this->mSource]; for ( $p = 0; $p < strlen( $key ); $p++ ) { if ( $key[$p] != ' ' ) { - $bits[$key[$p]] = $matches[$p+1]; + $bits[$key[$p]] = $matches[$p + 1]; } } @@ -232,10 +233,12 @@ class DateFormatter { $fail = false; // Pre-generate y/Y stuff because we need the year for the title. - if ( !isset( $bits['y'] ) && isset( $bits['Y'] ) ) + if ( !isset( $bits['y'] ) && isset( $bits['Y'] ) ) { $bits['y'] = $this->makeIsoYear( $bits['Y'] ); - if ( !isset( $bits['Y'] ) && isset( $bits['y'] ) ) + } + if ( !isset( $bits['Y'] ) && isset( $bits['y'] ) ) { $bits['Y'] = $this->makeNormalYear( $bits['y'] ); + } if ( !isset( $bits['m'] ) ) { $m = $this->makeIsoMonth( $bits['F'] ); @@ -293,8 +296,9 @@ class DateFormatter { } $isoBits = array(); - if ( isset( $bits['y'] ) ) + if ( isset( $bits['y'] ) ) { $isoBits[] = $bits['y']; + } $isoBits[] = $bits['m']; $isoBits[] = $bits['d']; $isoDate = implode( '-', $isoBits ); @@ -312,7 +316,7 @@ class DateFormatter { */ function getMonthRegex() { $names = array(); - for( $i = 1; $i <= 12; $i++ ) { + for ( $i = 1; $i <= 12; $i++ ) { $names[] = $this->lang->getMonthName( $i ); $names[] = $this->lang->getMonthAbbreviation( $i ); } @@ -337,7 +341,7 @@ class DateFormatter { function makeIsoYear( $year ) { # Assumes the year is in a nice format, as enforced by the regex if ( substr( $year, -2 ) == 'BC' ) { - $num = intval(substr( $year, 0, -3 )) - 1; + $num = intval( substr( $year, 0, -3 ) ) - 1; # PHP bug note: sprintf( "%04d", -1 ) fails poorly $text = sprintf( '-%04d', $num ); @@ -353,7 +357,7 @@ class DateFormatter { */ function makeNormalYear( $iso ) { if ( $iso[0] == '-' ) { - $text = (intval( substr( $iso, 1 ) ) + 1) . ' BC'; + $text = ( intval( substr( $iso, 1 ) ) + 1 ) . ' BC'; } else { $text = intval( $iso ); } diff --git a/includes/parser/LinkHolderArray.php b/includes/parser/LinkHolderArray.php index 49b2d333..27ff9e7d 100644 --- a/includes/parser/LinkHolderArray.php +++ b/includes/parser/LinkHolderArray.php @@ -281,88 +281,90 @@ class LinkHolderArray { $linkCache = LinkCache::singleton(); $output = $this->parent->getOutput(); - wfProfileIn( __METHOD__ . '-check' ); - $dbr = wfGetDB( DB_SLAVE ); - $threshold = $this->parent->getOptions()->getStubThreshold(); + if( $linkCache->useDatabase() ) { + wfProfileIn( __METHOD__ . '-check' ); + $dbr = wfGetDB( DB_SLAVE ); + $threshold = $this->parent->getOptions()->getStubThreshold(); - # Sort by namespace - ksort( $this->internals ); + # Sort by namespace + ksort( $this->internals ); - $linkcolour_ids = array(); + $linkcolour_ids = array(); - # Generate query - $queries = array(); - foreach ( $this->internals as $ns => $entries ) { - foreach ( $entries as $entry ) { - $title = $entry['title']; - $pdbk = $entry['pdbk']; + # Generate query + $queries = array(); + foreach ( $this->internals as $ns => $entries ) { + foreach ( $entries as $entry ) { + $title = $entry['title']; + $pdbk = $entry['pdbk']; - # Skip invalid entries. - # Result will be ugly, but prevents crash. - if ( is_null( $title ) ) { - continue; - } + # Skip invalid entries. + # Result will be ugly, but prevents crash. + if ( is_null( $title ) ) { + continue; + } - # Check if it's a static known link, e.g. interwiki - if ( $title->isAlwaysKnown() ) { - $colours[$pdbk] = ''; - } elseif ( $ns == NS_SPECIAL ) { - $colours[$pdbk] = 'new'; - } elseif ( ( $id = $linkCache->getGoodLinkID( $pdbk ) ) != 0 ) { - $colours[$pdbk] = Linker::getLinkColour( $title, $threshold ); - $output->addLink( $title, $id ); - $linkcolour_ids[$id] = $pdbk; - } elseif ( $linkCache->isBadLink( $pdbk ) ) { - $colours[$pdbk] = 'new'; - } else { - # Not in the link cache, add it to the query - $queries[$ns][] = $title->getDBkey(); + # Check if it's a static known link, e.g. interwiki + if ( $title->isAlwaysKnown() ) { + $colours[$pdbk] = ''; + } elseif ( $ns == NS_SPECIAL ) { + $colours[$pdbk] = 'new'; + } elseif ( ( $id = $linkCache->getGoodLinkID( $pdbk ) ) != 0 ) { + $colours[$pdbk] = Linker::getLinkColour( $title, $threshold ); + $output->addLink( $title, $id ); + $linkcolour_ids[$id] = $pdbk; + } elseif ( $linkCache->isBadLink( $pdbk ) ) { + $colours[$pdbk] = 'new'; + } else { + # Not in the link cache, add it to the query + $queries[$ns][] = $title->getDBkey(); + } } } - } - if ( $queries ) { - $where = array(); - foreach( $queries as $ns => $pages ) { - $where[] = $dbr->makeList( - array( - 'page_namespace' => $ns, - 'page_title' => $pages, - ), - LIST_AND - ); - } + if ( $queries ) { + $where = array(); + foreach ( $queries as $ns => $pages ) { + $where[] = $dbr->makeList( + array( + 'page_namespace' => $ns, + 'page_title' => $pages, + ), + LIST_AND + ); + } - $res = $dbr->select( - 'page', - array( 'page_id', 'page_namespace', 'page_title', 'page_is_redirect', 'page_len', 'page_latest' ), - $dbr->makeList( $where, LIST_OR ), - __METHOD__ - ); + $res = $dbr->select( + 'page', + array( 'page_id', 'page_namespace', 'page_title', 'page_is_redirect', 'page_len', 'page_latest' ), + $dbr->makeList( $where, LIST_OR ), + __METHOD__ + ); - # Fetch data and form into an associative array - # non-existent = broken - foreach ( $res as $s ) { - $title = Title::makeTitle( $s->page_namespace, $s->page_title ); - $pdbk = $title->getPrefixedDBkey(); - $linkCache->addGoodLinkObjFromRow( $title, $s ); - $output->addLink( $title, $s->page_id ); - # @todo FIXME: Convoluted data flow - # The redirect status and length is passed to getLinkColour via the LinkCache - # Use formal parameters instead - $colours[$pdbk] = Linker::getLinkColour( $title, $threshold ); - //add id to the extension todolist - $linkcolour_ids[$s->page_id] = $pdbk; + # Fetch data and form into an associative array + # non-existent = broken + foreach ( $res as $s ) { + $title = Title::makeTitle( $s->page_namespace, $s->page_title ); + $pdbk = $title->getPrefixedDBkey(); + $linkCache->addGoodLinkObjFromRow( $title, $s ); + $output->addLink( $title, $s->page_id ); + # @todo FIXME: Convoluted data flow + # The redirect status and length is passed to getLinkColour via the LinkCache + # Use formal parameters instead + $colours[$pdbk] = Linker::getLinkColour( $title, $threshold ); + //add id to the extension todolist + $linkcolour_ids[$s->page_id] = $pdbk; + } + unset( $res ); } - unset( $res ); - } - if ( count( $linkcolour_ids ) ) { - //pass an array of page_ids to an extension - wfRunHooks( 'GetLinkColours', array( $linkcolour_ids, &$colours ) ); + if ( count( $linkcolour_ids ) ) { + //pass an array of page_ids to an extension + wfRunHooks( 'GetLinkColours', array( $linkcolour_ids, &$colours ) ); + } + wfProfileOut( __METHOD__ . '-check' ); } - wfProfileOut( __METHOD__ . '-check' ); # Do a second query for different language variants of links and categories - if( $wgContLang->hasVariants() ) { + if ( $wgContLang->hasVariants() ) { $this->doVariants( $colours ); } @@ -377,6 +379,10 @@ class LinkHolderArray { $key = "$ns:$index"; $searchkey = ""; $displayText = $entry['text']; + if ( isset( $entry['selflink'] ) ) { + $replacePairs[$searchkey] = Linker::makeSelfLinkObj( $title, $displayText, $query ); + continue; + } if ( $displayText === '' ) { $displayText = null; } @@ -406,7 +412,8 @@ class LinkHolderArray { $text = preg_replace_callback( '/()/', $replacer->cb(), - $text); + $text + ); wfProfileOut( __METHOD__ . '-replace' ); wfProfileOut( __METHOD__ ); @@ -424,7 +431,7 @@ class LinkHolderArray { # Make interwiki link HTML $output = $this->parent->getOutput(); $replacePairs = array(); - foreach( $this->interwikis as $key => $link ) { + foreach ( $this->interwikis as $key => $link ) { $replacePairs[$key] = Linker::link( $link['title'], $link['text'] ); $output->addInterwikiLink( $link['title'] ); } @@ -454,20 +461,17 @@ class LinkHolderArray { // single string to all variants. This would improve parser's performance // significantly. foreach ( $this->internals as $ns => $entries ) { + if ( $ns == NS_SPECIAL ) { + continue; + } foreach ( $entries as $index => $entry ) { $pdbk = $entry['pdbk']; // we only deal with new links (in its first query) if ( !isset( $colours[$pdbk] ) || $colours[$pdbk] === 'new' ) { - $title = $entry['title']; - $titleText = $title->getText(); - $titlesAttrs[] = array( - 'ns' => $ns, - 'key' => "$ns:$index", - 'titleText' => $titleText, - ); + $titlesAttrs[] = array( $index, $entry['title'] ); // separate titles with \0 because it would never appears // in a valid title - $titlesToBeConverted .= $titleText . "\0"; + $titlesToBeConverted .= $entry['title']->getText() . "\0"; } } } @@ -478,19 +482,35 @@ class LinkHolderArray { foreach ( $titlesAllVariants as &$titlesVariant ) { $titlesVariant = explode( "\0", $titlesVariant ); } - $l = count( $titlesAttrs ); + // Then add variants of links to link batch - for ( $i = 0; $i < $l; $i ++ ) { + $parentTitle = $this->parent->getTitle(); + foreach ( $titlesAttrs as $i => $attrs ) { + list( $index, $title ) = $attrs; + $ns = $title->getNamespace(); + $text = $title->getText(); + foreach ( $allVariantsName as $variantName ) { $textVariant = $titlesAllVariants[$variantName][$i]; - if ( $textVariant != $titlesAttrs[$i]['titleText'] ) { - $variantTitle = Title::makeTitle( $titlesAttrs[$i]['ns'], $textVariant ); - if( is_null( $variantTitle ) ) { - continue; - } - $linkBatch->addObj( $variantTitle ); - $variantMap[$variantTitle->getPrefixedDBkey()][] = $titlesAttrs[$i]['key']; + if ( $textVariant === $text ) { + continue; } + + $variantTitle = Title::makeTitle( $ns, $textVariant ); + if ( is_null( $variantTitle ) ) { + continue; + } + + // Self-link checking for mixed/different variant titles. At this point, we + // already know the exact title does not exist, so the link cannot be to a + // variant of the current title that exists as a separate page. + if ( $variantTitle->equals( $parentTitle ) && $title->getFragment() === '' ) { + $this->internals[$ns][$index]['selflink'] = true; + continue 2; + } + + $linkBatch->addObj( $variantTitle ); + $variantMap[$variantTitle->getPrefixedDBkey()][] = "$ns:$index"; } } @@ -513,7 +533,7 @@ class LinkHolderArray { } } - if( !$linkBatch->isEmpty() ) { + if ( !$linkBatch->isEmpty() ) { // construct query $dbr = wfGetDB( DB_SLAVE ); $varRes = $dbr->select( 'page', @@ -532,14 +552,14 @@ class LinkHolderArray { $vardbk = $variantTitle->getDBkey(); $holderKeys = array(); - if( isset( $variantMap[$varPdbk] ) ) { + if ( isset( $variantMap[$varPdbk] ) ) { $holderKeys = $variantMap[$varPdbk]; $linkCache->addGoodLinkObjFromRow( $variantTitle, $s ); $output->addLink( $variantTitle, $s->page_id ); } // loop over link holders - foreach( $holderKeys as $key ) { + foreach ( $holderKeys as $key ) { list( $ns, $index ) = explode( ':', $key, 2 ); $entry =& $this->internals[$ns][$index]; $pdbk = $entry['pdbk']; @@ -569,12 +589,12 @@ class LinkHolderArray { wfRunHooks( 'GetLinkColours', array( $linkcolour_ids, &$colours ) ); // rebuild the categories in original order (if there are replacements) - if( count( $varCategories ) > 0 ) { + if ( count( $varCategories ) > 0 ) { $newCats = array(); $originalCats = $output->getCategories(); - foreach( $originalCats as $cat => $sortkey ) { + foreach ( $originalCats as $cat => $sortkey ) { // make the replacement - if( array_key_exists( $cat, $varCategories ) ) { + if ( array_key_exists( $cat, $varCategories ) ) { $newCats[$varCategories[$cat]] = $sortkey; } else { $newCats[$cat] = $sortkey; @@ -614,13 +634,13 @@ class LinkHolderArray { function replaceTextCallback( $matches ) { $type = $matches[1]; $key = $matches[2]; - if( $type == 'LINK' ) { + if ( $type == 'LINK' ) { list( $ns, $index ) = explode( ':', $key, 2 ); - if( isset( $this->internals[$ns][$index]['text'] ) ) { + if ( isset( $this->internals[$ns][$index]['text'] ) ) { return $this->internals[$ns][$index]['text']; } - } elseif( $type == 'IWLINK' ) { - if( isset( $this->interwikis[$key]['text'] ) ) { + } elseif ( $type == 'IWLINK' ) { + if ( isset( $this->interwikis[$key]['text'] ) ) { return $this->interwikis[$key]['text']; } } diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index 5ef0bc71..1f14223d 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -28,7 +28,7 @@ /** * PHP Parser - Processes wiki markup (which uses a more user-friendly * syntax, such as "[[link]]" for making links), and provides a one-way - * transformation of that wiki markup it into XHTML output / markup + * transformation of that wiki markup it into (X)HTML output / markup * (which in turn the browser understands, and can display). * * There are seven main entry points into the Parser class: @@ -54,7 +54,6 @@ * @warning $wgUser or $wgTitle or $wgRequest or $wgLang. Keep them away! * * @par Settings: - * $wgLocaltimezone * $wgNamespacesWithSubpages * * @par Settings only within ParserOptions: @@ -116,6 +115,10 @@ class Parser { # Marker Suffix needs to be accessible staticly. const MARKER_SUFFIX = "-QINU\x7f"; + # Markers used for wrapping the table of contents + const TOC_START = ''; + const TOC_END = ''; + # Persistent: var $mTagHooks = array(); var $mTransparentTagHooks = array(); @@ -192,7 +195,9 @@ class Parser { var $mRevisionId; # ID to display in {{REVISIONID}} tags var $mRevisionTimestamp; # The timestamp of the specified revision ID var $mRevisionUser; # User to display in {{REVISIONUSER}} tag + var $mRevisionSize; # Size to display in {{REVISIONSIZE}} variable var $mRevIdForTs; # The revision ID which was used to fetch the timestamp + var $mInputSize = false; # For {{PAGESIZE}} on current page. /** * @var string @@ -218,8 +223,8 @@ class Parser { self::EXT_LINK_URL_CLASS . '+)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su'; if ( isset( $conf['preprocessorClass'] ) ) { $this->mPreprocessorClass = $conf['preprocessorClass']; - } elseif ( defined( 'MW_COMPILED' ) ) { - # Preprocessor_Hash is much faster than Preprocessor_DOM in compiled mode + } elseif ( defined( 'HPHP_VERSION' ) ) { + # Preprocessor_Hash is much faster than Preprocessor_DOM under HipHop $this->mPreprocessorClass = 'Preprocessor_Hash'; } elseif ( extension_loaded( 'domxml' ) ) { # PECL extension that conflicts with the core DOM extension (bug 13770) @@ -292,7 +297,7 @@ class Parser { $this->mLinkHolders = new LinkHolderArray( $this ); $this->mLinkID = 0; $this->mRevisionObject = $this->mRevisionTimestamp = - $this->mRevisionId = $this->mRevisionUser = null; + $this->mRevisionId = $this->mRevisionUser = $this->mRevisionSize = null; $this->mVarCache = array(); $this->mUser = null; $this->mLangLinkLanguages = array(); @@ -354,13 +359,18 @@ class Parser { * to internalParse() which does all the real work. */ - global $wgUseTidy, $wgAlwaysUseTidy; + global $wgUseTidy, $wgAlwaysUseTidy, $wgShowHostnames; $fname = __METHOD__ . '-' . wfGetCaller(); wfProfileIn( __METHOD__ ); wfProfileIn( $fname ); $this->startParse( $title, $options, self::OT_HTML, $clearState ); + $this->mInputSize = strlen( $text ); + if ( $this->mOptions->getEnableLimitReport() ) { + $this->mOutput->resetParseStartTime(); + } + # Remove the strip marker tag prefix from the input, if present. if ( $clearState ) { $text = str_replace( $this->mUniqPrefix, '', $text ); @@ -370,11 +380,13 @@ class Parser { $oldRevisionObject = $this->mRevisionObject; $oldRevisionTimestamp = $this->mRevisionTimestamp; $oldRevisionUser = $this->mRevisionUser; + $oldRevisionSize = $this->mRevisionSize; if ( $revid !== null ) { $this->mRevisionId = $revid; $this->mRevisionObject = null; $this->mRevisionTimestamp = null; $this->mRevisionUser = null; + $this->mRevisionSize = null; } wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) ); @@ -490,22 +502,67 @@ class Parser { # Information on include size limits, for the benefit of users who try to skirt them if ( $this->mOptions->getEnableLimitReport() ) { $max = $this->mOptions->getMaxIncludeSize(); - $PFreport = "Expensive parser function count: {$this->mExpensiveFunctionCount}/{$this->mOptions->getExpensiveParserFunctionLimit()}\n"; - $limitReport = - "NewPP limit report\n" . - "Preprocessor visited node count: {$this->mPPNodeCount}/{$this->mOptions->getMaxPPNodeCount()}\n" . - "Preprocessor generated node count: " . - "{$this->mGeneratedPPNodeCount}/{$this->mOptions->getMaxGeneratedPPNodeCount()}\n" . - "Post-expand include size: {$this->mIncludeSizes['post-expand']}/$max bytes\n" . - "Template argument size: {$this->mIncludeSizes['arg']}/$max bytes\n" . - "Highest expansion depth: {$this->mHighestExpansionDepth}/{$this->mOptions->getMaxPPExpandDepth()}\n" . - $PFreport; + + $cpuTime = $this->mOutput->getTimeSinceStart( 'cpu' ); + if ( $cpuTime !== null ) { + $this->mOutput->setLimitReportData( 'limitreport-cputime', + sprintf( "%.3f", $cpuTime ) + ); + } + + $wallTime = $this->mOutput->getTimeSinceStart( 'wall' ); + $this->mOutput->setLimitReportData( 'limitreport-walltime', + sprintf( "%.3f", $wallTime ) + ); + + $this->mOutput->setLimitReportData( 'limitreport-ppvisitednodes', + array( $this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount() ) + ); + $this->mOutput->setLimitReportData( 'limitreport-ppgeneratednodes', + array( $this->mGeneratedPPNodeCount, $this->mOptions->getMaxGeneratedPPNodeCount() ) + ); + $this->mOutput->setLimitReportData( 'limitreport-postexpandincludesize', + array( $this->mIncludeSizes['post-expand'], $max ) + ); + $this->mOutput->setLimitReportData( 'limitreport-templateargumentsize', + array( $this->mIncludeSizes['arg'], $max ) + ); + $this->mOutput->setLimitReportData( 'limitreport-expansiondepth', + array( $this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth() ) + ); + $this->mOutput->setLimitReportData( 'limitreport-expensivefunctioncount', + array( $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit() ) + ); + wfRunHooks( 'ParserLimitReportPrepare', array( $this, $this->mOutput ) ); + + $limitReport = "NewPP limit report\n"; + if ( $wgShowHostnames ) { + $limitReport .= 'Parsed by ' . wfHostname() . "\n"; + } + foreach ( $this->mOutput->getLimitReportData() as $key => $value ) { + if ( wfRunHooks( 'ParserLimitReportFormat', + array( $key, $value, &$limitReport, false, false ) + ) ) { + $keyMsg = wfMessage( $key )->inLanguage( 'en' )->useDatabase( false ); + $valueMsg = wfMessage( array( "$key-value-text", "$key-value" ) ) + ->inLanguage( 'en' )->useDatabase( false ); + if ( !$valueMsg->exists() ) { + $valueMsg = new RawMessage( '$1' ); + } + if ( !$keyMsg->isDisabled() && !$valueMsg->isDisabled() ) { + $valueMsg->params( $value ); + $limitReport .= "{$keyMsg->text()}: {$valueMsg->text()}\n"; + } + } + } + // Since we're not really outputting HTML, decode the entities and + // then re-encode the things that need hiding inside HTML comments. + $limitReport = htmlspecialchars_decode( $limitReport ); wfRunHooks( 'ParserLimitReport', array( $this, &$limitReport ) ); // Sanitize for comment. Note '‐' in the replacement is U+2010, // which looks much like the problematic '-'. $limitReport = str_replace( array( '-', '&' ), array( '‐', '&' ), $limitReport ); - $text .= "\n\n"; if ( $this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10 ) { @@ -519,6 +576,8 @@ class Parser { $this->mRevisionObject = $oldRevisionObject; $this->mRevisionTimestamp = $oldRevisionTimestamp; $this->mRevisionUser = $oldRevisionUser; + $this->mRevisionSize = $oldRevisionSize; + $this->mInputSize = false; wfProfileOut( $fname ); wfProfileOut( __METHOD__ ); @@ -536,7 +595,7 @@ class Parser { * * @return string */ - function recursiveTagParse( $text, $frame=false ) { + function recursiveTagParse( $text, $frame = false ) { wfProfileIn( __METHOD__ ); wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) ); wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) ); @@ -769,9 +828,9 @@ class Parser { if ( $target !== null ) { return $target; - } elseif( $this->mOptions->getInterfaceMessage() ) { + } elseif ( $this->mOptions->getInterfaceMessage() ) { return $this->mOptions->getUserLangObj(); - } elseif( is_null( $this->mTitle ) ) { + } elseif ( is_null( $this->mTitle ) ) { throw new MWException( __METHOD__ . ': $this->mTitle is null' ); } @@ -1256,8 +1315,8 @@ class Parser { 'x' => 'X', )); $titleObj = SpecialPage::getTitleFor( 'Booksources', $num ); - return'getLocalURL() ) . "\" class=\"internal mw-magiclink-isbn\">ISBN $isbn"; } else { return $m[0]; @@ -1366,165 +1425,186 @@ class Parser { */ public function doQuotes( $text ) { $arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE ); - if ( count( $arr ) == 1 ) { + $countarr = count( $arr ); + if ( $countarr == 1 ) { return $text; - } else { - # First, do some preliminary work. This may shift some apostrophes from - # being mark-up to being text. It also counts the number of occurrences - # of bold and italics mark-ups. - $numbold = 0; - $numitalics = 0; - for ( $i = 0; $i < count( $arr ); $i++ ) { - if ( ( $i % 2 ) == 1 ) { - # If there are ever four apostrophes, assume the first is supposed to - # be text, and the remaining three constitute mark-up for bold text. - if ( strlen( $arr[$i] ) == 4 ) { - $arr[$i-1] .= "'"; - $arr[$i] = "'''"; - } elseif ( strlen( $arr[$i] ) > 5 ) { - # If there are more than 5 apostrophes in a row, assume they're all - # text except for the last 5. - $arr[$i-1] .= str_repeat( "'", strlen( $arr[$i] ) - 5 ); - $arr[$i] = "'''''"; - } - # Count the number of occurrences of bold and italics mark-ups. - # We are not counting sequences of five apostrophes. - if ( strlen( $arr[$i] ) == 2 ) { - $numitalics++; - } elseif ( strlen( $arr[$i] ) == 3 ) { - $numbold++; - } elseif ( strlen( $arr[$i] ) == 5 ) { - $numitalics++; - $numbold++; - } - } + } + + // First, do some preliminary work. This may shift some apostrophes from + // being mark-up to being text. It also counts the number of occurrences + // of bold and italics mark-ups. + $numbold = 0; + $numitalics = 0; + for ( $i = 1; $i < $countarr; $i += 2 ) { + $thislen = strlen( $arr[$i] ); + // If there are ever four apostrophes, assume the first is supposed to + // be text, and the remaining three constitute mark-up for bold text. + // (bug 13227: ''''foo'''' turns into ' ''' foo ' ''') + if ( $thislen == 4 ) { + $arr[$i - 1] .= "'"; + $arr[$i] = "'''"; + $thislen = 3; + } elseif ( $thislen > 5 ) { + // If there are more than 5 apostrophes in a row, assume they're all + // text except for the last 5. + // (bug 13227: ''''''foo'''''' turns into ' ''''' foo ' ''''') + $arr[$i - 1] .= str_repeat( "'", $thislen - 5 ); + $arr[$i] = "'''''"; + $thislen = 5; } + // Count the number of occurrences of bold and italics mark-ups. + if ( $thislen == 2 ) { + $numitalics++; + } elseif ( $thislen == 3 ) { + $numbold++; + } elseif ( $thislen == 5 ) { + $numitalics++; + $numbold++; + } + } - # If there is an odd number of both bold and italics, it is likely - # that one of the bold ones was meant to be an apostrophe followed - # by italics. Which one we cannot know for certain, but it is more - # likely to be one that has a single-letter word before it. - if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) { - $i = 0; - $firstsingleletterword = -1; - $firstmultiletterword = -1; - $firstspace = -1; - foreach ( $arr as $r ) { - if ( ( $i % 2 == 1 ) and ( strlen( $r ) == 3 ) ) { - $x1 = substr( $arr[$i-1], -1 ); - $x2 = substr( $arr[$i-1], -2, 1 ); - if ( $x1 === ' ' ) { - if ( $firstspace == -1 ) { - $firstspace = $i; - } - } elseif ( $x2 === ' ' ) { - if ( $firstsingleletterword == -1 ) { - $firstsingleletterword = $i; - } - } else { - if ( $firstmultiletterword == -1 ) { - $firstmultiletterword = $i; - } + // If there is an odd number of both bold and italics, it is likely + // that one of the bold ones was meant to be an apostrophe followed + // by italics. Which one we cannot know for certain, but it is more + // likely to be one that has a single-letter word before it. + if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) { + $firstsingleletterword = -1; + $firstmultiletterword = -1; + $firstspace = -1; + for ( $i = 1; $i < $countarr; $i += 2 ) { + if ( strlen( $arr[$i] ) == 3 ) { + $x1 = substr( $arr[$i - 1], -1 ); + $x2 = substr( $arr[$i - 1], -2, 1 ); + if ( $x1 === ' ' ) { + if ( $firstspace == -1 ) { + $firstspace = $i; + } + } elseif ( $x2 === ' ' ) { + if ( $firstsingleletterword == -1 ) { + $firstsingleletterword = $i; + // if $firstsingleletterword is set, we don't + // look at the other options, so we can bail early. + break; + } + } else { + if ( $firstmultiletterword == -1 ) { + $firstmultiletterword = $i; } } - $i++; } + } - # If there is a single-letter word, use it! - if ( $firstsingleletterword > -1 ) { - $arr[$firstsingleletterword] = "''"; - $arr[$firstsingleletterword-1] .= "'"; - } elseif ( $firstmultiletterword > -1 ) { - # If not, but there's a multi-letter word, use that one. - $arr[$firstmultiletterword] = "''"; - $arr[$firstmultiletterword-1] .= "'"; - } elseif ( $firstspace > -1 ) { - # ... otherwise use the first one that has neither. - # (notice that it is possible for all three to be -1 if, for example, - # there is only one pentuple-apostrophe in the line) - $arr[$firstspace] = "''"; - $arr[$firstspace-1] .= "'"; - } + // If there is a single-letter word, use it! + if ( $firstsingleletterword > -1 ) { + $arr[$firstsingleletterword] = "''"; + $arr[$firstsingleletterword - 1] .= "'"; + } elseif ( $firstmultiletterword > -1 ) { + // If not, but there's a multi-letter word, use that one. + $arr[$firstmultiletterword] = "''"; + $arr[$firstmultiletterword - 1] .= "'"; + } elseif ( $firstspace > -1 ) { + // ... otherwise use the first one that has neither. + // (notice that it is possible for all three to be -1 if, for example, + // there is only one pentuple-apostrophe in the line) + $arr[$firstspace] = "''"; + $arr[$firstspace - 1] .= "'"; } + } - # Now let's actually convert our apostrophic mush to HTML! - $output = ''; - $buffer = ''; - $state = ''; - $i = 0; - foreach ( $arr as $r ) { - if ( ( $i % 2 ) == 0 ) { - if ( $state === 'both' ) { - $buffer .= $r; - } else { - $output .= $r; - } + // Now let's actually convert our apostrophic mush to HTML! + $output = ''; + $buffer = ''; + $state = ''; + $i = 0; + foreach ( $arr as $r ) { + if ( ( $i % 2 ) == 0 ) { + if ( $state === 'both' ) { + $buffer .= $r; } else { - if ( strlen( $r ) == 2 ) { - if ( $state === 'i' ) { - $output .= ''; $state = ''; - } elseif ( $state === 'bi' ) { - $output .= ''; $state = 'b'; - } elseif ( $state === 'ib' ) { - $output .= ''; $state = 'b'; - } elseif ( $state === 'both' ) { - $output .= '' . $buffer . ''; $state = 'b'; - } else { # $state can be 'b' or '' - $output .= ''; $state .= 'i'; - } - } elseif ( strlen( $r ) == 3 ) { - if ( $state === 'b' ) { - $output .= ''; $state = ''; - } elseif ( $state === 'bi' ) { - $output .= ''; $state = 'i'; - } elseif ( $state === 'ib' ) { - $output .= ''; $state = 'i'; - } elseif ( $state === 'both' ) { - $output .= '' . $buffer . ''; $state = 'i'; - } else { # $state can be 'i' or '' - $output .= ''; $state .= 'b'; - } - } elseif ( strlen( $r ) == 5 ) { - if ( $state === 'b' ) { - $output .= ''; $state = 'i'; - } elseif ( $state === 'i' ) { - $output .= ''; $state = 'b'; - } elseif ( $state === 'bi' ) { - $output .= ''; $state = ''; - } elseif ( $state === 'ib' ) { - $output .= ''; $state = ''; - } elseif ( $state === 'both' ) { - $output .= '' . $buffer . ''; $state = ''; - } else { # ($state == '') - $buffer = ''; $state = 'both'; - } + $output .= $r; + } + } else { + $thislen = strlen( $r ); + if ( $thislen == 2 ) { + if ( $state === 'i' ) { + $output .= ''; + $state = ''; + } elseif ( $state === 'bi' ) { + $output .= ''; + $state = 'b'; + } elseif ( $state === 'ib' ) { + $output .= ''; + $state = 'b'; + } elseif ( $state === 'both' ) { + $output .= '' . $buffer . ''; + $state = 'b'; + } else { // $state can be 'b' or '' + $output .= ''; + $state .= 'i'; + } + } elseif ( $thislen == 3 ) { + if ( $state === 'b' ) { + $output .= ''; + $state = ''; + } elseif ( $state === 'bi' ) { + $output .= ''; + $state = 'i'; + } elseif ( $state === 'ib' ) { + $output .= ''; + $state = 'i'; + } elseif ( $state === 'both' ) { + $output .= '' . $buffer . ''; + $state = 'i'; + } else { // $state can be 'i' or '' + $output .= ''; + $state .= 'b'; + } + } elseif ( $thislen == 5 ) { + if ( $state === 'b' ) { + $output .= ''; + $state = 'i'; + } elseif ( $state === 'i' ) { + $output .= ''; + $state = 'b'; + } elseif ( $state === 'bi' ) { + $output .= ''; + $state = ''; + } elseif ( $state === 'ib' ) { + $output .= ''; + $state = ''; + } elseif ( $state === 'both' ) { + $output .= '' . $buffer . ''; + $state = ''; + } else { // ($state == '') + $buffer = ''; + $state = 'both'; } } - $i++; - } - # Now close all remaining tags. Notice that the order is important. - if ( $state === 'b' || $state === 'ib' ) { - $output .= ''; } - if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) { - $output .= ''; - } - if ( $state === 'bi' ) { - $output .= ''; - } - # There might be lonely ''''', so make sure we have a buffer - if ( $state === 'both' && $buffer ) { - $output .= '' . $buffer . ''; - } - return $output; + $i++; + } + // Now close all remaining tags. Notice that the order is important. + if ( $state === 'b' || $state === 'ib' ) { + $output .= ''; + } + if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) { + $output .= ''; + } + if ( $state === 'bi' ) { + $output .= ''; + } + // There might be lonely ''''', so make sure we have a buffer + if ( $state === 'both' && $buffer ) { + $output .= '' . $buffer . ''; } + return $output; } /** * Replace external links (REL) * * Note: this is all very hackish and the order of execution matters a lot. - * Make sure to run maintenance/parserTests.php if you change this code. + * Make sure to run tests/parserTests.php if you change this code. * * @private * @@ -1538,12 +1618,13 @@ class Parser { $bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE ); if ( $bits === false ) { + wfProfileOut( __METHOD__ ); throw new MWException( "PCRE needs to be compiled with --enable-unicode-properties in order for MediaWiki to function" ); } $s = array_shift( $bits ); $i = 0; - while ( $igetFragment() === '' && $ns != NS_SPECIAL ) { - if ( $nt->equals( $this->mTitle ) || ( !$nt->isKnown() && in_array( - $this->mTitle->getPrefixedText(), - $this->getConverterLanguage()->autoConvertToAllVariants( $nt->getPrefixedText() ), - true - ) ) ) { - $s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail ); - continue; - } + # Self-link checking. For some languages, variants of the title are checked in + # LinkHolderArray::doVariants() to allow batching the existence checks necessary + # for linking to a different variant. + if ( $ns != NS_SPECIAL && $nt->equals( $this->mTitle ) && $nt->getFragment() === '' ) { + $s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail ); + continue; } # NS_MEDIA is a pseudo-namespace for linking directly to a file @@ -2159,7 +2236,7 @@ class Parser { function closeParagraph() { $result = ''; if ( $this->mLastSection != '' ) { - $result = 'mLastSection . ">\n"; + $result = 'mLastSection . ">\n"; } $this->mInPre = false; $this->mLastSection = ''; @@ -2204,13 +2281,13 @@ class Parser { $result = $this->closeParagraph(); if ( '*' === $char ) { - $result .= '
  • '; + $result .= "
      \n
    • "; } elseif ( '#' === $char ) { - $result .= '
      1. '; + $result .= "
          \n
        1. "; } elseif ( ':' === $char ) { - $result .= '
          '; + $result .= "
          \n
          "; } elseif ( ';' === $char ) { - $result .= '
          '; + $result .= "
          \n
          "; $this->mDTopen = true; } else { $result = ''; @@ -2228,11 +2305,11 @@ class Parser { */ function nextItem( $char ) { if ( '*' === $char || '#' === $char ) { - return '
        2. '; + return "
        3. \n
        4. "; } elseif ( ':' === $char || ';' === $char ) { - $close = ''; + $close = "\n"; if ( $this->mDTopen ) { - $close = ''; + $close = "\n"; } if ( ';' === $char ) { $this->mDTopen = true; @@ -2254,15 +2331,15 @@ class Parser { */ function closeList( $char ) { if ( '*' === $char ) { - $text = '
    '; + $text = "
  • \n
"; } elseif ( '#' === $char ) { - $text = ''; + $text = "\n"; } elseif ( ':' === $char ) { if ( $this->mDTopen ) { $this->mDTopen = false; - $text = ''; + $text = "\n"; } else { - $text = ''; + $text = "\n"; } } else { return ''; @@ -2292,6 +2369,7 @@ class Parser { $this->mDTopen = $inBlockElem = false; $prefixLength = 0; $paragraphStack = false; + $inBlockquote = false; foreach ( $textLines as $oLine ) { # Fix up $linestart @@ -2354,13 +2432,13 @@ class Parser { # Close all the prefixes which aren't shared. while ( $commonPrefixLength < $lastPrefixLength ) { - $output .= $this->closeList( $lastPrefix[$lastPrefixLength-1] ); + $output .= $this->closeList( $lastPrefix[$lastPrefixLength - 1] ); --$lastPrefixLength; } # Continue the current prefix if appropriate. if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) { - $output .= $this->nextItem( $prefix[$commonPrefixLength-1] ); + $output .= $this->nextItem( $prefix[$commonPrefixLength - 1] ); } # Open prefixes where appropriate. @@ -2385,10 +2463,10 @@ class Parser { wfProfileIn( __METHOD__ . "-paragraph" ); # No prefix (not in list)--go to paragraph mode # XXX: use a stack for nestable elements like span, table and div - $openmatch = preg_match( '/(?:mUniqPrefix . '-pre|<\\/li|<\\/ul|<\\/ol|<\\/dl|<\\/?center)/iS', $t ); + '/(?:<\\/table|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|' . + 'mUniqPrefix . '-pre|<\\/li|<\\/ul|<\\/ol|<\\/dl|<\\/?center)/iS', $t ); if ( $openmatch or $closematch ) { $paragraphStack = false; # TODO bug 5718: paragraph closed @@ -2396,9 +2474,14 @@ class Parser { if ( $preOpenMatch and !$preCloseMatch ) { $this->mInPre = true; } + $bqOffset = 0; + while ( preg_match( '/<(\\/?)blockquote[\s>]/i', $t, $bqMatch, PREG_OFFSET_CAPTURE, $bqOffset ) ) { + $inBlockquote = !$bqMatch[1][0]; // is this a close tag? + $bqOffset = $bqMatch[0][1] + strlen( $bqMatch[0][0] ); + } $inBlockElem = !$closematch; } elseif ( !$inBlockElem && !$this->mInPre ) { - if ( ' ' == substr( $t, 0, 1 ) and ( $this->mLastSection === 'pre' || trim( $t ) != '' ) ) { + if ( ' ' == substr( $t, 0, 1 ) and ( $this->mLastSection === 'pre' || trim( $t ) != '' ) and !$inBlockquote ) { # pre if ( $this->mLastSection !== 'pre' ) { $paragraphStack = false; @@ -2445,7 +2528,7 @@ class Parser { } } while ( $prefixLength ) { - $output .= $this->closeList( $prefix2[$prefixLength-1] ); + $output .= $this->closeList( $prefix2[$prefixLength - 1] ); --$prefixLength; } if ( $this->mLastSection != '' ) { @@ -2481,7 +2564,7 @@ class Parser { if ( $lt === false || $lt > $pos ) { # Easy; no tag nesting to worry about $before = substr( $str, 0, $pos ); - $after = substr( $str, $pos+1 ); + $after = substr( $str, $pos + 1 ); wfProfileOut( __METHOD__ ); return $pos; } @@ -2490,13 +2573,13 @@ class Parser { $state = self::COLON_STATE_TEXT; $stack = 0; $len = strlen( $str ); - for( $i = 0; $i < $len; $i++ ) { + for ( $i = 0; $i < $len; $i++ ) { $c = $str[$i]; - switch( $state ) { + switch ( $state ) { # (Using the number is a performance hack for common cases) case 0: # self::COLON_STATE_TEXT: - switch( $c ) { + switch ( $c ) { case "<": # Could be either a tag or an tag $state = self::COLON_STATE_TAGSTART; @@ -2541,7 +2624,7 @@ class Parser { break; case 1: # self::COLON_STATE_TAG: # In a - switch( $c ) { + switch ( $c ) { case ">": $stack++; $state = self::COLON_STATE_TEXT; @@ -2555,7 +2638,7 @@ class Parser { } break; case 2: # self::COLON_STATE_TAGSTART: - switch( $c ) { + switch ( $c ) { case "/": $state = self::COLON_STATE_CLOSETAG; break; @@ -2611,6 +2694,7 @@ class Parser { } break; default: + wfProfileOut( __METHOD__ ); throw new MWException( "State machine error in " . __METHOD__ ); } } @@ -2660,71 +2744,50 @@ class Parser { $ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() ); wfRunHooks( 'ParserGetVariableValueTs', array( &$this, &$ts ) ); - # Use the time zone - global $wgLocaltimezone; - if ( isset( $wgLocaltimezone ) ) { - $oldtz = date_default_timezone_get(); - date_default_timezone_set( $wgLocaltimezone ); - } - - $localTimestamp = date( 'YmdHis', $ts ); - $localMonth = date( 'm', $ts ); - $localMonth1 = date( 'n', $ts ); - $localMonthName = date( 'n', $ts ); - $localDay = date( 'j', $ts ); - $localDay2 = date( 'd', $ts ); - $localDayOfWeek = date( 'w', $ts ); - $localWeek = date( 'W', $ts ); - $localYear = date( 'Y', $ts ); - $localHour = date( 'H', $ts ); - if ( isset( $wgLocaltimezone ) ) { - date_default_timezone_set( $oldtz ); - } - $pageLang = $this->getFunctionLang(); switch ( $index ) { case 'currentmonth': - $value = $pageLang->formatNum( gmdate( 'm', $ts ) ); + $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'm' ) ); break; case 'currentmonth1': - $value = $pageLang->formatNum( gmdate( 'n', $ts ) ); + $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'n' ) ); break; case 'currentmonthname': - $value = $pageLang->getMonthName( gmdate( 'n', $ts ) ); + $value = $pageLang->getMonthName( MWTimestamp::getInstance( $ts )->format( 'n' ) ); break; case 'currentmonthnamegen': - $value = $pageLang->getMonthNameGen( gmdate( 'n', $ts ) ); + $value = $pageLang->getMonthNameGen( MWTimestamp::getInstance( $ts )->format( 'n' ) ); break; case 'currentmonthabbrev': - $value = $pageLang->getMonthAbbreviation( gmdate( 'n', $ts ) ); + $value = $pageLang->getMonthAbbreviation( MWTimestamp::getInstance( $ts )->format( 'n' ) ); break; case 'currentday': - $value = $pageLang->formatNum( gmdate( 'j', $ts ) ); + $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'j' ) ); break; case 'currentday2': - $value = $pageLang->formatNum( gmdate( 'd', $ts ) ); + $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'd' ) ); break; case 'localmonth': - $value = $pageLang->formatNum( $localMonth ); + $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'm' ) ); break; case 'localmonth1': - $value = $pageLang->formatNum( $localMonth1 ); + $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) ); break; case 'localmonthname': - $value = $pageLang->getMonthName( $localMonthName ); + $value = $pageLang->getMonthName( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) ); break; case 'localmonthnamegen': - $value = $pageLang->getMonthNameGen( $localMonthName ); + $value = $pageLang->getMonthNameGen( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) ); break; case 'localmonthabbrev': - $value = $pageLang->getMonthAbbreviation( $localMonthName ); + $value = $pageLang->getMonthAbbreviation( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) ); break; case 'localday': - $value = $pageLang->formatNum( $localDay ); + $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'j' ) ); break; case 'localday2': - $value = $pageLang->formatNum( $localDay2 ); + $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'd' ) ); break; case 'pagename': $value = wfEscapeWikiText( $this->mTitle->getText() ); @@ -2744,6 +2807,12 @@ class Parser { case 'subpagenamee': $value = wfEscapeWikiText( $this->mTitle->getSubpageUrlForm() ); break; + case 'rootpagename': + $value = wfEscapeWikiText( $this->mTitle->getRootText() ); + break; + case 'rootpagenamee': + $value = wfEscapeWikiText( wfUrlEncode( str_replace( ' ', '_', $this->mTitle->getRootText() ) ) ); + break; case 'basepagename': $value = wfEscapeWikiText( $this->mTitle->getBaseText() ); break; @@ -2761,7 +2830,7 @@ class Parser { case 'talkpagenamee': if ( $this->mTitle->canTalk() ) { $talkPage = $this->mTitle->getTalkPage(); - $value = wfEscapeWikiText( $talkPage->getPrefixedUrl() ); + $value = wfEscapeWikiText( $talkPage->getPrefixedURL() ); } else { $value = ''; } @@ -2772,11 +2841,11 @@ class Parser { break; case 'subjectpagenamee': $subjPage = $this->mTitle->getSubjectPage(); - $value = wfEscapeWikiText( $subjPage->getPrefixedUrl() ); + $value = wfEscapeWikiText( $subjPage->getPrefixedURL() ); break; case 'pageid': // requested in bug 23427 - $pageid = $this->getTitle()->getArticleId(); - if( $pageid == 0 ) { + $pageid = $this->getTitle()->getArticleID(); + if ( $pageid == 0 ) { # 0 means the page doesn't exist in the database, # which means the user is previewing a new page. # The vary-revision flag must be set, because the magic word @@ -2842,6 +2911,13 @@ class Parser { wfDebug( __METHOD__ . ": {{REVISIONUSER}} used, setting vary-revision...\n" ); $value = $this->getRevisionUser(); break; + case 'revisionsize': + # Let the edit saving system know we should parse the page + # *after* a revision ID has been assigned. This is for null edits. + $this->mOutput->setFlag( 'vary-revision' ); + wfDebug( __METHOD__ . ": {{REVISIONSIZE}} used, setting vary-revision...\n" ); + $value = $this->getRevisionSize(); + break; case 'namespace': $value = str_replace( '_', ' ', $wgContLang->getNsText( $this->mTitle->getNamespace() ) ); break; @@ -2858,50 +2934,50 @@ class Parser { $value = $this->mTitle->canTalk() ? wfUrlencode( $this->mTitle->getTalkNsText() ) : ''; break; case 'subjectspace': - $value = $this->mTitle->getSubjectNsText(); + $value = str_replace( '_', ' ', $this->mTitle->getSubjectNsText() ); break; case 'subjectspacee': $value = ( wfUrlencode( $this->mTitle->getSubjectNsText() ) ); break; case 'currentdayname': - $value = $pageLang->getWeekdayName( gmdate( 'w', $ts ) + 1 ); + $value = $pageLang->getWeekdayName( MWTimestamp::getInstance( $ts )->format( 'w' ) + 1 ); break; case 'currentyear': - $value = $pageLang->formatNum( gmdate( 'Y', $ts ), true ); + $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'Y' ), true ); break; case 'currenttime': $value = $pageLang->time( wfTimestamp( TS_MW, $ts ), false, false ); break; case 'currenthour': - $value = $pageLang->formatNum( gmdate( 'H', $ts ), true ); + $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'H' ), true ); break; case 'currentweek': # @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to # int to remove the padding - $value = $pageLang->formatNum( (int)gmdate( 'W', $ts ) ); + $value = $pageLang->formatNum( (int)MWTimestamp::getInstance( $ts )->format( 'W' ) ); break; case 'currentdow': - $value = $pageLang->formatNum( gmdate( 'w', $ts ) ); + $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'w' ) ); break; case 'localdayname': - $value = $pageLang->getWeekdayName( $localDayOfWeek + 1 ); + $value = $pageLang->getWeekdayName( MWTimestamp::getLocalInstance( $ts )->format( 'w' ) + 1 ); break; case 'localyear': - $value = $pageLang->formatNum( $localYear, true ); + $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'Y' ), true ); break; case 'localtime': - $value = $pageLang->time( $localTimestamp, false, false ); + $value = $pageLang->time( MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' ), false, false ); break; case 'localhour': - $value = $pageLang->formatNum( $localHour, true ); + $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'H' ), true ); break; case 'localweek': # @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to # int to remove the padding - $value = $pageLang->formatNum( (int)$localWeek ); + $value = $pageLang->formatNum( (int)MWTimestamp::getLocalInstance( $ts )->format( 'W' ) ); break; case 'localdow': - $value = $pageLang->formatNum( $localDayOfWeek ); + $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'w' ) ); break; case 'numberofarticles': $value = $pageLang->formatNum( SiteStats::articles() ); @@ -2932,7 +3008,7 @@ class Parser { $value = wfTimestamp( TS_MW, $ts ); break; case 'localtimestamp': - $value = $localTimestamp; + $value = MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' ); break; case 'currentversion': $value = SpecialVersion::getVersion(); @@ -3092,7 +3168,7 @@ class Parser { $assocArgs[$index++] = $arg; } else { $name = trim( substr( $arg, 0, $eqpos ) ); - $value = trim( substr( $arg, $eqpos+1 ) ); + $value = trim( substr( $arg, $eqpos + 1 ) ); if ( $value === false ) { $value = ''; } @@ -3119,6 +3195,12 @@ class Parser { * 'post-expand-template-inclusion' (corresponding messages: * 'post-expand-template-inclusion-warning', * 'post-expand-template-inclusion-category') + * 'node-count-exceeded' (corresponding messages: + * 'node-count-exceeded-warning', + * 'node-count-exceeded-category') + * 'expansion-depth-exceeded' (corresponding messages: + * 'expansion-depth-exceeded-warning', + * 'expansion-depth-exceeded-category') * @param int|null $current Current value * @param int|null $max Maximum allowed, when an explicit limit has been * exceeded, provide the values (optional) @@ -3126,7 +3208,7 @@ class Parser { function limitationWarn( $limitationType, $current = '', $max = '' ) { # does no harm if $current and $max are present but are unnecessary for the message $warning = wfMessage( "$limitationType-warning" )->numParams( $current, $max ) - ->inContentLanguage()->escaped(); + ->inLanguage( $this->mOptions->getUserLangObj() )->text(); $this->mOutput->addWarning( $warning ); $this->addTrackingCategory( "$limitationType-category" ); } @@ -3145,7 +3227,6 @@ class Parser { * @private */ function braceSubstitution( $piece, $frame ) { - global $wgContLang; wfProfileIn( __METHOD__ ); wfProfileIn( __METHOD__ . '-setup' ); @@ -3251,6 +3332,7 @@ class Parser { $result = $this->callParserFunction( $frame, $func, $funcArgs ); } catch ( Exception $ex ) { wfProfileOut( __METHOD__ . '-pfunc' ); + wfProfileOut( __METHOD__ ); throw $ex; } @@ -3268,8 +3350,9 @@ class Parser { $ns = NS_TEMPLATE; # Split the title into page and subpage $subpage = ''; - $part1 = $this->maybeDoSubpageLink( $part1, $subpage ); - if ( $subpage !== '' ) { + $relative = $this->maybeDoSubpageLink( $part1, $subpage ); + if ( $part1 !== $relative ) { + $part1 = $relative; $ns = $this->mTitle->getNamespace(); } $title = Title::newFromText( $part1, $ns ); @@ -3295,7 +3378,7 @@ class Parser { if ( !$found && $title ) { if ( !Profiler::instance()->isPersistent() ) { # Too many unique items can kill profiling DBs/collectors - $titleProfileIn = __METHOD__ . "-title-" . $title->getDBKey(); + $titleProfileIn = __METHOD__ . "-title-" . $title->getPrefixedDBkey(); wfProfileIn( $titleProfileIn ); // template in } wfProfileIn( __METHOD__ . '-loadtpl' ); @@ -3598,7 +3681,7 @@ class Parser { } $dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION ); - $this->mTplDomCache[ $titleText ] = $dom; + $this->mTplDomCache[$titleText] = $dom; if ( !$title->equals( $cacheTitle ) ) { $this->mTplRedirCache[$cacheTitle->getPrefixedDBkey()] = @@ -3621,6 +3704,11 @@ class Parser { if ( isset( $stuff['deps'] ) ) { foreach ( $stuff['deps'] as $dep ) { $this->mOutput->addTemplate( $dep['title'], $dep['page_id'], $dep['rev_id'] ); + if ( $dep['title']->equals( $this->getTitle() ) ) { + // If we transclude ourselves, the final result + // will change based on the new version of the page + $this->mOutput->setFlag( 'vary-revision' ); + } } } return array( $text, $finalTitle ); @@ -3660,9 +3748,9 @@ class Parser { if ( $skip ) { $text = false; $deps[] = array( - 'title' => $title, - 'page_id' => $title->getArticleID(), - 'rev_id' => null + 'title' => $title, + 'page_id' => $title->getArticleID(), + 'rev_id' => null ); break; } @@ -3678,15 +3766,15 @@ class Parser { } $deps[] = array( - 'title' => $title, - 'page_id' => $title->getArticleID(), - 'rev_id' => $rev_id ); + 'title' => $title, + 'page_id' => $title->getArticleID(), + 'rev_id' => $rev_id ); if ( $rev && !$title->equals( $rev->getTitle() ) ) { # We fetched a rev from a different title; register it too... $deps[] = array( - 'title' => $rev->getTitle(), - 'page_id' => $rev->getPage(), - 'rev_id' => $rev_id ); + 'title' => $rev->getTitle(), + 'page_id' => $rev->getPage(), + 'rev_id' => $rev_id ); } if ( $rev ) { @@ -3742,13 +3830,8 @@ class Parser { * @return Array ( File or false, Title of file ) */ function fetchFileAndTitle( $title, $options = array() ) { - if ( isset( $options['broken'] ) ) { - $file = false; // broken thumbnail forced by hook - } elseif ( isset( $options['sha1'] ) ) { // get by (sha1,timestamp) - $file = RepoGroup::singleton()->findFileFromKey( $options['sha1'], $options ); - } else { // get by (name,timestamp) - $file = wfFindFile( $title, $options ); - } + $file = $this->fetchFileNoRegister( $title, $options ); + $time = $file ? $file->getTimestamp() : false; $sha1 = $file ? $file->getSha1() : false; # Register the file as a dependency... @@ -3766,6 +3849,27 @@ class Parser { return array( $file, $title ); } + /** + * Helper function for fetchFileAndTitle. + * + * Also useful if you need to fetch a file but not use it yet, + * for example to get the file's handler. + * + * @param Title $title + * @param array $options Array of options to RepoGroup::findFile + * @return File or false + */ + protected function fetchFileNoRegister( $title, $options = array() ) { + if ( isset( $options['broken'] ) ) { + $file = false; // broken thumbnail forced by hook + } elseif ( isset( $options['sha1'] ) ) { // get by (sha1,timestamp) + $file = RepoGroup::singleton()->findFileFromKey( $options['sha1'], $options ); + } else { // get by (name,timestamp) + $file = wfFindFile( $title, $options ); + } + return $file; + } + /** * Transclude an interwiki link. * @@ -3781,7 +3885,7 @@ class Parser { return wfMessage( 'scarytranscludedisabled' )->inContentLanguage()->text(); } - $url = $title->getFullUrl( "action=$action" ); + $url = $title->getFullURL( array( 'action' => $action ) ); if ( strlen( $url ) > 255 ) { return wfMessage( 'scarytranscludetoolong' )->inContentLanguage()->text(); @@ -3817,8 +3921,8 @@ class Parser { $dbw->replace( 'transcache', array( 'tc_url' ), array( 'tc_url' => $url, 'tc_time' => $dbw->timestamp( time() ), - 'tc_contents' => $text) - ); + 'tc_contents' => $text + ) ); return $text; } @@ -4097,8 +4201,8 @@ class Parser { * @return mixed|string * @private */ - function formatHeadings( $text, $origText, $isMain=true ) { - global $wgMaxTocLevel, $wgHtml5, $wgExperimentalHtmlIds; + function formatHeadings( $text, $origText, $isMain = true ) { + global $wgMaxTocLevel, $wgExperimentalHtmlIds; # Inhibit editsection links if requested in the page if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) { @@ -4114,7 +4218,7 @@ class Parser { # Get all headlines for numbering them and adding funky stuff like [edit] # links - this is for later, but we need the number of headlines right now $matches = array(); - $numMatches = preg_match_all( '/[1-6])(?P.*?'.'>)(?P
.*?)<\/H[1-6] *>/i', $text, $matches ); + $numMatches = preg_match_all( '/[1-6])(?P.*?' . '>)\s*(?P
[\s\S]*?)\s*<\/H[1-6] *>/i', $text, $matches ); # if there are fewer than 4 headlines in the article, do not show TOC # unless it's been explicitly enabled. @@ -4176,7 +4280,7 @@ class Parser { $serial = $markerMatches[1]; list( $titleText, $sectionIndex ) = $this->mHeadings[$serial]; $isTemplate = ( $titleText != $baseTitleText ); - $headline = preg_replace( "/^$markerRegex/", "", $headline ); + $headline = preg_replace( "/^$markerRegex\\s*/", "", $headline ); } if ( $toclevel ) { @@ -4231,7 +4335,7 @@ class Parser { # count number of headlines for each level $sublevelCount[$toclevel]++; $dot = 0; - for( $i = 1; $i <= $toclevel; $i++ ) { + for ( $i = 1; $i <= $toclevel; $i++ ) { if ( !empty( $sublevelCount[$i] ) ) { if ( $dot ) { $numbering .= '.'; @@ -4263,20 +4367,20 @@ class Parser { # We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from , # to allow setting directionality in toc items. $tocline = preg_replace( - array( '#<(?!/?(span|sup|sub|i|b)(?: [^>]*)?>).*?'.'>#', '#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|i|b))(?: .*?)?'.'>#' ), + array( '#<(?!/?(span|sup|sub|i|b)(?: [^>]*)?>).*?' . '>#', '#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|i|b))(?: .*?)?' . '>#' ), array( '', '<$1>' ), $safeHeadline ); $tocline = trim( $tocline ); # For the anchor, strip out HTML-y stuff period - $safeHeadline = preg_replace( '/<.*?'.'>/', '', $safeHeadline ); + $safeHeadline = preg_replace( '/<.*?' . '>/', '', $safeHeadline ); $safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline ); # Save headline for section edit hint before it's escaped $headlineHint = $safeHeadline; - if ( $wgHtml5 && $wgExperimentalHtmlIds ) { + if ( $wgExperimentalHtmlIds ) { # For reverse compatibility, provide an id that's # HTML4-compatible, like we used to. # @@ -4346,7 +4450,8 @@ class Parser { # Add the section to the section tree # Find the DOM node for this header - while ( $node && !$isTemplate ) { + $noOffset = ( $isTemplate || $sectionIndex === false ); + while ( $node && !$noOffset ) { if ( $node->getName() === 'h' ) { $bits = $node->splitHeading(); if ( $bits['i'] == $sectionIndex ) { @@ -4364,7 +4469,7 @@ class Parser { 'number' => $numbering, 'index' => ( $isTemplate ? 'T-' : '' ) . $sectionIndex, 'fromtitle' => $titleText, - 'byteoffset' => ( $isTemplate ? null : $byteOffset ), + 'byteoffset' => ( $noOffset ? null : $byteOffset ), 'anchor' => $anchor, ); @@ -4415,6 +4520,7 @@ class Parser { } $toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() ); $this->mOutput->setTOCHTML( $toc ); + $toc = self::TOC_START . $toc . self::TOC_END; } if ( $isMain ) { @@ -4422,7 +4528,7 @@ class Parser { } # split up and insert constructed headlines - $blocks = preg_split( '/.*?<\/H[1-6]>/i', $text ); + $blocks = preg_split( '/[\s\S]*?<\/H[1-6]>/i', $text ); $i = 0; // build an array of document sections @@ -4484,7 +4590,7 @@ class Parser { "\r\n" => "\n", ); $text = str_replace( array_keys( $pairs ), array_values( $pairs ), $text ); - if( $options->getPreSaveTransform() ) { + if ( $options->getPreSaveTransform() ) { $text = $this->pstPass2( $text, $user ); } $text = $this->mStripState->unstripBoth( $text ); @@ -4504,7 +4610,7 @@ class Parser { * @return string */ function pstPass2( $text, $user ) { - global $wgContLang, $wgLocaltimezone; + global $wgContLang; # Note: This is the timestamp saved as hardcoded wikitext to # the database, we use $wgContLang here in order to give @@ -4512,19 +4618,11 @@ class Parser { # than the one selected in each user's preferences. # (see also bug 12815) $ts = $this->mOptions->getTimestamp(); - if ( isset( $wgLocaltimezone ) ) { - $tz = $wgLocaltimezone; - } else { - $tz = date_default_timezone_get(); - } + $timestamp = MWTimestamp::getLocalInstance( $ts ); + $ts = $timestamp->format( 'YmdHis' ); + $tzMsg = $timestamp->format( 'T' ); # might vary on DST changeover! - $unixts = wfTimestamp( TS_UNIX, $ts ); - $oldtz = date_default_timezone_get(); - date_default_timezone_set( $tz ); - $ts = date( 'YmdHis', $unixts ); - $tzMsg = date( 'T', $unixts ); # might vary on DST changeover! - - # Allow translation of timezones through wiki. date() can return + # Allow translation of timezones through wiki. format() can return # whatever crap the system uses, localised or not, so we cannot # ship premade translations. $key = 'timezone-' . strtolower( trim( $tzMsg ) ); @@ -4533,8 +4631,6 @@ class Parser { $tzMsg = $msg->text(); } - date_default_timezone_set( $oldtz ); - $d = $wgContLang->timeanddate( $ts, false, false ) . " ($tzMsg)"; # Variable replacement @@ -4603,8 +4699,9 @@ class Parser { $username = $user->getName(); # If not given, retrieve from the user object. - if ( $nickname === false ) + if ( $nickname === false ) { $nickname = $user->getOption( 'nickname' ); + } if ( is_null( $fancySig ) ) { $fancySig = $user->getBoolOption( 'fancysig' ); @@ -4645,7 +4742,7 @@ class Parser { * @return mixed An expanded string, or false if invalid. */ function validateSig( $text ) { - return( Xml::isWellFormedXmlFragment( $text ) ? $text : false ); + return Xml::isWellFormedXmlFragment( $text ) ? $text : false; } /** @@ -4883,8 +4980,9 @@ class Parser { # Add to function cache $mw = MagicWord::get( $id ); - if ( !$mw ) + if ( !$mw ) { throw new MWException( __METHOD__ . '() expecting a magic word identifier.' ); + } $synonyms = $mw->getSynonyms(); $sensitive = intval( $mw->isCaseSensitive() ); @@ -4928,7 +5026,9 @@ class Parser { */ function setFunctionTagHook( $tag, $callback, $flags ) { $tag = strtolower( $tag ); - if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) throw new MWException( "Invalid character {$m[0]} in setFunctionTagHook('$tag', ...) call" ); + if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) { + throw new MWException( "Invalid character {$m[0]} in setFunctionTagHook('$tag', ...) call" ); + } $old = isset( $this->mFunctionTagHooks[$tag] ) ? $this->mFunctionTagHooks[$tag] : null; $this->mFunctionTagHooks[$tag] = array( $callback, $flags ); @@ -4979,7 +5079,20 @@ class Parser { * @return string HTML */ function renderImageGallery( $text, $params ) { - $ig = new ImageGallery(); + wfProfileIn( __METHOD__ ); + + $mode = false; + if ( isset( $params['mode'] ) ) { + $mode = $params['mode']; + } + + try { + $ig = ImageGalleryBase::factory( $mode ); + } catch ( MWException $e ) { + // If invalid type set, fallback to default. + $ig = ImageGalleryBase::factory( false ); + } + $ig->setContextTitle( $this->mTitle ); $ig->setShowBytes( false ); $ig->setShowFilename( false ); @@ -5007,6 +5120,7 @@ class Parser { if ( isset( $params['heights'] ) ) { $ig->setHeights( $params['heights'] ); } + $ig->setAdditionalOptions( $params ); wfRunHooks( 'BeforeParserrenderImageGallery', array( &$this, &$ig ) ); @@ -5030,38 +5144,81 @@ class Parser { continue; } + # We need to get what handler the file uses, to figure out parameters. + # Note, a hook can overide the file name, and chose an entirely different + # file (which potentially could be of a different type and have different handler). + $options = array(); + $descQuery = false; + wfRunHooks( 'BeforeParserFetchFileAndTitle', + array( $this, $title, &$options, &$descQuery ) ); + # Don't register it now, as ImageGallery does that later. + $file = $this->fetchFileNoRegister( $title, $options ); + $handler = $file ? $file->getHandler() : false; + + wfProfileIn( __METHOD__ . '-getMagicWord' ); + $paramMap = array( + 'img_alt' => 'gallery-internal-alt', + 'img_link' => 'gallery-internal-link', + ); + if ( $handler ) { + $paramMap = $paramMap + $handler->getParamMap(); + // We don't want people to specify per-image widths. + // Additionally the width parameter would need special casing anyhow. + unset( $paramMap['img_width'] ); + } + + $mwArray = new MagicWordArray( array_keys( $paramMap ) ); + wfProfileOut( __METHOD__ . '-getMagicWord' ); + $label = ''; $alt = ''; $link = ''; + $handlerOptions = array(); if ( isset( $matches[3] ) ) { // look for an |alt= definition while trying not to break existing // captions with multiple pipes (|) in it, until a more sensible grammar // is defined for images in galleries + // FIXME: Doing recursiveTagParse at this stage, and the trim before + // splitting on '|' is a bit odd, and different from makeImage. $matches[3] = $this->recursiveTagParse( trim( $matches[3] ) ); $parameterMatches = StringUtils::explode( '|', $matches[3] ); - $magicWordAlt = MagicWord::get( 'img_alt' ); - $magicWordLink = MagicWord::get( 'img_link' ); foreach ( $parameterMatches as $parameterMatch ) { - if ( $match = $magicWordAlt->matchVariableStartToEnd( $parameterMatch ) ) { - $alt = $this->stripAltText( $match, false ); - } - elseif( $match = $magicWordLink->matchVariableStartToEnd( $parameterMatch ) ) { - $linkValue = strip_tags( $this->replaceLinkHoldersText( $match ) ); - $chars = self::EXT_LINK_URL_CLASS; - $prots = $this->mUrlProtocols; - //check to see if link matches an absolute url, if not then it must be a wiki link. - if ( preg_match( "/^($prots)$chars+$/u", $linkValue ) ) { - $link = $linkValue; - } else { - $localLinkTitle = Title::newFromText( $linkValue ); - if ( $localLinkTitle !== null ) { - $link = $localLinkTitle->getLocalURL(); + list( $magicName, $match ) = $mwArray->matchVariableStartToEnd( $parameterMatch ); + if ( $magicName ) { + $paramName = $paramMap[$magicName]; + + switch ( $paramName ) { + case 'gallery-internal-alt': + $alt = $this->stripAltText( $match, false ); + break; + case 'gallery-internal-link': + $linkValue = strip_tags( $this->replaceLinkHoldersText( $match ) ); + $chars = self::EXT_LINK_URL_CLASS; + $prots = $this->mUrlProtocols; + //check to see if link matches an absolute url, if not then it must be a wiki link. + if ( preg_match( "/^($prots)$chars+$/u", $linkValue ) ) { + $link = $linkValue; + } else { + $localLinkTitle = Title::newFromText( $linkValue ); + if ( $localLinkTitle !== null ) { + $link = $localLinkTitle->getLocalURL(); + } + } + break; + default: + // Must be a handler specific parameter. + if ( $handler->validateParam( $paramName, $match ) ) { + $handlerOptions[$paramName] = $match; + } else { + // Guess not. Append it to the caption. + wfDebug( "$parameterMatch failed parameter validation" ); + $label .= '|' . $parameterMatch; } } - } - else { + + } else { // concatenate all other pipes $label .= '|' . $parameterMatch; } @@ -5070,9 +5227,11 @@ class Parser { $label = substr( $label, 1 ); } - $ig->add( $title, $label, $alt, $link ); + $ig->add( $title, $label, $alt, $link, $handlerOptions ); } - return $ig->toHTML(); + $html = $ig->toHTML(); + wfProfileOut( __METHOD__ ); + return $html; } /** @@ -5187,14 +5346,14 @@ class Parser { # Special case; width and height come in one variable together if ( $type === 'handler' && $paramName === 'width' ) { $parsedWidthParam = $this->parseWidthParam( $value ); - if( isset( $parsedWidthParam['width'] ) ) { + if ( isset( $parsedWidthParam['width'] ) ) { $width = $parsedWidthParam['width']; if ( $handler->validateParam( 'width', $width ) ) { $params[$type]['width'] = $width; $validated = true; } } - if( isset( $parsedWidthParam['height'] ) ) { + if ( isset( $parsedWidthParam['height'] ) ) { $height = $parsedWidthParam['height']; if ( $handler->validateParam( 'height', $height ) ) { $params[$type]['height'] = $height; @@ -5208,7 +5367,7 @@ class Parser { $validated = $handler->validateParam( $paramName, $value ); } else { # Validate internal parameters - switch( $paramName ) { + switch ( $paramName ) { case 'manualthumb': case 'alt': case 'class': @@ -5646,20 +5805,41 @@ class Parser { * @return String: user name */ function getRevisionUser() { - if( is_null( $this->mRevisionUser ) ) { + if ( is_null( $this->mRevisionUser ) ) { $revObject = $this->getRevisionObject(); # if this template is subst: the revision id will be blank, # so just use the current user's name - if( $revObject ) { + if ( $revObject ) { $this->mRevisionUser = $revObject->getUserText(); - } elseif( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) { + } elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) { $this->mRevisionUser = $this->getUser()->getName(); } } return $this->mRevisionUser; } + /** + * Get the size of the revision + * + * @return int|null revision size + */ + function getRevisionSize() { + if ( is_null( $this->mRevisionSize ) ) { + $revObject = $this->getRevisionObject(); + + # if this variable is subst: the revision id will be blank, + # so just use the parser input size, because the own substituation + # will change the size. + if ( $revObject ) { + $this->mRevisionSize = $revObject->getSize(); + } elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) { + $this->mRevisionSize = $this->mInputSize; + } + } + return $this->mRevisionSize; + } + /** * Mutator for $mDefaultSort * @@ -5933,7 +6113,7 @@ class Parser { */ public function parseWidthParam( $value ) { $parsedWidthParam = array(); - if( $value === '' ) { + if ( $value === '' ) { return $parsedWidthParam; } $m = array(); diff --git a/includes/parser/ParserCache.php b/includes/parser/ParserCache.php index 0faa40a8..7053f134 100644 --- a/includes/parser/ParserCache.php +++ b/includes/parser/ParserCache.php @@ -67,7 +67,7 @@ class ParserCache { // idhash seem to mean 'page id' + 'rendering hash' (r3710) $pageid = $article->getID(); - $renderkey = (int)($wgRequest->getVal( 'action' ) == 'render'); + $renderkey = (int)( $wgRequest->getVal( 'action' ) == 'render' ); $key = wfMemcKey( 'pcache', 'idhash', "{$pageid}-{$renderkey}!{$hash}" ); return $key; @@ -128,7 +128,7 @@ class ParserCache { public function getKey( $article, $popts, $useOutdated = true ) { global $wgCacheEpoch; - if( $popts instanceof User ) { + if ( $popts instanceof User ) { wfWarn( "Use of outdated prototype ParserCache::getKey( &\$article, &\$user )\n" ); $popts = ParserOptions::newFromUser( $popts ); } @@ -223,19 +223,19 @@ class ParserCache { * @param $parserOutput ParserOutput * @param $article Article * @param $popts ParserOptions + * @param $cacheTime Time when the cache was generated */ - public function save( $parserOutput, $article, $popts ) { + public function save( $parserOutput, $article, $popts, $cacheTime = null ) { $expire = $parserOutput->getCacheExpiry(); - - if( $expire > 0 ) { - $now = wfTimestampNow(); + if ( $expire > 0 ) { + $cacheTime = $cacheTime ?: wfTimestampNow(); $optionsKey = new CacheTime; $optionsKey->mUsedOptions = $parserOutput->getUsedOptions(); $optionsKey->updateCacheExpiry( $expire ); - $optionsKey->setCacheTime( $now ); - $parserOutput->setCacheTime( $now ); + $optionsKey->setCacheTime( $cacheTime ); + $parserOutput->setCacheTime( $cacheTime ); $optionsKey->setContainsOldMagic( $parserOutput->containsOldMagic() ); @@ -245,8 +245,8 @@ class ParserCache { // Save the timestamp so that we don't have to load the revision row on view $parserOutput->setTimestamp( $article->getTimestamp() ); - $parserOutput->mText .= "\n\n"; - wfDebug( "Saved in parser cache with key $parserOutputKey and timestamp $now\n" ); + $parserOutput->mText .= "\n\n"; + wfDebug( "Saved in parser cache with key $parserOutputKey and timestamp $cacheTime\n" ); // Save the parser output $this->mMemc->set( $parserOutputKey, $parserOutput, $expire ); diff --git a/includes/parser/ParserOptions.php b/includes/parser/ParserOptions.php index 3eb83e36..e12f32d8 100644 --- a/includes/parser/ParserOptions.php +++ b/includes/parser/ParserOptions.php @@ -240,6 +240,7 @@ class ParserOptions { function getExternalLinkTarget() { return $this->mExternalLinkTarget; } function getDisableContentConversion() { return $this->mDisableContentConversion; } function getDisableTitleConversion() { return $this->mDisableTitleConversion; } + /** @deprecated since 1.22 use User::getOption('math') instead */ function getMath() { $this->optionUsed( 'math' ); return $this->mMath; } function getThumbSize() { $this->optionUsed( 'thumbsize' ); @@ -280,9 +281,17 @@ class ParserOptions { } /** + * Get the user language used by the parser for this page. + * * You shouldn't use this. Really. $parser->getFunctionLang() is all you need. - * Using this fragments the cache and is discouraged. Yes, {{int: }} uses this, - * producing inconsistent tables (Bug 14404). + * + * To avoid side-effects where the page will be rendered based on the language + * of the user who last saved, this function will triger a cache fragmentation. + * Usage of this method is discouraged for that reason. + * + * When saving, this will return the default language instead of the user's. + * + * {{int: }} uses this which used to produce inconsistent link tables (bug 14404). * * @return Language object * @since 1.19 @@ -312,7 +321,7 @@ class ParserOptions { function setAllowSpecialInclusion( $x ) { return wfSetVar( $this->mAllowSpecialInclusion, $x ); } function setTidy( $x ) { return wfSetVar( $this->mTidy, $x ); } - /** @deprecated in 1.19; will be removed in 1.20 */ + /** @deprecated in 1.19 */ function setSkin( $x ) { wfDeprecated( __METHOD__, '1.19' ); } function setInterfaceMessage( $x ) { return wfSetVar( $this->mInterfaceMessage, $x ); } function setTargetLanguage( $x ) { return wfSetVar( $this->mTargetLanguage, $x, true ); } @@ -330,6 +339,7 @@ class ParserOptions { function setExternalLinkTarget( $x ) { return wfSetVar( $this->mExternalLinkTarget, $x ); } function disableContentConversion( $x = true ) { return wfSetVar( $this->mDisableContentConversion, $x ); } function disableTitleConversion( $x = true ) { return wfSetVar( $this->mDisableTitleConversion, $x ); } + /** @deprecated since 1.22 */ function setMath( $x ) { return wfSetVar( $this->mMath, $x ); } function setUserLang( $x ) { if ( is_string( $x ) ) { @@ -538,7 +548,7 @@ class ParserOptions { // add in language specific options, if any // @todo FIXME: This is just a way of retrieving the url/user preferred variant - if( !is_null( $title ) ) { + if ( !is_null( $title ) ) { $confstr .= $title->getPageLanguage()->getExtraHashOptions(); } else { global $wgContLang; @@ -557,8 +567,9 @@ class ParserOptions { $confstr .= '!printable=1'; } - if ( $this->mExtraKey != '' ) + if ( $this->mExtraKey != '' ) { $confstr .= $this->mExtraKey; + } // Give a chance for extensions to modify the hash, if they have // extra options or other effects on the parser cache. diff --git a/includes/parser/ParserOutput.php b/includes/parser/ParserOutput.php index db649f11..502f0fd1 100644 --- a/includes/parser/ParserOutput.php +++ b/includes/parser/ParserOutput.php @@ -47,11 +47,14 @@ class ParserOutput extends CacheTime { $mEditSectionTokens = false, # prefix/suffix markers if edit sections were output as tokens $mProperties = array(), # Name/value pairs to be cached in the DB $mTOCHTML = '', # HTML of the TOC - $mTimestamp; # Timestamp of the revision + $mTimestamp, # Timestamp of the revision + $mTOCEnabled = true; # Whether TOC should be shown, can't override __NOTOC__ private $mIndexPolicy = ''; # 'index' or 'noindex'? Any other value will result in no change. private $mAccessedOptions = array(); # List of ParserOptions (stored in the keys) private $mSecondaryDataUpdates = array(); # List of DataUpdate, used to save info from the page somewhere else. private $mExtensionData = array(); # extra data used by extensions + private $mLimitReportData = array(); # Parser limit report data + private $mParseStartTime = array(); # Timestamps for getTimeSinceStart() const EDITSECTION_REGEX = '#<(?:mw:)?editsection page="(.*?)" section="(.*?)"(?:/>|>(.*?)())#'; @@ -66,11 +69,27 @@ class ParserOutput extends CacheTime { } function getText() { + wfProfileIn( __METHOD__ ); + $text = $this->mText; if ( $this->mEditSectionTokens ) { - return preg_replace_callback( ParserOutput::EDITSECTION_REGEX, - array( &$this, 'replaceEditSectionLinksCallback' ), $this->mText ); + $text = preg_replace_callback( ParserOutput::EDITSECTION_REGEX, + array( &$this, 'replaceEditSectionLinksCallback' ), $text ); + } else { + $text = preg_replace( ParserOutput::EDITSECTION_REGEX, '', $text ); + } + + // If you have an old cached version of this class - sorry, you can't disable the TOC + if ( isset( $this->mTOCEnabled ) && $this->mTOCEnabled ) { + $text = str_replace( array( Parser::TOC_START, Parser::TOC_END ), '', $text ); + } else { + $text = preg_replace( + '#'. preg_quote( Parser::TOC_START ) . '.*?' . preg_quote( Parser::TOC_END ) . '#s', + '', + $text + ); } - return preg_replace( ParserOutput::EDITSECTION_REGEX, '', $this->mText ); + wfProfileOut( __METHOD__ ); + return $text; } /** @@ -120,6 +139,8 @@ class ParserOutput extends CacheTime { function getIndexPolicy() { return $this->mIndexPolicy; } function getTOCHTML() { return $this->mTOCHTML; } function getTimestamp() { return $this->mTimestamp; } + function getLimitReportData() { return $this->mLimitReportData; } + function getTOCEnabled() { return $this->mTOCEnabled; } function setText( $text ) { return wfSetVar( $this->mText, $text ); } function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); } @@ -131,6 +152,7 @@ class ParserOutput extends CacheTime { function setIndexPolicy( $policy ) { return wfSetVar( $this->mIndexPolicy, $policy ); } function setTOCHTML( $tochtml ) { return wfSetVar( $this->mTOCHTML, $tochtml ); } function setTimestamp( $timestamp ) { return wfSetVar( $this->mTimestamp, $timestamp ); } + function setTOCEnabled( $flag ) { return wfSetVar( $this->mTOCEnabled, $flag ); } function addCategory( $c, $sort ) { $this->mCategories[$c] = $sort; } function addLanguageLink( $t ) { $this->mLanguageLinks[] = $t; } @@ -143,10 +165,10 @@ class ParserOutput extends CacheTime { function setNewSection( $value ) { $this->mNewSection = (bool)$value; } - function hideNewSection ( $value ) { + function hideNewSection( $value ) { $this->mHideNewSection = (bool)$value; } - function getHideNewSection () { + function getHideNewSection() { return (bool)$this->mHideNewSection; } function getNewSection() { @@ -176,10 +198,10 @@ class ParserOutput extends CacheTime { global $wgServer, $wgRegisterInternalExternals; $registerExternalLink = true; - if( !$wgRegisterInternalExternals ) { + if ( !$wgRegisterInternalExternals ) { $registerExternalLink = !self::isLinkInternal( $wgServer, $url ); } - if( $registerExternalLink ) { + if ( $registerExternalLink ) { $this->mExternalLinks[$url] = 1; } } @@ -201,11 +223,11 @@ class ParserOutput extends CacheTime { if ( $ns == NS_MEDIA ) { // Normalize this pseudo-alias if it makes it down here... $ns = NS_FILE; - } elseif( $ns == NS_SPECIAL ) { + } elseif ( $ns == NS_SPECIAL ) { // We don't record Special: links currently // It might actually be wise to, but we'd need to do some normalization. return; - } elseif( $dbk === '' ) { + } elseif ( $dbk === '' ) { // Don't record self links - [[#Foo]] return; } @@ -258,7 +280,7 @@ class ParserOutput extends CacheTime { */ function addInterwikiLink( $title ) { $prefix = $title->getInterwiki(); - if( $prefix == '' ) { + if ( $prefix == '' ) { throw new MWException( 'Non-interwiki link passed, internal parser error.' ); } if ( !isset( $this->mInterwikiLinks[$prefix] ) ) { @@ -281,7 +303,7 @@ class ParserOutput extends CacheTime { } public function addModules( $modules ) { - $this->mModules = array_merge( $this->mModules, (array) $modules ); + $this->mModules = array_merge( $this->mModules, (array)$modules ); } public function addModuleScripts( $modules ) { @@ -329,7 +351,7 @@ class ParserOutput extends CacheTime { */ public function getDisplayTitle() { $t = $this->getTitleText(); - if( $t === '' ) { + if ( $t === '' ) { return false; } return $t; @@ -544,4 +566,67 @@ class ParserOutput extends CacheTime { return null; } + private static function getTimes( $clock = null ) { + $ret = array(); + if ( !$clock || $clock === 'wall' ) { + $ret['wall'] = microtime( true ); + } + if ( ( !$clock || $clock === 'cpu' ) && function_exists( 'getrusage' ) ) { + $ru = getrusage(); + $ret['cpu'] = $ru['ru_utime.tv_sec'] + $ru['ru_utime.tv_usec'] / 1e6; + $ret['cpu'] += $ru['ru_stime.tv_sec'] + $ru['ru_stime.tv_usec'] / 1e6; + } + return $ret; + } + + /** + * Resets the parse start timestamps for future calls to getTimeSinceStart() + * @since 1.22 + */ + function resetParseStartTime() { + $this->mParseStartTime = self::getTimes(); + } + + /** + * Returns the time since resetParseStartTime() was last called + * + * Clocks available are: + * - wall: Wall clock time + * - cpu: CPU time (requires getrusage) + * + * @since 1.22 + * @param string $clock + * @return float|null + */ + function getTimeSinceStart( $clock ) { + if ( !isset( $this->mParseStartTime[$clock] ) ) { + return null; + } + + $end = self::getTimes( $clock ); + return $end[$clock] - $this->mParseStartTime[$clock]; + } + + /** + * Sets parser limit report data for a key + * + * The key is used as the prefix for various messages used for formatting: + * - $key: The label for the field in the limit report + * - $key-value-text: Message used to format the value in the "NewPP limit + * report" HTML comment. If missing, uses $key-format. + * - $key-value-html: Message used to format the value in the preview + * limit report table. If missing, uses $key-format. + * - $key-value: Message used to format the value. If missing, uses "$1". + * + * Note that all values are interpreted as wikitext, and so should be + * encoded with htmlspecialchars() as necessary, but should avoid complex + * HTML for sanity of display in the "NewPP limit report" comment. + * + * @since 1.22 + * @param string $key Message key + * @param mixed $value Appropriate for Message::params() + */ + function setLimitReportData( $key, $value ) { + $this->mLimitReportData[$key] = $value; + } } diff --git a/includes/parser/Parser_DiffTest.php b/includes/parser/Parser_DiffTest.php index f25340fa..aeae234a 100644 --- a/includes/parser/Parser_DiffTest.php +++ b/includes/parser/Parser_DiffTest.php @@ -122,7 +122,7 @@ class Parser_DiffTest function setFunctionHook( $id, $callback, $flags = 0 ) { $this->init(); - foreach ( $this->parsers as $parser ) { + foreach ( $this->parsers as $parser ) { $parser->setFunctionHook( $id, $callback, $flags ); } } diff --git a/includes/parser/Parser_LinkHooks.php b/includes/parser/Parser_LinkHooks.php deleted file mode 100644 index b2cdc41a..00000000 --- a/includes/parser/Parser_LinkHooks.php +++ /dev/null @@ -1,326 +0,0 @@ -"\\x00-\\x20\\x7F]'; - const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)([^][<>"\\x00-\\x20\\x7F]+) - \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sx'; - - /**#@+ - * @private - */ - # Persistent: - var $mLinkHooks; - - /**#@-*/ - - /** - * Constructor - */ - public function __construct( $conf = array() ) { - parent::__construct( $conf ); - $this->mLinkHooks = array(); - } - - /** - * Do various kinds of initialisation on the first call of the parser - */ - function firstCallInit() { - parent::__construct(); - if ( !$this->mFirstCall ) { - return; - } - $this->mFirstCall = false; - - wfProfileIn( __METHOD__ ); - - $this->setHook( 'pre', array( $this, 'renderPreTag' ) ); - CoreParserFunctions::register( $this ); - CoreLinkFunctions::register( $this ); - $this->initialiseVariables(); - - wfRunHooks( 'ParserFirstCallInit', array( &$this ) ); - wfProfileOut( __METHOD__ ); - } - - /** - * Create a link hook, e.g. [[Namepsace:...|display}} - * The callback function should have the form: - * function myLinkCallback( $parser, $holders, $markers, - * Title $title, $titleText, &$sortText = null, &$leadingColon = false ) { ... } - * - * Or with SLH_PATTERN: - * function myLinkCallback( $parser, $holders, $markers, ) - * &$titleText, &$sortText = null, &$leadingColon = false ) { ... } - * - * The callback may either return a number of different possible values: - * String) Text result of the link - * True) (Treat as link) Parse the link according to normal link rules - * False) (Bad link) Just output the raw wikitext (You may modify the text first) - * - * @param $ns Integer or String: the Namespace ID or regex pattern if SLH_PATTERN is set - * @param $callback Mixed: the callback function (and object) to use - * @param $flags Integer: a combination of the following flags: - * SLH_PATTERN Use a regex link pattern rather than a namespace - * - * @throws MWException - * @return callback|null The old callback function for this name, if any - */ - public function setLinkHook( $ns, $callback, $flags = 0 ) { - if( $flags & SLH_PATTERN && !is_string($ns) ) - throw new MWException( __METHOD__ . '() expecting a regex string pattern.' ); - elseif( $flags | ~SLH_PATTERN && !is_int( $ns ) ) - throw new MWException( __METHOD__ . '() expecting a namespace index.' ); - $oldVal = isset( $this->mLinkHooks[$ns] ) ? $this->mLinkHooks[$ns][0] : null; - $this->mLinkHooks[$ns] = array( $callback, $flags ); - return $oldVal; - } - - /** - * Get all registered link hook identifiers - * - * @return array - */ - function getLinkHooks() { - return array_keys( $this->mLinkHooks ); - } - - /** - * Process [[ ]] wikilinks - * @param $s - * @throws MWException - * @return LinkHolderArray - * - * @private - */ - function replaceInternalLinks2( &$s ) { - wfProfileIn( __METHOD__ ); - - wfProfileIn( __METHOD__ . '-setup' ); - static $tc = false, $titleRegex; //$e1, $e1_img; - if( !$tc ) { - # the % is needed to support urlencoded titles as well - $tc = Title::legalChars() . '#%'; - # Match a link having the form [[namespace:link|alternate]]trail - //$e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD"; - # Match cases where there is no "]]", which might still be images - //$e1_img = "/^([{$tc}]+)\\|(.*)\$/sD"; - # Match a valid plain title - $titleRegex = "/^([{$tc}]+)$/sD"; - } - - $holders = new LinkHolderArray( $this ); - - if( is_null( $this->mTitle ) ) { - wfProfileOut( __METHOD__ . '-setup' ); - wfProfileOut( __METHOD__ ); - throw new MWException( __METHOD__ . ": \$this->mTitle is null\n" ); - } - - wfProfileOut( __METHOD__ . '-setup' ); - - $offset = 0; - $offsetStack = array(); - $markers = new LinkMarkerReplacer( $this, $holders, array( &$this, 'replaceInternalLinksCallback' ) ); - while( true ) { - $startBracketOffset = strpos( $s, '[[', $offset ); - $endBracketOffset = strpos( $s, ']]', $offset ); - # Finish when there are no more brackets - if( $startBracketOffset === false && $endBracketOffset === false ) break; - # Determine if the bracket is a starting or ending bracket - # When we find both, use the first one - elseif( $startBracketOffset !== false && $endBracketOffset !== false ) - $isStart = $startBracketOffset <= $endBracketOffset; - # When we only found one, check which it is - else $isStart = $startBracketOffset !== false; - $bracketOffset = $isStart ? $startBracketOffset : $endBracketOffset; - if( $isStart ) { - /** Opening bracket **/ - # Just push our current offset in the string onto the stack - $offsetStack[] = $startBracketOffset; - } else { - /** Closing bracket **/ - # Pop the start pos for our current link zone off the stack - $startBracketOffset = array_pop( $offsetStack ); - # Just to clean up the code, lets place offsets on the outer ends - $endBracketOffset += 2; - - # Only do logic if we actually have a opening bracket for this - if( isset( $startBracketOffset ) ) { - # Extract text inside the link - @list( $titleText, $paramText ) = explode( '|', - substr( $s, $startBracketOffset + 2, $endBracketOffset - $startBracketOffset - 4 ), 2 ); - # Create markers only for valid links - if( preg_match( $titleRegex, $titleText ) ) { - # Store the text for the marker - $marker = $markers->addMarker( $titleText, $paramText ); - # Replace the current link with the marker - $s = substr( $s, 0, $startBracketOffset ) . - $marker . - substr( $s, $endBracketOffset ); - # We have modified $s, because of this we need to set the - # offset manually since the end position is different now - $offset = $startBracketOffset+strlen( $marker ); - continue; - } - # ToDo: Some LinkHooks may allow recursive links inside of - # the link text, create a regex that also matches our - # sequence in titles - # ToDO: Some LinkHooks use patterns rather than namespaces - # these need to be tested at this point here - } - } - # Bump our offset to after our current bracket - $offset = $bracketOffset+2; - } - - # Now expand our tree - wfProfileIn( __METHOD__ . '-expand' ); - $s = $markers->expand( $s ); - wfProfileOut( __METHOD__ . '-expand' ); - - wfProfileOut( __METHOD__ ); - return $holders; - } - - function replaceInternalLinksCallback( $parser, $holders, $markers, $titleText, $paramText ) { - wfProfileIn( __METHOD__ ); - $wt = isset( $paramText ) ? "[[$titleText|$paramText]]" : "[[$titleText]]"; - wfProfileIn( __METHOD__ . "-misc" ); - - # Don't allow internal links to pages containing - # PROTO: where PROTO is a valid URL protocol; these - # should be external links. - if( preg_match( '/^\b(?i:' . wfUrlProtocols() . ')/', $titleText ) ) { - wfProfileOut( __METHOD__ . "-misc" ); - wfProfileOut( __METHOD__ ); - return $wt; - } - - # Make subpage if necessary - if( $this->areSubpagesAllowed() ) { - $titleText = $this->maybeDoSubpageLink( $titleText, $paramText ); - } - - # Check for a leading colon and strip it if it is there - $leadingColon = $titleText[0] == ':'; - if( $leadingColon ) $titleText = substr( $titleText, 1 ); - - wfProfileOut( __METHOD__ . "-misc" ); - # Make title object - wfProfileIn( __METHOD__ . "-title" ); - $title = Title::newFromText( $this->mStripState->unstripNoWiki( $titleText ) ); - if( !$title ) { - wfProfileOut( __METHOD__ . "-title" ); - wfProfileOut( __METHOD__ ); - return $wt; - } - $ns = $title->getNamespace(); - wfProfileOut( __METHOD__ . "-title" ); - - # Default for Namespaces is a default link - # ToDo: Default for patterns is plain wikitext - $return = true; - if( isset( $this->mLinkHooks[$ns] ) ) { - list( $callback, $flags ) = $this->mLinkHooks[$ns]; - if( $flags & SLH_PATTERN ) { - $args = array( $parser, $holders, $markers, $titleText, &$paramText, &$leadingColon ); - } else { - $args = array( $parser, $holders, $markers, $title, $titleText, &$paramText, &$leadingColon ); - } - # Workaround for PHP bug 35229 and similar - if ( !is_callable( $callback ) ) { - throw new MWException( "Tag hook for namespace $ns is not callable\n" ); - } - $return = call_user_func_array( $callback, $args ); - } - if( $return === true ) { - # True (treat as plain link) was returned, call the defaultLinkHook - $return = CoreLinkFunctions::defaultLinkHook( $parser, $holders, $markers, $title, - $titleText, $paramText, $leadingColon ); - } - if( $return === false ) { - # False (no link) was returned, output plain wikitext - # Build it again as the hook is allowed to modify $paramText - $return = isset( $paramText ) ? "[[$titleText|$paramText]]" : "[[$titleText]]"; - } - # Content was returned, return it - wfProfileOut( __METHOD__ ); - return $return; - } - -} - -class LinkMarkerReplacer { - - protected $markers, $nextId, $parser, $holders, $callback; - - function __construct( $parser, $holders, $callback ) { - $this->nextId = 0; - $this->markers = array(); - $this->parser = $parser; - $this->holders = $holders; - $this->callback = $callback; - } - - function addMarker( $titleText, $paramText ) { - $id = $this->nextId++; - $this->markers[$id] = array( $titleText, $paramText ); - return ""; - } - - function findMarker( $string ) { - return (bool) preg_match( '//', $string ); - } - - function expand( $string ) { - return StringUtils::delimiterReplaceCallback( "", array( &$this, 'callback' ), $string ); - } - - function callback( $m ) { - $id = intval( $m[1] ); - if( !array_key_exists( $id, $this->markers ) ) return $m[0]; - $args = $this->markers[$id]; - array_unshift( $args, $this ); - array_unshift( $args, $this->holders ); - array_unshift( $args, $this->parser ); - return call_user_func_array( $this->callback, $args ); - } -} diff --git a/includes/parser/Preprocessor_DOM.php b/includes/parser/Preprocessor_DOM.php index d0c57ab5..3138f483 100644 --- a/includes/parser/Preprocessor_DOM.php +++ b/includes/parser/Preprocessor_DOM.php @@ -72,9 +72,8 @@ class Preprocessor_DOM implements Preprocessor { $xml = ""; foreach ( $values as $k => $val ) { - if ( is_int( $k ) ) { - $xml .= "" . htmlspecialchars( $val ) .""; + $xml .= "" . htmlspecialchars( $val ) . ""; } else { $xml .= "" . htmlspecialchars( $k ) . "=" . htmlspecialchars( $val ) . ""; } @@ -149,26 +148,28 @@ class Preprocessor_DOM implements Preprocessor { wfDebugLog( "Preprocessor", "Loaded preprocessor XML from memcached (key $cacheKey)" ); } } - } - if ( $xml === false ) { - if ( $cacheable ) { + if ( $xml === false ) { wfProfileIn( __METHOD__ . '-cache-miss' ); $xml = $this->preprocessToXml( $text, $flags ); $cacheValue = sprintf( "%08d", self::CACHE_VERSION ) . $xml; $wgMemc->set( $cacheKey, $cacheValue, 86400 ); wfProfileOut( __METHOD__ . '-cache-miss' ); wfDebugLog( "Preprocessor", "Saved preprocessor XML to memcached (key $cacheKey)" ); - } else { - $xml = $this->preprocessToXml( $text, $flags ); } - + } else { + $xml = $this->preprocessToXml( $text, $flags ); } + // Fail if the number of elements exceeds acceptable limits // Do not attempt to generate the DOM $this->parser->mGeneratedPPNodeCount += substr_count( $xml, '<' ); $max = $this->parser->mOptions->getMaxGeneratedPPNodeCount(); if ( $this->parser->mGeneratedPPNodeCount > $max ) { + if ( $cacheable ) { + wfProfileOut( __METHOD__ . '-cacheable' ); + } + wfProfileOut( __METHOD__ ); throw new MWException( __METHOD__ . ': generated node count limit exceeded' ); } @@ -182,16 +183,21 @@ class Preprocessor_DOM implements Preprocessor { $xml = UtfNormal::cleanUp( $xml ); // 1 << 19 == XML_PARSE_HUGE, needed so newer versions of libxml2 don't barf when the XML is >256 levels deep $result = $dom->loadXML( $xml, 1 << 19 ); - if ( !$result ) { - throw new MWException( __METHOD__ . ' generated invalid XML' ); - } } - $obj = new PPNode_DOM( $dom->documentElement ); + if ( $result ) { + $obj = new PPNode_DOM( $dom->documentElement ); + } wfProfileOut( __METHOD__ . '-loadXML' ); + if ( $cacheable ) { wfProfileOut( __METHOD__ . '-cacheable' ); } + wfProfileOut( __METHOD__ ); + + if ( !$result ) { + throw new MWException( __METHOD__ . ' generated invalid XML' ); + } return $obj; } @@ -355,9 +361,11 @@ class Preprocessor_DOM implements Preprocessor { } // Handle comments if ( isset( $matches[2] ) && $matches[2] == '!--' ) { - // To avoid leaving blank lines, when a comment is both preceded - // and followed by a newline (ignoring spaces), trim leading and - // trailing spaces and one of the newlines. + + // To avoid leaving blank lines, when a sequence of + // space-separated comments is both preceded and followed by + // a newline (ignoring spaces), then + // trim leading and trailing spaces and the trailing newline. // Find the end $endPos = strpos( $text, '-->', $i + 4 ); @@ -368,10 +376,25 @@ class Preprocessor_DOM implements Preprocessor { $i = $lengthText; } else { // Search backwards for leading whitespace - $wsStart = $i ? ( $i - strspn( $revText, ' ', $lengthText - $i ) ) : 0; + $wsStart = $i ? ( $i - strspn( $revText, " \t", $lengthText - $i ) ) : 0; + // Search forwards for trailing whitespace // $wsEnd will be the position of the last space (or the '>' if there's none) - $wsEnd = $endPos + 2 + strspn( $text, ' ', $endPos + 3 ); + $wsEnd = $endPos + 2 + strspn( $text, " \t", $endPos + 3 ); + + // Keep looking forward as long as we're finding more + // comments. + $comments = array( array( $wsStart, $wsEnd ) ); + while ( substr( $text, $wsEnd + 1, 4 ) == '', $wsEnd + 4 ); + if ( $c === false ) { + break; + } + $c = $c + 2 + strspn( $text, " \t", $c + 3 ); + $comments[] = array( $wsEnd + 1, $c ); + $wsEnd = $c; + } + // Eat the line if possible // TODO: This could theoretically be done if $wsStart == 0, i.e. for comments at // the overall start. That's not how Sanitizer::removeHTMLcomments() did it, but @@ -379,14 +402,26 @@ class Preprocessor_DOM implements Preprocessor { if ( $wsStart > 0 && substr( $text, $wsStart - 1, 1 ) == "\n" && substr( $text, $wsEnd + 1, 1 ) == "\n" ) { - $startPos = $wsStart; - $endPos = $wsEnd + 1; // Remove leading whitespace from the end of the accumulator // Sanity check first though $wsLength = $i - $wsStart; - if ( $wsLength > 0 && substr( $accum, -$wsLength ) === str_repeat( ' ', $wsLength ) ) { + if ( $wsLength > 0 + && strspn( $accum, " \t", -$wsLength ) === $wsLength ) + { $accum = substr( $accum, 0, -$wsLength ); } + + // Dump all but the last comment to the accumulator + foreach ( $comments as $j => $com ) { + $startPos = $com[0]; + $endPos = $com[1] + 1; + if ( $j == ( count( $comments ) - 1 ) ) { + break; + } + $inner = substr( $text, $startPos, $endPos - $startPos ); + $accum .= '' . htmlspecialchars( $inner ) . ''; + } + // Do a line-start run next time to look for headings after the comment $fakeLineStart = true; } else { @@ -397,7 +432,7 @@ class Preprocessor_DOM implements Preprocessor { if ( $stack->top ) { $part = $stack->top->getCurrentPart(); - if ( !(isset( $part->commentEnd ) && $part->commentEnd == $wsStart - 1 )) { + if ( !( isset( $part->commentEnd ) && $part->commentEnd == $wsStart - 1 ) ) { $part->visualEnd = $wsStart; } // Else comments abutting, no change in visual end @@ -432,7 +467,7 @@ class Preprocessor_DOM implements Preprocessor { } $tagStartPos = $i; - if ( $text[$tagEndPos-1] == '/' ) { + if ( $text[$tagEndPos - 1] == '/' ) { $attrEnd = $tagEndPos - 1; $inner = null; $i = $tagEndPos + 1; @@ -569,7 +604,7 @@ class Preprocessor_DOM implements Preprocessor { 'open' => $curChar, 'close' => $rule['end'], 'count' => $count, - 'lineStart' => ($i > 0 && $text[$i-1] == "\n"), + 'lineStart' => ( $i > 0 && $text[$i - 1] == "\n" ), ); $stack->push( $piece ); @@ -746,7 +781,7 @@ class PPDStack { $class = $this->elementClass; $this->stack[] = new $class( $data ); } - $this->top = $this->stack[ count( $this->stack ) - 1 ]; + $this->top = $this->stack[count( $this->stack ) - 1]; $this->accum =& $this->top->getAccum(); } @@ -757,7 +792,7 @@ class PPDStack { $temp = array_pop( $this->stack ); if ( count( $this->stack ) ) { - $this->top = $this->stack[ count( $this->stack ) - 1 ]; + $this->top = $this->stack[count( $this->stack ) - 1]; $this->accum =& $this->top->getAccum(); } else { $this->top = self::$false; @@ -1014,11 +1049,13 @@ class PPFrame_DOM implements PPFrame { while ( count( $iteratorStack ) > 1 ) { $level = count( $outStack ) - 1; - $iteratorNode =& $iteratorStack[ $level ]; + $iteratorNode =& $iteratorStack[$level]; $out =& $outStack[$level]; $index =& $indexStack[$level]; - if ( $iteratorNode instanceof PPNode_DOM ) $iteratorNode = $iteratorNode->node; + if ( $iteratorNode instanceof PPNode_DOM ) { + $iteratorNode = $iteratorNode->node; + } if ( is_array( $iteratorNode ) ) { if ( $index >= count( $iteratorNode ) ) { @@ -1148,9 +1185,7 @@ class PPFrame_DOM implements PPFrame { # Insert a heading marker only for children of # This is to stop extractSections from going over multiple tree levels - if ( $contextNode->parentNode->nodeName == 'root' - && $this->parser->ot['html'] ) - { + if ( $contextNode->parentNode->nodeName == 'root' && $this->parser->ot['html'] ) { # Insert heading index marker $headingIndex = $contextNode->getAttribute( 'i' ); $titleText = $this->title->getPrefixedDBkey(); @@ -1206,7 +1241,9 @@ class PPFrame_DOM implements PPFrame { $first = true; $s = ''; foreach ( $args as $root ) { - if ( $root instanceof PPNode_DOM ) $root = $root->node; + if ( $root instanceof PPNode_DOM ) { + $root = $root->node; + } if ( !is_array( $root ) && !( $root instanceof DOMNodeList ) ) { $root = array( $root ); } diff --git a/includes/parser/Preprocessor_Hash.php b/includes/parser/Preprocessor_Hash.php index fad1adbb..2fc5e118 100644 --- a/includes/parser/Preprocessor_Hash.php +++ b/includes/parser/Preprocessor_Hash.php @@ -287,9 +287,11 @@ class Preprocessor_Hash implements Preprocessor { } // Handle comments if ( isset( $matches[2] ) && $matches[2] == '!--' ) { - // To avoid leaving blank lines, when a comment is both preceded - // and followed by a newline (ignoring spaces), trim leading and - // trailing spaces and one of the newlines. + + // To avoid leaving blank lines, when a sequence of + // space-separated comments is both preceded and followed by + // a newline (ignoring spaces), then + // trim leading and trailing spaces and the trailing newline. // Find the end $endPos = strpos( $text, '-->', $i + 4 ); @@ -300,10 +302,25 @@ class Preprocessor_Hash implements Preprocessor { $i = $lengthText; } else { // Search backwards for leading whitespace - $wsStart = $i ? ( $i - strspn( $revText, ' ', $lengthText - $i ) ) : 0; + $wsStart = $i ? ( $i - strspn( $revText, " \t", $lengthText - $i ) ) : 0; + // Search forwards for trailing whitespace // $wsEnd will be the position of the last space (or the '>' if there's none) - $wsEnd = $endPos + 2 + strspn( $text, ' ', $endPos + 3 ); + $wsEnd = $endPos + 2 + strspn( $text, " \t", $endPos + 3 ); + + // Keep looking forward as long as we're finding more + // comments. + $comments = array( array( $wsStart, $wsEnd ) ); + while ( substr( $text, $wsEnd + 1, 4 ) == '', $wsEnd + 4 ); + if ( $c === false ) { + break; + } + $c = $c + 2 + strspn( $text, " \t", $c + 3 ); + $comments[] = array( $wsEnd + 1, $c ); + $wsEnd = $c; + } + // Eat the line if possible // TODO: This could theoretically be done if $wsStart == 0, i.e. for comments at // the overall start. That's not how Sanitizer::removeHTMLcomments() did it, but @@ -311,17 +328,27 @@ class Preprocessor_Hash implements Preprocessor { if ( $wsStart > 0 && substr( $text, $wsStart - 1, 1 ) == "\n" && substr( $text, $wsEnd + 1, 1 ) == "\n" ) { - $startPos = $wsStart; - $endPos = $wsEnd + 1; // Remove leading whitespace from the end of the accumulator // Sanity check first though $wsLength = $i - $wsStart; if ( $wsLength > 0 && $accum->lastNode instanceof PPNode_Hash_Text - && substr( $accum->lastNode->value, -$wsLength ) === str_repeat( ' ', $wsLength ) ) + && strspn( $accum->lastNode->value, " \t", -$wsLength ) === $wsLength ) { $accum->lastNode->value = substr( $accum->lastNode->value, 0, -$wsLength ); } + + // Dump all but the last comment to the accumulator + foreach ( $comments as $j => $com ) { + $startPos = $com[0]; + $endPos = $com[1] + 1; + if ( $j == ( count( $comments ) - 1 ) ) { + break; + } + $inner = substr( $text, $startPos, $endPos - $startPos ); + $accum->addNodeWithText( 'comment', $inner ); + } + // Do a line-start run next time to look for headings after the comment $fakeLineStart = true; } else { @@ -332,7 +359,7 @@ class Preprocessor_Hash implements Preprocessor { if ( $stack->top ) { $part = $stack->top->getCurrentPart(); - if ( !(isset( $part->commentEnd ) && $part->commentEnd == $wsStart - 1 )) { + if ( !( isset( $part->commentEnd ) && $part->commentEnd == $wsStart - 1 ) ) { $part->visualEnd = $wsStart; } // Else comments abutting, no change in visual end @@ -367,7 +394,7 @@ class Preprocessor_Hash implements Preprocessor { } $tagStartPos = $i; - if ( $text[$tagEndPos-1] == '/' ) { + if ( $text[$tagEndPos - 1] == '/' ) { // Short end tag $attrEnd = $tagEndPos - 1; $inner = null; @@ -515,7 +542,7 @@ class Preprocessor_Hash implements Preprocessor { 'open' => $curChar, 'close' => $rule['end'], 'count' => $count, - 'lineStart' => ($i > 0 && $text[$i-1] == "\n"), + 'lineStart' => ( $i > 0 && $text[$i - 1] == "\n" ), ); $stack->push( $piece ); @@ -591,9 +618,19 @@ class Preprocessor_Hash implements Preprocessor { $lastNode = $node; } if ( !$node ) { + if ( $cacheable ) { + wfProfileOut( __METHOD__ . '-cache-miss' ); + wfProfileOut( __METHOD__ . '-cacheable' ); + } + wfProfileOut( __METHOD__ ); throw new MWException( __METHOD__ . ': eqpos not found' ); } if ( $node->name !== 'equals' ) { + if ( $cacheable ) { + wfProfileOut( __METHOD__ . '-cache-miss' ); + wfProfileOut( __METHOD__ . '-cacheable' ); + } + wfProfileOut( __METHOD__ ); throw new MWException( __METHOD__ . ': eqpos is not equals' ); } $equalsNode = $node; @@ -952,7 +989,7 @@ class PPFrame_Hash implements PPFrame { while ( count( $iteratorStack ) > 1 ) { $level = count( $outStack ) - 1; - $iteratorNode =& $iteratorStack[ $level ]; + $iteratorNode =& $iteratorStack[$level]; $out =& $outStack[$level]; $index =& $indexStack[$level]; diff --git a/includes/parser/Tidy.php b/includes/parser/Tidy.php index 0f7e0d31..32b16aaf 100644 --- a/includes/parser/Tidy.php +++ b/includes/parser/Tidy.php @@ -61,7 +61,10 @@ class MWTidyWrapper { // Replace elements with placeholders $wrappedtext = preg_replace_callback( ParserOutput::EDITSECTION_REGEX, - array( &$this, 'replaceEditSectionLinksCallback' ), $text ); + array( &$this, 'replaceCallback' ), $text ); + // ...and markers + $wrappedtext = preg_replace_callback( '/\<\\/?mw:toc\>/', + array( &$this, 'replaceCallback' ), $wrappedtext ); // Modify inline Microdata and elements so they say and so // we can trick Tidy into not stripping them out by including them in tidy's new-empty-tags config @@ -80,7 +83,7 @@ class MWTidyWrapper { * * @return string */ - function replaceEditSectionLinksCallback( $m ) { + function replaceCallback( $m ) { $marker = "{$this->mUniqPrefix}-item-{$this->mMarkerIndex}" . Parser::MARKER_SUFFIX; $this->mMarkerIndex++; $this->mTokens->setPair( $marker, $m[0] ); @@ -158,7 +161,7 @@ class MWTidy { global $wgTidyInternal; $retval = 0; - if( $wgTidyInternal ) { + if ( $wgTidyInternal ) { $errorStr = self::execInternalTidy( $text, true, $retval ); } else { $errorStr = self::execExternalTidy( $text, true, $retval ); @@ -244,7 +247,7 @@ class MWTidy { global $wgTidyConf, $wgDebugTidy; wfProfileIn( __METHOD__ ); - if ( !MWInit::classExists( 'tidy' ) ) { + if ( !class_exists( 'tidy' ) ) { wfWarn( "Unable to load internal tidy class." ); $retval = -1; -- cgit v1.2.3-54-g00ecf