diff options
Diffstat (limited to 'includes/parser')
-rw-r--r-- | includes/parser/CoreLinkFunctions.php | 30 | ||||
-rw-r--r-- | includes/parser/CoreParserFunctions.php | 173 | ||||
-rw-r--r-- | includes/parser/CoreTagHooks.php | 65 | ||||
-rw-r--r-- | includes/parser/DateFormatter.php | 8 | ||||
-rw-r--r-- | includes/parser/LinkHolderArray.php | 252 | ||||
-rw-r--r-- | includes/parser/Parser.php | 1224 | ||||
-rw-r--r-- | includes/parser/ParserCache.php | 44 | ||||
-rw-r--r-- | includes/parser/ParserOptions.php | 192 | ||||
-rw-r--r-- | includes/parser/ParserOutput.php | 133 | ||||
-rw-r--r-- | includes/parser/Parser_DiffTest.php | 4 | ||||
-rw-r--r-- | includes/parser/Parser_LinkHooks.php | 21 | ||||
-rw-r--r-- | includes/parser/Preprocessor.php | 48 | ||||
-rw-r--r-- | includes/parser/Preprocessor_DOM.php | 249 | ||||
-rw-r--r-- | includes/parser/Preprocessor_Hash.php | 214 | ||||
-rw-r--r-- | includes/parser/Preprocessor_HipHop.hphp | 1941 | ||||
-rw-r--r-- | includes/parser/StripState.php | 175 | ||||
-rw-r--r-- | includes/parser/Tidy.php | 163 |
17 files changed, 4012 insertions, 924 deletions
diff --git a/includes/parser/CoreLinkFunctions.php b/includes/parser/CoreLinkFunctions.php index 913ec22b..8de13278 100644 --- a/includes/parser/CoreLinkFunctions.php +++ b/includes/parser/CoreLinkFunctions.php @@ -10,11 +10,25 @@ * @ingroup Parser */ class CoreLinkFunctions { + /** + * @param $parser Parser_LinkHooks + * @return bool + */ static function register( $parser ) { $parser->setLinkHook( NS_CATEGORY, array( __CLASS__, 'categoryLinkHook' ) ); return true; } + /** + * @param $parser Parser + * @param $holders LinkHolderArray + * @param $markers LinkMarkerReplacer + * @param Title $title + * @param $titleText + * @param null $displayText + * @param bool $leadingColon + * @return bool + */ static function defaultLinkHook( $parser, $holders, $markers, Title $title, $titleText, &$displayText = null, &$leadingColon = false ) { if( isset($displayText) && $markers->findMarker( $displayText ) ) { @@ -25,9 +39,19 @@ class CoreLinkFunctions { # Return false so that this link is reverted back to WikiText return false; } - return $holders->makeHolder( $title, isset($displayText) ? $displayText : $titleText, '', '', '' ); + return $holders->makeHolder( $title, isset($displayText) ? 
$displayText : $titleText, array(), '', '' ); } - + + /** + * @param $parser Parser + * @param $holders LinkHolderArray + * @param $markers LinkMarkerReplacer + * @param Title $title + * @param $titleText + * @param null $sortText + * @param bool $leadingColon + * @return bool|string + */ static function categoryLinkHook( $parser, $holders, $markers, Title $title, $titleText, &$sortText = null, &$leadingColon = false ) { global $wgContLang; @@ -48,5 +72,5 @@ class CoreLinkFunctions { $parser->mOutput->addCategory( $title->getDBkey(), $sortText ); return ''; } - + } diff --git a/includes/parser/CoreParserFunctions.php b/includes/parser/CoreParserFunctions.php index 94949221..eebed44c 100644 --- a/includes/parser/CoreParserFunctions.php +++ b/includes/parser/CoreParserFunctions.php @@ -10,6 +10,10 @@ * @ingroup Parser */ class CoreParserFunctions { + /** + * @param $parser Parser + * @return void + */ static function register( $parser ) { global $wgAllowDisplayTitle, $wgAllowSlowParserFunctions; @@ -31,6 +35,8 @@ class CoreParserFunctions { $parser->setFunctionHook( 'localurle', array( __CLASS__, 'localurle' ), SFH_NO_HASH ); $parser->setFunctionHook( 'fullurl', array( __CLASS__, 'fullurl' ), SFH_NO_HASH ); $parser->setFunctionHook( 'fullurle', array( __CLASS__, 'fullurle' ), SFH_NO_HASH ); + $parser->setFunctionHook( 'canonicalurl', array( __CLASS__, 'canonicalurl' ), SFH_NO_HASH ); + $parser->setFunctionHook( 'canonicalurle', array( __CLASS__, 'canonicalurle' ), SFH_NO_HASH ); $parser->setFunctionHook( 'formatnum', array( __CLASS__, 'formatnum' ), SFH_NO_HASH ); $parser->setFunctionHook( 'grammar', array( __CLASS__, 'grammar' ), SFH_NO_HASH ); $parser->setFunctionHook( 'gender', array( __CLASS__, 'gender' ), SFH_NO_HASH ); @@ -83,18 +89,27 @@ class CoreParserFunctions { } } + /** + * @param $parser Parser + * @param string $part1 + * @return array + */ static function intFunction( $parser, $part1 = '' /*, ... 
*/ ) { if ( strval( $part1 ) !== '' ) { $args = array_slice( func_get_args(), 2 ); - $message = wfMsgGetKey( $part1, true, $parser->getOptions()->getUserLang(), false ); - $message = wfMsgReplaceArgs( $message, $args ); - $message = $parser->replaceVariables( $message ); // like $wgMessageCache->transform() - return $message; + $message = wfMessage( $part1, $args )->inLanguage( $parser->getOptions()->getUserLang() )->plain(); + return array( $message, 'noparse' => false ); } else { return array( 'found' => false ); } } + /** + * @param $parser Parser + * @param $date + * @param null $defaultPref + * @return mixed|string + */ static function formatDate( $parser, $date, $defaultPref = null ) { $df = DateFormatter::getInstance(); @@ -172,6 +187,11 @@ class CoreParserFunctions { return $wgContLang->ucfirst( $s ); } + /** + * @param $parser Parser + * @param string $s + * @return + */ static function lc( $parser, $s = '' ) { global $wgContLang; if ( is_callable( array( $parser, 'markerSkipCallback' ) ) ) { @@ -181,6 +201,11 @@ class CoreParserFunctions { } } + /** + * @param $parser Parser + * @param string $s + * @return + */ static function uc( $parser, $s = '' ) { global $wgContLang; if ( is_callable( array( $parser, 'markerSkipCallback' ) ) ) { @@ -194,6 +219,8 @@ class CoreParserFunctions { static function localurle( $parser, $s = '', $arg = null ) { return self::urlFunction( 'escapeLocalURL', $s, $arg ); } static function fullurl( $parser, $s = '', $arg = null ) { return self::urlFunction( 'getFullURL', $s, $arg ); } static function fullurle( $parser, $s = '', $arg = null ) { return self::urlFunction( 'escapeFullURL', $s, $arg ); } + static function canonicalurl( $parser, $s = '', $arg = null ) { return self::urlFunction( 'getCanonicalURL', $s, $arg ); } + static function canonicalurle( $parser, $s = '', $arg = null ) { return self::urlFunction( 'escapeCanonicalURL', $s, $arg ); } static function urlFunction( $func, $s = '', $arg = null ) { $title = 
Title::newFromText( $s ); @@ -219,6 +246,12 @@ class CoreParserFunctions { } } + /** + * @param $parser Parser + * @param string $num + * @param null $raw + * @return + */ static function formatNum( $parser, $num = '', $raw = null) { if ( self::israw( $raw ) ) { return $parser->getFunctionLang()->parseFormattedNumber( $num ); @@ -227,35 +260,54 @@ class CoreParserFunctions { } } + /** + * @param $parser Parser + * @param string $case + * @param string $word + * @return + */ static function grammar( $parser, $case = '', $word = '' ) { return $parser->getFunctionLang()->convertGrammar( $word, $case ); } - static function gender( $parser, $user ) { + /** + * @param $parser Parser + * @param $username string + * @return + */ + static function gender( $parser, $username ) { wfProfileIn( __METHOD__ ); $forms = array_slice( func_get_args(), 2); + $username = trim( $username ); + // default $gender = User::getDefaultOption( 'gender' ); // allow prefix. - $title = Title::newFromText( $user ); + $title = Title::newFromText( $username ); - if ( is_object( $title ) && $title->getNamespace() == NS_USER ) - $user = $title->getText(); + if ( $title && $title->getNamespace() == NS_USER ) { + $username = $title->getText(); + } - // check parameter, or use $wgUser if in interface message - $user = User::newFromName( $user ); + // check parameter, or use the ParserOptions if in interface message + $user = User::newFromName( $username ); if ( $user ) { $gender = $user->getOption( 'gender' ); - } elseif ( $parser->getOptions()->getInterfaceMessage() ) { - global $wgUser; - $gender = $wgUser->getOption( 'gender' ); + } elseif ( $username === '' && $parser->getOptions()->getInterfaceMessage() ) { + $gender = $parser->getOptions()->getUser()->getOption( 'gender' ); } $ret = $parser->getFunctionLang()->gender( $gender, $forms ); wfProfileOut( __METHOD__ ); return $ret; } + + /** + * @param $parser Parser + * @param string $text + * @return + */ static function plural( $parser, $text = '' ) 
{ $forms = array_slice( func_get_args(), 2 ); $text = $parser->getFunctionLang()->parseFormattedNumber( $text ); @@ -396,10 +448,11 @@ class CoreParserFunctions { return ''; return wfUrlencode( $t->getSubjectNsText() ); } - /* + + /** * Functions to get and normalize pagenames, corresponding to the magic words * of the same names - */ + */ static function pagename( $parser, $title = null ) { $t = Title::newFromText( $title ); if ( is_null( $t ) ) @@ -410,7 +463,7 @@ class CoreParserFunctions { $t = Title::newFromText( $title ); if ( is_null( $t ) ) return ''; - return $t->getPartialURL(); + return wfEscapeWikiText( $t->getPartialURL() ); } static function fullpagename( $parser, $title = null ) { $t = Title::newFromText( $title ); @@ -422,31 +475,31 @@ class CoreParserFunctions { $t = Title::newFromText( $title ); if ( is_null( $t ) || !$t->canTalk() ) return ''; - return $t->getPrefixedURL(); + return wfEscapeWikiText( $t->getPrefixedURL() ); } static function subpagename( $parser, $title = null ) { $t = Title::newFromText( $title ); if ( is_null( $t ) ) return ''; - return $t->getSubpageText(); + return wfEscapeWikiText( $t->getSubpageText() ); } static function subpagenamee( $parser, $title = null ) { $t = Title::newFromText( $title ); if ( is_null( $t ) ) return ''; - return $t->getSubpageUrlForm(); + return wfEscapeWikiText( $t->getSubpageUrlForm() ); } static function basepagename( $parser, $title = null ) { $t = Title::newFromText( $title ); if ( is_null( $t ) ) return ''; - return $t->getBaseText(); + return wfEscapeWikiText( $t->getBaseText() ); } static function basepagenamee( $parser, $title = null ) { $t = Title::newFromText( $title ); if ( is_null( $t ) ) return ''; - return wfUrlEncode( str_replace( ' ', '_', $t->getBaseText() ) ); + return wfEscapeWikiText( wfUrlEncode( str_replace( ' ', '_', $t->getBaseText() ) ) ); } static function talkpagename( $parser, $title = null ) { $t = Title::newFromText( $title ); @@ -458,7 +511,7 @@ class 
CoreParserFunctions { $t = Title::newFromText( $title ); if ( is_null( $t ) || !$t->canTalk() ) return ''; - return $t->getTalkPage()->getPrefixedUrl(); + return wfEscapeWikiText( $t->getTalkPage()->getPrefixedUrl() ); } static function subjectpagename( $parser, $title = null ) { $t = Title::newFromText( $title ); @@ -470,7 +523,7 @@ class CoreParserFunctions { $t = Title::newFromText( $title ); if ( is_null( $t ) ) return ''; - return $t->getSubjectPage()->getPrefixedUrl(); + return wfEscapeWikiText( $t->getSubjectPage()->getPrefixedUrl() ); } /** @@ -503,12 +556,13 @@ class CoreParserFunctions { * Return the size of the given page, or 0 if it's nonexistent. This is an * expensive parser function and can't be called too many times per page. * - * @todo Fixme: This doesn't work correctly on preview for getting the size + * @todo FIXME: This doesn't work correctly on preview for getting the size * of the current page. - * @todo Fixme: Title::getLength() documentation claims that it adds things + * @todo FIXME: Title::getLength() documentation claims that it adds things * to the link cache, so the local cache here should be unnecessary, but * in fact calling getLength() repeatedly for the same $page does seem to * run one query for each call? + * @param $parser Parser */ static function pagesize( $parser, $page = '', $raw = null ) { static $cache = array(); @@ -546,10 +600,25 @@ class CoreParserFunctions { return implode( $restrictions, ',' ); } - static function language( $parser, $arg = '' ) { + /** + * Gives language names. + * @param $parser Parser + * @param $code String Language code + * @param $language String Language code + * @return String + */ + static function language( $parser, $code = '', $language = '' ) { global $wgContLang; - $lang = $wgContLang->getLanguageName( strtolower( $arg ) ); - return $lang != '' ? 
$lang : $arg; + $code = strtolower( $code ); + $language = strtolower( $language ); + + if ( $language !== '' ) { + $names = Language::getTranslatedLanguageNames( $language ); + return isset( $names[$code] ) ? $names[$code] : wfBCP47( $code ); + } + + $lang = $wgContLang->getLanguageName( $code ); + return $lang !== '' ? $lang : wfBCP47( $code ); } /** @@ -586,12 +655,17 @@ class CoreParserFunctions { return self::pad( $string, $length, $padding ); } + /** + * @param $parser Parser + * @param $text + * @return string + */ static function anchorencode( $parser, $text ) { return substr( $parser->guessSectionNameFromWikiText( $text ), 1); } static function special( $parser, $text ) { - list( $page, $subpage ) = SpecialPage::resolveAliasWithSubpage( $text ); + list( $page, $subpage ) = SpecialPageFactory::resolveAlias( $text ); if ( $page ) { $title = SpecialPage::getTitleFor( $page, $subpage ); return $title; @@ -600,6 +674,11 @@ class CoreParserFunctions { } } + /** + * @param $parser Parser + * @param $text + * @return string + */ public static function defaultsort( $parser, $text ) { $text = trim( $text ); if( strlen( $text ) == 0 ) @@ -616,11 +695,41 @@ class CoreParserFunctions { '</span>' ); } - public static function filepath( $parser, $name='', $option='' ) { + // Usage {{filepath|300}}, {{filepath|nowiki}}, {{filepath|nowiki|300}} or {{filepath|300|nowiki}} + public static function filepath( $parser, $name='', $argA='', $argB='' ) { $file = wfFindFile( $name ); - if( $file ) { + $size = ''; + $argA_int = intval( $argA ); + $argB_int = intval( $argB ); + + if ( $argB_int > 0 ) { + // {{filepath: | option | size }} + $size = $argB_int; + $option = $argA; + + } elseif ( $argA_int > 0 ) { + // {{filepath: | size [|option] }} + $size = $argA_int; + $option = $argB; + + } else { + // {{filepath: [|option] }} + $option = $argA; + } + + if ( $file ) { $url = $file->getFullUrl(); - if( $option == 'nowiki' ) { + + // If a size is requested... 
+ if ( is_integer( $size ) ) { + $mto = $file->transform( array( 'width' => $size ) ); + // ... and we can + if ( $mto && !$mto->isError() ) { + // ... change the URL to point to a thumbnail. + $url = wfExpandUrl( $mto->getUrl(), PROTO_RELATIVE ); + } + } + if ( $option == 'nowiki' ) { return array( $url, 'nowiki' => true ); } return $url; diff --git a/includes/parser/CoreTagHooks.php b/includes/parser/CoreTagHooks.php index 33f3c824..7d488c4b 100644 --- a/includes/parser/CoreTagHooks.php +++ b/includes/parser/CoreTagHooks.php @@ -10,19 +10,30 @@ * @ingroup Parser */ class CoreTagHooks { + /** + * @param $parser Parser + * @return void + */ static function register( $parser ) { - global $wgRawHtml, $wgUseTeX; + global $wgRawHtml; $parser->setHook( 'pre', array( __CLASS__, 'pre' ) ); $parser->setHook( 'nowiki', array( __CLASS__, 'nowiki' ) ); $parser->setHook( 'gallery', array( __CLASS__, 'gallery' ) ); if ( $wgRawHtml ) { $parser->setHook( 'html', array( __CLASS__, 'html' ) ); } - if ( $wgUseTeX ) { - $parser->setHook( 'math', array( __CLASS__, 'math' ) ); - } } + /** + * Core parser tag hook function for 'pre'. + * Text is treated roughly as 'nowiki' wrapped in an HTML 'pre' tag; + * valid HTML attributes are passed on. + * + * @param string $text + * @param array $attribs + * @param Parser $parser + * @return string HTML + */ static function pre( $text, $attribs, $parser ) { // Backwards-compatibility hack $content = StringUtils::delimiterReplace( '<nowiki>', '</nowiki>', '$1', $text, 'i' ); @@ -33,6 +44,20 @@ class CoreTagHooks { '</pre>'; } + /** + * Core parser tag hook function for 'html', used only when + * $wgRawHtml is enabled. + * + * This is potentially unsafe and should be used only in very careful + * circumstances, as the contents are emitted as raw HTML. + * + * Uses undocumented extended tag hook return values, introduced in r61913. 
+ * + * @param $content string + * @param $attributes array + * @param $parser Parser + * @return array + */ static function html( $content, $attributes, $parser ) { global $wgRawHtml; if( $wgRawHtml ) { @@ -42,16 +67,38 @@ class CoreTagHooks { } } + /** + * Core parser tag hook function for 'nowiki'. Text within this section + * gets interpreted as a string of text with HTML-compatible character + * references, and wiki markup within it will not be expanded. + * + * Uses undocumented extended tag hook return values, introduced in r61913. + * + * @param $content string + * @param $attributes array + * @param $parser Parser + * @return array + */ static function nowiki( $content, $attributes, $parser ) { $content = strtr( $content, array( '-{' => '-&#123;', '}-' => '&#125;-' ) ); return array( Xml::escapeTagsOnly( $content ), 'markerType' => 'nowiki' ); } - static function math( $content, $attributes, $parser ) { - global $wgContLang; - return $wgContLang->armourMath( MathRenderer::renderMath( $content, $attributes, $parser->getOptions() ) ); - } - + /** + * Core parser tag hook function for 'gallery'. + * + * Renders a thumbnail list of the given images, with optional captions. + * Full syntax documented on the wiki: + * + * http://www.mediawiki.org/wiki/Help:Images#Gallery_syntax + * + * @todo break Parser::renderImageGallery out here too.
+ * + * @param string $content + * @param array $attributes + * @param Parser $parser + * @return string HTML + */ static function gallery( $content, $attributes, $parser ) { return $parser->renderImageGallery( $content, $attributes ); } diff --git a/includes/parser/DateFormatter.php b/includes/parser/DateFormatter.php index cf510171..6559e886 100644 --- a/includes/parser/DateFormatter.php +++ b/includes/parser/DateFormatter.php @@ -182,8 +182,8 @@ class DateFormatter $bits = array(); $key = $this->keys[$this->mSource]; for ( $p=0; $p < strlen($key); $p++ ) { - if ( $key{$p} != ' ' ) { - $bits[$key{$p}] = $matches[$p+1]; + if ( $key[$p] != ' ' ) { + $bits[$key[$p]] = $matches[$p+1]; } } @@ -224,7 +224,7 @@ class DateFormatter } for ( $p=0; $p < strlen( $format ); $p++ ) { - $char = $format{$p}; + $char = $format[$p]; switch ( $char ) { case 'd': # ISO day of month $text .= $bits['d']; @@ -327,7 +327,7 @@ class DateFormatter * @todo document */ function makeNormalYear( $iso ) { - if ( $iso{0} == '-' ) { + if ( $iso[0] == '-' ) { $text = (intval( substr( $iso, 1 ) ) + 1) . ' BC'; } else { $text = intval( $iso ); diff --git a/includes/parser/LinkHolderArray.php b/includes/parser/LinkHolderArray.php index 19313b80..5418b6e5 100644 --- a/includes/parser/LinkHolderArray.php +++ b/includes/parser/LinkHolderArray.php @@ -12,6 +12,7 @@ class LinkHolderArray { var $internals = array(), $interwikis = array(); var $size = 0; var $parent; + protected $tempIdOffset; function __construct( $parent ) { $this->parent = $parent; @@ -26,8 +27,51 @@ class LinkHolderArray { } } + /** + * Don't serialize the parent object, it is big, and not needed when it is + * a parameter to mergeForeign(), which is the only application of + * serializing at present. + * + * Compact the titles, only serialize the text form. 
+ */ + function __sleep() { + foreach ( $this->internals as &$nsLinks ) { + foreach ( $nsLinks as &$entry ) { + unset( $entry['title'] ); + } + } + unset( $nsLinks ); + unset( $entry ); + + foreach ( $this->interwikis as &$entry ) { + unset( $entry['title'] ); + } + unset( $entry ); + + return array( 'internals', 'interwikis', 'size' ); + } + + /** + * Recreate the Title objects + */ + function __wakeup() { + foreach ( $this->internals as &$nsLinks ) { + foreach ( $nsLinks as &$entry ) { + $entry['title'] = Title::newFromText( $entry['pdbk'] ); + } + } + unset( $nsLinks ); + unset( $entry ); + + foreach ( $this->interwikis as &$entry ) { + $entry['title'] = Title::newFromText( $entry['pdbk'] ); + } + unset( $entry ); + } + /** * Merge another LinkHolderArray into this one + * @param $other LinkHolderArray */ function merge( $other ) { foreach ( $other->internals as $ns => $entries ) { @@ -42,6 +86,86 @@ class LinkHolderArray { } /** + * Merge a LinkHolderArray from another parser instance into this one. The + * keys will not be preserved. Any text which went with the old + * LinkHolderArray and needs to work with the new one should be passed in + * the $texts array. The strings in this array will have their link holders + * converted for use in the destination link holder. The resulting array of + * strings will be returned. + * + * @param $other LinkHolderArray + * @param $texts Array of strings + * @return Array + */ + function mergeForeign( $other, $texts ) { + $this->tempIdOffset = $idOffset = $this->parent->nextLinkID(); + $maxId = 0; + + # Renumber internal links + foreach ( $other->internals as $ns => $nsLinks ) { + foreach ( $nsLinks as $key => $entry ) { + $newKey = $idOffset + $key; + $this->internals[$ns][$newKey] = $entry; + $maxId = $newKey > $maxId ? 
$newKey : $maxId; + } + } + $texts = preg_replace_callback( '/(<!--LINK \d+:)(\d+)(-->)/', + array( $this, 'mergeForeignCallback' ), $texts ); + + # Renumber interwiki links + foreach ( $other->interwikis as $key => $entry ) { + $newKey = $idOffset + $key; + $this->interwikis[$newKey] = $entry; + $maxId = $newKey > $maxId ? $newKey : $maxId; + } + $texts = preg_replace_callback( '/(<!--IWLINK )(\d+)(-->)/', + array( $this, 'mergeForeignCallback' ), $texts ); + + # Set the parent link ID to be beyond the highest used ID + $this->parent->setLinkID( $maxId + 1 ); + $this->tempIdOffset = null; + return $texts; + } + + protected function mergeForeignCallback( $m ) { + return $m[1] . ( $m[2] + $this->tempIdOffset ) . $m[3]; + } + + /** + * Get a subset of the current LinkHolderArray which is sufficient to + * interpret the given text. + */ + function getSubArray( $text ) { + $sub = new LinkHolderArray( $this->parent ); + + # Internal links + $pos = 0; + while ( $pos < strlen( $text ) ) { + if ( !preg_match( '/<!--LINK (\d+):(\d+)-->/', + $text, $m, PREG_OFFSET_CAPTURE, $pos ) ) + { + break; + } + $ns = $m[1][0]; + $key = $m[2][0]; + $sub->internals[$ns][$key] = $this->internals[$ns][$key]; + $pos = $m[0][1] + strlen( $m[0][0] ); + } + + # Interwiki links + $pos = 0; + while ( $pos < strlen( $text ) ) { + if ( !preg_match( '/<!--IWLINK (\d+)-->/', $text, $m, PREG_OFFSET_CAPTURE, $pos ) ) { + break; + } + $key = $m[1][0]; + $sub->interwikis[$key] = $this->interwikis[$key]; + $pos = $m[0][1] + strlen( $m[0][0] ); + } + return $sub; + } + + /** * Returns true if the memory requirements of this object are getting large */ function isBig() { @@ -65,8 +189,9 @@ class LinkHolderArray { * parsing of interwiki links, and secondly to allow all existence checks and * article length checks (for stub links) to be bundled into a single query. 
* + * @param $nt Title */ - function makeHolder( $nt, $text = '', $query = '', $trail = '', $prefix = '' ) { + function makeHolder( $nt, $text = '', $query = array(), $trail = '', $prefix = '' ) { wfProfileIn( __METHOD__ ); if ( ! is_object($nt) ) { # Fail gracefully @@ -80,7 +205,7 @@ class LinkHolderArray { 'text' => $prefix.$text.$inside, 'pdbk' => $nt->getPrefixedDBkey(), ); - if ( $query !== '' ) { + if ( $query !== array() ) { $entry['query'] = $query; } @@ -102,18 +227,7 @@ class LinkHolderArray { } /** - * Get the stub threshold - */ - function getStubThreshold() { - global $wgUser; - if ( !isset( $this->stubThreshold ) ) { - $this->stubThreshold = $wgUser->getStubThreshold(); - } - return $this->stubThreshold; - } - - /** - * FIXME: update documentation. makeLinkObj() is deprecated. + * @todo FIXME: Update documentation. makeLinkObj() is deprecated. * Replace <!--LINK--> link placeholders with actual links, in the buffer * Placeholders created in Skin::makeLinkObj() * Returns an array of link CSS classes, indexed by PDBK. 
@@ -140,14 +254,12 @@ class LinkHolderArray { global $wgContLang; $colours = array(); - $sk = $this->parent->getOptions()->getSkin( $this->parent->mTitle ); $linkCache = LinkCache::singleton(); $output = $this->parent->getOutput(); wfProfileIn( __METHOD__.'-check' ); $dbr = wfGetDB( DB_SLAVE ); - $page = $dbr->tableName( 'page' ); - $threshold = $this->getStubThreshold(); + $threshold = $this->parent->getOptions()->getStubThreshold(); # Sort by namespace ksort( $this->internals ); @@ -155,8 +267,7 @@ class LinkHolderArray { $linkcolour_ids = array(); # Generate query - $query = false; - $current = null; + $queries = array(); foreach ( $this->internals as $ns => $entries ) { foreach ( $entries as $entry ) { $title = $entry['title']; @@ -174,32 +285,35 @@ class LinkHolderArray { } elseif ( $ns == NS_SPECIAL ) { $colours[$pdbk] = 'new'; } elseif ( ( $id = $linkCache->getGoodLinkID( $pdbk ) ) != 0 ) { - $colours[$pdbk] = $sk->getLinkColour( $title, $threshold ); + $colours[$pdbk] = Linker::getLinkColour( $title, $threshold ); $output->addLink( $title, $id ); $linkcolour_ids[$id] = $pdbk; } elseif ( $linkCache->isBadLink( $pdbk ) ) { $colours[$pdbk] = 'new'; } else { # Not in the link cache, add it to the query - if ( !isset( $current ) ) { - $current = $ns; - $query = "SELECT page_id, page_namespace, page_title, page_is_redirect, page_len, page_latest"; - $query .= " FROM $page WHERE (page_namespace=$ns AND page_title IN("; - } elseif ( $current != $ns ) { - $current = $ns; - $query .= ")) OR (page_namespace=$ns AND page_title IN("; - } else { - $query .= ', '; - } - - $query .= $dbr->addQuotes( $title->getDBkey() ); + $queries[$ns][] = $title->getDBkey(); } } } - if ( $query ) { - $query .= '))'; + if ( $queries ) { + $where = array(); + foreach( $queries as $ns => $pages ){ + $where[] = $dbr->makeList( + array( + 'page_namespace' => $ns, + 'page_title' => $pages, + ), + LIST_AND + ); + } - $res = $dbr->query( $query, __METHOD__ ); + $res = $dbr->select( + 'page', + 
array( 'page_id', 'page_namespace', 'page_title', 'page_is_redirect', 'page_len', 'page_latest' ), + $dbr->makeList( $where, LIST_OR ), + __METHOD__ + ); # Fetch data and form into an associative array # non-existent = broken @@ -208,10 +322,10 @@ class LinkHolderArray { $pdbk = $title->getPrefixedDBkey(); $linkCache->addGoodLinkObj( $s->page_id, $title, $s->page_len, $s->page_is_redirect, $s->page_latest ); $output->addLink( $title, $s->page_id ); - # FIXME: convoluted data flow + # @todo FIXME: Convoluted data flow # The redirect status and length is passed to getLinkColour via the LinkCache # Use formal parameters instead - $colours[$pdbk] = $sk->getLinkColour( $title, $threshold ); + $colours[$pdbk] = Linker::getLinkColour( $title, $threshold ); //add id to the extension todolist $linkcolour_ids[$s->page_id] = $pdbk; } @@ -235,23 +349,29 @@ class LinkHolderArray { foreach ( $entries as $index => $entry ) { $pdbk = $entry['pdbk']; $title = $entry['title']; - $query = isset( $entry['query'] ) ? $entry['query'] : ''; + $query = isset( $entry['query'] ) ? 
$entry['query'] : array(); $key = "$ns:$index"; $searchkey = "<!--LINK $key-->"; - if ( !isset( $colours[$pdbk] ) || $colours[$pdbk] == 'new' ) { - $linkCache->addBadLinkObj( $title ); + $displayText = $entry['text']; + if ( $displayText === '' ) { + $displayText = null; + } + if ( !isset( $colours[$pdbk] ) ) { $colours[$pdbk] = 'new'; + } + $attribs = array(); + if ( $colours[$pdbk] == 'new' ) { + $linkCache->addBadLinkObj( $title ); $output->addLink( $title, 0 ); - // FIXME: replace deprecated makeBrokenLinkObj() by link() - $replacePairs[$searchkey] = $sk->makeBrokenLinkObj( $title, - $entry['text'], - $query ); + $type = array( 'broken' ); } else { - // FIXME: replace deprecated makeColouredLinkObj() by link() - $replacePairs[$searchkey] = $sk->makeColouredLinkObj( $title, $colours[$pdbk], - $entry['text'], - $query ); + if ( $colours[$pdbk] != '' ) { + $attribs['class'] = $colours[$pdbk]; + } + $type = array( 'known', 'noclasses' ); } + $replacePairs[$searchkey] = Linker::link( $title, $displayText, + $attribs, $query, $type ); } } $replacer = new HashtableReplacer( $replacePairs, 1 ); @@ -278,11 +398,10 @@ class LinkHolderArray { wfProfileIn( __METHOD__ ); # Make interwiki link HTML - $sk = $this->parent->getOptions()->getSkin( $this->parent->mTitle ); $output = $this->parent->getOutput(); $replacePairs = array(); foreach( $this->interwikis as $key => $link ) { - $replacePairs[$key] = $sk->link( $link['title'], $link['text'] ); + $replacePairs[$key] = Linker::link( $link['title'], $link['text'] ); $output->addInterwikiLink( $link['title'] ); } $replacer = new HashtableReplacer( $replacePairs, 1 ); @@ -303,11 +422,10 @@ class LinkHolderArray { $variantMap = array(); // maps $pdbkey_Variant => $keys (of link holders) $output = $this->parent->getOutput(); $linkCache = LinkCache::singleton(); - $sk = $this->parent->getOptions()->getSkin( $this->parent->mTitle ); - $threshold = $this->getStubThreshold(); + $threshold = 
$this->parent->getOptions()->getStubThreshold(); $titlesToBeConverted = ''; $titlesAttrs = array(); - + // Concatenate titles to a single string, thus we only need auto convert the // single string to all variants. This would improve parser's performance // significantly. @@ -322,14 +440,14 @@ class LinkHolderArray { 'ns' => $ns, 'key' => "$ns:$index", 'titleText' => $titleText, - ); + ); // separate titles with \0 because it would never appears // in a valid title $titlesToBeConverted .= $titleText . "\0"; } } } - + // Now do the conversion and explode string to text of titles $titlesAllVariants = $wgContLang->autoConvertToAllVariants( $titlesToBeConverted ); $allVariantsName = array_keys( $titlesAllVariants ); @@ -341,9 +459,8 @@ class LinkHolderArray { for ( $i = 0; $i < $l; $i ++ ) { foreach ( $allVariantsName as $variantName ) { $textVariant = $titlesAllVariants[$variantName][$i]; - extract( $titlesAttrs[$i] ); - if($textVariant != $titleText){ - $variantTitle = Title::makeTitle( $ns, $textVariant ); + if ( $textVariant != $titlesAttrs[$i]['titleText'] ) { + $variantTitle = Title::makeTitle( $titlesAttrs[$i]['ns'], $textVariant ); if( is_null( $variantTitle ) ) { continue; } @@ -372,11 +489,12 @@ class LinkHolderArray { if(!$linkBatch->isEmpty()){ // construct query $dbr = wfGetDB( DB_SLAVE ); - $page = $dbr->tableName( 'page' ); - $titleClause = $linkBatch->constructSet('page', $dbr); - $variantQuery = "SELECT page_id, page_namespace, page_title, page_is_redirect, page_len"; - $variantQuery .= " FROM $page WHERE $titleClause"; - $varRes = $dbr->query( $variantQuery, __METHOD__ ); + $varRes = $dbr->select( 'page', + array( 'page_id', 'page_namespace', 'page_title', 'page_is_redirect', 'page_len' ), + $linkBatch->constructSet( 'page', $dbr ), + __METHOD__ + ); + $linkcolour_ids = array(); // for each found variants, figure out link holders and replace @@ -387,14 +505,14 @@ class LinkHolderArray { $vardbk = $variantTitle->getDBkey(); $holderKeys = array(); - 
if(isset($variantMap[$varPdbk])){ + if( isset( $variantMap[$varPdbk] ) ) { $holderKeys = $variantMap[$varPdbk]; $linkCache->addGoodLinkObj( $s->page_id, $variantTitle, $s->page_len, $s->page_is_redirect ); $output->addLink( $variantTitle, $s->page_id ); } // loop over link holders - foreach($holderKeys as $key){ + foreach( $holderKeys as $key ) { list( $ns, $index ) = explode( ':', $key, 2 ); $entry =& $this->internals[$ns][$index]; $pdbk = $entry['pdbk']; @@ -405,10 +523,10 @@ class LinkHolderArray { $entry['pdbk'] = $varPdbk; // set pdbk and colour - # FIXME: convoluted data flow + # @todo FIXME: Convoluted data flow # The redirect status and length is passed to getLinkColour via the LinkCache # Use formal parameters instead - $colours[$varPdbk] = $sk->getLinkColour( $variantTitle, $threshold ); + $colours[$varPdbk] = Linker::getLinkColour( $variantTitle, $threshold ); $linkcolour_ids[$s->page_id] = $pdbk; } } diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index 4a3aa03b..8d4c60df 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -24,18 +24,20 @@ * removes HTML comments and expands templates * cleanSig() / cleanSigInSig() * Cleans a signature before saving it to preferences - * extractSections() - * Extracts sections from an article for section editing + * getSection() + * Return the content of a section from an article for section editing + * replaceSection() + * Replaces a section by number inside an article * getPreloadText() * Removes <noinclude> sections, and <includeonly> tags. * * Globals used: * objects: $wgLang, $wgContLang * - * NOT $wgArticle, $wgUser or $wgTitle. Keep them away! + * NOT $wgUser or $wgTitle. Keep them away! 
* * settings: - * $wgUseTex*, $wgUseDynamicDates*, $wgInterwikiMagic*, + * $wgUseDynamicDates*, $wgInterwikiMagic*, * $wgNamespacesWithSubpages, $wgAllowExternalImages*, * $wgLocaltimezone, $wgAllowSpecialInclusion*, * $wgMaxArticleSize* @@ -53,6 +55,12 @@ class Parser { */ const VERSION = '1.6.4'; + /** + * Update this version number when the output of serialiseHalfParsedText() + * changes in an incompatible way + */ + const HALF_PARSED_VERSION = 2; + # Flags for Parser::setFunctionHook # Also available as global constants from Defines.php const SFH_NO_HASH = 1; @@ -89,50 +97,99 @@ class Parser { const MARKER_SUFFIX = "-QINU\x7f"; # Persistent: - var $mTagHooks, $mTransparentTagHooks, $mFunctionHooks, $mFunctionSynonyms, $mVariables; - var $mSubstWords, $mImageParams, $mImageParamsMagicArray, $mStripList, $mMarkerIndex; - var $mPreprocessor, $mExtLinkBracketedRegex, $mUrlProtocols, $mDefaultStripList; - var $mVarCache, $mConf, $mFunctionTagHooks; + var $mTagHooks = array(); + var $mTransparentTagHooks = array(); + var $mFunctionHooks = array(); + var $mFunctionSynonyms = array( 0 => array(), 1 => array() ); + var $mFunctionTagHooks = array(); + var $mStripList = array(); + var $mDefaultStripList = array(); + var $mVarCache = array(); + var $mImageParams = array(); + var $mImageParamsMagicArray = array(); + var $mMarkerIndex = 0; + var $mFirstCall = true; + + # Initialised by initialiseVariables() + + /** + * @var MagicWordArray + */ + var $mVariables; + /** + * @var MagicWordArray + */ + var $mSubstWords; + var $mConf, $mPreprocessor, $mExtLinkBracketedRegex, $mUrlProtocols; # Initialised in constructor # Cleared with clearState(): - var $mOutput, $mAutonumber, $mDTopen, $mStripState; + /** + * @var ParserOutput + */ + var $mOutput; + var $mAutonumber, $mDTopen; + + /** + * @var StripState + */ + var $mStripState; + var $mIncludeCount, $mArgStack, $mLastSection, $mInPre; - var $mLinkHolders, $mLinkID; + /** + * @var LinkHolderArray + */ + var $mLinkHolders; + + 
var $mLinkID; var $mIncludeSizes, $mPPNodeCount, $mDefaultSort; var $mTplExpandCache; # empty-frame expansion cache var $mTplRedirCache, $mTplDomCache, $mHeadings, $mDoubleUnderscores; var $mExpensiveFunctionCount; # number of expensive parser function calls + /** + * @var User + */ + var $mUser; # User object; only used when doing pre-save transform + # Temporary # These are variables reset at least once per parse regardless of $clearState - var $mOptions; # ParserOptions object + + /** + * @var ParserOptions + */ + var $mOptions; + + /** + * @var Title + */ var $mTitle; # Title context, used for self-link rendering and similar things var $mOutputType; # Output type, one of the OT_xxx constants var $ot; # Shortcut alias, see setOutputType() + var $mRevisionObject; # The revision object of the specified revision ID var $mRevisionId; # ID to display in {{REVISIONID}} tags var $mRevisionTimestamp; # The timestamp of the specified revision ID + var $mRevisionUser; # User to display in {{REVISIONUSER}} tag var $mRevIdForTs; # The revision ID which was used to fetch the timestamp /** + * @var string + */ + var $mUniqPrefix; + + /** * Constructor - * - * @public */ - function __construct( $conf = array() ) { + public function __construct( $conf = array() ) { $this->mConf = $conf; - $this->mTagHooks = array(); - $this->mTransparentTagHooks = array(); - $this->mFunctionHooks = array(); - $this->mFunctionTagHooks = array(); - $this->mFunctionSynonyms = array( 0 => array(), 1 => array() ); - $this->mDefaultStripList = $this->mStripList = array(); $this->mUrlProtocols = wfUrlProtocols(); - $this->mExtLinkBracketedRegex = '/\[(\b(' . wfUrlProtocols() . ')'. + $this->mExtLinkBracketedRegex = '/\[((' . wfUrlProtocols() . ')'. 
'[^][<>"\\x00-\\x20\\x7F]+) *([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/S'; - $this->mVarCache = array(); if ( isset( $conf['preprocessorClass'] ) ) { $this->mPreprocessorClass = $conf['preprocessorClass']; + } elseif ( defined( 'MW_COMPILED' ) ) { + # Preprocessor_Hash is much faster than Preprocessor_DOM in compiled mode + $this->mPreprocessorClass = 'Preprocessor_Hash'; } elseif ( extension_loaded( 'domxml' ) ) { # PECL extension that conflicts with the core DOM extension (bug 13770) wfDebug( "Warning: you have the obsolete domxml extension for PHP. Please remove it!\n" ); @@ -142,8 +199,7 @@ class Parser { } else { $this->mPreprocessorClass = 'Preprocessor_Hash'; } - $this->mMarkerIndex = 0; - $this->mFirstCall = true; + wfDebug( __CLASS__ . ": using preprocessor: {$this->mPreprocessorClass}\n" ); } /** @@ -151,7 +207,7 @@ class Parser { */ function __destruct() { if ( isset( $this->mLinkHolders ) ) { - $this->mLinkHolders->__destruct(); + unset( $this->mLinkHolders ); } foreach ( $this as $name => $value ) { unset( $this->$name ); @@ -193,13 +249,14 @@ class Parser { $this->mLastSection = ''; $this->mDTopen = false; $this->mIncludeCount = array(); - $this->mStripState = new StripState; $this->mArgStack = false; $this->mInPre = false; $this->mLinkHolders = new LinkHolderArray( $this ); $this->mLinkID = 0; - $this->mRevisionTimestamp = $this->mRevisionId = null; + $this->mRevisionObject = $this->mRevisionTimestamp = + $this->mRevisionId = $this->mRevisionUser = null; $this->mVarCache = array(); + $this->mUser = null; /** * Prefix for temporary replacement strings for the multipass parser. @@ -214,6 +271,7 @@ class Parser { # $this->mUniqPrefix = "\x07UNIQ" . Parser::getRandomString(); # Changed to \x7f to allow XML double-parsing -- TS $this->mUniqPrefix = "\x7fUNIQ" . self::getRandomString(); + $this->mStripState = new StripState( $this->mUniqPrefix ); # Clear these on every parse, bug 4549 @@ -245,7 +303,7 @@ class Parser { * Do not call this function recursively. 
* * @param $text String: text we want to parse - * @param $title A title object + * @param $title Title object * @param $options ParserOptions * @param $linestart boolean * @param $clearState boolean @@ -263,20 +321,19 @@ class Parser { wfProfileIn( __METHOD__ ); wfProfileIn( $fname ); - $this->mOptions = $options; - if ( $clearState ) { - $this->clearState(); - } - - $this->setTitle( $title ); # Page title has to be set for the pre-processor + $this->startParse( $title, $options, self::OT_HTML, $clearState ); $oldRevisionId = $this->mRevisionId; + $oldRevisionObject = $this->mRevisionObject; $oldRevisionTimestamp = $this->mRevisionTimestamp; + $oldRevisionUser = $this->mRevisionUser; if ( $revid !== null ) { $this->mRevisionId = $revid; + $this->mRevisionObject = null; $this->mRevisionTimestamp = null; + $this->mRevisionUser = null; } - $this->setOutputType( self::OT_HTML ); + wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) ); # No more strip! wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) ); @@ -327,7 +384,7 @@ class Parser { || $wgDisableTitleConversion || isset( $this->mDoubleUnderscores['nocontentconvert'] ) || isset( $this->mDoubleUnderscores['notitleconvert'] ) - || $this->mOutput->getDisplayTitle() !== false ) ) + || $this->mOutput->getDisplayTitle() !== false ) ) { $convruletitle = $wgContLang->getConvRuleTitle(); if ( $convruletitle ) { @@ -342,23 +399,7 @@ class Parser { wfRunHooks( 'ParserBeforeTidy', array( &$this, &$text ) ); -//!JF Move to its own function - - $uniq_prefix = $this->mUniqPrefix; - $matches = array(); - $elements = array_keys( $this->mTransparentTagHooks ); - $text = $this->extractTagsAndParams( $elements, $text, $matches, $uniq_prefix ); - - foreach ( $matches as $marker => $data ) { - list( $element, $content, $params, $tag ) = $data; - $tagName = strtolower( $element ); - if ( isset( $this->mTransparentTagHooks[$tagName] ) ) { - $output = call_user_func_array( 
$this->mTransparentTagHooks[$tagName], array( $content, $params, $this ) ); - } else { - $output = $tag; - } - $this->mStripState->general->setPair( $marker, $output ); - } + $text = $this->replaceTransparentTags( $text ); $text = $this->mStripState->unstripGeneral( $text ); $text = Sanitizer::normalizeCharReferences( $text ); @@ -415,7 +456,9 @@ class Parser { $this->mOutput->setText( $text ); $this->mRevisionId = $oldRevisionId; + $this->mRevisionObject = $oldRevisionObject; $this->mRevisionTimestamp = $oldRevisionTimestamp; + $this->mRevisionUser = $oldRevisionUser; wfProfileOut( $fname ); wfProfileOut( __METHOD__ ); @@ -430,6 +473,8 @@ class Parser { * * @param $text String: text extension wants to have parsed * @param $frame PPFrame: The frame to use for expanding any template variables + * + * @return string */ function recursiveTagParse( $text, $frame=false ) { wfProfileIn( __METHOD__ ); @@ -446,10 +491,7 @@ class Parser { */ function preprocess( $text, Title $title, ParserOptions $options, $revid = null ) { wfProfileIn( __METHOD__ ); - $this->mOptions = $options; - $this->clearState(); - $this->setOutputType( self::OT_PREPROCESS ); - $this->setTitle( $title ); + $this->startParse( $title, $options, self::OT_PREPROCESS, true ); if ( $revid !== null ) { $this->mRevisionId = $revid; } @@ -469,10 +511,7 @@ class Parser { */ public function getPreloadText( $text, Title $title, ParserOptions $options ) { # Parser (re)initialisation - $this->mOptions = $options; - $this->clearState(); - $this->setOutputType( self::OT_PLAIN ); - $this->setTitle( $title ); + $this->startParse( $title, $options, self::OT_PLAIN, true ); $flags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES; $dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION ); @@ -484,21 +523,30 @@ class Parser { /** * Get a random string * - * @private - * @static + * @return string */ - static private function getRandomString() { + static public function getRandomString() { return dechex( mt_rand( 0, 
0x7fffffff ) ) . dechex( mt_rand( 0, 0x7fffffff ) ); } /** + * Set the current user. + * Should only be used when doing pre-save transform. + * + * @param $user Mixed: User object or null (to reset) + */ + function setUser( $user ) { + $this->mUser = $user; + } + + /** * Accessor for mUniqPrefix. * * @return String */ public function uniqPrefix() { if ( !isset( $this->mUniqPrefix ) ) { - # @todo Fixme: this is probably *horribly wrong* + # @todo FIXME: This is probably *horribly wrong* # LanguageConverter seems to want $wgParser's uniqPrefix, however # if this is called for a parser cache hit, the parser may not # have ever been initialized in the first place. @@ -511,11 +559,13 @@ class Parser { /** * Set the context title + * + * @param $t Title */ function setTitle( $t ) { - if ( !$t || $t instanceof FakeTitle ) { - $t = Title::newFromText( 'NO TITLE' ); - } + if ( !$t || $t instanceof FakeTitle ) { + $t = Title::newFromText( 'NO TITLE' ); + } if ( strval( $t->getFragment() ) !== '' ) { # Strip the fragment to avoid various odd effects @@ -599,19 +649,47 @@ class Parser { return wfSetVar( $this->mOptions, $x ); } + /** + * @return int + */ function nextLinkID() { return $this->mLinkID++; } - function getFunctionLang() { - global $wgLang, $wgContLang; + /** + * @param $id int + */ + function setLinkID( $id ) { + $this->mLinkID = $id; + } + /** + * @return Language + */ + function getFunctionLang() { $target = $this->mOptions->getTargetLanguage(); if ( $target !== null ) { return $target; - } else { - return $this->mOptions->getInterfaceMessage() ? 
$wgLang : $wgContLang; + } elseif( $this->mOptions->getInterfaceMessage() ) { + global $wgLang; + return $wgLang; + } elseif( is_null( $this->mTitle ) ) { + throw new MWException( __METHOD__.': $this->mTitle is null' ); } + return $this->mTitle->getPageLanguage(); + } + + /** + * Get a User object either from $this->mUser, if set, or from the + * ParserOptions object otherwise + * + * @return User object + */ + function getUser() { + if ( !is_null( $this->mUser ) ) { + return $this->mUser; + } + return $this->mOptions->getUser(); } /** @@ -638,15 +716,13 @@ class Parser { * array( 'param' => 'x' ), * '<element param="x">tag content</element>' ) ) * - * @param $elements list of element names. Comments are always extracted. - * @param $text Source text string. - * @param $matches Out parameter, Array: extracted tags - * @param $uniq_prefix + * @param $elements array list of element names. Comments are always extracted. + * @param $text string Source text string. + * @param $matches array Out parameter, Array: extracted tags + * @param $uniq_prefix string * @return String: stripped text - * - * @static */ - public function extractTagsAndParams( $elements, $text, &$matches, $uniq_prefix = '' ) { + public static function extractTagsAndParams( $elements, $text, &$matches, $uniq_prefix = '' ) { static $n = 1; $stripped = ''; $matches = array(); @@ -710,77 +786,33 @@ class Parser { /** * Get a list of strippable XML-like elements + * + * @return array */ function getStripList() { return $this->mStripList; } /** - * @deprecated use replaceVariables - */ - function strip( $text, $state, $stripcomments = false , $dontstrip = array() ) { - return $text; - } - - /** - * Restores pre, math, and other extensions removed by strip() - * - * always call unstripNoWiki() after this one - * @private - * @deprecated use $this->mStripState->unstrip() - */ - function unstrip( $text, $state ) { - return $state->unstripGeneral( $text ); - } - - /** - * Always call this after unstrip() to 
preserve the order - * - * @private - * @deprecated use $this->mStripState->unstrip() - */ - function unstripNoWiki( $text, $state ) { - return $state->unstripNoWiki( $text ); - } - - /** - * @deprecated use $this->mStripState->unstripBoth() - */ - function unstripForHTML( $text ) { - return $this->mStripState->unstripBoth( $text ); - } - - /** * Add an item to the strip state * Returns the unique tag which must be inserted into the stripped text * The tag will be replaced with the original text in unstrip() - * - * @private */ function insertStripItem( $text ) { $rnd = "{$this->mUniqPrefix}-item-{$this->mMarkerIndex}-" . self::MARKER_SUFFIX; $this->mMarkerIndex++; - $this->mStripState->general->setPair( $rnd, $text ); + $this->mStripState->addGeneral( $rnd, $text ); return $rnd; } /** - * Interface with html tidy - * @deprecated Use MWTidy::tidy() - */ - public static function tidy( $text ) { - wfDeprecated( __METHOD__ ); - return MWTidy::tidy( $text ); - } - - /** * parse the wiki syntax used to render tables * * @private */ function doTableStuff( $text ) { wfProfileIn( __METHOD__ ); - + $lines = StringUtils::explode( "\n", $text ); $out = ''; $td_history = array(); # Is currently a td tag open? @@ -793,7 +825,7 @@ class Parser { foreach ( $lines as $outLine ) { $line = trim( $outLine ); - if ( $line === '' ) { # empty line, go to next line + if ( $line === '' ) { # empty line, go to next line $out .= $outLine."\n"; continue; } @@ -1043,7 +1075,7 @@ class Parser { */ function doMagicLinks( $text ) { wfProfileIn( __METHOD__ ); - $prots = $this->mUrlProtocols; + $prots = wfUrlProtocolsWithoutProtRel(); $urlChar = self::EXT_LINK_URL_CLASS; $text = preg_replace_callback( '!(?: # Start cases @@ -1052,15 +1084,20 @@ class Parser { (\\b(?:$prots)$urlChar+) | # m[3]: Free external links" . ' (?:RFC|PMID)\s+([0-9]+) | # m[4]: RFC or PMID, capture number ISBN\s+(\b # m[5]: ISBN, capture number - (?: 97[89] [\ \-]? )? # optional 13-digit ISBN prefix - (?: [0-9] [\ \-]? 
){9} # 9 digits with opt. delimiters - [0-9Xx] # check digit - \b) + (?: 97[89] [\ \-]? )? # optional 13-digit ISBN prefix + (?: [0-9] [\ \-]? ){9} # 9 digits with opt. delimiters + [0-9Xx] # check digit + \b) )!x', array( &$this, 'magicLinkCallback' ), $text ); wfProfileOut( __METHOD__ ); return $text; } + /** + * @throws MWException + * @param $m array + * @return HTML|string + */ function magicLinkCallback( $m ) { if ( isset( $m[1] ) && $m[1] !== '' ) { # Skip anchor @@ -1087,10 +1124,8 @@ class Parser { throw new MWException( __METHOD__.': unrecognised match type "' . substr( $m[0], 0, 20 ) . '"' ); } - $url = wfMsgForContent( $urlmsg, $id); - $sk = $this->mOptions->getSkin( $this->mTitle ); - $la = $sk->getExternalLinkAttributes( "external $CssClass" ); - return "<a href=\"{$url}\"{$la}>{$keyword} {$id}</a>"; + $url = wfMsgForContent( $urlmsg, $id ); + return Linker::makeExternalLink( $url, "{$keyword} {$id}", true, $CssClass ); } elseif ( isset( $m[5] ) && $m[5] !== '' ) { # ISBN $isbn = $m[5]; @@ -1117,7 +1152,6 @@ class Parser { global $wgContLang; wfProfileIn( __METHOD__ ); - $sk = $this->mOptions->getSkin( $this->mTitle ); $trail = ''; # The characters '<' and '>' (which were escaped by @@ -1148,7 +1182,7 @@ class Parser { $text = $this->maybeMakeExternalImage( $url ); if ( $text === false ) { # Not an image, make a link - $text = $sk->makeExternalLink( $url, $wgContLang->markNoConversion($url), true, 'free', + $text = Linker::makeExternalLink( $url, $wgContLang->markNoConversion($url), true, 'free', $this->getExternalLinkAttribs( $url ) ); # Register it in the output object... # Replace unnecessary URL escape codes with their equivalent characters @@ -1355,7 +1389,7 @@ class Parser { /** * Replace external links (REL) * - * Note: this is all very hackish and the order of execution matters a lot. + * Note: this is all very hackish and the order of execution matters a lot. * Make sure to run maintenance/parserTests.php if you change this code. 
* * @private @@ -1364,8 +1398,6 @@ class Parser { global $wgContLang; wfProfileIn( __METHOD__ ); - $sk = $this->mOptions->getSkin( $this->mTitle ); - $bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE ); $s = array_shift( $bits ); @@ -1399,16 +1431,10 @@ class Parser { # No link text, e.g. [http://domain.tld/some.link] if ( $text == '' ) { - # Autonumber if allowed. See bug #5918 - if ( strpos( wfUrlProtocols(), substr( $protocol, 0, strpos( $protocol, ':' ) ) ) !== false ) { - $langObj = $this->getFunctionLang(); - $text = '[' . $langObj->formatNum( ++$this->mAutonumber ) . ']'; - $linktype = 'autonumber'; - } else { - # Otherwise just use the URL - $text = htmlspecialchars( $url ); - $linktype = 'free'; - } + # Autonumber + $langObj = $this->getFunctionLang(); + $text = '[' . $langObj->formatNum( ++$this->mAutonumber ) . ']'; + $linktype = 'autonumber'; } else { # Have link text, e.g. [http://domain.tld/some.link text]s # Check for trail @@ -1423,7 +1449,7 @@ class Parser { # This means that users can paste URLs directly into the text # Funny characters like ö aren't valid in URLs anyway # This was changed in August 2004 - $s .= $sk->makeExternalLink( $url, $text, false, $linktype, + $s .= Linker::makeExternalLink( $url, $text, false, $linktype, $this->getExternalLinkAttribs( $url ) ) . $dtrail . $trail; # Register link in the output object. 
@@ -1449,23 +1475,12 @@ class Parser { */ function getExternalLinkAttribs( $url = false ) { $attribs = array(); - global $wgNoFollowLinks, $wgNoFollowNsExceptions; + global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions; $ns = $this->mTitle->getNamespace(); - if ( $wgNoFollowLinks && !in_array( $ns, $wgNoFollowNsExceptions ) ) { + if ( $wgNoFollowLinks && !in_array( $ns, $wgNoFollowNsExceptions ) && + !wfMatchesDomainList( $url, $wgNoFollowDomainExceptions ) ) + { $attribs['rel'] = 'nofollow'; - - global $wgNoFollowDomainExceptions; - if ( $wgNoFollowDomainExceptions ) { - $bits = wfParseUrl( $url ); - if ( is_array( $bits ) && isset( $bits['host'] ) ) { - foreach ( $wgNoFollowDomainExceptions as $domain ) { - if ( substr( $bits['host'], -strlen( $domain ) ) == $domain ) { - unset( $attribs['rel'] ); - break; - } - } - } - } } if ( $this->mOptions->getExternalLinkTarget() ) { $attribs['target'] = $this->mOptions->getExternalLinkTarget(); @@ -1473,7 +1488,6 @@ class Parser { return $attribs; } - /** * Replace unusual URL escape codes with their equivalent characters * @@ -1513,7 +1527,6 @@ class Parser { * @private */ function maybeMakeExternalImage( $url ) { - $sk = $this->mOptions->getSkin( $this->mTitle ); $imagesfrom = $this->mOptions->getAllowExternalImagesFrom(); $imagesexception = !empty( $imagesfrom ); $text = false; @@ -1532,10 +1545,10 @@ class Parser { $imagematch = false; } if ( $this->mOptions->getAllowExternalImages() - || ( $imagesexception && $imagematch ) ) { + || ( $imagesexception && $imagematch ) ) { if ( preg_match( self::EXT_IMAGE_REGEX, $url ) ) { # Image found - $text = $sk->makeExternalImage( $url ); + $text = Linker::makeExternalImage( $url ); } } if ( !$text && $this->mOptions->getEnableImageWhitelist() @@ -1548,7 +1561,7 @@ class Parser { } if ( preg_match( '/' . str_replace( '/', '\\/', $entry ) . 
'/i', $url ) ) { # Image matches a whitelist entry - $text = $sk->makeExternalImage( $url ); + $text = Linker::makeExternalImage( $url ); break; } } @@ -1589,10 +1602,9 @@ class Parser { $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD"; } - $sk = $this->mOptions->getSkin( $this->mTitle ); $holders = new LinkHolderArray( $this ); - # split the entire text string on occurences of [[ + # split the entire text string on occurences of [[ $a = StringUtils::explode( '[[', ' ' . $s ); # get the first element (all text up to first [[), and remove the space we added $s = $a->current(); @@ -1684,14 +1696,14 @@ class Parser { # fix up urlencoded title texts if ( strpos( $m[1], '%' ) !== false ) { # Should anchors '#' also be rejected? - $m[1] = str_replace( array('<', '>'), array('<', '>'), urldecode( $m[1] ) ); + $m[1] = str_replace( array('<', '>'), array('<', '>'), rawurldecode( $m[1] ) ); } $trail = $m[3]; } elseif ( preg_match( $e1_img, $line, $m ) ) { # Invalid, but might be an image with a link in its caption $might_be_img = true; $text = $m[2]; if ( strpos( $m[1], '%' ) !== false ) { - $m[1] = urldecode( $m[1] ); + $m[1] = rawurldecode( $m[1] ); } $trail = ""; } else { # Invalid form; output directly @@ -1705,7 +1717,7 @@ class Parser { # Don't allow internal links to pages containing # PROTO: where PROTO is a valid URL protocol; these # should be external links. - if ( preg_match( '/^\b(?:' . wfUrlProtocols() . ')/', $m[1] ) ) { + if ( preg_match( '/^(?:' . wfUrlProtocols() . ')/', $m[1] ) ) { $s .= $prefix . '[[' . $line ; wfProfileOut( __METHOD__."-misc" ); continue; @@ -1787,9 +1799,10 @@ class Parser { $text = $link; } else { # Bug 4598 madness. 
Handle the quotes only if they come from the alternate part - # [[Lista d''e paise d''o munno]] -> <a href="">Lista d''e paise d''o munno</a> - # [[Criticism of Harry Potter|Criticism of ''Harry Potter'']] -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a> - $text = $this->doQuotes($text); + # [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a> + # [[Criticism of Harry Potter|Criticism of ''Harry Potter'']] + # -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a> + $text = $this->doQuotes( $text ); } # Link not escaped by : , create the various objects @@ -1823,14 +1836,13 @@ class Parser { $holders->merge( $this->replaceInternalLinks2( $text ) ); } # cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them - $s .= $prefix . $this->armorLinks( $this->makeImage( $nt, $text, $holders ) ) . $trail; + $s .= $prefix . $this->armorLinks( + $this->makeImage( $nt, $text, $holders ) ) . $trail; } else { $s .= $prefix . $trail; } - $this->mOutput->addImage( $nt->getDBkey() ); wfProfileOut( __METHOD__."-image" ); continue; - } if ( $ns == NS_CATEGORY ) { @@ -1851,7 +1863,7 @@ class Parser { * Strip the whitespace Category links produce, see bug 87 * @todo We might want to use trim($tmp, "\n") here. */ - $s .= trim( $prefix . $trail, "\n" ) == '' ? '': $prefix . $trail; + $s .= trim( $prefix . $trail, "\n" ) == '' ? '' : $prefix . $trail; wfProfileOut( __METHOD__."-category" ); continue; @@ -1861,26 +1873,24 @@ class Parser { # Self-link checking if ( $nt->getFragment() === '' && $ns != NS_SPECIAL ) { if ( in_array( $nt->getPrefixedText(), $selflink, true ) ) { - $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail ); + $s .= $prefix . 
Linker::makeSelfLinkObj( $nt, $text, '', $trail ); continue; } } # NS_MEDIA is a pseudo-namespace for linking directly to a file - # FIXME: Should do batch file existence checks, see comment below + # @todo FIXME: Should do batch file existence checks, see comment below if ( $ns == NS_MEDIA ) { wfProfileIn( __METHOD__."-media" ); # Give extensions a chance to select the file revision for us - $skip = $time = false; - wfRunHooks( 'BeforeParserMakeImageLinkObj', array( &$this, &$nt, &$skip, &$time ) ); - if ( $skip ) { - $link = $sk->link( $nt ); - } else { - $link = $sk->makeMediaLinkObj( $nt, $text, $time ); - } + $time = $sha1 = $descQuery = false; + wfRunHooks( 'BeforeParserFetchFileAndTitle', + array( $this, $nt, &$time, &$sha1, &$descQuery ) ); + # Fetch and register the file (file title may be different via hooks) + list( $file, $nt ) = $this->fetchFileAndTitle( $nt, $time, $sha1 ); # Cloak with NOPARSE to avoid replacement in replaceExternalLinks - $s .= $prefix . $this->armorLinks( $link ) . $trail; - $this->mOutput->addImage( $nt->getDBkey() ); + $s .= $prefix . $this->armorLinks( + Linker::makeMediaLinkFile( $nt, $file, $text ) ) . 
$trail; wfProfileOut( __METHOD__."-media" ); continue; } @@ -1889,14 +1899,14 @@ class Parser { # Some titles, such as valid special pages or files in foreign repos, should # be shown as bluelinks even though they're not included in the page table # - # FIXME: isAlwaysKnown() can be expensive for file links; we should really do + # @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do # batch file existence checks for NS_FILE and NS_MEDIA if ( $iw == '' && $nt->isAlwaysKnown() ) { $this->mOutput->addLink( $nt ); - $s .= $this->makeKnownLinkHolder( $nt, $text, '', $trail, $prefix ); + $s .= $this->makeKnownLinkHolder( $nt, $text, array(), $trail, $prefix ); } else { # Links will be added to the output link list after checking - $s .= $holders->makeHolder( $nt, $text, '', $trail, $prefix ); + $s .= $holders->makeHolder( $nt, $text, array(), $trail, $prefix ); } wfProfileOut( __METHOD__."-always_known" ); } @@ -1905,18 +1915,6 @@ class Parser { } /** - * Make a link placeholder. The text returned can be later resolved to a real link with - * replaceLinkHolders(). This is done for two reasons: firstly to avoid further - * parsing of interwiki links, and secondly to allow all existence checks and - * article length checks (for stub links) to be bundled into a single query. - * - * @deprecated - */ - function makeLinkHolder( &$nt, $text = '', $query = '', $trail = '', $prefix = '' ) { - return $this->mLinkHolders->makeHolder( $nt, $text, $query, $trail, $prefix ); - } - - /** * Render a forced-blue link inline; protect against double expansion of * URLs if we're in a mode that prepends full URL prefixes to internal links. 
* Since this little disaster has to split off the trail text to avoid @@ -1925,16 +1923,23 @@ class Parser { * * @param $nt Title * @param $text String - * @param $query String + * @param $query Array or String * @param $trail String * @param $prefix String * @return String: HTML-wikitext mix oh yuck */ - function makeKnownLinkHolder( $nt, $text = '', $query = '', $trail = '', $prefix = '' ) { + function makeKnownLinkHolder( $nt, $text = '', $query = array(), $trail = '', $prefix = '' ) { list( $inside, $trail ) = Linker::splitTrail( $trail ); - $sk = $this->mOptions->getSkin( $this->mTitle ); - # FIXME: use link() instead of deprecated makeKnownLinkObj() - $link = $sk->makeKnownLinkObj( $nt, $text, $query, $inside, $prefix ); + + if ( is_string( $query ) ) { + $query = wfCgiToArray( $query ); + } + if ( $text == '' ) { + $text = htmlspecialchars( $nt->getPrefixedText() ); + } + + $link = Linker::linkKnown( $nt, "$prefix$text$inside", array(), $query ); + return $this->armorLinks( $link ) . $trail; } @@ -1977,6 +1982,8 @@ class Parser { /**#@+ * Used by doBlockLevels() * @private + * + * @return string */ function closeParagraph() { $result = ''; @@ -2001,7 +2008,7 @@ class Parser { } for ( $i = 0; $i < $shorter; ++$i ) { - if ( $st1{$i} != $st2{$i} ) { + if ( $st1[$i] != $st2[$i] ) { break; } } @@ -2012,6 +2019,8 @@ class Parser { * These next three functions open, continue, and close the list * element appropriate to the prefix character passed into them. 
* @private + * + * @return string */ function openList( $char ) { $result = $this->closeParagraph(); @@ -2036,6 +2045,8 @@ class Parser { * TODO: document * @param $char String * @private + * + * @return string */ function nextItem( $char ) { if ( '*' === $char || '#' === $char ) { @@ -2060,6 +2071,8 @@ class Parser { * TODO: document * @param $char String * @private + * + * @return string */ function closeList( $char ) { if ( '*' === $char ) { @@ -2178,7 +2191,7 @@ class Parser { $output .= $this->openList( $char ); if ( ';' === $char ) { - # FIXME: This is dupe of code above + # @todo FIXME: This is dupe of code above if ( $this->findColonNoLinks( $t, $term, $t2 ) !== false ) { $t = $t2; $output .= $term . $this->nextItem( ':' ); @@ -2200,7 +2213,7 @@ class Parser { '<td|<th|<\\/?div|<hr|<\\/pre|<\\/p|'.$this->mUniqPrefix.'-pre|<\\/li|<\\/ul|<\\/ol|<\\/?center)/iS', $t ); if ( $openmatch or $closematch ) { $paragraphStack = false; - # TODO bug 5718: paragraph closed + # TODO bug 5718: paragraph closed $output .= $this->closeParagraph(); if ( $preOpenMatch and !$preCloseMatch ) { $this->mInPre = true; @@ -2299,7 +2312,7 @@ class Parser { $stack = 0; $len = strlen( $str ); for( $i = 0; $i < $len; $i++ ) { - $c = $str{$i}; + $c = $str[$i]; switch( $state ) { # (Using the number is a performance hack for common cases) @@ -2435,6 +2448,9 @@ class Parser { * Return value of a magic variable (like PAGENAME) * * @private + * + * @param $index integer + * @param $frame PPFrame */ function getVariableValue( $index, $frame=false ) { global $wgContLang, $wgSitename, $wgServer; @@ -2521,25 +2537,25 @@ class Parser { $value = wfEscapeWikiText( $this->mTitle->getText() ); break; case 'pagenamee': - $value = $this->mTitle->getPartialURL(); + $value = wfEscapeWikiText( $this->mTitle->getPartialURL() ); break; case 'fullpagename': $value = wfEscapeWikiText( $this->mTitle->getPrefixedText() ); break; case 'fullpagenamee': - $value = $this->mTitle->getPrefixedURL(); + $value = 
wfEscapeWikiText( $this->mTitle->getPrefixedURL() ); break; case 'subpagename': $value = wfEscapeWikiText( $this->mTitle->getSubpageText() ); break; case 'subpagenamee': - $value = $this->mTitle->getSubpageUrlForm(); + $value = wfEscapeWikiText( $this->mTitle->getSubpageUrlForm() ); break; case 'basepagename': $value = wfEscapeWikiText( $this->mTitle->getBaseText() ); break; case 'basepagenamee': - $value = wfUrlEncode( str_replace( ' ', '_', $this->mTitle->getBaseText() ) ); + $value = wfEscapeWikiText( wfUrlEncode( str_replace( ' ', '_', $this->mTitle->getBaseText() ) ) ); break; case 'talkpagename': if ( $this->mTitle->canTalk() ) { @@ -2552,7 +2568,7 @@ class Parser { case 'talkpagenamee': if ( $this->mTitle->canTalk() ) { $talkPage = $this->mTitle->getTalkPage(); - $value = $talkPage->getPrefixedUrl(); + $value = wfEscapeWikiText( $talkPage->getPrefixedUrl() ); } else { $value = ''; } @@ -2563,7 +2579,7 @@ class Parser { break; case 'subjectpagenamee': $subjPage = $this->mTitle->getSubjectPage(); - $value = $subjPage->getPrefixedUrl(); + $value = wfEscapeWikiText( $subjPage->getPrefixedUrl() ); break; case 'revisionid': # Let the edit saving system know we should parse the page @@ -2719,10 +2735,8 @@ class Parser { case 'server': return $wgServer; case 'servername': - wfSuppressWarnings(); # May give an E_WARNING in PHP < 5.3.3 - $serverName = parse_url( $wgServer, PHP_URL_HOST ); - wfRestoreWarnings(); - return $serverName ? $serverName : $wgServer; + $serverParts = wfParseUrl( $wgServer ); + return $serverParts && isset( $serverParts['host'] ) ? $serverParts['host'] : $wgServer; case 'scriptpath': return $wgScriptPath; case 'stylepath': @@ -2783,6 +2797,8 @@ class Parser { * dependency requirements. 
* * @private + * + * @return PPNode */ function preprocessToDom( $text, $flags = 0 ) { $dom = $this->getPreprocessor()->preprocessToObj( $text, $flags ); @@ -2791,6 +2807,8 @@ class Parser { /** * Return a three-element array: leading whitespace, string contents, trailing whitespace + * + * @return array */ public static function splitWhitespace( $s ) { $ltrimmed = ltrim( $s ); @@ -2821,6 +2839,8 @@ class Parser { * Providing arguments this way may be useful for extensions wishing to perform variable replacement explicitly. * @param $argsOnly Boolean: only do argument (triple-brace) expansion, not double-brace expansion * @private + * + * @return string */ function replaceVariables( $text, $frame = false, $argsOnly = false ) { # Is there any text? Also, Prevent too big inclusions! @@ -2844,7 +2864,11 @@ class Parser { return $text; } - # Clean up argument array - refactored in 1.9 so parserfunctions can use it, too. + /** + * Clean up argument array - refactored in 1.9 so parserfunctions can use it, too. + * + * @return array + */ static function createAssocArgs( $args ) { $assocArgs = array(); $index = 1; @@ -2918,7 +2942,7 @@ class Parser { $isLocalObj = false; # $text is a DOM node needing expansion in the current frame # Title object, where $text came from - $title = null; + $title = false; # $part1 is the bit before the first |, and must contain only title characters. # Various prefixes will be stripped from it later. @@ -2930,8 +2954,10 @@ class Parser { $originalTitle = $part1; # $args is a list of argument nodes, starting from index 0, not including $part1 + # @todo FIXME: If piece['parts'] is null then the call to getLength() below won't work b/c this $args isn't an object $args = ( null == $piece['parts'] ) ? 
array() : $piece['parts']; wfProfileOut( __METHOD__.'-setup' ); + wfProfileIn( __METHOD__."-title-$originalTitle" ); # SUBST wfProfileIn( __METHOD__.'-modifiers' ); @@ -3100,7 +3126,7 @@ class Parser { && $this->mOptions->getAllowSpecialInclusion() && $this->ot['html'] ) { - $text = SpecialPage::capturePath( $title ); + $text = SpecialPageFactory::capturePath( $title ); if ( is_string( $text ) ) { $found = true; $isHTML = true; @@ -3150,6 +3176,7 @@ class Parser { # Recover the source wikitext and return it if ( !$found ) { $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args ); + wfProfileOut( __METHOD__."-title-$originalTitle" ); wfProfileOut( __METHOD__ ); return array( 'object' => $text ); } @@ -3218,6 +3245,7 @@ class Parser { $ret = array( 'text' => $text ); } + wfProfileOut( __METHOD__."-title-$originalTitle" ); wfProfileOut( __METHOD__ ); return $ret; } @@ -3225,6 +3253,8 @@ class Parser { /** * Get the semi-parsed DOM representation of a template with a given title, * and its redirect destination title. Cached. + * + * @return array */ function getTemplateDom( $title ) { $cacheTitle = $title; @@ -3260,6 +3290,8 @@ class Parser { /** * Fetch the unparsed text of a template and register a reference to it. + * @param Title $title + * @return Array ( string or false, Title ) */ function fetchTemplateAndTitle( $title ) { $templateCb = $this->mOptions->getTemplateCallback(); # Defaults to Parser::statelessFetchTemplate() @@ -3274,6 +3306,11 @@ class Parser { return array( $text, $finalTitle ); } + /** + * Fetch the unparsed text of a template and register a reference to it. + * @param Title $title + * @return mixed string or false + */ function fetchTemplate( $title ) { $rv = $this->fetchTemplateAndTitle( $title ); return $rv[0]; @@ -3282,8 +3319,10 @@ class Parser { /** * Static function to get a template * Can be overridden via ParserOptions::setTemplateCallback(). 
+ * + * @return array */ - static function statelessFetchTemplate( $title, $parser=false ) { + static function statelessFetchTemplate( $title, $parser = false ) { $text = $skip = false; $finalTitle = $title; $deps = array(); @@ -3292,17 +3331,22 @@ class Parser { for ( $i = 0; $i < 2 && is_object( $title ); $i++ ) { # Give extensions a chance to select the revision instead $id = false; # Assume current - wfRunHooks( 'BeforeParserFetchTemplateAndtitle', array( $parser, &$title, &$skip, &$id ) ); + wfRunHooks( 'BeforeParserFetchTemplateAndtitle', + array( $parser, $title, &$skip, &$id ) ); if ( $skip ) { $text = false; $deps[] = array( - 'title' => $title, - 'page_id' => $title->getArticleID(), - 'rev_id' => null ); + 'title' => $title, + 'page_id' => $title->getArticleID(), + 'rev_id' => null + ); break; } - $rev = $id ? Revision::newFromId( $id ) : Revision::newFromTitle( $title ); + # Get the revision + $rev = $id + ? Revision::newFromId( $id ) + : Revision::newFromTitle( $title ); $rev_id = $rev ? $rev->getId() : 0; # If there is no current revision, there is no page if ( $id === false && !$rev ) { @@ -3311,20 +3355,27 @@ class Parser { } $deps[] = array( - 'title' => $title, - 'page_id' => $title->getArticleID(), - 'rev_id' => $rev_id ); + 'title' => $title, + 'page_id' => $title->getArticleID(), + 'rev_id' => $rev_id ); + if ( $rev && !$title->equals( $rev->getTitle() ) ) { + # We fetched a rev from a different title; register it too... 
+ $deps[] = array( + 'title' => $rev->getTitle(), + 'page_id' => $rev->getPage(), + 'rev_id' => $rev_id ); + } if ( $rev ) { $text = $rev->getText(); } elseif ( $title->getNamespace() == NS_MEDIAWIKI ) { global $wgContLang; - $message = $wgContLang->lcfirst( $title->getText() ); - $text = wfMsgForContentNoTrans( $message ); - if ( wfEmptyMsg( $message, $text ) ) { + $message = wfMessage( $wgContLang->lcfirst( $title->getText() ) )->inContentLanguage(); + if ( !$message->exists() ) { $text = false; break; } + $text = $message->plain(); } else { break; } @@ -3342,7 +3393,56 @@ class Parser { } /** + * Fetch a file and its title and register a reference to it. + * @param Title $title + * @param string $time MW timestamp + * @param string $sha1 base 36 SHA-1 + * @return mixed File or false + */ + function fetchFile( $title, $time = false, $sha1 = false ) { + $res = $this->fetchFileAndTitle( $title, $time, $sha1 ); + return $res[0]; + } + + /** + * Fetch a file and its title and register a reference to it. + * @param Title $title + * @param string $time MW timestamp + * @param string $sha1 base 36 SHA-1 + * @return Array ( File or false, Title of file ) + */ + function fetchFileAndTitle( $title, $time = false, $sha1 = false ) { + if ( $time === '0' ) { + $file = false; // broken thumbnail forced by hook + } elseif ( $sha1 ) { // get by (sha1,timestamp) + $file = RepoGroup::singleton()->findFileFromKey( $sha1, array( 'time' => $time ) ); + } else { // get by (name,timestamp) + $file = wfFindFile( $title, array( 'time' => $time ) ); + } + $time = $file ? $file->getTimestamp() : false; + $sha1 = $file ? $file->getSha1() : false; + # Register the file as a dependency... + $this->mOutput->addImage( $title->getDBkey(), $time, $sha1 ); + if ( $file && !$title->equals( $file->getTitle() ) ) { + # Update fetched file title + $title = $file->getTitle(); + if ( is_null( $file->getRedirectedTitle() ) ) { + # This file was not a redirect, but the title does not match. 
+ # Register under the new name because otherwise the link will + # get lost. + $this->mOutput->addImage( $title->getDBkey(), $time, $sha1 ); + } + } + return array( $file, $title ); + } + + /** * Transclude an interwiki link. + * + * @param $title Title + * @param $action + * + * @return string */ function interwikiTransclude( $title, $action ) { global $wgEnableScaryTranscluding; @@ -3359,6 +3459,10 @@ class Parser { return $this->fetchScaryTemplateMaybeFromCache( $url ); } + /** + * @param $url string + * @return Mixed|String + */ function fetchScaryTemplateMaybeFromCache( $url ) { global $wgTranscludeCacheExpiry; $dbr = wfGetDB( DB_SLAVE ); @@ -3383,10 +3487,14 @@ class Parser { return $text; } - /** * Triple brace replacement -- used for template arguments * @private + * + * @param $peice array + * @param $frame PPFrame + * + * @return array */ function argSubstitution( $piece, $frame ) { wfProfileIn( __METHOD__ ); @@ -3399,9 +3507,9 @@ class Parser { $text = $frame->getArgument( $argName ); if ( $text === false && $parts->getLength() > 0 && ( - $this->ot['html'] - || $this->ot['pre'] - || ( $this->ot['wiki'] && $frame->isTemplate() ) + $this->ot['html'] + || $this->ot['pre'] + || ( $this->ot['wiki'] && $frame->isTemplate() ) ) ) { # No match in frame, use the supplied default @@ -3440,6 +3548,8 @@ class Parser { * inner Contents of extension element * noClose Original text did not have a close tag * @param $frame PPFrame + * + * @return string */ function extensionSubstitution( $params, $frame ) { $name = $frame->expand( $params['name'] ); @@ -3508,9 +3618,9 @@ class Parser { if ( $markerType === 'none' ) { return $output; } elseif ( $markerType === 'nowiki' ) { - $this->mStripState->nowiki->setPair( $marker, $output ); + $this->mStripState->addNoWiki( $marker, $output ); } elseif ( $markerType === 'general' ) { - $this->mStripState->general->setPair( $marker, $output ); + $this->mStripState->addGeneral( $marker, $output ); } else { throw new MWException( 
__METHOD__.': invalid marker type' ); } @@ -3525,7 +3635,7 @@ class Parser { * @return Boolean: false if this inclusion would take it over the maximum, true otherwise */ function incrementIncludeSize( $type, $size ) { - if ( $this->mIncludeSizes[$type] + $size > $this->mOptions->getMaxIncludeSize( $type ) ) { + if ( $this->mIncludeSizes[$type] + $size > $this->mOptions->getMaxIncludeSize() ) { return false; } else { $this->mIncludeSizes[$type] += $size; @@ -3582,7 +3692,7 @@ class Parser { } # (bug 8068) Allow control over whether robots index a page. # - # FIXME (bug 14899): __INDEX__ always overrides __NOINDEX__ here! This + # @todo FIXME: Bug 14899: __INDEX__ always overrides __NOINDEX__ here! This # is not desirable, the last one on the page should win. if ( isset( $this->mDoubleUnderscores['noindex'] ) && $this->mTitle->canUseNoindex() ) { $this->mOutput->setIndexPolicy( 'noindex' ); @@ -3592,7 +3702,7 @@ class Parser { $this->mOutput->setIndexPolicy( 'index' ); $this->addTrackingCategory( 'index-category' ); } - + # Cache all double underscores in the database foreach ( $this->mDoubleUnderscores as $key => $val ) { $this->mOutput->setProperty( $key, '' ); @@ -3610,6 +3720,10 @@ class Parser { * @return Boolean: whether the addition was successful */ protected function addTrackingCategory( $msg ) { + if ( $this->mTitle->getNamespace() === NS_SPECIAL ) { + wfDebug( __METHOD__.": Not adding tracking category $msg to special page!\n" ); + return false; + } $cat = wfMsgForContent( $msg ); # Allow tracking categories to be disabled by setting them to "-" @@ -3643,16 +3757,17 @@ class Parser { * @private */ function formatHeadings( $text, $origText, $isMain=true ) { - global $wgMaxTocLevel, $wgContLang, $wgHtml5, $wgExperimentalHtmlIds; + global $wgMaxTocLevel, $wgHtml5, $wgExperimentalHtmlIds; - $doNumberHeadings = $this->mOptions->getNumberHeadings(); - # Inhibit editsection links if requested in the page if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) 
{ $showEditLink = 0; } else { $showEditLink = $this->mOptions->getEditSection(); } + if ( $showEditLink ) { + $this->mOutput->setEditSectionTokens( true ); + } # Get all headlines for numbering them and adding funky stuff like [edit] # links - this is for later, but we need the number of headlines right now @@ -3683,9 +3798,6 @@ class Parser { $enoughToc = true; } - # We need this to perform operations on the HTML - $sk = $this->mOptions->getSkin( $this->mTitle ); - # headline counter $headlineCount = 0; $numVisible = 0; @@ -3736,7 +3848,7 @@ class Parser { $sublevelCount[$toclevel] = 0; if ( $toclevel<$wgMaxTocLevel ) { $prevtoclevel = $toclevel; - $toc .= $sk->tocIndent(); + $toc .= Linker::tocIndent(); $numVisible++; } } elseif ( $level < $prevlevel && $toclevel > 1 ) { @@ -3759,16 +3871,16 @@ class Parser { if ( $toclevel<$wgMaxTocLevel ) { if ( $prevtoclevel < $wgMaxTocLevel ) { # Unindent only if the previous toc level was shown :p - $toc .= $sk->tocUnindent( $prevtoclevel - $toclevel ); + $toc .= Linker::tocUnindent( $prevtoclevel - $toclevel ); $prevtoclevel = $toclevel; } else { - $toc .= $sk->tocLineEnd(); + $toc .= Linker::tocLineEnd(); } } } else { # No change in level, end TOC line if ( $toclevel<$wgMaxTocLevel ) { - $toc .= $sk->tocLineEnd(); + $toc .= Linker::tocLineEnd(); } } @@ -3782,7 +3894,7 @@ class Parser { if ( $dot ) { $numbering .= '.'; } - $numbering .= $wgContLang->formatNum( $sublevelCount[$i] ); + $numbering .= $this->getFunctionLang()->formatNum( $sublevelCount[$i] ); $dot = 1; } } @@ -3837,10 +3949,10 @@ class Parser { 'noninitial' ); } - # HTML names must be case-insensitively unique (bug 10721). - # This does not apply to Unicode characters per + # HTML names must be case-insensitively unique (bug 10721). + # This does not apply to Unicode characters per # http://dev.w3.org/html5/spec/infrastructure.html#case-sensitivity-and-string-comparison - # FIXME: We may be changing them depending on the current locale. 
+ # @todo FIXME: We may be changing them depending on the current locale. $arrayKey = strtolower( $safeHeadline ); if ( $legacyHeadline === false ) { $legacyArrayKey = false; @@ -3861,7 +3973,7 @@ class Parser { } # Don't number the heading if it is the only one (looks silly) - if ( $doNumberHeadings && count( $matches[3] ) > 1) { + if ( count( $matches[3] ) > 1 && $this->mOptions->getNumberHeadings() ) { # the two are different if the line contains a link $headline = $numbering . ' ' . $headline; } @@ -3876,7 +3988,7 @@ class Parser { $legacyAnchor .= '_' . $refers[$legacyArrayKey]; } if ( $enoughToc && ( !isset( $wgMaxTocLevel ) || $toclevel < $wgMaxTocLevel ) ) { - $toc .= $sk->tocLine( $anchor, $tocline, + $toc .= Linker::tocLine( $anchor, $tocline, $numbering, $toclevel, ( $isTemplate ? false : $sectionIndex ) ); } @@ -3905,18 +4017,33 @@ class Parser { ); # give headline the correct <h#> tag - if ( $showEditLink && $sectionIndex !== false ) { + if ( $sectionIndex !== false ) { + // Output edit section links as markers with styles that can be customized by skins if ( $isTemplate ) { # Put a T flag in the section identifier, to indicate to extractSections() # that sections inside <includeonly> should be counted. - $editlink = $sk->doEditSectionLink( Title::newFromText( $titleText ), "T-$sectionIndex", null, $this->mOptions->getUserLang() ); + $editlinkArgs = array( $titleText, "T-$sectionIndex"/*, null */ ); + } else { + $editlinkArgs = array( $this->mTitle->getPrefixedText(), $sectionIndex, $headlineHint ); + } + // We use a bit of pesudo-xml for editsection markers. The language converter is run later on + // Using a UNIQ style marker leads to the converter screwing up the tokens when it converts stuff + // And trying to insert strip tags fails too. At this point all real inputted tags have already been escaped + // so we don't have to worry about a user trying to input one of these markers directly. 
+ // We use a page and section attribute to stop the language converter from converting these important bits + // of data, but put the headline hint inside a content block because the language converter is supposed to + // be able to convert that piece of data. + $editlink = '<mw:editsection page="' . htmlspecialchars($editlinkArgs[0]); + $editlink .= '" section="' . htmlspecialchars($editlinkArgs[1]) .'"'; + if ( isset($editlinkArgs[2]) ) { + $editlink .= '>' . $editlinkArgs[2] . '</mw:editsection>'; } else { - $editlink = $sk->doEditSectionLink( $this->mTitle, $sectionIndex, $headlineHint, $this->mOptions->getUserLang() ); + $editlink .= '/>'; } } else { $editlink = ''; } - $head[$headlineCount] = $sk->makeHeadline( $level, + $head[$headlineCount] = Linker::makeHeadline( $level, $matches['attrib'][$headlineCount], $anchor, $headline, $editlink, $legacyAnchor ); @@ -3932,9 +4059,9 @@ class Parser { if ( $enoughToc ) { if ( $prevtoclevel > 0 && $prevtoclevel < $wgMaxTocLevel ) { - $toc .= $sk->tocUnindent( $prevtoclevel - 1 ); + $toc .= Linker::tocUnindent( $prevtoclevel - 1 ); } - $toc = $sk->tocList( $toc, $this->mOptions->getUserLang() ); + $toc = Linker::tocList( $toc, $this->mOptions->getUserLang() ); $this->mOutput->setTOCHTML( $toc ); } @@ -3985,21 +4112,21 @@ class Parser { * @param $clearState Boolean: whether to clear the parser state first * @return String: the altered wiki markup */ - public function preSaveTransform( $text, Title $title, $user, $options, $clearState = true ) { - $this->mOptions = $options; - $this->setTitle( $title ); - $this->setOutputType( self::OT_WIKI ); - - if ( $clearState ) { - $this->clearState(); - } + public function preSaveTransform( $text, Title $title, User $user, ParserOptions $options, $clearState = true ) { + $this->startParse( $title, $options, self::OT_WIKI, $clearState ); + $this->setUser( $user ); $pairs = array( "\r\n" => "\n", ); $text = str_replace( array_keys( $pairs ), array_values( $pairs ), $text ); - $text = 
$this->pstPass2( $text, $user ); + if( $options->getPreSaveTransform() ) { + $text = $this->pstPass2( $text, $user ); + } $text = $this->mStripState->unstripBoth( $text ); + + $this->setUser( null ); #Reset + return $text; } @@ -4032,9 +4159,9 @@ class Parser { # whatever crap the system uses, localised or not, so we cannot # ship premade translations. $key = 'timezone-' . strtolower( trim( $tzMsg ) ); - $value = wfMsgForContent( $key ); - if ( !wfEmptyMsg( $key, $value ) ) { - $tzMsg = $value; + $msg = wfMessage( $key )->inContentLanguage(); + if ( $msg->exists() ) { + $tzMsg = $msg->text(); } date_default_timezone_set( $oldtz ); @@ -4093,6 +4220,8 @@ class Parser { * validated, ready-to-insert wikitext. * If you have pre-fetched the nickname or the fancySig option, you can * specify them here to save a database query. + * Do not reuse this parser instance after calling getUserSig(), + * as it may have changed if it's the $wgParser. * * @param $user User * @param $nickname String: nickname to use or false to use user's default nickname @@ -4136,11 +4265,9 @@ class Parser { # If we're still here, make it a link to the user page $userText = wfEscapeWikiText( $username ); $nickText = wfEscapeWikiText( $nickname ); - if ( $user->isAnon() ) { - return wfMsgExt( 'signature-anon', array( 'content', 'parsemag' ), $userText, $nickText ); - } else { - return wfMsgExt( 'signature', array( 'content', 'parsemag' ), $userText, $nickText ); - } + $msgName = $user->isAnon() ? 'signature-anon' : 'signature'; + + return wfMessage( $msgName, $userText, $nickText )->inContentLanguage()->title( $this->getTitle() )->text(); } /** @@ -4177,7 +4304,7 @@ class Parser { return $text; } - # FIXME: regex doesn't respect extension tags or nowiki + # @todo FIXME: Regex doesn't respect extension tags or nowiki # => Move this logic to braceSubstitution() $substWord = MagicWord::get( 'subst' ); $substRegex = '/\{\{(?!(?:' . $substWord->getBaseRegex() . '))/x' . 
$substWord->getRegexCase(); @@ -4212,6 +4339,10 @@ class Parser { * so that an external function can call some class members with confidence */ public function startExternalParse( Title $title = null, ParserOptions $options, $outputType, $clearState = true ) { + $this->startParse( $title, $options, $outputType, $clearState ); + } + + private function startParse( Title $title = null, ParserOptions $options, $outputType, $clearState = true ) { $this->setTitle( $title ); $this->mOptions = $options; $this->setOutputType( $outputType ); @@ -4225,10 +4356,10 @@ class Parser { * * @param $text String: the text to preprocess * @param $options ParserOptions: options + * @param $title Title object or null to use $wgTitle * @return String */ - public function transformMsg( $text, $options ) { - global $wgTitle; + public function transformMsg( $text, $options, $title = null ) { static $executing = false; # Guard against infinite recursion @@ -4238,7 +4369,10 @@ class Parser { $executing = true; wfProfileIn( __METHOD__ ); - $title = $wgTitle; + if ( !$title ) { + global $wgTitle; + $title = $wgTitle; + } if ( !$title ) { # It's not uncommon having a null $wgTitle in scripts. See r80898 # Create a ghost title in such case @@ -4254,17 +4388,29 @@ class Parser { /** * Create an HTML-style tag, e.g. <yourtag>special text</yourtag> * The callback should have the following form: - * function myParserHook( $text, $params, $parser ) { ... } + * function myParserHook( $text, $params, $parser, $frame ) { ... } * * Transform and return $text. Use $parser for any required context, e.g. use * $parser->getTitle() and $parser->getOptions() not $wgTitle or $wgOut->mParserOptions * + * Hooks may return extended information by returning an array, of which the + * first numbered element (index 0) must be the return string, and all other + * entries are extracted into local variables within an internal function + * in the Parser class. 
+ * + * This interface (introduced r61913) appears to be undocumented, but + * 'markerName' is used by some core tag hooks to override which strip + * array their results are placed in. **Use great caution if attempting + * this interface, as it is not documented and injudicious use could smash + * private variables.** + * * @param $tag Mixed: the tag to use, e.g. 'hook' for <hook> * @param $callback Mixed: the callback function (and object) to use for the tag * @return The old value of the mTagHooks array associated with the hook */ public function setHook( $tag, $callback ) { $tag = strtolower( $tag ); + if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) throw new MWException( "Invalid character {$m[0]} in setHook('$tag', ...) call" ); $oldVal = isset( $this->mTagHooks[$tag] ) ? $this->mTagHooks[$tag] : null; $this->mTagHooks[$tag] = $callback; if ( !in_array( $tag, $this->mStripList ) ) { @@ -4274,8 +4420,25 @@ class Parser { return $oldVal; } + /** + * As setHook(), but letting the contents be parsed. + * + * Transparent tag hooks are like regular XML-style tag hooks, except they + * operate late in the transformation sequence, on HTML instead of wikitext. + * + * This is probably obsoleted by things dealing with parser frames? + * The only extension currently using it is geoserver. + * + * @since 1.10 + * @todo better document or deprecate this + * + * @param $tag Mixed: the tag to use, e.g. 'hook' for <hook> + * @param $callback Mixed: the callback function (and object) to use for the tag + * @return The old value of the mTagHooks array associated with the hook + */ function setTransparentTagHook( $tag, $callback ) { $tag = strtolower( $tag ); + if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) throw new MWException( "Invalid character {$m[0]} in setTransparentHook('$tag', ...) call" ); $oldVal = isset( $this->mTransparentTagHooks[$tag] ) ? 
$this->mTransparentTagHooks[$tag] : null; $this->mTransparentTagHooks[$tag] = $callback; @@ -4380,6 +4543,7 @@ class Parser { */ function setFunctionTagHook( $tag, $callback, $flags ) { $tag = strtolower( $tag ); + if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) throw new MWException( "Invalid character {$m[0]} in setFunctionTagHook('$tag', ...) call" ); $old = isset( $this->mFunctionTagHooks[$tag] ) ? $this->mFunctionTagHooks[$tag] : null; $this->mFunctionTagHooks[$tag] = array( $callback, $flags ); @@ -4392,7 +4556,7 @@ class Parser { } /** - * FIXME: update documentation. makeLinkObj() is deprecated. + * @todo FIXME: Update documentation. makeLinkObj() is deprecated. * Replace <!--LINK--> link placeholders with actual links, in the buffer * Placeholders created in Skin::makeLinkObj() * Returns an array of link CSS classes, indexed by PDBK. @@ -4420,6 +4584,10 @@ class Parser { * given as text will return the HTML of a gallery with two images, * labeled 'The number "1"' and * 'A tree'. + * + * @param string $text + * @param array $param + * @return string HTML */ function renderImageGallery( $text, $params ) { $ig = new ImageGallery(); @@ -4429,8 +4597,6 @@ class Parser { $ig->setParser( $this ); $ig->setHideBadImages(); $ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'table' ) ); - $ig->useSkin( $this->mOptions->getSkin( $this->mTitle ) ); - $ig->mRevisionId = $this->mRevisionId; if ( isset( $params['showfilename'] ) ) { $ig->setShowFilename( true ); @@ -4467,28 +4633,40 @@ class Parser { } if ( strpos( $matches[0], '%' ) !== false ) { - $matches[1] = urldecode( $matches[1] ); + $matches[1] = rawurldecode( $matches[1] ); } - $tp = Title::newFromText( $matches[1] ); - $nt =& $tp; - if ( is_null( $nt ) ) { + $title = Title::newFromText( $matches[1], NS_FILE ); + if ( is_null( $title ) ) { # Bogus title. Ignore these so we don't bomb out later. 
continue; } + + $label = ''; + $alt = ''; if ( isset( $matches[3] ) ) { - $label = $matches[3]; - } else { - $label = ''; + // look for an |alt= definition while trying not to break existing + // captions with multiple pipes (|) in it, until a more sensible grammar + // is defined for images in galleries + + $matches[3] = $this->recursiveTagParse( trim( $matches[3] ) ); + $altmatches = StringUtils::explode('|', $matches[3]); + $magicWordAlt = MagicWord::get( 'img_alt' ); + + foreach ( $altmatches as $altmatch ) { + $match = $magicWordAlt->matchVariableStartToEnd( $altmatch ); + if ( $match ) { + $alt = $this->stripAltText( $match, false ); + } + else { + // concatenate all other pipes + $label .= '|' . $altmatch; + } + } + // remove the first pipe + $label = substr( $label, 1 ); } - $html = $this->recursiveTagParse( trim( $label ) ); - - $ig->add( $nt, $html ); - - # Only add real images (bug #5586) - if ( $nt->getNamespace() == NS_FILE ) { - $this->mOutput->addImage( $nt->getDBkey() ); - } + $ig->add( $title, $label, $alt ); } return $ig->toHTML(); } @@ -4539,6 +4717,7 @@ class Parser { * @param $title Title * @param $options String * @param $holders LinkHolderArray + * @return string HTML */ function makeImage( $title, $options, $holders = false ) { # Check if the options text is of the form "options|alt text" @@ -4567,23 +4746,23 @@ class Parser { # * text-bottom $parts = StringUtils::explode( "|", $options ); - $sk = $this->mOptions->getSkin( $this->mTitle ); # Give extensions a chance to select the file revision for us - $skip = $time = $descQuery = false; - wfRunHooks( 'BeforeParserMakeImageLinkObj', array( &$this, &$title, &$skip, &$time, &$descQuery ) ); + $time = $sha1 = $descQuery = false; + wfRunHooks( 'BeforeParserFetchFileAndTitle', + array( $this, $title, &$time, &$sha1, &$descQuery ) ); + # Fetch and register the file (file title may be different via hooks) + list( $file, $title ) = $this->fetchFileAndTitle( $title, $time, $sha1 ); - if ( $skip ) { - 
return $sk->link( $title ); - } - - # Get the file - $file = wfFindFile( $title, array( 'time' => $time ) ); # Get parameter map $handler = $file ? $file->getHandler() : false; list( $paramMap, $mwArray ) = $this->getImageParams( $handler ); + if ( !$file ) { + $this->addTrackingCategory( 'broken-file-category' ); + } + # Process the input parameters $caption = ''; $params = array( 'frame' => array(), 'handler' => array(), @@ -4627,7 +4806,7 @@ class Parser { switch( $paramName ) { case 'manualthumb': case 'alt': - # @todo Fixme: possibly check validity here for + # @todo FIXME: Possibly check validity here for # manualthumb? downstream behavior seems odd with # missing manual thumbs. $validated = true; @@ -4687,9 +4866,9 @@ class Parser { # Will the image be presented in a frame, with the caption below? $imageIsFramed = isset( $params['frame']['frame'] ) || - isset( $params['frame']['framed'] ) || - isset( $params['frame']['thumbnail'] ) || - isset( $params['frame']['manualthumb'] ); + isset( $params['frame']['framed'] ) || + isset( $params['frame']['thumbnail'] ) || + isset( $params['frame']['manualthumb'] ); # In the old days, [[Image:Foo|text...]] would set alt text. 
Later it # came to also set the caption, ordinary text after the image -- which @@ -4733,7 +4912,8 @@ class Parser { wfRunHooks( 'ParserMakeImageParams', array( $title, $file, &$params ) ); # Linker does the rest - $ret = $sk->makeImageLink2( $title, $file, $params['frame'], $params['handler'], $time, $descQuery, $this->mOptions->getThumbSize() ); + $ret = Linker::makeImageLink2( $title, $file, $params['frame'], $params['handler'], + $time, $descQuery, $this->mOptions->getThumbSize() ); # Give the handler a chance to modify the parser object if ( $handler ) { @@ -4743,6 +4923,11 @@ class Parser { return $ret; } + /** + * @param $caption + * @param $holders LinkHolderArray + * @return mixed|String + */ protected function stripAltText( $caption, $holders ) { # Strip bad stuff out of the title (tooltip). We can't just use # replaceLinkHoldersText() here, because if this function is called @@ -4779,7 +4964,6 @@ class Parser { * @param $text String * @param $frame PPFrame * @return String - * @private */ function attributeStripCallback( &$text, $frame = false ) { $text = $this->replaceVariables( $text, $frame ); @@ -4789,12 +4973,39 @@ class Parser { /** * Accessor + * + * @return array */ function getTags() { return array_merge( array_keys( $this->mTransparentTagHooks ), array_keys( $this->mTagHooks ) ); } /** + * Replace transparent tags in $text with the values given by the callbacks. + * + * Transparent tag hooks are like regular XML-style tag hooks, except they + * operate late in the transformation sequence, on HTML instead of wikitext. 
+ */ + function replaceTransparentTags( $text ) { + $matches = array(); + $elements = array_keys( $this->mTransparentTagHooks ); + $text = self::extractTagsAndParams( $elements, $text, $matches, $this->mUniqPrefix ); + $replacements = array(); + + foreach ( $matches as $marker => $data ) { + list( $element, $content, $params, $tag ) = $data; + $tagName = strtolower( $element ); + if ( isset( $this->mTransparentTagHooks[$tagName] ) ) { + $output = call_user_func_array( $this->mTransparentTagHooks[$tagName], array( $content, $params, $this ) ); + } else { + $output = $tag; + } + $replacements[$marker] = $output; + } + return strtr( $text, $replacements ); + } + + /** * Break wikitext input into sections, and either pull or replace * some particular section's text. * @@ -4814,17 +5025,18 @@ class Parser { * pull the given section along with its lower-level subsections. If the section is * not found, $mode=get will return $newtext, and $mode=replace will return $text. * + * Section 0 is always considered to exist, even if it only contains the empty + * string. If $text is the empty string and section 0 is replaced, $newText is + * returned. + * * @param $mode String: one of "get" or "replace" * @param $newText String: replacement text for section data. * @return String: for "get", the extracted section text. * for "replace", the whole page with the section replaced. 
*/ private function extractSections( $text, $section, $mode, $newText='' ) { - global $wgTitle; - $this->mOptions = new ParserOptions; - $this->clearState(); - $this->setTitle( $wgTitle ); # not generally used but removes an ugly failure mode - $this->setOutputType( self::OT_PLAIN ); + global $wgTitle; # not generally used but removes an ugly failure mode + $this->startParse( $wgTitle, new ParserOptions, self::OT_PLAIN, true ); $outText = ''; $frame = $this->getPreprocessor()->newFrame(); @@ -4837,6 +5049,25 @@ class Parser { $flags |= self::PTD_FOR_INCLUSION; } } + + # Check for empty input + if ( strval( $text ) === '' ) { + # Only sections 0 and T-0 exist in an empty document + if ( $sectionIndex == 0 ) { + if ( $mode === 'get' ) { + return ''; + } else { + return $newText; + } + } else { + if ( $mode === 'get' ) { + return $newText; + } else { + return $text; + } + } + } + # Preprocess the text $root = $this->preprocessToDom( $text, $flags ); @@ -4930,12 +5161,13 @@ class Parser { /** * This function returns $oldtext after the content of the section - * specified by $section has been replaced with $text. + * specified by $section has been replaced with $text. If the target + * section does not exist, $oldtext is returned unchanged. 
* - * @param $text String: former text of the article + * @param $oldtext String: former text of the article * @param $section Numeric: section identifier * @param $text String: replacing text - * #return String: modified text + * @return String: modified text */ public function replaceSection( $oldtext, $section, $text ) { return $this->extractSections( $oldtext, $section, "replace", $text ); @@ -4951,30 +5183,44 @@ class Parser { } /** + * Get the revision object for $this->mRevisionId + * + * @return Revision|null either a Revision object or null + */ + protected function getRevisionObject() { + if ( !is_null( $this->mRevisionObject ) ) { + return $this->mRevisionObject; + } + if ( is_null( $this->mRevisionId ) ) { + return null; + } + + $this->mRevisionObject = Revision::newFromId( $this->mRevisionId ); + return $this->mRevisionObject; + } + + /** * Get the timestamp associated with the current revision, adjusted for * the default server-local timestamp */ function getRevisionTimestamp() { if ( is_null( $this->mRevisionTimestamp ) ) { wfProfileIn( __METHOD__ ); - global $wgContLang; - $dbr = wfGetDB( DB_SLAVE ); - $timestamp = $dbr->selectField( 'revision', 'rev_timestamp', - array( 'rev_id' => $this->mRevisionId ), __METHOD__ ); - - # Normalize timestamp to internal MW format for timezone processing. - # This has the added side-effect of replacing a null value with - # the current time, which gives us more sensible behavior for - # previews. - $timestamp = wfTimestamp( TS_MW, $timestamp ); - - # The cryptic '' timezone parameter tells to use the site-default - # timezone offset instead of the user settings. - # - # Since this value will be saved into the parser cache, served - # to other users, and potentially even used inside links and such, - # it needs to be consistent for all visitors. - $this->mRevisionTimestamp = $wgContLang->userAdjust( $timestamp, '' ); + + $revObject = $this->getRevisionObject(); + $timestamp = $revObject ? 
$revObject->getTimestamp() : false; + + if( $timestamp !== false ) { + global $wgContLang; + + # The cryptic '' timezone parameter tells to use the site-default + # timezone offset instead of the user settings. + # + # Since this value will be saved into the parser cache, served + # to other users, and potentially even used inside links and such, + # it needs to be consistent for all visitors. + $this->mRevisionTimestamp = $wgContLang->userAdjust( $timestamp, '' ); + } wfProfileOut( __METHOD__ ); } @@ -4987,16 +5233,18 @@ class Parser { * @return String: user name */ function getRevisionUser() { - # if this template is subst: the revision id will be blank, - # so just use the current user's name - if ( $this->mRevisionId ) { - $revision = Revision::newFromId( $this->mRevisionId ); - $revuser = $revision->getUserText(); - } else { - global $wgUser; - $revuser = $wgUser->getName(); + if( is_null( $this->mRevisionUser ) ) { + $revObject = $this->getRevisionObject(); + + # if this template is subst: the revision id will be blank, + # so just use the current user's name + if( $revObject ) { + $this->mRevisionUser = $revObject->getUserText(); + } elseif( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) { + $this->mRevisionUser = $this->getUser()->getName(); + } } - return $revuser; + return $this->mRevisionUser; } /** @@ -5083,7 +5331,8 @@ class Parser { $text = preg_replace( '/\[\[:?([^[|]+)\|([^[]+)\]\]/', '$2', $text ); $text = preg_replace( '/\[\[:?([^[]+)\|?\]\]/', '$1', $text ); - # Strip external link markup (FIXME: Not Tolerant to blank link text + # Strip external link markup + # @todo FIXME: Not tolerant to blank link text # I.E. [http://www.mediawiki.org] will render as [1] or something depending # on how many empty links there are on the page - need to figure that out. $text = preg_replace( '/\[(?:' . wfUrlProtocols() . ')([^ ]+?) 
([^[]+)\]/', '$2', $text ); @@ -5098,36 +5347,39 @@ class Parser { /** * strip/replaceVariables/unstrip for preprocessor regression testing + * + * @return string */ - function testSrvus( $text, $title, ParserOptions $options, $outputType = self::OT_HTML ) { - $this->mOptions = $options; - $this->clearState(); - if ( !$title instanceof Title ) { - $title = Title::newFromText( $title ); - } - $this->mTitle = $title; - $this->setOutputType( $outputType ); + function testSrvus( $text, Title $title, ParserOptions $options, $outputType = self::OT_HTML ) { + $this->startParse( $title, $options, $outputType, true ); + $text = $this->replaceVariables( $text ); $text = $this->mStripState->unstripBoth( $text ); $text = Sanitizer::removeHTMLtags( $text ); return $text; } - function testPst( $text, $title, $options ) { - global $wgUser; - if ( !$title instanceof Title ) { - $title = Title::newFromText( $title ); - } - return $this->preSaveTransform( $text, $title, $wgUser, $options ); + function testPst( $text, Title $title, ParserOptions $options ) { + return $this->preSaveTransform( $text, $title, $options->getUser(), $options ); } - function testPreprocess( $text, $title, $options ) { - if ( !$title instanceof Title ) { - $title = Title::newFromText( $title ); - } + function testPreprocess( $text, Title $title, ParserOptions $options ) { return $this->testSrvus( $text, $title, $options, self::OT_PREPROCESS ); } + /** + * Call a callback function on all regions of the given text that are not + * inside strip markers, and replace those regions with the return value + * of the callback. For example, with input: + * + * aaa<MARKER>bbb + * + * This will call the callback function twice, with 'aaa' and 'bbb'. Those + * two strings will be replaced with the value returned by the callback in + * each case. 
+ * + * @return string + */ function markerSkipCallback( $s, $callback ) { $i = 0; $out = ''; @@ -5152,168 +5404,72 @@ class Parser { return $out; } - function serialiseHalfParsedText( $text ) { - $data = array(); - $data['text'] = $text; - - # First, find all strip markers, and store their - # data in an array. - $stripState = new StripState; - $pos = 0; - while ( ( $start_pos = strpos( $text, $this->mUniqPrefix, $pos ) ) - && ( $end_pos = strpos( $text, self::MARKER_SUFFIX, $pos ) ) ) - { - $end_pos += strlen( self::MARKER_SUFFIX ); - $marker = substr( $text, $start_pos, $end_pos-$start_pos ); - - if ( !empty( $this->mStripState->general->data[$marker] ) ) { - $replaceArray = $stripState->general; - $stripText = $this->mStripState->general->data[$marker]; - } elseif ( !empty( $this->mStripState->nowiki->data[$marker] ) ) { - $replaceArray = $stripState->nowiki; - $stripText = $this->mStripState->nowiki->data[$marker]; - } else { - throw new MWException( "Hanging strip marker: '$marker'." ); - } - - $replaceArray->setPair( $marker, $stripText ); - $pos = $end_pos; - } - $data['stripstate'] = $stripState; - - # Now, find all of our links, and store THEIR - # data in an array! 
:) - $links = array( 'internal' => array(), 'interwiki' => array() ); - $pos = 0; - - # Internal links - while ( ( $start_pos = strpos( $text, '<!--LINK ', $pos ) ) ) { - list( $ns, $trail ) = explode( ':', substr( $text, $start_pos + strlen( '<!--LINK ' ) ), 2 ); - - $ns = trim( $ns ); - if ( empty( $links['internal'][$ns] ) ) { - $links['internal'][$ns] = array(); - } - - $key = trim( substr( $trail, 0, strpos( $trail, '-->' ) ) ); - $links['internal'][$ns][] = $this->mLinkHolders->internals[$ns][$key]; - $pos = $start_pos + strlen( "<!--LINK $ns:$key-->" ); - } - - $pos = 0; - - # Interwiki links - while ( ( $start_pos = strpos( $text, '<!--IWLINK ', $pos ) ) ) { - $data = substr( $text, $start_pos ); - $key = trim( substr( $data, 0, strpos( $data, '-->' ) ) ); - $links['interwiki'][] = $this->mLinkHolders->interwiki[$key]; - $pos = $start_pos + strlen( "<!--IWLINK $key-->" ); - } - - $data['linkholder'] = $links; - + /** + * Save the parser state required to convert the given half-parsed text to + * HTML. "Half-parsed" in this context means the output of + * recursiveTagParse() or internalParse(). This output has strip markers + * from replaceVariables (extensionSubstitution() etc.), and link + * placeholders from replaceLinkHolders(). + * + * Returns an array which can be serialized and stored persistently. This + * array can later be loaded into another parser instance with + * unserializeHalfParsedText(). The text can then be safely incorporated into + * the return value of a parser hook. 
+ * + * @return array + */ + function serializeHalfParsedText( $text ) { + wfProfileIn( __METHOD__ ); + $data = array( + 'text' => $text, + 'version' => self::HALF_PARSED_VERSION, + 'stripState' => $this->mStripState->getSubState( $text ), + 'linkHolders' => $this->mLinkHolders->getSubArray( $text ) + ); + wfProfileOut( __METHOD__ ); return $data; } /** - * TODO: document - * @param $data Array - * @param $intPrefix String unique identifying prefix + * Load the parser state given in the $data array, which is assumed to + * have been generated by serializeHalfParsedText(). The text contents is + * extracted from the array, and its markers are transformed into markers + * appropriate for the current Parser instance. This transformed text is + * returned, and can be safely included in the return value of a parser + * hook. + * + * If the $data array has been stored persistently, the caller should first + * check whether it is still valid, by calling isValidHalfParsedText(). + * + * @param $data Serialized data * @return String */ - function unserialiseHalfParsedText( $data, $intPrefix = null ) { - if ( !$intPrefix ) { - $intPrefix = self::getRandomString(); + function unserializeHalfParsedText( $data ) { + if ( !isset( $data['version'] ) || $data['version'] != self::HALF_PARSED_VERSION ) { + throw new MWException( __METHOD__.': invalid version' ); } # First, extract the strip state. - $stripState = $data['stripstate']; - $this->mStripState->general->merge( $stripState->general ); - $this->mStripState->nowiki->merge( $stripState->nowiki ); - - # Now, extract the text, and renumber links - $text = $data['text']; - $links = $data['linkholder']; - - # Internal... - foreach ( $links['internal'] as $ns => $nsLinks ) { - foreach ( $nsLinks as $key => $entry ) { - $newKey = $intPrefix . '-' . 
$key; - $this->mLinkHolders->internals[$ns][$newKey] = $entry; - - $text = str_replace( "<!--LINK $ns:$key-->", "<!--LINK $ns:$newKey-->", $text ); - } - } + $texts = array( $data['text'] ); + $texts = $this->mStripState->merge( $data['stripState'], $texts ); - # Interwiki... - foreach ( $links['interwiki'] as $key => $entry ) { - $newKey = "$intPrefix-$key"; - $this->mLinkHolders->interwikis[$newKey] = $entry; - - $text = str_replace( "<!--IWLINK $key-->", "<!--IWLINK $newKey-->", $text ); - } + # Now renumber links + $texts = $this->mLinkHolders->mergeForeign( $data['linkHolders'], $texts ); # Should be good to go. - return $text; - } -} - -/** - * @todo document, briefly. - * @ingroup Parser - */ -class StripState { - var $general, $nowiki; - - function __construct() { - $this->general = new ReplacementArray; - $this->nowiki = new ReplacementArray; - } - - function unstripGeneral( $text ) { - wfProfileIn( __METHOD__ ); - do { - $oldText = $text; - $text = $this->general->replace( $text ); - } while ( $text !== $oldText ); - wfProfileOut( __METHOD__ ); - return $text; - } - - function unstripNoWiki( $text ) { - wfProfileIn( __METHOD__ ); - do { - $oldText = $text; - $text = $this->nowiki->replace( $text ); - } while ( $text !== $oldText ); - wfProfileOut( __METHOD__ ); - return $text; - } - - function unstripBoth( $text ) { - wfProfileIn( __METHOD__ ); - do { - $oldText = $text; - $text = $this->general->replace( $text ); - $text = $this->nowiki->replace( $text ); - } while ( $text !== $oldText ); - wfProfileOut( __METHOD__ ); - return $text; + return $texts[0]; } -} -/** - * @todo document, briefly. 
- * @ingroup Parser - */ -class OnlyIncludeReplacer { - var $output = ''; - - function replace( $matches ) { - if ( substr( $matches[1], -1 ) === "\n" ) { - $this->output .= substr( $matches[1], 0, -1 ); - } else { - $this->output .= $matches[1]; - } + /** + * Returns true if the given array, presumed to be generated by + * serializeHalfParsedText(), is compatible with the current version of the + * parser. + * + * @param $data Array + * + * @return bool + */ + function isValidHalfParsedText( $data ) { + return isset( $data['version'] ) && $data['version'] == self::HALF_PARSED_VERSION; } } diff --git a/includes/parser/ParserCache.php b/includes/parser/ParserCache.php index 1e028ae5..dcbf7a4d 100644 --- a/includes/parser/ParserCache.php +++ b/includes/parser/ParserCache.php @@ -31,13 +31,18 @@ class ParserCache { * * @param $memCached Object */ - function __construct( $memCached ) { + protected function __construct( $memCached ) { if ( !$memCached ) { throw new MWException( "Tried to create a ParserCache with an invalid memcached" ); } $this->mMemc = $memCached; } + /** + * @param $article Article + * @param $hash string + * @return mixed|string + */ protected function getParserOutputKey( $article, $hash ) { global $wgRequest; @@ -49,6 +54,10 @@ class ParserCache { return $key; } + /** + * @param $article Article + * @return mixed|string + */ protected function getOptionsKey( $article ) { $pageid = $article->getID(); return wfMemcKey( 'pcache', 'idoptions', "{$pageid}" ); @@ -63,6 +72,9 @@ class ParserCache { * $article. For example give a Chinese interface to a user with * English preferences. That's why we take into account *all* user * options. (r70809 CR) + * + * @param $article Article + * @param $popts ParserOptions */ function getETag( $article, $popts ) { return 'W/"' . $this->getParserOutputKey( $article, @@ -72,6 +84,9 @@ class ParserCache { /** * Retrieve the ParserOutput from ParserCache, even if it's outdated. 
+ * @param $article Article + * @param $popts ParserOptions + * @return ParserOutput|false */ public function getDirty( $article, $popts ) { $value = $this->get( $article, $popts, true ); @@ -82,6 +97,9 @@ class ParserCache { * Used to provide a unique id for the PoolCounter. * It would be preferable to have this code in get() * instead of having Article looking in our internals. + * + * @param $article Article + * @param $popts ParserOptions */ public function getKey( $article, $popts, $useOutdated = true ) { global $wgCacheEpoch; @@ -116,6 +134,12 @@ class ParserCache { /** * Retrieve the ParserOutput from ParserCache. * false if not found or outdated. + * + * @param $article Article + * @param $popts ParserOptions + * @param $useOutdated + * + * @return ParserOutput|false */ public function get( $article, $popts, $useOutdated = false ) { global $wgCacheEpoch; @@ -150,6 +174,11 @@ class ParserCache { } wfDebug( "Found.\n" ); + + // The edit section preference may not be the appropiate one in + // the ParserOutput, as we are not storing it in the parsercache + // key. Force it here. See bug 31445. 
+ $value->setEditSectionTokens( $popts->getEditSection() ); if ( !$useOutdated && $value->expired( $touched ) ) { wfIncrStats( "pcache_miss_expired" ); @@ -157,9 +186,6 @@ class ParserCache { wfDebug( "ParserOutput key expired, touched $touched, epoch $wgCacheEpoch, cached $cacheTime\n" ); $value = false; } else { - if ( isset( $value->mTimestamp ) ) { - $article->mTimestamp = $value->mTimestamp; - } wfIncrStats( "pcache_hit" ); } @@ -167,7 +193,12 @@ class ParserCache { return $value; } - + /** + * @param $parserOutput ParserOutput + * @param $article Article + * @param $popts ParserOptions + * @return void + */ public function save( $parserOutput, $article, $popts ) { $expire = $parserOutput->getCacheExpiry(); @@ -183,7 +214,8 @@ class ParserCache { $optionsKey->setContainsOldMagic( $parserOutput->containsOldMagic() ); - $parserOutputKey = $this->getParserOutputKey( $article, $popts->optionsHash( $optionsKey->mUsedOptions ) ); + $parserOutputKey = $this->getParserOutputKey( $article, + $popts->optionsHash( $optionsKey->mUsedOptions ) ); // Save the timestamp so that we don't have to load the revision row on view $parserOutput->mTimestamp = $article->getTimestamp(); diff --git a/includes/parser/ParserOptions.php b/includes/parser/ParserOptions.php index 1bda0792..07752768 100644 --- a/includes/parser/ParserOptions.php +++ b/includes/parser/ParserOptions.php @@ -5,7 +5,7 @@ * @file * @ingroup Parser */ - + /** * Set options of the Parser * @todo document @@ -18,46 +18,51 @@ class ParserOptions { var $mAllowExternalImages; # Allow external images inline var $mAllowExternalImagesFrom; # If not, any exception? var $mEnableImageWhitelist; # If not or it doesn't match, should we check an on-wiki whitelist? 
- var $mSkin; # Reference to the preferred skin - var $mDateFormat; # Date format index - var $mEditSection; # Create "edit section" links - var $mNumberHeadings; # Automatically number headings + var $mDateFormat = null; # Date format index + var $mEditSection = true; # Create "edit section" links var $mAllowSpecialInclusion; # Allow inclusion of special pages - var $mTidy; # Ask for tidy cleanup - var $mInterfaceMessage; # Which lang to call for PLURAL and GRAMMAR - var $mTargetLanguage; # Overrides above setting with arbitrary language + var $mTidy = false; # Ask for tidy cleanup + var $mInterfaceMessage = false; # Which lang to call for PLURAL and GRAMMAR + var $mTargetLanguage = null; # Overrides above setting with arbitrary language var $mMaxIncludeSize; # Maximum size of template expansions, in bytes var $mMaxPPNodeCount; # Maximum number of nodes touched by PPFrame::expand() var $mMaxPPExpandDepth; # Maximum recursion depth in PPFrame::expand() var $mMaxTemplateDepth; # Maximum recursion depth for templates within templates - var $mRemoveComments; # Remove HTML comments. ONLY APPLIES TO PREPROCESS OPERATIONS - var $mTemplateCallback; # Callback for template fetching - var $mEnableLimitReport; # Enable limit report in an HTML comment on output + var $mRemoveComments = true; # Remove HTML comments. ONLY APPLIES TO PREPROCESS OPERATIONS + var $mTemplateCallback = # Callback for template fetching + array( 'Parser', 'statelessFetchTemplate' ); + var $mEnableLimitReport = false; # Enable limit report in an HTML comment on output var $mTimestamp; # Timestamp used for {{CURRENTDAY}} etc. var $mExternalLinkTarget; # Target attribute for external links + var $mCleanSignatures; # + var $mPreSaveTransform = true; # Transform wiki markup when saving the page. + + var $mNumberHeadings; # Automatically number headings var $mMath; # User math preference (as integer) - var $mUserLang; # Language code of the User language. var $mThumbSize; # Thumb size preferred by the user. 
- var $mCleanSignatures; # + private $mStubThreshold; # Maximum article size of an article to be marked as "stub" + var $mUserLang; # Language code of the User language. + + /** + * @var User + */ + var $mUser; # Stored user object + var $mIsPreview = false; # Parsing the page for a "preview" operation + var $mIsSectionPreview = false; # Parsing the page for a "preview" operation on a single section + var $mIsPrintable = false; # Parsing the printable version of the page - var $mUser; # Stored user object, just used to initialise the skin - var $mIsPreview; # Parsing the page for a "preview" operation - var $mIsSectionPreview; # Parsing the page for a "preview" operation on a single section - var $mIsPrintable; # Parsing the printable version of the page - var $mExtraKey = ''; # Extra key that should be present in the caching key. - + protected $onAccessCallback = null; - + function getUseDynamicDates() { return $this->mUseDynamicDates; } function getInterwikiMagic() { return $this->mInterwikiMagic; } function getAllowExternalImages() { return $this->mAllowExternalImages; } function getAllowExternalImagesFrom() { return $this->mAllowExternalImagesFrom; } function getEnableImageWhitelist() { return $this->mEnableImageWhitelist; } - function getEditSection() { $this->optionUsed('editsection'); - return $this->mEditSection; } - function getNumberHeadings() { $this->optionUsed('numberheadings'); - return $this->mNumberHeadings; } + function getEditSection() { return $this->mEditSection; } + function getNumberHeadings() { $this->optionUsed( 'numberheadings' ); + return $this->mNumberHeadings; } function getAllowSpecialInclusion() { return $this->mAllowSpecialInclusion; } function getTidy() { return $this->mTidy; } function getInterfaceMessage() { return $this->mInterfaceMessage; } @@ -71,25 +76,32 @@ class ParserOptions { function getEnableLimitReport() { return $this->mEnableLimitReport; } function getCleanSignatures() { return $this->mCleanSignatures; } function 
getExternalLinkTarget() { return $this->mExternalLinkTarget; } - function getMath() { $this->optionUsed('math'); - return $this->mMath; } - function getThumbSize() { $this->optionUsed('thumbsize'); - return $this->mThumbSize; } - + function getMath() { $this->optionUsed( 'math' ); + return $this->mMath; } + function getThumbSize() { $this->optionUsed( 'thumbsize' ); + return $this->mThumbSize; } + function getStubThreshold() { $this->optionUsed( 'stubthreshold' ); + return $this->mStubThreshold; } + function getIsPreview() { return $this->mIsPreview; } function getIsSectionPreview() { return $this->mIsSectionPreview; } - function getIsPrintable() { $this->optionUsed('printable'); - return $this->mIsPrintable; } + function getIsPrintable() { $this->optionUsed( 'printable' ); + return $this->mIsPrintable; } + function getUser() { return $this->mUser; } + function getPreSaveTransform() { return $this->mPreSaveTransform; } + /** + * @param $title Title + * @return Skin + * @deprecated since 1.18 Use Linker::* instead + */ function getSkin( $title = null ) { - if ( !isset( $this->mSkin ) ) { - $this->mSkin = $this->mUser->getSkin( $title ); - } - return $this->mSkin; + wfDeprecated( __METHOD__ ); + return new DummyLinker; } function getDateFormat() { - $this->optionUsed('dateformat'); + $this->optionUsed( 'dateformat' ); if ( !isset( $this->mDateFormat ) ) { $this->mDateFormat = $this->mUser->getDatePreference(); } @@ -107,9 +119,11 @@ class ParserOptions { * You shouldn't use this. Really. $parser->getFunctionLang() is all you need. * Using this fragments the cache and is discouraged. Yes, {{int: }} uses this, * producing inconsistent tables (Bug 14404). 
+ * @return String Language code + * @since 1.17 */ function getUserLang() { - $this->optionUsed('userlang'); + $this->optionUsed( 'userlang' ); return $this->mUserLang; } @@ -122,9 +136,9 @@ class ParserOptions { function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); } function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); } function setAllowSpecialInclusion( $x ) { return wfSetVar( $this->mAllowSpecialInclusion, $x ); } - function setTidy( $x ) { return wfSetVar( $this->mTidy, $x); } + function setTidy( $x ) { return wfSetVar( $this->mTidy, $x ); } function setSkin( $x ) { $this->mSkin = $x; } - function setInterfaceMessage( $x ) { return wfSetVar( $this->mInterfaceMessage, $x); } + function setInterfaceMessage( $x ) { return wfSetVar( $this->mInterfaceMessage, $x ); } function setTargetLanguage( $x ) { return wfSetVar( $this->mTargetLanguage, $x, true ); } function setMaxIncludeSize( $x ) { return wfSetVar( $this->mMaxIncludeSize, $x ); } function setMaxPPNodeCount( $x ) { return wfSetVar( $this->mMaxPPNodeCount, $x ); } @@ -136,9 +150,16 @@ class ParserOptions { function setCleanSignatures( $x ) { return wfSetVar( $this->mCleanSignatures, $x ); } function setExternalLinkTarget( $x ) { return wfSetVar( $this->mExternalLinkTarget, $x ); } function setMath( $x ) { return wfSetVar( $this->mMath, $x ); } - function setUserLang( $x ) { return wfSetVar( $this->mUserLang, $x ); } + function setUserLang( $x ) { + if ( $x instanceof Language ) { + $x = $x->getCode(); + } + return wfSetVar( $this->mUserLang, $x ); + } function setThumbSize( $x ) { return wfSetVar( $this->mThumbSize, $x ); } - + function setStubThreshold( $x ) { return wfSetVar( $this->mStubThreshold, $x ); } + function setPreSaveTransform( $x ) { return wfSetVar( $this->mPreSaveTransform, $x ); } + function setIsPreview( $x ) { return wfSetVar( $this->mIsPreview, $x ); } function setIsSectionPreview( $x ) { return wfSetVar( $this->mIsSectionPreview, $x ); } 
function setIsPrintable( $x ) { return wfSetVar( $this->mIsPrintable, $x ); } @@ -191,30 +212,19 @@ class ParserOptions { $this->mAllowExternalImages = $wgAllowExternalImages; $this->mAllowExternalImagesFrom = $wgAllowExternalImagesFrom; $this->mEnableImageWhitelist = $wgEnableImageWhitelist; - $this->mSkin = null; # Deferred - $this->mDateFormat = null; # Deferred - $this->mEditSection = true; - $this->mNumberHeadings = $user->getOption( 'numberheadings' ); $this->mAllowSpecialInclusion = $wgAllowSpecialInclusion; - $this->mTidy = false; - $this->mInterfaceMessage = false; - $this->mTargetLanguage = null; // default depends on InterfaceMessage setting $this->mMaxIncludeSize = $wgMaxArticleSize * 1024; $this->mMaxPPNodeCount = $wgMaxPPNodeCount; $this->mMaxPPExpandDepth = $wgMaxPPExpandDepth; $this->mMaxTemplateDepth = $wgMaxTemplateDepth; - $this->mRemoveComments = true; - $this->mTemplateCallback = array( 'Parser', 'statelessFetchTemplate' ); - $this->mEnableLimitReport = false; $this->mCleanSignatures = $wgCleanSignatures; $this->mExternalLinkTarget = $wgExternalLinkTarget; + + $this->mNumberHeadings = $user->getOption( 'numberheadings' ); $this->mMath = $user->getOption( 'math' ); - $this->mUserLang = $wgLang->getCode(); $this->mThumbSize = $user->getOption( 'thumbsize' ); - - $this->mIsPreview = false; - $this->mIsSectionPreview = false; - $this->mIsPrintable = false; + $this->mStubThreshold = $user->getStubThreshold(); + $this->mUserLang = $wgLang->getCode(); wfProfileOut( __METHOD__ ); } @@ -226,7 +236,7 @@ class ParserOptions { function registerWatcher( $callback ) { $this->onAccessCallback = $callback; } - + /** * Called when an option is accessed. */ @@ -235,9 +245,9 @@ class ParserOptions { call_user_func( $this->onAccessCallback, $optionName ); } } - + /** - * Returns the full array of options that would have been used by + * Returns the full array of options that would have been used by * in 1.16. 
* Used to get the old parser cache entries when available. */ @@ -249,14 +259,14 @@ class ParserOptions { } return $legacyOpts; } - + /** * Generate a hash string with the values set on these ParserOptions * for the keys given in the array. * This will be used as part of the hash key for the parser cache, - * so users sharign the options with vary for the same page share + * so users sharign the options with vary for the same page share * the same cached data safely. - * + * * Replaces User::getPageRenderingHash() * * Extensions which require it should install 'PageRenderingHash' hook, @@ -270,48 +280,49 @@ class ParserOptions { global $wgContLang, $wgRenderHashAppend; $confstr = ''; - - if ( in_array( 'math', $forOptions ) ) + + if ( in_array( 'math', $forOptions ) ) { $confstr .= $this->mMath; - else + } else { $confstr .= '*'; - + } + // Space assigned for the stubthreshold but unused - // since it disables the parser cache, its value will always + // since it disables the parser cache, its value will always // be 0 when this function is called by parsercache. - // The conditional is here to avoid a confusing 0 - if ( true || in_array( 'stubthreshold', $forOptions ) ) - $confstr .= '!0' ; - else + if ( in_array( 'stubthreshold', $forOptions ) ) { + $confstr .= '!' . $this->mStubThreshold; + } else { $confstr .= '!*' ; + } - if ( in_array( 'dateformat', $forOptions ) ) + if ( in_array( 'dateformat', $forOptions ) ) { $confstr .= '!' . $this->getDateFormat(); - - if ( in_array( 'numberheadings', $forOptions ) ) + } + + if ( in_array( 'numberheadings', $forOptions ) ) { $confstr .= '!' . ( $this->mNumberHeadings ? '1' : '' ); - else + } else { $confstr .= '!*'; - - if ( in_array( 'userlang', $forOptions ) ) + } + + if ( in_array( 'userlang', $forOptions ) ) { $confstr .= '!' . $this->mUserLang; - else + } else { $confstr .= '!*'; + } - if ( in_array( 'thumbsize', $forOptions ) ) + if ( in_array( 'thumbsize', $forOptions ) ) { $confstr .= '!' . 
$this->mThumbSize; - else + } else { $confstr .= '!*'; + } // add in language specific options, if any - // FIXME: This is just a way of retrieving the url/user preferred variant + // @todo FIXME: This is just a way of retrieving the url/user preferred variant $confstr .= $wgContLang->getExtraHashOptions(); - // Since the skin could be overloading link(), it should be - // included here but in practice, none of our skins do that. - // $confstr .= "!" . $this->mSkin->getSkinName(); - $confstr .= $wgRenderHashAppend; if ( !in_array( 'editsection', $forOptions ) ) { @@ -319,20 +330,21 @@ class ParserOptions { } elseif ( !$this->mEditSection ) { $confstr .= '!edit=0'; } - - if ( $this->mIsPrintable && in_array( 'printable', $forOptions ) ) + + if ( $this->mIsPrintable && in_array( 'printable', $forOptions ) ) { $confstr .= '!printable=1'; - + } + if ( $this->mExtraKey != '' ) $confstr .= $this->mExtraKey; - + // Give a chance for extensions to modify the hash, if they have // extra options or other effects on the parser cache. wfRunHooks( 'PageRenderingHash', array( &$confstr ) ); // Make it a valid memcached key fragment $confstr = str_replace( ' ', '_', $confstr ); - + return $confstr; } } diff --git a/includes/parser/ParserOutput.php b/includes/parser/ParserOutput.php index 1e4765db..403b6625 100644 --- a/includes/parser/ParserOutput.php +++ b/includes/parser/ParserOutput.php @@ -21,14 +21,15 @@ class CacheTime { function containsOldMagic() { return $this->mContainsOldMagic; } function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); } - - /** - * setCacheTime() sets the timestamp expressing when the page has been rendered. + + /** + * setCacheTime() sets the timestamp expressing when the page has been rendered. * This doesn not control expiry, see updateCacheExpiry() for that! 
+ * @param $t string + * @return string */ - function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); } + function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); } - /** * Sets the number of seconds after which this object should expire. * This value is used with the ParserCache. @@ -36,16 +37,20 @@ class CacheTime { * the new call has no effect. The value returned by getCacheExpiry is smaller * or equal to the smallest number that was provided as an argument to * updateCacheExpiry(). + * + * @param $seconds number */ function updateCacheExpiry( $seconds ) { $seconds = (int)$seconds; - if ( $this->mCacheExpiry === null || $this->mCacheExpiry > $seconds ) - $this->mCacheExpiry = $seconds; + if ( $this->mCacheExpiry === null || $this->mCacheExpiry > $seconds ) { + $this->mCacheExpiry = $seconds; + } // hack: set old-style marker for uncacheable entries. - if ( $this->mCacheExpiry !== null && $this->mCacheExpiry <= 0 ) + if ( $this->mCacheExpiry !== null && $this->mCacheExpiry <= 0 ) { $this->mCacheTime = -1; + } } /** @@ -59,28 +64,36 @@ class CacheTime { function getCacheExpiry() { global $wgParserCacheExpireTime; - if ( $this->mCacheTime < 0 ) return 0; // old-style marker for "not cachable" + if ( $this->mCacheTime < 0 ) { + return 0; + } // old-style marker for "not cachable" $expire = $this->mCacheExpiry; - if ( $expire === null ) + if ( $expire === null ) { $expire = $wgParserCacheExpireTime; - else + } else { $expire = min( $expire, $wgParserCacheExpireTime ); + } if( $this->containsOldMagic() ) { //compatibility hack $expire = min( $expire, 3600 ); # 1 hour } - if ( $expire <= 0 ) return 0; // not cachable - else return $expire; + if ( $expire <= 0 ) { + return 0; // not cachable + } else { + return $expire; + } } - + /** + * @return bool + */ function isCacheable() { return $this->getCacheExpiry() > 0; } - + /** * Return true if this cached output object predates the global or * per-article cache invalidation timestamps, or if 
it comes from @@ -100,8 +113,7 @@ class CacheTime { } } -class ParserOutput extends CacheTime -{ +class ParserOutput extends CacheTime { var $mText, # The output text $mLanguageLinks, # List of the full text of language links, in the order they appear $mCategories, # Map of category names to sort keys @@ -110,6 +122,7 @@ class ParserOutput extends CacheTime $mTemplates = array(), # 2-D map of NS/DBK to ID for the template references. ID=zero for broken. $mTemplateIds = array(), # 2-D map of NS/DBK to rev ID for the template references. ID=zero for broken. $mImages = array(), # DB keys of the images used, in the array key only + $mImageTimeKeys = array(), # DB keys of the images used mapped to sha1 and MW timestamp $mExternalLinks = array(), # External link URLs, in the key only $mInterwikiLinks = array(), # 2-D map of prefix/DBK (in keys only) for the inline interwiki links in the document. $mNewSection = false, # Show a new section link? @@ -117,13 +130,19 @@ class ParserOutput extends CacheTime $mNoGallery = false, # No gallery on category page? (__NOGALLERY__) $mHeadItems = array(), # Items to put in the <head> section $mModules = array(), # Modules to be loaded by the resource loader + $mModuleScripts = array(), # Modules of which only the JS will be loaded by the resource loader + $mModuleStyles = array(), # Modules of which only the CSSS will be loaded by the resource loader + $mModuleMessages = array(), # Modules of which only the messages will be loaded by the resource loader $mOutputHooks = array(), # Hook tags as per $wgParserOutputHooks $mWarnings = array(), # Warning text to be returned to the user. Wikitext formatted, in the key only $mSections = array(), # Table of contents + $mEditSectionTokens = false, # prefix/suffix markers if edit sections were output as tokens $mProperties = array(), # Name/value pairs to be cached in the DB $mTOCHTML = ''; # HTML of the TOC private $mIndexPolicy = ''; # 'index' or 'noindex'? 
Any other value will result in no change. - private $mAccessedOptions = null; # List of ParserOptions (stored in the keys) + private $mAccessedOptions = array(); # List of ParserOptions (stored in the keys) + + const EDITSECTION_REGEX = '#<(?:mw:)?editsection page="(.*?)" section="(.*?)"(?:/>|>(.*?)(</(?:mw:)?editsection>))#'; function __construct( $text = '', $languageLinks = array(), $categoryLinks = array(), $containsOldMagic = false, $titletext = '' ) @@ -135,21 +154,55 @@ class ParserOutput extends CacheTime $this->mTitleText = $titletext; } - function getText() { return $this->mText; } + function getText() { + if ( $this->mEditSectionTokens ) { + return preg_replace_callback( ParserOutput::EDITSECTION_REGEX, + array( &$this, 'replaceEditSectionLinksCallback' ), $this->mText ); + } else { + return preg_replace( ParserOutput::EDITSECTION_REGEX, '', $this->mText ); + } + return $this->mText; + } + + /** + * callback used by getText to replace editsection tokens + * @private + */ + function replaceEditSectionLinksCallback( $m ) { + global $wgOut, $wgLang; + $args = array( + htmlspecialchars_decode($m[1]), + htmlspecialchars_decode($m[2]), + isset($m[4]) ? 
$m[3] : null, + ); + $args[0] = Title::newFromText( $args[0] ); + if ( !is_object($args[0]) ) { + throw new MWException("Bad parser output text."); + } + $args[] = $wgLang->getCode(); + $skin = $wgOut->getSkin(); + return call_user_func_array( array( $skin, 'doEditSectionLink' ), $args ); + } + function &getLanguageLinks() { return $this->mLanguageLinks; } function getInterwikiLinks() { return $this->mInterwikiLinks; } function getCategoryLinks() { return array_keys( $this->mCategories ); } function &getCategories() { return $this->mCategories; } function getTitleText() { return $this->mTitleText; } function getSections() { return $this->mSections; } + function getEditSectionTokens() { return $this->mEditSectionTokens; } function &getLinks() { return $this->mLinks; } function &getTemplates() { return $this->mTemplates; } + function &getTemplateIds() { return $this->mTemplateIds; } function &getImages() { return $this->mImages; } + function &getImageTimeKeys() { return $this->mImageTimeKeys; } function &getExternalLinks() { return $this->mExternalLinks; } function getNoGallery() { return $this->mNoGallery; } function getHeadItems() { return $this->mHeadItems; } function getModules() { return $this->mModules; } - function getSubtitle() { return $this->mSubtitle; } + function getModuleScripts() { return $this->mModuleScripts; } + function getModuleStyles() { return $this->mModuleStyles; } + function getModuleMessages() { return $this->mModuleMessages; } function getOutputHooks() { return (array)$this->mOutputHooks; } function getWarnings() { return array_keys( $this->mWarnings ); } function getIndexPolicy() { return $this->mIndexPolicy; } @@ -161,6 +214,7 @@ class ParserOutput extends CacheTime function setTitleText( $t ) { return wfSetVar( $this->mTitleText, $t ); } function setSections( $toc ) { return wfSetVar( $this->mSections, $toc ); } + function setEditSectionTokens( $t ) { return wfSetVar( $this->mEditSectionTokens, $t ); } function setIndexPolicy( $policy ) { 
return wfSetVar( $this->mIndexPolicy, $policy ); } function setTOCHTML( $tochtml ) { return wfSetVar( $this->mTOCHTML, $tochtml ); } @@ -226,10 +280,27 @@ class ParserOutput extends CacheTime $this->mLinks[$ns][$dbk] = $id; } - function addImage( $name ) { + /** + * Register a file dependency for this output + * @param $name string Title dbKey + * @param $timestamp string MW timestamp of file creation (or false if non-existing) + * @param $sha string base 36 SHA-1 of file (or false if non-existing) + * @return void + */ + function addImage( $name, $timestamp = null, $sha1 = null ) { $this->mImages[$name] = 1; + if ( $timestamp !== null && $sha1 !== null ) { + $this->mImageTimeKeys[$name] = array( 'time' => $timestamp, 'sha1' => $sha1 ); + } } + /** + * Register a template dependency for this output + * @param $title Title + * @param $page_id + * @param $rev_id + * @return void + */ function addTemplate( $title, $page_id, $rev_id ) { $ns = $title->getNamespace(); $dbk = $title->getDBkey(); @@ -271,10 +342,22 @@ class ParserOutput extends CacheTime } } - function addModules( $modules ) { + public function addModules( $modules ) { $this->mModules = array_merge( $this->mModules, (array) $modules ); } + public function addModuleScripts( $modules ) { + $this->mModuleScripts = array_merge( $this->mModuleScripts, (array)$modules ); + } + + public function addModuleStyles( $modules ) { + $this->mModuleStyles = array_merge( $this->mModuleStyles, (array)$modules ); + } + + public function addModuleMessages( $modules ) { + $this->mModuleMessages = array_merge( $this->mModuleMessages, (array)$modules ); + } + /** * Override the title to be used for display * -- this is assumed to have been validated @@ -293,7 +376,7 @@ class ParserOutput extends CacheTime * @return String */ public function getDisplayTitle() { - $t = $this->getTitleText( ); + $t = $this->getTitleText(); if( $t === '' ) { return false; } @@ -333,11 +416,11 @@ class ParserOutput extends CacheTime /** * Returns 
the options from its ParserOptions which have been taken * into account to produce this output or false if not available. - * @return mixed Array/false + * @return mixed Array */ public function getUsedOptions() { if ( !isset( $this->mAccessedOptions ) ) { - return false; + return array(); } return array_keys( $this->mAccessedOptions ); } diff --git a/includes/parser/Parser_DiffTest.php b/includes/parser/Parser_DiffTest.php index c6dd76e5..efad33f9 100644 --- a/includes/parser/Parser_DiffTest.php +++ b/includes/parser/Parser_DiffTest.php @@ -111,6 +111,10 @@ class Parser_DiffTest } } + /** + * @param $parser Parser + * @return bool + */ function onClearState( &$parser ) { // hack marker prefixes to get identical output if ( !isset( $this->dtUniqPrefix ) ) { diff --git a/includes/parser/Parser_LinkHooks.php b/includes/parser/Parser_LinkHooks.php index 7c17ce4e..90e44943 100644 --- a/includes/parser/Parser_LinkHooks.php +++ b/includes/parser/Parser_LinkHooks.php @@ -9,8 +9,7 @@ * Parser with LinkHooks experiment * @ingroup Parser */ -class Parser_LinkHooks extends Parser -{ +class Parser_LinkHooks extends Parser { /** * Update this version number when the ParserOutput format * changes in an incompatible way, so the parser cache @@ -38,10 +37,8 @@ class Parser_LinkHooks extends Parser /** * Constructor - * - * @public */ - function __construct( $conf = array() ) { + public function __construct( $conf = array() ) { parent::__construct( $conf ); $this->mLinkHooks = array(); } @@ -82,8 +79,6 @@ class Parser_LinkHooks extends Parser * True) (Treat as link) Parse the link according to normal link rules * False) (Bad link) Just output the raw wikitext (You may modify the text first) * - * @public - * * @param $ns Integer or String: the Namespace ID or regex pattern if SLH_PATTERN is set * @param $callback Mixed: the callback function (and object) to use * @param $flags Integer: a combination of the following flags: @@ -91,7 +86,7 @@ class Parser_LinkHooks extends Parser * * 
@return The old callback function for this name, if any */ - function setLinkHook( $ns, $callback, $flags = 0 ) { + public function setLinkHook( $ns, $callback, $flags = 0 ) { if( $flags & SLH_PATTERN && !is_string($ns) ) throw new MWException( __METHOD__.'() expecting a regex string pattern.' ); elseif( $flags | ~SLH_PATTERN && !is_int($ns) ) @@ -232,7 +227,7 @@ class Parser_LinkHooks extends Parser wfProfileOut( __METHOD__."-misc" ); # Make title object wfProfileIn( __METHOD__."-title" ); - $title = Title::newFromText( $this->mStripState->unstripNoWiki($titleText) ); + $title = Title::newFromText( $this->mStripState->unstripNoWiki( $titleText ) ); if( !$title ) { wfProfileOut( __METHOD__."-title" ); wfProfileOut( __METHOD__ ); @@ -244,7 +239,7 @@ class Parser_LinkHooks extends Parser # Default for Namespaces is a default link # ToDo: Default for patterns is plain wikitext $return = true; - if( isset($this->mLinkHooks[$ns]) ) { + if( isset( $this->mLinkHooks[$ns] ) ) { list( $callback, $flags ) = $this->mLinkHooks[$ns]; if( $flags & SLH_PATTERN ) { $args = array( $parser, $holders, $markers, $titleText, &$paramText, &$leadingColon ); @@ -253,14 +248,14 @@ class Parser_LinkHooks extends Parser } # Workaround for PHP bug 35229 and similar if ( !is_callable( $callback ) ) { - throw new MWException( "Tag hook for $name is not callable\n" ); + throw new MWException( "Tag hook for namespace $ns is not callable\n" ); } $return = call_user_func_array( $callback, $args ); } if( $return === true ) { # True (treat as plain link) was returned, call the defaultLinkHook - $args = array( $parser, $holders, $markers, $title, $titleText, &$paramText, &$leadingColon ); - $return = call_user_func_array( array( 'CoreLinkFunctions', 'defaultLinkHook' ), $args ); + $return = CoreLinkFunctions::defaultLinkHook( $parser, $holders, $markers, $title, + $titleText, $paramText, $leadingColon ); } if( $return === false ) { # False (no link) was returned, output plain wikitext diff --git 
a/includes/parser/Preprocessor.php b/includes/parser/Preprocessor.php index c31f37bf..d6328aa7 100644 --- a/includes/parser/Preprocessor.php +++ b/includes/parser/Preprocessor.php @@ -9,19 +9,44 @@ * @ingroup Parser */ interface Preprocessor { - /** Create a new preprocessor object based on an initialised Parser object */ + /** + * Create a new preprocessor object based on an initialised Parser object + * + * @param $parser Parser + */ function __construct( $parser ); - /** Create a new top-level frame for expansion of a page */ + /** + * Create a new top-level frame for expansion of a page + * + * @return PPFrame + */ function newFrame(); - /** Create a new custom frame for programmatic use of parameter replacement as used in some extensions */ + /** + * Create a new custom frame for programmatic use of parameter replacement as used in some extensions + * + * @param $args array + * + * @return PPFrame + */ function newCustomFrame( $args ); - /** Create a new custom node for programmatic use of parameter replacement as used in some extensions */ + /** + * Create a new custom node for programmatic use of parameter replacement as used in some extensions + * + * @param $values + */ function newPartNodeArray( $values ); - /** Preprocess text to a PPNode */ + /** + * Preprocess text to a PPNode + * + * @param $text + * @param $flags + * + * @return PPNode + */ function preprocessToObj( $text, $flags = 0 ); } @@ -39,6 +64,11 @@ interface PPFrame { /** * Create a child frame + * + * @param $args array + * @param $title Title + * + * @return PPFrame */ function newChild( $args = false, $title = false ); @@ -70,6 +100,8 @@ interface PPFrame { /** * Returns true if there are no arguments in this frame + * + * @return bool */ function isEmpty(); @@ -95,6 +127,10 @@ interface PPFrame { /** * Returns true if the infinite loop check is OK, false if a loop is detected + * + * @param $title + * + * @return bool */ function loopCheck( $title ); @@ -126,6 +162,8 @@ interface PPNode 
{ /** * Get the first child of a tree node. False if there isn't one. + * + * @return PPNode */ function getFirstChild(); diff --git a/includes/parser/Preprocessor_DOM.php b/includes/parser/Preprocessor_DOM.php index 2b635f7c..755563a0 100644 --- a/includes/parser/Preprocessor_DOM.php +++ b/includes/parser/Preprocessor_DOM.php @@ -5,12 +5,18 @@ * @file * @ingroup Parser */ - + /** * @ingroup Parser */ class Preprocessor_DOM implements Preprocessor { - var $parser, $memoryLimit; + + /** + * @var Parser + */ + var $parser; + + var $memoryLimit; const CACHE_VERSION = 1; @@ -27,21 +33,31 @@ class Preprocessor_DOM implements Preprocessor { } } + /** + * @return PPFrame_DOM + */ function newFrame() { return new PPFrame_DOM( $this ); } + /** + * @param $args + * @return PPCustomFrame_DOM + */ function newCustomFrame( $args ) { return new PPCustomFrame_DOM( $this, $args ); } + /** + * @param $values + * @return PPNode_DOM + */ function newPartNodeArray( $values ) { //NOTE: DOM manipulation is slower than building & parsing XML! (or so Tim sais) - $xml = ""; - $xml .= "<list>"; + $xml = "<list>"; foreach ( $values as $k => $val ) { - + if ( is_int( $k ) ) { $xml .= "<part><name index=\"$k\"/><value>" . htmlspecialchars( $val ) ."</value></part>"; } else { @@ -59,6 +75,10 @@ class Preprocessor_DOM implements Preprocessor { return $node; } + /** + * @throws MWException + * @return bool + */ function memCheck() { if ( $this->memoryLimit === false ) { return; @@ -91,14 +111,15 @@ class Preprocessor_DOM implements Preprocessor { * cache may be implemented at a later date which takes further advantage of these strict * dependency requirements. 
* - * @private + * @return PPNode_DOM */ function preprocessToObj( $text, $flags = 0 ) { wfProfileIn( __METHOD__ ); global $wgMemc, $wgPreprocessorCacheThreshold; - + $xml = false; - $cacheable = strlen( $text ) > $wgPreprocessorCacheThreshold; + $cacheable = ( $wgPreprocessorCacheThreshold !== false + && strlen( $text ) > $wgPreprocessorCacheThreshold ); if ( $cacheable ) { wfProfileIn( __METHOD__.'-cacheable' ); @@ -134,7 +155,8 @@ class Preprocessor_DOM implements Preprocessor { if ( !$result ) { // Try running the XML through UtfNormal to get rid of invalid characters $xml = UtfNormal::cleanUp( $xml ); - $result = $dom->loadXML( $xml ); + // 1 << 19 == XML_PARSE_HUGE, needed so newer versions of libxml2 don't barf when the XML is >256 levels deep + $result = $dom->loadXML( $xml, 1 << 19 ); if ( !$result ) { throw new MWException( __METHOD__.' generated invalid XML' ); } @@ -147,7 +169,12 @@ class Preprocessor_DOM implements Preprocessor { wfProfileOut( __METHOD__ ); return $obj; } - + + /** + * @param $text string + * @param $flags int + * @return string + */ function preprocessToXml( $text, $flags = 0 ) { wfProfileIn( __METHOD__ ); $rules = array( @@ -317,7 +344,7 @@ class Preprocessor_DOM implements Preprocessor { // Search backwards for leading whitespace $wsStart = $i ? ( $i - strspn( $revText, ' ', strlen( $text ) - $i ) ) : 0; // Search forwards for trailing whitespace - // $wsEnd will be the position of the last space + // $wsEnd will be the position of the last space (or the '>' if there's none) $wsEnd = $endPos + 2 + strspn( $text, ' ', $endPos + 3 ); // Eat the line if possible // TODO: This could theoretically be done if $wsStart == 0, i.e. 
for comments at @@ -344,13 +371,11 @@ class Preprocessor_DOM implements Preprocessor { if ( $stack->top ) { $part = $stack->top->getCurrentPart(); - if ( isset( $part->commentEnd ) && $part->commentEnd == $wsStart - 1 ) { - // Comments abutting, no change in visual end - $part->commentEnd = $wsEnd; - } else { + if ( ! (isset( $part->commentEnd ) && $part->commentEnd == $wsStart - 1 )) { $part->visualEnd = $wsStart; - $part->commentEnd = $endPos; } + // Else comments abutting, no change in visual end + $part->commentEnd = $endPos; } $i = $endPos + 1; $inner = substr( $text, $startPos, $endPos - $startPos + 1 ); @@ -389,8 +414,8 @@ class Preprocessor_DOM implements Preprocessor { } else { $attrEnd = $tagEndPos; // Find closing tag - if ( preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i", - $text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1 ) ) + if ( preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i", + $text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1 ) ) { $inner = substr( $text, $tagEndPos + 1, $matches[0][1] - $tagEndPos - 1 ); $i = $matches[0][1] + strlen( $matches[0][0] ); @@ -423,9 +448,7 @@ class Preprocessor_DOM implements Preprocessor { $accum .= '<inner>' . htmlspecialchars( $inner ) . '</inner>'; } $accum .= $close . '</ext>'; - } - - elseif ( $found == 'line-start' ) { + } elseif ( $found == 'line-start' ) { // Is this the start of a heading? 
// Line break belongs before the heading element in any case if ( $fakeLineStart ) { @@ -453,9 +476,7 @@ class Preprocessor_DOM implements Preprocessor { extract( $flags ); $i += $count; } - } - - elseif ( $found == 'line-end' ) { + } elseif ( $found == 'line-end' ) { $piece = $stack->top; // A heading must be open, otherwise \n wouldn't have been in the search list assert( $piece->open == "\n" ); @@ -522,7 +543,7 @@ class Preprocessor_DOM implements Preprocessor { 'open' => $curChar, 'close' => $rule['end'], 'count' => $count, - 'lineStart' => ($i > 0 && $text[$i-1] == "\n"), + 'lineStart' => ($i == 0 || $text[$i-1] == "\n"), ); $stack->push( $piece ); @@ -557,7 +578,7 @@ class Preprocessor_DOM implements Preprocessor { } } - if ($matchingCount <= 0) { + if ( $matchingCount <= 0 ) { # No matching element found in callback array # Output a literal closing brace and continue $accum .= htmlspecialchars( str_repeat( $curChar, $count ) ); @@ -607,7 +628,7 @@ class Preprocessor_DOM implements Preprocessor { $accum =& $stack->getAccum(); # Re-add the old stack element if it still has unmatched opening characters remaining - if ($matchingCount < $piece->count) { + if ( $matchingCount < $piece->count ) { $piece->parts = array( new PPDPart ); $piece->count -= $matchingCount; # do we still qualify for any callback with remaining count? 
@@ -630,16 +651,12 @@ class Preprocessor_DOM implements Preprocessor { # Add XML element to the enclosing accumulator $accum .= $element; - } - - elseif ( $found == 'pipe' ) { + } elseif ( $found == 'pipe' ) { $findEquals = true; // shortcut for getFlags() $stack->addPart(); $accum =& $stack->getAccum(); ++$i; - } - - elseif ( $found == 'equals' ) { + } elseif ( $found == 'equals' ) { $findEquals = false; // shortcut for getFlags() $stack->getCurrentPart()->eqpos = strlen( $accum ); $accum .= '='; @@ -655,7 +672,7 @@ class Preprocessor_DOM implements Preprocessor { $xml = $stack->rootAccum; wfProfileOut( __METHOD__ ); - + return $xml; } } @@ -665,7 +682,12 @@ class Preprocessor_DOM implements Preprocessor { * @ingroup Parser */ class PPDStack { - var $stack, $rootAccum, $top; + var $stack, $rootAccum; + + /** + * @var PPDStack + */ + var $top; var $out; var $elementClass = 'PPDStackElement'; @@ -678,6 +700,9 @@ class PPDStack { $this->accum =& $this->rootAccum; } + /** + * @return int + */ function count() { return count( $this->stack ); } @@ -726,6 +751,9 @@ class PPDStack { $this->accum =& $this->top->getAccum(); } + /** + * @return array + */ function getFlags() { if ( !count( $this->stack ) ) { return array( @@ -773,6 +801,9 @@ class PPDStackElement { return $this->parts[count($this->parts) - 1]; } + /** + * @return array + */ function getFlags() { $partCount = count( $this->parts ); $findPipe = $this->open != "\n" && $this->open != '['; @@ -785,6 +816,8 @@ class PPDStackElement { /** * Get the output string that would result if the close is not found. 
+ * + * @return string */ function breakSyntax( $openingCount = false ) { if ( $this->open == "\n" ) { @@ -829,7 +862,21 @@ class PPDPart { * @ingroup Parser */ class PPFrame_DOM implements PPFrame { - var $preprocessor, $parser, $title; + + /** + * @var Preprocessor + */ + var $preprocessor; + + /** + * @var Parser + */ + var $parser; + + /** + * @var Title + */ + var $title; var $titleCache; /** @@ -847,7 +894,7 @@ class PPFrame_DOM implements PPFrame { /** * Construct a new preprocessor frame. - * @param $preprocessor Preprocessor: The parent preprocessor + * @param $preprocessor Preprocessor The parent preprocessor */ function __construct( $preprocessor ) { $this->preprocessor = $preprocessor; @@ -861,6 +908,8 @@ class PPFrame_DOM implements PPFrame { /** * Create a new child frame * $args is optionally a multi-root PPNode or array containing the template arguments + * + * @return PPTemplateFrame_DOM */ function newChild( $args = false, $title = false ) { $namedArgs = array(); @@ -896,6 +945,12 @@ class PPFrame_DOM implements PPFrame { return new PPTemplateFrame_DOM( $this->preprocessor, $this, $numberedArgs, $namedArgs, $title ); } + /** + * @throws MWException + * @param $root + * @param $flags int + * @return string + */ function expand( $root, $flags = 0 ) { static $expansionDepth = 0; if ( is_string( $root ) ) { @@ -1058,11 +1113,11 @@ class PPFrame_DOM implements PPFrame { # Heading $s = $this->expand( $contextNode->childNodes, $flags ); - # Insert a heading marker only for <h> children of <root> - # This is to stop extractSections from going over multiple tree levels - if ( $contextNode->parentNode->nodeName == 'root' - && $this->parser->ot['html'] ) - { + # Insert a heading marker only for <h> children of <root> + # This is to stop extractSections from going over multiple tree levels + if ( $contextNode->parentNode->nodeName == 'root' + && $this->parser->ot['html'] ) + { # Insert heading index marker $headingIndex = $contextNode->getAttribute( 'i' ); 
$titleText = $this->title->getPrefixedDBkey(); @@ -1071,7 +1126,7 @@ class PPFrame_DOM implements PPFrame { $marker = "{$this->parser->mUniqPrefix}-h-$serial-" . Parser::MARKER_SUFFIX; $count = $contextNode->getAttribute( 'level' ); $s = substr( $s, 0, $count ) . $marker . substr( $s, $count ); - $this->parser->mStripState->general->setPair( $marker, '' ); + $this->parser->mStripState->addGeneral( $marker, '' ); } $out .= $s; } else { @@ -1107,6 +1162,11 @@ class PPFrame_DOM implements PPFrame { return $outStack[0]; } + /** + * @param $sep + * @param $flags + * @return string + */ function implodeWithFlags( $sep, $flags /*, ... */ ) { $args = array_slice( func_get_args(), 2 ); @@ -1132,6 +1192,8 @@ class PPFrame_DOM implements PPFrame { /** * Implode with no flags specified * This previously called implodeWithFlags but has now been inlined to reduce stack depth + * + * @return string */ function implode( $sep /*, ... */ ) { $args = array_slice( func_get_args(), 1 ); @@ -1160,6 +1222,8 @@ class PPFrame_DOM implements PPFrame { /** * Makes an object that, when expand()ed, will be the same as one obtained * with implode() + * + * @return array */ function virtualImplode( $sep /*, ... 
*/ ) { $args = array_slice( func_get_args(), 1 ); @@ -1225,20 +1289,31 @@ class PPFrame_DOM implements PPFrame { } } + /** + * @return array + */ function getArguments() { return array(); } + /** + * @return array + */ function getNumberedArguments() { return array(); } + /** + * @return array + */ function getNamedArguments() { return array(); } /** * Returns true if there are no arguments in this frame + * + * @return bool */ function isEmpty() { return true; @@ -1250,6 +1325,8 @@ class PPFrame_DOM implements PPFrame { /** * Returns true if the infinite loop check is OK, false if a loop is detected + * + * @return bool */ function loopCheck( $title ) { return !isset( $this->loopCheckHash[$title->getPrefixedDBkey()] ); @@ -1257,6 +1334,8 @@ class PPFrame_DOM implements PPFrame { /** * Return true if the frame is a template frame + * + * @return bool */ function isTemplate() { return false; @@ -1268,9 +1347,21 @@ class PPFrame_DOM implements PPFrame { * @ingroup Parser */ class PPTemplateFrame_DOM extends PPFrame_DOM { - var $numberedArgs, $namedArgs, $parent; + var $numberedArgs, $namedArgs; + + /** + * @var PPFrame_DOM + */ + var $parent; var $numberedExpansionCache, $namedExpansionCache; + /** + * @param $preprocessor + * @param $parent PPFrame_DOM + * @param $numberedArgs array + * @param $namedArgs array + * @param $title Title + */ function __construct( $preprocessor, $parent = false, $numberedArgs = array(), $namedArgs = array(), $title = false ) { parent::__construct( $preprocessor ); @@ -1305,8 +1396,11 @@ class PPTemplateFrame_DOM extends PPFrame_DOM { $s .= '}'; return $s; } + /** * Returns true if there are no arguments in this frame + * + * @return bool */ function isEmpty() { return !count( $this->numberedArgs ) && !count( $this->namedArgs ); @@ -1321,7 +1415,7 @@ class PPTemplateFrame_DOM extends PPFrame_DOM { } return $arguments; } - + function getNumberedArguments() { $arguments = array(); foreach ( array_keys($this->numberedArgs) as $key ) { @@ 
-1329,7 +1423,7 @@ class PPTemplateFrame_DOM extends PPFrame_DOM { } return $arguments; } - + function getNamedArguments() { $arguments = array(); foreach ( array_keys($this->namedArgs) as $key ) { @@ -1371,6 +1465,8 @@ class PPTemplateFrame_DOM extends PPFrame_DOM { /** * Return true if the frame is a template frame + * + * @return bool */ function isTemplate() { return true; @@ -1405,6 +1501,9 @@ class PPCustomFrame_DOM extends PPFrame_DOM { return $s; } + /** + * @return bool + */ function isEmpty() { return !count( $this->args ); } @@ -1421,14 +1520,22 @@ class PPCustomFrame_DOM extends PPFrame_DOM { * @ingroup Parser */ class PPNode_DOM implements PPNode { + + /** + * @var DOMElement + */ var $node; + var $xpath; function __construct( $node, $xpath = false ) { $this->node = $node; } - function __get( $name ) { - if ( $name == 'xpath' ) { + /** + * @return DOMXPath + */ + function getXPath() { + if ( $this->xpath === null ) { $this->xpath = new DOMXPath( $this->node->ownerDocument ); } return $this->xpath; @@ -1446,22 +1553,39 @@ class PPNode_DOM implements PPNode { return $s; } + /** + * @return bool|PPNode_DOM + */ function getChildren() { return $this->node->childNodes ? new self( $this->node->childNodes ) : false; } + /** + * @return bool|PPNode_DOM + */ function getFirstChild() { return $this->node->firstChild ? new self( $this->node->firstChild ) : false; } + /** + * @return bool|PPNode_DOM + */ function getNextSibling() { return $this->node->nextSibling ? 
new self( $this->node->nextSibling ) : false; } + /** + * @param $type + * + * @return bool|PPNode_DOM + */ function getChildrenOfType( $type ) { - return new self( $this->xpath->query( $type, $this->node ) ); + return new self( $this->getXPath()->query( $type, $this->node ) ); } + /** + * @return int + */ function getLength() { if ( $this->node instanceof DOMNodeList ) { return $this->node->length; @@ -1470,11 +1594,18 @@ class PPNode_DOM implements PPNode { } } + /** + * @param $i + * @return bool|PPNode_DOM + */ function item( $i ) { $item = $this->node->item( $i ); return $item ? new self( $item ) : false; } + /** + * @return string + */ function getName() { if ( $this->node instanceof DOMNodeList ) { return '#nodelist'; @@ -1488,10 +1619,13 @@ class PPNode_DOM implements PPNode { * name PPNode name * index String index * value PPNode value + * + * @return array */ function splitArg() { - $names = $this->xpath->query( 'name', $this->node ); - $values = $this->xpath->query( 'value', $this->node ); + $xpath = $this->getXPath(); + $names = $xpath->query( 'name', $this->node ); + $values = $xpath->query( 'value', $this->node ); if ( !$names->length || !$values->length ) { throw new MWException( 'Invalid brace node passed to ' . __METHOD__ ); } @@ -1506,12 +1640,15 @@ class PPNode_DOM implements PPNode { /** * Split an <ext> node into an associative array containing name, attr, inner and close * All values in the resulting array are PPNodes. Inner and close are optional. 
+ * + * @return array */ function splitExt() { - $names = $this->xpath->query( 'name', $this->node ); - $attrs = $this->xpath->query( 'attr', $this->node ); - $inners = $this->xpath->query( 'inner', $this->node ); - $closes = $this->xpath->query( 'close', $this->node ); + $xpath = $this->getXPath(); + $names = $xpath->query( 'name', $this->node ); + $attrs = $xpath->query( 'attr', $this->node ); + $inners = $xpath->query( 'inner', $this->node ); + $closes = $xpath->query( 'close', $this->node ); if ( !$names->length || !$attrs->length ) { throw new MWException( 'Invalid ext node passed to ' . __METHOD__ ); } @@ -1531,7 +1668,7 @@ class PPNode_DOM implements PPNode { * Split a <h> node */ function splitHeading() { - if ( !$this->nodeName == 'h' ) { + if ( $this->getName() !== 'h' ) { throw new MWException( 'Invalid h node passed to ' . __METHOD__ ); } return array( diff --git a/includes/parser/Preprocessor_Hash.php b/includes/parser/Preprocessor_Hash.php index 6cb2febc..c2d7d3d8 100644 --- a/includes/parser/Preprocessor_Hash.php +++ b/includes/parser/Preprocessor_Hash.php @@ -5,7 +5,7 @@ * @file * @ingroup Parser */ - + /** * Differences from DOM schema: * * attribute nodes are children @@ -13,22 +13,36 @@ * @ingroup Parser */ class Preprocessor_Hash implements Preprocessor { + /** + * @var Parser + */ var $parser; - + const CACHE_VERSION = 1; function __construct( $parser ) { $this->parser = $parser; } + /** + * @return PPFrame_Hash + */ function newFrame() { return new PPFrame_Hash( $this ); } + /** + * @param $args + * @return PPCustomFrame_Hash + */ function newCustomFrame( $args ) { return new PPCustomFrame_Hash( $this, $args ); } + /** + * @param $values array + * @return PPNode_Hash_Array + */ function newPartNodeArray( $values ) { $list = array(); @@ -76,16 +90,15 @@ class Preprocessor_Hash implements Preprocessor { * cache may be implemented at a later date which takes further advantage of these strict * dependency requirements. 
* - * @private + * @return PPNode_Hash_Tree */ function preprocessToObj( $text, $flags = 0 ) { wfProfileIn( __METHOD__ ); - - + // Check cache. global $wgMemc, $wgPreprocessorCacheThreshold; - - $cacheable = strlen( $text ) > $wgPreprocessorCacheThreshold; + + $cacheable = $wgPreprocessorCacheThreshold !== false && strlen( $text ) > $wgPreprocessorCacheThreshold; if ( $cacheable ) { wfProfileIn( __METHOD__.'-cacheable' ); @@ -272,7 +285,7 @@ class Preprocessor_Hash implements Preprocessor { // Search backwards for leading whitespace $wsStart = $i ? ( $i - strspn( $revText, ' ', strlen( $text ) - $i ) ) : 0; // Search forwards for trailing whitespace - // $wsEnd will be the position of the last space + // $wsEnd will be the position of the last space (or the '>' if there's none) $wsEnd = $endPos + 2 + strspn( $text, ' ', $endPos + 3 ); // Eat the line if possible // TODO: This could theoretically be done if $wsStart == 0, i.e. for comments at @@ -302,13 +315,11 @@ class Preprocessor_Hash implements Preprocessor { if ( $stack->top ) { $part = $stack->top->getCurrentPart(); - if ( isset( $part->commentEnd ) && $part->commentEnd == $wsStart - 1 ) { - // Comments abutting, no change in visual end - $part->commentEnd = $wsEnd; - } else { + if ( ! (isset( $part->commentEnd ) && $part->commentEnd == $wsStart - 1 )) { $part->visualEnd = $wsStart; - $part->commentEnd = $endPos; } + // Else comments abutting, no change in visual end + $part->commentEnd = $endPos; } $i = $endPos + 1; $inner = substr( $text, $startPos, $endPos - $startPos + 1 ); @@ -348,8 +359,8 @@ class Preprocessor_Hash implements Preprocessor { } else { $attrEnd = $tagEndPos; // Find closing tag - if ( preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i", - $text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1 ) ) + if ( preg_match( "/<\/" . preg_quote( $name, '/' ) . 
"\s*>/i", + $text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1 ) ) { $inner = substr( $text, $tagEndPos + 1, $matches[0][1] - $tagEndPos - 1 ); $i = $matches[0][1] + strlen( $matches[0][0] ); @@ -414,9 +425,7 @@ class Preprocessor_Hash implements Preprocessor { extract( $stack->getFlags() ); $i += $count; } - } - - elseif ( $found == 'line-end' ) { + } elseif ( $found == 'line-end' ) { $piece = $stack->top; // A heading must be open, otherwise \n wouldn't have been in the search list assert( $piece->open == "\n" ); @@ -478,9 +487,7 @@ class Preprocessor_Hash implements Preprocessor { // another heading. Infinite loops are avoided because the next iteration MUST // hit the heading open case above, which unconditionally increments the // input pointer. - } - - elseif ( $found == 'open' ) { + } elseif ( $found == 'open' ) { # count opening brace characters $count = strspn( $text, $curChar, $i ); @@ -491,7 +498,7 @@ class Preprocessor_Hash implements Preprocessor { 'open' => $curChar, 'close' => $rule['end'], 'count' => $count, - 'lineStart' => ($i > 0 && $text[$i-1] == "\n"), + 'lineStart' => ($i == 0 || $text[$i-1] == "\n"), ); $stack->push( $piece ); @@ -502,9 +509,7 @@ class Preprocessor_Hash implements Preprocessor { $accum->addLiteral( str_repeat( $curChar, $count ) ); } $i += $count; - } - - elseif ( $found == 'close' ) { + } elseif ( $found == 'close' ) { $piece = $stack->top; # lets check if there are enough characters for closing brace $maxCount = $piece->count; @@ -644,16 +649,12 @@ class Preprocessor_Hash implements Preprocessor { } else { $accum->addAccum( $element ); } - } - - elseif ( $found == 'pipe' ) { + } elseif ( $found == 'pipe' ) { $findEquals = true; // shortcut for getFlags() $stack->addPart(); $accum =& $stack->getAccum(); ++$i; - } - - elseif ( $found == 'equals' ) { + } elseif ( $found == 'equals' ) { $findEquals = false; // shortcut for getFlags() $accum->addNodeWithText( 'equals', '=' ); $stack->getCurrentPart()->eqpos = $accum->lastNode; 
@@ -676,7 +677,7 @@ class Preprocessor_Hash implements Preprocessor { $rootNode = new PPNode_Hash_Tree( 'root' ); $rootNode->firstChild = $stack->rootAccum->firstNode; $rootNode->lastChild = $stack->rootAccum->lastNode; - + // Cache if ($cacheable) { $cacheValue = sprintf( "%08d", self::CACHE_VERSION ) . serialize( $rootNode ); @@ -685,7 +686,7 @@ class Preprocessor_Hash implements Preprocessor { wfProfileOut( __METHOD__.'-cacheable' ); wfDebugLog( "Preprocessor", "Saved preprocessor Hash to memcached (key $cacheKey)" ); } - + wfProfileOut( __METHOD__ ); return $rootNode; } @@ -714,6 +715,8 @@ class PPDStackElement_Hash extends PPDStackElement { /** * Get the accumulator that would result if the close is not found. + * + * @return PPDAccum_Hash */ function breakSyntax( $openingCount = false ) { if ( $this->open == "\n" ) { @@ -818,7 +821,21 @@ class PPDAccum_Hash { * @ingroup Parser */ class PPFrame_Hash implements PPFrame { - var $preprocessor, $parser, $title; + + /** + * @var Parser + */ + var $parser; + + /** + * @var Preprocessor + */ + var $preprocessor; + + /** + * @var Title + */ + var $title; var $titleCache; /** @@ -850,6 +867,11 @@ class PPFrame_Hash implements PPFrame { /** * Create a new child frame * $args is optionally a multi-root PPNode or array containing the template arguments + * + * @param $args PPNode_Hash_Array|array + * @param $title Title|false + * + * @return PPTemplateFrame_Hash */ function newChild( $args = false, $title = false ) { $namedArgs = array(); @@ -880,14 +902,19 @@ class PPFrame_Hash implements PPFrame { return new PPTemplateFrame_Hash( $this->preprocessor, $this, $numberedArgs, $namedArgs, $title ); } + /** + * @throws MWException + * @param $root + * @param $flags int + * @return string + */ function expand( $root, $flags = 0 ) { static $expansionDepth = 0; if ( is_string( $root ) ) { return $root; } - if ( ++$this->parser->mPPNodeCount > $this->parser->mOptions->getMaxPPNodeCount() ) - { + if ( ++$this->parser->mPPNodeCount 
> $this->parser->mOptions->getMaxPPNodeCount() ) { return '<span class="error">Node-count limit exceeded</span>'; } if ( $expansionDepth > $this->parser->mOptions->getMaxPPExpandDepth() ) { @@ -1016,7 +1043,7 @@ class PPFrame_Hash implements PPFrame { $serial = count( $this->parser->mHeadings ) - 1; $marker = "{$this->parser->mUniqPrefix}-h-$serial-" . Parser::MARKER_SUFFIX; $s = substr( $s, 0, $bits['level'] ) . $marker . substr( $s, $bits['level'] ); - $this->parser->mStripState->general->setPair( $marker, '' ); + $this->parser->mStripState->addGeneral( $marker, '' ); $out .= $s; } else { # Expand in virtual stack @@ -1050,6 +1077,11 @@ class PPFrame_Hash implements PPFrame { return $outStack[0]; } + /** + * @param $sep + * @param $flags + * @return string + */ function implodeWithFlags( $sep, $flags /*, ... */ ) { $args = array_slice( func_get_args(), 2 ); @@ -1077,6 +1109,7 @@ class PPFrame_Hash implements PPFrame { /** * Implode with no flags specified * This previously called implodeWithFlags but has now been inlined to reduce stack depth + * @return string */ function implode( $sep /*, ... */ ) { $args = array_slice( func_get_args(), 1 ); @@ -1105,6 +1138,8 @@ class PPFrame_Hash implements PPFrame { /** * Makes an object that, when expand()ed, will be the same as one obtained * with implode() + * + * @return PPNode_Hash_Array */ function virtualImplode( $sep /*, ... */ ) { $args = array_slice( func_get_args(), 1 ); @@ -1132,6 +1167,8 @@ class PPFrame_Hash implements PPFrame { /** * Virtual implode with brackets + * + * @return PPNode_Hash_Array */ function virtualBracketedImplode( $start, $sep, $end /*, ... 
*/ ) { $args = array_slice( func_get_args(), 3 ); @@ -1162,6 +1199,10 @@ class PPFrame_Hash implements PPFrame { return 'frame{}'; } + /** + * @param $level bool + * @return array|bool|String + */ function getPDBK( $level = false ) { if ( $level === false ) { return $this->title->getPrefixedDBkey(); @@ -1170,31 +1211,50 @@ class PPFrame_Hash implements PPFrame { } } + /** + * @return array + */ function getArguments() { return array(); } + /** + * @return array + */ function getNumberedArguments() { return array(); } + /** + * @return array + */ function getNamedArguments() { return array(); } /** * Returns true if there are no arguments in this frame + * + * @return bool */ function isEmpty() { return true; } + /** + * @param $name + * @return bool + */ function getArgument( $name ) { return false; } /** * Returns true if the infinite loop check is OK, false if a loop is detected + * + * @param $title Title + * + * @return bool */ function loopCheck( $title ) { return !isset( $this->loopCheckHash[$title->getPrefixedDBkey()] ); @@ -1202,6 +1262,8 @@ class PPFrame_Hash implements PPFrame { /** * Return true if the frame is a template frame + * + * @return bool */ function isTemplate() { return false; @@ -1216,6 +1278,13 @@ class PPTemplateFrame_Hash extends PPFrame_Hash { var $numberedArgs, $namedArgs, $parent; var $numberedExpansionCache, $namedExpansionCache; + /** + * @param $preprocessor + * @param $parent + * @param $numberedArgs array + * @param $namedArgs array + * @param $title Title + */ function __construct( $preprocessor, $parent = false, $numberedArgs = array(), $namedArgs = array(), $title = false ) { parent::__construct( $preprocessor ); @@ -1252,11 +1321,16 @@ class PPTemplateFrame_Hash extends PPFrame_Hash { } /** * Returns true if there are no arguments in this frame + * + * @return bool */ function isEmpty() { return !count( $this->numberedArgs ) && !count( $this->namedArgs ); } + /** + * @return array + */ function getArguments() { $arguments = 
array(); foreach ( array_merge( @@ -1266,7 +1340,10 @@ class PPTemplateFrame_Hash extends PPFrame_Hash { } return $arguments; } - + + /** + * @return array + */ function getNumberedArguments() { $arguments = array(); foreach ( array_keys($this->numberedArgs) as $key ) { @@ -1274,7 +1351,10 @@ class PPTemplateFrame_Hash extends PPFrame_Hash { } return $arguments; } - + + /** + * @return array + */ function getNamedArguments() { $arguments = array(); foreach ( array_keys($this->namedArgs) as $key ) { @@ -1283,6 +1363,10 @@ class PPTemplateFrame_Hash extends PPFrame_Hash { return $arguments; } + /** + * @param $index + * @return array|bool + */ function getNumberedArgument( $index ) { if ( !isset( $this->numberedArgs[$index] ) ) { return false; @@ -1294,6 +1378,10 @@ class PPTemplateFrame_Hash extends PPFrame_Hash { return $this->numberedExpansionCache[$index]; } + /** + * @param $name + * @return bool + */ function getNamedArgument( $name ) { if ( !isset( $this->namedArgs[$name] ) ) { return false; @@ -1306,6 +1394,10 @@ class PPTemplateFrame_Hash extends PPFrame_Hash { return $this->namedExpansionCache[$name]; } + /** + * @param $name + * @return array|bool + */ function getArgument( $name ) { $text = $this->getNumberedArgument( $name ); if ( $text === false ) { @@ -1316,6 +1408,8 @@ class PPTemplateFrame_Hash extends PPFrame_Hash { /** * Return true if the frame is a template frame + * + * @return bool */ function isTemplate() { return true; @@ -1350,10 +1444,17 @@ class PPCustomFrame_Hash extends PPFrame_Hash { return $s; } + /** + * @return bool + */ function isEmpty() { return !count( $this->args ); } + /** + * @param $index + * @return bool + */ function getArgument( $index ) { if ( !isset( $this->args[$index] ) ) { return false; @@ -1390,6 +1491,11 @@ class PPNode_Hash_Tree implements PPNode { } } + /** + * @param $name + * @param $text + * @return PPNode_Hash_Tree + */ static function newWithText( $name, $text ) { $obj = new self( $name ); $obj->addChild( new 
PPNode_Hash_Text( $text ) ); @@ -1405,6 +1511,9 @@ class PPNode_Hash_Tree implements PPNode { } } + /** + * @return PPNode_Hash_Array + */ function getChildren() { $children = array(); for ( $child = $this->firstChild; $child; $child = $child->nextSibling ) { @@ -1431,9 +1540,24 @@ class PPNode_Hash_Tree implements PPNode { return $children; } - function getLength() { return false; } - function item( $i ) { return false; } + /** + * @return bool + */ + function getLength() { + return false; + } + /** + * @param $i + * @return bool + */ + function item( $i ) { + return false; + } + + /** + * @return string + */ function getName() { return $this->name; } @@ -1443,6 +1567,8 @@ class PPNode_Hash_Tree implements PPNode { * name PPNode name * index String index * value PPNode value + * + * @return array */ function splitArg() { $bits = array(); @@ -1474,6 +1600,8 @@ class PPNode_Hash_Tree implements PPNode { /** * Split an <ext> node into an associative array containing name, attr, inner and close * All values in the resulting array are PPNodes. Inner and close are optional. + * + * @return array */ function splitExt() { $bits = array(); @@ -1499,6 +1627,8 @@ class PPNode_Hash_Tree implements PPNode { /** * Split an <h> node + * + * @return array */ function splitHeading() { if ( $this->name !== 'h' ) { @@ -1523,6 +1653,8 @@ class PPNode_Hash_Tree implements PPNode { /** * Split a <template> or <tplarg> node + * + * @return array */ function splitTemplate() { $parts = array(); diff --git a/includes/parser/Preprocessor_HipHop.hphp b/includes/parser/Preprocessor_HipHop.hphp new file mode 100644 index 00000000..dc404f7c --- /dev/null +++ b/includes/parser/Preprocessor_HipHop.hphp @@ -0,0 +1,1941 @@ +<?php +/** + * A preprocessor optimised for HipHop, using HipHop-specific syntax. 
+ * vim: ft=php + * + * @file + * @ingroup Parser + */ + +/** + * @ingroup Parser + */ +class Preprocessor_HipHop implements Preprocessor { + /** + * @var Parser + */ + var $parser; + + const CACHE_VERSION = 1; + + function __construct( $parser ) { + $this->parser = $parser; + } + + /** + * @return PPFrame_HipHop + */ + function newFrame() { + return new PPFrame_HipHop( $this ); + } + + /** + * @param $args + * @return PPCustomFrame_HipHop + */ + function newCustomFrame( array $args ) { + return new PPCustomFrame_HipHop( $this, $args ); + } + + /** + * @param $values array + * @return PPNode_HipHop_Array + */ + function newPartNodeArray( $values ) { + $list = array(); + + foreach ( $values as $k => $val ) { + $partNode = new PPNode_HipHop_Tree( 'part' ); + $nameNode = new PPNode_HipHop_Tree( 'name' ); + + if ( is_int( $k ) ) { + $nameNode->addChild( new PPNode_HipHop_Attr( 'index', $k ) ); + $partNode->addChild( $nameNode ); + } else { + $nameNode->addChild( new PPNode_HipHop_Text( $k ) ); + $partNode->addChild( $nameNode ); + $partNode->addChild( new PPNode_HipHop_Text( '=' ) ); + } + + $valueNode = new PPNode_HipHop_Tree( 'value' ); + $valueNode->addChild( new PPNode_HipHop_Text( $val ) ); + $partNode->addChild( $valueNode ); + + $list[] = $partNode; + } + + $node = new PPNode_HipHop_Array( $list ); + return $node; + } + + /** + * Preprocess some wikitext and return the document tree. + * This is the ghost of Parser::replace_variables(). + * + * @param $text String: the text to parse + * @param $flags Integer: bitwise combination of: + * Parser::PTD_FOR_INCLUSION Handle <noinclude>/<includeonly> as if the text is being + * included. Default is to assume a direct page view. + * + * The generated DOM tree must depend only on the input text and the flags. + * The DOM tree must be the same in OT_HTML and OT_WIKI mode, to avoid a regression of bug 4899. 
+ * + * Any flag added to the $flags parameter here, or any other parameter liable to cause a + * change in the DOM tree for a given text, must be passed through the section identifier + * in the section edit link and thus back to extractSections(). + * + * The output of this function is currently only cached in process memory, but a persistent + * cache may be implemented at a later date which takes further advantage of these strict + * dependency requirements. + * + * @return PPNode_HipHop_Tree + */ + function preprocessToObj( string $text, int $flags = 0 ) { + wfProfileIn( __METHOD__ ); + + // Check cache. + global $wgMemc, $wgPreprocessorCacheThreshold; + + $cacheable = ($wgPreprocessorCacheThreshold !== false && strlen( $text ) > $wgPreprocessorCacheThreshold); + if ( $cacheable ) { + wfProfileIn( __METHOD__.'-cacheable' ); + + $cacheKey = strval( wfMemcKey( 'preprocess-hash', md5($text), $flags ) ); + $cacheValue = strval( $wgMemc->get( $cacheKey ) ); + if ( $cacheValue !== '' ) { + $version = substr( $cacheValue, 0, 8 ); + if ( intval( $version ) == self::CACHE_VERSION ) { + $hash = unserialize( substr( $cacheValue, 8 ) ); + // From the cache + wfDebugLog( "Preprocessor", + "Loaded preprocessor hash from memcached (key $cacheKey)" ); + wfProfileOut( __METHOD__.'-cacheable' ); + wfProfileOut( __METHOD__ ); + return $hash; + } + } + wfProfileIn( __METHOD__.'-cache-miss' ); + } + + $rules = array( + '{' => array( + 'end' => '}', + 'names' => array( + 2 => 'template', + 3 => 'tplarg', + ), + 'min' => 2, + 'max' => 3, + ), + '[' => array( + 'end' => ']', + 'names' => array( 2 => 'LITERAL' ), + 'min' => 2, + 'max' => 2, + ) + ); + + $forInclusion = (bool)( $flags & Parser::PTD_FOR_INCLUSION ); + + $xmlishElements = (array)$this->parser->getStripList(); + $enableOnlyinclude = false; + if ( $forInclusion ) { + $ignoredTags = array( 'includeonly', '/includeonly' ); + $ignoredElements = array( 'noinclude' ); + $xmlishElements[] = 'noinclude'; + if ( strpos( $text, 
'<onlyinclude>' ) !== false && strpos( $text, '</onlyinclude>' ) !== false ) { + $enableOnlyinclude = true; + } + } else if ( $this->parser->ot['wiki'] ) { + $ignoredTags = array( 'noinclude', '/noinclude', 'onlyinclude', '/onlyinclude', 'includeonly', '/includeonly' ); + $ignoredElements = array(); + } else { + $ignoredTags = array( 'noinclude', '/noinclude', 'onlyinclude', '/onlyinclude' ); + $ignoredElements = array( 'includeonly' ); + $xmlishElements[] = 'includeonly'; + } + $xmlishRegex = implode( '|', array_merge( $xmlishElements, $ignoredTags ) ); + + // Use "A" modifier (anchored) instead of "^", because ^ doesn't work with an offset + $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA"; + + $stack = new PPDStack_HipHop; + + $searchBase = "[{<\n"; + $revText = strrev( $text ); // For fast reverse searches + + $i = 0; # Input pointer, starts out pointing to a pseudo-newline before the start + $accum = $stack->getAccum(); # Current accumulator + $headingIndex = 1; + $stackFlags = array( + 'findPipe' => false, # True to take notice of pipe characters + 'findEquals' => false, # True to find equals signs in arguments + 'inHeading' => false, # True if $i is inside a possible heading + ); + $noMoreGT = false; # True if there are no more greater-than (>) signs right of $i + $findOnlyinclude = $enableOnlyinclude; # True to ignore all input up to the next <onlyinclude> + $fakeLineStart = true; # Do a line-start run without outputting an LF character + + while ( true ) { + //$this->memCheck(); + + if ( $findOnlyinclude ) { + // Ignore all input up to the next <onlyinclude> + $variantStartPos = strpos( $text, '<onlyinclude>', $i ); + if ( $variantStartPos === false ) { + // Ignored section runs to the end + $accum->addNodeWithText( 'ignore', strval( substr( $text, $i ) ) ); + break; + } + $startPos1 = intval( $variantStartPos ); + $tagEndPos = $startPos1 + strlen( '<onlyinclude>' ); // past-the-end + $accum->addNodeWithText( 'ignore', strval( substr( $text, $i, 
$tagEndPos - $i ) ) ); + $i = $tagEndPos; + $findOnlyinclude = false; + } + + if ( $fakeLineStart ) { + $found = 'line-start'; + $curChar = ''; + } else { + # Find next opening brace, closing brace or pipe + $search = $searchBase; + if ( $stack->top === false ) { + $currentClosing = ''; + } else { + $currentClosing = strval( $stack->getTop()->close ); + $search .= $currentClosing; + } + if ( $stackFlags['findPipe'] ) { + $search .= '|'; + } + if ( $stackFlags['findEquals'] ) { + // First equals will be for the template + $search .= '='; + } + $rule = null; + # Output literal section, advance input counter + $literalLength = intval( strcspn( $text, $search, $i ) ); + if ( $literalLength > 0 ) { + $accum->addLiteral( strval( substr( $text, $i, $literalLength ) ) ); + $i += $literalLength; + } + if ( $i >= strlen( $text ) ) { + if ( $currentClosing === "\n" ) { + // Do a past-the-end run to finish off the heading + $curChar = ''; + $found = 'line-end'; + } else { + # All done + break; + } + } else { + $curChar = $text[$i]; + if ( $curChar === '|' ) { + $found = 'pipe'; + } elseif ( $curChar === '=' ) { + $found = 'equals'; + } elseif ( $curChar === '<' ) { + $found = 'angle'; + } elseif ( $curChar === "\n" ) { + if ( $stackFlags['inHeading'] ) { + $found = 'line-end'; + } else { + $found = 'line-start'; + } + } elseif ( $curChar === $currentClosing ) { + $found = 'close'; + } elseif ( isset( $rules[$curChar] ) ) { + $found = 'open'; + $rule = $rules[$curChar]; + } else { + # Some versions of PHP have a strcspn which stops on null characters + # Ignore and continue + ++$i; + continue; + } + } + } + + if ( $found === 'angle' ) { + $matches = false; + // Handle </onlyinclude> + if ( $enableOnlyinclude + && substr( $text, $i, strlen( '</onlyinclude>' ) ) === '</onlyinclude>' ) + { + $findOnlyinclude = true; + continue; + } + + // Determine element name + if ( !preg_match( $elementsRegex, $text, $matches, 0, $i + 1 ) ) { + // Element name missing or not listed + 
$accum->addLiteral( '<' ); + ++$i; + continue; + } + // Handle comments + if ( isset( $matches[2] ) && $matches[2] === '!--' ) { + // To avoid leaving blank lines, when a comment is both preceded + // and followed by a newline (ignoring spaces), trim leading and + // trailing spaces and one of the newlines. + + // Find the end + $variantEndPos = strpos( $text, '-->', $i + 4 ); + if ( $variantEndPos === false ) { + // Unclosed comment in input, runs to end + $inner = strval( substr( $text, $i ) ); + $accum->addNodeWithText( 'comment', $inner ); + $i = strlen( $text ); + } else { + $endPos = intval( $variantEndPos ); + // Search backwards for leading whitespace + if ( $i ) { + $wsStart = $i - intval( strspn( $revText, ' ', strlen( $text ) - $i ) ); + } else { + $wsStart = 0; + } + // Search forwards for trailing whitespace + // $wsEnd will be the position of the last space (or the '>' if there's none) + $wsEnd = $endPos + 2 + intval( strspn( $text, ' ', $endPos + 3 ) ); + // Eat the line if possible + // TODO: This could theoretically be done if $wsStart == 0, i.e. for comments at + // the overall start. That's not how Sanitizer::removeHTMLcomments() did it, but + // it's a possible beneficial b/c break. 
+ if ( $wsStart > 0 && substr( $text, $wsStart - 1, 1 ) === "\n" + && substr( $text, $wsEnd + 1, 1 ) === "\n" ) + { + $startPos2 = $wsStart; + $endPos = $wsEnd + 1; + // Remove leading whitespace from the end of the accumulator + // Sanity check first though + $wsLength = $i - $wsStart; + if ( $wsLength > 0 + && $accum->lastNode instanceof PPNode_HipHop_Text + && substr( $accum->lastNode->value, -$wsLength ) === str_repeat( ' ', $wsLength ) ) + { + $accum->lastNode->value = strval( substr( $accum->lastNode->value, 0, -$wsLength ) ); + } + // Do a line-start run next time to look for headings after the comment + $fakeLineStart = true; + } else { + // No line to eat, just take the comment itself + $startPos2 = $i; + $endPos += 2; + } + + if ( $stack->top ) { + $part = $stack->getTop()->getCurrentPart(); + if ( ! (isset( $part->commentEnd ) && $part->commentEnd == $wsStart - 1 )) { + $part->visualEnd = $wsStart; + } + // Else comments abutting, no change in visual end + $part->commentEnd = $endPos; + } + $i = $endPos + 1; + $inner = strval( substr( $text, $startPos2, $endPos - $startPos2 + 1 ) ); + $accum->addNodeWithText( 'comment', $inner ); + } + continue; + } + $name = strval( $matches[1] ); + $lowerName = strtolower( $name ); + $attrStart = $i + strlen( $name ) + 1; + + // Find end of tag + $variantTagEndPos = $noMoreGT ? 
false : strpos( $text, '>', $attrStart ); + if ( $variantTagEndPos === false ) { + // Infinite backtrack + // Disable tag search to prevent worst-case O(N^2) performance + $noMoreGT = true; + $accum->addLiteral( '<' ); + ++$i; + continue; + } + $tagEndPos = intval( $variantTagEndPos ); + + // Handle ignored tags + if ( in_array( $lowerName, $ignoredTags ) ) { + $accum->addNodeWithText( 'ignore', strval( substr( $text, $i, $tagEndPos - $i + 1 ) ) ); + $i = $tagEndPos + 1; + continue; + } + + $tagStartPos = $i; + $inner = $close = ''; + if ( $text[$tagEndPos-1] === '/' ) { + // Short end tag + $attrEnd = $tagEndPos - 1; + $shortEnd = true; + $inner = ''; + $i = $tagEndPos + 1; + $haveClose = false; + } else { + $attrEnd = $tagEndPos; + $shortEnd = false; + // Find closing tag + if ( preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i", + $text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1 ) ) + { + $inner = strval( substr( $text, $tagEndPos + 1, $matches[0][1] - $tagEndPos - 1 ) ); + $i = intval( $matches[0][1] ) + strlen( $matches[0][0] ); + $close = strval( $matches[0][0] ); + $haveClose = true; + } else { + // No end tag -- let it run out to the end of the text. + $inner = strval( substr( $text, $tagEndPos + 1 ) ); + $i = strlen( $text ); + $haveClose = false; + } + } + // <includeonly> and <noinclude> just become <ignore> tags + if ( in_array( $lowerName, $ignoredElements ) ) { + $accum->addNodeWithText( 'ignore', strval( substr( $text, $tagStartPos, $i - $tagStartPos ) ) ); + continue; + } + + if ( $attrEnd <= $attrStart ) { + $attr = ''; + } else { + // Note that the attr element contains the whitespace between name and attribute, + // this is necessary for precise reconstruction during pre-save transform. 
+ $attr = strval( substr( $text, $attrStart, $attrEnd - $attrStart ) ); + } + + $extNode = new PPNode_HipHop_Tree( 'ext' ); + $extNode->addChild( PPNode_HipHop_Tree::newWithText( 'name', $name ) ); + $extNode->addChild( PPNode_HipHop_Tree::newWithText( 'attr', $attr ) ); + if ( !$shortEnd ) { + $extNode->addChild( PPNode_HipHop_Tree::newWithText( 'inner', $inner ) ); + } + if ( $haveClose ) { + $extNode->addChild( PPNode_HipHop_Tree::newWithText( 'close', $close ) ); + } + $accum->addNode( $extNode ); + } + + elseif ( $found === 'line-start' ) { + // Is this the start of a heading? + // Line break belongs before the heading element in any case + if ( $fakeLineStart ) { + $fakeLineStart = false; + } else { + $accum->addLiteral( $curChar ); + $i++; + } + + $count = intval( strspn( $text, '=', $i, 6 ) ); + if ( $count == 1 && $stackFlags['findEquals'] ) { + // DWIM: This looks kind of like a name/value separator + // Let's let the equals handler have it and break the potential heading + // This is heuristic, but AFAICT the methods for completely correct disambiguation are very complex. + } elseif ( $count > 0 ) { + $partData = array( + 'open' => "\n", + 'close' => "\n", + 'parts' => array( new PPDPart_HipHop( str_repeat( '=', $count ) ) ), + 'startPos' => $i, + 'count' => $count ); + $stack->push( $partData ); + $accum = $stack->getAccum(); + $stackFlags = $stack->getFlags(); + $i += $count; + } + } elseif ( $found === 'line-end' ) { + $piece = $stack->getTop(); + // A heading must be open, otherwise \n wouldn't have been in the search list + assert( $piece->open === "\n" ); + $part = $piece->getCurrentPart(); + // Search back through the input to see if it has a proper close + // Do this using the reversed string since the other solutions (end anchor, etc.) 
are inefficient + $wsLength = intval( strspn( $revText, " \t", strlen( $text ) - $i ) ); + $searchStart = $i - $wsLength; + if ( isset( $part->commentEnd ) && $searchStart - 1 == $part->commentEnd ) { + // Comment found at line end + // Search for equals signs before the comment + $searchStart = intval( $part->visualEnd ); + $searchStart -= intval( strspn( $revText, " \t", strlen( $text ) - $searchStart ) ); + } + $count = intval( $piece->count ); + $equalsLength = intval( strspn( $revText, '=', strlen( $text ) - $searchStart ) ); + $isTreeNode = false; + $resultAccum = $accum; + if ( $equalsLength > 0 ) { + if ( $searchStart - $equalsLength == $piece->startPos ) { + // This is just a single string of equals signs on its own line + // Replicate the doHeadings behaviour /={count}(.+)={count}/ + // First find out how many equals signs there really are (don't stop at 6) + $count = $equalsLength; + if ( $count < 3 ) { + $count = 0; + } else { + $count = intval( ( $count - 1 ) / 2 ); + if ( $count > 6 ) { + $count = 6; + } + } + } else { + if ( $count > $equalsLength ) { + $count = $equalsLength; + } + } + if ( $count > 0 ) { + // Normal match, output <h> + $tree = new PPNode_HipHop_Tree( 'possible-h' ); + $tree->addChild( new PPNode_HipHop_Attr( 'level', $count ) ); + $tree->addChild( new PPNode_HipHop_Attr( 'i', $headingIndex++ ) ); + $tree->lastChild->nextSibling = $accum->firstNode; + $tree->lastChild = $accum->lastNode; + $isTreeNode = true; + } else { + // Single equals sign on its own line, count=0 + // Output $resultAccum + } + } else { + // No match, no <h>, just pass down the inner text + // Output $resultAccum + } + // Unwind the stack + $stack->pop(); + $accum = $stack->getAccum(); + $stackFlags = $stack->getFlags(); + + // Append the result to the enclosing accumulator + if ( $isTreeNode ) { + $accum->addNode( $tree ); + } else { + $accum->addAccum( $resultAccum ); + } + // Note that we do NOT increment the input pointer. 
+ // This is because the closing linebreak could be the opening linebreak of + // another heading. Infinite loops are avoided because the next iteration MUST + // hit the heading open case above, which unconditionally increments the + // input pointer. + } elseif ( $found === 'open' ) { + # count opening brace characters + $count = intval( strspn( $text, $curChar, $i ) ); + + # we need to add to stack only if opening brace count is enough for one of the rules + if ( $count >= $rule['min'] ) { + # Add it to the stack + $partData = array( + 'open' => $curChar, + 'close' => $rule['end'], + 'count' => $count, + 'lineStart' => ($i == 0 || $text[$i-1] === "\n"), + ); + + $stack->push( $partData ); + $accum = $stack->getAccum(); + $stackFlags = $stack->getFlags(); + } else { + # Add literal brace(s) + $accum->addLiteral( str_repeat( $curChar, $count ) ); + } + $i += $count; + } elseif ( $found === 'close' ) { + $piece = $stack->getTop(); + # lets check if there are enough characters for closing brace + $maxCount = intval( $piece->count ); + $count = intval( strspn( $text, $curChar, $i, $maxCount ) ); + + # check for maximum matching characters (if there are 5 closing + # characters, we will probably need only 3 - depending on the rules) + $rule = $rules[$piece->open]; + if ( $count > $rule['max'] ) { + # The specified maximum exists in the callback array, unless the caller + # has made an error + $matchingCount = intval( $rule['max'] ); + } else { + # Count is less than the maximum + # Skip any gaps in the callback array to find the true largest match + # Need to use array_key_exists not isset because the callback can be null + $matchingCount = $count; + while ( $matchingCount > 0 && !array_key_exists( $matchingCount, $rule['names'] ) ) { + --$matchingCount; + } + } + + if ($matchingCount <= 0) { + # No matching element found in callback array + # Output a literal closing brace and continue + $accum->addLiteral( str_repeat( $curChar, $count ) ); + $i += $count; + 
continue; + } + $name = strval( $rule['names'][$matchingCount] ); + $isTreeNode = false; + if ( $name === 'LITERAL' ) { + // No element, just literal text + $resultAccum = $piece->breakSyntax( $matchingCount ); + $resultAccum->addLiteral( str_repeat( $rule['end'], $matchingCount ) ); + } else { + # Create XML element + # Note: $parts is already XML, does not need to be encoded further + $isTreeNode = true; + $parts = $piece->parts; + $titleAccum = PPDAccum_HipHop::cast( $parts[0]->out ); + unset( $parts[0] ); + + $tree = new PPNode_HipHop_Tree( $name ); + + # The invocation is at the start of the line if lineStart is set in + # the stack, and all opening brackets are used up. + if ( $maxCount == $matchingCount && !empty( $piece->lineStart ) ) { + $tree->addChild( new PPNode_HipHop_Attr( 'lineStart', 1 ) ); + } + $titleNode = new PPNode_HipHop_Tree( 'title' ); + $titleNode->firstChild = $titleAccum->firstNode; + $titleNode->lastChild = $titleAccum->lastNode; + $tree->addChild( $titleNode ); + $argIndex = 1; + foreach ( $parts as $variantPart ) { + $part = PPDPart_HipHop::cast( $variantPart ); + if ( isset( $part->eqpos ) ) { + // Find equals + $lastNode = false; + for ( $node = $part->out->firstNode; $node; $node = $node->nextSibling ) { + if ( $node === $part->eqpos ) { + break; + } + $lastNode = $node; + } + if ( !$node ) { + throw new MWException( __METHOD__. 
': eqpos not found' ); + } + if ( $node->name !== 'equals' ) { + throw new MWException( __METHOD__ .': eqpos is not equals' ); + } + $equalsNode = $node; + + // Construct name node + $nameNode = new PPNode_HipHop_Tree( 'name' ); + if ( $lastNode !== false ) { + $lastNode->nextSibling = false; + $nameNode->firstChild = $part->out->firstNode; + $nameNode->lastChild = $lastNode; + } + + // Construct value node + $valueNode = new PPNode_HipHop_Tree( 'value' ); + if ( $equalsNode->nextSibling !== false ) { + $valueNode->firstChild = $equalsNode->nextSibling; + $valueNode->lastChild = $part->out->lastNode; + } + $partNode = new PPNode_HipHop_Tree( 'part' ); + $partNode->addChild( $nameNode ); + $partNode->addChild( $equalsNode->firstChild ); + $partNode->addChild( $valueNode ); + $tree->addChild( $partNode ); + } else { + $partNode = new PPNode_HipHop_Tree( 'part' ); + $nameNode = new PPNode_HipHop_Tree( 'name' ); + $nameNode->addChild( new PPNode_HipHop_Attr( 'index', $argIndex++ ) ); + $valueNode = new PPNode_HipHop_Tree( 'value' ); + $valueNode->firstChild = $part->out->firstNode; + $valueNode->lastChild = $part->out->lastNode; + $partNode->addChild( $nameNode ); + $partNode->addChild( $valueNode ); + $tree->addChild( $partNode ); + } + } + } + + # Advance input pointer + $i += $matchingCount; + + # Unwind the stack + $stack->pop(); + $accum = $stack->getAccum(); + + # Re-add the old stack element if it still has unmatched opening characters remaining + if ($matchingCount < $piece->count) { + $piece->parts = array( new PPDPart_HipHop ); + $piece->count -= $matchingCount; + # do we still qualify for any callback with remaining count? 
+ $names = $rules[$piece->open]['names']; + $skippedBraces = 0; + $enclosingAccum = $accum; + while ( $piece->count ) { + if ( array_key_exists( $piece->count, $names ) ) { + $stack->push( $piece ); + $accum = $stack->getAccum(); + break; + } + --$piece->count; + $skippedBraces ++; + } + $enclosingAccum->addLiteral( str_repeat( $piece->open, $skippedBraces ) ); + } + + $stackFlags = $stack->getFlags(); + + # Add XML element to the enclosing accumulator + if ( $isTreeNode ) { + $accum->addNode( $tree ); + } else { + $accum->addAccum( $resultAccum ); + } + } elseif ( $found === 'pipe' ) { + $stackFlags['findEquals'] = true; // shortcut for getFlags() + $stack->addPart(); + $accum = $stack->getAccum(); + ++$i; + } elseif ( $found === 'equals' ) { + $stackFlags['findEquals'] = false; // shortcut for getFlags() + $accum->addNodeWithText( 'equals', '=' ); + $stack->getCurrentPart()->eqpos = $accum->lastNode; + ++$i; + } + } + + # Output any remaining unclosed brackets + foreach ( $stack->stack as $variantPiece ) { + $piece = PPDStackElement_HipHop::cast( $variantPiece ); + $stack->rootAccum->addAccum( $piece->breakSyntax() ); + } + + # Enable top-level headings + for ( $node = $stack->rootAccum->firstNode; $node; $node = $node->nextSibling ) { + if ( isset( $node->name ) && $node->name === 'possible-h' ) { + $node->name = 'h'; + } + } + + $rootNode = new PPNode_HipHop_Tree( 'root' ); + $rootNode->firstChild = $stack->rootAccum->firstNode; + $rootNode->lastChild = $stack->rootAccum->lastNode; + + // Cache + if ($cacheable) { + $cacheValue = sprintf( "%08d", self::CACHE_VERSION ) . 
serialize( $rootNode );
+			$wgMemc->set( $cacheKey, $cacheValue, 86400 );
+			wfProfileOut( __METHOD__.'-cache-miss' );
+			wfProfileOut( __METHOD__.'-cacheable' );
+			wfDebugLog( "Preprocessor", "Saved preprocessor Hash to memcached (key $cacheKey)" );
+		}
+
+		wfProfileOut( __METHOD__ );
+		return $rootNode;
+	}
+}
+
+
+
+/**
+ * Stack class to help Preprocessor::preprocessToObj()
+ *
+ * Keeps a list of open stack elements plus a pointer to the accumulator that
+ * currently receives output: the root accumulator while the stack is empty,
+ * otherwise the accumulator of the topmost element's last part.
+ * @ingroup Parser
+ */
+class PPDStack_HipHop {
+	var $stack, $rootAccum;
+
+	/**
+	 * Topmost stack element, or false while the stack is empty
+	 * @var PPDStackElement_HipHop|false
+	 */
+	var $top;
+	var $out;
+
+	/**
+	 * Accumulator currently receiving output. Declared explicitly so that it
+	 * is a regular property instead of one created on first assignment.
+	 * @var PPDAccum_HipHop
+	 */
+	var $accum;
+
+	static $false = false;
+
+	function __construct() {
+		$this->stack = array();
+		$this->top = false;
+		$this->rootAccum = new PPDAccum_HipHop;
+		$this->accum = $this->rootAccum;
+	}
+
+	/**
+	 * @return int Number of elements on the stack
+	 */
+	function count() {
+		return count( $this->stack );
+	}
+
+	/**
+	 * @return PPDAccum_HipHop The accumulator currently receiving output
+	 */
+	function getAccum() {
+		return PPDAccum_HipHop::cast( $this->accum );
+	}
+
+	/**
+	 * @return PPDPart_HipHop The last part of the topmost element
+	 */
+	function getCurrentPart() {
+		return $this->getTop()->getCurrentPart();
+	}
+
+	/**
+	 * @return PPDStackElement_HipHop The topmost element
+	 */
+	function getTop() {
+		return PPDStackElement_HipHop::cast( $this->top );
+	}
+
+	/**
+	 * Push an element; anything that is not already a PPDStackElement_HipHop
+	 * is passed to the element constructor as its initial data.
+	 *
+	 * @param $data PPDStackElement_HipHop|array
+	 */
+	function push( $data ) {
+		if ( $data instanceof PPDStackElement_HipHop ) {
+			$this->stack[] = $data;
+		} else {
+			$this->stack[] = new PPDStackElement_HipHop( $data );
+		}
+		$this->top = $this->stack[ count( $this->stack ) - 1 ];
+		$this->accum = $this->top->getAccum();
+	}
+
+	/**
+	 * Remove and return the topmost element, re-pointing $top and $accum at
+	 * the new top (or at the root accumulator once the stack is empty).
+	 *
+	 * @throws MWException if the stack is already empty
+	 * @return PPDStackElement_HipHop
+	 */
+	function pop() {
+		if ( !count( $this->stack ) ) {
+			throw new MWException( __METHOD__.': no elements remaining' );
+		}
+		$temp = array_pop( $this->stack );
+
+		if ( count( $this->stack ) ) {
+			$this->top = $this->stack[ count( $this->stack ) - 1 ];
+			$this->accum = $this->top->getAccum();
+		} else {
+			$this->top = self::$false;
+			$this->accum = $this->rootAccum;
+		}
+		return $temp;
+	}
+
+	/**
+	 * Start a new part on the topmost element and make it the output target.
+	 *
+	 * @param $s string Initial literal content of the new part
+	 */
+	function addPart( $s = '' ) {
+		$this->top->addPart( $s );
+		$this->accum = $this->top->getAccum();
+	}
+
+	/**
+	 * Flags of the topmost element, or all-false flags for an empty stack.
+	 *
+	 * @return array
+	 */
+	function getFlags() {
+		if ( !count( $this->stack ) ) {
+			return array(
+				'findEquals' => false,
+				'findPipe' => false,
+				'inHeading' => false,
+			);
+		} else {
+			return $this->top->getFlags();
+		}
+	}
+}
+
+/**
+ * @ingroup Parser
+ */
+class PPDStackElement_HipHop {
+	var $open,		// Opening character (\n for heading)
+		$close,		// Matching closing character
+		$count,		// Number of opening characters found (number of "=" for heading)
+		$parts,		// Array of PPDPart objects describing pipe-separated parts.
+		$lineStart;	// True if the open char appeared at the start of the input line. Not set for headings.
+
+	static function cast( PPDStackElement_HipHop $obj ) {
+		return $obj;
+	}
+
+	/**
+	 * @param $data array Map of property name => initial value
+	 */
+	function __construct( $data = array() ) {
+		$this->parts = array( new PPDPart_HipHop );
+
+		foreach ( $data as $name => $value ) {
+			$this->$name = $value;
+		}
+	}
+
+	/**
+	 * @return PPDAccum_HipHop Output accumulator of the last part
+	 */
+	function getAccum() {
+		return PPDAccum_HipHop::cast( $this->parts[count($this->parts) - 1]->out );
+	}
+
+	function addPart( $s = '' ) {
+		$this->parts[] = new PPDPart_HipHop( $s );
+	}
+
+	/**
+	 * @return PPDPart_HipHop The last part
+	 */
+	function getCurrentPart() {
+		return PPDPart_HipHop::cast( $this->parts[count($this->parts) - 1] );
+	}
+
+	/**
+	 * @return array
+	 */
+	function getFlags() {
+		$partCount = count( $this->parts );
+		// Pipes are not searched for inside headings ("\n") or "[" elements
+		$findPipe = $this->open !== "\n" && $this->open !== '[';
+		return array(
+			'findPipe' => $findPipe,
+			// Only look for "=" after the first pipe, and only until one is recorded for the current part
+			'findEquals' => $findPipe && $partCount > 1 && !isset( $this->parts[$partCount - 1]->eqpos ),
+			'inHeading' => $this->open === "\n",
+		);
+	}
+
+	/**
+	 * Get the accumulator that would result if the close is not found. 
+ * + * @return PPDAccum_HipHop + */ + function breakSyntax( $openingCount = false ) { + if ( $this->open === "\n" ) { + $accum = PPDAccum_HipHop::cast( $this->parts[0]->out ); + } else { + if ( $openingCount === false ) { + $openingCount = $this->count; + } + $accum = new PPDAccum_HipHop; + $accum->addLiteral( str_repeat( $this->open, $openingCount ) ); + $first = true; + foreach ( $this->parts as $part ) { + if ( $first ) { + $first = false; + } else { + $accum->addLiteral( '|' ); + } + $accum->addAccum( $part->out ); + } + } + return $accum; + } +} + +/** + * @ingroup Parser + */ +class PPDPart_HipHop { + var $out; // Output accumulator object + + // Optional member variables: + // eqpos Position of equals sign in output accumulator + // commentEnd Past-the-end input pointer for the last comment encountered + // visualEnd Past-the-end input pointer for the end of the accumulator minus comments + + function __construct( $out = '' ) { + $this->out = new PPDAccum_HipHop; + if ( $out !== '' ) { + $this->out->addLiteral( $out ); + } + } + + static function cast( PPDPart_HipHop $obj ) { + return $obj; + } +} + +/** + * @ingroup Parser + */ +class PPDAccum_HipHop { + var $firstNode, $lastNode; + + function __construct() { + $this->firstNode = $this->lastNode = false; + } + + static function cast( PPDAccum_HipHop $obj ) { + return $obj; + } + + /** + * Append a string literal + */ + function addLiteral( string $s ) { + if ( $this->lastNode === false ) { + $this->firstNode = $this->lastNode = new PPNode_HipHop_Text( $s ); + } elseif ( $this->lastNode instanceof PPNode_HipHop_Text ) { + $this->lastNode->value .= $s; + } else { + $this->lastNode->nextSibling = new PPNode_HipHop_Text( $s ); + $this->lastNode = $this->lastNode->nextSibling; + } + } + + /** + * Append a PPNode + */ + function addNode( PPNode $node ) { + if ( $this->lastNode === false ) { + $this->firstNode = $this->lastNode = $node; + } else { + $this->lastNode->nextSibling = $node; + $this->lastNode = $node; 
+ } + } + + /** + * Append a tree node with text contents + */ + function addNodeWithText( string $name, string $value ) { + $node = PPNode_HipHop_Tree::newWithText( $name, $value ); + $this->addNode( $node ); + } + + /** + * Append a PPDAccum_HipHop + * Takes over ownership of the nodes in the source argument. These nodes may + * subsequently be modified, especially nextSibling. + */ + function addAccum( PPDAccum_HipHop $accum ) { + if ( $accum->lastNode === false ) { + // nothing to add + } elseif ( $this->lastNode === false ) { + $this->firstNode = $accum->firstNode; + $this->lastNode = $accum->lastNode; + } else { + $this->lastNode->nextSibling = $accum->firstNode; + $this->lastNode = $accum->lastNode; + } + } +} + +/** + * An expansion frame, used as a context to expand the result of preprocessToObj() + * @ingroup Parser + */ +class PPFrame_HipHop implements PPFrame { + + /** + * @var Parser + */ + var $parser; + + /** + * @var Preprocessor + */ + var $preprocessor; + + /** + * @var Title + */ + var $title; + var $titleCache; + + /** + * Hashtable listing templates which are disallowed for expansion in this frame, + * having been encountered previously in parent frames. + */ + var $loopCheckHash; + + /** + * Recursion depth of this frame, top = 0 + * Note that this is NOT the same as expansion depth in expand() + */ + var $depth; + + + /** + * Construct a new preprocessor frame. + * @param $preprocessor Preprocessor: the parent preprocessor + */ + function __construct( $preprocessor ) { + $this->preprocessor = $preprocessor; + $this->parser = $preprocessor->parser; + $this->title = $this->parser->mTitle; + $this->titleCache = array( $this->title ? 
$this->title->getPrefixedDBkey() : false ); + $this->loopCheckHash = array(); + $this->depth = 0; + } + + /** + * Create a new child frame + * $args is optionally a multi-root PPNode or array containing the template arguments + * + * @param $args PPNode_HipHop_Array|array + * @param $title Title|false + * + * @return PPTemplateFrame_HipHop + */ + function newChild( $args = false, $title = false ) { + $namedArgs = array(); + $numberedArgs = array(); + if ( $title === false ) { + $title = $this->title; + } + if ( $args !== false ) { + if ( $args instanceof PPNode_HipHop_Array ) { + $args = $args->value; + } elseif ( !is_array( $args ) ) { + throw new MWException( __METHOD__ . ': $args must be array or PPNode_HipHop_Array' ); + } + foreach ( $args as $arg ) { + $bits = $arg->splitArg(); + if ( $bits['index'] !== '' ) { + // Numbered parameter + $numberedArgs[$bits['index']] = $bits['value']; + unset( $namedArgs[$bits['index']] ); + } else { + // Named parameter + $name = trim( $this->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) ); + $namedArgs[$name] = $bits['value']; + unset( $numberedArgs[$name] ); + } + } + } + return new PPTemplateFrame_HipHop( $this->preprocessor, $this, $numberedArgs, $namedArgs, $title ); + } + + /** + * @throws MWException + * @param $root + * @param $flags int + * @return string + */ + function expand( $root, $flags = 0 ) { + static $expansionDepth = 0; + if ( is_string( $root ) ) { + return $root; + } + + if ( ++$this->parser->mPPNodeCount > $this->parser->mOptions->getMaxPPNodeCount() ) { + return '<span class="error">Node-count limit exceeded</span>'; + } + if ( $expansionDepth > $this->parser->mOptions->getMaxPPExpandDepth() ) { + return '<span class="error">Expansion depth limit exceeded</span>'; + } + ++$expansionDepth; + + $outStack = array( '', '' ); + $iteratorStack = array( false, $root ); + $indexStack = array( 0, 0 ); + + while ( count( $iteratorStack ) > 1 ) { + $level = count( $outStack ) - 1; + $iteratorNode =& 
$iteratorStack[ $level ]; + $out =& $outStack[$level]; + $index =& $indexStack[$level]; + + if ( is_array( $iteratorNode ) ) { + if ( $index >= count( $iteratorNode ) ) { + // All done with this iterator + $iteratorStack[$level] = false; + $contextNode = false; + } else { + $contextNode = $iteratorNode[$index]; + $index++; + } + } elseif ( $iteratorNode instanceof PPNode_HipHop_Array ) { + if ( $index >= $iteratorNode->getLength() ) { + // All done with this iterator + $iteratorStack[$level] = false; + $contextNode = false; + } else { + $contextNode = $iteratorNode->item( $index ); + $index++; + } + } else { + // Copy to $contextNode and then delete from iterator stack, + // because this is not an iterator but we do have to execute it once + $contextNode = $iteratorStack[$level]; + $iteratorStack[$level] = false; + } + + $newIterator = false; + + if ( $contextNode === false ) { + // nothing to do + } elseif ( is_string( $contextNode ) ) { + $out .= $contextNode; + } elseif ( is_array( $contextNode ) || $contextNode instanceof PPNode_HipHop_Array ) { + $newIterator = $contextNode; + } elseif ( $contextNode instanceof PPNode_HipHop_Attr ) { + // No output + } elseif ( $contextNode instanceof PPNode_HipHop_Text ) { + $out .= $contextNode->value; + } elseif ( $contextNode instanceof PPNode_HipHop_Tree ) { + if ( $contextNode->name === 'template' ) { + # Double-brace expansion + $bits = $contextNode->splitTemplate(); + if ( $flags & PPFrame::NO_TEMPLATES ) { + $newIterator = $this->virtualBracketedImplode( '{{', '|', '}}', $bits['title'], $bits['parts'] ); + } else { + $ret = $this->parser->braceSubstitution( $bits, $this ); + if ( isset( $ret['object'] ) ) { + $newIterator = $ret['object']; + } else { + $out .= $ret['text']; + } + } + } elseif ( $contextNode->name === 'tplarg' ) { + # Triple-brace expansion + $bits = $contextNode->splitTemplate(); + if ( $flags & PPFrame::NO_ARGS ) { + $newIterator = $this->virtualBracketedImplode( '{{{', '|', '}}}', $bits['title'], 
$bits['parts'] ); + } else { + $ret = $this->parser->argSubstitution( $bits, $this ); + if ( isset( $ret['object'] ) ) { + $newIterator = $ret['object']; + } else { + $out .= $ret['text']; + } + } + } elseif ( $contextNode->name === 'comment' ) { + # HTML-style comment + # Remove it in HTML, pre+remove and STRIP_COMMENTS modes + if ( $this->parser->ot['html'] + || ( $this->parser->ot['pre'] && $this->parser->mOptions->getRemoveComments() ) + || ( $flags & PPFrame::STRIP_COMMENTS ) ) + { + $out .= ''; + } + # Add a strip marker in PST mode so that pstPass2() can run some old-fashioned regexes on the result + # Not in RECOVER_COMMENTS mode (extractSections) though + elseif ( $this->parser->ot['wiki'] && ! ( $flags & PPFrame::RECOVER_COMMENTS ) ) { + $out .= $this->parser->insertStripItem( $contextNode->firstChild->value ); + } + # Recover the literal comment in RECOVER_COMMENTS and pre+no-remove + else { + $out .= $contextNode->firstChild->value; + } + } elseif ( $contextNode->name === 'ignore' ) { + # Output suppression used by <includeonly> etc. + # OT_WIKI will only respect <ignore> in substed templates. + # The other output types respect it unless NO_IGNORE is set. + # extractSections() sets NO_IGNORE and so never respects it. 
+ if ( ( !isset( $this->parent ) && $this->parser->ot['wiki'] ) || ( $flags & PPFrame::NO_IGNORE ) ) { + $out .= $contextNode->firstChild->value; + } else { + //$out .= ''; + } + } elseif ( $contextNode->name === 'ext' ) { + # Extension tag + $bits = $contextNode->splitExt() + array( 'attr' => null, 'inner' => null, 'close' => null ); + $out .= $this->parser->extensionSubstitution( $bits, $this ); + } elseif ( $contextNode->name === 'h' ) { + # Heading + if ( $this->parser->ot['html'] ) { + # Expand immediately and insert heading index marker + $s = ''; + for ( $node = $contextNode->firstChild; $node; $node = $node->nextSibling ) { + $s .= $this->expand( $node, $flags ); + } + + $bits = $contextNode->splitHeading(); + $titleText = $this->title->getPrefixedDBkey(); + $this->parser->mHeadings[] = array( $titleText, $bits['i'] ); + $serial = count( $this->parser->mHeadings ) - 1; + $marker = "{$this->parser->mUniqPrefix}-h-$serial-" . Parser::MARKER_SUFFIX; + $s = substr( $s, 0, $bits['level'] ) . $marker . substr( $s, $bits['level'] ); + $this->parser->mStripState->addGeneral( $marker, '' ); + $out .= $s; + } else { + # Expand in virtual stack + $newIterator = $contextNode->getChildren(); + } + } else { + # Generic recursive expansion + $newIterator = $contextNode->getChildren(); + } + } else { + throw new MWException( __METHOD__.': Invalid parameter type' ); + } + + if ( $newIterator !== false ) { + $outStack[] = ''; + $iteratorStack[] = $newIterator; + $indexStack[] = 0; + } elseif ( $iteratorStack[$level] === false ) { + // Return accumulated value to parent + // With tail recursion + while ( $iteratorStack[$level] === false && $level > 0 ) { + $outStack[$level - 1] .= $out; + array_pop( $outStack ); + array_pop( $iteratorStack ); + array_pop( $indexStack ); + $level--; + } + } + } + --$expansionDepth; + return $outStack[0]; + } + + /** + * @param $sep + * @param $flags + * @return string + */ + function implodeWithFlags( $sep, $flags /*, ... 
*/ ) {
+		// Everything after $sep and $flags is a root to expand
+		$roots = array_slice( func_get_args(), 2 );
+
+		$pieces = array();
+		foreach ( $roots as $root ) {
+			// Unwrap node lists; wrap single nodes so both cases iterate alike
+			$list = ( $root instanceof PPNode_HipHop_Array ) ? $root->value : $root;
+			if ( !is_array( $list ) ) {
+				$list = array( $list );
+			}
+			foreach ( $list as $node ) {
+				$pieces[] = $this->expand( $node, $flags );
+			}
+		}
+		return implode( $sep, $pieces );
+	}
+
+	/**
+	 * Expand every node of every root with default flags and join the results with $sep.
+	 * The loop is written out here rather than delegating to implodeWithFlags(),
+	 * which keeps the call stack shallower.
+	 * @return string
+	 */
+	function implode( $sep /*, ... */ ) {
+		// Everything after $sep is a root to expand
+		$roots = array_slice( func_get_args(), 1 );
+
+		$pieces = array();
+		foreach ( $roots as $root ) {
+			$list = ( $root instanceof PPNode_HipHop_Array ) ? $root->value : $root;
+			if ( !is_array( $list ) ) {
+				$list = array( $list );
+			}
+			foreach ( $list as $node ) {
+				$pieces[] = $this->expand( $node );
+			}
+		}
+		return implode( $sep, $pieces );
+	}
+
+	/**
+	 * Build, without expanding anything, a node list that expand() will turn
+	 * into the same string implode() would have produced.
+	 *
+	 * @return PPNode_HipHop_Array
+	 */
+	function virtualImplode( $sep /*, ... */ ) {
+		$roots = array_slice( func_get_args(), 1 );
+		$items = array();
+
+		foreach ( $roots as $root ) {
+			$list = ( $root instanceof PPNode_HipHop_Array ) ? $root->value : $root;
+			if ( !is_array( $list ) ) {
+				$list = array( $list );
+			}
+			foreach ( $list as $node ) {
+				// A separator goes before every node except the very first
+				if ( count( $items ) ) {
+					$items[] = $sep;
+				}
+				$items[] = $node;
+			}
+		}
+		return new PPNode_HipHop_Array( $items );
+	}
+
+	/**
+	 * Virtual implode with brackets
+	 *
+	 * @return PPNode_HipHop_Array
+	 */
+	function virtualBracketedImplode( $start, $sep, $end /*, ... 
*/ ) { + $args = array_slice( func_get_args(), 3 ); + $out = array( $start ); + $first = true; + + foreach ( $args as $root ) { + if ( $root instanceof PPNode_HipHop_Array ) { + $root = $root->value; + } + if ( !is_array( $root ) ) { + $root = array( $root ); + } + foreach ( $root as $node ) { + if ( $first ) { + $first = false; + } else { + $out[] = $sep; + } + $out[] = $node; + } + } + $out[] = $end; + return new PPNode_HipHop_Array( $out ); + } + + function __toString() { + return 'frame{}'; + } + + /** + * @param $level bool + * @return array|bool|String + */ + function getPDBK( $level = false ) { + if ( $level === false ) { + return $this->title->getPrefixedDBkey(); + } else { + return isset( $this->titleCache[$level] ) ? $this->titleCache[$level] : false; + } + } + + /** + * @return array + */ + function getArguments() { + return array(); + } + + /** + * @return array + */ + function getNumberedArguments() { + return array(); + } + + /** + * @return array + */ + function getNamedArguments() { + return array(); + } + + /** + * Returns true if there are no arguments in this frame + * + * @return bool + */ + function isEmpty() { + return true; + } + + /** + * @param $name + * @return bool + */ + function getArgument( $name ) { + return false; + } + + /** + * Returns true if the infinite loop check is OK, false if a loop is detected + * + * @param $title Title + * + * @return bool + */ + function loopCheck( $title ) { + return !isset( $this->loopCheckHash[$title->getPrefixedDBkey()] ); + } + + /** + * Return true if the frame is a template frame + * + * @return bool + */ + function isTemplate() { + return false; + } +} + +/** + * Expansion frame with template arguments + * @ingroup Parser + */ +class PPTemplateFrame_HipHop extends PPFrame_HipHop { + var $numberedArgs, $namedArgs, $parent; + var $numberedExpansionCache, $namedExpansionCache; + + /** + * @param $preprocessor + * @param $parent + * @param $numberedArgs array + * @param $namedArgs array + * @param 
$title Title + */ + function __construct( $preprocessor, $parent = false, $numberedArgs = array(), $namedArgs = array(), $title = false ) { + parent::__construct( $preprocessor ); + + $this->parent = $parent; + $this->numberedArgs = $numberedArgs; + $this->namedArgs = $namedArgs; + $this->title = $title; + $pdbk = $title ? $title->getPrefixedDBkey() : false; + $this->titleCache = $parent->titleCache; + $this->titleCache[] = $pdbk; + $this->loopCheckHash = /*clone*/ $parent->loopCheckHash; + if ( $pdbk !== false ) { + $this->loopCheckHash[$pdbk] = true; + } + $this->depth = $parent->depth + 1; + $this->numberedExpansionCache = $this->namedExpansionCache = array(); + } + + function __toString() { + $s = 'tplframe{'; + $first = true; + $args = $this->numberedArgs + $this->namedArgs; + foreach ( $args as $name => $value ) { + if ( $first ) { + $first = false; + } else { + $s .= ', '; + } + $s .= "\"$name\":\"" . + str_replace( '"', '\\"', $value->__toString() ) . '"'; + } + $s .= '}'; + return $s; + } + /** + * Returns true if there are no arguments in this frame + * + * @return bool + */ + function isEmpty() { + return !count( $this->numberedArgs ) && !count( $this->namedArgs ); + } + + /** + * @return array + */ + function getArguments() { + $arguments = array(); + foreach ( array_merge( + array_keys($this->numberedArgs), + array_keys($this->namedArgs)) as $key ) { + $arguments[$key] = $this->getArgument($key); + } + return $arguments; + } + + /** + * @return array + */ + function getNumberedArguments() { + $arguments = array(); + foreach ( array_keys($this->numberedArgs) as $key ) { + $arguments[$key] = $this->getArgument($key); + } + return $arguments; + } + + /** + * @return array + */ + function getNamedArguments() { + $arguments = array(); + foreach ( array_keys($this->namedArgs) as $key ) { + $arguments[$key] = $this->getArgument($key); + } + return $arguments; + } + + /** + * @param $index + * @return array|bool + */ + function getNumberedArgument( $index ) { 
+ if ( !isset( $this->numberedArgs[$index] ) ) { + return false; + } + if ( !isset( $this->numberedExpansionCache[$index] ) ) { + # No trimming for unnamed arguments + $this->numberedExpansionCache[$index] = $this->parent->expand( $this->numberedArgs[$index], PPFrame::STRIP_COMMENTS ); + } + return $this->numberedExpansionCache[$index]; + } + + /** + * @param $name + * @return bool + */ + function getNamedArgument( $name ) { + if ( !isset( $this->namedArgs[$name] ) ) { + return false; + } + if ( !isset( $this->namedExpansionCache[$name] ) ) { + # Trim named arguments post-expand, for backwards compatibility + $this->namedExpansionCache[$name] = trim( + $this->parent->expand( $this->namedArgs[$name], PPFrame::STRIP_COMMENTS ) ); + } + return $this->namedExpansionCache[$name]; + } + + /** + * @param $name + * @return array|bool + */ + function getArgument( $name ) { + $text = $this->getNumberedArgument( $name ); + if ( $text === false ) { + $text = $this->getNamedArgument( $name ); + } + return $text; + } + + /** + * Return true if the frame is a template frame + * + * @return bool + */ + function isTemplate() { + return true; + } +} + +/** + * Expansion frame with custom arguments + * @ingroup Parser + */ +class PPCustomFrame_HipHop extends PPFrame_HipHop { + var $args; + + function __construct( $preprocessor, $args ) { + parent::__construct( $preprocessor ); + $this->args = $args; + } + + function __toString() { + $s = 'cstmframe{'; + $first = true; + foreach ( $this->args as $name => $value ) { + if ( $first ) { + $first = false; + } else { + $s .= ', '; + } + $s .= "\"$name\":\"" . + str_replace( '"', '\\"', $value->__toString() ) . 
'"';
+		}
+		$s .= '}';
+		return $s;
+	}
+
+	/**
+	 * @return bool
+	 */
+	function isEmpty() {
+		return !count( $this->args );
+	}
+
+	/**
+	 * @param $index
+	 * @return bool
+	 */
+	function getArgument( $index ) {
+		if ( !isset( $this->args[$index] ) ) {
+			return false;
+		}
+		return $this->args[$index];
+	}
+}
+
+/**
+ * @ingroup Parser
+ */
+class PPNode_HipHop_Tree implements PPNode {
+	var $name, $firstChild, $lastChild, $nextSibling;
+
+	function __construct( $name ) {
+		$this->name = $name;
+		$this->firstChild = $this->lastChild = $this->nextSibling = false;
+	}
+
+	function __toString() {
+		$inner = '';
+		$attribs = '';
+		for ( $node = $this->firstChild; $node; $node = $node->nextSibling ) {
+			if ( $node instanceof PPNode_HipHop_Attr ) {
+				$attribs .= ' ' . $node->name . '="' . htmlspecialchars( $node->value ) . '"';
+			} else {
+				$inner .= $node->__toString();
+			}
+		}
+		if ( $inner === '' ) {
+			return "<{$this->name}$attribs/>";
+		} else {
+			return "<{$this->name}$attribs>$inner</{$this->name}>";
+		}
+	}
+
+	/**
+	 * @param $name
+	 * @param $text
+	 * @return PPNode_HipHop_Tree
+	 */
+	static function newWithText( $name, $text ) {
+		$obj = new self( $name );
+		$obj->addChild( new PPNode_HipHop_Text( $text ) );
+		return $obj;
+	}
+
+	function addChild( $node ) {
+		if ( $this->lastChild === false ) {
+			$this->firstChild = $this->lastChild = $node;
+		} else {
+			$this->lastChild->nextSibling = $node;
+			$this->lastChild = $node;
+		}
+	}
+
+	/**
+	 * @return PPNode_HipHop_Array
+	 */
+	function getChildren() {
+		$children = array();
+		for ( $child = $this->firstChild; $child; $child = $child->nextSibling ) {
+			$children[] = $child;
+		}
+		return new PPNode_HipHop_Array( $children );
+	}
+
+	function getFirstChild() {
+		return $this->firstChild;
+	}
+
+	function getNextSibling() {
+		return $this->nextSibling;
+	}
+
+	/**
+	 * Collect the direct children whose name is exactly $name.
+	 * Children without a name property are skipped.
+	 *
+	 * @param $name string Node name to look for
+	 * @return array Matching child nodes, in sibling order
+	 */
+	function getChildrenOfType( $name ) {
+		$children = array();
+		for ( $child = $this->firstChild; $child; $child = $child->nextSibling ) {
+			if ( isset( $child->name ) && $child->name === $name ) {
+				// Collect the node itself, not the name that was searched for
+				$children[] = $child;
+			}
+		}
+		return $children;
+	}
+
+	/**
+	 * @return bool
+	 */
+	function getLength() {
+		return false;
+	}
+
+	/**
+	 * @param $i
+	 * @return bool
+	 */
+	function item( $i ) {
+		return false;
+	}
+
+	/**
+	 * @return string
+	 */
+	function getName() {
+		return $this->name;
+	}
+
+	/**
+	 * Split a <part> node into an associative array containing:
+	 *    name          PPNode name
+	 *    index         String index
+	 *    value         PPNode value
+	 *
+	 * @return array
+	 */
+	function splitArg() {
+		$bits = array();
+		for ( $child = $this->firstChild; $child; $child = $child->nextSibling ) {
+			if ( !isset( $child->name ) ) {
+				continue;
+			}
+			if ( $child->name === 'name' ) {
+				$bits['name'] = $child;
+				if ( $child->firstChild instanceof PPNode_HipHop_Attr
+					&& $child->firstChild->name === 'index' )
+				{
+					$bits['index'] = $child->firstChild->value;
+				}
+			} elseif ( $child->name === 'value' ) {
+				$bits['value'] = $child;
+			}
+		}
+
+		if ( !isset( $bits['name'] ) ) {
+			throw new MWException( 'Invalid brace node passed to ' . __METHOD__ );
+		}
+		if ( !isset( $bits['index'] ) ) {
+			$bits['index'] = '';
+		}
+		return $bits;
+	}
+
+	/**
+	 * Split an <ext> node into an associative array containing name, attr, inner and close
+	 * All values in the resulting array are PPNodes. Inner and close are optional.
+	 *
+	 * @return array
+	 */
+	function splitExt() {
+		$bits = array();
+		for ( $child = $this->firstChild; $child; $child = $child->nextSibling ) {
+			if ( !isset( $child->name ) ) {
+				continue;
+			}
+			if ( $child->name === 'name' ) {
+				$bits['name'] = $child;
+			} elseif ( $child->name === 'attr' ) {
+				$bits['attr'] = $child;
+			} elseif ( $child->name === 'inner' ) {
+				$bits['inner'] = $child;
+			} elseif ( $child->name === 'close' ) {
+				$bits['close'] = $child;
+			}
+		}
+		if ( !isset( $bits['name'] ) ) {
+			throw new MWException( 'Invalid ext node passed to ' . 
__METHOD__ ); + } + return $bits; + } + + /** + * Split an <h> node + * + * @return array + */ + function splitHeading() { + if ( $this->name !== 'h' ) { + throw new MWException( 'Invalid h node passed to ' . __METHOD__ ); + } + $bits = array(); + for ( $child = $this->firstChild; $child; $child = $child->nextSibling ) { + if ( !isset( $child->name ) ) { + continue; + } + if ( $child->name === 'i' ) { + $bits['i'] = $child->value; + } elseif ( $child->name === 'level' ) { + $bits['level'] = $child->value; + } + } + if ( !isset( $bits['i'] ) ) { + throw new MWException( 'Invalid h node passed to ' . __METHOD__ ); + } + return $bits; + } + + /** + * Split a <template> or <tplarg> node + * + * @return array + */ + function splitTemplate() { + $parts = array(); + $bits = array( 'lineStart' => '' ); + for ( $child = $this->firstChild; $child; $child = $child->nextSibling ) { + if ( !isset( $child->name ) ) { + continue; + } + if ( $child->name === 'title' ) { + $bits['title'] = $child; + } + if ( $child->name === 'part' ) { + $parts[] = $child; + } + if ( $child->name === 'lineStart' ) { + $bits['lineStart'] = '1'; + } + } + if ( !isset( $bits['title'] ) ) { + throw new MWException( 'Invalid node passed to ' . __METHOD__ ); + } + $bits['parts'] = new PPNode_HipHop_Array( $parts ); + return $bits; + } +} + +/** + * @ingroup Parser + */ +class PPNode_HipHop_Text implements PPNode { + var $value, $nextSibling; + + function __construct( $value ) { + if ( is_object( $value ) ) { + throw new MWException( __CLASS__ . 
' given object instead of string' ); + } + $this->value = $value; + } + + function __toString() { + return htmlspecialchars( $this->value ); + } + + function getNextSibling() { + return $this->nextSibling; + } + + function getChildren() { return false; } + function getFirstChild() { return false; } + function getChildrenOfType( $name ) { return false; } + function getLength() { return false; } + function item( $i ) { return false; } + function getName() { return '#text'; } + function splitArg() { throw new MWException( __METHOD__ . ': not supported' ); } + function splitExt() { throw new MWException( __METHOD__ . ': not supported' ); } + function splitHeading() { throw new MWException( __METHOD__ . ': not supported' ); } +} + +/** + * @ingroup Parser + */ +class PPNode_HipHop_Array implements PPNode { + var $value, $nextSibling; + + function __construct( $value ) { + $this->value = $value; + } + + function __toString() { + return var_export( $this, true ); + } + + function getLength() { + return count( $this->value ); + } + + function item( $i ) { + return $this->value[$i]; + } + + function getName() { return '#nodelist'; } + + function getNextSibling() { + return $this->nextSibling; + } + + function getChildren() { return false; } + function getFirstChild() { return false; } + function getChildrenOfType( $name ) { return false; } + function splitArg() { throw new MWException( __METHOD__ . ': not supported' ); } + function splitExt() { throw new MWException( __METHOD__ . ': not supported' ); } + function splitHeading() { throw new MWException( __METHOD__ . ': not supported' ); } +} + +/** + * @ingroup Parser + */ +class PPNode_HipHop_Attr implements PPNode { + var $name, $value, $nextSibling; + + function __construct( $name, $value ) { + $this->name = $name; + $this->value = $value; + } + + function __toString() { + return "<@{$this->name}>" . htmlspecialchars( $this->value ) . 
"</@{$this->name}>"; + } + + function getName() { + return $this->name; + } + + function getNextSibling() { + return $this->nextSibling; + } + + function getChildren() { return false; } + function getFirstChild() { return false; } + function getChildrenOfType( $name ) { return false; } + function getLength() { return false; } + function item( $i ) { return false; } + function splitArg() { throw new MWException( __METHOD__ . ': not supported' ); } + function splitExt() { throw new MWException( __METHOD__ . ': not supported' ); } + function splitHeading() { throw new MWException( __METHOD__ . ': not supported' ); } +} diff --git a/includes/parser/StripState.php b/includes/parser/StripState.php new file mode 100644 index 00000000..c7bd1e77 --- /dev/null +++ b/includes/parser/StripState.php @@ -0,0 +1,175 @@ +<?php + +/** + * @todo document, briefly. + * @ingroup Parser + */ +class StripState { + protected $prefix; + protected $data; + protected $regex; + + protected $tempType, $tempMergePrefix; + + function __construct( $prefix ) { + $this->prefix = $prefix; + $this->data = array( + 'nowiki' => array(), + 'general' => array() + ); + $this->regex = "/{$this->prefix}([^\x7f]+)" . Parser::MARKER_SUFFIX . 
'/'; + } + + /** + * Add a nowiki strip item + * @param $marker + * @param $value + */ + function addNoWiki( $marker, $value ) { + $this->addItem( 'nowiki', $marker, $value ); + } + + /** + * @param $marker + * @param $value + */ + function addGeneral( $marker, $value ) { + $this->addItem( 'general', $marker, $value ); + } + + /** + * @throws MWException + * @param $type + * @param $marker + * @param $value + */ + protected function addItem( $type, $marker, $value ) { + if ( !preg_match( $this->regex, $marker, $m ) ) { + throw new MWException( "Invalid marker: $marker" ); + } + + $this->data[$type][$m[1]] = $value; + } + + /** + * @param $text + * @return mixed + */ + function unstripGeneral( $text ) { + return $this->unstripType( 'general', $text ); + } + + /** + * @param $text + * @return mixed + */ + function unstripNoWiki( $text ) { + return $this->unstripType( 'nowiki', $text ); + } + + /** + * @param $text + * @return mixed + */ + function unstripBoth( $text ) { + $text = $this->unstripType( 'general', $text ); + $text = $this->unstripType( 'nowiki', $text ); + return $text; + } + + /** + * @param $type + * @param $text + * @return mixed + */ + protected function unstripType( $type, $text ) { + // Shortcut + if ( !count( $this->data[$type] ) ) { + return $text; + } + + wfProfileIn( __METHOD__ ); + $this->tempType = $type; + $out = preg_replace_callback( $this->regex, array( $this, 'unstripCallback' ), $text ); + $this->tempType = null; + wfProfileOut( __METHOD__ ); + return $out; + } + + /** + * @param $m array + * @return array + */ + protected function unstripCallback( $m ) { + if ( isset( $this->data[$this->tempType][$m[1]] ) ) { + return $this->data[$this->tempType][$m[1]]; + } else { + return $m[0]; + } + } + + /** + * Get a StripState object which is sufficient to unstrip the given text. + * It will contain the minimum subset of strip items necessary. 
+ * + * @param $text string + * + * @return StripState + */ + function getSubState( $text ) { + $subState = new StripState( $this->prefix ); + $pos = 0; + while ( true ) { + $startPos = strpos( $text, $this->prefix, $pos ); + $endPos = strpos( $text, Parser::MARKER_SUFFIX, $pos ); + if ( $startPos === false || $endPos === false ) { + break; + } + + $endPos += strlen( Parser::MARKER_SUFFIX ); + $marker = substr( $text, $startPos, $endPos - $startPos ); + if ( !preg_match( $this->regex, $marker, $m ) ) { + continue; + } + + $key = $m[1]; + if ( isset( $this->data['nowiki'][$key] ) ) { + $subState->data['nowiki'][$key] = $this->data['nowiki'][$key]; + } elseif ( isset( $this->data['general'][$key] ) ) { + $subState->data['general'][$key] = $this->data['general'][$key]; + } + $pos = $endPos; + } + return $subState; + } + + /** + * Merge another StripState object into this one. The strip marker keys + * will not be preserved. The strings in the $texts array will have their + * strip markers rewritten, the resulting array of strings will be returned. + * + * @param $otherState StripState + * @param $texts Array + * @return Array + */ + function merge( $otherState, $texts ) { + $mergePrefix = Parser::getRandomString(); + + foreach ( $otherState->data as $type => $items ) { + foreach ( $items as $key => $value ) { + $this->data[$type]["$mergePrefix-$key"] = $value; + } + } + + $this->tempMergePrefix = $mergePrefix; + $texts = preg_replace_callback( $otherState->regex, array( $this, 'mergeCallback' ), $texts ); + $this->tempMergePrefix = null; + return $texts; + } + + protected function mergeCallback( $m ) { + $key = $m[1]; + return "{$this->prefix}{$this->tempMergePrefix}-$key" . 
Parser::MARKER_SUFFIX; + } +} + diff --git a/includes/parser/Tidy.php b/includes/parser/Tidy.php index 38f22fd8..3a6d3e9c 100644 --- a/includes/parser/Tidy.php +++ b/includes/parser/Tidy.php @@ -6,6 +6,74 @@ */ /** + * Class used to hide mw:editsection tokens from Tidy so that it doesn't break them + * or break on them. This is a bit of a hack for now, but hopefully in the future + * we may create a real postprocessor or something that will replace this. + * It's called wrapper because for now it basically takes over MWTidy::tidy's task + * of wrapping the text in a xhtml block + * + * This re-uses some of the parser's UNIQ tricks, though some of it is private so it's + * duplicated. Perhaps we should create an abstract marker hiding class. + */ +class MWTidyWrapper { + + /** + * @var ReplacementArray + */ + protected $mTokens; + + protected $mUniqPrefix; + + protected $mMarkerIndex; + + public function __construct() { + $this->mTokens = null; + $this->mUniqPrefix = null; + } + + /** + * @param $text string + * @return string + */ + public function getWrapped( $text ) { + $this->mTokens = new ReplacementArray; + $this->mUniqPrefix = "\x7fUNIQ" . + dechex( mt_rand( 0, 0x7fffffff ) ) . dechex( mt_rand( 0, 0x7fffffff ) ); + $this->mMarkerIndex = 0; + + $wrappedtext = preg_replace_callback( ParserOutput::EDITSECTION_REGEX, + array( &$this, 'replaceEditSectionLinksCallback' ), $text ); + + $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'. + ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'. + '<head><title>test</title></head><body>'.$wrappedtext.'</body></html>'; + + return $wrappedtext; + } + + /** + * @param $m array + * + * @return string + */ + function replaceEditSectionLinksCallback( $m ) { + $marker = "{$this->mUniqPrefix}-item-{$this->mMarkerIndex}" . 
Parser::MARKER_SUFFIX; + $this->mMarkerIndex++; + $this->mTokens->setPair( $marker, $m[0] ); + return $marker; + } + + /** + * @param $text string + * @return string + */ + public function postprocess( $text ) { + return $this->mTokens->replace( $text ); + } + +} + +/** * Class to interact with HTML tidy * * Either the external tidy program or the in-process tidy extension @@ -15,7 +83,6 @@ * @ingroup Parser */ class MWTidy { - /** * Interface with html tidy, used if $wgUseTidy = true. * If tidy isn't able to correct the markup, the original will be @@ -27,20 +94,26 @@ class MWTidy { public static function tidy( $text ) { global $wgTidyInternal; - $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'. -' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'. -'<head><title>test</title></head><body>'.$text.'</body></html>'; + $wrapper = new MWTidyWrapper; + $wrappedtext = $wrapper->getWrapped( $text ); - if( $wgTidyInternal ) { - $correctedtext = self::execInternalTidy( $wrappedtext ); + $retVal = null; + if ( $wgTidyInternal ) { + $correctedtext = self::execInternalTidy( $wrappedtext, false, $retVal ); } else { - $correctedtext = self::execExternalTidy( $wrappedtext ); + $correctedtext = self::execExternalTidy( $wrappedtext, false, $retVal ); } - if( is_null( $correctedtext ) ) { + + if ( $retVal < 0 ) { + wfDebug( "Possible tidy configuration error!\n" ); + return $text . "\n<!-- Tidy was unable to run -->\n"; + } elseif ( is_null( $correctedtext ) ) { wfDebug( "Tidy error detected!\n" ); return $text . 
"\n<!-- Tidy found serious XHTML errors -->\n"; } + $correctedtext = $wrapper->postprocess( $correctedtext ); // restore any hidden tokens + return $correctedtext; } @@ -60,6 +133,7 @@ class MWTidy { } else { $errorStr = self::execExternalTidy( $text, true, $retval ); } + return ( $retval < 0 && $errorStr == '' ) || $retval == 0; } @@ -68,7 +142,7 @@ class MWTidy { * Also called in OutputHandler.php for full page validation * * @param $text String: HTML to check - * @param $stderr Boolean: Whether to read from STDERR rather than STDOUT + * @param $stderr Boolean: Whether to read result from STDERR rather than STDOUT * @param &$retval Exit code (-1 on internal error) * @return mixed String or null */ @@ -79,7 +153,7 @@ class MWTidy { $cleansource = ''; $opts = ' -utf8'; - if( $stderr ) { + if ( $stderr ) { $descriptorspec = array( 0 => array( 'pipe', 'r' ), 1 => array( 'file', wfGetNull(), 'a' ), @@ -96,73 +170,84 @@ class MWTidy { $readpipe = $stderr ? 2 : 1; $pipes = array(); - if( function_exists( 'proc_open' ) ) { - $process = proc_open( "$wgTidyBin -config $wgTidyConf $wgTidyOpts$opts", $descriptorspec, $pipes ); - if ( is_resource( $process ) ) { - // Theoretically, this style of communication could cause a deadlock - // here. If the stdout buffer fills up, then writes to stdin could - // block. This doesn't appear to happen with tidy, because tidy only - // writes to stdout after it's finished reading from stdin. Search - // for tidyParseStdin and tidySaveStdout in console/tidy.c - fwrite( $pipes[0], $text ); - fclose( $pipes[0] ); - while ( !feof( $pipes[$readpipe] ) ) { - $cleansource .= fgets( $pipes[$readpipe], 1024 ); - } - fclose( $pipes[$readpipe] ); - $retval = proc_close( $process ); - } else { - $retval = -1; + $process = proc_open( + "$wgTidyBin -config $wgTidyConf $wgTidyOpts$opts", $descriptorspec, $pipes ); + + if ( is_resource( $process ) ) { + // Theoretically, this style of communication could cause a deadlock + // here. 
If the stdout buffer fills up, then writes to stdin could + // block. This doesn't appear to happen with tidy, because tidy only + // writes to stdout after it's finished reading from stdin. Search + // for tidyParseStdin and tidySaveStdout in console/tidy.c + fwrite( $pipes[0], $text ); + fclose( $pipes[0] ); + while ( !feof( $pipes[$readpipe] ) ) { + $cleansource .= fgets( $pipes[$readpipe], 1024 ); } + fclose( $pipes[$readpipe] ); + $retval = proc_close( $process ); } else { - $retval = -1; + wfWarn( "Unable to start external tidy process" ); + $retval = -1; } - if( !$stderr && $cleansource == '' && $text != '' ) { + if ( !$stderr && $cleansource == '' && $text != '' ) { // Some kind of error happened, so we couldn't get the corrected text. // Just give up; we'll use the source text and append a warning. $cleansource = null; } + wfProfileOut( __METHOD__ ); return $cleansource; } /** - * Use the HTML tidy PECL extension to use the tidy library in-process, + * Use the HTML tidy extension to use the tidy library in-process, * saving the overhead of spawning a new process. * - * 'pear install tidy' should be able to compile the extension module. + * @param $text String: HTML to check + * @param $stderr Boolean: Whether to read result from error status instead of output + * @param &$retval Exit code (-1 on internal error) + * @return mixed String or null */ private static function execInternalTidy( $text, $stderr = false, &$retval = null ) { global $wgTidyConf, $wgDebugTidy; wfProfileIn( __METHOD__ ); + if ( !MWInit::classExists( 'tidy' ) ) { + wfWarn( "Unable to load internal tidy class." 
); + $retval = -1; + + wfProfileOut( __METHOD__ ); + return null; + } + $tidy = new tidy; $tidy->parseString( $text, $wgTidyConf, 'utf8' ); - if( $stderr ) { + if ( $stderr ) { $retval = $tidy->getStatus(); + wfProfileOut( __METHOD__ ); return $tidy->errorBuffer; } else { $tidy->cleanRepair(); $retval = $tidy->getStatus(); - if( $retval == 2 ) { + if ( $retval == 2 ) { // 2 is magic number for fatal error // http://www.php.net/manual/en/function.tidy-get-status.php $cleansource = null; } else { $cleansource = tidy_get_output( $tidy ); - } - if ( $wgDebugTidy && $retval > 0 ) { - $cleansource .= "<!--\nTidy reports:\n" . - str_replace( '-->', '-->', $tidy->errorBuffer ) . - "\n-->"; + if ( $wgDebugTidy && $retval > 0 ) { + $cleansource .= "<!--\nTidy reports:\n" . + str_replace( '-->', '-->', $tidy->errorBuffer ) . + "\n-->"; + } } wfProfileOut( __METHOD__ ); return $cleansource; } } - } |