diff options
Diffstat (limited to 'includes/parser')
-rw-r--r-- | includes/parser/CacheTime.php | 2 | ||||
-rw-r--r-- | includes/parser/CoreLinkFunctions.php | 12 | ||||
-rw-r--r-- | includes/parser/CoreParserFunctions.php | 76 | ||||
-rw-r--r-- | includes/parser/CoreTagHooks.php | 1 | ||||
-rw-r--r-- | includes/parser/DateFormatter.php | 54 | ||||
-rw-r--r-- | includes/parser/LinkHolderArray.php | 96 | ||||
-rw-r--r-- | includes/parser/Parser.php | 744 | ||||
-rw-r--r-- | includes/parser/ParserCache.php | 7 | ||||
-rw-r--r-- | includes/parser/ParserOptions.php | 86 | ||||
-rw-r--r-- | includes/parser/ParserOutput.php | 208 | ||||
-rw-r--r-- | includes/parser/Parser_LinkHooks.php | 118 | ||||
-rw-r--r-- | includes/parser/Preprocessor.php | 7 | ||||
-rw-r--r-- | includes/parser/Preprocessor_DOM.php | 77 | ||||
-rw-r--r-- | includes/parser/Preprocessor_Hash.php | 76 | ||||
-rw-r--r-- | includes/parser/Preprocessor_HipHop.hphp | 2013 | ||||
-rw-r--r-- | includes/parser/StripState.php | 7 | ||||
-rw-r--r-- | includes/parser/Tidy.php | 58 |
17 files changed, 958 insertions, 2684 deletions
diff --git a/includes/parser/CacheTime.php b/includes/parser/CacheTime.php index 881dded7..6b70e1da 100644 --- a/includes/parser/CacheTime.php +++ b/includes/parser/CacheTime.php @@ -116,7 +116,7 @@ class CacheTime { * per-article cache invalidation timestamps, or if it comes from * an incompatible older version. * - * @param $touched String: the affected article's last touched timestamp + * @param string $touched the affected article's last touched timestamp * @return Boolean */ public function expired( $touched ) { diff --git a/includes/parser/CoreLinkFunctions.php b/includes/parser/CoreLinkFunctions.php index 4bfa9d35..1cabf766 100644 --- a/includes/parser/CoreLinkFunctions.php +++ b/includes/parser/CoreLinkFunctions.php @@ -47,15 +47,15 @@ class CoreLinkFunctions { */ static function defaultLinkHook( $parser, $holders, $markers, Title $title, $titleText, &$displayText = null, &$leadingColon = false ) { - if( isset($displayText) && $markers->findMarker( $displayText ) ) { + if( isset( $displayText ) && $markers->findMarker( $displayText ) ) { # There are links inside of the displayText # For backwards compatibility the deepest links are dominant so this # link should not be handled - $displayText = $markers->expand($displayText); + $displayText = $markers->expand( $displayText ); # Return false so that this link is reverted back to WikiText return false; } - return $holders->makeHolder( $title, isset($displayText) ? $displayText : $titleText, array(), '', '' ); + return $holders->makeHolder( $title, isset( $displayText ) ? $displayText : $titleText, array(), '', '' ); } /** @@ -73,15 +73,15 @@ class CoreLinkFunctions { global $wgContLang; # When a category link starts with a : treat it as a normal link if( $leadingColon ) return true; - if( isset($sortText) && $markers->findMarker( $sortText ) ) { + if( isset( $sortText ) && $markers->findMarker( $sortText ) ) { # There are links inside of the sortText # For backwards compatibility the deepest links are dominant so this # link should not be handled - $sortText = $markers->expand($sortText); + $sortText = $markers->expand( $sortText ); # Return false so that this link is reverted back to WikiText return false; } - if( !isset($sortText) ) $sortText = $parser->getDefaultSort(); + if( !isset( $sortText ) ) $sortText = $parser->getDefaultSort(); $sortText = Sanitizer::decodeCharReferences( $sortText ); $sortText = str_replace( "\n", '', $sortText ); $sortText = $wgContLang->convertCategoryKey( $sortText ); diff --git a/includes/parser/CoreParserFunctions.php b/includes/parser/CoreParserFunctions.php index 8917b6d0..cdd03aa4 100644 --- a/includes/parser/CoreParserFunctions.php +++ b/includes/parser/CoreParserFunctions.php @@ -175,8 +175,8 @@ class CoreParserFunctions { * For links to "wiki"s, or similar software, spaces are encoded as '_', * * @param $parser Parser object - * @param $s String: The text to encode. - * @param $arg String (optional): The type of encoding. + * @param string $s The text to encode. + * @param string $arg (optional): The type of encoding. * @return string */ static function urlencode( $parser, $s = '', $arg = null ) { @@ -269,12 +269,14 @@ class CoreParserFunctions { /** * @param $parser Parser * @param string $num - * @param null $raw - * @return + * @param string $arg + * @return string */ - static function formatnum( $parser, $num = '', $raw = null) { - if ( self::isRaw( $raw ) ) { + static function formatnum( $parser, $num = '', $arg = null ) { + if ( self::matchAgainstMagicword( 'rawsuffix', $arg ) ) { $func = array( $parser->getFunctionLang(), 'parseFormattedNumber' ); + } elseif ( self::matchAgainstMagicword( 'nocommafysuffix', $arg ) ) { + $func = array( $parser->getFunctionLang(), 'formatNumNoSeparators' ); } else { $func = array( $parser->getFunctionLang(), 'formatNum' ); } @@ -351,7 +353,7 @@ class CoreParserFunctions { * title which will normalise to the canonical title * * @param $parser Parser: parent parser - * @param $text String: desired title text + * @param string $text desired title text * @return String */ static function displaytitle( $parser, $text = '' ) { @@ -386,20 +388,23 @@ class CoreParserFunctions { return ''; } - static function isRaw( $param ) { - static $mwRaw; - if ( !$mwRaw ) { - $mwRaw =& MagicWord::get( 'rawsuffix' ); - } - if ( is_null( $param ) ) { + /** + * Matches the given value against the value of given magic word + * + * @param string $magicword magic word key + * @param mixed $value value to match + * @return boolean true on successful match + */ + static private function matchAgainstMagicword( $magicword, $value ) { + if ( strval( $value ) === '' ) { return false; - } else { - return $mwRaw->match( $param ); } + $mwObject = MagicWord::get( $magicword ); + return $mwObject->match( $value ); } static function formatRaw( $num, $raw ) { - if( self::isRaw( $raw ) ) { + if( self::matchAgainstMagicword( 'rawsuffix', $raw ) ) { return $num; } else { global $wgContLang; @@ -422,7 +427,7 @@ class CoreParserFunctions { return self::formatRaw( SiteStats::images(), $raw ); } static function numberofadmins( $parser, $raw = null ) { - return self::formatRaw( SiteStats::numberingroup('sysop'), $raw ); + return self::formatRaw( SiteStats::numberingroup( 'sysop' ), $raw ); } static function numberofedits( $parser, $raw = null ) { return self::formatRaw( SiteStats::edits(), $raw ); @@ -437,7 +442,6 @@ class CoreParserFunctions { return self::formatRaw( SiteStats::numberingroup( strtolower( $name ) ), $raw ); } - /** * Given a title, return the namespace name that would be given by the * corresponding magic word @@ -585,7 +589,7 @@ class CoreParserFunctions { static $cache = array(); // split the given option to its variable - if( self::isRaw( $arg1 ) ) { + if( self::matchAgainstMagicword( 'rawsuffix', $arg1 ) ) { //{{pagesincategory:|raw[|type]}} $raw = $arg1; $type = $magicWords->matchStartToEnd( $arg2 ); @@ -641,7 +645,7 @@ class CoreParserFunctions { * @todo Document parameters * * @param $parser Parser - * @param $page String TODO DOCUMENT (Default: empty string) + * @param string $page TODO DOCUMENT (Default: empty string) * @param $raw TODO DOCUMENT (Default: null) * @return string */ @@ -662,21 +666,31 @@ class CoreParserFunctions { $length = $cache[$page]; } elseif( $parser->incrementExpensiveFunctionCount() ) { $rev = Revision::newFromTitle( $title, false, Revision::READ_NORMAL ); - $id = $rev ? $rev->getPage() : 0; + $pageID = $rev ? $rev->getPage() : 0; + $revID = $rev ? $rev->getId() : 0; $length = $cache[$page] = $rev ? $rev->getSize() : 0; // Register dependency in templatelinks - $parser->mOutput->addTemplate( $title, $id, $rev ? $rev->getId() : 0 ); + $parser->mOutput->addTemplate( $title, $pageID, $revID ); } return self::formatRaw( $length, $raw ); } /** - * Returns the requested protection level for the current page + * Returns the requested protection level for the current page + * + * @param Parser $parser + * @param string $type + * @param string $title + * * @return string */ - static function protectionlevel( $parser, $type = '' ) { - $restrictions = $parser->mTitle->getRestrictions( strtolower( $type ) ); + static function protectionlevel( $parser, $type = '', $title = '' ) { + $titleObject = Title::newFromText( $title ); + if ( !( $titleObject instanceof Title ) ) { + $titleObject = $parser->mTitle; + } + $restrictions = $titleObject->getRestrictions( strtolower( $type ) ); # Title::getRestrictions returns an array, its possible it may have # multiple values in the future return implode( $restrictions, ',' ); @@ -685,8 +699,8 @@ class CoreParserFunctions { /** * Gives language names. * @param $parser Parser - * @param $code String Language code (of which to get name) - * @param $inLanguage String Language code (in which to get name) + * @param string $code Language code (of which to get name) + * @param string $inLanguage Language code (in which to get name) * @return String */ static function language( $parser, $code = '', $inLanguage = '' ) { @@ -739,7 +753,7 @@ class CoreParserFunctions { */ static function anchorencode( $parser, $text ) { $text = $parser->killMarkers( $text ); - return substr( $parser->guessSectionNameFromWikiText( $text ), 1); + return (string)substr( $parser->guessSectionNameFromWikiText( $text ), 1 ); } static function special( $parser, $text ) { @@ -758,8 +772,8 @@ class CoreParserFunctions { /** * @param $parser Parser - * @param $text String The sortkey to use - * @param $uarg String Either "noreplace" or "noerror" (in en) + * @param string $text The sortkey to use + * @param string $uarg Either "noreplace" or "noerror" (in en) * both suppress errors, and noreplace does nothing if * a default sortkey already exists. * @return string @@ -790,7 +804,7 @@ class CoreParserFunctions { // Usage {{filepath|300}}, {{filepath|nowiki}}, {{filepath|nowiki|300}} or {{filepath|300|nowiki}} // or {{filepath|300px}}, {{filepath|200x300px}}, {{filepath|nowiki|200x300px}}, {{filepath|200x300px|nowiki}} - public static function filepath( $parser, $name='', $argA='', $argB='' ) { + public static function filepath( $parser, $name = '', $argA = '', $argB = '' ) { $file = wfFindFile( $name ); if( $argA == 'nowiki' ) { diff --git a/includes/parser/CoreTagHooks.php b/includes/parser/CoreTagHooks.php index 296be66f..65051839 100644 --- a/includes/parser/CoreTagHooks.php +++ b/includes/parser/CoreTagHooks.php @@ -72,6 +72,7 @@ class CoreTagHooks { * @param $content string * @param $attributes array * @param $parser Parser + * @throws MWException * @return array */ static function html( $content, $attributes, $parser ) { diff --git a/includes/parser/DateFormatter.php b/includes/parser/DateFormatter.php index 2917b4a7..a2da3074 100644 --- a/includes/parser/DateFormatter.php +++ b/includes/parser/DateFormatter.php @@ -22,7 +22,7 @@ */ /** - * Date formatter, recognises dates in plain text and formats them accoding to user preferences. + * Date formatter, recognises dates in plain text and formats them according to user preferences. * @todo preferences, OutputPage * @ingroup Parser */ @@ -55,7 +55,7 @@ class DateFormatter { $this->lang = $lang; $this->monthNames = $this->getMonthRegex(); - for ( $i=1; $i<=12; $i++ ) { + for ( $i = 1; $i <= 12; $i++ ) { $this->xMonths[$this->lang->lc( $this->lang->getMonthName( $i ) )] = $i; $this->xMonths[$this->lang->lc( $this->lang->getMonthAbbreviation( $i ) )] = $i; } @@ -102,11 +102,11 @@ class DateFormatter { # Rules # pref source target - $this->rules[self::DMY][self::MD] = self::DM; - $this->rules[self::ALL][self::MD] = self::MD; - $this->rules[self::MDY][self::DM] = self::MD; - $this->rules[self::ALL][self::DM] = self::DM; - $this->rules[self::NONE][self::ISO2] = self::ISO1; + $this->rules[self::DMY][self::MD] = self::DM; + $this->rules[self::ALL][self::MD] = self::MD; + $this->rules[self::MDY][self::DM] = self::MD; + $this->rules[self::ALL][self::DM] = self::DM; + $this->rules[self::NONE][self::ISO2] = self::ISO1; $this->preferences = array( 'default' => self::NONE, @@ -140,12 +140,12 @@ class DateFormatter { } /** - * @param $preference String: User preference - * @param $text String: Text to reformat - * @param $options Array: can contain 'linked' and/or 'match-whole' + * @param string $preference User preference + * @param string $text Text to reformat + * @param array $options can contain 'linked' and/or 'match-whole' * @return mixed|String */ - function reformat( $preference, $text, $options = array('linked') ) { + function reformat( $preference, $text, $options = array( 'linked' ) ) { $linked = in_array( 'linked', $options ); $match_whole = in_array( 'match-whole', $options ); @@ -154,7 +154,7 @@ class DateFormatter { } else { $preference = self::NONE; } - for ( $i=1; $i<=self::LAST; $i++ ) { + for ( $i = 1; $i <= self::LAST; $i++ ) { $this->mSource = $i; if ( isset ( $this->rules[$preference][$i] ) ) { # Specific rules @@ -172,21 +172,21 @@ class DateFormatter { $regex = $this->regexes[$i]; // Horrible hack - if (!$linked) { + if ( !$linked ) { $regex = str_replace( array( '\[\[', '\]\]' ), '', $regex ); } - if ($match_whole) { + if ( $match_whole ) { // Let's hope this works $regex = preg_replace( '!^/!', '/^', $regex ); $regex = str_replace( $this->regexTrail, - '$'.$this->regexTrail, $regex ); + '$' . $this->regexTrail, $regex ); } // Another horrible hack $this->mLinked = $linked; $text = preg_replace_callback( $regex, array( &$this, 'replace' ), $text ); - unset($this->mLinked); + unset( $this->mLinked ); } return $text; } @@ -200,10 +200,10 @@ class DateFormatter { $linked = true; if ( isset( $this->mLinked ) ) $linked = $this->mLinked; - + $bits = array(); $key = $this->keys[$this->mSource]; - for ( $p=0; $p < strlen($key); $p++ ) { + for ( $p = 0; $p < strlen( $key ); $p++ ) { if ( $key[$p] != ' ' ) { $bits[$key[$p]] = $matches[$p+1]; } @@ -219,8 +219,8 @@ class DateFormatter { */ function formatDate( $bits, $link = true ) { $format = $this->targets[$this->mTarget]; - - if (!$link) { + + if ( !$link ) { // strip piped links $format = preg_replace( '/\[\[[^|]+\|([^\]]+)\]\]/', '$1', $format ); // strip remaining links @@ -246,11 +246,11 @@ class DateFormatter { } } - if ( !isset($bits['d']) ) { + if ( !isset( $bits['d'] ) ) { $bits['d'] = sprintf( '%02d', $bits['j'] ); } - for ( $p=0; $p < strlen( $format ); $p++ ) { + for ( $p = 0; $p < strlen( $format ); $p++ ) { $char = $format[$p]; switch ( $char ) { case 'd': # ISO day of month @@ -263,7 +263,7 @@ class DateFormatter { $text .= $bits['y']; break; case 'j': # ordinary day of month - if ( !isset($bits['j']) ) { + if ( !isset( $bits['j'] ) ) { $text .= intval( $bits['d'] ); } else { $text .= $bits['j']; @@ -271,7 +271,7 @@ class DateFormatter { break; case 'F': # long month if ( !isset( $bits['F'] ) ) { - $m = intval($bits['m']); + $m = intval( $bits['m'] ); if ( $m > 12 || $m < 1 ) { $fail = true; } else { @@ -293,7 +293,7 @@ class DateFormatter { } $isoBits = array(); - if ( isset($bits['y']) ) + if ( isset( $bits['y'] ) ) $isoBits[] = $bits['y']; $isoBits[] = $bits['m']; $isoBits[] = $bits['d']; @@ -321,7 +321,7 @@ class DateFormatter { /** * Makes an ISO month, e.g. 02, from a month name - * @param $monthName String: month name + * @param string $monthName month name * @return string ISO month name */ function makeIsoMonth( $monthName ) { @@ -331,7 +331,7 @@ class DateFormatter { /** * @todo document - * @param $year String: Year name + * @param string $year Year name * @return string ISO year name */ function makeIsoYear( $year ) { diff --git a/includes/parser/LinkHolderArray.php b/includes/parser/LinkHolderArray.php index d9356b48..49b2d333 100644 --- a/includes/parser/LinkHolderArray.php +++ b/includes/parser/LinkHolderArray.php @@ -43,9 +43,9 @@ class LinkHolderArray { } } - /** + /** * Don't serialize the parent object, it is big, and not needed when it is - * a parameter to mergeForeign(), which is the only application of + * a parameter to mergeForeign(), which is the only application of * serializing at present. * * Compact the titles, only serialize the text form. @@ -103,15 +103,15 @@ class LinkHolderArray { } /** - * Merge a LinkHolderArray from another parser instance into this one. The - * keys will not be preserved. Any text which went with the old - * LinkHolderArray and needs to work with the new one should be passed in + * Merge a LinkHolderArray from another parser instance into this one. The + * keys will not be preserved. Any text which went with the old + * LinkHolderArray and needs to work with the new one should be passed in * the $texts array. The strings in this array will have their link holders * converted for use in the destination link holder. The resulting array of * strings will be returned. * * @param $other LinkHolderArray - * @param $texts Array of strings + * @param array $texts of strings * @return Array */ function mergeForeign( $other, $texts ) { @@ -126,7 +126,7 @@ class LinkHolderArray { $maxId = $newKey > $maxId ? $newKey : $maxId; } } - $texts = preg_replace_callback( '/(<!--LINK \d+:)(\d+)(-->)/', + $texts = preg_replace_callback( '/(<!--LINK \d+:)(\d+)(-->)/', array( $this, 'mergeForeignCallback' ), $texts ); # Renumber interwiki links @@ -135,7 +135,7 @@ class LinkHolderArray { $this->interwikis[$newKey] = $entry; $maxId = $newKey > $maxId ? $newKey : $maxId; } - $texts = preg_replace_callback( '/(<!--IWLINK )(\d+)(-->)/', + $texts = preg_replace_callback( '/(<!--IWLINK )(\d+)(-->)/', array( $this, 'mergeForeignCallback' ), $texts ); # Set the parent link ID to be beyond the highest used ID @@ -159,8 +159,8 @@ class LinkHolderArray { # Internal links $pos = 0; while ( $pos < strlen( $text ) ) { - if ( !preg_match( '/<!--LINK (\d+):(\d+)-->/', - $text, $m, PREG_OFFSET_CAPTURE, $pos ) ) + if ( !preg_match( '/<!--LINK (\d+):(\d+)-->/', + $text, $m, PREG_OFFSET_CAPTURE, $pos ) ) { break; } @@ -210,14 +210,14 @@ class LinkHolderArray { * * @param $nt Title * @param $text String - * @param $query Array [optional] - * @param $trail String [optional] - * @param $prefix String [optional] + * @param array $query [optional] + * @param string $trail [optional] + * @param string $prefix [optional] * @return string */ - function makeHolder( $nt, $text = '', $query = array(), $trail = '', $prefix = '' ) { + function makeHolder( $nt, $text = '', $query = array(), $trail = '', $prefix = '' ) { wfProfileIn( __METHOD__ ); - if ( ! is_object($nt) ) { + if ( !is_object( $nt ) ) { # Fail gracefully $retVal = "<!-- ERROR -->{$prefix}{$text}{$trail}"; } else { @@ -226,7 +226,7 @@ class LinkHolderArray { $entry = array( 'title' => $nt, - 'text' => $prefix.$text.$inside, + 'text' => $prefix . $text . $inside, 'pdbk' => $nt->getPrefixedDBkey(), ); if ( $query !== array() ) { @@ -254,12 +254,12 @@ class LinkHolderArray { * @todo FIXME: Update documentation. makeLinkObj() is deprecated. * Replace <!--LINK--> link placeholders with actual links, in the buffer * Placeholders created in Skin::makeLinkObj() - * Returns an array of link CSS classes, indexed by PDBK. + * @return array of link CSS classes, indexed by PDBK. */ function replace( &$text ) { wfProfileIn( __METHOD__ ); - $colours = $this->replaceInternal( $text ); + $colours = $this->replaceInternal( $text ); // FIXME: replaceInternal doesn't return a value $this->replaceInterwiki( $text ); wfProfileOut( __METHOD__ ); @@ -281,7 +281,7 @@ class LinkHolderArray { $linkCache = LinkCache::singleton(); $output = $this->parent->getOutput(); - wfProfileIn( __METHOD__.'-check' ); + wfProfileIn( __METHOD__ . '-check' ); $dbr = wfGetDB( DB_SLAVE ); $threshold = $this->parent->getOptions()->getStubThreshold(); @@ -322,7 +322,7 @@ class LinkHolderArray { } if ( $queries ) { $where = array(); - foreach( $queries as $ns => $pages ){ + foreach( $queries as $ns => $pages ) { $where[] = $dbr->makeList( array( 'page_namespace' => $ns, @@ -355,19 +355,19 @@ class LinkHolderArray { } unset( $res ); } - if ( count($linkcolour_ids) ) { + if ( count( $linkcolour_ids ) ) { //pass an array of page_ids to an extension wfRunHooks( 'GetLinkColours', array( $linkcolour_ids, &$colours ) ); } - wfProfileOut( __METHOD__.'-check' ); + wfProfileOut( __METHOD__ . '-check' ); # Do a second query for different language variants of links and categories - if($wgContLang->hasVariants()) { + if( $wgContLang->hasVariants() ) { $this->doVariants( $colours ); } # Construct search and replace arrays - wfProfileIn( __METHOD__.'-construct' ); + wfProfileIn( __METHOD__ . '-construct' ); $replacePairs = array(); foreach ( $this->internals as $ns => $entries ) { foreach ( $entries as $index => $entry ) { @@ -399,16 +399,16 @@ class LinkHolderArray { } } $replacer = new HashtableReplacer( $replacePairs, 1 ); - wfProfileOut( __METHOD__.'-construct' ); + wfProfileOut( __METHOD__ . '-construct' ); # Do the thing - wfProfileIn( __METHOD__.'-replace' ); + wfProfileIn( __METHOD__ . '-replace' ); $text = preg_replace_callback( '/(<!--LINK .*?-->)/', $replacer->cb(), $text); - wfProfileOut( __METHOD__.'-replace' ); + wfProfileOut( __METHOD__ . '-replace' ); wfProfileOut( __METHOD__ ); } @@ -497,20 +497,23 @@ class LinkHolderArray { // process categories, check if a category exists in some variant $categoryMap = array(); // maps $category_variant => $category (dbkeys) $varCategories = array(); // category replacements oldDBkey => newDBkey - foreach( $output->getCategoryLinks() as $category ){ + foreach ( $output->getCategoryLinks() as $category ) { + $categoryTitle = Title::makeTitleSafe( NS_CATEGORY, $category ); + $linkBatch->addObj( $categoryTitle ); $variants = $wgContLang->autoConvertToAllVariants( $category ); - foreach($variants as $variant){ - if($variant != $category){ - $variantTitle = Title::newFromDBkey( Title::makeName(NS_CATEGORY,$variant) ); - if(is_null($variantTitle)) continue; + foreach ( $variants as $variant ) { + if ( $variant !== $category ) { + $variantTitle = Title::makeTitleSafe( NS_CATEGORY, $variant ); + if ( is_null( $variantTitle ) ) { + continue; + } $linkBatch->addObj( $variantTitle ); - $categoryMap[$variant] = $category; + $categoryMap[$variant] = array( $category, $categoryTitle ); } } } - - if(!$linkBatch->isEmpty()){ + if( !$linkBatch->isEmpty() ) { // construct query $dbr = wfGetDB( DB_SLAVE ); $varRes = $dbr->select( 'page', @@ -556,25 +559,28 @@ class LinkHolderArray { } // check if the object is a variant of a category - if(isset($categoryMap[$vardbk])){ - $oldkey = $categoryMap[$vardbk]; - if($oldkey != $vardbk) - $varCategories[$oldkey]=$vardbk; + if ( isset( $categoryMap[$vardbk] ) ) { + list( $oldkey, $oldtitle ) = $categoryMap[$vardbk]; + if ( !isset( $varCategories[$oldkey] ) && !$oldtitle->exists() ) { + $varCategories[$oldkey] = $vardbk; + } } } wfRunHooks( 'GetLinkColours', array( $linkcolour_ids, &$colours ) ); // rebuild the categories in original order (if there are replacements) - if(count($varCategories)>0){ + if( count( $varCategories ) > 0 ) { $newCats = array(); $originalCats = $output->getCategories(); - foreach($originalCats as $cat => $sortkey){ + foreach( $originalCats as $cat => $sortkey ) { // make the replacement - if( array_key_exists($cat,$varCategories) ) + if( array_key_exists( $cat, $varCategories ) ) { $newCats[$varCategories[$cat]] = $sortkey; - else $newCats[$cat] = $sortkey; + } else { + $newCats[$cat] = $sortkey; + } } - $output->setCategoryLinks($newCats); + $output->setCategoryLinks( $newCats ); } } } @@ -607,7 +613,7 @@ class LinkHolderArray { */ function replaceTextCallback( $matches ) { $type = $matches[1]; - $key = $matches[2]; + $key = $matches[2]; if( $type == 'LINK' ) { list( $ns, $index ) = explode( ':', $key, 2 ); if( isset( $this->internals[$ns][$index]['text'] ) ) { diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index 10765de2..5ef0bc71 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -62,7 +62,6 @@ * $wgAllowSpecialInclusion * $wgInterwikiMagic * $wgMaxArticleSize - * $wgUseDynamicDates * * @ingroup Parser */ @@ -123,8 +122,8 @@ class Parser { var $mFunctionHooks = array(); var $mFunctionSynonyms = array( 0 => array(), 1 => array() ); var $mFunctionTagHooks = array(); - var $mStripList = array(); - var $mDefaultStripList = array(); + var $mStripList = array(); + var $mDefaultStripList = array(); var $mVarCache = array(); var $mImageParams = array(); var $mImageParamsMagicArray = array(); @@ -201,6 +200,13 @@ class Parser { var $mUniqPrefix; /** + * @var Array with the language name of each language link (i.e. the + * interwiki prefix) in the key, value arbitrary. Used to avoid sending + * duplicate language links to the ParserOutput. + */ + var $mLangLinkLanguages; + + /** * Constructor * * @param $conf array @@ -208,8 +214,8 @@ class Parser { public function __construct( $conf = array() ) { $this->mConf = $conf; $this->mUrlProtocols = wfUrlProtocols(); - $this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')'. - self::EXT_LINK_URL_CLASS.'+)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su'; + $this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')' . + self::EXT_LINK_URL_CLASS . '+)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su'; if ( isset( $conf['preprocessorClass'] ) ) { $this->mPreprocessorClass = $conf['preprocessorClass']; } elseif ( defined( 'MW_COMPILED' ) ) { @@ -240,6 +246,13 @@ class Parser { } /** + * Allow extensions to clean up when the parser is cloned + */ + function __clone() { + wfRunHooks( 'ParserCloned', array( $this ) ); + } + + /** * Do various kinds of initialisation on the first call of the parser */ function firstCallInit() { @@ -282,6 +295,7 @@ class Parser { $this->mRevisionId = $this->mRevisionUser = null; $this->mVarCache = array(); $this->mUser = null; + $this->mLangLinkLanguages = array(); /** * Prefix for temporary replacement strings for the multipass parser. @@ -291,12 +305,11 @@ class Parser { * string constructs. * * Must not consist of all title characters, or else it will change - * the behaviour of <nowiki> in a link. + * the behavior of <nowiki> in a link. */ $this->mUniqPrefix = "\x7fUNIQ" . self::getRandomString(); $this->mStripState = new StripState( $this->mUniqPrefix ); - # Clear these on every parse, bug 4549 $this->mTplExpandCache = $this->mTplRedirCache = $this->mTplDomCache = array(); @@ -327,12 +340,12 @@ class Parser { * Convert wikitext to HTML * Do not call this function recursively. * - * @param $text String: text we want to parse + * @param string $text text we want to parse * @param $title Title object * @param $options ParserOptions * @param $linestart boolean * @param $clearState boolean - * @param $revid Int: number to pass in {{REVISIONID}} + * @param int $revid number to pass in {{REVISIONID}} * @return ParserOutput a ParserOutput */ public function parse( $text, Title $title, ParserOptions $options, $linestart = true, $clearState = true, $revid = null ) { @@ -342,7 +355,7 @@ class Parser { */ global $wgUseTidy, $wgAlwaysUseTidy; - $fname = __METHOD__.'-' . wfGetCaller(); + $fname = __METHOD__ . '-' . wfGetCaller(); wfProfileIn( __METHOD__ ); wfProfileIn( $fname ); @@ -397,9 +410,7 @@ class Parser { if ( !( $options->getDisableContentConversion() || isset( $this->mDoubleUnderscores['nocontentconvert'] ) ) ) { - # Run convert unconditionally in 1.18-compatible mode - global $wgBug34832TransitionalRollback; - if ( $wgBug34832TransitionalRollback || !$this->mOptions->getInterfaceMessage() ) { + if ( !$this->mOptions->getInterfaceMessage() ) { # The position of the convert() call should not be changed. it # assumes that the links are all replaced and the only thing left # is the <nowiki> mark. @@ -486,8 +497,8 @@ class Parser { "Preprocessor generated node count: " . "{$this->mGeneratedPPNodeCount}/{$this->mOptions->getMaxGeneratedPPNodeCount()}\n" . "Post-expand include size: {$this->mIncludeSizes['post-expand']}/$max bytes\n" . - "Template argument size: {$this->mIncludeSizes['arg']}/$max bytes\n". - "Highest expansion depth: {$this->mHighestExpansionDepth}/{$this->mOptions->getMaxPPExpandDepth()}\n". + "Template argument size: {$this->mIncludeSizes['arg']}/$max bytes\n" . + "Highest expansion depth: {$this->mHighestExpansionDepth}/{$this->mOptions->getMaxPPExpandDepth()}\n" . $PFreport; wfRunHooks( 'ParserLimitReport', array( $this, &$limitReport ) ); @@ -496,6 +507,11 @@ class Parser { $limitReport = str_replace( array( '-', '&' ), array( '‐', '&' ), $limitReport ); $text .= "\n<!-- \n$limitReport-->\n"; + + if ( $this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10 ) { + wfDebugLog( 'generated-pp-node-count', $this->mGeneratedPPNodeCount . ' ' . + $this->mTitle->getPrefixedDBkey() ); + } } $this->mOutput->setText( $text ); @@ -515,7 +531,7 @@ class Parser { * * If $frame is not provided, then template variables (e.g., {{{1}}}) within $text are not expanded * - * @param $text String: text extension wants to have parsed + * @param string $text text extension wants to have parsed * @param $frame PPFrame: The frame to use for expanding any template variables * * @return string @@ -534,7 +550,7 @@ class Parser { * Also removes comments. * @return mixed|string */ - function preprocess( $text, Title $title, ParserOptions $options, $revid = null ) { + function preprocess( $text, Title $title = null, ParserOptions $options, $revid = null ) { wfProfileIn( __METHOD__ ); $this->startParse( $title, $options, self::OT_PREPROCESS, true ); if ( $revid !== null ) { @@ -552,7 +568,7 @@ class Parser { * Recursive parser entry point that can be called from an extension tag * hook. * - * @param $text String: text to be expanded + * @param string $text text to be expanded * @param $frame PPFrame: The frame to use for expanding any template variables * @return String * @since 1.19 @@ -593,7 +609,7 @@ class Parser { * * @return string */ - static public function getRandomString() { + public static function getRandomString() { return wfRandomString( 16 ); } @@ -682,7 +698,7 @@ class Parser { /** * Accessor/mutator for the output type * - * @param $x int|null New value or null to just get the current one + * @param int|null $x New value or null to just get the current one * @return Integer */ function OutputType( $x = null ) { @@ -745,6 +761,7 @@ class Parser { * * @since 1.19 * + * @throws MWException * @return Language|null */ public function getTargetLanguage() { @@ -765,12 +782,7 @@ class Parser { * Get the language object for language conversion */ function getConverterLanguage() { - global $wgBug34832TransitionalRollback, $wgContLang; - if ( $wgBug34832TransitionalRollback ) { - return $wgContLang; - } else { - return $this->getTargetLanguage(); - } + return $this->getTargetLanguage(); } /** @@ -813,9 +825,9 @@ class Parser { * '<element param="x">tag content</element>' ) ) * @endcode * - * @param $elements array list of element names. Comments are always extracted. - * @param $text string Source text string. - * @param $matches array Out parameter, Array: extracted tags + * @param array $elements list of element names. Comments are always extracted. + * @param string $text Source text string. + * @param array $matches Out parameter, Array: extracted tags * @param $uniq_prefix string * @return String: stripped text */ @@ -835,16 +847,16 @@ class Parser { } if ( count( $p ) > 5 ) { # comment - $element = $p[4]; + $element = $p[4]; $attributes = ''; - $close = ''; - $inside = $p[5]; + $close = ''; + $inside = $p[5]; } else { # tag - $element = $p[1]; + $element = $p[1]; $attributes = $p[2]; - $close = $p[3]; - $inside = $p[4]; + $close = $p[3]; + $inside = $p[4]; } $marker = "$uniq_prefix-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX; @@ -928,33 +940,33 @@ class Parser { $line = trim( $outLine ); if ( $line === '' ) { # empty line, go to next line - $out .= $outLine."\n"; + $out .= $outLine . "\n"; continue; } $first_character = $line[0]; $matches = array(); - if ( preg_match( '/^(:*)\{\|(.*)$/', $line , $matches ) ) { + if ( preg_match( '/^(:*)\{\|(.*)$/', $line, $matches ) ) { # First check if we are starting a new table $indent_level = strlen( $matches[1] ); $attributes = $this->mStripState->unstripBoth( $matches[2] ); - $attributes = Sanitizer::fixTagAttributes( $attributes , 'table' ); - - $outLine = str_repeat( '<dl><dd>' , $indent_level ) . "<table{$attributes}>"; - array_push( $td_history , false ); - array_push( $last_tag_history , '' ); - array_push( $tr_history , false ); - array_push( $tr_attributes , '' ); - array_push( $has_opened_tr , false ); + $attributes = Sanitizer::fixTagAttributes( $attributes, 'table' ); + + $outLine = str_repeat( '<dl><dd>', $indent_level ) . "<table{$attributes}>"; + array_push( $td_history, false ); + array_push( $last_tag_history, '' ); + array_push( $tr_history, false ); + array_push( $tr_attributes, '' ); + array_push( $has_opened_tr, false ); } elseif ( count( $td_history ) == 0 ) { # Don't do any of the following - $out .= $outLine."\n"; + $out .= $outLine . "\n"; continue; - } elseif ( substr( $line , 0 , 2 ) === '|}' ) { + } elseif ( substr( $line, 0, 2 ) === '|}' ) { # We are ending a table - $line = '</table>' . substr( $line , 2 ); + $line = '</table>' . substr( $line, 2 ); $last_tag = array_pop( $last_tag_history ); if ( !array_pop( $has_opened_tr ) ) { @@ -969,8 +981,8 @@ class Parser { $line = "</{$last_tag}>{$line}"; } array_pop( $tr_attributes ); - $outLine = $line . str_repeat( '</dd></dl>' , $indent_level ); - } elseif ( substr( $line , 0 , 2 ) === '|-' ) { + $outLine = $line . str_repeat( '</dd></dl>', $indent_level ); + } elseif ( substr( $line, 0, 2 ) === '|-' ) { # Now we have a table row $line = preg_replace( '#^\|-+#', '', $line ); @@ -983,7 +995,7 @@ class Parser { $line = ''; $last_tag = array_pop( $last_tag_history ); array_pop( $has_opened_tr ); - array_push( $has_opened_tr , true ); + array_push( $has_opened_tr, true ); if ( array_pop( $tr_history ) ) { $line = '</tr>'; @@ -994,27 +1006,27 @@ class Parser { } $outLine = $line; - array_push( $tr_history , false ); - array_push( $td_history , false ); - array_push( $last_tag_history , '' ); - } elseif ( $first_character === '|' || $first_character === '!' || substr( $line , 0 , 2 ) === '|+' ) { + array_push( $tr_history, false ); + array_push( $td_history, false ); + array_push( $last_tag_history, '' ); + } elseif ( $first_character === '|' || $first_character === '!' || substr( $line, 0, 2 ) === '|+' ) { # This might be cell elements, td, th or captions - if ( substr( $line , 0 , 2 ) === '|+' ) { + if ( substr( $line, 0, 2 ) === '|+' ) { $first_character = '+'; - $line = substr( $line , 1 ); + $line = substr( $line, 1 ); } - $line = substr( $line , 1 ); + $line = substr( $line, 1 ); if ( $first_character === '!' ) { - $line = str_replace( '!!' , '||' , $line ); + $line = str_replace( '!!', '||', $line ); } # Split up multiple cells on the same line. # FIXME : This can result in improper nesting of tags processed # by earlier parser steps, but should avoid splitting up eg # attribute values containing literal "||". - $cells = StringUtils::explodeMarkup( '||' , $line ); + $cells = StringUtils::explodeMarkup( '||', $line ); $outLine = ''; @@ -1026,10 +1038,10 @@ class Parser { if ( !array_pop( $tr_history ) ) { $previous = "<tr{$tr_after}>\n"; } - array_push( $tr_history , true ); - array_push( $tr_attributes , '' ); + array_push( $tr_history, true ); + array_push( $tr_attributes, '' ); array_pop( $has_opened_tr ); - array_push( $has_opened_tr , true ); + array_push( $has_opened_tr, true ); } $last_tag = array_pop( $last_tag_history ); @@ -1048,10 +1060,10 @@ class Parser { $last_tag = ''; } - array_push( $last_tag_history , $last_tag ); + array_push( $last_tag_history, $last_tag ); # A cell could contain both parameters and data - $cell_data = explode( '|' , $cell , 2 ); + $cell_data = explode( '|', $cell, 2 ); # Bug 553: Note that a '|' inside an invalid link should not # be mistaken as delimiting cell parameters @@ -1061,12 +1073,12 @@ class Parser { $cell = "{$previous}<{$last_tag}>{$cell_data[0]}"; } else { $attributes = $this->mStripState->unstripBoth( $cell_data[0] ); - $attributes = Sanitizer::fixTagAttributes( $attributes , $last_tag ); + $attributes = Sanitizer::fixTagAttributes( $attributes, $last_tag ); $cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}"; } $outLine .= $cell; - array_push( $td_history , true ); + array_push( $td_history, true ); } } $out .= $outLine . "\n"; @@ -1081,7 +1093,7 @@ class Parser { $out .= "</tr>\n"; } if ( !array_pop( $has_opened_tr ) ) { - $out .= "<tr><td></td></tr>\n" ; + $out .= "<tr><td></td></tr>\n"; } $out .= "</table>\n"; @@ -1122,7 +1134,7 @@ class Parser { # Hook to suspend the parser in this state if ( !wfRunHooks( 'ParserBeforeInternalParse', array( &$this, &$text, &$this->mStripState ) ) ) { wfProfileOut( __METHOD__ ); - return $text ; + return $text; } # if $frame is provided, then use $frame for replacing any variables @@ -1156,17 +1168,13 @@ class Parser { $text = $this->doDoubleUnderscore( $text ); $text = $this->doHeadings( $text ); - if ( $this->mOptions->getUseDynamicDates() ) { - $df = DateFormatter::getInstance(); - $text = $df->reformat( $this->mOptions->getDateFormat(), $text ); - } $text = $this->replaceInternalLinks( $text ); $text = $this->doAllQuotes( $text ); $text = $this->replaceExternalLinks( $text ); # replaceInternalLinks may sometimes leave behind # absolute URLs, which have to be masked to hide them from replaceExternalLinks - $text = str_replace( $this->mUniqPrefix.'NOPARSE', '', $text ); + $text = str_replace( $this->mUniqPrefix . 'NOPARSE', '', $text ); $text = $this->doMagicLinks( $text ); $text = $this->formatHeadings( $text, $origText, $isMain ); @@ -1234,7 +1242,7 @@ class Parser { $CssClass = 'mw-magiclink-pmid'; $id = $m[4]; } else { - throw new MWException( __METHOD__.': unrecognised match type "' . + throw new MWException( __METHOD__ . ': unrecognised match type "' . substr( $m[0], 0, 20 ) . '"' ); } $url = wfMessage( $urlmsg, $id )->inContentLanguage()->text(); @@ -1298,7 +1306,8 @@ class Parser { if ( $text === false ) { # Not an image, make a link $text = Linker::makeExternalLink( $url, - $this->getConverterLanguage()->markNoConversion($url), true, 'free', + $this->getConverterLanguage()->markNoConversion( $url, true ), + true, 'free', $this->getExternalLinkAttribs( $url ) ); # Register it in the output object... # Replace unnecessary URL escape codes with their equivalent characters @@ -1309,7 +1318,6 @@ class Parser { return $text . $trail; } - /** * Parse headers and return html * @@ -1323,8 +1331,7 @@ class Parser { wfProfileIn( __METHOD__ ); for ( $i = 6; $i >= 1; --$i ) { $h = str_repeat( '=', $i ); - $text = preg_replace( "/^$h(.+)$h\\s*$/m", - "<h$i>\\1</h$i>", $text ); + $text = preg_replace( "/^$h(.+)$h\\s*$/m", "<h$i>\\1</h$i>", $text ); } wfProfileOut( __METHOD__ ); return $text; @@ -1345,7 +1352,7 @@ class Parser { foreach ( $lines as $line ) { $outtext .= $this->doQuotes( $line ) . "\n"; } - $outtext = substr( $outtext, 0,-1 ); + $outtext = substr( $outtext, 0, -1 ); wfProfileOut( __METHOD__ ); return $outtext; } @@ -1410,7 +1417,7 @@ class Parser { if ( $firstspace == -1 ) { $firstspace = $i; } - } elseif ( $x2 === ' ') { + } elseif ( $x2 === ' ' ) { if ( $firstsingleletterword == -1 ) { $firstsingleletterword = $i; } @@ -1461,7 +1468,7 @@ class Parser { } elseif ( $state === 'ib' ) { $output .= '</b></i><b>'; $state = 'b'; } elseif ( $state === 'both' ) { - $output .= '<b><i>'.$buffer.'</i>'; $state = 'b'; + $output .= '<b><i>' . $buffer . '</i>'; $state = 'b'; } else { # $state can be 'b' or '' $output .= '<i>'; $state .= 'i'; } @@ -1473,7 +1480,7 @@ class Parser { } elseif ( $state === 'ib' ) { $output .= '</b>'; $state = 'i'; } elseif ( $state === 'both' ) { - $output .= '<i><b>'.$buffer.'</b>'; $state = 'i'; + $output .= '<i><b>' . $buffer . '</b>'; $state = 'i'; } else { # $state can be 'i' or '' $output .= '<b>'; $state .= 'b'; } @@ -1487,7 +1494,7 @@ class Parser { } elseif ( $state === 'ib' ) { $output .= '</b></i>'; $state = ''; } elseif ( $state === 'both' ) { - $output .= '<i><b>'.$buffer.'</b></i>'; $state = ''; + $output .= '<i><b>' . $buffer . '</b></i>'; $state = ''; } else { # ($state == '') $buffer = ''; $state = 'both'; } @@ -1507,7 +1514,7 @@ class Parser { } # There might be lonely ''''', so make sure we have a buffer if ( $state === 'both' && $buffer ) { - $output .= '<b><i>'.$buffer.'</i></b>'; + $output .= '<b><i>' . $buffer . '</i></b>'; } return $output; } @@ -1523,6 +1530,7 @@ class Parser { * * @param $text string * + * @throws MWException * @return string */ function replaceExternalLinks( $text ) { @@ -1537,7 +1545,7 @@ class Parser { $i = 0; while ( $i<count( $bits ) ) { $url = $bits[$i++]; - $protocol = $bits[$i++]; + $i++; // protocol $text = $bits[$i++]; $trail = $bits[$i++]; @@ -1595,26 +1603,39 @@ class Parser { wfProfileOut( __METHOD__ ); return $s; } - + /** + * Get the rel attribute for a particular external link. + * + * @since 1.21 + * @param string|bool $url optional URL, to extract the domain from for rel => + * nofollow if appropriate + * @param $title Title optional Title, for wgNoFollowNsExceptions lookups + * @return string|null rel attribute for $url + */ + public static function getExternalLinkRel( $url = false, $title = null ) { + global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions; + $ns = $title ? $title->getNamespace() : false; + if ( $wgNoFollowLinks && !in_array( $ns, $wgNoFollowNsExceptions ) && + !wfMatchesDomainList( $url, $wgNoFollowDomainExceptions ) ) + { + return 'nofollow'; + } + return null; + } /** * Get an associative array of additional HTML attributes appropriate for a * particular external link. This currently may include rel => nofollow * (depending on configuration, namespace, and the URL's domain) and/or a * target attribute (depending on configuration). * - * @param $url String|bool optional URL, to extract the domain from for rel => + * @param string|bool $url optional URL, to extract the domain from for rel => * nofollow if appropriate * @return Array associative array of HTML attributes */ function getExternalLinkAttribs( $url = false ) { $attribs = array(); - global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions; - $ns = $this->mTitle->getNamespace(); - if ( $wgNoFollowLinks && !in_array( $ns, $wgNoFollowNsExceptions ) && - !wfMatchesDomainList( $url, $wgNoFollowDomainExceptions ) ) - { - $attribs['rel'] = 'nofollow'; - } + $attribs['rel'] = self::getExternalLinkRel( $url, $this->mTitle ); + if ( $this->mOptions->getExternalLinkTarget() ) { $attribs['target'] = $this->mOptions->getExternalLinkTarget(); } @@ -1726,6 +1747,8 @@ class Parser { /** * Process [[ ]] wikilinks (RIL) + * @param $s + * @throws MWException * @return LinkHolderArray * * @private @@ -1733,8 +1756,8 @@ class Parser { function replaceInternalLinks2( &$s ) { wfProfileIn( __METHOD__ ); - wfProfileIn( __METHOD__.'-setup' ); - static $tc = FALSE, $e1, $e1_img; + wfProfileIn( __METHOD__ . '-setup' ); + static $tc = false, $e1, $e1_img; # the % is needed to support urlencoded titles as well if ( !$tc ) { $tc = Title::legalChars() . '#%'; @@ -1763,9 +1786,9 @@ class Parser { } if ( is_null( $this->mTitle ) ) { - wfProfileOut( __METHOD__.'-setup' ); + wfProfileOut( __METHOD__ . '-setup' ); wfProfileOut( __METHOD__ ); - throw new MWException( __METHOD__.": \$this->mTitle is null\n" ); + throw new MWException( __METHOD__ . ": \$this->mTitle is null\n" ); } $nottalk = !$this->mTitle->isTalkPage(); @@ -1780,17 +1803,11 @@ class Parser { $prefix = ''; } - if ( $this->getConverterLanguage()->hasVariants() ) { - $selflink = $this->getConverterLanguage()->autoConvertToAllVariants( - $this->mTitle->getPrefixedText() ); - } else { - $selflink = array( $this->mTitle->getPrefixedText() ); - } $useSubpages = $this->areSubpagesAllowed(); - wfProfileOut( __METHOD__.'-setup' ); + wfProfileOut( __METHOD__ . '-setup' ); # Loop for each link - for ( ; $line !== false && $line !== null ; $a->next(), $line = $a->current() ) { + for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) { # Check for excessive memory usage if ( $holders->isBig() ) { # Too big @@ -1800,24 +1817,24 @@ class Parser { } if ( $useLinkPrefixExtension ) { - wfProfileIn( __METHOD__.'-prefixhandling' ); + wfProfileIn( __METHOD__ . '-prefixhandling' ); if ( preg_match( $e2, $s, $m ) ) { $prefix = $m[2]; $s = $m[1]; } else { - $prefix=''; + $prefix = ''; } # first link if ( $first_prefix ) { $prefix = $first_prefix; $first_prefix = false; } - wfProfileOut( __METHOD__.'-prefixhandling' ); + wfProfileOut( __METHOD__ . '-prefixhandling' ); } $might_be_img = false; - wfProfileIn( __METHOD__."-e1" ); + wfProfileIn( __METHOD__ . "-e1" ); if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt $text = $m[2]; # If we get a ] at the beginning of $m[3] that means we have a link that's something like: @@ -1839,7 +1856,7 @@ class Parser { # fix up urlencoded title texts if ( strpos( $m[1], '%' ) !== false ) { # Should anchors '#' also be rejected? - $m[1] = str_replace( array('<', '>'), array('<', '>'), rawurldecode( $m[1] ) ); + $m[1] = str_replace( array( '<', '>' ), array( '<', '>' ), rawurldecode( $m[1] ) ); } $trail = $m[3]; } elseif ( preg_match( $e1_img, $line, $m ) ) { # Invalid, but might be an image with a link in its caption @@ -1850,19 +1867,19 @@ class Parser { } $trail = ""; } else { # Invalid form; output directly - $s .= $prefix . '[[' . $line ; - wfProfileOut( __METHOD__."-e1" ); + $s .= $prefix . '[[' . $line; + wfProfileOut( __METHOD__ . "-e1" ); continue; } - wfProfileOut( __METHOD__."-e1" ); - wfProfileIn( __METHOD__."-misc" ); + wfProfileOut( __METHOD__ . "-e1" ); + wfProfileIn( __METHOD__ . "-misc" ); # Don't allow internal links to pages containing # PROTO: where PROTO is a valid URL protocol; these # should be external links. if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $m[1] ) ) { - $s .= $prefix . '[[' . $line ; - wfProfileOut( __METHOD__."-misc" ); + $s .= $prefix . '[[' . $line; + wfProfileOut( __METHOD__ . "-misc" ); continue; } @@ -1879,21 +1896,21 @@ class Parser { $link = substr( $link, 1 ); } - wfProfileOut( __METHOD__."-misc" ); - wfProfileIn( __METHOD__."-title" ); + wfProfileOut( __METHOD__ . "-misc" ); + wfProfileIn( __METHOD__ . "-title" ); $nt = Title::newFromText( $this->mStripState->unstripNoWiki( $link ) ); if ( $nt === null ) { $s .= $prefix . '[[' . $line; - wfProfileOut( __METHOD__."-title" ); + wfProfileOut( __METHOD__ . "-title" ); continue; } $ns = $nt->getNamespace(); $iw = $nt->getInterWiki(); - wfProfileOut( __METHOD__."-title" ); + wfProfileOut( __METHOD__ . "-title" ); if ( $might_be_img ) { # if this is actually an invalid link - wfProfileIn( __METHOD__."-might_be_img" ); + wfProfileIn( __METHOD__ . "-might_be_img" ); if ( $ns == NS_FILE && $noforce ) { # but might be an image $found = false; while ( true ) { @@ -1925,19 +1942,19 @@ class Parser { $holders->merge( $this->replaceInternalLinks2( $text ) ); $s .= "{$prefix}[[$link|$text"; # note: no $trail, because without an end, there *is* no trail - wfProfileOut( __METHOD__."-might_be_img" ); + wfProfileOut( __METHOD__ . "-might_be_img" ); continue; } } else { # it's not an image, so output it raw $s .= "{$prefix}[[$link|$text"; # note: no $trail, because without an end, there *is* no trail - wfProfileOut( __METHOD__."-might_be_img" ); + wfProfileOut( __METHOD__ . "-might_be_img" ); continue; } - wfProfileOut( __METHOD__."-might_be_img" ); + wfProfileOut( __METHOD__ . "-might_be_img" ); } - $wasblank = ( $text == '' ); + $wasblank = ( $text == '' ); if ( $wasblank ) { $text = $link; } else { @@ -1951,18 +1968,25 @@ class Parser { # Link not escaped by : , create the various objects if ( $noforce ) { # Interwikis - wfProfileIn( __METHOD__."-interwiki" ); + wfProfileIn( __METHOD__ . "-interwiki" ); if ( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && Language::fetchLanguageName( $iw, null, 'mw' ) ) { - $this->mOutput->addLanguageLink( $nt->getFullText() ); + // XXX: the above check prevents links to sites with identifiers that are not language codes + + # Bug 24502: filter duplicates + if ( !isset( $this->mLangLinkLanguages[$iw] ) ) { + $this->mLangLinkLanguages[$iw] = true; + $this->mOutput->addLanguageLink( $nt->getFullText() ); + } + $s = rtrim( $s . $prefix ); $s .= trim( $trail, "\n" ) == '' ? '': $prefix . $trail; - wfProfileOut( __METHOD__."-interwiki" ); + wfProfileOut( __METHOD__ . "-interwiki" ); continue; } - wfProfileOut( __METHOD__."-interwiki" ); + wfProfileOut( __METHOD__ . "-interwiki" ); if ( $ns == NS_FILE ) { - wfProfileIn( __METHOD__."-image" ); + wfProfileIn( __METHOD__ . "-image" ); if ( !wfIsBadImage( $nt->getDBkey(), $this->mTitle ) ) { if ( $wasblank ) { # if no parameters were passed, $text @@ -1983,12 +2007,12 @@ class Parser { } else { $s .= $prefix . $trail; } - wfProfileOut( __METHOD__."-image" ); + wfProfileOut( __METHOD__ . "-image" ); continue; } if ( $ns == NS_CATEGORY ) { - wfProfileIn( __METHOD__."-category" ); + wfProfileIn( __METHOD__ . "-category" ); $s = rtrim( $s . "\n" ); # bug 87 if ( $wasblank ) { @@ -2007,14 +2031,18 @@ class Parser { */ $s .= trim( $prefix . $trail, "\n" ) == '' ? '' : $prefix . $trail; - wfProfileOut( __METHOD__."-category" ); + wfProfileOut( __METHOD__ . "-category" ); continue; } } # Self-link checking if ( $nt->getFragment() === '' && $ns != NS_SPECIAL ) { - if ( in_array( $nt->getPrefixedText(), $selflink, true ) ) { + if ( $nt->equals( $this->mTitle ) || ( !$nt->isKnown() && in_array( + $this->mTitle->getPrefixedText(), + $this->getConverterLanguage()->autoConvertToAllVariants( $nt->getPrefixedText() ), + true + ) ) ) { $s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail ); continue; } @@ -2023,7 +2051,7 @@ class Parser { # NS_MEDIA is a pseudo-namespace for linking directly to a file # @todo FIXME: Should do batch file existence checks, see comment below if ( $ns == NS_MEDIA ) { - wfProfileIn( __METHOD__."-media" ); + wfProfileIn( __METHOD__ . "-media" ); # Give extensions a chance to select the file revision for us $options = array(); $descQuery = false; @@ -2034,11 +2062,11 @@ class Parser { # Cloak with NOPARSE to avoid replacement in replaceExternalLinks $s .= $prefix . $this->armorLinks( Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail; - wfProfileOut( __METHOD__."-media" ); + wfProfileOut( __METHOD__ . "-media" ); continue; } - wfProfileIn( __METHOD__."-always_known" ); + wfProfileIn( __METHOD__ . "-always_known" ); # Some titles, such as valid special pages or files in foreign repos, should # be shown as bluelinks even though they're not included in the page table # @@ -2051,7 +2079,7 @@ class Parser { # Links will be added to the output link list after checking $s .= $holders->makeHolder( $nt, $text, array(), $trail, $prefix ); } - wfProfileOut( __METHOD__."-always_known" ); + wfProfileOut( __METHOD__ . "-always_known" ); } wfProfileOut( __METHOD__ ); return $holders; @@ -2066,7 +2094,7 @@ class Parser { * * @param $nt Title * @param $text String - * @param $query Array or String + * @param array $query or String * @param $trail String * @param $prefix String * @return String: HTML-wikitext mix oh yuck @@ -2093,7 +2121,7 @@ class Parser { * Not needed quite as much as it used to be since free links are a bit * more sensible these days. But bracketed links are still an issue. * - * @param $text String: more-or-less HTML + * @param string $text more-or-less HTML * @return String: less-or-more HTML with NOPARSE bits */ function armorLinks( $text ) { @@ -2113,7 +2141,7 @@ class Parser { /** * Handle link to subpage if necessary * - * @param $target String: the source of the link + * @param string $target the source of the link * @param &$text String: the link text, modified as necessary * @return string the full name of the link * @private @@ -2239,7 +2267,7 @@ class Parser { } else { return '<!-- ERR 3 -->'; } - return $text."\n"; + return $text . "\n"; } /**#@-*/ @@ -2306,7 +2334,7 @@ class Parser { $output .= $this->nextItem( substr( $prefix, -1 ) ); $paragraphStack = false; - if ( substr( $prefix, -1 ) === ';') { + if ( substr( $prefix, -1 ) === ';' ) { # The one nasty exception: definition lists work like this: # ; title : definition text # So we check for : in the remainder text to split up the @@ -2354,13 +2382,13 @@ class Parser { # If we have no prefixes, go to paragraph mode. if ( 0 == $prefixLength ) { - wfProfileIn( __METHOD__."-paragraph" ); + wfProfileIn( __METHOD__ . "-paragraph" ); # No prefix (not in list)--go to paragraph mode # XXX: use a stack for nestable elements like span, table and div - $openmatch = preg_match('/(?:<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<ol|<li|<\\/tr|<\\/td|<\\/th)/iS', $t ); + $openmatch = preg_match( '/(?:<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<ol|<dl|<li|<\\/tr|<\\/td|<\\/th)/iS', $t ); $closematch = preg_match( '/(?:<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'. - '<td|<th|<\\/?div|<hr|<\\/pre|<\\/p|'.$this->mUniqPrefix.'-pre|<\\/li|<\\/ul|<\\/ol|<\\/?center)/iS', $t ); + '<td|<th|<\\/?div|<hr|<\\/pre|<\\/p|'.$this->mUniqPrefix . '-pre|<\\/li|<\\/ul|<\\/ol|<\\/dl|<\\/?center)/iS', $t ); if ( $openmatch or $closematch ) { $paragraphStack = false; # TODO bug 5718: paragraph closed @@ -2374,7 +2402,7 @@ class Parser { # pre if ( $this->mLastSection !== 'pre' ) { $paragraphStack = false; - $output .= $this->closeParagraph().'<pre>'; + $output .= $this->closeParagraph() . '<pre>'; $this->mLastSection = 'pre'; } $t = substr( $t, 1 ); @@ -2382,7 +2410,7 @@ class Parser { # paragraph if ( trim( $t ) === '' ) { if ( $paragraphStack ) { - $output .= $paragraphStack.'<br />'; + $output .= $paragraphStack . '<br />'; $paragraphStack = false; $this->mLastSection = 'p'; } else { @@ -2400,20 +2428,20 @@ class Parser { $paragraphStack = false; $this->mLastSection = 'p'; } elseif ( $this->mLastSection !== 'p' ) { - $output .= $this->closeParagraph().'<p>'; + $output .= $this->closeParagraph() . '<p>'; $this->mLastSection = 'p'; } } } } - wfProfileOut( __METHOD__."-paragraph" ); + wfProfileOut( __METHOD__ . "-paragraph" ); } # somewhere above we forget to get out of pre block (bug 785) if ( $preCloseMatch && $this->mInPre ) { $this->mInPre = false; } if ( $paragraphStack === false ) { - $output .= $t."\n"; + $output .= $t . "\n"; } } while ( $prefixLength ) { @@ -2433,9 +2461,10 @@ class Parser { * Split up a string on ':', ignoring any occurrences inside tags * to prevent illegal overlapping. * - * @param $str String the string to split + * @param string $str the string to split * @param &$before String set to everything before the ':' * @param &$after String set to everything after the ':' + * @throws MWException * @return String the position of the ':', or false if none found */ function findColonNoLinks( $str, &$before, &$after ) { @@ -2546,7 +2575,7 @@ class Parser { if ( $c === ">" ) { $stack--; if ( $stack < 0 ) { - wfDebug( __METHOD__.": Invalid input; too many close tags\n" ); + wfDebug( __METHOD__ . ": Invalid input; too many close tags\n" ); wfProfileOut( __METHOD__ ); return false; } @@ -2586,7 +2615,7 @@ class Parser { } } if ( $stack > 0 ) { - wfDebug( __METHOD__.": Invalid input; not enough close tags (stack $stack, state $state)\n" ); + wfDebug( __METHOD__ . ": Invalid input; not enough close tags (stack $stack, state $state)\n" ); wfProfileOut( __METHOD__ ); return false; } @@ -2600,8 +2629,9 @@ class Parser { * @private * * @param $index integer - * @param $frame PPFrame + * @param bool|\PPFrame $frame * + * @throws MWException * @return string */ function getVariableValue( $index, $frame = false ) { @@ -2813,7 +2843,7 @@ class Parser { $value = $this->getRevisionUser(); break; case 'namespace': - $value = str_replace( '_',' ',$wgContLang->getNsText( $this->mTitle->getNamespace() ) ); + $value = str_replace( '_', ' ', $wgContLang->getNsText( $this->mTitle->getNamespace() ) ); break; case 'namespacee': $value = wfUrlencode( $wgContLang->getNsText( $this->mTitle->getNamespace() ) ); @@ -2822,7 +2852,7 @@ class Parser { $value = $this->mTitle->getNamespace(); break; case 'talkspace': - $value = $this->mTitle->canTalk() ? str_replace( '_',' ',$this->mTitle->getTalkNsText() ) : ''; + $value = $this->mTitle->canTalk() ? str_replace( '_', ' ', $this->mTitle->getTalkNsText() ) : ''; break; case 'talkspacee': $value = $this->mTitle->canTalk() ? wfUrlencode( $this->mTitle->getTalkNsText() ) : ''; @@ -2960,7 +2990,7 @@ class Parser { * Preprocess some wikitext and return the document tree. * This is the ghost of replace_variables(). * - * @param $text String: The text to parse + * @param string $text The text to parse * @param $flags Integer: bitwise combination of: * self::PTD_FOR_INCLUSION Handle "<noinclude>" and "<includeonly>" as if the text is being * included. Default is to assume a direct page view. @@ -3015,7 +3045,7 @@ class Parser { * self::OT_PREPROCESS: templates but not extension tags * self::OT_HTML: all templates and extension tags * - * @param $text String the text to transform + * @param string $text the text to transform * @param $frame PPFrame Object describing the arguments passed to the template. * Arguments may also be provided as an associative array, as was the usual case before MW1.12. * Providing arguments this way may be useful for extensions wishing to perform variable replacement explicitly. @@ -3034,7 +3064,7 @@ class Parser { if ( $frame === false ) { $frame = $this->getPreprocessor()->newFrame(); } elseif ( !( $frame instanceof PPFrame ) ) { - wfDebug( __METHOD__." called using plain parameters instead of a PPFrame instance. Creating custom frame.\n" ); + wfDebug( __METHOD__ . " called using plain parameters instead of a PPFrame instance. Creating custom frame.\n" ); $frame = $this->getPreprocessor()->newCustomFrame( $frame ); } @@ -3079,7 +3109,7 @@ class Parser { * Warn the user when a parser limitation is reached * Will warn at most once the user per limitation type * - * @param $limitationType String: should be one of: + * @param string $limitationType should be one of: * 'expensive-parserfunction' (corresponding messages: * 'expensive-parserfunction-warning', * 'expensive-parserfunction-category') @@ -3089,8 +3119,8 @@ class Parser { * 'post-expand-template-inclusion' (corresponding messages: * 'post-expand-template-inclusion-warning', * 'post-expand-template-inclusion-category') - * @param $current int|null Current value - * @param $max int|null Maximum allowed, when an explicit limit has been + * @param int|null $current Current value + * @param int|null $max Maximum allowed, when an explicit limit has been * exceeded, provide the values (optional) */ function limitationWarn( $limitationType, $current = '', $max = '' ) { @@ -3105,18 +3135,19 @@ class Parser { * Return the text of a template, after recursively * replacing any variables or templates within the template. * - * @param $piece Array: the parts of the template + * @param array $piece the parts of the template * $piece['title']: the title, i.e. the part before the | * $piece['parts']: the parameter array * $piece['lineStart']: whether the brace was at the start of a line * @param $frame PPFrame The current frame, contains template arguments + * @throws MWException * @return String: the text of the template * @private */ function braceSubstitution( $piece, $frame ) { global $wgContLang; wfProfileIn( __METHOD__ ); - wfProfileIn( __METHOD__.'-setup' ); + wfProfileIn( __METHOD__ . '-setup' ); # Flags $found = false; # $text has been filled @@ -3141,12 +3172,12 @@ class Parser { # $args is a list of argument nodes, starting from index 0, not including $part1 # @todo FIXME: If piece['parts'] is null then the call to getLength() below won't work b/c this $args isn't an object $args = ( null == $piece['parts'] ) ? array() : $piece['parts']; - wfProfileOut( __METHOD__.'-setup' ); + wfProfileOut( __METHOD__ . '-setup' ); $titleProfileIn = null; // profile templates # SUBST - wfProfileIn( __METHOD__.'-modifiers' ); + wfProfileIn( __METHOD__ . '-modifiers' ); if ( !$found ) { $substMatch = $this->mSubstWords->matchStartAndRemove( $part1 ); @@ -3203,7 +3234,7 @@ class Parser { $forceRawInterwiki = true; } } - wfProfileOut( __METHOD__.'-modifiers' ); + wfProfileOut( __METHOD__ . '-modifiers' ); # Parser functions if ( !$found ) { @@ -3211,70 +3242,22 @@ class Parser { $colonPos = strpos( $part1, ':' ); if ( $colonPos !== false ) { - # Case sensitive functions - $function = substr( $part1, 0, $colonPos ); - if ( isset( $this->mFunctionSynonyms[1][$function] ) ) { - $function = $this->mFunctionSynonyms[1][$function]; - } else { - # Case insensitive functions - $function = $wgContLang->lc( $function ); - if ( isset( $this->mFunctionSynonyms[0][$function] ) ) { - $function = $this->mFunctionSynonyms[0][$function]; - } else { - $function = false; - } + $func = substr( $part1, 0, $colonPos ); + $funcArgs = array( trim( substr( $part1, $colonPos + 1 ) ) ); + for ( $i = 0; $i < $args->getLength(); $i++ ) { + $funcArgs[] = $args->item( $i ); } - if ( $function ) { - wfProfileIn( __METHOD__ . '-pfunc-' . $function ); - list( $callback, $flags ) = $this->mFunctionHooks[$function]; - $initialArgs = array( &$this ); - $funcArgs = array( trim( substr( $part1, $colonPos + 1 ) ) ); - if ( $flags & SFH_OBJECT_ARGS ) { - # Add a frame parameter, and pass the arguments as an array - $allArgs = $initialArgs; - $allArgs[] = $frame; - for ( $i = 0; $i < $args->getLength(); $i++ ) { - $funcArgs[] = $args->item( $i ); - } - $allArgs[] = $funcArgs; - } else { - # Convert arguments to plain text - for ( $i = 0; $i < $args->getLength(); $i++ ) { - $funcArgs[] = trim( $frame->expand( $args->item( $i ) ) ); - } - $allArgs = array_merge( $initialArgs, $funcArgs ); - } - - # Workaround for PHP bug 35229 and similar - if ( !is_callable( $callback ) ) { - wfProfileOut( __METHOD__ . '-pfunc-' . $function ); - wfProfileOut( __METHOD__ . '-pfunc' ); - wfProfileOut( __METHOD__ ); - throw new MWException( "Tag hook for $function is not callable\n" ); - } - $result = call_user_func_array( $callback, $allArgs ); - $found = true; - $noparse = true; - $preprocessFlags = 0; - - if ( is_array( $result ) ) { - if ( isset( $result[0] ) ) { - $text = $result[0]; - unset( $result[0] ); - } - - # Extract flags into the local scope - # This allows callers to set flags such as nowiki, found, etc. - extract( $result ); - } else { - $text = $result; - } - if ( !$noparse ) { - $text = $this->preprocessToDom( $text, $preprocessFlags ); - $isChildObj = true; - } - wfProfileOut( __METHOD__ . '-pfunc-' . $function ); + try { + $result = $this->callParserFunction( $frame, $func, $funcArgs ); + } catch ( Exception $ex ) { + wfProfileOut( __METHOD__ . '-pfunc' ); + throw $ex; } + + # The interface for parser functions allows for extracting + # flags into the local scope. Extract any forwarded flags + # here. + extract( $result ); } wfProfileOut( __METHOD__ . '-pfunc' ); } @@ -3350,7 +3333,7 @@ class Parser { } } elseif ( MWNamespace::isNonincludable( $title->getNamespace() ) ) { $found = false; # access denied - wfDebug( __METHOD__.": template inclusion denied for " . $title->getPrefixedDBkey() ); + wfDebug( __METHOD__ . ": template inclusion denied for " . $title->getPrefixedDBkey() ); } else { list( $text, $title ) = $this->getTemplateDom( $title ); if ( $text !== false ) { @@ -3385,7 +3368,7 @@ class Parser { $text = '<span class="error">' . wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text() . '</span>'; - wfDebug( __METHOD__.": template loop broken at '$titleText'\n" ); + wfDebug( __METHOD__ . ": template loop broken at '$titleText'\n" ); } wfProfileOut( __METHOD__ . '-loadtpl' ); } @@ -3442,7 +3425,7 @@ class Parser { { # Bug 529: if the template begins with a table or block-level # element, it should be treated as beginning a new line. - # This behaviour is somewhat controversial. + # This behavior is somewhat controversial. $text = "\n" . $text; } @@ -3472,6 +3455,120 @@ class Parser { } /** + * Call a parser function and return an array with text and flags. + * + * The returned array will always contain a boolean 'found', indicating + * whether the parser function was found or not. It may also contain the + * following: + * text: string|object, resulting wikitext or PP DOM object + * isHTML: bool, $text is HTML, armour it against wikitext transformation + * isChildObj: bool, $text is a DOM node needing expansion in a child frame + * isLocalObj: bool, $text is a DOM node needing expansion in the current frame + * nowiki: bool, wiki markup in $text should be escaped + * + * @since 1.21 + * @param $frame PPFrame The current frame, contains template arguments + * @param $function string Function name + * @param $args array Arguments to the function + * @return array + */ + public function callParserFunction( $frame, $function, array $args = array() ) { + global $wgContLang; + + wfProfileIn( __METHOD__ ); + + # Case sensitive functions + if ( isset( $this->mFunctionSynonyms[1][$function] ) ) { + $function = $this->mFunctionSynonyms[1][$function]; + } else { + # Case insensitive functions + $function = $wgContLang->lc( $function ); + if ( isset( $this->mFunctionSynonyms[0][$function] ) ) { + $function = $this->mFunctionSynonyms[0][$function]; + } else { + wfProfileOut( __METHOD__ ); + return array( 'found' => false ); + } + } + + wfProfileIn( __METHOD__ . '-pfunc-' . $function ); + list( $callback, $flags ) = $this->mFunctionHooks[$function]; + + # Workaround for PHP bug 35229 and similar + if ( !is_callable( $callback ) ) { + wfProfileOut( __METHOD__ . '-pfunc-' . $function ); + wfProfileOut( __METHOD__ ); + throw new MWException( "Tag hook for $function is not callable\n" ); + } + + $allArgs = array( &$this ); + if ( $flags & SFH_OBJECT_ARGS ) { + # Convert arguments to PPNodes and collect for appending to $allArgs + $funcArgs = array(); + foreach ( $args as $k => $v ) { + if ( $v instanceof PPNode || $k === 0 ) { + $funcArgs[] = $v; + } else { + $funcArgs[] = $this->mPreprocessor->newPartNodeArray( array( $k => $v ) )->item( 0 ); + } + } + + # Add a frame parameter, and pass the arguments as an array + $allArgs[] = $frame; + $allArgs[] = $funcArgs; + } else { + # Convert arguments to plain text and append to $allArgs + foreach ( $args as $k => $v ) { + if ( $v instanceof PPNode ) { + $allArgs[] = trim( $frame->expand( $v ) ); + } elseif ( is_int( $k ) && $k >= 0 ) { + $allArgs[] = trim( $v ); + } else { + $allArgs[] = trim( "$k=$v" ); + } + } + } + + $result = call_user_func_array( $callback, $allArgs ); + + # The interface for function hooks allows them to return a wikitext + # string or an array containing the string and any flags. This mungs + # things around to match what this method should return. + if ( !is_array( $result ) ) { + $result = array( + 'found' => true, + 'text' => $result, + ); + } else { + if ( isset( $result[0] ) && !isset( $result['text'] ) ) { + $result['text'] = $result[0]; + } + unset( $result[0] ); + $result += array( + 'found' => true, + ); + } + + $noparse = true; + $preprocessFlags = 0; + if ( isset( $result['noparse'] ) ) { + $noparse = $result['noparse']; + } + if ( isset( $result['preprocessFlags'] ) ) { + $preprocessFlags = $result['preprocessFlags']; + } + + if ( !$noparse ) { + $result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags ); + $result['isChildObj'] = true; + } + wfProfileOut( __METHOD__ . '-pfunc-' . $function ); + wfProfileOut( __METHOD__ ); + + return $result; + } + + /** * Get the semi-parsed DOM representation of a template with a given title, * and its redirect destination title. Cached. * @@ -3593,7 +3690,13 @@ class Parser { } if ( $rev ) { - $text = $rev->getText(); + $content = $rev->getContent(); + $text = $content ? $content->getWikitextForTransclusion() : null; + + if ( $text === false || $text === null ) { + $text = false; + break; + } } elseif ( $title->getNamespace() == NS_MEDIAWIKI ) { global $wgContLang; $message = wfMessage( $wgContLang->lcfirst( $title->getText() ) )->inContentLanguage(); @@ -3601,16 +3704,17 @@ class Parser { $text = false; break; } + $content = $message->content(); $text = $message->plain(); } else { break; } - if ( $text === false ) { + if ( !$content ) { break; } # Redirect? $finalTitle = $title; - $title = Title::newFromRedirect( $text ); + $title = $content->getRedirectTarget(); } return array( 'text' => $text, @@ -3622,7 +3726,7 @@ class Parser { * Fetch a file and its title and register a reference to it. * If 'broken' is a key in $options then the file will appear as a broken thumbnail. * @param Title $title - * @param Array $options Array of options to RepoGroup::findFile + * @param array $options Array of options to RepoGroup::findFile * @return File|bool */ function fetchFile( $title, $options = array() ) { @@ -3634,7 +3738,7 @@ class Parser { * Fetch a file and its title and register a reference to it. * If 'broken' is a key in $options then the file will appear as a broken thumbnail. * @param Title $title - * @param Array $options Array of options to RepoGroup::findFile + * @param array $options Array of options to RepoGroup::findFile * @return Array ( File or false, Title of file ) */ function fetchFileAndTitle( $title, $options = array() ) { @@ -3674,7 +3778,7 @@ class Parser { global $wgEnableScaryTranscluding; if ( !$wgEnableScaryTranscluding ) { - return wfMessage('scarytranscludedisabled')->inContentLanguage()->text(); + return wfMessage( 'scarytranscludedisabled' )->inContentLanguage()->text(); } $url = $title->getFullUrl( "action=$action" ); @@ -3693,19 +3797,24 @@ class Parser { global $wgTranscludeCacheExpiry; $dbr = wfGetDB( DB_SLAVE ); $tsCond = $dbr->timestamp( time() - $wgTranscludeCacheExpiry ); - $obj = $dbr->selectRow( 'transcache', array('tc_time', 'tc_contents' ), + $obj = $dbr->selectRow( 'transcache', array( 'tc_time', 'tc_contents' ), array( 'tc_url' => $url, "tc_time >= " . $dbr->addQuotes( $tsCond ) ) ); if ( $obj ) { return $obj->tc_contents; } - $text = Http::get( $url ); - if ( !$text ) { + $req = MWHttpRequest::factory( $url ); + $status = $req->execute(); // Status object + if ( $status->isOK() ) { + $text = $req->getContent(); + } elseif ( $req->getStatus() != 200 ) { // Though we failed to fetch the content, this status is useless. + return wfMessage( 'scarytranscludefailed-httpstatus', $url, $req->getStatus() /* HTTP status */ )->inContentLanguage()->text(); + } else { return wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text(); } $dbw = wfGetDB( DB_MASTER ); - $dbw->replace( 'transcache', array('tc_url'), array( + $dbw->replace( 'transcache', array( 'tc_url' ), array( 'tc_url' => $url, 'tc_time' => $dbw->timestamp( time() ), 'tc_contents' => $text) @@ -3731,7 +3840,7 @@ class Parser { $argName = trim( $nameWithSpaces ); $object = false; $text = $frame->getArgument( $argName ); - if ( $text === false && $parts->getLength() > 0 + if ( $text === false && $parts->getLength() > 0 && ( $this->ot['html'] || $this->ot['pre'] @@ -3767,7 +3876,7 @@ class Parser { * Return the text to be used for a given extension tag. * This is the ghost of strip(). * - * @param $params array Associative array of parameters: + * @param array $params Associative array of parameters: * name PPNode for the tag name * attr PPNode for unparsed text where tag attributes are thought to be * attributes Optional associative array of parsed attributes @@ -3775,6 +3884,7 @@ class Parser { * noClose Original text did not have a close tag * @param $frame PPFrame * + * @throws MWException * @return string */ function extensionSubstitution( $params, $frame ) { @@ -3783,7 +3893,7 @@ class Parser { $content = !isset( $params['inner'] ) ? null : $frame->expand( $params['inner'] ); $marker = "{$this->mUniqPrefix}-$name-" . sprintf( '%08X', $this->mMarkerIndex++ ) . self::MARKER_SUFFIX; - $isFunctionTag = isset( $this->mFunctionTagHooks[strtolower($name)] ) && + $isFunctionTag = isset( $this->mFunctionTagHooks[strtolower( $name )] ) && ( $this->ot['html'] || $this->ot['pre'] ); if ( $isFunctionTag ) { $markerType = 'none'; @@ -3805,7 +3915,7 @@ class Parser { $output = call_user_func_array( $this->mTagHooks[$name], array( $content, $attributes, $this, $frame ) ); } elseif ( isset( $this->mFunctionTagHooks[$name] ) ) { - list( $callback, $flags ) = $this->mFunctionTagHooks[$name]; + list( $callback, ) = $this->mFunctionTagHooks[$name]; if ( !is_callable( $callback ) ) { throw new MWException( "Tag hook for $name is not callable\n" ); } @@ -3848,7 +3958,7 @@ class Parser { } elseif ( $markerType === 'general' ) { $this->mStripState->addGeneral( $marker, $output ); } else { - throw new MWException( __METHOD__.': invalid marker type' ); + throw new MWException( __METHOD__ . ': invalid marker type' ); } return $marker; } @@ -3856,7 +3966,7 @@ class Parser { /** * Increment an include size counter * - * @param $type String: the type of expansion + * @param string $type the type of expansion * @param $size Integer: the size of the text * @return Boolean: false if this inclusion would take it over the maximum, true otherwise */ @@ -3942,12 +4052,12 @@ class Parser { * Add a tracking category, getting the title from a system message, * or print a debug message if the title is invalid. * - * @param $msg String: message key + * @param string $msg message key * @return Boolean: whether the addition was successful */ public function addTrackingCategory( $msg ) { if ( $this->mTitle->getNamespace() === NS_SPECIAL ) { - wfDebug( __METHOD__.": Not adding tracking category $msg to special page!\n" ); + wfDebug( __METHOD__ . ": Not adding tracking category $msg to special page!\n" ); return false; } // Important to parse with correct title (bug 31469) @@ -3966,7 +4076,7 @@ class Parser { $this->mOutput->addCategory( $containerCategory->getDBkey(), $this->getDefaultSort() ); return true; } else { - wfDebug( __METHOD__.": [[MediaWiki:$msg]] is not a valid title!\n" ); + wfDebug( __METHOD__ . ": [[MediaWiki:$msg]] is not a valid title!\n" ); return false; } } @@ -3982,7 +4092,7 @@ class Parser { * string and re-inserts the newly formatted headlines. * * @param $text String - * @param $origText String: original, untouched wikitext + * @param string $origText original, untouched wikitext * @param $isMain Boolean * @return mixed|string * @private @@ -4062,7 +4172,7 @@ class Parser { $sectionIndex = false; $numbering = ''; $markerMatches = array(); - if ( preg_match("/^$markerRegex/", $headline, $markerMatches ) ) { + if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) { $serial = $markerMatches[1]; list( $titleText, $sectionIndex ) = $this->mHeadings[$serial]; $isTemplate = ( $titleText != $baseTitleText ); @@ -4078,7 +4188,7 @@ class Parser { # Increase TOC level $toclevel++; $sublevelCount[$toclevel] = 0; - if ( $toclevel<$wgMaxTocLevel ) { + if ( $toclevel < $wgMaxTocLevel ) { $prevtoclevel = $toclevel; $toc .= Linker::tocIndent(); $numVisible++; @@ -4100,7 +4210,7 @@ class Parser { if ( $i == 0 ) { $toclevel = 1; } - if ( $toclevel<$wgMaxTocLevel ) { + if ( $toclevel < $wgMaxTocLevel ) { if ( $prevtoclevel < $wgMaxTocLevel ) { # Unindent only if the previous toc level was shown :p $toc .= Linker::tocUnindent( $prevtoclevel - $toclevel ); @@ -4111,7 +4221,7 @@ class Parser { } } else { # No change in level, end TOC line - if ( $toclevel<$wgMaxTocLevel ) { + if ( $toclevel < $wgMaxTocLevel ) { $toc .= Linker::tocLineEnd(); } } @@ -4119,7 +4229,7 @@ class Parser { $levelCount[$toclevel] = $level; # count number of headlines for each level - @$sublevelCount[$toclevel]++; + $sublevelCount[$toclevel]++; $dot = 0; for( $i = 1; $i <= $toclevel; $i++ ) { if ( !empty( $sublevelCount[$i] ) ) { @@ -4144,11 +4254,17 @@ class Parser { $safeHeadline = $this->mStripState->unstripBoth( $safeHeadline ); # Strip out HTML (first regex removes any tag not allowed) - # Allowed tags are <sup> and <sub> (bug 8393), <i> (bug 26375) and <b> (r105284) - # We strip any parameter from accepted tags (second regex) + # Allowed tags are: + # * <sup> and <sub> (bug 8393) + # * <i> (bug 26375) + # * <b> (r105284) + # * <span dir="rtl"> and <span dir="ltr"> (bug 35167) + # + # We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from <span>, + # to allow setting directionality in toc items. $tocline = preg_replace( - array( '#<(?!/?(sup|sub|i|b)(?: [^>]*)?>).*?'.'>#', '#<(/?(sup|sub|i|b))(?: .*?)?'.'>#' ), - array( '', '<$1>' ), + array( '#<(?!/?(span|sup|sub|i|b)(?: [^>]*)?>).*?'.'>#', '#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|i|b))(?: .*?)?'.'>#' ), + array( '', '<$1>' ), $safeHeadline ); $tocline = trim( $tocline ); @@ -4269,9 +4385,9 @@ class Parser { // We use a page and section attribute to stop the language converter from converting these important bits // of data, but put the headline hint inside a content block because the language converter is supposed to // be able to convert that piece of data. - $editlink = '<mw:editsection page="' . htmlspecialchars($editlinkArgs[0]); - $editlink .= '" section="' . htmlspecialchars($editlinkArgs[1]) .'"'; - if ( isset($editlinkArgs[2]) ) { + $editlink = '<mw:editsection page="' . htmlspecialchars( $editlinkArgs[0] ); + $editlink .= '" section="' . htmlspecialchars( $editlinkArgs[1] ) . '"'; + if ( isset( $editlinkArgs[2] ) ) { $editlink .= '>' . $editlinkArgs[2] . '</mw:editsection>'; } else { $editlink .= '/>'; @@ -4353,7 +4469,7 @@ class Parser { * Transform wiki markup when saving a page by doing "\r\n" -> "\n" * conversion, substitting signatures, {{subst:}} templates, etc. * - * @param $text String: the text to transform + * @param string $text the text to transform * @param $title Title: the Title object for the current article * @param $user User: the User object describing the current user * @param $options ParserOptions: parsing options @@ -4436,14 +4552,14 @@ class Parser { '~~~' => $sigText ) ); - # Context links: [[|name]] and [[name (context)|]] + # Context links ("pipe tricks"): [[|name]] and [[name (context)|]] $tc = '[' . Title::legalChars() . ']'; $nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii! - $p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/"; # [[ns:page (context)|]] - $p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?($tc+))\\|]]/"; # [[ns:page(context)|]] - $p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |,)$tc+|)\\|]]/"; # [[ns:page (context), context|]] - $p2 = "/\[\[\\|($tc+)]]/"; # [[|page]] + $p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/"; # [[ns:page (context)|]] + $p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?($tc+))\\|]]/"; # [[ns:page(context)|]] (double-width brackets, added in r40257) + $p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |,)$tc+|)\\|]]/"; # [[ns:page (context), context|]] (using either single or double-width comma) + $p2 = "/\[\[\\|($tc+)]]/"; # [[|page]] (reverse pipe trick: add context from page title) # try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]" $text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text ); @@ -4476,7 +4592,7 @@ class Parser { * as it may have changed if it's the $wgParser. * * @param $user User - * @param $nickname String|bool nickname to use or false to use user's default nickname + * @param string|bool $nickname nickname to use or false to use user's default nickname * @param $fancySig Boolean|null whether the nicknname is the complete signature * or null to use default value * @return string @@ -4507,7 +4623,7 @@ class Parser { } else { # Failed to validate; fall back to the default $nickname = $username; - wfDebug( __METHOD__.": $username has bad XML tags in signature.\n" ); + wfDebug( __METHOD__ . ": $username has bad XML tags in signature.\n" ); } } @@ -4539,7 +4655,7 @@ class Parser { * 2) Substitute all transclusions * * @param $text String - * @param $parsing bool Whether we're cleaning (preferences save) or parsing + * @param bool $parsing Whether we're cleaning (preferences save) or parsing * @return String: signature text */ public function cleanSig( $text, $parsing = false ) { @@ -4614,7 +4730,7 @@ class Parser { /** * Wrapper for preprocess() * - * @param $text String: the text to preprocess + * @param string $text the text to preprocess * @param $options ParserOptions: options * @param $title Title object or null to use $wgTitle * @return String @@ -4633,11 +4749,7 @@ class Parser { global $wgTitle; $title = $wgTitle; } - if ( !$title ) { - # It's not uncommon having a null $wgTitle in scripts. See r80898 - # Create a ghost title in such case - $title = Title::newFromText( 'Dwimmerlaik' ); - } + $text = $this->preprocess( $text, $title, $options ); $executing = false; @@ -4666,6 +4778,7 @@ class Parser { * * @param $tag Mixed: the tag to use, e.g. 'hook' for "<hook>" * @param $callback Mixed: the callback function (and object) to use for the tag + * @throws MWException * @return Mixed|null The old value of the mTagHooks array associated with the hook */ public function setHook( $tag, $callback ) { @@ -4696,6 +4809,7 @@ class Parser { * * @param $tag Mixed: the tag to use, e.g. 'hook' for "<hook>" * @param $callback Mixed: the callback function (and object) to use for the tag + * @throws MWException * @return Mixed|null The old value of the mTagHooks array associated with the hook */ function setTransparentTagHook( $tag, $callback ) { @@ -4734,7 +4848,7 @@ class Parser { * nowiki Wiki markup in the return value should be escaped * isHTML The returned text is HTML, armour it against wikitext transformation * - * @param $id String: The magic word ID + * @param string $id The magic word ID * @param $callback Mixed: the callback function (and object) to use * @param $flags Integer: a combination of the following flags: * SFH_NO_HASH No leading hash, i.e. {{plural:...}} instead of {{#if:...}} @@ -4758,6 +4872,7 @@ class Parser { * Please read the documentation in includes/parser/Preprocessor.php for more information * about the methods available in PPFrame and PPNode. * + * @throws MWException * @return string|callback The old callback function for this name, if any */ public function setFunctionHook( $id, $callback, $flags = 0 ) { @@ -4769,7 +4884,7 @@ class Parser { # Add to function cache $mw = MagicWord::get( $id ); if ( !$mw ) - throw new MWException( __METHOD__.'() expecting a magic word identifier.' ); + throw new MWException( __METHOD__ . '() expecting a magic word identifier.' ); $synonyms = $mw->getSynonyms(); $sensitive = intval( $mw->isCaseSensitive() ); @@ -4805,6 +4920,10 @@ class Parser { * Create a tag function, e.g. "<test>some stuff</test>". * Unlike tag hooks, tag functions are parsed at preprocessor level. * Unlike parser functions, their content is not preprocessed. + * @param $tag + * @param $callback + * @param $flags + * @throws MWException * @return null */ function setFunctionTagHook( $tag, $callback, $flags ) { @@ -4920,7 +5039,7 @@ class Parser { // is defined for images in galleries $matches[3] = $this->recursiveTagParse( trim( $matches[3] ) ); - $parameterMatches = StringUtils::explode('|', $matches[3]); + $parameterMatches = StringUtils::explode( '|', $matches[3] ); $magicWordAlt = MagicWord::get( 'img_alt' ); $magicWordLink = MagicWord::get( 'img_link' ); @@ -4928,14 +5047,18 @@ class Parser { if ( $match = $magicWordAlt->matchVariableStartToEnd( $parameterMatch ) ) { $alt = $this->stripAltText( $match, false ); } - elseif( $match = $magicWordLink->matchVariableStartToEnd( $parameterMatch ) ){ - $link = strip_tags($this->replaceLinkHoldersText($match)); + elseif( $match = $magicWordLink->matchVariableStartToEnd( $parameterMatch ) ) { + $linkValue = strip_tags( $this->replaceLinkHoldersText( $match ) ); $chars = self::EXT_LINK_URL_CLASS; $prots = $this->mUrlProtocols; //check to see if link matches an absolute url, if not then it must be a wiki link. - if(!preg_match( "/^($prots)$chars+$/u", $link)){ - $localLinkTitle = Title::newFromText($link); - $link = $localLinkTitle->getLocalURL(); + if ( preg_match( "/^($prots)$chars+$/u", $linkValue ) ) { + $link = $linkValue; + } else { + $localLinkTitle = Title::newFromText( $linkValue ); + if ( $localLinkTitle !== null ) { + $link = $localLinkTitle->getLocalURL(); + } } } else { @@ -4947,7 +5070,7 @@ class Parser { $label = substr( $label, 1 ); } - $ig->add( $title, $label, $alt ,$link); + $ig->add( $title, $label, $alt, $link ); } return $ig->toHTML(); } @@ -4962,7 +5085,7 @@ class Parser { } else { $handlerClass = ''; } - if ( !isset( $this->mImageParams[$handlerClass] ) ) { + if ( !isset( $this->mImageParams[$handlerClass] ) ) { # Initialise static lists static $internalParamNames = array( 'horizAlign' => array( 'left', 'right', 'center', 'none' ), @@ -5180,7 +5303,7 @@ class Parser { } else { # Inline image if ( !isset( $params['frame']['alt'] ) ) { # No alt text, use the "caption" for the alt text - if ( $caption !== '') { + if ( $caption !== '' ) { $params['frame']['alt'] = $this->stripAltText( $caption, $holders ); } else { # No caption, fall back to using the filename for the @@ -5303,8 +5426,8 @@ class Parser { * * External callers should use the getSection and replaceSection methods. * - * @param $text String: Page wikitext - * @param $section String: a section identifier string of the form: + * @param string $text Page wikitext + * @param string $section a section identifier string of the form: * "<flag1> - <flag2> - ... - <section number>" * * Currently the only recognised flag is "T", which means the target section number @@ -5321,12 +5444,12 @@ class Parser { * string. If $text is the empty string and section 0 is replaced, $newText is * returned. * - * @param $mode String: one of "get" or "replace" - * @param $newText String: replacement text for section data. + * @param string $mode one of "get" or "replace" + * @param string $newText replacement text for section data. * @return String: for "get", the extracted section text. * for "replace", the whole page with the section replaced. */ - private function extractSections( $text, $section, $mode, $newText='' ) { + private function extractSections( $text, $section, $mode, $newText = '' ) { global $wgTitle; # not generally used but removes an ugly failure mode $this->startParse( $wgTitle, new ParserOptions, self::OT_PLAIN, true ); $outText = ''; @@ -5442,12 +5565,12 @@ class Parser { * * If a section contains subsections, these are also returned. * - * @param $text String: text to look in - * @param $section String: section identifier - * @param $deftext String: default to return if section is not found + * @param string $text text to look in + * @param string $section section identifier + * @param string $deftext default to return if section is not found * @return string text of the requested section */ - public function getSection( $text, $section, $deftext='' ) { + public function getSection( $text, $section, $deftext = '' ) { return $this->extractSections( $text, $section, "get", $deftext ); } @@ -5456,9 +5579,9 @@ class Parser { * specified by $section has been replaced with $text. If the target * section does not exist, $oldtext is returned unchanged. * - * @param $oldtext String: former text of the article - * @param $section int section identifier - * @param $text String: replacing text + * @param string $oldtext former text of the article + * @param int $section section identifier + * @param string $text replacing text * @return String: modified text */ public function replaceSection( $oldtext, $section, $text ) { @@ -5540,7 +5663,7 @@ class Parser { /** * Mutator for $mDefaultSort * - * @param $sort string New value + * @param string $sort New value */ public function setDefaultSort( $sort ) { $this->mDefaultSort = $sort; @@ -5596,7 +5719,7 @@ class Parser { * instead. For use in redirects, since IE6 interprets Redirect: headers * as something other than UTF-8 (apparently?), resulting in breakage. * - * @param $text String: The section name + * @param string $text The section name * @return string An anchor */ public function guessLegacySectionNameFromWikiText( $text ) { @@ -5616,7 +5739,7 @@ class Parser { * to create valid section anchors by mimicing the output of the * parser when headings are parsed. * - * @param $text String: text string to be stripped of wikitext + * @param string $text text string to be stripped of wikitext * for use in a Section anchor * @return string Filtered text string */ @@ -5767,12 +5890,13 @@ class Parser { * If the $data array has been stored persistently, the caller should first * check whether it is still valid, by calling isValidHalfParsedText(). * - * @param $data array Serialized data + * @param array $data Serialized data + * @throws MWException * @return String */ function unserializeHalfParsedText( $data ) { if ( !isset( $data['version'] ) || $data['version'] != self::HALF_PARSED_VERSION ) { - throw new MWException( __METHOD__.': invalid version' ); + throw new MWException( __METHOD__ . ': invalid version' ); } # First, extract the strip state. diff --git a/includes/parser/ParserCache.php b/includes/parser/ParserCache.php index 6a4ef0c5..0faa40a8 100644 --- a/includes/parser/ParserCache.php +++ b/includes/parser/ParserCache.php @@ -48,6 +48,7 @@ class ParserCache { * May be a memcached client or a BagOStuff derivative. * * @param $memCached Object + * @throws MWException */ protected function __construct( $memCached ) { if ( !$memCached ) { @@ -66,7 +67,7 @@ class ParserCache { // idhash seem to mean 'page id' + 'rendering hash' (r3710) $pageid = $article->getID(); - $renderkey = (int)($wgRequest->getVal('action') == 'render'); + $renderkey = (int)($wgRequest->getVal( 'action' ) == 'render'); $key = wfMemcKey( 'pcache', 'idhash', "{$pageid}-{$renderkey}!{$hash}" ); return $key; @@ -200,8 +201,8 @@ class ParserCache { wfDebug( "ParserOutput cache found.\n" ); - // The edit section preference may not be the appropiate one in - // the ParserOutput, as we are not storing it in the parsercache + // The edit section preference may not be the appropiate one in + // the ParserOutput, as we are not storing it in the parsercache // key. Force it here. See bug 31445. $value->setEditSectionTokens( $popts->getEditSection() ); diff --git a/includes/parser/ParserOptions.php b/includes/parser/ParserOptions.php index 009b18a1..3eb83e36 100644 --- a/includes/parser/ParserOptions.php +++ b/includes/parser/ParserOptions.php @@ -29,67 +29,62 @@ * @ingroup Parser */ class ParserOptions { - - /** - * Use DateFormatter to format dates - */ - var $mUseDynamicDates; - + /** * Interlanguage links are removed and returned in an array */ var $mInterwikiMagic; - + /** * Allow external images inline? */ var $mAllowExternalImages; - + /** * If not, any exception? */ var $mAllowExternalImagesFrom; - + /** * If not or it doesn't match, should we check an on-wiki whitelist? */ var $mEnableImageWhitelist; - + /** * Date format index */ var $mDateFormat = null; - + /** * Create "edit section" links? */ var $mEditSection = true; - + /** * Allow inclusion of special pages? */ var $mAllowSpecialInclusion; - + /** * Use tidy to cleanup output HTML? */ var $mTidy = false; - + /** * Which lang to call for PLURAL and GRAMMAR */ var $mInterfaceMessage = false; - + /** * Overrides $mInterfaceMessage with arbitrary language */ var $mTargetLanguage = null; - + /** * Maximum size of template expansions, in bytes */ var $mMaxIncludeSize; - + /** * Maximum number of nodes touched by PPFrame::expand() */ @@ -99,56 +94,56 @@ class ParserOptions { * Maximum number of nodes generated by Preprocessor::preprocessToObj() */ var $mMaxGeneratedPPNodeCount; - + /** * Maximum recursion depth in PPFrame::expand() */ var $mMaxPPExpandDepth; - + /** * Maximum recursion depth for templates within templates */ var $mMaxTemplateDepth; - + /** * Maximum number of calls per parse to expensive parser functions */ var $mExpensiveParserFunctionLimit; - + /** * Remove HTML comments. ONLY APPLIES TO PREPROCESS OPERATIONS */ var $mRemoveComments = true; - + /** * Callback for template fetching. Used as first argument to call_user_func(). */ var $mTemplateCallback = array( 'Parser', 'statelessFetchTemplate' ); - + /** * Enable limit report in an HTML comment on output */ var $mEnableLimitReport = false; - + /** * Timestamp used for {{CURRENTDAY}} etc. */ var $mTimestamp; - + /** * Target attribute for external links */ var $mExternalLinkTarget; - + /** - * Clean up signature texts? + * Clean up signature texts? * * 1) Strip ~~~, ~~~~ and ~~~~~ out of signatures * 2) Substitute all transclusions */ var $mCleanSignatures; - + /** * Transform wiki markup when saving the page? */ @@ -168,43 +163,43 @@ class ParserOptions { * Automatically number headings? */ var $mNumberHeadings; - + /** * User math preference (as integer). Not used (1.19) */ var $mMath; - + /** * Thumb size preferred by the user. */ var $mThumbSize; - + /** * Maximum article size of an article to be marked as "stub" */ private $mStubThreshold; - + /** * Language object of the User language. */ var $mUserLang; /** - * @var User + * @var User * Stored user object */ var $mUser; - + /** * Parsing the page for a "preview" operation? */ var $mIsPreview = false; - + /** * Parsing the page for a "preview" operation on a single section? */ var $mIsSectionPreview = false; - + /** * Parsing the printable version of the page? */ @@ -220,7 +215,6 @@ class ParserOptions { */ protected $onAccessCallback = null; - function getUseDynamicDates() { return $this->mUseDynamicDates; } function getInterwikiMagic() { return $this->mInterwikiMagic; } function getAllowExternalImages() { return $this->mAllowExternalImages; } function getAllowExternalImagesFrom() { return $this->mAllowExternalImagesFrom; } @@ -308,7 +302,6 @@ class ParserOptions { return $this->getUserLangObj()->getCode(); } - function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); } function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); } function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); } function setAllowExternalImagesFrom( $x ) { return wfSetVar( $this->mAllowExternalImagesFrom, $x ); } @@ -415,14 +408,14 @@ class ParserOptions { return new ParserOptions( $context->getUser(), $context->getLanguage() ); } - /** - * Get user options + /** + * Get user options * * @param $user User object * @param $lang Language object */ private function initialiseFromUser( $user, $lang ) { - global $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages, + global $wgInterwikiMagic, $wgAllowExternalImages, $wgAllowExternalImagesFrom, $wgEnableImageWhitelist, $wgAllowSpecialInclusion, $wgMaxArticleSize, $wgMaxPPNodeCount, $wgMaxTemplateDepth, $wgMaxPPExpandDepth, $wgCleanSignatures, $wgExternalLinkTarget, $wgExpensiveParserFunctionLimit, @@ -430,7 +423,6 @@ class ParserOptions { wfProfileIn( __METHOD__ ); - $this->mUseDynamicDates = $wgUseDynamicDates; $this->mInterwikiMagic = $wgInterwikiMagic; $this->mAllowExternalImages = $wgAllowExternalImages; $this->mAllowExternalImagesFrom = $wgAllowExternalImagesFrom; @@ -481,12 +473,7 @@ class ParserOptions { * @return array */ public static function legacyOptions() { - global $wgUseDynamicDates; - $legacyOpts = array( 'math', 'stubthreshold', 'numberheadings', 'userlang', 'thumbsize', 'editsection', 'printable' ); - if ( $wgUseDynamicDates ) { - $legacyOpts[] = 'dateformat'; - } - return $legacyOpts; + return array( 'math', 'stubthreshold', 'numberheadings', 'userlang', 'thumbsize', 'editsection', 'printable' ); } /** @@ -518,14 +505,13 @@ class ParserOptions { $confstr .= '*'; } - // Space assigned for the stubthreshold but unused // since it disables the parser cache, its value will always // be 0 when this function is called by parsercache. if ( in_array( 'stubthreshold', $forOptions ) ) { $confstr .= '!' . $this->mStubThreshold; } else { - $confstr .= '!*' ; + $confstr .= '!*'; } if ( in_array( 'dateformat', $forOptions ) ) { diff --git a/includes/parser/ParserOutput.php b/includes/parser/ParserOutput.php index 41b4a385..db649f11 100644 --- a/includes/parser/ParserOutput.php +++ b/includes/parser/ParserOutput.php @@ -50,7 +50,8 @@ class ParserOutput extends CacheTime { $mTimestamp; # Timestamp of the revision private $mIndexPolicy = ''; # 'index' or 'noindex'? Any other value will result in no change. private $mAccessedOptions = array(); # List of ParserOptions (stored in the keys) - private $mSecondaryDataUpdates = array(); # List of instances of SecondaryDataObject(), used to cause some information extracted from the page in a custom place. + private $mSecondaryDataUpdates = array(); # List of DataUpdate, used to save info from the page somewhere else. + private $mExtensionData = array(); # extra data used by extensions const EDITSECTION_REGEX = '#<(?:mw:)?editsection page="(.*?)" section="(.*?)"(?:/>|>(.*?)(</(?:mw:)?editsection>))#'; @@ -75,18 +76,20 @@ class ParserOutput extends CacheTime { /** * callback used by getText to replace editsection tokens * @private + * @param $m + * @throws MWException * @return mixed */ function replaceEditSectionLinksCallback( $m ) { global $wgOut, $wgLang; $args = array( - htmlspecialchars_decode($m[1]), - htmlspecialchars_decode($m[2]), - isset($m[4]) ? $m[3] : null, + htmlspecialchars_decode( $m[1] ), + htmlspecialchars_decode( $m[2] ), + isset( $m[4] ) ? $m[3] : null, ); $args[0] = Title::newFromText( $args[0] ); - if ( !is_object($args[0]) ) { - throw new MWException("Bad parser output text."); + if ( !is_object( $args[0] ) ) { + throw new MWException( "Bad parser output text." ); } $args[] = $wgLang->getCode(); $skin = $wgOut->getSkin(); @@ -150,11 +153,35 @@ class ParserOutput extends CacheTime { return (bool)$this->mNewSection; } + /** + * Checks, if a url is pointing to the own server + * + * @param string $internal the server to check against + * @param string $url the url to check + * @return bool + */ + static function isLinkInternal( $internal, $url ) { + return (bool)preg_match( '/^' . + # If server is proto relative, check also for http/https links + ( substr( $internal, 0, 2 ) === '//' ? '(?:https?:)?' : '' ) . + preg_quote( $internal, '/' ) . + # check for query/path/anchor or end of link in each case + '(?:[\?\/\#]|$)/i', + $url + ); + } + function addExternalLink( $url ) { # We don't register links pointing to our own server, unless... :-) global $wgServer, $wgRegisterInternalExternals; - if( $wgRegisterInternalExternals or stripos($url,$wgServer.'/')!==0) + + $registerExternalLink = true; + if( !$wgRegisterInternalExternals ) { + $registerExternalLink = !self::isLinkInternal( $wgServer, $url ); + } + if( $registerExternalLink ) { $this->mExternalLinks[$url] = 1; + } } /** @@ -163,7 +190,7 @@ class ParserOutput extends CacheTime { * @param $title Title object * @param $id Mixed: optional known page_id so we can skip the lookup */ - function addLink( $title, $id = null ) { + function addLink( Title $title, $id = null ) { if ( $title->isExternal() ) { // Don't record interwikis in pagelinks $this->addInterwikiLink( $title ); @@ -193,9 +220,9 @@ class ParserOutput extends CacheTime { /** * Register a file dependency for this output - * @param $name string Title dbKey - * @param $timestamp string MW timestamp of file creation (or false if non-existing) - * @param $sha1 string base 36 SHA-1 of file (or false if non-existing) + * @param string $name Title dbKey + * @param string $timestamp MW timestamp of file creation (or false if non-existing) + * @param string $sha1 base 36 SHA-1 of file (or false if non-existing) * @return void */ function addImage( $name, $timestamp = null, $sha1 = null ) { @@ -234,7 +261,7 @@ class ParserOutput extends CacheTime { if( $prefix == '' ) { throw new MWException( 'Non-interwiki link passed, internal parser error.' ); } - if (!isset($this->mInterwikiLinks[$prefix])) { + if ( !isset( $this->mInterwikiLinks[$prefix] ) ) { $this->mInterwikiLinks[$prefix] = array(); } $this->mInterwikiLinks[$prefix][$title->getDBkey()] = 1; @@ -288,7 +315,7 @@ class ParserOutput extends CacheTime { * -- this is assumed to have been validated * (check equal normalisation, etc.) * - * @param $text String: desired title text + * @param string $text desired title text */ public function setDisplayTitle( $text ) { $this->setTitleText( $text ); @@ -320,13 +347,67 @@ class ParserOutput extends CacheTime { } /** - * Set a property to be cached in the DB + * Set a property to be stored in the page_props database table. + * + * page_props is a key value store indexed by the page ID. This allows + * the parser to set a property on a page which can then be quickly + * retrieved given the page ID or via a DB join when given the page + * title. + * + * setProperty() is thus used to propagate properties from the parsed + * page to request contexts other than a page view of the currently parsed + * article. + * + * Some applications examples: + * + * * To implement hidden categories, hiding pages from category listings + * by storing a property. + * + * * Overriding the displayed article title. + * @see ParserOutput::setDisplayTitle() + * + * * To implement image tagging, for example displaying an icon on an + * image thumbnail to indicate that it is listed for deletion on + * Wikimedia Commons. + * This is not actually implemented, yet but would be pretty cool. + * + * @note: Do not use setProperty() to set a property which is only used + * in a context where the ParserOutput object itself is already available, + * for example a normal page view. There is no need to save such a property + * in the database since it the text is already parsed. You can just hook + * OutputPageParserOutput and get your data out of the ParserOutput object. + * + * If you are writing an extension where you want to set a property in the + * parser which is used by an OutputPageParserOutput hook, you have to + * associate the extension data directly with the ParserOutput object. + * Since MediaWiki 1.21, you can use setExtensionData() to do this: + * + * @par Example: + * @code + * $parser->getOutput()->setExtensionData( 'my_ext_foo', '...' ); + * @endcode + * + * And then later, in OutputPageParserOutput or similar: + * + * @par Example: + * @code + * $output->getExtensionData( 'my_ext_foo' ); + * @endcode + * + * In MediaWiki 1.20 and older, you have to use a custom member variable + * within the ParserOutput object: + * + * @par Example: + * @code + * $parser->getOutput()->my_ext_foo = '...'; + * @endcode + * */ public function setProperty( $name, $value ) { $this->mProperties[$name] = $value; } - public function getProperty( $name ){ + public function getProperty( $name ) { return isset( $this->mProperties[$name] ) ? $this->mProperties[$name] : false; } @@ -337,26 +418,25 @@ class ParserOutput extends CacheTime { return $this->mProperties; } - /** * Returns the options from its ParserOptions which have been taken * into account to produce this output or false if not available. * @return mixed Array */ - public function getUsedOptions() { + public function getUsedOptions() { if ( !isset( $this->mAccessedOptions ) ) { return array(); } return array_keys( $this->mAccessedOptions ); - } + } - /** - * Callback passed by the Parser to the ParserOptions to keep track of which options are used. - * @access private - */ - function recordOption( $option ) { - $this->mAccessedOptions[$option] = true; - } + /** + * Callback passed by the Parser to the ParserOptions to keep track of which options are used. + * @access private + */ + function recordOption( $option ) { + $this->mAccessedOptions[$option] = true; + } /** * Adds an update job to the output. Any update jobs added to the output will eventually bexecuted in order to @@ -375,9 +455,13 @@ class ParserOutput extends CacheTime { * extracted from the page's content, including a LinksUpdate object for all links stored in * this ParserOutput object. * + * @note: Avoid using this method directly, use ContentHandler::getSecondaryDataUpdates() instead! The content + * handler may provide additional update objects. + * * @since 1.20 * - * @param $title Title of the page we're updating. If not given, a title object will be created based on $this->getTitleText() + * @param $title Title The title of the page we're updating. If not given, a title object will be created + * based on $this->getTitleText() * @param $recursive Boolean: queue jobs for recursive updates? * * @return Array. An array of instances of DataUpdate @@ -389,13 +473,75 @@ class ParserOutput extends CacheTime { $linksUpdate = new LinksUpdate( $title, $this, $recursive ); - if ( $this->mSecondaryDataUpdates === array() ) { - return array( $linksUpdate ); + return array_merge( $this->mSecondaryDataUpdates, array( $linksUpdate ) ); + } + + /** + * Attaches arbitrary data to this ParserObject. This can be used to store some information in + * the ParserOutput object for later use during page output. The data will be cached along with + * the ParserOutput object, but unlike data set using setProperty(), it is not recorded in the + * database. + * + * This method is provided to overcome the unsafe practice of attaching extra information to a + * ParserObject by directly assigning member variables. + * + * To use setExtensionData() to pass extension information from a hook inside the parser to a + * hook in the page output, use this in the parser hook: + * + * @par Example: + * @code + * $parser->getOutput()->setExtensionData( 'my_ext_foo', '...' ); + * @endcode + * + * And then later, in OutputPageParserOutput or similar: + * + * @par Example: + * @code + * $output->getExtensionData( 'my_ext_foo' ); + * @endcode + * + * In MediaWiki 1.20 and older, you have to use a custom member variable + * within the ParserOutput object: + * + * @par Example: + * @code + * $parser->getOutput()->my_ext_foo = '...'; + * @endcode + * + * @since 1.21 + * + * @param string $key The key for accessing the data. Extensions should take care to avoid + * conflicts in naming keys. It is suggested to use the extension's name as a + * prefix. + * + * @param mixed $value The value to set. Setting a value to null is equivalent to removing + * the value. + */ + public function setExtensionData( $key, $value ) { + if ( $value === null ) { + unset( $this->mExtensionData[$key] ); } else { - $updates = array_merge( $this->mSecondaryDataUpdates, array( $linksUpdate ) ); + $this->mExtensionData[$key] = $value; } + } - return $updates; - } + /** + * Gets extensions data previously attached to this ParserOutput using setExtensionData(). + * Typically, such data would be set while parsing the page, e.g. by a parser function. + * + * @since 1.21 + * + * @param string $key The key to look up. + * + * @return mixed The value previously set for the given key using setExtensionData( $key ), + * or null if no value was set for this key. + */ + public function getExtensionData( $key ) { + if ( isset( $this->mExtensionData[$key] ) ) { + return $this->mExtensionData[$key]; + } + + return null; + } } diff --git a/includes/parser/Parser_LinkHooks.php b/includes/parser/Parser_LinkHooks.php index 6bcc324d..b2cdc41a 100644 --- a/includes/parser/Parser_LinkHooks.php +++ b/includes/parser/Parser_LinkHooks.php @@ -32,7 +32,7 @@ class Parser_LinkHooks extends Parser { * can automatically discard old data. */ const VERSION = '1.6.4'; - + # Flags for Parser::setLinkHook # Also available as global constants from Defines.php const SLH_PATTERN = 1; @@ -84,11 +84,11 @@ class Parser_LinkHooks extends Parser { * Create a link hook, e.g. [[Namepsace:...|display}} * The callback function should have the form: * function myLinkCallback( $parser, $holders, $markers, - * Title $title, $titleText, &$sortText = null, &$leadingColon = false ) { ... } + * Title $title, $titleText, &$sortText = null, &$leadingColon = false ) { ... } * * Or with SLH_PATTERN: * function myLinkCallback( $parser, $holders, $markers, ) - * &$titleText, &$sortText = null, &$leadingColon = false ) { ... } + * &$titleText, &$sortText = null, &$leadingColon = false ) { ... } * * The callback may either return a number of different possible values: * String) Text result of the link @@ -100,18 +100,19 @@ class Parser_LinkHooks extends Parser { * @param $flags Integer: a combination of the following flags: * SLH_PATTERN Use a regex link pattern rather than a namespace * + * @throws MWException * @return callback|null The old callback function for this name, if any */ public function setLinkHook( $ns, $callback, $flags = 0 ) { if( $flags & SLH_PATTERN && !is_string($ns) ) - throw new MWException( __METHOD__.'() expecting a regex string pattern.' ); - elseif( $flags | ~SLH_PATTERN && !is_int($ns) ) - throw new MWException( __METHOD__.'() expecting a namespace index.' ); + throw new MWException( __METHOD__ . '() expecting a regex string pattern.' ); + elseif( $flags | ~SLH_PATTERN && !is_int( $ns ) ) + throw new MWException( __METHOD__ . '() expecting a namespace index.' ); $oldVal = isset( $this->mLinkHooks[$ns] ) ? $this->mLinkHooks[$ns][0] : null; $this->mLinkHooks[$ns] = array( $callback, $flags ); return $oldVal; } - + /** * Get all registered link hook identifiers * @@ -120,9 +121,11 @@ class Parser_LinkHooks extends Parser { function getLinkHooks() { return array_keys( $this->mLinkHooks ); } - + /** * Process [[ ]] wikilinks + * @param $s + * @throws MWException * @return LinkHolderArray * * @private @@ -130,8 +133,8 @@ class Parser_LinkHooks extends Parser { function replaceInternalLinks2( &$s ) { wfProfileIn( __METHOD__ ); - wfProfileIn( __METHOD__.'-setup' ); - static $tc = FALSE, $titleRegex;//$e1, $e1_img; + wfProfileIn( __METHOD__ . '-setup' ); + static $tc = false, $titleRegex; //$e1, $e1_img; if( !$tc ) { # the % is needed to support urlencoded titles as well $tc = Title::legalChars() . '#%'; @@ -144,15 +147,15 @@ class Parser_LinkHooks extends Parser { } $holders = new LinkHolderArray( $this ); - + if( is_null( $this->mTitle ) ) { + wfProfileOut( __METHOD__ . '-setup' ); wfProfileOut( __METHOD__ ); - wfProfileOut( __METHOD__.'-setup' ); - throw new MWException( __METHOD__.": \$this->mTitle is null\n" ); + throw new MWException( __METHOD__ . ": \$this->mTitle is null\n" ); } - wfProfileOut( __METHOD__.'-setup' ); - + wfProfileOut( __METHOD__ . '-setup' ); + $offset = 0; $offsetStack = array(); $markers = new LinkMarkerReplacer( $this, $holders, array( &$this, 'replaceInternalLinksCallback' ) ); @@ -164,7 +167,7 @@ class Parser_LinkHooks extends Parser { # Determine if the bracket is a starting or ending bracket # When we find both, use the first one elseif( $startBracketOffset !== false && $endBracketOffset !== false ) - $isStart = $startBracketOffset <= $endBracketOffset; + $isStart = $startBracketOffset <= $endBracketOffset; # When we only found one, check which it is else $isStart = $startBracketOffset !== false; $bracketOffset = $isStart ? $startBracketOffset : $endBracketOffset; @@ -175,26 +178,26 @@ class Parser_LinkHooks extends Parser { } else { /** Closing bracket **/ # Pop the start pos for our current link zone off the stack - $startBracketOffset = array_pop($offsetStack); + $startBracketOffset = array_pop( $offsetStack ); # Just to clean up the code, lets place offsets on the outer ends $endBracketOffset += 2; - + # Only do logic if we actually have a opening bracket for this - if( isset($startBracketOffset) ) { + if( isset( $startBracketOffset ) ) { # Extract text inside the link - @list( $titleText, $paramText ) = explode('|', - substr($s, $startBracketOffset+2, $endBracketOffset-$startBracketOffset-4), 2); + @list( $titleText, $paramText ) = explode( '|', + substr( $s, $startBracketOffset + 2, $endBracketOffset - $startBracketOffset - 4 ), 2 ); # Create markers only for valid links if( preg_match( $titleRegex, $titleText ) ) { # Store the text for the marker - $marker = $markers->addMarker($titleText, $paramText); + $marker = $markers->addMarker( $titleText, $paramText ); # Replace the current link with the marker - $s = substr($s,0,$startBracketOffset). - $marker. - substr($s, $endBracketOffset); + $s = substr( $s, 0, $startBracketOffset ) . + $marker . + substr( $s, $endBracketOffset ); # We have modified $s, because of this we need to set the # offset manually since the end position is different now - $offset = $startBracketOffset+strlen($marker); + $offset = $startBracketOffset+strlen( $marker ); continue; } # ToDo: Some LinkHooks may allow recursive links inside of @@ -203,55 +206,55 @@ class Parser_LinkHooks extends Parser { # ToDO: Some LinkHooks use patterns rather than namespaces # these need to be tested at this point here } - } # Bump our offset to after our current bracket $offset = $bracketOffset+2; } - - + # Now expand our tree - wfProfileIn( __METHOD__.'-expand' ); + wfProfileIn( __METHOD__ . '-expand' ); $s = $markers->expand( $s ); - wfProfileOut( __METHOD__.'-expand' ); - + wfProfileOut( __METHOD__ . '-expand' ); + wfProfileOut( __METHOD__ ); return $holders; } - + function replaceInternalLinksCallback( $parser, $holders, $markers, $titleText, $paramText ) { wfProfileIn( __METHOD__ ); - $wt = isset($paramText) ? "[[$titleText|$paramText]]" : "[[$titleText]]"; - wfProfileIn( __METHOD__."-misc" ); + $wt = isset( $paramText ) ? "[[$titleText|$paramText]]" : "[[$titleText]]"; + wfProfileIn( __METHOD__ . "-misc" ); + # Don't allow internal links to pages containing # PROTO: where PROTO is a valid URL protocol; these # should be external links. - if( preg_match('/^\b(?i:' . wfUrlProtocols() . ')/', $titleText) ) { + if( preg_match( '/^\b(?i:' . wfUrlProtocols() . ')/', $titleText ) ) { + wfProfileOut( __METHOD__ . "-misc" ); wfProfileOut( __METHOD__ ); return $wt; } - + # Make subpage if necessary if( $this->areSubpagesAllowed() ) { $titleText = $this->maybeDoSubpageLink( $titleText, $paramText ); } - + # Check for a leading colon and strip it if it is there $leadingColon = $titleText[0] == ':'; if( $leadingColon ) $titleText = substr( $titleText, 1 ); - - wfProfileOut( __METHOD__."-misc" ); + + wfProfileOut( __METHOD__ . "-misc" ); # Make title object - wfProfileIn( __METHOD__."-title" ); + wfProfileIn( __METHOD__ . "-title" ); $title = Title::newFromText( $this->mStripState->unstripNoWiki( $titleText ) ); if( !$title ) { - wfProfileOut( __METHOD__."-title" ); + wfProfileOut( __METHOD__ . "-title" ); wfProfileOut( __METHOD__ ); return $wt; } $ns = $title->getNamespace(); - wfProfileOut( __METHOD__."-title" ); - + wfProfileOut( __METHOD__ . "-title" ); + # Default for Namespaces is a default link # ToDo: Default for patterns is plain wikitext $return = true; @@ -270,25 +273,25 @@ class Parser_LinkHooks extends Parser { } if( $return === true ) { # True (treat as plain link) was returned, call the defaultLinkHook - $return = CoreLinkFunctions::defaultLinkHook( $parser, $holders, $markers, $title, + $return = CoreLinkFunctions::defaultLinkHook( $parser, $holders, $markers, $title, $titleText, $paramText, $leadingColon ); } if( $return === false ) { # False (no link) was returned, output plain wikitext # Build it again as the hook is allowed to modify $paramText - $return = isset($paramText) ? "[[$titleText|$paramText]]" : "[[$titleText]]"; + $return = isset( $paramText ) ? "[[$titleText|$paramText]]" : "[[$titleText]]"; } # Content was returned, return it wfProfileOut( __METHOD__ ); return $return; } - + } class LinkMarkerReplacer { - + protected $markers, $nextId, $parser, $holders, $callback; - + function __construct( $parser, $holders, $callback ) { $this->nextId = 0; $this->markers = array(); @@ -296,29 +299,28 @@ class LinkMarkerReplacer { $this->holders = $holders; $this->callback = $callback; } - - function addMarker($titleText, $paramText) { + + function addMarker( $titleText, $paramText ) { $id = $this->nextId++; $this->markers[$id] = array( $titleText, $paramText ); return "<!-- LINKMARKER $id -->"; } - + function findMarker( $string ) { - return (bool) preg_match('/<!-- LINKMARKER [0-9]+ -->/', $string ); + return (bool) preg_match( '/<!-- LINKMARKER [0-9]+ -->/', $string ); } - + function expand( $string ) { return StringUtils::delimiterReplaceCallback( "<!-- LINKMARKER ", " -->", array( &$this, 'callback' ), $string ); } - + function callback( $m ) { - $id = intval($m[1]); - if( !array_key_exists($id, $this->markers) ) return $m[0]; + $id = intval( $m[1] ); + if( !array_key_exists( $id, $this->markers ) ) return $m[0]; $args = $this->markers[$id]; array_unshift( $args, $this ); array_unshift( $args, $this->holders ); array_unshift( $args, $this->parser ); return call_user_func_array( $this->callback, $args ); } - } diff --git a/includes/parser/Preprocessor.php b/includes/parser/Preprocessor.php index bd13f9ae..aeacd2e1 100644 --- a/includes/parser/Preprocessor.php +++ b/includes/parser/Preprocessor.php @@ -84,9 +84,9 @@ interface PPFrame { /** * Create a child frame * - * @param $args array - * @param $title Title - * @param $indexOffset A number subtracted from the index attributes of the arguments + * @param array $args + * @param Title $title + * @param int $indexOffset A number subtracted from the index attributes of the arguments * * @return PPFrame */ @@ -205,7 +205,6 @@ interface PPNode { */ function getChildrenOfType( $type ); - /** * Returns the length of the array, or false if this is not an array-type node */ diff --git a/includes/parser/Preprocessor_DOM.php b/includes/parser/Preprocessor_DOM.php index 34de0ba5..d0c57ab5 100644 --- a/includes/parser/Preprocessor_DOM.php +++ b/includes/parser/Preprocessor_DOM.php @@ -110,7 +110,7 @@ class Preprocessor_DOM implements Preprocessor { * Preprocess some wikitext and return the document tree. * This is the ghost of Parser::replace_variables(). * - * @param $text String: the text to parse + * @param string $text the text to parse * @param $flags Integer: bitwise combination of: * Parser::PTD_FOR_INCLUSION Handle "<noinclude>" and "<includeonly>" as if the text is being * included. Default is to assume a direct page view. @@ -126,6 +126,7 @@ class Preprocessor_DOM implements Preprocessor { * cache may be implemented at a later date which takes further advantage of these strict * dependency requirements. * + * @throws MWException * @return PPNode_DOM */ function preprocessToObj( $text, $flags = 0 ) { @@ -136,9 +137,9 @@ class Preprocessor_DOM implements Preprocessor { $cacheable = ( $wgPreprocessorCacheThreshold !== false && strlen( $text ) > $wgPreprocessorCacheThreshold ); if ( $cacheable ) { - wfProfileIn( __METHOD__.'-cacheable' ); + wfProfileIn( __METHOD__ . '-cacheable' ); - $cacheKey = wfMemcKey( 'preprocess-xml', md5($text), $flags ); + $cacheKey = wfMemcKey( 'preprocess-xml', md5( $text ), $flags ); $cacheValue = $wgMemc->get( $cacheKey ); if ( $cacheValue ) { $version = substr( $cacheValue, 0, 8 ); @@ -151,11 +152,11 @@ class Preprocessor_DOM implements Preprocessor { } if ( $xml === false ) { if ( $cacheable ) { - wfProfileIn( __METHOD__.'-cache-miss' ); + wfProfileIn( __METHOD__ . '-cache-miss' ); $xml = $this->preprocessToXml( $text, $flags ); $cacheValue = sprintf( "%08d", self::CACHE_VERSION ) . $xml; $wgMemc->set( $cacheKey, $cacheValue, 86400 ); - wfProfileOut( __METHOD__.'-cache-miss' ); + wfProfileOut( __METHOD__ . '-cache-miss' ); wfDebugLog( "Preprocessor", "Saved preprocessor XML to memcached (key $cacheKey)" ); } else { $xml = $this->preprocessToXml( $text, $flags ); @@ -164,14 +165,14 @@ class Preprocessor_DOM implements Preprocessor { } // Fail if the number of elements exceeds acceptable limits - // Do not attempt to generate the DOM + // Do not attempt to generate the DOM $this->parser->mGeneratedPPNodeCount += substr_count( $xml, '<' ); $max = $this->parser->mOptions->getMaxGeneratedPPNodeCount(); if ( $this->parser->mGeneratedPPNodeCount > $max ) { - throw new MWException( __METHOD__.': generated node count limit exceeded' ); + throw new MWException( __METHOD__ . ': generated node count limit exceeded' ); } - wfProfileIn( __METHOD__.'-loadXML' ); + wfProfileIn( __METHOD__ . '-loadXML' ); $dom = new DOMDocument; wfSuppressWarnings(); $result = $dom->loadXML( $xml ); @@ -182,13 +183,13 @@ class Preprocessor_DOM implements Preprocessor { // 1 << 19 == XML_PARSE_HUGE, needed so newer versions of libxml2 don't barf when the XML is >256 levels deep $result = $dom->loadXML( $xml, 1 << 19 ); if ( !$result ) { - throw new MWException( __METHOD__.' generated invalid XML' ); + throw new MWException( __METHOD__ . ' generated invalid XML' ); } } $obj = new PPNode_DOM( $dom->documentElement ); - wfProfileOut( __METHOD__.'-loadXML' ); + wfProfileOut( __METHOD__ . '-loadXML' ); if ( $cacheable ) { - wfProfileOut( __METHOD__.'-cacheable' ); + wfProfileOut( __METHOD__ . '-cacheable' ); } wfProfileOut( __METHOD__ ); return $obj; @@ -396,7 +397,7 @@ class Preprocessor_DOM implements Preprocessor { if ( $stack->top ) { $part = $stack->top->getCurrentPart(); - if ( ! (isset( $part->commentEnd ) && $part->commentEnd == $wsStart - 1 )) { + if ( !(isset( $part->commentEnd ) && $part->commentEnd == $wsStart - 1 )) { $part->visualEnd = $wsStart; } // Else comments abutting, no change in visual end @@ -521,7 +522,7 @@ class Preprocessor_DOM implements Preprocessor { if ( $equalsLength > 0 ) { if ( $searchStart - $equalsLength == $piece->startPos ) { // This is just a single string of equals signs on its own line - // Replicate the doHeadings behaviour /={count}(.+)={count}/ + // Replicate the doHeadings behavior /={count}(.+)={count}/ // First find out how many equals signs there really are (don't stop at 6) $count = $equalsLength; if ( $count < 3 ) { @@ -657,19 +658,13 @@ class Preprocessor_DOM implements Preprocessor { $piece->parts = array( new PPDPart ); $piece->count -= $matchingCount; # do we still qualify for any callback with remaining count? - $names = $rules[$piece->open]['names']; - $skippedBraces = 0; - $enclosingAccum =& $accum; - while ( $piece->count ) { - if ( array_key_exists( $piece->count, $names ) ) { - $stack->push( $piece ); - $accum =& $stack->getAccum(); - break; - } - --$piece->count; - $skippedBraces ++; + $min = $rules[$piece->open]['min']; + if ( $piece->count >= $min ) { + $stack->push( $piece ); + $accum =& $stack->getAccum(); + } else { + $accum .= str_repeat( $piece->open, $piece->count ); } - $enclosingAccum .= str_repeat( $piece->open, $skippedBraces ); } $flags = $stack->getFlags(); extract( $flags ); @@ -757,7 +752,7 @@ class PPDStack { function pop() { if ( !count( $this->stack ) ) { - throw new MWException( __METHOD__.': no elements remaining' ); + throw new MWException( __METHOD__ . ': no elements remaining' ); } $temp = array_pop( $this->stack ); @@ -796,8 +791,8 @@ class PPDStack { * @ingroup Parser */ class PPDStackElement { - var $open, // Opening character (\n for heading) - $close, // Matching closing character + var $open, // Opening character (\n for heading) + $close, // Matching closing character $count, // Number of opening characters found (number of "=" for heading) $parts, // Array of PPDPart objects describing pipe-separated parts. $lineStart; // True if the open char appeared at the start of the input line. Not set for headings. @@ -814,7 +809,7 @@ class PPDStackElement { } function &getAccum() { - return $this->parts[count($this->parts) - 1]->out; + return $this->parts[count( $this->parts ) - 1]->out; } function addPart( $s = '' ) { @@ -823,7 +818,7 @@ class PPDStackElement { } function getCurrentPart() { - return $this->parts[count($this->parts) - 1]; + return $this->parts[count( $this->parts ) - 1]; } /** @@ -916,7 +911,6 @@ class PPFrame_DOM implements PPFrame { */ var $depth; - /** * Construct a new preprocessor frame. * @param $preprocessor Preprocessor The parent preprocessor @@ -1117,7 +1111,7 @@ class PPFrame_DOM implements PPFrame { } # Add a strip marker in PST mode so that pstPass2() can run some old-fashioned regexes on the result # Not in RECOVER_COMMENTS mode (extractSections) though - elseif ( $this->parser->ot['wiki'] && ! ( $flags & PPFrame::RECOVER_COMMENTS ) ) { + elseif ( $this->parser->ot['wiki'] && !( $flags & PPFrame::RECOVER_COMMENTS ) ) { $out .= $this->parser->insertStripItem( $contextNode->textContent ); } # Recover the literal comment in RECOVER_COMMENTS and pre+no-remove @@ -1174,7 +1168,7 @@ class PPFrame_DOM implements PPFrame { } } else { wfProfileOut( __METHOD__ ); - throw new MWException( __METHOD__.': Invalid parameter type' ); + throw new MWException( __METHOD__ . ': Invalid parameter type' ); } if ( $newIterator !== false ) { @@ -1458,25 +1452,25 @@ class PPTemplateFrame_DOM extends PPFrame_DOM { function getArguments() { $arguments = array(); foreach ( array_merge( - array_keys($this->numberedArgs), - array_keys($this->namedArgs)) as $key ) { - $arguments[$key] = $this->getArgument($key); + array_keys( $this->numberedArgs ), + array_keys( $this->namedArgs ) ) as $key ) { + $arguments[$key] = $this->getArgument( $key ); } return $arguments; } function getNumberedArguments() { $arguments = array(); - foreach ( array_keys($this->numberedArgs) as $key ) { - $arguments[$key] = $this->getArgument($key); + foreach ( array_keys( $this->numberedArgs ) as $key ) { + $arguments[$key] = $this->getArgument( $key ); } return $arguments; } function getNamedArguments() { $arguments = array(); - foreach ( array_keys($this->namedArgs) as $key ) { - $arguments[$key] = $this->getArgument($key); + foreach ( array_keys( $this->namedArgs ) as $key ) { + $arguments[$key] = $this->getArgument( $key ); } return $arguments; } @@ -1673,6 +1667,7 @@ class PPNode_DOM implements PPNode { * - index String index * - value PPNode value * + * @throws MWException * @return array */ function splitArg() { @@ -1694,6 +1689,7 @@ class PPNode_DOM implements PPNode { * Split an "<ext>" node into an associative array containing name, attr, inner and close * All values in the resulting array are PPNodes. Inner and close are optional. * + * @throws MWException * @return array */ function splitExt() { @@ -1719,6 +1715,7 @@ class PPNode_DOM implements PPNode { /** * Split a "<h>" node + * @throws MWException * @return array */ function splitHeading() { diff --git a/includes/parser/Preprocessor_Hash.php b/includes/parser/Preprocessor_Hash.php index 4f04c865..fad1adbb 100644 --- a/includes/parser/Preprocessor_Hash.php +++ b/includes/parser/Preprocessor_Hash.php @@ -89,7 +89,7 @@ class Preprocessor_Hash implements Preprocessor { * Preprocess some wikitext and return the document tree. * This is the ghost of Parser::replace_variables(). * - * @param $text String: the text to parse + * @param string $text the text to parse * @param $flags Integer: bitwise combination of: * Parser::PTD_FOR_INCLUSION Handle "<noinclude>" and "<includeonly>" as if the text is being * included. Default is to assume a direct page view. @@ -105,6 +105,7 @@ class Preprocessor_Hash implements Preprocessor { * cache may be implemented at a later date which takes further advantage of these strict * dependency requirements. * + * @throws MWException * @return PPNode_Hash_Tree */ function preprocessToObj( $text, $flags = 0 ) { @@ -115,9 +116,9 @@ class Preprocessor_Hash implements Preprocessor { $cacheable = $wgPreprocessorCacheThreshold !== false && strlen( $text ) > $wgPreprocessorCacheThreshold; if ( $cacheable ) { - wfProfileIn( __METHOD__.'-cacheable' ); + wfProfileIn( __METHOD__ . '-cacheable' ); - $cacheKey = wfMemcKey( 'preprocess-hash', md5($text), $flags ); + $cacheKey = wfMemcKey( 'preprocess-hash', md5( $text ), $flags ); $cacheValue = $wgMemc->get( $cacheKey ); if ( $cacheValue ) { $version = substr( $cacheValue, 0, 8 ); @@ -126,12 +127,12 @@ class Preprocessor_Hash implements Preprocessor { // From the cache wfDebugLog( "Preprocessor", "Loaded preprocessor hash from memcached (key $cacheKey)" ); - wfProfileOut( __METHOD__.'-cacheable' ); + wfProfileOut( __METHOD__ . '-cacheable' ); wfProfileOut( __METHOD__ ); return $hash; } } - wfProfileIn( __METHOD__.'-cache-miss' ); + wfProfileIn( __METHOD__ . '-cache-miss' ); } $rules = array( @@ -331,7 +332,7 @@ class Preprocessor_Hash implements Preprocessor { if ( $stack->top ) { $part = $stack->top->getCurrentPart(); - if ( ! (isset( $part->commentEnd ) && $part->commentEnd == $wsStart - 1 )) { + if ( !(isset( $part->commentEnd ) && $part->commentEnd == $wsStart - 1 )) { $part->visualEnd = $wsStart; } // Else comments abutting, no change in visual end @@ -390,7 +391,7 @@ class Preprocessor_Hash implements Preprocessor { } // <includeonly> and <noinclude> just become <ignore> tags if ( in_array( $lowerName, $ignoredElements ) ) { - $accum->addNodeWithText( 'ignore', substr( $text, $tagStartPos, $i - $tagStartPos ) ); + $accum->addNodeWithText( 'ignore', substr( $text, $tagStartPos, $i - $tagStartPos ) ); continue; } @@ -461,7 +462,7 @@ class Preprocessor_Hash implements Preprocessor { if ( $equalsLength > 0 ) { if ( $searchStart - $equalsLength == $piece->startPos ) { // This is just a single string of equals signs on its own line - // Replicate the doHeadings behaviour /={count}(.+)={count}/ + // Replicate the doHeadings behavior /={count}(.+)={count}/ // First find out how many equals signs there really are (don't stop at 6) $count = $equalsLength; if ( $count < 3 ) { @@ -548,7 +549,7 @@ class Preprocessor_Hash implements Preprocessor { } } - if ($matchingCount <= 0) { + if ( $matchingCount <= 0 ) { # No matching element found in callback array # Output a literal closing brace and continue $accum->addLiteral( str_repeat( $curChar, $count ) ); @@ -590,10 +591,10 @@ class Preprocessor_Hash implements Preprocessor { $lastNode = $node; } if ( !$node ) { - throw new MWException( __METHOD__. ': eqpos not found' ); + throw new MWException( __METHOD__ . ': eqpos not found' ); } if ( $node->name !== 'equals' ) { - throw new MWException( __METHOD__ .': eqpos is not equals' ); + throw new MWException( __METHOD__ . ': eqpos is not equals' ); } $equalsNode = $node; @@ -638,23 +639,17 @@ class Preprocessor_Hash implements Preprocessor { $accum =& $stack->getAccum(); # Re-add the old stack element if it still has unmatched opening characters remaining - if ($matchingCount < $piece->count) { + if ( $matchingCount < $piece->count ) { $piece->parts = array( new PPDPart_Hash ); $piece->count -= $matchingCount; # do we still qualify for any callback with remaining count? - $names = $rules[$piece->open]['names']; - $skippedBraces = 0; - $enclosingAccum =& $accum; - while ( $piece->count ) { - if ( array_key_exists( $piece->count, $names ) ) { - $stack->push( $piece ); - $accum =& $stack->getAccum(); - break; - } - --$piece->count; - $skippedBraces ++; + $min = $rules[$piece->open]['min']; + if ( $piece->count >= $min ) { + $stack->push( $piece ); + $accum =& $stack->getAccum(); + } else { + $accum->addLiteral( str_repeat( $piece->open, $piece->count ) ); } - $enclosingAccum->addLiteral( str_repeat( $piece->open, $skippedBraces ) ); } extract( $stack->getFlags() ); @@ -695,11 +690,11 @@ class Preprocessor_Hash implements Preprocessor { $rootNode->lastChild = $stack->rootAccum->lastNode; // Cache - if ($cacheable) { + if ( $cacheable ) { $cacheValue = sprintf( "%08d", self::CACHE_VERSION ) . serialize( $rootNode ); $wgMemc->set( $cacheKey, $cacheValue, 86400 ); - wfProfileOut( __METHOD__.'-cache-miss' ); - wfProfileOut( __METHOD__.'-cacheable' ); + wfProfileOut( __METHOD__ . '-cache-miss' ); + wfProfileOut( __METHOD__ . '-cacheable' ); wfDebugLog( "Preprocessor", "Saved preprocessor Hash to memcached (key $cacheKey)" ); } @@ -866,7 +861,6 @@ class PPFrame_Hash implements PPFrame { */ var $depth; - /** * Construct a new preprocessor frame. * @param $preprocessor Preprocessor: the parent preprocessor @@ -884,9 +878,11 @@ class PPFrame_Hash implements PPFrame { * Create a new child frame * $args is optionally a multi-root PPNode or array containing the template arguments * - * @param $args PPNode_Hash_Array|array + * @param array|bool|\PPNode_Hash_Array $args PPNode_Hash_Array|array * @param $title Title|bool * + * @param int $indexOffset + * @throws MWException * @return PPTemplateFrame_Hash */ function newChild( $args = false, $title = false, $indexOffset = 0 ) { @@ -1035,7 +1031,7 @@ class PPFrame_Hash implements PPFrame { } # Add a strip marker in PST mode so that pstPass2() can run some old-fashioned regexes on the result # Not in RECOVER_COMMENTS mode (extractSections) though - elseif ( $this->parser->ot['wiki'] && ! ( $flags & PPFrame::RECOVER_COMMENTS ) ) { + elseif ( $this->parser->ot['wiki'] && !( $flags & PPFrame::RECOVER_COMMENTS ) ) { $out .= $this->parser->insertStripItem( $contextNode->firstChild->value ); } # Recover the literal comment in RECOVER_COMMENTS and pre+no-remove @@ -1082,7 +1078,7 @@ class PPFrame_Hash implements PPFrame { $newIterator = $contextNode->getChildren(); } } else { - throw new MWException( __METHOD__.': Invalid parameter type' ); + throw new MWException( __METHOD__ . ': Invalid parameter type' ); } if ( $newIterator !== false ) { @@ -1371,9 +1367,9 @@ class PPTemplateFrame_Hash extends PPFrame_Hash { function getArguments() { $arguments = array(); foreach ( array_merge( - array_keys($this->numberedArgs), - array_keys($this->namedArgs)) as $key ) { - $arguments[$key] = $this->getArgument($key); + array_keys( $this->numberedArgs ), + array_keys( $this->namedArgs ) ) as $key ) { + $arguments[$key] = $this->getArgument( $key ); } return $arguments; } @@ -1383,8 +1379,8 @@ class PPTemplateFrame_Hash extends PPFrame_Hash { */ function getNumberedArguments() { $arguments = array(); - foreach ( array_keys($this->numberedArgs) as $key ) { - $arguments[$key] = $this->getArgument($key); + foreach ( array_keys( $this->numberedArgs ) as $key ) { + $arguments[$key] = $this->getArgument( $key ); } return $arguments; } @@ -1394,8 +1390,8 @@ class PPTemplateFrame_Hash extends PPFrame_Hash { */ function getNamedArguments() { $arguments = array(); - foreach ( array_keys($this->namedArgs) as $key ) { - $arguments[$key] = $this->getArgument($key); + foreach ( array_keys( $this->namedArgs ) as $key ) { + $arguments[$key] = $this->getArgument( $key ); } return $arguments; } @@ -1609,6 +1605,7 @@ class PPNode_Hash_Tree implements PPNode { * - index String index * - value PPNode value * + * @throws MWException * @return array */ function splitArg() { @@ -1642,6 +1639,7 @@ class PPNode_Hash_Tree implements PPNode { * Split an "<ext>" node into an associative array containing name, attr, inner and close * All values in the resulting array are PPNodes. Inner and close are optional. * + * @throws MWException * @return array */ function splitExt() { @@ -1669,6 +1667,7 @@ class PPNode_Hash_Tree implements PPNode { /** * Split an "<h>" node * + * @throws MWException * @return array */ function splitHeading() { @@ -1695,6 +1694,7 @@ class PPNode_Hash_Tree implements PPNode { /** * Split a "<template>" or "<tplarg>" node * + * @throws MWException * @return array */ function splitTemplate() { diff --git a/includes/parser/Preprocessor_HipHop.hphp b/includes/parser/Preprocessor_HipHop.hphp deleted file mode 100644 index 8b71a1b5..00000000 --- a/includes/parser/Preprocessor_HipHop.hphp +++ /dev/null @@ -1,2013 +0,0 @@ -<?php -/** - * A preprocessor optimised for HipHop, using HipHop-specific syntax. - * vim: ft=php - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * http://www.gnu.org/copyleft/gpl.html - * - * @file - * @ingroup Parser - */ - -/** - * @ingroup Parser - */ -class Preprocessor_HipHop implements Preprocessor { - /** - * @var Parser - */ - var $parser; - - const CACHE_VERSION = 1; - - /** - * @param $parser Parser - */ - function __construct( $parser ) { - $this->parser = $parser; - } - - /** - * @return PPFrame_HipHop - */ - function newFrame() { - return new PPFrame_HipHop( $this ); - } - - /** - * @param $args array - * @return PPCustomFrame_HipHop - */ - function newCustomFrame( $args ) { - return new PPCustomFrame_HipHop( $this, $args ); - } - - /** - * @param $values array - * @return PPNode_HipHop_Array - */ - function newPartNodeArray( $values ) { - $list = array(); - - foreach ( $values as $k => $val ) { - $partNode = new PPNode_HipHop_Tree( 'part' ); - $nameNode = new PPNode_HipHop_Tree( 'name' ); - - if ( is_int( $k ) ) { - $nameNode->addChild( new PPNode_HipHop_Attr( 'index', $k ) ); - $partNode->addChild( $nameNode ); - } else { - $nameNode->addChild( new PPNode_HipHop_Text( $k ) ); - $partNode->addChild( $nameNode ); - $partNode->addChild( new PPNode_HipHop_Text( '=' ) ); - } - - $valueNode = new PPNode_HipHop_Tree( 'value' ); - $valueNode->addChild( new PPNode_HipHop_Text( $val ) ); - $partNode->addChild( $valueNode ); - - $list[] = $partNode; - } - - $node = new PPNode_HipHop_Array( $list ); - return $node; - } - - /** - * Preprocess some wikitext and return the document tree. - * This is the ghost of Parser::replace_variables(). - * - * @param $text String: the text to parse - * @param $flags Integer: bitwise combination of: - * Parser::PTD_FOR_INCLUSION Handle <noinclude>/<includeonly> as if the text is being - * included. Default is to assume a direct page view. - * - * The generated DOM tree must depend only on the input text and the flags. - * The DOM tree must be the same in OT_HTML and OT_WIKI mode, to avoid a regression of bug 4899. - * - * Any flag added to the $flags parameter here, or any other parameter liable to cause a - * change in the DOM tree for a given text, must be passed through the section identifier - * in the section edit link and thus back to extractSections(). - * - * The output of this function is currently only cached in process memory, but a persistent - * cache may be implemented at a later date which takes further advantage of these strict - * dependency requirements. - * - * @throws MWException - * @return PPNode_HipHop_Tree - */ - function preprocessToObj( $text, $flags = 0 ) { - wfProfileIn( __METHOD__ ); - - // Check cache. - global $wgMemc, $wgPreprocessorCacheThreshold; - - $lengthText = strlen( $text ); - - $cacheable = ($wgPreprocessorCacheThreshold !== false && $lengthText > $wgPreprocessorCacheThreshold); - if ( $cacheable ) { - wfProfileIn( __METHOD__.'-cacheable' ); - - $cacheKey = strval( wfMemcKey( 'preprocess-hash', md5($text), $flags ) ); - $cacheValue = strval( $wgMemc->get( $cacheKey ) ); - if ( $cacheValue !== '' ) { - $version = substr( $cacheValue, 0, 8 ); - if ( intval( $version ) == self::CACHE_VERSION ) { - $hash = unserialize( substr( $cacheValue, 8 ) ); - // From the cache - wfDebugLog( "Preprocessor", - "Loaded preprocessor hash from memcached (key $cacheKey)" ); - wfProfileOut( __METHOD__.'-cacheable' ); - wfProfileOut( __METHOD__ ); - return $hash; - } - } - wfProfileIn( __METHOD__.'-cache-miss' ); - } - - $rules = array( - '{' => array( - 'end' => '}', - 'names' => array( - 2 => 'template', - 3 => 'tplarg', - ), - 'min' => 2, - 'max' => 3, - ), - '[' => array( - 'end' => ']', - 'names' => array( 2 => 'LITERAL' ), - 'min' => 2, - 'max' => 2, - ) - ); - - $forInclusion = (bool)( $flags & Parser::PTD_FOR_INCLUSION ); - - $xmlishElements = (array)$this->parser->getStripList(); - $enableOnlyinclude = false; - if ( $forInclusion ) { - $ignoredTags = array( 'includeonly', '/includeonly' ); - $ignoredElements = array( 'noinclude' ); - $xmlishElements[] = 'noinclude'; - if ( strpos( $text, '<onlyinclude>' ) !== false && strpos( $text, '</onlyinclude>' ) !== false ) { - $enableOnlyinclude = true; - } - } else if ( $this->parser->ot['wiki'] ) { - $ignoredTags = array( 'noinclude', '/noinclude', 'onlyinclude', '/onlyinclude', 'includeonly', '/includeonly' ); - $ignoredElements = array(); - } else { - $ignoredTags = array( 'noinclude', '/noinclude', 'onlyinclude', '/onlyinclude' ); - $ignoredElements = array( 'includeonly' ); - $xmlishElements[] = 'includeonly'; - } - $xmlishRegex = implode( '|', array_merge( $xmlishElements, $ignoredTags ) ); - - // Use "A" modifier (anchored) instead of "^", because ^ doesn't work with an offset - $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA"; - - $stack = new PPDStack_HipHop; - - $searchBase = "[{<\n"; - $revText = strrev( $text ); // For fast reverse searches - - $i = 0; # Input pointer, starts out pointing to a pseudo-newline before the start - $accum = $stack->getAccum(); # Current accumulator - $headingIndex = 1; - $stackFlags = array( - 'findPipe' => false, # True to take notice of pipe characters - 'findEquals' => false, # True to find equals signs in arguments - 'inHeading' => false, # True if $i is inside a possible heading - ); - $noMoreGT = false; # True if there are no more greater-than (>) signs right of $i - $findOnlyinclude = $enableOnlyinclude; # True to ignore all input up to the next <onlyinclude> - $fakeLineStart = true; # Do a line-start run without outputting an LF character - - while ( true ) { - //$this->memCheck(); - - if ( $findOnlyinclude ) { - // Ignore all input up to the next <onlyinclude> - $variantStartPos = strpos( $text, '<onlyinclude>', $i ); - if ( $variantStartPos === false ) { - // Ignored section runs to the end - $accum->addNodeWithText( 'ignore', strval( substr( $text, $i ) ) ); - break; - } - $startPos1 = intval( $variantStartPos ); - $tagEndPos = $startPos1 + strlen( '<onlyinclude>' ); // past-the-end - $accum->addNodeWithText( 'ignore', strval( substr( $text, $i, $tagEndPos - $i ) ) ); - $i = $tagEndPos; - $findOnlyinclude = false; - } - - if ( $fakeLineStart ) { - $found = 'line-start'; - $curChar = ''; - } else { - # Find next opening brace, closing brace or pipe - $search = $searchBase; - if ( $stack->top === false ) { - $currentClosing = ''; - } else { - $currentClosing = strval( $stack->getTop()->close ); - $search .= $currentClosing; - } - if ( $stackFlags['findPipe'] ) { - $search .= '|'; - } - if ( $stackFlags['findEquals'] ) { - // First equals will be for the template - $search .= '='; - } - $rule = null; - # Output literal section, advance input counter - $literalLength = intval( strcspn( $text, $search, $i ) ); - if ( $literalLength > 0 ) { - $accum->addLiteral( strval( substr( $text, $i, $literalLength ) ) ); - $i += $literalLength; - } - if ( $i >= $lengthText ) { - if ( $currentClosing === "\n" ) { - // Do a past-the-end run to finish off the heading - $curChar = ''; - $found = 'line-end'; - } else { - # All done - break; - } - } else { - $curChar = $text[$i]; - if ( $curChar === '|' ) { - $found = 'pipe'; - } elseif ( $curChar === '=' ) { - $found = 'equals'; - } elseif ( $curChar === '<' ) { - $found = 'angle'; - } elseif ( $curChar === "\n" ) { - if ( $stackFlags['inHeading'] ) { - $found = 'line-end'; - } else { - $found = 'line-start'; - } - } elseif ( $curChar === $currentClosing ) { - $found = 'close'; - } elseif ( isset( $rules[$curChar] ) ) { - $found = 'open'; - $rule = $rules[$curChar]; - } else { - # Some versions of PHP have a strcspn which stops on null characters - # Ignore and continue - ++$i; - continue; - } - } - } - - if ( $found === 'angle' ) { - $matches = false; - // Handle </onlyinclude> - if ( $enableOnlyinclude - && substr( $text, $i, strlen( '</onlyinclude>' ) ) === '</onlyinclude>' ) - { - $findOnlyinclude = true; - continue; - } - - // Determine element name - if ( !preg_match( $elementsRegex, $text, $matches, 0, $i + 1 ) ) { - // Element name missing or not listed - $accum->addLiteral( '<' ); - ++$i; - continue; - } - // Handle comments - if ( isset( $matches[2] ) && $matches[2] === '!--' ) { - // To avoid leaving blank lines, when a comment is both preceded - // and followed by a newline (ignoring spaces), trim leading and - // trailing spaces and one of the newlines. - - // Find the end - $variantEndPos = strpos( $text, '-->', $i + 4 ); - if ( $variantEndPos === false ) { - // Unclosed comment in input, runs to end - $inner = strval( substr( $text, $i ) ); - $accum->addNodeWithText( 'comment', $inner ); - $i = $lengthText; - } else { - $endPos = intval( $variantEndPos ); - // Search backwards for leading whitespace - if ( $i ) { - $wsStart = $i - intval( strspn( $revText, ' ', $lengthText - $i ) ); - } else { - $wsStart = 0; - } - // Search forwards for trailing whitespace - // $wsEnd will be the position of the last space (or the '>' if there's none) - $wsEnd = $endPos + 2 + intval( strspn( $text, ' ', $endPos + 3 ) ); - // Eat the line if possible - // TODO: This could theoretically be done if $wsStart == 0, i.e. for comments at - // the overall start. That's not how Sanitizer::removeHTMLcomments() did it, but - // it's a possible beneficial b/c break. - if ( $wsStart > 0 && substr( $text, $wsStart - 1, 1 ) === "\n" - && substr( $text, $wsEnd + 1, 1 ) === "\n" ) - { - $startPos2 = $wsStart; - $endPos = $wsEnd + 1; - // Remove leading whitespace from the end of the accumulator - // Sanity check first though - $wsLength = $i - $wsStart; - if ( $wsLength > 0 - && $accum->lastNode instanceof PPNode_HipHop_Text - && substr( $accum->lastNode->value, -$wsLength ) === str_repeat( ' ', $wsLength ) ) - { - $accum->lastNode->value = strval( substr( $accum->lastNode->value, 0, -$wsLength ) ); - } - // Do a line-start run next time to look for headings after the comment - $fakeLineStart = true; - } else { - // No line to eat, just take the comment itself - $startPos2 = $i; - $endPos += 2; - } - - if ( $stack->top ) { - $part = $stack->getTop()->getCurrentPart(); - if ( ! (isset( $part->commentEnd ) && $part->commentEnd == $wsStart - 1 )) { - $part->visualEnd = $wsStart; - } - // Else comments abutting, no change in visual end - $part->commentEnd = $endPos; - } - $i = $endPos + 1; - $inner = strval( substr( $text, $startPos2, $endPos - $startPos2 + 1 ) ); - $accum->addNodeWithText( 'comment', $inner ); - } - continue; - } - $name = strval( $matches[1] ); - $lowerName = strtolower( $name ); - $attrStart = $i + strlen( $name ) + 1; - - // Find end of tag - $variantTagEndPos = $noMoreGT ? false : strpos( $text, '>', $attrStart ); - if ( $variantTagEndPos === false ) { - // Infinite backtrack - // Disable tag search to prevent worst-case O(N^2) performance - $noMoreGT = true; - $accum->addLiteral( '<' ); - ++$i; - continue; - } - $tagEndPos = intval( $variantTagEndPos ); - - // Handle ignored tags - if ( in_array( $lowerName, $ignoredTags ) ) { - $accum->addNodeWithText( 'ignore', strval( substr( $text, $i, $tagEndPos - $i + 1 ) ) ); - $i = $tagEndPos + 1; - continue; - } - - $tagStartPos = $i; - $close = ''; - if ( $text[$tagEndPos-1] === '/' ) { - // Short end tag - $attrEnd = $tagEndPos - 1; - $shortEnd = true; - $inner = ''; - $i = $tagEndPos + 1; - $haveClose = false; - } else { - $attrEnd = $tagEndPos; - $shortEnd = false; - // Find closing tag - if ( preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i", - $text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1 ) ) - { - $inner = strval( substr( $text, $tagEndPos + 1, $matches[0][1] - $tagEndPos - 1 ) ); - $i = intval( $matches[0][1] ) + strlen( $matches[0][0] ); - $close = strval( $matches[0][0] ); - $haveClose = true; - } else { - // No end tag -- let it run out to the end of the text. - $inner = strval( substr( $text, $tagEndPos + 1 ) ); - $i = $lengthText; - $haveClose = false; - } - } - // <includeonly> and <noinclude> just become <ignore> tags - if ( in_array( $lowerName, $ignoredElements ) ) { - $accum->addNodeWithText( 'ignore', strval( substr( $text, $tagStartPos, $i - $tagStartPos ) ) ); - continue; - } - - if ( $attrEnd <= $attrStart ) { - $attr = ''; - } else { - // Note that the attr element contains the whitespace between name and attribute, - // this is necessary for precise reconstruction during pre-save transform. - $attr = strval( substr( $text, $attrStart, $attrEnd - $attrStart ) ); - } - - $extNode = new PPNode_HipHop_Tree( 'ext' ); - $extNode->addChild( PPNode_HipHop_Tree::newWithText( 'name', $name ) ); - $extNode->addChild( PPNode_HipHop_Tree::newWithText( 'attr', $attr ) ); - if ( !$shortEnd ) { - $extNode->addChild( PPNode_HipHop_Tree::newWithText( 'inner', $inner ) ); - } - if ( $haveClose ) { - $extNode->addChild( PPNode_HipHop_Tree::newWithText( 'close', $close ) ); - } - $accum->addNode( $extNode ); - } - - elseif ( $found === 'line-start' ) { - // Is this the start of a heading? - // Line break belongs before the heading element in any case - if ( $fakeLineStart ) { - $fakeLineStart = false; - } else { - $accum->addLiteral( $curChar ); - $i++; - } - - $count = intval( strspn( $text, '=', $i, 6 ) ); - if ( $count == 1 && $stackFlags['findEquals'] ) { - // DWIM: This looks kind of like a name/value separator - // Let's let the equals handler have it and break the potential heading - // This is heuristic, but AFAICT the methods for completely correct disambiguation are very complex. - } elseif ( $count > 0 ) { - $partData = array( - 'open' => "\n", - 'close' => "\n", - 'parts' => array( new PPDPart_HipHop( str_repeat( '=', $count ) ) ), - 'startPos' => $i, - 'count' => $count ); - $stack->push( $partData ); - $accum = $stack->getAccum(); - $stackFlags = $stack->getFlags(); - $i += $count; - } - } elseif ( $found === 'line-end' ) { - $piece = $stack->getTop(); - // A heading must be open, otherwise \n wouldn't have been in the search list - assert( $piece->open === "\n" ); // Passing the assert condition directly instead of string, as - // HPHP /compiler/ chokes on strings when ASSERT_ACTIVE != 0. - $part = $piece->getCurrentPart(); - // Search back through the input to see if it has a proper close - // Do this using the reversed string since the other solutions (end anchor, etc.) are inefficient - $wsLength = intval( strspn( $revText, " \t", $lengthText - $i ) ); - $searchStart = $i - $wsLength; - if ( isset( $part->commentEnd ) && $searchStart - 1 == $part->commentEnd ) { - // Comment found at line end - // Search for equals signs before the comment - $searchStart = intval( $part->visualEnd ); - $searchStart -= intval( strspn( $revText, " \t", $lengthText - $searchStart ) ); - } - $count = intval( $piece->count ); - $equalsLength = intval( strspn( $revText, '=', $lengthText - $searchStart ) ); - $isTreeNode = false; - $resultAccum = $accum; - if ( $equalsLength > 0 ) { - if ( $searchStart - $equalsLength == $piece->startPos ) { - // This is just a single string of equals signs on its own line - // Replicate the doHeadings behaviour /={count}(.+)={count}/ - // First find out how many equals signs there really are (don't stop at 6) - $count = $equalsLength; - if ( $count < 3 ) { - $count = 0; - } else { - $count = intval( ( $count - 1 ) / 2 ); - if ( $count > 6 ) { - $count = 6; - } - } - } else { - if ( $count > $equalsLength ) { - $count = $equalsLength; - } - } - if ( $count > 0 ) { - // Normal match, output <h> - $tree = new PPNode_HipHop_Tree( 'possible-h' ); - $tree->addChild( new PPNode_HipHop_Attr( 'level', $count ) ); - $tree->addChild( new PPNode_HipHop_Attr( 'i', $headingIndex++ ) ); - $tree->lastChild->nextSibling = $accum->firstNode; - $tree->lastChild = $accum->lastNode; - $isTreeNode = true; - } else { - // Single equals sign on its own line, count=0 - // Output $resultAccum - } - } else { - // No match, no <h>, just pass down the inner text - // Output $resultAccum - } - // Unwind the stack - $stack->pop(); - $accum = $stack->getAccum(); - $stackFlags = $stack->getFlags(); - - // Append the result to the enclosing accumulator - if ( $isTreeNode ) { - $accum->addNode( $tree ); - } else { - $accum->addAccum( $resultAccum ); - } - // Note that we do NOT increment the input pointer. - // This is because the closing linebreak could be the opening linebreak of - // another heading. Infinite loops are avoided because the next iteration MUST - // hit the heading open case above, which unconditionally increments the - // input pointer. - } elseif ( $found === 'open' ) { - # count opening brace characters - $count = intval( strspn( $text, $curChar, $i ) ); - - # we need to add to stack only if opening brace count is enough for one of the rules - if ( $count >= $rule['min'] ) { - # Add it to the stack - $partData = array( - 'open' => $curChar, - 'close' => $rule['end'], - 'count' => $count, - 'lineStart' => ($i == 0 || $text[$i-1] === "\n"), - ); - - $stack->push( $partData ); - $accum = $stack->getAccum(); - $stackFlags = $stack->getFlags(); - } else { - # Add literal brace(s) - $accum->addLiteral( str_repeat( $curChar, $count ) ); - } - $i += $count; - } elseif ( $found === 'close' ) { - $piece = $stack->getTop(); - # lets check if there are enough characters for closing brace - $maxCount = intval( $piece->count ); - $count = intval( strspn( $text, $curChar, $i, $maxCount ) ); - - # check for maximum matching characters (if there are 5 closing - # characters, we will probably need only 3 - depending on the rules) - $rule = $rules[$piece->open]; - if ( $count > $rule['max'] ) { - # The specified maximum exists in the callback array, unless the caller - # has made an error - $matchingCount = intval( $rule['max'] ); - } else { - # Count is less than the maximum - # Skip any gaps in the callback array to find the true largest match - # Need to use array_key_exists not isset because the callback can be null - $matchingCount = $count; - while ( $matchingCount > 0 && !array_key_exists( $matchingCount, $rule['names'] ) ) { - --$matchingCount; - } - } - - if ($matchingCount <= 0) { - # No matching element found in callback array - # Output a literal closing brace and continue - $accum->addLiteral( str_repeat( $curChar, $count ) ); - $i += $count; - continue; - } - $name = strval( $rule['names'][$matchingCount] ); - $isTreeNode = false; - if ( $name === 'LITERAL' ) { - // No element, just literal text - $resultAccum = $piece->breakSyntax( $matchingCount ); - $resultAccum->addLiteral( str_repeat( $rule['end'], $matchingCount ) ); - } else { - # Create XML element - # Note: $parts is already XML, does not need to be encoded further - $isTreeNode = true; - $parts = $piece->parts; - $titleAccum = PPDAccum_HipHop::cast( $parts[0]->out ); - unset( $parts[0] ); - - $tree = new PPNode_HipHop_Tree( $name ); - - # The invocation is at the start of the line if lineStart is set in - # the stack, and all opening brackets are used up. - if ( $maxCount == $matchingCount && !empty( $piece->lineStart ) ) { - $tree->addChild( new PPNode_HipHop_Attr( 'lineStart', 1 ) ); - } - $titleNode = new PPNode_HipHop_Tree( 'title' ); - $titleNode->firstChild = $titleAccum->firstNode; - $titleNode->lastChild = $titleAccum->lastNode; - $tree->addChild( $titleNode ); - $argIndex = 1; - foreach ( $parts as $variantPart ) { - $part = PPDPart_HipHop::cast( $variantPart ); - if ( isset( $part->eqpos ) ) { - // Find equals - $lastNode = false; - for ( $node = $part->out->firstNode; $node; $node = $node->nextSibling ) { - if ( $node === $part->eqpos ) { - break; - } - $lastNode = $node; - } - if ( !$node ) { - throw new MWException( __METHOD__. ': eqpos not found' ); - } - if ( $node->name !== 'equals' ) { - throw new MWException( __METHOD__ .': eqpos is not equals' ); - } - $equalsNode = $node; - - // Construct name node - $nameNode = new PPNode_HipHop_Tree( 'name' ); - if ( $lastNode !== false ) { - $lastNode->nextSibling = false; - $nameNode->firstChild = $part->out->firstNode; - $nameNode->lastChild = $lastNode; - } - - // Construct value node - $valueNode = new PPNode_HipHop_Tree( 'value' ); - if ( $equalsNode->nextSibling !== false ) { - $valueNode->firstChild = $equalsNode->nextSibling; - $valueNode->lastChild = $part->out->lastNode; - } - $partNode = new PPNode_HipHop_Tree( 'part' ); - $partNode->addChild( $nameNode ); - $partNode->addChild( $equalsNode->firstChild ); - $partNode->addChild( $valueNode ); - $tree->addChild( $partNode ); - } else { - $partNode = new PPNode_HipHop_Tree( 'part' ); - $nameNode = new PPNode_HipHop_Tree( 'name' ); - $nameNode->addChild( new PPNode_HipHop_Attr( 'index', $argIndex++ ) ); - $valueNode = new PPNode_HipHop_Tree( 'value' ); - $valueNode->firstChild = $part->out->firstNode; - $valueNode->lastChild = $part->out->lastNode; - $partNode->addChild( $nameNode ); - $partNode->addChild( $valueNode ); - $tree->addChild( $partNode ); - } - } - } - - # Advance input pointer - $i += $matchingCount; - - # Unwind the stack - $stack->pop(); - $accum = $stack->getAccum(); - - # Re-add the old stack element if it still has unmatched opening characters remaining - if ($matchingCount < $piece->count) { - $piece->parts = array( new PPDPart_HipHop ); - $piece->count -= $matchingCount; - # do we still qualify for any callback with remaining count? - $names = $rules[$piece->open]['names']; - $skippedBraces = 0; - $enclosingAccum = $accum; - while ( $piece->count ) { - if ( array_key_exists( $piece->count, $names ) ) { - $stack->push( $piece ); - $accum = $stack->getAccum(); - break; - } - --$piece->count; - $skippedBraces ++; - } - $enclosingAccum->addLiteral( str_repeat( $piece->open, $skippedBraces ) ); - } - - $stackFlags = $stack->getFlags(); - - # Add XML element to the enclosing accumulator - if ( $isTreeNode ) { - $accum->addNode( $tree ); - } else { - $accum->addAccum( $resultAccum ); - } - } elseif ( $found === 'pipe' ) { - $stackFlags['findEquals'] = true; // shortcut for getFlags() - $stack->addPart(); - $accum = $stack->getAccum(); - ++$i; - } elseif ( $found === 'equals' ) { - $stackFlags['findEquals'] = false; // shortcut for getFlags() - $accum->addNodeWithText( 'equals', '=' ); - $stack->getCurrentPart()->eqpos = $accum->lastNode; - ++$i; - } - } - - # Output any remaining unclosed brackets - foreach ( $stack->stack as $variantPiece ) { - $piece = PPDStackElement_HipHop::cast( $variantPiece ); - $stack->rootAccum->addAccum( $piece->breakSyntax() ); - } - - # Enable top-level headings - for ( $node = $stack->rootAccum->firstNode; $node; $node = $node->nextSibling ) { - if ( isset( $node->name ) && $node->name === 'possible-h' ) { - $node->name = 'h'; - } - } - - $rootNode = new PPNode_HipHop_Tree( 'root' ); - $rootNode->firstChild = $stack->rootAccum->firstNode; - $rootNode->lastChild = $stack->rootAccum->lastNode; - - // Cache - if ($cacheable) { - $cacheValue = sprintf( "%08d", self::CACHE_VERSION ) . serialize( $rootNode ); - $wgMemc->set( $cacheKey, $cacheValue, 86400 ); - wfProfileOut( __METHOD__.'-cache-miss' ); - wfProfileOut( __METHOD__.'-cacheable' ); - wfDebugLog( "Preprocessor", "Saved preprocessor Hash to memcached (key $cacheKey)" ); - } - - wfProfileOut( __METHOD__ ); - return $rootNode; - } -} - - - -/** - * Stack class to help Preprocessor::preprocessToObj() - * @ingroup Parser - */ -class PPDStack_HipHop { - var $stack, $rootAccum; - - /** - * @var PPDStack - */ - var $top; - var $out; - - static $false = false; - - function __construct() { - $this->stack = array(); - $this->top = false; - $this->rootAccum = new PPDAccum_HipHop; - $this->accum = $this->rootAccum; - } - - /** - * @return int - */ - function count() { - return count( $this->stack ); - } - - function getAccum() { - return PPDAccum_HipHop::cast( $this->accum ); - } - - function getCurrentPart() { - return $this->getTop()->getCurrentPart(); - } - - function getTop() { - return PPDStackElement_HipHop::cast( $this->top ); - } - - function push( $data ) { - if ( $data instanceof PPDStackElement_HipHop ) { - $this->stack[] = $data; - } else { - $this->stack[] = new PPDStackElement_HipHop( $data ); - } - $this->top = $this->stack[ count( $this->stack ) - 1 ]; - $this->accum = $this->top->getAccum(); - } - - function pop() { - if ( !count( $this->stack ) ) { - throw new MWException( __METHOD__.': no elements remaining' ); - } - $temp = array_pop( $this->stack ); - - if ( count( $this->stack ) ) { - $this->top = $this->stack[ count( $this->stack ) - 1 ]; - $this->accum = $this->top->getAccum(); - } else { - $this->top = self::$false; - $this->accum = $this->rootAccum; - } - return $temp; - } - - function addPart( $s = '' ) { - $this->top->addPart( $s ); - $this->accum = $this->top->getAccum(); - } - - /** - * @return array - */ - function getFlags() { - if ( !count( $this->stack ) ) { - return array( - 'findEquals' => false, - 'findPipe' => false, - 'inHeading' => false, - ); - } else { - return $this->top->getFlags(); - } - } -} - -/** - * @ingroup Parser - */ -class PPDStackElement_HipHop { - var $open, // Opening character (\n for heading) - $close, // Matching closing character - $count, // Number of opening characters found (number of "=" for heading) - $parts, // Array of PPDPart objects describing pipe-separated parts. - $lineStart; // True if the open char appeared at the start of the input line. Not set for headings. - - /** - * @param $obj PPDStackElement_HipHop - * @return PPDStackElement_HipHop - */ - static function cast( PPDStackElement_HipHop $obj ) { - return $obj; - } - - /** - * @param $data array - */ - function __construct( $data = array() ) { - $this->parts = array( new PPDPart_HipHop ); - - foreach ( $data as $name => $value ) { - $this->$name = $value; - } - } - - /** - * @return PPDAccum_HipHop - */ - function getAccum() { - return PPDAccum_HipHop::cast( $this->parts[count($this->parts) - 1]->out ); - } - - /** - * @param $s string - */ - function addPart( $s = '' ) { - $this->parts[] = new PPDPart_HipHop( $s ); - } - - /** - * @return PPDPart_HipHop - */ - function getCurrentPart() { - return PPDPart_HipHop::cast( $this->parts[count($this->parts) - 1] ); - } - - /** - * @return array - */ - function getFlags() { - $partCount = count( $this->parts ); - $findPipe = $this->open !== "\n" && $this->open !== '['; - return array( - 'findPipe' => $findPipe, - 'findEquals' => $findPipe && $partCount > 1 && !isset( $this->parts[$partCount - 1]->eqpos ), - 'inHeading' => $this->open === "\n", - ); - } - - /** - * Get the accumulator that would result if the close is not found. - * - * @param $openingCount bool - * @return PPDAccum_HipHop - */ - function breakSyntax( $openingCount = false ) { - if ( $this->open === "\n" ) { - $accum = PPDAccum_HipHop::cast( $this->parts[0]->out ); - } else { - if ( $openingCount === false ) { - $openingCount = $this->count; - } - $accum = new PPDAccum_HipHop; - $accum->addLiteral( str_repeat( $this->open, $openingCount ) ); - $first = true; - foreach ( $this->parts as $part ) { - if ( $first ) { - $first = false; - } else { - $accum->addLiteral( '|' ); - } - $accum->addAccum( $part->out ); - } - } - return $accum; - } -} - -/** - * @ingroup Parser - */ -class PPDPart_HipHop { - var $out; // Output accumulator object - - // Optional member variables: - // eqpos Position of equals sign in output accumulator - // commentEnd Past-the-end input pointer for the last comment encountered - // visualEnd Past-the-end input pointer for the end of the accumulator minus comments - - function __construct( $out = '' ) { - $this->out = new PPDAccum_HipHop; - if ( $out !== '' ) { - $this->out->addLiteral( $out ); - } - } - - static function cast( PPDPart_HipHop $obj ) { - return $obj; - } -} - -/** - * @ingroup Parser - */ -class PPDAccum_HipHop { - var $firstNode, $lastNode; - - function __construct() { - $this->firstNode = $this->lastNode = false; - } - - static function cast( PPDAccum_HipHop $obj ) { - return $obj; - } - - /** - * Append a string literal - */ - function addLiteral( string $s ) { - if ( $this->lastNode === false ) { - $this->firstNode = $this->lastNode = new PPNode_HipHop_Text( $s ); - } elseif ( $this->lastNode instanceof PPNode_HipHop_Text ) { - $this->lastNode->value .= $s; - } else { - $this->lastNode->nextSibling = new PPNode_HipHop_Text( $s ); - $this->lastNode = $this->lastNode->nextSibling; - } - } - - /** - * Append a PPNode - */ - function addNode( PPNode $node ) { - if ( $this->lastNode === false ) { - $this->firstNode = $this->lastNode = $node; - } else { - $this->lastNode->nextSibling = $node; - $this->lastNode = $node; - } - } - - /** - * Append a tree node with text contents - */ - function addNodeWithText( string $name, string $value ) { - $node = PPNode_HipHop_Tree::newWithText( $name, $value ); - $this->addNode( $node ); - } - - /** - * Append a PPDAccum_HipHop - * Takes over ownership of the nodes in the source argument. These nodes may - * subsequently be modified, especially nextSibling. - */ - function addAccum( PPDAccum_HipHop $accum ) { - if ( $accum->lastNode === false ) { - // nothing to add - } elseif ( $this->lastNode === false ) { - $this->firstNode = $accum->firstNode; - $this->lastNode = $accum->lastNode; - } else { - $this->lastNode->nextSibling = $accum->firstNode; - $this->lastNode = $accum->lastNode; - } - } -} - -/** - * An expansion frame, used as a context to expand the result of preprocessToObj() - * @ingroup Parser - */ -class PPFrame_HipHop implements PPFrame { - - /** - * @var Parser - */ - var $parser; - - /** - * @var Preprocessor - */ - var $preprocessor; - - /** - * @var Title - */ - var $title; - var $titleCache; - - /** - * Hashtable listing templates which are disallowed for expansion in this frame, - * having been encountered previously in parent frames. - */ - var $loopCheckHash; - - /** - * Recursion depth of this frame, top = 0 - * Note that this is NOT the same as expansion depth in expand() - */ - var $depth; - - /** - * Construct a new preprocessor frame. - * @param $preprocessor Preprocessor: the parent preprocessor - */ - function __construct( $preprocessor ) { - $this->preprocessor = $preprocessor; - $this->parser = $preprocessor->parser; - $this->title = $this->parser->mTitle; - $this->titleCache = array( $this->title ? $this->title->getPrefixedDBkey() : false ); - $this->loopCheckHash = array(); - $this->depth = 0; - } - - /** - * Create a new child frame - * $args is optionally a multi-root PPNode or array containing the template arguments - * - * @param $args PPNode_HipHop_Array|array|bool - * @param $title Title|bool - * @param $indexOffset A number subtracted from the index attributes of the arguments - * - * @throws MWException - * @return PPTemplateFrame_HipHop - */ - function newChild( $args = false, $title = false, $indexOffset = 0 ) { - $namedArgs = array(); - $numberedArgs = array(); - if ( $title === false ) { - $title = $this->title; - } - if ( $args !== false ) { - if ( $args instanceof PPNode_HipHop_Array ) { - $args = $args->value; - } elseif ( !is_array( $args ) ) { - throw new MWException( __METHOD__ . ': $args must be array or PPNode_HipHop_Array' ); - } - foreach ( $args as $arg ) { - $bits = $arg->splitArg(); - if ( $bits['index'] !== '' ) { - // Numbered parameter - $numberedArgs[$bits['index']] = $bits['value']; - unset( $namedArgs[$bits['index']] ); - } else { - // Named parameter - $name = trim( $this->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) ); - $namedArgs[$name] = $bits['value']; - unset( $numberedArgs[$name] ); - } - } - } - return new PPTemplateFrame_HipHop( $this->preprocessor, $this, $numberedArgs, $namedArgs, $title ); - } - - /** - * @throws MWException - * @param $root - * @param $flags int - * @return string - */ - function expand( $root, $flags = 0 ) { - static $expansionDepth = 0; - if ( is_string( $root ) ) { - return $root; - } - - if ( ++$this->parser->mPPNodeCount > $this->parser->mOptions->getMaxPPNodeCount() ) { - $this->parser->limitationWarn( 'node-count-exceeded', - $this->parser->mPPNodeCount, - $this->parser->mOptions->getMaxPPNodeCount() - ); - return '<span class="error">Node-count limit exceeded</span>'; - } - if ( $expansionDepth > $this->parser->mOptions->getMaxPPExpandDepth() ) { - $this->parser->limitationWarn( 'expansion-depth-exceeded', - $expansionDepth, - $this->parser->mOptions->getMaxPPExpandDepth() - ); - return '<span class="error">Expansion depth limit exceeded</span>'; - } - ++$expansionDepth; - if ( $expansionDepth > $this->parser->mHighestExpansionDepth ) { - $this->parser->mHighestExpansionDepth = $expansionDepth; - } - - $outStack = array( '', '' ); - $iteratorStack = array( false, $root ); - $indexStack = array( 0, 0 ); - - while ( count( $iteratorStack ) > 1 ) { - $level = count( $outStack ) - 1; - $iteratorNode =& $iteratorStack[ $level ]; - $out =& $outStack[$level]; - $index =& $indexStack[$level]; - - if ( is_array( $iteratorNode ) ) { - if ( $index >= count( $iteratorNode ) ) { - // All done with this iterator - $iteratorStack[$level] = false; - $contextNode = false; - } else { - $contextNode = $iteratorNode[$index]; - $index++; - } - } elseif ( $iteratorNode instanceof PPNode_HipHop_Array ) { - if ( $index >= $iteratorNode->getLength() ) { - // All done with this iterator - $iteratorStack[$level] = false; - $contextNode = false; - } else { - $contextNode = $iteratorNode->item( $index ); - $index++; - } - } else { - // Copy to $contextNode and then delete from iterator stack, - // because this is not an iterator but we do have to execute it once - $contextNode = $iteratorStack[$level]; - $iteratorStack[$level] = false; - } - - $newIterator = false; - - if ( $contextNode === false ) { - // nothing to do - } elseif ( is_string( $contextNode ) ) { - $out .= $contextNode; - } elseif ( is_array( $contextNode ) || $contextNode instanceof PPNode_HipHop_Array ) { - $newIterator = $contextNode; - } elseif ( $contextNode instanceof PPNode_HipHop_Attr ) { - // No output - } elseif ( $contextNode instanceof PPNode_HipHop_Text ) { - $out .= $contextNode->value; - } elseif ( $contextNode instanceof PPNode_HipHop_Tree ) { - if ( $contextNode->name === 'template' ) { - # Double-brace expansion - $bits = $contextNode->splitTemplate(); - if ( $flags & PPFrame::NO_TEMPLATES ) { - $newIterator = $this->virtualBracketedImplode( '{{', '|', '}}', $bits['title'], $bits['parts'] ); - } else { - $ret = $this->parser->braceSubstitution( $bits, $this ); - if ( isset( $ret['object'] ) ) { - $newIterator = $ret['object']; - } else { - $out .= $ret['text']; - } - } - } elseif ( $contextNode->name === 'tplarg' ) { - # Triple-brace expansion - $bits = $contextNode->splitTemplate(); - if ( $flags & PPFrame::NO_ARGS ) { - $newIterator = $this->virtualBracketedImplode( '{{{', '|', '}}}', $bits['title'], $bits['parts'] ); - } else { - $ret = $this->parser->argSubstitution( $bits, $this ); - if ( isset( $ret['object'] ) ) { - $newIterator = $ret['object']; - } else { - $out .= $ret['text']; - } - } - } elseif ( $contextNode->name === 'comment' ) { - # HTML-style comment - # Remove it in HTML, pre+remove and STRIP_COMMENTS modes - if ( $this->parser->ot['html'] - || ( $this->parser->ot['pre'] && $this->parser->mOptions->getRemoveComments() ) - || ( $flags & PPFrame::STRIP_COMMENTS ) ) - { - $out .= ''; - } - # Add a strip marker in PST mode so that pstPass2() can run some old-fashioned regexes on the result - # Not in RECOVER_COMMENTS mode (extractSections) though - elseif ( $this->parser->ot['wiki'] && ! ( $flags & PPFrame::RECOVER_COMMENTS ) ) { - $out .= $this->parser->insertStripItem( $contextNode->firstChild->value ); - } - # Recover the literal comment in RECOVER_COMMENTS and pre+no-remove - else { - $out .= $contextNode->firstChild->value; - } - } elseif ( $contextNode->name === 'ignore' ) { - # Output suppression used by <includeonly> etc. - # OT_WIKI will only respect <ignore> in substed templates. - # The other output types respect it unless NO_IGNORE is set. - # extractSections() sets NO_IGNORE and so never respects it. - if ( ( !isset( $this->parent ) && $this->parser->ot['wiki'] ) || ( $flags & PPFrame::NO_IGNORE ) ) { - $out .= $contextNode->firstChild->value; - } else { - //$out .= ''; - } - } elseif ( $contextNode->name === 'ext' ) { - # Extension tag - $bits = $contextNode->splitExt() + array( 'attr' => null, 'inner' => null, 'close' => null ); - $out .= $this->parser->extensionSubstitution( $bits, $this ); - } elseif ( $contextNode->name === 'h' ) { - # Heading - if ( $this->parser->ot['html'] ) { - # Expand immediately and insert heading index marker - $s = ''; - for ( $node = $contextNode->firstChild; $node; $node = $node->nextSibling ) { - $s .= $this->expand( $node, $flags ); - } - - $bits = $contextNode->splitHeading(); - $titleText = $this->title->getPrefixedDBkey(); - $this->parser->mHeadings[] = array( $titleText, $bits['i'] ); - $serial = count( $this->parser->mHeadings ) - 1; - $marker = "{$this->parser->mUniqPrefix}-h-$serial-" . Parser::MARKER_SUFFIX; - $s = substr( $s, 0, $bits['level'] ) . $marker . substr( $s, $bits['level'] ); - $this->parser->mStripState->addGeneral( $marker, '' ); - $out .= $s; - } else { - # Expand in virtual stack - $newIterator = $contextNode->getChildren(); - } - } else { - # Generic recursive expansion - $newIterator = $contextNode->getChildren(); - } - } else { - throw new MWException( __METHOD__.': Invalid parameter type' ); - } - - if ( $newIterator !== false ) { - $outStack[] = ''; - $iteratorStack[] = $newIterator; - $indexStack[] = 0; - } elseif ( $iteratorStack[$level] === false ) { - // Return accumulated value to parent - // With tail recursion - while ( $iteratorStack[$level] === false && $level > 0 ) { - $outStack[$level - 1] .= $out; - array_pop( $outStack ); - array_pop( $iteratorStack ); - array_pop( $indexStack ); - $level--; - } - } - } - --$expansionDepth; - return $outStack[0]; - } - - /** - * @param $sep - * @param $flags - * @return string - */ - function implodeWithFlags( $sep, $flags /*, ... */ ) { - $args = array_slice( func_get_args(), 2 ); - - $first = true; - $s = ''; - foreach ( $args as $root ) { - if ( $root instanceof PPNode_HipHop_Array ) { - $root = $root->value; - } - if ( !is_array( $root ) ) { - $root = array( $root ); - } - foreach ( $root as $node ) { - if ( $first ) { - $first = false; - } else { - $s .= $sep; - } - $s .= $this->expand( $node, $flags ); - } - } - return $s; - } - - /** - * Implode with no flags specified - * This previously called implodeWithFlags but has now been inlined to reduce stack depth - * @param $sep - * @return string - */ - function implode( $sep /*, ... */ ) { - $args = array_slice( func_get_args(), 1 ); - - $first = true; - $s = ''; - foreach ( $args as $root ) { - if ( $root instanceof PPNode_HipHop_Array ) { - $root = $root->value; - } - if ( !is_array( $root ) ) { - $root = array( $root ); - } - foreach ( $root as $node ) { - if ( $first ) { - $first = false; - } else { - $s .= $sep; - } - $s .= $this->expand( $node ); - } - } - return $s; - } - - /** - * Makes an object that, when expand()ed, will be the same as one obtained - * with implode() - * - * @param $sep - * @return PPNode_HipHop_Array - */ - function virtualImplode( $sep /*, ... */ ) { - $args = array_slice( func_get_args(), 1 ); - $out = array(); - $first = true; - - foreach ( $args as $root ) { - if ( $root instanceof PPNode_HipHop_Array ) { - $root = $root->value; - } - if ( !is_array( $root ) ) { - $root = array( $root ); - } - foreach ( $root as $node ) { - if ( $first ) { - $first = false; - } else { - $out[] = $sep; - } - $out[] = $node; - } - } - return new PPNode_HipHop_Array( $out ); - } - - /** - * Virtual implode with brackets - * - * @param $start - * @param $sep - * @param $end - * @return PPNode_HipHop_Array - */ - function virtualBracketedImplode( $start, $sep, $end /*, ... */ ) { - $args = array_slice( func_get_args(), 3 ); - $out = array( $start ); - $first = true; - - foreach ( $args as $root ) { - if ( $root instanceof PPNode_HipHop_Array ) { - $root = $root->value; - } - if ( !is_array( $root ) ) { - $root = array( $root ); - } - foreach ( $root as $node ) { - if ( $first ) { - $first = false; - } else { - $out[] = $sep; - } - $out[] = $node; - } - } - $out[] = $end; - return new PPNode_HipHop_Array( $out ); - } - - function __toString() { - return 'frame{}'; - } - - /** - * @param $level bool - * @return array|bool|String - */ - function getPDBK( $level = false ) { - if ( $level === false ) { - return $this->title->getPrefixedDBkey(); - } else { - return isset( $this->titleCache[$level] ) ? $this->titleCache[$level] : false; - } - } - - /** - * @return array - */ - function getArguments() { - return array(); - } - - /** - * @return array - */ - function getNumberedArguments() { - return array(); - } - - /** - * @return array - */ - function getNamedArguments() { - return array(); - } - - /** - * Returns true if there are no arguments in this frame - * - * @return bool - */ - function isEmpty() { - return true; - } - - /** - * @param $name - * @return bool - */ - function getArgument( $name ) { - return false; - } - - /** - * Returns true if the infinite loop check is OK, false if a loop is detected - * - * @param $title Title - * - * @return bool - */ - function loopCheck( $title ) { - return !isset( $this->loopCheckHash[$title->getPrefixedDBkey()] ); - } - - /** - * Return true if the frame is a template frame - * - * @return bool - */ - function isTemplate() { - return false; - } - - /** - * Get a title of frame - * - * @return Title - */ - function getTitle() { - return $this->title; - } -} - -/** - * Expansion frame with template arguments - * @ingroup Parser - */ -class PPTemplateFrame_HipHop extends PPFrame_HipHop { - var $numberedArgs, $namedArgs, $parent; - var $numberedExpansionCache, $namedExpansionCache; - - /** - * @param $preprocessor Preprocessor_HipHop - * @param $parent bool - * @param $numberedArgs array - * @param $namedArgs array - * @param $title Title|bool - */ - function __construct( $preprocessor, $parent = false, $numberedArgs = array(), $namedArgs = array(), $title = false ) { - parent::__construct( $preprocessor ); - - $this->parent = $parent; - $this->numberedArgs = $numberedArgs; - $this->namedArgs = $namedArgs; - $this->title = $title; - $pdbk = $title ? $title->getPrefixedDBkey() : false; - $this->titleCache = $parent->titleCache; - $this->titleCache[] = $pdbk; - $this->loopCheckHash = /*clone*/ $parent->loopCheckHash; - if ( $pdbk !== false ) { - $this->loopCheckHash[$pdbk] = true; - } - $this->depth = $parent->depth + 1; - $this->numberedExpansionCache = $this->namedExpansionCache = array(); - } - - function __toString() { - $s = 'tplframe{'; - $first = true; - $args = $this->numberedArgs + $this->namedArgs; - foreach ( $args as $name => $value ) { - if ( $first ) { - $first = false; - } else { - $s .= ', '; - } - $s .= "\"$name\":\"" . - str_replace( '"', '\\"', $value->__toString() ) . '"'; - } - $s .= '}'; - return $s; - } - /** - * Returns true if there are no arguments in this frame - * - * @return bool - */ - function isEmpty() { - return !count( $this->numberedArgs ) && !count( $this->namedArgs ); - } - - /** - * @return array - */ - function getArguments() { - $arguments = array(); - foreach ( array_merge( - array_keys($this->numberedArgs), - array_keys($this->namedArgs)) as $key ) { - $arguments[$key] = $this->getArgument($key); - } - return $arguments; - } - - /** - * @return array - */ - function getNumberedArguments() { - $arguments = array(); - foreach ( array_keys($this->numberedArgs) as $key ) { - $arguments[$key] = $this->getArgument($key); - } - return $arguments; - } - - /** - * @return array - */ - function getNamedArguments() { - $arguments = array(); - foreach ( array_keys($this->namedArgs) as $key ) { - $arguments[$key] = $this->getArgument($key); - } - return $arguments; - } - - /** - * @param $index - * @return array|bool - */ - function getNumberedArgument( $index ) { - if ( !isset( $this->numberedArgs[$index] ) ) { - return false; - } - if ( !isset( $this->numberedExpansionCache[$index] ) ) { - # No trimming for unnamed arguments - $this->numberedExpansionCache[$index] = $this->parent->expand( $this->numberedArgs[$index], PPFrame::STRIP_COMMENTS ); - } - return $this->numberedExpansionCache[$index]; - } - - /** - * @param $name - * @return bool - */ - function getNamedArgument( $name ) { - if ( !isset( $this->namedArgs[$name] ) ) { - return false; - } - if ( !isset( $this->namedExpansionCache[$name] ) ) { - # Trim named arguments post-expand, for backwards compatibility - $this->namedExpansionCache[$name] = trim( - $this->parent->expand( $this->namedArgs[$name], PPFrame::STRIP_COMMENTS ) ); - } - return $this->namedExpansionCache[$name]; - } - - /** - * @param $name - * @return array|bool - */ - function getArgument( $name ) { - $text = $this->getNumberedArgument( $name ); - if ( $text === false ) { - $text = $this->getNamedArgument( $name ); - } - return $text; - } - - /** - * Return true if the frame is a template frame - * - * @return bool - */ - function isTemplate() { - return true; - } -} - -/** - * Expansion frame with custom arguments - * @ingroup Parser - */ -class PPCustomFrame_HipHop extends PPFrame_HipHop { - var $args; - - function __construct( $preprocessor, $args ) { - parent::__construct( $preprocessor ); - $this->args = $args; - } - - function __toString() { - $s = 'cstmframe{'; - $first = true; - foreach ( $this->args as $name => $value ) { - if ( $first ) { - $first = false; - } else { - $s .= ', '; - } - $s .= "\"$name\":\"" . - str_replace( '"', '\\"', $value->__toString() ) . '"'; - } - $s .= '}'; - return $s; - } - - /** - * @return bool - */ - function isEmpty() { - return !count( $this->args ); - } - - /** - * @param $index - * @return bool - */ - function getArgument( $index ) { - if ( !isset( $this->args[$index] ) ) { - return false; - } - return $this->args[$index]; - } -} - -/** - * @ingroup Parser - */ -class PPNode_HipHop_Tree implements PPNode { - var $name, $firstChild, $lastChild, $nextSibling; - - function __construct( $name ) { - $this->name = $name; - $this->firstChild = $this->lastChild = $this->nextSibling = false; - } - - function __toString() { - $inner = ''; - $attribs = ''; - for ( $node = $this->firstChild; $node; $node = $node->nextSibling ) { - if ( $node instanceof PPNode_HipHop_Attr ) { - $attribs .= ' ' . $node->name . '="' . htmlspecialchars( $node->value ) . '"'; - } else { - $inner .= $node->__toString(); - } - } - if ( $inner === '' ) { - return "<{$this->name}$attribs/>"; - } else { - return "<{$this->name}$attribs>$inner</{$this->name}>"; - } - } - - /** - * @param $name - * @param $text - * @return PPNode_HipHop_Tree - */ - static function newWithText( $name, $text ) { - $obj = new self( $name ); - $obj->addChild( new PPNode_HipHop_Text( $text ) ); - return $obj; - } - - function addChild( $node ) { - if ( $this->lastChild === false ) { - $this->firstChild = $this->lastChild = $node; - } else { - $this->lastChild->nextSibling = $node; - $this->lastChild = $node; - } - } - - /** - * @return PPNode_HipHop_Array - */ - function getChildren() { - $children = array(); - for ( $child = $this->firstChild; $child; $child = $child->nextSibling ) { - $children[] = $child; - } - return new PPNode_HipHop_Array( $children ); - } - - function getFirstChild() { - return $this->firstChild; - } - - function getNextSibling() { - return $this->nextSibling; - } - - /** - * @param $name string - * @return array - */ - function getChildrenOfType( $name ) { - $children = array(); - for ( $child = $this->firstChild; $child; $child = $child->nextSibling ) { - if ( isset( $child->name ) && $child->name === $name ) { - $children[] = $child; - } - } - return $children; - } - - /** - * @return bool - */ - function getLength() { - return false; - } - - /** - * @param $i - * @return bool - */ - function item( $i ) { - return false; - } - - /** - * @return string - */ - function getName() { - return $this->name; - } - - /** - * Split a <part> node into an associative array containing: - * name PPNode name - * index String index - * value PPNode value - * - * @throws MWException - * @return array - */ - function splitArg() { - $bits = array(); - for ( $child = $this->firstChild; $child; $child = $child->nextSibling ) { - if ( !isset( $child->name ) ) { - continue; - } - if ( $child->name === 'name' ) { - $bits['name'] = $child; - if ( $child->firstChild instanceof PPNode_HipHop_Attr - && $child->firstChild->name === 'index' ) - { - $bits['index'] = $child->firstChild->value; - } - } elseif ( $child->name === 'value' ) { - $bits['value'] = $child; - } - } - - if ( !isset( $bits['name'] ) ) { - throw new MWException( 'Invalid brace node passed to ' . __METHOD__ ); - } - if ( !isset( $bits['index'] ) ) { - $bits['index'] = ''; - } - return $bits; - } - - /** - * Split an <ext> node into an associative array containing name, attr, inner and close - * All values in the resulting array are PPNodes. Inner and close are optional. - * - * @throws MWException - * @return array - */ - function splitExt() { - $bits = array(); - for ( $child = $this->firstChild; $child; $child = $child->nextSibling ) { - if ( !isset( $child->name ) ) { - continue; - } - if ( $child->name === 'name' ) { - $bits['name'] = $child; - } elseif ( $child->name === 'attr' ) { - $bits['attr'] = $child; - } elseif ( $child->name === 'inner' ) { - $bits['inner'] = $child; - } elseif ( $child->name === 'close' ) { - $bits['close'] = $child; - } - } - if ( !isset( $bits['name'] ) ) { - throw new MWException( 'Invalid ext node passed to ' . __METHOD__ ); - } - return $bits; - } - - /** - * Split an <h> node - * - * @throws MWException - * @return array - */ - function splitHeading() { - if ( $this->name !== 'h' ) { - throw new MWException( 'Invalid h node passed to ' . __METHOD__ ); - } - $bits = array(); - for ( $child = $this->firstChild; $child; $child = $child->nextSibling ) { - if ( !isset( $child->name ) ) { - continue; - } - if ( $child->name === 'i' ) { - $bits['i'] = $child->value; - } elseif ( $child->name === 'level' ) { - $bits['level'] = $child->value; - } - } - if ( !isset( $bits['i'] ) ) { - throw new MWException( 'Invalid h node passed to ' . __METHOD__ ); - } - return $bits; - } - - /** - * Split a <template> or <tplarg> node - * - * @return array - */ - function splitTemplate() { - $parts = array(); - $bits = array( 'lineStart' => '' ); - for ( $child = $this->firstChild; $child; $child = $child->nextSibling ) { - if ( !isset( $child->name ) ) { - continue; - } - if ( $child->name === 'title' ) { - $bits['title'] = $child; - } - if ( $child->name === 'part' ) { - $parts[] = $child; - } - if ( $child->name === 'lineStart' ) { - $bits['lineStart'] = '1'; - } - } - if ( !isset( $bits['title'] ) ) { - throw new MWException( 'Invalid node passed to ' . __METHOD__ ); - } - $bits['parts'] = new PPNode_HipHop_Array( $parts ); - return $bits; - } -} - -/** - * @ingroup Parser - */ -class PPNode_HipHop_Text implements PPNode { - var $value, $nextSibling; - - function __construct( $value ) { - if ( is_object( $value ) ) { - throw new MWException( __CLASS__ . ' given object instead of string' ); - } - $this->value = $value; - } - - function __toString() { - return htmlspecialchars( $this->value ); - } - - function getNextSibling() { - return $this->nextSibling; - } - - function getChildren() { return false; } - function getFirstChild() { return false; } - function getChildrenOfType( $name ) { return false; } - function getLength() { return false; } - function item( $i ) { return false; } - function getName() { return '#text'; } - function splitArg() { throw new MWException( __METHOD__ . ': not supported' ); } - function splitExt() { throw new MWException( __METHOD__ . ': not supported' ); } - function splitHeading() { throw new MWException( __METHOD__ . ': not supported' ); } -} - -/** - * @ingroup Parser - */ -class PPNode_HipHop_Array implements PPNode { - var $value, $nextSibling; - - function __construct( $value ) { - $this->value = $value; - } - - function __toString() { - return var_export( $this, true ); - } - - function getLength() { - return count( $this->value ); - } - - function item( $i ) { - return $this->value[$i]; - } - - function getName() { return '#nodelist'; } - - function getNextSibling() { - return $this->nextSibling; - } - - function getChildren() { return false; } - function getFirstChild() { return false; } - function getChildrenOfType( $name ) { return false; } - function splitArg() { throw new MWException( __METHOD__ . ': not supported' ); } - function splitExt() { throw new MWException( __METHOD__ . ': not supported' ); } - function splitHeading() { throw new MWException( __METHOD__ . ': not supported' ); } -} - -/** - * @ingroup Parser - */ -class PPNode_HipHop_Attr implements PPNode { - var $name, $value, $nextSibling; - - function __construct( $name, $value ) { - $this->name = $name; - $this->value = $value; - } - - function __toString() { - return "<@{$this->name}>" . htmlspecialchars( $this->value ) . "</@{$this->name}>"; - } - - function getName() { - return $this->name; - } - - function getNextSibling() { - return $this->nextSibling; - } - - function getChildren() { return false; } - function getFirstChild() { return false; } - function getChildrenOfType( $name ) { return false; } - function getLength() { return false; } - function item( $i ) { return false; } - function splitArg() { throw new MWException( __METHOD__ . ': not supported' ); } - function splitExt() { throw new MWException( __METHOD__ . ': not supported' ); } - function splitHeading() { throw new MWException( __METHOD__ . ': not supported' ); } -} diff --git a/includes/parser/StripState.php b/includes/parser/StripState.php index ad95d5f7..5f3f18ea 100644 --- a/includes/parser/StripState.php +++ b/includes/parser/StripState.php @@ -112,7 +112,7 @@ class StripState { * @return mixed */ protected function unstripType( $type, $text ) { - // Shortcut + // Shortcut if ( !count( $this->data[$type] ) ) { return $text; } @@ -139,7 +139,7 @@ class StripState { . '</span>'; } if ( $this->recursionLevel >= self::UNSTRIP_RECURSION_LIMIT ) { - return '<span class="error">' . + return '<span class="error">' . wfMessage( 'parser-unstrip-recursion-limit' ) ->numParams( self::UNSTRIP_RECURSION_LIMIT )->inContentLanguage()->text() . '</span>'; @@ -156,7 +156,7 @@ class StripState { } /** - * Get a StripState object which is sufficient to unstrip the given text. + * Get a StripState object which is sufficient to unstrip the given text. * It will contain the minimum subset of strip items necessary. * * @param $text string @@ -233,4 +233,3 @@ class StripState { return preg_replace( $this->regex, '', $text ); } } - diff --git a/includes/parser/Tidy.php b/includes/parser/Tidy.php index ed2d436d..0f7e0d31 100644 --- a/includes/parser/Tidy.php +++ b/includes/parser/Tidy.php @@ -59,12 +59,18 @@ class MWTidyWrapper { dechex( mt_rand( 0, 0x7fffffff ) ) . dechex( mt_rand( 0, 0x7fffffff ) ); $this->mMarkerIndex = 0; + // Replace <mw:editsection> elements with placeholders $wrappedtext = preg_replace_callback( ParserOutput::EDITSECTION_REGEX, array( &$this, 'replaceEditSectionLinksCallback' ), $text ); - $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'. - ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'. - '<head><title>test</title></head><body>'.$wrappedtext.'</body></html>'; + // Modify inline Microdata <link> and <meta> elements so they say <html-link> and <html-meta> so + // we can trick Tidy into not stripping them out by including them in tidy's new-empty-tags config + $wrappedtext = preg_replace( '!<(link|meta)([^>]*?)(/{0,1}>)!', '<html-$1$2$3', $wrappedtext ); + + // Wrap the whole thing in a doctype and body for Tidy. + $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"' . + ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>' . + '<head><title>test</title></head><body>' . $wrappedtext . '</body></html>'; return $wrappedtext; } @@ -86,7 +92,13 @@ class MWTidyWrapper { * @return string */ public function postprocess( $text ) { - return $this->mTokens->replace( $text ); + // Revert <html-{link,meta}> back to <{link,meta}> + $text = preg_replace( '!<html-(link|meta)([^>]*?)(/{0,1}>)!', '<$1$2$3', $text ); + + // Restore the contents of placeholder tokens + $text = $this->mTokens->replace( $text ); + + return $text; } } @@ -106,7 +118,7 @@ class MWTidy { * If tidy isn't able to correct the markup, the original will be * returned in all its glory with a warning comment appended. * - * @param $text String: hideous HTML input + * @param string $text hideous HTML input * @return String: corrected HTML output */ public static function tidy( $text ) { @@ -159,7 +171,7 @@ class MWTidy { * Spawn an external HTML tidy process and get corrected markup back from it. * Also called in OutputHandler.php for full page validation * - * @param $text String: HTML to check + * @param string $text HTML to check * @param $stderr Boolean: Whether to read result from STDERR rather than STDOUT * @param &$retval int Exit code (-1 on internal error) * @return mixed String or null @@ -223,7 +235,7 @@ class MWTidy { * Use the HTML tidy extension to use the tidy library in-process, * saving the overhead of spawning a new process. * - * @param $text String: HTML to check + * @param string $text HTML to check * @param $stderr Boolean: Whether to read result from error status instead of output * @param &$retval int Exit code (-1 on internal error) * @return mixed String or null @@ -248,24 +260,24 @@ class MWTidy { wfProfileOut( __METHOD__ ); return $tidy->errorBuffer; + } + + $tidy->cleanRepair(); + $retval = $tidy->getStatus(); + if ( $retval == 2 ) { + // 2 is magic number for fatal error + // http://www.php.net/manual/en/function.tidy-get-status.php + $cleansource = null; } else { - $tidy->cleanRepair(); - $retval = $tidy->getStatus(); - if ( $retval == 2 ) { - // 2 is magic number for fatal error - // http://www.php.net/manual/en/function.tidy-get-status.php - $cleansource = null; - } else { - $cleansource = tidy_get_output( $tidy ); - if ( $wgDebugTidy && $retval > 0 ) { - $cleansource .= "<!--\nTidy reports:\n" . - str_replace( '-->', '-->', $tidy->errorBuffer ) . - "\n-->"; - } + $cleansource = tidy_get_output( $tidy ); + if ( $wgDebugTidy && $retval > 0 ) { + $cleansource .= "<!--\nTidy reports:\n" . + str_replace( '-->', '-->', $tidy->errorBuffer ) . + "\n-->"; } - - wfProfileOut( __METHOD__ ); - return $cleansource; } + + wfProfileOut( __METHOD__ ); + return $cleansource; } } |