From b9b85843572bf283f48285001e276ba7e61b63f6 Mon Sep 17 00:00:00 2001 From: Pierre Schmitz Date: Sun, 22 Feb 2009 13:37:51 +0100 Subject: updated to MediaWiki 1.14.0 --- includes/parser/CoreLinkFunctions.php | 47 + includes/parser/CoreParserFunctions.php | 75 +- includes/parser/LinkHolderArray.php | 438 +++ includes/parser/Parser.php | 1471 +++++---- includes/parser/ParserCache.php | 4 +- includes/parser/ParserOptions.php | 16 +- includes/parser/ParserOutput.php | 63 +- includes/parser/Parser_DiffTest.php | 34 +- includes/parser/Parser_LinkHooks.php | 315 ++ includes/parser/Parser_OldPP.php | 4944 ------------------------------- includes/parser/Preprocessor_DOM.php | 41 +- includes/parser/Preprocessor_Hash.php | 37 +- 12 files changed, 1683 insertions(+), 5802 deletions(-) create mode 100644 includes/parser/CoreLinkFunctions.php create mode 100644 includes/parser/LinkHolderArray.php create mode 100644 includes/parser/Parser_LinkHooks.php delete mode 100644 includes/parser/Parser_OldPP.php (limited to 'includes/parser') diff --git a/includes/parser/CoreLinkFunctions.php b/includes/parser/CoreLinkFunctions.php new file mode 100644 index 00000000..d6d11880 --- /dev/null +++ b/includes/parser/CoreLinkFunctions.php @@ -0,0 +1,47 @@ +setLinkHook( NS_CATEGORY, array( __CLASS__, 'categoryLinkHook' ) ); + return true; + } + + static function defaultLinkHook( $parser, $holders, $markers, + Title $title, $titleText, &$displayText = null, &$leadingColon = false ) { + if( isset($displayText) && $markers->findMarker( $displayText ) ) { + # There are links inside of the displayText + # For backwards compatibility the deepest links are dominant so this + # link should not be handled + $displayText = $markers->expand($displayText); + # Return false so that this link is reverted back to WikiText + return false; + } + return $holders->makeHolder( $title, isset($displayText) ? $displayText : $titleText, '', '', '' ); + } + + static function categoryLinkHook( $parser, $holders, $markers, + Title $title, $titleText, &$sortText = null, &$leadingColon = false ) { + global $wgContLang; + # When a category link starts with a : treat it as a normal link + if( $leadingColon ) return true; + if( isset($sortText) && $markers->findMarker( $sortText ) ) { + # There are links inside of the sortText + # For backwards compatibility the deepest links are dominant so this + # link should not be handled + $sortText = $markers->expand($sortText); + # Return false so that this link is reverted back to WikiText + return false; + } + if( !isset($sortText) ) $sortText = $parser->getDefaultSort(); + $sortText = Sanitizer::decodeCharReferences( $sortText ); + $sortText = str_replace( "\n", '', $sortText ); + $sortText = $wgContLang->convertCategoryKey( $sortText ); + $parser->mOutput->addCategory( $title->getDBkey(), $sortText ); + return ''; + } + +} diff --git a/includes/parser/CoreParserFunctions.php b/includes/parser/CoreParserFunctions.php index d9072e93..a3b5189a 100644 --- a/includes/parser/CoreParserFunctions.php +++ b/includes/parser/CoreParserFunctions.php @@ -33,7 +33,9 @@ class CoreParserFunctions { $parser->setFunctionHook( 'numberofarticles', array( __CLASS__, 'numberofarticles' ), SFH_NO_HASH ); $parser->setFunctionHook( 'numberoffiles', array( __CLASS__, 'numberoffiles' ), SFH_NO_HASH ); $parser->setFunctionHook( 'numberofadmins', array( __CLASS__, 'numberofadmins' ), SFH_NO_HASH ); + $parser->setFunctionHook( 'numberingroup', array( __CLASS__, 'numberingroup' ), SFH_NO_HASH ); $parser->setFunctionHook( 'numberofedits', array( __CLASS__, 'numberofedits' ), SFH_NO_HASH ); + $parser->setFunctionHook( 'numberofviews', array( __CLASS__, 'numberofviews' ), SFH_NO_HASH ); $parser->setFunctionHook( 'language', array( __CLASS__, 'language' ), SFH_NO_HASH ); $parser->setFunctionHook( 'padleft', array( __CLASS__, 'padleft' ), SFH_NO_HASH ); $parser->setFunctionHook( 'padright', array( __CLASS__, 'padright' ), SFH_NO_HASH ); @@ -56,7 +58,10 @@ class CoreParserFunctions { static function intFunction( $parser, $part1 = '' /*, ... */ ) { if ( strval( $part1 ) !== '' ) { $args = array_slice( func_get_args(), 2 ); - return wfMsgReal( $part1, $args, true ); + $message = wfMsgGetKey( $part1, true, false, false ); + $message = wfMsgReplaceArgs( $message, $args ); + $message = $parser->replaceVariables( $message ); // like $wgMessageCache->transform() + return $message; } else { return array( 'found' => false ); } @@ -64,20 +69,13 @@ class CoreParserFunctions { static function ns( $parser, $part1 = '' ) { global $wgContLang; - $found = false; if ( intval( $part1 ) || $part1 == "0" ) { - $text = $wgContLang->getNsText( intval( $part1 ) ); - $found = true; + $index = intval( $part1 ); } else { - $param = str_replace( ' ', '_', strtolower( $part1 ) ); - $index = MWNamespace::getCanonicalIndex( strtolower( $param ) ); - if ( !is_null( $index ) ) { - $text = $wgContLang->getNsText( $index ); - $found = true; - } + $index = $wgContLang->getNsIndex( str_replace( ' ', '_', $part1 ) ); } - if ( $found ) { - return $text; + if ( $index !== false ) { + return $wgContLang->getFormattedNsText( $index ); } else { return array( 'found' => false ); } @@ -128,8 +126,12 @@ class CoreParserFunctions { # attempt, url-decode and try for a second. if( is_null( $title ) ) $title = Title::newFromUrl( urldecode( $s ) ); - if ( !is_null( $title ) ) { - if ( !is_null( $arg ) ) { + if( !is_null( $title ) ) { + # Convert NS_MEDIA -> NS_FILE + if( $title->getNamespace() == NS_MEDIA ) { + $title = Title::makeTitle( NS_FILE, $title->getDBKey() ); + } + if( !is_null( $arg ) ) { $text = $title->$func( $arg ); } else { $text = $title->$func(); @@ -167,10 +169,16 @@ class CoreParserFunctions { * @return string */ static function displaytitle( $parser, $text = '' ) { + global $wgRestrictDisplayTitle; $text = trim( Sanitizer::decodeCharReferences( $text ) ); - $title = Title::newFromText( $text ); - if( $title instanceof Title && $title->getFragment() == '' && $title->equals( $parser->mTitle ) ) + + if ( !$wgRestrictDisplayTitle ) { $parser->mOutput->setDisplayTitle( $text ); + } else { + $title = Title::newFromText( $text ); + if( $title instanceof Title && $title->getFragment() == '' && $title->equals( $parser->mTitle ) ) + $parser->mOutput->setDisplayTitle( $text ); + } return ''; } @@ -207,14 +215,20 @@ class CoreParserFunctions { return self::formatRaw( SiteStats::images(), $raw ); } static function numberofadmins( $parser, $raw = null ) { - return self::formatRaw( SiteStats::admins(), $raw ); + return self::formatRaw( SiteStats::numberingroup('sysop'), $raw ); } static function numberofedits( $parser, $raw = null ) { return self::formatRaw( SiteStats::edits(), $raw ); } + static function numberofviews( $parser, $raw = null ) { + return self::formatRaw( SiteStats::views(), $raw ); + } static function pagesinnamespace( $parser, $namespace = 0, $raw = null ) { return self::formatRaw( SiteStats::pagesInNs( intval( $namespace ) ), $raw ); } + static function numberingroup( $parser, $name = '', $raw = null) { + return self::formatRaw( SiteStats::numberingroup( strtolower( $name ) ), $raw ); + } /** * Return the number of pages in the given category, or 0 if it's nonexis- @@ -269,12 +283,12 @@ class CoreParserFunctions { if( isset( $cache[$page] ) ) { $length = $cache[$page]; } elseif( $parser->incrementExpensiveFunctionCount() ) { - $length = $cache[$page] = $title->getLength(); + $rev = Revision::newFromTitle($title); + $id = $rev ? $rev->getPage() : 0; + $length = $cache[$page] = $rev ? $rev->getSize() : 0; // Register dependency in templatelinks - $id = $title->getArticleId(); - $revid = Revision::newFromTitle($title); - $parser->mOutput->addTemplate($title, $id, $revid); + $parser->mOutput->addTemplate( $title, $id, $rev ? $rev->getId() : 0 ); } return self::formatRaw( $length, $raw ); } @@ -320,9 +334,18 @@ class CoreParserFunctions { public static function defaultsort( $parser, $text ) { $text = trim( $text ); - if( strlen( $text ) > 0 ) - $parser->setDefaultSort( $text ); - return ''; + if( strlen( $text ) == 0 ) + return ''; + $old = $parser->getCustomDefaultSort(); + $parser->setDefaultSort( $text ); + if( $old === false || $old == $text ) + return ''; + else + return( '' . + wfMsg( 'duplicate-defaultsort', + htmlspecialchars( $old ), + htmlspecialchars( $text ) ) . + '' ); } public static function filepath( $parser, $name='', $option='' ) { @@ -330,7 +353,7 @@ class CoreParserFunctions { if( $file ) { $url = $file->getFullUrl(); if( $option == 'nowiki' ) { - return "$url"; + return array( $url, 'nowiki' => true ); } return $url; } else { @@ -365,7 +388,7 @@ class CoreParserFunctions { foreach ( $args as $arg ) { $bits = $arg->splitArg(); if ( strval( $bits['index'] ) === '' ) { - $name = $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ); + $name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) ); $value = trim( $frame->expand( $bits['value'] ) ); if ( preg_match( '/^(?:["\'](.+)["\']|""|\'\')$/s', $value, $m ) ) { $value = isset( $m[1] ) ? $m[1] : ''; diff --git a/includes/parser/LinkHolderArray.php b/includes/parser/LinkHolderArray.php new file mode 100644 index 00000000..35b672b9 --- /dev/null +++ b/includes/parser/LinkHolderArray.php @@ -0,0 +1,438 @@ +parent = $parent; + } + + /** + * Reduce memory usage to reduce the impact of circular references + */ + function __destruct() { + foreach ( $this as $name => $value ) { + unset( $this->$name ); + } + } + + /** + * Merge another LinkHolderArray into this one + */ + function merge( $other ) { + foreach ( $other->internals as $ns => $entries ) { + $this->size += count( $entries ); + if ( !isset( $this->internals[$ns] ) ) { + $this->internals[$ns] = $entries; + } else { + $this->internals[$ns] += $entries; + } + } + $this->interwikis += $other->interwikis; + } + + /** + * Returns true if the memory requirements of this object are getting large + */ + function isBig() { + global $wgLinkHolderBatchSize; + return $this->size > $wgLinkHolderBatchSize; + } + + /** + * Clear all stored link holders. + * Make sure you don't have any text left using these link holders, before you call this + */ + function clear() { + $this->internals = array(); + $this->interwikis = array(); + $this->size = 0; + } + + /** + * Make a link placeholder. The text returned can be later resolved to a real link with + * replaceLinkHolders(). This is done for two reasons: firstly to avoid further + * parsing of interwiki links, and secondly to allow all existence checks and + * article length checks (for stub links) to be bundled into a single query. + * + */ + function makeHolder( $nt, $text = '', $query = '', $trail = '', $prefix = '' ) { + wfProfileIn( __METHOD__ ); + if ( ! is_object($nt) ) { + # Fail gracefully + $retVal = "{$prefix}{$text}{$trail}"; + } else { + # Separate the link trail from the rest of the link + list( $inside, $trail ) = Linker::splitTrail( $trail ); + + $entry = array( + 'title' => $nt, + 'text' => $prefix.$text.$inside, + 'pdbk' => $nt->getPrefixedDBkey(), + ); + if ( $query !== '' ) { + $entry['query'] = $query; + } + + if ( $nt->isExternal() ) { + // Use a globally unique ID to keep the objects mergable + $key = $this->parent->nextLinkID(); + $this->interwikis[$key] = $entry; + $retVal = "{$trail}"; + } else { + $key = $this->parent->nextLinkID(); + $ns = $nt->getNamespace(); + $this->internals[$ns][$key] = $entry; + $retVal = "{$trail}"; + } + $this->size++; + } + wfProfileOut( __METHOD__ ); + return $retVal; + } + + /** + * Get the stub threshold + */ + function getStubThreshold() { + global $wgUser; + if ( !isset( $this->stubThreshold ) ) { + $this->stubThreshold = $wgUser->getOption('stubthreshold'); + } + return $this->stubThreshold; + } + + /** + * Replace link placeholders with actual links, in the buffer + * Placeholders created in Skin::makeLinkObj() + * Returns an array of link CSS classes, indexed by PDBK. + */ + function replace( &$text ) { + wfProfileIn( __METHOD__ ); + + $colours = $this->replaceInternal( $text ); + $this->replaceInterwiki( $text ); + + wfProfileOut( __METHOD__ ); + return $colours; + } + + /** + * Replace internal links + */ + protected function replaceInternal( &$text ) { + if ( !$this->internals ) { + return; + } + + wfProfileIn( __METHOD__ ); + global $wgContLang; + + $colours = array(); + $sk = $this->parent->getOptions()->getSkin(); + $linkCache = LinkCache::singleton(); + $output = $this->parent->getOutput(); + + wfProfileIn( __METHOD__.'-check' ); + $dbr = wfGetDB( DB_SLAVE ); + $page = $dbr->tableName( 'page' ); + $threshold = $this->getStubThreshold(); + + # Sort by namespace + ksort( $this->internals ); + + # Generate query + $query = false; + $current = null; + foreach ( $this->internals as $ns => $entries ) { + foreach ( $entries as $index => $entry ) { + $key = "$ns:$index"; + $title = $entry['title']; + $pdbk = $entry['pdbk']; + + # Skip invalid entries. + # Result will be ugly, but prevents crash. + if ( is_null( $title ) ) { + continue; + } + + # Check if it's a static known link, e.g. interwiki + if ( $title->isAlwaysKnown() ) { + $colours[$pdbk] = ''; + } elseif ( ( $id = $linkCache->getGoodLinkID( $pdbk ) ) != 0 ) { + $colours[$pdbk] = $sk->getLinkColour( $title, $threshold ); + $output->addLink( $title, $id ); + } elseif ( $linkCache->isBadLink( $pdbk ) ) { + $colours[$pdbk] = 'new'; + } else { + # Not in the link cache, add it to the query + if ( !isset( $current ) ) { + $current = $ns; + $query = "SELECT page_id, page_namespace, page_title, page_is_redirect, page_len"; + $query .= " FROM $page WHERE (page_namespace=$ns AND page_title IN("; + } elseif ( $current != $ns ) { + $current = $ns; + $query .= ")) OR (page_namespace=$ns AND page_title IN("; + } else { + $query .= ', '; + } + + $query .= $dbr->addQuotes( $title->getDBkey() ); + } + } + } + if ( $query ) { + $query .= '))'; + + $res = $dbr->query( $query, __METHOD__ ); + + # Fetch data and form into an associative array + # non-existent = broken + $linkcolour_ids = array(); + while ( $s = $dbr->fetchObject($res) ) { + $title = Title::makeTitle( $s->page_namespace, $s->page_title ); + $pdbk = $title->getPrefixedDBkey(); + $linkCache->addGoodLinkObj( $s->page_id, $title, $s->page_len, $s->page_is_redirect ); + $output->addLink( $title, $s->page_id ); + # FIXME: convoluted data flow + # The redirect status and length is passed to getLinkColour via the LinkCache + # Use formal parameters instead + $colours[$pdbk] = $sk->getLinkColour( $title, $threshold ); + //add id to the extension todolist + $linkcolour_ids[$s->page_id] = $pdbk; + } + unset( $res ); + //pass an array of page_ids to an extension + wfRunHooks( 'GetLinkColours', array( $linkcolour_ids, &$colours ) ); + } + wfProfileOut( __METHOD__.'-check' ); + + # Do a second query for different language variants of links and categories + if($wgContLang->hasVariants()) { + $this->doVariants( $colours ); + } + + # Construct search and replace arrays + wfProfileIn( __METHOD__.'-construct' ); + $replacePairs = array(); + foreach ( $this->internals as $ns => $entries ) { + foreach ( $entries as $index => $entry ) { + $pdbk = $entry['pdbk']; + $title = $entry['title']; + $query = isset( $entry['query'] ) ? $entry['query'] : ''; + $key = "$ns:$index"; + $searchkey = ""; + if ( !isset( $colours[$pdbk] ) || $colours[$pdbk] == 'new' ) { + $linkCache->addBadLinkObj( $title ); + $colours[$pdbk] = 'new'; + $output->addLink( $title, 0 ); + $replacePairs[$searchkey] = $sk->makeBrokenLinkObj( $title, + $entry['text'], + $query ); + } else { + $replacePairs[$searchkey] = $sk->makeColouredLinkObj( $title, $colours[$pdbk], + $entry['text'], + $query ); + } + } + } + $replacer = new HashtableReplacer( $replacePairs, 1 ); + wfProfileOut( __METHOD__.'-construct' ); + + # Do the thing + wfProfileIn( __METHOD__.'-replace' ); + $text = preg_replace_callback( + '/()/', + $replacer->cb(), + $text); + + wfProfileOut( __METHOD__.'-replace' ); + wfProfileOut( __METHOD__ ); + } + + /** + * Replace interwiki links + */ + protected function replaceInterwiki( &$text ) { + if ( empty( $this->interwikis ) ) { + return; + } + + wfProfileIn( __METHOD__ ); + # Make interwiki link HTML + $sk = $this->parent->getOptions()->getSkin(); + $replacePairs = array(); + foreach( $this->interwikis as $key => $link ) { + $replacePairs[$key] = $sk->link( $link['title'], $link['text'] ); + } + $replacer = new HashtableReplacer( $replacePairs, 1 ); + + $text = preg_replace_callback( + '//', + $replacer->cb(), + $text ); + wfProfileOut( __METHOD__ ); + } + + /** + * Modify $this->internals and $colours according to language variant linking rules + */ + protected function doVariants( &$colours ) { + global $wgContLang; + $linkBatch = new LinkBatch(); + $variantMap = array(); // maps $pdbkey_Variant => $keys (of link holders) + $output = $this->parent->getOutput(); + $linkCache = LinkCache::singleton(); + $sk = $this->parent->getOptions()->getSkin(); + $threshold = $this->getStubThreshold(); + + // Add variants of links to link batch + foreach ( $this->internals as $ns => $entries ) { + foreach ( $entries as $index => $entry ) { + $key = "$ns:$index"; + $pdbk = $entry['pdbk']; + $title = $entry['title']; + $titleText = $title->getText(); + + // generate all variants of the link title text + $allTextVariants = $wgContLang->convertLinkToAllVariants($titleText); + + // if link was not found (in first query), add all variants to query + if ( !isset($colours[$pdbk]) ){ + foreach($allTextVariants as $textVariant){ + if($textVariant != $titleText){ + $variantTitle = Title::makeTitle( $ns, $textVariant ); + if(is_null($variantTitle)) continue; + $linkBatch->addObj( $variantTitle ); + $variantMap[$variantTitle->getPrefixedDBkey()][] = $key; + } + } + } + } + } + + // process categories, check if a category exists in some variant + $categoryMap = array(); // maps $category_variant => $category (dbkeys) + $varCategories = array(); // category replacements oldDBkey => newDBkey + foreach( $output->getCategoryLinks() as $category ){ + $variants = $wgContLang->convertLinkToAllVariants($category); + foreach($variants as $variant){ + if($variant != $category){ + $variantTitle = Title::newFromDBkey( Title::makeName(NS_CATEGORY,$variant) ); + if(is_null($variantTitle)) continue; + $linkBatch->addObj( $variantTitle ); + $categoryMap[$variant] = $category; + } + } + } + + + if(!$linkBatch->isEmpty()){ + // construct query + $dbr = wfGetDB( DB_SLAVE ); + $page = $dbr->tableName( 'page' ); + $titleClause = $linkBatch->constructSet('page', $dbr); + $variantQuery = "SELECT page_id, page_namespace, page_title, page_is_redirect, page_len"; + $variantQuery .= " FROM $page WHERE $titleClause"; + $varRes = $dbr->query( $variantQuery, __METHOD__ ); + $linkcolour_ids = array(); + + // for each found variants, figure out link holders and replace + while ( $s = $dbr->fetchObject($varRes) ) { + + $variantTitle = Title::makeTitle( $s->page_namespace, $s->page_title ); + $varPdbk = $variantTitle->getPrefixedDBkey(); + $vardbk = $variantTitle->getDBkey(); + + $holderKeys = array(); + if(isset($variantMap[$varPdbk])){ + $holderKeys = $variantMap[$varPdbk]; + $linkCache->addGoodLinkObj( $s->page_id, $variantTitle, $s->page_len, $s->page_is_redirect ); + $output->addLink( $variantTitle, $s->page_id ); + } + + // loop over link holders + foreach($holderKeys as $key){ + list( $ns, $index ) = explode( ':', $key, 2 ); + $entry =& $this->internals[$ns][$index]; + $pdbk = $entry['pdbk']; + + if(!isset($colours[$pdbk])){ + // found link in some of the variants, replace the link holder data + $entry['title'] = $variantTitle; + $entry['pdbk'] = $varPdbk; + + // set pdbk and colour + # FIXME: convoluted data flow + # The redirect status and length is passed to getLinkColour via the LinkCache + # Use formal parameters instead + $colours[$varPdbk] = $sk->getLinkColour( $variantTitle, $threshold ); + $linkcolour_ids[$s->page_id] = $pdbk; + } + } + + // check if the object is a variant of a category + if(isset($categoryMap[$vardbk])){ + $oldkey = $categoryMap[$vardbk]; + if($oldkey != $vardbk) + $varCategories[$oldkey]=$vardbk; + } + } + wfRunHooks( 'GetLinkColours', array( $linkcolour_ids, &$colours ) ); + + // rebuild the categories in original order (if there are replacements) + if(count($varCategories)>0){ + $newCats = array(); + $originalCats = $output->getCategories(); + foreach($originalCats as $cat => $sortkey){ + // make the replacement + if( array_key_exists($cat,$varCategories) ) + $newCats[$varCategories[$cat]] = $sortkey; + else $newCats[$cat] = $sortkey; + } + $output->setCategoryLinks($newCats); + } + } + } + + /** + * Replace link placeholders with plain text of links + * (not HTML-formatted). + * @param string $text + * @return string + */ + function replaceText( $text ) { + wfProfileIn( __METHOD__ ); + + $text = preg_replace_callback( + '//', + array( &$this, 'replaceTextCallback' ), + $text ); + + wfProfileOut( __METHOD__ ); + return $text; + } + + /** + * @param array $matches + * @return string + * @private + */ + function replaceTextCallback( $matches ) { + $type = $matches[1]; + $key = $matches[2]; + if( $type == 'LINK' ) { + list( $ns, $index ) = explode( ':', $key, 2 ); + if( isset( $this->internals[$ns][$index]['text'] ) ) { + return $this->internals[$ns][$index]['text']; + } + } elseif( $type == 'IWLINK' ) { + if( isset( $this->interwikis[$key]['text'] ) ) { + return $this->interwikis[$key]['text']; + } + } + return $matches[0]; + } +} diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index 3ff56a2b..7fcfb90a 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -92,17 +92,18 @@ class Parser # Persistent: var $mTagHooks, $mTransparentTagHooks, $mFunctionHooks, $mFunctionSynonyms, $mVariables, $mImageParams, $mImageParamsMagicArray, $mStripList, $mMarkerIndex, $mPreprocessor, - $mExtLinkBracketedRegex, $mDefaultStripList, $mVarCache, $mConf; + $mExtLinkBracketedRegex, $mUrlProtocols, $mDefaultStripList, $mVarCache, $mConf; # Cleared with clearState(): var $mOutput, $mAutonumber, $mDTopen, $mStripState; var $mIncludeCount, $mArgStack, $mLastSection, $mInPre; - var $mInterwikiLinkHolders, $mLinkHolders; + var $mLinkHolders, $mLinkID; var $mIncludeSizes, $mPPNodeCount, $mDefaultSort; var $mTplExpandCache; // empty-frame expansion cache var $mTplRedirCache, $mTplDomCache, $mHeadings, $mDoubleUnderscores; var $mExpensiveFunctionCount; // number of expensive parser function calls + var $mFileCache; # Temporary # These are variables reset at least once per parse regardless of $clearState @@ -128,6 +129,7 @@ class Parser $this->mFunctionHooks = array(); $this->mFunctionSynonyms = array( 0 => array(), 1 => array() ); $this->mDefaultStripList = $this->mStripList = array( 'nowiki', 'gallery' ); + $this->mUrlProtocols = wfUrlProtocols(); $this->mExtLinkBracketedRegex = '/\[(\b(' . wfUrlProtocols() . ')'. '[^][<>"\\x00-\\x20\\x7F]+) *([^\]\\x0a\\x0d]*?)\]/S'; $this->mVarCache = array(); @@ -146,6 +148,18 @@ class Parser $this->mFirstCall = true; } + /** + * Reduce memory usage to reduce the impact of circular references + */ + function __destruct() { + if ( isset( $this->mLinkHolders ) ) { + $this->mLinkHolders->__destruct(); + } + foreach ( $this as $name => $value ) { + unset( $this->$name ); + } + } + /** * Do various kinds of initialisation on the first call of the parser */ @@ -183,17 +197,8 @@ class Parser $this->mStripState = new StripState; $this->mArgStack = false; $this->mInPre = false; - $this->mInterwikiLinkHolders = array( - 'texts' => array(), - 'titles' => array() - ); - $this->mLinkHolders = array( - 'namespaces' => array(), - 'dbkeys' => array(), - 'queries' => array(), - 'texts' => array(), - 'titles' => array() - ); + $this->mLinkHolders = new LinkHolderArray( $this ); + $this->mLinkID = 0; $this->mRevisionTimestamp = $this->mRevisionId = null; /** @@ -208,7 +213,7 @@ class Parser */ #$this->mUniqPrefix = "\x07UNIQ" . Parser::getRandomString(); # Changed to \x7f to allow XML double-parsing -- TS - $this->mUniqPrefix = "\x7fUNIQ" . Parser::getRandomString(); + $this->mUniqPrefix = "\x7fUNIQ" . self::getRandomString(); # Clear these on every parse, bug 4549 @@ -225,6 +230,7 @@ class Parser $this->mHeadings = array(); $this->mDoubleUnderscores = array(); $this->mExpensiveFunctionCount = 0; + $this->mFileCache = array(); # Fix cloning if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) { @@ -283,22 +289,22 @@ class Parser * Convert wikitext to HTML * Do not call this function recursively. * - * @param string $text Text we want to parse - * @param Title &$title A title object - * @param array $options - * @param boolean $linestart - * @param boolean $clearState - * @param int $revid number to pass in {{REVISIONID}} + * @param $text String: text we want to parse + * @param $title A title object + * @param $options ParserOptions + * @param $linestart boolean + * @param $clearState boolean + * @param $revid Int: number to pass in {{REVISIONID}} * @return ParserOutput a ParserOutput */ - public function parse( $text, &$title, $options, $linestart = true, $clearState = true, $revid = null ) { + public function parse( $text, Title $title, ParserOptions $options, $linestart = true, $clearState = true, $revid = null ) { /** * First pass--just handle sections, pass the rest off * to internalParse() which does all the real work. */ global $wgUseTidy, $wgAlwaysUseTidy, $wgContLang; - $fname = 'Parser::parse-' . wfGetCaller(); + $fname = __METHOD__.'-' . wfGetCaller(); wfProfileIn( __METHOD__ ); wfProfileIn( $fname ); @@ -332,7 +338,6 @@ class Parser ); $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text ); - # only once and last $text = $this->doBlockLevels( $text, $linestart ); $this->replaceLinkHolders( $text ); @@ -352,7 +357,7 @@ class Parser $uniq_prefix = $this->mUniqPrefix; $matches = array(); $elements = array_keys( $this->mTransparentTagHooks ); - $text = Parser::extractTagsAndParams( $elements, $text, $matches, $uniq_prefix ); + $text = self::extractTagsAndParams( $elements, $text, $matches, $uniq_prefix ); foreach( $matches as $marker => $data ) { list( $element, $content, $params, $tag ) = $data; @@ -370,7 +375,7 @@ class Parser $text = Sanitizer::normalizeCharReferences( $text ); if (($wgUseTidy and $this->mOptions->mTidy) or $wgAlwaysUseTidy) { - $text = Parser::tidy($text); + $text = self::tidy($text); } else { # attempt to sanitize at least some nesting problems # (bug #2702 and quite a few others) @@ -475,6 +480,8 @@ class Parser function &getTitle() { return $this->mTitle; } function getOptions() { return $this->mOptions; } function getRevisionId() { return $this->mRevisionId; } + function getOutput() { return $this->mOutput; } + function nextLinkID() { return $this->mLinkID++; } function getFunctionLang() { global $wgLang, $wgContLang; @@ -553,7 +560,7 @@ class Parser $text = $inside; $tail = null; } else { - if( $element == '!--' ) { + if( $element === '!--' ) { $end = '/(-->)/'; } else { $end = "/(<\\/$element\\s*>)/i"; @@ -658,18 +665,27 @@ class Parser */ function tidy( $text ) { global $wgTidyInternal; + $wrappedtext = ''. 'test'.$text.''; + + # Tidy is known to clobber tabs; convert 'em to entities + $wrappedtext = str_replace("\t", ' ', $wrappedtext); + if( $wgTidyInternal ) { - $correctedtext = Parser::internalTidy( $wrappedtext ); + $correctedtext = self::internalTidy( $wrappedtext ); } else { - $correctedtext = Parser::externalTidy( $wrappedtext ); + $correctedtext = self::externalTidy( $wrappedtext ); } if( is_null( $correctedtext ) ) { wfDebug( "Tidy error detected!\n" ); return $text . "\n\n"; } + + # Convert the tabs back from entities + $correctedtext = str_replace(' ', "\t", $correctedtext); + return $correctedtext; } @@ -681,8 +697,7 @@ class Parser */ function externalTidy( $text ) { global $wgTidyConf, $wgTidyBin, $wgTidyOpts; - $fname = 'Parser::externalTidy'; - wfProfileIn( $fname ); + wfProfileIn( __METHOD__ ); $cleansource = ''; $opts = ' -utf8'; @@ -693,23 +708,25 @@ class Parser 2 => array('file', wfGetNull(), 'a') ); $pipes = array(); - $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts$opts", $descriptorspec, $pipes); - if (is_resource($process)) { - // Theoretically, this style of communication could cause a deadlock - // here. If the stdout buffer fills up, then writes to stdin could - // block. This doesn't appear to happen with tidy, because tidy only - // writes to stdout after it's finished reading from stdin. Search - // for tidyParseStdin and tidySaveStdout in console/tidy.c - fwrite($pipes[0], $text); - fclose($pipes[0]); - while (!feof($pipes[1])) { - $cleansource .= fgets($pipes[1], 1024); + if( function_exists('proc_open') ) { + $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts$opts", $descriptorspec, $pipes); + if (is_resource($process)) { + // Theoretically, this style of communication could cause a deadlock + // here. If the stdout buffer fills up, then writes to stdin could + // block. This doesn't appear to happen with tidy, because tidy only + // writes to stdout after it's finished reading from stdin. Search + // for tidyParseStdin and tidySaveStdout in console/tidy.c + fwrite($pipes[0], $text); + fclose($pipes[0]); + while (!feof($pipes[1])) { + $cleansource .= fgets($pipes[1], 1024); + } + fclose($pipes[1]); + proc_close($process); } - fclose($pipes[1]); - proc_close($process); } - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); if( $cleansource == '' && $text != '') { // Some kind of error happened, so we couldn't get the corrected text. @@ -731,8 +748,7 @@ class Parser */ function internalTidy( $text ) { global $wgTidyConf, $IP, $wgDebugTidy; - $fname = 'Parser::internalTidy'; - wfProfileIn( $fname ); + wfProfileIn( __METHOD__ ); $tidy = new tidy; $tidy->parseString( $text, $wgTidyConf, 'utf8' ); @@ -750,7 +766,7 @@ class Parser "\n-->"; } - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); return $cleansource; } @@ -760,34 +776,35 @@ class Parser * @private */ function doTableStuff ( $text ) { - $fname = 'Parser::doTableStuff'; - wfProfileIn( $fname ); + wfProfileIn( __METHOD__ ); - $lines = explode ( "\n" , $text ); + $lines = StringUtils::explode( "\n", $text ); + $out = ''; $td_history = array (); // Is currently a td tag open? $last_tag_history = array (); // Save history of last lag activated (td, th or caption) $tr_history = array (); // Is currently a tr tag open? $tr_attributes = array (); // history of tr attributes $has_opened_tr = array(); // Did this table open a element? $indent_level = 0; // indent level of the table - foreach ( $lines as $key => $line ) - { - $line = trim ( $line ); + + foreach ( $lines as $outLine ) { + $line = trim( $outLine ); if( $line == '' ) { // empty line, go to next line + $out .= $outLine."\n"; continue; } - $first_character = $line{0}; + $first_character = $line[0]; $matches = array(); - if ( preg_match( '/^(:*)\{\|(.*)$/' , $line , $matches ) ) { + if ( preg_match( '/^(:*)\{\|(.*)$/', $line , $matches ) ) { // First check if we are starting a new table $indent_level = strlen( $matches[1] ); $attributes = $this->mStripState->unstripBoth( $matches[2] ); $attributes = Sanitizer::fixTagAttributes ( $attributes , 'table' ); - $lines[$key] = str_repeat( '
' , $indent_level ) . ""; + $outLine = str_repeat( '
' , $indent_level ) . ""; array_push ( $td_history , false ); array_push ( $last_tag_history , '' ); array_push ( $tr_history , false ); @@ -795,8 +812,9 @@ class Parser array_push ( $has_opened_tr , false ); } else if ( count ( $td_history ) == 0 ) { // Don't do any of the following + $out .= $outLine."\n"; continue; - } else if ( substr ( $line , 0 , 2 ) == '|}' ) { + } else if ( substr ( $line , 0 , 2 ) === '|}' ) { // We are ending a table $line = '' . substr ( $line , 2 ); $last_tag = array_pop ( $last_tag_history ); @@ -813,8 +831,8 @@ class Parser $line = "{$line}"; } array_pop ( $tr_attributes ); - $lines[$key] = $line . str_repeat( '
' , $indent_level ); - } else if ( substr ( $line , 0 , 2 ) == '|-' ) { + $outLine = $line . str_repeat( '
' , $indent_level ); + } else if ( substr ( $line , 0 , 2 ) === '|-' ) { // Now we have a table row $line = preg_replace( '#^\|-+#', '', $line ); @@ -837,21 +855,21 @@ class Parser $line = "{$line}"; } - $lines[$key] = $line; + $outLine = $line; array_push ( $tr_history , false ); array_push ( $td_history , false ); array_push ( $last_tag_history , '' ); } - else if ( $first_character == '|' || $first_character == '!' || substr ( $line , 0 , 2 ) == '|+' ) { + else if ( $first_character === '|' || $first_character === '!' || substr ( $line , 0 , 2 ) === '|+' ) { // This might be cell elements, td, th or captions - if ( substr ( $line , 0 , 2 ) == '|+' ) { + if ( substr ( $line , 0 , 2 ) === '|+' ) { $first_character = '+'; $line = substr ( $line , 1 ); } $line = substr ( $line , 1 ); - if ( $first_character == '!' ) { + if ( $first_character === '!' ) { $line = str_replace ( '!!' , '||' , $line ); } @@ -861,13 +879,13 @@ class Parser // attribute values containing literal "||". $cells = StringUtils::explodeMarkup( '||' , $line ); - $lines[$key] = ''; + $outLine = ''; // Loop through each table cell foreach ( $cells as $cell ) { $previous = ''; - if ( $first_character != '+' ) + if ( $first_character !== '+' ) { $tr_after = array_pop ( $tr_attributes ); if ( !array_pop ( $tr_history ) ) { @@ -885,11 +903,11 @@ class Parser $previous = "{$previous}"; } - if ( $first_character == '|' ) { + if ( $first_character === '|' ) { $last_tag = 'td'; - } else if ( $first_character == '!' ) { + } else if ( $first_character === '!' ) { $last_tag = 'th'; - } else if ( $first_character == '+' ) { + } else if ( $first_character === '+' ) { $last_tag = 'caption'; } else { $last_tag = ''; @@ -912,38 +930,42 @@ class Parser $cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}"; } - $lines[$key] .= $cell; + $outLine .= $cell; array_push ( $td_history , true ); } } + $out .= $outLine . "\n"; } // Closing open td, tr && table while ( count ( $td_history ) > 0 ) { if ( array_pop ( $td_history ) ) { - $lines[] = '' ; + $out .= "\n"; } if ( array_pop ( $tr_history ) ) { - $lines[] = '' ; + $out .= "\n"; } if ( !array_pop ( $has_opened_tr ) ) { - $lines[] = "" ; + $out .= "\n" ; } - $lines[] = '' ; + $out .= "\n"; } - $output = implode ( "\n" , $lines ) ; + // Remove trailing line-ending (b/c) + if ( substr( $out, -1 ) === "\n" ) { + $out = substr( $out, 0, -1 ); + } // special case: don't return empty table - if( $output == "\n\n
" ) { - $output = ''; + if( $out === "\n\n
" ) { + $out = ''; } - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); - return $output; + return $out; } /** @@ -954,12 +976,11 @@ class Parser */ function internalParse( $text ) { $isMain = true; - $fname = 'Parser::internalParse'; - wfProfileIn( $fname ); + wfProfileIn( __METHOD__ ); # Hook to suspend the parser in this state if ( !wfRunHooks( 'ParserBeforeInternalParse', array( &$this, &$text, &$this->mStripState ) ) ) { - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); return $text ; } @@ -992,84 +1013,147 @@ class Parser $text = $this->doMagicLinks( $text ); $text = $this->formatHeadings( $text, $isMain ); - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); return $text; } /** * Replace special strings like "ISBN xxx" and "RFC xxx" with * magic external links. - * + * + * DML * @private */ function doMagicLinks( $text ) { wfProfileIn( __METHOD__ ); + $prots = $this->mUrlProtocols; + $urlChar = self::EXT_LINK_URL_CLASS; $text = preg_replace_callback( '!(?: # Start cases - | # Skip link text - <.*?> | # Skip stuff inside HTML elements - (?:RFC|PMID)\s+([0-9]+) | # RFC or PMID, capture number as m[1] - ISBN\s+(\b # ISBN, capture number as m[2] - (?: 97[89] [\ \-]? )? # optional 13-digit ISBN prefix - (?: [0-9] [\ \-]? ){9} # 9 digits with opt. delimiters - [0-9Xx] # check digit - \b) + () | # m[1]: Skip link text + (<.*?>) | # m[2]: Skip stuff inside HTML elements' . " + (\\b(?:$prots)$urlChar+) | # m[3]: Free external links" . ' + (?:RFC|PMID)\s+([0-9]+) | # m[4]: RFC or PMID, capture number + ISBN\s+(\b # m[5]: ISBN, capture number + (?: 97[89] [\ \-]? )? # optional 13-digit ISBN prefix + (?: [0-9] [\ \-]? ){9} # 9 digits with opt. delimiters + [0-9Xx] # check digit + \b) )!x', array( &$this, 'magicLinkCallback' ), $text ); wfProfileOut( __METHOD__ ); return $text; } function magicLinkCallback( $m ) { - if ( substr( $m[0], 0, 1 ) == '<' ) { + if ( isset( $m[1] ) && strval( $m[1] ) !== '' ) { + # Skip anchor + return $m[0]; + } elseif ( isset( $m[2] ) && strval( $m[2] ) !== '' ) { # Skip HTML element return $m[0]; - } elseif ( substr( $m[0], 0, 4 ) == 'ISBN' ) { - $isbn = $m[2]; - $num = strtr( $isbn, array( - '-' => '', - ' ' => '', - 'x' => 'X', - )); - $titleObj = SpecialPage::getTitleFor( 'Booksources', $num ); - $text = 'ISBN $isbn"; - } else { - if ( substr( $m[0], 0, 3 ) == 'RFC' ) { + } elseif ( isset( $m[3] ) && strval( $m[3] ) !== '' ) { + # Free external link + return $this->makeFreeExternalLink( $m[0] ); + } elseif ( isset( $m[4] ) && strval( $m[4] ) !== '' ) { + # RFC or PMID + if ( substr( $m[0], 0, 3 ) === 'RFC' ) { $keyword = 'RFC'; $urlmsg = 'rfcurl'; - $id = $m[1]; - } elseif ( substr( $m[0], 0, 4 ) == 'PMID' ) { + $id = $m[4]; + } elseif ( substr( $m[0], 0, 4 ) === 'PMID' ) { $keyword = 'PMID'; $urlmsg = 'pubmedurl'; - $id = $m[1]; + $id = $m[4]; } else { throw new MWException( __METHOD__.': unrecognised match type "' . substr($m[0], 0, 20 ) . '"' ); } - $url = wfMsg( $urlmsg, $id); $sk = $this->mOptions->getSkin(); $la = $sk->getExternalLinkAttributes( $url, $keyword.$id ); - $text = "{$keyword} {$id}"; + return "{$keyword} {$id}"; + } elseif ( isset( $m[5] ) && strval( $m[5] ) !== '' ) { + # ISBN + $isbn = $m[5]; + $num = strtr( $isbn, array( + '-' => '', + ' ' => '', + 'x' => 'X', + )); + $titleObj = SpecialPage::getTitleFor( 'Booksources', $num ); + return'ISBN $isbn"; + } else { + return $m[0]; } - return $text; } + /** + * Make a free external link, given a user-supplied URL + * @return HTML + * @private + */ + function makeFreeExternalLink( $url ) { + global $wgContLang; + wfProfileIn( __METHOD__ ); + + $sk = $this->mOptions->getSkin(); + $trail = ''; + + # The characters '<' and '>' (which were escaped by + # removeHTMLtags()) should not be included in + # URLs, per RFC 2396. + $m2 = array(); + if (preg_match('/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE)) { + $trail = substr($url, $m2[0][1]) . $trail; + $url = substr($url, 0, $m2[0][1]); + } + + # Move trailing punctuation to $trail + $sep = ',;\.:!?'; + # If there is no left bracket, then consider right brackets fair game too + if ( strpos( $url, '(' ) === false ) { + $sep .= ')'; + } + + $numSepChars = strspn( strrev( $url ), $sep ); + if ( $numSepChars ) { + $trail = substr( $url, -$numSepChars ) . $trail; + $url = substr( $url, 0, -$numSepChars ); + } + + $url = Sanitizer::cleanUrl( $url ); + + # Is this an external image? + $text = $this->maybeMakeExternalImage( $url ); + if ( $text === false ) { + # Not an image, make a link + $text = $sk->makeExternalLink( $url, $wgContLang->markNoConversion($url), true, 'free', + $this->getExternalLinkAttribs() ); + # Register it in the output object... + # Replace unnecessary URL escape codes with their equivalent characters + $pasteurized = self::replaceUnusualEscapes( $url ); + $this->mOutput->addExternalLink( $pasteurized ); + } + wfProfileOut( __METHOD__ ); + return $text . $trail; + } + + /** * Parse headers and return html * * @private */ function doHeadings( $text ) { - $fname = 'Parser::doHeadings'; - wfProfileIn( $fname ); + wfProfileIn( __METHOD__ ); for ( $i = 6; $i >= 1; --$i ) { $h = str_repeat( '=', $i ); $text = preg_replace( "/^$h(.+)$h\\s*$/m", "\\1", $text ); } - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); return $text; } @@ -1079,15 +1163,14 @@ class Parser * @return string the altered text */ function doAllQuotes( $text ) { - $fname = 'Parser::doAllQuotes'; - wfProfileIn( $fname ); + wfProfileIn( __METHOD__ ); $outtext = ''; - $lines = explode( "\n", $text ); + $lines = StringUtils::explode( "\n", $text ); foreach ( $lines as $line ) { - $outtext .= $this->doQuotes ( $line ) . "\n"; + $outtext .= $this->doQuotes( $line ) . "\n"; } $outtext = substr($outtext, 0,-1); - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); return $outtext; } @@ -1149,9 +1232,9 @@ class Parser { $x1 = substr ($arr[$i-1], -1); $x2 = substr ($arr[$i-1], -2, 1); - if ($x1 == ' ') { + if ($x1 === ' ') { if ($firstspace == -1) $firstspace = $i; - } else if ($x2 == ' ') { + } else if ($x2 === ' ') { if ($firstsingleletterword == -1) $firstsingleletterword = $i; } else { if ($firstmultiletterword == -1) $firstmultiletterword = $i; @@ -1191,7 +1274,7 @@ class Parser { if (($i % 2) == 0) { - if ($state == 'both') + if ($state === 'both') $buffer .= $r; else $output .= $r; @@ -1200,41 +1283,41 @@ class Parser { if (strlen ($r) == 2) { - if ($state == 'i') + if ($state === 'i') { $output .= ''; $state = ''; } - else if ($state == 'bi') + else if ($state === 'bi') { $output .= ''; $state = 'b'; } - else if ($state == 'ib') + else if ($state === 'ib') { $output .= ''; $state = 'b'; } - else if ($state == 'both') + else if ($state === 'both') { $output .= ''.$buffer.''; $state = 'b'; } else # $state can be 'b' or '' { $output .= ''; $state .= 'i'; } } else if (strlen ($r) == 3) { - if ($state == 'b') + if ($state === 'b') { $output .= ''; $state = ''; } - else if ($state == 'bi') + else if ($state === 'bi') { $output .= ''; $state = 'i'; } - else if ($state == 'ib') + else if ($state === 'ib') { $output .= ''; $state = 'i'; } - else if ($state == 'both') + else if ($state === 'both') { $output .= ''.$buffer.''; $state = 'i'; } else # $state can be 'i' or '' { $output .= ''; $state .= 'b'; } } else if (strlen ($r) == 5) { - if ($state == 'b') + if ($state === 'b') { $output .= ''; $state = 'i'; } - else if ($state == 'i') + else if ($state === 'i') { $output .= ''; $state = 'b'; } - else if ($state == 'bi') + else if ($state === 'bi') { $output .= ''; $state = ''; } - else if ($state == 'ib') + else if ($state === 'ib') { $output .= ''; $state = ''; } - else if ($state == 'both') + else if ($state === 'both') { $output .= ''.$buffer.''; $state = ''; } else # ($state == '') { $buffer = ''; $state = 'both'; } @@ -1243,21 +1326,21 @@ class Parser $i++; } # Now close all remaining tags. Notice that the order is important. - if ($state == 'b' || $state == 'ib') + if ($state === 'b' || $state === 'ib') $output .= ''; - if ($state == 'i' || $state == 'bi' || $state == 'ib') + if ($state === 'i' || $state === 'bi' || $state === 'ib') $output .= ''; - if ($state == 'bi') + if ($state === 'bi') $output .= ''; # There might be lonely ''''', so make sure we have a buffer - if ($state == 'both' && $buffer) + if ($state === 'both' && $buffer) $output .= ''.$buffer.''; return $output; } } /** - * Replace external links + * Replace external links (REL) * * Note: this is all very hackish and the order of execution matters a lot. * Make sure to run maintenance/parserTests.php if you change this code. @@ -1266,14 +1349,12 @@ class Parser */ function replaceExternalLinks( $text ) { global $wgContLang; - $fname = 'Parser::replaceExternalLinks'; - wfProfileIn( $fname ); + wfProfileIn( __METHOD__ ); $sk = $this->mOptions->getSkin(); $bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE ); - - $s = $this->replaceFreeExternalLinks( array_shift( $bits ) ); + $s = array_shift( $bits ); $i = 0; while ( $imAutonumber . ']'; + $langObj = $this->getFunctionLang(); + $text = '[' . $langObj->formatNum( ++$this->mAutonumber ) . ']'; $linktype = 'autonumber'; } else { # Otherwise just use the URL @@ -1324,108 +1406,44 @@ class Parser $url = Sanitizer::cleanUrl( $url ); - # Process the trail (i.e. everything after this link up until start of the next link), - # replacing any non-bracketed links - $trail = $this->replaceFreeExternalLinks( $trail ); + if ( $this->mOptions->mExternalLinkTarget ) { + $attribs = array( 'target' => $this->mOptions->mExternalLinkTarget ); + } else { + $attribs = array(); + } # Use the encoded URL # This means that users can paste URLs directly into the text # Funny characters like ö aren't valid in URLs anyway # This was changed in August 2004 - $s .= $sk->makeExternalLink( $url, $text, false, $linktype, $this->mTitle->getNamespace() ) . $dtrail . $trail; + $s .= $sk->makeExternalLink( $url, $text, false, $linktype, $this->getExternalLinkAttribs() ) + . $dtrail . $trail; # Register link in the output object. # Replace unnecessary URL escape codes with the referenced character # This prevents spammers from hiding links from the filters - $pasteurized = Parser::replaceUnusualEscapes( $url ); + $pasteurized = self::replaceUnusualEscapes( $url ); $this->mOutput->addExternalLink( $pasteurized ); } - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); return $s; } - /** - * Replace anything that looks like a URL with a link - * @private - */ - function replaceFreeExternalLinks( $text ) { - global $wgContLang; - $fname = 'Parser::replaceFreeExternalLinks'; - wfProfileIn( $fname ); - - $bits = preg_split( '/(\b(?:' . wfUrlProtocols() . '))/S', $text, -1, PREG_SPLIT_DELIM_CAPTURE ); - $s = array_shift( $bits ); - $i = 0; - - $sk = $this->mOptions->getSkin(); - - while ( $i < count( $bits ) ){ - $protocol = $bits[$i++]; - $remainder = $bits[$i++]; - - $m = array(); - if ( preg_match( '/^('.self::EXT_LINK_URL_CLASS.'+)(.*)$/s', $remainder, $m ) ) { - # Found some characters after the protocol that look promising - $url = $protocol . $m[1]; - $trail = $m[2]; - - # special case: handle urls as url args: - # http://www.example.com/foo?=http://www.example.com/bar - if(strlen($trail) == 0 && - isset($bits[$i]) && - preg_match('/^'. wfUrlProtocols() . '$/S', $bits[$i]) && - preg_match( '/^('.self::EXT_LINK_URL_CLASS.'+)(.*)$/s', $bits[$i + 1], $m )) - { - # add protocol, arg - $url .= $bits[$i] . $m[1]; # protocol, url as arg to previous link - $i += 2; - $trail = $m[2]; - } - - # The characters '<' and '>' (which were escaped by - # removeHTMLtags()) should not be included in - # URLs, per RFC 2396. - $m2 = array(); - if (preg_match('/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE)) { - $trail = substr($url, $m2[0][1]) . $trail; - $url = substr($url, 0, $m2[0][1]); - } - - # Move trailing punctuation to $trail - $sep = ',;\.:!?'; - # If there is no left bracket, then consider right brackets fair game too - if ( strpos( $url, '(' ) === false ) { - $sep .= ')'; - } - - $numSepChars = strspn( strrev( $url ), $sep ); - if ( $numSepChars ) { - $trail = substr( $url, -$numSepChars ) . $trail; - $url = substr( $url, 0, -$numSepChars ); - } - - $url = Sanitizer::cleanUrl( $url ); - - # Is this an external image? - $text = $this->maybeMakeExternalImage( $url ); - if ( $text === false ) { - # Not an image, make a link - $text = $sk->makeExternalLink( $url, $wgContLang->markNoConversion($url), true, 'free', $this->mTitle->getNamespace() ); - # Register it in the output object... - # Replace unnecessary URL escape codes with their equivalent characters - $pasteurized = Parser::replaceUnusualEscapes( $url ); - $this->mOutput->addExternalLink( $pasteurized ); - } - $s .= $text . $trail; - } else { - $s .= $protocol . $remainder; - } + function getExternalLinkAttribs() { + $attribs = array(); + global $wgNoFollowLinks, $wgNoFollowNsExceptions; + $ns = $this->mTitle->getNamespace(); + if( $wgNoFollowLinks && !in_array($ns, $wgNoFollowNsExceptions) ) { + $attribs['rel'] = 'nofollow'; } - wfProfileOut( $fname ); - return $s; + if ( $this->mOptions->getExternalLinkTarget() ) { + $attribs['target'] = $this->mOptions->getExternalLinkTarget(); + } + return $attribs; } + /** * Replace unusual URL escape codes with their equivalent characters * @param string @@ -1438,7 +1456,7 @@ class Parser */ static function replaceUnusualEscapes( $url ) { return preg_replace_callback( '/%[0-9A-Fa-f]{2}/', - array( 'Parser', 'replaceUnusualEscapesCallback' ), $url ); + array( __CLASS__, 'replaceUnusualEscapesCallback' ), $url ); } /** @@ -1462,7 +1480,7 @@ class Parser /** * make an image if it's allowed, either through the global - * option or through the exception + * option, through the exception, or through the on-wiki whitelist * @private */ function maybeMakeExternalImage( $url ) { @@ -1470,47 +1488,88 @@ class Parser $imagesfrom = $this->mOptions->getAllowExternalImagesFrom(); $imagesexception = !empty($imagesfrom); $text = false; + # $imagesfrom could be either a single string or an array of strings, parse out the latter + if( $imagesexception && is_array( $imagesfrom ) ) { + $imagematch = false; + foreach( $imagesfrom as $match ) { + if( strpos( $url, $match ) === 0 ) { + $imagematch = true; + break; + } + } + } elseif( $imagesexception ) { + $imagematch = (strpos( $url, $imagesfrom ) === 0); + } else { + $imagematch = false; + } if ( $this->mOptions->getAllowExternalImages() - || ( $imagesexception && strpos( $url, $imagesfrom ) === 0 ) ) { + || ( $imagesexception && $imagematch ) ) { if ( preg_match( self::EXT_IMAGE_REGEX, $url ) ) { # Image found $text = $sk->makeExternalImage( $url ); } } + if( !$text && $this->mOptions->getEnableImageWhitelist() + && preg_match( self::EXT_IMAGE_REGEX, $url ) ) { + $whitelist = explode( "\n", wfMsgForContent( 'external_image_whitelist' ) ); + foreach( $whitelist as $entry ) { + # Sanitize the regex fragment, make it case-insensitive, ignore blank entries/comments + if( strpos( $entry, '#' ) === 0 || $entry === '' ) + continue; + if( preg_match( '/' . str_replace( '/', '\\/', $entry ) . '/i', $url ) ) { + # Image matches a whitelist entry + $text = $sk->makeExternalImage( $url ); + break; + } + } + } return $text; } /** * Process [[ ]] wikilinks + * @return processed text * * @private */ function replaceInternalLinks( $s ) { + $this->mLinkHolders->merge( $this->replaceInternalLinks2( $s ) ); + return $s; + } + + /** + * Process [[ ]] wikilinks (RIL) + * @return LinkHolderArray + * + * @private + */ + function replaceInternalLinks2( &$s ) { global $wgContLang; - static $fname = 'Parser::replaceInternalLinks' ; - wfProfileIn( $fname ); + wfProfileIn( __METHOD__ ); - wfProfileIn( $fname.'-setup' ); - static $tc = FALSE; + wfProfileIn( __METHOD__.'-setup' ); + static $tc = FALSE, $e1, $e1_img; # the % is needed to support urlencoded titles as well - if ( !$tc ) { $tc = Title::legalChars() . '#%'; } + if ( !$tc ) { + $tc = Title::legalChars() . '#%'; + # Match a link having the form [[namespace:link|alternate]]trail + $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD"; + # Match cases where there is no "]]", which might still be images + $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD"; + } $sk = $this->mOptions->getSkin(); + $holders = new LinkHolderArray( $this ); #split the entire text string on occurences of [[ - $a = explode( '[[', ' ' . $s ); + $a = StringUtils::explode( '[[', ' ' . $s ); #get the first element (all text up to first [[), and remove the space we added - $s = array_shift( $a ); + $s = $a->current(); + $a->next(); + $line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void" $s = substr( $s, 1 ); - # Match a link having the form [[namespace:link|alternate]]trail - static $e1 = FALSE; - if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD"; } - # Match cases where there is no "]]", which might still be images - static $e1_img = FALSE; - if ( !$e1_img ) { $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD"; } - $useLinkPrefixExtension = $wgContLang->linkPrefixExtension(); $e2 = null; if ( $useLinkPrefixExtension ) { @@ -1520,8 +1579,8 @@ class Parser } if( is_null( $this->mTitle ) ) { - wfProfileOut( $fname ); - wfProfileOut( $fname.'-setup' ); + wfProfileOut( __METHOD__.'-setup' ); + wfProfileOut( __METHOD__ ); throw new MWException( __METHOD__.": \$this->mTitle is null\n" ); } $nottalk = !$this->mTitle->isTalkPage(); @@ -1543,13 +1602,20 @@ class Parser $selflink = array($this->mTitle->getPrefixedText()); } $useSubpages = $this->areSubpagesAllowed(); - wfProfileOut( $fname.'-setup' ); + wfProfileOut( __METHOD__.'-setup' ); # Loop for each link - for ($k = 0; isset( $a[$k] ); $k++) { - $line = $a[$k]; + for ( ; $line !== false && $line !== null ; $a->next(), $line = $a->current() ) { + # Check for excessive memory usage + if ( $holders->isBig() ) { + # Too big + # Do the existence check, replace the link holders and clear the array + $holders->replace( $s ); + $holders->clear(); + } + if ( $useLinkPrefixExtension ) { - wfProfileIn( $fname.'-prefixhandling' ); + wfProfileIn( __METHOD__.'-prefixhandling' ); if ( preg_match( $e2, $s, $m ) ) { $prefix = $m[2]; $s = $m[1]; @@ -1561,12 +1627,12 @@ class Parser $prefix = $first_prefix; $first_prefix = false; } - wfProfileOut( $fname.'-prefixhandling' ); + wfProfileOut( __METHOD__.'-prefixhandling' ); } $might_be_img = false; - wfProfileIn( "$fname-e1" ); + wfProfileIn( __METHOD__."-e1" ); if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt $text = $m[2]; # If we get a ] at the beginning of $m[3] that means we have a link that's something like: @@ -1600,18 +1666,18 @@ class Parser $trail = ""; } else { # Invalid form; output directly $s .= $prefix . '[[' . $line ; - wfProfileOut( "$fname-e1" ); + wfProfileOut( __METHOD__."-e1" ); continue; } - wfProfileOut( "$fname-e1" ); - wfProfileIn( "$fname-misc" ); + wfProfileOut( __METHOD__."-e1" ); + wfProfileIn( __METHOD__."-misc" ); # Don't allow internal links to pages containing # PROTO: where PROTO is a valid URL protocol; these # should be external links. if (preg_match('/^\b(?:' . wfUrlProtocols() . ')/', $m[1])) { $s .= $prefix . '[[' . $line ; - wfProfileOut( "$fname-misc" ); + wfProfileOut( __METHOD__."-misc" ); continue; } @@ -1622,33 +1688,36 @@ class Parser $link = $m[1]; } - $noforce = (substr($m[1], 0, 1) != ':'); + $noforce = (substr($m[1], 0, 1) !== ':'); if (!$noforce) { # Strip off leading ':' $link = substr($link, 1); } - wfProfileOut( "$fname-misc" ); - wfProfileIn( "$fname-title" ); + wfProfileOut( __METHOD__."-misc" ); + wfProfileIn( __METHOD__."-title" ); $nt = Title::newFromText( $this->mStripState->unstripNoWiki($link) ); if( !$nt ) { $s .= $prefix . '[[' . $line; - wfProfileOut( "$fname-title" ); + wfProfileOut( __METHOD__."-title" ); continue; } $ns = $nt->getNamespace(); $iw = $nt->getInterWiki(); - wfProfileOut( "$fname-title" ); + wfProfileOut( __METHOD__."-title" ); if ($might_be_img) { # if this is actually an invalid link - wfProfileIn( "$fname-might_be_img" ); - if ($ns == NS_IMAGE && $noforce) { #but might be an image + wfProfileIn( __METHOD__."-might_be_img" ); + if ($ns == NS_FILE && $noforce) { #but might be an image $found = false; - while (isset ($a[$k+1]) ) { + while ( true ) { #look at the next 'line' to see if we can close it there - $spliced = array_splice( $a, $k + 1, 1 ); - $next_line = array_shift( $spliced ); + $a->next(); + $next_line = $a->current(); + if ( $next_line === false || $next_line === null ) { + break; + } $m = explode( ']]', $next_line, 3 ); if ( count( $m ) == 3 ) { # the first ]] closes the inner link, the second the image @@ -1668,19 +1737,19 @@ class Parser if ( !$found ) { # we couldn't find the end of this imageLink, so output it raw #but don't ignore what might be perfectly normal links in the text we've examined - $text = $this->replaceInternalLinks($text); + $holders->merge( $this->replaceInternalLinks2( $text ) ); $s .= "{$prefix}[[$link|$text"; # note: no $trail, because without an end, there *is* no trail - wfProfileOut( "$fname-might_be_img" ); + wfProfileOut( __METHOD__."-might_be_img" ); continue; } } else { #it's not an image, so output it raw $s .= "{$prefix}[[$link|$text"; # note: no $trail, because without an end, there *is* no trail - wfProfileOut( "$fname-might_be_img" ); + wfProfileOut( __METHOD__."-might_be_img" ); continue; } - wfProfileOut( "$fname-might_be_img" ); + wfProfileOut( __METHOD__."-might_be_img" ); } $wasblank = ( '' == $text ); @@ -1690,41 +1759,36 @@ class Parser if( $noforce ) { # Interwikis - wfProfileIn( "$fname-interwiki" ); + wfProfileIn( __METHOD__."-interwiki" ); if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgContLang->getLanguageName( $iw ) ) { $this->mOutput->addLanguageLink( $nt->getFullText() ); $s = rtrim($s . $prefix); $s .= trim($trail, "\n") == '' ? '': $prefix . $trail; - wfProfileOut( "$fname-interwiki" ); + wfProfileOut( __METHOD__."-interwiki" ); continue; } - wfProfileOut( "$fname-interwiki" ); + wfProfileOut( __METHOD__."-interwiki" ); - if ( $ns == NS_IMAGE ) { - wfProfileIn( "$fname-image" ); + if ( $ns == NS_FILE ) { + wfProfileIn( __METHOD__."-image" ); if ( !wfIsBadImage( $nt->getDBkey(), $this->mTitle ) ) { # recursively parse links inside the image caption # actually, this will parse them in any other parameters, too, # but it might be hard to fix that, and it doesn't matter ATM $text = $this->replaceExternalLinks($text); - $text = $this->replaceInternalLinks($text); + $holders->merge( $this->replaceInternalLinks2( $text ) ); # cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them - $s .= $prefix . $this->armorLinks( $this->makeImage( $nt, $text ) ) . $trail; - $this->mOutput->addImage( $nt->getDBkey() ); - - wfProfileOut( "$fname-image" ); - continue; - } else { - # We still need to record the image's presence on the page - $this->mOutput->addImage( $nt->getDBkey() ); + $s .= $prefix . $this->armorLinks( $this->makeImage( $nt, $text, $holders ) ) . $trail; } - wfProfileOut( "$fname-image" ); + $this->mOutput->addImage( $nt->getDBkey() ); + wfProfileOut( __METHOD__."-image" ); + continue; } if ( $ns == NS_CATEGORY ) { - wfProfileIn( "$fname-category" ); + wfProfileIn( __METHOD__."-category" ); $s = rtrim($s . "\n"); # bug 87 if ( $wasblank ) { @@ -1743,26 +1807,27 @@ class Parser */ $s .= trim($prefix . $trail, "\n") == '' ? '': $prefix . $trail; - wfProfileOut( "$fname-category" ); + wfProfileOut( __METHOD__."-category" ); continue; } } # Self-link checking - if( $nt->getFragment() === '' ) { + if( $nt->getFragment() === '' && $ns != NS_SPECIAL ) { if( in_array( $nt->getPrefixedText(), $selflink, true ) ) { $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail ); continue; } } - # Special and Media are pseudo-namespaces; no pages actually exist in them + # NS_MEDIA is a pseudo-namespace for linking directly to a file + # FIXME: Should do batch file existence checks, see comment below if( $ns == NS_MEDIA ) { # Give extensions a chance to select the file revision for us $skip = $time = false; wfRunHooks( 'BeforeParserMakeImageLinkObj', array( &$this, &$nt, &$skip, &$time ) ); if ( $skip ) { - $link = $sk->makeLinkObj( $nt ); + $link = $sk->link( $nt ); } else { $link = $sk->makeMediaLinkObj( $nt, $text, $time ); } @@ -1770,28 +1835,23 @@ class Parser $s .= $prefix . $this->armorLinks( $link ) . $trail; $this->mOutput->addImage( $nt->getDBkey() ); continue; - } elseif( $ns == NS_SPECIAL ) { - if( SpecialPage::exists( $nt->getDBkey() ) ) { - $s .= $this->makeKnownLinkHolder( $nt, $text, '', $trail, $prefix ); - } else { - $s .= $this->makeLinkHolder( $nt, $text, '', $trail, $prefix ); - } - continue; - } elseif( $ns == NS_IMAGE ) { - $img = wfFindFile( $nt ); - if( $img ) { - // Force a blue link if the file exists; may be a remote - // upload on the shared repository, and we want to see its - // auto-generated page. - $s .= $this->makeKnownLinkHolder( $nt, $text, '', $trail, $prefix ); - $this->mOutput->addLink( $nt ); - continue; - } } - $s .= $this->makeLinkHolder( $nt, $text, '', $trail, $prefix ); + + # Some titles, such as valid special pages or files in foreign repos, should + # be shown as bluelinks even though they're not included in the page table + # + # FIXME: isAlwaysKnown() can be expensive for file links; we should really do + # batch file existence checks for NS_FILE and NS_MEDIA + if( $iw == '' && $nt->isAlwaysKnown() ) { + $this->mOutput->addLink( $nt ); + $s .= $this->makeKnownLinkHolder( $nt, $text, '', $trail, $prefix ); + } else { + # Links will be added to the output link list after checking + $s .= $holders->makeHolder( $nt, $text, '', $trail, $prefix ); + } } - wfProfileOut( $fname ); - return $s; + wfProfileOut( __METHOD__ ); + return $holders; } /** @@ -1800,32 +1860,10 @@ class Parser * parsing of interwiki links, and secondly to allow all existence checks and * article length checks (for stub links) to be bundled into a single query. * + * @deprecated */ function makeLinkHolder( &$nt, $text = '', $query = '', $trail = '', $prefix = '' ) { - wfProfileIn( __METHOD__ ); - if ( ! is_object($nt) ) { - # Fail gracefully - $retVal = "{$prefix}{$text}{$trail}"; - } else { - # Separate the link trail from the rest of the link - list( $inside, $trail ) = Linker::splitTrail( $trail ); - - if ( $nt->isExternal() ) { - $nr = array_push( $this->mInterwikiLinkHolders['texts'], $prefix.$text.$inside ); - $this->mInterwikiLinkHolders['titles'][] = $nt; - $retVal = '{$trail}"; - } else { - $nr = array_push( $this->mLinkHolders['namespaces'], $nt->getNamespace() ); - $this->mLinkHolders['dbkeys'][] = $nt->getDBkey(); - $this->mLinkHolders['queries'][] = $query; - $this->mLinkHolders['texts'][] = $prefix.$text.$inside; - $this->mLinkHolders['titles'][] = $nt; - - $retVal = '{$trail}"; - } - } - wfProfileOut( __METHOD__ ); - return $retVal; + return $this->mLinkHolders->makeHolder( $nt, $text, $query, $trail, $prefix ); } /** @@ -1853,10 +1891,8 @@ class Parser * Insert a NOPARSE hacky thing into any inline links in a chunk that's * going to go through further parsing steps before inline URL expansion. * - * In particular this is important when using action=render, which causes - * full URLs to be included. - * - * Oh man I hate our multi-layer parser! + * Not needed quite as much as it used to be since free links are a bit + * more sensible these days. But bracketed links are still an issue. * * @param string more-or-less HTML * @return string less-or-more HTML with NOPARSE bits @@ -1891,8 +1927,7 @@ class Parser # ../ -- convert to CurrentPage, from CurrentPage/CurrentSubPage # ../Foobar -- convert to CurrentPage/Foobar, from CurrentPage/CurrentSubPage - $fname = 'Parser::maybeDoSubpageLink'; - wfProfileIn( $fname ); + wfProfileIn( __METHOD__ ); $ret = $target; # default return value is no change # Some namespaces don't allow subpages, @@ -1908,7 +1943,7 @@ class Parser # bug 7425 $target = trim( $target ); # Look at the first character - if( $target != '' && $target{0} == '/' ) { + if( $target != '' && $target{0} === '/' ) { # / at end means we don't want the slash to be shown $m = array(); $trailingSlashes = preg_match_all( '%(/+)$%', $target, $m ); @@ -1935,7 +1970,7 @@ class Parser if( count( $exploded ) > $dotdotcount ) { # not allowed to go below top level page $ret = implode( '/', array_slice( $exploded, 0, -$dotdotcount ) ); # / at the end means don't show full path - if( substr( $nodotdot, -1, 1 ) == '/' ) { + if( substr( $nodotdot, -1, 1 ) === '/' ) { $nodotdot = substr( $nodotdot, 0, -1 ); if( '' === $text ) { $text = $nodotdot . $suffix; @@ -1951,7 +1986,7 @@ class Parser } } - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); return $ret; } @@ -1987,10 +2022,10 @@ class Parser /* private */ function openList( $char ) { $result = $this->closeParagraph(); - if ( '*' == $char ) { $result .= '
  • '; } - else if ( '#' == $char ) { $result .= '
    1. '; } - else if ( ':' == $char ) { $result .= '
      '; } - else if ( ';' == $char ) { + if ( '*' === $char ) { $result .= '
      • '; } + else if ( '#' === $char ) { $result .= '
        1. '; } + else if ( ':' === $char ) { $result .= '
          '; } + else if ( ';' === $char ) { $result .= '
          '; $this->mDTopen = true; } @@ -2000,11 +2035,11 @@ class Parser } /* private */ function nextItem( $char ) { - if ( '*' == $char || '#' == $char ) { return '
        2. '; } - else if ( ':' == $char || ';' == $char ) { + if ( '*' === $char || '#' === $char ) { return '
        3. '; } + else if ( ':' === $char || ';' === $char ) { $close = '
      '; if ( $this->mDTopen ) { $close = ''; } - if ( ';' == $char ) { + if ( ';' === $char ) { $this->mDTopen = true; return $close . '
      '; } else { @@ -2016,9 +2051,9 @@ class Parser } /* private */ function closeList( $char ) { - if ( '*' == $char ) { $text = '
'; } - else if ( '#' == $char ) { $text = ''; } - else if ( ':' == $char ) { + if ( '*' === $char ) { $text = ''; } + else if ( '#' === $char ) { $text = ''; } + else if ( ':' === $char ) { if ( $this->mDTopen ) { $this->mDTopen = false; $text = ''; @@ -2032,56 +2067,59 @@ class Parser /**#@-*/ /** - * Make lists from lines starting with ':', '*', '#', etc. + * Make lists from lines starting with ':', '*', '#', etc. (DBL) * * @private * @return string the lists rendered as HTML */ function doBlockLevels( $text, $linestart ) { - $fname = 'Parser::doBlockLevels'; - wfProfileIn( $fname ); + wfProfileIn( __METHOD__ ); # Parsing through the text line by line. The main thing # happening here is handling of block-level elements p, pre, # and making lists from lines starting with * # : etc. # - $textLines = explode( "\n", $text ); + $textLines = StringUtils::explode( "\n", $text ); $lastPrefix = $output = ''; $this->mDTopen = $inBlockElem = false; $prefixLength = 0; $paragraphStack = false; - if ( !$linestart ) { - $output .= array_shift( $textLines ); - } foreach ( $textLines as $oLine ) { + # Fix up $linestart + if ( !$linestart ) { + $output .= $oLine; + $linestart = true; + continue; + } + $lastPrefixLength = strlen( $lastPrefix ); $preCloseMatch = preg_match('/<\\/pre/i', $oLine ); $preOpenMatch = preg_match('/
mInPre ) {
 				# Multiple prefixes may abut each other for nested lists.
 				$prefixLength = strspn( $oLine, '*#:;' );
-				$pref = substr( $oLine, 0, $prefixLength );
+				$prefix = substr( $oLine, 0, $prefixLength );
 
 				# eh?
-				$pref2 = str_replace( ';', ':', $pref );
+				$prefix2 = str_replace( ';', ':', $prefix );
 				$t = substr( $oLine, $prefixLength );
-				$this->mInPre = !empty($preOpenMatch);
+				$this->mInPre = (bool)$preOpenMatch;
 			} else {
 				# Don't interpret any other prefixes in preformatted text
 				$prefixLength = 0;
-				$pref = $pref2 = '';
+				$prefix = $prefix2 = '';
 				$t = $oLine;
 			}
 
 			# List generation
-			if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
+			if( $prefixLength && $lastPrefix === $prefix2 ) {
 				# Same as the last item, so no need to deal with nesting or opening stuff
-				$output .= $this->nextItem( substr( $pref, -1 ) );
+				$output .= $this->nextItem( substr( $prefix, -1 ) );
 				$paragraphStack = false;
 
-				if ( substr( $pref, -1 ) == ';') {
+				if ( substr( $prefix, -1 ) === ';') {
 					# The one nasty exception: definition lists work like this:
 					# ; title : definition text
 					# So we check for : in the remainder text to split up the
@@ -2094,21 +2132,21 @@ class Parser
 				}
 			} elseif( $prefixLength || $lastPrefixLength ) {
 				# Either open or close a level...
-				$commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
+				$commonPrefixLength = $this->getCommon( $prefix, $lastPrefix );
 				$paragraphStack = false;
 
 				while( $commonPrefixLength < $lastPrefixLength ) {
-					$output .= $this->closeList( $lastPrefix{$lastPrefixLength-1} );
+					$output .= $this->closeList( $lastPrefix[$lastPrefixLength-1] );
 					--$lastPrefixLength;
 				}
 				if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
-					$output .= $this->nextItem( $pref{$commonPrefixLength-1} );
+					$output .= $this->nextItem( $prefix[$commonPrefixLength-1] );
 				}
 				while ( $prefixLength > $commonPrefixLength ) {
-					$char = substr( $pref, $commonPrefixLength, 1 );
+					$char = substr( $prefix, $commonPrefixLength, 1 );
 					$output .= $this->openList( $char );
 
-					if ( ';' == $char ) {
+					if ( ';' === $char ) {
 						# FIXME: This is dupe of code above
 						if ($this->findColonNoLinks($t, $term, $t2) !== false) {
 							$t = $t2;
@@ -2117,10 +2155,10 @@ class Parser
 					}
 					++$commonPrefixLength;
 				}
-				$lastPrefix = $pref2;
+				$lastPrefix = $prefix2;
 			}
 			if( 0 == $prefixLength ) {
-				wfProfileIn( "$fname-paragraph" );
+				wfProfileIn( __METHOD__."-paragraph" );
 				# No prefix (not in list)--go to paragraph mode
 				// XXX: use a stack for nestable elements like span, table and div
 				$openmatch = preg_match('/(?:mInPre ) {
-					if ( ' ' == $t{0} and ( $this->mLastSection == 'pre' or trim($t) != '' ) ) {
+					if ( ' ' == $t{0} and ( $this->mLastSection === 'pre' or trim($t) != '' ) ) {
 						// pre
-						if ($this->mLastSection != 'pre') {
+						if ($this->mLastSection !== 'pre') {
 							$paragraphStack = false;
 							$output .= $this->closeParagraph().'
';
 							$this->mLastSection = 'pre';
@@ -2156,7 +2194,7 @@ class Parser
 								$paragraphStack = false;
 								$this->mLastSection = 'p';
 							} else {
-								if ($this->mLastSection != 'p' ) {
+								if ($this->mLastSection !== 'p' ) {
 									$output .= $this->closeParagraph();
 									$this->mLastSection = '';
 									$paragraphStack = '

'; @@ -2169,14 +2207,14 @@ class Parser $output .= $paragraphStack; $paragraphStack = false; $this->mLastSection = 'p'; - } else if ($this->mLastSection != 'p') { + } else if ($this->mLastSection !== 'p') { $output .= $this->closeParagraph().'

'; $this->mLastSection = 'p'; } } } } - wfProfileOut( "$fname-paragraph" ); + wfProfileOut( __METHOD__."-paragraph" ); } // somewhere above we forget to get out of pre block (bug 785) if($preCloseMatch && $this->mInPre) { @@ -2187,7 +2225,7 @@ class Parser } } while ( $prefixLength ) { - $output .= $this->closeList( $pref2{$prefixLength-1} ); + $output .= $this->closeList( $prefix2[$prefixLength-1] ); --$prefixLength; } if ( '' != $this->mLastSection ) { @@ -2195,7 +2233,7 @@ class Parser $this->mLastSection = ''; } - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); return $output; } @@ -2208,13 +2246,12 @@ class Parser * return string the position of the ':', or false if none found */ function findColonNoLinks($str, &$before, &$after) { - $fname = 'Parser::findColonNoLinks'; - wfProfileIn( $fname ); + wfProfileIn( __METHOD__ ); $pos = strpos( $str, ':' ); if( $pos === false ) { // Nothing to find! - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); return false; } @@ -2223,7 +2260,7 @@ class Parser // Easy; no tag nesting to worry about $before = substr( $str, 0, $pos ); $after = substr( $str, $pos+1 ); - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); return $pos; } @@ -2247,7 +2284,7 @@ class Parser // We found it! $before = substr( $str, 0, $i ); $after = substr( $str, $i + 1 ); - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); return $i; } // Embedded in a tag; don't break it. @@ -2257,7 +2294,7 @@ class Parser $colon = strpos( $str, ':', $i ); if( $colon === false ) { // Nothing else interesting - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); return false; } $lt = strpos( $str, '<', $i ); @@ -2266,7 +2303,7 @@ class Parser // We found it! $before = substr( $str, 0, $colon ); $after = substr( $str, $colon + 1 ); - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); return $i; } } @@ -2313,18 +2350,18 @@ class Parser break; case 3: // self::COLON_STATE_CLOSETAG: // In a - if( $c == ">" ) { + if( $c === ">" ) { $stack--; if( $stack < 0 ) { - wfDebug( "Invalid input in $fname; too many close tags\n" ); - wfProfileOut( $fname ); + wfDebug( __METHOD__.": Invalid input; too many close tags\n" ); + wfProfileOut( __METHOD__ ); return false; } $state = self::COLON_STATE_TEXT; } break; case self::COLON_STATE_TAGSLASH: - if( $c == ">" ) { + if( $c === ">" ) { // Yes, a self-closed tag $state = self::COLON_STATE_TEXT; } else { @@ -2333,33 +2370,33 @@ class Parser } break; case 5: // self::COLON_STATE_COMMENT: - if( $c == "-" ) { + if( $c === "-" ) { $state = self::COLON_STATE_COMMENTDASH; } break; case self::COLON_STATE_COMMENTDASH: - if( $c == "-" ) { + if( $c === "-" ) { $state = self::COLON_STATE_COMMENTDASHDASH; } else { $state = self::COLON_STATE_COMMENT; } break; case self::COLON_STATE_COMMENTDASHDASH: - if( $c == ">" ) { + if( $c === ">" ) { $state = self::COLON_STATE_TEXT; } else { $state = self::COLON_STATE_COMMENT; } break; default: - throw new MWException( "State machine error in $fname" ); + throw new MWException( "State machine error in " . __METHOD__ ); } } if( $stack > 0 ) { - wfDebug( "Invalid input in $fname; not enough close tags (stack $stack, state $state)\n" ); + wfDebug( __METHOD__.": Invalid input; not enough close tags (stack $stack, state $state)\n" ); return false; } - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); return false; } @@ -2552,9 +2589,11 @@ class Parser case 'numberofpages': return $this->mVarCache[$index] = $wgContLang->formatNum( SiteStats::pages() ); case 'numberofadmins': - return $this->mVarCache[$index] = $wgContLang->formatNum( SiteStats::admins() ); + return $this->mVarCache[$index] = $wgContLang->formatNum( SiteStats::numberingroup('sysop') ); case 'numberofedits': return $this->mVarCache[$index] = $wgContLang->formatNum( SiteStats::edits() ); + case 'numberofviews': + return $this->mVarCache[$index] = $wgContLang->formatNum( SiteStats::views() ); case 'currenttimestamp': return $this->mVarCache[$index] = wfTimestamp( TS_MW, $ts ); case 'localtimestamp': @@ -2589,12 +2628,11 @@ class Parser * @private */ function initialiseVariables() { - $fname = 'Parser::initialiseVariables'; - wfProfileIn( $fname ); + wfProfileIn( __METHOD__ ); $variableIDs = MagicWord::getVariableIDs(); $this->mVariables = new MagicWordArray( $variableIDs ); - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); } /** @@ -2663,8 +2701,7 @@ class Parser return $text; } - $fname = __METHOD__; - wfProfileIn( $fname ); + wfProfileIn( __METHOD__ ); if ( $frame === false ) { $frame = $this->getPreprocessor()->newFrame(); @@ -2677,7 +2714,7 @@ class Parser $flags = $argsOnly ? PPFrame::NO_TEMPLATES : 0; $text = $frame->expand( $dom, $flags ); - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); return $text; } @@ -2718,7 +2755,7 @@ class Parser function limitationWarn( $limitationType, $current=null, $max=null) { $msgName = $limitationType . '-warning'; //does no harm if $current and $max are present but are unnecessary for the message - $warning = wfMsg( $msgName, $current, $max); + $warning = wfMsgExt( $msgName, array( 'parsemag', 'escape' ), $current, $max ); $this->mOutput->addWarning( $warning ); $cat = Title::makeTitleSafe( NS_CATEGORY, wfMsgForContent( $limitationType . '-category' ) ); if ( $cat ) { @@ -2739,9 +2776,8 @@ class Parser * @private */ function braceSubstitution( $piece, $frame ) { - global $wgContLang, $wgLang, $wgAllowDisplayTitle, $wgNonincludableNamespaces; - $fname = __METHOD__; - wfProfileIn( $fname ); + global $wgContLang, $wgAllowDisplayTitle, $wgNonincludableNamespaces; + wfProfileIn( __METHOD__ ); wfProfileIn( __METHOD__.'-setup' ); # Flags @@ -2855,7 +2891,7 @@ class Parser # Workaround for PHP bug 35229 and similar if ( !is_callable( $callback ) ) { - throw new MWException( "Tag hook for $name is not callable\n" ); + throw new MWException( "Tag hook for $function is not callable\n" ); } $result = call_user_func_array( $callback, $allArgs ); $found = true; @@ -2898,19 +2934,19 @@ class Parser $titleText = $title->getPrefixedText(); # Check for language variants if the template is not found if($wgContLang->hasVariants() && $title->getArticleID() == 0){ - $wgContLang->findVariantLink($part1, $title); + $wgContLang->findVariantLink( $part1, $title, true ); } # Do infinite loop check if ( !$frame->loopCheck( $title ) ) { $found = true; - $text = "Template loop detected: [[$titleText]]"; + $text = '' . wfMsgForContent( 'parser-template-loop-warning', $titleText ) . ''; wfDebug( __METHOD__.": template loop broken at '$titleText'\n" ); } # Do recursion depth check $limit = $this->mOptions->getMaxTemplateDepth(); if ( $frame->depth >= $limit ) { $found = true; - $text = "Template recursion depth limit exceeded ($limit)"; + $text = '' . wfMsgForContent( 'parser-template-recursion-depth-warning', $limit ) . ''; } } } @@ -2928,7 +2964,7 @@ class Parser } } else if ( $wgNonincludableNamespaces && in_array( $title->getNamespace(), $wgNonincludableNamespaces ) ) { $found = false; //access denied - wfDebug( "$fname: template inclusion denied for " . $title->getPrefixedDBkey() ); + wfDebug( __METHOD__.": template inclusion denied for " . $title->getPrefixedDBkey() ); } else { list( $text, $title ) = $this->getTemplateDom( $title ); if ( $text !== false ) { @@ -2962,7 +2998,7 @@ class Parser # Recover the source wikitext and return it if ( !$found ) { $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args ); - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); return array( 'object' => $text ); } @@ -3021,7 +3057,7 @@ class Parser $ret = array( 'text' => $text ); } - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); return $ret; } @@ -3121,8 +3157,8 @@ class Parser if( $rev ) { $text = $rev->getText(); } elseif( $title->getNamespace() == NS_MEDIAWIKI ) { - global $wgLang; - $message = $wgLang->lcfirst( $title->getText() ); + global $wgContLang; + $message = $wgContLang->lcfirst( $title->getText() ); $text = wfMsgForContentNoTrans( $message ); if( wfEmptyMsg( $message, $text ) ) { $text = false; @@ -3308,7 +3344,7 @@ class Parser } } - if ( $name == 'html' || $name == 'nowiki' ) { + if ( $name === 'html' || $name === 'nowiki' ) { $this->mStripState->nowiki->setPair( $marker, $output ); } else { $this->mStripState->general->setPair( $marker, $output ); @@ -3384,6 +3420,16 @@ class Parser wfDebug( __METHOD__.": [[MediaWiki:hidden-category-category]] is not a valid title!\n" ); } } + # (bug 8068) Allow control over whether robots index a page. + # + # FIXME (bug 14899): __INDEX__ always overrides __NOINDEX__ here! This + # is not desirable, the last one on the page should win. + if( isset( $this->mDoubleUnderscores['noindex'] ) ) { + $this->mOutput->setIndexPolicy( 'noindex' ); + } elseif( isset( $this->mDoubleUnderscores['index'] ) ) { + $this->mOutput->setIndexPolicy( 'index' ); + } + return $text; } @@ -3402,13 +3448,14 @@ class Parser * @private */ function formatHeadings( $text, $isMain=true ) { - global $wgMaxTocLevel, $wgContLang; + global $wgMaxTocLevel, $wgContLang, $wgEnforceHtmlIds; $doNumberHeadings = $this->mOptions->getNumberHeadings(); - if( !$this->mTitle->quickUserCan( 'edit' ) ) { + $showEditLink = $this->mOptions->getEditSection(); + + // Do not call quickUserCan unless necessary + if( $showEditLink && !$this->mTitle->quickUserCan( 'edit' ) ) { $showEditLink = 0; - } else { - $showEditLink = $this->mOptions->getEditSection(); } # Inhibit editsection links if requested in the page @@ -3554,12 +3601,7 @@ class Parser # # turns into # link text with suffix - $safeHeadline = preg_replace( '//e', - "\$this->mLinkHolders['texts'][\$1]", - $safeHeadline ); - $safeHeadline = preg_replace( '//e', - "\$this->mInterwikiLinkHolders['texts'][\$1]", - $safeHeadline ); + $safeHeadline = $this->replaceLinkHoldersText( $safeHeadline ); # Strip out HTML (other than plain and : bug 8393) $tocline = preg_replace( @@ -3575,13 +3617,60 @@ class Parser # Save headline for section edit hint before it's escaped $headlineHint = $safeHeadline; - $safeHeadline = Sanitizer::escapeId( $safeHeadline ); - # HTML names must be case-insensitively unique (bug 10721) + + if ( $wgEnforceHtmlIds ) { + $legacyHeadline = false; + $safeHeadline = Sanitizer::escapeId( $safeHeadline, + 'noninitial' ); + } else { + # For reverse compatibility, provide an id that's + # HTML4-compatible, like we used to. + # + # It may be worth noting, academically, that it's possible for + # the legacy anchor to conflict with a non-legacy headline + # anchor on the page. In this case likely the "correct" thing + # would be to either drop the legacy anchors or make sure + # they're numbered first. However, this would require people + # to type in section names like "abc_.D7.93.D7.90.D7.A4" + # manually, so let's not bother worrying about it. + $legacyHeadline = Sanitizer::escapeId( $safeHeadline, + 'noninitial' ); + $safeHeadline = Sanitizer::escapeId( $safeHeadline, 'xml' ); + + if ( $legacyHeadline == $safeHeadline ) { + # No reason to have both (in fact, we can't) + $legacyHeadline = false; + } elseif ( $legacyHeadline != Sanitizer::escapeId( + $legacyHeadline, 'xml' ) ) { + # The legacy id is invalid XML. We used to allow this, but + # there's no reason to do so anymore. Backward + # compatibility will fail slightly in this case, but it's + # no big deal. + $legacyHeadline = false; + } + } + + # HTML names must be case-insensitively unique (bug 10721). FIXME: + # Does this apply to Unicode characters? Because we aren't + # handling those here. $arrayKey = strtolower( $safeHeadline ); + if ( $legacyHeadline === false ) { + $legacyArrayKey = false; + } else { + $legacyArrayKey = strtolower( $legacyHeadline ); + } # count how many in assoc. array so we can track dupes in anchors - isset( $refers[$arrayKey] ) ? $refers[$arrayKey]++ : $refers[$arrayKey] = 1; - $refcount[$headlineCount] = $refers[$arrayKey]; + if ( isset( $refers[$arrayKey] ) ) { + $refers[$arrayKey]++; + } else { + $refers[$arrayKey] = 1; + } + if ( isset( $refers[$legacyArrayKey] ) ) { + $refers[$legacyArrayKey]++; + } else { + $refers[$legacyArrayKey] = 1; + } # Don't number the heading if it is the only one (looks silly) if( $doNumberHeadings && count( $matches[3] ) > 1) { @@ -3591,8 +3680,12 @@ class Parser # Create the anchor for linking from the TOC to the section $anchor = $safeHeadline; - if($refcount[$headlineCount] > 1 ) { - $anchor .= '_' . $refcount[$headlineCount]; + $legacyAnchor = $legacyHeadline; + if ( $refers[$arrayKey] > 1 ) { + $anchor .= '_' . $refers[$arrayKey]; + } + if ( $legacyHeadline !== false && $refers[$legacyArrayKey] > 1 ) { + $legacyAnchor .= '_' . $refers[$legacyArrayKey]; } if( $enoughToc && ( !isset($wgMaxTocLevel) || $toclevel<$wgMaxTocLevel ) ) { $toc .= $sk->tocLine($anchor, $tocline, $numbering, $toclevel); @@ -3603,14 +3696,16 @@ class Parser if( $isTemplate ) { # Put a T flag in the section identifier, to indicate to extractSections() # that sections inside should be counted. - $editlink = $sk->editSectionLinkForOther($titleText, "T-$sectionIndex"); + $editlink = $sk->doEditSectionLink(Title::newFromText( $titleText ), "T-$sectionIndex"); } else { - $editlink = $sk->editSectionLink($this->mTitle, $sectionIndex, $headlineHint); + $editlink = $sk->doEditSectionLink($this->mTitle, $sectionIndex, $headlineHint); } } else { $editlink = ''; } - $head[$headlineCount] = $sk->makeHeadline( $level, $matches['attrib'][$headlineCount], $anchor, $headline, $editlink ); + $head[$headlineCount] = $sk->makeHeadline( $level, + $matches['attrib'][$headlineCount], $anchor, $headline, + $editlink, $legacyAnchor ); $headlineCount++; } @@ -3635,7 +3730,7 @@ class Parser $i = 0; foreach( $blocks as $block ) { - if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) { + if( $showEditLink && $headlineCount > 0 && $i == 0 && $block !== "\n" ) { # This is the [edit] link that appears for the top block of text when # section editing is enabled @@ -3737,11 +3832,13 @@ class Parser $nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii! $p1 = "/\[\[(:?$nc+:|:|)($tc+?)( \\($tc+\\))\\|]]/"; # [[ns:page (context)|]] + $p4 = "/\[\[(:?$nc+:|:|)($tc+?)(($tc+))\\|]]/"; # [[ns:page(context)|]] $p3 = "/\[\[(:?$nc+:|:|)($tc+?)( \\($tc+\\)|)(, $tc+|)\\|]]/"; # [[ns:page (context), context|]] $p2 = "/\[\[\\|($tc+)]]/"; # [[|page]] # try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]" $text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text ); + $text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text ); $text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text ); $t = $this->mTitle->getText(); @@ -3787,7 +3884,7 @@ class Parser } else { # Failed to validate; fall back to the default $nickname = $username; - wfDebug( "Parser::getUserSig: $username has bad XML tags in signature.\n" ); + wfDebug( __METHOD__.": $username has bad XML tags in signature.\n" ); } } @@ -3811,7 +3908,7 @@ class Parser * @return mixed An expanded string, or false if invalid. */ function validateSig( $text ) { - return( wfIsWellFormedXmlFragment( $text ) ? $text : false ); + return( Xml::isWellFormedXmlFragment( $text ) ? $text : false ); } /** @@ -3833,6 +3930,11 @@ class Parser $this->setOutputType = self::OT_PREPROCESS; } + # Option to disable this feature + if ( !$this->mOptions->getCleanSignatures() ) { + return $text; + } + # FIXME: regex doesn't respect extension tags or nowiki # => Move this logic to braceSubstitution() $substWord = MagicWord::get( 'subst' ); @@ -3888,19 +3990,17 @@ class Parser global $wgTitle; static $executing = false; - $fname = "Parser::transformMsg"; - # Guard against infinite recursion if ( $executing ) { return $text; } $executing = true; - wfProfileIn($fname); + wfProfileIn(__METHOD__); $text = $this->preprocess( $text, $wgTitle, $options ); $executing = false; - wfProfileOut($fname); + wfProfileOut(__METHOD__); return $text; } @@ -3997,7 +4097,7 @@ class Parser # Add to function cache $mw = MagicWord::get( $id ); if( !$mw ) - throw new MWException( 'Parser::setFunctionHook() expecting a magic word identifier.' ); + throw new MWException( __METHOD__.'() expecting a magic word identifier.' ); $synonyms = $mw->getSynonyms(); $sensitive = intval( $mw->isCaseSensitive() ); @@ -4012,7 +4112,7 @@ class Parser $syn = '#' . $syn; } # Remove trailing colon - if ( substr( $syn, -1, 1 ) == ':' ) { + if ( substr( $syn, -1, 1 ) === ':' ) { $syn = substr( $syn, 0, -1 ); } $this->mFunctionSynonyms[$sensitive][$syn] = $id; @@ -4033,266 +4133,9 @@ class Parser * Replace link placeholders with actual links, in the buffer * Placeholders created in Skin::makeLinkObj() * Returns an array of link CSS classes, indexed by PDBK. - * $options is a bit field, RLH_FOR_UPDATE to select for update */ function replaceLinkHolders( &$text, $options = 0 ) { - global $wgUser; - global $wgContLang; - - $fname = 'Parser::replaceLinkHolders'; - wfProfileIn( $fname ); - - $pdbks = array(); - $colours = array(); - $linkcolour_ids = array(); - $sk = $this->mOptions->getSkin(); - $linkCache = LinkCache::singleton(); - - if ( !empty( $this->mLinkHolders['namespaces'] ) ) { - wfProfileIn( $fname.'-check' ); - $dbr = wfGetDB( DB_SLAVE ); - $page = $dbr->tableName( 'page' ); - $threshold = $wgUser->getOption('stubthreshold'); - - # Sort by namespace - asort( $this->mLinkHolders['namespaces'] ); - - # Generate query - $query = false; - $current = null; - foreach ( $this->mLinkHolders['namespaces'] as $key => $ns ) { - # Make title object - $title = $this->mLinkHolders['titles'][$key]; - - # Skip invalid entries. - # Result will be ugly, but prevents crash. - if ( is_null( $title ) ) { - continue; - } - $pdbk = $pdbks[$key] = $title->getPrefixedDBkey(); - - # Check if it's a static known link, e.g. interwiki - if ( $title->isAlwaysKnown() ) { - $colours[$pdbk] = ''; - } elseif ( ( $id = $linkCache->getGoodLinkID( $pdbk ) ) != 0 ) { - $colours[$pdbk] = ''; - $this->mOutput->addLink( $title, $id ); - } elseif ( $linkCache->isBadLink( $pdbk ) ) { - $colours[$pdbk] = 'new'; - } elseif ( $title->getNamespace() == NS_SPECIAL && !SpecialPage::exists( $pdbk ) ) { - $colours[$pdbk] = 'new'; - } else { - # Not in the link cache, add it to the query - if ( !isset( $current ) ) { - $current = $ns; - $query = "SELECT page_id, page_namespace, page_title, page_is_redirect, page_len"; - $query .= " FROM $page WHERE (page_namespace=$ns AND page_title IN("; - } elseif ( $current != $ns ) { - $current = $ns; - $query .= ")) OR (page_namespace=$ns AND page_title IN("; - } else { - $query .= ', '; - } - - $query .= $dbr->addQuotes( $this->mLinkHolders['dbkeys'][$key] ); - } - } - if ( $query ) { - $query .= '))'; - if ( $options & RLH_FOR_UPDATE ) { - $query .= ' FOR UPDATE'; - } - - $res = $dbr->query( $query, $fname ); - - # Fetch data and form into an associative array - # non-existent = broken - while ( $s = $dbr->fetchObject($res) ) { - $title = Title::makeTitle( $s->page_namespace, $s->page_title ); - $pdbk = $title->getPrefixedDBkey(); - $linkCache->addGoodLinkObj( $s->page_id, $title, $s->page_len, $s->page_is_redirect ); - $this->mOutput->addLink( $title, $s->page_id ); - $colours[$pdbk] = $sk->getLinkColour( $title, $threshold ); - //add id to the extension todolist - $linkcolour_ids[$s->page_id] = $pdbk; - } - //pass an array of page_ids to an extension - wfRunHooks( 'GetLinkColours', array( $linkcolour_ids, &$colours ) ); - } - wfProfileOut( $fname.'-check' ); - - # Do a second query for different language variants of links and categories - if($wgContLang->hasVariants()){ - $linkBatch = new LinkBatch(); - $variantMap = array(); // maps $pdbkey_Variant => $keys (of link holders) - $categoryMap = array(); // maps $category_variant => $category (dbkeys) - $varCategories = array(); // category replacements oldDBkey => newDBkey - - $categories = $this->mOutput->getCategoryLinks(); - - // Add variants of links to link batch - foreach ( $this->mLinkHolders['namespaces'] as $key => $ns ) { - $title = $this->mLinkHolders['titles'][$key]; - if ( is_null( $title ) ) - continue; - - $pdbk = $title->getPrefixedDBkey(); - $titleText = $title->getText(); - - // generate all variants of the link title text - $allTextVariants = $wgContLang->convertLinkToAllVariants($titleText); - - // if link was not found (in first query), add all variants to query - if ( !isset($colours[$pdbk]) ){ - foreach($allTextVariants as $textVariant){ - if($textVariant != $titleText){ - $variantTitle = Title::makeTitle( $ns, $textVariant ); - if(is_null($variantTitle)) continue; - $linkBatch->addObj( $variantTitle ); - $variantMap[$variantTitle->getPrefixedDBkey()][] = $key; - } - } - } - } - - // process categories, check if a category exists in some variant - foreach( $categories as $category ){ - $variants = $wgContLang->convertLinkToAllVariants($category); - foreach($variants as $variant){ - if($variant != $category){ - $variantTitle = Title::newFromDBkey( Title::makeName(NS_CATEGORY,$variant) ); - if(is_null($variantTitle)) continue; - $linkBatch->addObj( $variantTitle ); - $categoryMap[$variant] = $category; - } - } - } - - - if(!$linkBatch->isEmpty()){ - // construct query - $titleClause = $linkBatch->constructSet('page', $dbr); - - $variantQuery = "SELECT page_id, page_namespace, page_title, page_is_redirect, page_len"; - - $variantQuery .= " FROM $page WHERE $titleClause"; - if ( $options & RLH_FOR_UPDATE ) { - $variantQuery .= ' FOR UPDATE'; - } - - $varRes = $dbr->query( $variantQuery, $fname ); - - // for each found variants, figure out link holders and replace - while ( $s = $dbr->fetchObject($varRes) ) { - - $variantTitle = Title::makeTitle( $s->page_namespace, $s->page_title ); - $varPdbk = $variantTitle->getPrefixedDBkey(); - $vardbk = $variantTitle->getDBkey(); - - $holderKeys = array(); - if(isset($variantMap[$varPdbk])){ - $holderKeys = $variantMap[$varPdbk]; - $linkCache->addGoodLinkObj( $s->page_id, $variantTitle, $s->page_len, $s->page_is_redirect ); - $this->mOutput->addLink( $variantTitle, $s->page_id ); - } - - // loop over link holders - foreach($holderKeys as $key){ - $title = $this->mLinkHolders['titles'][$key]; - if ( is_null( $title ) ) continue; - - $pdbk = $title->getPrefixedDBkey(); - - if(!isset($colours[$pdbk])){ - // found link in some of the variants, replace the link holder data - $this->mLinkHolders['titles'][$key] = $variantTitle; - $this->mLinkHolders['dbkeys'][$key] = $variantTitle->getDBkey(); - - // set pdbk and colour - $pdbks[$key] = $varPdbk; - $colours[$varPdbk] = $sk->getLinkColour( $variantTitle, $threshold ); - $linkcolour_ids[$s->page_id] = $pdbk; - } - wfRunHooks( 'GetLinkColours', array( $linkcolour_ids, &$colours ) ); - } - - // check if the object is a variant of a category - if(isset($categoryMap[$vardbk])){ - $oldkey = $categoryMap[$vardbk]; - if($oldkey != $vardbk) - $varCategories[$oldkey]=$vardbk; - } - } - - // rebuild the categories in original order (if there are replacements) - if(count($varCategories)>0){ - $newCats = array(); - $originalCats = $this->mOutput->getCategories(); - foreach($originalCats as $cat => $sortkey){ - // make the replacement - if( array_key_exists($cat,$varCategories) ) - $newCats[$varCategories[$cat]] = $sortkey; - else $newCats[$cat] = $sortkey; - } - $this->mOutput->setCategoryLinks($newCats); - } - } - } - - # Construct search and replace arrays - wfProfileIn( $fname.'-construct' ); - $replacePairs = array(); - foreach ( $this->mLinkHolders['namespaces'] as $key => $ns ) { - $pdbk = $pdbks[$key]; - $searchkey = ""; - $title = $this->mLinkHolders['titles'][$key]; - if ( !isset( $colours[$pdbk] ) || $colours[$pdbk] == 'new' ) { - $linkCache->addBadLinkObj( $title ); - $colours[$pdbk] = 'new'; - $this->mOutput->addLink( $title, 0 ); - $replacePairs[$searchkey] = $sk->makeBrokenLinkObj( $title, - $this->mLinkHolders['texts'][$key], - $this->mLinkHolders['queries'][$key] ); - } else { - $replacePairs[$searchkey] = $sk->makeColouredLinkObj( $title, $colours[$pdbk], - $this->mLinkHolders['texts'][$key], - $this->mLinkHolders['queries'][$key] ); - } - } - $replacer = new HashtableReplacer( $replacePairs, 1 ); - wfProfileOut( $fname.'-construct' ); - - # Do the thing - wfProfileIn( $fname.'-replace' ); - $text = preg_replace_callback( - '/()/', - $replacer->cb(), - $text); - - wfProfileOut( $fname.'-replace' ); - } - - # Now process interwiki link holders - # This is quite a bit simpler than internal links - if ( !empty( $this->mInterwikiLinkHolders['texts'] ) ) { - wfProfileIn( $fname.'-interwiki' ); - # Make interwiki link HTML - $replacePairs = array(); - foreach( $this->mInterwikiLinkHolders['texts'] as $key => $link ) { - $title = $this->mInterwikiLinkHolders['titles'][$key]; - $replacePairs[$key] = $sk->makeLinkObj( $title, $link ); - } - $replacer = new HashtableReplacer( $replacePairs, 1 ); - - $text = preg_replace_callback( - '//', - $replacer->cb(), - $text ); - wfProfileOut( $fname.'-interwiki' ); - } - - wfProfileOut( $fname ); - return $colours; + return $this->mLinkHolders->replace( $text ); } /** @@ -4302,36 +4145,7 @@ class Parser * @return string */ function replaceLinkHoldersText( $text ) { - $fname = 'Parser::replaceLinkHoldersText'; - wfProfileIn( $fname ); - - $text = preg_replace_callback( - '//', - array( &$this, 'replaceLinkHoldersTextCallback' ), - $text ); - - wfProfileOut( $fname ); - return $text; - } - - /** - * @param array $matches - * @return string - * @private - */ - function replaceLinkHoldersTextCallback( $matches ) { - $type = $matches[1]; - $key = $matches[2]; - if( $type == 'LINK' ) { - if( isset( $this->mLinkHolders['texts'][$key] ) ) { - return $this->mLinkHolders['texts'][$key]; - } - } elseif( $type == 'IWLINK' ) { - if( isset( $this->mInterwikiLinkHolders['texts'][$key] ) ) { - return $this->mInterwikiLinkHolders['texts'][$key]; - } - } - return $matches[0]; + return $this->mLinkHolders->replaceText( $text ); } /** @@ -4342,7 +4156,7 @@ class Parser $content = StringUtils::delimiterReplace( '', '', '$1', $text, 'i' ); $attribs = Sanitizer::validateTagAttributes( $attribs, 'pre' ); - return wfOpenElement( 'pre', $attribs ) . + return Xml::openElement( 'pre', $attribs ) . Xml::escapeTagsOnly( $content ) . '

'; } @@ -4385,7 +4199,7 @@ class Parser wfRunHooks( 'BeforeParserrenderImageGallery', array( &$this, &$ig ) ); - $lines = explode( "\n", $text ); + $lines = StringUtils::explode( "\n", $text ); foreach ( $lines as $line ) { # match lines like these: # Image:someimage.jpg|This is some image @@ -4398,7 +4212,7 @@ class Parser if ( strpos( $matches[0], '%' ) !== false ) $matches[1] = urldecode( $matches[1] ); - $tp = Title::newFromText( $matches[1] ); + $tp = Title::newFromText( $matches[1]/*, NS_FILE*/ ); $nt =& $tp; if( is_null( $nt ) ) { # Bogus title. Ignore these so we don't bomb out later. @@ -4415,7 +4229,7 @@ class Parser $ig->add( $nt, $html ); # Only add real images (bug #5586) - if ( $nt->getNamespace() == NS_IMAGE ) { + if ( $nt->getNamespace() == NS_FILE ) { $this->mOutput->addImage( $nt->getDBkey() ); } } @@ -4435,7 +4249,7 @@ class Parser 'vertAlign' => array( 'baseline', 'sub', 'super', 'top', 'text-top', 'middle', 'bottom', 'text-bottom' ), 'frame' => array( 'thumbnail', 'manualthumb', 'framed', 'frameless', - 'upright', 'border' ), + 'upright', 'border', 'link', 'alt' ), ); static $internalParamMap; if ( !$internalParamMap ) { @@ -4464,20 +4278,24 @@ class Parser /** * Parse image options text and use it to make an image + * @param Title $title + * @param string $options + * @param LinkHolderArray $holders */ - function makeImage( $title, $options ) { + function makeImage( $title, $options, $holders = false ) { # Check if the options text is of the form "options|alt text" # Options are: - # * thumbnail make a thumbnail with enlarge-icon and caption, alignment depends on lang - # * left no resizing, just left align. label is used for alt= only - # * right same, but right aligned - # * none same, but not aligned - # * ___px scale to ___ pixels width, no aligning. e.g. use in taxobox - # * center center the image - # * framed Keep original image size, no magnify-button. - # * frameless like 'thumb' but without a frame. Keeps user preferences for width - # * upright reduce width for upright images, rounded to full __0 px - # * border draw a 1px border around the image + # * thumbnail make a thumbnail with enlarge-icon and caption, alignment depends on lang + # * left no resizing, just left align. label is used for alt= only + # * right same, but right aligned + # * none same, but not aligned + # * ___px scale to ___ pixels width, no aligning. e.g. use in taxobox + # * center center the image + # * framed Keep original image size, no magnify-button. + # * frameless like 'thumb' but without a frame. Keeps user preferences for width + # * upright reduce width for upright images, rounded to full __0 px + # * border draw a 1px border around the image + # * alt Text for HTML alt attribute (defaults to empty) # vertical-align values (no % or length right now): # * baseline # * sub @@ -4488,7 +4306,7 @@ class Parser # * bottom # * text-bottom - $parts = array_map( 'trim', explode( '|', $options) ); + $parts = StringUtils::explode( "|", $options ); $sk = $this->mOptions->getSkin(); # Give extensions a chance to select the file revision for us @@ -4496,11 +4314,21 @@ class Parser wfRunHooks( 'BeforeParserMakeImageLinkObj', array( &$this, &$title, &$skip, &$time, &$descQuery ) ); if ( $skip ) { - return $sk->makeLinkObj( $title ); + return $sk->link( $title ); } + # Get the file + $imagename = $title->getDBkey(); + if ( isset( $this->mFileCache[$imagename][$time] ) ) { + $file = $this->mFileCache[$imagename][$time]; + } else { + $file = wfFindFile( $title, $time ); + if ( count( $this->mFileCache ) > 1000 ) { + $this->mFileCache = array(); + } + $this->mFileCache[$imagename][$time] = $file; + } # Get parameter map - $file = wfFindFile( $title, $time ); $handler = $file ? $file->getHandler() : false; list( $paramMap, $mwArray ) = $this->getImageParams( $handler ); @@ -4510,13 +4338,14 @@ class Parser $params = array( 'frame' => array(), 'handler' => array(), 'horizAlign' => array(), 'vertAlign' => array() ); foreach( $parts as $part ) { + $part = trim( $part ); list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $part ); $validated = false; if( isset( $paramMap[$magicName] ) ) { list( $type, $paramName ) = $paramMap[$magicName]; // Special case; width and height come in one variable together - if( $type == 'handler' && $paramName == 'width' ) { + if( $type === 'handler' && $paramName === 'width' ) { $m = array(); # (bug 13500) In both cases (width/height and width only), # permit trailing "px" for backward compatibility. @@ -4539,16 +4368,42 @@ class Parser } } // else no validation -- bug 13436 } else { - if ( $type == 'handler' ) { + if ( $type === 'handler' ) { # Validate handler parameter $validated = $handler->validateParam( $paramName, $value ); } else { # Validate internal parameters switch( $paramName ) { - case "manualthumb": - /// @fixme - possibly check validity here? - /// downstream behavior seems odd with missing manual thumbs. + case 'manualthumb': + case 'alt': + // @fixme - possibly check validity here for + // manualthumb? downstream behavior seems odd with + // missing manual thumbs. $validated = true; + $value = $this->stripAltText( $value, $holders ); + break; + case 'link': + $chars = self::EXT_LINK_URL_CLASS; + $prots = $this->mUrlProtocols; + if ( $value === '' ) { + $paramName = 'no-link'; + $value = true; + $validated = true; + } elseif ( preg_match( "/^$prots/", $value ) ) { + if ( preg_match( "/^($prots)$chars+$/", $value, $m ) ) { + $paramName = 'link-url'; + $this->mOutput->addExternalLink( $value ); + $validated = true; + } + } else { + $linkTitle = Title::newFromText( $value ); + if ( $linkTitle ) { + $paramName = 'link-title'; + $value = $linkTitle; + $this->mOutput->addLink( $linkTitle ); + $validated = true; + } + } break; default: // Most other things appear to be empty or numeric... @@ -4574,17 +4429,32 @@ class Parser $params['frame']['valign'] = key( $params['vertAlign'] ); } - # Strip bad stuff out of the alt text - $alt = $this->replaceLinkHoldersText( $caption ); + $params['frame']['caption'] = $caption; - # make sure there are no placeholders in thumbnail attributes - # that are later expanded to html- so expand them now and - # remove the tags - $alt = $this->mStripState->unstripBoth( $alt ); - $alt = Sanitizer::stripAllTags( $alt ); + $params['frame']['title'] = $this->stripAltText( $caption, $holders ); - $params['frame']['alt'] = $alt; - $params['frame']['caption'] = $caption; + # In the old days, [[Image:Foo|text...]] would set alt text. Later it + # came to also set the caption, ordinary text after the image -- which + # makes no sense, because that just repeats the text multiple times in + # screen readers. It *also* came to set the title attribute. + # + # Now that we have an alt attribute, we should not set the alt text to + # equal the caption: that's worse than useless, it just repeats the + # text. This is the framed/thumbnail case. If there's no caption, we + # use the unnamed parameter for alt text as well, just for the time be- + # ing, if the unnamed param is set and the alt param is not. + # + # For the future, we need to figure out if we want to tweak this more, + # e.g., introducing a title= parameter for the title; ignoring the un- + # named parameter entirely for images without a caption; adding an ex- + # plicit caption= parameter and preserving the old magic unnamed para- + # meter for BC; ... + if( $caption !== '' && !isset( $params['frame']['alt'] ) + && !isset( $params['frame']['framed'] ) + && !isset( $params['frame']['thumbnail'] ) + && !isset( $params['frame']['manualthumb'] ) ) { + $params['frame']['alt'] = $params['frame']['title']; + } wfRunHooks( 'ParserMakeImageParams', array( $title, $file, &$params ) ); @@ -4598,6 +4468,25 @@ class Parser return $ret; } + + protected function stripAltText( $caption, $holders ) { + # Strip bad stuff out of the title (tooltip). We can't just use + # replaceLinkHoldersText() here, because if this function is called + # from replaceInternalLinks2(), mLinkHolders won't be up-to-date. + if ( $holders ) { + $tooltip = $holders->replaceText( $caption ); + } else { + $tooltip = $this->replaceLinkHoldersText( $caption ); + } + + # make sure there are no placeholders in thumbnail attributes + # that are later expanded to html- so expand them now and + # remove the tags + $tooltip = $this->mStripState->unstripBoth( $tooltip ); + $tooltip = Sanitizer::stripAllTags( $tooltip ); + + return $tooltip; + } /** * Set a flag in the output object indicating that the content is dynamic and @@ -4678,7 +4567,7 @@ class Parser $sectionParts = explode( '-', $section ); $sectionIndex = array_pop( $sectionParts ); foreach ( $sectionParts as $part ) { - if ( $part == 'T' ) { + if ( $part === 'T' ) { $flags |= self::PTD_FOR_INCLUSION; } } @@ -4695,14 +4584,14 @@ class Parser $targetLevel = 1000; } else { while ( $node ) { - if ( $node->getName() == 'h' ) { + if ( $node->getName() === 'h' ) { $bits = $node->splitHeading(); if ( $bits['i'] == $sectionIndex ) { $targetLevel = $bits['level']; break; } } - if ( $mode == 'replace' ) { + if ( $mode === 'replace' ) { $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG ); } $node = $node->getNextSibling(); @@ -4711,7 +4600,7 @@ class Parser if ( !$node ) { // Not found - if ( $mode == 'get' ) { + if ( $mode === 'get' ) { return $newText; } else { return $text; @@ -4720,21 +4609,21 @@ class Parser // Find the end of the section, including nested sections do { - if ( $node->getName() == 'h' ) { + if ( $node->getName() === 'h' ) { $bits = $node->splitHeading(); $curLevel = $bits['level']; if ( $bits['i'] != $sectionIndex && $curLevel <= $targetLevel ) { break; } } - if ( $mode == 'get' ) { + if ( $mode === 'get' ) { $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG ); } $node = $node->getNextSibling(); } while ( $node ); // Write out the remainder (in replace mode only) - if ( $mode == 'replace' ) { + if ( $mode === 'replace' ) { // Output the replacement text // Add two newlines on -- trailing whitespace in $newText is conventionally // stripped by the editor, so we need both newlines to restore the paragraph gap @@ -4820,15 +4709,27 @@ class Parser * @return string */ public function getDefaultSort() { + global $wgCategoryPrefixedDefaultSortkey; if( $this->mDefaultSort !== false ) { return $this->mDefaultSort; + } elseif ($this->mTitle->getNamespace() == NS_CATEGORY || + !$wgCategoryPrefixedDefaultSortkey) { + return $this->mTitle->getText(); } else { - return $this->mTitle->getNamespace() == NS_CATEGORY - ? $this->mTitle->getText() - : $this->mTitle->getPrefixedText(); + return $this->mTitle->getPrefixedText(); } } + /** + * Accessor for $mDefaultSort + * Unlike getDefaultSort(), will return false if none is set + * + * @return string or false + */ + public function getCustomDefaultSort() { + return $this->mDefaultSort; + } + /** * Try to guess the section anchor name based on a wikitext fragment * presumably extracted from a heading, for example "Header" from @@ -4962,7 +4863,7 @@ class StripState { do { $oldText = $text; $text = $this->general->replace( $text ); - } while ( $text != $oldText ); + } while ( $text !== $oldText ); wfProfileOut( __METHOD__ ); return $text; } @@ -4972,7 +4873,7 @@ class StripState { do { $oldText = $text; $text = $this->nowiki->replace( $text ); - } while ( $text != $oldText ); + } while ( $text !== $oldText ); wfProfileOut( __METHOD__ ); return $text; } @@ -4983,7 +4884,7 @@ class StripState { $oldText = $text; $text = $this->general->replace( $text ); $text = $this->nowiki->replace( $text ); - } while ( $text != $oldText ); + } while ( $text !== $oldText ); wfProfileOut( __METHOD__ ); return $text; } @@ -4997,7 +4898,7 @@ class OnlyIncludeReplacer { var $output = ''; function replace( $matches ) { - if ( substr( $matches[1], -1 ) == "\n" ) { + if ( substr( $matches[1], -1 ) === "\n" ) { $this->output .= substr( $matches[1], 0, -1 ); } else { $this->output .= $matches[1]; diff --git a/includes/parser/ParserCache.php b/includes/parser/ParserCache.php index bf11da2e..7e61157a 100644 --- a/includes/parser/ParserCache.php +++ b/includes/parser/ParserCache.php @@ -35,9 +35,9 @@ class ParserCache { } else { $edit = ''; } - $pageid = intval( $article->getID() ); + $pageid = $article->getID(); $renderkey = (int)($action == 'render'); - $key = wfMemcKey( 'pcache', 'idhash', "$pageid-$renderkey!$hash$edit" ); + $key = wfMemcKey( 'pcache', 'idhash', "{$pageid}-{$renderkey}!{$hash}{$edit}" ); return $key; } diff --git a/includes/parser/ParserOptions.php b/includes/parser/ParserOptions.php index 330ec446..5b8cd3ee 100644 --- a/includes/parser/ParserOptions.php +++ b/includes/parser/ParserOptions.php @@ -13,6 +13,7 @@ class ParserOptions var $mInterwikiMagic; # Interlanguage links are removed and returned in an array var $mAllowExternalImages; # Allow external images inline var $mAllowExternalImagesFrom; # If not, any exception? + var $mEnableImageWhitelist; # If not or it doesn't match, should we check an on-wiki whitelist? var $mSkin; # Reference to the preferred skin var $mDateFormat; # Date format index var $mEditSection; # Create "edit section" links @@ -29,6 +30,7 @@ class ParserOptions var $mTemplateCallback; # Callback for template fetching var $mEnableLimitReport; # Enable limit report in an HTML comment on output var $mTimestamp; # Timestamp used for {{CURRENTDAY}} etc. + var $mExternalLinkTarget; # Target attribute for external links var $mUser; # Stored user object, just used to initialise the skin @@ -37,6 +39,7 @@ class ParserOptions function getInterwikiMagic() { return $this->mInterwikiMagic; } function getAllowExternalImages() { return $this->mAllowExternalImages; } function getAllowExternalImagesFrom() { return $this->mAllowExternalImagesFrom; } + function getEnableImageWhitelist() { return $this->mEnableImageWhitelist; } function getEditSection() { return $this->mEditSection; } function getNumberHeadings() { return $this->mNumberHeadings; } function getAllowSpecialInclusion() { return $this->mAllowSpecialInclusion; } @@ -49,6 +52,8 @@ class ParserOptions function getRemoveComments() { return $this->mRemoveComments; } function getTemplateCallback() { return $this->mTemplateCallback; } function getEnableLimitReport() { return $this->mEnableLimitReport; } + function getCleanSignatures() { return $this->mCleanSignatures; } + function getExternalLinkTarget() { return $this->mExternalLinkTarget; } function getSkin() { if ( !isset( $this->mSkin ) ) { @@ -76,6 +81,7 @@ class ParserOptions function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); } function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); } function setAllowExternalImagesFrom( $x ) { return wfSetVar( $this->mAllowExternalImagesFrom, $x ); } + function setEnableImageWhitelist( $x ) { return wfSetVar( $this->mEnableImageWhitelist, $x ); } function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); } function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); } function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); } @@ -91,6 +97,8 @@ class ParserOptions function setTemplateCallback( $x ) { return wfSetVar( $this->mTemplateCallback, $x ); } function enableLimitReport( $x = true ) { return wfSetVar( $this->mEnableLimitReport, $x ); } function setTimestamp( $x ) { return wfSetVar( $this->mTimestamp, $x ); } + function setCleanSignatures( $x ) { return wfSetVar( $this->mCleanSignatures, $x ); } + function setExternalLinkTarget( $x ) { return wfSetVar( $this->mExternalLinkTarget, $x ); } function __construct( $user = null ) { $this->initialiseFromUser( $user ); @@ -107,8 +115,9 @@ class ParserOptions /** Get user options */ function initialiseFromUser( $userInput ) { global $wgUseTeX, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages; - global $wgAllowExternalImagesFrom, $wgAllowSpecialInclusion, $wgMaxArticleSize; - global $wgMaxPPNodeCount, $wgMaxTemplateDepth, $wgMaxPPExpandDepth; + global $wgAllowExternalImagesFrom, $wgEnableImageWhitelist, $wgAllowSpecialInclusion, $wgMaxArticleSize; + global $wgMaxPPNodeCount, $wgMaxTemplateDepth, $wgMaxPPExpandDepth, $wgCleanSignatures; + global $wgExternalLinkTarget; $fname = 'ParserOptions::initialiseFromUser'; wfProfileIn( $fname ); if ( !$userInput ) { @@ -129,6 +138,7 @@ class ParserOptions $this->mInterwikiMagic = $wgInterwikiMagic; $this->mAllowExternalImages = $wgAllowExternalImages; $this->mAllowExternalImagesFrom = $wgAllowExternalImagesFrom; + $this->mEnableImageWhitelist = $wgEnableImageWhitelist; $this->mSkin = null; # Deferred $this->mDateFormat = null; # Deferred $this->mEditSection = true; @@ -144,6 +154,8 @@ class ParserOptions $this->mRemoveComments = true; $this->mTemplateCallback = array( 'Parser', 'statelessFetchTemplate' ); $this->mEnableLimitReport = false; + $this->mCleanSignatures = $wgCleanSignatures; + $this->mExternalLinkTarget = $wgExternalLinkTarget; wfProfileOut( $fname ); } } diff --git a/includes/parser/ParserOutput.php b/includes/parser/ParserOutput.php index f98d5641..35cb5c92 100644 --- a/includes/parser/ParserOutput.php +++ b/includes/parser/ParserOutput.php @@ -5,25 +5,26 @@ */ class ParserOutput { - var $mText, # The output text - $mLanguageLinks, # List of the full text of language links, in the order they appear - $mCategories, # Map of category names to sort keys - $mContainsOldMagic, # Boolean variable indicating if the input contained variables like {{CURRENTDAY}} - $mCacheTime, # Time when this object was generated, or -1 for uncacheable. Used in ParserCache. - $mVersion, # Compatibility check - $mTitleText, # title text of the chosen language variant - $mLinks, # 2-D map of NS/DBK to ID for the links in the document. ID=zero for broken. - $mTemplates, # 2-D map of NS/DBK to ID for the template references. ID=zero for broken. - $mTemplateIds, # 2-D map of NS/DBK to rev ID for the template references. ID=zero for broken. - $mImages, # DB keys of the images used, in the array key only - $mExternalLinks, # External link URLs, in the key only - $mNewSection, # Show a new section link? - $mNoGallery, # No gallery on category page? (__NOGALLERY__) - $mHeadItems, # Items to put in the section - $mOutputHooks, # Hook tags as per $wgParserOutputHooks - $mWarnings, # Warning text to be returned to the user. Wikitext formatted, in the key only - $mSections, # Table of contents - $mProperties; # Name/value pairs to be cached in the DB + var $mText, # The output text + $mLanguageLinks, # List of the full text of language links, in the order they appear + $mCategories, # Map of category names to sort keys + $mContainsOldMagic, # Boolean variable indicating if the input contained variables like {{CURRENTDAY}} + $mTitleText, # title text of the chosen language variant + $mCacheTime = '', # Time when this object was generated, or -1 for uncacheable. Used in ParserCache. + $mVersion = Parser::VERSION, # Compatibility check + $mLinks = array(), # 2-D map of NS/DBK to ID for the links in the document. ID=zero for broken. + $mTemplates = array(), # 2-D map of NS/DBK to ID for the template references. ID=zero for broken. + $mTemplateIds = array(), # 2-D map of NS/DBK to rev ID for the template references. ID=zero for broken. + $mImages = array(), # DB keys of the images used, in the array key only + $mExternalLinks = array(), # External link URLs, in the key only + $mNewSection = false, # Show a new section link? + $mNoGallery = false, # No gallery on category page? (__NOGALLERY__) + $mHeadItems = array(), # Items to put in the section + $mOutputHooks = array(), # Hook tags as per $wgParserOutputHooks + $mWarnings = array(), # Warning text to be returned to the user. Wikitext formatted, in the key only + $mSections = array(), # Table of contents + $mProperties = array(); # Name/value pairs to be cached in the DB + private $mIndexPolicy = ''; # 'index' or 'noindex'? Any other value will result in no change. /** * Overridden title for display @@ -37,21 +38,7 @@ class ParserOutput $this->mLanguageLinks = $languageLinks; $this->mCategories = $categoryLinks; $this->mContainsOldMagic = $containsOldMagic; - $this->mCacheTime = ''; - $this->mVersion = Parser::VERSION; $this->mTitleText = $titletext; - $this->mSections = array(); - $this->mLinks = array(); - $this->mTemplates = array(); - $this->mImages = array(); - $this->mExternalLinks = array(); - $this->mNewSection = false; - $this->mNoGallery = false; - $this->mHeadItems = array(); - $this->mTemplateIds = array(); - $this->mOutputHooks = array(); - $this->mWarnings = array(); - $this->mProperties = array(); } function getText() { return $this->mText; } @@ -69,6 +56,7 @@ class ParserOutput function getSubtitle() { return $this->mSubtitle; } function getOutputHooks() { return (array)$this->mOutputHooks; } function getWarnings() { return array_keys( $this->mWarnings ); } + function getIndexPolicy() { return $this->mIndexPolicy; } function containsOldMagic() { return $this->mContainsOldMagic; } function setText( $text ) { return wfSetVar( $this->mText, $text ); } @@ -78,6 +66,7 @@ class ParserOutput function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); } function setTitleText( $t ) { return wfSetVar( $this->mTitleText, $t ); } function setSections( $toc ) { return wfSetVar( $this->mSections, $toc ); } + function setIndexPolicy( $policy ) { return wfSetVar( $this->mIndexPolicy, $policy ); } function addCategory( $c, $sort ) { $this->mCategories[$c] = $sort; } function addLanguageLink( $t ) { $this->mLanguageLinks[] = $t; } @@ -98,6 +87,14 @@ class ParserOutput function addLink( $title, $id = null ) { $ns = $title->getNamespace(); $dbk = $title->getDBkey(); + if ( $ns == NS_MEDIA ) { + // Normalize this pseudo-alias if it makes it down here... + $ns = NS_FILE; + } elseif( $ns == NS_SPECIAL ) { + // We don't record Special: links currently + // It might actually be wise to, but we'd need to do some normalization. + return; + } if ( !isset( $this->mLinks[$ns] ) ) { $this->mLinks[$ns] = array(); } diff --git a/includes/parser/Parser_DiffTest.php b/includes/parser/Parser_DiffTest.php index be3702cf..608c883a 100644 --- a/includes/parser/Parser_DiffTest.php +++ b/includes/parser/Parser_DiffTest.php @@ -6,6 +6,7 @@ class Parser_DiffTest { var $parsers, $conf; + var $shortOutput = false; var $dfUniqPrefix; @@ -28,6 +29,9 @@ class Parser_DiffTest $doneHook = true; $wgHooks['ParserClearState'][] = array( $this, 'onClearState' ); } + if ( isset( $this->conf['shortOutput'] ) ) { + $this->shortOutput = $this->conf['shortOutput']; + } foreach ( $this->conf['parsers'] as $i => $parserConf ) { if ( !is_array( $parserConf ) ) { @@ -65,13 +69,37 @@ class Parser_DiffTest $lastResult = $currentResult; } if ( $mismatch ) { - throw new MWException( "Parser_DiffTest: results mismatch on call to $name\n" . - 'Arguments: ' . var_export( $args, true ) . "\n" . - 'Results: ' . var_export( $results, true ) . "\n" ); + if ( count( $results ) == 2 ) { + $resultsList = array(); + foreach ( $this->parsers as $i => $parser ) { + $resultsList[] = var_export( $results[$i], true ); + } + $diff = wfDiff( $resultsList[0], $resultsList[1] ); + } else { + $diff = '[too many parsers]'; + } + $msg = "Parser_DiffTest: results mismatch on call to $name\n"; + if ( !$this->shortOutput ) { + $msg .= 'Arguments: ' . $this->formatArray( $args ) . "\n"; + } + $msg .= 'Results: ' . $this->formatArray( $results ) . "\n" . + "Diff: $diff\n"; + throw new MWException( $msg ); } return $lastResult; } + function formatArray( $array ) { + if ( $this->shortOutput ) { + foreach ( $array as $key => $value ) { + if ( $value instanceof ParserOutput ) { + $array[$key] = "ParserOutput: {$value->getText()}"; + } + } + } + return var_export( $array, true ); + } + function setFunctionHook( $id, $callback, $flags = 0 ) { $this->init(); foreach ( $this->parsers as $i => $parser ) { diff --git a/includes/parser/Parser_LinkHooks.php b/includes/parser/Parser_LinkHooks.php new file mode 100644 index 00000000..2b306933 --- /dev/null +++ b/includes/parser/Parser_LinkHooks.php @@ -0,0 +1,315 @@ +"\\x00-\\x20\\x7F]'; + const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)([^][<>"\\x00-\\x20\\x7F]+) + \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sx'; + + /**#@+ + * @private + */ + # Persistent: + var $mLinkHooks; + + /**#@-*/ + + /** + * Constructor + * + * @public + */ + function __construct( $conf = array() ) { + parent::__construct( $conf ); + $this->mLinkHooks = array(); + } + + /** + * Do various kinds of initialisation on the first call of the parser + */ + function firstCallInit() { + parent::__construct(); + if ( !$this->mFirstCall ) { + return; + } + $this->mFirstCall = false; + + wfProfileIn( __METHOD__ ); + + $this->setHook( 'pre', array( $this, 'renderPreTag' ) ); + CoreParserFunctions::register( $this ); + CoreLinkFunctions::register( $this ); + $this->initialiseVariables(); + + wfRunHooks( 'ParserFirstCallInit', array( &$this ) ); + wfProfileOut( __METHOD__ ); + } + + /** + * Create a link hook, e.g. [[Namepsace:...|display}} + * The callback function should have the form: + * function myLinkCallback( $parser, $holders, $markers, + * Title $title, $titleText, &$sortText = null, &$leadingColon = false ) { ... } + * + * Or with SLH_PATTERN: + * function myLinkCallback( $parser, $holders, $markers, ) + * &$titleText, &$sortText = null, &$leadingColon = false ) { ... } + * + * The callback may either return a number of different possible values: + * String) Text result of the link + * True) (Treat as link) Parse the link according to normal link rules + * False) (Bad link) Just output the raw wikitext (You may modify the text first) + * + * @public + * + * @param integer|string $ns The Namespace ID or regex pattern if SLH_PATTERN is set + * @param mixed $callback The callback function (and object) to use + * @param integer $flags a combination of the following flags: + * SLH_PATTERN Use a regex link pattern rather than a namespace + * + * @return The old callback function for this name, if any + */ + function setLinkHook( $ns, $callback, $flags = 0 ) { + if( $flags & SLH_PATTERN && !is_string($ns) ) + throw new MWException( __METHOD__.'() expecting a regex string pattern.' ); + elseif( $flags | ~SLH_PATTERN && !is_int($ns) ) + throw new MWException( __METHOD__.'() expecting a namespace index.' ); + $oldVal = isset( $this->mLinkHooks[$ns] ) ? $this->mLinkHooks[$ns][0] : null; + $this->mLinkHooks[$ns] = array( $callback, $flags ); + return $oldVal; + } + + /** + * Get all registered link hook identifiers + * + * @return array + */ + function getLinkHooks() { + return array_keys( $this->mLinkHooks ); + } + + /** + * Process [[ ]] wikilinks + * @return LinkHolderArray + * + * @private + */ + function replaceInternalLinks2( &$s ) { + global $wgContLang; + + wfProfileIn( __METHOD__ ); + + wfProfileIn( __METHOD__.'-setup' ); + static $tc = FALSE, $titleRegex;//$e1, $e1_img; + if( !$tc ) { + # the % is needed to support urlencoded titles as well + $tc = Title::legalChars() . '#%'; + # Match a link having the form [[namespace:link|alternate]]trail + //$e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD"; + # Match cases where there is no "]]", which might still be images + //$e1_img = "/^([{$tc}]+)\\|(.*)\$/sD"; + # Match a valid plain title + $titleRegex = "/^([{$tc}]+)$/sD"; + } + + $sk = $this->mOptions->getSkin(); + $holders = new LinkHolderArray( $this ); + + if( is_null( $this->mTitle ) ) { + wfProfileOut( __METHOD__ ); + wfProfileOut( __METHOD__.'-setup' ); + throw new MWException( __METHOD__.": \$this->mTitle is null\n" ); + } + $nottalk = !$this->mTitle->isTalkPage(); + + if($wgContLang->hasVariants()) { + $selflink = $wgContLang->convertLinkToAllVariants($this->mTitle->getPrefixedText()); + } else { + $selflink = array($this->mTitle->getPrefixedText()); + } + wfProfileOut( __METHOD__.'-setup' ); + + $offset = 0; + $offsetStack = array(); + $markers = new LinkMarkerReplacer( $this, $holders, array( &$this, 'replaceInternalLinksCallback' ) ); + while( true ) { + $startBracketOffset = strpos( $s, '[[', $offset ); + $endBracketOffset = strpos( $s, ']]', $offset ); + # Finish when there are no more brackets + if( $startBracketOffset === false && $endBracketOffset === false ) break; + # Determine if the bracket is a starting or ending bracket + # When we find both, use the first one + elseif( $startBracketOffset !== false && $endBracketOffset !== false ) + $isStart = $startBracketOffset <= $endBracketOffset; + # When we only found one, check which it is + else $isStart = $startBracketOffset !== false; + $bracketOffset = $isStart ? $startBracketOffset : $endBracketOffset; + if( $isStart ) { + /** Opening bracket **/ + # Just push our current offset in the string onto the stack + $offsetStack[] = $startBracketOffset; + } else { + /** Closing bracket **/ + # Pop the start pos for our current link zone off the stack + $startBracketOffset = array_pop($offsetStack); + # Just to clean up the code, lets place offsets on the outer ends + $endBracketOffset += 2; + + # Only do logic if we actually have a opening bracket for this + if( isset($startBracketOffset) ) { + # Extract text inside the link + @list( $titleText, $paramText ) = explode('|', + substr($s, $startBracketOffset+2, $endBracketOffset-$startBracketOffset-4), 2); + # Create markers only for valid links + if( preg_match( $titleRegex, $titleText ) ) { + # Store the text for the marker + $marker = $markers->addMarker($titleText, $paramText); + # Replace the current link with the marker + $s = substr($s,0,$startBracketOffset). + $marker. + substr($s, $endBracketOffset); + # We have modified $s, because of this we need to set the + # offset manually since the end position is different now + $offset = $startBracketOffset+strlen($marker); + continue; + } + # ToDo: Some LinkHooks may allow recursive links inside of + # the link text, create a regex that also matches our + # sequence in titles + # ToDO: Some LinkHooks use patterns rather than namespaces + # these need to be tested at this point here + } + + } + # Bump our offset to after our current bracket + $offset = $bracketOffset+2; + } + + + # Now expand our tree + wfProfileIn( __METHOD__.'-expand' ); + $s = $markers->expand( $s ); + wfProfileOut( __METHOD__.'-expand' ); + + wfProfileOut( __METHOD__ ); + return $holders; + } + + function replaceInternalLinksCallback( $parser, $holders, $markers, $titleText, $paramText ) { + wfProfileIn( __METHOD__ ); + $wt = isset($paramText) ? "[[$titleText|$paramText]]" : "[[$titleText]]"; + wfProfileIn( __METHOD__."-misc" ); + # Don't allow internal links to pages containing + # PROTO: where PROTO is a valid URL protocol; these + # should be external links. + if( preg_match('/^\b(?:' . wfUrlProtocols() . ')/', $titleText) ) { + wfProfileOut( __METHOD__ ); + return $wt; + } + + # Make subpage if necessary + if( $this->areSubpagesAllowed() ) { + $titleText = $this->maybeDoSubpageLink( $titleText, $paramText ); + } + + # Check for a leading colon and strip it if it is there + $leadingColon = $titleText[0] == ':'; + if( $leadingColon ) $titleText = substr( $titleText, 1 ); + + wfProfileOut( __METHOD__."-misc" ); + # Make title object + wfProfileIn( __METHOD__."-title" ); + $title = Title::newFromText( $this->mStripState->unstripNoWiki($titleText) ); + if( !$title ) { + wfProfileOut( __METHOD__."-title" ); + wfProfileOut( __METHOD__ ); + return $wt; + } + $ns = $title->getNamespace(); + wfProfileOut( __METHOD__."-title" ); + + # Default for Namespaces is a default link + # ToDo: Default for patterns is plain wikitext + $return = true; + if( isset($this->mLinkHooks[$ns]) ) { + list( $callback, $flags ) = $this->mLinkHooks[$ns]; + if( $flags & SLH_PATTERN ) { + $args = array( $parser, $holders, $markers, $titleText, &$paramText, &$leadingColon ); + } else { + $args = array( $parser, $holders, $markers, $title, $titleText, &$paramText, &$leadingColon ); + } + # Workaround for PHP bug 35229 and similar + if ( !is_callable( $callback ) ) { + throw new MWException( "Tag hook for $name is not callable\n" ); + } + $return = call_user_func_array( $callback, $args ); + } + if( $return === true ) { + # True (treat as plain link) was returned, call the defaultLinkHook + $args = array( $parser, $holders, $markers, $title, $titleText, &$paramText, &$leadingColon ); + $return = call_user_func_array( array( 'CoreLinkFunctions', 'defaultLinkHook' ), $args ); + } + if( $return === false ) { + # False (no link) was returned, output plain wikitext + # Build it again as the hook is allowed to modify $paramText + return isset($paramText) ? "[[$titleText|$paramText]]" : "[[$titleText]]"; + } + # Content was returned, return it + return $return; + } + +} + +class LinkMarkerReplacer { + + protected $markers, $nextId, $parser, $holders, $callback; + + function __construct( $parser, $holders, $callback ) { + $this->nextId = 0; + $this->markers = array(); + $this->parser = $parser; + $this->holders = $holders; + $this->callback = $callback; + } + + function addMarker($titleText, $paramText) { + $id = $this->nextId++; + $this->markers[$id] = array( $titleText, $paramText ); + return ""; + } + + function findMarker( $string ) { + return (bool) preg_match('//', $string ); + } + + function expand( $string ) { + return StringUtils::delimiterReplaceCallback( "", array( &$this, 'callback' ), $string ); + } + + function callback( $m ) { + $id = intval($m[1]); + if( !array_key_exists($id, $this->markers) ) return $m[0]; + $args = $this->markers[$id]; + array_unshift( $args, $this ); + array_unshift( $args, $this->holders ); + array_unshift( $args, $this->parser ); + return call_user_func_array( $this->callback, $args ); + } + +} diff --git a/includes/parser/Parser_OldPP.php b/includes/parser/Parser_OldPP.php deleted file mode 100644 index 487d3ffd..00000000 --- a/includes/parser/Parser_OldPP.php +++ /dev/null @@ -1,4944 +0,0 @@ -"\\x00-\\x20\\x7F]'; - const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)([^][<>"\\x00-\\x20\\x7F]+)\\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/S'; - - // State constants for the definition list colon extraction - const COLON_STATE_TEXT = 0; - const COLON_STATE_TAG = 1; - const COLON_STATE_TAGSTART = 2; - const COLON_STATE_CLOSETAG = 3; - const COLON_STATE_TAGSLASH = 4; - const COLON_STATE_COMMENT = 5; - const COLON_STATE_COMMENTDASH = 6; - const COLON_STATE_COMMENTDASHDASH = 7; - - // Allowed values for $this->mOutputType - // Parameter to startExternalParse(). - const OT_HTML = 1; - const OT_WIKI = 2; - const OT_PREPROCESS = 3; - const OT_MSG = 4; - - /**#@+ - * @private - */ - # Persistent: - var $mTagHooks, $mTransparentTagHooks, $mFunctionHooks, $mFunctionSynonyms, $mVariables, - $mImageParams, $mImageParamsMagicArray, $mExtLinkBracketedRegex; - - # Cleared with clearState(): - var $mOutput, $mAutonumber, $mDTopen, $mStripState; - var $mIncludeCount, $mArgStack, $mLastSection, $mInPre; - var $mInterwikiLinkHolders, $mLinkHolders, $mUniqPrefix; - var $mIncludeSizes, $mDefaultSort; - var $mTemplates, // cache of already loaded templates, avoids - // multiple SQL queries for the same string - $mTemplatePath; // stores an unsorted hash of all the templates already loaded - // in this path. Used for loop detection. - - # Temporary - # These are variables reset at least once per parse regardless of $clearState - var $mOptions, // ParserOptions object - $mTitle, // Title context, used for self-link rendering and similar things - $mOutputType, // Output type, one of the OT_xxx constants - $ot, // Shortcut alias, see setOutputType() - $mRevisionId, // ID to display in {{REVISIONID}} tags - $mRevisionTimestamp, // The timestamp of the specified revision ID - $mRevIdForTs; // The revision ID which was used to fetch the timestamp - - /**#@-*/ - - /** - * Constructor - * - * @public - */ - function __construct( $conf = array() ) { - $this->mTagHooks = array(); - $this->mTransparentTagHooks = array(); - $this->mFunctionHooks = array(); - $this->mFunctionSynonyms = array( 0 => array(), 1 => array() ); - $this->mFirstCall = true; - $this->mExtLinkBracketedRegex = '/\[(\b(' . wfUrlProtocols() . ')'. - '[^][<>"\\x00-\\x20\\x7F]+) *([^\]\\x0a\\x0d]*?)\]/S'; - } - - /** - * Do various kinds of initialisation on the first call of the parser - */ - function firstCallInit() { - if ( !$this->mFirstCall ) { - return; - } - $this->mFirstCall = false; - - wfProfileIn( __METHOD__ ); - global $wgAllowDisplayTitle, $wgAllowSlowParserFunctions; - - $this->setHook( 'pre', array( $this, 'renderPreTag' ) ); - - # Syntax for arguments (see self::setFunctionHook): - # "name for lookup in localized magic words array", - # function callback, - # optional SFH_NO_HASH to omit the hash from calls (e.g. {{int:...} - # instead of {{#int:...}}) - $this->setFunctionHook( 'int', array( 'CoreParserFunctions', 'intFunction' ), SFH_NO_HASH ); - $this->setFunctionHook( 'ns', array( 'CoreParserFunctions', 'ns' ), SFH_NO_HASH ); - $this->setFunctionHook( 'urlencode', array( 'CoreParserFunctions', 'urlencode' ), SFH_NO_HASH ); - $this->setFunctionHook( 'lcfirst', array( 'CoreParserFunctions', 'lcfirst' ), SFH_NO_HASH ); - $this->setFunctionHook( 'ucfirst', array( 'CoreParserFunctions', 'ucfirst' ), SFH_NO_HASH ); - $this->setFunctionHook( 'lc', array( 'CoreParserFunctions', 'lc' ), SFH_NO_HASH ); - $this->setFunctionHook( 'uc', array( 'CoreParserFunctions', 'uc' ), SFH_NO_HASH ); - $this->setFunctionHook( 'localurl', array( 'CoreParserFunctions', 'localurl' ), SFH_NO_HASH ); - $this->setFunctionHook( 'localurle', array( 'CoreParserFunctions', 'localurle' ), SFH_NO_HASH ); - $this->setFunctionHook( 'fullurl', array( 'CoreParserFunctions', 'fullurl' ), SFH_NO_HASH ); - $this->setFunctionHook( 'fullurle', array( 'CoreParserFunctions', 'fullurle' ), SFH_NO_HASH ); - $this->setFunctionHook( 'formatnum', array( 'CoreParserFunctions', 'formatnum' ), SFH_NO_HASH ); - $this->setFunctionHook( 'grammar', array( 'CoreParserFunctions', 'grammar' ), SFH_NO_HASH ); - $this->setFunctionHook( 'plural', array( 'CoreParserFunctions', 'plural' ), SFH_NO_HASH ); - $this->setFunctionHook( 'numberofpages', array( 'CoreParserFunctions', 'numberofpages' ), SFH_NO_HASH ); - $this->setFunctionHook( 'numberofusers', array( 'CoreParserFunctions', 'numberofusers' ), SFH_NO_HASH ); - $this->setFunctionHook( 'numberofarticles', array( 'CoreParserFunctions', 'numberofarticles' ), SFH_NO_HASH ); - $this->setFunctionHook( 'numberoffiles', array( 'CoreParserFunctions', 'numberoffiles' ), SFH_NO_HASH ); - $this->setFunctionHook( 'numberofadmins', array( 'CoreParserFunctions', 'numberofadmins' ), SFH_NO_HASH ); - $this->setFunctionHook( 'numberofedits', array( 'CoreParserFunctions', 'numberofedits' ), SFH_NO_HASH ); - $this->setFunctionHook( 'language', array( 'CoreParserFunctions', 'language' ), SFH_NO_HASH ); - $this->setFunctionHook( 'padleft', array( 'CoreParserFunctions', 'padleft' ), SFH_NO_HASH ); - $this->setFunctionHook( 'padright', array( 'CoreParserFunctions', 'padright' ), SFH_NO_HASH ); - $this->setFunctionHook( 'anchorencode', array( 'CoreParserFunctions', 'anchorencode' ), SFH_NO_HASH ); - $this->setFunctionHook( 'special', array( 'CoreParserFunctions', 'special' ) ); - $this->setFunctionHook( 'defaultsort', array( 'CoreParserFunctions', 'defaultsort' ), SFH_NO_HASH ); - $this->setFunctionHook( 'filepath', array( 'CoreParserFunctions', 'filepath' ), SFH_NO_HASH ); - - if ( $wgAllowDisplayTitle ) { - $this->setFunctionHook( 'displaytitle', array( 'CoreParserFunctions', 'displaytitle' ), SFH_NO_HASH ); - } - if ( $wgAllowSlowParserFunctions ) { - $this->setFunctionHook( 'pagesinnamespace', array( 'CoreParserFunctions', 'pagesinnamespace' ), SFH_NO_HASH ); - } - - $this->initialiseVariables(); - - wfRunHooks( 'ParserFirstCallInit', array( &$this ) ); - wfProfileOut( __METHOD__ ); - } - - /** - * Clear Parser state - * - * @private - */ - function clearState() { - wfProfileIn( __METHOD__ ); - if ( $this->mFirstCall ) { - $this->firstCallInit(); - } - $this->mOutput = new ParserOutput; - $this->mAutonumber = 0; - $this->mLastSection = ''; - $this->mDTopen = false; - $this->mIncludeCount = array(); - $this->mStripState = new StripState; - $this->mArgStack = array(); - $this->mInPre = false; - $this->mInterwikiLinkHolders = array( - 'texts' => array(), - 'titles' => array() - ); - $this->mLinkHolders = array( - 'namespaces' => array(), - 'dbkeys' => array(), - 'queries' => array(), - 'texts' => array(), - 'titles' => array() - ); - $this->mRevisionTimestamp = $this->mRevisionId = null; - - /** - * Prefix for temporary replacement strings for the multipass parser. - * \x07 should never appear in input as it's disallowed in XML. - * Using it at the front also gives us a little extra robustness - * since it shouldn't match when butted up against identifier-like - * string constructs. - */ - $this->mUniqPrefix = "\x07UNIQ" . self::getRandomString(); - - # Clear these on every parse, bug 4549 - $this->mTemplates = array(); - $this->mTemplatePath = array(); - - $this->mShowToc = true; - $this->mForceTocPosition = false; - $this->mIncludeSizes = array( - 'pre-expand' => 0, - 'post-expand' => 0, - 'arg' => 0 - ); - $this->mDefaultSort = false; - - wfRunHooks( 'ParserClearState', array( &$this ) ); - wfProfileOut( __METHOD__ ); - } - - function setOutputType( $ot ) { - $this->mOutputType = $ot; - // Shortcut alias - $this->ot = array( - 'html' => $ot == self::OT_HTML, - 'wiki' => $ot == self::OT_WIKI, - 'msg' => $ot == self::OT_MSG, - 'pre' => $ot == self::OT_PREPROCESS, - ); - } - - /** - * Accessor for mUniqPrefix. - * - * @public - */ - function uniqPrefix() { - return $this->mUniqPrefix; - } - - /** - * Convert wikitext to HTML - * Do not call this function recursively. - * - * @param string $text Text we want to parse - * @param Title &$title A title object - * @param array $options - * @param boolean $linestart - * @param boolean $clearState - * @param int $revid number to pass in {{REVISIONID}} - * @return ParserOutput a ParserOutput - */ - public function parse( $text, &$title, $options, $linestart = true, $clearState = true, $revid = null ) { - /** - * First pass--just handle sections, pass the rest off - * to internalParse() which does all the real work. - */ - - global $wgUseTidy, $wgAlwaysUseTidy, $wgContLang; - $fname = 'Parser::parse-' . wfGetCaller(); - wfProfileIn( __METHOD__ ); - wfProfileIn( $fname ); - - if ( $clearState ) { - $this->clearState(); - } - - $this->mOptions = $options; - $this->mTitle =& $title; - $oldRevisionId = $this->mRevisionId; - $oldRevisionTimestamp = $this->mRevisionTimestamp; - if( $revid !== null ) { - $this->mRevisionId = $revid; - $this->mRevisionTimestamp = null; - } - $this->setOutputType( self::OT_HTML ); - wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) ); - $text = $this->strip( $text, $this->mStripState ); - wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) ); - $text = $this->internalParse( $text ); - $text = $this->mStripState->unstripGeneral( $text ); - - # Clean up special characters, only run once, next-to-last before doBlockLevels - $fixtags = array( - # french spaces, last one Guillemet-left - # only if there is something before the space - '/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1 \\2', - # french spaces, Guillemet-right - '/(\\302\\253) /' => '\\1 ', - ); - $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text ); - - # only once and last - $text = $this->doBlockLevels( $text, $linestart ); - - $this->replaceLinkHolders( $text ); - - # the position of the parserConvert() call should not be changed. it - # assumes that the links are all replaced and the only thing left - # is the mark. - # Side-effects: this calls $this->mOutput->setTitleText() - $text = $wgContLang->parserConvert( $text, $this ); - - $text = $this->mStripState->unstripNoWiki( $text ); - - wfRunHooks( 'ParserBeforeTidy', array( &$this, &$text ) ); - -//!JF Move to its own function - - $uniq_prefix = $this->mUniqPrefix; - $matches = array(); - $elements = array_keys( $this->mTransparentTagHooks ); - $text = self::extractTagsAndParams( $elements, $text, $matches, $uniq_prefix ); - - foreach( $matches as $marker => $data ) { - list( $element, $content, $params, $tag ) = $data; - $tagName = strtolower( $element ); - if( isset( $this->mTransparentTagHooks[$tagName] ) ) { - $output = call_user_func_array( $this->mTransparentTagHooks[$tagName], - array( $content, $params, $this ) ); - } else { - $output = $tag; - } - $this->mStripState->general->setPair( $marker, $output ); - } - $text = $this->mStripState->unstripGeneral( $text ); - - $text = Sanitizer::normalizeCharReferences( $text ); - - if (($wgUseTidy and $this->mOptions->mTidy) or $wgAlwaysUseTidy) { - $text = self::tidy($text); - } else { - # attempt to sanitize at least some nesting problems - # (bug #2702 and quite a few others) - $tidyregs = array( - # ''Something [http://www.cool.com cool''] --> - # Somethingcool> - '/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' => - '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9', - # fix up an anchor inside another anchor, only - # at least for a single single nested link (bug 3695) - '/(]+>)([^<]*)(]+>[^<]*)<\/a>(.*)<\/a>/' => - '\\1\\2\\3\\1\\4', - # fix div inside inline elements- doBlockLevels won't wrap a line which - # contains a div, so fix it up here; replace - # div with escaped text - '/(<([aib]) [^>]+>)([^<]*)(]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' => - '\\1\\3<div\\5>\\6</div>\\8\\9', - # remove empty italic or bold tag pairs, some - # introduced by rules above - '/<([bi])><\/\\1>/' => '', - ); - - $text = preg_replace( - array_keys( $tidyregs ), - array_values( $tidyregs ), - $text ); - } - - wfRunHooks( 'ParserAfterTidy', array( &$this, &$text ) ); - - # Information on include size limits, for the benefit of users who try to skirt them - if ( $this->mOptions->getEnableLimitReport() ) { - $max = $this->mOptions->getMaxIncludeSize(); - $limitReport = - "Pre-expand include size: {$this->mIncludeSizes['pre-expand']}/$max bytes\n" . - "Post-expand include size: {$this->mIncludeSizes['post-expand']}/$max bytes\n" . - "Template argument size: {$this->mIncludeSizes['arg']}/$max bytes\n"; - wfRunHooks( 'ParserLimitReport', array( $this, &$limitReport ) ); - $text .= "\n"; - } - $this->mOutput->setText( $text ); - $this->mRevisionId = $oldRevisionId; - $this->mRevisionTimestamp = $oldRevisionTimestamp; - wfProfileOut( $fname ); - wfProfileOut( __METHOD__ ); - - return $this->mOutput; - } - - /** - * Recursive parser entry point that can be called from an extension tag - * hook. - */ - function recursiveTagParse( $text ) { - wfProfileIn( __METHOD__ ); - wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) ); - $text = $this->strip( $text, $this->mStripState ); - wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) ); - $text = $this->internalParse( $text ); - wfProfileOut( __METHOD__ ); - return $text; - } - - /** - * Expand templates and variables in the text, producing valid, static wikitext. - * Also removes comments. - */ - function preprocess( $text, $title, $options, $revid = null ) { - wfProfileIn( __METHOD__ ); - $this->clearState(); - $this->setOutputType( self::OT_PREPROCESS ); - $this->mOptions = $options; - $this->mTitle = $title; - if( $revid !== null ) { - $this->mRevisionId = $revid; - } - wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) ); - $text = $this->strip( $text, $this->mStripState ); - wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) ); - if ( $this->mOptions->getRemoveComments() ) { - $text = Sanitizer::removeHTMLcomments( $text ); - } - $text = $this->replaceVariables( $text ); - $text = $this->mStripState->unstripBoth( $text ); - wfProfileOut( __METHOD__ ); - return $text; - } - - /** - * Get a random string - * - * @private - * @static - */ - function getRandomString() { - return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff)); - } - - function &getTitle() { return $this->mTitle; } - function getOptions() { return $this->mOptions; } - function getRevisionId() { return $this->mRevisionId; } - - function getFunctionLang() { - global $wgLang, $wgContLang; - return $this->mOptions->getInterfaceMessage() ? $wgLang : $wgContLang; - } - - /** - * Replaces all occurrences of HTML-style comments and the given tags - * in the text with a random marker and returns teh next text. The output - * parameter $matches will be an associative array filled with data in - * the form: - * 'UNIQ-xxxxx' => array( - * 'element', - * 'tag content', - * array( 'param' => 'x' ), - * 'tag content' ) ) - * - * @param $elements list of element names. Comments are always extracted. - * @param $text Source text string. - * @param $uniq_prefix - * - * @public - * @static - */ - function extractTagsAndParams($elements, $text, &$matches, $uniq_prefix = ''){ - static $n = 1; - $stripped = ''; - $matches = array(); - - $taglist = implode( '|', $elements ); - $start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i"; - - while ( '' != $text ) { - $p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE ); - $stripped .= $p[0]; - if( count( $p ) < 5 ) { - break; - } - if( count( $p ) > 5 ) { - // comment - $element = $p[4]; - $attributes = ''; - $close = ''; - $inside = $p[5]; - } else { - // tag - $element = $p[1]; - $attributes = $p[2]; - $close = $p[3]; - $inside = $p[4]; - } - - $marker = "$uniq_prefix-$element-" . sprintf('%08X', $n++) . "-QINU\x07"; - $stripped .= $marker; - - if ( $close === '/>' ) { - // Empty element tag, - $content = null; - $text = $inside; - $tail = null; - } else { - if( $element == '!--' ) { - $end = '/(-->)/'; - } else { - $end = "/(<\\/$element\\s*>)/i"; - } - $q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE ); - $content = $q[0]; - if( count( $q ) < 3 ) { - # No end tag -- let it run out to the end of the text. - $tail = ''; - $text = ''; - } else { - $tail = $q[1]; - $text = $q[2]; - } - } - - $matches[$marker] = array( $element, - $content, - Sanitizer::decodeTagAttributes( $attributes ), - "<$element$attributes$close$content$tail" ); - } - return $stripped; - } - - /** - * Strips and renders nowiki, pre, math, hiero - * If $render is set, performs necessary rendering operations on plugins - * Returns the text, and fills an array with data needed in unstrip() - * - * @param StripState $state - * - * @param bool $stripcomments when set, HTML comments - * will be stripped in addition to other tags. This is important - * for section editing, where these comments cause confusion when - * counting the sections in the wikisource - * - * @param array dontstrip contains tags which should not be stripped; - * used to prevent stipping of when saving (fixes bug 2700) - * - * @private - */ - function strip( $text, $state, $stripcomments = false , $dontstrip = array () ) { - global $wgContLang; - wfProfileIn( __METHOD__ ); - $render = ($this->mOutputType == self::OT_HTML); - - $uniq_prefix = $this->mUniqPrefix; - $commentState = new ReplacementArray; - $nowikiItems = array(); - $generalItems = array(); - - $elements = array_merge( - array( 'nowiki', 'gallery' ), - array_keys( $this->mTagHooks ) ); - global $wgRawHtml; - if( $wgRawHtml ) { - $elements[] = 'html'; - } - if( $this->mOptions->getUseTeX() ) { - $elements[] = 'math'; - } - - # Removing $dontstrip tags from $elements list (currently only 'gallery', fixing bug 2700) - foreach ( $elements AS $k => $v ) { - if ( !in_array ( $v , $dontstrip ) ) continue; - unset ( $elements[$k] ); - } - - $matches = array(); - $text = self::extractTagsAndParams( $elements, $text, $matches, $uniq_prefix ); - - foreach( $matches as $marker => $data ) { - list( $element, $content, $params, $tag ) = $data; - if( $render ) { - $tagName = strtolower( $element ); - wfProfileIn( __METHOD__."-render-$tagName" ); - switch( $tagName ) { - case '!--': - // Comment - if( substr( $tag, -3 ) == '-->' ) { - $output = $tag; - } else { - // Unclosed comment in input. - // Close it so later stripping can remove it - $output = "$tag-->"; - } - break; - case 'html': - if( $wgRawHtml ) { - $output = $content; - break; - } - // Shouldn't happen otherwise. :) - case 'nowiki': - $output = Xml::escapeTagsOnly( $content ); - break; - case 'math': - $output = $wgContLang->armourMath( - MathRenderer::renderMath( $content, $params ) ); - break; - case 'gallery': - $output = $this->renderImageGallery( $content, $params ); - break; - default: - if( isset( $this->mTagHooks[$tagName] ) ) { - $output = call_user_func_array( $this->mTagHooks[$tagName], - array( $content, $params, $this ) ); - } else { - throw new MWException( "Invalid call hook $element" ); - } - } - wfProfileOut( __METHOD__."-render-$tagName" ); - } else { - // Just stripping tags; keep the source - $output = $tag; - } - - // Unstrip the output, to support recursive strip() calls - $output = $state->unstripBoth( $output ); - - if( !$stripcomments && $element == '!--' ) { - $commentState->setPair( $marker, $output ); - } elseif ( $element == 'html' || $element == 'nowiki' ) { - $nowikiItems[$marker] = $output; - } else { - $generalItems[$marker] = $output; - } - } - # Add the new items to the state - # We do this after the loop instead of during it to avoid slowing - # down the recursive unstrip - $state->nowiki->mergeArray( $nowikiItems ); - $state->general->mergeArray( $generalItems ); - - # Unstrip comments unless explicitly told otherwise. - # (The comments are always stripped prior to this point, so as to - # not invoke any extension tags / parser hooks contained within - # a comment.) - if ( !$stripcomments ) { - // Put them all back and forget them - $text = $commentState->replace( $text ); - } - - wfProfileOut( __METHOD__ ); - return $text; - } - - /** - * Restores pre, math, and other extensions removed by strip() - * - * always call unstripNoWiki() after this one - * @private - * @deprecated use $this->mStripState->unstrip() - */ - function unstrip( $text, $state ) { - return $state->unstripGeneral( $text ); - } - - /** - * Always call this after unstrip() to preserve the order - * - * @private - * @deprecated use $this->mStripState->unstrip() - */ - function unstripNoWiki( $text, $state ) { - return $state->unstripNoWiki( $text ); - } - - /** - * @deprecated use $this->mStripState->unstripBoth() - */ - function unstripForHTML( $text ) { - return $this->mStripState->unstripBoth( $text ); - } - - /** - * Add an item to the strip state - * Returns the unique tag which must be inserted into the stripped text - * The tag will be replaced with the original text in unstrip() - * - * @private - */ - function insertStripItem( $text, &$state ) { - $rnd = $this->mUniqPrefix . '-item' . self::getRandomString(); - $state->general->setPair( $rnd, $text ); - return $rnd; - } - - /** - * Interface with html tidy, used if $wgUseTidy = true. - * If tidy isn't able to correct the markup, the original will be - * returned in all its glory with a warning comment appended. - * - * Either the external tidy program or the in-process tidy extension - * will be used depending on availability. Override the default - * $wgTidyInternal setting to disable the internal if it's not working. - * - * @param string $text Hideous HTML input - * @return string Corrected HTML output - * @public - * @static - */ - function tidy( $text ) { - global $wgTidyInternal; - $wrappedtext = ''. -'test'.$text.''; - if( $wgTidyInternal ) { - $correctedtext = self::internalTidy( $wrappedtext ); - } else { - $correctedtext = self::externalTidy( $wrappedtext ); - } - if( is_null( $correctedtext ) ) { - wfDebug( "Tidy error detected!\n" ); - return $text . "\n\n"; - } - return $correctedtext; - } - - /** - * Spawn an external HTML tidy process and get corrected markup back from it. - * - * @private - * @static - */ - function externalTidy( $text ) { - global $wgTidyConf, $wgTidyBin, $wgTidyOpts; - $fname = 'Parser::externalTidy'; - wfProfileIn( $fname ); - - $cleansource = ''; - $opts = ' -utf8'; - - $descriptorspec = array( - 0 => array('pipe', 'r'), - 1 => array('pipe', 'w'), - 2 => array('file', wfGetNull(), 'a') - ); - $pipes = array(); - $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts$opts", $descriptorspec, $pipes); - if (is_resource($process)) { - // Theoretically, this style of communication could cause a deadlock - // here. If the stdout buffer fills up, then writes to stdin could - // block. This doesn't appear to happen with tidy, because tidy only - // writes to stdout after it's finished reading from stdin. Search - // for tidyParseStdin and tidySaveStdout in console/tidy.c - fwrite($pipes[0], $text); - fclose($pipes[0]); - while (!feof($pipes[1])) { - $cleansource .= fgets($pipes[1], 1024); - } - fclose($pipes[1]); - proc_close($process); - } - - wfProfileOut( $fname ); - - if( $cleansource == '' && $text != '') { - // Some kind of error happened, so we couldn't get the corrected text. - // Just give up; we'll use the source text and append a warning. - return null; - } else { - return $cleansource; - } - } - - /** - * Use the HTML tidy PECL extension to use the tidy library in-process, - * saving the overhead of spawning a new process. - * - * 'pear install tidy' should be able to compile the extension module. - * - * @private - * @static - */ - function internalTidy( $text ) { - global $wgTidyConf, $IP; - $fname = 'Parser::internalTidy'; - wfProfileIn( $fname ); - - $tidy = new tidy; - $tidy->parseString( $text, $wgTidyConf, 'utf8' ); - $tidy->cleanRepair(); - if( $tidy->getStatus() == 2 ) { - // 2 is magic number for fatal error - // http://www.php.net/manual/en/function.tidy-get-status.php - $cleansource = null; - } else { - $cleansource = tidy_get_output( $tidy ); - } - wfProfileOut( $fname ); - return $cleansource; - } - - /** - * parse the wiki syntax used to render tables - * - * @private - */ - function doTableStuff ( $text ) { - $fname = 'Parser::doTableStuff'; - wfProfileIn( $fname ); - - $lines = explode ( "\n" , $text ); - $td_history = array (); // Is currently a td tag open? - $last_tag_history = array (); // Save history of last lag activated (td, th or caption) - $tr_history = array (); // Is currently a tr tag open? - $tr_attributes = array (); // history of tr attributes - $has_opened_tr = array(); // Did this table open a element? - $indent_level = 0; // indent level of the table - foreach ( $lines as $key => $line ) - { - $line = trim ( $line ); - - if( $line == '' ) { // empty line, go to next line - continue; - } - $first_character = $line{0}; - $matches = array(); - - if ( preg_match( '/^(:*)\{\|(.*)$/' , $line , $matches ) ) { - // First check if we are starting a new table - $indent_level = strlen( $matches[1] ); - - $attributes = $this->mStripState->unstripBoth( $matches[2] ); - $attributes = Sanitizer::fixTagAttributes ( $attributes , 'table' ); - - $lines[$key] = str_repeat( '
' , $indent_level ) . ""; - array_push ( $td_history , false ); - array_push ( $last_tag_history , '' ); - array_push ( $tr_history , false ); - array_push ( $tr_attributes , '' ); - array_push ( $has_opened_tr , false ); - } else if ( count ( $td_history ) == 0 ) { - // Don't do any of the following - continue; - } else if ( substr ( $line , 0 , 2 ) == '|}' ) { - // We are ending a table - $line = '' . substr ( $line , 2 ); - $last_tag = array_pop ( $last_tag_history ); - - if ( !array_pop ( $has_opened_tr ) ) { - $line = "{$line}"; - } - - if ( array_pop ( $tr_history ) ) { - $line = "{$line}"; - } - - if ( array_pop ( $td_history ) ) { - $line = "{$line}"; - } - array_pop ( $tr_attributes ); - $lines[$key] = $line . str_repeat( '
' , $indent_level ); - } else if ( substr ( $line , 0 , 2 ) == '|-' ) { - // Now we have a table row - $line = preg_replace( '#^\|-+#', '', $line ); - - // Whats after the tag is now only attributes - $attributes = $this->mStripState->unstripBoth( $line ); - $attributes = Sanitizer::fixTagAttributes ( $attributes , 'tr' ); - array_pop ( $tr_attributes ); - array_push ( $tr_attributes , $attributes ); - - $line = ''; - $last_tag = array_pop ( $last_tag_history ); - array_pop ( $has_opened_tr ); - array_push ( $has_opened_tr , true ); - - if ( array_pop ( $tr_history ) ) { - $line = ''; - } - - if ( array_pop ( $td_history ) ) { - $line = "{$line}"; - } - - $lines[$key] = $line; - array_push ( $tr_history , false ); - array_push ( $td_history , false ); - array_push ( $last_tag_history , '' ); - } - else if ( $first_character == '|' || $first_character == '!' || substr ( $line , 0 , 2 ) == '|+' ) { - // This might be cell elements, td, th or captions - if ( substr ( $line , 0 , 2 ) == '|+' ) { - $first_character = '+'; - $line = substr ( $line , 1 ); - } - - $line = substr ( $line , 1 ); - - if ( $first_character == '!' ) { - $line = str_replace ( '!!' , '||' , $line ); - } - - // Split up multiple cells on the same line. - // FIXME : This can result in improper nesting of tags processed - // by earlier parser steps, but should avoid splitting up eg - // attribute values containing literal "||". - $cells = StringUtils::explodeMarkup( '||' , $line ); - - $lines[$key] = ''; - - // Loop through each table cell - foreach ( $cells as $cell ) - { - $previous = ''; - if ( $first_character != '+' ) - { - $tr_after = array_pop ( $tr_attributes ); - if ( !array_pop ( $tr_history ) ) { - $previous = "\n"; - } - array_push ( $tr_history , true ); - array_push ( $tr_attributes , '' ); - array_pop ( $has_opened_tr ); - array_push ( $has_opened_tr , true ); - } - - $last_tag = array_pop ( $last_tag_history ); - - if ( array_pop ( $td_history ) ) { - $previous = "{$previous}"; - } - - if ( $first_character == '|' ) { - $last_tag = 'td'; - } else if ( $first_character == '!' ) { - $last_tag = 'th'; - } else if ( $first_character == '+' ) { - $last_tag = 'caption'; - } else { - $last_tag = ''; - } - - array_push ( $last_tag_history , $last_tag ); - - // A cell could contain both parameters and data - $cell_data = explode ( '|' , $cell , 2 ); - - // Bug 553: Note that a '|' inside an invalid link should not - // be mistaken as delimiting cell parameters - if ( strpos( $cell_data[0], '[[' ) !== false ) { - $cell = "{$previous}<{$last_tag}>{$cell}"; - } else if ( count ( $cell_data ) == 1 ) - $cell = "{$previous}<{$last_tag}>{$cell_data[0]}"; - else { - $attributes = $this->mStripState->unstripBoth( $cell_data[0] ); - $attributes = Sanitizer::fixTagAttributes( $attributes , $last_tag ); - $cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}"; - } - - $lines[$key] .= $cell; - array_push ( $td_history , true ); - } - } - } - - // Closing open td, tr && table - while ( count ( $td_history ) > 0 ) - { - if ( array_pop ( $td_history ) ) { - $lines[] = '' ; - } - if ( array_pop ( $tr_history ) ) { - $lines[] = '' ; - } - if ( !array_pop ( $has_opened_tr ) ) { - $lines[] = "" ; - } - - $lines[] = '' ; - } - - $output = implode ( "\n" , $lines ) ; - - // special case: don't return empty table - if( $output == "\n\n
" ) { - $output = ''; - } - - wfProfileOut( $fname ); - - return $output; - } - - /** - * Helper function for parse() that transforms wiki markup into - * HTML. Only called for $mOutputType == OT_HTML. - * - * @private - */ - function internalParse( $text ) { - $args = array(); - $isMain = true; - $fname = 'Parser::internalParse'; - wfProfileIn( $fname ); - - # Hook to suspend the parser in this state - if ( !wfRunHooks( 'ParserBeforeInternalParse', array( &$this, &$text, &$this->mStripState ) ) ) { - wfProfileOut( $fname ); - return $text ; - } - - # Remove tags and sections - $text = strtr( $text, array( '' => '' , '' => '' ) ); - $text = strtr( $text, array( '' => '', '' => '') ); - $text = StringUtils::delimiterReplace( '', '', '', $text ); - - $text = Sanitizer::removeHTMLtags( $text, array( &$this, 'attributeStripCallback' ), array(), array_keys( $this->mTransparentTagHooks ) ); - - $text = $this->replaceVariables( $text, $args ); - wfRunHooks( 'InternalParseBeforeLinks', array( &$this, &$text, &$this->mStripState ) ); - - // Tables need to come after variable replacement for things to work - // properly; putting them before other transformations should keep - // exciting things like link expansions from showing up in surprising - // places. - $text = $this->doTableStuff( $text ); - - $text = preg_replace( '/(^|\n)-----*/', '\\1
', $text ); - - $text = $this->stripToc( $text ); - $this->stripNoGallery( $text ); - $text = $this->doHeadings( $text ); - if($this->mOptions->getUseDynamicDates()) { - $df =& DateFormatter::getInstance(); - $text = $df->reformat( $this->mOptions->getDateFormat(), $text ); - } - $text = $this->doAllQuotes( $text ); - $text = $this->replaceInternalLinks( $text ); - $text = $this->replaceExternalLinks( $text ); - - # replaceInternalLinks may sometimes leave behind - # absolute URLs, which have to be masked to hide them from replaceExternalLinks - $text = str_replace($this->mUniqPrefix."NOPARSE", "", $text); - - $text = $this->doMagicLinks( $text ); - $text = $this->formatHeadings( $text, $isMain ); - - wfProfileOut( $fname ); - return $text; - } - - /** - * Replace special strings like "ISBN xxx" and "RFC xxx" with - * magic external links. - * - * @private - */ - function &doMagicLinks( &$text ) { - wfProfileIn( __METHOD__ ); - $text = preg_replace_callback( - '!(?: # Start cases - | # Skip link text - <.*?> | # Skip stuff inside HTML elements - (?:RFC|PMID)\s+([0-9]+) | # RFC or PMID, capture number as m[1] - ISBN\s+(\b # ISBN, capture number as m[2] - (?: 97[89] [\ \-]? )? # optional 13-digit ISBN prefix - (?: [0-9] [\ \-]? ){9} # 9 digits with opt. delimiters - [0-9Xx] # check digit - \b) - )!x', array( &$this, 'magicLinkCallback' ), $text ); - wfProfileOut( __METHOD__ ); - return $text; - } - - function magicLinkCallback( $m ) { - if ( substr( $m[0], 0, 1 ) == '<' ) { - # Skip HTML element - return $m[0]; - } elseif ( substr( $m[0], 0, 4 ) == 'ISBN' ) { - $isbn = $m[2]; - $num = strtr( $isbn, array( - '-' => '', - ' ' => '', - 'x' => 'X', - )); - $titleObj = SpecialPage::getTitleFor( 'Booksources' ); - $text = 'ISBN $isbn"; - } else { - if ( substr( $m[0], 0, 3 ) == 'RFC' ) { - $keyword = 'RFC'; - $urlmsg = 'rfcurl'; - $id = $m[1]; - } elseif ( substr( $m[0], 0, 4 ) == 'PMID' ) { - $keyword = 'PMID'; - $urlmsg = 'pubmedurl'; - $id = $m[1]; - } else { - throw new MWException( __METHOD__.': unrecognised match type "' . - substr($m[0], 0, 20 ) . '"' ); - } - - $url = wfMsg( $urlmsg, $id); - $sk = $this->mOptions->getSkin(); - $la = $sk->getExternalLinkAttributes( $url, $keyword.$id ); - $text = "{$keyword} {$id}"; - } - return $text; - } - - /** - * Parse headers and return html - * - * @private - */ - function doHeadings( $text ) { - $fname = 'Parser::doHeadings'; - wfProfileIn( $fname ); - for ( $i = 6; $i >= 1; --$i ) { - $h = str_repeat( '=', $i ); - $text = preg_replace( "/^{$h}(.+){$h}\\s*$/m", - "\\1\\2", $text ); - } - wfProfileOut( $fname ); - return $text; - } - - /** - * Replace single quotes with HTML markup - * @private - * @return string the altered text - */ - function doAllQuotes( $text ) { - $fname = 'Parser::doAllQuotes'; - wfProfileIn( $fname ); - $outtext = ''; - $lines = explode( "\n", $text ); - foreach ( $lines as $line ) { - $outtext .= $this->doQuotes ( $line ) . "\n"; - } - $outtext = substr($outtext, 0,-1); - wfProfileOut( $fname ); - return $outtext; - } - - /** - * Helper function for doAllQuotes() - */ - public function doQuotes( $text ) { - $arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE ); - if ( count( $arr ) == 1 ) - return $text; - else - { - # First, do some preliminary work. This may shift some apostrophes from - # being mark-up to being text. It also counts the number of occurrences - # of bold and italics mark-ups. - $i = 0; - $numbold = 0; - $numitalics = 0; - foreach ( $arr as $r ) - { - if ( ( $i % 2 ) == 1 ) - { - # If there are ever four apostrophes, assume the first is supposed to - # be text, and the remaining three constitute mark-up for bold text. - if ( strlen( $arr[$i] ) == 4 ) - { - $arr[$i-1] .= "'"; - $arr[$i] = "'''"; - } - # If there are more than 5 apostrophes in a row, assume they're all - # text except for the last 5. - else if ( strlen( $arr[$i] ) > 5 ) - { - $arr[$i-1] .= str_repeat( "'", strlen( $arr[$i] ) - 5 ); - $arr[$i] = "'''''"; - } - # Count the number of occurrences of bold and italics mark-ups. - # We are not counting sequences of five apostrophes. - if ( strlen( $arr[$i] ) == 2 ) { $numitalics++; } - else if ( strlen( $arr[$i] ) == 3 ) { $numbold++; } - else if ( strlen( $arr[$i] ) == 5 ) { $numitalics++; $numbold++; } - } - $i++; - } - - # If there is an odd number of both bold and italics, it is likely - # that one of the bold ones was meant to be an apostrophe followed - # by italics. Which one we cannot know for certain, but it is more - # likely to be one that has a single-letter word before it. - if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) - { - $i = 0; - $firstsingleletterword = -1; - $firstmultiletterword = -1; - $firstspace = -1; - foreach ( $arr as $r ) - { - if ( ( $i % 2 == 1 ) and ( strlen( $r ) == 3 ) ) - { - $x1 = substr ($arr[$i-1], -1); - $x2 = substr ($arr[$i-1], -2, 1); - if ($x1 == ' ') { - if ($firstspace == -1) $firstspace = $i; - } else if ($x2 == ' ') { - if ($firstsingleletterword == -1) $firstsingleletterword = $i; - } else { - if ($firstmultiletterword == -1) $firstmultiletterword = $i; - } - } - $i++; - } - - # If there is a single-letter word, use it! - if ($firstsingleletterword > -1) - { - $arr [ $firstsingleletterword ] = "''"; - $arr [ $firstsingleletterword-1 ] .= "'"; - } - # If not, but there's a multi-letter word, use that one. - else if ($firstmultiletterword > -1) - { - $arr [ $firstmultiletterword ] = "''"; - $arr [ $firstmultiletterword-1 ] .= "'"; - } - # ... otherwise use the first one that has neither. - # (notice that it is possible for all three to be -1 if, for example, - # there is only one pentuple-apostrophe in the line) - else if ($firstspace > -1) - { - $arr [ $firstspace ] = "''"; - $arr [ $firstspace-1 ] .= "'"; - } - } - - # Now let's actually convert our apostrophic mush to HTML! - $output = ''; - $buffer = ''; - $state = ''; - $i = 0; - foreach ($arr as $r) - { - if (($i % 2) == 0) - { - if ($state == 'both') - $buffer .= $r; - else - $output .= $r; - } - else - { - if (strlen ($r) == 2) - { - if ($state == 'i') - { $output .= ''; $state = ''; } - else if ($state == 'bi') - { $output .= ''; $state = 'b'; } - else if ($state == 'ib') - { $output .= ''; $state = 'b'; } - else if ($state == 'both') - { $output .= ''.$buffer.''; $state = 'b'; } - else # $state can be 'b' or '' - { $output .= ''; $state .= 'i'; } - } - else if (strlen ($r) == 3) - { - if ($state == 'b') - { $output .= ''; $state = ''; } - else if ($state == 'bi') - { $output .= ''; $state = 'i'; } - else if ($state == 'ib') - { $output .= ''; $state = 'i'; } - else if ($state == 'both') - { $output .= ''.$buffer.''; $state = 'i'; } - else # $state can be 'i' or '' - { $output .= ''; $state .= 'b'; } - } - else if (strlen ($r) == 5) - { - if ($state == 'b') - { $output .= ''; $state = 'i'; } - else if ($state == 'i') - { $output .= ''; $state = 'b'; } - else if ($state == 'bi') - { $output .= ''; $state = ''; } - else if ($state == 'ib') - { $output .= ''; $state = ''; } - else if ($state == 'both') - { $output .= ''.$buffer.''; $state = ''; } - else # ($state == '') - { $buffer = ''; $state = 'both'; } - } - } - $i++; - } - # Now close all remaining tags. Notice that the order is important. - if ($state == 'b' || $state == 'ib') - $output .= ''; - if ($state == 'i' || $state == 'bi' || $state == 'ib') - $output .= ''; - if ($state == 'bi') - $output .= ''; - # There might be lonely ''''', so make sure we have a buffer - if ($state == 'both' && $buffer) - $output .= ''.$buffer.''; - return $output; - } - } - - /** - * Replace external links - * - * Note: this is all very hackish and the order of execution matters a lot. - * Make sure to run maintenance/parserTests.php if you change this code. - * - * @private - */ - function replaceExternalLinks( $text ) { - global $wgContLang; - $fname = 'Parser::replaceExternalLinks'; - wfProfileIn( $fname ); - - $sk = $this->mOptions->getSkin(); - - $bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE ); - - $s = $this->replaceFreeExternalLinks( array_shift( $bits ) ); - - $i = 0; - while ( $i' (which were escaped by - # removeHTMLtags()) should not be included in - # URLs, per RFC 2396. - $m2 = array(); - if (preg_match('/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE)) { - $text = substr($url, $m2[0][1]) . ' ' . $text; - $url = substr($url, 0, $m2[0][1]); - } - - # If the link text is an image URL, replace it with an tag - # This happened by accident in the original parser, but some people used it extensively - $img = $this->maybeMakeExternalImage( $text ); - if ( $img !== false ) { - $text = $img; - } - - $dtrail = ''; - - # Set linktype for CSS - if URL==text, link is essentially free - $linktype = ($text == $url) ? 'free' : 'text'; - - # No link text, e.g. [http://domain.tld/some.link] - if ( $text == '' ) { - # Autonumber if allowed. See bug #5918 - if ( strpos( wfUrlProtocols(), substr($protocol, 0, strpos($protocol, ':')) ) !== false ) { - $text = '[' . ++$this->mAutonumber . ']'; - $linktype = 'autonumber'; - } else { - # Otherwise just use the URL - $text = htmlspecialchars( $url ); - $linktype = 'free'; - } - } else { - # Have link text, e.g. [http://domain.tld/some.link text]s - # Check for trail - list( $dtrail, $trail ) = Linker::splitTrail( $trail ); - } - - $text = $wgContLang->markNoConversion($text); - - $url = Sanitizer::cleanUrl( $url ); - - # Process the trail (i.e. everything after this link up until start of the next link), - # replacing any non-bracketed links - $trail = $this->replaceFreeExternalLinks( $trail ); - - # Use the encoded URL - # This means that users can paste URLs directly into the text - # Funny characters like ö aren't valid in URLs anyway - # This was changed in August 2004 - $s .= $sk->makeExternalLink( $url, $text, false, $linktype, $this->mTitle->getNamespace() ) . $dtrail . $trail; - - # Register link in the output object. - # Replace unnecessary URL escape codes with the referenced character - # This prevents spammers from hiding links from the filters - $pasteurized = self::replaceUnusualEscapes( $url ); - $this->mOutput->addExternalLink( $pasteurized ); - } - - wfProfileOut( $fname ); - return $s; - } - - /** - * Replace anything that looks like a URL with a link - * @private - */ - function replaceFreeExternalLinks( $text ) { - global $wgContLang; - $fname = 'Parser::replaceFreeExternalLinks'; - wfProfileIn( $fname ); - - $bits = preg_split( '/(\b(?:' . wfUrlProtocols() . '))/S', $text, -1, PREG_SPLIT_DELIM_CAPTURE ); - $s = array_shift( $bits ); - $i = 0; - - $sk = $this->mOptions->getSkin(); - - while ( $i < count( $bits ) ){ - $protocol = $bits[$i++]; - $remainder = $bits[$i++]; - - $m = array(); - if ( preg_match( '/^('.self::EXT_LINK_URL_CLASS.'+)(.*)$/s', $remainder, $m ) ) { - # Found some characters after the protocol that look promising - $url = $protocol . $m[1]; - $trail = $m[2]; - - # special case: handle urls as url args: - # http://www.example.com/foo?=http://www.example.com/bar - if(strlen($trail) == 0 && - isset($bits[$i]) && - preg_match('/^'. wfUrlProtocols() . '$/S', $bits[$i]) && - preg_match( '/^('.self::EXT_LINK_URL_CLASS.'+)(.*)$/s', $bits[$i + 1], $m )) - { - # add protocol, arg - $url .= $bits[$i] . $m[1]; # protocol, url as arg to previous link - $i += 2; - $trail = $m[2]; - } - - # The characters '<' and '>' (which were escaped by - # removeHTMLtags()) should not be included in - # URLs, per RFC 2396. - $m2 = array(); - if (preg_match('/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE)) { - $trail = substr($url, $m2[0][1]) . $trail; - $url = substr($url, 0, $m2[0][1]); - } - - # Move trailing punctuation to $trail - $sep = ',;\.:!?'; - # If there is no left bracket, then consider right brackets fair game too - if ( strpos( $url, '(' ) === false ) { - $sep .= ')'; - } - - $numSepChars = strspn( strrev( $url ), $sep ); - if ( $numSepChars ) { - $trail = substr( $url, -$numSepChars ) . $trail; - $url = substr( $url, 0, -$numSepChars ); - } - - $url = Sanitizer::cleanUrl( $url ); - - # Is this an external image? - $text = $this->maybeMakeExternalImage( $url ); - if ( $text === false ) { - # Not an image, make a link - $text = $sk->makeExternalLink( $url, $wgContLang->markNoConversion($url), true, 'free', $this->mTitle->getNamespace() ); - # Register it in the output object... - # Replace unnecessary URL escape codes with their equivalent characters - $pasteurized = self::replaceUnusualEscapes( $url ); - $this->mOutput->addExternalLink( $pasteurized ); - } - $s .= $text . $trail; - } else { - $s .= $protocol . $remainder; - } - } - wfProfileOut( $fname ); - return $s; - } - - /** - * Replace unusual URL escape codes with their equivalent characters - * @param string - * @return string - * @static - * @todo This can merge genuinely required bits in the path or query string, - * breaking legit URLs. A proper fix would treat the various parts of - * the URL differently; as a workaround, just use the output for - * statistical records, not for actual linking/output. - */ - static function replaceUnusualEscapes( $url ) { - return preg_replace_callback( '/%[0-9A-Fa-f]{2}/', - array( __CLASS__, 'replaceUnusualEscapesCallback' ), $url ); - } - - /** - * Callback function used in replaceUnusualEscapes(). - * Replaces unusual URL escape codes with their equivalent character - * @static - * @private - */ - private static function replaceUnusualEscapesCallback( $matches ) { - $char = urldecode( $matches[0] ); - $ord = ord( $char ); - // Is it an unsafe or HTTP reserved character according to RFC 1738? - if ( $ord > 32 && $ord < 127 && strpos( '<>"#{}|\^~[]`;/?', $char ) === false ) { - // No, shouldn't be escaped - return $char; - } else { - // Yes, leave it escaped - return $matches[0]; - } - } - - /** - * make an image if it's allowed, either through the global - * option or through the exception - * @private - */ - function maybeMakeExternalImage( $url ) { - $sk = $this->mOptions->getSkin(); - $imagesfrom = $this->mOptions->getAllowExternalImagesFrom(); - $imagesexception = !empty($imagesfrom); - $text = false; - if ( $this->mOptions->getAllowExternalImages() - || ( $imagesexception && strpos( $url, $imagesfrom ) === 0 ) ) { - if ( preg_match( self::EXT_IMAGE_REGEX, $url ) ) { - # Image found - $text = $sk->makeExternalImage( $url ); - } - } - return $text; - } - - /** - * Process [[ ]] wikilinks - * - * @private - */ - function replaceInternalLinks( $s ) { - global $wgContLang; - static $fname = 'Parser::replaceInternalLinks' ; - - wfProfileIn( $fname ); - - wfProfileIn( $fname.'-setup' ); - static $tc = FALSE; - # the % is needed to support urlencoded titles as well - if ( !$tc ) { $tc = Title::legalChars() . '#%'; } - - $sk = $this->mOptions->getSkin(); - - #split the entire text string on occurences of [[ - $a = explode( '[[', ' ' . $s ); - #get the first element (all text up to first [[), and remove the space we added - $s = array_shift( $a ); - $s = substr( $s, 1 ); - - # Match a link having the form [[namespace:link|alternate]]trail - static $e1 = FALSE; - if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD"; } - # Match cases where there is no "]]", which might still be images - static $e1_img = FALSE; - if ( !$e1_img ) { $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD"; } - # Match the end of a line for a word that's not followed by whitespace, - # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched - $e2 = wfMsgForContent( 'linkprefix' ); - - $useLinkPrefixExtension = $wgContLang->linkPrefixExtension(); - if( is_null( $this->mTitle ) ) { - throw new MWException( __METHOD__.": \$this->mTitle is null\n" ); - } - $nottalk = !$this->mTitle->isTalkPage(); - - if ( $useLinkPrefixExtension ) { - $m = array(); - if ( preg_match( $e2, $s, $m ) ) { - $first_prefix = $m[2]; - } else { - $first_prefix = false; - } - } else { - $prefix = ''; - } - - if($wgContLang->hasVariants()) { - $selflink = $wgContLang->convertLinkToAllVariants($this->mTitle->getPrefixedText()); - } else { - $selflink = array($this->mTitle->getPrefixedText()); - } - $useSubpages = $this->areSubpagesAllowed(); - wfProfileOut( $fname.'-setup' ); - - # Loop for each link - for ($k = 0; isset( $a[$k] ); $k++) { - $line = $a[$k]; - if ( $useLinkPrefixExtension ) { - wfProfileIn( $fname.'-prefixhandling' ); - if ( preg_match( $e2, $s, $m ) ) { - $prefix = $m[2]; - $s = $m[1]; - } else { - $prefix=''; - } - # first link - if($first_prefix) { - $prefix = $first_prefix; - $first_prefix = false; - } - wfProfileOut( $fname.'-prefixhandling' ); - } - - $might_be_img = false; - - wfProfileIn( "$fname-e1" ); - if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt - $text = $m[2]; - # If we get a ] at the beginning of $m[3] that means we have a link that's something like: - # [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row fucks up, - # the real problem is with the $e1 regex - # See bug 1300. - # - # Still some problems for cases where the ] is meant to be outside punctuation, - # and no image is in sight. See bug 2095. - # - if( $text !== '' && - substr( $m[3], 0, 1 ) === ']' && - strpos($text, '[') !== false - ) - { - $text .= ']'; # so that replaceExternalLinks($text) works later - $m[3] = substr( $m[3], 1 ); - } - # fix up urlencoded title texts - if( strpos( $m[1], '%' ) !== false ) { - # Should anchors '#' also be rejected? - $m[1] = str_replace( array('<', '>'), array('<', '>'), urldecode($m[1]) ); - } - $trail = $m[3]; - } elseif( preg_match($e1_img, $line, $m) ) { # Invalid, but might be an image with a link in its caption - $might_be_img = true; - $text = $m[2]; - if ( strpos( $m[1], '%' ) !== false ) { - $m[1] = urldecode($m[1]); - } - $trail = ""; - } else { # Invalid form; output directly - $s .= $prefix . '[[' . $line ; - wfProfileOut( "$fname-e1" ); - continue; - } - wfProfileOut( "$fname-e1" ); - wfProfileIn( "$fname-misc" ); - - # Don't allow internal links to pages containing - # PROTO: where PROTO is a valid URL protocol; these - # should be external links. - if (preg_match('/^\b(?:' . wfUrlProtocols() . ')/', $m[1])) { - $s .= $prefix . '[[' . $line ; - continue; - } - - # Make subpage if necessary - if( $useSubpages ) { - $link = $this->maybeDoSubpageLink( $m[1], $text ); - } else { - $link = $m[1]; - } - - $noforce = (substr($m[1], 0, 1) != ':'); - if (!$noforce) { - # Strip off leading ':' - $link = substr($link, 1); - } - - wfProfileOut( "$fname-misc" ); - wfProfileIn( "$fname-title" ); - $nt = Title::newFromText( $this->mStripState->unstripNoWiki($link) ); - if( !$nt ) { - $s .= $prefix . '[[' . $line; - wfProfileOut( "$fname-title" ); - continue; - } - - $ns = $nt->getNamespace(); - $iw = $nt->getInterWiki(); - wfProfileOut( "$fname-title" ); - - if ($might_be_img) { # if this is actually an invalid link - wfProfileIn( "$fname-might_be_img" ); - if ($ns == NS_IMAGE && $noforce) { #but might be an image - $found = false; - while (isset ($a[$k+1]) ) { - #look at the next 'line' to see if we can close it there - $spliced = array_splice( $a, $k + 1, 1 ); - $next_line = array_shift( $spliced ); - $m = explode( ']]', $next_line, 3 ); - if ( count( $m ) == 3 ) { - # the first ]] closes the inner link, the second the image - $found = true; - $text .= "[[{$m[0]}]]{$m[1]}"; - $trail = $m[2]; - break; - } elseif ( count( $m ) == 2 ) { - #if there's exactly one ]] that's fine, we'll keep looking - $text .= "[[{$m[0]}]]{$m[1]}"; - } else { - #if $next_line is invalid too, we need look no further - $text .= '[[' . $next_line; - break; - } - } - if ( !$found ) { - # we couldn't find the end of this imageLink, so output it raw - #but don't ignore what might be perfectly normal links in the text we've examined - $text = $this->replaceInternalLinks($text); - $s .= "{$prefix}[[$link|$text"; - # note: no $trail, because without an end, there *is* no trail - wfProfileOut( "$fname-might_be_img" ); - continue; - } - } else { #it's not an image, so output it raw - $s .= "{$prefix}[[$link|$text"; - # note: no $trail, because without an end, there *is* no trail - wfProfileOut( "$fname-might_be_img" ); - continue; - } - wfProfileOut( "$fname-might_be_img" ); - } - - $wasblank = ( '' == $text ); - if( $wasblank ) $text = $link; - - # Link not escaped by : , create the various objects - if( $noforce ) { - - # Interwikis - wfProfileIn( "$fname-interwiki" ); - if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgContLang->getLanguageName( $iw ) ) { - $this->mOutput->addLanguageLink( $nt->getFullText() ); - $s = rtrim($s . $prefix); - $s .= trim($trail, "\n") == '' ? '': $prefix . $trail; - wfProfileOut( "$fname-interwiki" ); - continue; - } - wfProfileOut( "$fname-interwiki" ); - - if ( $ns == NS_IMAGE ) { - wfProfileIn( "$fname-image" ); - if ( !wfIsBadImage( $nt->getDBkey(), $this->mTitle ) ) { - # recursively parse links inside the image caption - # actually, this will parse them in any other parameters, too, - # but it might be hard to fix that, and it doesn't matter ATM - $text = $this->replaceExternalLinks($text); - $text = $this->replaceInternalLinks($text); - - # cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them - $s .= $prefix . $this->armorLinks( $this->makeImage( $nt, $text ) ) . $trail; - $this->mOutput->addImage( $nt->getDBkey() ); - - wfProfileOut( "$fname-image" ); - continue; - } else { - # We still need to record the image's presence on the page - $this->mOutput->addImage( $nt->getDBkey() ); - } - wfProfileOut( "$fname-image" ); - - } - - if ( $ns == NS_CATEGORY ) { - wfProfileIn( "$fname-category" ); - $s = rtrim($s . "\n"); # bug 87 - - if ( $wasblank ) { - $sortkey = $this->getDefaultSort(); - } else { - $sortkey = $text; - } - $sortkey = Sanitizer::decodeCharReferences( $sortkey ); - $sortkey = str_replace( "\n", '', $sortkey ); - $sortkey = $wgContLang->convertCategoryKey( $sortkey ); - $this->mOutput->addCategory( $nt->getDBkey(), $sortkey ); - - /** - * Strip the whitespace Category links produce, see bug 87 - * @todo We might want to use trim($tmp, "\n") here. - */ - $s .= trim($prefix . $trail, "\n") == '' ? '': $prefix . $trail; - - wfProfileOut( "$fname-category" ); - continue; - } - } - - # Self-link checking - if( $nt->getFragment() === '' ) { - if( in_array( $nt->getPrefixedText(), $selflink, true ) ) { - $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail ); - continue; - } - } - - # Special and Media are pseudo-namespaces; no pages actually exist in them - if( $ns == NS_MEDIA ) { - # Give extensions a chance to select the file revision for us - $skip = $time = false; - wfRunHooks( 'BeforeParserMakeImageLinkObj', array( &$this, &$nt, &$skip, &$time ) ); - if ( $skip ) { - $link = $sk->makeLinkObj( $nt ); - } else { - $link = $sk->makeMediaLinkObj( $nt, $text, $time ); - } - # Cloak with NOPARSE to avoid replacement in replaceExternalLinks - $s .= $prefix . $this->armorLinks( $link ) . $trail; - $this->mOutput->addImage( $nt->getDBkey() ); - continue; - } elseif( $ns == NS_SPECIAL ) { - if( SpecialPage::exists( $nt->getDBkey() ) ) { - $s .= $this->makeKnownLinkHolder( $nt, $text, '', $trail, $prefix ); - } else { - $s .= $this->makeLinkHolder( $nt, $text, '', $trail, $prefix ); - } - continue; - } elseif( $ns == NS_IMAGE ) { - $img = wfFindFile( $nt ); - if( $img ) { - // Force a blue link if the file exists; may be a remote - // upload on the shared repository, and we want to see its - // auto-generated page. - $s .= $this->makeKnownLinkHolder( $nt, $text, '', $trail, $prefix ); - $this->mOutput->addLink( $nt ); - continue; - } - } - $s .= $this->makeLinkHolder( $nt, $text, '', $trail, $prefix ); - } - wfProfileOut( $fname ); - return $s; - } - - /** - * Make a link placeholder. The text returned can be later resolved to a real link with - * replaceLinkHolders(). This is done for two reasons: firstly to avoid further - * parsing of interwiki links, and secondly to allow all existence checks and - * article length checks (for stub links) to be bundled into a single query. - * - */ - function makeLinkHolder( &$nt, $text = '', $query = '', $trail = '', $prefix = '' ) { - wfProfileIn( __METHOD__ ); - if ( ! is_object($nt) ) { - # Fail gracefully - $retVal = "{$prefix}{$text}{$trail}"; - } else { - # Separate the link trail from the rest of the link - list( $inside, $trail ) = Linker::splitTrail( $trail ); - - if ( $nt->isExternal() ) { - $nr = array_push( $this->mInterwikiLinkHolders['texts'], $prefix.$text.$inside ); - $this->mInterwikiLinkHolders['titles'][] = $nt; - $retVal = '{$trail}"; - } else { - $nr = array_push( $this->mLinkHolders['namespaces'], $nt->getNamespace() ); - $this->mLinkHolders['dbkeys'][] = $nt->getDBkey(); - $this->mLinkHolders['queries'][] = $query; - $this->mLinkHolders['texts'][] = $prefix.$text.$inside; - $this->mLinkHolders['titles'][] = $nt; - - $retVal = '{$trail}"; - } - } - wfProfileOut( __METHOD__ ); - return $retVal; - } - - /** - * Render a forced-blue link inline; protect against double expansion of - * URLs if we're in a mode that prepends full URL prefixes to internal links. - * Since this little disaster has to split off the trail text to avoid - * breaking URLs in the following text without breaking trails on the - * wiki links, it's been made into a horrible function. - * - * @param Title $nt - * @param string $text - * @param string $query - * @param string $trail - * @param string $prefix - * @return string HTML-wikitext mix oh yuck - */ - function makeKnownLinkHolder( $nt, $text = '', $query = '', $trail = '', $prefix = '' ) { - list( $inside, $trail ) = Linker::splitTrail( $trail ); - $sk = $this->mOptions->getSkin(); - $link = $sk->makeKnownLinkObj( $nt, $text, $query, $inside, $prefix ); - return $this->armorLinks( $link ) . $trail; - } - - /** - * Insert a NOPARSE hacky thing into any inline links in a chunk that's - * going to go through further parsing steps before inline URL expansion. - * - * In particular this is important when using action=render, which causes - * full URLs to be included. - * - * Oh man I hate our multi-layer parser! - * - * @param string more-or-less HTML - * @return string less-or-more HTML with NOPARSE bits - */ - function armorLinks( $text ) { - return preg_replace( '/\b(' . wfUrlProtocols() . ')/', - "{$this->mUniqPrefix}NOPARSE$1", $text ); - } - - /** - * Return true if subpage links should be expanded on this page. - * @return bool - */ - function areSubpagesAllowed() { - # Some namespaces don't allow subpages - global $wgNamespacesWithSubpages; - return !empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()]); - } - - /** - * Handle link to subpage if necessary - * @param string $target the source of the link - * @param string &$text the link text, modified as necessary - * @return string the full name of the link - * @private - */ - function maybeDoSubpageLink($target, &$text) { - # Valid link forms: - # Foobar -- normal - # :Foobar -- override special treatment of prefix (images, language links) - # /Foobar -- convert to CurrentPage/Foobar - # /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text - # ../ -- convert to CurrentPage, from CurrentPage/CurrentSubPage - # ../Foobar -- convert to CurrentPage/Foobar, from CurrentPage/CurrentSubPage - - $fname = 'Parser::maybeDoSubpageLink'; - wfProfileIn( $fname ); - $ret = $target; # default return value is no change - - # Some namespaces don't allow subpages, - # so only perform processing if subpages are allowed - if( $this->areSubpagesAllowed() ) { - $hash = strpos( $target, '#' ); - if( $hash !== false ) { - $suffix = substr( $target, $hash ); - $target = substr( $target, 0, $hash ); - } else { - $suffix = ''; - } - # bug 7425 - $target = trim( $target ); - # Look at the first character - if( $target != '' && $target{0} == '/' ) { - # / at end means we don't want the slash to be shown - $m = array(); - $trailingSlashes = preg_match_all( '%(/+)$%', $target, $m ); - if( $trailingSlashes ) { - $noslash = $target = substr( $target, 1, -strlen($m[0][0]) ); - } else { - $noslash = substr( $target, 1 ); - } - - $ret = $this->mTitle->getPrefixedText(). '/' . trim($noslash) . $suffix; - if( '' === $text ) { - $text = $target . $suffix; - } # this might be changed for ugliness reasons - } else { - # check for .. subpage backlinks - $dotdotcount = 0; - $nodotdot = $target; - while( strncmp( $nodotdot, "../", 3 ) == 0 ) { - ++$dotdotcount; - $nodotdot = substr( $nodotdot, 3 ); - } - if($dotdotcount > 0) { - $exploded = explode( '/', $this->mTitle->GetPrefixedText() ); - if( count( $exploded ) > $dotdotcount ) { # not allowed to go below top level page - $ret = implode( '/', array_slice( $exploded, 0, -$dotdotcount ) ); - # / at the end means don't show full path - if( substr( $nodotdot, -1, 1 ) == '/' ) { - $nodotdot = substr( $nodotdot, 0, -1 ); - if( '' === $text ) { - $text = $nodotdot . $suffix; - } - } - $nodotdot = trim( $nodotdot ); - if( $nodotdot != '' ) { - $ret .= '/' . $nodotdot; - } - $ret .= $suffix; - } - } - } - } - - wfProfileOut( $fname ); - return $ret; - } - - /**#@+ - * Used by doBlockLevels() - * @private - */ - /* private */ function closeParagraph() { - $result = ''; - if ( '' != $this->mLastSection ) { - $result = 'mLastSection . ">\n"; - } - $this->mInPre = false; - $this->mLastSection = ''; - return $result; - } - # getCommon() returns the length of the longest common substring - # of both arguments, starting at the beginning of both. - # - /* private */ function getCommon( $st1, $st2 ) { - $fl = strlen( $st1 ); - $shorter = strlen( $st2 ); - if ( $fl < $shorter ) { $shorter = $fl; } - - for ( $i = 0; $i < $shorter; ++$i ) { - if ( $st1{$i} != $st2{$i} ) { break; } - } - return $i; - } - # These next three functions open, continue, and close the list - # element appropriate to the prefix character passed into them. - # - /* private */ function openList( $char ) { - $result = $this->closeParagraph(); - - if ( '*' == $char ) { $result .= '
  • '; } - else if ( '#' == $char ) { $result .= '
    1. '; } - else if ( ':' == $char ) { $result .= '
      '; } - else if ( ';' == $char ) { - $result .= '
      '; - $this->mDTopen = true; - } - else { $result = ''; } - - return $result; - } - - /* private */ function nextItem( $char ) { - if ( '*' == $char || '#' == $char ) { return '
    2. '; } - else if ( ':' == $char || ';' == $char ) { - $close = ''; - if ( $this->mDTopen ) { $close = ''; } - if ( ';' == $char ) { - $this->mDTopen = true; - return $close . '
      '; - } else { - $this->mDTopen = false; - return $close . '
      '; - } - } - return ''; - } - - /* private */ function closeList( $char ) { - if ( '*' == $char ) { $text = '
'; } - else if ( '#' == $char ) { $text = ''; } - else if ( ':' == $char ) { - if ( $this->mDTopen ) { - $this->mDTopen = false; - $text = ''; - } else { - $text = ''; - } - } - else { return ''; } - return $text."\n"; - } - /**#@-*/ - - /** - * Make lists from lines starting with ':', '*', '#', etc. - * - * @private - * @return string the lists rendered as HTML - */ - function doBlockLevels( $text, $linestart ) { - $fname = 'Parser::doBlockLevels'; - wfProfileIn( $fname ); - - # Parsing through the text line by line. The main thing - # happening here is handling of block-level elements p, pre, - # and making lists from lines starting with * # : etc. - # - $textLines = explode( "\n", $text ); - - $lastPrefix = $output = ''; - $this->mDTopen = $inBlockElem = false; - $prefixLength = 0; - $paragraphStack = false; - - if ( !$linestart ) { - $output .= array_shift( $textLines ); - } - foreach ( $textLines as $oLine ) { - $lastPrefixLength = strlen( $lastPrefix ); - $preCloseMatch = preg_match('/<\\/pre/i', $oLine ); - $preOpenMatch = preg_match('/
mInPre ) {
-				# Multiple prefixes may abut each other for nested lists.
-				$prefixLength = strspn( $oLine, '*#:;' );
-				$pref = substr( $oLine, 0, $prefixLength );
-
-				# eh?
-				$pref2 = str_replace( ';', ':', $pref );
-				$t = substr( $oLine, $prefixLength );
-				$this->mInPre = !empty($preOpenMatch);
-			} else {
-				# Don't interpret any other prefixes in preformatted text
-				$prefixLength = 0;
-				$pref = $pref2 = '';
-				$t = $oLine;
-			}
-
-			# List generation
-			if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
-				# Same as the last item, so no need to deal with nesting or opening stuff
-				$output .= $this->nextItem( substr( $pref, -1 ) );
-				$paragraphStack = false;
-
-				if ( substr( $pref, -1 ) == ';') {
-					# The one nasty exception: definition lists work like this:
-					# ; title : definition text
-					# So we check for : in the remainder text to split up the
-					# title and definition, without b0rking links.
-					$term = $t2 = '';
-					if ($this->findColonNoLinks($t, $term, $t2) !== false) {
-						$t = $t2;
-						$output .= $term . $this->nextItem( ':' );
-					}
-				}
-			} elseif( $prefixLength || $lastPrefixLength ) {
-				# Either open or close a level...
-				$commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
-				$paragraphStack = false;
-
-				while( $commonPrefixLength < $lastPrefixLength ) {
-					$output .= $this->closeList( $lastPrefix{$lastPrefixLength-1} );
-					--$lastPrefixLength;
-				}
-				if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
-					$output .= $this->nextItem( $pref{$commonPrefixLength-1} );
-				}
-				while ( $prefixLength > $commonPrefixLength ) {
-					$char = substr( $pref, $commonPrefixLength, 1 );
-					$output .= $this->openList( $char );
-
-					if ( ';' == $char ) {
-						# FIXME: This is dupe of code above
-						if ($this->findColonNoLinks($t, $term, $t2) !== false) {
-							$t = $t2;
-							$output .= $term . $this->nextItem( ':' );
-						}
-					}
-					++$commonPrefixLength;
-				}
-				$lastPrefix = $pref2;
-			}
-			if( 0 == $prefixLength ) {
-				wfProfileIn( "$fname-paragraph" );
-				# No prefix (not in list)--go to paragraph mode
-				// XXX: use a stack for nestable elements like span, table and div
-				$openmatch = preg_match('/(?:mUniqPrefix.'-pre|<\\/li|<\\/ul|<\\/ol|<\\/?center)/iS', $t );
-				if ( $openmatch or $closematch ) {
-					$paragraphStack = false;
-					# TODO bug 5718: paragraph closed
-					$output .= $this->closeParagraph();
-					if ( $preOpenMatch and !$preCloseMatch ) {
-						$this->mInPre = true;
-					}
-					if ( $closematch ) {
-						$inBlockElem = false;
-					} else {
-						$inBlockElem = true;
-					}
-				} else if ( !$inBlockElem && !$this->mInPre ) {
-					if ( '' != $t and ' ' == $t{0} and ( $this->mLastSection == 'pre' or trim($t) != '' ) ) {
-						// pre
-						if ($this->mLastSection != 'pre') {
-							$paragraphStack = false;
-							$output .= $this->closeParagraph().'
';
-							$this->mLastSection = 'pre';
-						}
-						$t = substr( $t, 1 );
-					} else {
-						// paragraph
-						if ( '' == trim($t) ) {
-							if ( $paragraphStack ) {
-								$output .= $paragraphStack.'
'; - $paragraphStack = false; - $this->mLastSection = 'p'; - } else { - if ($this->mLastSection != 'p' ) { - $output .= $this->closeParagraph(); - $this->mLastSection = ''; - $paragraphStack = '

'; - } else { - $paragraphStack = '

'; - } - } - } else { - if ( $paragraphStack ) { - $output .= $paragraphStack; - $paragraphStack = false; - $this->mLastSection = 'p'; - } else if ($this->mLastSection != 'p') { - $output .= $this->closeParagraph().'

'; - $this->mLastSection = 'p'; - } - } - } - } - wfProfileOut( "$fname-paragraph" ); - } - // somewhere above we forget to get out of pre block (bug 785) - if($preCloseMatch && $this->mInPre) { - $this->mInPre = false; - } - if ($paragraphStack === false) { - $output .= $t."\n"; - } - } - while ( $prefixLength ) { - $output .= $this->closeList( $pref2{$prefixLength-1} ); - --$prefixLength; - } - if ( '' != $this->mLastSection ) { - $output .= 'mLastSection . '>'; - $this->mLastSection = ''; - } - - wfProfileOut( $fname ); - return $output; - } - - /** - * Split up a string on ':', ignoring any occurences inside tags - * to prevent illegal overlapping. - * @param string $str the string to split - * @param string &$before set to everything before the ':' - * @param string &$after set to everything after the ':' - * return string the position of the ':', or false if none found - */ - function findColonNoLinks($str, &$before, &$after) { - $fname = 'Parser::findColonNoLinks'; - wfProfileIn( $fname ); - - $pos = strpos( $str, ':' ); - if( $pos === false ) { - // Nothing to find! - wfProfileOut( $fname ); - return false; - } - - $lt = strpos( $str, '<' ); - if( $lt === false || $lt > $pos ) { - // Easy; no tag nesting to worry about - $before = substr( $str, 0, $pos ); - $after = substr( $str, $pos+1 ); - wfProfileOut( $fname ); - return $pos; - } - - // Ugly state machine to walk through avoiding tags. - $state = self::COLON_STATE_TEXT; - $stack = 0; - $len = strlen( $str ); - for( $i = 0; $i < $len; $i++ ) { - $c = $str{$i}; - - switch( $state ) { - // (Using the number is a performance hack for common cases) - case 0: // self::COLON_STATE_TEXT: - switch( $c ) { - case "<": - // Could be either a tag or an tag - $state = self::COLON_STATE_TAGSTART; - break; - case ":": - if( $stack == 0 ) { - // We found it! - $before = substr( $str, 0, $i ); - $after = substr( $str, $i + 1 ); - wfProfileOut( $fname ); - return $i; - } - // Embedded in a tag; don't break it. - break; - default: - // Skip ahead looking for something interesting - $colon = strpos( $str, ':', $i ); - if( $colon === false ) { - // Nothing else interesting - wfProfileOut( $fname ); - return false; - } - $lt = strpos( $str, '<', $i ); - if( $stack === 0 ) { - if( $lt === false || $colon < $lt ) { - // We found it! - $before = substr( $str, 0, $colon ); - $after = substr( $str, $colon + 1 ); - wfProfileOut( $fname ); - return $i; - } - } - if( $lt === false ) { - // Nothing else interesting to find; abort! - // We're nested, but there's no close tags left. Abort! - break 2; - } - // Skip ahead to next tag start - $i = $lt; - $state = self::COLON_STATE_TAGSTART; - } - break; - case 1: // self::COLON_STATE_TAG: - // In a - switch( $c ) { - case ">": - $stack++; - $state = self::COLON_STATE_TEXT; - break; - case "/": - // Slash may be followed by >? - $state = self::COLON_STATE_TAGSLASH; - break; - default: - // ignore - } - break; - case 2: // self::COLON_STATE_TAGSTART: - switch( $c ) { - case "/": - $state = self::COLON_STATE_CLOSETAG; - break; - case "!": - $state = self::COLON_STATE_COMMENT; - break; - case ">": - // Illegal early close? This shouldn't happen D: - $state = self::COLON_STATE_TEXT; - break; - default: - $state = self::COLON_STATE_TAG; - } - break; - case 3: // self::COLON_STATE_CLOSETAG: - // In a - if( $c == ">" ) { - $stack--; - if( $stack < 0 ) { - wfDebug( "Invalid input in $fname; too many close tags\n" ); - wfProfileOut( $fname ); - return false; - } - $state = self::COLON_STATE_TEXT; - } - break; - case self::COLON_STATE_TAGSLASH: - if( $c == ">" ) { - // Yes, a self-closed tag - $state = self::COLON_STATE_TEXT; - } else { - // Probably we're jumping the gun, and this is an attribute - $state = self::COLON_STATE_TAG; - } - break; - case 5: // self::COLON_STATE_COMMENT: - if( $c == "-" ) { - $state = self::COLON_STATE_COMMENTDASH; - } - break; - case self::COLON_STATE_COMMENTDASH: - if( $c == "-" ) { - $state = self::COLON_STATE_COMMENTDASHDASH; - } else { - $state = self::COLON_STATE_COMMENT; - } - break; - case self::COLON_STATE_COMMENTDASHDASH: - if( $c == ">" ) { - $state = self::COLON_STATE_TEXT; - } else { - $state = self::COLON_STATE_COMMENT; - } - break; - default: - throw new MWException( "State machine error in $fname" ); - } - } - if( $stack > 0 ) { - wfDebug( "Invalid input in $fname; not enough close tags (stack $stack, state $state)\n" ); - return false; - } - wfProfileOut( $fname ); - return false; - } - - /** - * Return value of a magic variable (like PAGENAME) - * - * @private - */ - function getVariableValue( $index ) { - global $wgContLang, $wgSitename, $wgServer, $wgServerName, $wgScriptPath; - - /** - * Some of these require message or data lookups and can be - * expensive to check many times. - */ - static $varCache = array(); - if ( wfRunHooks( 'ParserGetVariableValueVarCache', array( &$this, &$varCache ) ) ) { - if ( isset( $varCache[$index] ) ) { - return $varCache[$index]; - } - } - - $ts = time(); - wfRunHooks( 'ParserGetVariableValueTs', array( &$this, &$ts ) ); - - # Use the time zone - global $wgLocaltimezone; - if ( isset( $wgLocaltimezone ) ) { - $oldtz = getenv( 'TZ' ); - putenv( 'TZ='.$wgLocaltimezone ); - } - - wfSuppressWarnings(); // E_STRICT system time bitching - $localTimestamp = date( 'YmdHis', $ts ); - $localMonth = date( 'm', $ts ); - $localMonthName = date( 'n', $ts ); - $localDay = date( 'j', $ts ); - $localDay2 = date( 'd', $ts ); - $localDayOfWeek = date( 'w', $ts ); - $localWeek = date( 'W', $ts ); - $localYear = date( 'Y', $ts ); - $localHour = date( 'H', $ts ); - if ( isset( $wgLocaltimezone ) ) { - putenv( 'TZ='.$oldtz ); - } - wfRestoreWarnings(); - - switch ( $index ) { - case 'currentmonth': - return $varCache[$index] = $wgContLang->formatNum( gmdate( 'm', $ts ) ); - case 'currentmonthname': - return $varCache[$index] = $wgContLang->getMonthName( gmdate( 'n', $ts ) ); - case 'currentmonthnamegen': - return $varCache[$index] = $wgContLang->getMonthNameGen( gmdate( 'n', $ts ) ); - case 'currentmonthabbrev': - return $varCache[$index] = $wgContLang->getMonthAbbreviation( gmdate( 'n', $ts ) ); - case 'currentday': - return $varCache[$index] = $wgContLang->formatNum( gmdate( 'j', $ts ) ); - case 'currentday2': - return $varCache[$index] = $wgContLang->formatNum( gmdate( 'd', $ts ) ); - case 'localmonth': - return $varCache[$index] = $wgContLang->formatNum( $localMonth ); - case 'localmonthname': - return $varCache[$index] = $wgContLang->getMonthName( $localMonthName ); - case 'localmonthnamegen': - return $varCache[$index] = $wgContLang->getMonthNameGen( $localMonthName ); - case 'localmonthabbrev': - return $varCache[$index] = $wgContLang->getMonthAbbreviation( $localMonthName ); - case 'localday': - return $varCache[$index] = $wgContLang->formatNum( $localDay ); - case 'localday2': - return $varCache[$index] = $wgContLang->formatNum( $localDay2 ); - case 'pagename': - return wfEscapeWikiText( $this->mTitle->getText() ); - case 'pagenamee': - return $this->mTitle->getPartialURL(); - case 'fullpagename': - return wfEscapeWikiText( $this->mTitle->getPrefixedText() ); - case 'fullpagenamee': - return $this->mTitle->getPrefixedURL(); - case 'subpagename': - return wfEscapeWikiText( $this->mTitle->getSubpageText() ); - case 'subpagenamee': - return $this->mTitle->getSubpageUrlForm(); - case 'basepagename': - return wfEscapeWikiText( $this->mTitle->getBaseText() ); - case 'basepagenamee': - return wfUrlEncode( str_replace( ' ', '_', $this->mTitle->getBaseText() ) ); - case 'talkpagename': - if( $this->mTitle->canTalk() ) { - $talkPage = $this->mTitle->getTalkPage(); - return wfEscapeWikiText( $talkPage->getPrefixedText() ); - } else { - return ''; - } - case 'talkpagenamee': - if( $this->mTitle->canTalk() ) { - $talkPage = $this->mTitle->getTalkPage(); - return $talkPage->getPrefixedUrl(); - } else { - return ''; - } - case 'subjectpagename': - $subjPage = $this->mTitle->getSubjectPage(); - return wfEscapeWikiText( $subjPage->getPrefixedText() ); - case 'subjectpagenamee': - $subjPage = $this->mTitle->getSubjectPage(); - return $subjPage->getPrefixedUrl(); - case 'revisionid': - return $this->mRevisionId; - case 'revisionday': - return intval( substr( $this->getRevisionTimestamp(), 6, 2 ) ); - case 'revisionday2': - return substr( $this->getRevisionTimestamp(), 6, 2 ); - case 'revisionmonth': - return intval( substr( $this->getRevisionTimestamp(), 4, 2 ) ); - case 'revisionyear': - return substr( $this->getRevisionTimestamp(), 0, 4 ); - case 'revisiontimestamp': - return $this->getRevisionTimestamp(); - case 'namespace': - return str_replace('_',' ',$wgContLang->getNsText( $this->mTitle->getNamespace() ) ); - case 'namespacee': - return wfUrlencode( $wgContLang->getNsText( $this->mTitle->getNamespace() ) ); - case 'talkspace': - return $this->mTitle->canTalk() ? str_replace('_',' ',$this->mTitle->getTalkNsText()) : ''; - case 'talkspacee': - return $this->mTitle->canTalk() ? wfUrlencode( $this->mTitle->getTalkNsText() ) : ''; - case 'subjectspace': - return $this->mTitle->getSubjectNsText(); - case 'subjectspacee': - return( wfUrlencode( $this->mTitle->getSubjectNsText() ) ); - case 'currentdayname': - return $varCache[$index] = $wgContLang->getWeekdayName( gmdate( 'w', $ts ) + 1 ); - case 'currentyear': - return $varCache[$index] = $wgContLang->formatNum( gmdate( 'Y', $ts ), true ); - case 'currenttime': - return $varCache[$index] = $wgContLang->time( wfTimestamp( TS_MW, $ts ), false, false ); - case 'currenthour': - return $varCache[$index] = $wgContLang->formatNum( gmdate( 'H', $ts ), true ); - case 'currentweek': - // @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to - // int to remove the padding - return $varCache[$index] = $wgContLang->formatNum( (int)gmdate( 'W', $ts ) ); - case 'currentdow': - return $varCache[$index] = $wgContLang->formatNum( gmdate( 'w', $ts ) ); - case 'localdayname': - return $varCache[$index] = $wgContLang->getWeekdayName( $localDayOfWeek + 1 ); - case 'localyear': - return $varCache[$index] = $wgContLang->formatNum( $localYear, true ); - case 'localtime': - return $varCache[$index] = $wgContLang->time( $localTimestamp, false, false ); - case 'localhour': - return $varCache[$index] = $wgContLang->formatNum( $localHour, true ); - case 'localweek': - // @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to - // int to remove the padding - return $varCache[$index] = $wgContLang->formatNum( (int)$localWeek ); - case 'localdow': - return $varCache[$index] = $wgContLang->formatNum( $localDayOfWeek ); - case 'numberofarticles': - return $varCache[$index] = $wgContLang->formatNum( SiteStats::articles() ); - case 'numberoffiles': - return $varCache[$index] = $wgContLang->formatNum( SiteStats::images() ); - case 'numberofusers': - return $varCache[$index] = $wgContLang->formatNum( SiteStats::users() ); - case 'numberofpages': - return $varCache[$index] = $wgContLang->formatNum( SiteStats::pages() ); - case 'numberofadmins': - return $varCache[$index] = $wgContLang->formatNum( SiteStats::admins() ); - case 'numberofedits': - return $varCache[$index] = $wgContLang->formatNum( SiteStats::edits() ); - case 'currenttimestamp': - return $varCache[$index] = wfTimestampNow(); - case 'localtimestamp': - return $varCache[$index] = $localTimestamp; - case 'currentversion': - return $varCache[$index] = SpecialVersion::getVersion(); - case 'sitename': - return $wgSitename; - case 'server': - return $wgServer; - case 'servername': - return $wgServerName; - case 'scriptpath': - return $wgScriptPath; - case 'directionmark': - return $wgContLang->getDirMark(); - case 'contentlanguage': - global $wgContLanguageCode; - return $wgContLanguageCode; - default: - $ret = null; - if ( wfRunHooks( 'ParserGetVariableValueSwitch', array( &$this, &$varCache, &$index, &$ret ) ) ) - return $ret; - else - return null; - } - } - - /** - * initialise the magic variables (like CURRENTMONTHNAME) - * - * @private - */ - function initialiseVariables() { - $fname = 'Parser::initialiseVariables'; - wfProfileIn( $fname ); - $variableIDs = MagicWord::getVariableIDs(); - - $this->mVariables = array(); - foreach ( $variableIDs as $id ) { - $mw =& MagicWord::get( $id ); - $mw->addToArray( $this->mVariables, $id ); - } - wfProfileOut( $fname ); - } - - /** - * parse any parentheses in format ((title|part|part)) - * and call callbacks to get a replacement text for any found piece - * - * @param string $text The text to parse - * @param array $callbacks rules in form: - * '{' => array( # opening parentheses - * 'end' => '}', # closing parentheses - * 'cb' => array(2 => callback, # replacement callback to call if {{..}} is found - * 3 => callback # replacement callback to call if {{{..}}} is found - * ) - * ) - * 'min' => 2, # Minimum parenthesis count in cb - * 'max' => 3, # Maximum parenthesis count in cb - * @private - */ - function replace_callback ($text, $callbacks) { - wfProfileIn( __METHOD__ ); - $openingBraceStack = array(); # this array will hold a stack of parentheses which are not closed yet - $lastOpeningBrace = -1; # last not closed parentheses - - $validOpeningBraces = implode( '', array_keys( $callbacks ) ); - - $i = 0; - while ( $i < strlen( $text ) ) { - # Find next opening brace, closing brace or pipe - if ( $lastOpeningBrace == -1 ) { - $currentClosing = ''; - $search = $validOpeningBraces; - } else { - $currentClosing = $openingBraceStack[$lastOpeningBrace]['braceEnd']; - $search = $validOpeningBraces . '|' . $currentClosing; - } - $rule = null; - $i += strcspn( $text, $search, $i ); - if ( $i < strlen( $text ) ) { - if ( $text[$i] == '|' ) { - $found = 'pipe'; - } elseif ( $text[$i] == $currentClosing ) { - $found = 'close'; - } elseif ( isset( $callbacks[$text[$i]] ) ) { - $found = 'open'; - $rule = $callbacks[$text[$i]]; - } else { - # Some versions of PHP have a strcspn which stops on null characters - # Ignore and continue - ++$i; - continue; - } - } else { - # All done - break; - } - - if ( $found == 'open' ) { - # found opening brace, let's add it to parentheses stack - $piece = array('brace' => $text[$i], - 'braceEnd' => $rule['end'], - 'title' => '', - 'parts' => null); - - # count opening brace characters - $piece['count'] = strspn( $text, $piece['brace'], $i ); - $piece['startAt'] = $piece['partStart'] = $i + $piece['count']; - $i += $piece['count']; - - # we need to add to stack only if opening brace count is enough for one of the rules - if ( $piece['count'] >= $rule['min'] ) { - $lastOpeningBrace ++; - $openingBraceStack[$lastOpeningBrace] = $piece; - } - } elseif ( $found == 'close' ) { - # lets check if it is enough characters for closing brace - $maxCount = $openingBraceStack[$lastOpeningBrace]['count']; - $count = strspn( $text, $text[$i], $i, $maxCount ); - - # check for maximum matching characters (if there are 5 closing - # characters, we will probably need only 3 - depending on the rules) - $matchingCount = 0; - $matchingCallback = null; - $cbType = $callbacks[$openingBraceStack[$lastOpeningBrace]['brace']]; - if ( $count > $cbType['max'] ) { - # The specified maximum exists in the callback array, unless the caller - # has made an error - $matchingCount = $cbType['max']; - } else { - # Count is less than the maximum - # Skip any gaps in the callback array to find the true largest match - # Need to use array_key_exists not isset because the callback can be null - $matchingCount = $count; - while ( $matchingCount > 0 && !array_key_exists( $matchingCount, $cbType['cb'] ) ) { - --$matchingCount; - } - } - - if ($matchingCount <= 0) { - $i += $count; - continue; - } - $matchingCallback = $cbType['cb'][$matchingCount]; - - # let's set a title or last part (if '|' was found) - if (null === $openingBraceStack[$lastOpeningBrace]['parts']) { - $openingBraceStack[$lastOpeningBrace]['title'] = - substr($text, $openingBraceStack[$lastOpeningBrace]['partStart'], - $i - $openingBraceStack[$lastOpeningBrace]['partStart']); - } else { - $openingBraceStack[$lastOpeningBrace]['parts'][] = - substr($text, $openingBraceStack[$lastOpeningBrace]['partStart'], - $i - $openingBraceStack[$lastOpeningBrace]['partStart']); - } - - $pieceStart = $openingBraceStack[$lastOpeningBrace]['startAt'] - $matchingCount; - $pieceEnd = $i + $matchingCount; - - if( is_callable( $matchingCallback ) ) { - $cbArgs = array ( - 'text' => substr($text, $pieceStart, $pieceEnd - $pieceStart), - 'title' => trim($openingBraceStack[$lastOpeningBrace]['title']), - 'parts' => $openingBraceStack[$lastOpeningBrace]['parts'], - 'lineStart' => (($pieceStart > 0) && ($text[$pieceStart-1] == "\n")), - ); - # finally we can call a user callback and replace piece of text - $replaceWith = call_user_func( $matchingCallback, $cbArgs ); - $text = substr($text, 0, $pieceStart) . $replaceWith . substr($text, $pieceEnd); - $i = $pieceStart + strlen($replaceWith); - } else { - # null value for callback means that parentheses should be parsed, but not replaced - $i += $matchingCount; - } - - # reset last opening parentheses, but keep it in case there are unused characters - $piece = array('brace' => $openingBraceStack[$lastOpeningBrace]['brace'], - 'braceEnd' => $openingBraceStack[$lastOpeningBrace]['braceEnd'], - 'count' => $openingBraceStack[$lastOpeningBrace]['count'], - 'title' => '', - 'parts' => null, - 'startAt' => $openingBraceStack[$lastOpeningBrace]['startAt']); - $openingBraceStack[$lastOpeningBrace--] = null; - - if ($matchingCount < $piece['count']) { - $piece['count'] -= $matchingCount; - $piece['startAt'] -= $matchingCount; - $piece['partStart'] = $piece['startAt']; - # do we still qualify for any callback with remaining count? - $currentCbList = $callbacks[$piece['brace']]['cb']; - while ( $piece['count'] ) { - if ( array_key_exists( $piece['count'], $currentCbList ) ) { - $lastOpeningBrace++; - $openingBraceStack[$lastOpeningBrace] = $piece; - break; - } - --$piece['count']; - } - } - } elseif ( $found == 'pipe' ) { - # lets set a title if it is a first separator, or next part otherwise - if (null === $openingBraceStack[$lastOpeningBrace]['parts']) { - $openingBraceStack[$lastOpeningBrace]['title'] = - substr($text, $openingBraceStack[$lastOpeningBrace]['partStart'], - $i - $openingBraceStack[$lastOpeningBrace]['partStart']); - $openingBraceStack[$lastOpeningBrace]['parts'] = array(); - } else { - $openingBraceStack[$lastOpeningBrace]['parts'][] = - substr($text, $openingBraceStack[$lastOpeningBrace]['partStart'], - $i - $openingBraceStack[$lastOpeningBrace]['partStart']); - } - $openingBraceStack[$lastOpeningBrace]['partStart'] = ++$i; - } - } - - wfProfileOut( __METHOD__ ); - return $text; - } - - /** - * Replace magic variables, templates, and template arguments - * with the appropriate text. Templates are substituted recursively, - * taking care to avoid infinite loops. - * - * Note that the substitution depends on value of $mOutputType: - * self::OT_WIKI: only {{subst:}} templates - * self::OT_MSG: only magic variables - * self::OT_HTML: all templates and magic variables - * - * @param string $tex The text to transform - * @param array $args Key-value pairs representing template parameters to substitute - * @param bool $argsOnly Only do argument (triple-brace) expansion, not double-brace expansion - * @private - */ - function replaceVariables( $text, $args = array(), $argsOnly = false ) { - # Prevent too big inclusions - if( strlen( $text ) > $this->mOptions->getMaxIncludeSize() ) { - return $text; - } - - $fname = __METHOD__ /*. '-L' . count( $this->mArgStack )*/; - wfProfileIn( $fname ); - - # This function is called recursively. To keep track of arguments we need a stack: - array_push( $this->mArgStack, $args ); - - $braceCallbacks = array(); - if ( !$argsOnly ) { - $braceCallbacks[2] = array( &$this, 'braceSubstitution' ); - } - if ( $this->mOutputType != self::OT_MSG ) { - $braceCallbacks[3] = array( &$this, 'argSubstitution' ); - } - if ( $braceCallbacks ) { - $callbacks = array( - '{' => array( - 'end' => '}', - 'cb' => $braceCallbacks, - 'min' => $argsOnly ? 3 : 2, - 'max' => isset( $braceCallbacks[3] ) ? 3 : 2, - ), - '[' => array( - 'end' => ']', - 'cb' => array(2=>null), - 'min' => 2, - 'max' => 2, - ) - ); - $text = $this->replace_callback ($text, $callbacks); - - array_pop( $this->mArgStack ); - } - wfProfileOut( $fname ); - return $text; - } - - /** - * Replace magic variables - * @private - */ - function variableSubstitution( $matches ) { - global $wgContLang; - $fname = 'Parser::variableSubstitution'; - $varname = $wgContLang->lc($matches[1]); - wfProfileIn( $fname ); - $skip = false; - if ( $this->mOutputType == self::OT_WIKI ) { - # Do only magic variables prefixed by SUBST - $mwSubst =& MagicWord::get( 'subst' ); - if (!$mwSubst->matchStartAndRemove( $varname )) - $skip = true; - # Note that if we don't substitute the variable below, - # we don't remove the {{subst:}} magic word, in case - # it is a template rather than a magic variable. - } - if ( !$skip && array_key_exists( $varname, $this->mVariables ) ) { - $id = $this->mVariables[$varname]; - # Now check if we did really match, case sensitive or not - $mw =& MagicWord::get( $id ); - if ($mw->match($matches[1])) { - $text = $this->getVariableValue( $id ); - if (MagicWord::getCacheTTL($id)>-1) - $this->mOutput->mContainsOldMagic = true; - } else { - $text = $matches[0]; - } - } else { - $text = $matches[0]; - } - wfProfileOut( $fname ); - return $text; - } - - - /// Clean up argument array - refactored in 1.9 so parserfunctions can use it, too. - static function createAssocArgs( $args ) { - $assocArgs = array(); - $index = 1; - foreach( $args as $arg ) { - $eqpos = strpos( $arg, '=' ); - if ( $eqpos === false ) { - $assocArgs[$index++] = $arg; - } else { - $name = trim( substr( $arg, 0, $eqpos ) ); - $value = trim( substr( $arg, $eqpos+1 ) ); - if ( $value === false ) { - $value = ''; - } - if ( $name !== false ) { - $assocArgs[$name] = $value; - } - } - } - - return $assocArgs; - } - - /** - * Return the text of a template, after recursively - * replacing any variables or templates within the template. - * - * @param array $piece The parts of the template - * $piece['text']: matched text - * $piece['title']: the title, i.e. the part before the | - * $piece['parts']: the parameter array - * @return string the text of the template - * @private - */ - function braceSubstitution( $piece ) { - global $wgContLang, $wgLang, $wgAllowDisplayTitle, $wgNonincludableNamespaces; - $fname = __METHOD__ /*. '-L' . count( $this->mArgStack )*/; - wfProfileIn( $fname ); - wfProfileIn( __METHOD__.'-setup' ); - - # Flags - $found = false; # $text has been filled - $nowiki = false; # wiki markup in $text should be escaped - $noparse = false; # Unsafe HTML tags should not be stripped, etc. - $noargs = false; # Don't replace triple-brace arguments in $text - $replaceHeadings = false; # Make the edit section links go to the template not the article - $headingOffset = 0; # Skip headings when number, to account for those that weren't transcluded. - $isHTML = false; # $text is HTML, armour it against wikitext transformation - $forceRawInterwiki = false; # Force interwiki transclusion to be done in raw mode not rendered - - # Title object, where $text came from - $title = NULL; - - $linestart = ''; - - - # $part1 is the bit before the first |, and must contain only title characters - # $args is a list of arguments, starting from index 0, not including $part1 - - $titleText = $part1 = $piece['title']; - # If the third subpattern matched anything, it will start with | - - if (null == $piece['parts']) { - $replaceWith = $this->variableSubstitution (array ($piece['text'], $piece['title'])); - if ($replaceWith != $piece['text']) { - $text = $replaceWith; - $found = true; - $noparse = true; - $noargs = true; - } - } - - $args = (null == $piece['parts']) ? array() : $piece['parts']; - wfProfileOut( __METHOD__.'-setup' ); - - # SUBST - wfProfileIn( __METHOD__.'-modifiers' ); - if ( !$found ) { - $mwSubst =& MagicWord::get( 'subst' ); - if ( $mwSubst->matchStartAndRemove( $part1 ) xor $this->ot['wiki'] ) { - # One of two possibilities is true: - # 1) Found SUBST but not in the PST phase - # 2) Didn't find SUBST and in the PST phase - # In either case, return without further processing - $text = $piece['text']; - $found = true; - $noparse = true; - $noargs = true; - } - } - - # MSG, MSGNW and RAW - if ( !$found ) { - # Check for MSGNW: - $mwMsgnw =& MagicWord::get( 'msgnw' ); - if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) { - $nowiki = true; - } else { - # Remove obsolete MSG: - $mwMsg =& MagicWord::get( 'msg' ); - $mwMsg->matchStartAndRemove( $part1 ); - } - - # Check for RAW: - $mwRaw =& MagicWord::get( 'raw' ); - if ( $mwRaw->matchStartAndRemove( $part1 ) ) { - $forceRawInterwiki = true; - } - } - wfProfileOut( __METHOD__.'-modifiers' ); - - //save path level before recursing into functions & templates. - $lastPathLevel = $this->mTemplatePath; - - # Parser functions - if ( !$found ) { - wfProfileIn( __METHOD__ . '-pfunc' ); - - $colonPos = strpos( $part1, ':' ); - if ( $colonPos !== false ) { - # Case sensitive functions - $function = substr( $part1, 0, $colonPos ); - if ( isset( $this->mFunctionSynonyms[1][$function] ) ) { - $function = $this->mFunctionSynonyms[1][$function]; - } else { - # Case insensitive functions - $function = strtolower( $function ); - if ( isset( $this->mFunctionSynonyms[0][$function] ) ) { - $function = $this->mFunctionSynonyms[0][$function]; - } else { - $function = false; - } - } - if ( $function ) { - $funcArgs = array_map( 'trim', $args ); - $funcArgs = array_merge( array( &$this, trim( substr( $part1, $colonPos + 1 ) ) ), $funcArgs ); - $result = call_user_func_array( $this->mFunctionHooks[$function], $funcArgs ); - $found = true; - - // The text is usually already parsed, doesn't need triple-brace tags expanded, etc. - //$noargs = true; - //$noparse = true; - - if ( is_array( $result ) ) { - if ( isset( $result[0] ) ) { - $text = $linestart . $result[0]; - unset( $result[0] ); - } - - // Extract flags into the local scope - // This allows callers to set flags such as nowiki, noparse, found, etc. - extract( $result ); - } else { - $text = $linestart . $result; - } - } - } - wfProfileOut( __METHOD__ . '-pfunc' ); - } - - # Template table test - - # Did we encounter this template already? If yes, it is in the cache - # and we need to check for loops. - if ( !$found && isset( $this->mTemplates[$piece['title']] ) ) { - $found = true; - - # Infinite loop test - if ( isset( $this->mTemplatePath[$part1] ) ) { - $noparse = true; - $noargs = true; - $found = true; - $text = $linestart . - "[[$part1]]"; - wfDebug( __METHOD__.": template loop broken at '$part1'\n" ); - } else { - # set $text to cached message. - $text = $linestart . $this->mTemplates[$piece['title']]; - #treat title for cached page the same as others - $ns = NS_TEMPLATE; - $subpage = ''; - $part1 = $this->maybeDoSubpageLink( $part1, $subpage ); - if ($subpage !== '') { - $ns = $this->mTitle->getNamespace(); - } - $title = Title::newFromText( $part1, $ns ); - //used by include size checking - $titleText = $title->getPrefixedText(); - //used by edit section links - $replaceHeadings = true; - - } - } - - # Load from database - if ( !$found ) { - wfProfileIn( __METHOD__ . '-loadtpl' ); - $ns = NS_TEMPLATE; - # declaring $subpage directly in the function call - # does not work correctly with references and breaks - # {{/subpage}}-style inclusions - $subpage = ''; - $part1 = $this->maybeDoSubpageLink( $part1, $subpage ); - if ($subpage !== '') { - $ns = $this->mTitle->getNamespace(); - } - $title = Title::newFromText( $part1, $ns ); - - - if ( !is_null( $title ) ) { - $titleText = $title->getPrefixedText(); - # Check for language variants if the template is not found - if($wgContLang->hasVariants() && $title->getArticleID() == 0){ - $wgContLang->findVariantLink($part1, $title); - } - - if ( !$title->isExternal() ) { - if ( $title->getNamespace() == NS_SPECIAL && $this->mOptions->getAllowSpecialInclusion() && $this->ot['html'] ) { - $text = SpecialPage::capturePath( $title ); - if ( is_string( $text ) ) { - $found = true; - $noparse = true; - $noargs = true; - $isHTML = true; - $this->disableCache(); - } - } else if ( $wgNonincludableNamespaces && in_array( $title->getNamespace(), $wgNonincludableNamespaces ) ) { - $found = false; //access denied - wfDebug( "$fname: template inclusion denied for " . $title->getPrefixedDBkey() ); - } else { - list($articleContent,$title) = $this->fetchTemplateAndtitle( $title ); - if ( $articleContent !== false ) { - $found = true; - $text = $articleContent; - $replaceHeadings = true; - } - } - - # If the title is valid but undisplayable, make a link to it - if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) { - $text = "[[:$titleText]]"; - $found = true; - } - } elseif ( $title->isTrans() ) { - // Interwiki transclusion - if ( $this->ot['html'] && !$forceRawInterwiki ) { - $text = $this->interwikiTransclude( $title, 'render' ); - $isHTML = true; - $noparse = true; - } else { - $text = $this->interwikiTransclude( $title, 'raw' ); - $replaceHeadings = true; - } - $found = true; - } - - # Template cache array insertion - # Use the original $piece['title'] not the mangled $part1, so that - # modifiers such as RAW: produce separate cache entries - if( $found ) { - if( $isHTML ) { - // A special page; don't store it in the template cache. - } else { - $this->mTemplates[$piece['title']] = $text; - } - $text = $linestart . $text; - } - } - wfProfileOut( __METHOD__ . '-loadtpl' ); - } - - if ( $found && !$this->incrementIncludeSize( 'pre-expand', strlen( $text ) ) ) { - # Error, oversize inclusion - $text = $linestart . - "[[$titleText]]"; - $noparse = true; - $noargs = true; - } - - # Recursive parsing, escaping and link table handling - # Only for HTML output - if ( $nowiki && $found && ( $this->ot['html'] || $this->ot['pre'] ) ) { - $text = wfEscapeWikiText( $text ); - } elseif ( !$this->ot['msg'] && $found ) { - if ( $noargs ) { - $assocArgs = array(); - } else { - # Clean up argument array - $assocArgs = self::createAssocArgs($args); - # Add a new element to the templace recursion path - $this->mTemplatePath[$part1] = 1; - } - - if ( !$noparse ) { - # If there are any tags, only include them - if ( in_string( '', $text ) && in_string( '', $text ) ) { - $replacer = new OnlyIncludeReplacer; - StringUtils::delimiterReplaceCallback( '', '', - array( &$replacer, 'replace' ), $text ); - $text = $replacer->output; - } - # Remove sections and tags - $text = StringUtils::delimiterReplace( '', '', '', $text ); - $text = strtr( $text, array( '' => '' , '' => '' ) ); - - if( $this->ot['html'] || $this->ot['pre'] ) { - # Strip ,

, etc.
-					$text = $this->strip( $text, $this->mStripState );
-					if ( $this->ot['html'] ) {
-						$text = Sanitizer::removeHTMLtags( $text, array( &$this, 'replaceVariables' ), $assocArgs );
-					} elseif ( $this->ot['pre'] && $this->mOptions->getRemoveComments() ) {
-						$text = Sanitizer::removeHTMLcomments( $text );
-					}
-				}
-				$text = $this->replaceVariables( $text, $assocArgs );
-
-				# If the template begins with a table or block-level
-				# element, it should be treated as beginning a new line.
-				if (!$piece['lineStart'] && preg_match('/^(?:{\\||:|;|#|\*)/', $text)) /*}*/{
-					$text = "\n" . $text;
-				}
-			} elseif ( !$noargs ) {
-				# $noparse and !$noargs
-				# Just replace the arguments, not any double-brace items
-				# This is used for rendered interwiki transclusion
-				$text = $this->replaceVariables( $text, $assocArgs, true );
-			}
-		}
-		# Prune lower levels off the recursion check path
-		$this->mTemplatePath = $lastPathLevel;
-
-		if ( $found && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
-			# Error, oversize inclusion
-			$text = $linestart .
-				"[[$titleText]]";
-			$noparse = true;
-			$noargs = true;
-		}
-
-		if ( !$found ) {
-			wfProfileOut( $fname );
-			return $piece['text'];
-		} else {
-			wfProfileIn( __METHOD__ . '-placeholders' );
-			if ( $isHTML ) {
-				# Replace raw HTML by a placeholder
-				# Add a blank line preceding, to prevent it from mucking up
-				# immediately preceding headings
-				$text = "\n\n" . $this->insertStripItem( $text, $this->mStripState );
-			} else {
-				# replace ==section headers==
-				# XXX this needs to go away once we have a better parser.
-				if ( !$this->ot['wiki'] && !$this->ot['pre'] && $replaceHeadings ) {
-					if( !is_null( $title ) )
-						$encodedname = base64_encode($title->getPrefixedDBkey());
-					else
-						$encodedname = base64_encode("");
-					$m = preg_split('/(^={1,6}.*?={1,6}\s*?$)/m', $text, -1,
-						PREG_SPLIT_DELIM_CAPTURE);
-					$text = '';
-					$nsec = $headingOffset;
-
-					for( $i = 0; $i < count($m); $i += 2 ) {
-						$text .= $m[$i];
-						if (!isset($m[$i + 1]) || $m[$i + 1] == "") continue;
-						$hl = $m[$i + 1];
-						if( strstr($hl, "" . $m2[3];
-
-						$nsec++;
-					}
-				}
-			}
-			wfProfileOut( __METHOD__ . '-placeholders' );
-		}
-
-		# Prune lower levels off the recursion check path
-		$this->mTemplatePath = $lastPathLevel;
-
-		if ( !$found ) {
-			wfProfileOut( $fname );
-			return $piece['text'];
-		} else {
-			wfProfileOut( $fname );
-			return $text;
-		}
-	}
-
-	/**
-	 * Fetch the unparsed text of a template and register a reference to it.
-	 */
-	function fetchTemplateAndTitle( $title ) {
-		$templateCb = $this->mOptions->getTemplateCallback();
-		$stuff = call_user_func( $templateCb, $title, $this );
-		$text = $stuff['text'];
-		$finalTitle = isset( $stuff['finalTitle'] ) ? $stuff['finalTitle'] : $title;
-		if ( isset( $stuff['deps'] ) ) {
-			foreach ( $stuff['deps'] as $dep ) {
-				$this->mOutput->addTemplate( $dep['title'], $dep['page_id'], $dep['rev_id'] );
-			}
-		}
-		return array($text,$finalTitle);
-	}
-
-	function fetchTemplate( $title ) {
-		$rv = $this->fetchTemplateAndtitle($title);
-		return $rv[0];
-	}
-
-	/**
-	 * Static function to get a template
-	 * Can be overridden via ParserOptions::setTemplateCallback().
-	 *
-	 * Returns an associative array:
-	 *    text          The unparsed template text
-	 *    finalTitle    (Optional) The title after following redirects
-	 *    deps          (Optional) An array of associative array dependencies:
-	 *                       title:    The dependency title, to be registered in templatelinks
-	 *                       page_id:  The page_id of the title
-	 *                       rev_id:   The revision ID loaded
-	 */
-	static function statelessFetchTemplate( $title, $parser=false ) {
-		$text = $skip = false;
-		$finalTitle = $title;
-		$deps = array();
-
-		// Loop to fetch the article, with up to 1 redirect
-		for ( $i = 0; $i < 2 && is_object( $title ); $i++ ) {
-			# Give extensions a chance to select the revision instead
-			$id = false; // Assume current
-			wfRunHooks( 'BeforeParserFetchTemplateAndtitle', array( $parser, &$title, &$skip, &$id ) );
-
-			if( $skip ) {
-				$text = false;
-				$deps[] = array(
-					'title' => $title,
-					'page_id' => $title->getArticleID(),
-					'rev_id' => null );
-				break;
-			}
-			$rev = $id ? Revision::newFromId( $id ) : Revision::newFromTitle( $title );
-			$rev_id = $rev ? $rev->getId() : 0;
-
-			$deps[] = array(
-				'title' => $title,
-				'page_id' => $title->getArticleID(),
-				'rev_id' => $rev_id );
-
-			if( $rev ) {
-				$text = $rev->getText();
-			} elseif( $title->getNamespace() == NS_MEDIAWIKI ) {
-				global $wgLang;
-				$message = $wgLang->lcfirst( $title->getText() );
-				$text = wfMsgForContentNoTrans( $message );
-				if( wfEmptyMsg( $message, $text ) ) {
-					$text = false;
-					break;
-				}
-			} else {
-				break;
-			}
-			if ( $text === false ) {
-				break;
-			}
-			// Redirect?
-			$finalTitle = $title;
-			$title = Title::newFromRedirect( $text );
-		}
-		return array(
-			'text' => $text,
-			'finalTitle' => $finalTitle,
-			'deps' => $deps );
-	}
-
-	/**
-	 * Transclude an interwiki link.
-	 */
-	function interwikiTransclude( $title, $action ) {
-		global $wgEnableScaryTranscluding;
-
-		if (!$wgEnableScaryTranscluding)
-			return wfMsg('scarytranscludedisabled');
-
-		$url = $title->getFullUrl( "action=$action" );
-
-		if (strlen($url) > 255)
-			return wfMsg('scarytranscludetoolong');
-		return $this->fetchScaryTemplateMaybeFromCache($url);
-	}
-
-	function fetchScaryTemplateMaybeFromCache($url) {
-		global $wgTranscludeCacheExpiry;
-		$dbr = wfGetDB(DB_SLAVE);
-		$obj = $dbr->selectRow('transcache', array('tc_time', 'tc_contents'),
-				array('tc_url' => $url));
-		if ($obj) {
-			$time = $obj->tc_time;
-			$text = $obj->tc_contents;
-			if ($time && time() < $time + $wgTranscludeCacheExpiry ) {
-				return $text;
-			}
-		}
-
-		$text = Http::get($url);
-		if (!$text)
-			return wfMsg('scarytranscludefailed', $url);
-
-		$dbw = wfGetDB(DB_MASTER);
-		$dbw->replace('transcache', array('tc_url'), array(
-			'tc_url' => $url,
-			'tc_time' => time(),
-			'tc_contents' => $text));
-		return $text;
-	}
-
-
-	/**
-	 * Triple brace replacement -- used for template arguments
-	 * @private
-	 */
-	function argSubstitution( $matches ) {
-		$arg = trim( $matches['title'] );
-		$text = $matches['text'];
-		$inputArgs = end( $this->mArgStack );
-
-		if ( array_key_exists( $arg, $inputArgs ) ) {
-			$text = $inputArgs[$arg];
-		} else if (($this->mOutputType == self::OT_HTML || $this->mOutputType == self::OT_PREPROCESS ) &&
-		null != $matches['parts'] && count($matches['parts']) > 0) {
-			$text = $matches['parts'][0];
-		}
-		if ( !$this->incrementIncludeSize( 'arg', strlen( $text ) ) ) {
-			$text = $matches['text'] .
-				'';
-		}
-
-		return $text;
-	}
-
-	/**
-	 * Increment an include size counter
-	 *
-	 * @param string $type The type of expansion
-	 * @param integer $size The size of the text
-	 * @return boolean False if this inclusion would take it over the maximum, true otherwise
-	 */
-	function incrementIncludeSize( $type, $size ) {
-		if ( $this->mIncludeSizes[$type] + $size > $this->mOptions->getMaxIncludeSize() ) {
-			return false;
-		} else {
-			$this->mIncludeSizes[$type] += $size;
-			return true;
-		}
-	}
-
-	/**
-	 * Detect __NOGALLERY__ magic word and set a placeholder
-	 */
-	function stripNoGallery( &$text ) {
-		# if the string __NOGALLERY__ (not case-sensitive) occurs in the HTML,
-		# do not add TOC
-		$mw = MagicWord::get( 'nogallery' );
-		$this->mOutput->mNoGallery = $mw->matchAndRemove( $text ) ;
-	}
-
-	/**
-	 * Find the first __TOC__ magic word and set a 
-	 * placeholder that will then be replaced by the real TOC in
-	 * ->formatHeadings, this works because at this points real
-	 * comments will have already been discarded by the sanitizer.
-	 *
-	 * Any additional __TOC__ magic words left over will be discarded
-	 * as there can only be one TOC on the page.
-	 */
-	function stripToc( $text ) {
-		# if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
-		# do not add TOC
-		$mw = MagicWord::get( 'notoc' );
-		if( $mw->matchAndRemove( $text ) ) {
-			$this->mShowToc = false;
-		}
-
-		$mw = MagicWord::get( 'toc' );
-		if( $mw->match( $text ) ) {
-			$this->mShowToc = true;
-			$this->mForceTocPosition = true;
-
-			// Set a placeholder. At the end we'll fill it in with the TOC.
-			$text = $mw->replace( '', $text, 1 );
-
-			// Only keep the first one.
-			$text = $mw->replace( '', $text );
-		}
-		return $text;
-	}
-
-	/**
-	 * This function accomplishes several tasks:
-	 * 1) Auto-number headings if that option is enabled
-	 * 2) Add an [edit] link to sections for users who have enabled the option and can edit the page
-	 * 3) Add a Table of contents on the top for users who have enabled the option
-	 * 4) Auto-anchor headings
-	 *
-	 * It loops through all headlines, collects the necessary data, then splits up the
-	 * string and re-inserts the newly formatted headlines.
-	 *
-	 * @param string $text
-	 * @param boolean $isMain
-	 * @private
-	 */
-	function formatHeadings( $text, $isMain=true ) {
-		global $wgMaxTocLevel, $wgContLang;
-
-		$doNumberHeadings = $this->mOptions->getNumberHeadings();
-		if( !$this->mTitle->quickUserCan( 'edit' ) ) {
-			$showEditLink = 0;
-		} else {
-			$showEditLink = $this->mOptions->getEditSection();
-		}
-
-		# Inhibit editsection links if requested in the page
-		$esw =& MagicWord::get( 'noeditsection' );
-		if( $esw->matchAndRemove( $text ) ) {
-			$showEditLink = 0;
-		}
-
-		# Get all headlines for numbering them and adding funky stuff like [edit]
-		# links - this is for later, but we need the number of headlines right now
-		$matches = array();
-		$numMatches = preg_match_all( '/[1-6])(?P.*?'.'>)(?P
.*?)<\/H[1-6] *>/i', $text, $matches ); - - # if there are fewer than 4 headlines in the article, do not show TOC - # unless it's been explicitly enabled. - $enoughToc = $this->mShowToc && - (($numMatches >= 4) || $this->mForceTocPosition); - - # Allow user to stipulate that a page should have a "new section" - # link added via __NEWSECTIONLINK__ - $mw =& MagicWord::get( 'newsectionlink' ); - if( $mw->matchAndRemove( $text ) ) - $this->mOutput->setNewSection( true ); - - # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML, - # override above conditions and always show TOC above first header - $mw =& MagicWord::get( 'forcetoc' ); - if ($mw->matchAndRemove( $text ) ) { - $this->mShowToc = true; - $enoughToc = true; - } - - # We need this to perform operations on the HTML - $sk = $this->mOptions->getSkin(); - - # headline counter - $headlineCount = 0; - $sectionCount = 0; # headlineCount excluding template sections - $numVisible = 0; - - # Ugh .. the TOC should have neat indentation levels which can be - # passed to the skin functions. These are determined here - $toc = ''; - $full = ''; - $head = array(); - $sublevelCount = array(); - $levelCount = array(); - $toclevel = 0; - $level = 0; - $prevlevel = 0; - $toclevel = 0; - $prevtoclevel = 0; - $tocraw = array(); - - foreach( $matches[3] as $headline ) { - $istemplate = 0; - $templatetitle = ''; - $templatesection = 0; - $numbering = ''; - $mat = array(); - if (preg_match("//", $headline, $mat)) { - $istemplate = 1; - $templatetitle = base64_decode($mat[1]); - $templatesection = 1 + (int)base64_decode($mat[2]); - $headline = preg_replace("//", "", $headline); - } - - if( $toclevel ) { - $prevlevel = $level; - $prevtoclevel = $toclevel; - } - $level = $matches[1][$headlineCount]; - - if( $doNumberHeadings || $enoughToc ) { - - if ( $level > $prevlevel ) { - # Increase TOC level - $toclevel++; - $sublevelCount[$toclevel] = 0; - if( $toclevel<$wgMaxTocLevel ) { - $prevtoclevel = $toclevel; - $toc .= $sk->tocIndent(); - $numVisible++; - } - } - elseif ( $level < $prevlevel && $toclevel > 1 ) { - # Decrease TOC level, find level to jump to - - if ( $toclevel == 2 && $level <= $levelCount[1] ) { - # Can only go down to level 1 - $toclevel = 1; - } else { - for ($i = $toclevel; $i > 0; $i--) { - if ( $levelCount[$i] == $level ) { - # Found last matching level - $toclevel = $i; - break; - } - elseif ( $levelCount[$i] < $level ) { - # Found first matching level below current level - $toclevel = $i + 1; - break; - } - } - } - if( $toclevel<$wgMaxTocLevel ) { - if($prevtoclevel < $wgMaxTocLevel) { - # Unindent only if the previous toc level was shown :p - $toc .= $sk->tocUnindent( $prevtoclevel - $toclevel ); - } else { - $toc .= $sk->tocLineEnd(); - } - } - } - else { - # No change in level, end TOC line - if( $toclevel<$wgMaxTocLevel ) { - $toc .= $sk->tocLineEnd(); - } - } - - $levelCount[$toclevel] = $level; - - # count number of headlines for each level - @$sublevelCount[$toclevel]++; - $dot = 0; - for( $i = 1; $i <= $toclevel; $i++ ) { - if( !empty( $sublevelCount[$i] ) ) { - if( $dot ) { - $numbering .= '.'; - } - $numbering .= $wgContLang->formatNum( $sublevelCount[$i] ); - $dot = 1; - } - } - } - - # The canonized header is a version of the header text safe to use for links - # Avoid insertion of weird stuff like by expanding the relevant sections - $canonized_headline = $this->mStripState->unstripBoth( $headline ); - - # Remove link placeholders by the link text. - # - # turns into - # link text with suffix - $canonized_headline = preg_replace( '//e', - "\$this->mLinkHolders['texts'][\$1]", - $canonized_headline ); - $canonized_headline = preg_replace( '//e', - "\$this->mInterwikiLinkHolders['texts'][\$1]", - $canonized_headline ); - - # Strip out HTML (other than plain and : bug 8393) - $tocline = preg_replace( - array( '#<(?!/?(sup|sub)).*?'.'>#', '#<(/?(sup|sub)).*?'.'>#' ), - array( '', '<$1>'), - $canonized_headline - ); - $tocline = trim( $tocline ); - - # For the anchor, strip out HTML-y stuff period - $canonized_headline = preg_replace( '/<.*?'.'>/', '', $canonized_headline ); - $canonized_headline = trim( $canonized_headline ); - - # Save headline for section edit hint before it's escaped - $headline_hint = $canonized_headline; - $canonized_headline = Sanitizer::escapeId( $canonized_headline ); - $refers[$headlineCount] = $canonized_headline; - - # count how many in assoc. array so we can track dupes in anchors - isset( $refers[$canonized_headline] ) ? $refers[$canonized_headline]++ : $refers[$canonized_headline] = 1; - $refcount[$headlineCount]=$refers[$canonized_headline]; - - # Don't number the heading if it is the only one (looks silly) - if( $doNumberHeadings && count( $matches[3] ) > 1) { - # the two are different if the line contains a link - $headline=$numbering . ' ' . $headline; - } - - # Create the anchor for linking from the TOC to the section - $anchor = $canonized_headline; - if($refcount[$headlineCount] > 1 ) { - $anchor .= '_' . $refcount[$headlineCount]; - } - if( $enoughToc && ( !isset($wgMaxTocLevel) || $toclevel<$wgMaxTocLevel ) ) { - $toc .= $sk->tocLine($anchor, $tocline, $numbering, $toclevel); - $tocraw[] = array( 'toclevel' => $toclevel, 'level' => $level, 'line' => $tocline, 'number' => $numbering ); - } - # give headline the correct tag - if( $showEditLink && ( !$istemplate || $templatetitle !== "" ) ) { - if( $istemplate ) - $editlink = $sk->editSectionLinkForOther($templatetitle, $templatesection); - else - $editlink = $sk->editSectionLink($this->mTitle, $sectionCount+1, $headline_hint); - } else { - $editlink = ''; - } - $head[$headlineCount] = $sk->makeHeadline( $level, $matches['attrib'][$headlineCount], $anchor, $headline, $editlink ); - - $headlineCount++; - if( !$istemplate ) - $sectionCount++; - } - - $this->mOutput->setSections( $tocraw ); - - # Never ever show TOC if no headers - if( $numVisible < 1 ) { - $enoughToc = false; - } - - if( $enoughToc ) { - if( $prevtoclevel > 0 && $prevtoclevel < $wgMaxTocLevel ) { - $toc .= $sk->tocUnindent( $prevtoclevel - 1 ); - } - $toc = $sk->tocList( $toc ); - } - - # split up and insert constructed headlines - - $blocks = preg_split( '/.*?<\/H[1-6]>/i', $text ); - $i = 0; - - foreach( $blocks as $block ) { - if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) { - # This is the [edit] link that appears for the top block of text when - # section editing is enabled - - # Disabled because it broke block formatting - # For example, a bullet point in the top line - # $full .= $sk->editSectionLink(0); - } - $full .= $block; - if( $enoughToc && !$i && $isMain && !$this->mForceTocPosition ) { - # Top anchor now in skin - $full = $full.$toc; - } - - if( !empty( $head[$i] ) ) { - $full .= $head[$i]; - } - $i++; - } - if( $this->mForceTocPosition ) { - return str_replace( '', $toc, $full ); - } else { - return $full; - } - } - - /** - * Transform wiki markup when saving a page by doing \r\n -> \n - * conversion, substitting signatures, {{subst:}} templates, etc. - * - * @param string $text the text to transform - * @param Title &$title the Title object for the current article - * @param User &$user the User object describing the current user - * @param ParserOptions $options parsing options - * @param bool $clearState whether to clear the parser state first - * @return string the altered wiki markup - * @public - */ - function preSaveTransform( $text, &$title, $user, $options, $clearState = true ) { - $this->mOptions = $options; - $this->mTitle =& $title; - $this->setOutputType( self::OT_WIKI ); - - if ( $clearState ) { - $this->clearState(); - } - - $stripState = new StripState; - $pairs = array( - "\r\n" => "\n", - ); - $text = str_replace( array_keys( $pairs ), array_values( $pairs ), $text ); - $text = $this->strip( $text, $stripState, true, array( 'gallery' ) ); - $text = $this->pstPass2( $text, $stripState, $user ); - $text = $stripState->unstripBoth( $text ); - return $text; - } - - /** - * Pre-save transform helper function - * @private - */ - function pstPass2( $text, &$stripState, $user ) { - global $wgContLang, $wgLocaltimezone; - - /* Note: This is the timestamp saved as hardcoded wikitext to - * the database, we use $wgContLang here in order to give - * everyone the same signature and use the default one rather - * than the one selected in each user's preferences. - */ - if ( isset( $wgLocaltimezone ) ) { - $oldtz = getenv( 'TZ' ); - putenv( 'TZ='.$wgLocaltimezone ); - } - $d = $wgContLang->timeanddate( date( 'YmdHis' ), false, false) . - ' (' . date( 'T' ) . ')'; - if ( isset( $wgLocaltimezone ) ) { - putenv( 'TZ='.$oldtz ); - } - - # Variable replacement - # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags - $text = $this->replaceVariables( $text ); - - # Strip out etc. added via replaceVariables - $text = $this->strip( $text, $stripState, false, array( 'gallery' ) ); - - # Signatures - $sigText = $this->getUserSig( $user ); - $text = strtr( $text, array( - '~~~~~' => $d, - '~~~~' => "$sigText $d", - '~~~' => $sigText - ) ); - - # Context links: [[|name]] and [[name (context)|]] - # - global $wgLegalTitleChars; - $tc = "[$wgLegalTitleChars]"; - $nc = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii! - - $p1 = "/\[\[(:?$nc+:|:|)($tc+?)( \\($tc+\\))\\|]]/"; # [[ns:page (context)|]] - $p3 = "/\[\[(:?$nc+:|:|)($tc+?)( \\($tc+\\)|)(, $tc+|)\\|]]/"; # [[ns:page (context), context|]] - $p2 = "/\[\[\\|($tc+)]]/"; # [[|page]] - - # try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]" - $text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text ); - $text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text ); - - $t = $this->mTitle->getText(); - $m = array(); - if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) { - $text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text ); - } elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && '' != "$m[1]$m[2]" ) { - $text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text ); - } else { - # if there's no context, don't bother duplicating the title - $text = preg_replace( $p2, '[[\\1]]', $text ); - } - - # Trim trailing whitespace - $text = rtrim( $text ); - - return $text; - } - - /** - * Fetch the user's signature text, if any, and normalize to - * validated, ready-to-insert wikitext. - * - * @param User $user - * @return string - * @private - */ - function getUserSig( &$user ) { - global $wgMaxSigChars; - - $username = $user->getName(); - $nickname = $user->getOption( 'nickname' ); - $nickname = $nickname === '' ? $username : $nickname; - - if( mb_strlen( $nickname ) > $wgMaxSigChars ) { - $nickname = $username; - wfDebug( __METHOD__ . ": $username has overlong signature.\n" ); - } elseif( $user->getBoolOption( 'fancysig' ) !== false ) { - # Sig. might contain markup; validate this - if( $this->validateSig( $nickname ) !== false ) { - # Validated; clean up (if needed) and return it - return $this->cleanSig( $nickname, true ); - } else { - # Failed to validate; fall back to the default - $nickname = $username; - wfDebug( "Parser::getUserSig: $username has bad XML tags in signature.\n" ); - } - } - - // Make sure nickname doesnt get a sig in a sig - $nickname = $this->cleanSigInSig( $nickname ); - - # If we're still here, make it a link to the user page - $userText = wfEscapeWikiText( $username ); - $nickText = wfEscapeWikiText( $nickname ); - if ( $user->isAnon() ) { - return wfMsgExt( 'signature-anon', array( 'content', 'parsemag' ), $userText, $nickText ); - } else { - return wfMsgExt( 'signature', array( 'content', 'parsemag' ), $userText, $nickText ); - } - } - - /** - * Check that the user's signature contains no bad XML - * - * @param string $text - * @return mixed An expanded string, or false if invalid. - */ - function validateSig( $text ) { - return( wfIsWellFormedXmlFragment( $text ) ? $text : false ); - } - - /** - * Clean up signature text - * - * 1) Strip ~~~, ~~~~ and ~~~~~ out of signatures @see cleanSigInSig - * 2) Substitute all transclusions - * - * @param string $text - * @param $parsing Whether we're cleaning (preferences save) or parsing - * @return string Signature text - */ - function cleanSig( $text, $parsing = false ) { - global $wgTitle; - $this->startExternalParse( $this->mTitle, new ParserOptions(), $parsing ? self::OT_WIKI : self::OT_MSG ); - - $substWord = MagicWord::get( 'subst' ); - $substRegex = '/\{\{(?!(?:' . $substWord->getBaseRegex() . '))/x' . $substWord->getRegexCase(); - $substText = '{{' . $substWord->getSynonym( 0 ); - - $text = preg_replace( $substRegex, $substText, $text ); - $text = $this->cleanSigInSig( $text ); - $text = $this->replaceVariables( $text ); - - $this->clearState(); - return $text; - } - - /** - * Strip ~~~, ~~~~ and ~~~~~ out of signatures - * @param string $text - * @return string Signature text with /~{3,5}/ removed - */ - function cleanSigInSig( $text ) { - $text = preg_replace( '/~{3,5}/', '', $text ); - return $text; - } - - /** - * Set up some variables which are usually set up in parse() - * so that an external function can call some class members with confidence - * @public - */ - function startExternalParse( &$title, $options, $outputType, $clearState = true ) { - $this->mTitle =& $title; - $this->mOptions = $options; - $this->setOutputType( $outputType ); - if ( $clearState ) { - $this->clearState(); - } - } - - /** - * Transform a MediaWiki message by replacing magic variables. - * - * @param string $text the text to transform - * @param ParserOptions $options options - * @return string the text with variables substituted - * @public - */ - function transformMsg( $text, $options ) { - global $wgTitle; - static $executing = false; - - $fname = "Parser::transformMsg"; - - # Guard against infinite recursion - if ( $executing ) { - return $text; - } - $executing = true; - - wfProfileIn($fname); - - if ( $wgTitle && !( $wgTitle instanceof FakeTitle ) ) { - $this->mTitle = $wgTitle; - } else { - $this->mTitle = Title::newFromText('msg'); - } - $this->mOptions = $options; - $this->setOutputType( self::OT_MSG ); - $this->clearState(); - $text = $this->replaceVariables( $text ); - - $executing = false; - wfProfileOut($fname); - return $text; - } - - /** - * Create an HTML-style tag, e.g. special text - * The callback should have the following form: - * function myParserHook( $text, $params, &$parser ) { ... } - * - * Transform and return $text. Use $parser for any required context, e.g. use - * $parser->getTitle() and $parser->getOptions() not $wgTitle or $wgOut->mParserOptions - * - * @public - * - * @param mixed $tag The tag to use, e.g. 'hook' for - * @param mixed $callback The callback function (and object) to use for the tag - * - * @return The old value of the mTagHooks array associated with the hook - */ - function setHook( $tag, $callback ) { - $tag = strtolower( $tag ); - $oldVal = isset( $this->mTagHooks[$tag] ) ? $this->mTagHooks[$tag] : null; - $this->mTagHooks[$tag] = $callback; - - return $oldVal; - } - - function setTransparentTagHook( $tag, $callback ) { - $tag = strtolower( $tag ); - $oldVal = isset( $this->mTransparentTagHooks[$tag] ) ? $this->mTransparentTagHooks[$tag] : null; - $this->mTransparentTagHooks[$tag] = $callback; - - return $oldVal; - } - - /** - * Create a function, e.g. {{sum:1|2|3}} - * The callback function should have the form: - * function myParserFunction( &$parser, $arg1, $arg2, $arg3 ) { ... } - * - * The callback may either return the text result of the function, or an array with the text - * in element 0, and a number of flags in the other elements. The names of the flags are - * specified in the keys. Valid flags are: - * found The text returned is valid, stop processing the template. This - * is on by default. - * nowiki Wiki markup in the return value should be escaped - * noparse Unsafe HTML tags should not be stripped, etc. - * noargs Don't replace triple-brace arguments in the return value - * isHTML The returned text is HTML, armour it against wikitext transformation - * - * @public - * - * @param string $id The magic word ID - * @param mixed $callback The callback function (and object) to use - * @param integer $flags a combination of the following flags: - * SFH_NO_HASH No leading hash, i.e. {{plural:...}} instead of {{#if:...}} - * - * @return The old callback function for this name, if any - */ - function setFunctionHook( $id, $callback, $flags = 0 ) { - $oldVal = isset( $this->mFunctionHooks[$id] ) ? $this->mFunctionHooks[$id] : null; - $this->mFunctionHooks[$id] = $callback; - - # Add to function cache - $mw = MagicWord::get( $id ); - if( !$mw ) - throw new MWException( 'Parser::setFunctionHook() expecting a magic word identifier.' ); - - $synonyms = $mw->getSynonyms(); - $sensitive = intval( $mw->isCaseSensitive() ); - - foreach ( $synonyms as $syn ) { - # Case - if ( !$sensitive ) { - $syn = strtolower( $syn ); - } - # Add leading hash - if ( !( $flags & SFH_NO_HASH ) ) { - $syn = '#' . $syn; - } - # Remove trailing colon - if ( substr( $syn, -1, 1 ) == ':' ) { - $syn = substr( $syn, 0, -1 ); - } - $this->mFunctionSynonyms[$sensitive][$syn] = $id; - } - return $oldVal; - } - - /** - * Get all registered function hook identifiers - * - * @return array - */ - function getFunctionHooks() { - return array_keys( $this->mFunctionHooks ); - } - - /** - * Replace link placeholders with actual links, in the buffer - * Placeholders created in Skin::makeLinkObj() - * Returns an array of links found, indexed by PDBK: - * 0 - broken - * 1 - normal link - * 2 - stub - * $options is a bit field, RLH_FOR_UPDATE to select for update - */ - function replaceLinkHolders( &$text, $options = 0 ) { - global $wgUser; - global $wgContLang; - - $fname = 'Parser::replaceLinkHolders'; - wfProfileIn( $fname ); - - $pdbks = array(); - $colours = array(); - $sk = $this->mOptions->getSkin(); - $linkCache = LinkCache::singleton(); - - if ( !empty( $this->mLinkHolders['namespaces'] ) ) { - wfProfileIn( $fname.'-check' ); - $dbr = wfGetDB( DB_SLAVE ); - $page = $dbr->tableName( 'page' ); - $threshold = $wgUser->getOption('stubthreshold'); - - # Sort by namespace - asort( $this->mLinkHolders['namespaces'] ); - - # Generate query - $query = false; - $current = null; - foreach ( $this->mLinkHolders['namespaces'] as $key => $ns ) { - # Make title object - $title = $this->mLinkHolders['titles'][$key]; - - # Skip invalid entries. - # Result will be ugly, but prevents crash. - if ( is_null( $title ) ) { - continue; - } - $pdbk = $pdbks[$key] = $title->getPrefixedDBkey(); - - # Check if it's a static known link, e.g. interwiki - if ( $title->isAlwaysKnown() ) { - $colours[$pdbk] = 1; - } elseif ( ( $id = $linkCache->getGoodLinkID( $pdbk ) ) != 0 ) { - $colours[$pdbk] = 1; - $this->mOutput->addLink( $title, $id ); - } elseif ( $linkCache->isBadLink( $pdbk ) ) { - $colours[$pdbk] = 0; - } elseif ( $title->getNamespace() == NS_SPECIAL && !SpecialPage::exists( $pdbk ) ) { - $colours[$pdbk] = 0; - } else { - # Not in the link cache, add it to the query - if ( !isset( $current ) ) { - $current = $ns; - $query = "SELECT page_id, page_namespace, page_title, page_len, page_is_redirect"; - $query .= " FROM $page WHERE (page_namespace=$ns AND page_title IN("; - } elseif ( $current != $ns ) { - $current = $ns; - $query .= ")) OR (page_namespace=$ns AND page_title IN("; - } else { - $query .= ', '; - } - - $query .= $dbr->addQuotes( $this->mLinkHolders['dbkeys'][$key] ); - } - } - if ( $query ) { - $query .= '))'; - if ( $options & RLH_FOR_UPDATE ) { - $query .= ' FOR UPDATE'; - } - - $res = $dbr->query( $query, $fname ); - - # Fetch data and form into an associative array - # non-existent = broken - # 1 = known - # 2 = stub - while ( $s = $dbr->fetchObject($res) ) { - $title = Title::makeTitle( $s->page_namespace, $s->page_title ); - $pdbk = $title->getPrefixedDBkey(); - $linkCache->addGoodLinkObj( $s->page_id, $title, $s->page_len, $s->page_is_redirect ); - $this->mOutput->addLink( $title, $s->page_id ); - - $colours[$pdbk] = ( $threshold == 0 || ( - $s->page_len >= $threshold || # always true if $threshold <= 0 - $s->page_is_redirect || - !MWNamespace::isContent( $s->page_namespace ) ) - ? 1 : 2 ); - } - } - wfProfileOut( $fname.'-check' ); - - # Do a second query for different language variants of links and categories - if( $wgContLang->hasVariants() ) { - $linkBatch = new LinkBatch(); - $variantMap = array(); // maps $pdbkey_Variant => $keys (of link holders) - $categoryMap = array(); // maps $category_variant => $category (dbkeys) - $varCategories = array(); // category replacements oldDBkey => newDBkey - - $categories = $this->mOutput->getCategoryLinks(); - - // Add variants of links to link batch - foreach ( $this->mLinkHolders['namespaces'] as $key => $ns ) { - $title = $this->mLinkHolders['titles'][$key]; - if ( is_null( $title ) ) - continue; - - $pdbk = $title->getPrefixedDBkey(); - $titleText = $title->getText(); - - // generate all variants of the link title text - $allTextVariants = $wgContLang->convertLinkToAllVariants($titleText); - - // if link was not found (in first query), add all variants to query - if ( !isset($colours[$pdbk]) ){ - foreach($allTextVariants as $textVariant){ - if($textVariant != $titleText){ - $variantTitle = Title::makeTitle( $ns, $textVariant ); - if(is_null($variantTitle)) continue; - $linkBatch->addObj( $variantTitle ); - $variantMap[$variantTitle->getPrefixedDBkey()][] = $key; - } - } - } - } - - // process categories, check if a category exists in some variant - foreach( $categories as $category ){ - $variants = $wgContLang->convertLinkToAllVariants($category); - foreach($variants as $variant){ - if($variant != $category){ - $variantTitle = Title::newFromDBkey( Title::makeName(NS_CATEGORY,$variant) ); - if(is_null($variantTitle)) continue; - $linkBatch->addObj( $variantTitle ); - $categoryMap[$variant] = $category; - } - } - } - - - if ( !$linkBatch->isEmpty() ){ - // construct query - $titleClause = $linkBatch->constructSet('page', $dbr); - - $variantQuery = "SELECT page_id, page_namespace, page_title, page_len, page_is_redirect"; - - $variantQuery .= " FROM $page WHERE $titleClause"; - if ( $options & RLH_FOR_UPDATE ) { - $variantQuery .= ' FOR UPDATE'; - } - - $varRes = $dbr->query( $variantQuery, $fname ); - - // for each found variants, figure out link holders and replace - while ( $s = $dbr->fetchObject($varRes) ) { - - $variantTitle = Title::makeTitle( $s->page_namespace, $s->page_title ); - $varPdbk = $variantTitle->getPrefixedDBkey(); - $vardbk = $variantTitle->getDBkey(); - - $holderKeys = array(); - if(isset($variantMap[$varPdbk])){ - $holderKeys = $variantMap[$varPdbk]; - $linkCache->addGoodLinkObj( $s->page_id, $variantTitle, $s->page_len, $s->page_is_redirect ); - $this->mOutput->addLink( $variantTitle, $s->page_id ); - } - - // loop over link holders - foreach($holderKeys as $key){ - $title = $this->mLinkHolders['titles'][$key]; - if ( is_null( $title ) ) continue; - - $pdbk = $title->getPrefixedDBkey(); - - if(!isset($colours[$pdbk])){ - // found link in some of the variants, replace the link holder data - $this->mLinkHolders['titles'][$key] = $variantTitle; - $this->mLinkHolders['dbkeys'][$key] = $variantTitle->getDBkey(); - - // set pdbk and colour - $pdbks[$key] = $varPdbk; - if ( $threshold > 0 ) { - $size = $s->page_len; - if ( $s->page_is_redirect || $s->page_namespace != 0 || $size >= $threshold ) { - $colours[$varPdbk] = 1; - } else { - $colours[$varPdbk] = 2; - } - } - else { - $colours[$varPdbk] = 1; - } - } - } - - // check if the object is a variant of a category - if(isset($categoryMap[$vardbk])){ - $oldkey = $categoryMap[$vardbk]; - if($oldkey != $vardbk) - $varCategories[$oldkey]=$vardbk; - } - } - - // rebuild the categories in original order (if there are replacements) - if(count($varCategories)>0){ - $newCats = array(); - $originalCats = $this->mOutput->getCategories(); - foreach($originalCats as $cat => $sortkey){ - // make the replacement - if( array_key_exists($cat,$varCategories) ) - $newCats[$varCategories[$cat]] = $sortkey; - else $newCats[$cat] = $sortkey; - } - $this->mOutput->setCategoryLinks($newCats); - } - } - } - - # Construct search and replace arrays - wfProfileIn( $fname.'-construct' ); - $replacePairs = array(); - foreach ( $this->mLinkHolders['namespaces'] as $key => $ns ) { - $pdbk = $pdbks[$key]; - $searchkey = ""; - $title = $this->mLinkHolders['titles'][$key]; - if ( empty( $colours[$pdbk] ) ) { - $linkCache->addBadLinkObj( $title ); - $colours[$pdbk] = 0; - $this->mOutput->addLink( $title, 0 ); - $replacePairs[$searchkey] = $sk->makeBrokenLinkObj( $title, - $this->mLinkHolders['texts'][$key], - $this->mLinkHolders['queries'][$key] ); - } elseif ( $colours[$pdbk] == 1 ) { - $replacePairs[$searchkey] = $sk->makeKnownLinkObj( $title, - $this->mLinkHolders['texts'][$key], - $this->mLinkHolders['queries'][$key] ); - } elseif ( $colours[$pdbk] == 2 ) { - $replacePairs[$searchkey] = $sk->makeStubLinkObj( $title, - $this->mLinkHolders['texts'][$key], - $this->mLinkHolders['queries'][$key] ); - } - } - $replacer = new HashtableReplacer( $replacePairs, 1 ); - wfProfileOut( $fname.'-construct' ); - - # Do the thing - wfProfileIn( $fname.'-replace' ); - $text = preg_replace_callback( - '/()/', - $replacer->cb(), - $text); - - wfProfileOut( $fname.'-replace' ); - } - - # Now process interwiki link holders - # This is quite a bit simpler than internal links - if ( !empty( $this->mInterwikiLinkHolders['texts'] ) ) { - wfProfileIn( $fname.'-interwiki' ); - # Make interwiki link HTML - $replacePairs = array(); - foreach( $this->mInterwikiLinkHolders['texts'] as $key => $link ) { - $title = $this->mInterwikiLinkHolders['titles'][$key]; - $replacePairs[$key] = $sk->makeLinkObj( $title, $link ); - } - $replacer = new HashtableReplacer( $replacePairs, 1 ); - - $text = preg_replace_callback( - '//', - $replacer->cb(), - $text ); - wfProfileOut( $fname.'-interwiki' ); - } - - wfProfileOut( $fname ); - return $colours; - } - - /** - * Replace link placeholders with plain text of links - * (not HTML-formatted). - * @param string $text - * @return string - */ - function replaceLinkHoldersText( $text ) { - $fname = 'Parser::replaceLinkHoldersText'; - wfProfileIn( $fname ); - - $text = preg_replace_callback( - '//', - array( &$this, 'replaceLinkHoldersTextCallback' ), - $text ); - - wfProfileOut( $fname ); - return $text; - } - - /** - * @param array $matches - * @return string - * @private - */ - function replaceLinkHoldersTextCallback( $matches ) { - $type = $matches[1]; - $key = $matches[2]; - if( $type == 'LINK' ) { - if( isset( $this->mLinkHolders['texts'][$key] ) ) { - return $this->mLinkHolders['texts'][$key]; - } - } elseif( $type == 'IWLINK' ) { - if( isset( $this->mInterwikiLinkHolders['texts'][$key] ) ) { - return $this->mInterwikiLinkHolders['texts'][$key]; - } - } - return $matches[0]; - } - - /** - * Tag hook handler for 'pre'. - */ - function renderPreTag( $text, $attribs ) { - // Backwards-compatibility hack - $content = StringUtils::delimiterReplace( '', '', '$1', $text, 'i' ); - - $attribs = Sanitizer::validateTagAttributes( $attribs, 'pre' ); - return wfOpenElement( 'pre', $attribs ) . - Xml::escapeTagsOnly( $content ) . - '
'; - } - - /** - * Renders an image gallery from a text with one line per image. - * text labels may be given by using |-style alternative text. E.g. - * Image:one.jpg|The number "1" - * Image:tree.jpg|A tree - * given as text will return the HTML of a gallery with two images, - * labeled 'The number "1"' and - * 'A tree'. - */ - function renderImageGallery( $text, $params ) { - $ig = new ImageGallery(); - $ig->setContextTitle( $this->mTitle ); - $ig->setShowBytes( false ); - $ig->setShowFilename( false ); - $ig->setParser( $this ); - $ig->setHideBadImages(); - $ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'table' ) ); - $ig->useSkin( $this->mOptions->getSkin() ); - $ig->mRevisionId = $this->mRevisionId; - - if( isset( $params['caption'] ) ) { - $caption = $params['caption']; - $caption = htmlspecialchars( $caption ); - $caption = $this->replaceInternalLinks( $caption ); - $ig->setCaptionHtml( $caption ); - } - if( isset( $params['perrow'] ) ) { - $ig->setPerRow( $params['perrow'] ); - } - if( isset( $params['widths'] ) ) { - $ig->setWidths( $params['widths'] ); - } - if( isset( $params['heights'] ) ) { - $ig->setHeights( $params['heights'] ); - } - - wfRunHooks( 'BeforeParserrenderImageGallery', array( &$this, &$ig ) ); - - $lines = explode( "\n", $text ); - foreach ( $lines as $line ) { - # match lines like these: - # Image:someimage.jpg|This is some image - $matches = array(); - preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches ); - # Skip empty lines - if ( count( $matches ) == 0 ) { - continue; - } - $tp = Title::newFromText( $matches[1] ); - $nt =& $tp; - if( is_null( $nt ) ) { - # Bogus title. Ignore these so we don't bomb out later. - continue; - } - if ( isset( $matches[3] ) ) { - $label = $matches[3]; - } else { - $label = ''; - } - - $pout = $this->parse( $label, - $this->mTitle, - $this->mOptions, - false, // Strip whitespace...? - false // Don't clear state! - ); - $html = $pout->getText(); - - $ig->add( $nt, $html ); - - # Only add real images (bug #5586) - if ( $nt->getNamespace() == NS_IMAGE ) { - $this->mOutput->addImage( $nt->getDBkey() ); - } - } - return $ig->toHTML(); - } - - function getImageParams( $handler ) { - if ( $handler ) { - $handlerClass = get_class( $handler ); - } else { - $handlerClass = ''; - } - if ( !isset( $this->mImageParams[$handlerClass] ) ) { - // Initialise static lists - static $internalParamNames = array( - 'horizAlign' => array( 'left', 'right', 'center', 'none' ), - 'vertAlign' => array( 'baseline', 'sub', 'super', 'top', 'text-top', 'middle', - 'bottom', 'text-bottom' ), - 'frame' => array( 'thumbnail', 'manualthumb', 'framed', 'frameless', - 'upright', 'border' ), - ); - static $internalParamMap; - if ( !$internalParamMap ) { - $internalParamMap = array(); - foreach ( $internalParamNames as $type => $names ) { - foreach ( $names as $name ) { - $magicName = str_replace( '-', '_', "img_$name" ); - $internalParamMap[$magicName] = array( $type, $name ); - } - } - } - - // Add handler params - $paramMap = $internalParamMap; - if ( $handler ) { - $handlerParamMap = $handler->getParamMap(); - foreach ( $handlerParamMap as $magic => $paramName ) { - $paramMap[$magic] = array( 'handler', $paramName ); - } - } - $this->mImageParams[$handlerClass] = $paramMap; - $this->mImageParamsMagicArray[$handlerClass] = new MagicWordArray( array_keys( $paramMap ) ); - } - return array( $this->mImageParams[$handlerClass], $this->mImageParamsMagicArray[$handlerClass] ); - } - - /** - * Parse image options text and use it to make an image - */ - function makeImage( $title, $options ) { - # @TODO: let the MediaHandler specify its transform parameters - # - # Check if the options text is of the form "options|alt text" - # Options are: - # * thumbnail make a thumbnail with enlarge-icon and caption, alignment depends on lang - # * left no resizing, just left align. label is used for alt= only - # * right same, but right aligned - # * none same, but not aligned - # * ___px scale to ___ pixels width, no aligning. e.g. use in taxobox - # * center center the image - # * framed Keep original image size, no magnify-button. - # * frameless like 'thumb' but without a frame. Keeps user preferences for width - # * upright reduce width for upright images, rounded to full __0 px - # * border draw a 1px border around the image - # vertical-align values (no % or length right now): - # * baseline - # * sub - # * super - # * top - # * text-top - # * middle - # * bottom - # * text-bottom - - $parts = array_map( 'trim', explode( '|', $options) ); - $sk = $this->mOptions->getSkin(); - - # Give extensions a chance to select the file revision for us - $skip = $time = false; - wfRunHooks( 'BeforeParserMakeImageLinkObj', array( &$this, &$title, &$skip, &$time ) ); - - if ( $skip ) { - return $sk->makeLinkObj( $title ); - } - - # Get parameter map - $file = wfFindFile( $title, $time ); - $handler = $file ? $file->getHandler() : false; - - list( $paramMap, $mwArray ) = $this->getImageParams( $handler ); - - # Process the input parameters - $caption = ''; - $params = array( 'frame' => array(), 'handler' => array(), - 'horizAlign' => array(), 'vertAlign' => array() ); - foreach( $parts as $part ) { - list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $part ); - if ( isset( $paramMap[$magicName] ) ) { - list( $type, $paramName ) = $paramMap[$magicName]; - $params[$type][$paramName] = $value; - - // Special case; width and height come in one variable together - if( $type == 'handler' && $paramName == 'width' ) { - $m = array(); - if ( preg_match( '/^([0-9]*)x([0-9]*)$/', $value, $m ) ) { - $params[$type]['width'] = intval( $m[1] ); - $params[$type]['height'] = intval( $m[2] ); - } else { - $params[$type]['width'] = intval( $value ); - } - } - } else { - $caption = $part; - } - } - - # Process alignment parameters - if ( $params['horizAlign'] ) { - $params['frame']['align'] = key( $params['horizAlign'] ); - } - if ( $params['vertAlign'] ) { - $params['frame']['valign'] = key( $params['vertAlign'] ); - } - - # Validate the handler parameters - if ( $handler ) { - foreach ( $params['handler'] as $name => $value ) { - if ( !$handler->validateParam( $name, $value ) ) { - unset( $params['handler'][$name] ); - } - } - } - - # Strip bad stuff out of the alt text - $alt = $this->replaceLinkHoldersText( $caption ); - - # make sure there are no placeholders in thumbnail attributes - # that are later expanded to html- so expand them now and - # remove the tags - $alt = $this->mStripState->unstripBoth( $alt ); - $alt = Sanitizer::stripAllTags( $alt ); - - $params['frame']['alt'] = $alt; - $params['frame']['caption'] = $caption; - - # Linker does the rest - $ret = $sk->makeImageLink2( $title, $file, $params['frame'], $params['handler'] ); - - # Give the handler a chance to modify the parser object - if ( $handler ) { - $handler->parserTransformHook( $this, $file ); - } - - return $ret; - } - - /** - * Set a flag in the output object indicating that the content is dynamic and - * shouldn't be cached. - */ - function disableCache() { - wfDebug( "Parser output marked as uncacheable.\n" ); - $this->mOutput->mCacheTime = -1; - } - - /**#@+ - * Callback from the Sanitizer for expanding items found in HTML attribute - * values, so they can be safely tested and escaped. - * @param string $text - * @param array $args - * @return string - * @private - */ - function attributeStripCallback( &$text, $args ) { - $text = $this->replaceVariables( $text, $args ); - $text = $this->mStripState->unstripBoth( $text ); - return $text; - } - - /**#@-*/ - - /**#@+ - * Accessor/mutator - */ - function Title( $x = NULL ) { return wfSetVar( $this->mTitle, $x ); } - function Options( $x = NULL ) { return wfSetVar( $this->mOptions, $x ); } - function OutputType( $x = NULL ) { return wfSetVar( $this->mOutputType, $x ); } - /**#@-*/ - - /**#@+ - * Accessor - */ - function getTags() { return array_merge( array_keys($this->mTransparentTagHooks), array_keys( $this->mTagHooks ) ); } - /**#@-*/ - - - /** - * Break wikitext input into sections, and either pull or replace - * some particular section's text. - * - * External callers should use the getSection and replaceSection methods. - * - * @param $text Page wikitext - * @param $section Numbered section. 0 pulls the text before the first - * heading; other numbers will pull the given section - * along with its lower-level subsections. - * @param $mode One of "get" or "replace" - * @param $newtext Replacement text for section data. - * @return string for "get", the extracted section text. - * for "replace", the whole page with the section replaced. - */ - private function extractSections( $text, $section, $mode, $newtext='' ) { - # I.... _hope_ this is right. - # Otherwise, sometimes we don't have things initialized properly. - $this->clearState(); - - # strip NOWIKI etc. to avoid confusion (true-parameter causes HTML - # comments to be stripped as well) - $stripState = new StripState; - - $oldOutputType = $this->mOutputType; - $oldOptions = $this->mOptions; - $this->mOptions = new ParserOptions(); - $this->setOutputType( self::OT_WIKI ); - - $striptext = $this->strip( $text, $stripState, true ); - - $this->setOutputType( $oldOutputType ); - $this->mOptions = $oldOptions; - - # now that we can be sure that no pseudo-sections are in the source, - # split it up by section - $uniq = preg_quote( $this->uniqPrefix(), '/' ); - $comment = "(?:$uniq-!--.*?QINU\x07)"; - $secs = preg_split( - "/ - ( - ^ - (?:$comment|<\/?noinclude>)* # Initial comments will be stripped - (=+) # Should this be limited to 6? - .+? # Section title... - \\2 # Ending = count must match start - (?:$comment|<\/?noinclude>|[ \\t]+)* # Trailing whitespace ok - $ - | - - .*? - <\/h\\3\s*> - ) - /mix", - $striptext, -1, - PREG_SPLIT_DELIM_CAPTURE); - - if( $mode == "get" ) { - if( $section == 0 ) { - // "Section 0" returns the content before any other section. - $rv = $secs[0]; - } else { - //track missing section, will replace if found. - $rv = $newtext; - } - } elseif( $mode == "replace" ) { - if( $section == 0 ) { - $rv = $newtext . "\n\n"; - $remainder = true; - } else { - $rv = $secs[0]; - $remainder = false; - } - } - $count = 0; - $sectionLevel = 0; - for( $index = 1; $index < count( $secs ); ) { - $headerLine = $secs[$index++]; - if( $secs[$index] ) { - // A wiki header - $headerLevel = strlen( $secs[$index++] ); - } else { - // An HTML header - $index++; - $headerLevel = intval( $secs[$index++] ); - } - $content = $secs[$index++]; - - $count++; - if( $mode == "get" ) { - if( $count == $section ) { - $rv = $headerLine . $content; - $sectionLevel = $headerLevel; - } elseif( $count > $section ) { - if( $sectionLevel && $headerLevel > $sectionLevel ) { - $rv .= $headerLine . $content; - } else { - // Broke out to a higher-level section - break; - } - } - } elseif( $mode == "replace" ) { - if( $count < $section ) { - $rv .= $headerLine . $content; - } elseif( $count == $section ) { - $rv .= $newtext . "\n\n"; - $sectionLevel = $headerLevel; - } elseif( $count > $section ) { - if( $headerLevel <= $sectionLevel ) { - // Passed the section's sub-parts. - $remainder = true; - } - if( $remainder ) { - $rv .= $headerLine . $content; - } - } - } - } - if (is_string($rv)) - # reinsert stripped tags - $rv = trim( $stripState->unstripBoth( $rv ) ); - - return $rv; - } - - /** - * This function returns the text of a section, specified by a number ($section). - * A section is text under a heading like == Heading == or \Heading\, or - * the first section before any such heading (section 0). - * - * If a section contains subsections, these are also returned. - * - * @param $text String: text to look in - * @param $section Integer: section number - * @param $deftext: default to return if section is not found - * @return string text of the requested section - */ - public function getSection( $text, $section, $deftext='' ) { - return $this->extractSections( $text, $section, "get", $deftext ); - } - - public function replaceSection( $oldtext, $section, $text ) { - return $this->extractSections( $oldtext, $section, "replace", $text ); - } - - /** - * Get the timestamp associated with the current revision, adjusted for - * the default server-local timestamp - */ - function getRevisionTimestamp() { - if ( is_null( $this->mRevisionTimestamp ) ) { - wfProfileIn( __METHOD__ ); - global $wgContLang; - $dbr = wfGetDB( DB_SLAVE ); - $timestamp = $dbr->selectField( 'revision', 'rev_timestamp', - array( 'rev_id' => $this->mRevisionId ), __METHOD__ ); - - // Normalize timestamp to internal MW format for timezone processing. - // This has the added side-effect of replacing a null value with - // the current time, which gives us more sensible behavior for - // previews. - $timestamp = wfTimestamp( TS_MW, $timestamp ); - - // The cryptic '' timezone parameter tells to use the site-default - // timezone offset instead of the user settings. - // - // Since this value will be saved into the parser cache, served - // to other users, and potentially even used inside links and such, - // it needs to be consistent for all visitors. - $this->mRevisionTimestamp = $wgContLang->userAdjust( $timestamp, '' ); - - wfProfileOut( __METHOD__ ); - } - return $this->mRevisionTimestamp; - } - - /** - * Mutator for $mDefaultSort - * - * @param $sort New value - */ - public function setDefaultSort( $sort ) { - $this->mDefaultSort = $sort; - } - - /** - * Accessor for $mDefaultSort - * Will use the title/prefixed title if none is set - * - * @return string - */ - public function getDefaultSort() { - if( $this->mDefaultSort !== false ) { - return $this->mDefaultSort; - } else { - return $this->mTitle->getNamespace() == NS_CATEGORY - ? $this->mTitle->getText() - : $this->mTitle->getPrefixedText(); - } - } - - /** - * Try to guess the section anchor name based on a wikitext fragment - * presumably extracted from a heading, for example "Header" from - * "== Header ==". - */ - public function guessSectionNameFromWikiText( $text ) { - # Strip out wikitext links(they break the anchor) - $text = $this->stripSectionName( $text ); - $headline = Sanitizer::decodeCharReferences( $text ); - # strip out HTML - $headline = StringUtils::delimiterReplace( '<', '>', '', $headline ); - $headline = trim( $headline ); - $sectionanchor = '#' . urlencode( str_replace( ' ', '_', $headline ) ); - $replacearray = array( - '%3A' => ':', - '%' => '.' - ); - return str_replace( - array_keys( $replacearray ), - array_values( $replacearray ), - $sectionanchor ); - } - - /** - * Strips a text string of wikitext for use in a section anchor - * - * Accepts a text string and then removes all wikitext from the - * string and leaves only the resultant text (i.e. the result of - * [[User:WikiSysop|Sysop]] would be "Sysop" and the result of - * [[User:WikiSysop]] would be "User:WikiSysop") - this is intended - * to create valid section anchors by mimicing the output of the - * parser when headings are parsed. - * - * @param $text string Text string to be stripped of wikitext - * for use in a Section anchor - * @return Filtered text string - */ - public function stripSectionName( $text ) { - # Strip internal link markup - $text = preg_replace('/\[\[:?([^[|]+)\|([^[]+)\]\]/','$2',$text); - $text = preg_replace('/\[\[:?([^[]+)\|?\]\]/','$1',$text); - - # Strip external link markup (FIXME: Not Tolerant to blank link text - # I.E. [http://www.mediawiki.org] will render as [1] or something depending - # on how many empty links there are on the page - need to figure that out. - $text = preg_replace('/\[(?:' . wfUrlProtocols() . ')([^ ]+?) ([^[]+)\]/','$2',$text); - - # Parse wikitext quotes (italics & bold) - $text = $this->doQuotes($text); - - # Strip HTML tags - $text = StringUtils::delimiterReplace( '<', '>', '', $text ); - return $text; - } - - /** - * strip/replaceVariables/unstrip for preprocessor regression testing - */ - function srvus( $text ) { - $text = $this->strip( $text, $this->mStripState ); - $text = Sanitizer::removeHTMLtags( $text ); - $text = $this->replaceVariables( $text ); - $text = preg_replace( '//', '', $text ); - $text = $this->mStripState->unstripBoth( $text ); - return $text; - } -} diff --git a/includes/parser/Preprocessor_DOM.php b/includes/parser/Preprocessor_DOM.php index 34d58967..af591b67 100644 --- a/includes/parser/Preprocessor_DOM.php +++ b/includes/parser/Preprocessor_DOM.php @@ -770,6 +770,7 @@ class PPFrame_DOM implements PPFrame { /** * Recursion depth of this frame, top = 0 + * Note that this is NOT the same as expansion depth in expand() */ var $depth; @@ -826,20 +827,21 @@ class PPFrame_DOM implements PPFrame { } function expand( $root, $flags = 0 ) { - static $depth = 0; + static $expansionDepth = 0; if ( is_string( $root ) ) { return $root; } + wfProfileIn( __METHOD__ ); if ( ++$this->parser->mPPNodeCount > $this->parser->mOptions->mMaxPPNodeCount ) { return 'Node-count limit exceeded'; } - if ( $depth > $this->parser->mOptions->mMaxPPExpandDepth ) { + if ( $expansionDepth > $this->parser->mOptions->mMaxPPExpandDepth ) { return 'Expansion depth limit exceeded'; } - ++$depth; + ++$expansionDepth; if ( $root instanceof PPNode_DOM ) { $root = $root->node; @@ -1005,6 +1007,7 @@ class PPFrame_DOM implements PPFrame { $newIterator = $contextNode->childNodes; } } else { + wfProfileOut( __METHOD__ ); throw new MWException( __METHOD__.': Invalid parameter type' ); } @@ -1027,7 +1030,8 @@ class PPFrame_DOM implements PPFrame { } } } - --$depth; + --$expansionDepth; + wfProfileOut( __METHOD__ ); return $outStack[0]; } @@ -1218,6 +1222,32 @@ class PPTemplateFrame_DOM extends PPFrame_DOM { return !count( $this->numberedArgs ) && !count( $this->namedArgs ); } + function getArguments() { + $arguments = array(); + foreach ( array_merge( + array_keys($this->numberedArgs), + array_keys($this->namedArgs)) as $key ) { + $arguments[$key] = $this->getArgument($key); + } + return $arguments; + } + + function getNumberedArguments() { + $arguments = array(); + foreach ( array_keys($this->numberedArgs) as $key ) { + $arguments[$key] = $this->getArgument($key); + } + return $arguments; + } + + function getNamedArguments() { + $arguments = array(); + foreach ( array_keys($this->namedArgs) as $key ) { + $arguments[$key] = $this->getArgument($key); + } + return $arguments; + } + function getNumberedArgument( $index ) { if ( !isset( $this->numberedArgs[$index] ) ) { return false; @@ -1291,6 +1321,9 @@ class PPCustomFrame_DOM extends PPFrame_DOM { } function getArgument( $index ) { + if ( !isset( $this->args[$index] ) ) { + return false; + } return $this->args[$index]; } } diff --git a/includes/parser/Preprocessor_Hash.php b/includes/parser/Preprocessor_Hash.php index b5775243..62028291 100644 --- a/includes/parser/Preprocessor_Hash.php +++ b/includes/parser/Preprocessor_Hash.php @@ -758,6 +758,7 @@ class PPFrame_Hash implements PPFrame { /** * Recursion depth of this frame, top = 0 + * Note that this is NOT the same as expansion depth in expand() */ var $depth; @@ -810,6 +811,7 @@ class PPFrame_Hash implements PPFrame { } function expand( $root, $flags = 0 ) { + static $expansionDepth = 0; if ( is_string( $root ) ) { return $root; } @@ -818,10 +820,10 @@ class PPFrame_Hash implements PPFrame { { return 'Node-count limit exceeded'; } - if ( $this->depth > $this->parser->mOptions->mMaxPPExpandDepth ) { + if ( $expansionDepth > $this->parser->mOptions->mMaxPPExpandDepth ) { return 'Expansion depth limit exceeded'; } - ++$this->depth; + ++$expansionDepth; $outStack = array( '', '' ); $iteratorStack = array( false, $root ); @@ -974,7 +976,7 @@ class PPFrame_Hash implements PPFrame { } } } - --$this->depth; + --$expansionDepth; return $outStack[0]; } @@ -1173,6 +1175,32 @@ class PPTemplateFrame_Hash extends PPFrame_Hash { return !count( $this->numberedArgs ) && !count( $this->namedArgs ); } + function getArguments() { + $arguments = array(); + foreach ( array_merge( + array_keys($this->numberedArgs), + array_keys($this->namedArgs)) as $key ) { + $arguments[$key] = $this->getArgument($key); + } + return $arguments; + } + + function getNumberedArguments() { + $arguments = array(); + foreach ( array_keys($this->numberedArgs) as $key ) { + $arguments[$key] = $this->getArgument($key); + } + return $arguments; + } + + function getNamedArguments() { + $arguments = array(); + foreach ( array_keys($this->namedArgs) as $key ) { + $arguments[$key] = $this->getArgument($key); + } + return $arguments; + } + function getNumberedArgument( $index ) { if ( !isset( $this->numberedArgs[$index] ) ) { return false; @@ -1246,6 +1274,9 @@ class PPCustomFrame_Hash extends PPFrame_Hash { } function getArgument( $index ) { + if ( !isset( $this->args[$index] ) ) { + return false; + } return $this->args[$index]; } } -- cgit v1.2.3-54-g00ecf