diff options
author | Pierre Schmitz <pierre@archlinux.de> | 2013-12-08 09:55:49 +0100 |
---|---|---|
committer | Pierre Schmitz <pierre@archlinux.de> | 2013-12-08 09:55:49 +0100 |
commit | 4ac9fa081a7c045f6a9f1cfc529d82423f485b2e (patch) | |
tree | af68743f2f4a47d13f2b0eb05f5c4aaf86d8ea37 /includes/parser/Preprocessor_DOM.php | |
parent | af4da56f1ad4d3ef7b06557bae365da2ea27a897 (diff) |
Update to MediaWiki 1.22.0
Diffstat (limited to 'includes/parser/Preprocessor_DOM.php')
-rw-r--r-- | includes/parser/Preprocessor_DOM.php | 99 |
1 files changed, 68 insertions, 31 deletions
diff --git a/includes/parser/Preprocessor_DOM.php b/includes/parser/Preprocessor_DOM.php index d0c57ab5..3138f483 100644 --- a/includes/parser/Preprocessor_DOM.php +++ b/includes/parser/Preprocessor_DOM.php @@ -72,9 +72,8 @@ class Preprocessor_DOM implements Preprocessor { $xml = "<list>"; foreach ( $values as $k => $val ) { - if ( is_int( $k ) ) { - $xml .= "<part><name index=\"$k\"/><value>" . htmlspecialchars( $val ) ."</value></part>"; + $xml .= "<part><name index=\"$k\"/><value>" . htmlspecialchars( $val ) . "</value></part>"; } else { $xml .= "<part><name>" . htmlspecialchars( $k ) . "</name>=<value>" . htmlspecialchars( $val ) . "</value></part>"; } @@ -149,26 +148,28 @@ class Preprocessor_DOM implements Preprocessor { wfDebugLog( "Preprocessor", "Loaded preprocessor XML from memcached (key $cacheKey)" ); } } - } - if ( $xml === false ) { - if ( $cacheable ) { + if ( $xml === false ) { wfProfileIn( __METHOD__ . '-cache-miss' ); $xml = $this->preprocessToXml( $text, $flags ); $cacheValue = sprintf( "%08d", self::CACHE_VERSION ) . $xml; $wgMemc->set( $cacheKey, $cacheValue, 86400 ); wfProfileOut( __METHOD__ . '-cache-miss' ); wfDebugLog( "Preprocessor", "Saved preprocessor XML to memcached (key $cacheKey)" ); - } else { - $xml = $this->preprocessToXml( $text, $flags ); } - + } else { + $xml = $this->preprocessToXml( $text, $flags ); } + // Fail if the number of elements exceeds acceptable limits // Do not attempt to generate the DOM $this->parser->mGeneratedPPNodeCount += substr_count( $xml, '<' ); $max = $this->parser->mOptions->getMaxGeneratedPPNodeCount(); if ( $this->parser->mGeneratedPPNodeCount > $max ) { + if ( $cacheable ) { + wfProfileOut( __METHOD__ . '-cacheable' ); + } + wfProfileOut( __METHOD__ ); throw new MWException( __METHOD__ . ': generated node count limit exceeded' ); } @@ -182,16 +183,21 @@ class Preprocessor_DOM implements Preprocessor { $xml = UtfNormal::cleanUp( $xml ); // 1 << 19 == XML_PARSE_HUGE, needed so newer versions of libxml2 don't barf when the XML is >256 levels deep $result = $dom->loadXML( $xml, 1 << 19 ); - if ( !$result ) { - throw new MWException( __METHOD__ . ' generated invalid XML' ); - } } - $obj = new PPNode_DOM( $dom->documentElement ); + if ( $result ) { + $obj = new PPNode_DOM( $dom->documentElement ); + } wfProfileOut( __METHOD__ . '-loadXML' ); + if ( $cacheable ) { wfProfileOut( __METHOD__ . '-cacheable' ); } + wfProfileOut( __METHOD__ ); + + if ( !$result ) { + throw new MWException( __METHOD__ . ' generated invalid XML' ); + } return $obj; } @@ -355,9 +361,11 @@ class Preprocessor_DOM implements Preprocessor { } // Handle comments if ( isset( $matches[2] ) && $matches[2] == '!--' ) { - // To avoid leaving blank lines, when a comment is both preceded - // and followed by a newline (ignoring spaces), trim leading and - // trailing spaces and one of the newlines. + + // To avoid leaving blank lines, when a sequence of + // space-separated comments is both preceded and followed by + // a newline (ignoring spaces), then + // trim leading and trailing spaces and the trailing newline. // Find the end $endPos = strpos( $text, '-->', $i + 4 ); @@ -368,10 +376,25 @@ class Preprocessor_DOM implements Preprocessor { $i = $lengthText; } else { // Search backwards for leading whitespace - $wsStart = $i ? ( $i - strspn( $revText, ' ', $lengthText - $i ) ) : 0; + $wsStart = $i ? ( $i - strspn( $revText, " \t", $lengthText - $i ) ) : 0; + // Search forwards for trailing whitespace // $wsEnd will be the position of the last space (or the '>' if there's none) - $wsEnd = $endPos + 2 + strspn( $text, ' ', $endPos + 3 ); + $wsEnd = $endPos + 2 + strspn( $text, " \t", $endPos + 3 ); + + // Keep looking forward as long as we're finding more + // comments. + $comments = array( array( $wsStart, $wsEnd ) ); + while ( substr( $text, $wsEnd + 1, 4 ) == '<!--' ) { + $c = strpos( $text, '-->', $wsEnd + 4 ); + if ( $c === false ) { + break; + } + $c = $c + 2 + strspn( $text, " \t", $c + 3 ); + $comments[] = array( $wsEnd + 1, $c ); + $wsEnd = $c; + } + // Eat the line if possible // TODO: This could theoretically be done if $wsStart == 0, i.e. for comments at // the overall start. That's not how Sanitizer::removeHTMLcomments() did it, but @@ -379,14 +402,26 @@ class Preprocessor_DOM implements Preprocessor { if ( $wsStart > 0 && substr( $text, $wsStart - 1, 1 ) == "\n" && substr( $text, $wsEnd + 1, 1 ) == "\n" ) { - $startPos = $wsStart; - $endPos = $wsEnd + 1; // Remove leading whitespace from the end of the accumulator // Sanity check first though $wsLength = $i - $wsStart; - if ( $wsLength > 0 && substr( $accum, -$wsLength ) === str_repeat( ' ', $wsLength ) ) { + if ( $wsLength > 0 + && strspn( $accum, " \t", -$wsLength ) === $wsLength ) + { $accum = substr( $accum, 0, -$wsLength ); } + + // Dump all but the last comment to the accumulator + foreach ( $comments as $j => $com ) { + $startPos = $com[0]; + $endPos = $com[1] + 1; + if ( $j == ( count( $comments ) - 1 ) ) { + break; + } + $inner = substr( $text, $startPos, $endPos - $startPos ); + $accum .= '<comment>' . htmlspecialchars( $inner ) . '</comment>'; + } + // Do a line-start run next time to look for headings after the comment $fakeLineStart = true; } else { @@ -397,7 +432,7 @@ class Preprocessor_DOM implements Preprocessor { if ( $stack->top ) { $part = $stack->top->getCurrentPart(); - if ( !(isset( $part->commentEnd ) && $part->commentEnd == $wsStart - 1 )) { + if ( !( isset( $part->commentEnd ) && $part->commentEnd == $wsStart - 1 ) ) { $part->visualEnd = $wsStart; } // Else comments abutting, no change in visual end @@ -432,7 +467,7 @@ class Preprocessor_DOM implements Preprocessor { } $tagStartPos = $i; - if ( $text[$tagEndPos-1] == '/' ) { + if ( $text[$tagEndPos - 1] == '/' ) { $attrEnd = $tagEndPos - 1; $inner = null; $i = $tagEndPos + 1; @@ -569,7 +604,7 @@ class Preprocessor_DOM implements Preprocessor { 'open' => $curChar, 'close' => $rule['end'], 'count' => $count, - 'lineStart' => ($i > 0 && $text[$i-1] == "\n"), + 'lineStart' => ( $i > 0 && $text[$i - 1] == "\n" ), ); $stack->push( $piece ); @@ -746,7 +781,7 @@ class PPDStack { $class = $this->elementClass; $this->stack[] = new $class( $data ); } - $this->top = $this->stack[ count( $this->stack ) - 1 ]; + $this->top = $this->stack[count( $this->stack ) - 1]; $this->accum =& $this->top->getAccum(); } @@ -757,7 +792,7 @@ class PPDStack { $temp = array_pop( $this->stack ); if ( count( $this->stack ) ) { - $this->top = $this->stack[ count( $this->stack ) - 1 ]; + $this->top = $this->stack[count( $this->stack ) - 1]; $this->accum =& $this->top->getAccum(); } else { $this->top = self::$false; @@ -1014,11 +1049,13 @@ class PPFrame_DOM implements PPFrame { while ( count( $iteratorStack ) > 1 ) { $level = count( $outStack ) - 1; - $iteratorNode =& $iteratorStack[ $level ]; + $iteratorNode =& $iteratorStack[$level]; $out =& $outStack[$level]; $index =& $indexStack[$level]; - if ( $iteratorNode instanceof PPNode_DOM ) $iteratorNode = $iteratorNode->node; + if ( $iteratorNode instanceof PPNode_DOM ) { + $iteratorNode = $iteratorNode->node; + } if ( is_array( $iteratorNode ) ) { if ( $index >= count( $iteratorNode ) ) { @@ -1148,9 +1185,7 @@ class PPFrame_DOM implements PPFrame { # Insert a heading marker only for <h> children of <root> # This is to stop extractSections from going over multiple tree levels - if ( $contextNode->parentNode->nodeName == 'root' - && $this->parser->ot['html'] ) - { + if ( $contextNode->parentNode->nodeName == 'root' && $this->parser->ot['html'] ) { # Insert heading index marker $headingIndex = $contextNode->getAttribute( 'i' ); $titleText = $this->title->getPrefixedDBkey(); @@ -1206,7 +1241,9 @@ class PPFrame_DOM implements PPFrame { $first = true; $s = ''; foreach ( $args as $root ) { - if ( $root instanceof PPNode_DOM ) $root = $root->node; + if ( $root instanceof PPNode_DOM ) { + $root = $root->node; + } if ( !is_array( $root ) && !( $root instanceof DOMNodeList ) ) { $root = array( $root ); } |