diff options
Diffstat (limited to 'includes/parser/Tidy.php')
-rw-r--r-- | includes/parser/Tidy.php | 58 |
1 files changed, 35 insertions, 23 deletions
diff --git a/includes/parser/Tidy.php b/includes/parser/Tidy.php index ed2d436d..0f7e0d31 100644 --- a/includes/parser/Tidy.php +++ b/includes/parser/Tidy.php @@ -59,12 +59,18 @@ class MWTidyWrapper { dechex( mt_rand( 0, 0x7fffffff ) ) . dechex( mt_rand( 0, 0x7fffffff ) ); $this->mMarkerIndex = 0; + // Replace <mw:editsection> elements with placeholders $wrappedtext = preg_replace_callback( ParserOutput::EDITSECTION_REGEX, array( &$this, 'replaceEditSectionLinksCallback' ), $text ); - $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'. - ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'. - '<head><title>test</title></head><body>'.$wrappedtext.'</body></html>'; + // Modify inline Microdata <link> and <meta> elements so they say <html-link> and <html-meta> so + // we can trick Tidy into not stripping them out by including them in tidy's new-empty-tags config + $wrappedtext = preg_replace( '!<(link|meta)([^>]*?)(/{0,1}>)!', '<html-$1$2$3', $wrappedtext ); + + // Wrap the whole thing in a doctype and body for Tidy. + $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"' . + ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>' . + '<head><title>test</title></head><body>' . $wrappedtext . '</body></html>'; return $wrappedtext; } @@ -86,7 +92,13 @@ class MWTidyWrapper { * @return string */ public function postprocess( $text ) { - return $this->mTokens->replace( $text ); + // Revert <html-{link,meta}> back to <{link,meta}> + $text = preg_replace( '!<html-(link|meta)([^>]*?)(/{0,1}>)!', '<$1$2$3', $text ); + + // Restore the contents of placeholder tokens + $text = $this->mTokens->replace( $text ); + + return $text; } } @@ -106,7 +118,7 @@ class MWTidy { * If tidy isn't able to correct the markup, the original will be * returned in all its glory with a warning comment appended. 
* - * @param $text String: hideous HTML input + * @param string $text hideous HTML input * @return String: corrected HTML output */ public static function tidy( $text ) { @@ -159,7 +171,7 @@ class MWTidy { * Spawn an external HTML tidy process and get corrected markup back from it. * Also called in OutputHandler.php for full page validation * - * @param $text String: HTML to check + * @param string $text HTML to check * @param $stderr Boolean: Whether to read result from STDERR rather than STDOUT * @param &$retval int Exit code (-1 on internal error) * @return mixed String or null @@ -223,7 +235,7 @@ class MWTidy { * Use the HTML tidy extension to use the tidy library in-process, * saving the overhead of spawning a new process. * - * @param $text String: HTML to check + * @param string $text HTML to check * @param $stderr Boolean: Whether to read result from error status instead of output * @param &$retval int Exit code (-1 on internal error) * @return mixed String or null @@ -248,24 +260,24 @@ class MWTidy { wfProfileOut( __METHOD__ ); return $tidy->errorBuffer; + } + + $tidy->cleanRepair(); + $retval = $tidy->getStatus(); + if ( $retval == 2 ) { + // 2 is magic number for fatal error + // http://www.php.net/manual/en/function.tidy-get-status.php + $cleansource = null; } else { - $tidy->cleanRepair(); - $retval = $tidy->getStatus(); - if ( $retval == 2 ) { - // 2 is magic number for fatal error - // http://www.php.net/manual/en/function.tidy-get-status.php - $cleansource = null; - } else { - $cleansource = tidy_get_output( $tidy ); - if ( $wgDebugTidy && $retval > 0 ) { - $cleansource .= "<!--\nTidy reports:\n" . - str_replace( '-->', '--&gt;', $tidy->errorBuffer ) . - "\n-->"; - } + $cleansource = tidy_get_output( $tidy ); + if ( $wgDebugTidy && $retval > 0 ) { + $cleansource .= "<!--\nTidy reports:\n" . + str_replace( '-->', '--&gt;', $tidy->errorBuffer ) . 
+ "\n-->"; } - - wfProfileOut( __METHOD__ ); - return $cleansource; } + + wfProfileOut( __METHOD__ ); + return $cleansource; } } |