From 124299758ca7454561118f466a0470905758924f Mon Sep 17 00:00:00 2001 From: Pierre Schmitz Date: Wed, 13 Apr 2011 05:42:02 +0200 Subject: update to MediaWiki 1.16.3 --- includes/Sanitizer.php | 47 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 13 deletions(-) (limited to 'includes/Sanitizer.php') diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php index b5725ce8..26837b3c 100644 --- a/includes/Sanitizer.php +++ b/includes/Sanitizer.php @@ -726,29 +726,35 @@ class Sanitizer { /** * Pick apart some CSS and check it for forbidden or unsafe structures. - * Returns a sanitized string, or false if it was just too evil. + * Returns a sanitized string. This sanitized string will have + * character references and escape sequences decoded, and comments + * stripped. If the input is just too evil, only a comment complaining + * about evilness will be returned. * * Currently URL references, 'expression', 'tps' are forbidden. * + * NOTE: Despite the fact that character references are decoded, the + * returned string may contain character references given certain + * clever input strings. These character references must + * be escaped before the return value is embedded in HTML. + * * @param $value String - * @return Mixed + * @return String */ static function checkCss( $value ) { + // Decode character references like &#123; $value = Sanitizer::decodeCharReferences( $value ); - // Remove any comments; IE gets token splitting wrong - $value = StringUtils::delimiterReplace( '/*', '*/', ' ', $value ); - - // Remove anything after a comment-start token, to guard against - // incorrect client implementations. - $commentPos = strpos( $value, '/*' ); - if ( $commentPos !== false ) { - $value = substr( $value, 0, $commentPos ); - } - // Decode escape sequences and line continuation // See the grammar in the CSS 2 spec, appendix D. - static $decodeRegex, $reencodeTable; + // This has to be done AFTER decoding character references. 
+ // This means it isn't possible for this function to return + // unsanitized escape sequences. It is possible to manufacture + // input that contains character references that decode to + // escape sequences that decode to character references, but + // it's OK for the return value to contain character references + // because the caller is supposed to escape those anyway. + static $decodeRegex; if ( !$decodeRegex ) { $space = '[\\x20\\t\\r\\n\\f]'; $nl = '(?:\\n|\\r\\n|\\r|\\f)'; @@ -763,6 +769,21 @@ class Sanitizer { } $value = preg_replace_callback( $decodeRegex, array( __CLASS__, 'cssDecodeCallback' ), $value ); + + // Remove any comments; IE gets token splitting wrong + // This must be done AFTER decoding character references and + // escape sequences, because those steps can introduce comments + // This step cannot introduce character references or escape + // sequences, because it replaces comments with spaces rather + // than removing them completely. + $value = StringUtils::delimiterReplace( '/*', '*/', ' ', $value ); + + // Remove anything after a comment-start token, to guard against + // incorrect client implementations. + $commentPos = strpos( $value, '/*' ); + if ( $commentPos !== false ) { + $value = substr( $value, 0, $commentPos ); + } // Reject problematic keywords and control characters if ( preg_match( '/[\000-\010\016-\037\177]/', $value ) ) { -- cgit v1.2.3-54-g00ecf