diff options
Diffstat (limited to 'includes/libs/IEUrlExtension.php')
-rw-r--r-- | includes/libs/IEUrlExtension.php | 247 |
1 files changed, 247 insertions, 0 deletions
/**
 * Internet Explorer derives a cache filename from a URL, and then in certain
 * circumstances, uses the extension of the resulting file to determine the
 * content type of the data, ignoring the Content-Type header.
 *
 * This can be a problem, especially when non-HTML content is sent by MediaWiki,
 * and Internet Explorer interprets it as HTML, exposing an XSS vulnerability.
 *
 * Usually the script filename (e.g. api.php) is present in the URL, and this
 * makes Internet Explorer think the extension is a harmless script extension.
 * But Internet Explorer 6 and earlier allows the script extension to be
 * obscured by encoding the dot as "%2E".
 *
 * This class contains functions which help in detecting and dealing with this
 * situation.
 *
 * Checking the URL for a bad extension is somewhat complicated due to the fact
 * that CGI doesn't provide a standard method to determine the URL. Instead it
 * is necessary to pass a subset of $_SERVER variables, which we then attempt
 * to use to guess parts of the URL.
 */
class IEUrlExtension {
	/**
	 * Check a subset of $_SERVER (or the whole of $_SERVER if you like)
	 * to see if it indicates that the request was sent with a bad file
	 * extension. Returns true if the request should be denied or modified,
	 * false otherwise. The relevant $_SERVER elements are:
	 *
	 *   - SERVER_SOFTWARE
	 *   - REQUEST_URI
	 *   - QUERY_STRING
	 *   - PATH_INFO
	 *
	 * If a variable is unset in $_SERVER, it should be unset in $vars.
	 *
	 * @param $vars array A subset of $_SERVER.
	 * @param $extWhitelist array Extensions which are allowed, assumed harmless.
	 * @return bool True if a potentially harmful extension was detected
	 */
	public static function areServerVarsBad( $vars, $extWhitelist = array() ) {
		// Prefer REQUEST_URI, but only on servers known to leave "%2E"
		// undecoded in it; otherwise fall back to QUERY_STRING.
		if ( isset( $vars['SERVER_SOFTWARE'] )
			&& isset( $vars['REQUEST_URI'] )
			&& self::haveUndecodedRequestUri( $vars['SERVER_SOFTWARE'] ) )
		{
			$urlPart = $vars['REQUEST_URI'];
		} elseif ( isset( $vars['QUERY_STRING'] ) ) {
			$urlPart = $vars['QUERY_STRING'];
		} else {
			$urlPart = '';
		}

		if ( self::isUrlExtensionBad( $urlPart, $extWhitelist ) ) {
			return true;
		}

		// Some servers have PATH_INFO but not REQUEST_URI, so we check both
		// to be on the safe side.
		if ( isset( $vars['PATH_INFO'] )
			&& self::isUrlExtensionBad( $vars['PATH_INFO'], $extWhitelist ) )
		{
			return true;
		}

		// All checks passed
		return false;
	}

	/**
	 * Given a right-hand portion of a URL, determine whether IE would detect
	 * a potentially harmful file extension.
	 *
	 * @param $urlPart string The right-hand portion of a URL
	 * @param $extWhitelist array An array of file extensions which may occur
	 *   in this URL, and which should be allowed.
	 * @return bool
	 */
	public static function isUrlExtensionBad( $urlPart, $extWhitelist = array() ) {
		if ( strval( $urlPart ) === '' ) {
			return false;
		}

		$extension = self::findIE6Extension( $urlPart );
		if ( strval( $extension ) === '' ) {
			// No extension or empty extension
			return false;
		}

		// Strict comparison is used below: with loose comparison two
		// numeric-looking strings such as "1e1" and "10" compare equal, which
		// would let an extension slip through a whitelist it is not in.
		if ( in_array( $extension, array( 'php', 'php5' ), true ) ) {
			// Script extension, OK
			return false;
		}
		if ( in_array( $extension, $extWhitelist, true ) ) {
			// Whitelisted extension
			return false;
		}

		if ( !preg_match( '/^[a-zA-Z0-9_-]+$/', $extension ) ) {
			// Non-alphanumeric extension, unlikely to be registered.
			//
			// The regex above is known to match all registered file extensions
			// in a default Windows XP installation. It's important to allow
			// extensions with ampersands and percent signs, since that reduces
			// the number of false positives substantially.
			return false;
		}

		// Possibly bad extension
		return true;
	}

	/**
	 * Returns a variant of $url which will pass isUrlExtensionBad() but has the
	 * same GET parameters, or false if it can't figure one out.
	 *
	 * @param $url string The URL to fix
	 * @param $extWhitelist array Extensions which are allowed, passed through
	 *   to isUrlExtensionBad().
	 * @return mixed The fixed URL (string), or false if the result would still
	 *   be detected as bad
	 */
	public static function fixUrlForIE6( $url, $extWhitelist = array() ) {
		$questionPos = strpos( $url, '?' );
		if ( $questionPos === false ) {
			// No query string yet: start an empty one
			$beforeQuery = $url . '?';
			$query = '';
		} elseif ( $questionPos === strlen( $url ) - 1 ) {
			// URL ends with "?": the query string is empty
			$beforeQuery = $url;
			$query = '';
		} else {
			// Split after the first question mark
			$beforeQuery = substr( $url, 0, $questionPos + 1 );
			$query = substr( $url, $questionPos + 1 );
		}

		// Multiple question marks cause problems. Encode the second and
		// subsequent question mark. "%3F" is the percent-encoding of "?",
		// so the GET parameter values are preserved.
		$query = str_replace( '?', '%3F', $query );
		// Append an invalid path character so that IE6 won't see the end of the
		// query string as an extension
		$query .= '&*';
		// Put the URL back together
		$url = $beforeQuery . $query;
		if ( self::isUrlExtensionBad( $url, $extWhitelist ) ) {
			// Avoid a redirect loop
			return false;
		}
		return $url;
	}

	/**
	 * Determine what extension IE6 will infer from a certain query string.
	 * If the URL has an extension before the question mark, IE6 will use
	 * that and ignore the query string, but per the comment at
	 * areServerVarsBad() we don't have a reliable way to determine the URL,
	 * so areServerVarsBad() may just pass in the query string for $url.
	 * All entry points have safe extensions (php, php5) anyway, so
	 * checking the query string is possibly overly paranoid but never
	 * insecure.
	 *
	 * The criteria for finding an extension are as follows:
	 * - the URL is considered to end at the first #, if there is one
	 * - a possible extension is a dot followed by one or more characters not
	 *   in <>\"/:|?*.
	 * - if we find a possible extension followed by the end of the URL,
	 *   that's our extension
	 * - if we find a possible extension followed by a ?, that's our extension
	 *    - UNLESS it's exe, dll or cgi, in which case we ignore it and continue
	 *      searching for another possible extension
	 * - if we find a possible extension followed by a dot or another illegal
	 *   character, we ignore it and continue searching
	 *
	 * @param $url string URL
	 * @return mixed Detected extension (string), or false if none found
	 */
	public static function findIE6Extension( $url ) {
		$pos = 0;
		// Everything from the first "#" onwards is ignored
		$hashPos = strpos( $url, '#' );
		if ( $hashPos !== false ) {
			$urlLength = $hashPos;
		} else {
			$urlLength = strlen( $url );
		}
		$remainingLength = $urlLength;
		while ( $remainingLength > 0 ) {
			// Skip ahead to the next dot
			$pos += strcspn( $url, '.', $pos, $remainingLength );
			if ( $pos >= $urlLength ) {
				// End of string, we're done
				return false;
			}

			// We found a dot. Skip past it
			$pos++;
			$remainingLength = $urlLength - $pos;

			// Check for illegal characters in our prospective extension,
			// or for another dot
			$nextPos = $pos + strcspn( $url, "<>\\\"/:|?*.", $pos, $remainingLength );
			if ( $nextPos >= $urlLength ) {
				// No illegal character or next dot
				// We have our extension
				return substr( $url, $pos, $urlLength - $pos );
			}
			if ( $url[$nextPos] === '?' ) {
				// We've found a legal extension followed by a question mark
				// If the extension is NOT exe, dll or cgi, return it
				// (strcasecmp() returns 0, i.e. false, on a match)
				$extension = substr( $url, $pos, $nextPos - $pos );
				if ( strcasecmp( $extension, 'exe' ) && strcasecmp( $extension, 'dll' ) &&
					strcasecmp( $extension, 'cgi' ) )
				{
					return $extension;
				}
				// Else continue looking
			}
			// We found an illegal character or another dot
			// Skip to that character and continue the loop
			$pos = $nextPos + 1;
			$remainingLength = $urlLength - $pos;
		}
		return false;
	}

	/**
	 * When passed the value of $_SERVER['SERVER_SOFTWARE'], this function
	 * returns true if that server is known to have a REQUEST_URI variable
	 * with %2E not decoded to ".". On such a server, it is possible to detect
	 * whether the script filename has been obscured.
	 *
	 * The function returns false if the server is not known to have this
	 * behaviour. Microsoft IIS in particular is known to decode escaped script
	 * filenames.
	 *
	 * SERVER_SOFTWARE typically contains either a plain string such as "Zeus",
	 * or a specification in the style of a User-Agent header, such as
	 * "Apache/1.3.34 (Unix) mod_ssl/2.8.25 OpenSSL/0.9.8a PHP/4.4.2"
	 *
	 * @param $serverSoftware string Value of $_SERVER['SERVER_SOFTWARE']
	 * @return bool
	 */
	public static function haveUndecodedRequestUri( $serverSoftware ) {
		static $whitelist = array(
			'Apache',
			'Zeus',
			'LiteSpeed' );
		// Take the product name: everything up to the first "/", space, or
		// the end of the string.
		if ( preg_match( '/^(.*?)($|\/| )/', $serverSoftware, $m ) ) {
			return in_array( $m[1], $whitelist, true );
		} else {
			return false;
		}
	}

}