diff options
author | Pierre Schmitz <pierre@archlinux.de> | 2012-05-03 13:01:35 +0200 |
---|---|---|
committer | Pierre Schmitz <pierre@archlinux.de> | 2012-05-03 13:01:35 +0200 |
commit | d9022f63880ce039446fba8364f68e656b7bf4cb (patch) | |
tree | 16b40fbf17bf7c9ee6f4ead25b16dd192378050a /includes/libs | |
parent | 27cf83d177256813e2e802241085fce5dd0f3fb9 (diff) |
Update to MediaWiki 1.19.0
Diffstat (limited to 'includes/libs')
-rw-r--r-- | includes/libs/CSSJanus.php | 32 | ||||
-rw-r--r-- | includes/libs/CSSMin.php | 12 | ||||
-rw-r--r-- | includes/libs/IEContentAnalyzer.php | 79 | ||||
-rw-r--r-- | includes/libs/IEUrlExtension.php | 58 | ||||
-rw-r--r-- | includes/libs/JavaScriptMinifier.php | 49 | ||||
-rw-r--r-- | includes/libs/jsminplus.php | 223 |
6 files changed, 286 insertions, 167 deletions
diff --git a/includes/libs/CSSJanus.php b/includes/libs/CSSJanus.php index aa04bc49..c8fc296b 100644 --- a/includes/libs/CSSJanus.php +++ b/includes/libs/CSSJanus.php @@ -22,7 +22,9 @@ * written for LTR to RTL. * * The original Python version of CSSJanus is Copyright 2008 by Google Inc. and - * is distributed under the Apache license. + * is distributed under the Apache license. This PHP port is Copyright 2010 by + * Roan Kattouw and is dual-licensed under the GPL (as in the comment above) and + * the Apache (as in the original code) licenses. * * Original code: http://code.google.com/p/cssjanus/source/browse/trunk/cssjanus.py * License of original code: http://code.google.com/p/cssjanus/source/browse/trunk/LICENSE @@ -111,8 +113,8 @@ class CSSJanus { $patterns['four_notation_color'] = "/(-color\s*:\s*){$patterns['color']}(\s+){$patterns['color']}(\s+){$patterns['color']}(\s+){$patterns['color']}/i"; // The two regexes below are parenthesized differently then in the original implementation to make the // callback's job more straightforward - $patterns['bg_horizontal_percentage'] = "/(background(?:-position)?\s*:\s*[^%]*?)({$patterns['num']})(%\s*(?:{$patterns['quantity']}|{$patterns['ident']}))/"; - $patterns['bg_horizontal_percentage_x'] = "/(background-position-x\s*:\s*)({$patterns['num']})(%)/"; + $patterns['bg_horizontal_percentage'] = "/(background(?:-position)?\s*:\s*[^%]*?)(-?{$patterns['num']})(%\s*(?:{$patterns['quantity']}|{$patterns['ident']}))/"; + $patterns['bg_horizontal_percentage_x'] = "/(background-position-x\s*:\s*)(-?{$patterns['num']})(%)/"; } /** @@ -173,6 +175,8 @@ class CSSJanus { * * See http://code.google.com/p/cssjanus/issues/detail?id=15 and * TODO: URL + * @param $css string + * @return string */ private static function fixDirection( $css ) { $css = preg_replace( self::$patterns['direction_ltr'], @@ -185,6 +189,8 @@ class CSSJanus { /** * Replace 'ltr' with 'rtl' and vice versa in background URLs + * @param $css string + * @return string */ private static function fixLtrRtlInURL( $css ) { $css = preg_replace( self::$patterns['ltr_in_url'], self::$patterns['tmpToken'], $css ); @@ -196,6 +202,8 @@ class CSSJanus { /** * Replace 'left' with 'right' and vice versa in background URLs + * @param $css string + * @return string */ private static function fixLeftRightInURL( $css ) { $css = preg_replace( self::$patterns['left_in_url'], self::$patterns['tmpToken'], $css ); @@ -207,6 +215,8 @@ class CSSJanus { /** * Flip rules like left: , padding-right: , etc. + * @param $css string + * @return string */ private static function fixLeftAndRight( $css ) { $css = preg_replace( self::$patterns['left'], self::$patterns['tmpToken'], $css ); @@ -218,6 +228,8 @@ class CSSJanus { /** * Flip East and West in rules like cursor: nw-resize; + * @param $css string + * @return string */ private static function fixCursorProperties( $css ) { $css = preg_replace( self::$patterns['cursor_east'], @@ -237,6 +249,8 @@ class CSSJanus { * and four-part color rules with multiple whitespace characters between * colors are not recognized. * See http://code.google.com/p/cssjanus/issues/detail?id=16 + * @param $css string + * @return string */ private static function fixFourPartNotation( $css ) { $css = preg_replace( self::$patterns['four_notation_quantity'], '$1$2$7$4$5$6$3', $css ); @@ -247,6 +261,8 @@ class CSSJanus { /** * Flip horizontal background percentages. + * @param $css string + * @return string */ private static function fixBackgroundPosition( $css ) { $css = preg_replace_callback( self::$patterns['bg_horizontal_percentage'], @@ -259,6 +275,8 @@ class CSSJanus { /** * Callback for calculateNewBackgroundPosition() + * @param $matches array + * @return string */ private static function calculateNewBackgroundPosition( $matches ) { return $matches[1] . ( 100 - $matches[2] ) . $matches[3]; @@ -295,6 +313,10 @@ class CSSJanus_Tokenizer { return preg_replace_callback( $this->regex, array( $this, 'tokenizeCallback' ), $str ); } + /** + * @param $matches array + * @return string + */ private function tokenizeCallback( $matches ) { $this->originals[] = $matches[0]; return $this->token; @@ -314,6 +336,10 @@ class CSSJanus_Tokenizer { array( $this, 'detokenizeCallback' ), $str ); } + /** + * @param $matches + * @return mixed + */ private function detokenizeCallback( $matches ) { $retval = current( $this->originals ); next( $this->originals ); diff --git a/includes/libs/CSSMin.php b/includes/libs/CSSMin.php index 4012b695..4f4b28bb 100644 --- a/includes/libs/CSSMin.php +++ b/includes/libs/CSSMin.php @@ -1,5 +1,5 @@ <?php -/* +/** * Copyright 2010 Wikimedia Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); you may @@ -79,6 +79,10 @@ class CSSMin { return $files; } + /** + * @param $file string + * @return bool|string + */ protected static function getMimeType( $file ) { $realpath = realpath( $file ); // Try a couple of different ways to get the mime-type of a file, in order of @@ -112,10 +116,10 @@ class CSSMin { * @param $source string CSS data to remap * @param $local string File path where the source was read from * @param $remote string URL path to the file - * @param $embed ??? + * @param $embedData bool If false, never do any data URI embedding, even if / * @embed * / is found * @return string Remapped CSS data */ - public static function remap( $source, $local, $remote, $embed = true ) { + public static function remap( $source, $local, $remote, $embedData = true ) { $pattern = '/((?P<embed>\s*\/\*\s*\@embed\s*\*\/)(?P<pre>[^\;\}]*))?' . self::URL_REGEX . '(?P<post>[^;]*)[\;]?/'; $offset = 0; @@ -162,7 +166,7 @@ class CSSMin { // using Z for the timezone, meaning GMT $url .= '?' . gmdate( 'Y-m-d\TH:i:s\Z', round( filemtime( $file ), -2 ) ); // Embedding requires a bit of extra processing, so let's skip that if we can - if ( $embed ) { + if ( $embedData && $embed ) { $type = self::getMimeType( $file ); // Detect when URLs were preceeded with embed tags, and also verify file size is // below the limit diff --git a/includes/libs/IEContentAnalyzer.php b/includes/libs/IEContentAnalyzer.php index a2ef1a09..01e72e68 100644 --- a/includes/libs/IEContentAnalyzer.php +++ b/includes/libs/IEContentAnalyzer.php @@ -1,19 +1,19 @@ <?php /** - * This class simulates Microsoft Internet Explorer's terribly broken and + * This class simulates Microsoft Internet Explorer's terribly broken and * insecure MIME type detection algorithm. It can be used to check web uploads - * with an apparently safe type, to see if IE will reinterpret them to produce + * with an apparently safe type, to see if IE will reinterpret them to produce * something dangerous. * - * It is full of bugs and strange design choices should not under any - * circumstances be used to determine a MIME type to present to a user or + * It is full of bugs and strange design choices should not under any + * circumstances be used to determine a MIME type to present to a user or * client. (Apple Safari developers, this means you too.) * - * This class is based on a disassembly of IE 5.0, 6.0 and 7.0. Although I have - * attempted to ensure that this code works in exactly the same way as Internet - * Explorer, it does not share any source code, or creative choices such as - * variable names, thus I (Tim Starling) claim copyright on it. + * This class is based on a disassembly of IE 5.0, 6.0 and 7.0. Although I have + * attempted to ensure that this code works in exactly the same way as Internet + * Explorer, it does not share any source code, or creative choices such as + * variable names, thus I (Tim Starling) claim copyright on it. * * It may be redistributed without restriction. To aid reuse, this class does * not depend on any MediaWiki module. @@ -24,8 +24,8 @@ class IEContentAnalyzer { */ protected $baseTypeTable = array( 'ambiguous' /*1*/ => array( - 'text/plain', - 'application/octet-stream', + 'text/plain', + 'application/octet-stream', 'application/x-netcdf', // [sic] ), 'text' /*3*/ => array( @@ -34,8 +34,8 @@ class IEContentAnalyzer { ), 'binary' /*4*/ => array( 'application/pdf', 'audio/x-aiff', 'audio/basic', 'audio/wav', 'image/gif', - 'image/pjpeg', 'image/jpeg', 'image/tiff', 'image/x-png', 'image/png', 'image/bmp', - 'image/x-jg', 'image/x-art', 'image/x-emf', 'image/x-wmf', 'video/avi', + 'image/pjpeg', 'image/jpeg', 'image/tiff', 'image/x-png', 'image/png', 'image/bmp', + 'image/x-jg', 'image/x-art', 'image/x-emf', 'image/x-wmf', 'video/avi', 'video/x-msvideo', 'video/mpeg', 'application/x-compressed', 'application/x-zip-compressed', 'application/x-gzip-compressed', 'application/java', 'application/x-msdownload' @@ -293,21 +293,21 @@ class IEContentAnalyzer { '.xsl' => 'text/xml', ); - /** - * IE versions which have been analysed to bring you this class, and for - * which some substantive difference exists. These will appear as keys + /** + * IE versions which have been analysed to bring you this class, and for + * which some substantive difference exists. These will appear as keys * in the return value of getRealMimesFromData(). The names are chosen to sort correctly. */ protected $versions = array( 'ie05', 'ie06', 'ie07', 'ie07.strict', 'ie07.nohtml' ); /** - * Type table with versions expanded + * Type table with versions expanded */ protected $typeTable = array(); /** constructor */ function __construct() { - // Construct versioned type arrays from the base type array plus additions + // Construct versioned type arrays from the base type array plus additions $types = $this->baseTypeTable; foreach ( $this->versions as $version ) { if ( isset( $this->addedTypes[$version] ) ) { @@ -320,7 +320,7 @@ class IEContentAnalyzer { } /** - * Get the MIME types from getMimesFromData(), but convert the result from IE's + * Get the MIME types from getMimesFromData(), but convert the result from IE's * idiosyncratic private types into something other apps will understand. * * @param $fileName String: the file name (unused at present) @@ -338,6 +338,8 @@ class IEContentAnalyzer { /** * Translate a MIME type from IE's idiosyncratic private types into * more commonly understood type strings + * @param $type + * @return string */ public function translateMimeType( $type ) { static $table = array( @@ -375,6 +377,11 @@ class IEContentAnalyzer { /** * Get the MIME type for a given named version + * @param $version + * @param $fileName + * @param $chunk + * @param $proposed + * @return bool|string */ protected function getMimeTypeForVersion( $version, $fileName, $chunk, $proposed ) { // Strip text after a semicolon @@ -397,8 +404,8 @@ class IEContentAnalyzer { // Truncate chunk at 255 bytes $chunk = substr( $chunk, 0, 255 ); - // IE does the Check*Headers() calls last, and instead does the following image - // type checks by directly looking for the magic numbers. What I do here should + // IE does the Check*Headers() calls last, and instead does the following image + // type checks by directly looking for the magic numbers. What I do here should // have the same effect since the magic number checks are identical in both cases. $result = $this->sampleData( $version, $chunk ); $sampleFound = $result['found']; @@ -413,7 +420,7 @@ class IEContentAnalyzer { return 'image/gif'; } if ( ( $proposed == 'image/pjpeg' || $proposed == 'image/jpeg' ) - && $binaryType == 'image/pjpeg' ) + && $binaryType == 'image/pjpeg' ) { return $proposed; } @@ -430,7 +437,7 @@ class IEContentAnalyzer { return 'application/x-cdf'; } - // RSS and Atom were added in IE 7 so they won't be in $sampleFound for + // RSS and Atom were added in IE 7 so they won't be in $sampleFound for // previous versions if ( isset( $sampleFound['rss'] ) ) { return 'application/rss+xml'; @@ -483,8 +490,8 @@ class IEContentAnalyzer { // Freaky heuristics to determine if the data is text or binary // The heuristic is of course broken for non-ASCII text - if ( $counters['ctrl'] != 0 && ( $counters['ff'] + $counters['low'] ) - < ( $counters['ctrl'] + $counters['high'] ) * 16 ) + if ( $counters['ctrl'] != 0 && ( $counters['ff'] + $counters['low'] ) + < ( $counters['ctrl'] + $counters['high'] ) * 16 ) { $kindOfBinary = true; $type = $binaryType ? $binaryType : $textType; @@ -529,8 +536,8 @@ class IEContentAnalyzer { return $this->registry[$ext]; } - // TODO: If the extension has an application registered to it, IE will return - // application/octet-stream. We'll skip that, so we could erroneously + // TODO: If the extension has an application registered to it, IE will return + // application/octet-stream. We'll skip that, so we could erroneously // return text/plain or application/x-netcdf where application/octet-stream // would be correct. @@ -540,6 +547,9 @@ class IEContentAnalyzer { /** * Check for text headers at the start of the chunk * Confirmed same in 5 and 7. + * @param $version + * @param $chunk + * @return bool|string */ private function checkTextHeaders( $version, $chunk ) { $chunk2 = substr( $chunk, 0, 2 ); @@ -563,6 +573,9 @@ class IEContentAnalyzer { /** * Check for binary headers at the start of the chunk * Confirmed same in 5 and 7. + * @param $version + * @param $chunk + * @return bool|string */ private function checkBinaryHeaders( $version, $chunk ) { $chunk2 = substr( $chunk, 0, 2 ); @@ -578,13 +591,13 @@ class IEContentAnalyzer { return 'image/pjpeg'; // actually plain JPEG but this is what IE returns } - if ( $chunk2 == 'BM' + if ( $chunk2 == 'BM' && substr( $chunk, 6, 2 ) == "\000\000" && substr( $chunk, 8, 2 ) == "\000\000" ) { return 'image/bmp'; // another non-standard MIME } - if ( $chunk4 == 'RIFF' + if ( $chunk4 == 'RIFF' && substr( $chunk, 8, 4 ) == 'WAVE' ) { return 'audio/wav'; @@ -661,6 +674,9 @@ class IEContentAnalyzer { /** * Do heuristic checks on the bulk of the data sample. * Search for HTML tags. + * @param $version + * @param $chunk + * @return array */ protected function sampleData( $version, $chunk ) { $found = array(); @@ -774,7 +790,7 @@ class IEContentAnalyzer { } if ( !strncasecmp( $remainder, $rdfPurl, strlen( $rdfPurl ) ) ) { - if ( isset( $found['rdf-tag'] ) + if ( isset( $found['rdf-tag'] ) && isset( $found['rdf-url'] ) ) // [sic] { break; @@ -808,6 +824,11 @@ class IEContentAnalyzer { return array( 'found' => $found, 'counters' => $counters ); } + /** + * @param $version + * @param $type + * @return int|string + */ protected function getDataFormat( $version, $type ) { $types = $this->typeTable[$version]; if ( $type == '(null)' || strval( $type ) === '' ) { diff --git a/includes/libs/IEUrlExtension.php b/includes/libs/IEUrlExtension.php index 100454d4..e00e6663 100644 --- a/includes/libs/IEUrlExtension.php +++ b/includes/libs/IEUrlExtension.php @@ -1,31 +1,31 @@ <?php /** - * Internet Explorer derives a cache filename from a URL, and then in certain - * circumstances, uses the extension of the resulting file to determine the - * content type of the data, ignoring the Content-Type header. + * Internet Explorer derives a cache filename from a URL, and then in certain + * circumstances, uses the extension of the resulting file to determine the + * content type of the data, ignoring the Content-Type header. * * This can be a problem, especially when non-HTML content is sent by MediaWiki, * and Internet Explorer interprets it as HTML, exposing an XSS vulnerability. * - * Usually the script filename (e.g. api.php) is present in the URL, and this + * Usually the script filename (e.g. api.php) is present in the URL, and this * makes Internet Explorer think the extension is a harmless script extension. - * But Internet Explorer 6 and earlier allows the script extension to be - * obscured by encoding the dot as "%2E". + * But Internet Explorer 6 and earlier allows the script extension to be + * obscured by encoding the dot as "%2E". * - * This class contains functions which help in detecting and dealing with this + * This class contains functions which help in detecting and dealing with this * situation. * - * Checking the URL for a bad extension is somewhat complicated due to the fact + * Checking the URL for a bad extension is somewhat complicated due to the fact * that CGI doesn't provide a standard method to determine the URL. Instead it - * is necessary to pass a subset of $_SERVER variables, which we then attempt + * is necessary to pass a subset of $_SERVER variables, which we then attempt * to use to guess parts of the URL. */ class IEUrlExtension { /** * Check a subset of $_SERVER (or the whole of $_SERVER if you like) - * to see if it indicates that the request was sent with a bad file - * extension. Returns true if the request should be denied or modified, + * to see if it indicates that the request was sent with a bad file + * extension. Returns true if the request should be denied or modified, * false otherwise. The relevant $_SERVER elements are: * * - SERVER_SOFTWARE @@ -37,6 +37,7 @@ class IEUrlExtension { * * @param $vars A subset of $_SERVER. * @param $extWhitelist Extensions which are allowed, assumed harmless. + * @return bool */ public static function areServerVarsBad( $vars, $extWhitelist = array() ) { // Check QUERY_STRING or REQUEST_URI @@ -55,7 +56,7 @@ class IEUrlExtension { return true; } - // Some servers have PATH_INFO but not REQUEST_URI, so we check both + // Some servers have PATH_INFO but not REQUEST_URI, so we check both // to be on the safe side. if ( isset( $vars['PATH_INFO'] ) && self::isUrlExtensionBad( $vars['PATH_INFO'], $extWhitelist ) ) @@ -71,7 +72,7 @@ class IEUrlExtension { * Given a right-hand portion of a URL, determine whether IE would detect * a potentially harmful file extension. * - * @param $urlPart The right-hand portion of a URL + * @param $urlPart string The right-hand portion of a URL * @param $extWhitelist An array of file extensions which may occur in this * URL, and which should be allowed. * @return bool @@ -97,10 +98,10 @@ class IEUrlExtension { } if ( !preg_match( '/^[a-zA-Z0-9_-]+$/', $extension ) ) { - // Non-alphanumeric extension, unlikely to be registered. + // Non-alphanumeric extension, unlikely to be registered. // // The regex above is known to match all registered file extensions - // in a default Windows XP installation. It's important to allow + // in a default Windows XP installation. It's important to allow // extensions with ampersands and percent signs, since that reduces // the number of false positives substantially. return false; @@ -111,8 +112,11 @@ class IEUrlExtension { } /** - * Returns a variant of $url which will pass isUrlExtensionBad() but has the + * Returns a variant of $url which will pass isUrlExtensionBad() but has the * same GET parameters, or false if it can't figure one out. + * @param $url + * @param $extWhitelist array + * @return bool|string */ public static function fixUrlForIE6( $url, $extWhitelist = array() ) { $questionPos = strpos( $url, '?' ); @@ -127,7 +131,7 @@ class IEUrlExtension { $query = substr( $url, $questionPos + 1 ); } - // Multiple question marks cause problems. Encode the second and + // Multiple question marks cause problems. Encode the second and // subsequent question mark. $query = str_replace( '?', '%3E', $query ); // Append an invalid path character so that IE6 won't see the end of the @@ -153,16 +157,16 @@ class IEUrlExtension { * insecure. * * The criteria for finding an extension are as follows: - * - a possible extension is a dot followed by one or more characters not + * - a possible extension is a dot followed by one or more characters not * in <>\"/:|?.# - * - if we find a possible extension followed by the end of the string or + * - if we find a possible extension followed by the end of the string or * a #, that's our extension * - if we find a possible extension followed by a ?, that's our extension - * - UNLESS it's exe, dll or cgi, in which case we ignore it and continue + * - UNLESS it's exe, dll or cgi, in which case we ignore it and continue * searching for another possible extension - * - if we find a possible extension followed by a dot or another illegal + * - if we find a possible extension followed by a dot or another illegal * character, we ignore it and continue searching - * + * * @param $url string URL * @return mixed Detected extension (string), or false if none found */ @@ -182,7 +186,7 @@ class IEUrlExtension { // End of string, we're done return false; } - + // We found a dot. Skip past it $pos++; $remainingLength = $urlLength - $pos; @@ -220,12 +224,12 @@ class IEUrlExtension { * with %2E not decoded to ".". On such a server, it is possible to detect * whether the script filename has been obscured. * - * The function returns false if the server is not known to have this + * The function returns false if the server is not known to have this * behaviour. Microsoft IIS in particular is known to decode escaped script * filenames. * * SERVER_SOFTWARE typically contains either a plain string such as "Zeus", - * or a specification in the style of a User-Agent header, such as + * or a specification in the style of a User-Agent header, such as * "Apache/1.3.34 (Unix) mod_ssl/2.8.25 OpenSSL/0.9.8a PHP/4.4.2" * * @param $serverSoftware @@ -234,8 +238,8 @@ class IEUrlExtension { */ public static function haveUndecodedRequestUri( $serverSoftware ) { static $whitelist = array( - 'Apache', - 'Zeus', + 'Apache', + 'Zeus', 'LiteSpeed' ); if ( preg_match( '/^(.*?)($|\/| )/', $serverSoftware, $m ) ) { return in_array( $m[1], $whitelist ); diff --git a/includes/libs/JavaScriptMinifier.php b/includes/libs/JavaScriptMinifier.php index a991d915..baf93385 100644 --- a/includes/libs/JavaScriptMinifier.php +++ b/includes/libs/JavaScriptMinifier.php @@ -484,22 +484,42 @@ class JavaScriptMinifier { $end++; } } elseif( + $ch === '0' + && ($pos + 1 < $length) && ($s[$pos + 1] === 'x' || $s[$pos + 1] === 'X' ) + ) { + // Hex numeric literal + $end++; // x or X + $len = strspn( $s, '0123456789ABCDEFabcdef', $end ); + if ( !$len ) { + return self::parseError($s, $pos, 'Expected a hexadecimal number but found ' . substr( $s, $pos, 5 ) . '...' ); + } + $end += $len; + } elseif( ctype_digit( $ch ) || ( $ch === '.' && $pos + 1 < $length && ctype_digit( $s[$pos + 1] ) ) ) { - // Numeric literal. Search for the end of it, but don't care about [+-]exponent - // at the end, as the results of "numeric [+-] numeric" and "numeric" are - // identical to our state machine. - $end += strspn( $s, '0123456789ABCDEFabcdefXx.', $end ); - while( $s[$end - 1] === '.' ) { - // Special case: When a numeric ends with a dot, we have to check the - // literal for proper syntax - $decimal = strspn( $s, '0123456789', $pos, $end - $pos - 1 ); - if( $decimal === $end - $pos - 1 ) { - break; - } else { - $end--; + $end += strspn( $s, '0123456789', $end ); + $decimal = strspn( $s, '.', $end ); + if ($decimal) { + if ( $decimal > 2 ) { + return self::parseError($s, $end, 'The number has too many decimal points' ); + } + $end += strspn( $s, '0123456789', $end + 1 ) + $decimal; + } + $exponent = strspn( $s, 'eE', $end ); + if( $exponent ) { + if ( $exponent > 1 ) { + return self::parseError($s, $end, 'Number with several E' ); + } + $end++; + + // + sign is optional; - sign is required. + $end += strspn( $s, '-+', $end ); + $len = strspn( $s, '0123456789', $end ); + if ( !$len ) { + return self::parseError($s, $pos, 'No decimal digits after e, how many zeroes should be added?' ); } + $end += $len; } } elseif( isset( $opChars[$ch] ) ) { // Punctuation character. Search for the longest matching operator. @@ -576,4 +596,9 @@ class JavaScriptMinifier { } return $out; } + + static function parseError($fullJavascript, $position, $errorMsg) { + // TODO: Handle the error: trigger_error, throw exception, return false... + return false; + } } diff --git a/includes/libs/jsminplus.php b/includes/libs/jsminplus.php index bab4ff49..8ed08d74 100644 --- a/includes/libs/jsminplus.php +++ b/includes/libs/jsminplus.php @@ -1,7 +1,7 @@ <?php /** - * JSMinPlus version 1.3 + * JSMinPlus version 1.4 * * Minifies a javascript file using a javascript parser * @@ -15,8 +15,10 @@ * Usage: $minified = JSMinPlus::minify($script [, $filename]) * * Versionlog (see also changelog.txt): - * 19-07-2011 - expanded operator and keyword defines. Fixes the notices when creating several JSTokenizer - * 17-05-2009 - fixed hook:colon precedence, fixed empty body in loop and if-constructs + * 23-07-2011 - remove dynamic creation of OP_* and KEYWORD_* defines and declare them on top + * reduce memory footprint by minifying by block-scope + * some small byte-saving and performance improvements + * 12-05-2009 - fixed hook:colon precedence, fixed empty body in loop and if-constructs * 18-04-2009 - fixed crashbug in PHP 5.2.9 and several other bugfixes * 12-04-2009 - some small bugfixes and performance improvements * 09-04-2009 - initial open sourced version 1.0 @@ -46,7 +48,7 @@ * the Initial Developer. All Rights Reserved. * * Contributor(s): Tino Zijdel <crisp@tweakers.net> - * PHP port, modifications and minifier routine are (C) 2009 + * PHP port, modifications and minifier routine are (C) 2009-2011 * * Alternatively, the contents of this file may be used under the terms of * either the GNU General Public License Version 2 or later (the "GPL"), or @@ -86,6 +88,8 @@ define('JS_SETTER', 111); define('JS_GROUP', 112); define('JS_LIST', 113); +define('JS_MINIFIED', 999); + define('DECLARED_FORM', 0); define('EXPRESSED_FORM', 1); define('STATEMENT_FORM', 2); @@ -188,7 +192,7 @@ class JSMinPlus private function __construct() { - $this->parser = new JSParser(); + $this->parser = new JSParser($this); } public static function minify($js, $filename='') @@ -217,22 +221,18 @@ class JSMinPlus return false; } - private function parseTree($n, $noBlockGrouping = false) + public function parseTree($n, $noBlockGrouping = false) { $s = ''; switch ($n->type) { - case KEYWORD_FUNCTION: - $s .= 'function' . ($n->name ? ' ' . $n->name : '') . '('; - $params = $n->params; - for ($i = 0, $j = count($params); $i < $j; $i++) - $s .= ($i ? ',' : '') . $params[$i]; - $s .= '){' . $this->parseTree($n->body, true) . '}'; + case JS_MINIFIED: + $s = $n->value; break; case JS_SCRIPT: - // we do nothing with funDecls or varDecls + // we do nothing yet with funDecls or varDecls $noBlockGrouping = true; // FALL THROUGH @@ -279,6 +279,14 @@ class JSMinPlus } break; + case KEYWORD_FUNCTION: + $s .= 'function' . ($n->name ? ' ' . $n->name : '') . '('; + $params = $n->params; + for ($i = 0, $j = count($params); $i < $j; $i++) + $s .= ($i ? ',' : '') . $params[$i]; + $s .= '){' . $this->parseTree($n->body, true) . '}'; + break; + case KEYWORD_IF: $s = 'if(' . $this->parseTree($n->condition) . ')'; $thenPart = $this->parseTree($n->thenPart); @@ -385,19 +393,14 @@ class JSMinPlus break; case KEYWORD_THROW: - $s = 'throw ' . $this->parseTree($n->exception); - break; - case KEYWORD_RETURN: - $s = 'return'; + $s = $n->type; if ($n->value) { $t = $this->parseTree($n->value); if (strlen($t)) { - if ( $t[0] != '(' && $t[0] != '[' && $t[0] != '{' && - $t[0] != '"' && $t[0] != "'" && $t[0] != '/' - ) + if ($this->isWordChar($t[0]) || $t[0] == '\\') $s .= ' '; $s .= $t; @@ -423,6 +426,40 @@ class JSMinPlus } break; + case KEYWORD_IN: + case KEYWORD_INSTANCEOF: + $left = $this->parseTree($n->treeNodes[0]); + $right = $this->parseTree($n->treeNodes[1]); + + $s = $left; + + if ($this->isWordChar(substr($left, -1))) + $s .= ' '; + + $s .= $n->type; + + if ($this->isWordChar($right[0]) || $right[0] == '\\') + $s .= ' '; + + $s .= $right; + break; + + case KEYWORD_DELETE: + case KEYWORD_TYPEOF: + $right = $this->parseTree($n->treeNodes[0]); + + $s = $n->type; + + if ($this->isWordChar($right[0]) || $right[0] == '\\') + $s .= ' '; + + $s .= $right; + break; + + case KEYWORD_VOID: + $s = 'void(' . $this->parseTree($n->treeNodes[0]) . ')'; + break; + case KEYWORD_DEBUGGER: throw new Exception('NOT IMPLEMENTED: DEBUGGER'); break; @@ -497,26 +534,6 @@ class JSMinPlus } break; - case KEYWORD_IN: - $s = $this->parseTree($n->treeNodes[0]) . ' in ' . $this->parseTree($n->treeNodes[1]); - break; - - case KEYWORD_INSTANCEOF: - $s = $this->parseTree($n->treeNodes[0]) . ' instanceof ' . $this->parseTree($n->treeNodes[1]); - break; - - case KEYWORD_DELETE: - $s = 'delete ' . $this->parseTree($n->treeNodes[0]); - break; - - case KEYWORD_VOID: - $s = 'void(' . $this->parseTree($n->treeNodes[0]) . ')'; - break; - - case KEYWORD_TYPEOF: - $s = 'typeof ' . $this->parseTree($n->treeNodes[0]); - break; - case OP_NOT: case OP_BITWISE_NOT: case OP_UNARY_PLUS: @@ -606,13 +623,33 @@ class JSMinPlus $s .= '}'; break; + case TOKEN_NUMBER: + $s = $n->value; + if (preg_match('/^([1-9]+)(0{3,})$/', $s, $m)) + $s = $m[1] . 'e' . strlen($m[2]); + break; + case KEYWORD_NULL: case KEYWORD_THIS: case KEYWORD_TRUE: case KEYWORD_FALSE: - case TOKEN_IDENTIFIER: case TOKEN_NUMBER: case TOKEN_STRING: case TOKEN_REGEXP: + case TOKEN_IDENTIFIER: case TOKEN_STRING: case TOKEN_REGEXP: $s = $n->value; break; case JS_GROUP: - $s = '(' . $this->parseTree($n->treeNodes[0]) . ')'; + if (in_array( + $n->treeNodes[0]->type, + array( + JS_ARRAY_INIT, JS_OBJECT_INIT, JS_GROUP, + TOKEN_NUMBER, TOKEN_STRING, TOKEN_REGEXP, TOKEN_IDENTIFIER, + KEYWORD_NULL, KEYWORD_THIS, KEYWORD_TRUE, KEYWORD_FALSE + ) + )) + { + $s = $this->parseTree($n->treeNodes[0]); + } + else + { + $s = '(' . $this->parseTree($n->treeNodes[0]) . ')'; + } break; default: @@ -626,11 +663,17 @@ class JSMinPlus { return preg_match('/^[a-zA-Z_][a-zA-Z0-9_]*$/', $string) && !in_array($string, $this->reserved); } + + private function isWordChar($char) + { + return $char == '_' || $char == '$' || ctype_alnum($char); + } } class JSParser { private $t; + private $minifier; private $opPrecedence = array( ';' => 0, @@ -680,8 +723,9 @@ class JSParser TOKEN_CONDCOMMENT_START => 1, TOKEN_CONDCOMMENT_END => 1 ); - public function __construct() + public function __construct($minifier=null) { + $this->minifier = $minifier; $this->t = new JSTokenizer(); } @@ -705,6 +749,19 @@ class JSParser $n->funDecls = $x->funDecls; $n->varDecls = $x->varDecls; + // minify by scope + if ($this->minifier) + { + $n->value = $this->minifier->parseTree($n); + + // clear tree from node to save memory + $n->treeNodes = null; + $n->funDecls = null; + $n->varDecls = null; + + $n->type = JS_MINIFIED; + } + return $n; } @@ -963,7 +1020,7 @@ class JSParser case KEYWORD_THROW: $n = new JSNode($this->t); - $n->exception = $this->Expression($x); + $n->value = $this->Expression($x); break; case KEYWORD_RETURN: @@ -1678,44 +1735,11 @@ class JSTokenizer ); private $opTypeNames = array( - ';' => 'SEMICOLON', - ',' => 'COMMA', - '?' => 'HOOK', - ':' => 'COLON', - '||' => 'OR', - '&&' => 'AND', - '|' => 'BITWISE_OR', - '^' => 'BITWISE_XOR', - '&' => 'BITWISE_AND', - '===' => 'STRICT_EQ', - '==' => 'EQ', - '=' => 'ASSIGN', - '!==' => 'STRICT_NE', - '!=' => 'NE', - '<<' => 'LSH', - '<=' => 'LE', - '<' => 'LT', - '>>>' => 'URSH', - '>>' => 'RSH', - '>=' => 'GE', - '>' => 'GT', - '++' => 'INCREMENT', - '--' => 'DECREMENT', - '+' => 'PLUS', - '-' => 'MINUS', - '*' => 'MUL', - '/' => 'DIV', - '%' => 'MOD', - '!' => 'NOT', - '~' => 'BITWISE_NOT', - '.' => 'DOT', - '[' => 'LEFT_BRACKET', - ']' => 'RIGHT_BRACKET', - '{' => 'LEFT_CURLY', - '}' => 'RIGHT_CURLY', - '(' => 'LEFT_PAREN', - ')' => 'RIGHT_PAREN', - '@*/' => 'CONDCOMMENT_END' + ';', ',', '?', ':', '||', '&&', '|', '^', + '&', '===', '==', '=', '!==', '!=', '<<', '<=', + '<', '>>>', '>>', '>=', '>', '++', '--', '+', + '-', '*', '/', '%', '!', '~', '.', '[', + ']', '{', '}', '(', ')', '@*/' ); private $assignOps = array('|', '^', '&', '<<', '>>', '>>>', '+', '-', '*', '/', '%'); @@ -1723,7 +1747,7 @@ class JSTokenizer public function __construct() { - $this->opRegExp = '#^(' . implode('|', array_map('preg_quote', array_keys($this->opTypeNames))) . ')#'; + $this->opRegExp = '#^(' . implode('|', array_map('preg_quote', $this->opTypeNames)) . ')#'; } public function init($source, $filename = '', $lineno = 1) @@ -1874,22 +1898,38 @@ class JSTokenizer { switch ($input[0]) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - if (preg_match('/^\d+\.\d*(?:[eE][-+]?\d+)?|^\d+(?:\.\d*)?[eE][-+]?\d+/', $input, $match)) + case '0': + // hexadecimal + if (($input[1] == 'x' || $input[1] == 'X') && preg_match('/^0x[0-9a-f]+/i', $input, $match)) { $tt = TOKEN_NUMBER; + break; } - else if (preg_match('/^0[xX][\da-fA-F]+|^0[0-7]*|^\d+/', $input, $match)) + // FALL THROUGH + + case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': + // should always match + preg_match('/^\d+(?:\.\d*)?(?:[eE][-+]?\d+)?/', $input, $match); + $tt = TOKEN_NUMBER; + break; + + case "'": + if (preg_match('/^\'(?:[^\\\\\'\r\n]++|\\\\(?:.|\r?\n))*\'/', $input, $match)) { - // this should always match because of \d+ - $tt = TOKEN_NUMBER; + $tt = TOKEN_STRING; + } + else + { + if ($chunksize) + return $this->get(null); // retry with a full chunk fetch + + throw $this->newSyntaxError('Unterminated string literal'); } break; case '"': - case "'": - if (preg_match('/^"(?:\\\\(?:.|\r?\n)|[^\\\\"\r\n]+)*"|^\'(?:\\\\(?:.|\r?\n)|[^\\\\\'\r\n]+)*\'/', $input, $match)) + if (preg_match('/^"(?:[^\\\\"\r\n]++|\\\\(?:.|\r?\n))*"/', $input, $match)) { $tt = TOKEN_STRING; } @@ -2091,4 +2131,3 @@ class JSToken public $lineno; public $assignOp; } - |