Update to MediaWiki 1.19.0

author: Pierre Schmitz <pierre@archlinux.de> 2012-05-03 13:01:35 +0200
committer: Pierre Schmitz <pierre@archlinux.de> 2012-05-03 13:01:35 +0200
commit: d9022f63880ce039446fba8364f68e656b7bf4cb (patch)
tree: 16b40fbf17bf7c9ee6f4ead25b16dd192378050a /includes/libs
parent: 27cf83d177256813e2e802241085fce5dd0f3fb9 (diff)
6 files changed, 286 insertions, 167 deletions
diff --git a/includes/libs/CSSJanus.php b/includes/libs/CSSJanus.php
index aa04bc49..c8fc296b 100644
--- a/includes/libs/CSSJanus.php
+++ b/includes/libs/CSSJanus.php
@@ -22,7 +22,9 @@
  * written for LTR to RTL.
  *
  * The original Python version of CSSJanus is Copyright 2008 by Google Inc. and
- * is distributed under the Apache license.
+ * is distributed under the Apache license. This PHP port is Copyright 2010 by
+ * Roan Kattouw and is dual-licensed under the GPL (as in the comment above) and
+ * the Apache (as in the original code) licenses.
  *
  * Original code: http://code.google.com/p/cssjanus/source/browse/trunk/cssjanus.py
  * License of original code: http://code.google.com/p/cssjanus/source/browse/trunk/LICENSE
@@ -111,8 +113,8 @@ class CSSJanus {
 		$patterns['four_notation_color'] = "/(-color\s*:\s*){$patterns['color']}(\s+){$patterns['color']}(\s+){$patterns['color']}(\s+){$patterns['color']}/i";
 		// The two regexes below are parenthesized differently then in the original implementation to make the
 		// callback's job more straightforward
-		$patterns['bg_horizontal_percentage'] = "/(background(?:-position)?\s*:\s*[^%]*?)({$patterns['num']})(%\s*(?:{$patterns['quantity']}|{$patterns['ident']}))/";
-		$patterns['bg_horizontal_percentage_x'] = "/(background-position-x\s*:\s*)({$patterns['num']})(%)/";
+		$patterns['bg_horizontal_percentage'] = "/(background(?:-position)?\s*:\s*[^%]*?)(-?{$patterns['num']})(%\s*(?:{$patterns['quantity']}|{$patterns['ident']}))/";
+		$patterns['bg_horizontal_percentage_x'] = "/(background-position-x\s*:\s*)(-?{$patterns['num']})(%)/";
 	}
 
 	/**
@@ -173,6 +175,8 @@ class CSSJanus {
 	 *
 	 * See http://code.google.com/p/cssjanus/issues/detail?id=15 and
 	 * TODO: URL
+	 * @param $css string
+	 * @return string
 	 */
 	private static function fixDirection( $css ) {
 		$css = preg_replace( self::$patterns['direction_ltr'],
@@ -185,6 +189,8 @@ class CSSJanus {
 
 	/**
 	 * Replace 'ltr' with 'rtl' and vice versa in background URLs
+	 * @param $css string
+	 * @return string
 	 */
 	private static function fixLtrRtlInURL( $css ) {
 		$css = preg_replace( self::$patterns['ltr_in_url'], self::$patterns['tmpToken'], $css );
@@ -196,6 +202,8 @@ class CSSJanus {
 
 	/**
 	 * Replace 'left' with 'right' and vice versa in background URLs
+	 * @param $css string
+	 * @return string
 	 */
 	private static function fixLeftRightInURL( $css ) {
 		$css = preg_replace( self::$patterns['left_in_url'], self::$patterns['tmpToken'], $css );
@@ -207,6 +215,8 @@ class CSSJanus {
 
 	/**
 	 * Flip rules like left: , padding-right: , etc.
+	 * @param $css string
+	 * @return string
 	 */
 	private static function fixLeftAndRight( $css ) {
 		$css = preg_replace( self::$patterns['left'], self::$patterns['tmpToken'], $css );
@@ -218,6 +228,8 @@ class CSSJanus {
 
 	/**
 	 * Flip East and West in rules like cursor: nw-resize;
+	 * @param $css string
+	 * @return string
 	 */
 	private static function fixCursorProperties( $css ) {
 		$css = preg_replace( self::$patterns['cursor_east'],
@@ -237,6 +249,8 @@ class CSSJanus {
 	 * and four-part color rules with multiple whitespace characters between
 	 * colors are not recognized.
 	 * See http://code.google.com/p/cssjanus/issues/detail?id=16
+	 * @param $css string
+	 * @return string
 	 */
 	private static function fixFourPartNotation( $css ) {
 		$css = preg_replace( self::$patterns['four_notation_quantity'], '$1$2$7$4$5$6$3', $css );
@@ -247,6 +261,8 @@ class CSSJanus {
 
 	/**
 	 * Flip horizontal background percentages.
+	 * @param $css string
+	 * @return string
 	 */
 	private static function fixBackgroundPosition( $css ) {
 		$css = preg_replace_callback( self::$patterns['bg_horizontal_percentage'],
@@ -259,6 +275,8 @@ class CSSJanus {
 
 	/**
 	 * Callback for calculateNewBackgroundPosition()
+	 * @param $matches array
+	 * @return string
 	 */
 	private static function calculateNewBackgroundPosition( $matches ) {
 		return $matches[1] . ( 100 - $matches[2] ) . $matches[3];
@@ -295,6 +313,10 @@ class CSSJanus_Tokenizer {
 		return preg_replace_callback( $this->regex, array( $this, 'tokenizeCallback' ), $str );
 	}
 
+	/**
+	 * @param $matches array
+	 * @return string
+	 */
 	private function tokenizeCallback( $matches ) {
 		$this->originals[] = $matches[0];
 		return $this->token;
@@ -314,6 +336,10 @@ class CSSJanus_Tokenizer {
 			array( $this, 'detokenizeCallback' ), $str );
 	}
 
+	/**
+	 * @param $matches array
+	 * @return mixed
+	 */
 	private function detokenizeCallback( $matches ) {
 		$retval = current( $this->originals );
 		next( $this->originals );
diff --git a/includes/libs/CSSMin.php b/includes/libs/CSSMin.php
index 4012b695..4f4b28bb 100644
--- a/includes/libs/CSSMin.php
+++ b/includes/libs/CSSMin.php
@@ -1,5 +1,5 @@
 <?php
-/*
+/**
  * Copyright 2010 Wikimedia Foundation
  *
  * Licensed under the Apache License, Version 2.0 (the "License"); you may
@@ -79,6 +79,10 @@ class CSSMin {
 		return $files;
 	}
 
+	/**
+	 * @param $file string
+	 * @return bool|string
+	 */
 	protected static function getMimeType( $file ) {
 		$realpath = realpath( $file );
 		// Try a couple of different ways to get the mime-type of a file, in order of
@@ -112,10 +116,10 @@ class CSSMin {
 	 * @param $source string CSS data to remap
 	 * @param $local string File path where the source was read from
 	 * @param $remote string URL path to the file
-	 * @param $embed ???
+	 * @param $embedData bool If false, never do any data URI embedding, even if / * @embed * / is found
 	 * @return string Remapped CSS data
 	 */
-	public static function remap( $source, $local, $remote, $embed = true ) {
+	public static function remap( $source, $local, $remote, $embedData = true ) {
 		$pattern = '/((?P<embed>\s*\/\*\s*\@embed\s*\*\/)(?P<pre>[^\;\}]*))?' .
 			self::URL_REGEX . '(?P<post>[^;]*)[\;]?/';
 		$offset = 0;
@@ -162,7 +166,7 @@ class CSSMin {
 				// using Z for the timezone, meaning GMT
 				$url .= '?' . gmdate( 'Y-m-d\TH:i:s\Z', round( filemtime( $file ), -2 ) );
 				// Embedding requires a bit of extra processing, so let's skip that if we can
-				if ( $embed ) {
+				if ( $embedData && $embed ) {
 					$type = self::getMimeType( $file );
 					// Detect when URLs were preceeded with embed tags, and also verify file size is
 					// below the limit
diff --git a/includes/libs/IEContentAnalyzer.php b/includes/libs/IEContentAnalyzer.php
index a2ef1a09..01e72e68 100644
--- a/includes/libs/IEContentAnalyzer.php
+++ b/includes/libs/IEContentAnalyzer.php
@@ -1,19 +1,19 @@
 <?php
 
 /**
- * This class simulates Microsoft Internet Explorer's terribly broken and 
+ * This class simulates Microsoft Internet Explorer's terribly broken and
  * insecure MIME type detection algorithm. It can be used to check web uploads
- * with an apparently safe type, to see if IE will reinterpret them to produce 
+ * with an apparently safe type, to see if IE will reinterpret them to produce
  * something dangerous.
  *
- * It is full of bugs and strange design choices should not under any 
- * circumstances be used to determine a MIME type to present to a user or 
+ * It is full of bugs and strange design choices, and should not under any
+ * circumstances be used to determine a MIME type to present to a user or
  * client. (Apple Safari developers, this means you too.)
  *
- * This class is based on a disassembly of IE 5.0, 6.0 and 7.0. Although I have 
- * attempted to ensure that this code works in exactly the same way as Internet 
- * Explorer, it does not share any source code, or creative choices such as 
- * variable names, thus I (Tim Starling) claim copyright on it. 
+ * This class is based on a disassembly of IE 5.0, 6.0 and 7.0. Although I have
+ * attempted to ensure that this code works in exactly the same way as Internet
+ * Explorer, it does not share any source code, or creative choices such as
+ * variable names, thus I (Tim Starling) claim copyright on it.
  *
  * It may be redistributed without restriction. To aid reuse, this class does
  * not depend on any MediaWiki module.
@@ -24,8 +24,8 @@ class IEContentAnalyzer {
 	 */
 	protected $baseTypeTable = array(
 		'ambiguous' /*1*/ => array(
-			'text/plain', 
-			'application/octet-stream', 
+			'text/plain',
+			'application/octet-stream',
 			'application/x-netcdf', // [sic]
 		),
 		'text' /*3*/ => array(
@@ -34,8 +34,8 @@ class IEContentAnalyzer {
 		),
 		'binary' /*4*/ => array(
 			'application/pdf', 'audio/x-aiff', 'audio/basic', 'audio/wav', 'image/gif',
-			'image/pjpeg', 'image/jpeg', 'image/tiff', 'image/x-png', 'image/png', 'image/bmp', 
-			'image/x-jg', 'image/x-art', 'image/x-emf', 'image/x-wmf', 'video/avi', 
+			'image/pjpeg', 'image/jpeg', 'image/tiff', 'image/x-png', 'image/png', 'image/bmp',
+			'image/x-jg', 'image/x-art', 'image/x-emf', 'image/x-wmf', 'video/avi',
 			'video/x-msvideo', 'video/mpeg', 'application/x-compressed',
 			'application/x-zip-compressed', 'application/x-gzip-compressed', 'application/java',
 			'application/x-msdownload'
@@ -293,21 +293,21 @@ class IEContentAnalyzer {
 		'.xsl' => 'text/xml',
 	);
 
-	/** 
-	 * IE versions which have been analysed to bring you this class, and for 
-	 * which some substantive difference exists. These will appear as keys 
+	/**
+	 * IE versions which have been analysed to bring you this class, and for
+	 * which some substantive difference exists. These will appear as keys
 	 * in the return value of getRealMimesFromData(). The names are chosen to sort correctly.
 	 */
 	protected $versions = array( 'ie05', 'ie06', 'ie07', 'ie07.strict', 'ie07.nohtml' );
 
 	/**
-	 * Type table with versions expanded 
+	 * Type table with versions expanded
 	 */
 	protected $typeTable = array();
 
 	/** constructor */
 	function __construct() {
-		// Construct versioned type arrays from the base type array plus additions 
+		// Construct versioned type arrays from the base type array plus additions
 		$types = $this->baseTypeTable;
 		foreach ( $this->versions as $version ) {
 			if ( isset( $this->addedTypes[$version] ) ) {
@@ -320,7 +320,7 @@ class IEContentAnalyzer {
 	}
 
 	/**
-	 * Get the MIME types from getMimesFromData(), but convert the result from IE's 
+	 * Get the MIME types from getMimesFromData(), but convert the result from IE's
 	 * idiosyncratic private types into something other apps will understand.
 	 *
 	 * @param $fileName String: the file name (unused at present)
@@ -338,6 +338,8 @@ class IEContentAnalyzer {
 	/**
 	 * Translate a MIME type from IE's idiosyncratic private types into
 	 * more commonly understood type strings
+	 * @param $type
+	 * @return string
 	 */
 	public function translateMimeType( $type ) {
 		static $table = array(
@@ -375,6 +377,11 @@ class IEContentAnalyzer {
 
 	/**
 	 * Get the MIME type for a given named version
+	 * @param $version string
+	 * @param $fileName string
+	 * @param $chunk string
+	 * @param $proposed string
+	 * @return bool|string
 	 */
 	protected function getMimeTypeForVersion( $version, $fileName, $chunk, $proposed ) {
 		// Strip text after a semicolon
@@ -397,8 +404,8 @@ class IEContentAnalyzer {
 		// Truncate chunk at 255 bytes
 		$chunk = substr( $chunk, 0, 255 );
 
-		// IE does the Check*Headers() calls last, and instead does the following image 
-		// type checks by directly looking for the magic numbers. What I do here should 
+		// IE does the Check*Headers() calls last, and instead does the following image
+		// type checks by directly looking for the magic numbers. What I do here should
 		// have the same effect since the magic number checks are identical in both cases.
 		$result = $this->sampleData( $version, $chunk );
 		$sampleFound = $result['found'];
@@ -413,7 +420,7 @@ class IEContentAnalyzer {
 			return 'image/gif';
 		}
 		if ( ( $proposed == 'image/pjpeg' || $proposed == 'image/jpeg' )
-			&& $binaryType == 'image/pjpeg' ) 
+			&& $binaryType == 'image/pjpeg' )
 		{
 			return $proposed;
 		}
@@ -430,7 +437,7 @@ class IEContentAnalyzer {
 			return 'application/x-cdf';
 		}
 
-		// RSS and Atom were added in IE 7 so they won't be in $sampleFound for 
+		// RSS and Atom were added in IE 7 so they won't be in $sampleFound for
 		// previous versions
 		if ( isset( $sampleFound['rss'] ) ) {
 			return 'application/rss+xml';
@@ -483,8 +490,8 @@ class IEContentAnalyzer {
 
 		// Freaky heuristics to determine if the data is text or binary
 		// The heuristic is of course broken for non-ASCII text
-		if ( $counters['ctrl'] != 0 && ( $counters['ff'] + $counters['low'] ) 
-			< ( $counters['ctrl'] + $counters['high'] ) * 16 ) 
+		if ( $counters['ctrl'] != 0 && ( $counters['ff'] + $counters['low'] )
+			< ( $counters['ctrl'] + $counters['high'] ) * 16 )
 		{
 			$kindOfBinary = true;
 			$type = $binaryType ? $binaryType : $textType;
@@ -529,8 +536,8 @@ class IEContentAnalyzer {
 			return $this->registry[$ext];
 		}
 
-		// TODO: If the extension has an application registered to it, IE will return 
-		// application/octet-stream. We'll skip that, so we could erroneously 
+		// TODO: If the extension has an application registered to it, IE will return
+		// application/octet-stream. We'll skip that, so we could erroneously
 		// return text/plain or application/x-netcdf where application/octet-stream
 		// would be correct.
 
@@ -540,6 +547,9 @@ class IEContentAnalyzer {
 	/**
 	 * Check for text headers at the start of the chunk
 	 * Confirmed same in 5 and 7.
+	 * @param $version
+	 * @param $chunk
+	 * @return bool|string
 	 */
 	private function checkTextHeaders( $version, $chunk ) {
 		$chunk2 = substr( $chunk, 0, 2 );
@@ -563,6 +573,9 @@ class IEContentAnalyzer {
 	/**
 	 * Check for binary headers at the start of the chunk
 	 * Confirmed same in 5 and 7.
+	 * @param $version
+	 * @param $chunk
+	 * @return bool|string
 	 */
 	private function checkBinaryHeaders( $version, $chunk ) {
 		$chunk2 = substr( $chunk, 0, 2 );
@@ -578,13 +591,13 @@ class IEContentAnalyzer {
 			return 'image/pjpeg'; // actually plain JPEG but this is what IE returns
 		}
 
-		if ( $chunk2 == 'BM' 
+		if ( $chunk2 == 'BM'
 			&& substr( $chunk, 6, 2 ) == "\000\000"
 			&& substr( $chunk, 8, 2 ) == "\000\000" )
 		{
 			return 'image/bmp'; // another non-standard MIME
 		}
-		if ( $chunk4 == 'RIFF' 
+		if ( $chunk4 == 'RIFF'
 			&& substr( $chunk, 8, 4 ) == 'WAVE' )
 		{
 			return 'audio/wav';
@@ -661,6 +674,9 @@ class IEContentAnalyzer {
 	/**
 	 * Do heuristic checks on the bulk of the data sample.
 	 * Search for HTML tags.
+	 * @param $version
+	 * @param $chunk
+	 * @return array
 	 */
 	protected function sampleData( $version, $chunk ) {
 		$found = array();
@@ -774,7 +790,7 @@ class IEContentAnalyzer {
 			}
 
 			if ( !strncasecmp( $remainder, $rdfPurl, strlen( $rdfPurl ) ) ) {
-				if ( isset( $found['rdf-tag'] ) 
+				if ( isset( $found['rdf-tag'] )
 					&& isset( $found['rdf-url'] ) ) // [sic]
 				{
 					break;
@@ -808,6 +824,11 @@ class IEContentAnalyzer {
 		return array( 'found' => $found, 'counters' => $counters );
 	}
 
+	/**
+	 * @param $version
+	 * @param $type
+	 * @return int|string
+	 */
 	protected function getDataFormat( $version, $type ) {
 		$types = $this->typeTable[$version];
 		if ( $type == '(null)' || strval( $type ) === '' ) {
diff --git a/includes/libs/IEUrlExtension.php b/includes/libs/IEUrlExtension.php
index 100454d4..e00e6663 100644
--- a/includes/libs/IEUrlExtension.php
+++ b/includes/libs/IEUrlExtension.php
@@ -1,31 +1,31 @@
 <?php
 
 /**
- * Internet Explorer derives a cache filename from a URL, and then in certain 
- * circumstances, uses the extension of the resulting file to determine the 
- * content type of the data, ignoring the Content-Type header. 
+ * Internet Explorer derives a cache filename from a URL, and then in certain
+ * circumstances, uses the extension of the resulting file to determine the
+ * content type of the data, ignoring the Content-Type header.
  *
  * This can be a problem, especially when non-HTML content is sent by MediaWiki,
  * and Internet Explorer interprets it as HTML, exposing an XSS vulnerability.
  *
- * Usually the script filename (e.g. api.php) is present in the URL, and this 
+ * Usually the script filename (e.g. api.php) is present in the URL, and this
  * makes Internet Explorer think the extension is a harmless script extension.
- * But Internet Explorer 6 and earlier allows the script extension to be 
- * obscured by encoding the dot as "%2E". 
+ * But Internet Explorer 6 and earlier allows the script extension to be
+ * obscured by encoding the dot as "%2E".
  *
- * This class contains functions which help in detecting and dealing with this 
+ * This class contains functions which help in detecting and dealing with this
  * situation.
  *
- * Checking the URL for a bad extension is somewhat complicated due to the fact 
+ * Checking the URL for a bad extension is somewhat complicated due to the fact
  * that CGI doesn't provide a standard method to determine the URL. Instead it
- * is necessary to pass a subset of $_SERVER variables, which we then attempt 
+ * is necessary to pass a subset of $_SERVER variables, which we then attempt
  * to use to guess parts of the URL.
  */
 class IEUrlExtension {
 	/**
 	 * Check a subset of $_SERVER (or the whole of $_SERVER if you like)
-	 * to see if it indicates that the request was sent with a bad file 
-	 * extension. Returns true if the request should be denied or modified, 
+	 * to see if it indicates that the request was sent with a bad file
+	 * extension. Returns true if the request should be denied or modified,
 	 * false otherwise. The relevant $_SERVER elements are:
 	 *
 	 *   - SERVER_SOFTWARE
@@ -37,6 +37,7 @@ class IEUrlExtension {
 	 *
 	 * @param $vars A subset of $_SERVER.
 	 * @param $extWhitelist Extensions which are allowed, assumed harmless.
+	 * @return bool
 	 */
 	public static function areServerVarsBad( $vars, $extWhitelist = array() ) {
 		// Check QUERY_STRING or REQUEST_URI
@@ -55,7 +56,7 @@ class IEUrlExtension {
 			return true;
 		}
 
-		// Some servers have PATH_INFO but not REQUEST_URI, so we check both 
+		// Some servers have PATH_INFO but not REQUEST_URI, so we check both
 		// to be on the safe side.
 		if ( isset( $vars['PATH_INFO'] )
 			&& self::isUrlExtensionBad( $vars['PATH_INFO'], $extWhitelist ) )
@@ -71,7 +72,7 @@ class IEUrlExtension {
 	 * Given a right-hand portion of a URL, determine whether IE would detect
 	 * a potentially harmful file extension.
 	 *
-	 * @param $urlPart The right-hand portion of a URL
+	 * @param $urlPart string The right-hand portion of a URL
 	 * @param $extWhitelist An array of file extensions which may occur in this
 	 *    URL, and which should be allowed.
 	 * @return bool
@@ -97,10 +98,10 @@ class IEUrlExtension {
 		}
 
 		if ( !preg_match( '/^[a-zA-Z0-9_-]+$/', $extension ) ) {
-			// Non-alphanumeric extension, unlikely to be registered. 
+			// Non-alphanumeric extension, unlikely to be registered.
 			//
 			// The regex above is known to match all registered file extensions
-			// in a default Windows XP installation. It's important to allow 
+			// in a default Windows XP installation. It's important to allow
 			// extensions with ampersands and percent signs, since that reduces
 			// the number of false positives substantially.
 			return false;
@@ -111,8 +112,11 @@ class IEUrlExtension {
 	}
 
 	/**
-	 * Returns a variant of $url which will pass isUrlExtensionBad() but has the 
+	 * Returns a variant of $url which will pass isUrlExtensionBad() but has the
 	 * same GET parameters, or false if it can't figure one out.
+	 * @param $url string
+	 * @param $extWhitelist array
+	 * @return bool|string
 	 */
 	public static function fixUrlForIE6( $url, $extWhitelist = array() ) {
 		$questionPos = strpos( $url, '?' );
@@ -127,7 +131,7 @@ class IEUrlExtension {
 			$query = substr( $url, $questionPos + 1 );
 		}
 
-		// Multiple question marks cause problems. Encode the second and 
+		// Multiple question marks cause problems. Encode the second and
 		// subsequent question mark.
 		$query = str_replace( '?', '%3E', $query );
 		// Append an invalid path character so that IE6 won't see the end of the
@@ -153,16 +157,16 @@ class IEUrlExtension {
 	 * insecure.
 	 *
 	 * The criteria for finding an extension are as follows:
-	 * - a possible extension is a dot followed by one or more characters not 
+	 * - a possible extension is a dot followed by one or more characters not
 	 *   in <>\"/:|?.#
-	 * - if we find a possible extension followed by the end of the string or 
+	 * - if we find a possible extension followed by the end of the string or
 	 *   a #, that's our extension
 	 * - if we find a possible extension followed by a ?, that's our extension
-	 *    - UNLESS it's exe, dll or cgi, in which case we ignore it and continue 
+	 *    - UNLESS it's exe, dll or cgi, in which case we ignore it and continue
 	 *      searching for another possible extension
-	 * - if we find a possible extension followed by a dot or another illegal 
+	 * - if we find a possible extension followed by a dot or another illegal
 	 *   character, we ignore it and continue searching
-	 * 
+	 *
 	 * @param $url string URL
 	 * @return mixed Detected extension (string), or false if none found
 	 */
@@ -182,7 +186,7 @@ class IEUrlExtension {
 				// End of string, we're done
 				return false;
 			}
-			
+
 			// We found a dot. Skip past it
 			$pos++;
 			$remainingLength = $urlLength - $pos;
@@ -220,12 +224,12 @@ class IEUrlExtension {
 	 * with %2E not decoded to ".". On such a server, it is possible to detect
 	 * whether the script filename has been obscured.
 	 *
-	 * The function returns false if the server is not known to have this 
+	 * The function returns false if the server is not known to have this
 	 * behaviour. Microsoft IIS in particular is known to decode escaped script
 	 * filenames.
 	 *
 	 * SERVER_SOFTWARE typically contains either a plain string such as "Zeus",
-	 * or a specification in the style of a User-Agent header, such as 
+	 * or a specification in the style of a User-Agent header, such as
 	 * "Apache/1.3.34 (Unix) mod_ssl/2.8.25 OpenSSL/0.9.8a PHP/4.4.2"
 	 *
 	 * @param $serverSoftware
@@ -234,8 +238,8 @@ class IEUrlExtension {
 	 */
 	public static function haveUndecodedRequestUri( $serverSoftware ) {
 		static $whitelist = array(
-			'Apache', 
-			'Zeus', 
+			'Apache',
+			'Zeus',
 			'LiteSpeed' );
 		if ( preg_match( '/^(.*?)($|\/| )/', $serverSoftware, $m ) ) {
 			return in_array( $m[1], $whitelist );
diff --git a/includes/libs/JavaScriptMinifier.php b/includes/libs/JavaScriptMinifier.php
index a991d915..baf93385 100644
--- a/includes/libs/JavaScriptMinifier.php
+++ b/includes/libs/JavaScriptMinifier.php
@@ -484,22 +484,42 @@ class JavaScriptMinifier {
 					$end++;
 				}
 			} elseif(
+				$ch === '0'
+				&& ($pos + 1 < $length) && ($s[$pos + 1] === 'x' || $s[$pos + 1] === 'X' )
+			) {
+				// Hex numeric literal
+				$end++; // x or X
+				$len = strspn( $s, '0123456789ABCDEFabcdef', $end );
+				if ( !$len ) {
+					return self::parseError($s, $pos, 'Expected a hexadecimal number but found ' . substr( $s, $pos, 5 ) . '...' );
+				}
+				$end += $len;
+			} elseif(
 				ctype_digit( $ch )
 				|| ( $ch === '.' && $pos + 1 < $length && ctype_digit( $s[$pos + 1] ) )
 			) {
-				// Numeric literal. Search for the end of it, but don't care about [+-]exponent
-				// at the end, as the results of "numeric [+-] numeric" and "numeric" are
-				// identical to our state machine.
-				$end += strspn( $s, '0123456789ABCDEFabcdefXx.', $end );
-				while( $s[$end - 1] === '.' ) {
-					// Special case: When a numeric ends with a dot, we have to check the 
-					// literal for proper syntax
-					$decimal = strspn( $s, '0123456789', $pos, $end - $pos - 1 );
-					if( $decimal === $end - $pos - 1 ) {
-						break;
-					} else {
-						$end--;
+				$end += strspn( $s, '0123456789', $end );
+				$decimal = strspn( $s, '.', $end );
+				if ($decimal) {
+					if ( $decimal > 2 ) {
+						return self::parseError($s, $end, 'The number has too many decimal points' );
+					}
+					$end += strspn( $s, '0123456789', $end + 1 ) + $decimal;
+				}
+				$exponent = strspn( $s, 'eE', $end );
+				if( $exponent ) {
+					if ( $exponent > 1 ) {
+						return self::parseError($s, $end, 'Number with several E' );
+					}
+					$end++;
+					
+					// + sign is optional; - sign is required.
+					$end += strspn( $s, '-+', $end );
+					$len = strspn( $s, '0123456789', $end );
+					if ( !$len ) {
+						return self::parseError($s, $pos, 'No decimal digits after e, how many zeroes should be added?' );
 					}
+					$end += $len;
 				}
 			} elseif( isset( $opChars[$ch] ) ) {
 				// Punctuation character. Search for the longest matching operator.
@@ -576,4 +596,9 @@ class JavaScriptMinifier {
 		}
 		return $out;
 	}
+	
+	static function parseError($fullJavascript, $position, $errorMsg) {
+		// TODO: Handle the error: trigger_error, throw exception, return false...
+		return false;
+	}
 }
diff --git a/includes/libs/jsminplus.php b/includes/libs/jsminplus.php
index bab4ff49..8ed08d74 100644
--- a/includes/libs/jsminplus.php
+++ b/includes/libs/jsminplus.php
@@ -1,7 +1,7 @@
 <?php
 
 /**
- * JSMinPlus version 1.3
+ * JSMinPlus version 1.4
  *
  * Minifies a javascript file using a javascript parser
  *
@@ -15,8 +15,10 @@
  * Usage: $minified = JSMinPlus::minify($script [, $filename])
  *
  * Versionlog (see also changelog.txt):
- * 19-07-2011 - expanded operator and keyword defines. Fixes the notices when creating several JSTokenizer
- * 17-05-2009 - fixed hook:colon precedence, fixed empty body in loop and if-constructs
+ * 23-07-2011 - remove dynamic creation of OP_* and KEYWORD_* defines and declare them on top
+ *              reduce memory footprint by minifying by block-scope
+ *              some small byte-saving and performance improvements
+ * 12-05-2009 - fixed hook:colon precedence, fixed empty body in loop and if-constructs
  * 18-04-2009 - fixed crashbug in PHP 5.2.9 and several other bugfixes
  * 12-04-2009 - some small bugfixes and performance improvements
  * 09-04-2009 - initial open sourced version 1.0
@@ -46,7 +48,7 @@
  * the Initial Developer. All Rights Reserved.
  *
  * Contributor(s): Tino Zijdel <crisp@tweakers.net>
- * PHP port, modifications and minifier routine are (C) 2009
+ * PHP port, modifications and minifier routine are (C) 2009-2011
  *
  * Alternatively, the contents of this file may be used under the terms of
  * either the GNU General Public License Version 2 or later (the "GPL"), or
@@ -86,6 +88,8 @@ define('JS_SETTER', 111);
 define('JS_GROUP', 112);
 define('JS_LIST', 113);
 
+define('JS_MINIFIED', 999);
+
 define('DECLARED_FORM', 0);
 define('EXPRESSED_FORM', 1);
 define('STATEMENT_FORM', 2);
@@ -188,7 +192,7 @@ class JSMinPlus
 
 	private function __construct()
 	{
-		$this->parser = new JSParser();
+		$this->parser = new JSParser($this);
 	}
 
 	public static function minify($js, $filename='')
@@ -217,22 +221,18 @@ class JSMinPlus
 		return false;
 	}
 
-	private function parseTree($n, $noBlockGrouping = false)
+	public function parseTree($n, $noBlockGrouping = false)
 	{
 		$s = '';
 
 		switch ($n->type)
 		{
-			case KEYWORD_FUNCTION:
-				$s .= 'function' . ($n->name ? ' ' . $n->name : '') . '(';
-				$params = $n->params;
-				for ($i = 0, $j = count($params); $i < $j; $i++)
-					$s .= ($i ? ',' : '') . $params[$i];
-				$s .= '){' . $this->parseTree($n->body, true) . '}';
+			case JS_MINIFIED:
+				$s = $n->value;
 			break;
 
 			case JS_SCRIPT:
-				// we do nothing with funDecls or varDecls
+				// we do nothing yet with funDecls or varDecls
 				$noBlockGrouping = true;
 			// FALL THROUGH
 
@@ -279,6 +279,14 @@ class JSMinPlus
 				}
 			break;
 
+			case KEYWORD_FUNCTION:
+				$s .= 'function' . ($n->name ? ' ' . $n->name : '') . '(';
+				$params = $n->params;
+				for ($i = 0, $j = count($params); $i < $j; $i++)
+					$s .= ($i ? ',' : '') . $params[$i];
+				$s .= '){' . $this->parseTree($n->body, true) . '}';
+			break;
+
 			case KEYWORD_IF:
 				$s = 'if(' . $this->parseTree($n->condition) . ')';
 				$thenPart = $this->parseTree($n->thenPart);
@@ -385,19 +393,14 @@ class JSMinPlus
 			break;
 
 			case KEYWORD_THROW:
-				$s = 'throw ' . $this->parseTree($n->exception);
-			break;
-
 			case KEYWORD_RETURN:
-				$s = 'return';
+				$s = $n->type;
 				if ($n->value)
 				{
 					$t = $this->parseTree($n->value);
 					if (strlen($t))
 					{
-						if (	$t[0] != '(' && $t[0] != '[' && $t[0] != '{' &&
-							$t[0] != '"' && $t[0] != "'" && $t[0] != '/'
-						)
+						if ($this->isWordChar($t[0]) || $t[0] == '\\')
 							$s .= ' ';
 
 						$s .= $t;
@@ -423,6 +426,40 @@ class JSMinPlus
 				}
 			break;
 
+			case KEYWORD_IN:
+			case KEYWORD_INSTANCEOF:
+				$left = $this->parseTree($n->treeNodes[0]);
+				$right = $this->parseTree($n->treeNodes[1]);
+
+				$s = $left;
+
+				if ($this->isWordChar(substr($left, -1)))
+					$s .= ' ';
+
+				$s .= $n->type;
+
+				if ($this->isWordChar($right[0]) || $right[0] == '\\')
+					$s .= ' ';
+
+				$s .= $right;
+			break;
+
+			case KEYWORD_DELETE:
+			case KEYWORD_TYPEOF:
+				$right = $this->parseTree($n->treeNodes[0]);
+
+				$s = $n->type;
+
+				if ($this->isWordChar($right[0]) || $right[0] == '\\')
+					$s .= ' ';
+
+				$s .= $right;
+			break;
+
+			case KEYWORD_VOID:
+				$s = 'void(' . $this->parseTree($n->treeNodes[0]) . ')';
+			break;
+
 			case KEYWORD_DEBUGGER:
 				throw new Exception('NOT IMPLEMENTED: DEBUGGER');
 			break;
@@ -497,26 +534,6 @@ class JSMinPlus
 				}
 			break;
 
-			case KEYWORD_IN:
-				$s = $this->parseTree($n->treeNodes[0]) . ' in ' . $this->parseTree($n->treeNodes[1]);
-			break;
-
-			case KEYWORD_INSTANCEOF:
-				$s = $this->parseTree($n->treeNodes[0]) . ' instanceof ' . $this->parseTree($n->treeNodes[1]);
-			break;
-
-			case KEYWORD_DELETE:
-				$s = 'delete ' . $this->parseTree($n->treeNodes[0]);
-			break;
-
-			case KEYWORD_VOID:
-				$s = 'void(' . $this->parseTree($n->treeNodes[0]) . ')';
-			break;
-
-			case KEYWORD_TYPEOF:
-				$s = 'typeof ' . $this->parseTree($n->treeNodes[0]);
-			break;
-
 			case OP_NOT:
 			case OP_BITWISE_NOT:
 			case OP_UNARY_PLUS:
@@ -606,13 +623,33 @@ class JSMinPlus
 				$s .= '}';
 			break;
 
+			case TOKEN_NUMBER:
+				$s = $n->value;
+				if (preg_match('/^([1-9]+)(0{3,})$/', $s, $m))
+					$s = $m[1] . 'e' . strlen($m[2]);
+			break;
+
 			case KEYWORD_NULL: case KEYWORD_THIS: case KEYWORD_TRUE: case KEYWORD_FALSE:
-			case TOKEN_IDENTIFIER: case TOKEN_NUMBER: case TOKEN_STRING: case TOKEN_REGEXP:
+			case TOKEN_IDENTIFIER: case TOKEN_STRING: case TOKEN_REGEXP:
 				$s = $n->value;
 			break;
 
 			case JS_GROUP:
-				$s = '(' . $this->parseTree($n->treeNodes[0]) . ')';
+				if (in_array(
+					$n->treeNodes[0]->type,
+					array(
+						JS_ARRAY_INIT, JS_OBJECT_INIT, JS_GROUP,
+						TOKEN_NUMBER, TOKEN_STRING, TOKEN_REGEXP, TOKEN_IDENTIFIER,
+						KEYWORD_NULL, KEYWORD_THIS, KEYWORD_TRUE, KEYWORD_FALSE
+					)
+				))
+				{
+					$s = $this->parseTree($n->treeNodes[0]);
+				}
+				else
+				{
+					$s = '(' . $this->parseTree($n->treeNodes[0]) . ')';
+				}
 			break;
 
 			default:
@@ -626,11 +663,17 @@ class JSMinPlus
 	{
 		return preg_match('/^[a-zA-Z_][a-zA-Z0-9_]*$/', $string) && !in_array($string, $this->reserved);
 	}
+
+	private function isWordChar($char)
+	{
+		return $char == '_' || $char == '$' || ctype_alnum($char);
+	}
 }
 
 class JSParser
 {
 	private $t;
+	private $minifier;
 
 	private $opPrecedence = array(
 		';' => 0,
@@ -680,8 +723,9 @@ class JSParser
 		TOKEN_CONDCOMMENT_START => 1, TOKEN_CONDCOMMENT_END => 1
 	);
 
-	public function __construct()
+	public function __construct($minifier=null) // $minifier is optional; existing no-argument callers keep working
 	{
+		$this->minifier = $minifier; // stored as-is; a falsy value makes the parser skip its "minify by scope" step
 		$this->t = new JSTokenizer();
 	}
 
@@ -705,6 +749,19 @@ class JSParser
 		$n->funDecls = $x->funDecls;
 		$n->varDecls = $x->varDecls;
 
+		// minify by scope
+		if ($this->minifier)
+		{
+			$n->value = $this->minifier->parseTree($n);
+
+			// clear tree from node to save memory
+			$n->treeNodes = null;
+			$n->funDecls = null;
+			$n->varDecls = null;
+
+			$n->type = JS_MINIFIED;
+		}
+
 		return $n;
 	}
 
@@ -963,7 +1020,7 @@ class JSParser
 
 			case KEYWORD_THROW:
 				$n = new JSNode($this->t);
-				$n->exception = $this->Expression($x);
+				$n->value = $this->Expression($x);
 			break;
 
 			case KEYWORD_RETURN:
@@ -1678,44 +1735,11 @@ class JSTokenizer
 	);
 
 	private $opTypeNames = array(
-		';'	=> 'SEMICOLON',
-		','	=> 'COMMA',
-		'?'	=> 'HOOK',
-		':'	=> 'COLON',
-		'||'	=> 'OR',
-		'&&'	=> 'AND',
-		'|'	=> 'BITWISE_OR',
-		'^'	=> 'BITWISE_XOR',
-		'&'	=> 'BITWISE_AND',
-		'==='	=> 'STRICT_EQ',
-		'=='	=> 'EQ',
-		'='	=> 'ASSIGN',
-		'!=='	=> 'STRICT_NE',
-		'!='	=> 'NE',
-		'<<'	=> 'LSH',
-		'<='	=> 'LE',
-		'<'	=> 'LT',
-		'>>>'	=> 'URSH',
-		'>>'	=> 'RSH',
-		'>='	=> 'GE',
-		'>'	=> 'GT',
-		'++'	=> 'INCREMENT',
-		'--'	=> 'DECREMENT',
-		'+'	=> 'PLUS',
-		'-'	=> 'MINUS',
-		'*'	=> 'MUL',
-		'/'	=> 'DIV',
-		'%'	=> 'MOD',
-		'!'	=> 'NOT',
-		'~'	=> 'BITWISE_NOT',
-		'.'	=> 'DOT',
-		'['	=> 'LEFT_BRACKET',
-		']'	=> 'RIGHT_BRACKET',
-		'{'	=> 'LEFT_CURLY',
-		'}'	=> 'RIGHT_CURLY',
-		'('	=> 'LEFT_PAREN',
-		')'	=> 'RIGHT_PAREN',
-		'@*/'	=> 'CONDCOMMENT_END'
+		';', ',', '?', ':', '||', '&&', '|', '^',
+		'&', '===', '==', '=', '!==', '!=', '<<', '<=',
+		'<', '>>>', '>>', '>=', '>', '++', '--', '+',
+		'-', '*', '/', '%', '!', '~', '.', '[',
+		']', '{', '}', '(', ')', '@*/'
 	);
 
 	private $assignOps = array('|', '^', '&', '<<', '>>', '>>>', '+', '-', '*', '/', '%');
@@ -1723,7 +1747,7 @@ class JSTokenizer
 
 	public function __construct()
 	{
-		$this->opRegExp = '#^(' . implode('|', array_map('preg_quote', array_keys($this->opTypeNames))) . ')#';
+		$this->opRegExp = '#^(' . implode('|', array_map('preg_quote', $this->opTypeNames)) . ')#'; // opTypeNames is now a plain list of operator strings, so its values are quoted directly (no array_keys)
 	}
 
 	public function init($source, $filename = '', $lineno = 1)
@@ -1874,22 +1898,38 @@ class JSTokenizer
 		{
 			switch ($input[0])
 			{
-				case '0': case '1': case '2': case '3': case '4':
-				case '5': case '6': case '7': case '8': case '9':
-					if (preg_match('/^\d+\.\d*(?:[eE][-+]?\d+)?|^\d+(?:\.\d*)?[eE][-+]?\d+/', $input, $match))
+				case '0':
+					// hexadecimal
+					if (($input[1] == 'x' || $input[1] == 'X') && preg_match('/^0x[0-9a-f]+/i', $input, $match))
 					{
 						$tt = TOKEN_NUMBER;
+						break;
 					}
-					else if (preg_match('/^0[xX][\da-fA-F]+|^0[0-7]*|^\d+/', $input, $match))
+				// FALL THROUGH
+
+				case '1': case '2': case '3': case '4': case '5':
+				case '6': case '7': case '8': case '9':
+					// should always match
+					preg_match('/^\d+(?:\.\d*)?(?:[eE][-+]?\d+)?/', $input, $match);
+					$tt = TOKEN_NUMBER;
+				break;
+
+				case "'":
+					if (preg_match('/^\'(?:[^\\\\\'\r\n]++|\\\\(?:.|\r?\n))*\'/', $input, $match))
 					{
-						// this should always match because of \d+
-						$tt = TOKEN_NUMBER;
+						$tt = TOKEN_STRING;
+					}
+					else
+					{
+						if ($chunksize)
+							return $this->get(null); // retry with a full chunk fetch
+
+						throw $this->newSyntaxError('Unterminated string literal');
 					}
 				break;
 
 				case '"':
-				case "'":
-					if (preg_match('/^"(?:\\\\(?:.|\r?\n)|[^\\\\"\r\n]+)*"|^\'(?:\\\\(?:.|\r?\n)|[^\\\\\'\r\n]+)*\'/', $input, $match))
+					if (preg_match('/^"(?:[^\\\\"\r\n]++|\\\\(?:.|\r?\n))*"/', $input, $match))
 					{
 						$tt = TOKEN_STRING;
 					}
@@ -2091,4 +2131,3 @@ class JSToken
 	public $lineno;
 	public $assignOp;
 }
-
author	Pierre Schmitz <pierre@archlinux.de>	2012-05-03 13:01:35 +0200
committer	Pierre Schmitz <pierre@archlinux.de>	2012-05-03 13:01:35 +0200
commit	d9022f63880ce039446fba8364f68e656b7bf4cb (patch)
tree	16b40fbf17bf7c9ee6f4ead25b16dd192378050a /includes/libs
parent	27cf83d177256813e2e802241085fce5dd0f3fb9 (diff)