summaryrefslogtreecommitdiff
path: root/includes/libs
diff options
context:
space:
mode:
authorPierre Schmitz <pierre@archlinux.de>2011-06-22 11:28:20 +0200
committerPierre Schmitz <pierre@archlinux.de>2011-06-22 11:28:20 +0200
commit9db190c7e736ec8d063187d4241b59feaf7dc2d1 (patch)
tree46d1a0dee7febef5c2d57a9f7b972be16a163b3d /includes/libs
parent78677c7bbdcc9739f6c10c75935898a20e1acd9e (diff)
update to MediaWiki 1.17.0
Diffstat (limited to 'includes/libs')
-rw-r--r--includes/libs/CSSJanus.php323
-rw-r--r--includes/libs/CSSMin.php214
-rw-r--r--includes/libs/IEContentAnalyzer.php824
-rw-r--r--includes/libs/IEUrlExtension.php247
-rw-r--r--includes/libs/JavaScriptMinifier.php579
-rw-r--r--includes/libs/README4
-rw-r--r--includes/libs/spyc.php248
7 files changed, 2439 insertions, 0 deletions
diff --git a/includes/libs/CSSJanus.php b/includes/libs/CSSJanus.php
new file mode 100644
index 00000000..aa04bc49
--- /dev/null
+++ b/includes/libs/CSSJanus.php
@@ -0,0 +1,323 @@
+<?php
+/**
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+/**
+ * This is a PHP port of CSSJanus, a utility that transforms CSS style sheets
+ * written for LTR to RTL.
+ *
+ * The original Python version of CSSJanus is Copyright 2008 by Google Inc. and
+ * is distributed under the Apache license.
+ *
+ * Original code: http://code.google.com/p/cssjanus/source/browse/trunk/cssjanus.py
+ * License of original code: http://code.google.com/p/cssjanus/source/browse/trunk/LICENSE
+ * @author Roan Kattouw
+ *
+ */
+class CSSJanus {
+	/**
+	 * Regex fragments and complete patterns, keyed by name.
+	 *
+	 * Entries without delimiters are building blocks that get interpolated into
+	 * larger expressions; entries wrapped in /.../ are complete patterns handed
+	 * to the preg_* functions. Entries defined as null depend on other entries
+	 * and are filled in lazily by buildPatterns().
+	 */
+	private static $patterns = array(
+		'tmpToken' => '`TMP`',
+		'nonAscii' => '[\200-\377]',
+		// A CSS unicode escape: a backslash followed by 1-6 hex digits. In a
+		// single-quoted PHP string '\\\\' yields two backslashes, which PCRE reads
+		// as one literal backslash. (Writing only '\\' hands PCRE '\[', i.e. a
+		// literal opening bracket, so the escape would never match.)
+		'unicode' => '(?:(?:\\\\[0-9a-f]{1,6})(?:\r\n|\s)?)',
+		'num' => '(?:[0-9]*\.[0-9]+|[0-9]+)',
+		'unit' => '(?:em|ex|px|cm|mm|in|pt|pc|deg|rad|grad|ms|s|hz|khz|%)',
+		'body_selector' => 'body\s*{\s*',
+		'direction' => 'direction\s*:\s*',
+		'escape' => null,
+		'nmstart' => null,
+		'nmchar' => null,
+		'ident' => null,
+		'quantity' => null,
+		'possibly_negative_quantity' => null,
+		'color' => null,
+		'url_special_chars' => '[!#$%&*-~]',
+		'valid_after_uri_chars' => '[\'\"]?\s*',
+		'url_chars' => null,
+		'lookahead_not_open_brace' => null,
+		'lookahead_not_closing_paren' => null,
+		'lookahead_for_closing_paren' => null,
+		'lookbehind_not_letter' => '(?<![a-zA-Z])',
+		'chars_within_selector' => '[^\}]*?',
+		'noflip_annotation' => '\/\*\s*@noflip\s*\*\/',
+		'noflip_single' => null,
+		'noflip_class' => null,
+		'comment' => '/\/\*[^*]*\*+([^\/*][^*]*\*+)*\//',
+		'direction_ltr' => null,
+		'direction_rtl' => null,
+		'left' => null,
+		'right' => null,
+		'left_in_url' => null,
+		'right_in_url' => null,
+		'ltr_in_url' => null,
+		'rtl_in_url' => null,
+		'cursor_east' => null,
+		'cursor_west' => null,
+		'four_notation_quantity' => null,
+		'four_notation_color' => null,
+		'bg_horizontal_percentage' => null,
+		'bg_horizontal_percentage_x' => null,
+	);
+
+	/**
+	 * Build patterns we can't define above because they depend on other patterns.
+	 *
+	 * The work is done once per request: 'escape' doubles as the "already built"
+	 * marker, so later calls return immediately.
+	 */
+	private static function buildPatterns() {
+		if ( !is_null( self::$patterns['escape'] ) ) {
+			// Patterns have already been built
+			return;
+		}
+
+		$patterns =& self::$patterns;
+		// Either a unicode escape, or a backslash followed by any character that
+		// is not a newline, form feed or hex digit. The four backslashes collapse
+		// to two in this double-quoted string, which PCRE reads as one literal
+		// backslash; \r, \n and \f are turned into the actual control characters
+		// by PHP before PCRE sees them.
+		$patterns['escape'] = "(?:{$patterns['unicode']}|\\\\[^\r\n\f0-9a-f])";
+		$patterns['nmstart'] = "(?:[_a-z]|{$patterns['nonAscii']}|{$patterns['escape']})";
+		$patterns['nmchar'] = "(?:[_a-z0-9-]|{$patterns['nonAscii']}|{$patterns['escape']})";
+		$patterns['ident'] = "-?{$patterns['nmstart']}{$patterns['nmchar']}*";
+		$patterns['quantity'] = "{$patterns['num']}(?:\s*{$patterns['unit']}|{$patterns['ident']})?";
+		$patterns['possibly_negative_quantity'] = "((?:-?{$patterns['quantity']})|(?:inherit|auto))";
+		$patterns['color'] = "(#?{$patterns['nmchar']}+)";
+		$patterns['url_chars'] = "(?:{$patterns['url_special_chars']}|{$patterns['nonAscii']}|{$patterns['escape']})*";
+		// Negative lookahead: fails when only selector-like characters separate
+		// the current position from an opening brace, i.e. when we are inside a
+		// selector rather than inside a declaration.
+		$patterns['lookahead_not_open_brace'] = "(?!({$patterns['nmchar']}|\r?\n|\s|#|\:|\.|\,|\+|>)*?{)";
+		// Lookaheads telling apart "inside url(...)" from "outside url(...)"
+		$patterns['lookahead_not_closing_paren'] = "(?!{$patterns['url_chars']}?{$patterns['valid_after_uri_chars']}\))";
+		$patterns['lookahead_for_closing_paren'] = "(?={$patterns['url_chars']}?{$patterns['valid_after_uri_chars']}\))";
+		$patterns['noflip_single'] = "/({$patterns['noflip_annotation']}{$patterns['lookahead_not_open_brace']}[^;}]+;?)/i";
+		$patterns['noflip_class'] = "/({$patterns['noflip_annotation']}{$patterns['chars_within_selector']}})/i";
+		$patterns['direction_ltr'] = "/({$patterns['direction']})ltr/i";
+		$patterns['direction_rtl'] = "/({$patterns['direction']})rtl/i";
+		$patterns['left'] = "/{$patterns['lookbehind_not_letter']}(left){$patterns['lookahead_not_closing_paren']}{$patterns['lookahead_not_open_brace']}/i";
+		$patterns['right'] = "/{$patterns['lookbehind_not_letter']}(right){$patterns['lookahead_not_closing_paren']}{$patterns['lookahead_not_open_brace']}/i";
+		$patterns['left_in_url'] = "/{$patterns['lookbehind_not_letter']}(left){$patterns['lookahead_for_closing_paren']}/i";
+		$patterns['right_in_url'] = "/{$patterns['lookbehind_not_letter']}(right){$patterns['lookahead_for_closing_paren']}/i";
+		$patterns['ltr_in_url'] = "/{$patterns['lookbehind_not_letter']}(ltr){$patterns['lookahead_for_closing_paren']}/i";
+		$patterns['rtl_in_url'] = "/{$patterns['lookbehind_not_letter']}(rtl){$patterns['lookahead_for_closing_paren']}/i";
+		$patterns['cursor_east'] = "/{$patterns['lookbehind_not_letter']}([ns]?)e-resize/";
+		$patterns['cursor_west'] = "/{$patterns['lookbehind_not_letter']}([ns]?)w-resize/";
+		$patterns['four_notation_quantity'] = "/{$patterns['possibly_negative_quantity']}(\s+){$patterns['possibly_negative_quantity']}(\s+){$patterns['possibly_negative_quantity']}(\s+){$patterns['possibly_negative_quantity']}/i";
+		$patterns['four_notation_color'] = "/(-color\s*:\s*){$patterns['color']}(\s+){$patterns['color']}(\s+){$patterns['color']}(\s+){$patterns['color']}/i";
+		// The two regexes below are parenthesized differently than in the original implementation to make the
+		// callback's job more straightforward: group 1 is everything before the number, group 2 the number
+		// itself and group 3 everything after it.
+		$patterns['bg_horizontal_percentage'] = "/(background(?:-position)?\s*:\s*[^%]*?)({$patterns['num']})(%\s*(?:{$patterns['quantity']}|{$patterns['ident']}))/";
+		$patterns['bg_horizontal_percentage_x'] = "/(background-position-x\s*:\s*)({$patterns['num']})(%)/";
+	}
+
+	/**
+	 * Transform an LTR stylesheet to RTL
+	 *
+	 * Rules annotated with a @noflip comment, and all other comments, are
+	 * swapped out for placeholder tokens before flipping and restored afterwards,
+	 * so they come out unchanged.
+	 *
+	 * @param $css String: stylesheet to transform
+	 * @param $swapLtrRtlInURL Boolean: If true, swap 'ltr' and 'rtl' in URLs
+	 * @param $swapLeftRightInURL Boolean: If true, swap 'left' and 'right' in URLs
+	 * @return String: Transformed stylesheet
+	 */
+	public static function transform( $css, $swapLtrRtlInURL = false, $swapLeftRightInURL = false ) {
+		// We wrap tokens in ` , not ~ like the original implementation does.
+		// This was done because ` is not a legal character in CSS and can only
+		// occur in URLs, where we escape it to %60 before inserting our tokens.
+		$css = str_replace( '`', '%60', $css );
+
+		self::buildPatterns();
+
+		// Tokenize single line rules with /* @noflip */
+		$noFlipSingle = new CSSJanus_Tokenizer( self::$patterns['noflip_single'], '`NOFLIP_SINGLE`' );
+		$css = $noFlipSingle->tokenize( $css );
+
+		// Tokenize class rules with /* @noflip */
+		$noFlipClass = new CSSJanus_Tokenizer( self::$patterns['noflip_class'], '`NOFLIP_CLASS`' );
+		$css = $noFlipClass->tokenize( $css );
+
+		// Tokenize comments
+		$comments = new CSSJanus_Tokenizer( self::$patterns['comment'], '`C`' );
+		$css = $comments->tokenize( $css );
+
+		// LTR->RTL fixes start here
+		$css = self::fixDirection( $css );
+		if ( $swapLtrRtlInURL ) {
+			$css = self::fixLtrRtlInURL( $css );
+		}
+
+		if ( $swapLeftRightInURL ) {
+			$css = self::fixLeftRightInURL( $css );
+		}
+		$css = self::fixLeftAndRight( $css );
+		$css = self::fixCursorProperties( $css );
+		$css = self::fixFourPartNotation( $css );
+		$css = self::fixBackgroundPosition( $css );
+
+		// Detokenize stuff we tokenized before, in reverse order of tokenizing
+		$css = $comments->detokenize( $css );
+		$css = $noFlipClass->detokenize( $css );
+		$css = $noFlipSingle->detokenize( $css );
+
+		return $css;
+	}
+
+	/**
+	 * Replace direction: ltr; with direction: rtl; and vice versa.
+	 *
+	 * The original implementation only does this inside body selectors
+	 * and misses "body\n{\ndirection:ltr;\n}". This function does not have
+	 * these problems.
+	 *
+	 * See http://code.google.com/p/cssjanus/issues/detail?id=15
+	 * (TODO: add the URL of the second upstream issue)
+	 *
+	 * @param $css String: stylesheet to process
+	 * @return String: stylesheet with the direction values swapped
+	 */
+	private static function fixDirection( $css ) {
+		// Swap via a temporary token so that freshly written 'rtl' values are
+		// not flipped straight back by the second replacement
+		$css = preg_replace( self::$patterns['direction_ltr'],
+			'$1' . self::$patterns['tmpToken'], $css );
+		$css = preg_replace( self::$patterns['direction_rtl'], '$1ltr', $css );
+		$css = str_replace( self::$patterns['tmpToken'], 'rtl', $css );
+
+		return $css;
+	}
+
+	/**
+	 * Replace 'ltr' with 'rtl' and vice versa in background URLs
+	 *
+	 * @param $css String: stylesheet to process
+	 * @return String: processed stylesheet
+	 */
+	private static function fixLtrRtlInURL( $css ) {
+		$css = preg_replace( self::$patterns['ltr_in_url'], self::$patterns['tmpToken'], $css );
+		$css = preg_replace( self::$patterns['rtl_in_url'], 'ltr', $css );
+		$css = str_replace( self::$patterns['tmpToken'], 'rtl', $css );
+
+		return $css;
+	}
+
+	/**
+	 * Replace 'left' with 'right' and vice versa in background URLs
+	 *
+	 * @param $css String: stylesheet to process
+	 * @return String: processed stylesheet
+	 */
+	private static function fixLeftRightInURL( $css ) {
+		$css = preg_replace( self::$patterns['left_in_url'], self::$patterns['tmpToken'], $css );
+		$css = preg_replace( self::$patterns['right_in_url'], 'left', $css );
+		$css = str_replace( self::$patterns['tmpToken'], 'right', $css );
+
+		return $css;
+	}
+
+	/**
+	 * Flip rules like left: , padding-right: , etc.
+	 *
+	 * Occurrences inside selectors and inside url(...) are excluded by the
+	 * lookaheads built into the 'left' and 'right' patterns.
+	 *
+	 * @param $css String: stylesheet to process
+	 * @return String: processed stylesheet
+	 */
+	private static function fixLeftAndRight( $css ) {
+		$css = preg_replace( self::$patterns['left'], self::$patterns['tmpToken'], $css );
+		$css = preg_replace( self::$patterns['right'], 'left', $css );
+		$css = str_replace( self::$patterns['tmpToken'], 'right', $css );
+
+		return $css;
+	}
+
+	/**
+	 * Flip East and West in rules like cursor: nw-resize;
+	 *
+	 * @param $css String: stylesheet to process
+	 * @return String: processed stylesheet
+	 */
+	private static function fixCursorProperties( $css ) {
+		// $1 is the optional leading 'n' or 's', which is kept as is
+		$css = preg_replace( self::$patterns['cursor_east'],
+			'$1' . self::$patterns['tmpToken'], $css );
+		$css = preg_replace( self::$patterns['cursor_west'], '$1e-resize', $css );
+		$css = str_replace( self::$patterns['tmpToken'], 'w-resize', $css );
+
+		return $css;
+	}
+
+	/**
+	 * Swap the second and fourth parts in four-part notation rules like
+	 * padding: 1px 2px 3px 4px;
+	 *
+	 * Unlike the original implementation, this function doesn't suffer from
+	 * the bug where whitespace is not preserved when flipping four-part rules
+	 * and four-part color rules with multiple whitespace characters between
+	 * colors are not recognized.
+	 * See http://code.google.com/p/cssjanus/issues/detail?id=16
+	 *
+	 * @param $css String: stylesheet to process
+	 * @return String: processed stylesheet
+	 */
+	private static function fixFourPartNotation( $css ) {
+		// Quantities: groups 1, 3, 5, 7 are the values, 2, 4, 6 the whitespace
+		$css = preg_replace( self::$patterns['four_notation_quantity'], '$1$2$7$4$5$6$3', $css );
+		// Colors: group 1 is the "-color:" prefix, 2, 4, 6, 8 are the values
+		$css = preg_replace( self::$patterns['four_notation_color'], '$1$2$3$8$5$6$7$4', $css );
+
+		return $css;
+	}
+
+	/**
+	 * Flip horizontal background percentages.
+	 *
+	 * @param $css String: stylesheet to process
+	 * @return String: processed stylesheet
+	 */
+	private static function fixBackgroundPosition( $css ) {
+		// __CLASS__ rather than the string 'self': both name this class, but
+		// 'self' inside a callable array is deprecated in newer PHP versions
+		$callback = array( __CLASS__, 'calculateNewBackgroundPosition' );
+		$css = preg_replace_callback( self::$patterns['bg_horizontal_percentage'],
+			$callback, $css );
+		$css = preg_replace_callback( self::$patterns['bg_horizontal_percentage_x'],
+			$callback, $css );
+
+		return $css;
+	}
+
+	/**
+	 * Callback for fixBackgroundPosition()
+	 *
+	 * @param $matches Array: regex match; [1] is the text before the number,
+	 *   [2] the percentage number, [3] the text after it
+	 * @return String: the matched text with the number N replaced by 100 - N
+	 */
+	private static function calculateNewBackgroundPosition( $matches ) {
+		return $matches[1] . ( 100 - $matches[2] ) . $matches[3];
+	}
+}
+
+/**
+ * Utility class used by CSSJanus that tokenizes and untokenizes things we want
+ * to protect from being janused.
+ * @author Roan Kattouw
+ */
+class CSSJanus_Tokenizer {
+	/** Regular expression whose matches get replaced by the token */
+	private $regex;
+	/** Placeholder string inserted in place of every match */
+	private $token;
+	/** Matched strings, in the order in which they were replaced */
+	private $originals;
+
+	/**
+	 * Constructor
+	 * @param $regex string Regular expression whose matches to replace by a token.
+	 * @param $token string Token
+	 */
+	public function __construct( $regex, $token ) {
+		$this->originals = array();
+		$this->token = $token;
+		$this->regex = $regex;
+	}
+
+	/**
+	 * Swap every match of the regex in $str for the token, recording each
+	 * matched string so detokenize() can put it back later.
+	 * @param $str String to tokenize
+	 * @return string Tokenized string
+	 */
+	public function tokenize( $str ) {
+		$recorder = array( $this, 'tokenizeCallback' );
+
+		return preg_replace_callback( $this->regex, $recorder, $str );
+	}
+
+	/**
+	 * preg_replace_callback() handler for tokenize(): stores the full match
+	 * and hands back the token as its replacement.
+	 */
+	private function tokenizeCallback( $matches ) {
+		$this->originals[] = $matches[0];
+
+		return $this->token;
+	}
+
+	/**
+	 * Put the recorded originals back in place of the tokens. If multiple strings
+	 * were tokenized, it's important they be detokenized in exactly the SAME ORDER.
+	 * @param $str String: previously run through tokenize()
+	 * @return string Original string
+	 */
+	public function detokenize( $str ) {
+		// Each occurrence of the same token needs a different replacement, which
+		// the plain string functions cannot do, so preg_replace_callback() is
+		// used even though the pattern is just the quoted literal token
+		$tokenPattern = '/' . preg_quote( $this->token, '/' ) . '/';
+		$restorer = array( $this, 'detokenizeCallback' );
+
+		return preg_replace_callback( $tokenPattern, $restorer, $str );
+	}
+
+	/**
+	 * preg_replace_callback() handler for detokenize(): returns the original
+	 * under the array's internal pointer, then advances the pointer.
+	 */
+	private function detokenizeCallback( $matches ) {
+		$original = current( $this->originals );
+		next( $this->originals );
+
+		return $original;
+	}
+}
diff --git a/includes/libs/CSSMin.php b/includes/libs/CSSMin.php
new file mode 100644
index 00000000..c0e78112
--- /dev/null
+++ b/includes/libs/CSSMin.php
@@ -0,0 +1,214 @@
+<?php
+/*
+ * Copyright 2010 Wikimedia Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed
+ * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ * OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ */
+
+/**
+ * Transforms CSS data
+ *
+ * This class provides minification, URL remapping, URL extracting, and data-URL embedding.
+ *
+ * @file
+ * @version 0.1.1 -- 2010-09-11
+ * @author Trevor Parscal <tparscal@wikimedia.org>
+ * @copyright Copyright 2010 Wikimedia Foundation
+ * @license http://www.apache.org/licenses/LICENSE-2.0
+ */
+class CSSMin {
+
+	/* Constants */
+
+	/**
+	 * Maximum file size to still qualify for in-line embedding as a data-URI
+	 *
+	 * 24,576 is used because Internet Explorer has a 32,768 byte limit for data URIs,
+	 * which when base64 encoded will result in a 1/3 increase in size.
+	 */
+	const EMBED_SIZE_LIMIT = 24576;
+
+	/**
+	 * Regex fragment (without delimiters) matching a CSS url(...) token. The named
+	 * group "file" captures the path up to a '?', "query" captures the rest.
+	 */
+	const URL_REGEX = 'url\(\s*[\'"]?(?P<file>[^\?\)\'"]*)(?P<query>\??[^\)\'"]*)[\'"]?\s*\)';
+
+	/* Protected Static Members */
+
+	/** @var array List of common image files extensions and mime-types */
+	protected static $mimeTypes = array(
+		'gif' => 'image/gif',
+		'jpe' => 'image/jpeg',
+		'jpeg' => 'image/jpeg',
+		'jpg' => 'image/jpeg',
+		'png' => 'image/png',
+		'tif' => 'image/tiff',
+		'tiff' => 'image/tiff',
+		'xbm' => 'image/x-xbitmap',
+	);
+
+	/* Static Methods */
+
+	/**
+	 * Gets a list of local file paths which are referenced in a CSS style sheet
+	 *
+	 * Every url(...) in $source is resolved against $path; only files that
+	 * exist on disk are returned. Without a $path the result is always empty.
+	 *
+	 * @param $source string CSS data to scan
+	 * @param $path string File path where the source was read from (optional)
+	 * @return array List of local file references
+	 */
+	public static function getLocalFileReferences( $source, $path = null ) {
+		$files = array();
+		if ( is_null( $path ) ) {
+			// Nothing can be resolved on disk without a base path
+			return $files;
+		}
+		$rFlags = PREG_OFFSET_CAPTURE | PREG_SET_ORDER;
+		if ( preg_match_all( '/' . self::URL_REGEX . '/', $source, $matches, $rFlags ) ) {
+			$base = rtrim( $path, '/' ) . '/';
+			foreach ( $matches as $match ) {
+				$file = $base . $match['file'][0];
+
+				// Only proceed if we can access the file
+				if ( file_exists( $file ) ) {
+					$files[] = $file;
+				}
+			}
+		}
+		return $files;
+	}
+
+	/**
+	 * Determines the mime-type of a file
+	 *
+	 * @param $file string Path of the file to inspect
+	 * @return string|bool Mime-type, or false if the extension fallback is
+	 *   reached and the extension is not in self::$mimeTypes
+	 */
+	protected static function getMimeType( $file ) {
+		$realpath = realpath( $file );
+		// Try a couple of different ways to get the mime-type of a file, in order of
+		// preference
+		if (
+			$realpath
+			&& function_exists( 'finfo_file' )
+			&& function_exists( 'finfo_open' )
+			&& defined( 'FILEINFO_MIME_TYPE' )
+		) {
+			// As of PHP 5.3, this is how you get the mime-type of a file; it uses the Fileinfo
+			// PECL extension
+			return finfo_file( finfo_open( FILEINFO_MIME_TYPE ), $realpath );
+		} else if ( function_exists( 'mime_content_type' ) ) {
+			// Before this was deprecated in PHP 5.3, this was how you got the mime-type of a file
+			return mime_content_type( $file );
+		} else {
+			// Worst-case scenario has happened, use the file extension to infer the mime-type
+			$ext = strtolower( pathinfo( $file, PATHINFO_EXTENSION ) );
+			if ( isset( self::$mimeTypes[$ext] ) ) {
+				return self::$mimeTypes[$ext];
+			}
+		}
+		return false;
+	}
+
+	/**
+	 * Remaps CSS URL paths and automatically embeds data URIs for URL rules
+	 * preceded by an /* @embed * / comment
+	 *
+	 * For each url(...) in $source:
+	 * - URLs with a scheme (fully-qualified URLs, data URIs) are left alone;
+	 * - URLs starting with '/' are passed through wfExpandUrl();
+	 * - other URLs are prefixed with $remote. If the file exists under $local,
+	 *   a timestamp derived from its modification time replaces the query
+	 *   string, and the file may be embedded as a data URI (see $embed).
+	 *   Relative URLs whose file is missing under a non-false $local are left
+	 *   untouched.
+	 *
+	 * @param $source string CSS data to remap
+	 * @param $local string File path where the source was read from, or false
+	 *   to remap without looking at the file system
+	 * @param $remote string URL path to the file
+	 * @param $embed bool Whether rules preceded by an @embed comment should be
+	 *   embedded as data URIs; if false they are only remapped
+	 * @return string Remapped CSS data
+	 */
+	public static function remap( $source, $local, $remote, $embed = true ) {
+		$pattern = '/((?P<embed>\s*\/\*\s*\@embed\s*\*\/)(?P<pre>[^\;\}]*))?' .
+			self::URL_REGEX . '(?P<post>[^;]*)[\;]?/';
+		$offset = 0;
+		while ( preg_match( $pattern, $source, $match, PREG_OFFSET_CAPTURE, $offset ) ) {
+			// Skip fully-qualified URLs and data URIs
+			$urlScheme = parse_url( $match['file'][0], PHP_URL_SCHEME );
+			if ( $urlScheme ) {
+				// Move the offset to the end of the match, leaving it alone
+				$offset = $match[0][1] + strlen( $match[0][0] );
+				continue;
+			}
+			// URLs with absolute paths like /w/index.php need to be expanded
+			// to absolute URLs but otherwise left alone
+			if ( $match['file'][0] !== '' && $match['file'][0][0] === '/' ) {
+				// Replace the file path with an expanded URL
+				$source = substr_replace( $source, wfExpandUrl( $match['file'][0] ),
+					$match['file'][1], strlen( $match['file'][0] )
+				);
+				// Move the offset to the end of the match, leaving it alone
+				$offset = $match[0][1] + strlen( $match[0][0] );
+				continue;
+			}
+			// Shortcuts. The matched @embed comment gets its own variable so that
+			// it does not clobber the $embed parameter; it is an empty string when
+			// the rule has no such comment.
+			$embedComment = $match['embed'][0];
+			$pre = $match['pre'][0];
+			$post = $match['post'][0];
+			$query = $match['query'][0];
+			$url = "{$remote}/{$match['file'][0]}";
+			$file = "{$local}/{$match['file'][0]}";
+			// bug 27052 - Guard against double slashes, because foo//../bar
+			// apparently resolves to foo/bar on (some?) clients
+			$url = preg_replace( '#([^:])//+#', '\1/', $url );
+			$replacement = false;
+			if ( $local !== false && file_exists( $file ) ) {
+				// Add version parameter as a time-stamp in ISO 8601 format,
+				// using Z for the timezone, meaning GMT. The modification time is
+				// rounded to the nearest 100 seconds; round() returns a float,
+				// hence the cast.
+				$url .= '?' . gmdate( 'Y-m-d\TH:i:s\Z', (int)round( filemtime( $file ), -2 ) );
+				// Embedding requires a bit of extra processing, so let's skip that if we can:
+				// the caller must allow it and the rule must carry an @embed comment
+				if ( $embed && $embedComment !== '' ) {
+					$type = self::getMimeType( $file );
+					// Verify the type is known and the file size is below the limit
+					if ( $type && filesize( $file ) < self::EMBED_SIZE_LIMIT ) {
+						$data = base64_encode( file_get_contents( $file ) );
+						// Build 2 CSS properties; one which uses a base64 encoded data URI in place
+						// of the @embed comment to try and retain line-number integrity, and the
+						// other with a remapped and versioned URL and an Internet Explorer hack
+						// making it ignored in all browsers that support data URIs
+						$replacement = "{$pre}url(data:{$type};base64,{$data}){$post};";
+						$replacement .= "{$pre}url({$url}){$post}!ie;";
+					}
+				}
+				if ( $replacement === false ) {
+					// Assume that all paths are relative to $remote, and make them absolute
+					$replacement = "{$embedComment}{$pre}url({$url}){$post};";
+				}
+			} else if ( $local === false ) {
+				// Assume that all paths are relative to $remote, and make them absolute
+				$replacement = "{$embedComment}{$pre}url({$url}{$query}){$post};";
+			}
+			if ( $replacement !== false ) {
+				// Perform replacement on the source
+				$source = substr_replace(
+					$source, $replacement, $match[0][1], strlen( $match[0][0] )
+				);
+				// Move the offset to the end of the replacement in the source
+				$offset = $match[0][1] + strlen( $replacement );
+				continue;
+			}
+			// Move the offset to the end of the match, leaving it alone
+			$offset = $match[0][1] + strlen( $match[0][0] );
+		}
+		return $source;
+	}
+
+	/**
+	 * Removes whitespace and comments from CSS data
+	 *
+	 * Runs of whitespace are collapsed to a single space first, then comments
+	 * are stripped, then the spaces around common punctuation are dropped.
+	 *
+	 * @param $css string CSS data to minify
+	 * @return string Minified CSS data
+	 */
+	public static function minify( $css ) {
+		return trim(
+			str_replace(
+				array( '; ', ': ', ' {', '{ ', ', ', '} ', ';}' ),
+				array( ';', ':', '{', '{', ',', '}', '}' ),
+				preg_replace( array( '/\s+/', '/\/\*.*?\*\//s' ), array( ' ', '' ), $css )
+			)
+		);
+	}
+}
diff --git a/includes/libs/IEContentAnalyzer.php b/includes/libs/IEContentAnalyzer.php
new file mode 100644
index 00000000..a2ef1a09
--- /dev/null
+++ b/includes/libs/IEContentAnalyzer.php
@@ -0,0 +1,824 @@
+<?php
+
+/**
+ * This class simulates Microsoft Internet Explorer's terribly broken and
+ * insecure MIME type detection algorithm. It can be used to check web uploads
+ * with an apparently safe type, to see if IE will reinterpret them to produce
+ * something dangerous.
+ *
+ * It is full of bugs and strange design choices, and should not under any
+ * circumstances be used to determine a MIME type to present to a user or
+ * client. (Apple Safari developers, this means you too.)
+ *
+ * This class is based on a disassembly of IE 5.0, 6.0 and 7.0. Although I have
+ * attempted to ensure that this code works in exactly the same way as Internet
+ * Explorer, it does not share any source code, or creative choices such as
+ * variable names, thus I (Tim Starling) claim copyright on it.
+ *
+ * It may be redistributed without restriction. To aid reuse, this class does
+ * not depend on any MediaWiki module.
+ */
+class IEContentAnalyzer {
+ /**
+ * Relevant data taken from the type table in IE 5
+ */
+ protected $baseTypeTable = array(
+ 'ambiguous' /*1*/ => array(
+ 'text/plain',
+ 'application/octet-stream',
+ 'application/x-netcdf', // [sic]
+ ),
+ 'text' /*3*/ => array(
+ 'text/richtext', 'image/x-bitmap', 'application/postscript', 'application/base64',
+ 'application/macbinhex40', 'application/x-cdf', 'text/scriptlet'
+ ),
+ 'binary' /*4*/ => array(
+ 'application/pdf', 'audio/x-aiff', 'audio/basic', 'audio/wav', 'image/gif',
+ 'image/pjpeg', 'image/jpeg', 'image/tiff', 'image/x-png', 'image/png', 'image/bmp',
+ 'image/x-jg', 'image/x-art', 'image/x-emf', 'image/x-wmf', 'video/avi',
+ 'video/x-msvideo', 'video/mpeg', 'application/x-compressed',
+ 'application/x-zip-compressed', 'application/x-gzip-compressed', 'application/java',
+ 'application/x-msdownload'
+ ),
+ 'html' /*5*/ => array( 'text/html' ),
+ );
+
+ /**
+ * Changes to the type table in later versions of IE
+ */
+ protected $addedTypes = array(
+ 'ie07' => array(
+ 'text' => array( 'text/xml', 'application/xml' )
+ ),
+ );
+
+ /**
+ * An approximation of the "Content Type" values in HKEY_CLASSES_ROOT in a
+ * typical Windows installation.
+ *
+ * Used for extension to MIME type mapping if detection fails.
+ */
+ protected $registry = array(
+ '.323' => 'text/h323',
+ '.3g2' => 'video/3gpp2',
+ '.3gp' => 'video/3gpp',
+ '.3gp2' => 'video/3gpp2',
+ '.3gpp' => 'video/3gpp',
+ '.aac' => 'audio/aac',
+ '.ac3' => 'audio/ac3',
+ '.accda' => 'application/msaccess',
+ '.accdb' => 'application/msaccess',
+ '.accdc' => 'application/msaccess',
+ '.accde' => 'application/msaccess',
+ '.accdr' => 'application/msaccess',
+ '.accdt' => 'application/msaccess',
+ '.ade' => 'application/msaccess',
+ '.adp' => 'application/msaccess',
+ '.adts' => 'audio/aac',
+ '.ai' => 'application/postscript',
+ '.aif' => 'audio/aiff',
+ '.aifc' => 'audio/aiff',
+ '.aiff' => 'audio/aiff',
+ '.amc' => 'application/x-mpeg',
+ '.application' => 'application/x-ms-application',
+ '.asf' => 'video/x-ms-asf',
+ '.asx' => 'video/x-ms-asf',
+ '.au' => 'audio/basic',
+ '.avi' => 'video/avi',
+ '.bmp' => 'image/bmp',
+ '.caf' => 'audio/x-caf',
+ '.cat' => 'application/vnd.ms-pki.seccat',
+ '.cbo' => 'application/sha',
+ '.cdda' => 'audio/aiff',
+ '.cer' => 'application/x-x509-ca-cert',
+ '.conf' => 'text/plain',
+ '.crl' => 'application/pkix-crl',
+ '.crt' => 'application/x-x509-ca-cert',
+ '.css' => 'text/css',
+ '.csv' => 'application/vnd.ms-excel',
+ '.der' => 'application/x-x509-ca-cert',
+ '.dib' => 'image/bmp',
+ '.dif' => 'video/x-dv',
+ '.dll' => 'application/x-msdownload',
+ '.doc' => 'application/msword',
+ '.docm' => 'application/vnd.ms-word.document.macroEnabled.12',
+ '.docx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
+ '.dot' => 'application/msword',
+ '.dotm' => 'application/vnd.ms-word.template.macroEnabled.12',
+ '.dotx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.template',
+ '.dv' => 'video/x-dv',
+ '.dwfx' => 'model/vnd.dwfx+xps',
+ '.edn' => 'application/vnd.adobe.edn',
+ '.eml' => 'message/rfc822',
+ '.eps' => 'application/postscript',
+ '.etd' => 'application/x-ebx',
+ '.exe' => 'application/x-msdownload',
+ '.fdf' => 'application/vnd.fdf',
+ '.fif' => 'application/fractals',
+ '.gif' => 'image/gif',
+ '.gsm' => 'audio/x-gsm',
+ '.hqx' => 'application/mac-binhex40',
+ '.hta' => 'application/hta',
+ '.htc' => 'text/x-component',
+ '.htm' => 'text/html',
+ '.html' => 'text/html',
+ '.htt' => 'text/webviewhtml',
+ '.hxa' => 'application/xml',
+ '.hxc' => 'application/xml',
+ '.hxd' => 'application/octet-stream',
+ '.hxe' => 'application/xml',
+ '.hxf' => 'application/xml',
+ '.hxh' => 'application/octet-stream',
+ '.hxi' => 'application/octet-stream',
+ '.hxk' => 'application/xml',
+ '.hxq' => 'application/octet-stream',
+ '.hxr' => 'application/octet-stream',
+ '.hxs' => 'application/octet-stream',
+ '.hxt' => 'application/xml',
+ '.hxv' => 'application/xml',
+ '.hxw' => 'application/octet-stream',
+ '.ico' => 'image/x-icon',
+ '.iii' => 'application/x-iphone',
+ '.ins' => 'application/x-internet-signup',
+ '.iqy' => 'text/x-ms-iqy',
+ '.isp' => 'application/x-internet-signup',
+ '.jfif' => 'image/jpeg',
+ '.jnlp' => 'application/x-java-jnlp-file',
+ '.jpe' => 'image/jpeg',
+ '.jpeg' => 'image/jpeg',
+ '.jpg' => 'image/jpeg',
+ '.jtx' => 'application/x-jtx+xps',
+ '.latex' => 'application/x-latex',
+ '.log' => 'text/plain',
+ '.m1v' => 'video/mpeg',
+ '.m2v' => 'video/mpeg',
+ '.m3u' => 'audio/x-mpegurl',
+ '.mac' => 'image/x-macpaint',
+ '.man' => 'application/x-troff-man',
+ '.mda' => 'application/msaccess',
+ '.mdb' => 'application/msaccess',
+ '.mde' => 'application/msaccess',
+ '.mfp' => 'application/x-shockwave-flash',
+ '.mht' => 'message/rfc822',
+ '.mhtml' => 'message/rfc822',
+ '.mid' => 'audio/mid',
+ '.midi' => 'audio/mid',
+ '.mod' => 'video/mpeg',
+ '.mov' => 'video/quicktime',
+ '.mp2' => 'video/mpeg',
+ '.mp2v' => 'video/mpeg',
+ '.mp3' => 'audio/mpeg',
+ '.mp4' => 'video/mp4',
+ '.mpa' => 'video/mpeg',
+ '.mpe' => 'video/mpeg',
+ '.mpeg' => 'video/mpeg',
+ '.mpf' => 'application/vnd.ms-mediapackage',
+ '.mpg' => 'video/mpeg',
+ '.mpv2' => 'video/mpeg',
+ '.mqv' => 'video/quicktime',
+ '.NMW' => 'application/nmwb',
+ '.nws' => 'message/rfc822',
+ '.odc' => 'text/x-ms-odc',
+ '.ols' => 'application/vnd.ms-publisher',
+ '.p10' => 'application/pkcs10',
+ '.p12' => 'application/x-pkcs12',
+ '.p7b' => 'application/x-pkcs7-certificates',
+ '.p7c' => 'application/pkcs7-mime',
+ '.p7m' => 'application/pkcs7-mime',
+ '.p7r' => 'application/x-pkcs7-certreqresp',
+ '.p7s' => 'application/pkcs7-signature',
+ '.pct' => 'image/pict',
+ '.pdf' => 'application/pdf',
+ '.pdx' => 'application/vnd.adobe.pdx',
+ '.pfx' => 'application/x-pkcs12',
+ '.pic' => 'image/pict',
+ '.pict' => 'image/pict',
+ '.pinstall' => 'application/x-picasa-detect',
+ '.pko' => 'application/vnd.ms-pki.pko',
+ '.png' => 'image/png',
+ '.pnt' => 'image/x-macpaint',
+ '.pntg' => 'image/x-macpaint',
+ '.pot' => 'application/vnd.ms-powerpoint',
+ '.potm' => 'application/vnd.ms-powerpoint.template.macroEnabled.12',
+ '.potx' => 'application/vnd.openxmlformats-officedocument.presentationml.template',
+ '.ppa' => 'application/vnd.ms-powerpoint',
+ '.ppam' => 'application/vnd.ms-powerpoint.addin.macroEnabled.12',
+ '.pps' => 'application/vnd.ms-powerpoint',
+ '.ppsm' => 'application/vnd.ms-powerpoint.slideshow.macroEnabled.12',
+ '.ppsx' => 'application/vnd.openxmlformats-officedocument.presentationml.slideshow',
+ '.ppt' => 'application/vnd.ms-powerpoint',
+ '.pptm' => 'application/vnd.ms-powerpoint.presentation.macroEnabled.12',
+ '.pptx' => 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
+ '.prf' => 'application/pics-rules',
+ '.ps' => 'application/postscript',
+ '.pub' => 'application/vnd.ms-publisher',
+ '.pwz' => 'application/vnd.ms-powerpoint',
+ '.py' => 'text/plain',
+ '.pyw' => 'text/plain',
+ '.qht' => 'text/x-html-insertion',
+ '.qhtm' => 'text/x-html-insertion',
+ '.qt' => 'video/quicktime',
+ '.qti' => 'image/x-quicktime',
+ '.qtif' => 'image/x-quicktime',
+ '.qtl' => 'application/x-quicktimeplayer',
+ '.rat' => 'application/rat-file',
+ '.rmf' => 'application/vnd.adobe.rmf',
+ '.rmi' => 'audio/mid',
+ '.rqy' => 'text/x-ms-rqy',
+ '.rtf' => 'application/msword',
+ '.sct' => 'text/scriptlet',
+ '.sd2' => 'audio/x-sd2',
+ '.sdp' => 'application/sdp',
+ '.shtml' => 'text/html',
+ '.sit' => 'application/x-stuffit',
+ '.sldm' => 'application/vnd.ms-powerpoint.slide.macroEnabled.12',
+ '.sldx' => 'application/vnd.openxmlformats-officedocument.presentationml.slide',
+ '.slk' => 'application/vnd.ms-excel',
+ '.snd' => 'audio/basic',
+ '.so' => 'application/x-apachemodule',
+ '.sol' => 'text/plain',
+ '.sor' => 'text/plain',
+ '.spc' => 'application/x-pkcs7-certificates',
+ '.spl' => 'application/futuresplash',
+ '.sst' => 'application/vnd.ms-pki.certstore',
+ '.stl' => 'application/vnd.ms-pki.stl',
+ '.swf' => 'application/x-shockwave-flash',
+ '.thmx' => 'application/vnd.ms-officetheme',
+ '.tif' => 'image/tiff',
+ '.tiff' => 'image/tiff',
+ '.txt' => 'text/plain',
+ '.uls' => 'text/iuls',
+ '.vcf' => 'text/x-vcard',
+ '.vdx' => 'application/vnd.ms-visio.viewer',
+ '.vsd' => 'application/vnd.ms-visio.viewer',
+ '.vss' => 'application/vnd.ms-visio.viewer',
+ '.vst' => 'application/vnd.ms-visio.viewer',
+ '.vsx' => 'application/vnd.ms-visio.viewer',
+ '.vtx' => 'application/vnd.ms-visio.viewer',
+ '.wav' => 'audio/wav',
+ '.wax' => 'audio/x-ms-wax',
+ '.wbk' => 'application/msword',
+ '.wdp' => 'image/vnd.ms-photo',
+ '.wiz' => 'application/msword',
+ '.wm' => 'video/x-ms-wm',
+ '.wma' => 'audio/x-ms-wma',
+ '.wmd' => 'application/x-ms-wmd',
+ '.wmv' => 'video/x-ms-wmv',
+ '.wmx' => 'video/x-ms-wmx',
+ '.wmz' => 'application/x-ms-wmz',
+ '.wpl' => 'application/vnd.ms-wpl',
+ '.wsc' => 'text/scriptlet',
+ '.wvx' => 'video/x-ms-wvx',
+ '.xaml' => 'application/xaml+xml',
+ '.xbap' => 'application/x-ms-xbap',
+ '.xdp' => 'application/vnd.adobe.xdp+xml',
+ '.xfdf' => 'application/vnd.adobe.xfdf',
+ '.xht' => 'application/xhtml+xml',
+ '.xhtml' => 'application/xhtml+xml',
+ '.xla' => 'application/vnd.ms-excel',
+ '.xlam' => 'application/vnd.ms-excel.addin.macroEnabled.12',
+ '.xlk' => 'application/vnd.ms-excel',
+ '.xll' => 'application/vnd.ms-excel',
+ '.xlm' => 'application/vnd.ms-excel',
+ '.xls' => 'application/vnd.ms-excel',
+ '.xlsb' => 'application/vnd.ms-excel.sheet.binary.macroEnabled.12',
+ '.xlsm' => 'application/vnd.ms-excel.sheet.macroEnabled.12',
+ '.xlsx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
+ '.xlt' => 'application/vnd.ms-excel',
+ '.xltm' => 'application/vnd.ms-excel.template.macroEnabled.12',
+ '.xltx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.template',
+ '.xlw' => 'application/vnd.ms-excel',
+ '.xml' => 'text/xml',
+ '.xps' => 'application/vnd.ms-xpsdocument',
+ '.xsl' => 'text/xml',
+ );
+
+ /**
+ * IE versions which have been analysed to bring you this class, and for
+ * which some substantive difference exists. These will appear as keys
+ * in the return value of getRealMimesFromData(). The names are chosen to sort correctly.
+ */
+ protected $versions = array( 'ie05', 'ie06', 'ie07', 'ie07.strict', 'ie07.nohtml' );
+
+ /**
+ * Type table with versions expanded
+ */
+ protected $typeTable = array();
+
+ /**
+  * Build the per-version type table.
+  *
+  * Starts from $this->baseTypeTable and walks $this->versions in order. Any
+  * entries in $this->addedTypes for a version are merged in before the
+  * snapshot for that version is stored, so additions carry forward to every
+  * later version.
+  */
+ function __construct() {
+ 	$accumulated = $this->baseTypeTable;
+ 	foreach ( $this->versions as $versionName ) {
+ 		$extras = isset( $this->addedTypes[$versionName] )
+ 			? $this->addedTypes[$versionName]
+ 			: array();
+ 		foreach ( $extras as $formatName => $mimeList ) {
+ 			$accumulated[$formatName] = array_merge( $accumulated[$formatName], $mimeList );
+ 		}
+ 		$this->typeTable[$versionName] = $accumulated;
+ 	}
+ }
+
+ /**
+  * Detect the MIME type for every known IE version via getMimesFromData(),
+  * then pass each result through translateMimeType() so that IE's private
+  * type names are replaced by ones other applications understand.
+  *
+  * @param $fileName String: the file name (forwarded to getMimesFromData())
+  * @param $chunk String: the first 256 bytes of the file
+  * @param $proposed String: the MIME type proposed by the server
+  *
+  * @return Array: map of IE version to detected mime type
+  */
+ public function getRealMimesFromData( $fileName, $chunk, $proposed ) {
+ 	return array_map(
+ 		array( $this, 'translateMimeType' ),
+ 		$this->getMimesFromData( $fileName, $chunk, $proposed )
+ 	);
+ }
+
+ /**
+  * Map one of IE's private MIME type names to the more commonly used name.
+  * Types that have no entry in the table are returned unchanged.
+  *
+  * @param $type String: MIME type as reported by the IE emulation
+  * @return String: translated MIME type
+  */
+ public function translateMimeType( $type ) {
+ 	static $ieToCommon = array(
+ 		'image/pjpeg' => 'image/jpeg',
+ 		'image/x-png' => 'image/png',
+ 		'image/x-wmf' => 'application/x-msmetafile',
+ 		'image/bmp' => 'image/x-bmp',
+ 		'application/x-zip-compressed' => 'application/zip',
+ 		'application/x-compressed' => 'application/x-compress',
+ 		'application/x-gzip-compressed' => 'application/x-gzip',
+ 		'audio/mid' => 'audio/midi',
+ 	);
+ 	return isset( $ieToCommon[$type] ) ? $ieToCommon[$type] : $type;
+ }
+
+ /**
+  * Run the detection once per entry in $this->versions and collect the
+  * untranslated (IE-style) MIME type that each version would report.
+  *
+  * @param $fileName String: the file name (forwarded to getMimeTypeForVersion())
+  * @param $chunk String: the first 256 bytes of the file
+  * @param $proposed String: the MIME type proposed by the server
+  *
+  * @return Array: map of IE version to detected mime type
+  */
+ public function getMimesFromData( $fileName, $chunk, $proposed ) {
+ 	$detectedByVersion = array();
+ 	foreach ( $this->versions as $versionName ) {
+ 		$detectedByVersion[$versionName] =
+ 			$this->getMimeTypeForVersion( $versionName, $fileName, $chunk, $proposed );
+ 	}
+ 	return $detectedByVersion;
+ }
+
+ /**
+ * Get the MIME type for a given named version
+ *
+ * The checks run in a fixed order and the first one that matches decides
+ * the result: trust of the proposed type, exact agreement between the
+ * proposed type and a magic number, markers found by sampleData(), the
+ * text/binary heuristic, and finally a lookup of the file extension in
+ * $this->registry.
+ *
+ * @param $version String: version name, used as a key into the type table and
+ *   compared against 'ie07' / searched for 'strict' and 'nohtml'
+ * @param $fileName String: the file name; only the part from the last dot
+ *   onwards is used, and only when detection stays ambiguous
+ * @param $chunk String: leading bytes of the file; truncated to 255 bytes here
+ * @param $proposed String: the MIME type proposed by the server; anything
+ *   from the first semicolon onwards is dropped
+ *
+ * @return String: the detected MIME type, using IE's own (untranslated) names
+ */
+ protected function getMimeTypeForVersion( $version, $fileName, $chunk, $proposed ) {
+ // Strip text after a semicolon
+ $semiPos = strpos( $proposed, ';' );
+ if ( $semiPos !== false ) {
+ $proposed = substr( $proposed, 0, $semiPos );
+ }
+
+ // A proposed type of unknown format is returned as-is without looking at
+ // the data, except for the two multipart types which are still sniffed
+ $proposedFormat = $this->getDataFormat( $version, $proposed );
+ if ( $proposedFormat == 'unknown'
+ && $proposed != 'multipart/mixed'
+ && $proposed != 'multipart/x-mixed-replace' )
+ {
+ return $proposed;
+ }
+ // No data to sniff
+ if ( strval( $chunk ) === '' ) {
+ return $proposed;
+ }
+
+ // Truncate chunk at 255 bytes
+ $chunk = substr( $chunk, 0, 255 );
+
+ // IE does the Check*Headers() calls last, and instead does the following image
+ // type checks by directly looking for the magic numbers. What I do here should
+ // have the same effect since the magic number checks are identical in both cases.
+ $result = $this->sampleData( $version, $chunk );
+ $sampleFound = $result['found'];
+ $counters = $result['counters'];
+ $binaryType = $this->checkBinaryHeaders( $version, $chunk );
+ $textType = $this->checkTextHeaders( $version, $chunk );
+
+ if ( $proposed == 'text/html' && isset( $sampleFound['html'] ) ) {
+ return 'text/html';
+ }
+ if ( $proposed == 'image/gif' && $binaryType == 'image/gif' ) {
+ return 'image/gif';
+ }
+ if ( ( $proposed == 'image/pjpeg' || $proposed == 'image/jpeg' )
+ && $binaryType == 'image/pjpeg' )
+ {
+ return $proposed;
+ }
+ // PNG check added in IE 7
+ if ( $version >= 'ie07'
+ && ( $proposed == 'image/x-png' || $proposed == 'image/png' )
+ && $binaryType == 'image/x-png' )
+ {
+ return $proposed;
+ }
+
+ // CDF was removed in IE 7 so it won't be in $sampleFound for later versions
+ if ( isset( $sampleFound['cdf'] ) ) {
+ return 'application/x-cdf';
+ }
+
+ // RSS and Atom were added in IE 7 so they won't be in $sampleFound for
+ // previous versions
+ if ( isset( $sampleFound['rss'] ) ) {
+ return 'application/rss+xml';
+ }
+ if ( isset( $sampleFound['rdf-tag'] )
+ && isset( $sampleFound['rdf-url'] )
+ && isset( $sampleFound['rdf-purl'] ) )
+ {
+ return 'application/rss+xml';
+ }
+ if ( isset( $sampleFound['atom'] ) ) {
+ return 'application/atom+xml';
+ }
+
+ if ( isset( $sampleFound['xml'] ) ) {
+ // TODO: I'm not sure under what circumstances this flag is enabled
+ if ( strpos( $version, 'strict' ) !== false ) {
+ if ( $proposed == 'text/html' || $proposed == 'text/xml' ) {
+ return 'text/xml';
+ }
+ } else {
+ return 'text/xml';
+ }
+ }
+ if ( isset( $sampleFound['html'] ) ) {
+ // TODO: I'm not sure under what circumstances this flag is enabled
+ if ( strpos( $version, 'nohtml' ) !== false ) {
+ if ( $proposed == 'text/plain' ) {
+ return 'text/html';
+ }
+ } else {
+ return 'text/html';
+ }
+ }
+ if ( isset( $sampleFound['xbm'] ) ) {
+ return 'image/x-bitmap';
+ }
+ if ( isset( $sampleFound['binhex'] ) ) {
+ return 'application/macbinhex40';
+ }
+ if ( isset( $sampleFound['scriptlet'] ) ) {
+ if ( strpos( $version, 'strict' ) !== false ) {
+ if ( $proposed == 'text/plain' || $proposed == 'text/scriptlet' ) {
+ return 'text/scriptlet';
+ }
+ } else {
+ return 'text/scriptlet';
+ }
+ }
+
+ // Freaky heuristics to determine if the data is text or binary
+ // The heuristic is of course broken for non-ASCII text
+ // The data counts as binary when at least one control character was seen
+ // and (ff + low) is less than 16 times (ctrl + high). For binary data the
+ // binary magic number is preferred over the text one, and vice versa.
+ if ( $counters['ctrl'] != 0 && ( $counters['ff'] + $counters['low'] )
+ < ( $counters['ctrl'] + $counters['high'] ) * 16 )
+ {
+ $kindOfBinary = true;
+ $type = $binaryType ? $binaryType : $textType;
+ if ( $type === false ) {
+ $type = 'application/octet-stream';
+ }
+ } else {
+ $kindOfBinary = false;
+ $type = $textType ? $textType : $binaryType;
+ if ( $type === false ) {
+ $type = 'text/plain';
+ }
+ }
+
+ // Check if the output format is ambiguous
+ // This generally means that detection failed, real types aren't ambiguous
+ $detectedFormat = $this->getDataFormat( $version, $type );
+ if ( $detectedFormat != 'ambiguous' ) {
+ return $type;
+ }
+
+ if ( $proposedFormat != 'ambiguous' ) {
+ // FormatAgreesWithData()
+ if ( $proposedFormat == 'text' && !$kindOfBinary ) {
+ return $proposed;
+ }
+ if ( $proposedFormat == 'binary' && $kindOfBinary ) {
+ return $proposed;
+ }
+ if ( $proposedFormat == 'html' ) {
+ return $proposed;
+ }
+ }
+
+ // Find a MIME type by searching the registry for the file extension.
+ $dotPos = strrpos( $fileName, '.' );
+ if ( $dotPos === false ) {
+ return $type;
+ }
+ // The extension keeps its leading dot, matching the registry keys
+ // NOTE(review): this lookup is case-sensitive. If the registry keys are all
+ // lower case, an upper-case extension such as ".XML" will not match here;
+ // confirm whether callers normalise $fileName first.
+ $ext = substr( $fileName, $dotPos );
+ if ( isset( $this->registry[$ext] ) ) {
+ return $this->registry[$ext];
+ }
+
+ // TODO: If the extension has an application registered to it, IE will return
+ // application/octet-stream. We'll skip that, so we could erroneously
+ // return text/plain or application/x-netcdf where application/octet-stream
+ // would be correct.
+
+ return $type;
+ }
+
+ /**
+  * Look for a known text-format signature at the very start of the chunk.
+  * Confirmed same in 5 and 7.
+  *
+  * @param $version String: IE version name (not consulted by this check)
+  * @param $chunk String: leading bytes of the file
+  * @return String|false: MIME type of the first matching signature, or false
+  */
+ private function checkTextHeaders( $version, $chunk ) {
+ 	// Signature => MIME type, tested in this order
+ 	$signatures = array(
+ 		'%PDF' => 'application/pdf',
+ 		'%!' => 'application/postscript',
+ 		'{\\rtf' => 'text/richtext',
+ 		'begin' => 'application/base64',
+ 	);
+ 	foreach ( $signatures as $magic => $mimeType ) {
+ 		if ( substr( $chunk, 0, strlen( $magic ) ) == $magic ) {
+ 			return $mimeType;
+ 		}
+ 	}
+ 	return false;
+ }
+
+ /**
+  * Look for a known binary magic number at the start of the chunk.
+  * Confirmed same in 5 and 7.
+  *
+  * The checks are tried in a fixed order and the first match wins. The
+  * returned names are the ones IE uses, several of which are non-standard.
+  *
+  * @param $version String: IE version name (not consulted by this check)
+  * @param $chunk String: leading bytes of the file
+  * @return String|false: MIME type of the first matching signature, or false
+  */
+ private function checkBinaryHeaders( $version, $chunk ) {
+ 	$lead2 = substr( $chunk, 0, 2 );
+ 	$lead3 = substr( $chunk, 0, 3 );
+ 	$lead4 = substr( $chunk, 0, 4 );
+ 	$lead5 = substr( $chunk, 0, 5 );
+ 	$lead8 = substr( $chunk, 0, 8 );
+ 	// Four bytes at offset 8, consulted by the RIFF and FORM checks
+ 	$tagAt8 = substr( $chunk, 8, 4 );
+
+ 	// The GIF signature is compared case-insensitively
+ 	if ( in_array( strtoupper( $lead5 ), array( 'GIF87', 'GIF89' ) ) ) {
+ 		return 'image/gif';
+ 	}
+ 	if ( $lead2 == "\xff\xd8" ) {
+ 		// Actually plain JPEG but this is what IE returns
+ 		return 'image/pjpeg';
+ 	}
+ 	// BMP: "BM" plus four zero bytes at offsets 6 to 9
+ 	if ( $lead2 == 'BM' && substr( $chunk, 6, 4 ) == "\000\000\000\000" ) {
+ 		// Another non-standard MIME
+ 		return 'image/bmp';
+ 	}
+ 	if ( $lead4 == 'RIFF' && $tagAt8 == 'WAVE' ) {
+ 		return 'audio/wav';
+ 	}
+ 	// These were integer literals in IE
+ 	// Perhaps the author was not sure what the target endianness was
+ 	if ( in_array( $lead4, array( ".sd\000", ".snd", "\000ds.", "dns." ) ) ) {
+ 		return 'audio/basic';
+ 	}
+ 	if ( $lead3 == "MM\000" ) {
+ 		return 'image/tiff';
+ 	}
+ 	if ( $lead2 == 'MZ' ) {
+ 		return 'application/x-msdownload';
+ 	}
+ 	if ( $lead8 == "\x89PNG\x0d\x0a\x1a\x0a" ) {
+ 		// [sic]
+ 		return 'image/x-png';
+ 	}
+ 	// "JG", then a third byte in 3..31 and a zero fifth byte
+ 	if ( $lead2 == 'JG' && strlen( $chunk ) >= 5 ) {
+ 		$thirdByte = ord( $chunk[2] );
+ 		$fifthByte = ord( $chunk[4] );
+ 		if ( $thirdByte >= 3 && $thirdByte <= 31 && $fifthByte == 0 ) {
+ 			return 'image/x-jg';
+ 		}
+ 	}
+ 	// More endian confusion?
+ 	if ( $lead4 == 'MROF' ) {
+ 		return 'audio/x-aiff';
+ 	}
+ 	if ( $lead4 == 'FORM' && in_array( $tagAt8, array( 'AIFF', 'AIFC' ) ) ) {
+ 		return 'audio/x-aiff';
+ 	}
+ 	if ( $lead4 == 'RIFF' && $tagAt8 == 'AVI ' ) {
+ 		return 'video/avi';
+ 	}
+ 	if ( in_array( $lead4, array( "\x00\x00\x01\xb3", "\x00\x00\x01\xba" ) ) ) {
+ 		return 'video/mpeg';
+ 	}
+ 	if ( $lead4 == "\001\000\000\000" && substr( $chunk, 40, 4 ) == ' EMF' ) {
+ 		return 'image/x-emf';
+ 	}
+ 	if ( $lead4 == "\xd7\xcd\xc6\x9a" ) {
+ 		return 'image/x-wmf';
+ 	}
+ 	if ( $lead4 == "\xca\xfe\xba\xbe" ) {
+ 		return 'application/java';
+ 	}
+ 	if ( $lead2 == 'PK' ) {
+ 		return 'application/x-zip-compressed';
+ 	}
+ 	if ( $lead2 == "\x1f\x9d" ) {
+ 		return 'application/x-compressed';
+ 	}
+ 	if ( $lead2 == "\x1f\x8b" ) {
+ 		return 'application/x-gzip-compressed';
+ 	}
+ 	// Skip redundant check for ZIP
+ 	if ( $lead5 == "MThd\000" ) {
+ 		return 'audio/mid';
+ 	}
+ 	if ( $lead4 == '%PDF' ) {
+ 		return 'application/pdf';
+ 	}
+ 	return false;
+ }
+
+ /**
+  * Do heuristic checks on the bulk of the data sample: count character
+  * classes and search for HTML tags and other format markers.
+  *
+  * Some markers (scriptlet, RSS, Atom, XBM, complete RDF) stop the scan as
+  * soon as they are seen, so the counters only cover the bytes read up to
+  * that point.
+  *
+  * @param $version String: IE version name; versions that sort before 'ie07'
+  *   look for CHANNEL (CDF), later ones look for RSS, rdf:RDF and FEED instead
+  * @param $chunk String: the data sample to scan
+  * @return Array: 'found' => map of marker name to true,
+  *   'counters' => map of character class name to number of bytes counted
+  */
+ protected function sampleData( $version, $chunk ) {
+ 	$found = array();
+ 	$counters = array(
+ 		'ctrl' => 0,
+ 		'high' => 0,
+ 		'low' => 0,
+ 		'lf' => 0,
+ 		'cr' => 0,
+ 		'ff' => 0
+ 	);
+ 	$htmlTags = array(
+ 		'html',
+ 		'head',
+ 		'title',
+ 		'body',
+ 		'script',
+ 		'a href',
+ 		'pre',
+ 		'img',
+ 		'plaintext',
+ 		'table'
+ 	);
+ 	$rdfUrl = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
+ 	$rdfPurl = 'http://purl.org/rss/1.0/';
+ 	$xbmMagic1 = '#define';
+ 	$xbmMagic2 = '_width';
+ 	$xbmMagic3 = '_bits';
+ 	$binhexMagic = 'converted with BinHex';
+
+ 	// The chunk is never modified, so measure it once
+ 	$chunkLength = strlen( $chunk );
+ 	for ( $offset = 0; $offset < $chunkLength; $offset++ ) {
+ 		$curChar = $chunk[$offset];
+ 		// Classify the byte. Everything except plain printable ASCII is only
+ 		// counted and takes no part in the marker searches below.
+ 		if ( $curChar == "\x0a" ) {
+ 			$counters['lf']++;
+ 			continue;
+ 		} elseif ( $curChar == "\x0d" ) {
+ 			$counters['cr']++;
+ 			continue;
+ 		} elseif ( $curChar == "\x0c" ) {
+ 			$counters['ff']++;
+ 			continue;
+ 		} elseif ( $curChar == "\t" ) {
+ 			$counters['low']++;
+ 			continue;
+ 		} elseif ( ord( $curChar ) < 32 ) {
+ 			$counters['ctrl']++;
+ 			continue;
+ 		} elseif ( ord( $curChar ) >= 128 ) {
+ 			$counters['high']++;
+ 			continue;
+ 		}
+
+ 		$counters['low']++;
+ 		if ( $curChar == '<' ) {
+ 			// XML
+ 			$remainder = substr( $chunk, $offset + 1 );
+ 			if ( !strncasecmp( $remainder, '?XML', 4 ) ) {
+ 				$nextChar = substr( $chunk, $offset + 5, 1 );
+ 				if ( $nextChar == ':' || $nextChar == ' ' || $nextChar == "\t" ) {
+ 					$found['xml'] = true;
+ 				}
+ 			}
+ 			// Scriptlet (JSP)
+ 			if ( !strncasecmp( $remainder, 'SCRIPTLET', 9 ) ) {
+ 				$found['scriptlet'] = true;
+ 				break;
+ 			}
+ 			// HTML
+ 			foreach ( $htmlTags as $tag ) {
+ 				if ( !strncasecmp( $remainder, $tag, strlen( $tag ) ) ) {
+ 					$found['html'] = true;
+ 				}
+ 			}
+ 			// Skip broken check for additional tags (HR etc.)
+
+ 			// CHANNEL replaced by RSS, RDF and FEED in IE 7
+ 			if ( $version < 'ie07' ) {
+ 				if ( !strncasecmp( $remainder, 'CHANNEL', 7 ) ) {
+ 					$found['cdf'] = true;
+ 				}
+ 			} else {
+ 				// RSS
+ 				if ( !strncasecmp( $remainder, 'RSS', 3 ) ) {
+ 					$found['rss'] = true;
+ 					break; // return from SampleData
+ 				}
+ 				if ( !strncasecmp( $remainder, 'rdf:RDF', 7 ) ) {
+ 					$found['rdf-tag'] = true;
+ 					// no break
+ 				}
+ 				if ( !strncasecmp( $remainder, 'FEED', 4 ) ) {
+ 					$found['atom'] = true;
+ 					break;
+ 				}
+ 			}
+ 			continue;
+ 		}
+ 		// Skip broken check for -->
+
+ 		// RSS URL checks
+ 		// For some reason both URLs must appear before it is recognised
+ 		$remainder = substr( $chunk, $offset );
+ 		if ( !strncasecmp( $remainder, $rdfUrl, strlen( $rdfUrl ) ) ) {
+ 			$found['rdf-url'] = true;
+ 			if ( isset( $found['rdf-tag'] )
+ 				&& isset( $found['rdf-purl'] ) ) // [sic]
+ 			{
+ 				break;
+ 			}
+ 			continue;
+ 		}
+
+ 		if ( !strncasecmp( $remainder, $rdfPurl, strlen( $rdfPurl ) ) ) {
+ 			// Record the match. Without this flag the caller's three-way
+ 			// rdf-tag / rdf-url / rdf-purl test could never succeed.
+ 			$found['rdf-purl'] = true;
+ 			if ( isset( $found['rdf-tag'] )
+ 				&& isset( $found['rdf-url'] ) ) // [sic]
+ 			{
+ 				break;
+ 			}
+ 			continue;
+ 		}
+
+ 		// XBM checks
+ 		// The three magic strings must be seen in order: #define, _width, _bits
+ 		if ( !strncasecmp( $remainder, $xbmMagic1, strlen( $xbmMagic1 ) ) ) {
+ 			$found['xbm1'] = true;
+ 			continue;
+ 		}
+ 		if ( $curChar == '_' ) {
+ 			if ( isset( $found['xbm2'] ) ) {
+ 				if ( !strncasecmp( $remainder, $xbmMagic3, strlen( $xbmMagic3 ) ) ) {
+ 					$found['xbm'] = true;
+ 					break;
+ 				}
+ 			} elseif ( isset( $found['xbm1'] ) ) {
+ 				if ( !strncasecmp( $remainder, $xbmMagic2, strlen( $xbmMagic2 ) ) ) {
+ 					$found['xbm2'] = true;
+ 				}
+ 			}
+ 		}
+
+ 		// BinHex (case-sensitive, unlike the other markers)
+ 		if ( !strncmp( $remainder, $binhexMagic, strlen( $binhexMagic ) ) ) {
+ 			$found['binhex'] = true;
+ 		}
+ 	}
+ 	return array( 'found' => $found, 'counters' => $counters );
+ }
+
+ /**
+  * Classify a MIME type using the type table of the given version.
+  *
+  * @param $version String: key into $this->typeTable
+  * @param $type String: MIME type to classify
+  * @return String: 'ambiguous' for an empty or '(null)' type, otherwise the
+  *   name of the first format whose list contains the type, or 'unknown' if
+  *   no list contains it
+  */
+ protected function getDataFormat( $version, $type ) {
+ 	$formatLists = $this->typeTable[$version];
+ 	if ( $type == '(null)' || strval( $type ) === '' ) {
+ 		return 'ambiguous';
+ 	}
+ 	$matchedFormat = 'unknown';
+ 	foreach ( $formatLists as $formatName => $mimeList ) {
+ 		if ( in_array( $type, $mimeList ) ) {
+ 			$matchedFormat = $formatName;
+ 			break;
+ 		}
+ 	}
+ 	return $matchedFormat;
+ }
+}
+
diff --git a/includes/libs/IEUrlExtension.php b/includes/libs/IEUrlExtension.php
new file mode 100644
index 00000000..100454d4
--- /dev/null
+++ b/includes/libs/IEUrlExtension.php
@@ -0,0 +1,247 @@
+<?php
+
+/**
+ * Internet Explorer derives a cache filename from a URL, and then in certain
+ * circumstances, uses the extension of the resulting file to determine the
+ * content type of the data, ignoring the Content-Type header.
+ *
+ * This can be a problem, especially when non-HTML content is sent by MediaWiki,
+ * and Internet Explorer interprets it as HTML, exposing an XSS vulnerability.
+ *
+ * Usually the script filename (e.g. api.php) is present in the URL, and this
+ * makes Internet Explorer think the extension is a harmless script extension.
+ * But Internet Explorer 6 and earlier allows the script extension to be
+ * obscured by encoding the dot as "%2E".
+ *
+ * This class contains functions which help in detecting and dealing with this
+ * situation.
+ *
+ * Checking the URL for a bad extension is somewhat complicated due to the fact
+ * that CGI doesn't provide a standard method to determine the URL. Instead it
+ * is necessary to pass a subset of $_SERVER variables, which we then attempt
+ * to use to guess parts of the URL.
+ */
+class IEUrlExtension {
+	/**
+	 * Check a subset of $_SERVER (or the whole of $_SERVER if you like)
+	 * to see if it indicates that the request was sent with a bad file
+	 * extension. Returns true if the request should be denied or modified,
+	 * false otherwise. The relevant $_SERVER elements are:
+	 *
+	 *   - SERVER_SOFTWARE
+	 *   - REQUEST_URI
+	 *   - QUERY_STRING
+	 *   - PATH_INFO
+	 *
+	 * If a variable is unset in $_SERVER, it should be unset in $vars.
+	 *
+	 * REQUEST_URI is only used when haveUndecodedRequestUri() recognises the
+	 * server software; otherwise QUERY_STRING is checked instead. PATH_INFO
+	 * is checked in addition whenever it is set.
+	 *
+	 * @param $vars A subset of $_SERVER.
+	 * @param $extWhitelist Extensions which are allowed, assumed harmless.
+	 * @return bool
+	 */
+	public static function areServerVarsBad( $vars, $extWhitelist = array() ) {
+		// Check QUERY_STRING or REQUEST_URI
+		if ( isset( $vars['SERVER_SOFTWARE'] )
+			&& isset( $vars['REQUEST_URI'] )
+			&& self::haveUndecodedRequestUri( $vars['SERVER_SOFTWARE'] ) )
+		{
+			$urlPart = $vars['REQUEST_URI'];
+		} elseif ( isset( $vars['QUERY_STRING'] ) ) {
+			$urlPart = $vars['QUERY_STRING'];
+		} else {
+			$urlPart = '';
+		}
+
+		if ( self::isUrlExtensionBad( $urlPart, $extWhitelist ) ) {
+			return true;
+		}
+
+		// Some servers have PATH_INFO but not REQUEST_URI, so we check both
+		// to be on the safe side.
+		if ( isset( $vars['PATH_INFO'] )
+			&& self::isUrlExtensionBad( $vars['PATH_INFO'], $extWhitelist ) )
+		{
+			return true;
+		}
+
+		// All checks passed
+		return false;
+	}
+
+	/**
+	 * Given a right-hand portion of a URL, determine whether IE would detect
+	 * a potentially harmful file extension.
+	 *
+	 * @param $urlPart The right-hand portion of a URL
+	 * @param $extWhitelist An array of file extensions which may occur in this
+	 *   URL, and which should be allowed.
+	 * @return bool
+	 */
+	public static function isUrlExtensionBad( $urlPart, $extWhitelist = array() ) {
+		if ( strval( $urlPart ) === '' ) {
+			return false;
+		}
+
+		$extension = self::findIE6Extension( $urlPart );
+		if ( strval( $extension ) === '' ) {
+			// No extension or empty extension
+			return false;
+		}
+
+		if ( in_array( $extension, array( 'php', 'php5' ) ) ) {
+			// Script extension, OK
+			return false;
+		}
+		if ( in_array( $extension, $extWhitelist ) ) {
+			// Whitelisted extension
+			return false;
+		}
+
+		if ( !preg_match( '/^[a-zA-Z0-9_-]+$/', $extension ) ) {
+			// Non-alphanumeric extension, unlikely to be registered.
+			//
+			// The regex above is known to match all registered file extensions
+			// in a default Windows XP installation. It's important to allow
+			// extensions with ampersands and percent signs, since that reduces
+			// the number of false positives substantially.
+			return false;
+		}
+
+		// Possibly bad extension
+		return true;
+	}
+
+	/**
+	 * Returns a variant of $url which will pass isUrlExtensionBad() but has the
+	 * same GET parameters, or false if it can't figure one out.
+	 *
+	 * @param $url string URL to fix
+	 * @param $extWhitelist An array of file extensions which should be allowed
+	 * @return mixed Fixed URL (string), or false if the rewritten URL would
+	 *   still be rejected by isUrlExtensionBad()
+	 */
+	public static function fixUrlForIE6( $url, $extWhitelist = array() ) {
+		$questionPos = strpos( $url, '?' );
+		if ( $questionPos === false ) {
+			$beforeQuery = $url . '?';
+			$query = '';
+		} elseif ( $questionPos === strlen( $url ) - 1 ) {
+			$beforeQuery = $url;
+			$query = '';
+		} else {
+			$beforeQuery = substr( $url, 0, $questionPos + 1 );
+			$query = substr( $url, $questionPos + 1 );
+		}
+
+		// Multiple question marks cause problems. Encode the second and
+		// subsequent question mark. "%3F" is the percent-encoding of "?", so
+		// the parameter values decode back to what they were.
+		$query = str_replace( '?', '%3F', $query );
+		// Append an invalid path character so that IE6 won't see the end of the
+		// query string as an extension
+		$query .= '&*';
+		// Put the URL back together
+		$url = $beforeQuery . $query;
+		if ( self::isUrlExtensionBad( $url, $extWhitelist ) ) {
+			// Avoid a redirect loop
+			return false;
+		}
+		return $url;
+	}
+
+	/**
+	 * Determine what extension IE6 will infer from a certain query string.
+	 * If the URL has an extension before the question mark, IE6 will use
+	 * that and ignore the query string, but per the comment at the top of
+	 * this class we don't have a reliable way to determine the URL, so
+	 * areServerVarsBad() may pass in just the query string for $url.
+	 * All entry points have safe extensions (php, php5) anyway, so
+	 * checking the query string is possibly overly paranoid but never
+	 * insecure.
+	 *
+	 * The criteria for finding an extension are as follows:
+	 * - anything from the first # onwards is ignored
+	 * - a possible extension is a dot followed by one or more characters not
+	 *   in <>\"/:|?*.
+	 * - if we find a possible extension followed by the end of the string or
+	 *   a #, that's our extension
+	 * - if we find a possible extension followed by a ?, that's our extension
+	 *    - UNLESS it's exe, dll or cgi, in which case we ignore it and continue
+	 *      searching for another possible extension
+	 * - if we find a possible extension followed by a dot or another illegal
+	 *   character, we ignore it and continue searching
+	 *
+	 * @param $url string URL
+	 * @return mixed Detected extension (string), or false if none found
+	 */
+	public static function findIE6Extension( $url ) {
+		$pos = 0;
+		$hashPos = strpos( $url, '#' );
+		if ( $hashPos !== false ) {
+			$urlLength = $hashPos;
+		} else {
+			$urlLength = strlen( $url );
+		}
+		$remainingLength = $urlLength;
+		while ( $remainingLength > 0 ) {
+			// Skip ahead to the next dot
+			$pos += strcspn( $url, '.', $pos, $remainingLength );
+			if ( $pos >= $urlLength ) {
+				// End of string, we're done
+				return false;
+			}
+
+			// We found a dot. Skip past it
+			$pos++;
+			$remainingLength = $urlLength - $pos;
+
+			// Check for illegal characters in our prospective extension,
+			// or for another dot
+			$nextPos = $pos + strcspn( $url, "<>\\\"/:|?*.", $pos, $remainingLength );
+			if ( $nextPos >= $urlLength ) {
+				// No illegal character or next dot
+				// We have our extension
+				return substr( $url, $pos, $urlLength - $pos );
+			}
+			if ( $url[$nextPos] === '?' ) {
+				// We've found a legal extension followed by a question mark
+				// If the extension is NOT exe, dll or cgi, return it
+				$extension = substr( $url, $pos, $nextPos - $pos );
+				if ( strcasecmp( $extension, 'exe' ) && strcasecmp( $extension, 'dll' ) &&
+					strcasecmp( $extension, 'cgi' ) )
+				{
+					return $extension;
+				}
+				// Else continue looking
+			}
+			// We found an illegal character or another dot
+			// Skip to that character and continue the loop
+			$pos = $nextPos + 1;
+			$remainingLength = $urlLength - $pos;
+		}
+		return false;
+	}
+
+	/**
+	 * When passed the value of $_SERVER['SERVER_SOFTWARE'], this function
+	 * returns true if that server is known to have a REQUEST_URI variable
+	 * with %2E not decoded to ".". On such a server, it is possible to detect
+	 * whether the script filename has been obscured.
+	 *
+	 * The function returns false if the server is not known to have this
+	 * behaviour. Microsoft IIS in particular is known to decode escaped script
+	 * filenames.
+	 *
+	 * SERVER_SOFTWARE typically contains either a plain string such as "Zeus",
+	 * or a specification in the style of a User-Agent header, such as
+	 * "Apache/1.3.34 (Unix) mod_ssl/2.8.25 OpenSSL/0.9.8a PHP/4.4.2"
+	 *
+	 * Only the product name, i.e. the text before the first slash or space,
+	 * is compared against the list of known servers.
+	 *
+	 * @param $serverSoftware
+	 * @return bool
+	 *
+	 */
+	public static function haveUndecodedRequestUri( $serverSoftware ) {
+		static $whitelist = array(
+			'Apache',
+			'Zeus',
+			'LiteSpeed' );
+		if ( preg_match( '/^(.*?)($|\/| )/', $serverSoftware, $m ) ) {
+			return in_array( $m[1], $whitelist );
+		} else {
+			return false;
+		}
+	}
+
+}
diff --git a/includes/libs/JavaScriptMinifier.php b/includes/libs/JavaScriptMinifier.php
new file mode 100644
index 00000000..a991d915
--- /dev/null
+++ b/includes/libs/JavaScriptMinifier.php
@@ -0,0 +1,579 @@
+<?php
+/**
+ * JavaScript Minifier
+ *
+ * This class is meant to safely minify javascript code, while leaving syntactically correct
+ * programs intact. Other libraries, such as JSMin require a certain coding style to work
+ * correctly. OTOH, libraries like jsminplus, that do parse the code correctly are rather
+ * slow, because they construct a complete parse tree before outputting the code minified.
+ * So this class is meant to allow arbitrary (but syntactically correct) input, while being
+ * fast enough to be used for on-the-fly minifying.
+ *
+ * Author: Paul Copperman <paul.copperman@gmail.com>
+ * License: choose any of Apache, MIT, GPL, LGPL
+ */
+
+class JavaScriptMinifier {
+
+ /* Class constants */
+ /* Parsing states.
+ * The state machine is only necessary to decide whether to parse a slash as division
+ * operator or as regexp literal.
+ * States are named after the next expected item. We only distinguish states when the
+ * distinction is relevant for our purpose.
+ */
+ const STATEMENT = 0;
+ const CONDITION = 1;
+ const PROPERTY_ASSIGNMENT = 2;
+ const EXPRESSION = 3;
+ const EXPRESSION_NO_NL = 4; // only relevant for semicolon insertion
+ const EXPRESSION_OP = 5;
+ const EXPRESSION_FUNC = 6;
+ const EXPRESSION_TERNARY = 7; // used to determine the role of a colon
+ const EXPRESSION_TERNARY_OP = 8;
+ const EXPRESSION_TERNARY_FUNC = 9;
+ const PAREN_EXPRESSION = 10; // expression which is not on the top level
+ const PAREN_EXPRESSION_OP = 11;
+ const PAREN_EXPRESSION_FUNC = 12;
+ const PROPERTY_EXPRESSION = 13; // expression which is within an object literal
+ const PROPERTY_EXPRESSION_OP = 14;
+ const PROPERTY_EXPRESSION_FUNC = 15;
+
+ /* Token types */
+ const TYPE_UN_OP = 1; // unary operators
+ const TYPE_INCR_OP = 2; // ++ and --
+ const TYPE_BIN_OP = 3; // binary operators
+ const TYPE_ADD_OP = 4; // + and - which can be either unary or binary ops
+ const TYPE_HOOK = 5; // ?
+ const TYPE_COLON = 6; // :
+ const TYPE_COMMA = 7; // ,
+ const TYPE_SEMICOLON = 8; // ;
+ const TYPE_BRACE_OPEN = 9; // {
+ const TYPE_BRACE_CLOSE = 10; // }
+ const TYPE_PAREN_OPEN = 11; // ( and [
+ const TYPE_PAREN_CLOSE = 12; // ) and ]
+ const TYPE_RETURN = 13; // keywords: break, continue, return, throw
+ const TYPE_IF = 14; // keywords: catch, for, with, switch, while, if
+ const TYPE_DO = 15; // keywords: case, var, finally, else, do, try
+ const TYPE_FUNC = 16; // keywords: function
+ const TYPE_LITERAL = 17; // all literals, identifiers and unrecognised tokens
+
+ // Sanity limit to avoid excessive memory usage
+ const STACK_LIMIT = 1000;
+
+ /* Static functions */
+
+ /**
+ * Returns minified JavaScript code.
+ *
+ * NOTE: $maxLineLength isn't a strict maximum. Longer lines will be produced when
+ * literals (e.g. quoted strings) longer than $maxLineLength are encountered
+ * or when required to guard against semicolon insertion.
+ *
+ * @param $s String JavaScript code to minify
+ * @param $statementsOnOwnLine Bool Whether to put each statement on its own line
+ * @param $maxLineLength Int Maximum length of a single line, or -1 for no maximum.
+ * @return String Minified code
+ */
+ public static function minify( $s, $statementsOnOwnLine = false, $maxLineLength = 1000 ) {
+ // First we declare a few tables that contain our parsing rules
+
+ // $opChars : characters, which can be combined without whitespace in between them
+ $opChars = array(
+ '!' => true,
+ '"' => true,
+ '%' => true,
+ '&' => true,
+ "'" => true,
+ '(' => true,
+ ')' => true,
+ '*' => true,
+ '+' => true,
+ ',' => true,
+ '-' => true,
+ '.' => true,
+ '/' => true,
+ ':' => true,
+ ';' => true,
+ '<' => true,
+ '=' => true,
+ '>' => true,
+ '?' => true,
+ '[' => true,
+ ']' => true,
+ '^' => true,
+ '{' => true,
+ '|' => true,
+ '}' => true,
+ '~' => true
+ );
+
+ // $tokenTypes : maps keywords and operators to their corresponding token type
+ $tokenTypes = array(
+ '!' => self::TYPE_UN_OP,
+ '~' => self::TYPE_UN_OP,
+ 'delete' => self::TYPE_UN_OP,
+ 'new' => self::TYPE_UN_OP,
+ 'typeof' => self::TYPE_UN_OP,
+ 'void' => self::TYPE_UN_OP,
+ '++' => self::TYPE_INCR_OP,
+ '--' => self::TYPE_INCR_OP,
+ '!=' => self::TYPE_BIN_OP,
+ '!==' => self::TYPE_BIN_OP,
+ '%' => self::TYPE_BIN_OP,
+ '%=' => self::TYPE_BIN_OP,
+ '&' => self::TYPE_BIN_OP,
+ '&&' => self::TYPE_BIN_OP,
+ '&=' => self::TYPE_BIN_OP,
+ '*' => self::TYPE_BIN_OP,
+ '*=' => self::TYPE_BIN_OP,
+ '+=' => self::TYPE_BIN_OP,
+ '-=' => self::TYPE_BIN_OP,
+ '.' => self::TYPE_BIN_OP,
+ '/' => self::TYPE_BIN_OP,
+ '/=' => self::TYPE_BIN_OP,
+ '<' => self::TYPE_BIN_OP,
+ '<<' => self::TYPE_BIN_OP,
+ '<<=' => self::TYPE_BIN_OP,
+ '<=' => self::TYPE_BIN_OP,
+ '=' => self::TYPE_BIN_OP,
+ '==' => self::TYPE_BIN_OP,
+ '===' => self::TYPE_BIN_OP,
+ '>' => self::TYPE_BIN_OP,
+ '>=' => self::TYPE_BIN_OP,
+ '>>' => self::TYPE_BIN_OP,
+ '>>=' => self::TYPE_BIN_OP,
+ '>>>' => self::TYPE_BIN_OP,
+ '>>>=' => self::TYPE_BIN_OP,
+ '^' => self::TYPE_BIN_OP,
+ '^=' => self::TYPE_BIN_OP,
+ '|' => self::TYPE_BIN_OP,
+ '|=' => self::TYPE_BIN_OP,
+ '||' => self::TYPE_BIN_OP,
+ 'in' => self::TYPE_BIN_OP,
+ 'instanceof' => self::TYPE_BIN_OP,
+ '+' => self::TYPE_ADD_OP,
+ '-' => self::TYPE_ADD_OP,
+ '?' => self::TYPE_HOOK,
+ ':' => self::TYPE_COLON,
+ ',' => self::TYPE_COMMA,
+ ';' => self::TYPE_SEMICOLON,
+ '{' => self::TYPE_BRACE_OPEN,
+ '}' => self::TYPE_BRACE_CLOSE,
+ '(' => self::TYPE_PAREN_OPEN,
+ '[' => self::TYPE_PAREN_OPEN,
+ ')' => self::TYPE_PAREN_CLOSE,
+ ']' => self::TYPE_PAREN_CLOSE,
+ 'break' => self::TYPE_RETURN,
+ 'continue' => self::TYPE_RETURN,
+ 'return' => self::TYPE_RETURN,
+ 'throw' => self::TYPE_RETURN,
+ 'catch' => self::TYPE_IF,
+ 'for' => self::TYPE_IF,
+ 'if' => self::TYPE_IF,
+ 'switch' => self::TYPE_IF,
+ 'while' => self::TYPE_IF,
+ 'with' => self::TYPE_IF,
+ 'case' => self::TYPE_DO,
+ 'do' => self::TYPE_DO,
+ 'else' => self::TYPE_DO,
+ 'finally' => self::TYPE_DO,
+ 'try' => self::TYPE_DO,
+ 'var' => self::TYPE_DO,
+ 'function' => self::TYPE_FUNC
+ );
+
+ // $goto : This is the main table for our state machine. For every state/token pair
+ // the following state is defined. When no rule exists for a given pair,
+ // the state is left unchanged.
+ $goto = array(
+ self::STATEMENT => array(
+ self::TYPE_UN_OP => self::EXPRESSION,
+ self::TYPE_INCR_OP => self::EXPRESSION,
+ self::TYPE_ADD_OP => self::EXPRESSION,
+ self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
+ self::TYPE_RETURN => self::EXPRESSION_NO_NL,
+ self::TYPE_IF => self::CONDITION,
+ self::TYPE_FUNC => self::CONDITION,
+ self::TYPE_LITERAL => self::EXPRESSION_OP
+ ),
+ self::CONDITION => array(
+ self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
+ ),
+ self::PROPERTY_ASSIGNMENT => array(
+ self::TYPE_COLON => self::PROPERTY_EXPRESSION,
+ self::TYPE_BRACE_OPEN => self::STATEMENT
+ ),
+ self::EXPRESSION => array(
+ self::TYPE_SEMICOLON => self::STATEMENT,
+ self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
+ self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
+ self::TYPE_FUNC => self::EXPRESSION_FUNC,
+ self::TYPE_LITERAL => self::EXPRESSION_OP
+ ),
+ self::EXPRESSION_NO_NL => array(
+ self::TYPE_SEMICOLON => self::STATEMENT,
+ self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
+ self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
+ self::TYPE_FUNC => self::EXPRESSION_FUNC,
+ self::TYPE_LITERAL => self::EXPRESSION_OP
+ ),
+ self::EXPRESSION_OP => array(
+ self::TYPE_BIN_OP => self::EXPRESSION,
+ self::TYPE_ADD_OP => self::EXPRESSION,
+ self::TYPE_HOOK => self::EXPRESSION_TERNARY,
+ self::TYPE_COLON => self::STATEMENT,
+ self::TYPE_COMMA => self::EXPRESSION,
+ self::TYPE_SEMICOLON => self::STATEMENT,
+ self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
+ ),
+ self::EXPRESSION_FUNC => array(
+ self::TYPE_BRACE_OPEN => self::STATEMENT
+ ),
+ self::EXPRESSION_TERNARY => array(
+ self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
+ self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
+ self::TYPE_FUNC => self::EXPRESSION_TERNARY_FUNC,
+ self::TYPE_LITERAL => self::EXPRESSION_TERNARY_OP
+ ),
+ self::EXPRESSION_TERNARY_OP => array(
+ self::TYPE_BIN_OP => self::EXPRESSION_TERNARY,
+ self::TYPE_ADD_OP => self::EXPRESSION_TERNARY,
+ self::TYPE_HOOK => self::EXPRESSION_TERNARY,
+ self::TYPE_COMMA => self::EXPRESSION_TERNARY,
+ self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
+ ),
+ self::EXPRESSION_TERNARY_FUNC => array(
+ self::TYPE_BRACE_OPEN => self::STATEMENT
+ ),
+ self::PAREN_EXPRESSION => array(
+ self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
+ self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
+ self::TYPE_FUNC => self::PAREN_EXPRESSION_FUNC,
+ self::TYPE_LITERAL => self::PAREN_EXPRESSION_OP
+ ),
+ self::PAREN_EXPRESSION_OP => array(
+ self::TYPE_BIN_OP => self::PAREN_EXPRESSION,
+ self::TYPE_ADD_OP => self::PAREN_EXPRESSION,
+ self::TYPE_HOOK => self::PAREN_EXPRESSION,
+ self::TYPE_COLON => self::PAREN_EXPRESSION,
+ self::TYPE_COMMA => self::PAREN_EXPRESSION,
+ self::TYPE_SEMICOLON => self::PAREN_EXPRESSION,
+ self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
+ ),
+ self::PAREN_EXPRESSION_FUNC => array(
+ self::TYPE_BRACE_OPEN => self::STATEMENT
+ ),
+ self::PROPERTY_EXPRESSION => array(
+ self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
+ self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
+ self::TYPE_FUNC => self::PROPERTY_EXPRESSION_FUNC,
+ self::TYPE_LITERAL => self::PROPERTY_EXPRESSION_OP
+ ),
+ self::PROPERTY_EXPRESSION_OP => array(
+ self::TYPE_BIN_OP => self::PROPERTY_EXPRESSION,
+ self::TYPE_ADD_OP => self::PROPERTY_EXPRESSION,
+ self::TYPE_HOOK => self::PROPERTY_EXPRESSION,
+ self::TYPE_COMMA => self::PROPERTY_ASSIGNMENT,
+ self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
+ ),
+ self::PROPERTY_EXPRESSION_FUNC => array(
+ self::TYPE_BRACE_OPEN => self::STATEMENT
+ )
+ );
+
+ // $push : This table contains the rules for when to push a state onto the stack.
+ // The pushed state is the state to return to when the corresponding
+ // closing token is found
+ $push = array(
+ self::STATEMENT => array(
+ self::TYPE_BRACE_OPEN => self::STATEMENT,
+ self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
+ ),
+ self::CONDITION => array(
+ self::TYPE_PAREN_OPEN => self::STATEMENT
+ ),
+ self::PROPERTY_ASSIGNMENT => array(
+ self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT
+ ),
+ self::EXPRESSION => array(
+ self::TYPE_BRACE_OPEN => self::EXPRESSION_OP,
+ self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
+ ),
+ self::EXPRESSION_NO_NL => array(
+ self::TYPE_BRACE_OPEN => self::EXPRESSION_OP,
+ self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
+ ),
+ self::EXPRESSION_OP => array(
+ self::TYPE_HOOK => self::EXPRESSION,
+ self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
+ ),
+ self::EXPRESSION_FUNC => array(
+ self::TYPE_BRACE_OPEN => self::EXPRESSION_OP
+ ),
+ self::EXPRESSION_TERNARY => array(
+ self::TYPE_BRACE_OPEN => self::EXPRESSION_TERNARY_OP,
+ self::TYPE_PAREN_OPEN => self::EXPRESSION_TERNARY_OP
+ ),
+ self::EXPRESSION_TERNARY_OP => array(
+ self::TYPE_HOOK => self::EXPRESSION_TERNARY,
+ self::TYPE_PAREN_OPEN => self::EXPRESSION_TERNARY_OP
+ ),
+ self::EXPRESSION_TERNARY_FUNC => array(
+ self::TYPE_BRACE_OPEN => self::EXPRESSION_TERNARY_OP
+ ),
+ self::PAREN_EXPRESSION => array(
+ self::TYPE_BRACE_OPEN => self::PAREN_EXPRESSION_OP,
+ self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION_OP
+ ),
+ self::PAREN_EXPRESSION_OP => array(
+ self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION_OP
+ ),
+ self::PAREN_EXPRESSION_FUNC => array(
+ self::TYPE_BRACE_OPEN => self::PAREN_EXPRESSION_OP
+ ),
+ self::PROPERTY_EXPRESSION => array(
+ self::TYPE_BRACE_OPEN => self::PROPERTY_EXPRESSION_OP,
+ self::TYPE_PAREN_OPEN => self::PROPERTY_EXPRESSION_OP
+ ),
+ self::PROPERTY_EXPRESSION_OP => array(
+ self::TYPE_PAREN_OPEN => self::PROPERTY_EXPRESSION_OP
+ ),
+ self::PROPERTY_EXPRESSION_FUNC => array(
+ self::TYPE_BRACE_OPEN => self::PROPERTY_EXPRESSION_OP
+ )
+ );
+
+ // $pop : Rules for when to pop a state from the stack
+ $pop = array(
+ self::STATEMENT => array( self::TYPE_BRACE_CLOSE => true ),
+ self::PROPERTY_ASSIGNMENT => array( self::TYPE_BRACE_CLOSE => true ),
+ self::EXPRESSION => array( self::TYPE_BRACE_CLOSE => true ),
+ self::EXPRESSION_NO_NL => array( self::TYPE_BRACE_CLOSE => true ),
+ self::EXPRESSION_OP => array( self::TYPE_BRACE_CLOSE => true ),
+ self::EXPRESSION_TERNARY_OP => array( self::TYPE_COLON => true ),
+ self::PAREN_EXPRESSION => array( self::TYPE_PAREN_CLOSE => true ),
+ self::PAREN_EXPRESSION_OP => array( self::TYPE_PAREN_CLOSE => true ),
+ self::PROPERTY_EXPRESSION => array( self::TYPE_BRACE_CLOSE => true ),
+ self::PROPERTY_EXPRESSION_OP => array( self::TYPE_BRACE_CLOSE => true )
+ );
+
+ // $semicolon : Rules for when a semicolon insertion is appropriate
+ $semicolon = array(
+ self::EXPRESSION_NO_NL => array(
+ self::TYPE_UN_OP => true,
+ self::TYPE_INCR_OP => true,
+ self::TYPE_ADD_OP => true,
+ self::TYPE_BRACE_OPEN => true,
+ self::TYPE_PAREN_OPEN => true,
+ self::TYPE_RETURN => true,
+ self::TYPE_IF => true,
+ self::TYPE_DO => true,
+ self::TYPE_FUNC => true,
+ self::TYPE_LITERAL => true
+ ),
+ self::EXPRESSION_OP => array(
+ self::TYPE_UN_OP => true,
+ self::TYPE_INCR_OP => true,
+ self::TYPE_BRACE_OPEN => true,
+ self::TYPE_RETURN => true,
+ self::TYPE_IF => true,
+ self::TYPE_DO => true,
+ self::TYPE_FUNC => true,
+ self::TYPE_LITERAL => true
+ )
+ );
+
+ // Rules for when newlines should be inserted if
+ // $statementsOnOwnLine is enabled.
+ // $newlineBefore is checked before switching state,
+ // $newlineAfter is checked after
+ $newlineBefore = array(
+ self::STATEMENT => array(
+ self::TYPE_BRACE_CLOSE => true,
+ ),
+ );
+ $newlineAfter = array(
+ self::STATEMENT => array(
+ self::TYPE_BRACE_OPEN => true,
+ self::TYPE_PAREN_CLOSE => true,
+ self::TYPE_SEMICOLON => true,
+ ),
+ );
+
+ // $divStates : Contains all states that can be followed by a division operator
+ $divStates = array(
+ self::EXPRESSION_OP => true,
+ self::EXPRESSION_TERNARY_OP => true,
+ self::PAREN_EXPRESSION_OP => true,
+ self::PROPERTY_EXPRESSION_OP => true
+ );
+
+ // Here's where the minifying takes place: Loop through the input, looking for tokens
+ // and output them to $out, taking actions to the above defined rules when appropriate.
+ $out = '';
+ $pos = 0;
+ $length = strlen( $s );
+ $lineLength = 0;
+ $newlineFound = true;
+ $state = self::STATEMENT;
+ $stack = array();
+ $last = ';'; // Pretend that we have seen a semicolon yet
+ while( $pos < $length ) {
+ // First, skip over any whitespace and multiline comments, recording whether we
+ // found any newline character
+ $skip = strspn( $s, " \t\n\r\xb\xc", $pos );
+ if( !$skip ) {
+ $ch = $s[$pos];
+ if( $ch === '/' && substr( $s, $pos, 2 ) === '/*' ) {
+ // Multiline comment. Search for the end token or EOT.
+ $end = strpos( $s, '*/', $pos + 2 );
+ $skip = $end === false ? $length - $pos : $end - $pos + 2;
+ }
+ }
+ if( $skip ) {
+ // The semicolon insertion mechanism needs to know whether there was a newline
+ // between two tokens, so record it now.
+ if( !$newlineFound && strcspn( $s, "\r\n", $pos, $skip ) !== $skip ) {
+ $newlineFound = true;
+ }
+ $pos += $skip;
+ continue;
+ }
+ // Handle C++-style comments and html comments, which are treated as single line
+ // comments by the browser, regardless of whether the end tag is on the same line.
+ // Handle --> the same way, but only if it's at the beginning of the line
+ if( ( $ch === '/' && substr( $s, $pos, 2 ) === '//' )
+ || ( $ch === '<' && substr( $s, $pos, 4 ) === '<!--' )
+ || ( $ch === '-' && $newlineFound && substr( $s, $pos, 3 ) === '-->' )
+ ) {
+ $pos += strcspn( $s, "\r\n", $pos );
+ continue;
+ }
+
+ // Find out which kind of token we're handling. $end will point past the end of it.
+ $end = $pos + 1;
+ // Handle string literals
+ if( $ch === "'" || $ch === '"' ) {
+ // Search to the end of the string literal, skipping over backslash escapes
+ $search = $ch . '\\';
+ do{
+ $end += strcspn( $s, $search, $end ) + 2;
+ } while( $end - 2 < $length && $s[$end - 2] === '\\' );
+ $end--;
+ // We have to distinguish between regexp literals and division operators
+ // A division operator is only possible in certain states
+ } elseif( $ch === '/' && !isset( $divStates[$state] ) ) {
+ // Regexp literal, search to the end, skipping over backslash escapes and
+ // character classes
+ for( ; ; ) {
+ do{
+ $end += strcspn( $s, '/[\\', $end ) + 2;
+ } while( $end - 2 < $length && $s[$end - 2] === '\\' );
+ $end--;
+ if( $end - 1 >= $length || $s[$end - 1] === '/' ) {
+ break;
+ }
+ do{
+ $end += strcspn( $s, ']\\', $end ) + 2;
+ } while( $end - 2 < $length && $s[$end - 2] === '\\' );
+ $end--;
+ };
+ // Search past the regexp modifiers (gi)
+ while( $end < $length && ctype_alpha( $s[$end] ) ) {
+ $end++;
+ }
+ } elseif(
+ ctype_digit( $ch )
+ || ( $ch === '.' && $pos + 1 < $length && ctype_digit( $s[$pos + 1] ) )
+ ) {
+ // Numeric literal. Search for the end of it, but don't care about [+-]exponent
+ // at the end, as the results of "numeric [+-] numeric" and "numeric" are
+ // identical to our state machine.
+ $end += strspn( $s, '0123456789ABCDEFabcdefXx.', $end );
+ while( $s[$end - 1] === '.' ) {
+ // Special case: When a numeric ends with a dot, we have to check the
+ // literal for proper syntax
+ $decimal = strspn( $s, '0123456789', $pos, $end - $pos - 1 );
+ if( $decimal === $end - $pos - 1 ) {
+ break;
+ } else {
+ $end--;
+ }
+ }
+ } elseif( isset( $opChars[$ch] ) ) {
+ // Punctuation character. Search for the longest matching operator.
+ while(
+ $end < $length
+ && isset( $tokenTypes[substr( $s, $pos, $end - $pos + 1 )] )
+ ) {
+ $end++;
+ }
+ } else {
+ // Identifier or reserved word. Search for the end by excluding whitespace and
+ // punctuation.
+ $end += strcspn( $s, " \t\n.;,=<>+-{}()[]?:*/%'\"!&|^~\xb\xc\r", $end );
+ }
+
+ // Now get the token type from our type array
+ $token = substr( $s, $pos, $end - $pos ); // so $end - $pos == strlen( $token )
+ $type = isset( $tokenTypes[$token] ) ? $tokenTypes[$token] : self::TYPE_LITERAL;
+
+ if( $newlineFound && isset( $semicolon[$state][$type] ) ) {
+ // This token triggers the semicolon insertion mechanism of javascript. While we
+ // could add the ; token here ourselves, keeping the newline has a few advantages.
+ $out .= "\n";
+ $state = self::STATEMENT;
+ $lineLength = 0;
+ } elseif( $maxLineLength > 0 && $lineLength + $end - $pos > $maxLineLength &&
+ !isset( $semicolon[$state][$type] ) && $type !== self::TYPE_INCR_OP )
+ {
+ // This line would get too long if we added $token, so add a newline first.
+ // Only do this if it won't trigger semicolon insertion and if it won't
+ // put a postfix increment operator on its own line, which is illegal in js.
+ $out .= "\n";
+ $lineLength = 0;
+ // Check, whether we have to separate the token from the last one with whitespace
+ } elseif( !isset( $opChars[$last] ) && !isset( $opChars[$ch] ) ) {
+ $out .= ' ';
+ $lineLength++;
+ // Don't accidentally create ++, -- or // tokens
+ } elseif( $last === $ch && ( $ch === '+' || $ch === '-' || $ch === '/' ) ) {
+ $out .= ' ';
+ $lineLength++;
+ }
+
+ $out .= $token;
+ $lineLength += $end - $pos; // += strlen( $token )
+ $last = $s[$end - 1];
+ $pos = $end;
+ $newlineFound = false;
+
+ // Output a newline after the token if required
+ // This is checked before AND after switching state
+ $newlineAdded = false;
+ if ( $statementsOnOwnLine && !$newlineAdded && isset( $newlineBefore[$state][$type] ) ) {
+ $out .= "\n";
+ $lineLength = 0;
+ $newlineAdded = true;
+ }
+
+ // Now that we have output our token, transition into the new state.
+ if( isset( $push[$state][$type] ) && count( $stack ) < self::STACK_LIMIT ) {
+ $stack[] = $push[$state][$type];
+ }
+ if( $stack && isset( $pop[$state][$type] ) ) {
+ $state = array_pop( $stack );
+ } elseif( isset( $goto[$state][$type] ) ) {
+ $state = $goto[$state][$type];
+ }
+
+ // Check for newline insertion again
+ if ( $statementsOnOwnLine && !$newlineAdded && isset( $newlineAfter[$state][$type] ) ) {
+ $out .= "\n";
+ $lineLength = 0;
+ }
+ }
+ return $out;
+ }
+}
diff --git a/includes/libs/README b/includes/libs/README
new file mode 100644
index 00000000..85e3db3c
--- /dev/null
+++ b/includes/libs/README
@@ -0,0 +1,4 @@
+The classes in this directory ./includes/libs are considered standalone
+from the remainder of the MediaWiki codebase. They do not call on any other
+portions of MediaWiki code, and can be used in other projects without
+dependency issues.
diff --git a/includes/libs/spyc.php b/includes/libs/spyc.php
new file mode 100644
index 00000000..bc92e869
--- /dev/null
+++ b/includes/libs/spyc.php
@@ -0,0 +1,248 @@
+<?php
+/**
+ * Spyc -- A Simple PHP YAML Class
+ *
+ * @file
+ * @version 0.2.3 -- 2006-02-04
+ * @author Chris Wanstrath <chris@ozmm.org>
+ * @see http://spyc.sourceforge.net/
+ * @copyright Copyright 2005-2006 Chris Wanstrath
+ * @license http://www.opensource.org/licenses/mit-license.php MIT License
+ */
+
+/**
+ * The Simple PHP YAML Class.
+ *
+ * As shipped here the class only serialises: it converts a PHP array into
+ * a YAML document string. There is no parser in this file, and only a very
+ * limited subset of the YAML spec is emitted.
+ *
+ * @ingroup API
+ */
+class Spyc {
+
+	/**
+	 * Dump YAML from PHP array statically
+	 *
+	 * Convenience wrapper: creates a throw-away Spyc instance and forwards
+	 * all three arguments to dump(), so callers do not need an object of
+	 * their own.
+	 *
+	 * Both the indent width and the fold width can be chosen by the caller;
+	 * pass false (or any non-numeric value) for either one to get its
+	 * default.
+	 *
+	 * Indent's default is 2 spaces, wordwrap's default is 40 characters. And
+	 * you can turn off wordwrap by passing in 0.
+	 *
+	 * @param $array Array: PHP array
+	 * @param $indent Integer: Pass in false to use the default, which is 2
+	 * @param $wordwrap Integer: Pass in 0 for no wordwrap, false for default (40)
+	 * @return String
+	 */
+	public static function YAMLDump( $array, $indent = false, $wordwrap = false ) {
+		$dumper = new self();
+		return $dumper->dump( $array, $indent, $wordwrap );
+	}
+
+	/**
+	 * Dump PHP array to YAML
+	 *
+	 * Emits a "---" document marker followed by one node per top-level
+	 * array entry; nested arrays are indented one level deeper.
+	 *
+	 * $indent and $wordwrap are normalised to integers before use, so a
+	 * numeric string such as "0" switches folding off exactly like the
+	 * integer 0 does. Anything that is false or not numeric selects the
+	 * default instead.
+	 *
+	 * Indent's default is 2 spaces, wordwrap's default is 40 characters. And
+	 * you can turn off wordwrap by passing in 0.
+	 *
+	 * @param $array Array: PHP array
+	 * @param $indent Integer: Pass in false to use the default, which is 2
+	 * @param $wordwrap Integer: Pass in 0 for no wordwrap, false for default (40)
+	 * @return String
+	 */
+	public function dump( $array, $indent = false, $wordwrap = false ) {
+		// Resolve the two layout options first. is_numeric() also accepts
+		// numeric strings and floats, so cast to int: the strict "=== 0"
+		// test in _doFolding() and str_repeat() both expect real integers.
+		if ( $indent === false || !is_numeric( $indent ) ) {
+			$this->_dumpIndent = 2;
+		} else {
+			$this->_dumpIndent = (int)$indent;
+		}
+
+		if ( $wordwrap === false || !is_numeric( $wordwrap ) ) {
+			$this->_dumpWordWrap = 40;
+		} else {
+			$this->_dumpWordWrap = (int)$wordwrap;
+		}
+
+		// Every dump starts a new YAML document
+		$string = "---\n";
+
+		// Walk the top level of the array; _yamlize() recurses into
+		// nested arrays on its own.
+		foreach ( $array as $key => $value ) {
+			$string .= $this->_yamlize( $key, $value, 0 );
+		}
+		return $string;
+	}
+
+	/**** Private Properties ****/
+
+	/**
+	 * Number of spaces added per nesting level; set by dump().
+	 * (Other properties of the upstream library, namely _haveRefs, _allNodes,
+	 * _lastIndent, _lastNode, _inBlock and _isInline, are unused here and
+	 * are intentionally left undeclared.)
+	 */
+	private $_dumpIndent;
+
+	/**
+	 * Column width at which long strings are folded (0 = never); set by dump().
+	 */
+	private $_dumpWordWrap;
+
+	/**** Private Methods ****/
+
+	/**
+	 * Attempts to convert a key / value array item to YAML
+	 *
+	 * Arrays become a parent node followed by their children; anything
+	 * else becomes one node line.
+	 *
+	 * @param $key Mixed: the name of the key
+	 * @param $value Mixed: the value of the item
+	 * @param $indent Integer: the indent of the current node
+	 * @return String
+	 */
+	private function _yamlize( $key, $value, $indent ) {
+		if ( !is_array( $value ) ) {
+			// Scalar (or null): a single "key: value" / "- value" line
+			return $this->_dumpNode( $key, $value, $indent );
+		}
+
+		// The item has children: emit a bare "key:" / "-" line for the
+		// parent, then every child one indent step further in.
+		$string = $this->_dumpNode( $key, null, $indent );
+		$string .= $this->_yamlizeArray( $value, $indent + $this->_dumpIndent );
+		return $string;
+	}
+
+	/**
+	 * Attempts to convert an array to YAML
+	 *
+	 * @param $array Array: the array you want to convert
+	 * @param $indent Integer: the indent of the current level
+	 * @return String|false YAML for all entries, or false if $array is not an array
+	 */
+	private function _yamlizeArray( $array, $indent ) {
+		if ( !is_array( $array ) ) {
+			return false;
+		}
+
+		$string = '';
+		foreach ( $array as $key => $value ) {
+			$string .= $this->_yamlize( $key, $value, $indent );
+		}
+		return $string;
+	}
+
+	/**
+	 * Find out whether a string needs to be output as a literal rather than in plain style.
+	 * Added by Roan Kattouw 13-03-2008
+	 *
+	 * @param $value String: the string to check
+	 * @return Boolean
+	 */
+	public function _needLiteral( $value ) {
+		// Needed for numbers-as-strings, any newline (offset 0 included),
+		// any # or :, or a leading  - ? , [ ] { } ! * & | > ' " % @ `
+		return is_string( $value ) && (
+			is_numeric( $value ) ||
+			strpos( $value, "\n" ) !== false ||
+			preg_match( "/[#:]/", $value ) ||
+			preg_match( "/^[-?,[\]{}!*&|>'\"%@`]/", $value )
+		);
+	}
+
+	/**
+	 * Returns YAML from a key and a value
+	 *
+	 * @param $key Mixed: the name of the key
+	 * @param $value Mixed: the value of the item
+	 * @param $indent Integer: the indent of the current node
+	 * @return String
+	 */
+	private function _dumpNode( $key, $value, $indent ) {
+		// Strings that plain style cannot carry go into a literal block;
+		// everything else is folded if it is too long.
+		if ( $this->_needLiteral( $value ) ) {
+			$value = $this->_doLiteralBlock( $value, $indent );
+		} else {
+			$value = $this->_doFolding( $value, $indent );
+		}
+
+		// Integer keys denote sequence entries ("-"), all other keys are
+		// mapping entries ("key:").
+		if ( is_int( $key ) ) {
+			$prefix = '-';
+		} elseif ( $key === '*' ) {
+			// bug 21922 - Quote asterisk used as key
+			$prefix = "'*':";
+		} else {
+			$prefix = $key . ':';
+		}
+
+		// Empty and null values produce a bare node without a payload
+		$string = str_repeat( ' ', $indent ) . $prefix;
+		if ( $value !== '' && !is_null( $value ) ) {
+			$string .= ' ' . $value;
+		}
+
+		return $string . "\n";
+	}
+
+	/**
+	 * Creates a literal block for dumping
+	 *
+	 * @param $value String
+	 * @param $indent Integer: the value of the indent
+	 * @return String
+	 */
+	private function _doLiteralBlock( $value, $indent ) {
+		// "|-" opens a literal block (final newline stripped); every source
+		// line follows, trimmed and indented one step below the owning node
+		$spaces = str_repeat( ' ', $indent + $this->_dumpIndent );
+		$newValue = '|-';
+		foreach ( explode( "\n", $value ) as $line ) {
+			$newValue .= "\n" . $spaces . trim( $line );
+		}
+		return $newValue;
+	}
+
+	/**
+	 * Folds a string of text, if necessary
+	 *
+	 * @param $value Mixed: the value to fold; only strings are ever folded
+	 * @param $indent Integer: the indent of the current node
+	 * @return Mixed: the folded string, or $value unchanged
+	 */
+	private function _doFolding( $value, $indent ) {
+		// Folding is off, or the value is not text (null for array parents,
+		// numbers, booleans): hand it back untouched.
+		if ( $this->_dumpWordWrap === 0 || !is_string( $value ) ) {
+			return $value;
+		}
+		if ( strlen( $value ) > $this->_dumpWordWrap ) {
+			// ">-" opens a folded block; continuation lines sit one step deeper
+			$pad = str_repeat( ' ', $indent + $this->_dumpIndent );
+			$wrapped = wordwrap( $value, $this->_dumpWordWrap, "\n$pad" );
+			$value = ">-\n" . $pad . $wrapped;
+		}
+		return $value;
+	}
+}