summaryrefslogtreecommitdiff
path: root/includes/upload/UploadBase.php
diff options
context:
space:
mode:
authorPierre Schmitz <pierre@archlinux.de>2013-05-01 08:56:50 +0200
committerPierre Schmitz <pierre@archlinux.de>2013-05-01 08:56:50 +0200
commitcdafed9759bbff5952f09e5a3d866f24fba57104 (patch)
tree009e2e480b45e5cfd52051b964a60d52a2a7c6df /includes/upload/UploadBase.php
parentf7253921201bcf43d385440317ab279fb83a4658 (diff)
Update to MediaWiki 1.20.5
Diffstat (limited to 'includes/upload/UploadBase.php')
-rw-r--r--includes/upload/UploadBase.php67
1 files changed, 67 insertions, 0 deletions
diff --git a/includes/upload/UploadBase.php b/includes/upload/UploadBase.php
index d40b53d3..3a5733ca 100644
--- a/includes/upload/UploadBase.php
+++ b/includes/upload/UploadBase.php
@@ -46,6 +46,8 @@ abstract class UploadBase {
protected $mBlackListedExtensions;
protected $mJavaDetected;
+ protected static $safeXmlEncodings = array( 'UTF-8', 'ISO-8859-1', 'ISO-8859-2', 'UTF-16', 'UTF-32' );
+
const SUCCESS = 0;
const OK = 0;
const EMPTY_FILE = 3;
@@ -966,6 +968,15 @@ abstract class UploadBase {
return true;
}
+ // Some browsers will interpret obscure xml encodings as UTF-8, while
+ // PHP/expat will interpret the given encoding in the xml declaration (bug 47304)
+ if ( $extension == 'svg' || strpos( $mime, 'image/svg' ) === 0 ) {
+ if ( self::checkXMLEncodingMissmatch( $file ) ) {
+ wfProfileOut( __METHOD__ );
+ return true;
+ }
+ }
+
/**
* Internet Explorer for Windows performs some really stupid file type
* autodetection which can cause it to interpret valid image files as HTML
@@ -1037,6 +1048,62 @@ abstract class UploadBase {
return false;
}
+
+ /**
+ * Check a whitelist of xml encodings that are known not to be interpreted differently
+ * by the server's xml parser (expat) and some common browsers.
+ *
+ * @param string $file pathname to the temporary upload file
+ * @return Boolean: true if the file contains an encoding that could be misinterpreted
+ */
+ public static function checkXMLEncodingMissmatch( $file ) {
+ global $wgSVGMetadataCutoff;
+ $contents = file_get_contents( $file, false, null, -1, $wgSVGMetadataCutoff );
+ $encodingRegex = '!encoding[ \t\n\r]*=[ \t\n\r]*[\'"](.*?)[\'"]!si';
+
+ if ( preg_match( "!<\?xml\b(.*?)\?>!si", $contents, $matches ) ) {
+ if ( preg_match( $encodingRegex, $matches[1], $encMatch )
+ && !in_array( strtoupper( $encMatch[1] ), self::$safeXmlEncodings )
+ ) {
+ wfDebug( __METHOD__ . ": Found unsafe XML encoding '{$encMatch[1]}'\n" );
+ return true;
+ }
+ } elseif ( preg_match( "!<\?xml\b!si", $contents ) ) {
+ // Start of XML declaration without an end in the first $wgSVGMetadataCutoff
+ // bytes. There shouldn't be a legitimate reason for this to happen.
+ wfDebug( __METHOD__ . ": Unmatched XML declaration start\n" );
+ return true;
+ } elseif ( substr( $contents, 0, 4) == "\x4C\x6F\xA7\x94" ) {
+ // EBCDIC encoded XML
+ wfDebug( __METHOD__ . ": EBCDIC Encoded XML\n" );
+ return true;
+ }
+
+ // It's possible the file is encoded with multi-byte encoding, so re-encode attempt to
+ // detect the encoding in case is specifies an encoding not whitelisted in self::$safeXmlEncodings
+ $attemptEncodings = array( 'UTF-16', 'UTF-16BE', 'UTF-32', 'UTF-32BE' );
+ foreach ( $attemptEncodings as $encoding ) {
+ wfSuppressWarnings();
+ $str = iconv( $encoding, 'UTF-8', $contents );
+ wfRestoreWarnings();
+ if ( $str != '' && preg_match( "!<\?xml\b(.*?)\?>!si", $str, $matches ) ) {
+ if ( preg_match( $encodingRegex, $matches[1], $encMatch )
+ && !in_array( strtoupper( $encMatch[1] ), self::$safeXmlEncodings )
+ ) {
+ wfDebug( __METHOD__ . ": Found unsafe XML encoding '{$encMatch[1]}'\n" );
+ return true;
+ }
+ } elseif ( $str != '' && preg_match( "!<\?xml\b!si", $str ) ) {
+ // Start of XML declaration without an end in the first $wgSVGMetadataCutoff
+ // bytes. There shouldn't be a legitimate reason for this to happen.
+ wfDebug( __METHOD__ . ": Unmatched XML declaration start\n" );
+ return true;
+ }
+ }
+
+ return false;
+ }
+
/**
* @param $filename string
* @return bool