summaryrefslogtreecommitdiff
path: root/includes/libs
diff options
context:
space:
mode:
authorcoadde [Márcio Alexandre Silva Delgado] <coadde@parabola.nu>2016-07-04 16:01:35 -0300
committercoadde [Márcio Alexandre Silva Delgado] <coadde@parabola.nu>2016-07-04 16:01:35 -0300
commit7a31146918cdceef14689bf05d8f1602ec05bfcb (patch)
treed52097466c0e690ac95e8ef20376c4ed4e9cc4cd /includes/libs
parentc54534b54ae0f8d50fb0367e3bae8ab4a10bfbb3 (diff)
remove nonfree Database support
Diffstat (limited to 'includes/libs')
-rw-r--r--includes/libs/IEContentAnalyzer.php851
-rw-r--r--includes/libs/IEUrlExtension.php271
2 files changed, 0 insertions, 1122 deletions
diff --git a/includes/libs/IEContentAnalyzer.php b/includes/libs/IEContentAnalyzer.php
deleted file mode 100644
index c31a3527..00000000
--- a/includes/libs/IEContentAnalyzer.php
+++ /dev/null
@@ -1,851 +0,0 @@
-<?php
-/**
- * Simulation of Microsoft Internet Explorer's MIME type detection algorithm.
- *
- * @file
- * @todo Define the exact license of this file.
- */
-
-/**
- * This class simulates Microsoft Internet Explorer's terribly broken and
- * insecure MIME type detection algorithm. It can be used to check web uploads
- * with an apparently safe type, to see if IE will reinterpret them to produce
- * something dangerous.
- *
- * It is full of bugs and strange design choices should not under any
- * circumstances be used to determine a MIME type to present to a user or
- * client. (Apple Safari developers, this means you too.)
- *
- * This class is based on a disassembly of IE 5.0, 6.0 and 7.0. Although I have
- * attempted to ensure that this code works in exactly the same way as Internet
- * Explorer, it does not share any source code, or creative choices such as
- * variable names, thus I (Tim Starling) claim copyright on it.
- *
- * It may be redistributed without restriction. To aid reuse, this class does
- * not depend on any MediaWiki module.
- */
-class IEContentAnalyzer {
- /**
- * Relevant data taken from the type table in IE 5
- */
- protected $baseTypeTable = array(
- 'ambiguous' /*1*/ => array(
- 'text/plain',
- 'application/octet-stream',
- 'application/x-netcdf', // [sic]
- ),
- 'text' /*3*/ => array(
- 'text/richtext', 'image/x-bitmap', 'application/postscript', 'application/base64',
- 'application/macbinhex40', 'application/x-cdf', 'text/scriptlet'
- ),
- 'binary' /*4*/ => array(
- 'application/pdf', 'audio/x-aiff', 'audio/basic', 'audio/wav', 'image/gif',
- 'image/pjpeg', 'image/jpeg', 'image/tiff', 'image/x-png', 'image/png', 'image/bmp',
- 'image/x-jg', 'image/x-art', 'image/x-emf', 'image/x-wmf', 'video/avi',
- 'video/x-msvideo', 'video/mpeg', 'application/x-compressed',
- 'application/x-zip-compressed', 'application/x-gzip-compressed', 'application/java',
- 'application/x-msdownload'
- ),
- 'html' /*5*/ => array( 'text/html' ),
- );
-
- /**
- * Changes to the type table in later versions of IE
- */
- protected $addedTypes = array(
- 'ie07' => array(
- 'text' => array( 'text/xml', 'application/xml' )
- ),
- );
-
- /**
- * An approximation of the "Content Type" values in HKEY_CLASSES_ROOT in a
- * typical Windows installation.
- *
- * Used for extension to MIME type mapping if detection fails.
- */
- protected $registry = array(
- '.323' => 'text/h323',
- '.3g2' => 'video/3gpp2',
- '.3gp' => 'video/3gpp',
- '.3gp2' => 'video/3gpp2',
- '.3gpp' => 'video/3gpp',
- '.aac' => 'audio/aac',
- '.ac3' => 'audio/ac3',
- '.accda' => 'application/msaccess',
- '.accdb' => 'application/msaccess',
- '.accdc' => 'application/msaccess',
- '.accde' => 'application/msaccess',
- '.accdr' => 'application/msaccess',
- '.accdt' => 'application/msaccess',
- '.ade' => 'application/msaccess',
- '.adp' => 'application/msaccess',
- '.adts' => 'audio/aac',
- '.ai' => 'application/postscript',
- '.aif' => 'audio/aiff',
- '.aifc' => 'audio/aiff',
- '.aiff' => 'audio/aiff',
- '.amc' => 'application/x-mpeg',
- '.application' => 'application/x-ms-application',
- '.asf' => 'video/x-ms-asf',
- '.asx' => 'video/x-ms-asf',
- '.au' => 'audio/basic',
- '.avi' => 'video/avi',
- '.bmp' => 'image/bmp',
- '.caf' => 'audio/x-caf',
- '.cat' => 'application/vnd.ms-pki.seccat',
- '.cbo' => 'application/sha',
- '.cdda' => 'audio/aiff',
- '.cer' => 'application/x-x509-ca-cert',
- '.conf' => 'text/plain',
- '.crl' => 'application/pkix-crl',
- '.crt' => 'application/x-x509-ca-cert',
- '.css' => 'text/css',
- '.csv' => 'application/vnd.ms-excel',
- '.der' => 'application/x-x509-ca-cert',
- '.dib' => 'image/bmp',
- '.dif' => 'video/x-dv',
- '.dll' => 'application/x-msdownload',
- '.doc' => 'application/msword',
- '.docm' => 'application/vnd.ms-word.document.macroEnabled.12',
- '.docx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
- '.dot' => 'application/msword',
- '.dotm' => 'application/vnd.ms-word.template.macroEnabled.12',
- '.dotx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.template',
- '.dv' => 'video/x-dv',
- '.dwfx' => 'model/vnd.dwfx+xps',
- '.edn' => 'application/vnd.adobe.edn',
- '.eml' => 'message/rfc822',
- '.eps' => 'application/postscript',
- '.etd' => 'application/x-ebx',
- '.exe' => 'application/x-msdownload',
- '.fdf' => 'application/vnd.fdf',
- '.fif' => 'application/fractals',
- '.gif' => 'image/gif',
- '.gsm' => 'audio/x-gsm',
- '.hqx' => 'application/mac-binhex40',
- '.hta' => 'application/hta',
- '.htc' => 'text/x-component',
- '.htm' => 'text/html',
- '.html' => 'text/html',
- '.htt' => 'text/webviewhtml',
- '.hxa' => 'application/xml',
- '.hxc' => 'application/xml',
- '.hxd' => 'application/octet-stream',
- '.hxe' => 'application/xml',
- '.hxf' => 'application/xml',
- '.hxh' => 'application/octet-stream',
- '.hxi' => 'application/octet-stream',
- '.hxk' => 'application/xml',
- '.hxq' => 'application/octet-stream',
- '.hxr' => 'application/octet-stream',
- '.hxs' => 'application/octet-stream',
- '.hxt' => 'application/xml',
- '.hxv' => 'application/xml',
- '.hxw' => 'application/octet-stream',
- '.ico' => 'image/x-icon',
- '.iii' => 'application/x-iphone',
- '.ins' => 'application/x-internet-signup',
- '.iqy' => 'text/x-ms-iqy',
- '.isp' => 'application/x-internet-signup',
- '.jfif' => 'image/jpeg',
- '.jnlp' => 'application/x-java-jnlp-file',
- '.jpe' => 'image/jpeg',
- '.jpeg' => 'image/jpeg',
- '.jpg' => 'image/jpeg',
- '.jtx' => 'application/x-jtx+xps',
- '.latex' => 'application/x-latex',
- '.log' => 'text/plain',
- '.m1v' => 'video/mpeg',
- '.m2v' => 'video/mpeg',
- '.m3u' => 'audio/x-mpegurl',
- '.mac' => 'image/x-macpaint',
- '.man' => 'application/x-troff-man',
- '.mda' => 'application/msaccess',
- '.mdb' => 'application/msaccess',
- '.mde' => 'application/msaccess',
- '.mfp' => 'application/x-shockwave-flash',
- '.mht' => 'message/rfc822',
- '.mhtml' => 'message/rfc822',
- '.mid' => 'audio/mid',
- '.midi' => 'audio/mid',
- '.mod' => 'video/mpeg',
- '.mov' => 'video/quicktime',
- '.mp2' => 'video/mpeg',
- '.mp2v' => 'video/mpeg',
- '.mp3' => 'audio/mpeg',
- '.mp4' => 'video/mp4',
- '.mpa' => 'video/mpeg',
- '.mpe' => 'video/mpeg',
- '.mpeg' => 'video/mpeg',
- '.mpf' => 'application/vnd.ms-mediapackage',
- '.mpg' => 'video/mpeg',
- '.mpv2' => 'video/mpeg',
- '.mqv' => 'video/quicktime',
- '.NMW' => 'application/nmwb',
- '.nws' => 'message/rfc822',
- '.odc' => 'text/x-ms-odc',
- '.ols' => 'application/vnd.ms-publisher',
- '.p10' => 'application/pkcs10',
- '.p12' => 'application/x-pkcs12',
- '.p7b' => 'application/x-pkcs7-certificates',
- '.p7c' => 'application/pkcs7-mime',
- '.p7m' => 'application/pkcs7-mime',
- '.p7r' => 'application/x-pkcs7-certreqresp',
- '.p7s' => 'application/pkcs7-signature',
- '.pct' => 'image/pict',
- '.pdf' => 'application/pdf',
- '.pdx' => 'application/vnd.adobe.pdx',
- '.pfx' => 'application/x-pkcs12',
- '.pic' => 'image/pict',
- '.pict' => 'image/pict',
- '.pinstall' => 'application/x-picasa-detect',
- '.pko' => 'application/vnd.ms-pki.pko',
- '.png' => 'image/png',
- '.pnt' => 'image/x-macpaint',
- '.pntg' => 'image/x-macpaint',
- '.pot' => 'application/vnd.ms-powerpoint',
- '.potm' => 'application/vnd.ms-powerpoint.template.macroEnabled.12',
- '.potx' => 'application/vnd.openxmlformats-officedocument.presentationml.template',
- '.ppa' => 'application/vnd.ms-powerpoint',
- '.ppam' => 'application/vnd.ms-powerpoint.addin.macroEnabled.12',
- '.pps' => 'application/vnd.ms-powerpoint',
- '.ppsm' => 'application/vnd.ms-powerpoint.slideshow.macroEnabled.12',
- '.ppsx' => 'application/vnd.openxmlformats-officedocument.presentationml.slideshow',
- '.ppt' => 'application/vnd.ms-powerpoint',
- '.pptm' => 'application/vnd.ms-powerpoint.presentation.macroEnabled.12',
- '.pptx' => 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
- '.prf' => 'application/pics-rules',
- '.ps' => 'application/postscript',
- '.pub' => 'application/vnd.ms-publisher',
- '.pwz' => 'application/vnd.ms-powerpoint',
- '.py' => 'text/plain',
- '.pyw' => 'text/plain',
- '.qht' => 'text/x-html-insertion',
- '.qhtm' => 'text/x-html-insertion',
- '.qt' => 'video/quicktime',
- '.qti' => 'image/x-quicktime',
- '.qtif' => 'image/x-quicktime',
- '.qtl' => 'application/x-quicktimeplayer',
- '.rat' => 'application/rat-file',
- '.rmf' => 'application/vnd.adobe.rmf',
- '.rmi' => 'audio/mid',
- '.rqy' => 'text/x-ms-rqy',
- '.rtf' => 'application/msword',
- '.sct' => 'text/scriptlet',
- '.sd2' => 'audio/x-sd2',
- '.sdp' => 'application/sdp',
- '.shtml' => 'text/html',
- '.sit' => 'application/x-stuffit',
- '.sldm' => 'application/vnd.ms-powerpoint.slide.macroEnabled.12',
- '.sldx' => 'application/vnd.openxmlformats-officedocument.presentationml.slide',
- '.slk' => 'application/vnd.ms-excel',
- '.snd' => 'audio/basic',
- '.so' => 'application/x-apachemodule',
- '.sol' => 'text/plain',
- '.sor' => 'text/plain',
- '.spc' => 'application/x-pkcs7-certificates',
- '.spl' => 'application/futuresplash',
- '.sst' => 'application/vnd.ms-pki.certstore',
- '.stl' => 'application/vnd.ms-pki.stl',
- '.swf' => 'application/x-shockwave-flash',
- '.thmx' => 'application/vnd.ms-officetheme',
- '.tif' => 'image/tiff',
- '.tiff' => 'image/tiff',
- '.txt' => 'text/plain',
- '.uls' => 'text/iuls',
- '.vcf' => 'text/x-vcard',
- '.vdx' => 'application/vnd.ms-visio.viewer',
- '.vsd' => 'application/vnd.ms-visio.viewer',
- '.vss' => 'application/vnd.ms-visio.viewer',
- '.vst' => 'application/vnd.ms-visio.viewer',
- '.vsx' => 'application/vnd.ms-visio.viewer',
- '.vtx' => 'application/vnd.ms-visio.viewer',
- '.wav' => 'audio/wav',
- '.wax' => 'audio/x-ms-wax',
- '.wbk' => 'application/msword',
- '.wdp' => 'image/vnd.ms-photo',
- '.wiz' => 'application/msword',
- '.wm' => 'video/x-ms-wm',
- '.wma' => 'audio/x-ms-wma',
- '.wmd' => 'application/x-ms-wmd',
- '.wmv' => 'video/x-ms-wmv',
- '.wmx' => 'video/x-ms-wmx',
- '.wmz' => 'application/x-ms-wmz',
- '.wpl' => 'application/vnd.ms-wpl',
- '.wsc' => 'text/scriptlet',
- '.wvx' => 'video/x-ms-wvx',
- '.xaml' => 'application/xaml+xml',
- '.xbap' => 'application/x-ms-xbap',
- '.xdp' => 'application/vnd.adobe.xdp+xml',
- '.xfdf' => 'application/vnd.adobe.xfdf',
- '.xht' => 'application/xhtml+xml',
- '.xhtml' => 'application/xhtml+xml',
- '.xla' => 'application/vnd.ms-excel',
- '.xlam' => 'application/vnd.ms-excel.addin.macroEnabled.12',
- '.xlk' => 'application/vnd.ms-excel',
- '.xll' => 'application/vnd.ms-excel',
- '.xlm' => 'application/vnd.ms-excel',
- '.xls' => 'application/vnd.ms-excel',
- '.xlsb' => 'application/vnd.ms-excel.sheet.binary.macroEnabled.12',
- '.xlsm' => 'application/vnd.ms-excel.sheet.macroEnabled.12',
- '.xlsx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
- '.xlt' => 'application/vnd.ms-excel',
- '.xltm' => 'application/vnd.ms-excel.template.macroEnabled.12',
- '.xltx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.template',
- '.xlw' => 'application/vnd.ms-excel',
- '.xml' => 'text/xml',
- '.xps' => 'application/vnd.ms-xpsdocument',
- '.xsl' => 'text/xml',
- );
-
- /**
- * IE versions which have been analysed to bring you this class, and for
- * which some substantive difference exists. These will appear as keys
- * in the return value of getRealMimesFromData(). The names are chosen to sort correctly.
- */
- protected $versions = array( 'ie05', 'ie06', 'ie07', 'ie07.strict', 'ie07.nohtml' );
-
- /**
- * Type table with versions expanded
- */
- protected $typeTable = array();
-
- /** constructor */
- function __construct() {
- // Construct versioned type arrays from the base type array plus additions
- $types = $this->baseTypeTable;
- foreach ( $this->versions as $version ) {
- if ( isset( $this->addedTypes[$version] ) ) {
- foreach ( $this->addedTypes[$version] as $format => $addedTypes ) {
- $types[$format] = array_merge( $types[$format], $addedTypes );
- }
- }
- $this->typeTable[$version] = $types;
- }
- }
-
- /**
- * Get the MIME types from getMimesFromData(), but convert the result from IE's
- * idiosyncratic private types into something other apps will understand.
- *
- * @param string $fileName the file name (unused at present)
- * @param string $chunk the first 256 bytes of the file
- * @param string $proposed the MIME type proposed by the server
- *
- * @return Array: map of IE version to detected MIME type
- */
- public function getRealMimesFromData( $fileName, $chunk, $proposed ) {
- $types = $this->getMimesFromData( $fileName, $chunk, $proposed );
- $types = array_map( array( $this, 'translateMimeType' ), $types );
- return $types;
- }
-
- /**
- * Translate a MIME type from IE's idiosyncratic private types into
- * more commonly understood type strings
- * @param $type
- * @return string
- */
- public function translateMimeType( $type ) {
- static $table = array(
- 'image/pjpeg' => 'image/jpeg',
- 'image/x-png' => 'image/png',
- 'image/x-wmf' => 'application/x-msmetafile',
- 'image/bmp' => 'image/x-bmp',
- 'application/x-zip-compressed' => 'application/zip',
- 'application/x-compressed' => 'application/x-compress',
- 'application/x-gzip-compressed' => 'application/x-gzip',
- 'audio/mid' => 'audio/midi',
- );
- if ( isset( $table[$type] ) ) {
- $type = $table[$type];
- }
- return $type;
- }
-
- /**
- * Get the untranslated MIME types for all known versions
- *
- * @param string $fileName the file name (unused at present)
- * @param string $chunk the first 256 bytes of the file
- * @param string $proposed the MIME type proposed by the server
- *
- * @return Array: map of IE version to detected MIME type
- */
- public function getMimesFromData( $fileName, $chunk, $proposed ) {
- $types = array();
- foreach ( $this->versions as $version ) {
- $types[$version] = $this->getMimeTypeForVersion( $version, $fileName, $chunk, $proposed );
- }
- return $types;
- }
-
- /**
- * Get the MIME type for a given named version
- * @param $version
- * @param $fileName
- * @param $chunk
- * @param $proposed
- * @return bool|string
- */
- protected function getMimeTypeForVersion( $version, $fileName, $chunk, $proposed ) {
- // Strip text after a semicolon
- $semiPos = strpos( $proposed, ';' );
- if ( $semiPos !== false ) {
- $proposed = substr( $proposed, 0, $semiPos );
- }
-
- $proposedFormat = $this->getDataFormat( $version, $proposed );
- if ( $proposedFormat == 'unknown'
- && $proposed != 'multipart/mixed'
- && $proposed != 'multipart/x-mixed-replace' )
- {
- return $proposed;
- }
- if ( strval( $chunk ) === '' ) {
- return $proposed;
- }
-
- // Truncate chunk at 255 bytes
- $chunk = substr( $chunk, 0, 255 );
-
- // IE does the Check*Headers() calls last, and instead does the following image
- // type checks by directly looking for the magic numbers. What I do here should
- // have the same effect since the magic number checks are identical in both cases.
- $result = $this->sampleData( $version, $chunk );
- $sampleFound = $result['found'];
- $counters = $result['counters'];
- $binaryType = $this->checkBinaryHeaders( $version, $chunk );
- $textType = $this->checkTextHeaders( $version, $chunk );
-
- if ( $proposed == 'text/html' && isset( $sampleFound['html'] ) ) {
- return 'text/html';
- }
- if ( $proposed == 'image/gif' && $binaryType == 'image/gif' ) {
- return 'image/gif';
- }
- if ( ( $proposed == 'image/pjpeg' || $proposed == 'image/jpeg' )
- && $binaryType == 'image/pjpeg' )
- {
- return $proposed;
- }
- // PNG check added in IE 7
- if ( $version >= 'ie07'
- && ( $proposed == 'image/x-png' || $proposed == 'image/png' )
- && $binaryType == 'image/x-png' )
- {
- return $proposed;
- }
-
- // CDF was removed in IE 7 so it won't be in $sampleFound for later versions
- if ( isset( $sampleFound['cdf'] ) ) {
- return 'application/x-cdf';
- }
-
- // RSS and Atom were added in IE 7 so they won't be in $sampleFound for
- // previous versions
- if ( isset( $sampleFound['rss'] ) ) {
- return 'application/rss+xml';
- }
- if ( isset( $sampleFound['rdf-tag'] )
- && isset( $sampleFound['rdf-url'] )
- && isset( $sampleFound['rdf-purl'] ) )
- {
- return 'application/rss+xml';
- }
- if ( isset( $sampleFound['atom'] ) ) {
- return 'application/atom+xml';
- }
-
- if ( isset( $sampleFound['xml'] ) ) {
- // TODO: I'm not sure under what circumstances this flag is enabled
- if ( strpos( $version, 'strict' ) !== false ) {
- if ( $proposed == 'text/html' || $proposed == 'text/xml' ) {
- return 'text/xml';
- }
- } else {
- return 'text/xml';
- }
- }
- if ( isset( $sampleFound['html'] ) ) {
- // TODO: I'm not sure under what circumstances this flag is enabled
- if ( strpos( $version, 'nohtml' ) !== false ) {
- if ( $proposed == 'text/plain' ) {
- return 'text/html';
- }
- } else {
- return 'text/html';
- }
- }
- if ( isset( $sampleFound['xbm'] ) ) {
- return 'image/x-bitmap';
- }
- if ( isset( $sampleFound['binhex'] ) ) {
- return 'application/macbinhex40';
- }
- if ( isset( $sampleFound['scriptlet'] ) ) {
- if ( strpos( $version, 'strict' ) !== false ) {
- if ( $proposed == 'text/plain' || $proposed == 'text/scriptlet' ) {
- return 'text/scriptlet';
- }
- } else {
- return 'text/scriptlet';
- }
- }
-
- // Freaky heuristics to determine if the data is text or binary
- // The heuristic is of course broken for non-ASCII text
- if ( $counters['ctrl'] != 0 && ( $counters['ff'] + $counters['low'] )
- < ( $counters['ctrl'] + $counters['high'] ) * 16 )
- {
- $kindOfBinary = true;
- $type = $binaryType ? $binaryType : $textType;
- if ( $type === false ) {
- $type = 'application/octet-stream';
- }
- } else {
- $kindOfBinary = false;
- $type = $textType ? $textType : $binaryType;
- if ( $type === false ) {
- $type = 'text/plain';
- }
- }
-
- // Check if the output format is ambiguous
- // This generally means that detection failed, real types aren't ambiguous
- $detectedFormat = $this->getDataFormat( $version, $type );
- if ( $detectedFormat != 'ambiguous' ) {
- return $type;
- }
-
- if ( $proposedFormat != 'ambiguous' ) {
- // FormatAgreesWithData()
- if ( $proposedFormat == 'text' && !$kindOfBinary ) {
- return $proposed;
- }
- if ( $proposedFormat == 'binary' && $kindOfBinary ) {
- return $proposed;
- }
- if ( $proposedFormat == 'html' ) {
- return $proposed;
- }
- }
-
- // Find a MIME type by searching the registry for the file extension.
- $dotPos = strrpos( $fileName, '.' );
- if ( $dotPos === false ) {
- return $type;
- }
- $ext = substr( $fileName, $dotPos );
- if ( isset( $this->registry[$ext] ) ) {
- return $this->registry[$ext];
- }
-
- // TODO: If the extension has an application registered to it, IE will return
- // application/octet-stream. We'll skip that, so we could erroneously
- // return text/plain or application/x-netcdf where application/octet-stream
- // would be correct.
-
- return $type;
- }
-
- /**
- * Check for text headers at the start of the chunk
- * Confirmed same in 5 and 7.
- * @param $version
- * @param $chunk
- * @return bool|string
- */
- private function checkTextHeaders( $version, $chunk ) {
- $chunk2 = substr( $chunk, 0, 2 );
- $chunk4 = substr( $chunk, 0, 4 );
- $chunk5 = substr( $chunk, 0, 5 );
- if ( $chunk4 == '%PDF' ) {
- return 'application/pdf';
- }
- if ( $chunk2 == '%!' ) {
- return 'application/postscript';
- }
- if ( $chunk5 == '{\\rtf' ) {
- return 'text/richtext';
- }
- if ( $chunk5 == 'begin' ) {
- return 'application/base64';
- }
- return false;
- }
-
- /**
- * Check for binary headers at the start of the chunk
- * Confirmed same in 5 and 7.
- * @param $version
- * @param $chunk
- * @return bool|string
- */
- private function checkBinaryHeaders( $version, $chunk ) {
- $chunk2 = substr( $chunk, 0, 2 );
- $chunk3 = substr( $chunk, 0, 3 );
- $chunk4 = substr( $chunk, 0, 4 );
- $chunk5 = substr( $chunk, 0, 5 );
- $chunk5uc = strtoupper( $chunk5 );
- $chunk8 = substr( $chunk, 0, 8 );
- if ( $chunk5uc == 'GIF87' || $chunk5uc == 'GIF89' ) {
- return 'image/gif';
- }
- if ( $chunk2 == "\xff\xd8" ) {
- return 'image/pjpeg'; // actually plain JPEG but this is what IE returns
- }
-
- if ( $chunk2 == 'BM'
- && substr( $chunk, 6, 2 ) == "\000\000"
- && substr( $chunk, 8, 2 ) == "\000\000" )
- {
- return 'image/bmp'; // another non-standard MIME
- }
- if ( $chunk4 == 'RIFF'
- && substr( $chunk, 8, 4 ) == 'WAVE' )
- {
- return 'audio/wav';
- }
- // These were integer literals in IE
- // Perhaps the author was not sure what the target endianness was
- if ( $chunk4 == ".sd\000"
- || $chunk4 == ".snd"
- || $chunk4 == "\000ds."
- || $chunk4 == "dns." )
- {
- return 'audio/basic';
- }
- if ( $chunk3 == "MM\000" ) {
- return 'image/tiff';
- }
- if ( $chunk2 == 'MZ' ) {
- return 'application/x-msdownload';
- }
- if ( $chunk8 == "\x89PNG\x0d\x0a\x1a\x0a" ) {
- return 'image/x-png'; // [sic]
- }
- if ( strlen( $chunk ) >= 5 ) {
- $byte2 = ord( $chunk[2] );
- $byte4 = ord( $chunk[4] );
- if ( $byte2 >= 3 && $byte2 <= 31 && $byte4 == 0 && $chunk2 == 'JG' ) {
- return 'image/x-jg';
- }
- }
- // More endian confusion?
- if ( $chunk4 == 'MROF' ) {
- return 'audio/x-aiff';
- }
- $chunk4_8 = substr( $chunk, 8, 4 );
- if ( $chunk4 == 'FORM' && ( $chunk4_8 == 'AIFF' || $chunk4_8 == 'AIFC' ) ) {
- return 'audio/x-aiff';
- }
- if ( $chunk4 == 'RIFF' && $chunk4_8 == 'AVI ' ) {
- return 'video/avi';
- }
- if ( $chunk4 == "\x00\x00\x01\xb3" || $chunk4 == "\x00\x00\x01\xba" ) {
- return 'video/mpeg';
- }
- if ( $chunk4 == "\001\000\000\000"
- && substr( $chunk, 40, 4 ) == ' EMF' )
- {
- return 'image/x-emf';
- }
- if ( $chunk4 == "\xd7\xcd\xc6\x9a" ) {
- return 'image/x-wmf';
- }
- if ( $chunk4 == "\xca\xfe\xba\xbe" ) {
- return 'application/java';
- }
- if ( $chunk2 == 'PK' ) {
- return 'application/x-zip-compressed';
- }
- if ( $chunk2 == "\x1f\x9d" ) {
- return 'application/x-compressed';
- }
- if ( $chunk2 == "\x1f\x8b" ) {
- return 'application/x-gzip-compressed';
- }
- // Skip redundant check for ZIP
- if ( $chunk5 == "MThd\000" ) {
- return 'audio/mid';
- }
- if ( $chunk4 == '%PDF' ) {
- return 'application/pdf';
- }
- return false;
- }
-
- /**
- * Do heuristic checks on the bulk of the data sample.
- * Search for HTML tags.
- * @param $version
- * @param $chunk
- * @return array
- */
- protected function sampleData( $version, $chunk ) {
- $found = array();
- $counters = array(
- 'ctrl' => 0,
- 'high' => 0,
- 'low' => 0,
- 'lf' => 0,
- 'cr' => 0,
- 'ff' => 0
- );
- $htmlTags = array(
- 'html',
- 'head',
- 'title',
- 'body',
- 'script',
- 'a href',
- 'pre',
- 'img',
- 'plaintext',
- 'table'
- );
- $rdfUrl = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
- $rdfPurl = 'http://purl.org/rss/1.0/';
- $xbmMagic1 = '#define';
- $xbmMagic2 = '_width';
- $xbmMagic3 = '_bits';
- $binhexMagic = 'converted with BinHex';
- $chunkLength = strlen( $chunk );
-
- for ( $offset = 0; $offset < $chunkLength; $offset++ ) {
- $curChar = $chunk[$offset];
- if ( $curChar == "\x0a" ) {
- $counters['lf']++;
- continue;
- } elseif ( $curChar == "\x0d" ) {
- $counters['cr']++;
- continue;
- } elseif ( $curChar == "\x0c" ) {
- $counters['ff']++;
- continue;
- } elseif ( $curChar == "\t" ) {
- $counters['low']++;
- continue;
- } elseif ( ord( $curChar ) < 32 ) {
- $counters['ctrl']++;
- continue;
- } elseif ( ord( $curChar ) >= 128 ) {
- $counters['high']++;
- continue;
- }
-
- $counters['low']++;
- if ( $curChar == '<' ) {
- // XML
- $remainder = substr( $chunk, $offset + 1 );
- if ( !strncasecmp( $remainder, '?XML', 4 ) ) {
- $nextChar = substr( $chunk, $offset + 5, 1 );
- if ( $nextChar == ':' || $nextChar == ' ' || $nextChar == "\t" ) {
- $found['xml'] = true;
- }
- }
- // Scriptlet (JSP)
- if ( !strncasecmp( $remainder, 'SCRIPTLET', 9 ) ) {
- $found['scriptlet'] = true;
- break;
- }
- // HTML
- foreach ( $htmlTags as $tag ) {
- if ( !strncasecmp( $remainder, $tag, strlen( $tag ) ) ) {
- $found['html'] = true;
- }
- }
- // Skip broken check for additional tags (HR etc.)
-
- // CHANNEL replaced by RSS, RDF and FEED in IE 7
- if ( $version < 'ie07' ) {
- if ( !strncasecmp( $remainder, 'CHANNEL', 7 ) ) {
- $found['cdf'] = true;
- }
- } else {
- // RSS
- if ( !strncasecmp( $remainder, 'RSS', 3 ) ) {
- $found['rss'] = true;
- break; // return from SampleData
- }
- if ( !strncasecmp( $remainder, 'rdf:RDF', 7 ) ) {
- $found['rdf-tag'] = true;
- // no break
- }
- if ( !strncasecmp( $remainder, 'FEED', 4 ) ) {
- $found['atom'] = true;
- break;
- }
- }
- continue;
- }
- // Skip broken check for -->
-
- // RSS URL checks
- // For some reason both URLs must appear before it is recognised
- $remainder = substr( $chunk, $offset );
- if ( !strncasecmp( $remainder, $rdfUrl, strlen( $rdfUrl ) ) ) {
- $found['rdf-url'] = true;
- if ( isset( $found['rdf-tag'] )
- && isset( $found['rdf-purl'] ) ) // [sic]
- {
- break;
- }
- continue;
- }
-
- if ( !strncasecmp( $remainder, $rdfPurl, strlen( $rdfPurl ) ) ) {
- if ( isset( $found['rdf-tag'] )
- && isset( $found['rdf-url'] ) ) // [sic]
- {
- break;
- }
- continue;
- }
-
- // XBM checks
- if ( !strncasecmp( $remainder, $xbmMagic1, strlen( $xbmMagic1 ) ) ) {
- $found['xbm1'] = true;
- continue;
- }
- if ( $curChar == '_' ) {
- if ( isset( $found['xbm2'] ) ) {
- if ( !strncasecmp( $remainder, $xbmMagic3, strlen( $xbmMagic3 ) ) ) {
- $found['xbm'] = true;
- break;
- }
- } elseif ( isset( $found['xbm1'] ) ) {
- if ( !strncasecmp( $remainder, $xbmMagic2, strlen( $xbmMagic2 ) ) ) {
- $found['xbm2'] = true;
- }
- }
- }
-
- // BinHex
- if ( !strncmp( $remainder, $binhexMagic, strlen( $binhexMagic ) ) ) {
- $found['binhex'] = true;
- }
- }
- return array( 'found' => $found, 'counters' => $counters );
- }
-
- /**
- * @param $version
- * @param $type
- * @return int|string
- */
- protected function getDataFormat( $version, $type ) {
- $types = $this->typeTable[$version];
- if ( $type == '(null)' || strval( $type ) === '' ) {
- return 'ambiguous';
- }
- foreach ( $types as $format => $list ) {
- if ( in_array( $type, $list ) ) {
- return $format;
- }
- }
- return 'unknown';
- }
-}
diff --git a/includes/libs/IEUrlExtension.php b/includes/libs/IEUrlExtension.php
deleted file mode 100644
index 49d05d4b..00000000
--- a/includes/libs/IEUrlExtension.php
+++ /dev/null
@@ -1,271 +0,0 @@
-<?php
-/**
- * Checks for validity of requested URL's extension.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- * http://www.gnu.org/copyleft/gpl.html
- *
- * @file
- */
-
-/**
- * Internet Explorer derives a cache filename from a URL, and then in certain
- * circumstances, uses the extension of the resulting file to determine the
- * content type of the data, ignoring the Content-Type header.
- *
- * This can be a problem, especially when non-HTML content is sent by MediaWiki,
- * and Internet Explorer interprets it as HTML, exposing an XSS vulnerability.
- *
- * Usually the script filename (e.g. api.php) is present in the URL, and this
- * makes Internet Explorer think the extension is a harmless script extension.
- * But Internet Explorer 6 and earlier allows the script extension to be
- * obscured by encoding the dot as "%2E".
- *
- * This class contains functions which help in detecting and dealing with this
- * situation.
- *
- * Checking the URL for a bad extension is somewhat complicated due to the fact
- * that CGI doesn't provide a standard method to determine the URL. Instead it
- * is necessary to pass a subset of $_SERVER variables, which we then attempt
- * to use to guess parts of the URL.
- */
-class IEUrlExtension {
- /**
- * Check a subset of $_SERVER (or the whole of $_SERVER if you like)
- * to see if it indicates that the request was sent with a bad file
- * extension. Returns true if the request should be denied or modified,
- * false otherwise. The relevant $_SERVER elements are:
- *
- * - SERVER_SOFTWARE
- * - REQUEST_URI
- * - QUERY_STRING
- * - PATH_INFO
- *
- * If the a variable is unset in $_SERVER, it should be unset in $vars.
- *
- * @param array $vars A subset of $_SERVER.
- * @param array $extWhitelist Extensions which are allowed, assumed harmless.
- * @return bool
- */
- public static function areServerVarsBad( $vars, $extWhitelist = array() ) {
- // Check QUERY_STRING or REQUEST_URI
- if ( isset( $vars['SERVER_SOFTWARE'] )
- && isset( $vars['REQUEST_URI'] )
- && self::haveUndecodedRequestUri( $vars['SERVER_SOFTWARE'] ) )
- {
- $urlPart = $vars['REQUEST_URI'];
- } elseif ( isset( $vars['QUERY_STRING'] ) ) {
- $urlPart = $vars['QUERY_STRING'];
- } else {
- $urlPart = '';
- }
-
- if ( self::isUrlExtensionBad( $urlPart, $extWhitelist ) ) {
- return true;
- }
-
- // Some servers have PATH_INFO but not REQUEST_URI, so we check both
- // to be on the safe side.
- if ( isset( $vars['PATH_INFO'] )
- && self::isUrlExtensionBad( $vars['PATH_INFO'], $extWhitelist ) )
- {
- return true;
- }
-
- // All checks passed
- return false;
- }
-
- /**
- * Given a right-hand portion of a URL, determine whether IE would detect
- * a potentially harmful file extension.
- *
- * @param string $urlPart The right-hand portion of a URL
- * @param array $extWhitelist An array of file extensions which may occur in this
- * URL, and which should be allowed.
- * @return bool
- */
- public static function isUrlExtensionBad( $urlPart, $extWhitelist = array() ) {
- if ( strval( $urlPart ) === '' ) {
- return false;
- }
-
- $extension = self::findIE6Extension( $urlPart );
- if ( strval( $extension ) === '' ) {
- // No extension or empty extension
- return false;
- }
-
- if ( in_array( $extension, array( 'php', 'php5' ) ) ) {
- // Script extension, OK
- return false;
- }
- if ( in_array( $extension, $extWhitelist ) ) {
- // Whitelisted extension
- return false;
- }
-
- if ( !preg_match( '/^[a-zA-Z0-9_-]+$/', $extension ) ) {
- // Non-alphanumeric extension, unlikely to be registered.
- //
- // The regex above is known to match all registered file extensions
- // in a default Windows XP installation. It's important to allow
- // extensions with ampersands and percent signs, since that reduces
- // the number of false positives substantially.
- return false;
- }
-
- // Possibly bad extension
- return true;
- }
-
- /**
- * Returns a variant of $url which will pass isUrlExtensionBad() but has the
- * same GET parameters, or false if it can't figure one out.
- * @param $url
- * @param $extWhitelist array
- * @return bool|string
- */
- public static function fixUrlForIE6( $url, $extWhitelist = array() ) {
- $questionPos = strpos( $url, '?' );
- if ( $questionPos === false ) {
- $beforeQuery = $url . '?';
- $query = '';
- } elseif ( $questionPos === strlen( $url ) - 1 ) {
- $beforeQuery = $url;
- $query = '';
- } else {
- $beforeQuery = substr( $url, 0, $questionPos + 1 );
- $query = substr( $url, $questionPos + 1 );
- }
-
- // Multiple question marks cause problems. Encode the second and
- // subsequent question mark.
- $query = str_replace( '?', '%3E', $query );
- // Append an invalid path character so that IE6 won't see the end of the
- // query string as an extension
- $query .= '&*';
- // Put the URL back together
- $url = $beforeQuery . $query;
- if ( self::isUrlExtensionBad( $url, $extWhitelist ) ) {
- // Avoid a redirect loop
- return false;
- }
- return $url;
- }
-
- /**
- * Determine what extension IE6 will infer from a certain query string.
- * If the URL has an extension before the question mark, IE6 will use
- * that and ignore the query string, but per the comment at
- * isPathInfoBad() we don't have a reliable way to determine the URL,
- * so isPathInfoBad() just passes in the query string for $url.
- * All entry points have safe extensions (php, php5) anyway, so
- * checking the query string is possibly overly paranoid but never
- * insecure.
- *
- * The criteria for finding an extension are as follows:
- * - a possible extension is a dot followed by one or more characters not
- * in <>\"/:|?.#
- * - if we find a possible extension followed by the end of the string or
- * a #, that's our extension
- * - if we find a possible extension followed by a ?, that's our extension
- * - UNLESS it's exe, dll or cgi, in which case we ignore it and continue
- * searching for another possible extension
- * - if we find a possible extension followed by a dot or another illegal
- * character, we ignore it and continue searching
- *
- * @param string $url URL
- * @return mixed Detected extension (string), or false if none found
- */
- public static function findIE6Extension( $url ) {
- $pos = 0;
- $hashPos = strpos( $url, '#' );
- if ( $hashPos !== false ) {
- $urlLength = $hashPos;
- } else {
- $urlLength = strlen( $url );
- }
- $remainingLength = $urlLength;
- while ( $remainingLength > 0 ) {
- // Skip ahead to the next dot
- $pos += strcspn( $url, '.', $pos, $remainingLength );
- if ( $pos >= $urlLength ) {
- // End of string, we're done
- return false;
- }
-
- // We found a dot. Skip past it
- $pos++;
- $remainingLength = $urlLength - $pos;
-
- // Check for illegal characters in our prospective extension,
- // or for another dot
- $nextPos = $pos + strcspn( $url, "<>\\\"/:|?*.", $pos, $remainingLength );
- if ( $nextPos >= $urlLength ) {
- // No illegal character or next dot
- // We have our extension
- return substr( $url, $pos, $urlLength - $pos );
- }
- if ( $url[$nextPos] === '?' ) {
- // We've found a legal extension followed by a question mark
- // If the extension is NOT exe, dll or cgi, return it
- $extension = substr( $url, $pos, $nextPos - $pos );
- if ( strcasecmp( $extension, 'exe' ) && strcasecmp( $extension, 'dll' ) &&
- strcasecmp( $extension, 'cgi' ) )
- {
- return $extension;
- }
- // Else continue looking
- }
- // We found an illegal character or another dot
- // Skip to that character and continue the loop
- $pos = $nextPos;
- $remainingLength = $urlLength - $pos;
- }
- return false;
- }
-
- /**
- * When passed the value of $_SERVER['SERVER_SOFTWARE'], this function
- * returns true if that server is known to have a REQUEST_URI variable
- * with %2E not decoded to ".". On such a server, it is possible to detect
- * whether the script filename has been obscured.
- *
- * The function returns false if the server is not known to have this
- * behavior. Microsoft IIS in particular is known to decode escaped script
- * filenames.
- *
- * SERVER_SOFTWARE typically contains either a plain string such as "Zeus",
- * or a specification in the style of a User-Agent header, such as
- * "Apache/1.3.34 (Unix) mod_ssl/2.8.25 OpenSSL/0.9.8a PHP/4.4.2"
- *
- * @param $serverSoftware
- * @return bool
- *
- */
- public static function haveUndecodedRequestUri( $serverSoftware ) {
- static $whitelist = array(
- 'Apache',
- 'Zeus',
- 'LiteSpeed' );
- if ( preg_match( '/^(.*?)($|\/| )/', $serverSoftware, $m ) ) {
- return in_array( $m[1], $whitelist );
- } else {
- return false;
- }
- }
-
-}