diff options
author | Brion Vibber <brion@pobox.com> | 2010-05-10 16:18:29 -0700 |
---|---|---|
committer | Brion Vibber <brion@pobox.com> | 2010-06-28 08:59:47 -0700 |
commit | 9c7b66984c46668f314f93337d28c62854b6d134 (patch) | |
tree | 75c9c75503803f8d863a7bd04f33c2f139a37849 | |
parent | 41d81b996fdd8276cc04e750297a12f852a97bf4 (diff) |
Enhanced upload file type detection. If given an original filename, we'll attempt to detect type from the extension if we were unable to make a definitive match from content. Generic octet-stream, zip, and MS Office type are explicitly singled out for re-checks, which fixes OpenOffice and MS Office documents to come up with the proper types when misdetected.
File extensions can also be added to the upload type whitelist; they'll be normalized to types for the actual comparison, so only known extensions will work.
-rw-r--r-- | lib/mediafile.php | 54 | ||||
-rw-r--r-- | tests/MediaFileTest.php | 73 | ||||
-rw-r--r-- | tests/sample-uploads/image.gif | bin | 0 -> 35 bytes | |||
-rw-r--r-- | tests/sample-uploads/image.jpeg | bin | 0 -> 306 bytes | |||
-rw-r--r-- | tests/sample-uploads/image.jpg | bin | 0 -> 306 bytes | |||
-rw-r--r-- | tests/sample-uploads/image.png | bin | 0 -> 159 bytes |
6 files changed, 97 insertions, 30 deletions
diff --git a/lib/mediafile.php b/lib/mediafile.php index 10d90d008..85d673d92 100644 --- a/lib/mediafile.php +++ b/lib/mediafile.php @@ -180,7 +180,8 @@ class MediaFile return; } - $mimetype = MediaFile::getUploadedFileType($_FILES[$param]['tmp_name']); + $mimetype = MediaFile::getUploadedFileType($_FILES[$param]['tmp_name'], + $_FILES[$param]['name']); $filename = null; @@ -241,19 +242,41 @@ class MediaFile return new MediaFile($user, $filename, $mimetype); } - static function getUploadedFileType($f) { + /** + * Attempt to identify the content type of a given file. + * + * @param mixed $f file handle resource, or filesystem path as string + * @param string $originalFilename (optional) for extension-based detection + * @return string + * + * @fixme is this an internal or public method? It's called from GetFileAction + * @fixme this seems to tie a front-end error message in, kinda confusing + * @fixme this looks like it could return a PEAR_Error in some cases, if + * type can't be identified and $config['attachments']['supported'] is true + * + * @throws ClientException if type is known, but not supported for local uploads + */ + static function getUploadedFileType($f, $originalFilename=false) { require_once 'MIME/Type.php'; + require_once 'MIME/Type/Extension.php'; + $mte = new MIME_Type_Extension(); $cmd = &PEAR::getStaticProperty('MIME_Type', 'fileCmd'); $cmd = common_config('attachments', 'filecommand'); $filetype = null; + // If we couldn't get a clear type from the file extension, + // we'll go ahead and try checking the content. Content checks + // are unambiguous for most image files, but nearly useless + // for office document formats. + if (is_string($f)) { // assuming a filename $filetype = MIME_Type::autoDetect($f); + } else { // assuming a filehandle @@ -262,7 +285,32 @@ class MediaFile $filetype = MIME_Type::autoDetect($stream['uri']); } - if (common_config('attachments', 'supported') === true || in_array($filetype, common_config('attachments', 'supported'))) { + // The content-based sources for MIME_Type::autoDetect() + // are wildly unreliable for office-type documents. If we've + // gotten an unclear reponse back or just couldn't identify it, + // we'll try detecting a type from its extension... + $unclearTypes = array('application/octet-stream', + 'application/vnd.ms-office', + 'application/zip'); + + if ($originalFilename && (!$filetype || in_array($filetype, $unclearTypes))) { + $type = $mte->getMIMEType($originalFilename); + if (is_string($type)) { + $filetype = $type; + } + } + + $supported = common_config('attachments', 'supported'); + if (is_array($supported)) { + // Normalize extensions to mime types + foreach ($supported as $i => $entry) { + if (strpos($entry, '/') === false) { + common_log(LOG_INFO, "sample.$entry"); + $supported[$i] = $mte->getMIMEType("sample.$entry"); + } + } + } + if ($supported === true || in_array($filetype, $supported)) { return $filetype; } $media = MIME_Type::getMedia($filetype); diff --git a/tests/MediaFileTest.php b/tests/MediaFileTest.php index 6fe995621..a76a4f45e 100644 --- a/tests/MediaFileTest.php +++ b/tests/MediaFileTest.php @@ -34,43 +34,62 @@ class MediaFileTest extends PHPUnit_Framework_TestCase if (!file_exists($filename)) { throw new Exception("WTF? $filename test file missing"); } - $this->assertEquals($expectedType, MediaFile::getUploadedFileType($filename)); + + $type = MediaFile::getUploadedFileType($filename, basename($filename)); + $this->assertEquals($expectedType, $type); + } + + /** + * @dataProvider fileTypeCases + * + */ + public function testUploadedFileType($filename, $expectedType) + { + if (!file_exists($filename)) { + throw new Exception("WTF? $filename test file missing"); + } + $tmp = tmpfile(); + fwrite($tmp, file_get_contents($filename)); + + $type = MediaFile::getUploadedFileType($tmp, basename($filename)); + $this->assertEquals($expectedType, $type); } static public function fileTypeCases() { $base = dirname(__FILE__); $dir = "$base/sample-uploads"; - return array( - array("$dir/office.pdf", "application/pdf"), + $files = array( + "image.png" => "image/png", + "image.gif" => "image/gif", + "image.jpg" => "image/jpeg", + "image.jpeg" => "image/jpeg", + + "office.pdf" => "application/pdf", - array("$dir/wordproc.odt", "application/vnd.oasis.opendocument.text"), - array("$dir/wordproc.ott", "application/vnd.oasis.opendocument.text-template"), - array("$dir/wordproc.doc", "application/msword"), - array("$dir/wordproc.docx", - "application/vnd.openxmlformats-officedocument.wordprocessingml.document"), - array("$dir/wordproc.rtf", "text/rtf"), + "wordproc.odt" => "application/vnd.oasis.opendocument.text", + "wordproc.ott" => "application/vnd.oasis.opendocument.text-template", + "wordproc.doc" => "application/msword", + "wordproc.docx" => "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "wordproc.rtf" => "text/rtf", - array("$dir/spreadsheet.ods", - "application/vnd.oasis.opendocument.spreadsheet"), - array("$dir/spreadsheet.ots", - "application/vnd.oasis.opendocument.spreadsheet-template"), - array("$dir/spreadsheet.xls", "application/vnd.ms-excel"), - array("$dir/spreadsheet.xlt", "application/vnd.ms-excel"), - array("$dir/spreadsheet.xlsx", - "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"), + "spreadsheet.ods" => "application/vnd.oasis.opendocument.spreadsheet", + "spreadsheet.ots" => "application/vnd.oasis.opendocument.spreadsheet-template", + "spreadsheet.xls" => "application/vnd.ms-excel", + "spreadsheet.xlt" => "application/vnd.ms-excel", + "spreadsheet.xlsx" => "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", - array("$dir/presentation.odp", - "application/vnd.oasis-opendocument.presentation"), - array("$dir/presentation.otp", - "application/vnd.oasis-opendocument.presentation-template"), - array("$dir/presentation.ppt", - "application/vnd.ms-powerpoint"), - array("$dir/presentation.pot", - "application/vnd.ms-powerpoint"), - array("$dir/presentation.pptx", - "application/vnd.openxmlformats-officedocument.presentationml.presentation"), + "presentation.odp" => "application/vnd.oasis.opendocument.presentation", + "presentation.otp" => "application/vnd.oasis.opendocument.presentation-template", + "presentation.ppt" => "application/vnd.ms-powerpoint", + "presentation.pptx" => "application/vnd.openxmlformats-officedocument.presentationml.presentation", ); + + $dataset = array(); + foreach ($files as $file => $type) { + $dataset[] = array("$dir/$file", $type); + } + return $dataset; } } diff --git a/tests/sample-uploads/image.gif b/tests/sample-uploads/image.gif Binary files differnew file mode 100644 index 000000000..b636f4b8d --- /dev/null +++ b/tests/sample-uploads/image.gif diff --git a/tests/sample-uploads/image.jpeg b/tests/sample-uploads/image.jpeg Binary files differnew file mode 100644 index 000000000..21fcb5aef --- /dev/null +++ b/tests/sample-uploads/image.jpeg diff --git a/tests/sample-uploads/image.jpg b/tests/sample-uploads/image.jpg Binary files differnew file mode 100644 index 000000000..21fcb5aef --- /dev/null +++ b/tests/sample-uploads/image.jpg diff --git a/tests/sample-uploads/image.png b/tests/sample-uploads/image.png Binary files differnew file mode 100644 index 000000000..60cbcfd17 --- /dev/null +++ b/tests/sample-uploads/image.png |