From 41d81b996fdd8276cc04e750297a12f852a97bf4 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Mon, 10 May 2010 15:32:02 -0700 Subject: Test cases for MediaFile::getUploadedFileType() with OpenOffice, MS Office, and PDF sample files (as saved from OpenOffice 3.2) Only 3 of 16 cases pass on my dev box with default config. Ouch! --- tests/MediaFileTest.php | 77 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 tests/MediaFileTest.php (limited to 'tests/MediaFileTest.php') diff --git a/tests/MediaFileTest.php b/tests/MediaFileTest.php new file mode 100644 index 000000000..6fe995621 --- /dev/null +++ b/tests/MediaFileTest.php @@ -0,0 +1,77 @@ +old_attachments_supported = common_config('attachments', 'supported'); + $GLOBALS['config']['attachments']['supported'] = true; + } + + public function tearDown() + { + $GLOBALS['config']['attachments']['supported'] = $this->old_attachments_supported; + } + + /** + * @dataProvider fileTypeCases + * + */ + public function testFileType($filename, $expectedType) + { + if (!file_exists($filename)) { + throw new Exception("WTF? $filename test file missing"); + } + $this->assertEquals($expectedType, MediaFile::getUploadedFileType($filename)); + } + + static public function fileTypeCases() + { + $base = dirname(__FILE__); + $dir = "$base/sample-uploads"; + return array( + array("$dir/office.pdf", "application/pdf"), + + array("$dir/wordproc.odt", "application/vnd.oasis.opendocument.text"), + array("$dir/wordproc.ott", "application/vnd.oasis.opendocument.text-template"), + array("$dir/wordproc.doc", "application/msword"), + array("$dir/wordproc.docx", + "application/vnd.openxmlformats-officedocument.wordprocessingml.document"), + array("$dir/wordproc.rtf", "text/rtf"), + + array("$dir/spreadsheet.ods", + "application/vnd.oasis.opendocument.spreadsheet"), + array("$dir/spreadsheet.ots", + "application/vnd.oasis.opendocument.spreadsheet-template"), + array("$dir/spreadsheet.xls", "application/vnd.ms-excel"), + array("$dir/spreadsheet.xlt", "application/vnd.ms-excel"), + array("$dir/spreadsheet.xlsx", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"), + + array("$dir/presentation.odp", + "application/vnd.oasis-opendocument.presentation"), + array("$dir/presentation.otp", + "application/vnd.oasis-opendocument.presentation-template"), + array("$dir/presentation.ppt", + "application/vnd.ms-powerpoint"), + array("$dir/presentation.pot", + "application/vnd.ms-powerpoint"), + array("$dir/presentation.pptx", + "application/vnd.openxmlformats-officedocument.presentationml.presentation"), + ); + } + +} + -- cgit v1.2.3-54-g00ecf From 9c7b66984c46668f314f93337d28c62854b6d134 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Mon, 10 May 2010 16:18:29 -0700 Subject: Enhanced upload file type detection. If given an original filename, we'll attempt to detect type from the extension if we were unable to make a definitive match from content. Generic octet-stream, zip, and MS Office type are explicitly singled out for re-checks, which fixes OpenOffice and MS Office documents to come up with the proper types when misdetected. File extensions can also be added to the upload type whitelist; they'll be normalized to types for the actual comparison, so only known extensions will work. --- lib/mediafile.php | 54 +++++++++++++++++++++++++++-- tests/MediaFileTest.php | 73 +++++++++++++++++++++++++--------------- tests/sample-uploads/image.gif | Bin 0 -> 35 bytes tests/sample-uploads/image.jpeg | Bin 0 -> 306 bytes tests/sample-uploads/image.jpg | Bin 0 -> 306 bytes tests/sample-uploads/image.png | Bin 0 -> 159 bytes 6 files changed, 97 insertions(+), 30 deletions(-) create mode 100644 tests/sample-uploads/image.gif create mode 100644 tests/sample-uploads/image.jpeg create mode 100644 tests/sample-uploads/image.jpg create mode 100644 tests/sample-uploads/image.png (limited to 'tests/MediaFileTest.php') diff --git a/lib/mediafile.php b/lib/mediafile.php index 10d90d008..85d673d92 100644 --- a/lib/mediafile.php +++ b/lib/mediafile.php @@ -180,7 +180,8 @@ class MediaFile return; } - $mimetype = MediaFile::getUploadedFileType($_FILES[$param]['tmp_name']); + $mimetype = MediaFile::getUploadedFileType($_FILES[$param]['tmp_name'], + $_FILES[$param]['name']); $filename = null; @@ -241,19 +242,41 @@ class MediaFile return new MediaFile($user, $filename, $mimetype); } - static function getUploadedFileType($f) { + /** + * Attempt to identify the content type of a given file. + * + * @param mixed $f file handle resource, or filesystem path as string + * @param string $originalFilename (optional) for extension-based detection + * @return string + * + * @fixme is this an internal or public method? It's called from GetFileAction + * @fixme this seems to tie a front-end error message in, kinda confusing + * @fixme this looks like it could return a PEAR_Error in some cases, if + * type can't be identified and $config['attachments']['supported'] is true + * + * @throws ClientException if type is known, but not supported for local uploads + */ + static function getUploadedFileType($f, $originalFilename=false) { require_once 'MIME/Type.php'; + require_once 'MIME/Type/Extension.php'; + $mte = new MIME_Type_Extension(); $cmd = &PEAR::getStaticProperty('MIME_Type', 'fileCmd'); $cmd = common_config('attachments', 'filecommand'); $filetype = null; + // If we couldn't get a clear type from the file extension, + // we'll go ahead and try checking the content. Content checks + // are unambiguous for most image files, but nearly useless + // for office document formats. + if (is_string($f)) { // assuming a filename $filetype = MIME_Type::autoDetect($f); + } else { // assuming a filehandle @@ -262,7 +285,32 @@ class MediaFile $filetype = MIME_Type::autoDetect($stream['uri']); } - if (common_config('attachments', 'supported') === true || in_array($filetype, common_config('attachments', 'supported'))) { + // The content-based sources for MIME_Type::autoDetect() + // are wildly unreliable for office-type documents. If we've + // gotten an unclear reponse back or just couldn't identify it, + // we'll try detecting a type from its extension... + $unclearTypes = array('application/octet-stream', + 'application/vnd.ms-office', + 'application/zip'); + + if ($originalFilename && (!$filetype || in_array($filetype, $unclearTypes))) { + $type = $mte->getMIMEType($originalFilename); + if (is_string($type)) { + $filetype = $type; + } + } + + $supported = common_config('attachments', 'supported'); + if (is_array($supported)) { + // Normalize extensions to mime types + foreach ($supported as $i => $entry) { + if (strpos($entry, '/') === false) { + common_log(LOG_INFO, "sample.$entry"); + $supported[$i] = $mte->getMIMEType("sample.$entry"); + } + } + } + if ($supported === true || in_array($filetype, $supported)) { return $filetype; } $media = MIME_Type::getMedia($filetype); diff --git a/tests/MediaFileTest.php b/tests/MediaFileTest.php index 6fe995621..a76a4f45e 100644 --- a/tests/MediaFileTest.php +++ b/tests/MediaFileTest.php @@ -34,43 +34,62 @@ class MediaFileTest extends PHPUnit_Framework_TestCase if (!file_exists($filename)) { throw new Exception("WTF? $filename test file missing"); } - $this->assertEquals($expectedType, MediaFile::getUploadedFileType($filename)); + + $type = MediaFile::getUploadedFileType($filename, basename($filename)); + $this->assertEquals($expectedType, $type); + } + + /** + * @dataProvider fileTypeCases + * + */ + public function testUploadedFileType($filename, $expectedType) + { + if (!file_exists($filename)) { + throw new Exception("WTF? $filename test file missing"); + } + $tmp = tmpfile(); + fwrite($tmp, file_get_contents($filename)); + + $type = MediaFile::getUploadedFileType($tmp, basename($filename)); + $this->assertEquals($expectedType, $type); } static public function fileTypeCases() { $base = dirname(__FILE__); $dir = "$base/sample-uploads"; - return array( - array("$dir/office.pdf", "application/pdf"), + $files = array( + "image.png" => "image/png", + "image.gif" => "image/gif", + "image.jpg" => "image/jpeg", + "image.jpeg" => "image/jpeg", + + "office.pdf" => "application/pdf", - array("$dir/wordproc.odt", "application/vnd.oasis.opendocument.text"), - array("$dir/wordproc.ott", "application/vnd.oasis.opendocument.text-template"), - array("$dir/wordproc.doc", "application/msword"), - array("$dir/wordproc.docx", - "application/vnd.openxmlformats-officedocument.wordprocessingml.document"), - array("$dir/wordproc.rtf", "text/rtf"), + "wordproc.odt" => "application/vnd.oasis.opendocument.text", + "wordproc.ott" => "application/vnd.oasis.opendocument.text-template", + "wordproc.doc" => "application/msword", + "wordproc.docx" => "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "wordproc.rtf" => "text/rtf", - array("$dir/spreadsheet.ods", - "application/vnd.oasis.opendocument.spreadsheet"), - array("$dir/spreadsheet.ots", - "application/vnd.oasis.opendocument.spreadsheet-template"), - array("$dir/spreadsheet.xls", "application/vnd.ms-excel"), - array("$dir/spreadsheet.xlt", "application/vnd.ms-excel"), - array("$dir/spreadsheet.xlsx", - "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"), + "spreadsheet.ods" => "application/vnd.oasis.opendocument.spreadsheet", + "spreadsheet.ots" => "application/vnd.oasis.opendocument.spreadsheet-template", + "spreadsheet.xls" => "application/vnd.ms-excel", + "spreadsheet.xlt" => "application/vnd.ms-excel", + "spreadsheet.xlsx" => "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", - array("$dir/presentation.odp", - "application/vnd.oasis-opendocument.presentation"), - array("$dir/presentation.otp", - "application/vnd.oasis-opendocument.presentation-template"), - array("$dir/presentation.ppt", - "application/vnd.ms-powerpoint"), - array("$dir/presentation.pot", - "application/vnd.ms-powerpoint"), - array("$dir/presentation.pptx", - "application/vnd.openxmlformats-officedocument.presentationml.presentation"), + "presentation.odp" => "application/vnd.oasis.opendocument.presentation", + "presentation.otp" => "application/vnd.oasis.opendocument.presentation-template", + "presentation.ppt" => "application/vnd.ms-powerpoint", + "presentation.pptx" => "application/vnd.openxmlformats-officedocument.presentationml.presentation", ); + + $dataset = array(); + foreach ($files as $file => $type) { + $dataset[] = array("$dir/$file", $type); + } + return $dataset; } } diff --git a/tests/sample-uploads/image.gif b/tests/sample-uploads/image.gif new file mode 100644 index 000000000..b636f4b8d Binary files /dev/null and b/tests/sample-uploads/image.gif differ diff --git a/tests/sample-uploads/image.jpeg b/tests/sample-uploads/image.jpeg new file mode 100644 index 000000000..21fcb5aef Binary files /dev/null and b/tests/sample-uploads/image.jpeg differ diff --git a/tests/sample-uploads/image.jpg b/tests/sample-uploads/image.jpg new file mode 100644 index 000000000..21fcb5aef Binary files /dev/null and b/tests/sample-uploads/image.jpg differ diff --git a/tests/sample-uploads/image.png b/tests/sample-uploads/image.png new file mode 100644 index 000000000..60cbcfd17 Binary files /dev/null and b/tests/sample-uploads/image.png differ -- cgit v1.2.3-54-g00ecf